1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | |
---|
4 | """ |
---|
5 | histogram_gnuplot.py <data_file> <xtic_column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size> |
---|
6 | a generic histogram builder based on gnuplot backend |
---|
7 | |
---|
8 | data_file - tab delimited file with data |
---|
9 | xtic_column - column containing labels for x ticks [integer, 0 means no ticks] |
---|
10 | column_list - comma separated list of columns to plot |
---|
11 | title - title for the entire histogram |
---|
12 | ylabel - y axis label |
---|
13 | yrange_min - minimal value at the y axis (integer) |
---|
14 | yrange_max - maximal value at the y_axis (integer) |
---|
15 | to set yrange to autoscaling assign 0 to yrange_min and yrange_max |
---|
16 | graph_file - file to write histogram image to |
---|
17 | img_size - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.) |
---|
18 | |
---|
19 | |
---|
20 | This tool requires gnuplot and gnuplot.py |
---|
21 | |
---|
22 | anton nekrutenko | anton@bx.psu.edu |
---|
23 | |
---|
24 | """ |
---|
25 | |
---|
26 | import Gnuplot, Gnuplot.funcutils |
---|
27 | import sys, string, tempfile, os |
---|
28 | |
---|
# Refuse to run on interpreters older than Python 2.4.
assert ( 2, 4 ) <= sys.version_info[:2]
---|
30 | |
---|
def stop_err(msg):
    """Report a fatal error on stderr and terminate the script.

    msg - text written verbatim to stderr; callers supply any trailing
          newline themselves.

    Raises SystemExit with status 1, so the failure is visible in the
    process exit code as well as on stderr.
    """
    sys.stderr.write(msg)
    sys.exit(1)
---|
34 | |
---|
def _plot_command(data_path, col_list, xtic):
    """Return the argument string for gnuplot's `plot` command.

    One histogram series is emitted per entry of col_list.  The scratch
    file holds the plotted values in columns 1..len(col_list); when xtic
    is positive the tick labels sit in the column right after them.
    """
    if xtic > 0:
        first = "'%s' using 1:xticlabels(%s) ti 'Column %s'" % (
            data_path, len(col_list) + 1, col_list[0])
    else:
        first = "'%s' using 1 ti 'Column %s'" % (data_path, col_list[0])
    items = [first]
    for pos in range(1, len(col_list)):
        items.append("'%s' using %s t 'Column %s'" % (
            data_path, pos + 1, col_list[pos]))
    return ', '.join(items)


def main(tmpFileName):
    """Plot selected columns of a tab-delimited file as a gnuplot histogram.

    All settings are read from sys.argv: data file, xtic column, column
    list, title, ylabel, yrange_min, yrange_max, image file and image size
    (see the module docstring for their meaning).

    tmpFileName - path of a scratch file that receives the cleaned-up rows
                  (numeric values plus the tick label); gnuplot reads its
                  data from this file.

    Lines that are blank, start with '#', lack one of the requested
    columns, or hold a non-numeric value in one of them are skipped; a
    summary of skipped lines is written to stdout.  If no line is usable,
    a message is written to stderr and nothing is plotted.  Unusable
    arguments or a gnuplot failure terminate the script via stop_err().
    """
    # NOTE(review): title, ylabel, img_file and img_size are interpolated
    # unescaped into gnuplot commands.  gnuplot performs backquote command
    # substitution inside double-quoted strings, so these values must come
    # from a trusted or sanitized source.
    try:
        xtic = int(sys.argv[2])
        col_list = sys.argv[3].split(',')
        title = 'set title "' + sys.argv[4] + '"'
        ylabel = 'set ylabel "' + sys.argv[5] + '"'
        ymin = sys.argv[6]
        ymax = sys.argv[7]
        img_file = sys.argv[8]
        img_size = sys.argv[9]
        # Opened last so that a bad argument cannot leak the handle.
        in_file = open(sys.argv[1], 'r')
    except (IndexError, ValueError, IOError):
        stop_err("Check arguments\n")

    # Convert the column numbers once, up front, instead of per line.
    try:
        columns = [int(col) for col in col_list]
    except ValueError:
        in_file.close()
        stop_err('You forgot to set columns for plotting\n')

    skipped_lines_count = 0
    skipped_lines_index = []
    valid_lines_count = 0

    gf = open(tmpFileName, 'w')
    try:
        for i, line in enumerate(in_file):
            line = line.rstrip('\r\n')
            row = None
            if line and not line.startswith('#'):
                try:
                    fields = line.split('\t')
                    row = [str(float(fields[col - 1])) for col in columns]
                    if xtic > 0:
                        # The tick label lookup can also hit a short line,
                        # so it belongs inside the same try.
                        row.append(fields[xtic - 1])
                    elif xtic == 0:
                        row.append(str(i))
                except (ValueError, IndexError):
                    row = None
            if row is None:
                skipped_lines_count += 1
                skipped_lines_index.append(i)
            else:
                gf.write('\t'.join(row) + '\n')
                valid_lines_count += 1
    finally:
        # Close (and thereby flush) the scratch file before gnuplot is
        # asked to read it; otherwise gnuplot may see a truncated file.
        gf.close()
        in_file.close()

    if valid_lines_count > 0:
        g_plot_command = _plot_command(tmpFileName, col_list, xtic)
        yrange = 'set yrange [' + ymin + ":" + ymax + ']'

        try:
            g = Gnuplot.Gnuplot()
            g('reset')
            g('set boxwidth 0.9 absolute')
            g('set style fill solid 1.00 border -1')
            g('set style histogram clustered gap 5 title offset character 0, 0, 0')
            g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0')
            g('set key invert reverse Left outside')
            if xtic == 0:
                g('unset xtics')
            g(title)
            g(ylabel)
            g('set terminal png tiny size ' + img_size)
            # Identical min and max (e.g. "0" and "0") request autoscaling.
            if ymin != ymax:
                g(yrange)
            g('set output "' + img_file + '"')
            g('set style data histograms')
            g.plot(g_plot_command)
        except Exception:
            stop_err("Gnuplot error: Data cannot be plotted")
    else:
        sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' % sys.argv[3])

    if skipped_lines_count > 0:
        sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d.  These lines were skipped while building the graph.\n' % (skipped_lines_count, skipped_lines_index[0] + 1))
---|
136 | |
---|
137 | |
---|
if __name__ == "__main__":
    # Make gnuplot.py emit PNG output by default.
    Gnuplot.gp.GnuplotOpts.default_term = 'png'
    # The scratch file is created here and not inside main(): when it was
    # created inside main() it seemed to get removed before gnuplot had a
    # chance to read it.
    gp_data_file = tempfile.NamedTemporaryFile(mode='w')
    main(gp_data_file.name)
---|
144 | |
---|