[2] | 1 | #!/usr/bin/env python |
---|
| 2 | |
---|
| 3 | |
---|
| 4 | """ |
---|
| 5 | histogram_gnuplot.py <data_file> <xtic_column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size> |
---|
| 6 | a generic histogram builder based on gnuplot backend |
---|
| 7 | |
---|
| 8 | data_file - tab delimited file with data |
---|
| 9 | xtic_column - column containing labels for x ticks [integer, 0 means no ticks] |
---|
| 10 | column_list - comma separated list of columns to plot |
---|
| 11 | title - title for the entire histogram |
---|
| 12 | ylabel - y axis label |
---|
| 13 | yrange_min - minimal value at the y axis (integer) |
---|
| 14 | yrange_max - maximal value at the y_axis (integer) |
---|
| 15 | to set yrange to autoscaling assign 0 to yrange_min and yrange_max |
---|
| 16 | graph_file - file to write histogram image to |
---|
| 17 | img_size - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.) |
---|
| 18 | |
---|
| 19 | |
---|
| 20 | This tool requires gnuplot and gnuplot.py |
---|
| 21 | |
---|
| 22 | anton nekrutenko | anton@bx.psu.edu |
---|
| 23 | |
---|
| 24 | """ |
---|
| 25 | |
---|
| 26 | import Gnuplot, Gnuplot.funcutils |
---|
| 27 | import sys, string, tempfile, os |
---|
| 28 | |
---|
# Refuse to run on interpreters older than Python 2.4.
assert (2, 4) <= sys.version_info[:2]
---|
| 30 | |
---|
def stop_err(msg):
    """Report a fatal error and terminate the script.

    msg - text written verbatim to standard error (callers supply their
          own trailing newline)

    Raises SystemExit with status 1, so the failure is visible to the
    calling process through the exit code as well as through stderr.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) after an error.
    sys.exit(1)
---|
| 34 | |
---|
def _write_plot_data(in_file, gf, col_indexes, xtic):
    """Copy the plottable rows of in_file to gf; return (valid_count, skipped_indexes)."""
    valid_count = 0
    skipped_lines_index = []
    for i, line in enumerate(in_file):
        line = line.rstrip('\r\n')
        # Blank lines and '#' comments are counted as skipped lines.
        if not line or line.startswith('#'):
            skipped_lines_index.append(i)
            continue
        fields = line.split('\t')
        try:
            row = [str(float(fields[idx])) for idx in col_indexes]
            # The tick label always goes last, after the numeric columns.
            if xtic > 0:
                row.append(fields[xtic - 1])
            elif xtic == 0:
                row.append(str(i))
        except (IndexError, ValueError):
            # Too few fields, or a non-numeric value in a plotted column.
            skipped_lines_index.append(i)
            continue
        gf.write('\t'.join(row) + '\n')
        valid_count += 1
    return valid_count, skipped_lines_index


def _build_plot_command(tmpFileName, col_list, xtic):
    """Build the argument of gnuplot's plot statement, one clause per plotted column."""
    if xtic > 0:
        # Labels live in the column written after all numeric columns; this
        # position is fixed by col_list, not by whichever row was read last.
        clauses = ["'%s' using 1:xticlabels(%s) ti 'Column %s'"
                   % (tmpFileName, str(len(col_list) + 1), col_list[0])]
    else:
        clauses = ["'%s' using 1 ti 'Column %s'" % (tmpFileName, col_list[0])]
    for pos in range(1, len(col_list)):
        clauses.append("'%s' using %s t 'Column %s'"
                       % (tmpFileName, str(pos + 1), col_list[pos]))
    return ', '.join(clauses)


def main(tmpFileName):
    """Extract the requested columns and plot them as a clustered histogram.

    tmpFileName - path of a scratch file; the numeric columns (plus the
                  x tick label as the last column) are written to it and
                  gnuplot is then pointed at it.

    All other inputs are read from sys.argv:
      1 data file (tab delimited)   2 xtic column (0 = no ticks)
      3 comma separated column list 4 title          5 y axis label
      6 yrange min                  7 yrange max     8 output image file
      9 image size as "X,Y"

    Lines that are blank, start with '#', have too few fields or hold a
    non-numeric value in a plotted column are skipped; a warning naming the
    first skipped line is written to stdout. If no line is usable a message
    is written to stderr and nothing is plotted. Bad arguments and gnuplot
    failures terminate the script through stop_err().
    """
    try:
        xtic = int(sys.argv[2])
        col_list = sys.argv[3].split(',')
        # NOTE(review): title and ylabel are interpolated unescaped into
        # gnuplot commands; a double quote in either breaks the command.
        title = 'set title "' + sys.argv[4] + '"'
        ylabel = 'set ylabel "' + sys.argv[5] + '"'
        ymin = sys.argv[6]
        ymax = sys.argv[7]
        img_file = sys.argv[8]
        img_size = sys.argv[9]
        in_file = open(sys.argv[1], 'r')
    except (IndexError, ValueError, IOError):
        stop_err("Check arguments\n")

    # Convert the 1-based column numbers once instead of once per row.
    try:
        col_indexes = [int(col) - 1 for col in col_list]
    except ValueError:
        in_file.close()
        stop_err('You forgot to set columns for plotting\n')

    gf = open(tmpFileName, 'w')
    try:
        valid_count, skipped_lines_index = _write_plot_data(in_file, gf, col_indexes, xtic)
    finally:
        # Close (and thereby flush) the data file before gnuplot reads it.
        gf.close()
        in_file.close()

    if valid_count > 0:
        g_plot_command = _build_plot_command(tmpFileName, col_list, xtic)
        yrange = 'set yrange [' + ymin + ":" + ymax + ']'

        try:
            g = Gnuplot.Gnuplot()
            g('reset')
            g('set boxwidth 0.9 absolute')
            g('set style fill solid 1.00 border -1')
            g('set style histogram clustered gap 5 title offset character 0, 0, 0')
            g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0')
            g('set key invert reverse Left outside')
            if xtic == 0:
                g('unset xtics')
            g(title)
            g(ylabel)
            g('set terminal png tiny size ' + img_size)
            # Equal min and max (e.g. both 0) means "leave y on autoscale".
            if ymin != ymax:
                g(yrange)
            g('set output "' + img_file + '"')
            g('set style data histograms')
            g.plot(g_plot_command)
        except Exception:
            stop_err("Gnuplot error: Data cannot be plotted")
    else:
        sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' % sys.argv[3])

    if skipped_lines_index:
        sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d.  These lines were skipped while building the graph.\n' % (len(skipped_lines_index), skipped_lines_index[0] + 1))
---|
| 136 | |
---|
| 137 | |
---|
if __name__ == "__main__":
    Gnuplot.gp.GnuplotOpts.default_term = 'png'
    # The scratch file is created at this level, not inside main(): created
    # there, it appeared to be removed before gnuplot had a chance to read it.
    scratch_file = tempfile.NamedTemporaryFile('w')
    main(scratch_file.name)
---|
| 144 | |
---|