[2] | 1 | #!/usr/bin/env python |
---|
| 2 | |
---|
| 3 | # Usage: python histogram.py input_file column bins output_file style |
---|
| 4 | import sys, os |
---|
| 5 | import matplotlib; matplotlib.use('Agg') |
---|
| 6 | |
---|
| 7 | from pylab import * |
---|
| 8 | |
---|
| 9 | assert sys.version_info[:2] >= ( 2, 4 ) |
---|
| 10 | |
---|
def stop_err(msg):
    """Write *msg* to standard error and terminate the script.

    The process exits with status 1 so that anything inspecting the exit
    code (a shell, a job runner) sees the failure; a bare ``sys.exit()``
    would report success (status 0) even though an error was printed.

    Args:
        msg: text written to ``sys.stderr`` exactly as given (no newline
            is appended).

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(msg)
    sys.exit(1)
---|
| 14 | |
---|
if __name__ == '__main__':
    # Command line: histogram.py input_file column bins output_file style
    #   input_file  - whitespace-separated text; blank lines and lines that
    #                 start with '#' are skipped
    #   column      - 1-based column whose values are plotted
    #   bins        - number of bins in 'hist' mode; in any other mode this
    #                 slot carries the plot style code ('P' = points,
    #                 'LP' = points joined by lines, anything else = lines)
    #   output_file - path that receives the PNG image
    #   style       - 'hist' for a histogram, anything else for a scatter plot
    #
    # The syntax below is deliberately limited to constructs that are valid
    # on both Python 2.4+ and Python 3 (no 'with', no 'except ... as').
    if len(sys.argv) != 6:
        # stop_err() terminates the process, no extra exit call is needed
        stop_err('Usage: python histogram.py input_file column bins output_file style')

    inp_file = sys.argv[1]
    out_file = sys.argv[4]
    mode = sys.argv[5]
    HIST = mode == 'hist'

    # parse the numeric arguments; only ValueError/OverflowError can come out
    # of int(float(...)), so nothing broader is caught
    try:
        col = int(float(sys.argv[2]))
        if HIST:
            num_bins = int(float(sys.argv[3]))
    except (ValueError, OverflowError):
        # report the arguments that were actually parsed
        if HIST:
            stop_err('Parameters were not numbers %s, %s' % (sys.argv[2], sys.argv[3]))
        else:
            stop_err('Column was not a number %s' % sys.argv[2])

    # a column below 1 would turn into a negative index and silently read
    # from the end of each row
    if col < 1:
        stop_err('Column must be 1 or greater, got %s' % col)
    if HIST and num_bins < 1:
        stop_err('Number of bins must be 1 or greater, got %s' % num_bins)

    if not HIST:
        # hack, this parameter is the plotting style for scatter plots
        style = {'P': 'o', 'LP': 'o-'}.get(sys.argv[3], '-')

    if HIST:
        print("Histogram on column %s (%s bins)" % (col, num_bins))
    else:
        print("Scatterplot on column %s" % (col))

    xcol = col - 1

    # read the file
    values = []
    count = 0  # 1-based number of the line being parsed, for diagnostics
    try:
        handle = open(inp_file)
        try:
            for line in handle:
                count += 1
                line = line.strip()
                if line and line[0] != '#':
                    values.append(float(line.split()[xcol]))
        finally:
            handle.close()
    except ValueError:
        stop_err("Non numerical data at line %d, column %d (%s)"
                 % (count, col, sys.exc_info()[1]))
    except IndexError:
        stop_err("Line %d has no column %d" % (count, col))
    except (IOError, OSError):
        stop_err('%s' % sys.exc_info()[1])

    # plot the data
    if HIST:
        # raw counts are hist()'s default, so no normalisation flag is passed
        hist(values, bins=num_bins)
    else:
        plot(values, style)

    xlabel('values')
    ylabel('counts')

    if HIST:
        # hist() returns the bin *edges* (one more than the number of bins),
        # so the title uses the requested bin count instead
        title('Histogram of values over column %s (%s bins)' % (col, num_bins))
    else:
        title('Scatterplot over column %s' % col)
    grid(True)

    # the plotter detects types by file extension
    png_out = out_file + '.png'  # force it to png
    savefig(png_out)

    # shuffle it back and clean up; the temporary file is removed even when
    # the copy fails, and both handles are always closed
    try:
        src = open(png_out, 'rb')
        try:
            data = src.read()
        finally:
            src.close()
        dst = open(out_file, 'wb')
        try:
            dst.write(data)
        finally:
            dst.close()
    finally:
        os.remove(png_out)
---|