| 1 | #!/usr/bin/env python |
|---|
| 2 | |
|---|
| 3 | # python histogram.py input_file column bins output_file style |
|---|
| 4 | import sys, os |
|---|
| 5 | import matplotlib; matplotlib.use('Agg') |
|---|
| 6 | |
|---|
| 7 | from pylab import * |
|---|
| 8 | |
|---|
# Fail fast on interpreters older than Python 2.4.  An explicit check is used
# instead of an ``assert`` statement because asserts are compiled out when the
# interpreter runs with -O, which would silently disable the guard.  The
# exception type is kept as AssertionError so the failure looks the same.
if sys.version_info[:2] < (2, 4):
    raise AssertionError('Python 2.4 or newer is required')
|---|
| 10 | |
|---|
def stop_err(msg):
    """Report a fatal error and terminate the script.

    ``msg`` is written to standard error exactly as given (no newline is
    appended).  SystemExit is then raised with status 1 so that whoever
    launched the script can detect the failure from the exit code; a bare
    ``sys.exit()`` would report success (status 0).
    """
    sys.stderr.write(msg)
    sys.exit(1)
|---|
| 14 | |
|---|
def plot_style(code):
    """Translate the scatter-plot style code into a matplotlib format string.

    'P' selects points only ('o'), 'LP' selects points joined by lines
    ('o-'); any other value falls back to a plain line ('-').
    """
    if code == 'P':
        return 'o'
    if code == 'LP':
        return 'o-'
    return '-'


def read_column(path, col):
    """Return the 1-based column ``col`` of the file ``path`` as floats.

    Lines are stripped and split on whitespace; blank lines and lines whose
    first character is '#' are skipped.  If the file cannot be opened, or a
    selected field is missing or not a number, stop_err() is called with a
    message describing the problem (which terminates the script).
    """
    # sys.exc_info() is used instead of "except X, e" / "except X as e" so the
    # same source is valid on every interpreter the version check admits as
    # well as on Python 3.
    try:
        handle = open(path)
    except (IOError, OSError):
        stop_err('%s' % sys.exc_info()[1])

    values = []
    count = 0  # 1-based number of the line being parsed, for error reports
    try:
        try:
            for line in handle:
                count += 1
                line = line.strip()
                if line and line[0] != '#':
                    values.append(float(line.split()[col - 1]))
        except (ValueError, IndexError):
            stop_err("Non numerical data at line %d, column %d: %s"
                     % (count, col, sys.exc_info()[1]))
    finally:
        # Runs on success and when stop_err() raises SystemExit alike.
        handle.close()
    return values


def main(argv):
    """Draw a histogram or a scatter plot of one column of a text file.

    ``argv`` follows the command line layout:
        argv[1]  input file
        argv[2]  1-based column number
        argv[3]  number of bins (histogram mode) or the style code
                 understood by plot_style() (scatter mode)
        argv[4]  output file; it receives PNG data whatever its extension
        argv[5]  'hist' for a histogram, anything else for a scatter plot

    Every user error is reported through stop_err(), which exits.
    """
    if len(argv) != 6:
        stop_err('Usage: python histogram.py input_file column bins output_file style')

    inp_file = argv[1]
    out_file = argv[4]
    is_hist = argv[5] == 'hist'

    nbins = None
    style = None
    try:
        col = int(float(argv[2]))
        if is_hist:
            nbins = int(float(argv[3]))
        else:
            # hack, this parameter is the plotting style for scatter plots
            style = plot_style(argv[3])
    except (ValueError, OverflowError):
        # Report the two arguments that were actually parsed as numbers.
        stop_err('Parameters were not numbers %s, %s' % (argv[2], argv[3]))

    if col < 1:
        # Column 0 would become index -1 and silently select the last field.
        stop_err('Column number must be 1 or greater, got %s' % col)

    if is_hist:
        sys.stdout.write("Histogram on column %s (%s bins)\n" % (col, nbins))
    else:
        sys.stdout.write("Scatterplot on column %s\n" % col)

    values = read_column(inp_file, col)

    # plot the data
    if is_hist:
        # No normalisation argument is passed: un-normalised counts are the
        # default, and the old ``normed`` keyword is rejected by newer
        # matplotlib releases.
        counts, edges, patches = hist(values, bins=nbins)
    else:
        plot(values, style)

    xlabel('values')
    ylabel('counts')

    if is_hist:
        # One count is returned per bin, so len(counts) is the number of bins
        # drawn; the returned edge array can be one element longer.
        title('Histogram of values over column %s (%s bins)' % (col, len(counts)))
    else:
        title('Scatterplot over column %s' % col)
    grid(True)

    # the plotter detects types by file extension
    png_out = out_file + '.png'  # force it to png
    savefig(png_out)

    # shuffle it back and clean up; the temporary file is removed even when
    # the copy fails
    try:
        src = open(png_out, 'rb')
        try:
            data = src.read()
        finally:
            src.close()
        dst = open(out_file, 'wb')
        try:
            dst.write(data)
        finally:
            dst.close()
    finally:
        os.remove(png_out)


if __name__ == '__main__':
    main(sys.argv)
|---|