1 | #!/usr/bin/env python |
---|
2 | |
---|
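# Usage: python histogram.py input_file column bins output_file style
#   style 'hist' draws a histogram with `bins` bins; any other style draws a
#   plot of the column, with `bins` selecting the line style (P, LP, or other).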
---|
4 | import sys, os |
---|
5 | import matplotlib; matplotlib.use('Agg') |
---|
6 | |
---|
7 | from pylab import * |
---|
8 | |
---|
# Refuse to run on interpreters older than Python 2.4.  An explicit check is
# used rather than ``assert`` because assertions are stripped when Python runs
# with -O, which would silently disable the guard.
if sys.version_info[:2] < (2, 4):
    sys.stderr.write('This script requires Python 2.4 or later\n')
    sys.exit(1)
---|
10 | |
---|
def stop_err(msg):
    """Write *msg* to standard error and abort the script.

    The message is written exactly as given (no newline is appended).
    The process then terminates by raising ``SystemExit`` with status 1,
    so that callers and wrapping tools can tell the run failed from the
    exit code as well as from the stderr output.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report status 0 (success) for an error path.
    sys.exit(1)
---|
14 | |
---|
if __name__ == '__main__':
    # Script entry point.
    #
    # Command line: input_file column bins output_file style
    #   input_file  - whitespace-separated text; blank lines and lines
    #                 starting with '#' are skipped
    #   column      - 1-based column number to read
    #   bins        - number of bins when style is 'hist'; otherwise the
    #                 plotting style: 'P' (points), 'LP' (line + points),
    #                 anything else (plain line)
    #   output_file - path that receives the PNG image
    #   style       - 'hist' for a histogram, anything else for a plot
    if len(sys.argv) != 6:
        stop_err('Usage: python histogram.py input_file column bins output_file style')

    inp_file = sys.argv[1]
    out_file = sys.argv[4]
    is_hist = sys.argv[5] == 'hist'

    # parse the numeric arguments
    num_bins = None
    try:
        col = int(float(sys.argv[2]))
        if is_hist:
            num_bins = int(float(sys.argv[3]))
    except (ValueError, OverflowError):
        # Report the two arguments that were actually being parsed.
        stop_err('Parameter were not numbers %s, %s' % (sys.argv[2], sys.argv[3]))

    if col < 1:
        # A zero or negative column would index from the end of each row.
        stop_err('Column number must be 1 or greater, got %s' % col)

    style = None
    if not is_hist:
        # hack, this parameter is the plotting style for scatter plots
        style = {'P': 'o', 'LP': 'o-'}.get(sys.argv[3], '-')

    if is_hist:
        print("Histogram on column %s (%s bins)" % (col, num_bins))
    else:
        print("Scatterplot on column %s" % (col))

    # read the requested column from the file
    xcol = col - 1
    values = []
    count = 0
    handle = None
    try:
        try:
            handle = open(inp_file)
            for line in handle:
                count += 1
                line = line.strip()
                if line and line[0] != '#':
                    values.append(float(line.split()[xcol]))
        except (ValueError, IndexError):
            # Non-numeric cell, or the row has fewer than `col` columns.
            stop_err("Non numerical data at line %d, column %d" % (count, col))
        except (IOError, OSError):
            stop_err('%s' % sys.exc_info()[1])
    finally:
        if handle is not None:
            handle.close()

    # plot the data
    if is_hist:
        # Raw counts are the default; the old `normed=0` keyword only
        # restated that and is rejected by current Matplotlib releases.
        n, bins, patches = hist(values, bins=num_bins)
        # `bins` holds bin edges on current Matplotlib (one more than the
        # number of bins), so report the requested bin count instead.
        title('Histogram of values over column %s (%s bins)' % (col, num_bins))
    else:
        plot(values, style)
        title('Scatterplot over column %s' % col)

    xlabel('values')
    ylabel('counts')
    grid(True)

    # the plotter detects types by file extension
    png_out = out_file + '.png'  # force it to png
    savefig(png_out)

    # shuffle it back and clean up; the image bytes are copied into the
    # existing output path and the temporary .png is always removed
    try:
        src = open(png_out, 'rb')
        try:
            data = src.read()
        finally:
            src.close()
        dst = open(out_file, 'wb')
        try:
            dst.write(data)
        finally:
            dst.close()
    finally:
        if os.path.exists(png_out):
            os.remove(png_out)
---|