1 | #!/usr/bin/env python |
---|
2 | #Greg Von Kuster |
---|
3 | |
---|
4 | import sys |
---|
5 | from rpy import * |
---|
6 | |
---|
# Refuse to run on interpreters older than Python 2.4.  An explicit check is
# used instead of a bare ``assert`` because assert statements are removed when
# Python runs with -O, which would silently disable the guard.  AssertionError
# is kept so the failure type is the same as before.
if sys.version_info[:2] < (2, 4):
    raise AssertionError("This script requires Python 2.4 or newer.")
---|
8 | |
---|
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes *msg* to standard error exactly as given (no newline is appended)
    and then raises SystemExit with status 1, so the failure is visible to the
    caller through the exit status as well as through stderr.  Previously the
    script exited with status 0 even though an error had been reported.
    """
    sys.stderr.write(msg)
    sys.exit(1)
---|
12 | |
---|
def _read_column(in_fname, column):
    """Collect the numeric values of one column of a tab-separated file.

    in_fname -- path of the file to read.
    column   -- 0-based index of the field to extract from each line.

    Returns a tuple
    ``(rows, line_count, skipped_lines, first_invalid_line, invalid_value)``:

    rows               -- list of one-element lists ``[float]``, one per line
                          whose field converted successfully.
    line_count         -- total number of lines read from the file.
    skipped_lines      -- number of lines that produced no value (blank
                          lines, lines starting with '#', lines with too few
                          fields, and fields float() cannot parse, which
                          includes "NA").
    first_invalid_line -- 1-based number of the first skipped line, or 0.
    invalid_value      -- the unparsable field text of that first skipped
                          line, or '' when it was skipped for another reason.
    """
    rows = []
    line_count = 0
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''

    in_file = open(in_fname)
    # try/finally rather than ``with`` so the script keeps running on
    # Python 2.4 while still closing the file on every path.
    try:
        for index, line in enumerate(in_file):
            line_count = index + 1
            line = line.rstrip('\r\n')
            offending = ''
            if line and not line.startswith('#'):
                fields = line.split("\t")
                try:
                    number = float(fields[column])
                except IndexError:
                    # The line has fewer fields than the requested column.
                    pass
                except ValueError:
                    # Not a number; remember the text for the summary message.
                    offending = fields[column]
                else:
                    # Each value is kept as its own row so the array handed to
                    # R has one column, as before.
                    rows.append([number])
                    continue
            skipped_lines += 1
            if not first_invalid_line:
                first_invalid_line = line_count
                invalid_value = offending
    finally:
        in_file.close()

    return rows, line_count, skipped_lines, first_invalid_line, invalid_value


def main():
    """Plot a histogram of one column of a tab-separated file to a PDF via R.

    Command-line arguments (sys.argv):
      1 -- input file name
      2 -- output PDF file name
      3 -- 1-based column number to plot
      4 -- plot title
      5 -- x-axis label
      6 -- number of breaks; 0 selects R's "Sturges" rule
      7 -- "true" to overlay a density curve, anything else to omit it

    Lines that yield no numeric value are skipped and summarised on stdout.
    The script stops through stop_err() when the column argument is missing
    or not an integer, when the input has no lines, when no line yields a
    numeric value, or when R raises an error while plotting.
    """
    # Handle input params
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        column = int(sys.argv[3]) - 1
    except (IndexError, ValueError):
        stop_err("Column not specified, your query does not contain a column of numerical data.")
    title = sys.argv[4]
    xlab = sys.argv[5]
    breaks = int(sys.argv[6])
    if breaks == 0:
        breaks = "Sturges"
    density = sys.argv[7] == "true"

    (matrix, line_count, skipped_lines,
     first_invalid_line, invalid_value) = _read_column(in_fname, column)

    # Decide on the presence of usable values rather than comparing the skip
    # count with the last line index: that comparison was off by one and
    # rejected inputs containing exactly one numeric value.
    if matrix:
        try:
            a = array(matrix)
            r.pdf(out_fname, 8, 8)
            r.hist(a, probability=True, main=title, xlab=xlab, breaks=breaks)
            if density:
                r.lines(r.density(a))
            r.dev_off()
        except Exception:
            # sys.exc_info() is used instead of "except ... as" / "except ..., exc"
            # so the syntax is valid on every Python version from 2.4 on.
            stop_err("%s" % str(sys.exc_info()[1]))
    elif line_count == 0:
        stop_err("Input dataset is empty.")
    else:
        stop_err("All values in column %s are non-numeric." % sys.argv[3])

    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("Histogram of column %s. " % sys.argv[3])
    if skipped_lines > 0:
        print("Skipped %d invalid lines starting with line #%d, '%s'." % (skipped_lines, first_invalid_line, invalid_value))

    r.quit(save="no")
---|
92 | |
---|
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
---|