1 | #!/usr/bin/env python |
---|
2 | #Greg Von Kuster |
---|
3 | |
---|
4 | import sys |
---|
5 | from rpy import * |
---|
6 | |
---|
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes ``msg`` to standard error exactly as given (no newline is
    appended) and then raises ``SystemExit`` with status 1, so the calling
    process can detect the failure from the exit code as well as from the
    stderr output.

    :param msg: text to write to standard error.
    :raises SystemExit: always, with exit status 1.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) even though we are
    # aborting because of an error.
    sys.exit(1)
---|
10 | |
---|
def _read_numeric_rows(lines, columns):
    """Extract the requested columns from every parsable tab-separated line.

    :param lines: iterable of text lines (for example an open file).
    :param columns: zero-based indexes of the columns to extract.
    :returns: a tuple ``(matrix, skipped_lines, first_invalid_line,
        invalid_value, invalid_column)`` where ``matrix`` is a list of rows
        of floats (one value per requested column), ``skipped_lines`` counts
        every line that produced no row (blank lines, lines starting with
        ``#``, and lines with a missing or non-numeric value),
        ``first_invalid_line`` is the one-based number of the first such
        line (0 if none), and ``invalid_value`` / ``invalid_column`` describe
        the offending value and its one-based column when that first skipped
        line failed numeric parsing (they stay ``''`` / ``0`` when it was a
        blank or comment line).
    """
    matrix = []
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''
    invalid_column = 0

    for line_number, line in enumerate(lines, 1):
        line = line.rstrip('\r\n')

        # Blank lines and '#' comment lines are counted as skipped.
        if not line or line.startswith('#'):
            skipped_lines += 1
            if not first_invalid_line:
                first_invalid_line = line_number
            continue

        fields = line.split("\t")
        row = []
        for column in columns:
            try:
                val = fields[column]
                # "NA" (any case) marks a missing value; it is kept as NaN
                # rather than causing the line to be rejected.
                if val.lower() == "na":
                    row.append(float("nan"))
                else:
                    row.append(float(val))
            except (IndexError, ValueError):
                # Column missing from this line, or value is not a number.
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = line_number
                    try:
                        invalid_value = fields[column]
                    except IndexError:
                        invalid_value = ''
                    invalid_column = column + 1
                break
        else:
            # Every requested column parsed, so the row is usable.
            matrix.append(row)

    return matrix, skipped_lines, first_invalid_line, invalid_value, invalid_column


def main():
    """Draw a scatter plot of two columns of a tab-separated file to a PDF.

    Command-line arguments, read from ``sys.argv``:

    1. input file name (tab-separated text)
    2. output PDF file name
    3. one-based number of the first column to plot
    4. one-based number of the second column to plot
    5. plot title
    6. x-axis label
    7. y-axis label

    Lines that are blank, start with ``#``, or hold a missing or non-numeric
    value in either column are skipped and summarised on standard output.
    If no line yields a usable row, or plotting fails, the script aborts
    through ``stop_err``.

    ``r`` and ``array`` are expected to be supplied by the module-level
    ``from rpy import *``.
    """
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        columns = int(sys.argv[3]) - 1, int(sys.argv[4]) - 1
    except (IndexError, ValueError):
        # Only a missing or non-integer column argument is expected here; a
        # bare except would also swallow SystemExit / KeyboardInterrupt.
        stop_err("Columns not specified, your query does not contain a column of numerical data.")
    title = sys.argv[5]
    xlab = sys.argv[6]
    ylab = sys.argv[7]

    # The context manager guarantees the input file is closed.
    with open(in_fname) as in_file:
        (matrix, skipped_lines, first_invalid_line,
         invalid_value, invalid_column) = _read_numeric_rows(in_file, columns)

    # Plot whenever at least one row was parsed. Comparing the skipped count
    # with the index of the last line is off by one and rejects inputs that
    # contain exactly one valid row.
    if not matrix:
        stop_err("All values in both columns %s and %s are non-numeric or empty." % (sys.argv[3], sys.argv[4]))

    try:
        r.pdf(out_fname, 8, 8)
        r.plot(array(matrix), type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19)
        r.dev_off()
    except Exception as exc:
        stop_err("%s" % str(exc))

    # print(<single string>) behaves identically on Python 2 and Python 3.
    print("Scatter plot on columns %s, %s. " % (sys.argv[3], sys.argv[4]))
    if skipped_lines > 0:
        print("Skipped %d lines starting with line #%d, value '%s' in column %d is not numeric." % (skipped_lines, first_invalid_line, invalid_value, invalid_column))

    r.quit(save="no")
---|
77 | |
---|
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
---|