#!/usr/bin/env python
# Greg Von Kuster
|---|
| 3 | |
|---|
| 4 | import sys |
|---|
| 5 | from rpy import * |
|---|
| 6 | |
|---|
# Refuse to run on interpreters older than Python 2.4.  An explicit check is
# used instead of ``assert`` because assertions are stripped under
# ``python -O`` and give the user no explanation when they fail.
if sys.version_info[:2] < ( 2, 4 ):
    sys.exit( "This script requires Python 2.4 or later." )
|---|
| 8 | |
|---|
def stop_err(msg):
    """Report a fatal error and terminate the script.

    msg -- text written verbatim to standard error (no newline is added).

    Raises SystemExit with exit status 1, so the failure is visible to the
    calling process through the exit code as well as through stderr.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would end the process with status 0 (success).
    sys.exit(1)
|---|
| 12 | |
|---|
def _read_column( in_fname, column ):
    """Collect the numeric values of one column of a tab-separated file.

    in_fname -- path of the file to read.
    column   -- zero-based index of the field to extract from each line.

    Returns a 5-tuple:
        matrix             -- list of one-element rows, ``[ value ]``, one per
                              line whose field parsed as a float
        line_count         -- total number of lines read
        skipped_lines      -- number of lines that produced no value (blank
                              lines, '#' comments, lines with too few fields,
                              fields that are not numbers)
        first_invalid_line -- 1-based number of the first skipped line, or 0
        invalid_value      -- the offending field text when the first skipped
                              line failed float conversion, otherwise ''
    """
    matrix = []
    line_count = 0
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''
    in_file = open( in_fname )
    # try/finally rather than ``with`` so the script keeps working on the
    # Python 2.4 interpreters it declares support for.
    try:
        for i, line in enumerate( in_file ):
            line_count = i + 1
            line = line.rstrip( '\r\n' )
            value = None
            rejected = ''
            # Blank lines and '#' comments carry no data.
            if line and not line.startswith( '#' ):
                try:
                    field = line.split( "\t" )[column]
                except IndexError:
                    # The line has too few fields for the requested column.
                    pass
                else:
                    try:
                        # "NA" is not accepted by float(), so such lines are
                        # counted as skipped like any other non-number.
                        value = float( field )
                    except ValueError:
                        rejected = field
            if value is None:
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = line_count
                    invalid_value = rejected
            else:
                matrix.append( [ value ] )
    finally:
        in_file.close()
    return matrix, line_count, skipped_lines, first_invalid_line, invalid_value


def main():
    """Plot a histogram of one column of a tab-separated file through R.

    Command-line arguments (all positional, read from sys.argv):
        1 -- input file name
        2 -- output file name, handed to R's pdf() device
        3 -- 1-based number of the column to plot
        4 -- plot title
        5 -- x-axis label
        6 -- number of breaks as an integer; 0 selects "Sturges"
        7 -- "true" to overlay a density curve on the histogram

    On success a short summary is printed to stdout.  Every failure is
    reported through stop_err(), which terminates the script.
    """
    # Handle input params
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        column = int( sys.argv[3] ) - 1
    except ( IndexError, ValueError ):
        stop_err( "Column not specified, your query does not contain a column of numerical data." )
    title = sys.argv[4]
    xlab = sys.argv[5]
    breaks = int( sys.argv[6] )
    if breaks == 0:
        breaks = "Sturges"
    density = sys.argv[7] == "true"

    matrix, line_count, skipped_lines, first_invalid_line, invalid_value = _read_column( in_fname, column )

    # Decide on the number of usable rows rather than comparing the skip count
    # with the last line index, which misreported one-line files as empty and
    # files with a single valid row as entirely non-numeric.
    if not matrix:
        if line_count == 0:
            stop_err( "Input dataset is empty." )
        else:
            stop_err( "All values in column %s are non-numeric." % sys.argv[3] )

    try:
        a = array( matrix )
        r.pdf( out_fname, 8, 8 )
        r.hist( a, probability=True, main=title, xlab=xlab, breaks=breaks )
        if density:
            r.lines( r.density( a ) )
        r.dev_off()
    except Exception:
        # sys.exc_info() is used instead of "except ... as exc" because that
        # syntax is not available on Python 2.4/2.5.
        stop_err( "%s" % str( sys.exc_info()[1] ) )

    sys.stdout.write( "Histogram of column %s. \n" % sys.argv[3] )
    if skipped_lines > 0:
        sys.stdout.write( "Skipped %d invalid lines starting with line #%d, '%s'.\n" % ( skipped_lines, first_invalid_line, invalid_value ) )

    # NOTE(review): presumably shuts down the embedded R session without
    # saving its workspace -- confirm against the rpy documentation.
    r.quit( save="no" )
|---|
| 92 | |
|---|
# Run the tool only when this file is executed directly as a script.
if __name__ == "__main__":
    main()
|---|