1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
3 | """ |
---|
4 | Tool for filtering a tabular data file. Fields are separated by tabs, the |
---|
5 | header line is denoted by a '#' in the first byte, comments are denoted by |
---|
6 | a '#' at the start of any subsequent line. |
---|
7 | |
---|
8 | Expressions can use column names as well as numbers. The -c option allows |
---|
9 | cutting, again using field names or numbers. |
---|
10 | |
---|
11 | usage: %prog expression < table |
---|
12 | -H, --header: keep header in output |
---|
13 | -C, --comments: keep comments in output |
---|
14 | --force-header: assume the first line is a header even if it does not start with "#" |
---|
15 | -c, --cols=1,2: names or indexes of columns to keep |
---|
16 | """ |
---|
17 | |
---|
18 | import psyco_full |
---|
19 | |
---|
20 | import sys |
---|
21 | |
---|
22 | import sys |
---|
23 | import bx.tabular.io |
---|
24 | from bx.cookbook import doc_optparse |
---|
25 | |
---|
def __main__():
    """
    Filter a tab-separated table read from stdin and print the result.

    Command line options are parsed from the module docstring by
    doc_optparse. The optional first positional argument is a Python
    expression that is evaluated once per data row with the name `row`
    bound to that row; rows for which it is false are dropped. Without an
    expression every row is kept. Header and comment elements are echoed
    only when --header / --comments are given, and --cols restricts the
    printed fields of headers and rows to the listed columns.
    """

    # Parse command line arguments
    options, args = doc_optparse.parse( __doc__ )
    try:
        keep_header = bool( options.header )
        keep_comments = bool( options.comments )
        cols = []
        if options.cols:
            for c in options.cols.split( ',' ):
                # Numeric specs become integer indexes; anything else is
                # kept as a column name.
                try:
                    v = int( c )
                except ValueError:
                    v = c
                # Append the converted value, not the raw string, so that
                # "2" is looked up as an integer key rather than as a column
                # literally named "2".
                # NOTE(review): assumes bx.tabular.io rows/headers accept
                # int keys as positional indexes -- confirm.
                cols.append( v )
        expr = args[0] if args else None
        if options.force_header:
            force_header = bx.tabular.io.FIRST_LINE_IS_HEADER
        else:
            force_header = None
    except Exception:
        # NOTE(review): doc_optparse.exception() presumably reports the
        # problem and exits; the code below relies on the names assigned in
        # the try block being defined.
        doc_optparse.exception()

    # Compile expression for SPEED (done once, evaluated for every row).
    # NOTE: the expression comes straight from the command line and is
    # eval'd as Python code -- only run this tool with trusted expressions.
    if expr:
        expr = compile( expr, '<expr arg>', 'eval' )

    # print( x ) with a single argument parses as a statement on Python 2 and
    # as a function call on Python 3, producing the same output on both.
    for element in bx.tabular.io.TableReader( sys.stdin, force_header=force_header ):
        if type( element ) is bx.tabular.io.Header:
            if keep_header:
                if cols:
                    # Re-add the leading '#' that marks the header line.
                    print( "#" + "\t".join( element[c] for c in cols ) )
                else:
                    print( element )
        elif type( element ) is bx.tabular.io.Comment:
            if keep_comments:
                print( element )
        else:
            # Data row: keep it when there is no filter or the filter is true.
            if expr is None or bool( eval( expr, dict( row=element ) ) ):
                if cols:
                    print( "\t".join( [ element[c] for c in cols ] ) )
                else:
                    print( element )
---|
71 | |
---|
# Script entry point: run the filter only when executed directly.
if __name__ == "__main__":
    __main__()
---|