1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
3 | """ |
---|
4 | Filter maf blocks for presence of wildcard columns. Blocks must meet the |
---|
5 | criteria of having at least `min_good` columns, each of which has more than |
---|
6 | `min_species` rows that are NOT wildcard bases ('*'). |
---|
7 | |
---|
8 | TODO: Allow specifying the character of the wildcard base. |
---|
9 | |
---|
10 | usage: %prog min_good min_species < maf > maf |
---|
11 | """ |
---|
12 | |
---|
13 | from __future__ import division |
---|
14 | |
---|
15 | import psyco_full |
---|
16 | |
---|
17 | import sys |
---|
18 | |
---|
19 | import sys |
---|
20 | from bx.align import maf |
---|
21 | from optparse import OptionParser |
---|
22 | |
---|
def main():
    """
    Copy MAF blocks from stdin to stdout, keeping only the blocks that
    contain enough acceptable columns.

    Command-line arguments (read from `sys.argv`):
      min_good     -- minimum number of acceptable columns a block must
                      have in order to be written out.
      min_species  -- largest number of '*' entries a column may contain
                      and still be counted as acceptable.

    Exits through `sys.exit` with a usage message when either argument is
    missing or is not an integer. Arguments beyond the first two are
    ignored.

    NOTE(review): the module docstring describes an acceptable column as
    one with more than `min_species` rows that are NOT '*', whereas the
    test below bounds the number of '*' entries. Confirm which of the two
    is intended; the comparison is deliberately left unchanged here.
    """
    usage = "usage: %s min_good min_species < maf > maf" % sys.argv[0]

    # Fail with a readable message rather than an IndexError/ValueError
    # traceback when the arguments are absent or malformed.
    if len( sys.argv ) < 3:
        sys.exit( usage )
    try:
        min_good = int( sys.argv[1] )
        min_species = int( sys.argv[2] )
    except ValueError:
        sys.exit( usage )

    maf_reader = maf.Reader( sys.stdin )
    maf_writer = maf.Writer( sys.stdout )

    for m in maf_reader:
        good = 0
        for col in m.column_iter():
            if col.count( '*' ) <= min_species:
                good += 1
                # The threshold is already met, so the remaining columns
                # cannot change the decision for this block.
                if good >= min_good:
                    break
        # Checked after the loop as well so that blocks without columns
        # (or a non-positive min_good) are handled as before.
        if good >= min_good:
            maf_writer.write( m )
---|
38 | |
---|
# Run the filter only when this file is executed as a script; importing the
# module must not start reading from stdin.
if __name__ == "__main__":
    main()
---|