1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
3 | """ |
---|
4 | For every column that occurs in a multiple alignment print the column |
---|
5 | and the number of times it occurs (one column/count per line, tab |
---|
6 | separated), sorted by count descending. |
---|
7 | |
---|
8 | Note: all blocks must have exactly the same number of species. |
---|
9 | |
---|
10 | usage: %prog < maf > column_counts |
---|
11 | """ |
---|
12 | |
---|
13 | import bx.align.maf |
---|
14 | import sys |
---|
15 | |
---|
16 | from itertools import * |
---|
17 | |
---|
18 | counts = {} |
---|
19 | |
---|
20 | nspecies = None |
---|
21 | |
---|
22 | for block in bx.align.maf.Reader( sys.stdin ): |
---|
23 | # Ensure all blocks have the same number of rows |
---|
24 | if nspecies: assert len( block.components ) == nspecies |
---|
25 | else: nspecies = len( block.components ) |
---|
26 | # Increment count for each column |
---|
27 | for col in izip( * [ iter( comp.text.upper() ) for comp in block.components ] ): |
---|
28 | try: counts[ col ] += 1 |
---|
29 | except: counts[ col ] = 1 |
---|
30 | |
---|
31 | counts = [ ( value, key ) for key, value in counts.iteritems() ] |
---|
32 | counts.sort() |
---|
33 | counts.reverse() |
---|
34 | |
---|
35 | # print len( counts ) |
---|
36 | |
---|
37 | for count, col in counts: |
---|
38 | print "".join(col), count |
---|