1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
3 | """ |
---|
4 | Read a MAF and print counts and frequencies of all n-mers |
---|
5 | (words composed on n consecutive alignment columns) |
---|
6 | |
---|
7 | TODO: reconcile this and maf_mapping_word_frequency.py |
---|
8 | |
---|
9 | usage: %prog n < maf_file |
---|
10 | """ |
---|
11 | |
---|
12 | from __future__ import division |
---|
13 | |
---|
14 | import psyco; psyco.profile() |
---|
15 | |
---|
16 | from bx.cookbook import doc_optparse |
---|
17 | import string |
---|
18 | import sys |
---|
19 | |
---|
20 | from align import maf |
---|
21 | |
---|
22 | |
---|
23 | def __main__(): |
---|
24 | |
---|
25 | motif_len = int( sys.argv[1] ) |
---|
26 | |
---|
27 | big_map = {} |
---|
28 | total = 0 |
---|
29 | |
---|
30 | maf_reader = maf.Reader( sys.stdin ) |
---|
31 | |
---|
32 | for m in maf_reader: |
---|
33 | texts = [ c.text.upper() for c in m.components ] |
---|
34 | for i in range( m.text_size - motif_len ): |
---|
35 | motif = string.join( [ text[ i : i + motif_len ] for text in texts ] ) |
---|
36 | if big_map.has_key( motif ): big_map[ motif ] += 1 |
---|
37 | else: big_map[ motif ] = 1 |
---|
38 | total += 1 |
---|
39 | |
---|
40 | items = zip( big_map.values(), big_map.keys() ) |
---|
41 | items.sort() |
---|
42 | items.reverse() |
---|
43 | |
---|
44 | for count, motif in items: |
---|
45 | print "%d\t%0.10f\t%s" % ( count, count / total, motif ) |
---|
46 | |
---|
47 | if __name__ == "__main__": __main__() |
---|