1 | #!/usr/bin/env python2.4 |
---|
2 | """ |
---|
3 | Returns all positions of a maf with any pwm score > threshold |
---|
4 | The positions are projected onto human coordinates |
---|
5 | """ |
---|
6 | |
---|
7 | import psyco_full |
---|
8 | from bx.align import maf as align_maf |
---|
9 | import bx.pwm.position_weight_matrix as pwmx |
---|
10 | from bx.pwm.pwm_score_maf import MafBlockScorer |
---|
11 | import sys |
---|
12 | from bx import intervals |
---|
13 | |
---|
14 | def isnan(x): |
---|
15 | return not x==x |
---|
16 | |
---|
17 | def main(): |
---|
18 | |
---|
19 | if len(sys.argv) < 6: |
---|
20 | print >>sys.stderr, "%s transfac|basic pwmfile inmaf threshold spec1,spec2,... " % sys.argv[0] |
---|
21 | sys.exit(0) |
---|
22 | |
---|
23 | pwm = {} |
---|
24 | format = sys.argv[1] |
---|
25 | for wm in pwmx.Reader(open(sys.argv[2]),format=format): |
---|
26 | pwm[ wm.id ] = wm |
---|
27 | |
---|
28 | |
---|
29 | inmaf = open(sys.argv[3]) |
---|
30 | threshold = float(sys.argv[4]) |
---|
31 | |
---|
32 | species = [] |
---|
33 | |
---|
34 | for sp in sys.argv[5].split(','): |
---|
35 | species.append( sp ) |
---|
36 | |
---|
37 | for maf in align_maf.Reader(inmaf): |
---|
38 | mafchrom = maf.components[0].src.split('.')[1] |
---|
39 | mafstart = maf.components[0].start |
---|
40 | mafend = maf.components[0].end |
---|
41 | reftext = maf.components[0].text |
---|
42 | |
---|
43 | # maf block scores for each matrix |
---|
44 | for scoremax,width,headers in MafBlockScorer(pwm, species, maf): |
---|
45 | blocklength = width |
---|
46 | mafsrc,mafstart,mafend = headers[0] |
---|
47 | mafchrom = mafsrc.split('.')[1] |
---|
48 | |
---|
49 | # lists of scores for each position in scoremax |
---|
50 | for id,mx in scoremax.items(): |
---|
51 | for offset in range(blocklength): |
---|
52 | |
---|
53 | # scan all species with threshold |
---|
54 | for i in range(len(species)): |
---|
55 | if mx[i][offset] > threshold: |
---|
56 | refstart = mafstart + offset - reftext.count('-',0,offset) |
---|
57 | refend = refstart + len(pwm[id]) |
---|
58 | data = " ".join([ "%.2f" % mx[x][offset] for x in range(len(species))]) |
---|
59 | # underscore spaces in the name |
---|
60 | print mafchrom,refstart,refend,id.replace(' ','_'),data |
---|
61 | break |
---|
62 | |
---|
63 | if __name__ == '__main__': main() |
---|