[3] | 1 | #!/usr/bin/env python2.4 |
---|
| 2 | """ |
---|
| 3 | Returns all positions of a maf with any pwm score > threshold |
---|
| 4 | The positions are projected onto human coordinates |
---|
| 5 | """ |
---|
| 6 | |
---|
| 7 | import psyco_full |
---|
| 8 | from bx.align import maf as align_maf |
---|
| 9 | import bx.pwm.position_weight_matrix as pwmx |
---|
| 10 | from bx.pwm.pwm_score_maf import MafBlockScorer |
---|
| 11 | import sys |
---|
| 12 | from bx import intervals |
---|
| 13 | |
---|
| 14 | def isnan(x): |
---|
| 15 | return not x==x |
---|
| 16 | |
---|
| 17 | def main(): |
---|
| 18 | |
---|
| 19 | if len(sys.argv) < 6: |
---|
| 20 | print >>sys.stderr, "%s transfac|basic pwmfile inmaf threshold spec1,spec2,... " % sys.argv[0] |
---|
| 21 | sys.exit(0) |
---|
| 22 | |
---|
| 23 | pwm = {} |
---|
| 24 | format = sys.argv[1] |
---|
| 25 | for wm in pwmx.Reader(open(sys.argv[2]),format=format): |
---|
| 26 | pwm[ wm.id ] = wm |
---|
| 27 | |
---|
| 28 | |
---|
| 29 | inmaf = open(sys.argv[3]) |
---|
| 30 | threshold = float(sys.argv[4]) |
---|
| 31 | |
---|
| 32 | species = [] |
---|
| 33 | |
---|
| 34 | for sp in sys.argv[5].split(','): |
---|
| 35 | species.append( sp ) |
---|
| 36 | |
---|
| 37 | for maf in align_maf.Reader(inmaf): |
---|
| 38 | mafchrom = maf.components[0].src.split('.')[1] |
---|
| 39 | mafstart = maf.components[0].start |
---|
| 40 | mafend = maf.components[0].end |
---|
| 41 | reftext = maf.components[0].text |
---|
| 42 | |
---|
| 43 | # maf block scores for each matrix |
---|
| 44 | for scoremax,width,headers in MafBlockScorer(pwm, species, maf): |
---|
| 45 | blocklength = width |
---|
| 46 | mafsrc,mafstart,mafend = headers[0] |
---|
| 47 | mafchrom = mafsrc.split('.')[1] |
---|
| 48 | |
---|
| 49 | # lists of scores for each position in scoremax |
---|
| 50 | for id,mx in scoremax.items(): |
---|
| 51 | for offset in range(blocklength): |
---|
| 52 | |
---|
| 53 | # scan all species with threshold |
---|
| 54 | for i in range(len(species)): |
---|
| 55 | if mx[i][offset] > threshold: |
---|
| 56 | refstart = mafstart + offset - reftext.count('-',0,offset) |
---|
| 57 | refend = refstart + len(pwm[id]) |
---|
| 58 | data = " ".join([ "%.2f" % mx[x][offset] for x in range(len(species))]) |
---|
| 59 | # underscore spaces in the name |
---|
| 60 | print mafchrom,refstart,refend,id.replace(' ','_'),data |
---|
| 61 | break |
---|
| 62 | |
---|
| 63 | if __name__ == '__main__': main() |
---|