root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/pwm/bed_score_aligned_pwm.py @ 3

リビジョン 3, 2.9 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/env python2.4
2"""
3Returns all positions of a maf with any pwm score > threshold
4The positions are projected onto human coordinates
5"""
6
7import psyco_full
8from bx.align import maf as align_maf
9import position_weight_matrix as pwmx
10from bx.pwm.pwm_score_maf import MafBlockScorer
11import sys
12from bx import intervals
13import Numeric
14
15def isnan(x):
16    return not x==x
17
18def main():
19
20    if len(sys.argv) < 5:
21        print >>sys.stderr, "%s bedfile inmaf spec1,spec2,... motif_file " % sys.argv[0]
22        sys.exit(0)
23
24    # read in intervals
25    regions = {}
26    for line in open( sys.argv[1] ):
27        if line.startswith('#'): continue
28        fields = line.strip().split()
29        chrom, start, end = fields[0], int( fields[1] ), int( fields[2] )
30        try:
31            name = fields[3]
32        except:
33            name = None
34        if chrom not in regions: regions[chrom] = intervals.Intersecter()
35        regions[chrom].add( start, end, name )
36
37    pwm = {}
38    for wm in pwmx.Reader(open( sys.argv[4] )):
39        pwm[ wm.id] = wm
40        print >>sys.stderr, wm.id, len(wm)
41
42    inmaf = open(sys.argv[2])
43    threshold = 0.5
44
45    species = []
46
47    for sp in sys.argv[3].split(','):
48        species.append( sp )
49
50    for maf in align_maf.Reader(inmaf):
51        mafchrom = maf.components[0].src.split('.')[1]
52        mafstart = maf.components[0].start
53        mafend = maf.components[0].end
54        reftext = maf.components[0].text
55
56        # maf block scores for each matrix
57        for scoremax,width,headers in MafBlockScorer(pwm,species, maf):
58            #print >>sys.stderr,headers
59            blocklength = width
60            mafsrc,mafstart,mafend = headers[0]
61            mafchrom = mafsrc.split('.')[1]
62
63            # lists of scores for each position in scoremax
64            for mx_name,mx in scoremax.items():
65                #print >>sys.stderr, mx_name, len(pwm[mx_name])
66
67                for offset in range(blocklength):
68   
69                    # scan all species with threshold
70                    for i in range(len(species)):
71                        if mx[i][offset] > threshold:
72                            refstart = mafstart + offset - reftext.count('-',0,offset)
73                            refend = refstart + len(pwm[mx_name])
74
75                            data = " ".join([ "%.2f" % mx[x][offset] for x in range(len(species))])
76                            # quote the motif
77                            r = regions[mafchrom].find( refstart, refend )
78                            if mafchrom in regions and len( r ) > 0:
79                                region_label = r[0].value
80                            else:
81                                #region_label = 0
82                                continue
83                            v_name = mx_name.replace(' ','_')
84                            print mafchrom,refstart,refend,region_label,v_name,data
85                            break
86
87if __name__ == '__main__': main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。