root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/pwm/bed_score_aligned_string.py @ 3

リビジョン 3, 3.0 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/env python2.4
2"""
3Returns all positions of a maf with any pwm score > threshold
4The positions are projected onto human coordinates
5"""
6
7import psyco_full
8from bx.align import maf as align_maf
9import position_weight_matrix as pwmx
10from bx.pwm.pwm_score_maf import MafMotifScorer
11import sys
12from bx import intervals
13import Numeric
14
15def isnan(x):
16    return not x==x
17
18def main():
19
20    if len(sys.argv) < 5:
21        print >>sys.stderr, "%s bedfile inmaf spec1,spec2,... string [string2,...]" % sys.argv[0]
22        sys.exit(0)
23
24    # read in intervals
25    regions = {}
26    for line in open( sys.argv[1] ):
27        if line.startswith('#'): continue
28        fields = line.strip().split()
29        chrom, start, end = fields[0], int( fields[1] ), int( fields[2] )
30        try:
31            name = fields[3]
32        except:
33            name = None
34        if chrom not in regions: regions[chrom] = intervals.Intersecter()
35        regions[chrom].add( start, end, name )
36
37    motif_strings = sys.argv[4:]
38    if not isinstance(motif_strings, list): motif_strings = [motif_strings]
39    inmaf = open(sys.argv[2])
40    threshold = 0.5
41
42    species = []
43
44    for sp in sys.argv[3].split(','):
45        species.append( sp )
46
47    for maf in align_maf.Reader(inmaf):
48        mafchrom = maf.components[0].src.split('.')[1]
49        mafstart = maf.components[0].start
50        mafend = maf.components[0].end
51        reftext = maf.components[0].text
52        r = regions[mafchrom].find( mafstart, mafend )
53        if mafchrom not in regions or len( r ) == 0: continue
54
55        # maf block scores for each matrix
56        for scoremax,width,headers in MafMotifScorer(species, maf, motif_strings):
57            #print >>sys.stderr,headers
58            blocklength = width
59            mafsrc,mafstart,mafend = headers[0]
60            mafchrom = mafsrc.split('.')[1]
61
62            # lists of scores for each position in scoremax
63            for mx_name,mx in scoremax.items():
64                #print >>sys.stderr, mx_name, len(pwm[mx_name])
65
66                for offset in range(blocklength):
67   
68                    # scan all species with threshold
69                    for i in range(len(species)):
70                        if mx[i][offset] > threshold:
71                            refstart = mafstart + offset - reftext.count('-',0,offset)
72                            refend = refstart + len(mx_name)
73
74                            data = " ".join([ "%.2f" % mx[x][offset] for x in range(len(species))])
75                            # quote the motif
76                            r = regions[mafchrom].find( refstart, refend )
77                            if mafchrom in regions and len( r ) > 0:
78                                region_label = r[0].value
79                            else:
80                                #region_label = 0
81                                continue
82                            v_name = mx_name.replace(' ','_')
83                            print mafchrom,refstart,refend,region_label,v_name,data
84                            break
85
86if __name__ == '__main__': main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。