root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/EGG-INFO/scripts/div_snp_table_chr.py

リビジョン 3, 4.6 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/python2.6
2
3"""
4FIXME!
5
6usage: %prog feature.bed ar.bed snp.bed div_directory [options]
7    -m, --mask=M: Mask AR and features with this file
8    -s, --suffix=S: append suffix to chromosomes to get filenames from div_directory
9    -l, --lens=l: Set chromosome ends using LEN file
10"""
11
12import sys
13import bx.bitset
14from bx.bitset_builders import *
15from bx.cookbook import doc_optparse
16
17def main():
18
19    options, args = doc_optparse.parse( __doc__ )
20    try:
21        lens = {}
22        if options.lens:
23            for line in open( options.lens ):
24                chrom, length = line.split()
25                lens[chrom] = int( length )
26
27        if options.suffix: suffix = options.suffix
28        else: suffix = ""
29
30        print >>sys.stderr, "\nReading feature",
31        interval_file = open(args[0])
32        feature = binned_bitsets_from_file(interval_file, lens=lens)
33        interval_file.close()
34        # reuse interval file
35        intervals = {}
36        interval_file = open(args[0])
37        for line in interval_file:
38            fields = line.split()
39            chrom, start, end = fields[0], int(fields[1]), int(fields[2])
40            if chrom not in intervals: intervals[chrom] = []
41            intervals[chrom].append( [start,end] )
42        interval_file.close()
43
44        print >>sys.stderr, "\nReading ar",
45        ar = binned_bitsets_from_file(open( args[1] ), lens=lens)
46
47        print >>sys.stderr, "\nReading snps",
48        snp = binned_bitsets_from_file(open( args[2] ), lens=lens)
49        snp_mask = clone_inverted( snp )
50        snp_copy = clone( snp )
51
52        print >>sys.stderr, "\nMasking AR",
53        ar_mask = clone_inverted( ar )
54        print >>sys.stderr
55
56        dirname = args[3]
57
58
59        if options.mask: mask = binned_bitsets_from_file(open(options.mask), lens=lens)
60        else: mask = None
61    except:
62        doc_optparse.exit()
63   
64    if mask:
65        for chrom in mask.keys():
66            if chrom in feature: feature[chrom].iand( mask[chrom] )
67            if chrom in ar: ar[chrom].iand( mask[chrom] )
68
69
70    # divergence and snp counts for all features
71    feature_div_count = 0
72    feature_snp_count = 0
73    ar_div_count = 0
74    ar_snp_count = 0
75
76    # collect snp and div
77    for chr in feature.keys():
78
79        if chr not in snp: continue
80        if chr not in ar: continue
81
82        print >>sys.stderr, "reading %s ..." % chr,
83        try:
84            div = binned_bitsets_from_file( open( dirname + "/%s.bed" % (chr+suffix) ), lens=lens)
85        except:
86            print >>sys.stderr,"%s.bed not found" % chr
87            continue
88
89        div[chr].iand( snp_mask[chr] ) # div/snp sites count snp-only
90        div_copy = clone( div )
91
92        print >>sys.stderr, "AR:", chr,
93        snp[chr].iand( ar[chr] )
94        div[chr].iand( ar[chr] )
95        snp_count = snp[chr].count_range(0,snp[chr].size)
96        ar_snp_count += snp_count
97        print >>sys.stderr, snp_count,
98        try:
99            div_count = div[chr].count_range(0,div[chr].size)
100            ar_div_count += div_count
101            print >>sys.stderr, div_count
102        except:
103            print >>sys.stderr, chr, "failed"
104   
105        div = div_copy
106        snp[chr] = snp_copy[chr]
107        print >>sys.stderr, "feature:", chr,
108        feature[chr].iand( ar_mask[chr] ) # clip to non-AR only
109        snp[chr].iand( feature[chr] )
110        div[chr].iand( feature[chr] )
111        feature_snp_count += snp[chr].count_range(0,snp[chr].size)
112        print >>sys.stderr, snp[chr].count_range(0,snp[chr].size), div[chr].count_range(0,div[chr].size)
113        feature_div_count += div[chr].count_range(0,div[chr].size)
114        print >>sys.stderr, snp[chr].count_range(0,snp[chr].size), div[chr].count_range(0,div[chr].size)
115
116        # Note: can loop over feature intervals here for individual counts
117        if chr in intervals:
118            for start,end in intervals[chr]:
119                ind_div_count = div[chr].count_range(start,end-start)
120                ind_snp_count = snp[chr].count_range(start,end-start)
121                print chr, start, end, ind_div_count, ind_snp_count
122   
123    print "feature snp\t%d" %feature_snp_count
124    print "feature div\t%d" %feature_div_count
125    print "ar snp\t%d" %ar_snp_count
126    print "ar div\t%d" %ar_div_count
127
128# copies a dictionary of bitsets
129def copybits( binnedbits ):
130    bitset = BinnedBitSet( binnedbits.size )
131    bitset.ior( binnedbits )
132    return bitset
133
134def clone( bitsets ):
135    r = {}
136    for k,b in bitsets.items(): r[ k ] = copybits( b )
137    return r
138
139def clone_inverted( bitsets ):
140    r = {}
141    for k,b in bitsets.items():
142        r[ k ] = copybits( b )
143        r[ k ].invert()
144    return r
145
146main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。