1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
3 | """ |
---|
4 | FIXME! |
---|
5 | |
---|
6 | usage: %prog feature.bed ar.bed snp.bed div_directory [options] |
---|
7 | -m, --mask=M: Mask AR and features with this file |
---|
8 | -s, --suffix=S: append suffix to chromosomes to get filenames from div_directory |
---|
9 | -l, --lens=l: Set chromosome ends using LEN file |
---|
10 | """ |
---|
11 | |
---|
12 | import sys |
---|
13 | import bx.bitset |
---|
14 | from bx.bitset_builders import * |
---|
15 | from bx.cookbook import doc_optparse |
---|
16 | |
---|
17 | def main(): |
---|
18 | |
---|
19 | options, args = doc_optparse.parse( __doc__ ) |
---|
20 | try: |
---|
21 | lens = {} |
---|
22 | if options.lens: |
---|
23 | for line in open( options.lens ): |
---|
24 | chrom, length = line.split() |
---|
25 | lens[chrom] = int( length ) |
---|
26 | |
---|
27 | if options.suffix: suffix = options.suffix |
---|
28 | else: suffix = "" |
---|
29 | |
---|
30 | print >>sys.stderr, "\nReading feature", |
---|
31 | interval_file = open(args[0]) |
---|
32 | feature = binned_bitsets_from_file(interval_file, lens=lens) |
---|
33 | interval_file.close() |
---|
34 | # reuse interval file |
---|
35 | intervals = {} |
---|
36 | interval_file = open(args[0]) |
---|
37 | for line in interval_file: |
---|
38 | fields = line.split() |
---|
39 | chrom, start, end = fields[0], int(fields[1]), int(fields[2]) |
---|
40 | if chrom not in intervals: intervals[chrom] = [] |
---|
41 | intervals[chrom].append( [start,end] ) |
---|
42 | interval_file.close() |
---|
43 | |
---|
44 | print >>sys.stderr, "\nReading ar", |
---|
45 | ar = binned_bitsets_from_file(open( args[1] ), lens=lens) |
---|
46 | |
---|
47 | print >>sys.stderr, "\nReading snps", |
---|
48 | snp = binned_bitsets_from_file(open( args[2] ), lens=lens) |
---|
49 | snp_mask = clone_inverted( snp ) |
---|
50 | snp_copy = clone( snp ) |
---|
51 | |
---|
52 | print >>sys.stderr, "\nMasking AR", |
---|
53 | ar_mask = clone_inverted( ar ) |
---|
54 | print >>sys.stderr |
---|
55 | |
---|
56 | dirname = args[3] |
---|
57 | |
---|
58 | |
---|
59 | if options.mask: mask = binned_bitsets_from_file(open(options.mask), lens=lens) |
---|
60 | else: mask = None |
---|
61 | except: |
---|
62 | doc_optparse.exit() |
---|
63 | |
---|
64 | if mask: |
---|
65 | for chrom in mask.keys(): |
---|
66 | if chrom in feature: feature[chrom].iand( mask[chrom] ) |
---|
67 | if chrom in ar: ar[chrom].iand( mask[chrom] ) |
---|
68 | |
---|
69 | |
---|
70 | # divergence and snp counts for all features |
---|
71 | feature_div_count = 0 |
---|
72 | feature_snp_count = 0 |
---|
73 | ar_div_count = 0 |
---|
74 | ar_snp_count = 0 |
---|
75 | |
---|
76 | # collect snp and div |
---|
77 | for chr in feature.keys(): |
---|
78 | |
---|
79 | if chr not in snp: continue |
---|
80 | if chr not in ar: continue |
---|
81 | |
---|
82 | print >>sys.stderr, "reading %s ..." % chr, |
---|
83 | try: |
---|
84 | div = binned_bitsets_from_file( open( dirname + "/%s.bed" % (chr+suffix) ), lens=lens) |
---|
85 | except: |
---|
86 | print >>sys.stderr,"%s.bed not found" % chr |
---|
87 | continue |
---|
88 | |
---|
89 | div[chr].iand( snp_mask[chr] ) # div/snp sites count snp-only |
---|
90 | div_copy = clone( div ) |
---|
91 | |
---|
92 | print >>sys.stderr, "AR:", chr, |
---|
93 | snp[chr].iand( ar[chr] ) |
---|
94 | div[chr].iand( ar[chr] ) |
---|
95 | snp_count = snp[chr].count_range(0,snp[chr].size) |
---|
96 | ar_snp_count += snp_count |
---|
97 | print >>sys.stderr, snp_count, |
---|
98 | try: |
---|
99 | div_count = div[chr].count_range(0,div[chr].size) |
---|
100 | ar_div_count += div_count |
---|
101 | print >>sys.stderr, div_count |
---|
102 | except: |
---|
103 | print >>sys.stderr, chr, "failed" |
---|
104 | |
---|
105 | div = div_copy |
---|
106 | snp[chr] = snp_copy[chr] |
---|
107 | print >>sys.stderr, "feature:", chr, |
---|
108 | feature[chr].iand( ar_mask[chr] ) # clip to non-AR only |
---|
109 | snp[chr].iand( feature[chr] ) |
---|
110 | div[chr].iand( feature[chr] ) |
---|
111 | feature_snp_count += snp[chr].count_range(0,snp[chr].size) |
---|
112 | print >>sys.stderr, snp[chr].count_range(0,snp[chr].size), div[chr].count_range(0,div[chr].size) |
---|
113 | feature_div_count += div[chr].count_range(0,div[chr].size) |
---|
114 | print >>sys.stderr, snp[chr].count_range(0,snp[chr].size), div[chr].count_range(0,div[chr].size) |
---|
115 | |
---|
116 | # Note: can loop over feature intervals here for individual counts |
---|
117 | if chr in intervals: |
---|
118 | for start,end in intervals[chr]: |
---|
119 | ind_div_count = div[chr].count_range(start,end-start) |
---|
120 | ind_snp_count = snp[chr].count_range(start,end-start) |
---|
121 | print chr, start, end, ind_div_count, ind_snp_count |
---|
122 | |
---|
123 | print "feature snp\t%d" %feature_snp_count |
---|
124 | print "feature div\t%d" %feature_div_count |
---|
125 | print "ar snp\t%d" %ar_snp_count |
---|
126 | print "ar div\t%d" %ar_div_count |
---|
127 | |
---|
128 | # copies a dictionary of bitsets |
---|
129 | def copybits( binnedbits ): |
---|
130 | bitset = BinnedBitSet( binnedbits.size ) |
---|
131 | bitset.ior( binnedbits ) |
---|
132 | return bitset |
---|
133 | |
---|
134 | def clone( bitsets ): |
---|
135 | r = {} |
---|
136 | for k,b in bitsets.items(): r[ k ] = copybits( b ) |
---|
137 | return r |
---|
138 | |
---|
139 | def clone_inverted( bitsets ): |
---|
140 | r = {} |
---|
141 | for k,b in bitsets.items(): |
---|
142 | r[ k ] = copybits( b ) |
---|
143 | r[ k ].invert() |
---|
144 | return r |
---|
145 | |
---|
146 | main() |
---|