1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
3 | """ |
---|
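For each interval in `bed1` print the fraction of bases covered by `bed2`.
If the optional `mask` BED file is given, bases covered by `mask` are excluded
from both the covered count and the interval length before dividing.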
---|
5 | |
---|
6 | usage: %prog bed1 bed2 [mask] |
---|
7 | """ |
---|
8 | |
---|
9 | from __future__ import division |
---|
10 | |
---|
11 | import psyco_full |
---|
12 | import sys |
---|
13 | from bx.bitset import BinnedBitSet |
---|
14 | from bx.bitset_builders import * |
---|
15 | from itertools import * |
---|
16 | |
---|
# Positional arguments: the BED file of query intervals (`bed1`) followed by
# the BED file whose coverage of those intervals is measured (`bed2`).
bed1_fname, bed2_fname = sys.argv[1:3]

# Bitsets built from `bed2`, looked up later by the first column of each
# `bed1` line. The file is read inside a `with` block so the handle is closed
# as soon as the bitsets exist instead of staying open for the whole run.
with open( bed2_fname ) as bed2_file:
    bitsets = binned_bitsets_from_file( bed2_file )
---|
20 | |
---|
def clone( bits ):
    """
    Return a new `BinnedBitSet` with the same size as `bits`, populated by
    OR-ing `bits` into it.
    """
    duplicate = BinnedBitSet( bits.size )
    # A freshly constructed set is presumed to start empty, so the OR leaves
    # `duplicate` holding exactly the bits that are set in `bits`.
    duplicate.ior( bits )
    return duplicate
---|
25 | |
---|
# Optional third argument: a BED file of regions to exclude from the
# computation. `mask` stays None when it is not supplied.
if len( sys.argv ) > 3:
    mask_fname = sys.argv[3]
    # Read the mask inside a `with` block so the file handle is closed once
    # the bitsets have been built.
    with open( mask_fname ) as mask_file:
        mask = binned_bitsets_from_file( mask_file )
    new_bitsets = dict()
    for key in bitsets:
        if key in mask:
            # Compute (NOT mask) AND covered on a copy, so that `mask` itself
            # is left intact for the per-interval masked-base counts below.
            b = clone( mask[key] )
            b.invert()
            b.iand( bitsets[key] )
            new_bitsets[key] = b
        else:
            # Nothing is masked on this sequence: keep its coverage as is.
            # Dropping it would make every interval on it report 0 coverage
            # even though its length is not reduced by any mask.
            new_bitsets[key] = bitsets[key]
    bitsets = new_bitsets
else:
    mask = None
---|
39 | |
---|
# Print one coverage fraction per interval line of `bed1`, in input order.
# The division below relies on `from __future__ import division` at the top
# of the file to produce a float.
with open( bed1_fname ) as bed1_file:
    for line in bed1_file:
        # Blank lines and `#` comment lines carry no interval; skip them
        # rather than failing on the missing columns.
        if not line.strip() or line.startswith( "#" ):
            continue
        fields = line.split()
        chrom, start, end = fields[0], int( fields[1] ), int( fields[2] )
        length = end - start
        bases_covered = 0
        # Reset for every interval so the assertion message below never reads
        # an unbound name or a stale value from a previous interval.
        bases_masked = 0
        if chrom in bitsets:
            bases_covered = bitsets[ chrom ].count_range( start, length )
        if mask and chrom in mask:
            bases_masked = mask[ chrom ].count_range( start, length )
        # Masked bases are removed from the denominator.
        length -= bases_masked
        assert bases_covered <= length, "%r, %r, %r" % ( bases_covered, bases_masked, length )
        if length == 0:
            # Empty or fully masked interval: report 0.0 instead of dividing
            # by zero.
            print( 0.0 )
        else:
            print( bases_covered / length )
---|
55 | |
---|