| 1 | #!/usr/bin/python2.6 |
|---|
| 2 | |
|---|
| 3 | """ |
|---|
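| 4 | For each interval in `bed1` print the fraction of bases covered by `bed2`. If the optional `mask` file is given, bases covered by `mask` are excluded from both the covered count and the interval length. |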
|---|
| 5 | |
|---|
| 6 | usage: %prog bed1 bed2 [mask] |
|---|
| 7 | """ |
|---|
| 8 | |
|---|
| 9 | from __future__ import division |
|---|
| 10 | |
|---|
| 11 | import psyco_full |
|---|
| 12 | import sys |
|---|
| 13 | from bx.bitset import BinnedBitSet |
|---|
| 14 | from bx.bitset_builders import * |
|---|
| 15 | from itertools import * |
|---|
| 16 | |
|---|
# Both positional arguments are required; exit with the usage line rather
# than an opaque tuple-unpacking error when they are missing.
if len( sys.argv ) < 3:
    sys.exit( "usage: %s bed1 bed2 [mask]" % sys.argv[0] )
bed1_fname, bed2_fname = sys.argv[1:3]

# Bitsets of the bases covered by `bed2`, looked up below by the first
# column (chromosome name) of each `bed1` line.  The `with` block closes the
# file handle as soon as the bitsets are built.
# NOTE(review): assumes binned_bitsets_from_file reads the whole file before
# returning (i.e. is not lazy) -- confirm.
with open( bed2_fname ) as bed2_file:
    bitsets = binned_bitsets_from_file( bed2_file )
|---|
| 20 | |
|---|
def clone( bits ):
    """
    Return a new `BinnedBitSet` with the same size as `bits` and the bits of
    `bits` OR-ed into it, leaving `bits` itself untouched.
    """
    duplicate = BinnedBitSet( bits.size )
    # OR the source into the freshly constructed bitset (presumably empty
    # on construction) so the result carries the same set bits.
    duplicate.ior( bits )
    return duplicate
|---|
| 25 | |
|---|
# Optional third argument: a file of masked regions.  When present, masked
# bases are removed from the `bed2` coverage so they never count as covered.
if len( sys.argv ) > 3:
    mask_fname = sys.argv[3]
    # Close the mask file once its bitsets have been built.
    with open( mask_fname ) as mask_file:
        mask = binned_bitsets_from_file( mask_file )
    new_bitsets = dict()
    for key in bitsets:
        if key in mask:
            # coverage AND (NOT mask): work on a copy so `mask` itself stays
            # intact for the per-interval masked-base counts later on.
            b = clone( mask[key] )
            b.invert()
            b.iand( bitsets[key] )
            new_bitsets[key] = b
        else:
            # Nothing is masked on this chromosome, so its coverage is kept
            # as-is.  (Previously such chromosomes were dropped, making every
            # interval on them report zero coverage.)
            new_bitsets[key] = bitsets[key]
    bitsets = new_bitsets
else:
    mask = None
|---|
| 39 | |
|---|
# Print one coverage fraction per interval of `bed1`, in input order.
with open( bed1_fname ) as bed1_file:
    for line in bed1_file:
        # Blank lines and '#' comment lines carry no interval; skip them
        # instead of failing on the field parse below.
        if not line.strip() or line.startswith( "#" ):
            continue
        fields = line.split()
        # Named `chrom` (not `chr`) so the builtin is not shadowed.
        chrom, start, end = fields[0], int( fields[1] ), int( fields[2] )
        span = end - start
        bases_covered = 0
        if chrom in bitsets:
            bases_covered = bitsets[ chrom ].count_range( start, span )
        # `length` is the denominator: the interval size minus masked bases.
        length = span
        # Reset per interval so the assertion message below never sees an
        # unbound name or a stale value from an earlier line.
        bases_masked = 0
        if mask and chrom in mask:
            bases_masked = mask[ chrom ].count_range( start, span )
            length -= bases_masked
        # Invariant: covered bases can never exceed the unmasked length.
        assert bases_covered <= length, "%r, %r, %r" % ( bases_covered, bases_masked, length )
        if length == 0:
            # Empty or fully masked interval: avoid dividing by zero.
            print( 0.0 )
        else:
            # True division, via the file's `from __future__ import division`.
            print( bases_covered / length )
|---|
| 55 | |
|---|