1 | """ |
---|
2 | Determine amount of each interval in one set covered by the intervals of |
---|
3 | another set. Adds two columns to the first input, giving number of bases |
---|
4 | covered and percent coverage on the second input. |
---|
5 | """ |
---|
6 | |
---|
7 | import traceback |
---|
8 | import fileinput |
---|
9 | from warnings import warn |
---|
10 | |
---|
11 | from bx.intervals.io import * |
---|
12 | from bx.intervals.operations import * |
---|
13 | |
---|
14 | def coverage(readers, comments=True): |
---|
15 | # The incoming lens dictionary is a dictionary of chromosome lengths which are used to initialize the bitsets. |
---|
16 | primary = readers[0] |
---|
17 | intersect = readers[1:] |
---|
18 | # Handle any ValueError, IndexError and OverflowError exceptions that may be thrown when |
---|
19 | # the bitsets are being created by skipping the problem lines |
---|
20 | intersect[0] = BitsetSafeReaderWrapper( intersect[0], lens={} ) |
---|
21 | bitsets = intersect[0].binned_bitsets() |
---|
22 | intersect = intersect[1:] |
---|
23 | for andset in intersect: |
---|
24 | bitset2 = andset.binned_bitsets() |
---|
25 | for chrom in bitsets: |
---|
26 | if chrom not in bitset2: continue |
---|
27 | bitsets[chrom].ior(bitset2[chrom]) |
---|
28 | intersect = intersect[1:] |
---|
29 | |
---|
30 | # Read remaining intervals and give coverage |
---|
31 | for interval in primary: |
---|
32 | if type( interval ) is Header: |
---|
33 | yield interval |
---|
34 | if type( interval ) is Comment and comments: |
---|
35 | yield interval |
---|
36 | elif type( interval ) == GenomicInterval: |
---|
37 | chrom = interval.chrom |
---|
38 | start = int(interval.start) |
---|
39 | end = int(interval.end) |
---|
40 | if start > end: |
---|
41 | try: |
---|
42 | # This will only work if primary is a NiceReaderWrapper |
---|
43 | primary.skipped += 1 |
---|
44 | # no reason to stuff an entire bad file into memmory |
---|
45 | if primary.skipped < 10: |
---|
46 | primary.skipped_lines.append( ( primary.linenum, primary.current_line, "Interval start after end!" ) ) |
---|
47 | except: |
---|
48 | pass |
---|
49 | continue |
---|
50 | if chrom not in bitsets: |
---|
51 | bases_covered = 0 |
---|
52 | percent = 0.0 |
---|
53 | else: |
---|
54 | try: |
---|
55 | bases_covered = bitsets[ chrom ].count_range( start, end-start ) |
---|
56 | except IndexError, e: |
---|
57 | try: |
---|
58 | # This will only work if primary is a NiceReaderWrapper |
---|
59 | primary.skipped += 1 |
---|
60 | # no reason to stuff an entire bad file into memmory |
---|
61 | if primary.skipped < 10: |
---|
62 | primary.skipped_lines.append( ( primary.linenum, primary.current_line, str( e ) ) ) |
---|
63 | except: |
---|
64 | pass |
---|
65 | continue |
---|
66 | if (end - start) == 0: |
---|
67 | percent = 0 |
---|
68 | else: |
---|
69 | percent = float(bases_covered) / float(end - start) |
---|
70 | interval.fields.append(str(bases_covered)) |
---|
71 | interval.fields.append(str(percent)) |
---|
72 | yield interval |
---|