| 1 | """ |
|---|
| 2 | Determine amount of each interval in one set covered by the intervals of |
|---|
| 3 | another set. Adds two columns to the first input, giving number of bases |
|---|
| 4 | covered and percent coverage on the second input. |
|---|
| 5 | """ |
|---|
| 6 | |
|---|
| 7 | import traceback |
|---|
| 8 | import fileinput |
|---|
| 9 | from warnings import warn |
|---|
| 10 | |
|---|
| 11 | from bx.intervals.io import * |
|---|
| 12 | from bx.intervals.operations import * |
|---|
| 13 | |
|---|
| 14 | def coverage(readers, comments=True): |
|---|
| 15 | # The incoming lens dictionary is a dictionary of chromosome lengths which are used to initialize the bitsets. |
|---|
| 16 | primary = readers[0] |
|---|
| 17 | intersect = readers[1:] |
|---|
| 18 | # Handle any ValueError, IndexError and OverflowError exceptions that may be thrown when |
|---|
| 19 | # the bitsets are being created by skipping the problem lines |
|---|
| 20 | intersect[0] = BitsetSafeReaderWrapper( intersect[0], lens={} ) |
|---|
| 21 | bitsets = intersect[0].binned_bitsets() |
|---|
| 22 | intersect = intersect[1:] |
|---|
| 23 | for andset in intersect: |
|---|
| 24 | bitset2 = andset.binned_bitsets() |
|---|
| 25 | for chrom in bitsets: |
|---|
| 26 | if chrom not in bitset2: continue |
|---|
| 27 | bitsets[chrom].ior(bitset2[chrom]) |
|---|
| 28 | intersect = intersect[1:] |
|---|
| 29 | |
|---|
| 30 | # Read remaining intervals and give coverage |
|---|
| 31 | for interval in primary: |
|---|
| 32 | if type( interval ) is Header: |
|---|
| 33 | yield interval |
|---|
| 34 | if type( interval ) is Comment and comments: |
|---|
| 35 | yield interval |
|---|
| 36 | elif type( interval ) == GenomicInterval: |
|---|
| 37 | chrom = interval.chrom |
|---|
| 38 | start = int(interval.start) |
|---|
| 39 | end = int(interval.end) |
|---|
| 40 | if start > end: |
|---|
| 41 | try: |
|---|
| 42 | # This will only work if primary is a NiceReaderWrapper |
|---|
| 43 | primary.skipped += 1 |
|---|
| 44 | # no reason to stuff an entire bad file into memmory |
|---|
| 45 | if primary.skipped < 10: |
|---|
| 46 | primary.skipped_lines.append( ( primary.linenum, primary.current_line, "Interval start after end!" ) ) |
|---|
| 47 | except: |
|---|
| 48 | pass |
|---|
| 49 | continue |
|---|
| 50 | if chrom not in bitsets: |
|---|
| 51 | bases_covered = 0 |
|---|
| 52 | percent = 0.0 |
|---|
| 53 | else: |
|---|
| 54 | try: |
|---|
| 55 | bases_covered = bitsets[ chrom ].count_range( start, end-start ) |
|---|
| 56 | except IndexError, e: |
|---|
| 57 | try: |
|---|
| 58 | # This will only work if primary is a NiceReaderWrapper |
|---|
| 59 | primary.skipped += 1 |
|---|
| 60 | # no reason to stuff an entire bad file into memmory |
|---|
| 61 | if primary.skipped < 10: |
|---|
| 62 | primary.skipped_lines.append( ( primary.linenum, primary.current_line, str( e ) ) ) |
|---|
| 63 | except: |
|---|
| 64 | pass |
|---|
| 65 | continue |
|---|
| 66 | if (end - start) == 0: |
|---|
| 67 | percent = 0 |
|---|
| 68 | else: |
|---|
| 69 | percent = float(bases_covered) / float(end - start) |
|---|
| 70 | interval.fields.append(str(bases_covered)) |
|---|
| 71 | interval.fields.append(str(percent)) |
|---|
| 72 | yield interval |
|---|