| 1 | """ |
|---|
| 2 | Merge overlapping regions within a single set of genomic intervals. |
|---|
| 3 | """ |
|---|
| 4 | |
|---|
| 5 | import psyco_full |
|---|
| 6 | |
|---|
| 7 | import traceback |
|---|
| 8 | import fileinput |
|---|
| 9 | from warnings import warn |
|---|
| 10 | |
|---|
| 11 | from bx.intervals.io import * |
|---|
| 12 | from bx.intervals.operations import * |
|---|
| 13 | |
|---|
| 14 | # sorting could make this a less memory intensive operation(?) |
|---|
def merge( interval, mincols=1 ):
    """
    Yield the merged regions of `interval`, one output row per region.

    The reader is wrapped in a `BitsetSafeReaderWrapper` and loaded into
    per-chromosome bitsets via `binned_bitsets()`; every (start, end) pair
    reported by `bits_set_in_range` over `[0, MAX_END)` becomes one row.
    NOTE(review): presumably overlapping input intervals collapse into a
    single run of set bits, which is what performs the merge -- confirm
    against the bx bitset implementation.

    interval -- interval reader to merge; it is passed straight to
                `BitsetSafeReaderWrapper( interval, lens={} )`.
    mincols  -- currently unused; kept so existing callers that pass it
                keep working.

    Yields `interval.header` first when it is truthy, then one `list` of
    strings per region.  Each row is wide enough to hold the reader's
    chrom/start/end columns, with "." in every other column.  Every row is
    a fresh list, so rows may safely be collected by the caller.

    An IndexError raised while walking a chromosome's bitset abandons the
    rest of that chromosome; when the reader exposes the bookkeeping
    attributes, the error is counted in `skipped` and (for the first few
    errors) recorded in `skipped_lines`.
    """
    # Handle any ValueError, IndexError and OverflowError exceptions that may be thrown when
    # the bitsets are being created by skipping the problem lines
    interval = BitsetSafeReaderWrapper( interval, lens={} )
    bitsets = interval.binned_bitsets()
    if interval.header:
        yield interval.header
    # The column layout is fixed for the whole run, so read it once.
    chrom_col = interval.chrom_col
    start_col = interval.start_col
    end_col = interval.end_col
    width = max( chrom_col, start_col, end_col ) + 1
    for chrom in bitsets:
        bitset = bitsets[chrom]
        template = ["."] * width
        template[chrom_col] = chrom
        try:
            for start, end in bits_set_in_range( bitset, 0, MAX_END ):
                # Copy per region: yielding one shared, mutated list would make
                # every row a caller has kept alias the most recent region.
                # Must stay a plain list -- callers may dispatch on the row type.
                row = list( template )
                row[start_col] = str( start )
                row[end_col] = str( end )
                yield row
        except IndexError as e:
            try:
                # This will work only if interval is a NiceReaderWrapper
                interval.skipped += 1
                # no reason to stuff an entire bad file into memory
                if interval.skipped < 10:
                    interval.skipped_lines.append( ( interval.linenum, interval.current_line, str( e ) ) )
            except AttributeError:
                # Reader has no skip bookkeeping; best effort, move on to the
                # next chromosome.
                pass