1 | """ |
---|
2 | Merge overlapping regions in a single set of genomic intervals. |
---|
3 | """ |
---|
4 | |
---|
5 | import psyco_full |
---|
6 | |
---|
7 | import traceback |
---|
8 | import fileinput |
---|
9 | from warnings import warn |
---|
10 | |
---|
11 | from bx.intervals.io import * |
---|
12 | from bx.intervals.operations import * |
---|
13 | |
---|
14 | # sorting could make this a less memory intensive operation(?) |
---|
def merge( interval, mincols=1 ):
    """
    Generate one output row per contiguous covered region of `interval`.

    The reader is wrapped in a BitsetSafeReaderWrapper, converted to a
    mapping of bitsets via `binned_bitsets()`, and every (start, end) run
    reported by `bits_set_in_range` for each key of that mapping is emitted.
    Overlapping input regions therefore come out as a single region
    (presumably keys are chromosome names -- they are written to the
    chromosome column as-is).

    Parameters:
        interval: an interval reader exposing `chrom_col`, `start_col` and
            `end_col`; it is re-wrapped here, so callers need not wrap it.
        mincols: accepted for interface compatibility; not used by this
            function.

    Yields:
        `interval.header` first, if it is truthy, then one list of strings
        per merged region.  Each list is long enough to hold the largest of
        the three column indexes; columns other than chrom/start/end are
        filled with ".".  Every yielded list is a fresh object, so callers
        may safely keep references to earlier rows.
    """
    # Handle any ValueError, IndexError and OverflowError exceptions that may be thrown when
    # the bitsets are being created by skipping the problem lines
    interval = BitsetSafeReaderWrapper( interval, lens={} )
    bitsets = interval.binned_bitsets()
    if interval.header:
        yield interval.header
    # The column layout does not change while iterating, so compute it once.
    chrom_col = interval.chrom_col
    start_col = interval.start_col
    end_col = interval.end_col
    width = max( chrom_col, start_col, end_col ) + 1
    for chrom in bitsets:
        bitset = bitsets[chrom]
        try:
            for start, end in bits_set_in_range( bitset, 0, MAX_END ):
                # Build a new row each time: yielding one shared list that is
                # mutated in place would make all collected rows alias the
                # last region of the chromosome.
                output = ["."] * width
                output[chrom_col] = chrom
                output[start_col] = str( start )
                output[end_col] = str( end )
                yield output
        except IndexError as e:
            try:
                # This will work only if interval is a NiceReaderWrapper
                interval.skipped += 1
                # no reason to stuff an entire bad file into memory
                if interval.skipped < 10:
                    interval.skipped_lines.append( ( interval.linenum, interval.current_line, str( e ) ) )
            except AttributeError:
                # Reader does not track skipped lines; bookkeeping is
                # best-effort, so carry on with the next chromosome.
                pass
---|