1 | """ |
---|
2 | Complement a set of intervals. |
---|
3 | """ |
---|
4 | |
---|
5 | import psyco_full |
---|
6 | |
---|
7 | import traceback |
---|
8 | import fileinput |
---|
9 | from warnings import warn |
---|
10 | |
---|
11 | from bx.intervals.io import * |
---|
12 | from bx.intervals.operations import * |
---|
13 | from bx.bitset import MAX |
---|
14 | |
---|
15 | def complement( reader, lens ): |
---|
16 | # Handle any ValueError, IndexError and OverflowError exceptions that may be thrown when |
---|
17 | # the bitsets are being created by skipping the problem lines |
---|
18 | complement_reader = BitsetSafeReaderWrapper( reader, lens=lens ) |
---|
19 | bitsets = complement_reader.binned_bitsets( upstream_pad=0, downstream_pad=0, lens=lens ) |
---|
20 | # NOT them all |
---|
21 | for key, value in bitsets.items(): |
---|
22 | value.invert() |
---|
23 | # Read remaining intervals and subtract |
---|
24 | for chrom in bitsets: |
---|
25 | bitset = bitsets[chrom] |
---|
26 | out_intervals = bits_set_in_range( bitset, 0, lens.get( chrom, MAX ) ) |
---|
27 | try: |
---|
28 | # Write the intervals |
---|
29 | for start, end in out_intervals: |
---|
30 | fields = ["." for x in range(max(complement_reader.chrom_col, complement_reader.start_col, complement_reader.end_col)+1)] |
---|
31 | # default the column to a + if it exists |
---|
32 | if complement_reader.strand_col < len( fields ) and complement_reader.strand_col >= 0: |
---|
33 | fields[complement_reader.strand_col] = "+" |
---|
34 | fields[complement_reader.chrom_col] = chrom |
---|
35 | fields[complement_reader.start_col] = start |
---|
36 | fields[complement_reader.end_col] = end |
---|
37 | new_interval = GenomicInterval(complement_reader, fields, complement_reader.chrom_col, complement_reader.start_col, complement_reader.end_col, complement_reader.strand_col, "+") |
---|
38 | yield new_interval |
---|
39 | except IndexError, e: |
---|
40 | complement_reader.skipped += 1 |
---|
41 | # no reason to stuff an entire bad file into memmory |
---|
42 | if complement_reader.skipped < 10: |
---|
43 | complement_reader.skipped_lines.append( ( complement_reader.linenum, complement_reader.current_line, str( e ) ) ) |
---|
44 | continue |
---|
45 | |
---|
46 | |
---|
47 | # def main(): |
---|
48 | # # test it all out |
---|
49 | # f1 = fileinput.FileInput("dataset_7.dat") |
---|
50 | # g1 = GenomicIntervalReader(f1) |
---|
51 | # for interval in complement(g1,{"chr":16000000}): |
---|
52 | # print "\t".join(interval) |
---|
53 | # |
---|
54 | # if __name__ == "__main__": |
---|
55 | # main() |
---|