| 1 | """ |
|---|
| 2 | Complement a set of intervals. |
|---|
| 3 | """ |
|---|
| 4 | |
|---|
| 5 | import psyco_full |
|---|
| 6 | |
|---|
| 7 | import traceback |
|---|
| 8 | import fileinput |
|---|
| 9 | from warnings import warn |
|---|
| 10 | |
|---|
| 11 | from bx.intervals.io import * |
|---|
| 12 | from bx.intervals.operations import * |
|---|
| 13 | from bx.bitset import MAX |
|---|
| 14 | |
|---|
def complement( reader, lens ):
    """
    Yield the intervals that are NOT covered by the intervals in `reader`.

    `reader` is wrapped in a BitsetSafeReaderWrapper and loaded into a mapping
    of chromosome name to bitset.  Every bitset is inverted, and each run of
    set bits reported by `bits_set_in_range` is yielded as a "+" strand
    GenomicInterval that uses the wrapped reader's column layout.

    `lens` maps chromosome name to length.  It is handed to the bitset
    construction and also bounds the range scanned for each chromosome;
    chromosomes missing from it are scanned up to MAX.

    If an IndexError is raised while a chromosome's intervals are being
    emitted, the rest of that chromosome is dropped, `skipped` is incremented
    on the wrapped reader and, while that count is below 10, the error is
    appended to its `skipped_lines`.
    """
    # Handle any ValueError, IndexError and OverflowError exceptions that may be thrown when
    # the bitsets are being created by skipping the problem lines
    complement_reader = BitsetSafeReaderWrapper( reader, lens=lens )
    bitsets = complement_reader.binned_bitsets( upstream_pad=0, downstream_pad=0, lens=lens )
    # NOT them all
    for bitset in bitsets.values():
        bitset.invert()
    # Read the column layout once rather than on every interval
    chrom_col = complement_reader.chrom_col
    start_col = complement_reader.start_col
    end_col = complement_reader.end_col
    strand_col = complement_reader.strand_col
    # Just enough columns to hold chrom, start and end
    num_fields = max( chrom_col, start_col, end_col ) + 1
    # Emit every set region of each inverted bitset
    for chrom in bitsets:
        out_intervals = bits_set_in_range( bitsets[chrom], 0, lens.get( chrom, MAX ) )
        try:
            # Write the intervals
            for start, end in out_intervals:
                fields = ["."] * num_fields
                # default the strand column to a + if it exists
                if 0 <= strand_col < num_fields:
                    fields[strand_col] = "+"
                fields[chrom_col] = chrom
                fields[start_col] = start
                fields[end_col] = end
                yield GenomicInterval( complement_reader, fields, chrom_col, start_col, end_col, strand_col, "+" )
        except IndexError as e:
            complement_reader.skipped += 1
            # no reason to stuff an entire bad file into memory
            if complement_reader.skipped < 10:
                complement_reader.skipped_lines.append( ( complement_reader.linenum, complement_reader.current_line, str( e ) ) )
|---|
| 45 | |
|---|
| 46 | |
|---|
| 47 | # def main(): |
|---|
| 48 | # # test it all out |
|---|
| 49 | # f1 = fileinput.FileInput("dataset_7.dat") |
|---|
| 50 | # g1 = GenomicIntervalReader(f1) |
|---|
| 51 | # for interval in complement(g1,{"chr":16000000}): |
|---|
| 52 | # print "\t".join(interval) |
|---|
| 53 | # |
|---|
| 54 | # if __name__ == "__main__": |
|---|
| 55 | # main() |
|---|