| 1 | #!/usr/bin/python2.6 |
|---|
| 2 | |
|---|
| 3 | """ |
|---|
| 4 | Complement the regions of a bed file. Requires a file that maps source names |
|---|
| 5 | to sizes. This should be in the simple LEN file format (each line contains |
|---|
| 6 | a source name followed by a size, separated by whitespace). |
|---|
| 7 | |
|---|
| 8 | usage: %prog bed_file chrom_length_file |
|---|
| 9 | """ |
|---|
| 10 | |
|---|
| 11 | import sys |
|---|
| 12 | |
|---|
| 13 | from bx.bitset import * |
|---|
| 14 | from bx.bitset_builders import * |
|---|
| 15 | |
|---|
| 16 | from bx.cookbook import doc_optparse |
|---|
| 17 | |
|---|
def read_len( f ):
    """
    Read a 'LEN' file and return a mapping from chromosome to length.

    `f` is any iterable of text lines (typically an open file). Each
    non-blank line must hold a source name followed by an integer size,
    separated by whitespace; any further fields on the line are ignored.
    Blank or whitespace-only lines are skipped. If a name occurs more than
    once, the last size wins.

    Raises IndexError for a line that has a name but no size, and
    ValueError when the size field is not an integer.
    """
    mapping = dict()
    for line in f:
        fields = line.split()
        # A blank line (commonly a trailing newline at the end of the file)
        # splits to an empty list; skip it instead of failing on fields[0].
        if not fields:
            continue
        mapping[ fields[0] ] = int( fields[1] )
    return mapping
|---|
| 25 | |
|---|
# Parse the command line; the usage text comes from the module docstring.
options, args = doc_optparse.parse( __doc__ )
try:
    in_fname, len_fname = args
except ValueError:
    # Wrong number of positional arguments: hand off to doc_optparse.exit().
    doc_optparse.exit()

# NOTE(review): closing the file right after the call assumes
# binned_bitsets_from_file consumes it eagerly -- confirm against bx-python.
with open( in_fname ) as in_file:
    bitsets = binned_bitsets_from_file( in_file )

with open( len_fname ) as len_file:
    lens = read_len( len_file )

for chrom in lens:
    # Named chrom_len (not `len`) so the builtin is not shadowed.
    chrom_len = lens[chrom]
    if chrom in bitsets:
        # After inversion the set bits mark the regions NOT covered by the
        # bed file; walk each run of set bits and print it as an interval.
        bits = bitsets[chrom]
        bits.invert()
        end = 0
        while True:
            start = bits.next_set( end )
            # Stop when no set bit remains, or when the next uncovered run
            # starts at/after the chromosome end (a bed interval reached or
            # overran the declared length). Without the second check the
            # clamp below would print an empty or inverted interval.
            if start == bits.size or start >= chrom_len:
                break
            end = bits.next_clear( start )
            # The bitset may be larger than the chromosome; clamp the last run.
            if end > chrom_len:
                end = chrom_len
            # Single-argument parenthesised print behaves the same on Python 2.6.
            print( "%s\t%d\t%d" % ( chrom, start, end ) )
            if end == chrom_len:
                break
    else:
        # No bed intervals on this chromosome: its complement is everything.
        print( "%s\t%d\t%d" % ( chrom, 0, chrom_len ) )
|---|