1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
3 | """ |
---|
4 | Merge any overlapping regions of bed files. Bed files can be provided on the |
---|
5 | command line or on stdin. Merged regions are always reported on the '+' |
---|
6 | strand, and any fields beyond chrom/start/stop are lost. |
---|
7 | |
---|
8 | usage: %prog bed files ... |
---|
9 | """ |
---|
10 | |
---|
11 | import psyco_full |
---|
12 | import sys |
---|
13 | |
---|
14 | from bx.bitset import * |
---|
15 | from bx.bitset_builders import * |
---|
16 | from itertools import * |
---|
17 | |
---|
18 | bed_filenames = sys.argv[1:] |
---|
19 | if bed_filenames: |
---|
20 | input = chain( * imap( open, bed_filenames ) ) |
---|
21 | else: |
---|
22 | input = sys.stdin |
---|
23 | |
---|
24 | bitsets = binned_bitsets_from_bed_file( input ) |
---|
25 | |
---|
26 | for chrom in bitsets: |
---|
27 | bits = bitsets[chrom] |
---|
28 | end = 0 |
---|
29 | while 1: |
---|
30 | start = bits.next_set( end ) |
---|
31 | if start == bits.size: break |
---|
32 | end = bits.next_clear( start ) |
---|
33 | print "%s\t%d\t%d" % ( chrom, start, end ) |
---|