| 1 | #!/usr/bin/python2.6 |
|---|
| 2 | |
|---|
| 3 | """ |
|---|
| 4 | Merge any overlapping regions of bed files. Bed files can be provided on the |
|---|
| 5 | command line or on stdin. Merged regions are always reported on the '+' |
|---|
| 6 | strand, and any fields beyond chrom/start/stop are lost. |
|---|
| 7 | |
|---|
| 8 | usage: %prog bed files ... |
|---|
| 9 | """ |
|---|
| 10 | |
|---|
| 11 | import psyco_full |
|---|
| 12 | import sys |
|---|
| 13 | |
|---|
| 14 | from bx.bitset import * |
|---|
| 15 | from bx.bitset_builders import * |
|---|
| 16 | from itertools import * |
|---|
| 17 | |
|---|
| 18 | bed_filenames = sys.argv[1:] |
|---|
| 19 | if bed_filenames: |
|---|
| 20 | input = chain( * imap( open, bed_filenames ) ) |
|---|
| 21 | else: |
|---|
| 22 | input = sys.stdin |
|---|
| 23 | |
|---|
| 24 | bitsets = binned_bitsets_from_bed_file( input ) |
|---|
| 25 | |
|---|
| 26 | for chrom in bitsets: |
|---|
| 27 | bits = bitsets[chrom] |
|---|
| 28 | end = 0 |
|---|
| 29 | while 1: |
|---|
| 30 | start = bits.next_set( end ) |
|---|
| 31 | if start == bits.size: break |
|---|
| 32 | end = bits.next_clear( start ) |
|---|
| 33 | print "%s\t%d\t%d" % ( chrom, start, end ) |
|---|