1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
3 | """ |
---|
4 | Find regions of first bed file that overlap regions in a second bed file. The |
---|
5 | output preserves all fields from the input. |
---|
6 | |
---|
7 | NOTE: -u and -d options are currently not functional! |
---|
8 | |
---|
9 | usage: %prog bed_file_1 bed_file_2 |
---|
10 | -m, --mincols=N: Require this much overlap (default 1bp) |
---|
11 | -u, --upstream_pad=N: upstream interval padding (default 0bp) |
---|
12 | -d, --downstream_pad=N: downstream interval padding (default 0bp) |
---|
13 | -v, --reverse: Print regions that DO NOT overlap |
---|
14 | -b, --booleans: Just print '1' if interval overlaps or '0' otherwise |
---|
15 | """ |
---|
16 | |
---|
17 | import sys |
---|
18 | from warnings import warn |
---|
19 | |
---|
20 | from bx.bitset import * |
---|
21 | from bx.bitset_builders import * |
---|
22 | |
---|
23 | from bx.cookbook import doc_optparse |
---|
24 | |
---|
25 | mincols = 1 |
---|
26 | upstream_pad = 0 |
---|
27 | downstream_pad = 0 |
---|
28 | |
---|
29 | options, args = doc_optparse.parse( __doc__ ) |
---|
30 | try: |
---|
31 | if options.mincols: mincols = int( options.mincols ) |
---|
32 | if options.upstream_pad: upstream_pad = int( options.upstream_pad ) |
---|
33 | if options.downstream_pad: downstream_pad = int( options.downstream_pad ) |
---|
34 | reverse = bool( options.reverse ) |
---|
35 | booleans = bool( options.booleans ) |
---|
36 | in_fname, in2_fname = args |
---|
37 | except: |
---|
38 | doc_optparse.exit() |
---|
39 | |
---|
40 | # Read first bed into some bitsets |
---|
41 | |
---|
42 | bitsets = binned_bitsets_from_file( open( in2_fname ) ) |
---|
43 | |
---|
44 | # Read second BED and intersect |
---|
45 | |
---|
46 | for line in open( in_fname ): |
---|
47 | if line.startswith("#") or line.isspace(): |
---|
48 | continue |
---|
49 | fields = line.split() |
---|
50 | start, end = int( fields[1] ), int( fields[2] ) |
---|
51 | if start > end: |
---|
52 | warn( "Bed interval start after end!" ) |
---|
53 | if fields[0] in bitsets and bitsets[fields[0]].count_range( start, end-start ) >= mincols: |
---|
54 | if booleans: |
---|
55 | if reverse: |
---|
56 | print 0 |
---|
57 | else: |
---|
58 | print 1 |
---|
59 | elif not reverse: |
---|
60 | print line, |
---|
61 | else: |
---|
62 | if booleans: |
---|
63 | if reverse: |
---|
64 | print 1 |
---|
65 | else: |
---|
66 | print 0 |
---|
67 | elif reverse: |
---|
68 | print line, |
---|