1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
3 | """ |
---|
4 | Build windows of length `window_size` over the sequences defined by |
---|
5 | `len_file` excluding regions in `gap_file`. |
---|
6 | |
---|
7 | After removing the gaps, windows of exactly `window_size` units will be |
---|
8 | placed in the remaining regions, with the extra space evenly placed |
---|
9 | between the windows. |
---|
10 | |
---|
11 | `len_file` is LEN format (name length) and `gap_file is BED (name start stop). |
---|
12 | |
---|
13 | usage: %prog len_file gap_file window_size |
---|
14 | """ |
---|
15 | |
---|
16 | import sys |
---|
17 | from bx.bitset_builders import binned_bitsets_from_file |
---|
18 | import random |
---|
19 | |
---|
20 | def main(): |
---|
21 | region_fname, exclude_fname, window_size = sys.argv[1], sys.argv[2], int( sys.argv[3] ) |
---|
22 | exclude_bitsets = binned_bitsets_from_file( open( exclude_fname ) ) |
---|
23 | for line in open( region_fname ): |
---|
24 | fields = line.split() |
---|
25 | chr, start, end = fields[0], 0, int( fields[1] ) |
---|
26 | if chr not in exclude_bitsets: |
---|
27 | do_windows( chr, start, end, window_size ) |
---|
28 | else: |
---|
29 | bits = exclude_bitsets[chr] |
---|
30 | assert end < bits.size |
---|
31 | e = 0 |
---|
32 | while 1: |
---|
33 | s = bits.next_clear( e ) |
---|
34 | if s > end: break |
---|
35 | e = bits.next_set( s ) |
---|
36 | do_windows( chr, s, min( e, end ), window_size ) |
---|
37 | |
---|
38 | def do_windows( chr, start, end, window_size ): |
---|
39 | length = end - start |
---|
40 | window_count = length // window_size |
---|
41 | if window_count == 0: |
---|
42 | return |
---|
43 | lost = length % window_size |
---|
44 | skip_amount = lost // window_count |
---|
45 | ## skip_amounts = [0] * ( window_count + 1 ) |
---|
46 | ## for i in range( 0, lost ): skip_amounts[ random.randrange( 0, window_count + 1 ) ] += 1 |
---|
47 | s = 0 |
---|
48 | for i in range( 0, window_count ): |
---|
49 | s += skip_amount |
---|
50 | print chr, start + s, start + s + window_size |
---|
51 | s += window_size |
---|
52 | |
---|
53 | if __name__ == "__main__": |
---|
54 | main() |
---|