1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
"""
Chops alignments in a MAF file into pieces of a specified length. A random set
of non-overlapping chunks of exactly the specified chop length will be produced.

usage: %prog [options] < maf > maf
    -l, --length: Chop to exactly this length in columns (default 100)
"""
---|
10 | |
---|
11 | import sys |
---|
12 | |
---|
13 | import sys, random |
---|
14 | import bx.align.maf |
---|
15 | from optparse import OptionParser |
---|
16 | |
---|
def main():
    """Read MAF alignments from stdin and write their chopped pieces to stdout.

    Command-line options:
        -l, --length  Chop length in columns (default 100). Must be positive;
                      any other value ends the program with a usage error.
    """

    # Parse command line arguments

    parser = OptionParser()
    parser.add_option( "-l", "--length", action="store", type="int", default=100,
                       help="Chop to exactly this length in columns (default 100)" )

    ( options, args ) = parser.parse_args()

    length = options.length
    # chop() divides by the length: zero would raise ZeroDivisionError and a
    # negative value would produce no chunks at all, so reject both up front.
    if length <= 0:
        parser.error( "--length must be a positive integer" )

    maf_reader = bx.align.maf.Reader( sys.stdin )
    maf_writer = bx.align.maf.Writer( sys.stdout )

    for m in maf_reader:
        for chopped in chop( m, length ):
            maf_writer.write( chopped )
---|
33 | |
---|
def chop( m, length ):
    """Cut alignment `m` into non-overlapping chunks of exactly `length` columns.

    The columns that do not fit into a whole number of chunks
    (`m.text_size % length`) are distributed at random over the gaps before,
    between and after the chunks, so chunk positions vary from call to call.
    Chunks rejected by `check_len` (some component has size 0) are dropped.

    m      -- alignment object providing `text_size` and `slice( start, end )`
    length -- chunk length in columns; expected to be a positive integer
              (zero fails in the integer division below)

    Returns a list of the sliced alignments, in left-to-right order.
    """
    maf_length = m.text_size
    chunk_count = maf_length // length
    lost_bases = maf_length % length
    # One skip slot in front of every chunk plus one after the last chunk
    skip_amounts = [0] * ( chunk_count + 1 )
    for _ in range( 0, lost_bases ):
        skip_amounts[ random.randrange( 0, chunk_count + 1 ) ] += 1
    start = 0
    rval = []
    for i in range( 0, chunk_count ):
        start += skip_amounts[ i ]
        n = m.slice( start, start + length )
        # Keep the slice that was just checked instead of slicing a second time
        if check_len( n ):
            rval.append( n )
        start += length
    return rval
---|
48 | |
---|
def check_len( a ):
    """Return True unless some component of alignment `a` has size 0."""
    return not any( comp.size == 0 for comp in a.components )
---|
53 | |
---|
54 | |
---|
# Run the chopper only when this file is executed as a script.
if __name__ == "__main__":
    main()
---|