| 1 | #!/usr/bin/python2.6 |
|---|
| 2 | |
|---|
| 3 | """ |
|---|
| 4 | Chops alignments in a MAF file to pieces of a specified length. A random set of |
|---|
| 5 | non-overlapping chunks of exactly the specified chop length will be produced. |
|---|
| 6 | |
|---|
| 7 | usage: %prog [options] < maf > maf |
|---|
| 8 | -l, --length: Chop to exactly this length in columns (default 100) |
|---|
| 9 | """ |
|---|
| 10 | |
|---|
| 11 | import sys |
|---|
| 12 | |
|---|
| 13 | import sys, random |
|---|
| 14 | import bx.align.maf |
|---|
| 15 | from optparse import OptionParser |
|---|
| 16 | |
|---|
def main():
    """
    Read MAF alignments from stdin and write fixed-length chunks to stdout.

    Every alignment block read from stdin is passed to `chop` together with
    the requested chunk length, and each chunk returned is written to stdout
    in MAF format.

    Command line options:
        -l, --length: chunk length in columns (default 100). Must be a
            positive integer; anything else ends the program with a usage
            error instead of failing later inside `chop`.
    """

    # Parse command line arguments

    parser = OptionParser()
    parser.add_option( "-l", "--length", action="store", type="int", default=100,
                       help="Chop to exactly this length in columns (default 100)" )

    ( options, args ) = parser.parse_args()

    length = options.length
    # A zero length would divide by zero in chop and a negative one would
    # silently produce no output, so reject both up front.
    if length <= 0:
        parser.error( "--length must be a positive integer (got %d)" % length )

    maf_reader = bx.align.maf.Reader( sys.stdin )
    maf_writer = bx.align.maf.Writer( sys.stdout )

    for m in maf_reader:
        for chopped in chop( m, length ):
            maf_writer.write( chopped )
|---|
| 33 | |
|---|
def chop( m, length ):
    """
    Cut alignment `m` into non-overlapping chunks of exactly `length` columns.

    The `m.text_size % length` columns that do not fit into a whole chunk are
    discarded. They are distributed at random over the gaps before, between
    and after the chunks, so chunk positions vary from call to call. Chunks
    for which `check_len` returns False are left out of the result.

    m: alignment object providing `text_size` and `slice( start, end )`.
    length: chunk width in columns; expected to be a positive integer
        (a value of 0 raises ZeroDivisionError).

    Returns a list of sliced alignments in left-to-right order; the list is
    empty when `m.text_size` is smaller than `length`.
    """
    maf_length = m.text_size
    chunk_count = maf_length // length
    lost_bases = maf_length % length
    # One gap in front of every chunk plus a trailing gap after the last one
    skip_amounts = [0] * ( chunk_count + 1 )
    # Assign each left-over column to a randomly chosen gap
    for _ in range( 0, lost_bases ):
        skip_amounts[ random.randrange( 0, chunk_count + 1 ) ] += 1
    start = 0
    rval = []
    for i in range( 0, chunk_count ):
        start += skip_amounts[ i ]
        # Slice once and reuse the result for both the check and the output
        piece = m.slice( start, start + length )
        if check_len( piece ):
            rval.append( piece )
        start += length
    return rval
|---|
| 48 | |
|---|
def check_len( a ):
    """
    Report whether every component of alignment `a` is non-empty.

    Returns False as soon as a component whose `size` equals 0 is found,
    and True otherwise (including when `a` has no components at all).
    """
    return not any( comp.size == 0 for comp in a.components )
|---|
| 53 | |
|---|
| 54 | |
|---|
# Run the chopper only when this file is executed as a script.
if __name__ == "__main__":
    main()
|---|