[2] | 1 | #!/usr/bin/env python |
---|
| 2 | |
---|
| 3 | """ |
---|
| 4 | Split into windows. |
---|
| 5 | |
---|
| 6 | usage: %prog input size out_file |
---|
| 7 | -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file |
---|
| 8 | """ |
---|
| 9 | |
---|
| 10 | import sys, re, os |
---|
| 11 | |
---|
| 12 | from galaxy import eggs |
---|
| 13 | import pkg_resources; pkg_resources.require( "bx-python" ) |
---|
| 14 | from bx.cookbook import doc_optparse |
---|
| 15 | from galaxy.tools.util.galaxyops import * |
---|
| 16 | |
---|
def stop_err( msg ):
    """Report a fatal error and terminate the script.

    Writes *msg* to standard error exactly as given (no newline is appended)
    and then raises ``SystemExit`` with status 1, so the failure is visible
    through the process exit code as well as through the stderr text.

    msg -- the text to write to standard error.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would report status 0 (success) on an error path.
    sys.exit( 1 )
---|
| 20 | |
---|
def main():
    """Split every interval of a tab-separated file into fixed-size windows.

    The command line is parsed from the module docstring.  The ``cols``
    option supplies the chrom, start, end and strand column indexes, and
    five positional arguments are expected, in this order: input file,
    window size, output file, sliding flag (integer) and offset (integer).

    For each non-blank input line that does not start with ``#``, the start
    and end columns are read as integers and one output line is written per
    window, identical to the input line except that the start and end
    columns hold the window bounds.  Consecutive windows begin ``winsize``
    apart when the sliding flag is 0 (or the offset is 0) and ``offset``
    apart otherwise.  Only windows that lie entirely inside the interval
    are written.

    Lines whose start/end values are missing or not integers are counted
    and the first one is reported in the summary printed to standard output.
    Unusable arguments terminate the script through ``stop_err``.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse( __doc__ )

    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int( winsize )
        offset = int( offset )
        makesliding = int( makesliding )
    except Exception:
        # "Exception" rather than a bare except, so that SystemExit and
        # KeyboardInterrupt are not converted into a metadata error.
        stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )

    # A zero or negative window can never tile an interval; previously a
    # zero size surfaced only as every line being reported "invalid".
    if winsize <= 0:
        stop_err( "Window size must be a positive integer." )

    # A zero offset cannot slide, so fall back to adjacent windows.
    if offset == 0:
        makesliding = 0

    # Distance between the starts of two consecutive windows.
    step = winsize if makesliding == 0 else offset

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    # The output file is opened first, as before, so it exists even when the
    # input cannot be read; both handles are closed on every exit path.
    with open( out_file, 'w' ) as fo:
        with open( inp_file ) as fi:
            for i, line in enumerate( fi ):
                line = line.strip()
                if not line or line.startswith( "#" ):
                    continue

                # Only the parsing of the line is guarded, so I/O errors
                # while writing are no longer miscounted as invalid lines.
                try:
                    elems = line.split( '\t' )
                    if strand_col_1 != -1:
                        # The strand value itself is not used; the lookup is
                        # kept so that a line lacking the strand column is
                        # still reported as invalid.
                        elems[strand_col_1]
                    start = int( elems[start_col_1] )
                    end = int( elems[end_col_1] )
                except ( ValueError, IndexError ):
                    skipped_lines += 1
                    if not invalid_line:
                        first_invalid_line = i + 1
                        invalid_line = line
                    continue

                # Upper bound on the number of windows; floor division keeps
                # this an integer on Python 3 as well as Python 2.
                numwin = ( end - start ) // step
                for _ in range( numwin ):
                    # Checked before writing so that even the first window
                    # is dropped when the interval is shorter than winsize.
                    if start + winsize > end:
                        break
                    elems[start_col_1] = str( start )
                    elems[end_col_1] = str( start + winsize )
                    fo.write( "%s\n" % '\t'.join( elems ) )
                    start += step

    if makesliding == 1:
        print( 'Window size=%d, Sliding=Yes, Offset=%d' % ( winsize, offset ) )
    else:
        print( 'Window size=%d, Sliding=No' % winsize )
    if skipped_lines > 0:
        print( 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )
---|
| 83 | |
---|
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
---|