1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | """ |
---|
4 | Split into windows. |
---|
5 | |
---|
6 | usage: %prog input size out_file |
---|
7 | -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file |
---|
8 | """ |
---|
9 | |
---|
10 | import sys, re, os |
---|
11 | |
---|
12 | from galaxy import eggs |
---|
13 | import pkg_resources; pkg_resources.require( "bx-python" ) |
---|
14 | from bx.cookbook import doc_optparse |
---|
15 | from galaxy.tools.util.galaxyops import * |
---|
16 | |
---|
def stop_err( msg ):
    """Report a fatal error on stderr and terminate the script.

    msg -- text written to stderr exactly as given (no newline is appended).

    Raises SystemExit with exit status 1 so that callers inspecting the
    process status see the failure; a bare ``sys.exit()`` would report
    success (status 0) even though an error message was emitted.
    """
    sys.stderr.write( msg )
    sys.exit( 1 )
---|
20 | |
---|
def main():
    """Split every interval of a tab-separated file into windows.

    Options and positional arguments are parsed from the module docstring
    with ``doc_optparse``.  Five positional arguments are expected, in this
    order:

    inp_file    -- path of the tab-separated interval file to read
    winsize     -- window length (integer)
    out_file    -- path of the file to write
    makesliding -- integer flag; 0 places windows back to back, any other
                   value advances the window start by ``offset``
    offset      -- distance between window starts when sliding; an offset
                   of 0 switches sliding off

    Blank lines and lines starting with ``#`` are ignored.  For each
    remaining line, one output line is written per window: the input fields
    with the start and end columns replaced by the window bounds.  Lines
    that cannot be processed are counted, and the first such line is quoted
    in the summary printed to stdout.

    If the column specification or the positional arguments cannot be
    parsed, ``stop_err`` is called with a metadata hint for the user.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse( __doc__ )

    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int( winsize )
        offset = int( offset )
        makesliding = int( makesliding )
    except Exception:
        # 'except Exception' (not a bare except) so that KeyboardInterrupt
        # and SystemExit are not mistaken for a metadata problem.
        stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )

    # A zero offset cannot slide, so fall back to back-to-back windows.
    if offset == 0:
        makesliding = 0

    # Distance between consecutive window starts; constant for the whole run.
    if makesliding == 0:
        step = winsize
    else:
        step = offset

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    # The output file is opened first, as before; both handles are closed
    # even if an unexpected error propagates out of the loop.
    with open( out_file, 'w' ) as fo:
        with open( inp_file ) as fi:
            for i, line in enumerate( fi ):
                line = line.strip()
                if not line or line.startswith( "#" ):
                    continue
                try:
                    elems = line.split( '\t' )
                    if strand_col_1 != -1:
                        # The strand value is never written separately; the
                        # lookup is kept so that rows lacking the declared
                        # strand column are still rejected as invalid.
                        elems[strand_col_1]
                    start = int( elems[start_col_1] )
                    end = int( elems[end_col_1] )
                    # Upper bound on the number of windows for this row; a
                    # zero step raises ZeroDivisionError and the row is
                    # counted as invalid below.
                    numwin = ( end - start ) // step
                    for _ in range( numwin ):
                        elems[start_col_1] = str( start )
                        elems[end_col_1] = str( start + winsize )
                        fo.write( "%s\n" % '\t'.join( elems ) )
                        start += step
                        # Stop once the next window would run past the end.
                        if start + winsize > end:
                            break
                except Exception:
                    # Best effort: remember the first bad line and carry on.
                    skipped_lines += 1
                    if invalid_line is None:
                        first_invalid_line = i + 1
                        invalid_line = line

    if makesliding == 1:
        print( 'Window size=%d, Sliding=Yes, Offset=%d' % ( winsize, offset ) )
    else:
        print( 'Window size=%d, Sliding=No' % winsize )
    if skipped_lines > 0:
        print( 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )
---|
83 | |
---|
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
---|