1 | #!/usr/bin/env python |
---|
2 | #Done by: Guru |
---|
3 | |
---|
4 | """ |
---|
5 | Get Flanking regions. |
---|
6 | |
---|
7 | usage: %prog input out_file size direction region |
---|
8 | -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file |
---|
9 | -o, --off=N: Offset |
---|
10 | """ |
---|
11 | |
---|
12 | import sys, re, os |
---|
13 | from galaxy import eggs |
---|
14 | import pkg_resources; pkg_resources.require( "bx-python" ) |
---|
15 | from bx.cookbook import doc_optparse |
---|
16 | from galaxy.tools.util.galaxyops import * |
---|
17 | |
---|
def stop_err( msg ):
    """Report a fatal error and terminate the script.

    Writes ``msg`` to standard error exactly as given (no newline is
    appended) and then raises ``SystemExit`` with exit status 1, so the
    calling process sees a failure status and not only stderr output.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would exit with status 0 (success) even though an
    # error was just reported.
    sys.exit( 1 )
---|
21 | |
---|
def _flank_intervals(start, end, strand, direction, region, size, offset):
    """Compute the flank interval(s) of a single feature.

    start, end -- integer coordinates of the feature.
    strand     -- '+' or '-'; the caller must validate this, any value
                  other than '+' is handled as '-'.
    direction  -- 'Upstream', 'Downstream' or 'Both'.
    region     -- 'start', 'end', or anything else for the remaining mode
                  (for 'Both' that mode flanks the start and the end).
    size       -- flank length.
    offset     -- shift applied to the anchor coordinate; it is added on
                  the '+' strand and subtracted on the '-' strand.

    Returns a list of (flank_start, flank_end) integer tuples in the order
    in which they have to be written: one tuple for 'Upstream' and
    'Downstream', two for 'Both', none for an unrecognised direction.
    """
    if direction == 'Upstream':
        if strand == '+':
            if region == 'end':
                anchor = end + offset
            else:
                anchor = start + offset
            return [(anchor - size, anchor)]
        if region == 'end':
            anchor = start - offset
        else:
            anchor = end - offset
        return [(anchor, anchor + size)]

    if direction == 'Downstream':
        if strand == '+':
            if region == 'start':
                anchor = start + offset
            else:
                anchor = end + offset
            return [(anchor, anchor + size)]
        if region == 'start':
            anchor = end - offset
        else:
            anchor = start - offset
        return [(anchor - size, anchor)]

    if direction == 'Both':
        if strand == '+':
            if region in ('start', 'end'):
                if region == 'start':
                    anchor = start + offset
                else:
                    anchor = end + offset
                return [(anchor - size, anchor), (anchor, anchor + size)]
            low_anchor = start + offset
            high_anchor = end + offset
            return [(low_anchor - size, low_anchor),
                    (high_anchor, high_anchor + size)]
        if region in ('start', 'end'):
            if region == 'start':
                anchor = end - offset
            else:
                anchor = start - offset
            return [(anchor, anchor + size), (anchor - size, anchor)]
        high_anchor = end - offset
        low_anchor = start - offset
        return [(high_anchor, high_anchor + size),
                (low_anchor - size, low_anchor)]

    return []


def main():
    """Write the flanking regions of every interval in the input file.

    Positional arguments (after option parsing): input file, output file,
    flank size, direction and region.  The column indices come from the
    ``cols`` option and the offset from the ``off`` option.

    Blank lines and lines starting with '#' are ignored.  A data line is
    skipped (and counted) when its start/end columns are missing or not
    integers, when its strand is neither '+' nor '-', or when a computed
    flank has a start or end coordinate that is not greater than zero.
    Each output row is the input row with its start and end columns
    replaced by the flank coordinates.

    Terminates through stop_err() when the arguments are unusable or when
    every data line was skipped; otherwise prints a summary to stdout.
    """
    # Reject a bad flank length before the option parser runs.
    try:
        requested_size = int(sys.argv[3])
    except (IndexError, ValueError):
        requested_size = -1
    if requested_size < 0:
        stop_err( "Length of flanking region(s) must be a non-negative integer." )

    # Parsing Command Line here
    options, args = doc_optparse.parse( __doc__ )
    try:
        # chr_col_1 is not used below, but unpacking it checks that four
        # column indices were supplied.
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, out_file, size, direction, region = args
    except Exception:
        stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." )
    try:
        offset = int(options.off)
        size = int(size)
    except (AttributeError, TypeError, ValueError):
        stop_err( "Invalid offset or length entered. Try again by entering valid integer values." )

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None
    data_lines = 0
    # Used for every row when there is no strand column (index -1).
    strand = "+"

    fo = open(out_file, 'w')
    try:
        fi = open(inp_file)
        try:
            for i, line in enumerate(fi):
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                data_lines += 1
                try:
                    elems = line.split('\t')
                    # Only non-numeric coordinates are rejected here; a
                    # coordinate of 0 is a valid integer and is accepted.
                    start = int(elems[start_col_1])
                    end = int(elems[end_col_1])
                    if strand_col_1 != -1:
                        strand = elems[strand_col_1]
                    if strand not in ('+', '-'):
                        raise ValueError("strand must be '+' or '-'")
                    flanks = _flank_intervals(start, end, strand, direction,
                                              region, size, offset)
                    # Flanks are validated and written one at a time, so in
                    # 'Both' mode a valid first flank is still written when
                    # the second one is rejected.
                    for flank_start, flank_end in flanks:
                        if flank_start <= 0 or flank_end <= 0:
                            raise ValueError("flank coordinates must be positive")
                        elems[start_col_1] = str(flank_start)
                        elems[end_col_1] = str(flank_end)
                        fo.write( "%s\n" % '\t'.join( elems ) )
                except (IndexError, ValueError):
                    skipped_lines += 1
                    if not invalid_line:
                        first_invalid_line = i + 1
                        invalid_line = line
        finally:
            fi.close()
    finally:
        fo.close()

    if skipped_lines == data_lines:
        stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
    # A single parenthesised argument prints identically whether print is
    # a statement or a function.
    if skipped_lines > 0:
        print('Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line ))
    print('Location: %s, Region: %s, Flank-length: %d, Offset: %d ' % ( direction, region, size, offset ))
---|
189 | |
---|
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
---|