| 1 | #!/usr/bin/env python | 
|---|
| 2 | #Done by: Guru | 
|---|
| 3 |  | 
|---|
| 4 | """ | 
|---|
| 5 | Get Flanking regions. | 
|---|
| 6 |  | 
|---|
| 7 | usage: %prog input out_file size direction region | 
|---|
| 8 | -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file | 
|---|
| 9 | -o, --off=N: Offset | 
|---|
| 10 | """ | 
|---|
| 11 |  | 
|---|
| 12 | import sys, re, os | 
|---|
| 13 | from galaxy import eggs | 
|---|
| 14 | import pkg_resources; pkg_resources.require( "bx-python" ) | 
|---|
| 15 | from bx.cookbook import doc_optparse | 
|---|
| 16 | from galaxy.tools.util.galaxyops import * | 
|---|
| 17 |  | 
|---|
def stop_err(msg):
    """Write *msg* to stderr and terminate the script with a failing status.

    msg is written verbatim; no newline is appended.

    Raises SystemExit with exit code 1. The previous bare ``sys.exit()``
    ended the process with status 0 (success) right after printing an
    error, so any caller that inspects the exit status instead of stderr
    saw the run as successful.
    """
    sys.stderr.write(msg)
    sys.exit(1)
|---|
| 21 |  | 
|---|
| 22 | def main(): | 
|---|
| 23 | try: | 
|---|
| 24 | if int( sys.argv[3] ) < 0: | 
|---|
| 25 | raise Exception | 
|---|
| 26 | except: | 
|---|
| 27 | stop_err( "Length of flanking region(s) must be a non-negative integer." ) | 
|---|
| 28 |  | 
|---|
| 29 | # Parsing Command Line here | 
|---|
| 30 | options, args = doc_optparse.parse( __doc__ ) | 
|---|
| 31 | try: | 
|---|
| 32 | chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols ) | 
|---|
| 33 | inp_file, out_file, size, direction, region = args | 
|---|
| 34 | if strand_col_1 <= 0: | 
|---|
| 35 | strand = "+"        #if strand is not defined, default it to + | 
|---|
| 36 | except: | 
|---|
| 37 | stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." ) | 
|---|
| 38 | try: | 
|---|
| 39 | offset = int(options.off) | 
|---|
| 40 | size = int(size) | 
|---|
| 41 | except: | 
|---|
| 42 | stop_err( "Invalid offset or length entered. Try again by entering valid integer values." ) | 
|---|
| 43 |  | 
|---|
| 44 | fo = open(out_file,'w') | 
|---|
| 45 |  | 
|---|
| 46 | skipped_lines = 0 | 
|---|
| 47 | first_invalid_line = 0 | 
|---|
| 48 | invalid_line = None | 
|---|
| 49 | elems = [] | 
|---|
| 50 | j=0 | 
|---|
| 51 | for i, line in enumerate( file( inp_file ) ): | 
|---|
| 52 | line = line.strip() | 
|---|
| 53 | if line and (not line.startswith( '#' )) and line != '': | 
|---|
| 54 | j+=1 | 
|---|
| 55 | try: | 
|---|
| 56 | elems = line.split('\t') | 
|---|
| 57 | #if the start and/or end columns are not numbers, skip that line. | 
|---|
| 58 | assert int(elems[start_col_1]) | 
|---|
| 59 | assert int(elems[end_col_1]) | 
|---|
| 60 | if strand_col_1 != -1: | 
|---|
| 61 | strand = elems[strand_col_1] | 
|---|
| 62 | #if the stand value is not + or -, skip that line. | 
|---|
| 63 | assert strand in ['+', '-'] | 
|---|
| 64 | if direction == 'Upstream': | 
|---|
| 65 | if strand == '+': | 
|---|
| 66 | if region == 'end': | 
|---|
| 67 | elems[end_col_1] = str(int(elems[end_col_1]) + offset) | 
|---|
| 68 | elems[start_col_1] = str( int(elems[end_col_1]) - size ) | 
|---|
| 69 | else: | 
|---|
| 70 | elems[end_col_1] = str(int(elems[start_col_1]) + offset) | 
|---|
| 71 | elems[start_col_1] = str( int(elems[end_col_1]) - size ) | 
|---|
| 72 | elif strand == '-': | 
|---|
| 73 | if region == 'end': | 
|---|
| 74 | elems[start_col_1] = str(int(elems[start_col_1]) - offset) | 
|---|
| 75 | elems[end_col_1] = str(int(elems[start_col_1]) + size) | 
|---|
| 76 | else: | 
|---|
| 77 | elems[start_col_1] = str(int(elems[end_col_1]) - offset) | 
|---|
| 78 | elems[end_col_1] = str(int(elems[start_col_1]) + size) | 
|---|
| 79 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 80 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 81 |  | 
|---|
| 82 | elif direction == 'Downstream': | 
|---|
| 83 | if strand == '-': | 
|---|
| 84 | if region == 'start': | 
|---|
| 85 | elems[end_col_1] = str(int(elems[end_col_1]) - offset) | 
|---|
| 86 | elems[start_col_1] = str( int(elems[end_col_1]) - size ) | 
|---|
| 87 | else: | 
|---|
| 88 | elems[end_col_1] = str(int(elems[start_col_1]) - offset) | 
|---|
| 89 | elems[start_col_1] = str( int(elems[end_col_1]) - size ) | 
|---|
| 90 | elif strand == '+': | 
|---|
| 91 | if region == 'start': | 
|---|
| 92 | elems[start_col_1] = str(int(elems[start_col_1]) + offset) | 
|---|
| 93 | elems[end_col_1] = str(int(elems[start_col_1]) + size) | 
|---|
| 94 | else: | 
|---|
| 95 | elems[start_col_1] = str(int(elems[end_col_1]) + offset) | 
|---|
| 96 | elems[end_col_1] = str(int(elems[start_col_1]) + size) | 
|---|
| 97 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 98 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 99 |  | 
|---|
| 100 | elif direction == 'Both': | 
|---|
| 101 | if strand == '-': | 
|---|
| 102 | if region == 'start': | 
|---|
| 103 | start = str(int(elems[end_col_1]) - offset) | 
|---|
| 104 | end1 = str(int(start) + size) | 
|---|
| 105 | end2 = str(int(start) - size) | 
|---|
| 106 | elems[start_col_1]=start | 
|---|
| 107 | elems[end_col_1]=end1 | 
|---|
| 108 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 109 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 110 | elems[start_col_1]=end2 | 
|---|
| 111 | elems[end_col_1]=start | 
|---|
| 112 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 113 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 114 | elif region == 'end': | 
|---|
| 115 | start = str(int(elems[start_col_1]) - offset) | 
|---|
| 116 | end1 = str(int(start) + size) | 
|---|
| 117 | end2 = str(int(start) - size) | 
|---|
| 118 | elems[start_col_1]=start | 
|---|
| 119 | elems[end_col_1]=end1 | 
|---|
| 120 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 121 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 122 | elems[start_col_1]=end2 | 
|---|
| 123 | elems[end_col_1]=start | 
|---|
| 124 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 125 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 126 | else: | 
|---|
| 127 | start1 = str(int(elems[end_col_1]) - offset) | 
|---|
| 128 | end1 = str(int(start1) + size) | 
|---|
| 129 | start2 = str(int(elems[start_col_1]) - offset) | 
|---|
| 130 | end2 = str(int(start2) - size) | 
|---|
| 131 | elems[start_col_1]=start1 | 
|---|
| 132 | elems[end_col_1]=end1 | 
|---|
| 133 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 134 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 135 | elems[start_col_1]=end2 | 
|---|
| 136 | elems[end_col_1]=start2 | 
|---|
| 137 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 138 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 139 | elif strand == '+': | 
|---|
| 140 | if region == 'start': | 
|---|
| 141 | start = str(int(elems[start_col_1]) + offset) | 
|---|
| 142 | end1 = str(int(start) - size) | 
|---|
| 143 | end2 = str(int(start) + size) | 
|---|
| 144 | elems[start_col_1]=end1 | 
|---|
| 145 | elems[end_col_1]=start | 
|---|
| 146 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 147 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 148 | elems[start_col_1]=start | 
|---|
| 149 | elems[end_col_1]=end2 | 
|---|
| 150 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 151 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 152 | elif region == 'end': | 
|---|
| 153 | start = str(int(elems[end_col_1]) + offset) | 
|---|
| 154 | end1 = str(int(start) - size) | 
|---|
| 155 | end2 = str(int(start) + size) | 
|---|
| 156 | elems[start_col_1]=end1 | 
|---|
| 157 | elems[end_col_1]=start | 
|---|
| 158 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 159 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 160 | elems[start_col_1]=start | 
|---|
| 161 | elems[end_col_1]=end2 | 
|---|
| 162 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 163 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 164 | else: | 
|---|
| 165 | start1 = str(int(elems[start_col_1]) + offset) | 
|---|
| 166 | end1 = str(int(start1) - size) | 
|---|
| 167 | start2 = str(int(elems[end_col_1]) + offset) | 
|---|
| 168 | end2 = str(int(start2) + size) | 
|---|
| 169 | elems[start_col_1]=end1 | 
|---|
| 170 | elems[end_col_1]=start1 | 
|---|
| 171 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 172 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 173 | elems[start_col_1]=start2 | 
|---|
| 174 | elems[end_col_1]=end2 | 
|---|
| 175 | assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | 
|---|
| 176 | fo.write( "%s\n" % '\t'.join( elems ) ) | 
|---|
| 177 | except: | 
|---|
| 178 | skipped_lines += 1 | 
|---|
| 179 | if not invalid_line: | 
|---|
| 180 | first_invalid_line = i + 1 | 
|---|
| 181 | invalid_line = line | 
|---|
| 182 | fo.close() | 
|---|
| 183 |  | 
|---|
| 184 | if skipped_lines == j: | 
|---|
| 185 | stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) | 
|---|
| 186 | if skipped_lines > 0: | 
|---|
| 187 | print 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) | 
|---|
| 188 | print 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' %( direction, region, size, offset ) | 
|---|
| 189 |  | 
|---|
# Script entry point: run the tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|---|