[2] | 1 | #!/usr/bin/env python |
---|
| 2 | #Done by: Guru |
---|
| 3 | |
---|
| 4 | """ |
---|
| 5 | Get Flanking regions. |
---|
| 6 | |
---|
| 7 | usage: %prog input out_file size direction region |
---|
| 8 | -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file |
---|
| 9 | -o, --off=N: Offset |
---|
| 10 | """ |
---|
| 11 | |
---|
| 12 | import sys, re, os |
---|
| 13 | from galaxy import eggs |
---|
| 14 | import pkg_resources; pkg_resources.require( "bx-python" ) |
---|
| 15 | from bx.cookbook import doc_optparse |
---|
| 16 | from galaxy.tools.util.galaxyops import * |
---|
| 17 | |
---|
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes *msg* to standard error exactly as given (no newline is appended)
    and then raises ``SystemExit`` with status 1, so that callers inspecting
    the process exit code see a failure rather than a success.

    msg -- the text to emit on stderr.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report status 0 (success) despite the error.
    sys.exit(1)
---|
| 21 | |
---|
def _flank_coords(start, end, strand, direction, region, size, offset):
    """Return the flank interval(s) for one feature as (start, end) tuples.

    start, end -- integer coordinates of the feature.
    strand     -- '+' or '-'; anything other than '+' is treated as '-'
                  (the caller validates the value beforehand).
    direction  -- 'Upstream', 'Downstream' or 'Both'; any other value
                  produces an empty list.
    region     -- 'start', 'end', or any other value meaning the whole
                  feature (upstream is then measured from the feature's
                  start and downstream from its end).
    size       -- length of each flank.
    offset     -- shift applied to the anchor point, in strand direction.

    For 'Both' the upstream flank is listed first, then the downstream one.
    """
    # Express the feature ends in strand orientation so that both strands
    # share one code path: on '-' the feature "starts" at its end column.
    if strand == '+':
        strand_start, strand_end, sign = start, end, 1
    else:
        strand_start, strand_end, sign = end, start, -1

    flanks = []
    if direction in ('Upstream', 'Both'):
        # Upstream is anchored at the feature start unless 'end' is requested.
        if region == 'end':
            anchor = strand_end
        else:
            anchor = strand_start
        anchor += sign * offset
        if sign > 0:
            flanks.append((anchor - size, anchor))
        else:
            flanks.append((anchor, anchor + size))
    if direction in ('Downstream', 'Both'):
        # Downstream is anchored at the feature end unless 'start' is requested.
        if region == 'start':
            anchor = strand_start
        else:
            anchor = strand_end
        anchor += sign * offset
        if sign > 0:
            flanks.append((anchor, anchor + size))
        else:
            flanks.append((anchor - size, anchor))
    return flanks


def main():
    """Command-line entry point: write flanking regions of each input interval.

    Positional arguments (after option parsing) are
    ``input out_file size direction region``; ``--cols`` supplies the
    chrom/start/end/strand columns and ``--off`` the offset.  Each tab-separated,
    non-blank, non-'#' input line is rewritten with its start/end columns
    replaced by the flank coordinates and appended to *out_file*.

    Lines are skipped (and counted) when a required column is missing, start or
    end is not an integer, the strand is not '+' or '-', or a computed flank
    coordinate is not strictly positive.  In 'Both' mode the two flanks are
    written one after the other, so the first may already have been written
    when the second one causes the line to be counted as skipped.

    Terminates through stop_err() on invalid arguments or when no input line
    could be processed; otherwise prints a summary to stdout.
    """
    # Reject a missing, non-numeric or negative length before option parsing.
    try:
        if int(sys.argv[3]) < 0:
            raise ValueError("negative flank length")
    except (IndexError, ValueError):
        stop_err("Length of flanking region(s) must be a non-negative integer.")

    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)
    try:
        # Only the unpacking matters for the chrom column; it is not used below.
        # NOTE(review): parse_cols_arg presumably returns zero-based indices with
        # -1 standing for "no strand column" -- confirm against galaxyops.
        _chrom_col, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols)
        inp_file, out_file, size, direction, region = args
    except Exception:
        stop_err("Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item.")
    try:
        offset = int(options.off)
        size = int(size)
    except (TypeError, ValueError):
        stop_err("Invalid offset or length entered. Try again by entering valid integer values.")

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None
    data_lines = 0
    # Default strand; replaced per line whenever a strand column is configured.
    strand = "+"

    # try/finally (rather than `with`) keeps the script runnable on old Python 2.
    fo = open(out_file, 'w')
    try:
        fi = open(inp_file)
        try:
            for i, line in enumerate(fi):
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                data_lines += 1
                try:
                    elems = line.split('\t')
                    # int() raises ValueError for non-numeric text.  A value of
                    # 0 is a number and is accepted here.
                    start = int(elems[start_col_1])
                    end = int(elems[end_col_1])
                    if strand_col_1 != -1:
                        strand = elems[strand_col_1]
                    if strand not in ('+', '-'):
                        raise ValueError("strand must be '+' or '-'")
                    flanks = _flank_coords(start, end, strand, direction,
                                           region, size, offset)
                    for flank_start, flank_end in flanks:
                        if flank_start <= 0 or flank_end <= 0:
                            raise ValueError("flank coordinate is not positive")
                        elems[start_col_1] = str(flank_start)
                        elems[end_col_1] = str(flank_end)
                        fo.write("%s\n" % '\t'.join(elems))
                except (IndexError, ValueError):
                    skipped_lines += 1
                    if invalid_line is None:
                        # Remember the first offending line for the summary.
                        first_invalid_line = i + 1
                        invalid_line = line
        finally:
            fi.close()
    finally:
        fo.close()

    # Also true for an input without any data lines (0 == 0).
    if skipped_lines == data_lines:
        stop_err("Data issue: click the pencil icon in the history item to correct the metadata attributes.")
    if skipped_lines > 0:
        print('Skipped %d invalid lines starting with #%dL "%s"' % (skipped_lines, first_invalid_line, invalid_line))
    print('Location: %s, Region: %s, Flank-length: %d, Offset: %d ' % (direction, region, size, offset))
---|
| 189 | |
---|
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
---|