| [2] | 1 | #!/usr/bin/env python | 
|---|
|  | 2 |  | 
|---|
|  | 3 | """ | 
|---|
|  | 4 | Runs Bowtie on single-end or paired-end data. | 
|---|
|  | 5 | For use with Bowtie v. 0.12.3 | 
|---|
|  | 6 |  | 
|---|
|  | 7 | usage: bowtie_wrapper.py [options] | 
|---|
|  | 8 | -t, --threads=t: The number of threads to run | 
|---|
|  | 9 | -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format | 
|---|
|  | 10 | -I, --input2=I: The reverse reads file in Sanger FASTQ format | 
|---|
|  | 11 | -o, --output=o: The output file | 
|---|
|  | 12 | -4, --dataType=4: The type of data (SOLiD or Solexa) | 
|---|
|  | 13 | -2, --paired=2: Whether the data is single- or paired-end | 
|---|
|  | 14 | -g, --genomeSource=g: The type of reference provided | 
|---|
|  | 15 | -r, --ref=r: The reference genome to use or index | 
|---|
|  | 16 | -s, --skip=s: Skip the first n reads | 
|---|
|  | 17 | -a, --alignLimit=a: Only align the first n reads | 
|---|
|  | 18 | -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment | 
|---|
|  | 19 | -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment | 
|---|
|  | 20 | -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed | 
|---|
|  | 21 | -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions | 
|---|
|  | 22 | -l, --seedLen=l: Seed length | 
|---|
|  | 23 | -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30 | 
|---|
|  | 24 | -P, --maqSoapAlign=P: Choose MAQ- or SOAP-like alignment policy | 
|---|
|  | 25 | -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist | 
|---|
|  | 26 | -v, --valAlign=v: Report up to n valid arguments per read | 
|---|
|  | 27 | -V, --allValAligns=V: Whether or not to report all valid alignments per read | 
|---|
|  | 28 | -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist | 
|---|
|  | 29 | -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions | 
|---|
|  | 30 | -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read | 
|---|
|  | 31 | -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable | 
|---|
|  | 32 | -j, --minInsert=j: Minimum insert size for valid paired-end alignments | 
|---|
|  | 33 | -J, --maxInsert=J: Maximum insert size for valid paired-end alignments | 
|---|
|  | 34 | -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand | 
|---|
|  | 35 | -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate | 
|---|
|  | 36 | -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand | 
|---|
|  | 37 | -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand | 
|---|
|  | 38 | -F, --offrate=F: Override the offrate of the index to n | 
|---|
|  | 39 | -8, --snpphred=8: SNP penalty on Phred scale | 
|---|
|  | 40 | -6, --snpfrac=6: Fraction of sites expected to be SNP sites | 
|---|
|  | 41 | -7, --keepends=7: Keep extreme-end nucleotides and qualities | 
|---|
|  | 42 | -S, --seed=S: Seed for pseudo-random number generator | 
|---|
|  | 43 | -C, --params=C: Whether to use default or specified parameters | 
|---|
|  | 44 | -u, --iautoB=u: Automatic or specified behavior | 
|---|
|  | 45 | -K, --ipacked=K: Whether or not to use a packed representation for DNA strings | 
|---|
|  | 46 | -Q, --ibmax=Q: Maximum number of suffixes allowed in a block | 
|---|
|  | 47 | -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference | 
|---|
|  | 48 | -D, --idcv=D: The period for the difference-cover sample | 
|---|
|  | 49 | -U, --inodc=U: Whether or not to disable the use of the difference-cover sample | 
|---|
|  | 50 | -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment | 
|---|
|  | 51 | -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows | 
|---|
|  | 52 | -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query | 
|---|
|  | 53 | -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As | 
|---|
|  | 54 | -N, --iendian=N: Endianness to use when serializing integers to the index file | 
|---|
|  | 55 | -Z, --iseed=Z: Seed for the pseudorandom number generator | 
|---|
|  | 56 | -c, --icutoff=c: Number of first bases of the reference sequence to index | 
|---|
|  | 57 | -x, --indexSettings=x: Whether or not indexing options are to be set | 
|---|
|  | 58 | -H, --suppressHeader=H: Suppress header | 
|---|
|  | 59 | --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is' | 
|---|
|  | 60 | """ | 
|---|
|  | 61 |  | 
|---|
|  | 62 | import optparse, os, shutil, subprocess, sys, tempfile | 
|---|
|  | 63 |  | 
|---|
|  | 64 | def stop_err( msg ): | 
|---|
|  | 65 | sys.stderr.write( '%s\n' % msg ) | 
|---|
|  | 66 | sys.exit() | 
|---|
|  | 67 |  | 
|---|
|  | 68 | def __main__(): | 
|---|
|  | 69 | #Parse Command Line | 
|---|
|  | 70 | parser = optparse.OptionParser() | 
|---|
|  | 71 | parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' ) | 
|---|
|  | 72 | parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' ) | 
|---|
|  | 73 | parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) | 
|---|
|  | 74 | parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' ) | 
|---|
|  | 75 | parser.add_option( '-o', '--output', dest='output', help='The output file' ) | 
|---|
|  | 76 | parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' ) | 
|---|
|  | 77 | parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' ) | 
|---|
|  | 78 | parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' ) | 
|---|
|  | 79 | parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' ) | 
|---|
|  | 80 | parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' ) | 
|---|
|  | 81 | parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' ) | 
|---|
|  | 82 | parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' ) | 
|---|
|  | 83 | parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' ) | 
|---|
|  | 84 | parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' ) | 
|---|
|  | 85 | parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' ) | 
|---|
|  | 86 | parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' ) | 
|---|
|  | 87 | parser.add_option( '-P', '--maqSoapAlign', dest='maqSoapAlign', help='Choose MAQ- or SOAP-like alignment policy' ) | 
|---|
|  | 88 | parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' ) | 
|---|
|  | 89 | parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid arguments per read' ) | 
|---|
|  | 90 | parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read' ) | 
|---|
|  | 91 | parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' ) | 
|---|
|  | 92 | parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" ) | 
|---|
|  | 93 | parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' ) | 
|---|
|  | 94 | parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' ) | 
|---|
|  | 95 | parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' ) | 
|---|
|  | 96 | parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' ) | 
|---|
|  | 97 | parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' ) | 
|---|
|  | 98 | parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' ) | 
|---|
|  | 99 | parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' ) | 
|---|
|  | 100 | parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' ) | 
|---|
|  | 101 | parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' ) | 
|---|
|  | 102 | parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' ) | 
|---|
|  | 103 | parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' ) | 
|---|
|  | 104 | parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' ) | 
|---|
|  | 105 | parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' ) | 
|---|
|  | 106 | parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' ) | 
|---|
|  | 107 | parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' ) | 
|---|
|  | 108 | parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' ) | 
|---|
|  | 109 | parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' ) | 
|---|
|  | 110 | parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' ) | 
|---|
|  | 111 | parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' ) | 
|---|
|  | 112 | parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' ) | 
|---|
|  | 113 | parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' ) | 
|---|
|  | 114 | parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' ) | 
|---|
|  | 115 | parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' ) | 
|---|
|  | 116 | parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' ) | 
|---|
|  | 117 | parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' ) | 
|---|
|  | 118 | parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' ) | 
|---|
|  | 119 | parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference sequence to index' ) | 
|---|
|  | 120 | parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' ) | 
|---|
|  | 121 | parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' ) | 
|---|
|  | 122 | parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' ) | 
|---|
|  | 123 | (options, args) = parser.parse_args() | 
|---|
|  | 124 | stdout = '' | 
|---|
|  | 125 |  | 
|---|
|  | 126 | # make temp directory for placement of indices and copy reference file there if necessary | 
|---|
|  | 127 | tmp_index_dir = tempfile.mkdtemp() | 
|---|
|  | 128 | # get type of data (solid or solexa) | 
|---|
|  | 129 | if options.dataType == 'solid': | 
|---|
|  | 130 | colorspace = '-C' | 
|---|
|  | 131 | else: | 
|---|
|  | 132 | colorspace = '' | 
|---|
|  | 133 | # index if necessary | 
|---|
|  | 134 | if options.genomeSource == 'history' and not options.do_not_build_index: | 
|---|
|  | 135 | # set up commands | 
|---|
|  | 136 | if options.index_settings =='indexPreSet': | 
|---|
|  | 137 | indexing_cmds = '%s' % colorspace | 
|---|
|  | 138 | else: | 
|---|
|  | 139 | try: | 
|---|
|  | 140 | if options.iautoB != 'None' and options.iautoB == 'set': | 
|---|
|  | 141 | iautoB = '--noauto' | 
|---|
|  | 142 | else: | 
|---|
|  | 143 | iautoB = '' | 
|---|
|  | 144 | if options. ipacked != 'None' and options.ipacked == 'packed': | 
|---|
|  | 145 | ipacked = '--packed' | 
|---|
|  | 146 | else: | 
|---|
|  | 147 | ipacked = '' | 
|---|
|  | 148 | if options.ibmax != 'None' and int( options.ibmax ) >= 1: | 
|---|
|  | 149 | ibmax = '--bmax %s' % options.ibmax | 
|---|
|  | 150 | else: | 
|---|
|  | 151 | ibmax = '' | 
|---|
|  | 152 | if options.ibmaxdivn != 'None' and int( options.ibmaxdivn ) >= 0: | 
|---|
|  | 153 | ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn | 
|---|
|  | 154 | else: | 
|---|
|  | 155 | ibmaxdivn = '' | 
|---|
|  | 156 | if options.idcv != 'None' and int( options.idcv ) > 0: | 
|---|
|  | 157 | idcv = '--dcv %s' % options.idcv | 
|---|
|  | 158 | else: | 
|---|
|  | 159 | idcv = '' | 
|---|
|  | 160 | if options.inodc != 'None' and options.inodc == 'nodc': | 
|---|
|  | 161 | inodc = '--nodc' | 
|---|
|  | 162 | else: | 
|---|
|  | 163 | inodc = '' | 
|---|
|  | 164 | if options.inoref != 'None' and options.inoref == 'noref': | 
|---|
|  | 165 | inoref = '--noref' | 
|---|
|  | 166 | else: | 
|---|
|  | 167 | inoref = '' | 
|---|
|  | 168 | if options.iftab != 'None' and int( options.iftab ) >= 0: | 
|---|
|  | 169 | iftab = '--ftabchars %s' % options.iftab | 
|---|
|  | 170 | else: | 
|---|
|  | 171 | iftab = '' | 
|---|
|  | 172 | if options.intoa != 'None' and options.intoa == 'yes': | 
|---|
|  | 173 | intoa = '--ntoa' | 
|---|
|  | 174 | else: | 
|---|
|  | 175 | intoa = '' | 
|---|
|  | 176 | if options.iendian != 'None' and options.iendian == 'big': | 
|---|
|  | 177 | iendian = '--big' | 
|---|
|  | 178 | else: | 
|---|
|  | 179 | iendian = '--little' | 
|---|
|  | 180 | if options.iseed != 'None' and int( options.iseed ) > 0: | 
|---|
|  | 181 | iseed = '--seed %s' % options.iseed | 
|---|
|  | 182 | else: | 
|---|
|  | 183 | iseed = '' | 
|---|
|  | 184 | if options.icutoff != 'None' and int( options.icutoff ) > 0: | 
|---|
|  | 185 | icutoff = '--cutoff %s' % options.icutoff | 
|---|
|  | 186 | else: | 
|---|
|  | 187 | icutoff = '' | 
|---|
|  | 188 | indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s' % \ | 
|---|
|  | 189 | ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, | 
|---|
|  | 190 | inoref, options.ioffrate, iftab, intoa, iendian, | 
|---|
|  | 191 | iseed, icutoff, colorspace ) | 
|---|
|  | 192 | except ValueError, e: | 
|---|
|  | 193 | # clean up temp dir | 
|---|
|  | 194 | if os.path.exists( tmp_index_dir ): | 
|---|
|  | 195 | shutil.rmtree( tmp_index_dir ) | 
|---|
|  | 196 | stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) ) | 
|---|
|  | 197 | ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir ) | 
|---|
|  | 198 | ref_file_name = ref_file.name | 
|---|
|  | 199 | ref_file.close() | 
|---|
|  | 200 | os.symlink( options.ref, ref_file_name ) | 
|---|
|  | 201 | cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name ) | 
|---|
|  | 202 | try: | 
|---|
|  | 203 | tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name | 
|---|
|  | 204 | tmp_stderr = open( tmp, 'wb' ) | 
|---|
|  | 205 | proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) | 
|---|
|  | 206 | returncode = proc.wait() | 
|---|
|  | 207 | tmp_stderr.close() | 
|---|
|  | 208 | # get stderr, allowing for case where it's very large | 
|---|
|  | 209 | tmp_stderr = open( tmp, 'rb' ) | 
|---|
|  | 210 | stderr = '' | 
|---|
|  | 211 | buffsize = 1048576 | 
|---|
|  | 212 | try: | 
|---|
|  | 213 | while True: | 
|---|
|  | 214 | stderr += tmp_stderr.read( buffsize ) | 
|---|
|  | 215 | if not stderr or len( stderr ) % buffsize != 0: | 
|---|
|  | 216 | break | 
|---|
|  | 217 | except OverflowError: | 
|---|
|  | 218 | pass | 
|---|
|  | 219 | tmp_stderr.close() | 
|---|
|  | 220 | if returncode != 0: | 
|---|
|  | 221 | raise Exception, stderr | 
|---|
|  | 222 | except Exception, e: | 
|---|
|  | 223 | # clean up temp dir | 
|---|
|  | 224 | if os.path.exists( tmp_index_dir ): | 
|---|
|  | 225 | shutil.rmtree( tmp_index_dir ) | 
|---|
|  | 226 | stop_err( 'Error indexing reference sequence\n' + str( e ) ) | 
|---|
|  | 227 | stdout += 'File indexed. ' | 
|---|
|  | 228 | else: | 
|---|
|  | 229 | ref_file_name = options.ref | 
|---|
|  | 230 | # set up aligning and generate aligning command options | 
|---|
|  | 231 | # automatically set threads in both cases | 
|---|
|  | 232 | if options.suppressHeader == 'true': | 
|---|
|  | 233 | suppressHeader = '--sam-nohead' | 
|---|
|  | 234 | else: | 
|---|
|  | 235 | suppressHeader = '' | 
|---|
|  | 236 | if options.maxInsert != 'None' and int( options.maxInsert ) > 0: | 
|---|
|  | 237 | maxInsert = '-X %s' % options.maxInsert | 
|---|
|  | 238 | else: | 
|---|
|  | 239 | maxInsert = '' | 
|---|
|  | 240 | if options.mateOrient != 'None': | 
|---|
|  | 241 | mateOrient = '--%s' % options.mateOrient | 
|---|
|  | 242 | else: | 
|---|
|  | 243 | mateOrient = '' | 
|---|
|  | 244 | if options.params == 'preSet': | 
|---|
|  | 245 | aligning_cmds = '%s %s -p %s -S %s -q %s ' % \ | 
|---|
|  | 246 | ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace ) | 
|---|
|  | 247 | else: | 
|---|
|  | 248 | try: | 
|---|
|  | 249 | if options.skip != 'None' and int( options.skip ) > 0: | 
|---|
|  | 250 | skip = '-s %s' % options.skip | 
|---|
|  | 251 | else: | 
|---|
|  | 252 | skip = '' | 
|---|
|  | 253 | if options.alignLimit != 'None' and int( options.alignLimit ) >= 0: | 
|---|
|  | 254 | alignLimit = '-u %s' % options.alignLimit | 
|---|
|  | 255 | else: | 
|---|
|  | 256 | alignLimit = '' | 
|---|
|  | 257 | if options.trimH != 'None' and int( options.trimH ) > 0: | 
|---|
|  | 258 | trimH = '-5 %s' % options.trimH | 
|---|
|  | 259 | else: | 
|---|
|  | 260 | trimH = '' | 
|---|
|  | 261 | if options.trimL != 'None' and int( options.trimL ) > 0: | 
|---|
|  | 262 | trimL = '-3 %s' % options.trimL | 
|---|
|  | 263 | else: | 
|---|
|  | 264 | trimL = '' | 
|---|
|  | 265 | if options.mismatchSeed != 'None' and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \ | 
|---|
|  | 266 | or options.mismatchSeed == '2' or options.mismatchSeed == '3'): | 
|---|
|  | 267 | mismatchSeed = '-n %s' % options.mismatchSeed | 
|---|
|  | 268 | else: | 
|---|
|  | 269 | mismatchSeed = '' | 
|---|
|  | 270 | if options.mismatchQual != 'None' and int( options.mismatchQual ) >= 0: | 
|---|
|  | 271 | mismatchQual = '-e %s' % options.mismatchQual | 
|---|
|  | 272 | else: | 
|---|
|  | 273 | mismatchQual = '' | 
|---|
|  | 274 | if options.seedLen != 'None' and int( options.seedLen ) >= 5: | 
|---|
|  | 275 | seedLen = '-l %s' % options.seedLen | 
|---|
|  | 276 | else: | 
|---|
|  | 277 | seedLen = '' | 
|---|
|  | 278 | if options.rounding == 'noRound': | 
|---|
|  | 279 | rounding = '--nomaqround' | 
|---|
|  | 280 | else: | 
|---|
|  | 281 | rounding = '' | 
|---|
|  | 282 | if options.maqSoapAlign != '-1' and int( options.maqSoapAlign ) >= 0: | 
|---|
|  | 283 | maqSoapAlign = '-v %s' % options.maqSoapAlign | 
|---|
|  | 284 | else: | 
|---|
|  | 285 | maqSoapAlign = '' | 
|---|
|  | 286 | if options.minInsert != 'None' and int( options.minInsert ) > 0: | 
|---|
|  | 287 | minInsert = '-I %s' % options.minInsert | 
|---|
|  | 288 | else: | 
|---|
|  | 289 | minInsert = '' | 
|---|
|  | 290 | if options.maxAlignAttempt != 'None' and int( options.maxAlignAttempt ) >= 0: | 
|---|
|  | 291 | maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt | 
|---|
|  | 292 | else: | 
|---|
|  | 293 | maxAlignAttempt = '' | 
|---|
|  | 294 | if options.forwardAlign == 'noForward': | 
|---|
|  | 295 | forwardAlign = '--nofw' | 
|---|
|  | 296 | else: | 
|---|
|  | 297 | forwardAlign = '' | 
|---|
|  | 298 | if options.reverseAlign == 'noReverse': | 
|---|
|  | 299 | reverseAlign = '--norc' | 
|---|
|  | 300 | else: | 
|---|
|  | 301 | reverseAlign = '' | 
|---|
|  | 302 | if options.maxBacktracks != 'None' and int( options.maxBacktracks ) > 0 and \ | 
|---|
|  | 303 | ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ): | 
|---|
|  | 304 | maxBacktracks = '--maxbts %s' % options.maxBacktracks | 
|---|
|  | 305 | else: | 
|---|
|  | 306 | maxBacktracks = '' | 
|---|
|  | 307 | if options.tryHard == 'doTryHard': | 
|---|
|  | 308 | tryHard = '-y' | 
|---|
|  | 309 | else: | 
|---|
|  | 310 | tryHard = '' | 
|---|
|  | 311 | if options.valAlign != 'None' and int( options.valAlign ) >= 0: | 
|---|
|  | 312 | valAlign = '-k %s' % options.valAlign | 
|---|
|  | 313 | else: | 
|---|
|  | 314 | valAlign = '' | 
|---|
|  | 315 | if options.allValAligns == 'doAllValAligns': | 
|---|
|  | 316 | allValAligns = '-a' | 
|---|
|  | 317 | else: | 
|---|
|  | 318 | allValAligns = '' | 
|---|
|  | 319 | if options.suppressAlign != 'None' and int( options.suppressAlign ) >= 0: | 
|---|
|  | 320 | suppressAlign = '-m %s' % options.suppressAlign | 
|---|
|  | 321 | else: | 
|---|
|  | 322 | suppressAlign = '' | 
|---|
|  | 323 | if options.best == 'doBest': | 
|---|
|  | 324 | best = '--best' | 
|---|
|  | 325 | else: | 
|---|
|  | 326 | best = '' | 
|---|
|  | 327 | if options.strata == 'doStrata': | 
|---|
|  | 328 | strata = '--strata' | 
|---|
|  | 329 | else: | 
|---|
|  | 330 | strata = '' | 
|---|
|  | 331 | if options.offrate != 'None' and int( options.offrate ) >= 0: | 
|---|
|  | 332 | offrate = '-o %s' % options.offrate | 
|---|
|  | 333 | else: | 
|---|
|  | 334 | offrate = '' | 
|---|
|  | 335 | if options.seed != 'None' and int( options.seed ) >= 0: | 
|---|
|  | 336 | seed = '--seed %s' % options.seed | 
|---|
|  | 337 | else: | 
|---|
|  | 338 | seed = '' | 
|---|
|  | 339 | if options.snpphred != 'None' and int( options.snpphred ) >= 0: | 
|---|
|  | 340 | snpphred = '--snpphred %s' % options.snpphred | 
|---|
|  | 341 | else: | 
|---|
|  | 342 | snpphred = '' | 
|---|
|  | 343 | if options.snpfrac != 'None' and float( options.snpfrac ) >= 0: | 
|---|
|  | 344 | snpfrac = '--snpfrac %s' % options.snpfrac | 
|---|
|  | 345 | else: | 
|---|
|  | 346 | snpfrac = '' | 
|---|
|  | 347 | if options.keepends != 'None' and options.keepends == 'doKeepends': | 
|---|
|  | 348 | keepends = '--col-keepends' | 
|---|
|  | 349 | else: | 
|---|
|  | 350 | keepends = '' | 
|---|
|  | 351 | aligning_cmds = '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' \ | 
|---|
|  | 352 | '%s %s %s %s %s %s %s %s %s %s %s -p %s -S %s -q' % \ | 
|---|
|  | 353 | ( skip, alignLimit, trimH, trimL, mismatchSeed, mismatchQual, | 
|---|
|  | 354 | seedLen, rounding, maqSoapAlign, minInsert, maxInsert, | 
|---|
|  | 355 | mateOrient, maxAlignAttempt, forwardAlign, reverseAlign, | 
|---|
|  | 356 | maxBacktracks, tryHard, valAlign, allValAligns, suppressAlign, | 
|---|
|  | 357 | best, strata, offrate, seed, colorspace, snpphred, snpfrac, | 
|---|
|  | 358 | keepends, options.threads, suppressHeader ) | 
|---|
|  | 359 | except ValueError, e: | 
|---|
|  | 360 | # clean up temp dir | 
|---|
|  | 361 | if os.path.exists( tmp_index_dir ): | 
|---|
|  | 362 | shutil.rmtree( tmp_index_dir ) | 
|---|
|  | 363 | stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) ) | 
|---|
|  | 364 | try: | 
|---|
|  | 365 | # have to nest try-except in try-finally to handle 2.4 | 
|---|
|  | 366 | try: | 
|---|
|  | 367 | # prepare actual aligning commands | 
|---|
|  | 368 | if options.paired == 'paired': | 
|---|
|  | 369 | cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output ) | 
|---|
|  | 370 | else: | 
|---|
|  | 371 | cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output ) | 
|---|
|  | 372 | # align | 
|---|
|  | 373 | tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name | 
|---|
|  | 374 | tmp_stderr = open( tmp, 'wb' ) | 
|---|
|  | 375 | proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) | 
|---|
|  | 376 | returncode = proc.wait() | 
|---|
|  | 377 | tmp_stderr.close() | 
|---|
|  | 378 | # get stderr, allowing for case where it's very large | 
|---|
|  | 379 | tmp_stderr = open( tmp, 'rb' ) | 
|---|
|  | 380 | stderr = '' | 
|---|
|  | 381 | buffsize = 1048576 | 
|---|
|  | 382 | try: | 
|---|
|  | 383 | while True: | 
|---|
|  | 384 | stderr += tmp_stderr.read( buffsize ) | 
|---|
|  | 385 | if not stderr or len( stderr ) % buffsize != 0: | 
|---|
|  | 386 | break | 
|---|
|  | 387 | except OverflowError: | 
|---|
|  | 388 | pass | 
|---|
|  | 389 | tmp_stderr.close() | 
|---|
|  | 390 | if returncode != 0: | 
|---|
|  | 391 | raise Exception, stderr | 
|---|
|  | 392 | # check that there are results in the output file | 
|---|
|  | 393 | if os.path.getsize( options.output ) == 0: | 
|---|
|  | 394 | raise Exception, 'The output file is empty, there may be an error with your input file or settings.' + '\nextra: ' + str(extra) | 
|---|
|  | 395 | except Exception, e: | 
|---|
|  | 396 | stop_err( 'Error aligning sequence. ' + str( e ) ) | 
|---|
|  | 397 | finally: | 
|---|
|  | 398 | # clean up temp dir | 
|---|
|  | 399 | if os.path.exists( tmp_index_dir ): | 
|---|
|  | 400 | shutil.rmtree( tmp_index_dir ) | 
|---|
|  | 401 | stdout += 'Sequence file aligned.\n' | 
|---|
|  | 402 | sys.stdout.write( stdout ) | 
|---|
|  | 403 |  | 
|---|
|  | 404 | if __name__=="__main__": __main__() | 
|---|