1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | """ |
---|
4 | Creates a pileup file from a bam file and a reference. |
---|
5 | |
---|
6 | usage: %prog [options] |
---|
7 | -p, --input1=p: bam file |
---|
8 | -o, --output1=o: Output pileup |
---|
9 | -R, --ref=R: Reference file type |
---|
10 | -n, --ownFile=n: User-supplied fasta reference file |
---|
11 | -d, --dbkey=d: dbkey of user-supplied file |
---|
12 | -x, --indexDir=x: Index directory |
---|
13 | -b, --bamIndex=b: BAM index file |
---|
14 | -s, --lastCol=s: Print the mapping quality as the last column |
---|
15 | -i, --indels=i: Only output lines containing indels |
---|
16 | -M, --mapCap=M: Cap mapping quality |
---|
17 | -c, --consensus=c: Call the consensus sequence using MAQ consensu model |
---|
18 | -T, --theta=T: Theta paramter (error dependency coefficient) |
---|
19 | -N, --hapNum=N: Number of haplotypes in sample |
---|
20 | -r, --fraction=r: Expected fraction of differences between a pair of haplotypes |
---|
21 | -I, --phredProb=I: Phred probability of an indel in sequencing/prep |
---|
22 | |
---|
23 | """ |
---|
24 | |
---|
25 | import os, shutil, subprocess, sys, tempfile |
---|
26 | from galaxy import eggs |
---|
27 | import pkg_resources; pkg_resources.require( "bx-python" ) |
---|
28 | from bx.cookbook import doc_optparse |
---|
29 | |
---|
30 | def stop_err( msg ): |
---|
31 | sys.stderr.write( '%s\n' % msg ) |
---|
32 | sys.exit() |
---|
33 | |
---|
34 | def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ): |
---|
35 | seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR |
---|
36 | seqPath = '' |
---|
37 | for line in open( seqFile ): |
---|
38 | line = line.rstrip( '\r\n' ) |
---|
39 | if line and not line.startswith( '#' ) and line.startswith( 'index' ): |
---|
40 | fields = line.split( '\t' ) |
---|
41 | if len( fields ) < 3: |
---|
42 | continue |
---|
43 | if fields[1] == dbkey: |
---|
44 | seqPath = fields[2].strip() |
---|
45 | break |
---|
46 | return seqPath |
---|
47 | |
---|
48 | def __main__(): |
---|
49 | #Parse Command Line |
---|
50 | options, args = doc_optparse.parse( __doc__ ) |
---|
51 | seqPath = check_seq_file( options.dbkey, options.indexDir ) |
---|
52 | #prepare file names |
---|
53 | tmpDir = tempfile.mkdtemp() |
---|
54 | tmpf0 = tempfile.NamedTemporaryFile( dir=tmpDir ) |
---|
55 | tmpf0_name = tmpf0.name |
---|
56 | tmpf0.close() |
---|
57 | tmpf0bam_name = '%s.bam' % tmpf0_name |
---|
58 | tmpf0bambai_name = '%s.bam.bai' % tmpf0_name |
---|
59 | tmpf1 = tempfile.NamedTemporaryFile( dir=tmpDir ) |
---|
60 | tmpf1_name = tmpf1.name |
---|
61 | tmpf1.close() |
---|
62 | tmpf1fai_name = '%s.fai' % tmpf1_name |
---|
63 | #link bam and bam index to working directory (can't move because need to leave original) |
---|
64 | os.symlink( options.input1, tmpf0bam_name ) |
---|
65 | os.symlink( options.bamIndex, tmpf0bambai_name ) |
---|
66 | #get parameters for pileup command |
---|
67 | if options.lastCol == 'yes': |
---|
68 | lastCol = '-s' |
---|
69 | else: |
---|
70 | lastCol = '' |
---|
71 | if options.indels == 'yes': |
---|
72 | indels = '-i' |
---|
73 | else: |
---|
74 | indels = '' |
---|
75 | opts = '%s %s -M %s' % ( lastCol, indels, options.mapCap ) |
---|
76 | if options.consensus == 'yes': |
---|
77 | opts += ' -c -T %s -N %s -r %s -I %s' % ( options.theta, options.hapNum, options.fraction, options.phredProb ) |
---|
78 | #prepare basic pileup command |
---|
79 | cmd = 'samtools pileup %s -f %s %s > %s' |
---|
80 | try: |
---|
81 | # have to nest try-except in try-finally to handle 2.4 |
---|
82 | try: |
---|
83 | #index reference if necessary and prepare pileup command |
---|
84 | if options.ref == 'indexed': |
---|
85 | if not os.path.exists( "%s.fai" % seqPath ): |
---|
86 | raise Exception, "No sequences are available for '%s', request them by reporting this error." % options.dbkey |
---|
87 | cmd = cmd % ( opts, seqPath, tmpf0bam_name, options.output1 ) |
---|
88 | elif options.ref == 'history': |
---|
89 | os.symlink( options.ownFile, tmpf1_name ) |
---|
90 | cmdIndex = 'samtools faidx %s' % ( tmpf1_name ) |
---|
91 | tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name |
---|
92 | tmp_stderr = open( tmp, 'wb' ) |
---|
93 | proc = subprocess.Popen( args=cmdIndex, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() ) |
---|
94 | returncode = proc.wait() |
---|
95 | tmp_stderr.close() |
---|
96 | # get stderr, allowing for case where it's very large |
---|
97 | tmp_stderr = open( tmp, 'rb' ) |
---|
98 | stderr = '' |
---|
99 | buffsize = 1048576 |
---|
100 | try: |
---|
101 | while True: |
---|
102 | stderr += tmp_stderr.read( buffsize ) |
---|
103 | if not stderr or len( stderr ) % buffsize != 0: |
---|
104 | break |
---|
105 | except OverflowError: |
---|
106 | pass |
---|
107 | tmp_stderr.close() |
---|
108 | #did index succeed? |
---|
109 | if returncode != 0: |
---|
110 | raise Exception, 'Error creating index file\n' + stderr |
---|
111 | cmd = cmd % ( opts, tmpf1_name, tmpf0bam_name, options.output1 ) |
---|
112 | #perform pileup command |
---|
113 | tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name |
---|
114 | tmp_stderr = open( tmp, 'wb' ) |
---|
115 | proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() ) |
---|
116 | returncode = proc.wait() |
---|
117 | tmp_stderr.close() |
---|
118 | #did it succeed? |
---|
119 | # get stderr, allowing for case where it's very large |
---|
120 | tmp_stderr = open( tmp, 'rb' ) |
---|
121 | stderr = '' |
---|
122 | buffsize = 1048576 |
---|
123 | try: |
---|
124 | while True: |
---|
125 | stderr += tmp_stderr.read( buffsize ) |
---|
126 | if not stderr or len( stderr ) % buffsize != 0: |
---|
127 | break |
---|
128 | except OverflowError: |
---|
129 | pass |
---|
130 | tmp_stderr.close() |
---|
131 | if returncode != 0: |
---|
132 | raise Exception, stderr |
---|
133 | except Exception, e: |
---|
134 | stop_err( 'Error running Samtools pileup tool\n' + str( e ) ) |
---|
135 | finally: |
---|
136 | #clean up temp files |
---|
137 | if os.path.exists( tmpDir ): |
---|
138 | shutil.rmtree( tmpDir ) |
---|
139 | # check that there are results in the output file |
---|
140 | if os.path.getsize( options.output1 ) > 0: |
---|
141 | sys.stdout.write( 'Converted BAM to pileup' ) |
---|
142 | else: |
---|
143 | stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' ) |
---|
144 | |
---|
145 | if __name__ == "__main__" : __main__() |
---|