root/galaxy-central/tools/samtools/sam_pileup.py @ 3

リビジョン 2, 5.8 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2
3"""
4Creates a pileup file from a bam file and a reference.
5
6usage: %prog [options]
7   -p, --input1=p: bam file
8   -o, --output1=o: Output pileup
9   -R, --ref=R: Reference file type
10   -n, --ownFile=n: User-supplied fasta reference file
11   -d, --dbkey=d: dbkey of user-supplied file
12   -x, --indexDir=x: Index directory
13   -b, --bamIndex=b: BAM index file
14   -s, --lastCol=s: Print the mapping quality as the last column
15   -i, --indels=i: Only output lines containing indels
16   -M, --mapCap=M: Cap mapping quality
17   -c, --consensus=c: Call the consensus sequence using MAQ consensu model
18   -T, --theta=T: Theta paramter (error dependency coefficient)
19   -N, --hapNum=N: Number of haplotypes in sample
20   -r, --fraction=r: Expected fraction of differences between a pair of haplotypes
21   -I, --phredProb=I: Phred probability of an indel in sequencing/prep
22
23"""
24
25import os, shutil, subprocess, sys, tempfile
26from galaxy import eggs
27import pkg_resources; pkg_resources.require( "bx-python" )
28from bx.cookbook import doc_optparse
29
30def stop_err( msg ):
31    sys.stderr.write( '%s\n' % msg )
32    sys.exit()
33
34def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
35    seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR
36    seqPath = ''
37    for line in open( seqFile ):
38        line = line.rstrip( '\r\n' )
39        if line and not line.startswith( '#' ) and line.startswith( 'index' ):
40            fields = line.split( '\t' )
41            if len( fields ) < 3:
42                continue
43            if fields[1] == dbkey:
44                seqPath = fields[2].strip()
45                break
46    return seqPath
47
48def __main__():
49    #Parse Command Line
50    options, args = doc_optparse.parse( __doc__ )
51    seqPath = check_seq_file( options.dbkey, options.indexDir )
52    #prepare file names
53    tmpDir = tempfile.mkdtemp()
54    tmpf0 = tempfile.NamedTemporaryFile( dir=tmpDir )
55    tmpf0_name = tmpf0.name
56    tmpf0.close()
57    tmpf0bam_name = '%s.bam' % tmpf0_name
58    tmpf0bambai_name = '%s.bam.bai' % tmpf0_name
59    tmpf1 = tempfile.NamedTemporaryFile( dir=tmpDir )
60    tmpf1_name = tmpf1.name
61    tmpf1.close()
62    tmpf1fai_name = '%s.fai' % tmpf1_name
63    #link bam and bam index to working directory (can't move because need to leave original)
64    os.symlink( options.input1, tmpf0bam_name )
65    os.symlink( options.bamIndex, tmpf0bambai_name )
66    #get parameters for pileup command
67    if options.lastCol == 'yes':
68        lastCol = '-s'
69    else:
70        lastCol = ''
71    if options.indels == 'yes':
72        indels = '-i'
73    else:
74        indels = ''
75    opts = '%s %s -M %s' % ( lastCol, indels, options.mapCap )
76    if options.consensus == 'yes':
77        opts += ' -c -T %s -N %s -r %s -I %s' % ( options.theta, options.hapNum, options.fraction, options.phredProb )
78    #prepare basic pileup command
79    cmd = 'samtools pileup %s -f %s %s > %s'
80    try:
81        # have to nest try-except in try-finally to handle 2.4
82        try:
83            #index reference if necessary and prepare pileup command
84            if options.ref == 'indexed':
85                if not os.path.exists( "%s.fai" % seqPath ):
86                    raise Exception, "No sequences are available for '%s', request them by reporting this error." % options.dbkey
87                cmd = cmd % ( opts, seqPath, tmpf0bam_name, options.output1 )
88            elif options.ref == 'history':
89                os.symlink( options.ownFile, tmpf1_name )
90                cmdIndex = 'samtools faidx %s' % ( tmpf1_name )
91                tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name
92                tmp_stderr = open( tmp, 'wb' )
93                proc = subprocess.Popen( args=cmdIndex, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() )
94                returncode = proc.wait()
95                tmp_stderr.close()
96                # get stderr, allowing for case where it's very large
97                tmp_stderr = open( tmp, 'rb' )
98                stderr = ''
99                buffsize = 1048576
100                try:
101                    while True:
102                        stderr += tmp_stderr.read( buffsize )
103                        if not stderr or len( stderr ) % buffsize != 0:
104                            break
105                except OverflowError:
106                    pass
107                tmp_stderr.close()
108                #did index succeed?
109                if returncode != 0:
110                    raise Exception, 'Error creating index file\n' + stderr
111                cmd = cmd % ( opts, tmpf1_name, tmpf0bam_name, options.output1 )
112            #perform pileup command
113            tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name
114            tmp_stderr = open( tmp, 'wb' )
115            proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() )
116            returncode = proc.wait()
117            tmp_stderr.close()
118            #did it succeed?
119            # get stderr, allowing for case where it's very large
120            tmp_stderr = open( tmp, 'rb' )
121            stderr = ''
122            buffsize = 1048576
123            try:
124                while True:
125                    stderr += tmp_stderr.read( buffsize )
126                    if not stderr or len( stderr ) % buffsize != 0:
127                        break
128            except OverflowError:
129                pass
130            tmp_stderr.close()
131            if returncode != 0:
132                raise Exception, stderr
133        except Exception, e:
134            stop_err( 'Error running Samtools pileup tool\n' + str( e ) )
135    finally:
136        #clean up temp files
137        if os.path.exists( tmpDir ):
138            shutil.rmtree( tmpDir )
139    # check that there are results in the output file
140    if os.path.getsize( options.output1 ) > 0:
141        sys.stdout.write( 'Converted BAM to pileup' )
142    else:
143        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
144
145if __name__ == "__main__" : __main__()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。