root/galaxy-central/tools/peak_calling/macs_wrapper.py

リビジョン 2, 7.0 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1import sys, subprocess, tempfile, shutil, glob, os, os.path, gzip
2from galaxy import eggs
3import pkg_resources
4pkg_resources.require( "simplejson" )
5import simplejson
6
7CHUNK_SIZE = 1024
8ERROR_LINES = [ 'ERROR', 'command not found' ] #'Fatal' errors that should be passed back to stderr
9WARNING_LINE_START = 'WARNING @'
10
11def gunzip_cat_glob_path( glob_path, target_filename, delete = False ):
12    out = open( target_filename, 'wb' )
13    for filename in glob.glob( glob_path ):
14        fh = gzip.open( filename, 'rb' )
15        while True:
16            data = fh.read( CHUNK_SIZE )
17            if data:
18                out.write( data )
19            else:
20                break
21        fh.close()
22        if delete:
23            os.unlink( filename )
24    out.close()
25
26def xls_to_interval( xls_file, interval_file, header = None ):
27    out = open( interval_file, 'wb' )
28    if header:
29        out.write( '#%s\n' % header )
30    wrote_header = False
31    #From macs readme: Coordinates in XLS is 1-based which is different with BED format.
32    for line in open( xls_file ):
33        #keep all existing comment lines
34        if line.startswith( '#' ):
35            out.write( line )
36        elif not wrote_header:
37            out.write( '#%s' % line )
38            wrote_header = True
39        else:
40            fields = line.split( '\t' )
41            if len( fields ) > 1:
42                fields[1] = str( int( fields[1] ) - 1 )
43            out.write( '\t'.join( fields ) )
44    out.close()
45
46def main():
47    options = simplejson.load( open( sys.argv[1] ) )
48    output_bed = sys.argv[2]
49    output_extra_html = sys.argv[3]
50    output_extra_path = sys.argv[4]
51   
52    experiment_name = '_'.join( options['experiment_name'].split() ) #save experiment name here, it will be used by macs for filenames (gzip of wig files will fail with spaces - macs doesn't properly escape them)..need to replace all whitespace, split makes this easier
53    cmdline = "macs -t %s" % ",".join( options['input_chipseq'] )
54    if options['input_control']:
55        cmdline = "%s -c %s" % ( cmdline, ",".join( options['input_control'] ) )
56    cmdline = "%s --format='%s' --name='%s' --gsize='%s' --tsize='%s' --bw='%s' --pvalue='%s' --mfold='%s' %s --lambdaset='%s' %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['tsize'], options['bw'], options['pvalue'], options['mfold'], options['nolambda'], options['lambdaset'], options['futurefdr'] )
57    if 'wig' in options:
58        wigextend = int( options['wig']['wigextend']  )
59        if wigextend >= 0:
60            wigextend = "--wigextend='%s'" % wigextend
61        else:
62            wigextend = ''
63        cmdline = "%s --wig %s --space='%s'" % ( cmdline, wigextend, options['wig']['space'] )
64    if 'nomodel' in options:
65        cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] )
66    if 'diag' in options:
67        cmdline = "%s --diag --fe-min='%s' --fe-max='%s' --fe-step='%s'" % ( cmdline, options['diag']['fe-min'], options['diag']['fe-max'], options['diag']['fe-step'] )
68   
69    tmp_dir = tempfile.mkdtemp() #macs makes very messy output, need to contain it into a temp dir, then provide to user
70    stderr_name = tempfile.NamedTemporaryFile().name # redirect stderr here, macs provides lots of info via stderr, make it into a report
71    proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=open( stderr_name, 'wb' ) )
72    proc.wait()
73   
74    #Need to lightly parse stderr file to see if there is a fatal error (e.g. macs is not installed on system)
75    #We don't want to set tool run to error state if only warnings or info, e.g. mfold needs to be decreased, rather create empty outputs, but let user view macs log
76    for line in open( stderr_name ):
77        for err_text in ERROR_LINES:
78            if err_text in line:
79                #print error, but don't quit, allow cleanup to occur at end
80                print >> sys.stderr, line.rstrip( '\n\r' )
81        if line.startswith( WARNING_LINE_START ):
82            #print warnings so they are viewable from info
83            print line.split( ':' )[-1].strip()
84   
85    #run R to create pdf from model script
86    if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ):
87        cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name )
88        proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir )
89        proc.wait()
90   
91   
92    #move bed out to proper output file
93    created_bed_name =  os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name )
94    if os.path.exists( created_bed_name ):
95        shutil.move( created_bed_name, output_bed )
96   
97    #parse xls files to interval files as needed
98    if options['xls_to_interval']:
99        create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name )
100        if os.path.exists( create_peak_xls_file ):
101            xls_to_interval( create_peak_xls_file, options['xls_to_interval']['peaks_file'], header = 'peaks file' )
102        create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name )
103        if os.path.exists( create_peak_xls_file ):
104            xls_to_interval( create_peak_xls_file, options['xls_to_interval']['negative_peaks_file'], header = 'negative peaks file' )
105   
106    #merge and move wig files as needed, delete gz'd files and remove emptied dirs
107    if 'wig' in options:
108        wig_base_dir = os.path.join( tmp_dir, "%s_MACS_wiggle" % experiment_name )
109        if os.path.exists( wig_base_dir ):
110            #treatment
111            treatment_dir = os.path.join( wig_base_dir, "treat" )
112            if os.path.exists( treatment_dir ):
113                gunzip_cat_glob_path( os.path.join( treatment_dir, "*.wig.gz" ), options['wig']['output_treatment_file'], delete = True )
114                os.rmdir( treatment_dir )
115                #control
116                if options['input_control']:
117                    control_dir = os.path.join( wig_base_dir, "control" )
118                    if os.path.exists( control_dir ):
119                        gunzip_cat_glob_path( os.path.join( control_dir, "*.wig.gz" ), options['wig']['output_control_file'], delete = True )
120                        os.rmdir( control_dir )
121            os.rmdir( wig_base_dir )
122   
123    #move all remaining files to extra files path of html file output to allow user download
124    out_html = open( output_extra_html, 'wb' )
125    out_html.write( '<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name )
126    os.mkdir( output_extra_path )
127    for filename in sorted( os.listdir( tmp_dir ) ):
128        shutil.move( os.path.join( tmp_dir, filename ), os.path.join( output_extra_path, filename ) )
129        out_html.write( '<li><a href="%s">%s</a></li>\n' % ( filename, filename ) )
130    out_html.write( '</ul></p>\n' )
131    out_html.write( '<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' % open( stderr_name, 'rb' ).read() )
132    out_html.write( '</body></html>\n' )
133    out_html.close()
134   
135    os.unlink( stderr_name )
136    os.rmdir( tmp_dir )
137
138if __name__ == "__main__": main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。