1 | import sys, subprocess, tempfile, shutil, glob, os, os.path, gzip |
---|
2 | from galaxy import eggs |
---|
3 | import pkg_resources |
---|
4 | pkg_resources.require( "simplejson" ) |
---|
5 | import simplejson |
---|
6 | |
---|
7 | CHUNK_SIZE = 1024 |
---|
8 | ERROR_LINES = [ 'ERROR', 'command not found' ] #'Fatal' errors that should be passed back to stderr |
---|
9 | WARNING_LINE_START = 'WARNING @' |
---|
10 | |
---|
11 | def gunzip_cat_glob_path( glob_path, target_filename, delete = False ): |
---|
12 | out = open( target_filename, 'wb' ) |
---|
13 | for filename in glob.glob( glob_path ): |
---|
14 | fh = gzip.open( filename, 'rb' ) |
---|
15 | while True: |
---|
16 | data = fh.read( CHUNK_SIZE ) |
---|
17 | if data: |
---|
18 | out.write( data ) |
---|
19 | else: |
---|
20 | break |
---|
21 | fh.close() |
---|
22 | if delete: |
---|
23 | os.unlink( filename ) |
---|
24 | out.close() |
---|
25 | |
---|
26 | def xls_to_interval( xls_file, interval_file, header = None ): |
---|
27 | out = open( interval_file, 'wb' ) |
---|
28 | if header: |
---|
29 | out.write( '#%s\n' % header ) |
---|
30 | wrote_header = False |
---|
31 | #From macs readme: Coordinates in XLS is 1-based which is different with BED format. |
---|
32 | for line in open( xls_file ): |
---|
33 | #keep all existing comment lines |
---|
34 | if line.startswith( '#' ): |
---|
35 | out.write( line ) |
---|
36 | elif not wrote_header: |
---|
37 | out.write( '#%s' % line ) |
---|
38 | wrote_header = True |
---|
39 | else: |
---|
40 | fields = line.split( '\t' ) |
---|
41 | if len( fields ) > 1: |
---|
42 | fields[1] = str( int( fields[1] ) - 1 ) |
---|
43 | out.write( '\t'.join( fields ) ) |
---|
44 | out.close() |
---|
45 | |
---|
46 | def main(): |
---|
47 | options = simplejson.load( open( sys.argv[1] ) ) |
---|
48 | output_bed = sys.argv[2] |
---|
49 | output_extra_html = sys.argv[3] |
---|
50 | output_extra_path = sys.argv[4] |
---|
51 | |
---|
52 | experiment_name = '_'.join( options['experiment_name'].split() ) #save experiment name here, it will be used by macs for filenames (gzip of wig files will fail with spaces - macs doesn't properly escape them)..need to replace all whitespace, split makes this easier |
---|
53 | cmdline = "macs -t %s" % ",".join( options['input_chipseq'] ) |
---|
54 | if options['input_control']: |
---|
55 | cmdline = "%s -c %s" % ( cmdline, ",".join( options['input_control'] ) ) |
---|
56 | cmdline = "%s --format='%s' --name='%s' --gsize='%s' --tsize='%s' --bw='%s' --pvalue='%s' --mfold='%s' %s --lambdaset='%s' %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['tsize'], options['bw'], options['pvalue'], options['mfold'], options['nolambda'], options['lambdaset'], options['futurefdr'] ) |
---|
57 | if 'wig' in options: |
---|
58 | wigextend = int( options['wig']['wigextend'] ) |
---|
59 | if wigextend >= 0: |
---|
60 | wigextend = "--wigextend='%s'" % wigextend |
---|
61 | else: |
---|
62 | wigextend = '' |
---|
63 | cmdline = "%s --wig %s --space='%s'" % ( cmdline, wigextend, options['wig']['space'] ) |
---|
64 | if 'nomodel' in options: |
---|
65 | cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] ) |
---|
66 | if 'diag' in options: |
---|
67 | cmdline = "%s --diag --fe-min='%s' --fe-max='%s' --fe-step='%s'" % ( cmdline, options['diag']['fe-min'], options['diag']['fe-max'], options['diag']['fe-step'] ) |
---|
68 | |
---|
69 | tmp_dir = tempfile.mkdtemp() #macs makes very messy output, need to contain it into a temp dir, then provide to user |
---|
70 | stderr_name = tempfile.NamedTemporaryFile().name # redirect stderr here, macs provides lots of info via stderr, make it into a report |
---|
71 | proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=open( stderr_name, 'wb' ) ) |
---|
72 | proc.wait() |
---|
73 | |
---|
74 | #Need to lightly parse stderr file to see if there is a fatal error (e.g. macs is not installed on system) |
---|
75 | #We don't want to set tool run to error state if only warnings or info, e.g. mfold needs to be decreased, rather create empty outputs, but let user view macs log |
---|
76 | for line in open( stderr_name ): |
---|
77 | for err_text in ERROR_LINES: |
---|
78 | if err_text in line: |
---|
79 | #print error, but don't quit, allow cleanup to occur at end |
---|
80 | print >> sys.stderr, line.rstrip( '\n\r' ) |
---|
81 | if line.startswith( WARNING_LINE_START ): |
---|
82 | #print warnings so they are viewable from info |
---|
83 | print line.split( ':' )[-1].strip() |
---|
84 | |
---|
85 | #run R to create pdf from model script |
---|
86 | if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ): |
---|
87 | cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name ) |
---|
88 | proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir ) |
---|
89 | proc.wait() |
---|
90 | |
---|
91 | |
---|
92 | #move bed out to proper output file |
---|
93 | created_bed_name = os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name ) |
---|
94 | if os.path.exists( created_bed_name ): |
---|
95 | shutil.move( created_bed_name, output_bed ) |
---|
96 | |
---|
97 | #parse xls files to interval files as needed |
---|
98 | if options['xls_to_interval']: |
---|
99 | create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name ) |
---|
100 | if os.path.exists( create_peak_xls_file ): |
---|
101 | xls_to_interval( create_peak_xls_file, options['xls_to_interval']['peaks_file'], header = 'peaks file' ) |
---|
102 | create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name ) |
---|
103 | if os.path.exists( create_peak_xls_file ): |
---|
104 | xls_to_interval( create_peak_xls_file, options['xls_to_interval']['negative_peaks_file'], header = 'negative peaks file' ) |
---|
105 | |
---|
106 | #merge and move wig files as needed, delete gz'd files and remove emptied dirs |
---|
107 | if 'wig' in options: |
---|
108 | wig_base_dir = os.path.join( tmp_dir, "%s_MACS_wiggle" % experiment_name ) |
---|
109 | if os.path.exists( wig_base_dir ): |
---|
110 | #treatment |
---|
111 | treatment_dir = os.path.join( wig_base_dir, "treat" ) |
---|
112 | if os.path.exists( treatment_dir ): |
---|
113 | gunzip_cat_glob_path( os.path.join( treatment_dir, "*.wig.gz" ), options['wig']['output_treatment_file'], delete = True ) |
---|
114 | os.rmdir( treatment_dir ) |
---|
115 | #control |
---|
116 | if options['input_control']: |
---|
117 | control_dir = os.path.join( wig_base_dir, "control" ) |
---|
118 | if os.path.exists( control_dir ): |
---|
119 | gunzip_cat_glob_path( os.path.join( control_dir, "*.wig.gz" ), options['wig']['output_control_file'], delete = True ) |
---|
120 | os.rmdir( control_dir ) |
---|
121 | os.rmdir( wig_base_dir ) |
---|
122 | |
---|
123 | #move all remaining files to extra files path of html file output to allow user download |
---|
124 | out_html = open( output_extra_html, 'wb' ) |
---|
125 | out_html.write( '<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name ) |
---|
126 | os.mkdir( output_extra_path ) |
---|
127 | for filename in sorted( os.listdir( tmp_dir ) ): |
---|
128 | shutil.move( os.path.join( tmp_dir, filename ), os.path.join( output_extra_path, filename ) ) |
---|
129 | out_html.write( '<li><a href="%s">%s</a></li>\n' % ( filename, filename ) ) |
---|
130 | out_html.write( '</ul></p>\n' ) |
---|
131 | out_html.write( '<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' % open( stderr_name, 'rb' ).read() ) |
---|
132 | out_html.write( '</body></html>\n' ) |
---|
133 | out_html.close() |
---|
134 | |
---|
135 | os.unlink( stderr_name ) |
---|
136 | os.rmdir( tmp_dir ) |
---|
137 | |
---|
138 | if __name__ == "__main__": main() |
---|