Context Navigation

macs_wrapper.py

リビジョン 2, 7.0 KB (コミッタ: hatakeyama, 15 年前)
import galaxy-central

行番号
1	import sys, subprocess, tempfile, shutil, glob, os, os.path, gzip
2	from galaxy import eggs
3	import pkg_resources
4	pkg_resources.require( "simplejson" )
5	import simplejson
6
7	CHUNK_SIZE = 1024
8	ERROR_LINES = [ 'ERROR', 'command not found' ] #'Fatal' errors that should be passed back to stderr
9	WARNING_LINE_START = 'WARNING @'
10
11	def gunzip_cat_glob_path( glob_path, target_filename, delete = False ):
12	out = open( target_filename, 'wb' )
13	for filename in glob.glob( glob_path ):
14	fh = gzip.open( filename, 'rb' )
15	while True:
16	data = fh.read( CHUNK_SIZE )
17	if data:
18	out.write( data )
19	else:
20	break
21	fh.close()
22	if delete:
23	os.unlink( filename )
24	out.close()
25
26	def xls_to_interval( xls_file, interval_file, header = None ):
27	out = open( interval_file, 'wb' )
28	if header:
29	out.write( '#%s\n' % header )
30	wrote_header = False
31	#From macs readme: Coordinates in XLS is 1-based which is different with BED format.
32	for line in open( xls_file ):
33	#keep all existing comment lines
34	if line.startswith( '#' ):
35	out.write( line )
36	elif not wrote_header:
37	out.write( '#%s' % line )
38	wrote_header = True
39	else:
40	fields = line.split( '\t' )
41	if len( fields ) > 1:
42	fields[1] = str( int( fields[1] ) - 1 )
43	out.write( '\t'.join( fields ) )
44	out.close()
45
46	def main():
47	options = simplejson.load( open( sys.argv[1] ) )
48	output_bed = sys.argv[2]
49	output_extra_html = sys.argv[3]
50	output_extra_path = sys.argv[4]
51
52	experiment_name = '_'.join( options['experiment_name'].split() ) #save experiment name here, it will be used by macs for filenames (gzip of wig files will fail with spaces - macs doesn't properly escape them)..need to replace all whitespace, split makes this easier
53	cmdline = "macs -t %s" % ",".join( options['input_chipseq'] )
54	if options['input_control']:
55	cmdline = "%s -c %s" % ( cmdline, ",".join( options['input_control'] ) )
56	cmdline = "%s --format='%s' --name='%s' --gsize='%s' --tsize='%s' --bw='%s' --pvalue='%s' --mfold='%s' %s --lambdaset='%s' %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['tsize'], options['bw'], options['pvalue'], options['mfold'], options['nolambda'], options['lambdaset'], options['futurefdr'] )
57	if 'wig' in options:
58	wigextend = int( options['wig']['wigextend'] )
59	if wigextend >= 0:
60	wigextend = "--wigextend='%s'" % wigextend
61	else:
62	wigextend = ''
63	cmdline = "%s --wig %s --space='%s'" % ( cmdline, wigextend, options['wig']['space'] )
64	if 'nomodel' in options:
65	cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] )
66	if 'diag' in options:
67	cmdline = "%s --diag --fe-min='%s' --fe-max='%s' --fe-step='%s'" % ( cmdline, options['diag']['fe-min'], options['diag']['fe-max'], options['diag']['fe-step'] )
68
69	tmp_dir = tempfile.mkdtemp() #macs makes very messy output, need to contain it into a temp dir, then provide to user
70	stderr_name = tempfile.NamedTemporaryFile().name # redirect stderr here, macs provides lots of info via stderr, make it into a report
71	proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=open( stderr_name, 'wb' ) )
72	proc.wait()
73
74	#Need to lightly parse stderr file to see if there is a fatal error (e.g. macs is not installed on system)
75	#We don't want to set tool run to error state if only warnings or info, e.g. mfold needs to be decreased, rather create empty outputs, but let user view macs log
76	for line in open( stderr_name ):
77	for err_text in ERROR_LINES:
78	if err_text in line:
79	#print error, but don't quit, allow cleanup to occur at end
80	print >> sys.stderr, line.rstrip( '\n\r' )
81	if line.startswith( WARNING_LINE_START ):
82	#print warnings so they are viewable from info
83	print line.split( ':' )[-1].strip()
84
85	#run R to create pdf from model script
86	if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ):
87	cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name )
88	proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir )
89	proc.wait()
90
91
92	#move bed out to proper output file
93	created_bed_name = os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name )
94	if os.path.exists( created_bed_name ):
95	shutil.move( created_bed_name, output_bed )
96
97	#parse xls files to interval files as needed
98	if options['xls_to_interval']:
99	create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name )
100	if os.path.exists( create_peak_xls_file ):
101	xls_to_interval( create_peak_xls_file, options['xls_to_interval']['peaks_file'], header = 'peaks file' )
102	create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name )
103	if os.path.exists( create_peak_xls_file ):
104	xls_to_interval( create_peak_xls_file, options['xls_to_interval']['negative_peaks_file'], header = 'negative peaks file' )
105
106	#merge and move wig files as needed, delete gz'd files and remove emptied dirs
107	if 'wig' in options:
108	wig_base_dir = os.path.join( tmp_dir, "%s_MACS_wiggle" % experiment_name )
109	if os.path.exists( wig_base_dir ):
110	#treatment
111	treatment_dir = os.path.join( wig_base_dir, "treat" )
112	if os.path.exists( treatment_dir ):
113	gunzip_cat_glob_path( os.path.join( treatment_dir, "*.wig.gz" ), options['wig']['output_treatment_file'], delete = True )
114	os.rmdir( treatment_dir )
115	#control
116	if options['input_control']:
117	control_dir = os.path.join( wig_base_dir, "control" )
118	if os.path.exists( control_dir ):
119	gunzip_cat_glob_path( os.path.join( control_dir, "*.wig.gz" ), options['wig']['output_control_file'], delete = True )
120	os.rmdir( control_dir )
121	os.rmdir( wig_base_dir )
122
123	#move all remaining files to extra files path of html file output to allow user download
124	out_html = open( output_extra_html, 'wb' )
125	out_html.write( '<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name )
126	os.mkdir( output_extra_path )
127	for filename in sorted( os.listdir( tmp_dir ) ):
128	shutil.move( os.path.join( tmp_dir, filename ), os.path.join( output_extra_path, filename ) )
129	out_html.write( '<li><a href="%s">%s</a></li>\n' % ( filename, filename ) )
130	out_html.write( '</ul></p>\n' )
131	out_html.write( '<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' % open( stderr_name, 'rb' ).read() )
132	out_html.write( '</body></html>\n' )
133	out_html.close()
134
135	os.unlink( stderr_name )
136	os.rmdir( tmp_dir )
137
138	if __name__ == "__main__": main()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/peak_calling/macs_wrapper.py

異なるフォーマットでダウンロード: