Context Navigation

megablast_wrapper.py

リビジョン 2, 4.7 KB (コミッタ: hatakeyama, 15 年前)
import galaxy-central

行番号
1	#!/usr/bin/env python
2	"""
3	run megablast for metagenomics data
4
5	usage: %prog [options]
6	-d, --db_build=d: The database to use
7	-i, --input=i: Input FASTQ candidate file
8	-w, --word_size=w: Size of best perfect match
9	-c, --identity_cutoff=c: Report hits at or above this identity
10	-e, --eval_cutoff=e: Expectation value cutoff
11	-f, --filter_query=f: Filter out low complexity regions
12	-x, --index_dir=x: Data index directory
13	-o, --output=o: Output file
14
15	usage: %prog db_build input_file word_size identity_cutoff eval_cutoff filter_query index_dir output_file
16	"""
17
18	import os, subprocess, sys, tempfile
19	from galaxy import eggs
20	import pkg_resources; pkg_resources.require( "bx-python" )
21	from bx.cookbook import doc_optparse
22
23	assert sys.version_info[:2] >= ( 2, 4 )
24
25	def stop_err( msg ):
26	sys.stderr.write( "%s\n" % msg )
27	sys.exit()
28
29	def __main__():
30	#Parse Command Line
31	options, args = doc_optparse.parse( __doc__ )
32
33	db_build = options.db_build
34	query_filename = options.input.strip()
35	output_filename = options.output.strip()
36	mega_word_size = options.word_size # -W
37	mega_iden_cutoff = options.identity_cutoff # -p
38	mega_evalue_cutoff = options.eval_cutoff # -e
39	mega_temp_output = tempfile.NamedTemporaryFile().name
40	mega_filter = options.filter_query # -F
41	GALAXY_DATA_INDEX_DIR = options.index_dir
42	DB_LOC = "%s/blastdb.loc" % GALAXY_DATA_INDEX_DIR
43
44	# megablast parameters
45	try:
46	int( mega_word_size )
47	except:
48	stop_err( 'Invalid value for word size' )
49	try:
50	float(mega_iden_cutoff)
51	except:
52	stop_err( 'Invalid value for identity cut-off' )
53	try:
54	float(mega_evalue_cutoff)
55	except:
56	stop_err( 'Invalid value for Expectation value' )
57
58	# prepare the database
59	db = {}
60	for i, line in enumerate( file( DB_LOC ) ):
61	line = line.rstrip( '\r\n' )
62	if not line or line.startswith( '#' ):
63	continue
64	fields = line.split( '\t' )
65	db[ fields[0] ] = fields[1]
66
67	if not db.has_key( db_build ):
68	stop_err( 'Cannot locate the target database. Please check your location file.' )
69
70	# arguments for megablast
71	chunk = db[ ( db_build ) ]
72	megablast_command = "megablast -d %s -i %s -o %s -m 8 -a 8 -W %s -p %s -e %s -F %s > /dev/null " \
73	% ( chunk, query_filename, mega_temp_output, mega_word_size, mega_iden_cutoff, mega_evalue_cutoff, mega_filter )
74
75	print megablast_command
76
77	tmp = tempfile.NamedTemporaryFile().name
78	try:
79	tmp_stderr = open( tmp, 'wb' )
80	proc = subprocess.Popen( args=megablast_command, shell=True, stderr=tmp_stderr.fileno() )
81	returncode = proc.wait()
82	tmp_stderr.close()
83	# get stderr, allowing for case where it's very large
84	tmp_stderr = open( tmp, 'rb' )
85	stderr = ''
86	buffsize = 1048576
87	try:
88	while True:
89	stderr += tmp_stderr.read( buffsize )
90	if not stderr or len( stderr ) % buffsize != 0:
91	break
92	except OverflowError:
93	pass
94	tmp_stderr.close()
95	if returncode != 0:
96	raise Exception, stderr
97	if os.path.exists( tmp ):
98	os.unlink( tmp )
99	except Exception, e:
100	if os.path.exists( mega_temp_output ):
101	os.unlink( mega_temp_output )
102	if os.path.exists( tmp ):
103	os.unlink( tmp )
104	stop_err( 'Error indexing reference sequence. ' + str( e ) )
105
106	output = open( output_filename, 'w' )
107	invalid_lines = 0
108	for i, line in enumerate( file( mega_temp_output ) ):
109	line = line.rstrip( '\r\n' )
110	fields = line.split()
111	try:
112	# get gi and length of that gi seq
113	gi, gi_len = fields[1].split( '_' )
114	# convert the last column (causing problem in filter tool) to float
115	fields[-1] = float( fields[-1] )
116	new_line = "%s\t%s\t%s\t%s\t%0.1f" % ( fields[0], gi, gi_len, '\t'.join( fields[2:-1] ), fields[-1] )
117	except:
118	new_line = line
119	invalid_lines += 1
120	output.write( "%s\n" % new_line )
121	output.close()
122
123	if os.path.exists( mega_temp_output ):
124	os.unlink( mega_temp_output ) #remove the tempfile that we just reformatted the contents of
125
126	if invalid_lines:
127	print "Unable to parse %d lines. Keep the default format." % invalid_lines
128
129	# megablast generates a file called error.log, if empty, delete it, if not, show the contents
130	if os.path.exists( './error.log' ):
131	for i, line in enumerate( file( './error.log' ) ):
132	line = line.rstrip( '\r\n' )
133	print line
134	os.remove( './error.log' )
135
136	if __name__ == "__main__" : __main__()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/metag_tools/megablast_wrapper.py

異なるフォーマットでダウンロード: