Context Navigation

fastq_trimmer_by_quality.py

リビジョン 2, 6.6 KB (コミッタ: hatakeyama, 15 年前)
import galaxy-central

行番号
1	#Dan Blankenberg
2	from optparse import OptionParser
3	from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
4
5	def mean( score_list ):
6	return float( sum( score_list ) ) / float( len( score_list ) )
7
8	ACTION_METHODS = { 'min':min, 'max':max, 'sum':sum, 'mean':mean }
9
10	def compare( aggregated_value, operator, threshold_value ):
11	if operator == '>':
12	return aggregated_value > threshold_value
13	elif operator == '>=':
14	return aggregated_value >= threshold_value
15	elif operator == '==':
16	return aggregated_value == threshold_value
17	elif operator == '<':
18	return aggregated_value < threshold_value
19	elif operator == '<=':
20	return aggregated_value <= threshold_value
21	elif operator == '!=':
22	return aggregated_value != threshold_value
23
24	def exclude( value_list, exclude_indexes ):
25	rval = []
26	for i, val in enumerate( value_list ):
27	if i not in exclude_indexes:
28	rval.append( val )
29	return rval
30
31	def exclude_and_compare( aggregate_action, aggregate_list, operator, threshold_value, exclude_indexes = None ):
32	if not aggregate_list or compare( aggregate_action( aggregate_list ), operator, threshold_value ):
33	return True
34	if exclude_indexes:
35	for exclude_index in exclude_indexes:
36	excluded_list = exclude( aggregate_list, exclude_index )
37	if not excluded_list or compare( aggregate_action( excluded_list ), operator, threshold_value ):
38	return True
39	return False
40
41	def main():
42	usage = "usage: %prog [options] input_file output_file"
43	parser = OptionParser( usage=usage )
44	parser.add_option( '-f', '--format', dest='format', type='choice', default='sanger', choices=( 'sanger', 'cssanger', 'solexa', 'illumina' ), help='FASTQ variant type' )
45	parser.add_option( '-s', '--window_size', type="int", dest='window_size', default='1', help='Window size' )
46	parser.add_option( '-t', '--window_step', type="int", dest='window_step', default='1', help='Window step' )
47	parser.add_option( '-e', '--trim_ends', type="choice", dest='trim_ends', default='53', choices=('5','3','53','35' ), help='Ends to Trim' )
48	parser.add_option( '-a', '--aggregation_action', type="choice", dest='aggregation_action', default='min', choices=('min','max','sum','mean' ), help='Aggregate action for window' )
49	parser.add_option( '-x', '--exclude_count', type="int", dest='exclude_count', default='0', help='Maximum number of bases to exclude from the window during aggregation' )
50	parser.add_option( '-c', '--score_comparison', type="choice", dest='score_comparison', default='>=', choices=('>','>=','==','<', '<=', '!=' ), help='Keep read when aggregate score is' )
51	parser.add_option( '-q', '--quality_score', type="float", dest='quality_score', default='0', help='Quality Score' )
52	parser.add_option( "-k", "--keep_zero_length", action="store_true", dest="keep_zero_length", default=False, help="Keep reads with zero length")
53	( options, args ) = parser.parse_args()
54
55	if len ( args ) != 2:
56	parser.error( "Need to specify an input file and an output file" )
57
58	if options.window_size < 1:
59	parser.error( 'You must specify a strictly positive window size' )
60
61	if options.window_step < 1:
62	parser.error( 'You must specify a strictly positive step size' )
63
64	#determine an exhaustive list of window indexes that can be excluded from aggregation
65	exclude_window_indexes = []
66	last_exclude_indexes = []
67	for exclude_count in range( min( options.exclude_count, options.window_size ) ):
68	if last_exclude_indexes:
69	new_exclude_indexes = []
70	for exclude_list in last_exclude_indexes:
71	for window_index in range( options.window_size ):
72	if window_index not in exclude_list:
73	new_exclude = sorted( exclude_list + [ window_index ] )
74	if new_exclude not in exclude_window_indexes + new_exclude_indexes:
75	new_exclude_indexes.append( new_exclude )
76	exclude_window_indexes += new_exclude_indexes
77	last_exclude_indexes = new_exclude_indexes
78	else:
79	for window_index in range( options.window_size ):
80	last_exclude_indexes.append( [ window_index ] )
81	exclude_window_indexes = list( last_exclude_indexes )
82
83	out = fastqWriter( open( args[1], 'wb' ), format = options.format )
84	action = ACTION_METHODS[ options.aggregation_action ]
85
86	num_reads = None
87	num_reads_excluded = 0
88	for num_reads, fastq_read in enumerate( fastqReader( open( args[0] ), format = options.format ) ):
89	for trim_end in options.trim_ends:
90	quality_list = fastq_read.get_decimal_quality_scores()
91	if trim_end == '5':
92	lwindow_position = 0 #left position of window
93	while True:
94	if lwindow_position >= len( quality_list ):
95	fastq_read.sequence = ''
96	fastq_read.quality = ''
97	break
98	if exclude_and_compare( action, quality_list[ lwindow_position:lwindow_position + options.window_size ], options.score_comparison, options.quality_score, exclude_window_indexes ):
99	fastq_read = fastq_read.slice( lwindow_position, None )
100	break
101	lwindow_position += options.window_step
102	else:
103	rwindow_position = len( quality_list ) #right position of window
104	while True:
105	lwindow_position = rwindow_position - options.window_size #left position of window
106	if rwindow_position <= 0 or lwindow_position < 0:
107	fastq_read.sequence = ''
108	fastq_read.quality = ''
109	break
110	if exclude_and_compare( action, quality_list[ lwindow_position:rwindow_position ], options.score_comparison, options.quality_score, exclude_window_indexes ):
111	fastq_read = fastq_read.slice( None, rwindow_position )
112	break
113	rwindow_position -= options.window_step
114	if options.keep_zero_length or len( fastq_read ):
115	out.write( fastq_read )
116	else:
117	num_reads_excluded += 1
118	out.close()
119	if num_reads is None:
120	print "No valid FASTQ reads could be processed."
121	else:
122	print "%i FASTQ reads were processed." % ( num_reads + 1 )
123	if num_reads_excluded:
124	print "%i reads of zero length were excluded from the output." % num_reads_excluded
125
126	if __name__ == "__main__": main()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/fastq/fastq_trimmer_by_quality.py

異なるフォーマットでダウンロード: