Context Navigation

column_maker.py

リビジョン 2, 4.4 KB (コミッタ: hatakeyama, 15 年前)
import galaxy-central

行番号
1	#!/usr/bin/env python
2	# This tool takes a tab-delimited textfile as input and creates another column in the file which is the result of
3	# a computation performed on every row in the original file. The tool will skip over invalid lines within the file,
4	# informing the user about the number of lines skipped.
5	import sys, re, os.path
6	from galaxy import eggs
7	from galaxy.tools import validation
8	from galaxy.datatypes import metadata
9	from math import log,exp,sqrt,ceil,floor
10
# Refuse to run on interpreters older than Python 2.4.  An explicit check is
# used rather than `assert` because assertions are stripped under `python -O`,
# which would silently disable the guard.
if sys.version_info[:2] < ( 2, 4 ):
    sys.stderr.write( "This tool requires Python 2.4 or later.\n" )
    sys.exit( 1 )
12
def stop_err( msg ):
    """Write *msg* to stderr and terminate the script with a failing status.

    The message is written verbatim (no newline is appended).  The process
    exits with status 1 so that a caller inspecting the exit code, and not
    only stderr, can tell that the tool failed; a bare ``sys.exit()`` would
    report success (status 0).

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write( msg )
    sys.exit( 1 )
16
# Positional command-line arguments:
#   1: path of the input file that is read line by line
#   2: path of the output file that is written
#   3: expression evaluated for every row (may arrive escaped, see below)
#   4: rounding switch; the script compares it against "yes" and 'no'
#   5: number of columns (the dataset's 'columns' metadata)
#   6: comma-separated column types (the dataset's 'column_types' metadata)
_BAD_COLUMNS_MSG = "Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data."
_BAD_COLUMN_TYPES_MSG = "Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data."
_COLUMNS_MISMATCH_MSG = "The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data."

inp_file = sys.argv[1]
out_file = sys.argv[2]
expr = sys.argv[3]
round_result = sys.argv[4]
try:
    in_columns = int( sys.argv[5] )
except ( IndexError, ValueError ):
    # Only these two can occur here: the argument is absent, or it is not an
    # integer.  A bare `except:` would also hide unrelated failures.
    stop_err( _BAD_COLUMNS_MSG )
if in_columns < 2:
    # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method.
    stop_err( _BAD_COLUMNS_MSG )
try:
    in_column_types = sys.argv[6].split( ',' )
except IndexError:
    # The 'column_types' argument was not supplied at all.
    stop_err( _BAD_COLUMN_TYPES_MSG )
if len( in_column_types ) != in_columns:
    stop_err( _COLUMNS_MISMATCH_MSG )
34
# The expression may arrive with comparison operators and quote characters
# replaced by placeholder tokens; swap every token back for the text it
# stands for.
mapped_str = dict( [
    ( '__lt__', '<' ),
    ( '__le__', '<=' ),
    ( '__eq__', '==' ),
    ( '__ne__', '!=' ),
    ( '__gt__', '>' ),
    ( '__ge__', '>=' ),
    ( '__sq__', '\'' ),
    ( '__dq__', '"' ),
] )
for key in mapped_str:
    value = mapped_str[ key ]
    expr = expr.replace( key, value )
48
# For every input column build the variable name the expression can refer to
# (c1, c2, ...) and the call that converts the raw field text to the column's
# declared type.
cols, type_casts = [], []
for position in range( in_columns ):
    column_var = "c%d" % ( position + 1 )
    declared_type = in_column_types[ position ].strip()
    # Without rounding, integer columns are converted with float() instead of
    # int() -- presumably so arithmetic on them is not truncated; confirm.
    if declared_type == 'int' and round_result == 'no':
        declared_type = 'float'
    cols.append( column_var )
    type_casts.append( "%s(%s)" % ( declared_type, column_var ) )

# Source fragments that are spliced into the generated per-line code.
col_str = ', '.join( cols ) # 'c1, c2, c3, c4'
type_cast_str = ', '.join( type_casts ) # 'str(c1), int(c2), int(c3), str(c4)'
assign = "%s = line.split( '\\t' )" % col_str # unpack the fields of one line
wrap = "%s = %s" % ( col_str, type_cast_str ) # convert each field to its type

# Bookkeeping that the generated per-line code reads and updates.
total_lines = 0
lines_kept = 0
skipped_lines = 0
first_invalid_line = 0
invalid_line = None
out = open( out_file, 'wt' )
70
# Read input file, skipping invalid lines, and perform computation that will result in a new column.
#
# The loop is assembled as source text so that the per-line unpacking
# (`assign`), the type conversion (`wrap`) and the user's expression (`expr`)
# can be spliced in at the three %s placeholders.
#
# SECURITY NOTE: `expr` is taken from the command line and is executed as
# Python code without any sandboxing.  Only run this tool on expressions from
# a trusted source.
#
# Details of the generated code:
#   * blank lines and lines starting with '#' are counted as skipped;
#   * any error while unpacking, converting or evaluating a line causes that
#     line to be skipped rather than aborting the run;
#   * the first skipped line is remembered by testing `invalid_line is None`,
#     so that a blank first skipped line (the empty string) is not later
#     overwritten by a subsequent one;
#   * the input handle is closed explicitly once the loop ends or fails;
#   * `open()` and `out.write()` are used instead of `file()` and `print >>`,
#     which exist only in Python 2.
code = '''
in_handle = open( inp_file )
try:
    for i, line in enumerate( in_handle ):
        total_lines += 1
        line = line.rstrip( '\\r\\n' )
        if not line or line.startswith( '#' ):
            skipped_lines += 1
            if invalid_line is None:
                first_invalid_line = i + 1
                invalid_line = line
            continue
        try:
            %s
            %s
            new_val = %s
            if round_result == "yes":
                new_val = int( round( new_val ) )
            new_line = line + '\\t' + str( new_val )
            out.write( new_line + '\\n' )
            lines_kept += 1
        except Exception:
            skipped_lines += 1
            if invalid_line is None:
                first_invalid_line = i + 1
                invalid_line = line
finally:
    in_handle.close()
''' % ( assign, wrap, expr )
97
# Run the generated loop.  It executes in this module's namespace, so it reads
# and updates the counters initialised earlier and writes to `out`.
try:
    exec( code )
except SyntaxError:
    # The generated source did not compile.  Matching on the exception type
    # rather than on the text 'invalid syntax' also covers syntax errors that
    # carry a different message (for example an unterminated string).
    out.close()
    stop_err( 'Expression "%s" likely invalid. See tool tips, syntax and examples.' % expr )
except Exception:
    out.close()
    # sys.exc_info() is used so that this handler parses on every Python
    # version, unlike "except Exception, e" or "except Exception as e".
    stop_err( str( sys.exc_info()[1] ) )

# stop_err() never returns, so reaching this point means the loop completed.
out.close()
valid_lines = total_lines - skipped_lines
print( 'Creating column %d with expression %s' % ( in_columns + 1, expr ) )
if valid_lines > 0:
    # Every line is either kept or skipped, so lines_kept always equals
    # valid_lines; the percentage is therefore taken against total_lines,
    # the figure quoted in the message.
    print( 'kept %4.2f%% of %d lines.' % ( 100.0 * lines_kept / total_lines, total_lines ) )
else:
    print( 'Possible invalid expression "%s" or non-existent column referenced. See tool tips, syntax and examples.' % expr )
if skipped_lines > 0:
    print( 'Skipped %d invalid lines starting at line #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/stats/column_maker.py

異なるフォーマットでダウンロード: