root/galaxy-central/tools/stats/ @ 2

リビジョン 2, 4.4 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

#!/usr/bin/env python
# This tool takes a tab-delimited text file as input and creates another column in the file which is the result of
# a computation performed on every row in the original file.  The tool will skip over invalid lines within the file,
# informing the user about the number of lines skipped.
import os.path
import re
import sys
from math import log, exp, sqrt, ceil, floor

from galaxy import eggs
# NOTE(review): the module path of this import was lost in extraction
# ("from import validation"); galaxy.tools is presumed -- confirm against the repository.
from galaxy.tools import validation
from galaxy.datatypes import metadata
# Refuse to run on interpreters older than Python 2.4.
assert sys.version_info[:2] >= ( 2, 4 )
def stop_err( msg ):
    """Write *msg* to stderr and terminate the process with a failing status.

    The message is written exactly as given (no newline is appended).
    Raises SystemExit with code 1; a bare sys.exit() would exit with
    status 0, which reports success to anything checking the exit code.
    """
    sys.stderr.write( msg )
    sys.exit( 1 )
# Unescape if input has been escaped: escaped token -> original character(s).
mapped_str = {
    '__lt__': '<',
    '__le__': '<=',
    '__eq__': '==',
    '__ne__': '!=',
    '__gt__': '>',
    '__ge__': '>=',
    '__sq__': '\'',
    '__dq__': '"',
}


def _unescape_expression( expr ):
    """Return *expr* with every escaped token from mapped_str replaced."""
    for key, value in mapped_str.items():
        expr = expr.replace( key, value )
    return expr


def _build_row_code( in_columns, in_column_types, round_result, expr ):
    """Return the source text of the loop that computes the new column.

    in_columns      -- number of columns; variables c1..cN are generated.
    in_column_types -- one type name per column, used as the cast callable
                       (e.g. 'int(c2)').  The names are interpolated into
                       the generated code without validation.
    round_result    -- when 'no', 'int' columns are cast with float instead.
    expr            -- the (already unescaped) expression assigned to new_val.

    The generated code expects these names in its execution namespace:
    inp_file, out, round_result, total_lines, skipped_lines, lines_kept,
    first_invalid_line and invalid_line.
    """
    # Prepare the column variable names and wrappers for column data types
    cols, type_casts = [], []
    for col in range( 1, in_columns + 1 ):
        col_name = "c%d" % col
        cols.append( col_name )
        col_type = in_column_types[ col - 1 ].strip()
        if round_result == 'no' and col_type == 'int':
            col_type = 'float'
        type_casts.append( "%s(%s)" % ( col_type, col_name ) )

    col_str = ', '.join( cols )    # 'c1, c2, c3, c4'
    type_cast_str = ', '.join( type_casts )  # 'str(c1), int(c2), int(c3), str(c4)'
    assign = "%s = line.split( '\\t' )" % col_str
    wrap = "%s = %s" % ( col_str, type_cast_str )

    # Blank lines, '#' lines and lines on which the cast or the expression
    # raises are counted as skipped.  'invalid_line is None' (rather than
    # 'not invalid_line') keeps the first skipped line recorded even when
    # that line is empty.
    return '''
for i, line in enumerate( open( inp_file ) ):
    total_lines += 1
    line = line.rstrip( '\\r\\n' )
    if not line or line.startswith( '#' ):
        skipped_lines += 1
        if invalid_line is None:
            first_invalid_line = i + 1
            invalid_line = line
        continue
    try:
        %s
        %s
        new_val = %s
        if round_result == "yes":
            new_val = int( round( new_val ) )
        new_line = line + '\\t' + str( new_val )
        out.write( new_line + '\\n' )
        lines_kept += 1
    except Exception:
        skipped_lines += 1
        if invalid_line is None:
            first_invalid_line = i + 1
            invalid_line = line
''' % ( assign, wrap, expr )


def main( argv=None ):
    """Append a computed column to a tab-delimited file.

    argv defaults to sys.argv and is read positionally:
      argv[1] input file path        argv[4] 'yes' to round the result to int
      argv[2] output file path       argv[5] number of columns (must be >= 2)
      argv[3] expression over c1..cN argv[6] comma-separated column types

    Each kept input line is written to the output file followed by a tab and
    the value of the expression.  A summary is printed to stdout.  Invalid
    metadata or a failing expression ends the run through stop_err().
    """
    if argv is None:
        argv = sys.argv
    inp_file = argv[1]
    out_file = argv[2]
    expr = argv[3]
    round_result = argv[4]

    columns_msg = "Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data."
    try:
        in_columns = int( argv[5] )
    except ( IndexError, ValueError ):
        stop_err( columns_msg )
    if in_columns < 2:
        # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method.
        stop_err( columns_msg )
    try:
        in_column_types = argv[6].split( ',' )
    except IndexError:
        stop_err( "Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data." )
    if len( in_column_types ) != in_columns:
        stop_err( "The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data." )

    expr = _unescape_expression( expr )
    code = _build_row_code( in_columns, in_column_types, round_result, expr )

    # The generated loop runs in a single namespace seeded with the module
    # globals (so log, exp, sqrt, ceil, floor stay available to the
    # expression) plus the state it reads and updates.
    namespace = dict( globals() )
    namespace.update( {
        'inp_file': inp_file,
        'round_result': round_result,
        'skipped_lines': 0,
        'first_invalid_line': 0,
        'invalid_line': None,
        'lines_kept': 0,
        'total_lines': 0,
    } )

    out = open( out_file, 'wt' )
    namespace[ 'out' ] = out
    # Read input file, skipping invalid lines, and perform computation that will result in a new column
    try:
        try:
            # SECURITY NOTE: expr comes from the command line and is executed
            # as Python code; it must only ever be supplied by a trusted caller.
            exec( code, namespace )
        except SyntaxError:
            stop_err( 'Expression "%s" likely invalid. See tool tips, syntax and examples.' % expr )
        except Exception:
            # sys.exc_info() keeps this valid on both Python 2.4+ and 3.
            stop_err( str( sys.exc_info()[1] ) )
    finally:
        out.close()

    total_lines = namespace[ 'total_lines' ]
    skipped_lines = namespace[ 'skipped_lines' ]
    lines_kept = namespace[ 'lines_kept' ]
    valid_lines = total_lines - skipped_lines
    print( 'Creating column %d with expression %s' % ( in_columns + 1, expr ) )
    if valid_lines > 0:
        # Percentage is relative to all lines read; dividing by valid_lines
        # (always equal to lines_kept) would report 100% unconditionally.
        print( 'kept %4.2f%% of %d lines.' % ( 100.0 * lines_kept / total_lines, total_lines ) )
    else:
        print( 'Possible invalid expression "%s" or non-existent column referenced. See tool tips, syntax and examples.' % expr )
    if skipped_lines > 0:
        print( 'Skipped %d invalid lines starting at line #%d: "%s"' % ( skipped_lines, namespace[ 'first_invalid_line' ], namespace[ 'invalid_line' ] ) )


if __name__ == '__main__':
    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。