Context Navigation

gsummary.py @ 2

リビジョン 2, 4.5 KB (コミッタ: hatakeyama, 14 年前)
import galaxy-central
属性 svn:executable の設定値: `*`

Rev	行番号
[2]	1	#!/usr/bin/env python
	2
	3	import sys, re, tempfile
	4	from rpy import *
	5	# Older py compatibility
	6	try:
	7	set()
	8	except:
	9	from sets import Set as set
	10
	11	assert sys.version_info[:2] >= ( 2, 4 )
	12
	13	def stop_err( msg ):
	14	sys.stderr.write( msg )
	15	sys.exit()
	16
	17	def S3_METHODS( all="key" ):
	18	Group_Math = [ "abs", "sign", "sqrt", "floor", "ceiling", "trunc", "round", "signif",
	19	"exp", "log", "cos", "sin", "tan", "acos", "asin", "atan", "cosh", "sinh", "tanh",
	20	"acosh", "asinh", "atanh", "lgamma", "gamma", "gammaCody", "digamma", "trigamma",
	21	"cumsum", "cumprod", "cummax", "cummin", "c" ]
	22	Group_Ops = [ "+", "-", "*", "/", "^", "%%", "%/%", "&", "\|", "!", "==", "!=", "<", "<=", ">=", ">", "(", ")", "~", "," ]
	23	if all is "key":
	24	return { 'Math' : Group_Math, 'Ops' : Group_Ops }
	25
	26	def main():
	27	try:
	28	datafile = sys.argv[1]
	29	outfile_name = sys.argv[2]
	30	expression = sys.argv[3]
	31	except:
	32	stop_err( 'Usage: python gsummary.py input_file ouput_file expression' )
	33
	34	math_allowed = S3_METHODS()[ 'Math' ]
	35	ops_allowed = S3_METHODS()[ 'Ops' ]
	36
	37	# Check for invalid expressions
	38	for word in re.compile( '[a-zA-Z]+' ).findall( expression ):
	39	if word and not word in math_allowed:
	40	stop_err( "Invalid expression '%s': term '%s' is not recognized or allowed" %( expression, word ) )
	41	symbols = set()
	42	for symbol in re.compile( '[^a-z0-9\s]+' ).findall( expression ):
	43	if symbol and not symbol in ops_allowed:
	44	stop_err( "Invalid expression '%s': operator '%s' is not recognized or allowed" % ( expression, symbol ) )
	45	else:
	46	symbols.add( symbol )
	47	if len( symbols ) == 1 and ',' in symbols:
	48	# User may have entered a comma-separated list r_data_frame columns
	49	stop_err( "Invalid columns '%s': this tool requires a single column or expression" % expression )
	50
	51	# Find all column references in the expression
	52	cols = []
	53	for col in re.compile( 'c[0-9]+' ).findall( expression ):
	54	try:
	55	cols.append( int( col[1:] ) - 1 )
	56	except:
	57	pass
	58
	59	tmp_file = tempfile.NamedTemporaryFile( 'w+b' )
	60	# Write the R header row to the temporary file
	61	hdr_str = "\t".join( "c%s" % str( col+1 ) for col in cols )
	62	tmp_file.write( "%s\n" % hdr_str )
	63	skipped_lines = 0
	64	first_invalid_line = 0
	65	i = 0
	66	for i, line in enumerate( file( datafile ) ):
	67	line = line.rstrip( '\r\n' )
	68	if line and not line.startswith( '#' ):
	69	valid = True
	70	fields = line.split( '\t' )
	71	# Write the R data row to the temporary file
	72	for col in cols:
	73	try:
	74	float( fields[ col ] )
	75	except:
	76	skipped_lines += 1
	77	if not first_invalid_line:
	78	first_invalid_line = i + 1
	79	valid = False
	80	break
	81	if valid:
	82	data_str = "\t".join( fields[ col ] for col in cols )
	83	tmp_file.write( "%s\n" % data_str )
	84	tmp_file.flush()
	85
	86	if skipped_lines == i + 1:
	87	stop_err( "Invalid column or column data values invalid for computation. See tool tips and syntax for data requirements." )
	88	else:
	89	# summary function and return labels
	90	summary_func = r( "function( x ) { c( sum=sum( as.numeric( x ), na.rm=T ), mean=mean( as.numeric( x ), na.rm=T ), stdev=sd( as.numeric( x ), na.rm=T ), quantile( as.numeric( x ), na.rm=TRUE ) ) }" )
	91	headings = [ 'sum', 'mean', 'stdev', '0%', '25%', '50%', '75%', '100%' ]
	92	headings_str = "\t".join( headings )
	93
	94	set_default_mode( NO_CONVERSION )
	95	r_data_frame = r.read_table( tmp_file.name, header=True, sep="\t" )
	96
	97	outfile = open( outfile_name, 'w' )
	98
	99	for col in re.compile( 'c[0-9]+' ).findall( expression ):
	100	r.assign( col, r[ "$" ]( r_data_frame, col ) )
	101	try:
	102	summary = summary_func( r( expression ) )
	103	except RException, s:
	104	outfile.close()
	105	stop_err( "Computation resulted in the following error: %s" % str( s ) )
	106	summary = summary.as_py( BASIC_CONVERSION )
	107	outfile.write( "#%s\n" % headings_str )
	108	outfile.write( "%s\n" % "\t".join( [ "%g" % ( summary[ k ] ) for k in headings ] ) )
	109	outfile.close()
	110
	111	if skipped_lines:
	112	print "Skipped %d invalid lines beginning with line #%d. See tool tips for data requirements." % ( skipped_lines, first_invalid_line )
	113
	114	if __name__ == "__main__": main()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/stats/gsummary.py @ 2

異なるフォーマットでダウンロード: