Context Navigation

best_regression_subsets.py @ 2

リビジョン 2, 2.6 KB (コミッタ: hatakeyama, 14 年前)
import galaxy-central

Rev	行番号
[2]	1	#!/usr/bin/env python
	2
	3	from galaxy import eggs
	4
	5	import sys, string
	6	from rpy import *
	7	import numpy
	8
	9	def stop_err(msg):
	10	sys.stderr.write(msg)
	11	sys.exit()
	12
	13	infile = sys.argv[1]
	14	y_col = int(sys.argv[2])-1
	15	x_cols = sys.argv[3].split(',')
	16	outfile = sys.argv[4]
	17	outfile2 = sys.argv[5]
	18	print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
	19	fout = open(outfile,'w')
	20
	21	for i, line in enumerate( file ( infile )):
	22	line = line.rstrip('\r\n')
	23	if len( line )>0 and not line.startswith( '#' ):
	24	elems = line.split( '\t' )
	25	break
	26	if i == 30:
	27	break # Hopefully we'll never get here...
	28
	29	if len( elems )<1:
	30	stop_err( "The data in your input dataset is either missing or not formatted properly." )
	31
	32	y_vals = []
	33	x_vals = []
	34
	35	for k,col in enumerate(x_cols):
	36	x_cols[k] = int(col)-1
	37	x_vals.append([])
	38
	39	NA = 'NA'
	40	for ind,line in enumerate( file( infile )):
	41	if line and not line.startswith( '#' ):
	42	try:
	43	fields = line.split("\t")
	44	try:
	45	yval = float(fields[y_col])
	46	except Exception, ey:
	47	yval = r('NA')
	48	y_vals.append(yval)
	49	for k,col in enumerate(x_cols):
	50	try:
	51	xval = float(fields[col])
	52	except Exception, ex:
	53	xval = r('NA')
	54	x_vals[k].append(xval)
	55	except:
	56	pass
	57
	58	response_term = ""
	59
	60	x_vals1 = numpy.asarray(x_vals).transpose()
	61
	62	dat= r.list(x=array(x_vals1), y=y_vals)
	63
	64	r.library("leaps")
	65
	66	set_default_mode(NO_CONVERSION)
	67	try:
	68	leaps = r.regsubsets(r("y ~ x"), data= r.na_exclude(dat))
	69	except RException, rex:
	70	stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain no numeric values.")
	71	set_default_mode(BASIC_CONVERSION)
	72
	73	summary = r.summary(leaps)
	74	tot = len(x_vals)
	75	pattern = "["
	76	for i in range(tot):
	77	pattern = pattern + 'c' + str(int(x_cols[int(i)]) + 1) + ' '
	78	pattern = pattern.strip() + ']'
	79	print >>fout, "#Vars\t%s\tR-sq\tAdj. R-sq\tC-p\tbic" %(pattern)
	80	for ind,item in enumerate(summary['outmat']):
	81	print >>fout, "%s\t%s\t%s\t%s\t%s\t%s" %(str(item).count('*'), item, summary['rsq'][ind], summary['adjr2'][ind], summary['cp'][ind], summary['bic'][ind])
	82
	83
	84	r.pdf( outfile2, 8, 8 )
	85	r.plot(leaps, scale="Cp", main="Best subsets using Cp Criterion")
	86	r.plot(leaps, scale="r2", main="Best subsets using R-sq Criterion")
	87	r.plot(leaps, scale="adjr2", main="Best subsets using Adjusted R-sq Criterion")
	88	r.plot(leaps, scale="bic", main="Best subsets using bic Criterion")
	89
	90	r.dev_off()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/regVariation/best_regression_subsets.py @ 2

異なるフォーマットでダウンロード: