Context Navigation

best_regression_subsets.py

リビジョン 2, 2.6 KB (コミッタ: hatakeyama, 15 年前)
import galaxy-central

行番号
1	#!/usr/bin/env python
2
3	from galaxy import eggs
4
5	import sys, string
6	from rpy import *
7	import numpy
8
9	def stop_err(msg):
10	sys.stderr.write(msg)
11	sys.exit()
12
13	infile = sys.argv[1]
14	y_col = int(sys.argv[2])-1
15	x_cols = sys.argv[3].split(',')
16	outfile = sys.argv[4]
17	outfile2 = sys.argv[5]
18	print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
19	fout = open(outfile,'w')
20
21	for i, line in enumerate( file ( infile )):
22	line = line.rstrip('\r\n')
23	if len( line )>0 and not line.startswith( '#' ):
24	elems = line.split( '\t' )
25	break
26	if i == 30:
27	break # Hopefully we'll never get here...
28
29	if len( elems )<1:
30	stop_err( "The data in your input dataset is either missing or not formatted properly." )
31
32	y_vals = []
33	x_vals = []
34
35	for k,col in enumerate(x_cols):
36	x_cols[k] = int(col)-1
37	x_vals.append([])
38
39	NA = 'NA'
40	for ind,line in enumerate( file( infile )):
41	if line and not line.startswith( '#' ):
42	try:
43	fields = line.split("\t")
44	try:
45	yval = float(fields[y_col])
46	except Exception, ey:
47	yval = r('NA')
48	y_vals.append(yval)
49	for k,col in enumerate(x_cols):
50	try:
51	xval = float(fields[col])
52	except Exception, ex:
53	xval = r('NA')
54	x_vals[k].append(xval)
55	except:
56	pass
57
58	response_term = ""
59
60	x_vals1 = numpy.asarray(x_vals).transpose()
61
62	dat= r.list(x=array(x_vals1), y=y_vals)
63
64	r.library("leaps")
65
66	set_default_mode(NO_CONVERSION)
67	try:
68	leaps = r.regsubsets(r("y ~ x"), data= r.na_exclude(dat))
69	except RException, rex:
70	stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain no numeric values.")
71	set_default_mode(BASIC_CONVERSION)
72
73	summary = r.summary(leaps)
74	tot = len(x_vals)
75	pattern = "["
76	for i in range(tot):
77	pattern = pattern + 'c' + str(int(x_cols[int(i)]) + 1) + ' '
78	pattern = pattern.strip() + ']'
79	print >>fout, "#Vars\t%s\tR-sq\tAdj. R-sq\tC-p\tbic" %(pattern)
80	for ind,item in enumerate(summary['outmat']):
81	print >>fout, "%s\t%s\t%s\t%s\t%s\t%s" %(str(item).count('*'), item, summary['rsq'][ind], summary['adjr2'][ind], summary['cp'][ind], summary['bic'][ind])
82
83
84	r.pdf( outfile2, 8, 8 )
85	r.plot(leaps, scale="Cp", main="Best subsets using Cp Criterion")
86	r.plot(leaps, scale="r2", main="Best subsets using R-sq Criterion")
87	r.plot(leaps, scale="adjr2", main="Best subsets using Adjusted R-sq Criterion")
88	r.plot(leaps, scale="bic", main="Best subsets using bic Criterion")
89
90	r.dev_off()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/regVariation/best_regression_subsets.py

異なるフォーマットでダウンロード: