Context Navigation

cca.py

リビジョン 2, 5.0 KB (コミッタ: hatakeyama, 14 年前)
import galaxy-central

行番号
1	#!/usr/bin/env python
2
3	from galaxy import eggs
4	import sys, string
5	from rpy import *
6	import numpy
7
def stop_err(msg):
    """Write ``msg`` to standard error and terminate the script.

    The message is written verbatim (no newline is appended) and the
    interpreter is then stopped through ``sys.exit()`` called without an
    argument, i.e. with the default exit status.
    """
    sys.stderr.write(msg)
    sys.exit()
11
# --- Command-line arguments -------------------------------------------------
# argv[1]  path of the tab-separated input dataset
# argv[2]  comma-separated column numbers forming the X variable set
# argv[3]  comma-separated column numbers forming the Y variable set
# argv[4]  X preprocessing mode: 'both', 'center', 'scale' (anything else: none)
# argv[5]  Y preprocessing mode, same values as argv[4]
# argv[6]  'yes' to request standardized scores
# argv[7]  path of the text report to write
# argv[8]  path of the PDF plot file to write
infile = sys.argv[1]
x_cols = sys.argv[2].split(',')
y_cols = sys.argv[3].split(',')

# Maps a preprocessing mode to its (scale, center) pair of R boolean
# literals; the strings are evaluated by R later on.  Unknown modes fall back
# to "no scaling, no centering".
_MODE_FLAGS = {
    'both': ("TRUE", "TRUE"),
    'center': ("FALSE", "TRUE"),
    'scale': ("TRUE", "FALSE"),
}
_NO_FLAGS = ("FALSE", "FALSE")

x_scale, x_center = _MODE_FLAGS.get(sys.argv[4], _NO_FLAGS)
y_scale, y_center = _MODE_FLAGS.get(sys.argv[5], _NO_FLAGS)

std_scores = "FALSE"
if sys.argv[6] == "yes":
    std_scores = "TRUE"

outfile = sys.argv[7]
outfile2 = sys.argv[8]
38
fout = open(outfile, 'w')

# Look for the first non-empty, non-comment line among the first 31 lines of
# the input to make sure the dataset contains something that looks like data.
elems = []
with open(infile) as sniff_in:
    for i, line in enumerate(sniff_in):
        line = line.rstrip('\r\n')
        if line and not line.startswith('#'):
            elems = line.split('\t')
            break
        if i == 30:
            break  # Hopefully we'll never get here...

if not elems:
    # Nothing usable was found: release the report file before bailing out.
    fout.close()
    stop_err("The data in your input dataset is either missing or not formatted properly.")
51
# The column numbers given on the command line are 1-based strings; convert
# them to 0-based integer indices and prepare one value list per column.
x_cols = [int(col) - 1 for col in x_cols]
x_vals = [[] for _ in x_cols]

y_cols = [int(col) - 1 for col in y_cols]
y_vals = [[] for _ in y_cols]
63
# Read the selected columns of every data row.  A row is used only when all
# selected X and Y columns are present and numeric; otherwise it is counted in
# ``skipped``.  Comment lines (starting with '#') are ignored and not counted.
skipped = 0
with open(infile) as data_in:
    for line in data_in:
        if line.startswith('#'):
            continue
        # Only strip the line terminator: stripping all whitespace would drop
        # a leading empty field and shift every column index.
        fields = line.rstrip('\r\n').split("\t")
        try:
            # Plain float() conversion: a legitimate value of 0 must not
            # invalidate the row.
            x_row = [float(fields[col]) for col in x_cols]
            y_row = [float(fields[col]) for col in y_cols]
        except (ValueError, IndexError):
            skipped += 1
            continue
        for k, xval in enumerate(x_row):
            x_vals[k].append(xval)
        for k, yval in enumerate(y_row):
            y_vals[k].append(yval)
92
# The values were collected one list per column; transpose so that each row
# of the array corresponds to one input line.
x_vals1 = numpy.asarray(x_vals).transpose()
y_vals1 = numpy.asarray(y_vals).transpose()

x_dat = r.list(array(x_vals1))
y_dat = r.list(array(y_vals1))

try:
    r.suppressWarnings(r.library("yacca"))
except Exception:
    stop_err("Missing R library yacca.")

# Labels are "c<N>" using the original 1-based column numbers.
xcolnames = ["c%d" % (el + 1) for el in x_cols]
ycolnames = ["c%d" % (el + 1) for el in y_cols]

# Keep the results as R objects: ``cc`` is handed back to R functions below.
set_default_mode(NO_CONVERSION)
try:
    cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames,
               xcenter=r(x_center), ycenter=r(y_center),
               xscale=r(x_scale), yscale=r(y_scale),
               standardize_scores=r(std_scores))
    ftest = r.F_test_cca(cc)
except RException as rex:
    stop_err("Encountered error while performing CCA on the input data: %s" % (rex))
112
# Switch back to converted results so the summary can be read from Python.
set_default_mode(BASIC_CONVERSION)
summary = r.summary(cc)

ncomps = len(summary['corr'])
# summary['corr'] is keyed by 'CV <n>'; look each component up by name so the
# correlations are listed in component order whatever the mapping's own order.
corr = [summary['corr']['CV %s' % (i + 1)] for i in range(ncomps)]
xlab = summary['xlab']
ylab = summary['ylab']

ftest = ftest.as_py()

# Tab-separated component numbers shared by every header line.
comp_header = "\t".join(["%s" % el for el in range(1, ncomps + 1)])


def _fmt_row(values):
    """Format numbers with 4 significant digits, joined by tabs."""
    return "\t".join(["%.4g" % el for el in values])


# (section title, key in ``summary``, row labels) for each coefficient table.
_sections = (
    ("X-Coefficients", 'xcoef', xlab),
    ("Y-Coefficients", 'ycoef', ylab),
    ("X-Loadings", 'xstructcorr', xlab),
    ("Y-Loadings", 'ystructcorr', ylab),
    ("X-CrossLoadings", 'xcrosscorr', xlab),
    ("Y-CrossLoadings", 'ycrosscorr', ylab),
)

try:
    fout.write("#Component\t%s\n" % comp_header)
    fout.write("#Correlation\t%s\n" % _fmt_row(corr))
    fout.write("#F-statistic\t%s\n" % _fmt_row(ftest['statistic']))
    fout.write("#p-value\t%s\n" % _fmt_row(ftest['p.value']))

    for title, key, labels in _sections:
        # Blank line between sections, then a '#'-prefixed header line.
        fout.write("\n")
        fout.write("#%s\t%s\n" % (title, comp_header))
        for i, val in enumerate(summary[key]):
            fout.write("%s\t%s\n" % (labels[i], _fmt_row(val)))
finally:
    # The report is complete (or failed): always release the file handle.
    fout.close()
154
# Draw one helio plot per canonical variate into the PDF file (opened with
# width and height 8).
r.pdf(outfile2, 8, 8)
try:
    for i in range(ncomps):
        r.helio_plot(cc, cv=i + 1,
                     main=r.paste("Explained Variance for CV", i + 1),
                     type="variance")
finally:
    # Close the graphics device even if a plot fails so the file is released.
    r.dev_off()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/multivariate_stats/cca.py

異なるフォーマットでダウンロード: