Context Navigation

cca.py @ 2

リビジョン 2, 5.0 KB (コミッタ: hatakeyama, 14 年前)
import galaxy-central

Rev	行番号
[2]	1	#!/usr/bin/env python
	2
	3	from galaxy import eggs
	4	import sys, string
	5	from rpy import *
	6	import numpy
	7
	8	def stop_err(msg):
	9	sys.stderr.write(msg)
	10	sys.exit()
	11
	12	infile = sys.argv[1]
	13	x_cols = sys.argv[2].split(',')
	14	y_cols = sys.argv[3].split(',')
	15
	16	x_scale = x_center = "FALSE"
	17	if sys.argv[4] == 'both':
	18	x_scale = x_center = "TRUE"
	19	elif sys.argv[4] == 'center':
	20	x_center = "TRUE"
	21	elif sys.argv[4] == 'scale':
	22	x_scale = "TRUE"
	23
	24	y_scale = y_center = "FALSE"
	25	if sys.argv[5] == 'both':
	26	y_scale = y_center = "TRUE"
	27	elif sys.argv[5] == 'center':
	28	y_center = "TRUE"
	29	elif sys.argv[5] == 'scale':
	30	y_scale = "TRUE"
	31
	32	std_scores = "FALSE"
	33	if sys.argv[6] == "yes":
	34	std_scores = "TRUE"
	35
	36	outfile = sys.argv[7]
	37	outfile2 = sys.argv[8]
	38
	39	fout = open(outfile,'w')
	40	elems = []
	41	for i, line in enumerate( file ( infile )):
	42	line = line.rstrip('\r\n')
	43	if len( line )>0 and not line.startswith( '#' ):
	44	elems = line.split( '\t' )
	45	break
	46	if i == 30:
	47	break # Hopefully we'll never get here...
	48
	49	if len( elems )<1:
	50	stop_err( "The data in your input dataset is either missing or not formatted properly." )
	51
	52	x_vals = []
	53
	54	for k,col in enumerate(x_cols):
	55	x_cols[k] = int(col)-1
	56	x_vals.append([])
	57
	58	y_vals = []
	59
	60	for k,col in enumerate(y_cols):
	61	y_cols[k] = int(col)-1
	62	y_vals.append([])
	63
	64	skipped = 0
	65	for ind,line in enumerate( file( infile )):
	66	if line and not line.startswith( '#' ):
	67	try:
	68	fields = line.strip().split("\t")
	69	valid_line = True
	70	for col in x_cols+y_cols:
	71	try:
	72	assert float(fields[col])
	73	except:
	74	skipped += 1
	75	valid_line = False
	76	break
	77	if valid_line:
	78	for k,col in enumerate(x_cols):
	79	try:
	80	xval = float(fields[col])
	81	except:
	82	xval = NaN#
	83	x_vals[k].append(xval)
	84	for k,col in enumerate(y_cols):
	85	try:
	86	yval = float(fields[col])
	87	except:
	88	yval = NaN#
	89	y_vals[k].append(yval)
	90	except:
	91	skipped += 1
	92
	93	x_vals1 = numpy.asarray(x_vals).transpose()
	94	y_vals1 = numpy.asarray(y_vals).transpose()
	95
	96	x_dat= r.list(array(x_vals1))
	97	y_dat= r.list(array(y_vals1))
	98
	99	try:
	100	r.suppressWarnings(r.library("yacca"))
	101	except:
	102	stop_err("Missing R library yacca.")
	103
	104	set_default_mode(NO_CONVERSION)
	105	try:
	106	xcolnames = ["c%d" %(el+1) for el in x_cols]
	107	ycolnames = ["c%d" %(el+1) for el in y_cols]
	108	cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), standardize_scores=r(std_scores))
	109	ftest = r.F_test_cca(cc)
	110	except RException, rex:
	111	stop_err("Encountered error while performing CCA on the input data: %s" %(rex))
	112
	113	set_default_mode(BASIC_CONVERSION)
	114	summary = r.summary(cc)
	115
	116	ncomps = len(summary['corr'])
	117	comps = summary['corr'].keys()
	118	corr = summary['corr'].values()
	119	xlab = summary['xlab']
	120	ylab = summary['ylab']
	121
	122	for i in range(ncomps):
	123	corr[comps.index('CV %s' %(i+1))] = summary['corr'].values()[i]
	124
	125	ftest=ftest.as_py()
	126	print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
	127	print >>fout, "#Correlation\t%s" %("\t".join(["%.4g" % el for el in corr]))
	128	print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest['statistic']]))
	129	print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest['p.value']]))
	130
	131	print >>fout, "#X-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
	132	for i,val in enumerate(summary['xcoef']):
	133	print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
	134
	135	print >>fout, "#Y-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
	136	for i,val in enumerate(summary['ycoef']):
	137	print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
	138
	139	print >>fout, "#X-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
	140	for i,val in enumerate(summary['xstructcorr']):
	141	print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
	142
	143	print >>fout, "#Y-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
	144	for i,val in enumerate(summary['ystructcorr']):
	145	print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
	146
	147	print >>fout, "#X-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
	148	for i,val in enumerate(summary['xcrosscorr']):
	149	print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
	150
	151	print >>fout, "#Y-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
	152	for i,val in enumerate(summary['ycrosscorr']):
	153	print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
	154
	155	r.pdf( outfile2, 8, 8 )
	156	#r.plot(cc)
	157	for i in range(ncomps):
	158	r.helio_plot(cc, cv = i+1, main = r.paste("Explained Variance for CV",i+1), type = "variance")
	159	r.dev_off()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/multivariate_stats/cca.py @ 2

異なるフォーマットでダウンロード: