root/galaxy-central/tools/multivariate_stats/cca.py

リビジョン 2, 5.0 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2
3from galaxy import eggs
4import sys, string
5from rpy import *
6import numpy
7
8def stop_err(msg):
9    sys.stderr.write(msg)
10    sys.exit()
11
12infile = sys.argv[1]
13x_cols = sys.argv[2].split(',')
14y_cols = sys.argv[3].split(',')
15
16x_scale = x_center = "FALSE"
17if sys.argv[4] == 'both':
18    x_scale = x_center = "TRUE"
19elif sys.argv[4] == 'center':
20    x_center = "TRUE"
21elif sys.argv[4] == 'scale':
22    x_scale = "TRUE"
23   
24y_scale = y_center = "FALSE"
25if sys.argv[5] == 'both':
26    y_scale = y_center = "TRUE"
27elif sys.argv[5] == 'center':
28    y_center = "TRUE"
29elif sys.argv[5] == 'scale':
30    y_scale = "TRUE"
31
32std_scores = "FALSE"   
33if sys.argv[6] == "yes":
34    std_scores = "TRUE"
35   
36outfile = sys.argv[7]
37outfile2 = sys.argv[8]
38
39fout = open(outfile,'w')
40elems = []
41for i, line in enumerate( file ( infile )):
42    line = line.rstrip('\r\n')
43    if len( line )>0 and not line.startswith( '#' ):
44        elems = line.split( '\t' )
45        break
46    if i == 30:
47        break # Hopefully we'll never get here...
48
49if len( elems )<1:
50    stop_err( "The data in your input dataset is either missing or not formatted properly." )
51
52x_vals = []
53
54for k,col in enumerate(x_cols):
55    x_cols[k] = int(col)-1
56    x_vals.append([])
57
58y_vals = []
59
60for k,col in enumerate(y_cols):
61    y_cols[k] = int(col)-1
62    y_vals.append([])
63
64skipped = 0
65for ind,line in enumerate( file( infile )):
66    if line and not line.startswith( '#' ):
67        try:
68            fields = line.strip().split("\t")
69            valid_line = True
70            for col in x_cols+y_cols:
71                try:
72                    assert float(fields[col])
73                except:
74                    skipped += 1
75                    valid_line = False
76                    break
77            if valid_line:
78                for k,col in enumerate(x_cols):
79                    try:
80                        xval = float(fields[col])
81                    except:
82                        xval = NaN#
83                    x_vals[k].append(xval)
84                for k,col in enumerate(y_cols):
85                    try:
86                        yval = float(fields[col])
87                    except:
88                        yval = NaN#
89                    y_vals[k].append(yval)
90        except:
91            skipped += 1
92
93x_vals1 = numpy.asarray(x_vals).transpose()
94y_vals1 = numpy.asarray(y_vals).transpose()
95
96x_dat= r.list(array(x_vals1))
97y_dat= r.list(array(y_vals1))
98
99try:
100    r.suppressWarnings(r.library("yacca"))
101except:
102    stop_err("Missing R library yacca.")
103   
104set_default_mode(NO_CONVERSION)
105try:
106    xcolnames = ["c%d" %(el+1) for el in x_cols]
107    ycolnames = ["c%d" %(el+1) for el in y_cols]
108    cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), standardize_scores=r(std_scores))
109    ftest = r.F_test_cca(cc)
110except RException, rex:
111    stop_err("Encountered error while performing CCA on the input data: %s" %(rex))
112
113set_default_mode(BASIC_CONVERSION)
114summary = r.summary(cc)
115
116ncomps = len(summary['corr'])
117comps = summary['corr'].keys()
118corr = summary['corr'].values()
119xlab = summary['xlab']
120ylab = summary['ylab']
121
122for i in range(ncomps):
123    corr[comps.index('CV %s' %(i+1))] = summary['corr'].values()[i]
124
125ftest=ftest.as_py()
126print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
127print >>fout, "#Correlation\t%s" %("\t".join(["%.4g" % el for el in corr]))
128print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest['statistic']]))
129print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest['p.value']]))
130
131print >>fout, "#X-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
132for i,val in enumerate(summary['xcoef']):
133    print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
134
135print >>fout, "#Y-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
136for i,val in enumerate(summary['ycoef']):
137    print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
138       
139print >>fout, "#X-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
140for i,val in enumerate(summary['xstructcorr']):
141    print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
142
143print >>fout, "#Y-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
144for i,val in enumerate(summary['ystructcorr']):
145    print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
146
147print >>fout, "#X-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
148for i,val in enumerate(summary['xcrosscorr']):
149    print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
150
151print >>fout, "#Y-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
152for i,val in enumerate(summary['ycrosscorr']):
153    print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
154
155r.pdf( outfile2, 8, 8 )
156#r.plot(cc)
157for i in range(ncomps):
158    r.helio_plot(cc, cv = i+1, main = r.paste("Explained Variance for CV",i+1), type = "variance")
159r.dev_off()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。