1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | from galaxy import eggs |
---|
4 | import sys, string |
---|
5 | from rpy import * |
---|
6 | import numpy |
---|
7 | |
---|
8 | def stop_err(msg): |
---|
9 | sys.stderr.write(msg) |
---|
10 | sys.exit() |
---|
11 | |
---|
12 | infile = sys.argv[1] |
---|
13 | x_cols = sys.argv[2].split(',') |
---|
14 | y_cols = sys.argv[3].split(',') |
---|
15 | |
---|
16 | x_scale = x_center = "FALSE" |
---|
17 | if sys.argv[4] == 'both': |
---|
18 | x_scale = x_center = "TRUE" |
---|
19 | elif sys.argv[4] == 'center': |
---|
20 | x_center = "TRUE" |
---|
21 | elif sys.argv[4] == 'scale': |
---|
22 | x_scale = "TRUE" |
---|
23 | |
---|
24 | y_scale = y_center = "FALSE" |
---|
25 | if sys.argv[5] == 'both': |
---|
26 | y_scale = y_center = "TRUE" |
---|
27 | elif sys.argv[5] == 'center': |
---|
28 | y_center = "TRUE" |
---|
29 | elif sys.argv[5] == 'scale': |
---|
30 | y_scale = "TRUE" |
---|
31 | |
---|
32 | std_scores = "FALSE" |
---|
33 | if sys.argv[6] == "yes": |
---|
34 | std_scores = "TRUE" |
---|
35 | |
---|
36 | outfile = sys.argv[7] |
---|
37 | outfile2 = sys.argv[8] |
---|
38 | |
---|
39 | fout = open(outfile,'w') |
---|
40 | elems = [] |
---|
41 | for i, line in enumerate( file ( infile )): |
---|
42 | line = line.rstrip('\r\n') |
---|
43 | if len( line )>0 and not line.startswith( '#' ): |
---|
44 | elems = line.split( '\t' ) |
---|
45 | break |
---|
46 | if i == 30: |
---|
47 | break # Hopefully we'll never get here... |
---|
48 | |
---|
49 | if len( elems )<1: |
---|
50 | stop_err( "The data in your input dataset is either missing or not formatted properly." ) |
---|
51 | |
---|
52 | x_vals = [] |
---|
53 | |
---|
54 | for k,col in enumerate(x_cols): |
---|
55 | x_cols[k] = int(col)-1 |
---|
56 | x_vals.append([]) |
---|
57 | |
---|
58 | y_vals = [] |
---|
59 | |
---|
60 | for k,col in enumerate(y_cols): |
---|
61 | y_cols[k] = int(col)-1 |
---|
62 | y_vals.append([]) |
---|
63 | |
---|
64 | skipped = 0 |
---|
65 | for ind,line in enumerate( file( infile )): |
---|
66 | if line and not line.startswith( '#' ): |
---|
67 | try: |
---|
68 | fields = line.strip().split("\t") |
---|
69 | valid_line = True |
---|
70 | for col in x_cols+y_cols: |
---|
71 | try: |
---|
72 | assert float(fields[col]) |
---|
73 | except: |
---|
74 | skipped += 1 |
---|
75 | valid_line = False |
---|
76 | break |
---|
77 | if valid_line: |
---|
78 | for k,col in enumerate(x_cols): |
---|
79 | try: |
---|
80 | xval = float(fields[col]) |
---|
81 | except: |
---|
82 | xval = NaN# |
---|
83 | x_vals[k].append(xval) |
---|
84 | for k,col in enumerate(y_cols): |
---|
85 | try: |
---|
86 | yval = float(fields[col]) |
---|
87 | except: |
---|
88 | yval = NaN# |
---|
89 | y_vals[k].append(yval) |
---|
90 | except: |
---|
91 | skipped += 1 |
---|
92 | |
---|
93 | x_vals1 = numpy.asarray(x_vals).transpose() |
---|
94 | y_vals1 = numpy.asarray(y_vals).transpose() |
---|
95 | |
---|
96 | x_dat= r.list(array(x_vals1)) |
---|
97 | y_dat= r.list(array(y_vals1)) |
---|
98 | |
---|
99 | try: |
---|
100 | r.suppressWarnings(r.library("yacca")) |
---|
101 | except: |
---|
102 | stop_err("Missing R library yacca.") |
---|
103 | |
---|
104 | set_default_mode(NO_CONVERSION) |
---|
105 | try: |
---|
106 | xcolnames = ["c%d" %(el+1) for el in x_cols] |
---|
107 | ycolnames = ["c%d" %(el+1) for el in y_cols] |
---|
108 | cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), standardize_scores=r(std_scores)) |
---|
109 | ftest = r.F_test_cca(cc) |
---|
110 | except RException, rex: |
---|
111 | stop_err("Encountered error while performing CCA on the input data: %s" %(rex)) |
---|
112 | |
---|
113 | set_default_mode(BASIC_CONVERSION) |
---|
114 | summary = r.summary(cc) |
---|
115 | |
---|
116 | ncomps = len(summary['corr']) |
---|
117 | comps = summary['corr'].keys() |
---|
118 | corr = summary['corr'].values() |
---|
119 | xlab = summary['xlab'] |
---|
120 | ylab = summary['ylab'] |
---|
121 | |
---|
122 | for i in range(ncomps): |
---|
123 | corr[comps.index('CV %s' %(i+1))] = summary['corr'].values()[i] |
---|
124 | |
---|
125 | ftest=ftest.as_py() |
---|
126 | print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
127 | print >>fout, "#Correlation\t%s" %("\t".join(["%.4g" % el for el in corr])) |
---|
128 | print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest['statistic']])) |
---|
129 | print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest['p.value']])) |
---|
130 | |
---|
131 | print >>fout, "#X-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
132 | for i,val in enumerate(summary['xcoef']): |
---|
133 | print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val])) |
---|
134 | |
---|
135 | print >>fout, "#Y-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
136 | for i,val in enumerate(summary['ycoef']): |
---|
137 | print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val])) |
---|
138 | |
---|
139 | print >>fout, "#X-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
140 | for i,val in enumerate(summary['xstructcorr']): |
---|
141 | print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val])) |
---|
142 | |
---|
143 | print >>fout, "#Y-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
144 | for i,val in enumerate(summary['ystructcorr']): |
---|
145 | print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val])) |
---|
146 | |
---|
147 | print >>fout, "#X-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
148 | for i,val in enumerate(summary['xcrosscorr']): |
---|
149 | print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val])) |
---|
150 | |
---|
151 | print >>fout, "#Y-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
152 | for i,val in enumerate(summary['ycrosscorr']): |
---|
153 | print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val])) |
---|
154 | |
---|
155 | r.pdf( outfile2, 8, 8 ) |
---|
156 | #r.plot(cc) |
---|
157 | for i in range(ncomps): |
---|
158 | r.helio_plot(cc, cv = i+1, main = r.paste("Explained Variance for CV",i+1), type = "variance") |
---|
159 | r.dev_off() |
---|