[2] | 1 | #!/usr/bin/env python |
---|
| 2 | |
---|
| 3 | from galaxy import eggs |
---|
| 4 | import sys, string |
---|
| 5 | from rpy import * |
---|
| 6 | import numpy |
---|
| 7 | |
---|
| 8 | def stop_err(msg): |
---|
| 9 | sys.stderr.write(msg) |
---|
| 10 | sys.exit() |
---|
| 11 | |
---|
| 12 | infile = sys.argv[1] |
---|
| 13 | x_cols = sys.argv[2].split(',') |
---|
| 14 | y_cols = sys.argv[3].split(',') |
---|
| 15 | |
---|
| 16 | x_scale = x_center = "FALSE" |
---|
| 17 | if sys.argv[4] == 'both': |
---|
| 18 | x_scale = x_center = "TRUE" |
---|
| 19 | elif sys.argv[4] == 'center': |
---|
| 20 | x_center = "TRUE" |
---|
| 21 | elif sys.argv[4] == 'scale': |
---|
| 22 | x_scale = "TRUE" |
---|
| 23 | |
---|
| 24 | y_scale = y_center = "FALSE" |
---|
| 25 | if sys.argv[5] == 'both': |
---|
| 26 | y_scale = y_center = "TRUE" |
---|
| 27 | elif sys.argv[5] == 'center': |
---|
| 28 | y_center = "TRUE" |
---|
| 29 | elif sys.argv[5] == 'scale': |
---|
| 30 | y_scale = "TRUE" |
---|
| 31 | |
---|
| 32 | std_scores = "FALSE" |
---|
| 33 | if sys.argv[6] == "yes": |
---|
| 34 | std_scores = "TRUE" |
---|
| 35 | |
---|
| 36 | outfile = sys.argv[7] |
---|
| 37 | outfile2 = sys.argv[8] |
---|
| 38 | |
---|
| 39 | fout = open(outfile,'w') |
---|
| 40 | elems = [] |
---|
| 41 | for i, line in enumerate( file ( infile )): |
---|
| 42 | line = line.rstrip('\r\n') |
---|
| 43 | if len( line )>0 and not line.startswith( '#' ): |
---|
| 44 | elems = line.split( '\t' ) |
---|
| 45 | break |
---|
| 46 | if i == 30: |
---|
| 47 | break # Hopefully we'll never get here... |
---|
| 48 | |
---|
| 49 | if len( elems )<1: |
---|
| 50 | stop_err( "The data in your input dataset is either missing or not formatted properly." ) |
---|
| 51 | |
---|
| 52 | x_vals = [] |
---|
| 53 | |
---|
| 54 | for k,col in enumerate(x_cols): |
---|
| 55 | x_cols[k] = int(col)-1 |
---|
| 56 | x_vals.append([]) |
---|
| 57 | |
---|
| 58 | y_vals = [] |
---|
| 59 | |
---|
| 60 | for k,col in enumerate(y_cols): |
---|
| 61 | y_cols[k] = int(col)-1 |
---|
| 62 | y_vals.append([]) |
---|
| 63 | |
---|
| 64 | skipped = 0 |
---|
| 65 | for ind,line in enumerate( file( infile )): |
---|
| 66 | if line and not line.startswith( '#' ): |
---|
| 67 | try: |
---|
| 68 | fields = line.strip().split("\t") |
---|
| 69 | valid_line = True |
---|
| 70 | for col in x_cols+y_cols: |
---|
| 71 | try: |
---|
| 72 | assert float(fields[col]) |
---|
| 73 | except: |
---|
| 74 | skipped += 1 |
---|
| 75 | valid_line = False |
---|
| 76 | break |
---|
| 77 | if valid_line: |
---|
| 78 | for k,col in enumerate(x_cols): |
---|
| 79 | try: |
---|
| 80 | xval = float(fields[col]) |
---|
| 81 | except: |
---|
| 82 | xval = NaN# |
---|
| 83 | x_vals[k].append(xval) |
---|
| 84 | for k,col in enumerate(y_cols): |
---|
| 85 | try: |
---|
| 86 | yval = float(fields[col]) |
---|
| 87 | except: |
---|
| 88 | yval = NaN# |
---|
| 89 | y_vals[k].append(yval) |
---|
| 90 | except: |
---|
| 91 | skipped += 1 |
---|
| 92 | |
---|
| 93 | x_vals1 = numpy.asarray(x_vals).transpose() |
---|
| 94 | y_vals1 = numpy.asarray(y_vals).transpose() |
---|
| 95 | |
---|
| 96 | x_dat= r.list(array(x_vals1)) |
---|
| 97 | y_dat= r.list(array(y_vals1)) |
---|
| 98 | |
---|
| 99 | try: |
---|
| 100 | r.suppressWarnings(r.library("yacca")) |
---|
| 101 | except: |
---|
| 102 | stop_err("Missing R library yacca.") |
---|
| 103 | |
---|
| 104 | set_default_mode(NO_CONVERSION) |
---|
| 105 | try: |
---|
| 106 | xcolnames = ["c%d" %(el+1) for el in x_cols] |
---|
| 107 | ycolnames = ["c%d" %(el+1) for el in y_cols] |
---|
| 108 | cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), standardize_scores=r(std_scores)) |
---|
| 109 | ftest = r.F_test_cca(cc) |
---|
| 110 | except RException, rex: |
---|
| 111 | stop_err("Encountered error while performing CCA on the input data: %s" %(rex)) |
---|
| 112 | |
---|
| 113 | set_default_mode(BASIC_CONVERSION) |
---|
| 114 | summary = r.summary(cc) |
---|
| 115 | |
---|
| 116 | ncomps = len(summary['corr']) |
---|
| 117 | comps = summary['corr'].keys() |
---|
| 118 | corr = summary['corr'].values() |
---|
| 119 | xlab = summary['xlab'] |
---|
| 120 | ylab = summary['ylab'] |
---|
| 121 | |
---|
| 122 | for i in range(ncomps): |
---|
| 123 | corr[comps.index('CV %s' %(i+1))] = summary['corr'].values()[i] |
---|
| 124 | |
---|
| 125 | ftest=ftest.as_py() |
---|
| 126 | print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
| 127 | print >>fout, "#Correlation\t%s" %("\t".join(["%.4g" % el for el in corr])) |
---|
| 128 | print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest['statistic']])) |
---|
| 129 | print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest['p.value']])) |
---|
| 130 | |
---|
| 131 | print >>fout, "#X-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
| 132 | for i,val in enumerate(summary['xcoef']): |
---|
| 133 | print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val])) |
---|
| 134 | |
---|
| 135 | print >>fout, "#Y-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
| 136 | for i,val in enumerate(summary['ycoef']): |
---|
| 137 | print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val])) |
---|
| 138 | |
---|
| 139 | print >>fout, "#X-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
| 140 | for i,val in enumerate(summary['xstructcorr']): |
---|
| 141 | print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val])) |
---|
| 142 | |
---|
| 143 | print >>fout, "#Y-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
| 144 | for i,val in enumerate(summary['ystructcorr']): |
---|
| 145 | print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val])) |
---|
| 146 | |
---|
| 147 | print >>fout, "#X-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
| 148 | for i,val in enumerate(summary['xcrosscorr']): |
---|
| 149 | print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val])) |
---|
| 150 | |
---|
| 151 | print >>fout, "#Y-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)])) |
---|
| 152 | for i,val in enumerate(summary['ycrosscorr']): |
---|
| 153 | print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val])) |
---|
| 154 | |
---|
| 155 | r.pdf( outfile2, 8, 8 ) |
---|
| 156 | #r.plot(cc) |
---|
| 157 | for i in range(ncomps): |
---|
| 158 | r.helio_plot(cc, cv = i+1, main = r.paste("Explained Variance for CV",i+1), type = "variance") |
---|
| 159 | r.dev_off() |
---|