root/galaxy-central/tools/regVariation/rcve.py

リビジョン 2, 4.0 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2
3from galaxy import eggs
4
5import sys, string
6from rpy import *
7import numpy
8
9def stop_err(msg):
10    sys.stderr.write(msg)
11    sys.exit()
12
13def sscombs(s):
14    if len(s) == 1:
15        return [s]
16    else:
17        ssc = sscombs(s[1:])
18        return [s[0]] + [s[0]+comb for comb in ssc] + ssc
19
20
21infile = sys.argv[1]
22y_col = int(sys.argv[2])-1
23x_cols = sys.argv[3].split(',')
24outfile = sys.argv[4]
25
26print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
27fout = open(outfile,'w')
28
29for i, line in enumerate( file ( infile )):
30    line = line.rstrip('\r\n')
31    if len( line )>0 and not line.startswith( '#' ):
32        elems = line.split( '\t' )
33        break
34    if i == 30:
35        break # Hopefully we'll never get here...
36
37if len( elems )<1:
38    stop_err( "The data in your input dataset is either missing or not formatted properly." )
39
40y_vals = []
41x_vals = []
42
43for k,col in enumerate(x_cols):
44    x_cols[k] = int(col)-1
45    x_vals.append([])
46    """
47    try:
48        float( elems[x_cols[k]] )
49    except:
50        try:
51            msg = "This operation cannot be performed on non-numeric column %d containing value '%s'." %( col, elems[x_cols[k]] )
52        except:
53            msg = "This operation cannot be performed on non-numeric data."
54        stop_err( msg )
55    """
56NA = 'NA'
57for ind,line in enumerate( file( infile )):
58    if line and not line.startswith( '#' ):
59        try:
60            fields = line.split("\t")
61            try:
62                yval = float(fields[y_col])
63            except Exception, ey:
64                yval = r('NA')
65                #print >>sys.stderr, "ey = %s" %ey
66            y_vals.append(yval)
67            for k,col in enumerate(x_cols):
68                try:
69                    xval = float(fields[col])
70                except Exception, ex:
71                    xval = r('NA')
72                    #print >>sys.stderr, "ex = %s" %ex
73                x_vals[k].append(xval)
74        except:
75            pass
76
77x_vals1 = numpy.asarray(x_vals).transpose()
78dat= r.list(x=array(x_vals1), y=y_vals)
79
80set_default_mode(NO_CONVERSION)
81try:
82    full = r.lm(r("y ~ x"), data= r.na_exclude(dat))    #full model includes all the predictor variables specified by the user
83except RException, rex:
84    stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain no numeric values.")
85set_default_mode(BASIC_CONVERSION)
86
87summary = r.summary(full)
88fullr2 = summary.get('r.squared','NA')
89
90if fullr2 == 'NA':
91    stop_error("Error in linear regression")
92
93if len(x_vals) < 10:
94    s = ""
95    for ch in range(len(x_vals)):
96        s += str(ch)
97else:
98    stop_err("This tool only works with less than 10 predictors.")
99
100print >>fout, "#Model\tR-sq\tRCVE_Terms\tRCVE_Value"
101all_combos = sorted(sscombs(s), key=len)
102all_combos.reverse()
103for j,cols in enumerate(all_combos):
104    #if len(cols) == len(s):    #Same as the full model above
105    #    continue
106    if len(cols) == 1:
107        x_vals1 = x_vals[int(cols)]
108    else:
109        x_v = []
110        for col in cols:
111            x_v.append(x_vals[int(col)])
112        x_vals1 = numpy.asarray(x_v).transpose()
113    dat= r.list(x=array(x_vals1), y=y_vals)
114    set_default_mode(NO_CONVERSION)
115    red = r.lm(r("y ~ x"), data= dat)    #Reduced model
116    set_default_mode(BASIC_CONVERSION)
117    summary = r.summary(red)
118    redr2 = summary.get('r.squared','NA')
119    try:
120        rcve = (float(fullr2)-float(redr2))/float(fullr2)
121    except:
122        rcve = 'NA'
123    col_str = ""
124    for col in cols:
125        col_str = col_str + str(int(x_cols[int(col)]) + 1) + " "
126    col_str.strip()
127    rcve_col_str = ""
128    for col in s:
129        if col not in cols:
130            rcve_col_str = rcve_col_str + str(int(x_cols[int(col)]) + 1) + " "
131    rcve_col_str.strip()
132    if len(cols) == len(s):    #full model
133        rcve_col_str = "-"
134        rcve = "-"
135    try:
136        redr2 = "%.4f" %(float(redr2))
137    except:
138        pass
139    try:
140        rcve = "%.4f" %(float(rcve))
141    except:
142        pass
143    print >>fout, "%s\t%s\t%s\t%s" %(col_str,redr2,rcve_col_str,rcve)
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。