root/galaxy-central/tools/regVariation/linear_regression.py

リビジョン 2, 3.6 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2
3from galaxy import eggs
4import sys, string
5from rpy import *
6import numpy
7
8def stop_err(msg):
9    sys.stderr.write(msg)
10    sys.exit()
11
12infile = sys.argv[1]
13y_col = int(sys.argv[2])-1
14x_cols = sys.argv[3].split(',')
15outfile = sys.argv[4]
16outfile2 = sys.argv[5]
17
18print "Predictor columns: %s; Response column: %d" %(x_cols,y_col+1)
19fout = open(outfile,'w')
20elems = []
21for i, line in enumerate( file ( infile )):
22    line = line.rstrip('\r\n')
23    if len( line )>0 and not line.startswith( '#' ):
24        elems = line.split( '\t' )
25        break
26    if i == 30:
27        break # Hopefully we'll never get here...
28
29if len( elems )<1:
30    stop_err( "The data in your input dataset is either missing or not formatted properly." )
31
32y_vals = []
33x_vals = []
34
35for k,col in enumerate(x_cols):
36    x_cols[k] = int(col)-1
37    x_vals.append([])
38
39NA = 'NA'
40for ind,line in enumerate( file( infile )):
41    if line and not line.startswith( '#' ):
42        try:
43            fields = line.split("\t")
44            try:
45                yval = float(fields[y_col])
46            except:
47                yval = r('NA')
48            y_vals.append(yval)
49            for k,col in enumerate(x_cols):
50                try:
51                    xval = float(fields[col])
52                except:
53                    xval = r('NA')
54                x_vals[k].append(xval)
55        except:
56            pass
57
58x_vals1 = numpy.asarray(x_vals).transpose()
59
60dat= r.list(x=array(x_vals1), y=y_vals)
61
62set_default_mode(NO_CONVERSION)
63try:
64    linear_model = r.lm(r("y ~ x"), data = r.na_exclude(dat))
65except RException, rex:
66    stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain only non-numeric or invalid values.")
67set_default_mode(BASIC_CONVERSION)
68
69coeffs=linear_model.as_py()['coefficients']
70yintercept= coeffs['(Intercept)']
71print >>fout, "Y-intercept\t%s" %(yintercept)
72summary = r.summary(linear_model)
73
74co = summary.get('coefficients', 'NA')
75"""
76if len(co) != len(x_vals)+1:
77    stop_err("Stopped performing linear regression on the input data, since one of the predictor columns contains only non-numeric or invalid values.")
78"""
79print >>fout, "p-value (Y-intercept)\t%s" %(co[0][3])
80
81if len(x_vals) == 1:    #Simple linear  regression case with 1 predictor variable
82    try:
83        slope = coeffs['x']
84    except:
85        slope = 'NA'
86    try:
87        pval = co[1][3]
88    except:
89        pval = 'NA'
90    print >>fout, "Slope (c%d)\t%s" %(x_cols[0]+1,slope)
91    print >>fout, "p-value (c%d)\t%s" %(x_cols[0]+1,pval)
92else:    #Multiple regression case with >1 predictors
93    ind=1
94    while ind < len(coeffs.keys()):
95        print >>fout, "Slope (c%d)\t%s" %(x_cols[ind-1]+1,coeffs['x'+str(ind)])
96        try:
97            pval = co[ind][3]
98        except:
99            pval = 'NA'
100        print >>fout, "p-value (c%d)\t%s" %(x_cols[ind-1]+1,pval)
101        ind+=1
102
103print >>fout, "R-squared\t%s" %(summary.get('r.squared','NA'))
104print >>fout, "Adjusted R-squared\t%s" %(summary.get('adj.r.squared','NA'))
105print >>fout, "F-statistic\t%s" %(summary.get('fstatistic','NA'))
106print >>fout, "Sigma\t%s" %(summary.get('sigma','NA'))
107
108r.pdf( outfile2, 8, 8 )
109if len(x_vals) == 1:    #Simple linear  regression case with 1 predictor variable
110    sub_title =  "Slope = %s; Y-int = %s" %(slope,yintercept)
111    try:
112        r.plot(x=x_vals[0], y=y_vals, xlab="X", ylab="Y", sub=sub_title, main="Scatterplot with regression")
113        r.abline(a=yintercept, b=slope, col="red")
114    except:
115        pass
116else:
117    r.pairs(dat, main="Scatterplot Matrix", col="blue")
118try:
119    r.plot(linear_model)
120except:
121    pass
122r.dev_off()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。