root/galaxy-central/test-data/rgtestouts/rgManQQ/rgManQQtest1.R @ 3

リビジョン 2, 8.2 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1
2# license not stated so I'm assuming LGPL is ok for my derived work?
3# generalised so 3 core fields passed as parameters ross lazarus March 24 2010 for rgenetics
4# Originally created as qqman with the following
5# attribution:
6#--------------
7# Stephen Turner
8# http://StephenTurner.us/
9# http://GettingGeneticsDone.blogspot.com/
10
11# Last updated: Tuesday, December 22, 2009
12# R code for making manhattan plots and QQ plots from plink output files.
13# With GWAS data this can take a lot of memory. Recommended for use on
14# 64bit machines only, for now.
15
16#
17
18library(ggplot2)
19
20coloursTouse = c('firebrick','darkblue','goldenrod','darkgreen')
21# not too fugly but need a colour expert please...
22
23
24manhattan = function(chrom=NULL,offset=NULL,pvals=NULL, title=NULL, max.y="max",
25   suggestiveline=0, genomewide=T, size.x.labels=9, size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) {
26
27        if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!")
28        genomewideline=NULL # was genomewideline=-log10(5e-8)
29        if (genomewide) { # use bonferroni since might be only a small region?
30            genomewideline = -log10(0.05/length(pvals)) }
31        d=data.frame(CHR=chrom,BP=offset,P=pvals)
32
33        #limit to only chrs 1-23?
34        d=d[d$CHR %in% 1:23, ]
35
36        if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) {
37                d=na.omit(d)
38                d=d[d$P>0 & d$P<=1, ]
39                d$logp = -log10(d$P)
40
41                d$pos=NA
42                ticks=NULL
43                lastbase=0
44                chrlist = unique(d$CHR)
45                nchr = length(chrlist) # may be any number?
46                if (nchr >= 2) {
47                for (x in c(1:nchr)) {
48                        i = chrlist[x] # need the chrom number - may not == index
49                        if (x == 1) { # first time
50                                d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP
51                                tks = d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]
52                        }       else {
53                                lastchr = chrlist[x-1] # previous whatever the list
54                                lastbase=lastbase+tail(subset(d,CHR==lastchr)$BP, 1)
55                                d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP+lastbase
56                                tks=c(tks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1])
57                        }
58                    ticklim=c(min(d$pos),max(d$pos))
59                    xlabs = chrlist
60                    }
61                } else { # nchr is 1
62                   nticks = 10
63                   last = max(offset)
64                   first = min(offset)
65                   tks = c()
66                   t = (last-first)/nticks # units per tick
67                   for (x in c(1:nticks)) tks = c(tks,round(x*t))
68                   xlabs = tks
69                   ticklim = c(first,last)
70                } # else
71                if (grey) {mycols=rep(c("gray10","gray60"),max(d$CHR))
72                           } else {
73                           mycols=rep(coloursTouse,max(d$CHR))
74                           }
75
76                if (max.y=="max") maxy=ceiling(max(d$logp)) else maxy=max.y
77                maxy = max(maxy,1.1*genomewideline)
78                # if (maxy<8) maxy=8
79                # only makes sense if genome wide is assumed - we could have a fine mapping region? 
80                if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ]
81                if (nchr >= 2) {
82                        manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR))
83                        manplot=manplot+scale_x_continuous(name="Chromosome", breaks=tks, labels=xlabs) }
84                else {
85                        manplot=qplot(BP,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR))
86                        manplot=manplot+scale_x_continuous("BP") }                 
87                manplot=manplot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy)
88                manplot=manplot+scale_colour_manual(value=mycols)
89                if (annotate) {  manplot=manplot + geom_point(data=d.annotate, colour=I("green3")) }
90                manplot=manplot + opts(legend.position = "none")
91                manplot=manplot + opts(title=title)
92                manplot=manplot+opts(
93                        panel.background=theme_blank(),
94                        axis.text.x=theme_text(size=size.x.labels, colour="grey50"),
95                        axis.text.y=theme_text(size=size.y.labels, colour="grey50"),
96                        axis.ticks=theme_segment(colour=NA)
97                )
98                #manplot = manplot + opts(panel.grid.y.minor=theme_blank(),panel.grid.y.major=theme_blank())
99                #manplot = manplot + opts(panel.grid.major=theme_blank())
100                 
101                if (suggestiveline) manplot=manplot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3))
102                if (genomewideline) manplot=manplot+geom_hline(yintercept=genomewideline,colour="red")
103                manplot
104        }       else {
105                stop("Make sure your data frame contains columns CHR, BP, and P")
106        }
107}
108
109
110
111qq = function(pvector, title=NULL, spartan=F) {
112        # Thanks to Daniel Shriner at NHGRI for providing this code for creating expected and observed values
113        o = -log10(sort(pvector,decreasing=F))
114        e = -log10( 1:length(o)/length(o) )
115        # you could use base graphics
116        # plot(e,o,pch=19,cex=0.25, xlab=expression(Expected~~-log[10](italic(p))),
117        # ylab=expression(Observed~~-log[10](italic(p))), xlim=c(0,max(e)), ylim=c(0,max(e)))
118        # lines(e,e,col="red")
119        #You'll need ggplot2 installed to do the rest
120        qq=qplot(e,o, xlim=c(0,max(e)), ylim=c(0,max(o))) + stat_abline(intercept=0,slope=1, col="red")
121        qq=qq+opts(title=title)
122        qq=qq+scale_x_continuous(name=expression(Expected~~-log[10](italic(p))))
123        qq=qq+scale_y_continuous(name=expression(Observed~~-log[10](italic(p))))
124        if (spartan) plot=plot+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank())
125        qq
126}
127rgqqMan = function(infile="/tmp/rgManQQtempcWfFkc",chromcolumn=1, offsetcolumn=2, pvalscolumns=c(3),
128title="rgManQQtest1",grey=0) {
129rawd = read.table(infile,head=T,sep='\t')
130dn = names(rawd)
131cc = dn[chromcolumn]
132oc = dn[offsetcolumn]
133nams = c(cc,oc)
134plen = length(rawd[,1])
135doreorder=1
136print(paste('###',plen,'values read from',infile,'read - now running plots',sep=' '))
137if (plen > 0) {
138  for (pvalscolumn in pvalscolumns) {
139  if (pvalscolumn > 0)
140     {
141     cname = names(rawd)[pvalscolumn]
142     mytitle = paste('p=',cname,', ',title,sep='')
143     myfname = chartr(' ','_',cname)
144     myqqplot = qq(rawd[,pvalscolumn],title=mytitle)
145     ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=11,height=8,dpi=100)
146     print(paste('## qqplot on',cname,'done'))
147     if ((chromcolumn > 0) & (offsetcolumn > 0)) {
148         if (doreorder) {
149             rawd = rawd[do.call(order,rawd[nams]),]
150             # mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.html
151             # in case not yet ordered
152             doreorder = 0
153             }
154         print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn))
155         mymanplot= manhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey)
156         print(paste('## manhattan plot on',cname,'done'))
157         ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=11,height=8,dpi=100)
158         }
159         else {
160              print(paste('chrom column =',chromcolumn,'offset column = ',offsetcolumn,
161              'so no Manhattan plot - supply both chromosome and offset as numerics for Manhattan plots if required'))
162              }
163     }
164  else {
165        print(paste('pvalue column =',pvalscolumn,'Cannot parse it so no plots possible'))
166      }
167  } # for pvalscolumn
168 } else { print('## Problem - no values available to plot - was there really a chromosome and offset column?') }
169}
170
171rgqqMan()
172# execute with defaults as substituted
173
174#R script autogenerated by rgenetics/rgutils.py on 18/09/2010 20:46:49
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。