[2] | 1 | #!/usr/bin/env perl |
---|
| 2 | |
---|
| 3 | use strict; |
---|
| 4 | use warnings; |
---|
| 5 | |
---|
| 6 | #expected input: path to file, cols of counts (2 sets of 3), threshold |
---|
| 7 | if (!@ARGV or scalar @ARGV != 9) { |
---|
| 8 | print "usage snpFreq.pl /path/to/snps.txt <6 column numbers(1 based) with counts for alleles, first one group then another> #threshold outfile\n"; |
---|
| 9 | exit 1; |
---|
| 10 | } |
---|
| 11 | |
---|
| 12 | #get and verify inputs |
---|
| 13 | my $file = shift @ARGV; |
---|
| 14 | my $a1 = shift @ARGV; |
---|
| 15 | if ($a1 =~ /\D/ or $a1 < 1) { |
---|
| 16 | print "Error the column number, must be an integer greater than or equal to 1. Got $a1\n"; |
---|
| 17 | exit 1; |
---|
| 18 | } |
---|
| 19 | my $a2 = shift @ARGV; |
---|
| 20 | if ($a2 =~ /\D/ or $a2 < 1) { |
---|
| 21 | print "Error the column number, must be an integer greater than or equal to 1. Got $a2\n"; |
---|
| 22 | exit 1; |
---|
| 23 | } |
---|
| 24 | my $a3 = shift @ARGV; |
---|
| 25 | if ($a3 =~ /\D/ or $a3 < 1) { |
---|
| 26 | print "Error the column number, must be an integer greater than or equal to 1. Got $a3\n"; |
---|
| 27 | exit 1; |
---|
| 28 | } |
---|
| 29 | my $b1 = shift @ARGV; |
---|
| 30 | if ($b1 =~ /\D/ or $b1 < 1) { |
---|
| 31 | print "Error the column number, must be an integer greater than or equal to 1. Got $b1\n"; |
---|
| 32 | exit 1; |
---|
| 33 | } |
---|
| 34 | my $b2 = shift @ARGV; |
---|
| 35 | if ($b2 =~ /\D/ or $b2 < 1) { |
---|
| 36 | print "Error the column number, must be an integer greater than or equal to 1. Got $b2\n"; |
---|
| 37 | exit 1; |
---|
| 38 | } |
---|
| 39 | my $b3 = shift @ARGV; |
---|
| 40 | if ($b3 =~ /\D/ or $b3 < 1) { |
---|
| 41 | print "Error the column number, must be an integer greater than or equal to 1. Got $b3\n"; |
---|
| 42 | exit 1; |
---|
| 43 | } |
---|
| 44 | my $thresh = shift @ARGV; |
---|
| 45 | if ($thresh !~ /^\d*\.?\d+$/) { |
---|
| 46 | print "Error the threshold must be a number. Got $thresh\n"; |
---|
| 47 | exit 1; |
---|
| 48 | }elsif ($thresh > .3) { |
---|
| 49 | print "Error the threshold can not be greater than 0.3 got $thresh\n"; |
---|
| 50 | exit 1; |
---|
| 51 | } |
---|
| 52 | my $outfile = shift @ARGV; |
---|
| 53 | |
---|
| 54 | #run a fishers exact test (using R) on whole table |
---|
| 55 | my $cmd = qq|options(warn=-1) |
---|
| 56 | tab <- read.table('$file', sep="\t") |
---|
| 57 | size <- length(tab[,1]) |
---|
| 58 | width <- length(tab[1,]) |
---|
| 59 | x <- 1:size |
---|
| 60 | y <- matrix(data=0, nr=size, nc=6) |
---|
| 61 | for(i in 1:size) { |
---|
| 62 | m <- matrix(c(tab[i,$a1], tab[i,$b1], tab[i,$a2], tab[i,$b2], tab[i,$a3], tab[i,$b3]), nrow=2) |
---|
| 63 | t <- fisher.test(m) |
---|
| 64 | x[i] <- t\$p.value |
---|
| 65 | if (x[i] >= 1) { |
---|
| 66 | x[i] <- .999 |
---|
| 67 | } |
---|
| 68 | n <- (tab[i,$a1] + tab[i,$a2] + tab[i,$a3] + tab[i,$b1] + tab[i,$b2] + tab[i,$b3]) |
---|
| 69 | n_a <- (tab[i,$a1] + tab[i,$a2] + tab[i,$a3]) |
---|
| 70 | y[i,1] <- ((tab[i,$a1] + tab[i,$b1])*(n_a))/n |
---|
| 71 | y[i,1] <- round(y[i,1],3) |
---|
| 72 | y[i,2] <- ((tab[i,$a2] + tab[i,$b2])*(n_a))/n |
---|
| 73 | y[i,2] <- round(y[i,2],3) |
---|
| 74 | y[i,3] <- ((tab[i,$a3] + tab[i,$b3])*(n_a))/n |
---|
| 75 | y[i,3] <- round(y[i,3],3) |
---|
| 76 | n_b <- (tab[i,$b1] + tab[i,$b2] + tab[i,$b3]) |
---|
| 77 | y[i,4] <- ((tab[i,$a1] + tab[i,$b1])*(n_b))/n |
---|
| 78 | y[i,4] <- round(y[i,4],3) |
---|
| 79 | y[i,5] <- ((tab[i,$a2] + tab[i,$b2])*(n_b))/n |
---|
| 80 | y[i,5] <- round(y[i,5],3) |
---|
| 81 | y[i,6] <- ((tab[i,$a3] + tab[i,$b3])*(n_b))/n |
---|
| 82 | y[i,6] <- round(y[i,6],3) |
---|
| 83 | }|; |
---|
| 84 | #results <- data.frame(tab[1:size,1:width], x[1:size]) |
---|
| 85 | #write.table(results, file="$outfile", row.names = FALSE ,col.names = FALSE,quote = FALSE, sep="\t") |
---|
| 86 | #q()|; |
---|
| 87 | |
---|
| 88 | my $cmd2 = qq|suppressPackageStartupMessages(library(qvalue)) |
---|
| 89 | qobj <- qvalue(x[1:size], lambda=seq(0,0.90,$thresh), pi0.method="bootstrap", fdr.level=0.1, robust=FALSE, smooth.log.pi0 = FALSE) |
---|
| 90 | q <- qobj\$qvalues |
---|
| 91 | results <- data.frame(tab[1:size,1:width], y[1:size,1:6], x[1:size], q[1:size]) |
---|
| 92 | write.table(results, file="$outfile", row.names = FALSE ,col.names = FALSE,quote = FALSE, sep="\t") |
---|
| 93 | q()|; |
---|
| 94 | |
---|
| 95 | #for TESTING |
---|
| 96 | my $pr = qq|results <- data.frame(tab[1:size,1:width], y[1:size,1:6], x[1:size]) |
---|
| 97 | write.table(results, file="$outfile", row.names = FALSE ,col.names = FALSE,quote = FALSE, sep="\t") |
---|
| 98 | q()|; |
---|
| 99 | |
---|
| 100 | open(FT, "| R --slave --vanilla") |
---|
| 101 | or die "Couldn't call fisher.text, $!\n"; |
---|
| 102 | print FT $cmd, "\n"; #fisher test |
---|
| 103 | print FT $cmd2, "\n"; #qvalues and results |
---|
| 104 | #print FT $pr, "\n"; |
---|
| 105 | close FT or die "Couldn't finish fisher.test, $!\n"; |
---|
| 106 | |
---|
| 107 | exit; |
---|