1 | #!/usr/bin/env perl |
---|
2 | |
---|
3 | use strict; |
---|
4 | use warnings; |
---|
5 | |
---|
6 | #expected input: path to file, cols of counts (2 sets of 3), threshold |
---|
7 | if (!@ARGV or scalar @ARGV != 9) { |
---|
8 | print "usage snpFreq.pl /path/to/snps.txt <6 column numbers(1 based) with counts for alleles, first one group then another> #threshold outfile\n"; |
---|
9 | exit 1; |
---|
10 | } |
---|
11 | |
---|
12 | #get and verify inputs |
---|
13 | my $file = shift @ARGV; |
---|
14 | my $a1 = shift @ARGV; |
---|
15 | if ($a1 =~ /\D/ or $a1 < 1) { |
---|
16 | print "Error the column number, must be an integer greater than or equal to 1. Got $a1\n"; |
---|
17 | exit 1; |
---|
18 | } |
---|
19 | my $a2 = shift @ARGV; |
---|
20 | if ($a2 =~ /\D/ or $a2 < 1) { |
---|
21 | print "Error the column number, must be an integer greater than or equal to 1. Got $a2\n"; |
---|
22 | exit 1; |
---|
23 | } |
---|
24 | my $a3 = shift @ARGV; |
---|
25 | if ($a3 =~ /\D/ or $a3 < 1) { |
---|
26 | print "Error the column number, must be an integer greater than or equal to 1. Got $a3\n"; |
---|
27 | exit 1; |
---|
28 | } |
---|
29 | my $b1 = shift @ARGV; |
---|
30 | if ($b1 =~ /\D/ or $b1 < 1) { |
---|
31 | print "Error the column number, must be an integer greater than or equal to 1. Got $b1\n"; |
---|
32 | exit 1; |
---|
33 | } |
---|
34 | my $b2 = shift @ARGV; |
---|
35 | if ($b2 =~ /\D/ or $b2 < 1) { |
---|
36 | print "Error the column number, must be an integer greater than or equal to 1. Got $b2\n"; |
---|
37 | exit 1; |
---|
38 | } |
---|
39 | my $b3 = shift @ARGV; |
---|
40 | if ($b3 =~ /\D/ or $b3 < 1) { |
---|
41 | print "Error the column number, must be an integer greater than or equal to 1. Got $b3\n"; |
---|
42 | exit 1; |
---|
43 | } |
---|
44 | my $thresh = shift @ARGV; |
---|
45 | if ($thresh !~ /^\d*\.?\d+$/) { |
---|
46 | print "Error the threshold must be a number. Got $thresh\n"; |
---|
47 | exit 1; |
---|
48 | }elsif ($thresh > .3) { |
---|
49 | print "Error the threshold can not be greater than 0.3 got $thresh\n"; |
---|
50 | exit 1; |
---|
51 | } |
---|
52 | my $outfile = shift @ARGV; |
---|
53 | |
---|
54 | #run a fishers exact test (using R) on whole table |
---|
55 | my $cmd = qq|options(warn=-1) |
---|
56 | tab <- read.table('$file', sep="\t") |
---|
57 | size <- length(tab[,1]) |
---|
58 | width <- length(tab[1,]) |
---|
59 | x <- 1:size |
---|
60 | y <- matrix(data=0, nr=size, nc=6) |
---|
61 | for(i in 1:size) { |
---|
62 | m <- matrix(c(tab[i,$a1], tab[i,$b1], tab[i,$a2], tab[i,$b2], tab[i,$a3], tab[i,$b3]), nrow=2) |
---|
63 | t <- fisher.test(m) |
---|
64 | x[i] <- t\$p.value |
---|
65 | if (x[i] >= 1) { |
---|
66 | x[i] <- .999 |
---|
67 | } |
---|
68 | n <- (tab[i,$a1] + tab[i,$a2] + tab[i,$a3] + tab[i,$b1] + tab[i,$b2] + tab[i,$b3]) |
---|
69 | n_a <- (tab[i,$a1] + tab[i,$a2] + tab[i,$a3]) |
---|
70 | y[i,1] <- ((tab[i,$a1] + tab[i,$b1])*(n_a))/n |
---|
71 | y[i,1] <- round(y[i,1],3) |
---|
72 | y[i,2] <- ((tab[i,$a2] + tab[i,$b2])*(n_a))/n |
---|
73 | y[i,2] <- round(y[i,2],3) |
---|
74 | y[i,3] <- ((tab[i,$a3] + tab[i,$b3])*(n_a))/n |
---|
75 | y[i,3] <- round(y[i,3],3) |
---|
76 | n_b <- (tab[i,$b1] + tab[i,$b2] + tab[i,$b3]) |
---|
77 | y[i,4] <- ((tab[i,$a1] + tab[i,$b1])*(n_b))/n |
---|
78 | y[i,4] <- round(y[i,4],3) |
---|
79 | y[i,5] <- ((tab[i,$a2] + tab[i,$b2])*(n_b))/n |
---|
80 | y[i,5] <- round(y[i,5],3) |
---|
81 | y[i,6] <- ((tab[i,$a3] + tab[i,$b3])*(n_b))/n |
---|
82 | y[i,6] <- round(y[i,6],3) |
---|
83 | }|; |
---|
84 | #results <- data.frame(tab[1:size,1:width], x[1:size]) |
---|
85 | #write.table(results, file="$outfile", row.names = FALSE ,col.names = FALSE,quote = FALSE, sep="\t") |
---|
86 | #q()|; |
---|
87 | |
---|
88 | my $cmd2 = qq|suppressPackageStartupMessages(library(qvalue)) |
---|
89 | qobj <- qvalue(x[1:size], lambda=seq(0,0.90,$thresh), pi0.method="bootstrap", fdr.level=0.1, robust=FALSE, smooth.log.pi0 = FALSE) |
---|
90 | q <- qobj\$qvalues |
---|
91 | results <- data.frame(tab[1:size,1:width], y[1:size,1:6], x[1:size], q[1:size]) |
---|
92 | write.table(results, file="$outfile", row.names = FALSE ,col.names = FALSE,quote = FALSE, sep="\t") |
---|
93 | q()|; |
---|
94 | |
---|
95 | #for TESTING |
---|
96 | my $pr = qq|results <- data.frame(tab[1:size,1:width], y[1:size,1:6], x[1:size]) |
---|
97 | write.table(results, file="$outfile", row.names = FALSE ,col.names = FALSE,quote = FALSE, sep="\t") |
---|
98 | q()|; |
---|
99 | |
---|
100 | open(FT, "| R --slave --vanilla") |
---|
101 | or die "Couldn't call fisher.text, $!\n"; |
---|
102 | print FT $cmd, "\n"; #fisher test |
---|
103 | print FT $cmd2, "\n"; #qvalues and results |
---|
104 | #print FT $pr, "\n"; |
---|
105 | close FT or die "Couldn't finish fisher.test, $!\n"; |
---|
106 | |
---|
107 | exit; |
---|