# A program to run a two-sample t-test (two-sided or one-sided) by generating
# an R script and executing it with R.
#
# Arguments (all five are required, in this order):
#   1. TABULAR file holding the first sample; only its first column is used.
#   2. TABULAR file holding the second sample; only its first column is used.
#   3. Sidedness of the test: "two-sided", "one-sided:_m1_less_than_m2", or
#      any other value, which is treated as one-sided with m1 greater than m2.
#   4. Equality status of the population standard deviations: "equal" selects
#      var.equal = TRUE; any other value selects var.equal = FALSE.
#   5. TXT output file that receives the printed result of the t-test.
#
# Side effects: writes non_pooled_t_test_two_samples.r and
# non_pooled_t_test_two_samples.r.out into the current working directory.

use strict;
use warnings;

# Return $text as an R double-quoted string literal.  Backslashes and double
# quotes are escaped so a path containing them cannot terminate the literal
# and alter the generated script.
sub r_string_literal {
    my ($text) = @_;
    $text =~ s/([\\"])/\\$1/g;
    return qq{"$text"};
}

# check to make sure having correct files
my $usage = "usage: non_pooled_t_test_two_samples_galaxy.pl [TABULAR.in] [TABULAR.in] [testSidedness] [standardDeviationEquality] [TXT.out] \n";
die $usage unless @ARGV == 5;

# get the input arguments
my ($first_sample_file, $second_sample_file, $test_sidedness,
    $sd_equality, $output_file) = @ARGV;

# Fail early, with the OS error, if an input cannot be read.  The handles are
# closed right away because R reads the files itself.
for my $input_file ($first_sample_file, $second_sample_file) {
    open(my $in_fh, '<', $input_file)
        or die "Could not open file $input_file: $!\n";
    close($in_fh);
}

# Likewise make sure the output file can be created.  R's sink() rewrites it
# later, so nothing is written through this handle.
open(my $out_fh, '>', $output_file)
    or die "Could not open file $output_file: $!\n";
close($out_fh)
    or die "Could not close file $output_file: $!\n";

# Translate the command-line options into t.test() arguments here rather than
# interpolating the raw strings into the R script.
my $alternative
    = $test_sidedness eq 'two-sided'                  ? 'two.sided'
    : $test_sidedness eq 'one-sided:_m1_less_than_m2' ? 'less'
    :                                                   'greater';
my $var_equal = $sd_equality eq 'equal' ? 'TRUE' : 'FALSE';

my $r_first  = r_string_literal($first_sample_file);
my $r_second = r_string_literal($second_sample_file);
my $r_output = r_string_literal($output_file);

# The R script is written to the current working directory.
my $r_script = "non_pooled_t_test_two_samples.r";

open(my $r_fh, '>', $r_script)
    or die "Cannot open $r_script: $!\n";
print {$r_fh} <<"END_R_SCRIPT";

sampleTable1 <- read.table($r_first, header=FALSE);
sample1 <- sampleTable1[, 1];

sampleTable2 <- read.table($r_second, header=FALSE);
sample2 <- sampleTable2[, 1];

#open the output text file
sink(file = $r_output);

#two-sample t-test; var.equal and alternative were chosen by the Perl wrapper
testResult <- t.test(sample1, sample2, var.equal = $var_equal, alternative = "$alternative");

#save the output of the t-test into the output text file
testResult;

#close the output text file
sink();

#eof
END_R_SCRIPT
close($r_fh)
    or die "Cannot close $r_script: $!\n";

# Run R through the shell because the command relies on redirection; the only
# interpolated value is the fixed script name above.
my $status = system("R --no-restore --no-save --no-readline < $r_script > $r_script.out");
if ($status == -1) {
    die "Could not run R: $!\n";
}
elsif ($status & 127) {
    die sprintf("R was terminated by signal %d; see %s.out\n",
        $status & 127, $r_script);
}
elsif ($status != 0) {
    die sprintf("R exited with status %d; see %s.out\n",
        $status >> 8, $r_script);
}