#!/usr/bin/env perl

use strict;
use warnings;

##################################################################
# Select genes that are associated with the diseases listed in the
# disease ontology.
# ontology: http://do-wiki.nubic.northwestern.edu/index.php/Main_Page
# gene associations by FunDO: http://projects.bioinformatics.northwestern.edu/do_rif/
# Sept 2010, switch to doLite
# input: build outfile sourceFileLoc.loc term or partial term
#
# The .loc file is tab-separated: column 1 is the build, column 2 the
# data type, column 3 the path of the data file. Only rows whose type is
# 'disease associated genes' are considered; lines starting with '#' are
# comments. If several rows match the build, the last one wins.
#
# The term is split on whitespace and the words are ORed together; each
# word is matched literally and case-insensitively against column 7 of
# the data file. The single term 'disease' selects every row.
#
# Diagnostics for bad usage, an unknown build or a missing term are
# printed to STDOUT and the script exits normally; failures to open or
# close a file are fatal.
##################################################################

if (@ARGV < 3) {
    print "usage: disease_ontology_gene_selector.pl build outfile.txt sourceFile.loc [list of terms]\n";
    exit;
}

# Only the first term argument is used; it may be absent (checked below).
my ($build, $out, $in, $term) = @ARGV;

if (defined $term) {
    $term =~ s/^'//;    # remove quotes protecting from shell
    $term =~ s/'$//;
}

# Look up the data file for this build in the .loc file.
my $data;
open(my $loc_fh, '<', $in) or die "Couldn't open $in, $!\n";
while (my $line = <$loc_fh>) {
    chomp $line;
    next if $line =~ /^\s*#/;
    my @f = split(/\t/, $line);
    # Blank or short lines cannot name a build and a data type.
    next unless defined $f[0] && defined $f[1];
    if ($f[0] eq $build && $f[1] eq 'disease associated genes') {
        $data = $f[2];
    }
}
close $loc_fh or die "Couldn't close $in, $!\n";

if (!$data) {
    print "Error $build not found in $in\n";
    exit;
}

# split ' ' discards leading/trailing whitespace, so no empty alternative
# (which would match every row) can end up in the pattern.
my @words = defined $term ? split(' ', $term) : ();
if (!@words) {
    print "No disease term entered\n";
    exit;
}

# The lone term 'disease' means "print all rows".
my $print_all = (@words == 1 && $words[0] eq 'disease');

# start with just fuzzy term matches: use OR between words, each word
# escaped so regex metacharacters in a term are matched literally.
my $alternation = join('|', map { quotemeta($_) } @words);
my $term_re     = qr/(?:$alternation)/i;

# Open the input first so the output file is not truncated when the data
# file is unavailable.
open(my $data_fh, '<', $data) or die "Couldn't open data file $data, $!\n";
open(my $out_fh,  '>', $out)  or die "Couldn't open $out, $!\n";
while (my $line = <$data_fh>) {
    chomp $line;
    my @f = split(/\t/, $line);    # chrom start end strand geneName geneID disease
    # Rows with fewer than 7 columns have no disease text to match.
    my $disease = defined $f[6] ? $f[6] : '';
    if ($print_all || $disease =~ $term_re) {
        print {$out_fh} join("\t", @f), "\n";
    }
}
close $data_fh or die "Couldn't close data file $data, $!\n";
close $out_fh  or die "Couldn't close $out, $!\n";

exit;