#!/usr/bin/env perl

use strict;
use warnings;

##################################################################
# Select genes that are associated with the diseases listed in the
# disease ontology.
# ontology: http://do-wiki.nubic.northwestern.edu/index.php/Main_Page
# gene associations by FunDO: http://projects.bioinformatics.northwestern.edu/do_rif/
# Sept 2010, switch to doLite
# input: build outfile sourceFileLoc.loc term or partial term
##################################################################
#
# Arguments (positional):
#   build       - genome build name, matched against column 1 of the .loc file
#   outfile     - path of the tab-separated file to write matching rows to
#   sourceFile  - .loc file; a row whose first two columns are
#                 "<build>\tdisease associated genes" gives the data file
#                 path in column 3 (the last such row wins)
#   term        - whitespace-separated words, optionally wrapped in single
#                 quotes; a data row is written when column 7 contains ANY
#                 of the words (case-insensitive substring match).  The
#                 single word 'disease' writes every row.
#
# Usage and lookup problems are reported on STDOUT and the script exits
# normally (unchanged from the historical behaviour); I/O failures die.

if (@ARGV < 3) {
    print "usage: disease_ontology_gene_selector.pl build outfile.txt sourceFile.loc [list of terms]\n";
    exit;
}

# Only the first term argument is used; any further arguments are ignored.
my ($build, $out, $in, $term) = @ARGV;

# Break the term into words.  This is done only when a term was actually
# supplied, so that a missing term does not trigger "uninitialized" warnings.
my @words;
if (defined $term) {
    $term =~ s/^'//;    #remove quotes protecting from shell
    $term =~ s/'$//;
    # split ' ' ignores leading/trailing whitespace, so no empty word is
    # produced (an empty regex alternative would match every row).
    @words = split ' ', $term;
}

# Look up the data file for this build in the .loc file.
my $data;
open(my $loc_fh, '<', $in) or die "Couldn't open $in, $!\n";
while (my $line = <$loc_fh>) {
    chomp $line;
    next if $line =~ /^\s*#/;    # comment line
    my @f = split(/\t/, $line);
    next if @f < 3;              # blank/short rows cannot name a data file
    if ($f[0] eq $build and $f[1] eq 'disease associated genes') {
        $data = $f[2];
    }
}
close $loc_fh or die "Couldn't close $in, $!\n";

if (!$data) {
    print "Error $build not found in $in\n";
    exit;
}
if (!@words) {
    print "No disease term entered\n";
    exit;
}

#start with just fuzzy term matches
# Words are OR-ed together.  Each word is escaped so that regex
# metacharacters in user input are matched literally instead of breaking
# (or changing the meaning of) the pattern.
my $pattern    = join '|', map { quotemeta } @words;
my $term_re    = qr/$pattern/i;
my $select_all = (@words == 1 and $words[0] eq 'disease');    #print all with disease

# Open the input first so a bad data path does not leave an empty outfile.
open(my $data_fh, '<', $data) or die "Couldn't open data file $data, $!\n";
open(my $out_fh,  '>', $out)  or die "Couldn't open $out, $!\n";
while (my $line = <$data_fh>) {
    chomp $line;
    my @f = split(/\t/, $line);    #chrom start end strand geneName geneID disease
    my $disease = $f[6];           # undef when the row has fewer than 7 columns
    if ($select_all or (defined $disease and $disease =~ $term_re)) {
        print {$out_fh} join("\t", @f), "\n";
    }
}
close $data_fh or die "Couldn't close data file $data, $!\n";
close $out_fh  or die "Couldn't close $out, $!\n";

exit;