#!/usr/bin/env perl

use strict;
use warnings;

##################################################################
# Select genes that are associated with the diseases listed in the
# disease ontology.
# ontology: http://do-wiki.nubic.northwestern.edu/index.php/Main_Page
# gene associations by FunDO: http://projects.bioinformatics.northwestern.edu/do_rif/
# Sept 2010, switch to doLite
# input: build outfile sourceFileLoc.loc term or partial term
#
# The .loc file is read as tab-separated lines:
#   column 0 = build, column 1 = the literal 'disease associated genes',
#   column 2 = path of the data file for that build.
# The data file is read as tab-separated lines:
#   chrom start end strand geneName geneID disease
# A row is written to the output file when its disease column (index 6)
# contains any of the whitespace-separated words of the term
# (case-insensitive), or unconditionally when the term is exactly 'disease'.
#
# Diagnostics for bad usage, an unknown build or a missing term are printed
# to STDOUT and the script exits with status 0; file open/close failures die.
##################################################################

if (@ARGV < 3) {
    print "usage: disease_ontology_gene_selector.pl build outfile.txt sourceFile.loc [list of terms]\n";
    exit;
}

my $build = shift @ARGV;
my $out   = shift @ARGV;
my $in    = shift @ARGV;

# Everything left on the command line is the search term, so a quoted
# phrase that arrives split into several arguments is still used in full.
my $term = @ARGV ? join(' ', @ARGV) : undef;
if (defined $term) {
    $term =~ s/^'//;    #remove quotes protecting from shell
    $term =~ s/'$//;
}

# Locate the data file registered for this build. When several lines match,
# the last one wins.
my $data;
open(my $loc_fh, '<', $in) or die "Couldn't open $in, $!\n";
while (my $line = <$loc_fh>) {
    chomp $line;
    next if $line =~ /^\s*#/;    # comment line
    my @f = split(/\t/, $line);
    next if @f < 3;              # blank or incomplete line: no path to read
    if ($f[0] eq $build and $f[1] eq 'disease associated genes') {
        $data = $f[2];
    }
}
close $loc_fh or die "Couldn't close $in, $!\n";
if (!$data) {
    print "Error $build not found in $in\n";
    exit;
}

# split ' ' discards leading/trailing whitespace, so a padded or empty term
# cannot produce an empty alternative that would match every row.
my @words = defined $term ? split(' ', $term) : ();
if (!@words) {
    print "No disease term entered\n";
    exit;
}

#start with just fuzzy term matches
# The single word 'disease' is a request for every row.
my $print_all = (@words == 1 && $words[0] eq 'disease');
# use OR between words; each word is matched literally, so characters such
# as '(' or '+' in a term cannot break or alter the pattern.
my $pattern = join('|', map { quotemeta($_) } @words);
my $term_re = qr/$pattern/i;

open(my $out_fh, '>', $out) or die "Couldn't open $out, $!\n";
open(my $data_fh, '<', $data) or die "Couldn't open data file $data, $!\n";
while (my $line = <$data_fh>) {
    chomp $line;
    my @f = split(/\t/, $line);    #chrom start end strand geneName geneID disease
    # Rows without a disease column can only be selected by 'disease'.
    if ($print_all or (defined $f[6] and $f[6] =~ $term_re)) {
        print {$out_fh} join("\t", @f), "\n";
    }
}
close $data_fh or die "Couldn't close data file $data, $!\n";
close $out_fh or die "Couldn't close $out, $!\n";

exit;