1 | #!/usr/bin/env bash |
---|
2 | |
---|
3 | input_file=$1 |
---|
4 | output_file=$2 |
---|
5 | org=$3 |
---|
6 | db_loc=$4 |
---|
7 | chrom_col=$5 |
---|
8 | pos_col=$6 |
---|
9 | base=$7 |
---|
10 | allele_col=$8 |
---|
11 | strand_col=$9 |
---|
12 | output_opts=${10} |
---|
13 | |
---|
14 | working_dir=$PWD |
---|
15 | sift_input="$working_dir/sift_input.txt" |
---|
16 | sift_output="$working_dir/sift_output.txt" |
---|
17 | |
---|
18 | |
---|
19 | ## |
---|
20 | ## get/check the db directory from the argument org,db_loc |
---|
21 | ## |
---|
22 | db_dir=$( awk '$1 == org { print $2 }' org=$org $db_loc ) |
---|
23 | |
---|
24 | if [ -z "$db_dir" ]; then |
---|
25 | echo "Can't find dbkey \"$org\" in loc file \"$db_loc\"" 1>&2 |
---|
26 | exit 1 |
---|
27 | fi |
---|
28 | |
---|
29 | if [ ! -d "$db_dir" ]; then |
---|
30 | echo "Can't access SIFT database directory \"$db_dir\"" 1>&2 |
---|
31 | exit 1 |
---|
32 | fi |
---|
33 | |
---|
34 | ## |
---|
35 | ## create input file for SIFT_exome_nssnvs.pl |
---|
36 | ## |
---|
37 | if [ ! -r "$input_file" ]; then |
---|
38 | echo "Can't read input file \"$input_file\"" 1>&2 |
---|
39 | exit 1 |
---|
40 | fi |
---|
41 | |
---|
42 | if [ $base -eq 0 ]; then |
---|
43 | beg_col="$pos_col" |
---|
44 | end_col="$pos_col + 1" |
---|
45 | pos_adj='$2 = $2 - 1' |
---|
46 | else |
---|
47 | beg_col="$pos_col - 1" |
---|
48 | end_col="$pos_col" |
---|
49 | pos_adj='' |
---|
50 | fi |
---|
51 | |
---|
52 | strand_cvt='' |
---|
53 | if [ \( "$strand_col" = "+" \) ]; then |
---|
54 | strand='"1"' |
---|
55 | elif [ \( "$strand_col" = "-" \) ]; then |
---|
56 | strand='"-1"' |
---|
57 | else |
---|
58 | strand="\$$strand_col" |
---|
59 | strand_cvt='if ( '"${strand}"' == "+") { '"${strand}"' = "1" } else if ( '"${strand}"' == "-") { '"${strand}"' = "-1"}' |
---|
60 | fi |
---|
61 | |
---|
62 | awk ' |
---|
63 | BEGIN {FS="\t";OFS=","} |
---|
64 | { |
---|
65 | $'"${chrom_col}"' = tolower($'"${chrom_col}"') |
---|
66 | sub(/^chr/, "", $'"${chrom_col}"') |
---|
67 | '"${strand_cvt}"' |
---|
68 | print $'"${chrom_col}"', $'"${beg_col}"', $'"${end_col}"', '"${strand}"', $'"${allele_col}"' |
---|
69 | } |
---|
70 | ' "$input_file" > "$sift_input" |
---|
71 | |
---|
72 | ## |
---|
73 | ## run SIFT variants command line program |
---|
74 | ## |
---|
75 | if [ "$output_opts" = "None" ]; then |
---|
76 | output_opts="" |
---|
77 | else |
---|
78 | output_opts=$( echo "$output_opts" | sed -e 's/,/ 1 -/g' ) |
---|
79 | output_opts="-$output_opts 1" |
---|
80 | fi |
---|
81 | |
---|
82 | SIFT_exome_nssnvs.pl -i "$sift_input" -d "$db_dir" -o "$working_dir" $output_opts &> "$sift_output" |
---|
83 | if [ $? -ne 0 ]; then |
---|
84 | echo "failed: SIFT_exome_nssnvs.pl -i \"$sift_input\" -d \"$db_dir\" -o \"$working_dir\" $output_opts" |
---|
85 | exit 1 |
---|
86 | fi |
---|
87 | |
---|
88 | ## |
---|
89 | ## locate the output file |
---|
90 | ## |
---|
91 | sift_pid=$( sed -n -e 's/^.*Your job id is \([0-9][0-9]*\) and is currently running.*$/\1/p' "$sift_output" ) |
---|
92 | |
---|
93 | if [ -z "$sift_pid" ]; then |
---|
94 | echo "Can't find SIFT pid in \"$sift_output\"" 1>&2 |
---|
95 | exit 1 |
---|
96 | fi |
---|
97 | |
---|
98 | sift_outdir="$working_dir/$sift_pid" |
---|
99 | if [ ! -d "$sift_outdir" ]; then |
---|
100 | echo "Can't access SIFT output directory \"$sift_outdir\"" 1>&2 |
---|
101 | exit 1 |
---|
102 | fi |
---|
103 | |
---|
104 | sift_outfile="$sift_outdir/${sift_pid}_predictions.tsv" |
---|
105 | if [ ! -r "$sift_outfile" ]; then |
---|
106 | echo "Can't access SIFT output file \"$sift_outfile\"" 1>&2 |
---|
107 | exit 1 |
---|
108 | fi |
---|
109 | |
---|
110 | ## |
---|
111 | ## create output file |
---|
112 | ## |
---|
113 | awk ' |
---|
114 | BEGIN {FS="\t";OFS="\t"} |
---|
115 | NR == 1 { |
---|
116 | $12 = "Num seqs at position" |
---|
117 | $1 = "Chrom\tPosition\tStrand\tAllele" |
---|
118 | print |
---|
119 | } |
---|
120 | NR != 1 { |
---|
121 | $1 = "chr" $1 |
---|
122 | gsub(/,/, "\t", $1) |
---|
123 | print |
---|
124 | } |
---|
125 | ' "$sift_outfile" | awk ' |
---|
126 | BEGIN {FS="\t";OFS="\t"} |
---|
127 | NR == 1 { |
---|
128 | print "#" $0 |
---|
129 | } |
---|
130 | NR != 1 { |
---|
131 | if ($3 == "1") { $3 = "+" } else if ($3 == "-1") { $3 = "-" } |
---|
132 | '"${pos_adj}"' |
---|
133 | print |
---|
134 | } |
---|
135 | ' > "$output_file" |
---|
136 | |
---|
137 | ## |
---|
138 | ## cleanup |
---|
139 | ## |
---|
140 | rm -rf "$sift_outdir" "$sift_input" "$sift_output" |
---|
141 | |
---|