[2] | 1 | #!/usr/bin/env bash |
---|
| 2 | |
---|
| 3 | input_file=$1 |
---|
| 4 | output_file=$2 |
---|
| 5 | org=$3 |
---|
| 6 | db_loc=$4 |
---|
| 7 | chrom_col=$5 |
---|
| 8 | pos_col=$6 |
---|
| 9 | base=$7 |
---|
| 10 | allele_col=$8 |
---|
| 11 | strand_col=$9 |
---|
| 12 | output_opts=${10} |
---|
| 13 | |
---|
| 14 | working_dir=$PWD |
---|
| 15 | sift_input="$working_dir/sift_input.txt" |
---|
| 16 | sift_output="$working_dir/sift_output.txt" |
---|
| 17 | |
---|
| 18 | |
---|
| 19 | ## |
---|
| 20 | ## get/check the db directory from the argument org,db_loc |
---|
| 21 | ## |
---|
| 22 | db_dir=$( awk '$1 == org { print $2 }' org=$org $db_loc ) |
---|
| 23 | |
---|
| 24 | if [ -z "$db_dir" ]; then |
---|
| 25 | echo "Can't find dbkey \"$org\" in loc file \"$db_loc\"" 1>&2 |
---|
| 26 | exit 1 |
---|
| 27 | fi |
---|
| 28 | |
---|
| 29 | if [ ! -d "$db_dir" ]; then |
---|
| 30 | echo "Can't access SIFT database directory \"$db_dir\"" 1>&2 |
---|
| 31 | exit 1 |
---|
| 32 | fi |
---|
| 33 | |
---|
| 34 | ## |
---|
| 35 | ## create input file for SIFT_exome_nssnvs.pl |
---|
| 36 | ## |
---|
| 37 | if [ ! -r "$input_file" ]; then |
---|
| 38 | echo "Can't read input file \"$input_file\"" 1>&2 |
---|
| 39 | exit 1 |
---|
| 40 | fi |
---|
| 41 | |
---|
| 42 | if [ $base -eq 0 ]; then |
---|
| 43 | beg_col="$pos_col" |
---|
| 44 | end_col="$pos_col + 1" |
---|
| 45 | pos_adj='$2 = $2 - 1' |
---|
| 46 | else |
---|
| 47 | beg_col="$pos_col - 1" |
---|
| 48 | end_col="$pos_col" |
---|
| 49 | pos_adj='' |
---|
| 50 | fi |
---|
| 51 | |
---|
| 52 | strand_cvt='' |
---|
| 53 | if [ \( "$strand_col" = "+" \) ]; then |
---|
| 54 | strand='"1"' |
---|
| 55 | elif [ \( "$strand_col" = "-" \) ]; then |
---|
| 56 | strand='"-1"' |
---|
| 57 | else |
---|
| 58 | strand="\$$strand_col" |
---|
| 59 | strand_cvt='if ( '"${strand}"' == "+") { '"${strand}"' = "1" } else if ( '"${strand}"' == "-") { '"${strand}"' = "-1"}' |
---|
| 60 | fi |
---|
| 61 | |
---|
| 62 | awk ' |
---|
| 63 | BEGIN {FS="\t";OFS=","} |
---|
| 64 | { |
---|
| 65 | $'"${chrom_col}"' = tolower($'"${chrom_col}"') |
---|
| 66 | sub(/^chr/, "", $'"${chrom_col}"') |
---|
| 67 | '"${strand_cvt}"' |
---|
| 68 | print $'"${chrom_col}"', $'"${beg_col}"', $'"${end_col}"', '"${strand}"', $'"${allele_col}"' |
---|
| 69 | } |
---|
| 70 | ' "$input_file" > "$sift_input" |
---|
| 71 | |
---|
| 72 | ## |
---|
| 73 | ## run SIFT variants command line program |
---|
| 74 | ## |
---|
| 75 | if [ "$output_opts" = "None" ]; then |
---|
| 76 | output_opts="" |
---|
| 77 | else |
---|
| 78 | output_opts=$( echo "$output_opts" | sed -e 's/,/ 1 -/g' ) |
---|
| 79 | output_opts="-$output_opts 1" |
---|
| 80 | fi |
---|
| 81 | |
---|
| 82 | SIFT_exome_nssnvs.pl -i "$sift_input" -d "$db_dir" -o "$working_dir" $output_opts &> "$sift_output" |
---|
| 83 | if [ $? -ne 0 ]; then |
---|
| 84 | echo "failed: SIFT_exome_nssnvs.pl -i \"$sift_input\" -d \"$db_dir\" -o \"$working_dir\" $output_opts" |
---|
| 85 | exit 1 |
---|
| 86 | fi |
---|
| 87 | |
---|
| 88 | ## |
---|
| 89 | ## locate the output file |
---|
| 90 | ## |
---|
| 91 | sift_pid=$( sed -n -e 's/^.*Your job id is \([0-9][0-9]*\) and is currently running.*$/\1/p' "$sift_output" ) |
---|
| 92 | |
---|
| 93 | if [ -z "$sift_pid" ]; then |
---|
| 94 | echo "Can't find SIFT pid in \"$sift_output\"" 1>&2 |
---|
| 95 | exit 1 |
---|
| 96 | fi |
---|
| 97 | |
---|
| 98 | sift_outdir="$working_dir/$sift_pid" |
---|
| 99 | if [ ! -d "$sift_outdir" ]; then |
---|
| 100 | echo "Can't access SIFT output directory \"$sift_outdir\"" 1>&2 |
---|
| 101 | exit 1 |
---|
| 102 | fi |
---|
| 103 | |
---|
| 104 | sift_outfile="$sift_outdir/${sift_pid}_predictions.tsv" |
---|
| 105 | if [ ! -r "$sift_outfile" ]; then |
---|
| 106 | echo "Can't access SIFT output file \"$sift_outfile\"" 1>&2 |
---|
| 107 | exit 1 |
---|
| 108 | fi |
---|
| 109 | |
---|
| 110 | ## |
---|
| 111 | ## create output file |
---|
| 112 | ## |
---|
| 113 | awk ' |
---|
| 114 | BEGIN {FS="\t";OFS="\t"} |
---|
| 115 | NR == 1 { |
---|
| 116 | $12 = "Num seqs at position" |
---|
| 117 | $1 = "Chrom\tPosition\tStrand\tAllele" |
---|
| 118 | print |
---|
| 119 | } |
---|
| 120 | NR != 1 { |
---|
| 121 | $1 = "chr" $1 |
---|
| 122 | gsub(/,/, "\t", $1) |
---|
| 123 | print |
---|
| 124 | } |
---|
| 125 | ' "$sift_outfile" | awk ' |
---|
| 126 | BEGIN {FS="\t";OFS="\t"} |
---|
| 127 | NR == 1 { |
---|
| 128 | print "#" $0 |
---|
| 129 | } |
---|
| 130 | NR != 1 { |
---|
| 131 | if ($3 == "1") { $3 = "+" } else if ($3 == "-1") { $3 = "-" } |
---|
| 132 | '"${pos_adj}"' |
---|
| 133 | print |
---|
| 134 | } |
---|
| 135 | ' > "$output_file" |
---|
| 136 | |
---|
| 137 | ## |
---|
| 138 | ## cleanup |
---|
| 139 | ## |
---|
| 140 | rm -rf "$sift_outdir" "$sift_input" "$sift_output" |
---|
| 141 | |
---|