root/galaxy-central/tools/human_genome_variation/sift_variants_wrapper.sh

リビジョン 2, 2.9 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

  • 属性 svn:executable の設定値 *
行番号 
#!/usr/bin/env bash
#
# Wrapper that feeds a tab-separated variant table to SIFT_exome_nssnvs.pl and
# rewrites the resulting predictions as a tab-separated table.
#
# Usage:
#   sift_variants_wrapper.sh input_file output_file org db_loc chrom_col \
#       pos_col base allele_col strand_col output_opts
#
# Arguments:
#   input_file   tab-separated variant table to read
#   output_file  where the reformatted predictions are written
#   org          key matched against column 1 of db_loc
#   db_loc       loc file; column 2 of the matching row is the SIFT db directory
#   chrom_col    awk field number of the chromosome column
#   pos_col      awk field number of the position column
#   base         0 when positions are zero-based; any other value is handled
#                as one-based
#   allele_col   awk field number of the allele column
#   strand_col   literal "+" or "-" for a fixed strand, otherwise the awk field
#                number of the strand column
#   output_opts  "None", or a comma-separated list of SIFT option names; each
#                one is passed to SIFT as "-NAME 1"

# Fail early with a usage line instead of running on with empty values, which
# would otherwise surface later as an unrelated-looking error.
if [ "$#" -lt 10 ]; then
    echo "Usage: ${0##*/} input_file output_file org db_loc chrom_col pos_col base allele_col strand_col output_opts" 1>&2
    exit 1
fi

input_file=$1
output_file=$2
org=$3
db_loc=$4
chrom_col=$5
pos_col=$6
base=$7
allele_col=$8
strand_col=$9
output_opts=${10}

# Scratch files live in the directory the wrapper was started from.
working_dir=$PWD
sift_input="$working_dir/sift_input.txt"
sift_output="$working_dir/sift_output.txt"


##
## get/check the db directory from the argument org,db_loc
##
# Report an unreadable loc file as such; otherwise awk fails, db_dir comes
# back empty and the user is told the dbkey is missing, which is misleading.
if [ ! -r "$db_loc" ]; then
    echo "Can't read loc file \"$db_loc\"" 1>&2
    exit 1
fi

# Column 1 of the loc file is the dbkey, column 2 the SIFT database directory.
# The key is handed to awk with -v and the path is quoted so that neither is
# word-split or glob-expanded by the shell.
db_dir=$( awk -v org="$org" '$1 == org { print $2 }' "$db_loc" )

if [ -z "$db_dir" ]; then
    echo "Can't find dbkey \"$org\" in loc file \"$db_loc\"" 1>&2
    exit 1
fi

if [ ! -d "$db_dir" ]; then
    echo "Can't access SIFT database directory \"$db_dir\"" 1>&2
    exit 1
fi

##
## create input file for SIFT_exome_nssnvs.pl
##
# Nothing can be converted unless the variant table is readable.
[ -r "$input_file" ] || {
    echo "Can't read input file \"$input_file\"" 1>&2
    exit 1
}

# Each variant is written as a (begin, end) coordinate pair:
#   zero-based input -> begin = pos,     end = pos + 1
#   one-based input  -> begin = pos - 1, end = pos
# pos_adj is an awk statement spliced into the output-formatting program later
# in this script; for zero-based input it subtracts 1 from the reported
# position.
if [ "$base" -eq 0 ]; then
    zero_based=1
    pos_adj='$2 = $2 - 1'
else
    zero_based=0
    pos_adj=''
fi

# write_sift_input FILE CHROM_COL POS_COL ALLELE_COL STRAND ZERO_BASED
#
# Print one comma-separated line "chrom,begin,end,strand,allele" per row of
# the tab-separated FILE.
#   CHROM_COL, POS_COL, ALLELE_COL  awk field numbers
#   STRAND      "+" or "-" for a fixed strand, otherwise the field number
#               holding the strand
#   ZERO_BASED  1 when positions in FILE are zero-based, 0 otherwise
# The chromosome is lower-cased and a leading "chr" is removed; strand "+" and
# "-" become "1" and "-1", any other strand value is passed through unchanged.
# All parameters reach awk through -v so they are treated as data and never
# become part of the awk program text.
write_sift_input() {
    awk -v chrom_col="$2" \
        -v pos_col="$3" \
        -v allele_col="$4" \
        -v strand_arg="$5" \
        -v zero_based="$6" '
BEGIN {
    FS = "\t"; OFS = ","
    if (strand_arg == "+")      fixed_strand = "1"
    else if (strand_arg == "-") fixed_strand = "-1"
}
{
    chrom = tolower($chrom_col)
    sub(/^chr/, "", chrom)

    pos = $pos_col

    if (fixed_strand != "") {
        strand = fixed_strand
    } else {
        strand = $strand_arg
        if (strand == "+")      strand = "1"
        else if (strand == "-") strand = "-1"
    }

    if (zero_based == 1) print chrom, pos, pos + 1, strand, $allele_col
    else                 print chrom, pos - 1, pos, strand, $allele_col
}
' "$1"
}

write_sift_input "$input_file" "$chrom_col" "$pos_col" "$allele_col" \
    "$strand_col" "$zero_based" > "$sift_input"

##
## run SIFT variants command line program
##
# sift_option_flags LIST
#
# Turn a comma-separated list of option names ("A,B") into the flag string
# handed to SIFT ("-A 1 -B 1"). "None" and the empty string both mean that no
# options were selected and yield no output; previously an empty value was
# turned into the bogus argument string "- 1".
sift_option_flags() {
    case "$1" in
        '' | None) ;;
        *) printf '%s\n' "-${1//,/ 1 -} 1" ;;
    esac
}

output_opts=$( sift_option_flags "$output_opts" )

# stdout and stderr of SIFT are both captured in $sift_output; that log is
# parsed for the job id further down.
# $output_opts is deliberately unquoted: it holds zero or more "-NAME 1" pairs
# that have to be split into separate arguments.
# shellcheck disable=SC2086
if ! SIFT_exome_nssnvs.pl -i "$sift_input" -d "$db_dir" -o "$working_dir" $output_opts &> "$sift_output"; then
  # Diagnostics go to stderr like every other error in this script, and the
  # captured log is replayed so the reason for the failure is not lost.
  echo "failed: SIFT_exome_nssnvs.pl -i \"$sift_input\" -d \"$db_dir\" -o \"$working_dir\" $output_opts" 1>&2
  if [ -r "$sift_output" ]; then
    cat "$sift_output" 1>&2
  fi
  exit 1
fi

##
## locate the output file
##
# The job id is scraped from the captured SIFT log; it names both the result
# directory under $working_dir and the predictions file inside it.
sift_pid=$( sed -n -e 's/^.*Your job id is \([0-9][0-9]*\) and is currently running.*$/\1/p' "$sift_output" )

[ -n "$sift_pid" ] || {
  echo "Can't find SIFT pid in \"$sift_output\"" 1>&2
  exit 1
}

sift_outdir="$working_dir/$sift_pid"
[ -d "$sift_outdir" ] || {
    echo "Can't access SIFT output directory \"$sift_outdir\"" 1>&2
    exit 1
}

sift_outfile="$sift_outdir/${sift_pid}_predictions.tsv"
[ -r "$sift_outfile" ] || {
    echo "Can't access SIFT output file \"$sift_outfile\"" 1>&2
    exit 1
}

##
## create output file
##
# Pass 1 works on the predictions file as written by SIFT:
#   header row - retitles column 12 and replaces the title of column 1 with
#                four tab-separated titles
#   data rows  - prefixes column 1 with "chr" and turns its commas into tabs
# Pass 2 re-reads the widened rows so the new columns can be addressed by
# number: the header gets a leading "#", strand 1 / -1 becomes + / -, and the
# statement held in $pos_adj (possibly empty) is applied to each data row.
awk '
BEGIN { FS = OFS = "\t" }
NR == 1 {
    $12 = "Num seqs at position"
    $1 = "Chrom\tPosition\tStrand\tAllele"
    print
    next
}
{
    $1 = "chr" $1
    gsub(/,/, "\t", $1)
    print
}
' "$sift_outfile" | awk '
BEGIN { FS = OFS = "\t" }
NR == 1 {
    print "#" $0
    next
}
{
    if ($3 == "1")
        $3 = "+"
    else if ($3 == "-1")
        $3 = "-"
    '"${pos_adj}"'
    print
}
' > "$output_file"

##
## cleanup
##
# Remove the SIFT result directory and both scratch files.
scratch_paths=( "$sift_outdir" "$sift_input" "$sift_output" )
rm -rf "${scratch_paths[@]}"

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。