root/galaxy-central/tools/human_genome_variation/hilbertvis.sh @ 3

リビジョン 2, 2.7 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

  • 属性 svn:executable の設定値 *
行番号 
1#!/usr/bin/env bash
2
3input_file="$1"
4output_file="$2"
5chromInfo_file="$3"
6chrom="$4"
7score_col="$5"
8hilbert_curve_level="$6"
9summarization_mode="$7"
10chrom_col="$8"
11start_col="$9"
12end_col="${10}"
13strand_col="${11}"
14
15## use first sequence if chrom filed is empty
16if [ -z "$chrom" ]; then
17    chrom=$( head -n 1 "$input_file" | cut -f$chrom_col )
18fi
19
20## get sequence length
21if [ ! -r "$chromInfo_file" ]; then
22    echo "Unable to read chromInfo_file $chromInfo_file" 1>&2
23    exit 1
24fi
25
26chrom_len=$( awk '$1 == chrom {print $2}' chrom=$chrom $chromInfo_file )
27
28## error if we can't find the chrom_len
29if [ -z "$chrom_len" ]; then
30    echo "Can't find length for sequence \"$chrom\" in chromInfo_file $chromInfo_file" 1>&2
31    exit 1
32fi
33
34## make sure chrom_len is positive
35if [ $chrom_len -le 0 ]; then
36    echo "sequence \"$chrom\" length $chrom_len <= 0" 1>&2
37    exit 1
38fi
39
40## modify R script depending on the inclusion of a score column, strand information
41input_cols="\$${start_col}, \$${end_col}"
42col_types='beg=0, end=0, strand=""'
43
44# if strand_col == 0 (strandCol metadata is not set), assume everything's on the plus strand
45if [ $strand_col -ne 0 ]; then
46    input_cols="${input_cols}, \$${strand_col}"
47else
48    input_cols="${input_cols}, \\\"+\\\""
49fi
50
51# set plot value (either from data or use a constant value)
52if [ $score_col -eq -1 ]; then
53    value=1
54else
55    input_cols="${input_cols}, \$${score_col}"
56    col_types="${col_types}, score=0"
57    value='chunk$score[i]'
58fi
59
60R --vanilla &> /dev/null <<endOfCode
61library(HilbertVis);
62
63chrom_len <- ${chrom_len};
64chunk_size <- 1000;
65interval_count <- 0;
66invalid_strand <- 0;
67
68awk_cmd <- paste(
69  "awk 'BEGIN{FS=\"\t\";OFS=\"\t\"}",
70    "\$${chrom_col} == \"${chrom}\"",
71      "{print ${input_cols}}' ${input_file}"
72);
73
74col_types <- list(${col_types});
75vec <- vector(mode="numeric", length=chrom_len);
76conn <- pipe(description=awk_cmd, open="r");
77
78repeat {
79  chunk <- scan(file=conn, what=col_types, sep="\t", nlines=chunk_size, quiet=TRUE);
80
81  if ((rows <- length(chunk\$beg)) == 0)
82        break;
83
84  interval_count <- interval_count + rows;
85
86  for (i in 1:rows) {
87    if (chunk\$strand[i] == '+') {
88      start <- chunk\$beg[i] + 1;
89      stop <- chunk\$end[i];
90    } else if (chunk\$strand[i] == '-') {
91      start <- chrom_len - chunk\$end[i] - 1;
92      stop <- chrom_len - chunk\$beg[i];
93    } else {
94      invalid_strand <- invalid_strand + 1;
95      interval_count <- interval_count - 1;
96      next;
97    }
98    vec[start:stop] <- ${value};
99  }
100}
101
102close(conn);
103
104hMat <- hilbertImage(vec, level=$hilbert_curve_level, mode="$summarization_mode");
105pdf(file="$output_file", onefile=TRUE, width=8, height=10.5, paper="letter");
106showHilbertImage(hMat);
107dev.off();
108endOfCode
109
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。