#!/bin/sh

# Modified fastq_quality_boxplot_graph.sh from FASTX-toolkit - FASTA/FASTQ preprocessing tools.
# Copyright (C) 2009 A. Gordon (gordon@cshl.edu)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# usage - print the command-line help text to stdout and terminate the script.
#
# Takes no arguments. Always finishes with "exit 0", so control never
# returns to the caller.
#
# Declared with the portable "name()" form: the "function" keyword is not
# part of POSIX sh and is rejected by strict /bin/sh implementations.
usage()
{
	# One printf call emits every help line; each argument becomes one line.
	# "$0" is expanded so the synopsis shows the name the script was run as.
	printf '%s\n' \
		"SOLiD-Quality BoxPlot plotter" \
		"Generates a SOLiD quality score box-plot graph " \
		"" \
		"Usage: $0 [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]" \
		"" \
		" [-p] - Generate PostScript (.PS) file. Default is PNG image." \
		" [-i INPUT.TXT] - Input file. Should be the output of \"solid_qual_stats\" program." \
		" [-o OUTPUT] - Output file name. default is STDOUT." \
		" [-t TITLE] - Title (usually the solid file name) - will be plotted on the graph." \
		""
	exit 0
}

#
# Input Data columns: #pos cnt min max sum mean Q1 med Q3 IQR lW rW
# As produced by "solid_qual_stats" program

# ---------------------------------------------------------------------------
# Main script: parse options, validate the input file, build a gnuplot
# script in a shell variable and pipe it into gnuplot.
#
# Options (see usage): -i INPUT  -t TITLE  -o OUTPUT  -p  -h
# Exit status: 0 on success or after printing usage; 1 on a bad option or
#              an unreadable input file.
# ---------------------------------------------------------------------------

# Defaults; each may be overridden by a command-line option below.
TITLE=""                                # default title is empty
FILENAME=""                             # input statistics file (-i)
OUTPUTTERM="set term png size 800,600"  # gnuplot terminal line; -p switches it to PostScript
OUTPUTFILE="/dev/stdout"                # Default output file is simply "stdout"

# The leading ':' in the option string selects getopts' silent mode:
# a missing option argument sets Option to ':' and an unknown option sets
# it to '?', with the offending option letter in OPTARG in both cases.
while getopts ":t:i:o:ph" Option
do
	case "$Option" in
		t ) TITLE="for $OPTARG" ;;
		i ) FILENAME="$OPTARG" ;;
		o ) OUTPUTFILE="$OPTARG" ;;
		p ) OUTPUTTERM="set term postscript enhanced color \"Helvetica\" 4" ;;
		h ) usage ;;
		# Diagnostics go to stderr so they can never end up inside a
		# redirected image, and the exit status is a valid POSIX value.
		: ) echo "option -$OPTARG requires an argument. use '-h' for usage information." >&2; exit 1 ;;
		* ) echo "unrecognized argument. use '-h' for usage information." >&2; exit 1 ;;
	esac
done
shift $((OPTIND - 1))

# No input file given: show the help text (usage terminates the script).
if [ -z "$FILENAME" ]; then
	usage
fi

if [ ! -r "$FILENAME" ]; then
	# Report the file that was actually tested; the positional parameters
	# have already been shifted away at this point.
	echo "Error: can't open input file ($FILENAME)." >&2
	exit 1
fi

#Read number of cycles from the stats file (each line is a cycle, minus the header line)
#But for the graph, I want xrange to reach (num_cycles+1), so I don't subtract 1 now.
NUM_CYCLES=$(wc -l < "$FILENAME")

# gnuplot script. Columns used by the plot command (see the input column
# list: pos=1, Q1=7, med=8, Q3=9, lW=11, rW=12).
# The backslash-newline inside the double-quoted string is removed by the
# shell, so gnuplot receives the two "plot" lines as a single line.
# NOTE(review): TITLE and FILENAME are interpolated verbatim; a double quote
# in the title or a single quote in the file name will break the generated
# gnuplot script.
GNUPLOTCMD="
$OUTPUTTERM
set boxwidth 0.8
set size 1,1
set key Left inside
set xlabel \"read position\"
set ylabel \"Quality Score \"
set title \"Quality Scores $TITLE\"
#set auto x
set bars 4.0
set xrange [ 0: $NUM_CYCLES ]
set yrange [-2:45]
set y2range [-2:45]
set xtics 1
set x2tics 1
set ytics 2
set y2tics 2
set tics out
set grid ytics
set style fill empty
plot '$FILENAME' using 1:7:11:12:9 with candlesticks lt 1 lw 1 title 'Quartiles' whiskerbars, \
'' using 1:8:8:8:8 with candlesticks lt -1 lw 2 title 'Medians'
"

# printf rather than echo: some /bin/sh echo builtins interpret backslash
# sequences, which would corrupt a title containing backslashes.
printf '%s\n' "$GNUPLOTCMD" | gnuplot > "$OUTPUTFILE"