root/galaxy-central/tools/sr_mapping/bwa_wrapper.xml

リビジョン 2, 17.4 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1<tool id="bwa_wrapper" name="Map with BWA" version="1.0.3">
2  <description></description>
<!-- Command template for bwa_wrapper.py (run with the python interpreter).
     The #if / #else / #end if directives choose which arguments are emitted:
       * ref: the history FASTA (ownFile) or the selected built-in index (indices);
       * rfastq: the second FASTQ for paired input, otherwise the literal "None";
       * tuning options: the literal "None" for the pre_set choice, otherwise the
         values entered in the params conditional;
       * dbkey: $dbkey only when the reference comes from the history, else "None".
     The thread count is hard-coded to 4. -->
3  <command interpreter="python">bwa_wrapper.py
4--threads="4"
5#if $genomeSource.refGenomeSource == "history":
6--ref=$genomeSource.ownFile
7#else:
8--ref=$genomeSource.indices
9#end if
10--fastq=$paired.input1
11#if $paired.sPaired == "paired":
12--rfastq=$paired.input2
13#else:
14--rfastq="None"
15#end if
16--output=$output --genAlignType=$paired.sPaired --params=$params.source_select --fileSource=$genomeSource.refGenomeSource
17#if $params.source_select == "pre_set":
18--maxEditDist="None" --fracMissingAligns="None" --maxGapOpens="None" --maxGapExtens="None" --disallowLongDel="None" --disallowIndel="None" --seed="None" --maxEditDistSeed="None" --mismatchPenalty="None" --gapOpenPenalty="None" --gapExtensPenalty="None" --suboptAlign="None" --noIterSearch="None" --outputTopN="None" --maxInsertSize="None" --maxOccurPairing="None"
19#else:
20--maxEditDist=$params.maxEditDist --fracMissingAligns=$params.fracMissingAligns --maxGapOpens=$params.maxGapOpens --maxGapExtens=$params.maxGapExtens --disallowLongDel=$params.disallowLongDel --disallowIndel=$params.disallowIndel --seed=$params.seed --maxEditDistSeed=$params.maxEditDistSeed --mismatchPenalty=$params.mismatchPenalty --gapOpenPenalty=$params.gapOpenPenalty --gapExtensPenalty=$params.gapExtensPenalty --suboptAlign=$params.suboptAlign --noIterSearch=$params.noIterSearch --outputTopN=$params.outputTopN --maxInsertSize=$params.maxInsertSize --maxOccurPairing=$params.maxOccurPairing
21#end if
22#if $genomeSource.refGenomeSource == "history":
23--dbkey=$dbkey
24#else:
25--dbkey="None"
26#end if
27--suppressHeader=$suppressHeader
28  </command>
29  <requirements>
    <!-- External dependency declared for this tool: the "bwa" package. -->
30    <requirement type='package'>bwa</requirement>
31  </requirements>
32  <inputs>
33    <conditional name="genomeSource">
      <!-- Reference selection. "indexed" offers the entries of the bwa_indexes data
           table; "history" takes a FASTA dataset from the user's history (ownFile).
           The commented-out options element further down is a disabled alternative
           that reads bwa_index.loc directly (name from column 0, value from column 1). -->
34      <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
35        <option value="indexed">Use a built-in index</option>
36        <option value="history">Use one from the history</option>
37      </param>
38      <when value="indexed">
39        <param name="indices" type="select" label="Select a reference genome">
40          <options from_data_table="bwa_indexes"/>
41          <!--
42          <options from_file="bwa_index.loc">
43            <column name="value" index="1" />
44            <column name="name" index="0" />
45          </options>
46          -->
47        </param>
48      </when>
49      <when value="history">
50        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
51      </when>
52    </conditional>
53    <conditional name="paired">
      <!-- Library layout. Both branches define input1 (fastqsanger); only the
           "paired" branch adds input2, the reverse-read FASTQ file. -->
54      <param name="sPaired" type="select" label="Is this library mate-paired?">
55        <option value="single">Single-end</option>
56        <option value="paired">Paired-end</option>
57      </param>
58      <when value="single">
59        <param name="input1" type="data" format="fastqsanger" label="FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
60      </when>
61      <when value="paired">
62        <param name="input1" type="data" format="fastqsanger" label="Forward FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
63        <param name="input2" type="data" format="fastqsanger" label="Reverse FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
64      </when>
65    </conditional>     
66    <conditional name="params">
      <!-- BWA tuning options. "pre_set" exposes no parameters (the command template
           then passes the literal "None" for each of them); "full" exposes the
           options listed below. The BWA flag each parameter corresponds to is shown
           in parentheses in its label or help text. -->
67      <param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
68        <option value="pre_set">Commonly Used</option>
69        <option value="full">Full Parameter List</option>
70      </param>
71      <when value="pre_set" />
72      <when value="full">
73        <param name="maxEditDist" type="integer" value="0" label="Maximum edit distance (-n)" help="Enter this value OR a fraction of missing alignments, not both" />
74        <param name="fracMissingAligns" type="float" value="0.04" label="Fraction of missing alignments given 2% uniform base error rate (-n)" help="Enter this value OR maximum edit distance, not both" />
75        <param name="maxGapOpens" type="integer" value="1" label="Maximum number of gap opens (-o)" />
76        <param name="maxGapExtens" type="integer" value="-1" label="Maximum number of gap extensions (-e)" help="-1 for k-difference mode (disallowing long gaps)" />
77        <param name="disallowLongDel" type="integer" value="16" label="Disallow long deletion within [value] bp towards the 3'-end (-d)" />
78        <param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end (-i)" />
79        <param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed (-l)" help="Enter -1 for infinity" />
80        <param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed (-k)" />
        <!-- The threshold is relative to the best score, matching the -M entry in this tool's help text. -->
81        <param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty (-M)" help="BWA will not search for suboptimal hits with a score lower than (bestScore - [value])" />
82        <param name="gapOpenPenalty" type="integer" value="11" label="Gap open penalty (-O)" />
83        <param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty (-E)" />
84        <param name="suboptAlign" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Proceed with suboptimal alignments even if the top hit is a repeat" help="By default, BWA only searches for suboptimal alignments if the top hit is unique. Using this option has no effect on accuracy for single-end reads. It is mainly designed for improving the alignment accuracy of paired-end reads. However, the pairing procedure will be slowed down, especially for very short reads (~32bp) (-R)" />
85        <param name="noIterSearch" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Disable iterative search" help="All hits with no more than maxDiff differences will be found. This mode is much slower than the default (-N)" />
86        <param name="outputTopN" type="integer" value="-1" label="Output top [value] hits" help="For single-end reads only. Enter -1 to disable outputting multiple hits. NOTE: If you put in a positive value here, your output will NOT be in SAM format (-n)" />
87        <param name="maxInsertSize" type="integer" value="500" label="Maximum insert size for a read pair to be considered as being mapped properly" help="For paired-end reads only. Only used when there are not enough good alignments to infer the distribution of insert sizes (-a)" />
88        <param name="maxOccurPairing" type="integer" value="100000" label="Maximum occurrences of a read for pairing" help="For paired-end reads only. A read with more occurrences will be treated as a single-end read. Reducing this parameter helps faster pairing (-o)" />
89      </when>
90    </conditional>
<!-- Boolean option, checked by default, whose value ("true" or "false") is passed to the wrapper as the suppressHeader argument. -->
91    <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Suppress the header in the output SAM file" help="BWA produces SAM with several lines of header information" />
92  </inputs>
93  <outputs>
    <!-- A single SAM dataset named "output". When a built-in index is used, its dbkey
         metadata is set from column 0 of the bwa_indexes data table, taking the row
         whose column 1 equals the selected genomeSource.indices value. No action is
         defined here for the "history" case. -->
94    <data format="sam" name="output">
95      <actions>
96        <conditional name="genomeSource.refGenomeSource">
97          <when value="indexed">
98            <action type="metadata" name="dbkey">
99              <option type="from_data_table" name="bwa_indexes" column="0">
100                <filter type="param_value" ref="genomeSource.indices" column="1"/>
101              </option>
102            </action>
103          </when>
104        </conditional>
105      </actions>
106    </data>
107  </outputs>
108  <tests>
109    <test>
110      <!--
      Single-end run against the built-in phiX index with the pre_set (Commonly Used) settings.
      NOTE(review): the commands below pass "phiX" as the reference while the note names
      "phiX.fa" as the prefix; confirm which one matches the installed index.
111      BWA commands:
112      bwa aln -t 4 phiX test-data/bwa_wrapper_in1.fastq > bwa_wrapper_out1.sai
113      bwa samse phiX bwa_wrapper_out1.sai test-data/bwa_wrapper_in1.fastq >> bwa_wrapper_out1.sam
114      phiX.fa is the prefix for the reference files (phiX.fa.amb, phiX.fa.ann, phiX.fa.bwt, ...)
115      remove the comment lines (beginning with '@') from the resulting sam file
116      -->
117      <param name="refGenomeSource" value="indexed" />
118      <param name="indices" value="phiX" />
119      <param name="sPaired" value="single" />
120      <param name="input1" value="bwa_wrapper_in1.fastq" ftype="fastqsanger" />
121      <param name="source_select" value="pre_set" />
122      <param name="suppressHeader" value="true" />
123      <output name="output" file="bwa_wrapper_out1.sam" ftype="sam" sort="True" />
124    </test>
125    <test>
126      <!--
      Single-end run against a reference taken from the history (phiX.fasta) with the full parameter list.
127      BWA commands:
128      cp test-data/phiX.fasta phiX.fasta
129      bwa index -a is phiX.fasta
130      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in1.fastq > bwa_wrapper_out2.sai
131      bwa samse phiX.fasta bwa_wrapper_out2.sai test-data/bwa_wrapper_in1.fastq > bwa_wrapper_out2.sam
132      phiX.fasta is the prefix for the reference files (phiX.fasta.amb, phiX.fasta.ann, phiX.fasta.bwt, ...)
133      remove the comment lines (beginning with '@') from the resulting sam file
134      -->
135      <param name="refGenomeSource" value="history" />
136      <param name="ownFile" value="phiX.fasta" />
137      <param name="sPaired" value="single" />
138      <param name="input1" value="bwa_wrapper_in1.fastq" ftype="fastqsanger" />
139      <param name="source_select" value="full" />
140      <param name="maxEditDist" value="0" /> 
141      <param name="fracMissingAligns" value="0.04" />
142      <param name="maxGapOpens" value="1" />
143      <param name="maxGapExtens" value="-1" />
144      <param name="disallowLongDel" value="16" />
145      <param name="disallowIndel" value="5" />
146      <param name="seed" value="-1" />
147      <param name="maxEditDistSeed" value="2" />
148      <param name="mismatchPenalty" value="3" />
149      <param name="gapOpenPenalty" value="11" />
150      <param name="gapExtensPenalty" value="4" />
151      <param name="suboptAlign" value="true" />
152      <param name="noIterSearch" value="true" />
153      <param name="outputTopN" value="-1" />
154      <param name="maxInsertSize" value="500" />
155      <param name="maxOccurPairing" value="100000" />
156      <param name="suppressHeader" value="true" />
157      <output name="output" file="bwa_wrapper_out2.sam" ftype="sam" sort="True" />
158    </test>
159    <test>
160      <!--
      Paired-end run against the built-in phiX index with the full parameter list.
      NOTE(review): the aln commands below use "phiX.fa" but the sampe command uses
      "phiX.fasta"; confirm which reference prefix produced the expected output.
161      BWA commands:
162      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fa test-data/bwa_wrapper_in2.fastq > bwa_wrapper_out3a.sai
163      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fa test-data/bwa_wrapper_in3.fastq > bwa_wrapper_out3b.sai
164      bwa sampe -a 500 -o 100000 phiX.fasta bwa_wrapper_out3a.sai bwa_wrapper_out3b.sai test-data/bwa_wrapper_in2.fastq test-data/bwa_wrapper_in3.fastq > bwa_wrapper_out3.sam
165      phiX.fa is the prefix for the reference
166      remove the comment lines (beginning with '@') from the resulting sam file
167      -->
168      <param name="refGenomeSource" value="indexed" />
169      <param name="indices" value="phiX" />
170      <param name="sPaired" value="paired" />
171      <param name="input1" value="bwa_wrapper_in2.fastq" ftype="fastqsanger" />
172      <param name="input2" value="bwa_wrapper_in3.fastq" ftype="fastqsanger" />
173      <param name="source_select" value="full" />
174      <param name="maxEditDist" value="0" />
175      <param name="fracMissingAligns" value="0.04" />
176      <param name="maxGapOpens" value="1" />
177      <param name="maxGapExtens" value="-1" />
178      <param name="disallowLongDel" value="16" />
179      <param name="disallowIndel" value="5" />
180      <param name="seed" value="-1" />
181      <param name="maxEditDistSeed" value="2" />
182      <param name="mismatchPenalty" value="3" />
183      <param name="gapOpenPenalty" value="11" />
184      <param name="gapExtensPenalty" value="4" />
185      <param name="suboptAlign" value="true" />
186      <param name="noIterSearch" value="true" />
187      <param name="outputTopN" value="-1" />
188      <param name="maxInsertSize" value="500" />
189      <param name="maxOccurPairing" value="100000" />
190      <param name="suppressHeader" value="true" />
191      <output name="output" file="bwa_wrapper_out3.sam" ftype="sam" sort="True" />
192    </test>
193  </tests>
194  <help>
195
196**What it does**
197
198BWA is a fast, lightweight tool that aligns relatively short sequences (queries) to a large sequence database, such as the human reference genome. It was developed by Heng Li at the Sanger Institute. Li H. and Durbin R. (2009) Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 25, 1754-60.
199
200------
201
202**Know what you are doing**
203
204.. class:: warningmark
205
206There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
207
208 .. __: http://bio-bwa.sourceforge.net/
209
210------
211
212**Input formats**
213
214BWA accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files.
215
216------
217
218**Outputs**
219
220The output is in SAM format, and has the following columns::
221
222    Column  Description
223  --------  --------------------------------------------------------
224  1  QNAME  Query (pair) NAME
225  2  FLAG   bitwise FLAG
226  3  RNAME  Reference sequence NAME
227  4  POS    1-based leftmost POSition/coordinate of clipped sequence
228  5  MAPQ   MAPping Quality (Phred-scaled)
229  6  CIGAR  extended CIGAR string
230  7  MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
231  8  MPOS   1-based Mate POSition
232  9  ISIZE  Inferred insert SIZE
233  10 SEQ    query SEQuence on the same strand as the reference
234  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
235  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
236 
237The flags are as follows::
238
239    Flag  Description
240  ------  -------------------------------------
241  0x0001  the read is paired in sequencing
242  0x0002  the read is mapped in a proper pair
243  0x0004  the query sequence itself is unmapped
244  0x0008  the mate is unmapped
245  0x0010  strand of the query (1 for reverse)
246  0x0020  strand of the mate
247  0x0040  the read is the first read in a pair
248  0x0080  the read is the second read in a pair
249  0x0100  the alignment is not primary
250
251It looks like this (scroll sideways to see the entire example)::
252
253  QNAME FLAG    RNAME   POS     MAPQ    CIGAR   MRNM    MPOS    ISIZE   SEQ     QUAL    OPT
254  HWI-EAS91_1_30788AAXX:1:1:1761:343    4       *       0       0       *       *       0       0       AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG        hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
255  HWI-EAS91_1_30788AAXX:1:1:1578:331    4       *       0       0       *       *       0       0       GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG        hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
256
257-------
258
259**BWA settings**
260
261All of the options have a default value. You can change any of them. Most BWA options have been implemented here; the color-space option (-c) is not exposed, and the number of threads (-t) is fixed at 4.
262
263------
264
265**BWA parameter list**
266
267This is an exhaustive list of BWA options:
268
269For **aln**::
270
271  -n NUM  Maximum edit distance if the value is INT, or the fraction of missing
272          alignments given 2% uniform base error rate if FLOAT. In the latter
273          case, the maximum edit distance is automatically chosen for different
274          read lengths. [0.04]
275  -o INT  Maximum number of gap opens [1]
276  -e INT  Maximum number of gap extensions, -1 for k-difference mode
277          (disallowing long gaps) [-1]
278  -d INT  Disallow a long deletion within INT bp towards the 3'-end [16]
279  -i INT  Disallow an indel within INT bp towards the ends [5]
280  -l INT  Take the first INT subsequence as seed. If INT is larger than the
281          query sequence, seeding will be disabled. For long reads, this option
282          is typically ranged from 25 to 35 for '-k 2'. [inf]
283  -k INT  Maximum edit distance in the seed [2]
284  -t INT  Number of threads (multi-threading mode) [1]
285  -M INT  Mismatch penalty. BWA will not search for suboptimal hits with a score
286          lower than (bestScore-misMsc). [3]
287  -O INT  Gap open penalty [11]
288  -E INT  Gap extension penalty [4]
289  -c      Reverse query but not complement it, which is required for alignment
290          in the color space.
291  -R      Proceed with suboptimal alignments even if the top hit is a repeat. By
292          default, BWA only searches for suboptimal alignments if the top hit is
293          unique. Using this option has no effect on accuracy for single-end
294          reads. It is mainly designed for improving the alignment accuracy of
295          paired-end reads. However, the pairing procedure will be slowed down,
296          especially for very short reads (~32bp).
297  -N      Disable iterative search. All hits with no more than maxDiff
298          differences will be found. This mode is much slower than the default.
299
300For **samse**::
301
302  -n INT  Output up to INT top hits. Value -1 to disable outputting multiple
303          hits. NOTE: Entering a value other than -1 will result in output that
304          is not in SAM format, and therefore not usable further down the
305          pipeline. Check the BWA documentation for details on the format of
306          the output. [-1]
307
308For **sampe**::
309
310  -a INT  Maximum insert size for a read pair to be considered as being mapped
311          properly. Since version 0.4.5, this option is only used when there
312          are not enough good alignments to infer the distribution of insert
313          sizes. [500]
314  -o INT  Maximum occurrences of a read for pairing. A read with more
315          occurrences will be treated as a single-end read. Reducing this
316          parameter helps faster pairing. [100000]
317
318  </help>
319</tool>
320
321
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。