root/galaxy-central/tools/sr_mapping/bfast_wrapper.xml

リビジョン 2, 20.2 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1<tool id="bfast_wrapper" name="Map with BFAST" version="0.1.2">
2  <description></description>
3  <command interpreter="python">bfast_wrapper.py
4    --numThreads="4" ##HACK: the thread count is hard-coded to 4 for now; it should come from a location file
5    --fastq="$input1"
6    #if $input1.extension.startswith( "fastqcs" ):
7        ##if the extension starts with "fastqcs", the input is a color space file
8        --space="1" ##color space
9    #else
10        --space="0" ##NT space
11    #end if
12    --output="$output"
13    $suppressHeader ##boolean param: expands to --suppressHeader or to an empty string
14   
15    #if $refGenomeSource.refGenomeSource_type == "history":
16      ##build indexes on the fly from the reference dataset selected from the history
17      --buildIndex
18      --ref="${refGenomeSource.ownFile}"
19      --indexMask="${",".join( [ "%s:%s" % ( str( custom_index.get( 'mask' ) ).strip(), str( custom_index.get( 'hash_width' ) ).strip() ) for custom_index in $refGenomeSource.custom_index ] )}" ##one "mask:hash_width" pair per custom_index repeat entry, joined with commas
20      ${refGenomeSource.indexing_repeatmasker}
21      #if $refGenomeSource.indexing_option.indexing_option_selector == "contig_offset":
22        --indexContigOptions="${refGenomeSource.indexing_option.start_contig},${refGenomeSource.indexing_option.start_pos},${refGenomeSource.indexing_option.end_contig},${refGenomeSource.indexing_option.end_pos}"
23      #elif $refGenomeSource.indexing_option.indexing_option_selector == "exons_file":
24        --indexExonsFileName="${refGenomeSource.indexing_option.exons_file}"
25      #end if
26    #else:
27      ##use precomputed indexes: take the first bfast_indexes data table row whose first field equals the selected value and pass that row's last field as --ref
28      --ref="${ filter( lambda x: str( x[0] ) == str( $refGenomeSource.indices ), $__app__.tool_data_tables[ 'bfast_indexes' ].get_fields() )[0][-1] }"
29    #end if
30   
31    #if $params.source_select == "full":
32      --offsets="$params.offsets"
33      --keySize="$params.keySize"
34      --maxKeyMatches="$params.maxKeyMatches"
35      --maxNumMatches="$params.maxNumMatches"
36      --whichStrand="$params.whichStrand"
37     
38      #if str( $params.scoringMatrixFileName ) != 'None':
39        --scoringMatrixFileName="$params.scoringMatrixFileName"
40      #end if
41      ${params.ungapped}
42      ${params.unconstrained}
43      --offset="${params.offset}"
44      --avgMismatchQuality="${params.avgMismatchQuality}"
45     
46      --algorithm="${params.localalign_params.algorithm}"
47      ${params.unpaired}
48      ${params.reverseStrand}
49      #if $params.localalign_params.algorithm == "3":
50        ${params.localalign_params.pairedEndInfer} ##these two flags are only defined in the algorithm "3" branch of the inputs
51        ${params.localalign_params.randomBest}
52      #end if
53    #end if
54  </command>
55  <inputs>
56    <param name="input1" type="data" format="fastqsanger,fastqcssanger" label="FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
57    <conditional name="refGenomeSource"> <!-- reference selection: the "history" branch makes the command template build indexes on the fly, the "indexed" branch passes a path looked up in the bfast_indexes data table -->
58      <param name="refGenomeSource_type" type="select" label="Will you select a reference genome from your history or use a built-in index?">
59        <option value="indexed">Use a built-in index</option>
60        <option value="history">Use one from the history</option>
61      </param>
62      <when value="indexed">
63        <param name="indices" type="select" label="Select a reference genome index set">
64          <options from_data_table="bfast_indexes"> <!-- NOTE(review): the filters below appear to keep only rows whose comma-separated column 2 lists the extension of input1, sorted by column 1; confirm against the Galaxy dynamic options documentation -->
65            <filter type="multiple_splitter" column="2" separator=","/>
66            <filter type="param_value" column="2" ref="input1" ref_attribute="extension"/>
67            <filter type="sort_by" column="1"/>
68            <validator type="no_options" message="No indexes are available for the selected input dataset"/>
69          </options>
70        </param>
71      </when>
72      <when value="history">
73        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
74        <repeat name="custom_index" title="Custom indice" min="1" > <!-- each entry contributes one mask:hash_width pair to the indexMask option of the command line -->
75            <param name="mask" type="text" value="" label="Specify the mask" size="20">
76              <!-- TODO: a validator is needed here (an int validator, or a regex accepting only 0s and 1s); disabled draft: <validator type="no_options" message="No indexes are available for the selected input dataset"/> -->
77            </param>
78            <param name="hash_width" type="integer" value="" label="Hash Width" />
79        </repeat>
80        <param name="indexing_repeatmasker" type="boolean" truevalue="--indexRepeatMasker" falsevalue="" checked="False" label="Do not index lower case sequences" help="Such as those created by RepeatMasker"/>
81        <conditional name="indexing_option"> <!-- optional restriction of what gets indexed: a contig/position range or an exons file -->
82          <param name="indexing_option_selector" type="select" label="BFAST indexing settings to use" help="For most indexing needs use default settings. If you want full control use the other options.">
83            <option value="default">Default</option>
84            <option value="contig_offset">Contig Offset</option>
85            <option value="exons_file">Exons file</option>
86          </param>
87          <when value="default">
88            <!-- no extra parameters: the command template adds no indexing range option in this case -->
89          </when>
90          <when value="contig_offset">
91            <param name="start_contig" type="integer" value="-1" label="Start Contig" help="Specifies the first contig to include when building indexes. (advanced users only)" />
92            <param name="start_pos" type="integer" value="-1" label="Start Position" help="Specifies the first position in the first contig to include when building indexes. (advanced users only)" />
93            <param name="end_contig" type="integer" value="-1" label="End Contig" help="Specifies the last contig to include when building indexes. (advanced users only)" />
94            <param name="end_pos" type="integer" value="-1" label="End Position" help="Specifies the last position in the last contig to include when building indexes. (advanced users only)" />
95          </when>
96          <when value="exons_file">
97            <param name="exons_file" type="data" format="tabular" label="Select an exons file from history" help="See BFAST manual for file format requirements. (advanced users only)"/>
98          </when>
99        </conditional>
100      </when>
101    </conditional>
102    <conditional name="params"> <!-- matching, local alignment and post-processing settings; the command template only emits these options when source_select is "full" -->
103      <param name="source_select" type="select" label="BFAST matching settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
104        <option value="pre_set">Commonly Used</option>
105        <option value="full">Full Parameter List</option>
106      </param>
107      <when value="pre_set">
108        <!-- no parameters: none of the options of the full list are put on the command line -->
109      </when>
110      <when value="full">
111        <param name="offsets" type="text" value="" label="The offsets for 'bfast match'" help="Set if not all offsets from the 5' end of the read are to be examined (advanced users only)" />
112        <param name="keySize" type="integer" value="-1" label="Truncate key size in 'match'" help="Set this to reduce the effective key size of all indexes in 'bfast match' (advanced users only)" />
113        <param name="maxKeyMatches" type="integer" value="8" label="The maximum number of matches to allow before a key is ignored" help="Lower values will result in more unique regions being examined, while larger values will allow include repetitive regions" />
114        <param name="maxNumMatches" type="integer" value="384" label="The maximum number of matches to allow before a read is discarded" help="Larger values will allow more hits to be examined" />
115                <param name="whichStrand" type="select" label="The strands to consider" help="Both strands, forward strand only, or reverse strand only">
116                        <option value="0">Both strands</option>
117                        <option value="1">Forward strand only</option>
118                        <option value="2">Reverse strand only</option>
119                </param>
120       
121        <param name="scoringMatrixFileName" type="data" format="text" optional="True" label="Scoring Matrix file used to score the alignments" help="See BFAST manual for file format requirements. (advanced users only)"/> <!-- optional: the command template skips this option when the value renders as 'None' -->
122        <param name="ungapped" type="boolean" truevalue="--ungapped" falsevalue="" checked="no" label="Perform ungapped local alignment" help="Performing ungapped local alignment will not consider indels while providing a significant speed increase" />
123        <param name="unconstrained" type="boolean" truevalue="--unconstrained" falsevalue="" checked="no" label="Perform unconstrained local alignment" help="Performing unconstrained local alignment will not use mask constraints at the cost of speed" />
124        <param name="offset" type="integer" value="20" label="The number of bases before and after each hit to consider in local alignment" help="Larger values will allow for larger insertions and deletions to be detected at the cost of speed" />
125        <param name="avgMismatchQuality" type="integer" value="10" label="The average mismatch quality" help="This can be used as a scaling factor for mapping quality (advanced users only)" />
126       
127            <conditional name="localalign_params"> <!-- post-processing algorithm choice; only value "3" exposes extra options -->
128              <param name="algorithm" type="select" label="The post processing algorithm" help="This determines how reads with multiple candidate alignments are returned.  Unique alignments will return an alignment if the read has only one candidate alignment.  Uniquely best scoring alignments will return one alignment for a read if that alignment has a better alignment score than the rest of the candidate alignments.  All best scoring alignments will return all alignments that have the best alignment score for a read.">
129              <option value="0" selected="True">No filtering</option>
130              <option value="1">All alignments that pass filtering</option>
131              <option value="2">Unique alignments</option>
132              <option value="3">Uniquely best scoring alignments</option>
133              <option value="4">All best scoring alignments</option>
134          </param>
135          <when value="0">
136            <!-- no extra parameters for this algorithm -->
137          </when>
138          <when value="1">
139            <!-- no extra parameters for this algorithm -->
140          </when>
141          <when value="2">
142            <!-- no extra parameters for this algorithm -->
143          </when>
144          <when value="4">
145            <!-- no extra parameters for this algorithm -->
146          </when>
147          <when value="3">
148            <param name="pairedEndInfer" type="boolean" truevalue="--pairedEndInfer" falsevalue="" checked="no" label="pairedEndInfer" help="break ties when one end of a paired end read by estimating the insert size distribution" />
149            <param name="randomBest" type="boolean" truevalue="--randomBest" falsevalue="" checked="no" label="Random alignments" help="output a random best scoring alignment (advanced users only)" />
150          </when>
151        </conditional>
152        <param name="unpaired" type="boolean" truevalue="--unpaired" falsevalue="" checked="no" label="Disallow pairing" help="do not choose alignments based on pairing" />
153        <param name="reverseStrand" type="boolean" truevalue="--reverseStrand" falsevalue="" checked="no" label="Reverse paired ends" help="paired end reads are given on reverse strands" />
154       
155      </when>
156    </conditional>
157    <param name="suppressHeader" type="boolean" truevalue="--suppressHeader" falsevalue="" checked="True" label="Suppress the header in the output SAM file" help="BFAST produces SAM with several lines of header information" /> <!-- checked by default; the command template inserts this value as $suppressHeader -->
158  </inputs>
159  <outputs>
160    <data format="sam" name="output" > <!-- single SAM dataset; its path is passed to bfast_wrapper.py as the output option -->
161      <actions> <!-- sets the dbkey metadata of the output when a built-in index was selected -->
162        <conditional name="refGenomeSource.refGenomeSource_type">
163          <when value="indexed">
164            <action type="metadata" name="dbkey">
165              <option type="from_data_table" column="1" name="bfast_indexes"> <!-- dbkey is read from column 1 of the bfast_indexes row whose column 0 matches the selected index -->
166                <filter type="param_value" ref="refGenomeSource.indices" column="0"/>
167              </option>
168            </action>
169          </when>
170        </conditional> <!-- no branch for "history" is declared here, so no dbkey action is defined for that case in this block -->
171      </actions>
172    </data>
173  </outputs>
174  <help>
175**What it does**
176
177BFAST facilitates the fast and accurate mapping of short reads to reference sequences. Some advantages of BFAST include:
178* Speed: enables billions of short reads to be mapped quickly.
179* Accuracy: A priori probabilities for mapping reads with a defined set of variants
180* An easy way to measurably tune accuracy at the expense of speed.
181Specifically, BFAST was designed to facilitate whole-genome resequencing, where mapping billions of short reads with variants is of utmost importance.
182
183BFAST supports both Illumina and ABI SOLiD data, as well as any other Next-Generation Sequencing Technology (454, Helicos), with particular emphasis on sensitivity towards errors, SNPs and especially indels. Other algorithms take short-cuts by ignoring errors, certain types of variants (indels), and even require further alignment, all to be the "fastest" (but still not complete). BFAST is able to be tuned to find variants regardless of the error-rate, polymorphism rate, or other factors.
184
185------
186
187Please cite the website "http://bfast.sourceforge.net" as well as the accompanying
188papers:
189
190Homer N, Merriman B, Nelson SF.
191BFAST: An alignment tool for large scale genome resequencing.
192PMID: 19907642
193PLoS ONE. 2009 4(11): e7767. 
194http://dx.doi.org/10.1371/journal.pone.0007767 
195
196Homer N, Merriman B, Nelson SF.
197Local alignment of two-base encoded DNA sequence.
198BMC Bioinformatics. 2009 Jun 9;10(1):175.
199PMID: 19508732
200http://dx.doi.org/10.1186/1471-2105-10-175
201
202------
203
204**Know what you are doing**
205
206.. class:: warningmark
207
208There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
209
210.. __: http://bfast.sourceforge.net/
211
212------
213
214**Input formats**
215
216BFAST accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files.
217
218------
219
220**Outputs**
221
222The output is in SAM format, and has the following columns::
223
224    Column  Description
225  --------  --------------------------------------------------------
226  1  QNAME  Query (pair) NAME
227  2  FLAG   bitwise FLAG
228  3  RNAME  Reference sequence NAME
229  4  POS    1-based leftmost POSition/coordinate of clipped sequence
230  5  MAPQ   MAPping Quality (Phred-scaled)
231  6  CIGAR  extended CIGAR string
232  7  MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
233  8  MPOS   1-based Mate POSition
234  9  ISIZE  Inferred insert SIZE
235  10 SEQ    query SEQuence on the same strand as the reference
236  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
237  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
238
239The flags are as follows::
240
241  Flag    Description
242  ------  -------------------------------------
243  0x0001  the read is paired in sequencing
244  0x0002  the read is mapped in a proper pair
245  0x0004  the query sequence itself is unmapped
246  0x0008  the mate is unmapped
247  0x0010  strand of the query (1 for reverse)
248  0x0020  strand of the mate
249  0x0040  the read is the first read in a pair
250  0x0080  the read is the second read in a pair
251  0x0100  the alignment is not primary
252
253It looks like this (scroll sideways to see the entire example)::
254
255  QNAME  FLAG  RNAME  POS  MAPQ  CIGAR  MRNM  MPOS  ISIZE  SEQ  QUAL  OPT
256  HWI-EAS91_1_30788AAXX:1:1:1761:343  4  *  0  0  *  *  0  0  AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG  hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
257  HWI-EAS91_1_30788AAXX:1:1:1578:331  4  *  0  0  *  *  0  0  GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG  hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
258
259-------
260
261**BFAST settings**
262
263All of the options have a default value. You can change any of them. Most of the options in BFAST have been implemented here.
264
265------
266
267**BFAST parameter list**
268
269This is an exhaustive list of BFAST options:
270
271For **match**::
272
273  -o  STRING  Specifies the offset [Use all]
274  -l          Specifies to load all main or secondary indexes into memory
275  -A  INT     0: NT space 1: Color space [0]
276  -k  INT     Specifies to truncate all indexes to have the given key size
277              (must be greater than the hash width) [Not Using]
278  -K  INT     Specifies the maximum number of matches to allow before a key
279              is ignored [8]
280  -M  INT     Specifies the maximum total number of matches to consider
281              before the read is discarded [384]
282  -w  INT     0: consider both strands 1: forward strand only 2: reverse
283              strand only [0]
284  -n  INT     Specifies the number of threads to use [1]
285  -t          Specifies to output timing information
286
287For **localalign**::
288
289  -x  FILE    Specifies the file name storing the scoring matrix
290  -u          Do ungapped local alignment (the default is gapped).
291  -U          Do not use mask constraints from the match step
292  -A  INT     0: NT space 1: Color space [0]
293  -o  INT     Specifies the number of bases before and after the match to
294              include in the reference genome
295  -M  INT     Specifies the maximum total number of matches to consider
296              before the read is discarded [384]
297  -q  INT     Specifies the average mismatch quality
298  -n  INT     Specifies the number of threads to use [1]
299  -t          Specifies to output timing information
300
301For **postprocess**::
302
303  -a  INT     Specifies the algorithm to choose the alignment for each end of the read:
304
305                0: No filtering will occur.
306                1: All alignments that pass the filters will be output
307                2: Only consider reads that have been aligned uniquely
308                3: Choose uniquely the alignment with the best score
309                4: Choose all alignments with the best score
310 
311  -A  INT     0: NT space 1: Color space [0]
312  -U          Specifies that pairing should not be performed
313  -R          Specifies that paired reads are on opposite strands
314  -q  INT     Specifies the average mismatch quality
315  -x  FILE    Specifies the file name storing the scoring matrix
316  -z          Specifies to output a random best scoring alignment (with -a 3)
317  -r  FILE    Specifies to add the RG in the specified file to the SAM
318              header and updates the RG tag (and LB/PU tags if present) in
319              the reads (SAM only)
320  -n  INT     Specifies the number of threads to use [1]
321  -t          Specifies to output timing information
322
323  </help>
324  <requirements> <!-- external software this tool declares a dependency on -->
325    <requirement type="package">bfast</requirement>
326  </requirements>
327  <tests>
328    <test> <!-- fastqsanger reads, index built from a history FASTA, header not suppressed -->
329      <param name="input1" ftype="fastqsanger" value="random_phiX_1.fastqsanger" />
330      <param name="refGenomeSource_type" value="history" />
331      <param name="ownFile" ftype="fasta" value="phiX.fasta" />
332      <param name="mask" value="111111111111111111" />
333      <param name="hash_width" value="14" />
334      <param name="source_select" value="pre_set" />
335      <param name="suppressHeader" value="False" />
336      <param name="indexing_repeatmasker" value="False" />
337      <param name="indexing_option_selector" value="default" />
338      <output name="output" ftype="sam" file="bfast_out1.sam" />
339    </test>
340    <test> <!-- same inputs as the first test, but with the SAM header suppressed -->
341      <param name="input1" ftype="fastqsanger" value="random_phiX_1.fastqsanger"/>
342      <param name="refGenomeSource_type" value="history" />
343      <param name="ownFile" ftype="fasta" value="phiX.fasta" />
344      <param name="mask" value="111111111111111111" />
345      <param name="hash_width" value="14" />
346      <param name="source_select" value="pre_set" />
347      <param name="suppressHeader" value="True" />
348      <param name="indexing_repeatmasker" value="False" />
349      <param name="indexing_option_selector" value="default" />
350      <output name="output" ftype="sam" file="bfast_out1.sam" lines_diff="3" /><!-- the comparison file contains 3 header lines, which are suppressed in this run, hence lines_diff="3" -->
351    </test>
352    <test> <!-- fastqcssanger (color space) reads, index built from a history FASTA, header not suppressed -->
353      <param name="input1" ftype="fastqcssanger" value="random_phiX_1.fastqcssanger" />
354      <param name="refGenomeSource_type" value="history" />
355      <param name="ownFile" ftype="fasta" value="phiX.fasta" />
356      <param name="mask" value="111111111111111111" />
357      <param name="hash_width" value="14" />
358      <param name="source_select" value="pre_set" />
359      <param name="suppressHeader" value="False" />
360      <param name="indexing_repeatmasker" value="False" />
361      <param name="indexing_option_selector" value="default" />
362      <output name="output" ftype="sam" file="bfast_out2.sam" />
363    </test>
364    <!-- the following test uses a pre-built (built-in) index instead of a reference from the history -->
365    <test>
366      <param name="input1" ftype="fastqsanger" value="random_phiX_1.fastqsanger" />
367      <param name="refGenomeSource_type" value="indexed" />
368      <param name="indices" value="phiX_nt_50" />
369      <param name="source_select" value="pre_set" />
370      <param name="suppressHeader" value="False" />
371      <param name="indexing_repeatmasker" value="" /> <!-- NOTE(review): this parameter and the next one are only declared in the "history" branch of refGenomeSource; confirm they are needed for an "indexed" run -->
372      <param name="indexing_option_selector" value="default" />
373      <output name="output" ftype="sam" file="bfast_out1.sam" lines_diff="2" /><!-- one line differs from the comparison file: MD:Z:11T38 instead of MD:Z:50 -->
374    </test>
375  </tests>
376</tool>
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。