root/galaxy-central/tools/sr_mapping/lastz_paired_reads_wrapper.xml

リビジョン 2, 15.9 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1<tool id="lastz_paired_reads_wrapper" name="Lastz paired reads" version="1.0.0">
2    <description> map short paired reads against reference sequence</description>
3    <command interpreter="python">lastz_paired_reads_wrapper.py
4#if $seq_name.how_to_name=="yes":
5--ref_name=$seq_name.ref_name
6#else:
7--ref_name="None"
8#end if
9--ref_source=$source.ref_source --input2=$input2 --input3=$input3 --input4=$input4
10#if $source.ref_source=="history":
11--input1=$source.input1
12--ref_sequences=$source.input1.metadata.sequences
13#else:
14--input1=$source.input1_2bit
15--ref_sequences="None"
16#end if
17--output=$output1 --lastz_seqs_file_dir=${GALAXY_DATA_INDEX_DIR}
18    </command>
19    <inputs>
20        <param name="input2" format="fasta" type="data" label="Align sequencing reads in" />
21        <conditional name="source">
22            <param name="ref_source" type="select" label="Against reference sequences that are">
23                <option value="cached">locally cached</option>
24                <option value="history">in your history</option>
25            </param>
26            <when value="cached">
27                <param name="input1_2bit" type="select" label="Using reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
28                    <options from_file="lastz_seqs.loc">
29                        <column name="value" index="1" />
30                        <column name="name" index="0" />
31                    </options>
32                </param>
33            </when>
34            <when value="history">
35                <param name="input1" type="data" format="fasta" label="Select a reference dataset" />
36            </when>
37        </conditional>
38        <param name="input3" format="fasta" type="data" label="Linker file" />
39        <param name="input4" format="qual454" type="data" label="Select a base quality score 454 dataset" />
40        <conditional name="seq_name">
41            <param name="how_to_name" type="select" label="Do you want to modify the reference name?">
42                <option value="no">No</option>
43                <option value="yes">Yes</option>
44            </param>
45            <when value="yes">
46                <param name="ref_name" type="text" size="25" value="Type sequence name here" label="Enter name for the Reference sequence"/>
47            </when>
48            <when value="no" />
49        </conditional>
50    </inputs>
51    <outputs>
52        <data format="sam" name="output1" />
53    </outputs>
54    <requirements>
55        <requirement type="package">lastz</requirement>
56    </requirements>
57    <tests>
58        <test>
59            <!--
60                input1: a reference genome ( 2bit or fasta )
61                input2: a collection of 454 paired end reads ( a fasta file )
62                input3: a linker sequence ( a very small fasta file )
63                input4: a base quality score 454 file ( qual454 )
64            -->
65            <param name="input2" value="lastz_paired_input2.fasta" ftype="fasta" />
66            <param name="ref_source" value="cached" />
67            <param name="input1_2bit" value="hg18Chr21" />
68            <param name="input3" value="lastz_paired_input3.fasta" ftype="fasta" />
69            <param name="input4" value="lastz_paired_input4.qual454" ftype="qual454" />
70            <param name="how_to_name" value="no" />
71            <output name="output1" file="lastz_paired_out1.sam" />
72        </test>
73    </tests>
74    <help>
75       
76**What it does**   
77       
78**LASTZ** is a high performance pairwise sequence aligner derived from BLASTZ. It is written by Bob Harris in Webb Miller's laboratory at Penn State University. Special scoring sets were derived to improve runtime performance and quality. This Galaxy version of LASTZ is geared towards aligning short (Illumina/Solexa, AB/SOLiD) and medium (Roche/454) paired reads against a reference sequence. There is excellent, extensive documentation on LASTZ available here_.
79
80 .. _here: http://www.bx.psu.edu/miller_lab/dist/README.lastz-1.02.00/README.lastz-1.02.00.html
81 
82------
83
84**Input formats**
85
86LASTZ accepts reference and reads in FASTA format. However, because Galaxy supports implicit format conversion, the tool will recognize fastq and other method-specific formats.
87
88------
89
90**Outputs**
91
92This LASTZ tool produces a SAM file showing sequence alignments.
93
94**SAM output**
95
96SAM has 12 columns::
97
98                                   1     2     3         4   5    6  7         8     9                                    10                                     11  12
99  ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
100  HWI-EAS91_1_30788AAXX:1:2:1670:915    99  chr9  58119878  60  36M  =  58120234   392  GACCCCTACCCCACCGTGCTCTGGATCTCAGTGTTT   IIIIIIIIIIIIIIIIEIIIIIII7IIIIIIIIIII  XT:A:U  NM:i:0  SM:i:37  AM:i:37  X0:i:1  X1:i:0  XM:i:0  XO:i:0  XG:i:0  MD:Z:36
101  HWI-EAS91_1_30788AAXX:1:2:1670:915   147  chr9  58120234  60  36M  =  58119878  -392  ATGAGTCGAATTCTATTTTCCAAACTGTTAACAAAA   IFIIDI;IIICIIIIIIIIIIIIIIIIIIIIIIIII  XT:A:U  NM:i:0  SM:i:37  AM:i:37  X0:i:1  X1:i:0  XM:i:0  XO:i:0  XG:i:0  MD:Z:36
102
103
104where::
105
106     Column  Description
107  ---------  ---------------------------------------------------------------------   
108   1. QNAME  Query (pair) NAME
109   2. FLAG   bitwise FLAG
110   3. RNAME  Reference sequence NAME
111   4. POS    1-based leftmost POSition/coordinate of clipped sequence
112   5. MAPQ   MAPping Quality (Phred-scaled)
113   6. CIGAR  extended CIGAR string
114   7. MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
115   8. MPOS   1-based Mate POSition
116   9. ISIZE  Inferred insert SIZE
117  10. SEQ    query SEQuence on the same strand as the reference
118  11. QUAL   query QUALity (ASCII-33 gives the Phred base quality)
119  12. OPT    variable OPTional fields in the format TAG:VTYPE:VALUE, tab-separated
120 
121The flags are as follows::
122
123    Flag  Description
124  ------  -------------------------------------
125  0x0001  the read is paired in sequencing
126  0x0002  the read is mapped in a proper pair
127  0x0004  the query sequence itself is unmapped
128  0x0008  the mate is unmapped
129  0x0010  strand of the query (1 for reverse)
130  0x0020  strand of the mate
131  0x0040  the read is the first read in a pair
132  0x0080  the read is the second read in a pair
133  0x0100  the alignment is not primary
134
135------
136
137**Do you want to modify the reference name?**
138
139This option allows you to set the name of the reference sequence manually. This is helpful when, for example, you would like to make the reference name compatible with the UCSC naming conventions to be able to display your lastz results as a custom track at the UCSC Genome Browser.
140
141------
142
143**LASTZ parameter list**
144
145This is an exhaustive list of LASTZ options. Once again, please note that not all parameters are included in this interface. If you would like to make additional options available through Galaxy, e-mail us at galaxy-bugs@bx.psu.edu::
146
147  target[[s..e]][-]       spec/file containing target sequence (fasta or nib)
148                          [s..e] defines a subrange of the file
149                          - indicates reverse-complement
150                          (use --help=files for more details)
151  query[[s..e]][-]        spec/file containing query sequences (fasta or nib)
152                          if absent, queries come from stdin (unless they
153                          aren't needed, as for --self or --tableonly)
154                          (use --help=files for more details)
155  --self                  the target sequence is also the query
156  --quantum               the query sequence contains quantum DNA
157  --seed=match&lt;length&gt;    use a word with no gaps instead of a seed pattern
158  --seed=half&lt;length&gt;     use space-free half-weight word instead of seed pattern
159  --match=&lt;reward&gt;[,&lt;penalty&gt;]   set the score values for a match (+&lt;reward&gt;)
160                          and mismatch (-&lt;penalty&gt;)
161  --[no]trans[ition][=2]         allow one or two transitions in a seed hit
162                          (by default a transition is allowed)
163  --word=&lt;bits&gt;           set max bits for word hash;  use this to trade time for
164                          memory, eliminating thrashing for heavy seeds
165                          (default is 28 bits)
166  --[no]filter=[&lt;T&gt;:]&lt;M&gt;     filter half-weight seed hits, requiring at least M
167                          matches and allowing no more than T transversions
168                          (default is no filtering)
169  --notwins               require just one seed hit
170  --twins=[&lt;min&gt;:]&lt;maxgap&gt;   require two nearby seed hits on the same diagonal
171                          (default is twins aren't required)
172  --notwins               allow single, isolated seeds
173  --[no]recoverseeds      avoid losing seeds in hash collisions. Cannot be used with --twins
174  --seedqueue=&lt;entries&gt;   set number of entries in seed hit queue
175                          (default is 262144)
176  --anchors=&lt;file&gt;        read anchors from a file, instead of discovering anchors
177                          via seeding
178  --recoverhits           recover hash-collision seed hits
179                          (default is not to recover seed hits)
180  --step=&lt;length&gt;         set step length (default is 1)
181  --maxwordcount=&lt;limit&gt;  words occurring more often than &lt;limit&gt; in the target
182                          are not eligible for seeds
183  --strand=both           search both strands
184  --strand=plus           search + strand only (matching strand of query spec)
185  --strand=minus          search - strand only (opposite strand of query spec)
186                          (by default both strands are searched)
187  --ambiguousn            treat N as an ambiguous nucleotide
188                          (by default N is treated as a sequence splicing character)
189  --[no]gfextend          perform gap-free extension of seed hits to HSPs
190                          (by default no extension is performed)
191  --[no]chain             perform chaining
192  --chain=&lt;diag,anti&gt;     perform chaining with given penalties for diagonal and
193                          anti-diagonal
194                          (by default no chaining is performed)
195  --[no]gapped            perform gapped alignment (instead of gap-free)
196                          (by default gapped alignment is performed)
197  --score[s]=&lt;file&gt;         read substitution scores from a file
198                          (default is HOXD70)
199  --unitscore[s]          scores are +1/-1 for match/mismatch
200  --gap=&lt;[open,]extend&gt;   set gap open and extend penalties (default is 400,30)
201  --xdrop=&lt;score&gt;         set x-drop threshold (default is 10*sub[A][A])
202  --ydrop=&lt;score&gt;         set y-drop threshold (default is open+300extend)
203  --infer[=&lt;control&gt;]     infer scores from the sequences, then use them
204  --inferonly[=&lt;control&gt;]   infer scores, but don't use them (requires --infscores)
205                          all inference options are read from the control file
206  --infscores[=&lt;file&gt;]    write inferred scores to a file
207  --hspthresh=&lt;score&gt;     set threshold for high scoring pairs (default is 3000)
208                          ungapped extensions scoring lower are discarded
209                          &lt;score&gt; can also be a percentage or base count
210  --entropy               adjust for entropy when qualifying HSPs in the x-drop extension
211                          method
212  --noentropy             don't adjust for entropy when qualifying HSPs
213  --exact=&lt;length&gt;        set threshold for exact matches
214                          if specified, exact matches are found rather than high
215                          scoring pairs (replaces --hspthresh)
216  --inner=&lt;score&gt;         set threshold for HSPs during interpolation
217                          (default is no interpolation)
218  --gappedthresh=&lt;score&gt;  set threshold for gapped alignments
219                          gapped extensions scoring lower are discarded
220                          &lt;score&gt; can also be a percentage or base count
221                          (default is to use same value as --hspthresh)
222  --ball=&lt;score&gt;          set minimum score required of words 'in' a quantum ball
223  --[no]entropy           involve entropy in filtering high scoring pairs
224                          (default is "entropy")
225  --[no]mirror            report/use mirror image of all gap-free alignments
226                          (default is "mirror" for self-alignments only)
227  --traceback=&lt;bytes&gt;     space for trace-back information
228                          (default is 80.0M)
229  --masking=&lt;count&gt;       mask any position in target hit this many times
230                          zero indicates no masking
231                          (default is no masking)
232  --targetcapsule=&lt;capsule_file&gt;   the target seed word position table and seed
233                          (as well as the target sequence) are read from the specified file
234  --segments=&lt;segment_file&gt;   read segments from a file, instead of discovering
235                          them via seeding. Replaces other seeding or gap-free extension
236                          options
237  --[no]census[=&lt;file&gt;]     count/report how many times each target base aligns
238                          (default is to not report census)
239  --identity=&lt;min&gt;[..&lt;max&gt;]   filter alignments by percent identity
240                          0&lt;=min&lt;=max&lt;=100;  blocks (or HSPs) outside min..max
241                          are discarded
242                          (default is no identity filtering)
243  --coverage=&lt;min&gt;[..&lt;max&gt;]   filter alignments by percentage of query covered
244                          0&lt;=min&lt;=max&lt;=100;  blocks (or HSPs) outside min..max
245                          are discarded
246                          (default is no query coverage filtering)
247  --notrivial             do not output trivial self-alignment block if the target and query
248                          sequences are identical. Using --self enables this option automatically
249  --output=&lt;output_file&gt;  write the alignments to the specified file name instead of stdout
250  --code=&lt;file&gt;           give quantum code for query sequence (only for display)
251  --format=&lt;type&gt;         specify output format; one of lav, axt, maf, maf+, maf-, text,
252                          lav+text, cigar, text, rdotplot, general, or general:&lt;fields&gt;
253                          (by default output is LAV)
254  --rdotplot=&lt;file&gt;       create an additional output file suitable for plotting the alignments
255                          with the R statistical package.
256  --markend               Just before normal completion, write "# lastz end-of-file" to output file
257  --census[=&lt;output_file&gt;]    count and report how many times each target base aligns, up
258                          to 255. Ns are included in the count
259  --census16[=&lt;output_file&gt;]  count and report how many times each target base aligns, up
260                          to 65 thousand
261  --census32[=&lt;output_file&gt;]  count and report how many times each target base aligns, up
262                          to 4 billion
263  --writecapsule=&lt;capsule_file&gt;    just write out a target capsule file and quit; don't
264                          search for seeds or perform subsequent stages
265  --verbosity=&lt;level&gt;     set info level (0 is minimum, 10 is everything)
266                          (default is 0)
267  --[no]runtime           report runtime in the output file
268                          (default is to not report runtime)
269  --tableonly[=count]     just produce the target position table, don't
270                          search for seeds
271  --[no]stats[=&lt;file&gt;]    show search statistics (or don't)
272                          (not available in this build)
273  --version               report the program version and quit
274  --help                  list all options
275  --help=files            list information about file specifiers
276  --help=short[cuts]      list blastz-compatible shortcuts
277  --help=yasra            list yasra-specific shortcuts
278
279    </help>
280</tool>
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。