root/galaxy-central/tools/sr_mapping/PerM.xml

リビジョン 2, 19.2 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1<tool id="PerM" name="Map with PerM" version="1.0.0">
2  <description>for SOLiD and Illumina</description>
3  <!-- works with PerM version 0.2.6 -->
4  <requirements>
5      <requirement type="package">perm</requirement>
6  </requirements>
<!--
  Cheetah template that assembles the PerM command line from the inputs below.
    * reference: the history dataset (ref) when refSource is "history",
      otherwise the value of the selected built-in index
    * reads: a single reads file, or -1/-2 with -U/-L and the optional -e flag
      for mate pairs
    * readFormat: "csfastq" for color space, "fastq" otherwise
    * -v is emitted only when valAlign is at least 0
    * the "full" settings branch adds the seed, the -A/-B/-E report mode, the
      delimiter (unless it is "None"), -T and the three boolean flags
    * -u writes unmapped reads to the output dataset matching the read space
  The alignment itself is written to $output as SAM without a header. PerM's
  stdout is then filtered: carriage returns become newlines, characters other
  than printable ones, tab, newline and space are dropped, only lines
  containing Reads, Sub0, Pairs or single are kept, and two sed expressions
  trim those lines.
-->
7  <command>
8PerM
9#if $s.sourceOfRef.refSource == "history":
10    $s.sourceOfRef.ref
11#else:
12    $s.sourceOfRef.index
13#end if
14#if $s.mate.singleOrPairs == "single":
15    $s.mate.reads
16#else:
17    -1 $s.mate.reads1 -2 $s.mate.reads2
18    -U $s.mate.upperbound
19    -L $s.mate.lowerbound
20    $s.mate.excludeAmbiguousPairs
21#end if
22#if $s.space == "color":
23    --readFormat "csfastq"
24#else:
25    --readFormat "fastq"
26#end if
27#if $int($str($valAlign)) &gt;= 0:
28    -v $valAlign
29#end if
30#if $align.options == "full":
31    --seed $align.seed
32    -$align.alignments
33    #if $str($align.delimiter) != "None":
34        --delimiter $align.delimiter
35    #end if
36    -T $align.sTrimL
37    $align.includeReadsWN
38    $align.statsOnly
39    $align.ignoreQS
40#end if
41#if $str($bUnmappedRead) == "true" and $s.space == "color":
42  -u $unmappedReadOutCS
43#elif $str($bUnmappedRead) == "true" and $s.space == "base":
44  -u $unmappedReadOut
45#end if
46-o $output --outputFormat sam --noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
47  </command>
48  <inputs>
49    <conditional name="s">
50      <param name="space" label="Is your data color space (SOLiD) or base space (Illumina)?" type="select">
51        <option value="color">Color space</option>
52        <option value="base">Base space</option>
53      </param>
54      <when value="color">
55        <conditional name="sourceOfRef">
56          <param name="refSource" label="Will you provide your own reference file from the history or use a built-in index?" type="select">
57            <option value="indexed">Built-in index</option>
58            <option value="history">Fasta file from history</option>
59          </param>
60          <when value="indexed">
61            <param name="index" type="select" label="Select a reference genome (with seed and read length)" help="if your genome of interest is not listed - contact Galaxy team">
62              <options from_file="perm_color_index.loc">
63                <column name="value" index="1" />
64                <column name="name" index="0" />
65              </options>
66            </param>
67          </when>
68          <when value="history">
69            <param name="ref" format="fasta" type="data" label="Reference" />
70          </when>
71        </conditional>
72        <conditional name="mate">
73          <param name="singleOrPairs" label="Mate-paired?" type="select">
74            <option value="single">Single-end</option>
75            <option value="paired">Mate pairs</option>
76          </param>
77          <when value="single">
78            <param format="fastqcssanger" name="reads" type="data" label="Reads" />
79          </when>
80          <when value="paired">
81            <param name="reads1" format="fastqcssanger" label="Forward FASTQ file" type="data" />
82            <param name="reads2" format="fastqcssanger" label="Reverse FASTQ file" type="data" />
83            <param label="Upperbound of pairs separation (-U)" name="upperbound" type="integer" size="8" value="100000" />
84            <param label="Lowerbound of pairs separation (-L)" name="lowerbound" type="integer" size="8" value="0" />
85            <param label="Exclude ambiguous pairs (-e)" name="excludeAmbiguousPairs" type="boolean" checked="false" truevalue="-e" falsevalue="" />
86          </when>
87        </conditional>
88      </when>
89      <when value="base">
90        <conditional name="sourceOfRef">
91          <param name="refSource" label="Will you provide your own reference file from the history or use a built-in index?" type="select">
92            <option value="indexed">Built-in index</option>
93            <option value="history">Fasta file from history</option>
94          </param>
95          <when value="indexed">
96            <param name="index" type="select" label="Select a reference genome with seed and read length" help="if your genome of interest is not listed - contact Galaxy team">
97              <options from_file="perm_base_index.loc">
98                <column name="value" index="1" />
99                <column name="name" index="0" />
100              </options>
101            </param>
102          </when>
103          <when value="history">
104            <param name="ref" format="fasta" type="data" label="Reference" />
105          </when>
106        </conditional>
107        <conditional name="mate">
108          <param name="singleOrPairs" label="Mate-paired?" type="select">
109            <option value="single">Single-end</option>
110            <option value="paired">Mate pairs</option>
111          </param>
112          <when value="single">
113            <param format="fastqsanger" name="reads" type="data" label="Reads" />
114          </when>
115          <when value="paired">
116            <param name="reads1" format="fastqsanger" label="Forward FASTQ file" type="data" />
117            <param name="reads2" format="fastqsanger" label="Reverse FASTQ file" type="data" />
118            <param label="Upperbound of pairs separation (-U)" name="upperbound" type="integer" size="8" value="100000" />
119            <param label="Lowerbound of pairs separation (-L)" name="lowerbound" type="integer" size="8" value="0" />
120            <param label="Exclude ambiguous pairs (-e)" name="excludeAmbiguousPairs" type="boolean" checked="false" truevalue="-e" falsevalue="" />
121          </when>
122        </conditional>
123      </when>
124    </conditional>
125    <param label="Maximum number of mismatches permitted in one end of full read (-v)" name="valAlign" type="integer" size="5" value="2" />
126    <conditional name="align">
127      <param help="Use default setting or specify full parameters list" label="PerM settings to use" name="options" type="select">
128        <option value="preSet">Commonly used</option>
129        <option value="full">Full parameter list</option>
130      </param>
      <!-- preSet exposes no extra parameters; the command template adds none of the full-list flags in this case -->
131      <when value="preSet"/>
      <!-- full: each parameter below maps to the PerM flag named in its label -->
132      <when value="full">
133        <param label="Whether or not to report all valid alignments per read (-A/-B/-E)" name="alignments" type="select">
134          <option value="A">Report all valid alignments</option>
135          <option value="B">Report the best alignments in terms of number of mismatches</option>
136          <option value="E">Report only uniquely mapped reads</option>
137        </param>
138        <param label="Choose the seed full sensitive to different number of mismatches (--seed)" name="seed" type="select" >
139          <option value="F2">2 mismatches</option>
140          <option value="S11">1 SNP + 1 color error</option>
141          <option value="F3">3 mismatches</option>
142          <option value="F4">4 mismatches</option>
143        </param>
144        <param label="Choose the delimiter to identify read name (--delimiter)" name="delimiter" type="select">
145          <option value="None">Tab/Space/Comma</option>
146          <option value=":">Colon</option>
147          <option value="_">Underscore</option>
148        </param>
149        <param label="Use the first n bases of each read for alignment (-T)" name="sTrimL" type="integer" size="5" value="50" />
150        <param name="includeReadsWN" type="boolean" checked="true" truevalue="--includeReadsWN" falsevalue="" label="Include reads with 'N' or '.' by encoding '.' as 3, 'N' as 'A' (--includeReadsWN)" />
151        <param name="statsOnly" type="boolean" checked="false" truevalue="--statsOnly" falsevalue="" label="output mapping stats only. Don't output alignments (--statsOnly)" />
152        <param name="ignoreQS" type="boolean" checked="false" truevalue="--ignoreQS" falsevalue="" label="Ignore quality scores (--ignoreQS)" />
153      </when>
154    </conditional> <!-- align -->
155    <param name="bUnmappedRead" type="select" label="Output the unmapped reads (-u)">
156      <option value="true">Yes</option>
157      <option value="false">No</option>
158    </param>
159  </inputs>
<!--
  Output datasets. "output" (SAM) has no filter. The unmapped-reads dataset is
  filtered on bUnmappedRead being "true"; the read space selects which of the
  two is created: unmappedReadOut (fastqsanger) for base space,
  unmappedReadOutCS (fastqcssanger) for color space.
-->
160  <outputs>
161    <data name="output" format="sam"/>
162    <data name="unmappedReadOut" format="fastqsanger">
163      <filter>bUnmappedRead == "true" and s["space"] == "base"</filter>
164    </data>
165    <data name="unmappedReadOutCS" format="fastqcssanger">
166      <filter>bUnmappedRead == "true" and s["space"] == "color"</filter>
167    </data>
168  </outputs>
169  <tests>
170    <test>
171      <!--
172      PerM command:
173      PerM test-data/phiX.fasta 50 +seed F3 -m -s phiX_F3_50.index +readFormat .fastq
174      PerM phiX_F3_50.index -1 test-data/perm_in1.fastqsanger -2 test-data/perm_in2.fastqsanger -U 100000 -L 0 -e +readFormat .fastq -v 0 +seed F2 -A -T 50 +includeReadsWN -o perm_out1.sam +outputFormat sam +noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
175      You need to replace the + with 2 dashes.
176      -->
177      <param name="space" value="base" />
178      <param name="refSource" value="indexed" />
179      <param name="index" value="phiX_F3_50" />
180      <param name="singleOrPairs" value="paired" />
181      <param name="reads1" value="perm_in1.fastqsanger" ftype="fastqsanger" />
182      <param name="reads2" value="perm_in2.fastqsanger" ftype="fastqsanger" />
183      <param name="upperbound" value="100000" />
184      <param name="lowerbound" value="0" />
185      <param name="excludeAmbiguousPairs" value="true" />
186      <param name="valAlign" value="0" />
187      <param name="options" value="full" />
188      <param name="alignments" value="A" />
189      <param name="seed" value="F2" />
190      <param name="delimiter" value="None" />
191      <param name="sTrimL" value="50" />
192      <param name="includeReadsWN" value="true" />
193      <param name="statsOnly" value="false" />
194      <param name="ignoreQS" value="false" />
195      <param name="bUnmappedRead" value="false" />
196      <output name="output" file="perm_out1.sam" ftype="sam" />
197    </test>
198    <test>
199      <!--
200      PerM command:
201      PerM test-data/chr_m.fasta test-data/perm_in3.fastqsanger +readFormat .fastq -v 2 -u perm_out3.fastqsanger -o perm_out2.sam +outputFormat sam +noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
202      You need to replace the + with 2 dashes.
203      -->
204      <param name="space" value="base" />
205      <param name="refSource" value="history" />
206      <param name="ref" value="chr_m.fasta" ftype="fasta" />
207      <param name="singleOrPairs" value="single" />
208      <param name="reads" value="perm_in3.fastqsanger" ftype="fastqsanger" />
209      <param name="valAlign" value="2" />
210      <param name="options" value="preSet" />
211      <param name="bUnmappedRead" value="true" />
212      <output name="output" file="perm_out2.sam" ftype="sam" />
213      <output name="unmappedReadOut" file="perm_out3.fastqsanger" ftype="fastqsanger" />
214    </test>
215    <test>
216      <!--
217      PerM command:
218      PerM test-data/phiX.fasta test-data/perm_in4.fastqcssanger +readFormat .csfastq -v 1 -o perm_out4.sam +outputFormat sam +noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
219      You need to replace the + with 2 dashes.
220      -->
221      <param name="space" value="color" />
222      <param name="refSource" value="history" />
223      <param name="ref" value="phiX.fasta" ftype="fasta" />
224      <param name="singleOrPairs" value="single" />
225      <param name="reads" value="perm_in4.fastqcssanger" ftype="fastqcssanger" />
226      <param name="valAlign" value="1" />
227      <param name="options" value="preSet" />
228      <param name="bUnmappedRead" value="false" />
229      <output name="output" file="perm_out4.sam" ftype="sam" />
230    </test>
231    <test>
232      <!--
233      PerM command:
234      PerM equCab2.fasta 50 +seed F4 -m -s equCab2_F3_50.index +readFormat .csfastq
235      PerM equCab2_F3_50.index -1 test-data/perm_in5.fastqcssanger -2 test-data/perm_in6.fastqcssanger -U 90000 -L 10000 +readFormat .csfastq -v 3 -o perm_out5.sam +outputFormat sam +noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
236      You need to replace the + with 2 dashes.
237      hg19.fasta needs to be supplied.
      NOTE(review): the commands above reference equCab2.fasta rather than hg19.fasta,
      and build the index with seed F4 while naming it F3_50. The index value selected
      below is equCab2_chrM_F3_50. Confirm which reference and seed are intended.
238      -->
239      <param name="space" value="color" />
240      <param name="refSource" value="indexed" />
241      <param name="index" value="equCab2_chrM_F3_50" />
242      <param name="singleOrPairs" value="paired" />
243      <param name="reads1" value="perm_in5.fastqcssanger" ftype="fastqcssanger" />
244      <param name="reads2" value="perm_in6.fastqcssanger" ftype="fastqcssanger" />
245      <param name="upperbound" value="90000" />
246      <param name="lowerbound" value="10000" />
247      <param name="excludeAmbiguousPairs" value="false" />
248      <param name="valAlign" value="3" />
249      <param name="options" value="preSet" />
250      <param name="bUnmappedRead" value="false" />
251      <output name="output" file="perm_out5.sam" ftype="sam" />
252    </test>
253  </tests>
254  <help>
255**What it does**
256
257PerM is a short read aligner designed to be ultrafast with long SOLiD reads to the whole genome or transcriptions. PerM can be fully sensitive to alignments with up to four mismatches and highly sensitive to a higher number of mismatches.
258
259**Development team**
260
261PerM is developed by Ting Chen's group, Center of Excellence in Genomic Sciences at the University of Southern California. If you have any questions, please email yanghoch at usc.edu or check the `project page`__.
262
263 .. __: http://code.google.com/p/perm/
264
265**Citation**
266
267PerM: Efficient mapping of short sequencing reads with periodic full sensitive spaced seeds. Bioinformatics, 2009, 25 (19): 2514-2521.
268
269**Input**
270
271The input files are read files and a reference. Users can use the pre-indexed reference in Galaxy or upload their own reference.
272
273The uploaded reference file should be in the fasta format. Multiple sequences like transcriptions should be concatenated together separated by a header line that starts with the ">" character.
274
275Reads files must be in either fastqsanger or fastqcssanger format to use in PerM. However, there are several possible starting formats that can be converted to one of those two: fastq (any type), color-space fastq, fasta, csfasta, or csfasta+qualsolid.
276
277An uploaded base-space fastq file MUST be checked/transformed with FASTQGroomer tools in Galaxy to be converted to the fastqsanger format (this is true even if the original file is in Sanger format).
278
279Uploaded fasta and csfasta without quality score files can be transformed to fastqsanger by the FASTQGroomer, with pseudo quality scores added.
280
281An uploaded csfasta + qual pair can also be transformed into fastqcssanger by solid2fastq.
282
283**Outputs**
284
285The output mapping result is in SAM format, and has the following columns::
286
287    Column  Description
288  --------  --------------------------------------------------------
289   1 QNAME  Query (pair) NAME
290   2 FLAG   bitwise FLAG
291   3 RNAME  Reference sequence NAME
292   4 POS    1-based leftmost POSition/coordinate of clipped sequence
293   5 MAPQ   MAPping Quality (Phred-scaled)
294   6 CIGAR  extended CIGAR string
295   7 MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
296   8 MPOS   1-based Mate POSition
297   9 ISIZE  Inferred insert SIZE
298  10 SEQ    query SEQuence on the same strand as the reference
299  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
300  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
301  12.1 NM   Number of mismatches (SOLiD-specific)
302  12.2 CS   Reads in color space (SOLiD-specific)
303  12.3 CQ   Base quality in color space (SOLiD-specific)
304
305The flags are as follows::
306
307    Flag  Description
308  ------  -------------------------------------
309  0x0001  the read is paired in sequencing
310  0x0002  the read is mapped in a proper pair
311  0x0004  the query sequence itself is unmapped
312  0x0008  the mate is unmapped
313  0x0010  strand of the query (1 for reverse)
314  0x0020  strand of the mate
315  0x0040  the read is the first read in a pair
316  0x0080  the read is the second read in a pair
317  0x0100  the alignment is not primary
318
319Here is some sample output::
320
321  Qname FLAG    Rname   POS     MAPQ    CIGAR   MRNM    MPOS    ISIZE   SEQ     QUAL    NM      CS      CQ
322  491_28_332_F3   16      ref-1   282734  255     35M     *       0       0       AGTCAAACTCCGAATGCCAATGACTTATCCTTAGG    #%%%%%%%!!%%%!!%%%%%%%%!!%%%%%%%%%%      NM:i:3  CS:Z:C0230202330012130103100230121001212        CQ:Z:###################################
323  491_28_332_F3   16      ref-1   269436  255     35M     *       0       0       AGTCAAACTCCGAATGCCAATGACTTATCCTTAGG    #%%%%%%%!!%%%!!%%%%%%%%!!%%%%%%%%%%      NM:i:3  CS:Z:C0230202330012130103100230121001212        CQ:Z:###################################
324
325The user can choose whether to produce an optional output containing the unmapped reads, in fastqsanger or fastqcssanger format depending on the read space. The default is to produce it.
326
327**PerM parameter list**
328
329Below is a list of PerM command-line options. Not all of these are relevant to Galaxy's implementation, but they are included for completeness.
330
331The command for single-end::
332
333  PerM [ref_or_index] [read] [options]
334
335The command for paired-end::
336
337  PerM [ref_or_index] -1 [read1] -2 [read2] [options]
338
339The command-line options::
340
341  -A                Output all alignments within the given mismatch threshold, end-to-end.
342  -B                Output best alignments in terms of mismatches in the given mismatch threshold. [Default]
343  -E                Output only the uniquely mapped reads in the given mismatch threshold.
344  -m                Create the reference index, without reusing the saved index.
345  -s PATH           Save the reference index to accelerate the mapping in the future. If PATH is not specified, the default path will be used.
346  -v INT            Where INT is the number of mismatches allowed in one end. [Default=2]
347  -T INT            Where INT is the length to truncate read length to, so 30 means use only first 30 bases (signals). Leave blank if the full read is meant to be used.
348  -o PATH           Where PATH is for output the mapping of one read set. PerM's output are in .mapping or .sam format, determined by the ext name of PATH. Ex: -o out.sam will output in SAM format; -o out.mapping will output in .mapping format.
349  -d PATH           Where PATH is the directory for multiple read sets.
350  -u PATH           Print the fastq file of those unmapped reads to the file in PATH.
351  --noSamHeader     Print no SAM header so it is convenient to concatenate multiple SAM output files.
352  --includeReadsWN  Encodes N or "." with A or 3, respectively.
353  --statsOnly       Output the mapping statistics in stdout only, without saving alignments to files.
354  --ignoreQS        Ignore the quality scores in fastq or QUAL files.
355  --seed {F2 | S11 | F3 | F4}    Specify the seed pattern, which has a specific full sensitivity. Check the algorithm page (link below) for seed patterns to balance the sensitivity and running time.
356  --readFormat {fasta | fastq | csfasta | csfastq}    Read in reads in the specified format, instead of guessing according to the extension name.
357  --delimiter CHAR  Which is a character used as the delimiter to separate the read id and the additional info in the line with ">" in fasta or csfasta.
358
359Paired reads options::
360
361  -e        Exclude ambiguous pairs.
362  -L INT    Mate-paired separate lower bound.
363  -U INT    Mate-paired separate upper bound.
364  -1 PATH   The forward reads file path.
365  -2 PATH   The reversed reads file path.
366
367See the PerM `algorithm page`__ for information on algorithms and seeds.
368
369 .. __: http://code.google.com/p/perm/wiki/Algorithms
370  </help>
371</tool>
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。