root/galaxy-central/tools/ngs_rna/cuffdiff_wrapper.xml

リビジョン 2, 9.4 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1<tool id="cuffdiff" name="Cuffdiff" version="0.9.1">
2    <description>find significant changes in transcript expression, splicing, and promoter use</description>
3    <requirements>
4        <requirement type="package">cufflinks</requirement>
5    </requirements>
6    <command interpreter="python">
7        cuffdiff_wrapper.py
8            --FDR=$fdr
9            --num-threads="4"
10            --min-mapqual=$min_mapqual
11            --min-alignment-count=$min_alignment_count
12
13            --isoforms_fpkm_tracking_output=$isoforms_fpkm_tracking
14            --genes_fpkm_tracking_output=$genes_fpkm_tracking
15            --cds_fpkm_tracking_output=$cds_fpkm_tracking
16            --tss_groups_fpkm_tracking_output=$tss_groups_fpkm_tracking
17            --isoforms_exp_output=$isoforms_exp
18            --genes_exp_output=$genes_exp
19            --tss_groups_exp_output=$tss_groups_exp
20            --cds_exp_fpkm_tracking_output=$cds_exp_fpkm_tracking
21            --splicing_diff_output=$splicing_diff
22            --cds_diff_output=$cds_diff
23            --promoters_diff_output=$promoters_diff
24           
25            --inputA=$gtf_input
26            --input1=$aligned_reads1
27            --input2=$aligned_reads2
28    </command>
      <!--
          About the <command> above: it runs cuffdiff_wrapper.py with the
          statistical settings (FDR, min mapping quality, min alignment count),
          one output path option per declared output dataset, and the three
          input datasets (GTF as inputA, the two SAM files as input1/input2).

          NOTE(review): the command never references $singlePaired.sPaired,
          $singlePaired.mean_inner_distance or
          $singlePaired.inner_distance_std_dev, so the paired-end values the
          user enters in the form are not forwarded to the wrapper. The option
          names cuffdiff_wrapper.py accepts for them are not visible in this
          file; confirm against the wrapper before wiring them in.

          NOTE(review): the thread count is fixed at "4" rather than taken
          from a parameter or the job configuration.
      -->
29    <inputs>
30        <param format="gtf" name="gtf_input" type="data" label="Transcripts" help="A transcript GTF file produced by cufflinks, cuffcompare, or other source."/>
31        <param format="sam" name="aligned_reads1" type="data" label="SAM file of aligned RNA-Seq reads" help="Alignments for the first sample; passed to the wrapper as --input1."/>
32        <param format="sam" name="aligned_reads2" type="data" label="SAM file of aligned RNA-Seq reads" help="Alignments for the second sample; passed to the wrapper as --input2."/>
33        <param name="fdr" type="float" value="0.05" label="False Discovery Rate" help="The allowed false discovery rate."/>
34        <param name="min_mapqual" type="integer" value="0" label="Min SAM Mapping Quality" help="Instructs Cufflinks to ignore alignments with a SAM mapping quality lower than this number."/>
35        <param name="min_alignment_count" type="integer" value="0" label="Min Alignment Count" help="The minimum number of alignments in a locus needed to conduct significance testing on changes in that locus observed between samples."/>
          <!--
              Library layout selector. Choosing "paired" reveals the mean and
              standard deviation of the inner distance between mates.
              NOTE(review): the tool's <command> does not reference any
              $singlePaired value, so these settings currently have no effect
              on the run; confirm the wrapper's option names before wiring
              them in.
          -->
36        <conditional name="singlePaired">
37            <param name="sPaired" type="select" label="Is this library mate-paired?">
38                <option value="single">Single-end</option>
39                <option value="paired">Paired-end</option>
40            </param>
41            <when value="single"></when>
42            <when value="paired">
43                <param name="mean_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs"/>
44                <param name="inner_distance_std_dev" type="integer" value="20" label="Standard Deviation for Inner Distance between Mate Pairs"/>
45            </when>
46        </conditional>
47    </inputs>
48
49    <outputs>
          <!--
              One tabular dataset per file written by the wrapper. Each name
              matches the $variable used by the corresponding *_output option
              in the <command> section. Declaration order is preserved.
          -->
50        <data name="isoforms_exp" format="tabular" label="${tool.name} on ${on_string}: isoform expression"/>
51        <data name="genes_exp" format="tabular" label="${tool.name} on ${on_string}: gene expression"/>
52        <data name="tss_groups_exp" format="tabular" label="${tool.name} on ${on_string}: TSS groups expression"/>
53        <data name="cds_exp_fpkm_tracking" format="tabular" label="${tool.name} on ${on_string}: CDS Expression FPKM Tracking"/>
54        <data name="isoforms_fpkm_tracking" format="tabular" label="${tool.name} on ${on_string}: isoform FPKM tracking"/>
55        <data name="genes_fpkm_tracking" format="tabular" label="${tool.name} on ${on_string}: gene FPKM tracking"/>
56        <data name="tss_groups_fpkm_tracking" format="tabular" label="${tool.name} on ${on_string}: TSS groups FPKM tracking"/>
57        <data name="cds_fpkm_tracking" format="tabular" label="${tool.name} on ${on_string}: CDS FPKM tracking"/>
58        <data name="splicing_diff" format="tabular" label="${tool.name} on ${on_string}: splicing diff"/>
59        <data name="promoters_diff" format="tabular" label="${tool.name} on ${on_string}: promoters diff"/>
60        <data name="cds_diff" format="tabular" label="${tool.name} on ${on_string}: CDS diff"/>
61    </outputs>
62
63    <tests>
64        <test>
65                <!--
66                    Equivalent command line:
67                    cuffdiff cuffcompare_out5.gtf cuffdiff_in1.sam cuffdiff_in2.sam
68                -->
68                <param name="gtf_input" value="cuffcompare_out5.gtf" ftype="gtf" />
69                <param name="aligned_reads1" value="cuffdiff_in1.sam" ftype="sam" />
70                <param name="aligned_reads2" value="cuffdiff_in2.sam" ftype="sam" />
71                <!-- Non-dataset parameters, left at their defaults. ftype applies only to uploaded datasets, so it is omitted here. -->
72                <param name="fdr" value="0.05" />
73                <param name="min_mapqual" value="0" />
74                <param name="min_alignment_count" value="0" />
75                <param name="sPaired" value="single" />
76               
77                <!-- This won't pass until the test harness is updated to handle outputs better. -->
78                <!--
79                    <output name="XXXX" file="cuffdiff_out5.tracking" />
80                -->
81        </test>
82    </tests>
83
84    <help>
85**Cuffdiff Overview**
86
87Cuffdiff is part of Cufflinks_. Cuffdiff finds significant changes in transcript expression, splicing, and promoter use. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
88
89.. _Cufflinks: http://cufflinks.cbcb.umd.edu/
90       
91------
92
93**Know what you are doing**
94
95.. class:: warningmark
96
97There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
98
99.. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffdiff
100
101------
102
103**Input format**
104
105Cuffdiff takes Cufflinks or Cuffcompare GTF files as input along with two SAM files containing the fragment alignments for two or more samples.
106
107.. ___: http://www.todo.org
108
109------
110
111**Outputs**
112
113Cuffdiff produces many output files:
114
1151. Transcript FPKM expression tracking.
1162. Gene FPKM expression tracking; tracks the summed FPKM of transcripts sharing each gene_id
1173. Primary transcript FPKM tracking; tracks the summed FPKM of transcripts sharing each tss_id
1184. Coding sequence FPKM tracking; tracks the summed FPKM of transcripts sharing each p_id, independent of tss_id
1195. Transcript differential FPKM.
1206. Gene differential FPKM. Tests differences in the summed FPKM of transcripts sharing each gene_id
1217. Primary transcript differential FPKM. Tests differences in the summed FPKM of transcripts sharing each tss_id
1228. Coding sequence differential FPKM. Tests differences in the summed FPKM of transcripts sharing each p_id independent of tss_id
1239. Differential splicing tests: this tab delimited file lists, for each primary transcript, the amount of overloading detected among its isoforms, i.e. how much differential splicing exists between isoforms processed from a single primary transcript. Only primary transcripts from which two or more isoforms are spliced are listed in this file.
12410. Differential promoter tests: this tab delimited file lists, for each gene, the amount of overloading detected among its primary transcripts, i.e. how much differential promoter use exists between samples. Only genes producing two or more distinct primary transcripts (i.e. multi-promoter genes) are listed here.
12511. Differential CDS tests: this tab delimited file lists, for each gene, the amount of overloading detected among its coding sequences, i.e. how much differential CDS output exists between samples. Only genes producing two or more distinct CDS (i.e. multi-protein genes) are listed here.
126   
127-------
128
129**Settings**
130
131All of the options have a default value. You can change any of them. Most of the options in Cuffdiff have been implemented here.
132
133------
134
135**Cuffdiff parameter list**
136
137This is a list of implemented Cuffdiff options::
138
139  -m INT                         This is the expected (mean) inner distance between mate pairs. For example, for paired-end runs with fragments selected at 300bp, where each end is 50bp, you should set -m to be 200. The default is 45bp.
140  -s INT                         The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.
141  -Q INT                         Instructs Cufflinks to ignore alignments with a SAM mapping quality lower than this number. The default is 0.
142  -c INT                         The minimum number of alignments in a locus needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not significant, and the locus' observed changes don't contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).
143  --FDR FLOAT                    The allowed false discovery rate. The default is 0.05.
144  --num-importance-samples INT   Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000
145  --max-mle-iterations INT       Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000
146 
147    </help>
148</tool>
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。