root/galaxy-central/tools/human_genome_variation/sift.xml

リビジョン 2, 7.2 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1<tool id="hgv_sift" name="SIFT" version="1.0.0">
2  <description>predictions of functional sites</description>
3
<!-- Runs sift_variants_wrapper.sh through bash. Positional arguments, in order:
     input dataset, output dataset, the input's dbkey metadata, the sift_db.loc
     file under GALAXY_DATA_INDEX_DIR, chromosome column, position column,
     coordinate base (0 or 1), allele column, strand (a column selection or the
     literal "+" / "-" supplied by the strand_source conditional), and the
     selected extra output fields. The argument order is part of the wrapper's
     interface and must not be changed here. -->
4  <command interpreter="bash">
5    sift_variants_wrapper.sh "$input" "$output" "${input.metadata.dbkey}" "${GALAXY_DATA_INDEX_DIR}/sift_db.loc" "$chrom_col" "$pos_col" "$base" "$allele_col" "$strand_source.strand_col" "$output_opts"
6  </command>
7
<!-- Form parameters. Their values are substituted into the wrapper command line
     ($input, $chrom_col, $pos_col, $base, $allele_col,
     $strand_source.strand_col, $output_opts). -->
8  <inputs>
      <!-- Tabular dataset to annotate. The validators require a build to be set
           and the dataset's dbkey to appear in column 0 of sift_db.loc;
           otherwise the message below is shown. NOTE(review): exact validator
           semantics are defined by Galaxy, not by this file. -->
9    <param name="input" type="data" format="tabular" label="Dataset">
10      <validator type="unspecified_build"/>
11      <validator type="dataset_metadata_in_file" filename="sift_db.loc" metadata_name="dbkey" metadata_column="0" message="Data is currently not available for the specified build."/>
12    </param>
      <!-- Column selectors, all tied to the chosen input dataset via data_ref. -->
13    <param name="chrom_col"  type="data_column" data_ref="input" label="Column with chromosome"/>
14    <param name="pos_col"    type="data_column" data_ref="input" numerical="true" label="Column with position"/>
      <!-- Coordinate convention of the position column; defaults to one-based. -->
15    <param name="base" type="select" label="Position coordinates are">
16      <option value="0">zero-based</option>
17      <option value="1" selected="true">one-based</option>
18    </param>
19    <param name="allele_col" type="data_column" data_ref="input" label="Column with allele"/>
      <!-- Strand source. Every branch defines a parameter named strand_col, so
           the command line can always read $strand_source.strand_col: it is a
           column selection for "data_column", or the constant "+" / "-" when
           all rows share one strand. -->
20    <conditional name="strand_source">
21      <param name="choice" type="select" label="Strand info">
22        <option value="data_column" selected="true">a column in the dataset</option>
23        <option value="all_pos">all on sense/forward/+ strand</option>
24        <option value="all_neg">all on antisense/reverse/- strand</option>
25      </param>
26      <when value="data_column">
27        <param name="strand_col" type="data_column" data_ref="input" label="Column with strand"/>
28      </when>
29      <when value="all_pos">
30        <param name="strand_col" type="hidden" value="+"/>
31      </when>
32      <when value="all_neg">
33        <param name="strand_col" type="hidden" value="-"/>
34      </when>
35    </conditional>
      <!-- Optional extra output fields, identified by the single-letter codes
           A to L that are handed to the wrapper. NOTE(review): how multiple or
           zero selections are encoded in $output_opts is not shown in this
           file; confirm against the wrapper script. -->
36    <param name="output_opts" type="select" multiple="true" display="checkboxes" label="Include the following additional fields in the output">
37      <option value="A">Ensembl Gene ID</option>
38      <option value="B">Gene Name</option>
39      <option value="C">Gene Description</option>
40      <option value="D">Ensembl Protein Family ID</option>
41      <option value="E">Ensembl Protein Family Description</option>
42      <option value="F">Ensembl Transcript Status (Known / Novel)</option>
43      <option value="G">Protein Family Size</option>
44      <option value="H">Ka/Ks (Human-mouse)</option>
45      <option value="I">Ka/Ks (Human-macaque)</option>
46      <option value="J">OMIM Disease</option>
47      <option value="K">Allele Frequencies (All Hapmap Populations - weighted average)</option>
48      <option value="L">Allele Frequencies (CEU Hapmap population)</option>
49    </param>
50  </inputs>
51
<!-- The tool produces one tabular dataset; its name "output" is what the
     command line refers to as $output. -->
52  <outputs>
53    <data format="tabular" name="output" />
54  </outputs>
55
<!-- External binaries the tool declares as required. Presumably these are used
     by sift_variants_wrapper.sh, which is not visible here; confirm before
     removing any entry. -->
56  <requirements>
57    <requirement type="binary">awk</requirement>
58    <requirement type="binary">rm</requirement>
59    <requirement type="binary">sed</requirement>
60  </requirements>
61
<!-- Single functional test: runs the tool on sift_variants.tab (tabular, dbkey
     hg18) with chromosome in column 1, one-based position in column 3, strand
     read from column 4, allele in column 5, and extra field A (Ensembl Gene
     ID) selected; the expected result file is sift_variants_result.tab. -->
62  <tests>
63    <test>
64      <param name="input" value="sift_variants.tab" ftype="tabular" dbkey="hg18"/>
65      <param name="chrom_col" value="1"/>
66      <param name="pos_col" value="3"/>
67      <param name="base" value="1"/>
68      <param name="allele_col" value="5"/>
69      <param name="choice" value="data_column"/>
70      <param name="strand_col" value="4"/>
71      <param name="output_opts" value="A"/>
72      <output name="output" file="sift_variants_result.tab"/>
73    </test>
74  </tests>
75
76  <help>
77.. class:: warningmark
78
79This tool currently works only for datasets whose genome build is hg18 or hg19.
80
81-----
82
83**Dataset formats**
84
85The input and output datasets are tabular_.
86(`Dataset missing?`_)
87
88.. _tabular: ./static/formatHelp.html#tab
89.. _Dataset missing?: ./static/formatHelp.html
90
91-----
92
93**What it does**
94
95SIFT predicts whether an amino-acid substitution affects protein function,
96based on sequence homology and the physical properties of amino acids.
97SIFT can be applied to naturally occurring non-synonymous polymorphisms
98and laboratory-induced missense mutations.  This tool uses SQLite databases
99containing pre-computed SIFT scores and annotations for all possible nucleotide
100substitutions at each position in the human exome.  Allele frequency data
101are from the HapMap frequency database, and additional transcript and
102gene-level data are from Ensembl BioMart.
103
104The input dataset must contain columns for the chromosome, position, and
105alleles.  The alleles must be two nucleotides separated by '/',
106usually the reference allele and the allele of interest.
107Strand information must either be given in its own column or be the same (all + or all -) for every row.
108The output contains a standard set of columns plus the additional ones that
109have been selected from the list above.
110
111Website: http://sift.jcvi.org/
112
113-----
114
115**Example**
116
117- input file::
118
119    chr3   81780820   +  T/C
120    chr2   230341630  +  G/A
121    chr2   43881517   +  A/T
122    chr2   43857514   +  T/C
123    chr6   88375602   +  G/A
124    chr22  29307353   -  T/A
125    chr10  115912482  -  G/T
126    chr10  115900918  -  C/T
127    chr16  69875502   +  G/T
128    etc.
129
130- output file::
131
132    #Chrom  Position   Strand  Allele  Codons   Transcript ID    Protein ID       Substitution  Region    dbSNP ID      SNP Type       Prediction  Score  Median Info  Num seqs at position  User Comment
133    chr3    81780820   +       T/C     AGA-gGA  ENST00000264326  ENSP00000264326  R190G         EXON CDS  rs2229519:C   Nonsynonymous  DAMAGING    0.04   3.06         149
134    chr2    230341630  +       G/T     -        ENST00000389045  ENSP00000373697  NA            EXON CDS  rs1803846:A   Unknown        Not scored  NA     NA           NA
135    chr2    43881517   +       A/T     ATA-tTA  ENST00000260605  ENSP00000260605  I230L         EXON CDS  rs11556157:T  Nonsynonymous  TOLERATED   0.47   3.19         7
136    chr2    43857514   +       T/C     TTT-TcT  ENST00000260605  ENSP00000260605  F33S          EXON CDS  rs2288709:C   Nonsynonymous  TOLERATED   0.61   3.33         6
137    chr6    88375602   +       G/A     GTT-aTT  ENST00000257789  ENSP00000257789  V217I         EXON CDS  rs2307389:A   Nonsynonymous  TOLERATED   0.75   3.17         13
138    chr22   29307353   +       T/A     ACC-tCC  ENST00000335214  ENSP00000334612  T264S         EXON CDS  rs42942:A     Nonsynonymous  TOLERATED   0.4    3.14         23
139    chr10   115912482  +       C/A     CGA-CtA  ENST00000369285  ENSP00000358291  R179L         EXON CDS  rs12782946:T  Nonsynonymous  TOLERATED   0.06   4.32         2
140    chr10   115900918  +       G/A     CAA-tAA  ENST00000369287  ENSP00000358293  Q271*         EXON CDS  rs7095762:T   Nonsynonymous  N/A         N/A    N/A          N/A
141    chr16   69875502   +       G/T     ACA-AaA  ENST00000338099  ENSP00000337512  T608K         EXON CDS  rs3096381:T   Nonsynonymous  TOLERATED   0.12   3.41         3
142    etc.
143
144-----
145
146**References**
147
148Ng PC, Henikoff S. (2001) Predicting deleterious amino acid substitutions.
149Genome Res. 11(5):863-74.
150
151Ng PC, Henikoff S. (2002) Accounting for human polymorphisms predicted to affect protein function.
152Genome Res. 12(3):436-46.
153
154Ng PC, Henikoff S. (2003) SIFT: Predicting amino acid changes that affect protein function.
155Nucleic Acids Res. 31(13):3812-4.
156
157Kumar P, Henikoff S, Ng PC. (2009) Predicting the effects of coding non-synonymous variants
158on protein function using the SIFT algorithm.
159Nat Protoc. 4(7):1073-81. Epub 2009 Jun 25.
160
161  </help>
162</tool>
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。