1 | <tool id="Extract_features1" name="Extract features">
|
---|
2 | <description> from GFF file</description>
|
---|
3 | <command interpreter="python">extract_GFF_Features.py '$input1' '$out_file1' '${column_choice.col}' '${column_choice.feature}'</command> <!-- Arguments, in order: input dataset, output dataset, selected column index, comma-joined selected values. Each is single-quoted so a value containing spaces reaches the script as one argument. -->
|
---|
4 | <inputs>
|
---|
5 | <param format="gff" name="input1" type="data" label="Select GFF data"/>
|
---|
6 | <conditional name="column_choice">
|
---|
7 | <param name="col" type="select" label="From"> <!-- Picks the GFF column to filter on. Option values are zero-based column indices (value 0 is labelled Column 1) and match the value attributes of the when branches that follow. -->
|
---|
8 | <option value="0" selected="true">Column 1 / Sequence name</option>
|
---|
9 | <option value="1">Column 2 / Source</option>
|
---|
10 | <option value="2">Column 3 / Feature</option>
|
---|
11 | <option value="6">Column 7 / Strand</option>
|
---|
12 | <option value="7">Column 8 / Frame</option>
|
---|
13 | </param>
|
---|
14 | <when value="0"> <!-- col=0 (Column 1 / Sequence name): choices are read from column index 0 of input1 and passed through the unique_value filter. -->
|
---|
15 | <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple features">
|
---|
16 | <options from_dataset="input1">
|
---|
17 | <column name="name" index="0"/>
|
---|
18 | <column name="value" index="0"/>
|
---|
19 | <filter type="unique_value" name="unique" column="0"/>
|
---|
20 | </options>
|
---|
21 | </param>
|
---|
22 | </when>
|
---|
23 | <when value="1"> <!-- col=1 (Column 2 / Source): choices are read from column index 1 of input1 and passed through the unique_value filter. -->
|
---|
24 | <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple features">
|
---|
25 | <options from_dataset="input1">
|
---|
26 | <column name="name" index="1"/>
|
---|
27 | <column name="value" index="1"/>
|
---|
28 | <filter type="unique_value" name="unique" column="1"/>
|
---|
29 | </options>
|
---|
30 | </param>
|
---|
31 | </when>
|
---|
32 | <when value="2"> <!-- col=2 (Column 3 / Feature): choices are read from column index 2 of input1 and passed through the unique_value filter. -->
|
---|
33 | <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple features">
|
---|
34 | <options from_dataset="input1">
|
---|
35 | <column name="name" index="2"/>
|
---|
36 | <column name="value" index="2"/>
|
---|
37 | <filter type="unique_value" name="unique" column="2"/>
|
---|
38 | </options>
|
---|
39 | </param>
|
---|
40 | </when>
|
---|
41 | <when value="6"> <!-- col=6 (Column 7 / Strand): choices are read from column index 6 of input1 and passed through the unique_value filter. -->
|
---|
42 | <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple features">
|
---|
43 | <options from_dataset="input1">
|
---|
44 | <column name="name" index="6"/>
|
---|
45 | <column name="value" index="6"/>
|
---|
46 | <filter type="unique_value" name="unique" column="6"/>
|
---|
47 | </options>
|
---|
48 | </param>
|
---|
49 | </when>
|
---|
50 | <when value="7"> <!-- col=7 (Column 8 / Frame): choices are read from column index 7 of input1 and passed through the unique_value filter. -->
|
---|
51 | <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple features">
|
---|
52 | <options from_dataset="input1">
|
---|
53 | <column name="name" index="7"/>
|
---|
54 | <column name="value" index="7"/>
|
---|
55 | <filter type="unique_value" name="unique" column="7"/>
|
---|
56 | </options>
|
---|
57 | </param>
|
---|
58 | </when>
|
---|
59 | </conditional>
|
---|
60 | </inputs>
|
---|
61 | <outputs> <!-- Single output dataset declared with format gff; the command line passes it to the script as $out_file1. -->
|
---|
62 | <data format="gff" name="out_file1" />
|
---|
63 | </outputs>
|
---|
64 | <tests> <!-- One functional test: input 5.gff, col set to 0, features chr5, chr6, chr7 and chr8; the expected out_file1 is Extract_features1_out.gff. -->
|
---|
65 | <test>
|
---|
66 | <param name="input1" value="5.gff"/>
|
---|
67 | <param name="col" value="0" />
|
---|
68 | <param name="feature" value="chr5,chr6,chr7,chr8" />
|
---|
69 | <output name="out_file1" file="Extract_features1_out.gff"/>
|
---|
70 | </test>
|
---|
71 | </tests>
|
---|
72 | <help>
|
---|
73 |
|
---|
74 | **What it does**
|
---|
75 |
|
---|
76 | This tool extracts selected features from GFF data.
|
---|
77 |
|
---|
78 | -----
|
---|
79 |
|
---|
80 | **Example**
|
---|
81 |
|
---|
82 | Selecting **promoter** from **Column 3 / Feature** of the following GFF data::
|
---|
83 |
|
---|
84 | chr22 GeneA enhancer 10000000 10001000 500 + . TGA
|
---|
85 | chr22 GeneA promoter 10010000 10010100 900 + . TGA
|
---|
86 | chr22 GeneB promoter 10020000 10025000 400 - . TGB
|
---|
87 | chr22 GeneB CCDS2220 10030000 10065000 800 - . TGB
|
---|
88 |
|
---|
89 | will produce the following output::
|
---|
90 |
|
---|
91 | chr22 GeneA promoter 10010000 10010100 900 + . TGA
|
---|
92 | chr22 GeneB promoter 10020000 10025000 400 - . TGB
|
---|
93 |
|
---|
94 | ----
|
---|
95 |
|
---|
96 | .. class:: infomark
|
---|
97 |
|
---|
98 | **About formats**
|
---|
99 |
|
---|
100 | **GFF format** General Feature Format is a format for describing genes and other features associated with DNA, RNA and Protein sequences. GFF lines have nine tab-separated fields::
|
---|
101 |
|
---|
102 | 1. seqname - Must be a chromosome or scaffold.
|
---|
103 | 2. source - The program that generated this feature.
|
---|
104 | 3. feature - The name of this type of feature. Some examples of standard feature types are "CDS", "start_codon", "stop_codon", and "exon".
|
---|
105 | 4. start - The starting position of the feature in the sequence. The first base is numbered 1.
|
---|
106 | 5. end - The ending position of the feature (inclusive).
|
---|
107 | 6. score - A score between 0 and 1000. If there is no score value, enter ".".
|
---|
108 | 7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
|
---|
109 | 8. frame - If the feature is a coding exon, frame should be a number between 0 and 2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
|
---|
110 | 9. group - All lines with the same group are linked together into a single item.
|
---|
111 |
|
---|
112 |
|
---|
113 | </help>
|
---|
114 | </tool>
|
---|