| 1 | <tool id="Extract_features1" name="Extract features">
|
|---|
| 2 | <description> from GFF file</description>
|
|---|
| 3 | <command interpreter="python">extract_GFF_Features.py $input1 $out_file1 ${column_choice.col} ${column_choice.feature}</command>
|
|---|
| 4 | <inputs>
|
|---|
| 5 | <param format="gff" name="input1" type="data" label="Select GFF data"/>
|
|---|
| 6 | <conditional name="column_choice">
|
|---|
| 7 | <param name="col" type="select" label="From">
|
|---|
| 8 | <option value="0" selected="true">Column 1 / Sequence name</option>
|
|---|
| 9 | <option value="1">Column 2 / Source</option>
|
|---|
| 10 | <option value="2">Column 3 / Feature</option>
|
|---|
| 11 | <option value="6">Column 7 / Strand</option>
|
|---|
| 12 | <option value="7">Column 8 / Frame</option>
|
|---|
| 13 | </param>
|
|---|
| 14 | <when value="0">
|
|---|
| 15 | <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple values">
|
|---|
| 16 | <options from_dataset="input1">
|
|---|
| 17 | <column name="name" index="0"/>
|
|---|
| 18 | <column name="value" index="0"/>
|
|---|
| 19 | <filter type="unique_value" name="unique" column="0"/>
|
|---|
| 20 | </options>
|
|---|
| 21 | </param>
|
|---|
| 22 | </when>
|
|---|
| 23 | <when value="1">
|
|---|
| 24 | <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple values">
|
|---|
| 25 | <options from_dataset="input1">
|
|---|
| 26 | <column name="name" index="1"/>
|
|---|
| 27 | <column name="value" index="1"/>
|
|---|
| 28 | <filter type="unique_value" name="unique" column="1"/>
|
|---|
| 29 | </options>
|
|---|
| 30 | </param>
|
|---|
| 31 | </when>
|
|---|
| 32 | <when value="2">
|
|---|
| 33 | <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple values">
|
|---|
| 34 | <options from_dataset="input1">
|
|---|
| 35 | <column name="name" index="2"/>
|
|---|
| 36 | <column name="value" index="2"/>
|
|---|
| 37 | <filter type="unique_value" name="unique" column="2"/>
|
|---|
| 38 | </options>
|
|---|
| 39 | </param>
|
|---|
| 40 | </when>
|
|---|
| 41 | <when value="6">
|
|---|
| 42 | <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple values">
|
|---|
| 43 | <options from_dataset="input1">
|
|---|
| 44 | <column name="name" index="6"/>
|
|---|
| 45 | <column name="value" index="6"/>
|
|---|
| 46 | <filter type="unique_value" name="unique" column="6"/>
|
|---|
| 47 | </options>
|
|---|
| 48 | </param>
|
|---|
| 49 | </when>
|
|---|
| 50 | <when value="7">
|
|---|
| 51 | <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple values">
|
|---|
| 52 | <options from_dataset="input1">
|
|---|
| 53 | <column name="name" index="7"/>
|
|---|
| 54 | <column name="value" index="7"/>
|
|---|
| 55 | <filter type="unique_value" name="unique" column="7"/>
|
|---|
| 56 | </options>
|
|---|
| 57 | </param>
|
|---|
| 58 | </when>
|
|---|
| 59 | </conditional>
|
|---|
| 60 | </inputs>
|
|---|
| 61 | <outputs>
|
|---|
| 62 | <data format="gff" name="out_file1" />
|
|---|
| 63 | </outputs>
|
|---|
| 64 | <tests>
|
|---|
| 65 | <test>
|
|---|
| 66 | <param name="input1" value="5.gff"/>
|
|---|
| 67 | <param name="col" value="0" />
|
|---|
| 68 | <param name="feature" value="chr5,chr6,chr7,chr8" />
|
|---|
| 69 | <output name="out_file1" file="Extract_features1_out.gff"/>
|
|---|
| 70 | </test>
|
|---|
| 71 | </tests>
|
|---|
| 72 | <help>
|
|---|
| 73 |
|
|---|
| 74 | **What it does**
|
|---|
| 75 |
|
|---|
| 76 | This tool extracts selected features from GFF data.
|
|---|
| 77 |
|
|---|
| 78 | -----
|
|---|
| 79 |
|
|---|
| 80 | **Example**
|
|---|
| 81 |
|
|---|
| 82 | Selecting **promoter** from Column 3 / Feature of the following GFF data::
|
|---|
| 83 |
|
|---|
| 84 | chr22 GeneA enhancer 10000000 10001000 500 + . TGA
|
|---|
| 85 | chr22 GeneA promoter 10010000 10010100 900 + . TGA
|
|---|
| 86 | chr22 GeneB promoter 10020000 10025000 400 - . TGB
|
|---|
| 87 | chr22 GeneB CCDS2220 10030000 10065000 800 - . TGB
|
|---|
| 88 |
|
|---|
| 89 | will produce the following output::
|
|---|
| 90 |
|
|---|
| 91 | chr22 GeneA promoter 10010000 10010100 900 + . TGA
|
|---|
| 92 | chr22 GeneB promoter 10020000 10025000 400 - . TGB
|
|---|
| 93 |
|
|---|
| 94 | ----
|
|---|
| 95 |
|
|---|
| 96 | .. class:: infomark
|
|---|
| 97 |
|
|---|
| 98 | **About formats**
|
|---|
| 99 |
|
|---|
| 100 | **GFF format** (General Feature Format) is a format for describing genes and other features associated with DNA, RNA, and protein sequences. GFF lines have nine tab-separated fields::
|
|---|
| 101 |
|
|---|
| 102 | 1. seqname - Must be a chromosome or scaffold.
|
|---|
| 103 | 2. source - The program that generated this feature.
|
|---|
| 104 | 3. feature - The name of this type of feature. Some examples of standard feature types are "CDS", "start_codon", "stop_codon", and "exon".
|
|---|
| 105 | 4. start - The starting position of the feature in the sequence. The first base is numbered 1.
|
|---|
| 106 | 5. end - The ending position of the feature (inclusive).
|
|---|
| 107 | 6. score - A score between 0 and 1000. If there is no score value, enter ".".
|
|---|
| 108 | 7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
|
|---|
| 109 | 8. frame - If the feature is a coding exon, frame should be a number from 0 to 2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
|
|---|
| 110 | 9. group - All lines with the same group are linked together into a single item.
|
|---|
| 111 |
|
|---|
| 112 |
|
|---|
| 113 | </help>
|
|---|
| 114 | </tool>
|
|---|