root/galaxy-central/tools/stats/aggregate_binned_scores_in_intervals.xml @ 3

リビジョン 2, 5.3 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1<tool id="aggregate_scores_in_intervals2" description="such as phastCons, GERP, binCons, and others for a set of genomic intervals" name="Aggregate datapoints" version="1.1.2">
2  <description>Appends the average, min, max of datapoints per interval</description>
<!-- Command line for the tool, written as a Cheetah template and run with the Python interpreter.
     When score_source is "user", the score file is the history dataset $score_source_type.input2
     and the chrom_buffer=3 long option is appended; otherwise the score source is the value
     selected in $score_source_type.datasets and -b is appended. Both branches then pass, in the
     same order: the interval file, its chrom, start and end column metadata, and the output file.
     NOTE(review): the meaning of the chrom_buffer and -b options is defined by
     aggregate_scores_in_intervals.py, which is not visible here; confirm against that script. -->
3  <command interpreter="python">
4    #if $score_source_type.score_source == "user" #aggregate_scores_in_intervals.py $score_source_type.input2 $input1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} $out_file1 --chrom_buffer=3
5    #else                                         #aggregate_scores_in_intervals.py $score_source_type.datasets $input1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} $out_file1 -b
6    #end if#
7  </command>
<!-- Inputs: one interval dataset (input1) plus a score source chosen through the
     score_source_type conditional: either a locally cached score set listed in
     binned_scores.loc, or a wiggle dataset from the user's history. -->
8  <inputs>
9    <param format="interval" name="input1" type="data" label="Interval file">
10      <validator type="unspecified_build" message="Unspecified build. Locally cached scores are available for genome builds hg16, hg17 and hg18; scores from your history must have the same build as the interval file. Click the pencil icon in your history item to set the genome build."/>
11      <!-- Deliberately no dataset_metadata_in_file validator against binned_scores.loc here: it rejected every interval file whose build has no cached scores, which also blocked the "Scores in Your History" option for those builds. The cached branch below already restricts its choices to the build of input1. NOTE(review): confirm that an empty "Available datasets" list is reported clearly when no cached scores exist for the build. -->
12     
13    </param>
14    <conditional name="score_source_type">
      <!-- Selector that decides which of the two "when" branches below is active. -->
15      <param name="score_source" type="select" label="Score Source">
16        <option value="cached" selected="true">Locally Cached Scores</option>
17        <option value="user">Scores in Your History</option>
18      </param>
19      <when value="cached">
        <!-- Choices are read from binned_scores.loc (column 0 = dbkey, 1 = display name,
             2 = value) and filtered to rows whose dbkey matches the build of input1. -->
20        <param name="datasets" type="select" label="Available datasets" display="radio">
21          <options from_file="binned_scores.loc">
22            <column name="name" index="1"/>
23            <column name="value" index="2"/>
24            <column name="dbkey" index="0"/>
25            <filter type="data_meta" ref="input1" key="dbkey" column="0" />
26          </options>
27        </param>
28      </when>
29      <when value="user">
        <!-- Only wiggle datasets whose dbkey matches that of input1 are offered. -->
30        <param format="wig" name="input2" type="data" label="Score file">
31          <options>
32            <filter type="data_meta" ref="input1" key="dbkey" />
33          </options>
34        </param>
35      </when>
36    </conditional>
37  </inputs>
<!-- Single output dataset in interval format; its metadata is taken from input1. -->
38  <outputs>
39    <data name="out_file1" format="interval" metadata_source="input1" />
40  </outputs>
<!-- Functional tests: two runs against locally cached scores (hg17 and hg18) and one run
     against a wiggle score file supplied as a second input. -->
41  <tests>
    <!-- Cached scores, hg17 intervals. -->
42    <test>
43      <param name="input1" ftype="bed" dbkey="hg17" value="6.bed" />
44      <param name="score_source" value="cached" />
45      <param name="datasets" value="/galaxy/data/binned_scores/hg17/phastcons_encode_sep2005_tba" />
46      <output name="out_file1" file="aggregate_binned_scores_in_intervals.out" />
47    </test>
    <!-- Cached scores, hg18 intervals. -->
48    <test>
49      <param name="input1" ftype="bed" dbkey="hg18" value="9_hg18.bed" />
50      <param name="score_source" value="cached" />
51      <param name="datasets" value="/galaxy/data/binned_scores/hg18/phastCons17way/ba" />
52      <output name="out_file1" file="aggregate_binned_scores_in_intervals2.interval" />
53    </test>
    <!-- Scores taken from a wiggle dataset with the same build as the intervals. -->
54    <test>
55      <param name="input1" ftype="bed" dbkey="hg17" value="6.bed" />
56      <param name="score_source" value="user" />
57      <param name="input2" ftype="wig" dbkey="hg17" value="aggregate_binned_scores_3.wig" />
58      <output name="out_file1" file="aggregate_binned_scores_in_intervals3.out" />
59    </test>
60  </tests>
61  <help>
62
63.. class:: warningmark
64
65This tool currently only has cached data for genome builds hg16, hg17 and hg18. However, you may use your own datapoint (wiggle) files, such as those available from UCSC. If you are trying to use your own datapoint file and it does not appear as an option, make sure that your interval file and your datapoint file are set to the same genome build.
66
67.. class:: warningmark
68
69This tool assumes that the input dataset is in interval format and contains at least a chrom column, a start column and an end column.  These 3 columns can be dispersed throughout any number of other data columns.
70
71-----
72
73.. class:: infomark
74
75**TIP:** Computing summary information may throw an exception if the data type (e.g., string, integer) of a column value on some line is not appropriate for the computation (e.g., attempting numerical calculations on strings).  If an exception is thrown when computing summary information for a line, that line is skipped as invalid for the computation.  The number of skipped invalid lines is reported in the resulting history item as a "Data issue".
76
77-----
78
79**Syntax**
80
81This tool appends columns of summary information for each interval matched against a selected dataset.  For each interval, the average, minimum and maximum of the data falling within the interval are computed.
82
83- Several quantitative scores are provided for the ENCODE regions.
84
85  - Various Scores
86      - Regulatory Potential
87      - Neutral rate (Ancestral Repeats)
88      - GC fraction
89  - Conservation Scores
90      - PhastCons
91      - binCons
92      - GERP
93
94-----
95
96**Example**
97
98If your original data has the following format:
99
100+------+-----+-----+---+------+
101|other1|chrom|start|end|other2|
102+------+-----+-----+---+------+
103
104and you choose to aggregate phastCons scores, your output will look like this:
105
106+------+-----+-----+---+------+---+---+---+
107|other1|chrom|start|end|other2|avg|min|max|
108+------+-----+-----+---+------+---+---+---+
109
110where:
111
112* **avg** - average phastCons score for each region
113* **min** - minimum phastCons score for each region
114* **max** - maximum phastCons score for each region
115
116  </help>
117</tool>
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。