1 | <tool id="rgfakePed1" name="Null genotypes"> |
---|
2 | <description>for testing</description> |
---|
3 | <code file="rgfakePed_code.py"/> |
---|
4 | <command interpreter="python">rgfakePed.py --title '$title1' |
---|
5 | -o '$out_file1' -p '$out_file1.files_path' -c '$ncases' -n '$ntotal' |
---|
6 | -s '$nsnp' -w '$lowmaf' -v '$missingValue' -l '$outFormat' |
---|
7 | -d '$mafdist' -m '$missingRate' -M '$mendelRate' </command> |
---|
8 | <inputs> |
---|
9 | <page> |
---|
10 | |
---|
11 | <param name="title1" |
---|
12 | type="text" |
---|
13 | help="Name for outputs from this job" |
---|
14 | label="Descriptive short name"/> |
---|
15 | <param name="ntotal" |
---|
16 | type="integer" value = "200" |
---|
17 | help="N total: total number of subjects" |
---|
18 | label="Create this total N subjects"/> |
---|
19 | <param name="ncases" type="integer" |
---|
20 | value="100" |
---|
21 | help = "N cases: Independent subjects with status set to 2. Set 0 for family data (NSubj/3 trios)" |
---|
22 | label="Total N Cases (0=generate family data - trios)"/> |
---|
23 | <param name="nsnp" |
---|
24 | type="integer" value="1000" |
---|
25 | help="nsnp: total number of markers" |
---|
26 | label="Total N SNP"/> |
---|
27 | <param name="lowmaf" type="float" |
---|
28 | value="0.01" |
---|
29 | help = "Lower limit for MAF distribution" |
---|
30 | label="Lower MAF limit (default=1%)"/> |
---|
31 | <param name="mafdist" |
---|
32 | type="select" |
---|
33 | help="Choose a MAF distribution" |
---|
34 | label="SNP Minor Allele Frequency distribution"> |
---|
35 | <option value="U" selected="true">Uniform</option> |
---|
36 | <option value="T">Triangular (more low frequency SNPs)</option> |
---|
37 | </param> |
---|
38 | <param name="outFormat" |
---|
39 | type="select" |
---|
40 | help="Choose an output format" |
---|
41 | label="Output format file type - linkage ped or fbat ped"> |
---|
42 | <option value="L" selected="true">Linkage format - separate .map file</option> |
---|
43 | <option value="F">fbat style - marker names in a header row</option> |
---|
44 | </param> |
---|
45 | <param name="missingRate" type="float" |
---|
46 | value="0.05" |
---|
47 | help = "Fraction of genotypes to be randomly set missing" |
---|
48 | label="Missing genotype call fraction"/> |
---|
49 | <param name="mendelRate" |
---|
50 | type="float" value = "0.05" |
---|
51 | help="(family data) Fraction of apparently non-Mendelian transmission patterns" |
---|
52 | label="Mendel error transmission rate"/> |
---|
53 | |
---|
54 | <param name="missingValue" type="text" size="1" |
---|
55 | value='0' |
---|
56 | help = "Missing allele value" |
---|
57 | label="Missing value for an allele for the output ped file"/> |
---|
58 | |
---|
59 | </page> |
---|
60 | </inputs> |
---|
61 | |
---|
62 | <outputs> |
---|
63 | <data format="lped" name="out_file1" /> |
---|
64 | </outputs> |
---|
65 | <tests> |
---|
66 | <test> |
---|
67 | <param name='title1' value='rgfakePedtest1' /> |
---|
68 | <param name="ntotal" value="40" /> |
---|
69 | <param name="ncases" value="20" /> |
---|
70 | <param name="nsnp" value="10" /> |
---|
71 | <param name="lowmaf" value="0" /> |
---|
72 | <param name="mafdist" value="T" /> |
---|
73 | <param name="outFormat" value="L" /> |
---|
74 | <param name="missingRate" value="0" /> |
---|
75 | <param name="mendelRate" value="0" /> |
---|
76 | <param name="missingValue" value="0" /> |
---|
77 | <output name='out_file1' file='rgtestouts/rgfakePed/rgfakePedtest1.lped' ftype='lped' compare="diff" lines_diff='5'> |
---|
78 | <extra_files type="file" name='rgfakePedtest1.ped' value="rgtestouts/rgfakePed/rgfakePedtest1.ped" compare="diff" lines_diff='80'/> |
---|
79 | <extra_files type="file" name='rgfakePedtest1.map' value="rgtestouts/rgfakePed/rgfakePedtest1.map" compare="diff" /> |
---|
80 | </output> |
---|
81 | </test> |
---|
82 | </tests> |
---|
83 | <help> |
---|
84 | .. class:: infomark |
---|
85 | |
---|
86 | This tool allows you to generate an arbitrary (sort of) |
---|
87 | synthetic genotype file (no attempt at LD - the markers are independent) |
---|
88 | with optional missingness, Mendel errors, minor allele frequency settings and family structure. |
---|
89 | These might be used for testing under |
---|
90 | the null hypothesis of no association and are certainly useful for |
---|
91 | scale testing. |
---|
92 | |
---|
93 | Note that although it runs reasonably fast given it's a script, generating a large data set takes |
---|
94 | a while. An hour or so should get you a reasonably sized (3GB) simulated null data set. |
---|
95 | |
---|
96 | A better simulator can easily be swapped in with this tool interface. |
---|
97 | |
---|
98 | ----- |
---|
99 | |
---|
100 | .. class:: warningmark |
---|
101 | |
---|
102 | This tool is very experimental. |
---|
103 | |
---|
104 | **Attribution** |
---|
105 | Designed and written for the Rgenetics Galaxy tools |
---|
106 | copyright Ross Lazarus 2007 (ross.lazarus@gmail.com) |
---|
107 | Licensed under the terms of the LGPL |
---|
108 | as documented at http://www.gnu.org/licenses/lgpl.html |
---|
109 | |
---|
110 | </help> |
---|
111 | </tool> |
---|