[2] | 1 | <tool id="rgfakePed1" name="Null genotypes"> |
---|
| 2 | <description>for testing</description> |
---|
| 3 | <code file="rgfakePed_code.py"/> |
---|
| 4 | <command interpreter="python">rgfakePed.py --title '$title1' |
---|
| 5 | -o '$out_file1' -p '$out_file1.files_path' -c '$ncases' -n '$ntotal' |
---|
| 6 | -s '$nsnp' -w '$lowmaf' -v '$missingValue' -l '$outFormat' |
---|
| 7 | -d '$mafdist' -m '$missingRate' -M '$mendelRate' </command> |
---|
| 8 | <inputs> |
---|
| 9 | <page> |
---|
| 10 | |
---|
| 11 | <param name="title1" |
---|
| 12 | type="text" |
---|
| 13 | help="Name for outputs from this job" |
---|
| 14 | label="Descriptive short name"/> |
---|
| 15 | <param name="ntotal" |
---|
| 16 | type="integer" value = "200" |
---|
| 17 | help="N total: total number of subjects" |
---|
| 18 | label="Create this total N subjects"/> |
---|
| 19 | <param name="ncases" type="integer" |
---|
| 20 | value="100" |
---|
| 21 | help = "N cases: Independent subjects with status set to 2. Set 0 for family data (NSubj/3 trios)" |
---|
| 22 | label="Total N Cases (0=generate family data - trios)"/> |
---|
| 23 | <param name="nsnp" |
---|
| 24 | type="integer" value="1000" |
---|
| 25 | help="nsnp: total number of markers" |
---|
| 26 | label="Total N SNP"/> |
---|
| 27 | <param name="lowmaf" type="float" |
---|
| 28 | value="0.01" |
---|
| 29 | help = "Lower limit for MAF distribution" |
---|
| 30 | label="Lower MAF limit (default=1%)"/> |
---|
| 31 | <param name="mafdist" |
---|
| 32 | type="select" |
---|
| 33 | help="Choose a MAF distribution" |
---|
| 34 | label="SNP Minor Allele Frequency distribution"> |
---|
| 35 | <option value="U" selected="true">Uniform</option> |
---|
| 36 | <option value="T">Triangular (more low frequency SNPs)</option> |
---|
| 37 | </param> |
---|
| 38 | <param name="outFormat" |
---|
| 39 | type="select" |
---|
| 40 | help="Choose an output format" |
---|
| 41 | label="Output format file type - linkage ped or fbat ped"> |
---|
| 42 | <option value="L" selected="true">Linkage format - separate .map file</option> |
---|
| 43 | <option value="F">fbat style - marker names in a header row</option> |
---|
| 44 | </param> |
---|
| 45 | <param name="missingRate" type="float" |
---|
| 46 | value="0.05" |
---|
| 47 | help = "Fraction of genotypes to be randomly set missing" |
---|
| 48 | label="Missing genotype call fraction"/> |
---|
| 49 | <param name="mendelRate" |
---|
| 50 | type="float" value = "0.05" |
---|
| 51 | help="(family data) Fraction of apparently non-Mendelian transmission patterns" |
---|
| 52 | label="Mendel error transmission rate"/> |
---|
| 53 | |
---|
| 54 | <param name="missingValue" type="text" size="1" |
---|
| 55 | value='0' |
---|
| 56 | help = "Missing allele value" |
---|
| 57 | label="Missing value for an allele for the output ped file"/> |
---|
| 58 | |
---|
| 59 | </page> |
---|
| 60 | </inputs> |
---|
| 61 | |
---|
| 62 | <outputs> |
---|
| 63 | <data format="lped" name="out_file1" /> |
---|
| 64 | </outputs> |
---|
| 65 | <tests> |
---|
| 66 | <test> |
---|
| 67 | <param name='title1' value='rgfakePedtest1' /> |
---|
| 68 | <param name="ntotal" value="40" /> |
---|
| 69 | <param name="ncases" value="20" /> |
---|
| 70 | <param name="nsnp" value="10" /> |
---|
| 71 | <param name="lowmaf" value="0" /> |
---|
| 72 | <param name="mafdist" value="T" /> |
---|
| 73 | <param name="outFormat" value="L" /> |
---|
| 74 | <param name="missingRate" value="0" /> |
---|
| 75 | <param name="mendelRate" value="0" /> |
---|
| 76 | <param name="missingValue" value="0" /> |
---|
| 77 | <output name='out_file1' file='rgtestouts/rgfakePed/rgfakePedtest1.lped' ftype='lped' compare="diff" lines_diff='5'> |
---|
| 78 | <extra_files type="file" name='rgfakePedtest1.ped' value="rgtestouts/rgfakePed/rgfakePedtest1.ped" compare="diff" lines_diff='80'/> |
---|
| 79 | <extra_files type="file" name='rgfakePedtest1.map' value="rgtestouts/rgfakePed/rgfakePedtest1.map" compare="diff" /> |
---|
| 80 | </output> |
---|
| 81 | </test> |
---|
| 82 | </tests> |
---|
| 83 | <help> |
---|
| 84 | .. class:: infomark |
---|
| 85 | |
---|
| 86 | This tool allows you to generate an arbitrary (sort of) |
---|
| 87 | synthetic genotype file (no attempt at LD - the markers are independent) |
---|
| 88 | with optional missingness, Mendel errors, minor allele frequency settings and family structure. |
---|
| 89 | These might be used for testing under |
---|
| 90 | the null hypothesis of no association and are certainly useful for |
---|
| 91 | scale testing. |
---|
| 92 | |
---|
| 93 | Note that although it runs reasonably fast given it's a script, generating a large data set takes |
---|
| 94 | a while. An hour or so should get you a reasonably sized (3GB) simulated null data set. |
---|
| 95 | |
---|
| 96 | A better simulator can easily be swapped in with this tool interface. |
---|
| 97 | |
---|
| 98 | ----- |
---|
| 99 | |
---|
| 100 | .. class:: warningmark |
---|
| 101 | |
---|
| 102 | This tool is very experimental. |
---|
| 103 | |
---|
| 104 | **Attribution** |
---|
| 105 | Designed and written for the Rgenetics Galaxy tools |
---|
| 106 | copyright Ross Lazarus 2007 (ross.lazarus@gmail.com) |
---|
| 107 | Licensed under the terms of the LGPL |
---|
| 108 | as documented at http://www.gnu.org/licenses/lgpl.html |
---|
| 109 | |
---|
| 110 | </help> |
---|
| 111 | </tool> |
---|