| 1 | <tool id="cshl_word_list_grep" name="Select lines"> |
|---|
| 2 | <description>by word list</description> |
|---|
| 3 | <command interpreter="perl"> |
|---|
| 4 | word_list_grep.pl |
|---|
| 5 | #if $searchwhere.choice == "column": |
|---|
| 6 | -c $searchwhere.column |
|---|
| 7 | #end if |
|---|
| 8 | -o $output |
|---|
| 9 | $inverse |
|---|
| 10 | $caseinsensitive |
|---|
| 11 | $wholewords |
|---|
| 12 | $skip_first_line |
|---|
| 13 | $wordlist |
|---|
| 14 | $input |
|---|
| 15 | </command> |
|---|
| 16 | |
|---|
| 17 | <inputs> |
|---|
| 18 | <param name="input" format="txt" type="data" label="input file" /> |
|---|
| 19 | <param name="wordlist" format="txt" type="data" label="word list file" /> |
|---|
| 20 | |
|---|
| 21 | |
|---|
| 22 | <param name="inverse" type="boolean" checked="false" truevalue="-v" falsevalue="" label="Inverse filter" |
|---|
| 23 | help="Report lines NOT matching the word list" /> |
|---|
| 24 | |
|---|
| 25 | <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Case-Insensitive search" |
|---|
| 26 | help="" /> |
|---|
| 27 | |
|---|
| 28 | <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue="" label="find whole-words" |
|---|
| 29 | help="ignore partial matches (e.g. 'apple' will not match 'snapple') " /> |
|---|
| 30 | |
|---|
| 31 | <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Ignore first line" |
|---|
| 32 | help="Select this option if the first line contains column headers. First line will not be filtered. " /> |
|---|
| 33 | |
|---|
| 34 | <conditional name="searchwhere"> |
|---|
| 35 | <param name="choice" type="select" label="Search words in"> |
|---|
| 36 | <option value="line" selected="true">entire line</option> |
|---|
| 37 | <option value="column">specific column</option> |
|---|
| 38 | </param> |
|---|
| 39 | |
|---|
| 40 | <when value="line"> |
|---|
| 41 | </when> |
|---|
| 42 | |
|---|
| 43 | <when value="column"> |
|---|
| 44 | <param name="column" label="in column" type="data_column" data_ref="input" accept_default="true" /> |
|---|
| 45 | </when> |
|---|
| 46 | </conditional> |
|---|
| 47 | |
|---|
| 48 | </inputs> |
|---|
| 49 | |
|---|
| 50 | <outputs> |
|---|
| 51 | <data name="output" format="input" metadata_source="input" /> |
|---|
| 52 | </outputs> |
|---|
| 53 | |
|---|
| 54 | <help> |
|---|
| 55 | **What it does** |
|---|
| 56 | |
|---|
| 57 | This tool selects lines that match words from a word list. |
|---|
| 58 | |
|---|
| 59 | -------- |
|---|
| 60 | |
|---|
| 61 | **Example** |
|---|
| 62 | |
|---|
| 63 | Input file (UCSC's rmsk track from dm3):: |
|---|
| 64 | |
|---|
| 65 | 585 787 66 241 11 chrXHet 2860 3009 -201103 - DNAREP1_DM LINE Penelope 0 594 435 1 |
|---|
| 66 | 585 1383 78 220 0 chrXHet 3012 3320 -200792 - DNAREP1_DM LINE Penelope -217 377 2 1 |
|---|
| 67 | 585 244 103 0 0 chrXHet 3737 3776 -200336 - DNAREP1_DM LINE Penelope -555 39 1 1 |
|---|
| 68 | 585 2270 83 144 0 chrXHet 7907 8426 -195686 + DNAREP1_DM LINE Penelope 1 594 0 1 |
|---|
| 69 | 585 617 189 73 68 chrXHet 10466 10671 -193441 + DNAREP1_DM LINE Penelope 368 573 -21 1 |
|---|
| 70 | 586 1122 71 185 0 chrXHet 173138 173322 -30790 - PROTOP DNA P -4033 447 230 1 |
|---|
| 71 | ... |
|---|
| 72 | ... |
|---|
| 73 | |
|---|
| 74 | |
|---|
| 75 | Word list file:: |
|---|
| 76 | |
|---|
| 77 | STALKER |
|---|
| 78 | PROTOP |
|---|
| 79 | |
|---|
| 80 | |
|---|
| 81 | |
|---|
| 82 | Output file (searching in column 11):: |
|---|
| 83 | |
|---|
| 84 | 586 1122 71 185 0 chrXHet 173138 173322 -30790 - PROTOP DNA P -4033 447 230 1 |
|---|
| 85 | 586 228 162 0 0 chrXHet 181026 181063 -23049 + STALKER4_I LTR Gypsy 9 45 -6485 1 |
|---|
| 86 | 585 245 105 26 0 chr3R 41609 41647 -27863406 + PROTOP_B DNA P 507 545 -608 4 |
|---|
| 87 | 586 238 91 0 0 chr3R 140224 140257 -27764796 - PROTOP_B DNA P -617 536 504 4 |
|---|
| 88 | ... |
|---|
| 89 | ... |
|---|
| 90 | |
|---|
| 91 | (With **find whole-words** not selected, *PROTOP* also matched *PROTOP_B* and *STALKER* matched *STALKER4_I*.) |
|---|
| 92 | |
|---|
| 93 | |
|---|
| 94 | |
|---|
| 95 | |
|---|
| 96 | Output file (searching in column 11, and whole-words only):: |
|---|
| 97 | |
|---|
| 98 | 586 670 90 38 57 chrXHet 168356 168462 -35650 - PROTOP DNA P -459 4021 3918 1 |
|---|
| 99 | 586 413 139 70 0 chrXHet 168462 168548 -35564 - PROTOP DNA P -3406 1074 983 1 |
|---|
| 100 | 586 1122 71 185 0 chrXHet 173138 173322 -30790 - PROTOP DNA P -4033 447 230 1 |
|---|
| 101 | ... |
|---|
| 102 | ... |
|---|
| 103 | |
|---|
| 104 | </help> |
|---|
| 105 | |
|---|
| 106 | </tool> |
|---|