| 1 | <tool id="hgv_lps" name="LPS" version="1.0.0"> | 
|---|
| 2 | <description>LASSO-Patternsearch algorithm</description> | 
|---|
| 3 | <!-- Command line: runs lps_tool_wrapper.sh with five positional arguments (lambda_fac, input dataset, label column, results file, log file) followed by option-name/value pairs. "Initialization 0" is always passed; every other pair is emitted only when the advanced options are shown, and the Continuation and Newton sub-options only when their selector is "1". --> | 
|---|
| 4 | <command interpreter="bash"> | 
|---|
| 5 | lps_tool_wrapper.sh $lambda_fac $input_file $label_column $output_file $log_file | 
|---|
| 6 | Initialization 0 | 
|---|
| 7 | #if $advanced.options == "true": | 
|---|
| 8 | Sample $advanced.sample | 
|---|
| 9 | Verbosity $advanced.verbosity | 
|---|
| 10 | Standardize $advanced.standardize | 
|---|
| 11 | initialLambda $advanced.initialLambda | 
|---|
| 12 | #if $advanced.continuation.continuation == "1": | 
|---|
| 13 | Continuation $advanced.continuation.continuation | 
|---|
| 14 | continuationSteps $advanced.continuation.continuationSteps | 
|---|
| 15 | accurateIntermediates $advanced.continuation.accurateIntermediates | 
|---|
| 16 | #end if | 
|---|
| 17 | printFreq $advanced.printFreq | 
|---|
| 18 | #if $advanced.newton.newton == "1": | 
|---|
| 19 | Newton $advanced.newton.newton | 
|---|
| 20 | NewtonThreshold $advanced.newton.newtonThreshold | 
|---|
| 21 | #end if | 
|---|
| 22 | HessianSampleFraction $advanced.hessianSampleFraction | 
|---|
| 23 | BB 0 | 
|---|
| 24 | Monotone 0 | 
|---|
| 25 | FullGradient $advanced.fullGradient | 
|---|
| 26 | GradientFraction $advanced.gradientFraction | 
|---|
| 27 | InitialAlpha $advanced.initialAlpha | 
|---|
| 28 | AlphaIncrease $advanced.alphaIncrease | 
|---|
| 29 | AlphaDecrease $advanced.alphaDecrease | 
|---|
| 30 | AlphaMax $advanced.alphaMax | 
|---|
| 31 | c1 $advanced.c1 | 
|---|
| 32 | MaxIter $advanced.maxIter | 
|---|
| 33 | StopTol $advanced.stopTol | 
|---|
| 34 | IntermediateTol $advanced.intermediateTol | 
|---|
| 35 | FinalOnly $advanced.finalOnly | 
|---|
| 36 | #end if | 
|---|
| 37 | </command> | 
|---|
| 38 |  | 
|---|
| 39 | <inputs> | 
|---|
| 40 | <param name="input_file" type="data" format="tabular" label="Dataset"/> | 
|---|
| 41 | <param name="label_column" type="data_column" data_ref="input_file" numerical="true" label="Label column" help="Column containing outcome labels: +1 or -1."/> | 
|---|
| 42 | <param name="lambda_fac" label="Lambda_fac" type="float" value="0.03" help="Target value of the regularization parameter, expressed as a fraction of the calculated lambda_max."> | 
|---|
| 43 | <validator type="in_range" message="0.00 &lt; lambda_fac &lt;= 1.00" min="0.00" max="1.00"/> | 
|---|
| 44 | </param> | 
|---|
| 45 | <conditional name="advanced"> | 
|---|
| 46 | <param name="options" type="select" label="Advanced Options"> | 
|---|
| 47 | <option value="false" selected="true">Hide advanced options</option> | 
|---|
| 48 | <option value="true">Show advanced options</option> | 
|---|
| 49 | </param> | 
|---|
| 50 | <when value="false"> | 
|---|
| 51 | <!-- no options --> | 
|---|
| 52 | </when> | 
|---|
| 53 | <when value="true"> | 
|---|
| 54 | <!-- HARDCODED: 'Sample' we don't support passing an array --> | 
|---|
| 55 | <param name="sample" type="float" value="1.0" label="Sample fraction" help="Sample this fraction of the data set."> | 
|---|
| 56 | <validator type="in_range" message="0.0 &lt;= sample &lt;= 1.0" min="0.0" max="1.0"/> | 
|---|
| 57 | </param> | 
|---|
| 58 | <!-- HARDCODED: 'Initialization' = 0 :: Initialize at beta=0 --> | 
|---|
| 59 | <param name="verbosity" type="select" format="integer" label="Verbosity"> | 
|---|
| 60 | <option value="0" selected="true">Little output</option> | 
|---|
| 61 | <option value="1">More output</option> | 
|---|
| 62 | <option value="2">Still more output</option> | 
|---|
| 63 | </param> | 
|---|
| 64 | <param name="standardize" type="select" format="integer" label="Standardize" help="Scales and shifts each column so that it has mean zero and variance 1."> | 
|---|
| 65 | <option value="0" selected="true">Don't standardize</option> | 
|---|
| 66 | <option value="1">Standardize</option> | 
|---|
| 67 | </param> | 
|---|
| 68 | <param name="initialLambda" type="float" value="0.8" label="Initial lambda" help="First value of lambda to be used in the continuation scheme, expressed as a fraction of lambda_max."> | 
|---|
| 69 | <validator type="in_range" message="0.0 &lt; initialLambda &lt; 1.0" min="0.0" max="1.0"/> | 
|---|
| 70 | </param> | 
|---|
| 71 | <conditional name="continuation"> | 
|---|
| 72 | <param name="continuation" type="select" format="integer" label="Continuation" help="Use continuation strategy to start with a larger value of lambda, decreasing it successively to lambda_fac."> | 
|---|
| 73 | <option value="0" selected="true">Don't use continuation</option> | 
|---|
| 74 | <option value="1">Use continuation</option> | 
|---|
| 75 | </param> | 
|---|
| 76 | <when value="0"> | 
|---|
| 77 | <!-- no options --> | 
|---|
| 78 | </when> | 
|---|
| 79 | <when value="1"> | 
|---|
| 80 | <param name="continuationSteps" type="integer" value="5" label="Continuation steps" help="Number of lambda values to use in continuation &lt;em&gt;prior&lt;/em&gt; to target value lambda_fac."/> | 
|---|
| 81 |  | 
|---|
| 82 | <param name="accurateIntermediates" type="select" format="integer" label="Accurate intermediates" help="Indicates whether accurate solutions are required for lambda values other than the target value lambda_fac."> | 
|---|
| 83 | <option value="0" selected="true">Don't need accurate intermediates</option> | 
|---|
| 84 | <option value="1">Calculate accurate intermediates</option> | 
|---|
| 85 | </param> | 
|---|
| 86 | </when> | 
|---|
| 87 | </conditional> <!-- name="continuation" --> | 
|---|
| 88 | <param name="printFreq" type="integer" value="1" label="Print frequency" help="Print a progress report every NI iterations, where NI is the supplied value of this parameter."> | 
|---|
| 89 | <validator type="in_range" message="printFreq >= 1" min="1"/> | 
|---|
| 90 | </param> | 
|---|
| 91 | <conditional name="newton"> | 
|---|
| 92 | <param name="newton" type="select" format="integer" label="Projected Newton steps"> | 
|---|
| 93 | <option value="0" selected="true">No Newton steps</option> | 
|---|
| 94 | <option value="1">Try projected Newton steps</option> | 
|---|
| 95 | </param> | 
|---|
| 96 | <when value="0"> | 
|---|
| 97 | <!-- no options --> | 
|---|
| 98 | </when> | 
|---|
| 99 | <when value="1"> | 
|---|
| 100 | <param name="newtonThreshold" type="integer" value="500" label="Newton threshold" help="Maximum size of free variable subvector for Newton."/> | 
|---|
| 101 | </when> | 
|---|
| 102 | </conditional> | 
|---|
| 103 | <param name="hessianSampleFraction" type="float" value="1.0" label="Hessian sample fraction" help="Fraction of terms to use in approximate Hessian calculation."> | 
|---|
| 104 | <validator type="in_range" message="0.01 &lt; hessianSampleFraction &lt;= 1.00" min="0.01" max="1.00"/> | 
|---|
| 105 | </param> | 
|---|
| 106 | <!-- HARDCODED: 'BB' = 0 :: don't use Barzilai-Borwein steps --> | 
|---|
| 107 | <!-- HARDCODED: 'Monotone' = 0 :: don't force monotonicity --> | 
|---|
| 108 | <param name="fullGradient" type="select" format="integer" label="Partial gradient vector selection"> | 
|---|
| 109 | <option value="0">Use randomly selected partial gradient, including current active components ("biased")</option> | 
|---|
| 110 | <option value="1">Use full gradient vector at every step</option> | 
|---|
| 111 | <option value="2">Randomly selected partial gradient, without regard to current active set ("unbiased")</option> | 
|---|
| 112 | </param> | 
|---|
| 113 | <param name="gradientFraction" type="float" value="0.1" label="Gradient fraction" help="Fraction of inactive gradient vector to evaluate."> | 
|---|
| 114 | <validator type="in_range" message="0.0 &lt; gradientFraction &lt;= 1" min="0.0" max="1.0"/> | 
|---|
| 115 | </param> | 
|---|
| 116 | <param name="initialAlpha" type="float" value="1.0" label="Initial value of alpha"/> | 
|---|
| 117 | <param name="alphaIncrease" type="float" value="2.0" label="Alpha increase" help="Factor by which to increase alpha after descent not obtained."/> | 
|---|
| 118 | <param name="alphaDecrease" type="float" value="0.8" label="Alpha decrease" help="Factor by which to decrease alpha after successful first-order step."/> | 
|---|
| 119 | <param name="alphaMax" type="float" value="1e12" label="Alpha max" help="Maximum value of alpha; terminate with error if we exceed this."/> | 
|---|
| 120 | <param name="c1" type="float" value="1e-3" label="c1" help="Parameter defining the margin by which the first-order step is required to decrease before being taken."> | 
|---|
| 121 | <validator type="in_range" message="0.0 &lt; c1 &lt; 1.0" min="0.0" max="1.0"/> | 
|---|
| 122 | </param> | 
|---|
| 123 | <param name="maxIter" type="integer" value="10000" label="Maximum number of iterations" help="Terminate with error if we exceed this."/> | 
|---|
| 124 | <param name="stopTol" type="float" value="1e-6" label="Stop tolerance" help="Convergence tolerance for target value of lambda."/> | 
|---|
| 125 | <param name="intermediateTol" type="float" value="1e-4" label="Intermediate tolerance" help="Convergence tolerance for intermediate values of lambda."/> | 
|---|
| 126 | <param name="finalOnly" type="select" format="integer" label="Final only"> | 
|---|
| 127 | <option value="0" selected="true">Return information for all intermediate values</option> | 
|---|
| 128 | <option value="1">Just return information at the last lambda</option> | 
|---|
| 129 | </param> | 
|---|
| 130 | </when> <!-- value="true" --> | 
|---|
| 131 | </conditional> <!-- name="advanced" --> | 
|---|
| 132 | </inputs> | 
|---|
| 133 | <!-- Two output datasets: a tabular results file and a text log. Their names are the $output_file and $log_file variables passed as the fourth and fifth arguments on the command line. --> | 
|---|
| 134 | <outputs> | 
|---|
| 135 | <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: results"/> | 
|---|
| 136 | <data name="log_file" format="txt" label="${tool.name} on ${on_string}: log"/> | 
|---|
| 137 | </outputs> | 
|---|
| 138 |  | 
|---|
| 139 | <requirements> | 
|---|
| 140 | <requirement type="binary">lps_tool</requirement> | 
|---|
| 141 | </requirements> | 
|---|
| 142 |  | 
|---|
| 143 | <tests> | 
|---|
| 144 | <test> | 
|---|
| 145 | <param name="input_file" value="lps_arrhythmia.tabular"/> | 
|---|
| 146 | <param name="label_column" value="280"/> | 
|---|
| 147 | <param name="lambda_fac" value="0.03"/> | 
|---|
| 148 | <param name="options" value="true"/> | 
|---|
| 149 | <param name="sample" value="1.0"/> | 
|---|
| 150 | <param name="verbosity" value="1"/> | 
|---|
| 151 | <param name="standardize" value="0"/> | 
|---|
| 152 | <param name="initialLambda" value="0.9"/> | 
|---|
| 153 | <param name="continuation" value="1"/> | 
|---|
| 154 | <param name="continuationSteps" value="10"/> | 
|---|
| 155 | <param name="accurateIntermediates" value="0"/> | 
|---|
| 156 | <param name="printFreq" value="1"/> | 
|---|
| 157 | <param name="newton" value="1"/> | 
|---|
| 158 | <param name="newtonThreshold" value="500"/> | 
|---|
| 159 | <param name="hessianSampleFraction" value="1.0"/> | 
|---|
| 160 | <param name="fullGradient" value="1"/> | 
|---|
| 161 | <param name="gradientFraction" value="0.5"/> | 
|---|
| 162 | <param name="initialAlpha" value="1.0"/> | 
|---|
| 163 | <param name="alphaIncrease" value="2.0"/> | 
|---|
| 164 | <param name="alphaDecrease" value="0.8"/> | 
|---|
| 165 | <param name="alphaMax" value="1e12"/> | 
|---|
| 166 | <param name="c1" value="1e-3"/> | 
|---|
| 167 | <param name="maxIter" value="2500"/> | 
|---|
| 168 | <param name="stopTol" value="1e-6"/> | 
|---|
| 169 | <param name="intermediateTol" value="1e-6"/> | 
|---|
| 170 | <param name="finalOnly" value="0"/> | 
|---|
| 171 | <output name="output_file" file="lps_arrhythmia_beta.tabular"/> | 
|---|
| 172 | <output name="log_file" file="lps_arrhythmia_log.txt"/> | 
|---|
| 173 | </test> | 
|---|
| 174 | </tests> | 
|---|
| 175 |  | 
|---|
| 176 | <help> | 
|---|
| 177 | **Dataset formats** | 
|---|
| 178 |  | 
|---|
| 179 | The input and output datasets are tabular_.  The columns are described below. | 
|---|
| 180 | There is a second output dataset (a log) that is in text_ format. | 
|---|
| 181 | (`Dataset missing?`_) | 
|---|
| 182 |  | 
|---|
| 183 | .. _tabular: ./static/formatHelp.html#tab | 
|---|
| 184 | .. _text: ./static/formatHelp.html#text | 
|---|
| 185 | .. _Dataset missing?: ./static/formatHelp.html | 
|---|
| 186 |  | 
|---|
| 187 | ----- | 
|---|
| 188 |  | 
|---|
| 189 | **What it does** | 
|---|
| 190 |  | 
|---|
| 191 | The LASSO-Patternsearch algorithm fits your dataset to an L1-regularized | 
|---|
| 192 | logistic regression model.  A benefit of using L1-regularization is | 
|---|
| 193 | that it typically yields a weight vector with relatively few non-zero | 
|---|
| 194 | coefficients. | 
|---|
| 195 |  | 
|---|
| 196 | For example, say you have a dataset containing M rows (subjects) | 
|---|
| 197 | and N columns (attributes) where one of these N attributes is binary, | 
|---|
| 198 | indicating whether or not the subject has some property of interest P. | 
|---|
| 199 | In simple terms, LPS calculates a weight for each of the other attributes | 
|---|
| 200 | in your dataset.  This weight indicates how "relevant" that attribute | 
|---|
| 201 | is for predicting whether or not a given subject has property P. | 
|---|
| 202 | The L1-regularization causes most of these weights to be equal to zero, | 
|---|
| 203 | which means LPS will find a "small" subset of the remaining N-1 attributes | 
|---|
| 204 | in your dataset that can be used to predict P. | 
|---|
| 205 |  | 
|---|
| 206 | In other words, LPS can be used for feature selection. | 
|---|
| 207 |  | 
|---|
| 208 | The input dataset is tabular, and must contain a label column which | 
|---|
| 209 | indicates whether or not a given row has property P.  In the current | 
|---|
| 210 | version of this tool, P must be encoded using +1 and -1.  The Lambda_fac | 
|---|
| 211 | parameter ranges from 0 to 1, and controls how sparse the weight | 
|---|
| 212 | vector will be.  At the low end, when Lambda_fac = 0, there will be | 
|---|
| 213 | no regularization.  At the high end, when Lambda_fac = 1, there will be | 
|---|
| 214 | "too much" regularization, and all of the weights will equal zero. | 
|---|
| 215 |  | 
|---|
| 216 | The LPS tool creates two output datasets.  The first, called the results | 
|---|
| 217 | file, is a tabular dataset containing one column of weights for each | 
|---|
| 218 | value of the regularization parameter lambda that was tried.  The weight | 
|---|
| 219 | columns are in order from left to right by decreasing values of lambda. | 
|---|
| 220 | The first N-1 rows in each column are the weights for the N-1 attributes | 
|---|
| 221 | in your input dataset.  The final row is a constant, the intercept. | 
|---|
| 222 |  | 
|---|
| 223 | Let **x** be a row from your input dataset and let **b** be a column | 
|---|
| 224 | from the results file.  To compute the probability that row **x** has | 
|---|
| 225 | a label value of +1: | 
|---|
| 226 |  | 
|---|
| 227 | Probability(row **x** has label value = +1) = 1 / [1 + exp{**x** \* **b**\[1..N-1\] + **b**\[N\]}] | 
|---|
| 228 |  | 
|---|
| 229 | where **x** \* **b**\[1..N-1\] represents matrix multiplication. | 
|---|
| 230 |  | 
|---|
| 231 | The second output dataset, called the log file, is a text file which | 
|---|
| 232 | contains additional data about the fitted L1-regularized logistic | 
|---|
| 233 | regression model.  These data include the number of features, the | 
|---|
| 234 | computed value of lambda_max, the actual values of lambda used, the | 
|---|
| 235 | optimal values of the log-likelihood and regularized log-likelihood | 
|---|
| 236 | functions, the number of non-zeros, and the number of iterations. | 
|---|
| 237 |  | 
|---|
| 238 | Website: http://pages.cs.wisc.edu/~swright/LPS/ | 
|---|
| 239 |  | 
|---|
| 240 | ----- | 
|---|
| 241 |  | 
|---|
| 242 | **Example** | 
|---|
| 243 |  | 
|---|
| 244 | - input file:: | 
|---|
| 245 |  | 
|---|
| 246 | +1   1   0   0   0   0   1   0   1   1   ... | 
|---|
| 247 | +1   1   1   1   0   0   1   0   1   1   ... | 
|---|
| 248 | +1   1   0   1   0   1   0   1   0   1   ... | 
|---|
| 249 | etc. | 
|---|
| 250 |  | 
|---|
| 251 | - output results file:: | 
|---|
| 252 |  | 
|---|
| 253 | 0 | 
|---|
| 254 | 0 | 
|---|
| 255 | 0 | 
|---|
| 256 | 0 | 
|---|
| 257 | 0.025541 | 
|---|
| 258 | etc. | 
|---|
| 259 |  | 
|---|
| 260 | - output log file:: | 
|---|
| 261 |  | 
|---|
| 262 | Data set has 100 vectors with 50 features. | 
|---|
| 263 | calculateLambdaMax: n=50, m=100, m+=50, m-=50 | 
|---|
| 264 | computed value of lambda_max: 5.0000e-01 | 
|---|
| 265 |  | 
|---|
| 266 | lambda=2.96e-02 solution: | 
|---|
| 267 | optimal log-likelihood function value: 6.46e-01 | 
|---|
| 268 | optimal *regularized* log-likelihood function value: 6.79e-01 | 
|---|
| 269 | number of nonzeros at the optimum:      5 | 
|---|
| 270 | number of iterations required:     43 | 
|---|
| 271 | etc. | 
|---|
| 272 |  | 
|---|
| 273 | ----- | 
|---|
| 274 |  | 
|---|
| 275 | **References** | 
|---|
| 276 |  | 
|---|
| 277 | Koh K, Kim S-J, Boyd S. (2007) | 
|---|
| 278 | An interior-point method for large-scale l1-regularized logistic regression. | 
|---|
| 279 | Journal of Machine Learning Research. 8:1519-1555. | 
|---|
| 280 |  | 
|---|
| 281 | Shi W, Wahba G, Wright S, Lee K, Klein R, Klein B. (2008) | 
|---|
| 282 | LASSO-Patternsearch algorithm with application to ophthalmology and genomic data. | 
|---|
| 283 | Stat Interface. 1(1):137-153. | 
|---|
| 284 |  | 
|---|
| 285 | <!-- | 
|---|
| 286 | Wright S, Nowak R, Figueiredo M. (2009) | 
|---|
| 287 | Sparse reconstruction by separable approximation. | 
|---|
| 288 | IEEE Transactions on Signal Processing. 57:2479-2493. | 
|---|
| 289 |  | 
|---|
| 290 | Shi J, Yin W, Osher S, Sajda P. (2010) | 
|---|
| 291 | A fast hybrid algorithm for large scale l1-regularized logistic regression. | 
|---|
| 292 | Journal of Machine Learning Research. 11:713-741. | 
|---|
| 293 |  | 
|---|
| 294 | Byrd R, Chin G, Neveitt W, Nocedal J. (2010) | 
|---|
| 295 | On the use of stochastic Hessian information in unconstrained optimization. | 
|---|
| 296 | Technical Report. Northwestern University. June 16, 2010. | 
|---|
| 297 |  | 
|---|
| 298 | Wright S. (2010) | 
|---|
| 299 | Accelerated block-coordinate relaxation for regularized optimization. | 
|---|
| 300 | Technical Report. University of Wisconsin. August 10, 2010. | 
|---|
| 301 | --> | 
|---|
| 302 |  | 
|---|
| 303 | </help> | 
|---|
| 304 | </tool> | 
|---|