Context Navigation

lps.xml @ 2

リビジョン 2, 14.4 KB (コミッタ: hatakeyama, 14 年前)
import galaxy-central

Rev	行番号
[2]	1	<tool id="hgv_lps" name="LPS" version="1.0.0">
	2	<description>LASSO-Patternsearch algorithm</description>
	3	<!-- Command template: runs lps_tool_wrapper.sh under bash with lambda_fac, the input dataset, the label column, and the result and log paths, followed by "Name value" option pairs. Initialization is always 0; the other pairs are emitted only when advanced.options is "true" (BB and Monotone are fixed at 0), and the Continuation and Newton groups only when their own selector is "1". -->
	4	<command interpreter="bash">
	5	lps_tool_wrapper.sh $lambda_fac $input_file $label_column $output_file $log_file
	6	Initialization 0
	7	#if $advanced.options == "true":
	8	Sample $advanced.sample
	9	Verbosity $advanced.verbosity
	10	Standardize $advanced.standardize
	11	initialLambda $advanced.initialLambda
	12	#if $advanced.continuation.continuation == "1":
	13	Continuation $advanced.continuation.continuation
	14	continuationSteps $advanced.continuation.continuationSteps
	15	accurateIntermediates $advanced.continuation.accurateIntermediates
	16	#end if
	17	printFreq $advanced.printFreq
	18	#if $advanced.newton.newton == "1":
	19	Newton $advanced.newton.newton
	20	NewtonThreshold $advanced.newton.newtonThreshold
	21	#end if
	22	HessianSampleFraction $advanced.hessianSampleFraction
	23	BB 0
	24	Monotone 0
	25	FullGradient $advanced.fullGradient
	26	GradientFraction $advanced.gradientFraction
	27	InitialAlpha $advanced.initialAlpha
	28	AlphaIncrease $advanced.alphaIncrease
	29	AlphaDecrease $advanced.alphaDecrease
	30	AlphaMax $advanced.alphaMax
	31	c1 $advanced.c1
	32	MaxIter $advanced.maxIter
	33	StopTol $advanced.stopTol
	34	IntermediateTol $advanced.intermediateTol
	35	FinalOnly $advanced.finalOnly
	36	#end if
	37	</command>
	38
	39	<inputs>
	40	<param name="input_file" type="data" format="tabular" label="Dataset"/>
	41	<param name="label_column" type="data_column" data_ref="input_file" numerical="true" label="Label column" help="Column containing outcome labels: +1 or -1."/>
	42	<param name="lambda_fac" label="Lambda_fac" type="float" value="0.03" help="Target value of the regularization parameter, expressed as a fraction of the calculated lambda_max.">
	43	<validator type="in_range" message="0.00 &lt; lambda_fac &lt;= 1.00" min="0.00" max="1.00"/>
	44	</param> <!-- lambda_fac: checked against min=0.00 / max=1.00; the message escapes its less-than signs so the attribute stays well-formed -->
	45	<conditional name="advanced">
	46	<param name="options" type="select" label="Advanced Options">
	47	<option value="false" selected="true">Hide advanced options</option>
	48	<option value="true">Show advanced options</option>
	49	</param>
	50	<when value="false">
	51	<!-- no options -->
	52	</when>
	53	<when value="true">
	54	<!-- HARDCODED: 'Sample' we don't support passing an array -->
	55	<param name="sample" type="float" value="1.0" label="Sample fraction" help="Sample this fraction of the data set.">
	56	<validator type="in_range" message="0.0 &lt;= sample &lt;= 1.0" min="0.0" max="1.0"/>
	57	</param> <!-- sample: checked against min=0.0 / max=1.0; less-than signs in the message are escaped for well-formedness -->
	58	<!-- HARDCODED: 'Initialization' = 0 :: Initialize at beta=0 -->
	59	<param name="verbosity" type="select" format="integer" label="Verbosity">
	60	<option value="0" selected="true">Little output</option>
	61	<option value="1">More output</option>
	62	<option value="2">Still more output</option>
	63	</param>
	64	<param name="standardize" type="select" format="integer" label="Standardize" help="Scales and shifts each column so that it has mean zero and variance 1.">
	65	<option value="0" selected="true">Don't standardize</option>
	66	<option value="1">Standardize</option>
	67	</param>
	68	<param name="initialLambda" type="float" value="0.8" label="Initial lambda" help="First value of lambda to be used in the continuation scheme, expressed as a fraction of lambda_max.">
	69	<validator type="in_range" message="0.0 &lt; initialLambda &lt; 1.0" min="0.0" max="1.0"/>
	70	</param> <!-- initialLambda: checked against min=0.0 / max=1.0; less-than signs in the message are escaped for well-formedness -->
	71	<conditional name="continuation">
	72	<param name="continuation" type="select" format="integer" label="Continuation" help="Use continuation strategy to start with a larger value of lambda, decreasing it successively to lambda_fac.">
	73	<option value="0" selected="true">Don't use continuation</option>
	74	<option value="1">Use continuation</option>
	75	</param>
	76	<when value="0">
	77	<!-- no options -->
	78	</when>
	79	<when value="1">
	80	<param name="continuationSteps" type="integer" value="5" label="Continuation steps" help="Number of lambda values to use in continuation &lt;em&gt;prior&lt;/em&gt; to target value lambda_fac."/> <!-- emphasis markup in the help text is entity-escaped; raw tags are not allowed inside an attribute value -->
	81
	82	<param name="accurateIntermediates" type="select" format="integer" label="Accurate intermediates" help="Indicates whether accurate solutions are required for lambda values other than the target value lambda_fac.">
	83	<option value="0" selected="true">Don't need accurate intermediates</option>
	84	<option value="1">Calculate accurate intermediates</option>
	85	</param> <!-- accurateIntermediates: 0 (default) or 1, forwarded to the wrapper only when continuation is enabled -->
	86	</when>
	87	</conditional> <!-- name="continuation" -->
	88	<param name="printFreq" type="integer" value="1" label="Print frequency" help="Print a progress report every NI iterations, where NI is the supplied value of this parameter.">
	89	<validator type="in_range" message="printFreq >= 1" min="1"/>
	90	</param>
	91	<conditional name="newton">
	92	<param name="newton" type="select" format="integer" label="Projected Newton steps">
	93	<option value="0" selected="true">No Newton steps</option>
	94	<option value="1">Try projected Newton steps</option>
	95	</param>
	96	<when value="0">
	97	<!-- no options -->
	98	</when>
	99	<when value="1">
	100	<param name="newtonThreshold" type="integer" value="500" label="Newton threshold" help="Maximum size of free variable subvector for Newton."/>
	101	</when>
	102	</conditional>
	103	<param name="hessianSampleFraction" type="float" value="1.0" label="Hessian sample fraction" help="Fraction of terms to use in approximate Hessian calculation.">
	104	<validator type="in_range" message="0.01 &lt; hessianSampleFraction &lt;= 1.00" min="0.01" max="1.00"/>
	105	</param> <!-- hessianSampleFraction: checked against min=0.01 / max=1.00; less-than signs in the message are escaped for well-formedness -->
	106	<!-- HARDCODED: 'BB' = 0 :: don't use Barzilai-Borwein steps -->
	107	<!-- HARDCODED: 'Monotone' = 0 :: don't force monotonicity -->
	108	<param name="fullGradient" type="select" format="integer" label="Partial gradient vector selection">
	109	<option value="0">Use randomly selected partial gradient, including current active components ("biased")</option>
	110	<option value="1">Use full gradient vector at every step</option>
	111	<option value="2">Randomly selected partial gradient, without regard to current active set ("unbiased")</option>
	112	</param>
	113	<param name="gradientFraction" type="float" value="0.1" label="Gradient fraction" help="Fraction of inactive gradient vector to evaluate.">
	114	<validator type="in_range" message="0.0 &lt; gradientFraction &lt;= 1" min="0.0" max="1.0"/>
	115	</param> <!-- gradientFraction: checked against min=0.0 / max=1.0; less-than signs in the message are escaped for well-formedness -->
	116	<param name="initialAlpha" type="float" value="1.0" label="Initial value of alpha"/>
	117	<param name="alphaIncrease" type="float" value="2.0" label="Alpha increase" help="Factor by which to increase alpha after descent not obtained."/>
	118	<param name="alphaDecrease" type="float" value="0.8" label="Alpha decrease" help="Factor by which to decrease alpha after successful first-order step."/>
	119	<param name="alphaMax" type="float" value="1e12" label="Alpha max" help="Maximum value of alpha; terminate with error if we exceed this."/>
	120	<param name="c1" type="float" value="1e-3" label="c1" help="Parameter defining the margin by which the first-order step is required to decrease before being taken.">
	121	<validator type="in_range" message="0.0 &lt; c1 &lt; 1.0" min="0.0" max="1.0"/>
	122	</param> <!-- c1: given an explicit label like every other parameter; checked against min=0.0 / max=1.0 -->
	123	<param name="maxIter" type="integer" value="10000" label="Maximum number of iterations" help="Terminate with error if we exceed this."/>
	124	<param name="stopTol" type="float" value="1e-6" label="Stop tolerance" help="Convergence tolerance for target value of lambda."/>
	125	<param name="intermediateTol" type="float" value="1e-4" label="Intermediate tolerance" help="Convergence tolerance for intermediate values of lambda."/>
	126	<param name="finalOnly" type="select" format="integer" label="Final only">
	127	<option value="0" selected="true">Return information for all intermediate values</option>
	128	<option value="1">Just return information at the last lambda</option>
	129	</param>
	130	</when> <!-- value="true" -->
	131	</conditional> <!-- name="advanced" -->
	132	</inputs>
	133
	134	<outputs> <!-- two datasets: the tabular results (output_file) and the text log (log_file); both are passed to the wrapper script in the command template -->
	135	<data name="output_file" format="tabular" label="${tool.name} on ${on_string}: results"/>
	136	<data name="log_file" format="txt" label="${tool.name} on ${on_string}: log"/>
	137	</outputs>
	138
	139	<requirements> <!-- declares the external binary lps_tool as a dependency; presumably it is what lps_tool_wrapper.sh invokes (TODO confirm) -->
	140	<requirement type="binary">lps_tool</requirement>
	141	</requirements>
	142
	143	<tests> <!-- one regression test: advanced options enabled on lps_arrhythmia.tabular, with both outputs compared against stored reference files -->
	144	<test>
	145	<param name="input_file" value="lps_arrhythmia.tabular"/>
	146	<param name="label_column" value="280"/>
	147	<param name="lambda_fac" value="0.03"/>
	148	<param name="options" value="true"/>
	149	<param name="sample" value="1.0"/>
	150	<param name="verbosity" value="1"/>
	151	<param name="standardize" value="0"/>
	152	<param name="initialLambda" value="0.9"/>
	153	<param name="continuation" value="1"/>
	154	<param name="continuationSteps" value="10"/>
	155	<param name="accurateIntermediates" value="0"/>
	156	<param name="printFreq" value="1"/>
	157	<param name="newton" value="1"/>
	158	<param name="newtonThreshold" value="500"/>
	159	<param name="hessianSampleFraction" value="1.0"/>
	160	<param name="fullGradient" value="1"/>
	161	<param name="gradientFraction" value="0.5"/>
	162	<param name="initialAlpha" value="1.0"/>
	163	<param name="alphaIncrease" value="2.0"/>
	164	<param name="alphaDecrease" value="0.8"/>
	165	<param name="alphaMax" value="1e12"/>
	166	<param name="c1" value="1e-3"/>
	167	<param name="maxIter" value="2500"/>
	168	<param name="stopTol" value="1e-6"/>
	169	<param name="intermediateTol" value="1e-6"/>
	170	<param name="finalOnly" value="0"/>
	171	<output name="output_file" file="lps_arrhythmia_beta.tabular"/>
	172	<output name="log_file" file="lps_arrhythmia_log.txt"/>
	173	</test>
	174	</tests>
	175
	176	<help>
	177	Dataset formats
	178
	179	The input and output datasets are tabular_. The columns are described below.
	180	There is a second output dataset (a log) that is in text_ format.
	181	(`Dataset missing?`_)
	182
	183	.. _tabular: ./static/formatHelp.html#tab
	184	.. _text: ./static/formatHelp.html#text
	185	.. _Dataset missing?: ./static/formatHelp.html
	186
	187	-----
	188
	189	What it does
	190
	191	The LASSO-Patternsearch algorithm fits your dataset to an L1-regularized
	192	logistic regression model. A benefit of using L1-regularization is
	193	that it typically yields a weight vector with relatively few non-zero
	194	coefficients.
	195
	196	For example, say you have a dataset containing M rows (subjects)
	197	and N columns (attributes) where one of these N attributes is binary,
	198	indicating whether or not the subject has some property of interest P.
	199	In simple terms, LPS calculates a weight for each of the other attributes
	200	in your dataset. This weight indicates how "relevant" that attribute
	201	is for predicting whether or not a given subject has property P.
	202	The L1-regularization causes most of these weights to be equal to zero,
	203	which means LPS will find a "small" subset of the remaining N-1 attributes
	204	in your dataset that can be used to predict P.
	205
	206	In other words, LPS can be used for feature selection.
	207
	208	The input dataset is tabular, and must contain a label column which
	209	indicates whether or not a given row has property P. In the current
	210	version of this tool, P must be encoded using +1 and -1. The Lambda_fac
	211	parameter ranges from 0 to 1, and controls how sparse the weight
	212	vector will be. At the low end, when Lambda_fac = 0, there will be
	213	no regularization. At the high end, when Lambda_fac = 1, there will be
	214	"too much" regularization, and all of the weights will equal zero.
	215
	216	The LPS tool creates two output datasets. The first, called the results
	217	file, is a tabular dataset containing one column of weights for each
	218	value of the regularization parameter lambda that was tried. The weight
	219	columns are in order from left to right by decreasing values of lambda.
	220	The first N-1 rows in each column are the weights for the N-1 attributes
	221	in your input dataset. The final row is a constant, the intercept.
	222
	223	Let x be a row from your input dataset and let b be a column
	224	from the results file. To compute the probability that row x has
	225	a label value of +1:
	226
	227	Probability(row x has label value = +1) = 1 / [1 + exp{x \* b\[1..N-1\] + b\[N\]}]
	228
	229	where x \* b\[1..N-1\] represents matrix multiplication.
	230
	231	The second output dataset, called the log file, is a text file which
	232	contains additional data about the fitted L1-regularized logistic
	233	regression model. These data include the number of features, the
	234	computed value of lambda_max, the actual values of lambda used, the
	235	optimal values of the log-likelihood and regularized log-likelihood
	236	functions, the number of non-zeros, and the number of iterations.
	237
	238	Website: http://pages.cs.wisc.edu/~swright/LPS/
	239
	240	-----
	241
	242	Example
	243
	244	- input file::
	245
	246	+1 1 0 0 0 0 1 0 1 1 ...
	247	+1 1 1 1 0 0 1 0 1 1 ...
	248	+1 1 0 1 0 1 0 1 0 1 ...
	249	etc.
	250
	251	- output results file::
	252
	253	0
	254	0
	255	0
	256	0
	257	0.025541
	258	etc.
	259
	260	- output log file::
	261
	262	Data set has 100 vectors with 50 features.
	263	calculateLambdaMax: n=50, m=100, m+=50, m-=50
	264	computed value of lambda_max: 5.0000e-01
	265
	266	lambda=2.96e-02 solution:
	267	optimal log-likelihood function value: 6.46e-01
	268	optimal regularized log-likelihood function value: 6.79e-01
	269	number of nonzeros at the optimum: 5
	270	number of iterations required: 43
	271	etc.
	272
	273	-----
	274
	275	References
	276
	277	Koh K, Kim S-J, Boyd S. (2007)
	278	An interior-point method for large-scale l1-regularized logistic regression.
	279	Journal of Machine Learning Research. 8:1519-1555.
	280
	281	Shi W, Wahba G, Wright S, Lee K, Klein R, Klein B. (2008)
	282	LASSO-Patternsearch algorithm with application to ophthalmology and genomic data.
	283	Stat Interface. 1(1):137-153.
	284
	285	<!--
	286	Wright S, Nowak R, Figueiredo M. (2009)
	287	Sparse reconstruction by separable approximation.
	288	IEEE Transactions on Signal Processing. 57:2479-2493.
	289
	290	Shi J, Yin W, Osher S, Sajda P. (2010)
	291	A fast hybrid algorithm for large scale l1-regularized logistic regression.
	292	Journal of Machine Learning Research. 11:713-741.
	293
	294	Byrd R, Chin G, Neveitt W, Nocedal J. (2010)
	295	On the use of stochastic Hessian information in unconstrained optimization.
	296	Technical Report. Northwestern University. June 16, 2010.
	297
	298	Wright S. (2010)
	299	Accelerated block-coordinate relaxation for regularized optimization.
	300	Technical Report. University of Wisconsin. August 10, 2010.
	301	-->
	302
	303	</help>
	304	</tool>

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/human_genome_variation/lps.xml @ 2

異なるフォーマットでダウンロード: