Context Navigation

peakpred.py @ 3

リビジョン 3, 6.1 KB (コミッタ: kohda, 14 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

Rev	行番号
[3]	1	"""
	2	Genetrack default smoothing and peak predictions.
	3
	4	The program may be invoked in multiple ways. As a standalone script::
	5
	6	python peakpred.py
	7
	8	As a python module::
	9
	10	python -m genetrack.scripts.peakpred
	11
	12	Or in other python scripts::
	13
	14	>>>
	15	>>> from genetrack.scripts import peakpred
	16	>>> peakpred.predict(inpname, outname, options)
	17	>>>
	18
	19	Run the script with no parameters to see the options that it takes.
	20
	21	Observed runtime:
	22
	23	"""
	24	import os, sys, csv
	25	from genetrack import logger, util, hdflib, fitlib
	26
	27	TWOSTRAND = "two"
	28
	29	commify = util.commify
	30
	31	def output(stream, peaks, chrom, w=73, strand='+', ):
	32	"Outputs peaks to a stream"
	33
	34	logger.debug('writing %s peaks on strand %s' % (commify(len(peaks)), strand))
	35	for mid, value in peaks:
	36	start, end = mid - w, mid + w
	37	stream.write("%s\t%d\t%d\t.\t%f\t%s\n" % (chrom, start, end, value, strand))
	38
	39	def predict(inpname, outname, options):
	40	"""
	41	Generate the peak predictions on a genome wide scale
	42	"""
	43	if options.strand == TWOSTRAND:
	44	logger.info('operating in twostrand mode')
	45
	46	if options.index:
	47	index = hdflib.PositionalData(fname='', index=inpname, nobuild=True, workdir=options.workdir)
	48	else:
	49	index = hdflib.PositionalData(fname=inpname, nobuild=True, workdir=options.workdir)
	50
	51	fp = file(outname, 'wt')
	52
	53	for label in index.labels:
	54	table = index.table(label)
	55	size = table.cols.idx[-1]
	56	info = util.commify(size)
	57	logger.info('predicting on %s of total size %s' % (label, info))
	58	lo = 0
	59	hi = min( (size, options.maxsize) )
	60
	61	while True:
	62	if lo >= size:
	63	break
	64	perc = '%.1f%%' % (100.0*lo/size)
	65	logger.info('processing %s %s:%s (%s)' % (label, lo, hi, perc))
	66
	67	# get the data
	68	res = index.query(start=lo, end=hi, label=label)
	69
	70
	71	# exclusion zone
	72	w = options.exclude/2
	73
	74	def predict(x, y):
	75	fx, fy = fitlib.gaussian_smoothing(x=x, y=y, sigma=options.sigma, epsilon=options.level )
	76	peaks = fitlib.detect_peaks(x=fx, y=fy )
	77	if options.mode != 'all':
	78	peaks = fitlib.select_peaks(peaks=peaks, exclusion=options.exclude, threshold=options.level)
	79	return peaks
	80
	81	if options.strand == TWOSTRAND:
	82	# operates in two strand mode
	83	for yval, strand in [ (res.fwd, '+'), (res.rev, '-') ]:
	84	logger.debug('processing strand %s' % strand)
	85	peaks = predict(x=res.idx, y=yval)
	86	output(stream=fp, peaks=peaks, chrom=label, w=w, strand=strand)
	87	else:
	88	# combine strands
	89	peaks = predict(x=res.idx, y=res.val)
	90	output(stream=fp, peaks=peaks, chrom=label, w=w, strand='+')
	91
	92	# switching to a higher interval
	93	lo = hi
	94	hi += options.maxsize
	95
	96	fp.close()
	97
	98	def option_parser():
	99	"The option parser may be constructed in other tools invoking this script"
	100	import optparse
	101
	102	usage = "usage: %prog -i inputfile -o outputfile"
	103
	104	parser = optparse.OptionParser(usage=usage)
	105
	106	# setting the input file name
	107	parser.add_option(
	108	'-i', '--input', action="store",
	109	dest="inpname", type='str', default=None,
	110	help="the input hdf file name (required)"
	111	)
	112
	113	# setting the output file name
	114	parser.add_option(
	115	'-o', '--output', action="store",
	116	dest="outname", type='str', default=None,
	117	help="the output file name (required)"
	118	)
	119
	120	# verbosity can be 0,1 and 2 (increasing verbosity)
	121	parser.add_option(
	122	'-v', '--verbosity', action="store",
	123	dest="verbosity", type="int", default=2,
	124	help="sets the verbosity (0, 1, 2) (default=1)",
	125	)
	126
	127	# sigma correction
	128	parser.add_option(
	129	'--sigma', action="store",
	130	dest="sigma", type="float", default=20,
	131	help="the smoothing factor",
	132	)
	133
	134	# the exclusion zone
	135	parser.add_option(
	136	'--exclusion', action="store",
	137	dest="exclude", type="int", default=100,
	138	help="the exclusion zone",
	139	)
	140
	141	# the exclusion zone
	142	parser.add_option(
	143	'--strand', action="store",
	144	dest="strand", type="str", default="ALL",
	145	help="strand",
	146	)
	147
	148	# threshold
	149	parser.add_option(
	150	'--level', action="store",
	151	dest="level", type="float", default=1,
	152	help="the minimum signal necessary to call peaks",
	153	)
	154
	155	# currently not used
	156	parser.add_option(
	157	'--mode', action="store",
	158	dest="mode", type="str", default='maximal',
	159	help="the peak prediction method: 'maximal', 'threshold' or 'all'",
	160	)
	161
	162	parser.add_option(
	163	'--maxsize', action="store",
	164	dest="maxsize", type="int", default=10**7,
	165	help="the size of the largest internal array allocated (default=10 million)",
	166	)
	167
	168	parser.add_option(
	169	'--test', action="store_true",
	170	dest="test", default=False,
	171	help="demo mode used in testing",
	172	)
	173
	174	parser.add_option(
	175	'-w', '--workdir', action="store",
	176	dest="workdir", type='str', default=None,
	177	help="work directory (optional)"
	178	)
	179
	180	parser.add_option(
	181	'-x', '--index', action="store_true",
	182	dest="index", default=False,
	183	help="treat input file as binary index"
	184	)
	185
	186	return parser
	187
	188
	189	if __name__ == '__main__':
	190	import optparse
	191
	192	parser = option_parser()
	193
	194	options, args = parser.parse_args()
	195
	196	logger.disable(options.verbosity)
	197
	198	from genetrack import conf
	199
	200	# trigger test mode
	201	if options.test:
	202	options.inpname = conf.testdata('test-hdflib-input.gtrack')
	203	options.outname = conf.testdata('predictions.bed')
	204
	205	# missing input file name
	206	if not options.inpname and not options.outname:
	207	parser.print_help()
	208	else:
	209	print 'Sigma = %s' % options.sigma
	210	print 'Minimum peak = %s' % options.level
	211	print 'Peak-to-peak = %s' % options.exclude
	212
	213	predict(options.inpname, options.outname, options)

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/GeneTrack-2.0.0_beta_1_dev_48da9e998f0caf01c5be731e926f4b0481f658f0-py2.6.egg/genetrack/scripts/peakpred.py @ 3

異なるフォーマットでダウンロード: