Context Navigation

peakpred.py @ 3

リビジョン 3, 6.1 KB (コミッタ: kohda, 14 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	"""
2	Genetrack default smoothing and peak predictions.
3
4	The program may be invoked in multiple ways. As a standalone script::
5
6	python peakpred.py
7
8	As a python module::
9
10	python -m genetrack.scripts.peakpred
11
12	Or in other python scripts::
13
14	>>>
15	>>> from genetrack.scripts import peakpred
16	>>> peakpred.predict(inpname, outname, options)
17	>>>
18
19	Run the script with no parameters to see the options that it takes.
20
21	Observed runtime:
22
23	"""
24	import os, sys, csv
25	from genetrack import logger, util, hdflib, fitlib
26
27	TWOSTRAND = "two"
28
29	commify = util.commify
30
31	def output(stream, peaks, chrom, w=73, strand='+', ):
32	"Outputs peaks to a stream"
33
34	logger.debug('writing %s peaks on strand %s' % (commify(len(peaks)), strand))
35	for mid, value in peaks:
36	start, end = mid - w, mid + w
37	stream.write("%s\t%d\t%d\t.\t%f\t%s\n" % (chrom, start, end, value, strand))
38
39	def predict(inpname, outname, options):
40	"""
41	Generate the peak predictions on a genome wide scale
42	"""
43	if options.strand == TWOSTRAND:
44	logger.info('operating in twostrand mode')
45
46	if options.index:
47	index = hdflib.PositionalData(fname='', index=inpname, nobuild=True, workdir=options.workdir)
48	else:
49	index = hdflib.PositionalData(fname=inpname, nobuild=True, workdir=options.workdir)
50
51	fp = file(outname, 'wt')
52
53	for label in index.labels:
54	table = index.table(label)
55	size = table.cols.idx[-1]
56	info = util.commify(size)
57	logger.info('predicting on %s of total size %s' % (label, info))
58	lo = 0
59	hi = min( (size, options.maxsize) )
60
61	while True:
62	if lo >= size:
63	break
64	perc = '%.1f%%' % (100.0*lo/size)
65	logger.info('processing %s %s:%s (%s)' % (label, lo, hi, perc))
66
67	# get the data
68	res = index.query(start=lo, end=hi, label=label)
69
70
71	# exclusion zone
72	w = options.exclude/2
73
74	def predict(x, y):
75	fx, fy = fitlib.gaussian_smoothing(x=x, y=y, sigma=options.sigma, epsilon=options.level )
76	peaks = fitlib.detect_peaks(x=fx, y=fy )
77	if options.mode != 'all':
78	peaks = fitlib.select_peaks(peaks=peaks, exclusion=options.exclude, threshold=options.level)
79	return peaks
80
81	if options.strand == TWOSTRAND:
82	# operates in two strand mode
83	for yval, strand in [ (res.fwd, '+'), (res.rev, '-') ]:
84	logger.debug('processing strand %s' % strand)
85	peaks = predict(x=res.idx, y=yval)
86	output(stream=fp, peaks=peaks, chrom=label, w=w, strand=strand)
87	else:
88	# combine strands
89	peaks = predict(x=res.idx, y=res.val)
90	output(stream=fp, peaks=peaks, chrom=label, w=w, strand='+')
91
92	# switching to a higher interval
93	lo = hi
94	hi += options.maxsize
95
96	fp.close()
97
98	def option_parser():
99	"The option parser may be constructed in other tools invoking this script"
100	import optparse
101
102	usage = "usage: %prog -i inputfile -o outputfile"
103
104	parser = optparse.OptionParser(usage=usage)
105
106	# setting the input file name
107	parser.add_option(
108	'-i', '--input', action="store",
109	dest="inpname", type='str', default=None,
110	help="the input hdf file name (required)"
111	)
112
113	# setting the output file name
114	parser.add_option(
115	'-o', '--output', action="store",
116	dest="outname", type='str', default=None,
117	help="the output file name (required)"
118	)
119
120	# verbosity can be 0,1 and 2 (increasing verbosity)
121	parser.add_option(
122	'-v', '--verbosity', action="store",
123	dest="verbosity", type="int", default=2,
124	help="sets the verbosity (0, 1, 2) (default=1)",
125	)
126
127	# sigma correction
128	parser.add_option(
129	'--sigma', action="store",
130	dest="sigma", type="float", default=20,
131	help="the smoothing factor",
132	)
133
134	# the exclusion zone
135	parser.add_option(
136	'--exclusion', action="store",
137	dest="exclude", type="int", default=100,
138	help="the exclusion zone",
139	)
140
141	# the exclusion zone
142	parser.add_option(
143	'--strand', action="store",
144	dest="strand", type="str", default="ALL",
145	help="strand",
146	)
147
148	# threshold
149	parser.add_option(
150	'--level', action="store",
151	dest="level", type="float", default=1,
152	help="the minimum signal necessary to call peaks",
153	)
154
155	# currently not used
156	parser.add_option(
157	'--mode', action="store",
158	dest="mode", type="str", default='maximal',
159	help="the peak prediction method: 'maximal', 'threshold' or 'all'",
160	)
161
162	parser.add_option(
163	'--maxsize', action="store",
164	dest="maxsize", type="int", default=10**7,
165	help="the size of the largest internal array allocated (default=10 million)",
166	)
167
168	parser.add_option(
169	'--test', action="store_true",
170	dest="test", default=False,
171	help="demo mode used in testing",
172	)
173
174	parser.add_option(
175	'-w', '--workdir', action="store",
176	dest="workdir", type='str', default=None,
177	help="work directory (optional)"
178	)
179
180	parser.add_option(
181	'-x', '--index', action="store_true",
182	dest="index", default=False,
183	help="treat input file as binary index"
184	)
185
186	return parser
187
188
189	if __name__ == '__main__':
190	import optparse
191
192	parser = option_parser()
193
194	options, args = parser.parse_args()
195
196	logger.disable(options.verbosity)
197
198	from genetrack import conf
199
200	# trigger test mode
201	if options.test:
202	options.inpname = conf.testdata('test-hdflib-input.gtrack')
203	options.outname = conf.testdata('predictions.bed')
204
205	# missing input file name
206	if not options.inpname and not options.outname:
207	parser.print_help()
208	else:
209	print 'Sigma = %s' % options.sigma
210	print 'Minimum peak = %s' % options.level
211	print 'Peak-to-peak = %s' % options.exclude
212
213	predict(options.inpname, options.outname, options)

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/GeneTrack-2.0.0_beta_1_dev_48da9e998f0caf01c5be731e926f4b0481f658f0-py2.6.egg/genetrack/scripts/peakpred.py @ 3

異なるフォーマットでダウンロード: