root/galaxy-central/eggs/GeneTrack-2.0.0_beta_1_dev_48da9e998f0caf01c5be731e926f4b0481f658f0-py2.6.egg/genetrack/scripts/peakpred.py @ 3

リビジョン 3, 6.1 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""
2Genetrack default smoothing and peak predictions.
3
4The program may be invoked in multiple ways. As a standalone script::
5
6    python peakpred.py
7
8As a python module::
9
10    python -m genetrack.scripts.peakpred
11
12Or in other python scripts::
13
14>>>
15>>> from genetrack.scripts import peakpred
16>>> peakpred.predict(inpname, outname, options)
17>>>
18
19Run the script with no parameters to see the options that it takes.
20
21**Observed runtime**:
22
23"""
24import os, sys, csv
25from genetrack import logger, util, hdflib, fitlib
26
27TWOSTRAND = "two"
28
29commify = util.commify
30
31def output(stream, peaks, chrom, w=73, strand='+', ):
32    "Outputs peaks to a stream"
33
34    logger.debug('writing %s peaks on strand %s' % (commify(len(peaks)), strand))
35    for mid, value in peaks:
36        start, end = mid - w, mid + w
37        stream.write("%s\t%d\t%d\t.\t%f\t%s\n" % (chrom, start, end, value, strand))
38
39def predict(inpname, outname, options):
40    """
41    Generate the peak predictions on a genome wide scale
42    """
43    if options.strand == TWOSTRAND:
44            logger.info('operating in twostrand mode')
45
46    if options.index:
47        index = hdflib.PositionalData(fname='', index=inpname, nobuild=True, workdir=options.workdir)
48    else:
49        index = hdflib.PositionalData(fname=inpname, nobuild=True, workdir=options.workdir)
50
51    fp = file(outname, 'wt')
52
53    for label in index.labels:
54        table = index.table(label)
55        size  = table.cols.idx[-1]
56        info  = util.commify(size)
57        logger.info('predicting on %s of total size %s' % (label, info))
58        lo = 0
59        hi = min( (size, options.maxsize) )
60
61        while True:
62            if lo >= size:
63                break
64            perc = '%.1f%%' % (100.0*lo/size)
65            logger.info('processing %s %s:%s (%s)' % (label, lo, hi, perc))
66           
67            # get the data
68            res = index.query(start=lo, end=hi, label=label)
69
70           
71            # exclusion zone
72            w = options.exclude/2
73
74            def predict(x, y):
75                fx, fy = fitlib.gaussian_smoothing(x=x, y=y, sigma=options.sigma, epsilon=options.level )
76                peaks = fitlib.detect_peaks(x=fx, y=fy )
77                if options.mode != 'all':
78                    peaks = fitlib.select_peaks(peaks=peaks, exclusion=options.exclude, threshold=options.level)
79                return peaks
80
81            if options.strand == TWOSTRAND:
82                # operates in two strand mode
83                for yval, strand in [ (res.fwd, '+'), (res.rev, '-') ]:
84                    logger.debug('processing strand %s' % strand)
85                    peaks = predict(x=res.idx, y=yval)
86                    output(stream=fp, peaks=peaks, chrom=label, w=w, strand=strand)
87            else:
88                # combine strands
89                peaks = predict(x=res.idx, y=res.val)
90                output(stream=fp, peaks=peaks, chrom=label, w=w, strand='+')
91
92            # switching to a higher interval
93            lo = hi
94            hi += options.maxsize
95       
96    fp.close()
97
98def option_parser():
99    "The option parser may be constructed in other tools invoking this script"
100    import optparse
101
102    usage = "usage: %prog -i inputfile -o outputfile"
103
104    parser = optparse.OptionParser(usage=usage)
105
106    # setting the input file name
107    parser.add_option(
108        '-i', '--input', action="store",
109        dest="inpname", type='str', default=None,
110        help="the input hdf file name (required)"
111    )
112
113    # setting the output file name
114    parser.add_option(
115        '-o', '--output', action="store",
116        dest="outname", type='str', default=None,
117        help="the output file name (required)"
118    )
119
120    # verbosity can be 0,1 and 2 (increasing verbosity)
121    parser.add_option(
122        '-v', '--verbosity', action="store",
123        dest="verbosity", type="int", default=2,
124        help="sets the verbosity (0, 1, 2) (default=1)",
125    )
126
127    # sigma correction
128    parser.add_option(
129        '--sigma', action="store",
130        dest="sigma", type="float", default=20,
131        help="the smoothing factor",
132    )
133
134    # the exclusion zone
135    parser.add_option(
136        '--exclusion', action="store",
137        dest="exclude", type="int", default=100,
138        help="the exclusion zone",
139    )
140
141    # the exclusion zone
142    parser.add_option(
143        '--strand', action="store",
144        dest="strand", type="str", default="ALL",
145        help="strand",
146    )
147
148    # threshold
149    parser.add_option(
150        '--level', action="store",
151        dest="level", type="float", default=1,
152        help="the minimum signal necessary to call peaks",
153    )
154
155    # currently not used
156    parser.add_option(
157        '--mode', action="store",
158        dest="mode", type="str", default='maximal',
159        help="the peak prediction method: 'maximal', 'threshold' or 'all'",
160    )
161
162    parser.add_option(
163        '--maxsize', action="store",
164        dest="maxsize", type="int", default=10**7,
165        help="the size of the largest internal array allocated (default=10 million)",
166    )
167
168    parser.add_option(
169        '--test', action="store_true",
170        dest="test", default=False,
171        help="demo mode used in testing",
172    )
173
174    parser.add_option(
175        '-w', '--workdir', action="store",
176        dest="workdir", type='str', default=None,
177        help="work directory (optional)"
178    )
179
180    parser.add_option(
181        '-x', '--index', action="store_true",
182        dest="index", default=False,
183        help="treat input file as binary index"
184    )
185
186    return parser
187
188
189if __name__ == '__main__':
190    import optparse
191
192    parser = option_parser()
193
194    options, args = parser.parse_args()
195
196    logger.disable(options.verbosity)
197
198    from genetrack import conf
199
200    # trigger test mode
201    if options.test:
202        options.inpname = conf.testdata('test-hdflib-input.gtrack')
203        options.outname = conf.testdata('predictions.bed')
204
205    # missing input file name
206    if not options.inpname and not options.outname:
207        parser.print_help()
208    else:
209        print 'Sigma = %s' % options.sigma
210        print 'Minimum peak = %s' % options.level
211        print 'Peak-to-peak = %s' % options.exclude
212
213        predict(options.inpname, options.outname, options)
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。