1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | """ |
---|
4 | Read a chromosome of coverage data, and write it as a npy array, as |
---|
5 | well as averages over regions of progressively larger size in powers of 10 |
---|
6 | """ |
---|
7 | |
---|
8 | from __future__ import division |
---|
9 | |
---|
10 | import sys |
---|
11 | from galaxy import eggs |
---|
12 | import pkg_resources; pkg_resources.require( "bx-python" ) |
---|
13 | import bx.wiggle |
---|
14 | from bx.cookbook import doc_optparse |
---|
15 | from bx import misc |
---|
16 | max2 = max |
---|
17 | pkg_resources.require("numpy>=1.2.1") |
---|
18 | from numpy import * |
---|
19 | import tempfile |
---|
20 | import os |
---|
21 | |
---|
22 | def write_chrom(max, out_base, instream): |
---|
23 | |
---|
24 | scores = zeros( max, float32 ) * nan |
---|
25 | # Fill array from wiggle |
---|
26 | max_value = 0 |
---|
27 | min_value = 0 |
---|
28 | for line in instream: |
---|
29 | line = line.rstrip("\n\r") |
---|
30 | (chrom, pos, val) = line.split("\t") |
---|
31 | pos, val = int(pos), float(val) |
---|
32 | scores[pos] = val |
---|
33 | |
---|
34 | # Write ra |
---|
35 | fname = "%s_%d" % ( out_base, 1 ) |
---|
36 | save( fname, scores ) |
---|
37 | os.rename( fname+".npy", fname ) |
---|
38 | |
---|
39 | # Write average |
---|
40 | for window in 10, 100, 1000, 10000, 100000: |
---|
41 | input = scores.copy() |
---|
42 | size = len( input ) |
---|
43 | input.resize( ( ( size / window ), window ) ) |
---|
44 | masked = ma.masked_array( input, isnan( input ) ) |
---|
45 | averaged = mean( masked, 1 ) |
---|
46 | averaged.set_fill_value( nan ) |
---|
47 | fname = "%s_%d" % ( out_base, window ) |
---|
48 | save( fname, averaged.filled() ) |
---|
49 | del masked, averaged |
---|
50 | os.rename( fname+".npy", fname ) |
---|
51 | |
---|
52 | def main(): |
---|
53 | max = int( 512*1024*1024 ) |
---|
54 | # get chroms and lengths |
---|
55 | chroms = {} |
---|
56 | LEN = {} |
---|
57 | for line in open(sys.argv[1],"r"): |
---|
58 | line = line.rstrip("\r\n") |
---|
59 | fields = line.split("\t") |
---|
60 | (chrom, pos, forward) = fields[0:3] |
---|
61 | reverse = 0 |
---|
62 | if len(fields) == 4: reverse = int(fields[3]) |
---|
63 | forward = int(forward)+reverse |
---|
64 | pos = int(pos) |
---|
65 | chrom_file = chroms.get(chrom, None) |
---|
66 | if not chrom_file: |
---|
67 | chrom_file = chroms[chrom] = tempfile.NamedTemporaryFile() |
---|
68 | chrom_file.write("%s\t%s\t%s\n" % (chrom,pos,forward)) |
---|
69 | LEN[chrom] = max2( LEN.get(chrom,0), pos+1 ) |
---|
70 | for chrom, stream in chroms.items(): |
---|
71 | stream.seek(0) |
---|
72 | prefix = os.path.join(sys.argv[2], chrom) |
---|
73 | write_chrom( LEN[chrom], prefix, stream ) |
---|
74 | |
---|
75 | manifest_file = open( os.path.join( sys.argv[2], "manifest.tab" ),"w" ) |
---|
76 | for key, value in LEN.items(): |
---|
77 | print >> manifest_file, "%s\t%s" % (key, value) |
---|
78 | manifest_file.close() |
---|
79 | |
---|
80 | |
---|
81 | if __name__ == "__main__": main() |
---|