[2] | 1 | #!/usr/bin/env python |
---|
| 2 | |
---|
| 3 | """ |
---|
| 4 | Read a chromosome of wiggle data, and write it as a npy array, as |
---|
| 5 | well as averages over regions of progressively larger size in powers of 10 |
---|
| 6 | """ |
---|
| 7 | |
---|
| 8 | from __future__ import division |
---|
| 9 | |
---|
| 10 | import sys |
---|
| 11 | from galaxy import eggs |
---|
| 12 | import pkg_resources; pkg_resources.require( "bx-python" ) |
---|
| 13 | import bx.wiggle |
---|
| 14 | from bx.cookbook import doc_optparse |
---|
| 15 | from bx import misc |
---|
| 16 | max2 = max |
---|
| 17 | pkg_resources.require("numpy>=1.2.1") |
---|
| 18 | from numpy import * |
---|
| 19 | import tempfile |
---|
| 20 | import os |
---|
| 21 | from galaxy.tracks.store import sanitize_name |
---|
| 22 | |
---|
| 23 | |
---|
| 24 | def write_chrom(max, out_base, instream): |
---|
| 25 | |
---|
| 26 | scores = zeros( max, float32 ) * nan |
---|
| 27 | # Fill array from wiggle |
---|
| 28 | for line in instream: |
---|
| 29 | line = line.rstrip("\n\r") |
---|
| 30 | (chrom, pos, val) = line.split("\t") |
---|
| 31 | pos, val = int(pos), float(val) |
---|
| 32 | scores[pos] = val |
---|
| 33 | |
---|
| 34 | # Write ra |
---|
| 35 | fname = "%s_%d" % ( out_base, 1 ) |
---|
| 36 | save( fname, scores ) |
---|
| 37 | os.rename( fname+".npy", fname ) |
---|
| 38 | |
---|
| 39 | # Write average |
---|
| 40 | for window in 10, 100, 1000, 10000, 100000: |
---|
| 41 | input = scores.copy() |
---|
| 42 | size = len( input ) |
---|
| 43 | input.resize( ( ( size / window ), window ) ) |
---|
| 44 | masked = ma.masked_array( input, isnan( input ) ) |
---|
| 45 | averaged = mean( masked, 1 ) |
---|
| 46 | averaged.set_fill_value( nan ) |
---|
| 47 | fname = "%s_%d" % ( out_base, window ) |
---|
| 48 | save( fname, averaged.filled() ) |
---|
| 49 | del masked, averaged |
---|
| 50 | os.rename( fname+".npy", fname ) |
---|
| 51 | |
---|
| 52 | def main(): |
---|
| 53 | max = int( 512*1024*1024 ) |
---|
| 54 | # get chroms and lengths |
---|
| 55 | chroms = {} |
---|
| 56 | LEN = {} |
---|
| 57 | for (chrom, pos, val) in bx.wiggle.Reader( open(sys.argv[1],"r") ): |
---|
| 58 | chrom_file = chroms.get(chrom, None) |
---|
| 59 | if not chrom_file: |
---|
| 60 | chrom_file = chroms[chrom] = tempfile.NamedTemporaryFile() |
---|
| 61 | chrom_file.write("%s\t%s\t%s\n" % (chrom,pos,val)) |
---|
| 62 | LEN[chrom] = max2( LEN.get(chrom,0), pos+1 ) |
---|
| 63 | for chrom, stream in chroms.items(): |
---|
| 64 | stream.seek(0) |
---|
| 65 | prefix = os.path.join(sys.argv[2], sanitize_name(chrom)) |
---|
| 66 | write_chrom( LEN[chrom], prefix, stream ) |
---|
| 67 | |
---|
| 68 | manifest_file = open( os.path.join( sys.argv[2], "manifest.tab" ),"w" ) |
---|
| 69 | for key, value in LEN.items(): |
---|
| 70 | print >> manifest_file, "%s\t%s" % (key, value) |
---|
| 71 | manifest_file.close() |
---|
| 72 | |
---|
| 73 | |
---|
| 74 | if __name__ == "__main__": main() |
---|