| 1 | #!/usr/bin/env python |
|---|
| 2 | |
|---|
| 3 | """ |
|---|
| 4 | Read a chromosome of wiggle data, and write it as a npy array, as |
|---|
| 5 | well as averages over regions of progressively larger size in powers of 10 |
|---|
| 6 | """ |
|---|
| 7 | |
|---|
| 8 | from __future__ import division |
|---|
| 9 | |
|---|
| 10 | import sys |
|---|
| 11 | from galaxy import eggs |
|---|
| 12 | import pkg_resources; pkg_resources.require( "bx-python" ) |
|---|
| 13 | import bx.wiggle |
|---|
| 14 | from bx.cookbook import doc_optparse |
|---|
| 15 | from bx import misc |
|---|
| 16 | max2 = max |
|---|
| 17 | pkg_resources.require("numpy>=1.2.1") |
|---|
| 18 | from numpy import * |
|---|
| 19 | import tempfile |
|---|
| 20 | import os |
|---|
| 21 | from galaxy.tracks.store import sanitize_name |
|---|
| 22 | |
|---|
| 23 | |
|---|
| 24 | def write_chrom(max, out_base, instream): |
|---|
| 25 | |
|---|
| 26 | scores = zeros( max, float32 ) * nan |
|---|
| 27 | # Fill array from wiggle |
|---|
| 28 | for line in instream: |
|---|
| 29 | line = line.rstrip("\n\r") |
|---|
| 30 | (chrom, pos, val) = line.split("\t") |
|---|
| 31 | pos, val = int(pos), float(val) |
|---|
| 32 | scores[pos] = val |
|---|
| 33 | |
|---|
| 34 | # Write ra |
|---|
| 35 | fname = "%s_%d" % ( out_base, 1 ) |
|---|
| 36 | save( fname, scores ) |
|---|
| 37 | os.rename( fname+".npy", fname ) |
|---|
| 38 | |
|---|
| 39 | # Write average |
|---|
| 40 | for window in 10, 100, 1000, 10000, 100000: |
|---|
| 41 | input = scores.copy() |
|---|
| 42 | size = len( input ) |
|---|
| 43 | input.resize( ( ( size / window ), window ) ) |
|---|
| 44 | masked = ma.masked_array( input, isnan( input ) ) |
|---|
| 45 | averaged = mean( masked, 1 ) |
|---|
| 46 | averaged.set_fill_value( nan ) |
|---|
| 47 | fname = "%s_%d" % ( out_base, window ) |
|---|
| 48 | save( fname, averaged.filled() ) |
|---|
| 49 | del masked, averaged |
|---|
| 50 | os.rename( fname+".npy", fname ) |
|---|
| 51 | |
|---|
| 52 | def main(): |
|---|
| 53 | max = int( 512*1024*1024 ) |
|---|
| 54 | # get chroms and lengths |
|---|
| 55 | chroms = {} |
|---|
| 56 | LEN = {} |
|---|
| 57 | for (chrom, pos, val) in bx.wiggle.Reader( open(sys.argv[1],"r") ): |
|---|
| 58 | chrom_file = chroms.get(chrom, None) |
|---|
| 59 | if not chrom_file: |
|---|
| 60 | chrom_file = chroms[chrom] = tempfile.NamedTemporaryFile() |
|---|
| 61 | chrom_file.write("%s\t%s\t%s\n" % (chrom,pos,val)) |
|---|
| 62 | LEN[chrom] = max2( LEN.get(chrom,0), pos+1 ) |
|---|
| 63 | for chrom, stream in chroms.items(): |
|---|
| 64 | stream.seek(0) |
|---|
| 65 | prefix = os.path.join(sys.argv[2], sanitize_name(chrom)) |
|---|
| 66 | write_chrom( LEN[chrom], prefix, stream ) |
|---|
| 67 | |
|---|
| 68 | manifest_file = open( os.path.join( sys.argv[2], "manifest.tab" ),"w" ) |
|---|
| 69 | for key, value in LEN.items(): |
|---|
| 70 | print >> manifest_file, "%s\t%s" % (key, value) |
|---|
| 71 | manifest_file.close() |
|---|
| 72 | |
|---|
| 73 | |
|---|
| 74 | if __name__ == "__main__": main() |
|---|