| 1 | #!/usr/bin/env python |
|---|
| 2 | |
|---|
| 3 | """ |
|---|
| 4 | Read a chromosome of coverage data, and write it as a npy array, as |
|---|
| 5 | well as averages over regions of progressively larger size in powers of 10 |
|---|
| 6 | """ |
|---|
| 7 | |
|---|
| 8 | from __future__ import division |
|---|
| 9 | |
|---|
| 10 | import sys |
|---|
| 11 | from galaxy import eggs |
|---|
| 12 | import pkg_resources; pkg_resources.require( "bx-python" ) |
|---|
| 13 | import bx.wiggle |
|---|
| 14 | from bx.cookbook import doc_optparse |
|---|
| 15 | from bx import misc |
|---|
| 16 | max2 = max |
|---|
| 17 | pkg_resources.require("numpy>=1.2.1") |
|---|
| 18 | from numpy import * |
|---|
| 19 | import tempfile |
|---|
| 20 | import os |
|---|
| 21 | |
|---|
| 22 | def write_chrom(max, out_base, instream): |
|---|
| 23 | |
|---|
| 24 | scores = zeros( max, float32 ) * nan |
|---|
| 25 | # Fill array from wiggle |
|---|
| 26 | max_value = 0 |
|---|
| 27 | min_value = 0 |
|---|
| 28 | for line in instream: |
|---|
| 29 | line = line.rstrip("\n\r") |
|---|
| 30 | (chrom, pos, val) = line.split("\t") |
|---|
| 31 | pos, val = int(pos), float(val) |
|---|
| 32 | scores[pos] = val |
|---|
| 33 | |
|---|
| 34 | # Write ra |
|---|
| 35 | fname = "%s_%d" % ( out_base, 1 ) |
|---|
| 36 | save( fname, scores ) |
|---|
| 37 | os.rename( fname+".npy", fname ) |
|---|
| 38 | |
|---|
| 39 | # Write average |
|---|
| 40 | for window in 10, 100, 1000, 10000, 100000: |
|---|
| 41 | input = scores.copy() |
|---|
| 42 | size = len( input ) |
|---|
| 43 | input.resize( ( ( size / window ), window ) ) |
|---|
| 44 | masked = ma.masked_array( input, isnan( input ) ) |
|---|
| 45 | averaged = mean( masked, 1 ) |
|---|
| 46 | averaged.set_fill_value( nan ) |
|---|
| 47 | fname = "%s_%d" % ( out_base, window ) |
|---|
| 48 | save( fname, averaged.filled() ) |
|---|
| 49 | del masked, averaged |
|---|
| 50 | os.rename( fname+".npy", fname ) |
|---|
| 51 | |
|---|
| 52 | def main(): |
|---|
| 53 | max = int( 512*1024*1024 ) |
|---|
| 54 | # get chroms and lengths |
|---|
| 55 | chroms = {} |
|---|
| 56 | LEN = {} |
|---|
| 57 | for line in open(sys.argv[1],"r"): |
|---|
| 58 | line = line.rstrip("\r\n") |
|---|
| 59 | fields = line.split("\t") |
|---|
| 60 | (chrom, pos, forward) = fields[0:3] |
|---|
| 61 | reverse = 0 |
|---|
| 62 | if len(fields) == 4: reverse = int(fields[3]) |
|---|
| 63 | forward = int(forward)+reverse |
|---|
| 64 | pos = int(pos) |
|---|
| 65 | chrom_file = chroms.get(chrom, None) |
|---|
| 66 | if not chrom_file: |
|---|
| 67 | chrom_file = chroms[chrom] = tempfile.NamedTemporaryFile() |
|---|
| 68 | chrom_file.write("%s\t%s\t%s\n" % (chrom,pos,forward)) |
|---|
| 69 | LEN[chrom] = max2( LEN.get(chrom,0), pos+1 ) |
|---|
| 70 | for chrom, stream in chroms.items(): |
|---|
| 71 | stream.seek(0) |
|---|
| 72 | prefix = os.path.join(sys.argv[2], chrom) |
|---|
| 73 | write_chrom( LEN[chrom], prefix, stream ) |
|---|
| 74 | |
|---|
| 75 | manifest_file = open( os.path.join( sys.argv[2], "manifest.tab" ),"w" ) |
|---|
| 76 | for key, value in LEN.items(): |
|---|
| 77 | print >> manifest_file, "%s\t%s" % (key, value) |
|---|
| 78 | manifest_file.close() |
|---|
| 79 | |
|---|
| 80 | |
|---|
| 81 | if __name__ == "__main__": main() |
|---|