"""
Genetrack default smoothing and peak predictions. 

The program may be invoked in multiple ways. As a standalone script::

    python peakpred.py

As a python module::

    python -m genetrack.scripts.peakpred

Or in other python scripts::

>>>
>>> from genetrack.scripts import peakpred
>>> peakpred.predict(inpname, outname, options)
>>>

Run the script with no parameters to see the options that it takes.

**Observed runtime**: 

"""
import os, sys, csv
from genetrack import logger, util, hdflib, fitlib

TWOSTRAND = "two"

commify = util.commify

def output(stream, peaks, chrom, w=73, strand='+', ):
    "Outputs peaks to a stream"

    logger.debug('writing %s peaks on strand %s' % (commify(len(peaks)), strand))
    for mid, value in peaks:
        start, end = mid - w, mid + w
        stream.write("%s\t%d\t%d\t.\t%f\t%s\n" % (chrom, start, end, value, strand))

def predict(inpname, outname, options):
    """
    Generate the peak predictions on a genome wide scale
    """
    if options.strand == TWOSTRAND:
            logger.info('operating in twostrand mode')

    if options.index:
        index = hdflib.PositionalData(fname='', index=inpname, nobuild=True, workdir=options.workdir)
    else:
        index = hdflib.PositionalData(fname=inpname, nobuild=True, workdir=options.workdir)

    fp = file(outname, 'wt')

    for label in index.labels:
        table = index.table(label)
        size  = table.cols.idx[-1]
        info  = util.commify(size)
        logger.info('predicting on %s of total size %s' % (label, info))
        lo = 0
        hi = min( (size, options.maxsize) )

        while True:
            if lo >= size:
                break
            perc = '%.1f%%' % (100.0*lo/size)
            logger.info('processing %s %s:%s (%s)' % (label, lo, hi, perc))
            
            # get the data
            res = index.query(start=lo, end=hi, label=label)

            
            # exclusion zone
            w = options.exclude/2

            def predict(x, y):
                fx, fy = fitlib.gaussian_smoothing(x=x, y=y, sigma=options.sigma, epsilon=options.level )
                peaks = fitlib.detect_peaks(x=fx, y=fy )
                if options.mode != 'all':
                    peaks = fitlib.select_peaks(peaks=peaks, exclusion=options.exclude, threshold=options.level)
                return peaks

            if options.strand == TWOSTRAND:
                # operates in two strand mode
                for yval, strand in [ (res.fwd, '+'), (res.rev, '-') ]:
                    logger.debug('processing strand %s' % strand)
                    peaks = predict(x=res.idx, y=yval)
                    output(stream=fp, peaks=peaks, chrom=label, w=w, strand=strand)
            else:
                # combine strands
                peaks = predict(x=res.idx, y=res.val)
                output(stream=fp, peaks=peaks, chrom=label, w=w, strand='+')

            # switching to a higher interval
            lo = hi
            hi += options.maxsize
        
    fp.close()

def option_parser():
    "The option parser may be constructed in other tools invoking this script"
    import optparse

    usage = "usage: %prog -i inputfile -o outputfile"

    parser = optparse.OptionParser(usage=usage)

    # setting the input file name
    parser.add_option(
        '-i', '--input', action="store", 
        dest="inpname", type='str', default=None,
        help="the input hdf file name (required)"
    )

    # setting the output file name
    parser.add_option(
        '-o', '--output', action="store", 
        dest="outname", type='str', default=None,
        help="the output file name (required)"
    )

    # verbosity can be 0,1 and 2 (increasing verbosity)
    parser.add_option(
        '-v', '--verbosity', action="store", 
        dest="verbosity", type="int", default=2, 
        help="sets the verbosity (0, 1, 2) (default=1)",
    )

    # sigma correction
    parser.add_option(
        '--sigma', action="store", 
        dest="sigma", type="float", default=20, 
        help="the smoothing factor",
    )

    # the exclusion zone
    parser.add_option(
        '--exclusion', action="store", 
        dest="exclude", type="int", default=100, 
        help="the exclusion zone",
    )

    # the exclusion zone
    parser.add_option(
        '--strand', action="store", 
        dest="strand", type="str", default="ALL", 
        help="strand",
    )

    # threshold
    parser.add_option(
        '--level', action="store", 
        dest="level", type="float", default=1, 
        help="the minimum signal necessary to call peaks",
    )

    # currently not used
    parser.add_option(
        '--mode', action="store", 
        dest="mode", type="str", default='maximal', 
        help="the peak prediction method: 'maximal', 'threshold' or 'all'",
    )

    parser.add_option(
        '--maxsize', action="store", 
        dest="maxsize", type="int", default=10**7, 
        help="the size of the largest internal array allocated (default=10 million)",
    )

    parser.add_option(
        '--test', action="store_true", 
        dest="test", default=False, 
        help="demo mode used in testing",
    )

    parser.add_option(
        '-w', '--workdir', action="store", 
        dest="workdir", type='str', default=None,
        help="work directory (optional)"
    )

    parser.add_option(
        '-x', '--index', action="store_true", 
        dest="index", default=False,
        help="treat input file as binary index"
    )

    return parser


if __name__ == '__main__':
    import optparse

    parser = option_parser()

    options, args = parser.parse_args()

    logger.disable(options.verbosity)

    from genetrack import conf

    # trigger test mode
    if options.test:
        options.inpname = conf.testdata('test-hdflib-input.gtrack')
        options.outname = conf.testdata('predictions.bed')

    # missing input file name
    if not options.inpname and not options.outname:
        parser.print_help()
    else:
        print 'Sigma = %s' % options.sigma
        print 'Minimum peak = %s' % options.level
        print 'Peak-to-peak = %s' % options.exclude

        predict(options.inpname, options.outname, options)