#!/usr/bin/env python
"""
Subtract one set of genomic intervals from another (base-by-base or whole
intervals). The returned GenomicIntervals will be in the order
of the first set of intervals passed in, with the corresponding
meta-data.
"""

import traceback
import fileinput
from warnings import warn

from bx.intervals.io import *
from bx.intervals.operations import *

def subtract(readers, mincols=1, upstream_pad=0, downstream_pad=0, pieces=True, lens={}, comments=True):
    # The incoming lens dictionary is a dictionary of chromosome lengths which are used to initialize the bitsets.
    # Read all but first into bitsets and union to one (if confused, read DeMorgan's...)
    primary = readers[0]
    union = readers[1:]
    # Handle any ValueError, IndexError and OverflowError exceptions that may be thrown when
    # the bitsets are being created by skipping the problem lines
    union[0] = BitsetSafeReaderWrapper( union[0], lens=lens )
    bitsets = union[0].binned_bitsets( upstream_pad=upstream_pad, downstream_pad=downstream_pad, lens=lens )
    union = union[1:]
    for andset in union:
        bitset2 = andset.binned_bitsets(upstream_pad = upstream_pad, downstream_pad = downstream_pad, lens = lens)
        for chrom in bitset2:
            if chrom not in bitsets:
                bitsets[chrom] = bitset2[chrom]
            else:
                bitsets[chrom].ior(bitset2[chrom])
    
    # Read remaining intervals and subtract
    for interval in primary:
        if type( interval ) is Header:
            yield interval
        if type( interval ) is Comment and comments:
            yield interval
        elif type( interval ) == GenomicInterval:
            chrom = interval.chrom
            if chrom not in bitsets:
                yield interval
            else:
                start = int(interval.start)
                end = int(interval.end)
                if start > end: warn( "Interval start after end!" )
                out_intervals = []
                # Find the intervals that meet the criteria (for the three sensible
                # permutations of reverse and pieces)
                try:
                    if bitsets[ chrom ].count_range( start, end-start ) >= mincols:                
                        if pieces:
                            out_intervals = bits_clear_in_range( bitsets[chrom], start, end )
                    else:
                        out_intervals = [ ( start, end ) ]
                    # Write the intervals
                    for start, end in out_intervals:
                        new_interval = interval.copy()
                        new_interval.start = start
                        new_interval.end = end
                        yield new_interval
                except IndexError, e:
                    try:
                        # This will work only if primary is a NiceReaderWrapper
                        primary.skipped += 1
                        # no reason to stuff an entire bad file into memmory
                        if primary.skipped < 10:
                            primary.skipped_lines.append( ( primary.linenum, primary.current_line, str( e ) ) )
                    except:
                        pass
                    continue