root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/intervals/operations/subtract.py

リビジョン 3, 3.2 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/env python
2"""
3Subtract one set of genomic intervals from another (base-by-base or whole
4intervals). The returned GenomicIntervals will be in the order
5of the first set of intervals passed in, with the corresponding
6meta-data.
7"""
8
9import traceback
10import fileinput
11from warnings import warn
12
13from bx.intervals.io import *
14from bx.intervals.operations import *
15
16def subtract(readers, mincols=1, upstream_pad=0, downstream_pad=0, pieces=True, lens={}, comments=True):
17    # The incoming lens dictionary is a dictionary of chromosome lengths which are used to initialize the bitsets.
18    # Read all but first into bitsets and union to one (if confused, read DeMorgan's...)
19    primary = readers[0]
20    union = readers[1:]
21    # Handle any ValueError, IndexError and OverflowError exceptions that may be thrown when
22    # the bitsets are being created by skipping the problem lines
23    union[0] = BitsetSafeReaderWrapper( union[0], lens=lens )
24    bitsets = union[0].binned_bitsets( upstream_pad=upstream_pad, downstream_pad=downstream_pad, lens=lens )
25    union = union[1:]
26    for andset in union:
27        bitset2 = andset.binned_bitsets(upstream_pad = upstream_pad, downstream_pad = downstream_pad, lens = lens)
28        for chrom in bitset2:
29            if chrom not in bitsets:
30                bitsets[chrom] = bitset2[chrom]
31            else:
32                bitsets[chrom].ior(bitset2[chrom])
33   
34    # Read remaining intervals and subtract
35    for interval in primary:
36        if type( interval ) is Header:
37            yield interval
38        if type( interval ) is Comment and comments:
39            yield interval
40        elif type( interval ) == GenomicInterval:
41            chrom = interval.chrom
42            if chrom not in bitsets:
43                yield interval
44            else:
45                start = int(interval.start)
46                end = int(interval.end)
47                if start > end: warn( "Interval start after end!" )
48                out_intervals = []
49                # Find the intervals that meet the criteria (for the three sensible
50                # permutations of reverse and pieces)
51                try:
52                    if bitsets[ chrom ].count_range( start, end-start ) >= mincols:               
53                        if pieces:
54                            out_intervals = bits_clear_in_range( bitsets[chrom], start, end )
55                    else:
56                        out_intervals = [ ( start, end ) ]
57                    # Write the intervals
58                    for start, end in out_intervals:
59                        new_interval = interval.copy()
60                        new_interval.start = start
61                        new_interval.end = end
62                        yield new_interval
63                except IndexError, e:
64                    try:
65                        # This will work only if primary is a NiceReaderWrapper
66                        primary.skipped += 1
67                        # no reason to stuff an entire bad file into memmory
68                        if primary.skipped < 10:
69                            primary.skipped_lines.append( ( primary.linenum, primary.current_line, str( e ) ) )
70                    except:
71                        pass
72                    continue
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。