root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/intervals/operations/intersect.py

リビジョン 3, 3.4 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""
2Compute the intersection of two sets of genomic intervals, either base-by-base
3or at the interval level. The returned GenomicIntervalReader will be in
4the order of the first set of intervals passed in, with the corresponding
5additional fields.
6"""
7
8import traceback
9import fileinput
10from warnings import warn
11
12from bx.intervals.io import *
13from bx.intervals.operations import *
14
15def intersect(readers, mincols=1, upstream_pad=0, downstream_pad=0, pieces=True, lens={}, comments=True):
16    # The incoming lens dictionary is a dictionary of chromosome lengths which are used to initialize the bitsets.
17    # Read all but first into bitsets and intersect to one
18    primary = readers[0]
19    intersect = readers[1:]
20    # Handle any ValueError, IndexError and OverflowError exceptions that may be thrown when
21    # the bitsets are being created by skipping the problem lines
22    intersect[0] = BitsetSafeReaderWrapper( intersect[0], lens=lens )
23    bitsets = intersect[0].binned_bitsets( upstream_pad=upstream_pad, downstream_pad=downstream_pad, lens=lens )
24    intersect = intersect[1:]
25    for andset in intersect:
26        bitset2 = andset.binned_bitsets(upstream_pad = upstream_pad, downstream_pad = downstream_pad, lens = lens)
27        for chrom in bitsets:
28            if chrom not in bitset2: continue
29            bitsets[chrom].iand(bitset2[chrom])
30        intersect = intersect[1:]
31   
32    # Read remaining intervals and intersect
33    for interval in primary:
34        if type( interval ) is Header:
35            yield interval
36        if type( interval ) is Comment and comments:
37            yield interval
38        elif type( interval ) == GenomicInterval:
39            chrom = interval.chrom
40            start = int( interval.start )
41            end = int( interval.end )
42            if chrom not in bitsets:
43                continue
44            if start > end:
45                try:
46                    # This will only work if primary is a NiceReaderWrapper
47                    primary.skipped += 1
48                    # no reason to stuff an entire bad file into memmory
49                    if primary.skipped < 10:
50                        primary.skipped_lines.append( ( primary.linenum, primary.current_line, "Interval start after end!" ) )
51                except:
52                    pass
53                continue
54            out_intervals = []
55            # Intersect or Overlap
56            try:
57                if bitsets[ chrom ].count_range( start, end-start ) >= mincols:               
58                    if pieces:
59                        out_intervals = bits_set_in_range( bitsets[chrom], start, end )
60                    else:
61                        out_intervals = [ ( start, end ) ]
62                # Write the intervals
63                for start, end in out_intervals:
64                    new_interval = interval.copy()
65                    new_interval.start = start
66                    new_interval.end = end
67                    yield new_interval
68            except IndexError, e:
69                try:
70                    # This will only work if primary is a NiceReaderWrapper
71                    primary.skipped += 1
72                    # no reason to stuff an entire bad file into memmory
73                    if primary.skipped < 10:
74                        primary.skipped_lines.append( ( primary.linenum, primary.current_line, str( e ) ) )
75                except:
76                    pass
77                continue
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。