root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/intervals/operations/coverage.py

リビジョン 3, 2.9 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""
2Determine amount of each interval in one set covered by the intervals of
3another set. Adds two columns to the first input, giving number of bases
4covered and percent coverage on the second input.
5"""
6
7import traceback
8import fileinput
9from warnings import warn
10
11from bx.intervals.io import *
12from bx.intervals.operations import *
13
14def coverage(readers, comments=True):
15    # The incoming lens dictionary is a dictionary of chromosome lengths which are used to initialize the bitsets.
16    primary = readers[0]
17    intersect = readers[1:]
18    # Handle any ValueError, IndexError and OverflowError exceptions that may be thrown when
19    # the bitsets are being created by skipping the problem lines
20    intersect[0] = BitsetSafeReaderWrapper( intersect[0], lens={} )
21    bitsets = intersect[0].binned_bitsets()
22    intersect = intersect[1:]
23    for andset in intersect:
24        bitset2 = andset.binned_bitsets()
25        for chrom in bitsets:
26            if chrom not in bitset2: continue
27            bitsets[chrom].ior(bitset2[chrom])
28        intersect = intersect[1:]
29
30    # Read remaining intervals and give coverage
31    for interval in primary:
32        if type( interval ) is Header:
33            yield interval
34        if type( interval ) is Comment and comments:
35            yield interval
36        elif type( interval ) == GenomicInterval:
37            chrom = interval.chrom
38            start = int(interval.start)
39            end = int(interval.end)
40            if start > end:
41                try:
42                    # This will only work if primary is a NiceReaderWrapper
43                    primary.skipped += 1
44                    # no reason to stuff an entire bad file into memmory
45                    if primary.skipped < 10:
46                        primary.skipped_lines.append( ( primary.linenum, primary.current_line, "Interval start after end!" ) )
47                except:
48                    pass
49                continue
50            if chrom not in bitsets:
51                bases_covered = 0
52                percent = 0.0
53            else:
54                try:
55                    bases_covered = bitsets[ chrom ].count_range( start, end-start )
56                except IndexError, e:
57                    try:
58                        # This will only work if primary is a NiceReaderWrapper
59                        primary.skipped += 1
60                        # no reason to stuff an entire bad file into memmory
61                        if primary.skipped < 10:
62                            primary.skipped_lines.append( ( primary.linenum, primary.current_line, str( e ) ) )
63                    except:
64                        pass
65                    continue
66                if (end - start) == 0:
67                    percent = 0
68                else:
69                    percent = float(bases_covered) / float(end - start)
70            interval.fields.append(str(bases_covered))
71            interval.fields.append(str(percent))
72            yield interval
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。