root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/intervals/operations/quicksect.py @ 3

リビジョン 3, 5.7 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""
2Intersects ... faster.  Suports GenomicInterval datatype and multiple
3chromosomes.
4"""
5import math
6import time
7import sys
8import random
9
10class IntervalTree( object ):
11    def __init__( self ):
12        self.chroms = {}
13    def insert( self, interval, linenum=0, other=None ):
14        chrom = interval.chrom
15        start = interval.start
16        end = interval.end
17        if interval.chrom in self.chroms:
18            self.chroms[chrom] = self.chroms[chrom].insert( start, end, linenum, other )
19        else:
20            self.chroms[chrom] = IntervalNode( start, end, linenum, other )
21    def intersect( self, interval, report_func ):
22        chrom = interval.chrom
23        start = interval.start
24        end = interval.end
25        if chrom in self.chroms:
26            self.chroms[chrom].intersect( start, end, report_func )
27    def traverse( self, func ):
28        for item in self.chroms.itervalues():
29            item.traverse( func )
30
31class IntervalNode( object ):
32    def __init__( self, start, end, linenum=0, other=None ):
33        # Python lacks the binomial distribution, so we convert a
34        # uniform into a binomial because it naturally scales with
35        # tree size.  Also, python's uniform is perfect since the
36        # upper limit is not inclusive, which gives us undefined here.
37        self.priority = math.ceil( (-1.0 / math.log(.5)) * math.log( -1.0 / (random.uniform(0,1) - 1)))
38        self.start = start
39        self.end = end
40        self.maxend = self.end
41        self.minend = self.end
42        self.left = None
43        self.right = None
44        self.linenum = linenum
45        self.other = other
46    def insert( self, start, end, linenum=0, other=None ):
47        root = self
48        if start > self.start:
49            # insert to right tree
50            if self.right:
51                self.right = self.right.insert( start, end, linenum, other )
52            else:
53                self.right = IntervalNode(start, end, linenum, other )
54            # rebalance tree
55            if self.priority < self.right.priority:
56                root = self.rotateleft()
57        else:
58            # insert to left tree
59            if self.left:
60                self.left = self.left.insert( start, end, linenum, other )
61            else:
62                self.left = IntervalNode(start, end, linenum, other )
63            # rebalance tree
64            if self.priority < self.left.priority:
65                root = self.rotateright()
66        if root.right and root.left:
67            root.maxend = max( root.end, root.right.maxend, root.left.maxend )
68            root.minend = min( root.end, root.right.minend, root.left.minend )
69        elif root.right:
70            root.maxend = max( root.end, root.right.maxend )
71            root.minend = min( root.end, root.right.minend )
72        elif root.left:
73            root.maxend = max( root.end, root.left.maxend )
74            root.minend = min( root.end, root.left.minend )
75        return root
76
77    def rotateright( self ):
78        root = self.left
79        self.left = self.left.right
80        root.right = self
81        if self.right and self.left:
82            self.maxend = max(self.end, self.right.maxend, self.left.maxend)
83            self.minend = min(self.end, self.right.minend, self.left.minend )
84        elif self.right:
85            self.maxend = max(self.end, self.right.maxend)
86            self.minend = min(self.end, self.right.minend)
87        elif self.left:
88            self.maxend = max(self.end, self.left.maxend)
89            self.minend = min(self.end, self.left.minend )
90        return root
91       
92    def rotateleft( self ):
93        root = self.right
94        self.right = self.right.left
95        root.left = self
96        if self.right and self.left:
97            self.maxend = max(self.end, self.right.maxend, self.left.maxend)
98            self.minend = min(self.end, self.right.minend, self.left.minend )
99        elif self.right:
100            self.maxend = max(self.end, self.right.maxend)
101            self.minend = min(self.end, self.right.minend)
102        elif self.left:
103            self.maxend = max(self.end, self.left.maxend)
104            self.minend = min(self.end, self.left.minend )
105        return root
106
107    def intersect( self, start, end, report_func ):
108        if start < self.end and end > self.start: report_func( self )
109        if self.left and start < self.left.maxend:
110            self.left.intersect( start, end, report_func )
111        if self.right and end > self.start:
112            self.right.intersect( start, end, report_func )
113
114    def traverse( self, func ):
115        if self.left: self.left.traverse( func )
116        func( self )
117        if self.right: self.right.traverse( func )
118
119def main():
120    test = None
121    intlist = []
122    for x in range(20000):
123        start = random.randint(0,1000000)
124        end = start + random.randint(1, 1000)
125        if test: test = test.insert( start, end )
126        else: test = IntervalNode( start, end )
127        intlist.append( (start, end) )
128    starttime = time.clock()
129    for x in range(5000):
130        start = random.randint(0, 10000000)
131        end = start + random.randint(1, 1000)
132        result = []
133        test.intersect( start, end, lambda x: result.append(x.linenum) )
134    print "%f for tree method" % (time.clock() - starttime)
135    starttime = time.clock()
136    for x in range(5000):
137        start = random.randint(0, 10000000)
138        end = start + random.randint(1, 1000)
139        bad_sect( intlist, start, end)
140    print "%f for linear (bad) method" % (time.clock() - starttime)
141
142def test_func( node ):
143    print "[%d, %d), %d" % (node.start, node.end, node.maxend)
144
145def bad_sect( lst, int_start, int_end ):
146    intersection = []
147    for start, end in lst:
148        if int_start < end and int_end > start:
149            intersection.append( (start, end) )
150    return intersection
151
152if __name__ == "__main__":
153    main()
154
155
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。