root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/EGG-INFO/scripts/find_in_sorted_file.py @ 3

リビジョン 3, 1.9 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/python2.6
2
3"""
Extract ranges of scores from a sorted file in which each line contains a
position followed by a score.

TODO: The Finder class might actually be useful: it strides through a file
      and builds an index based on the first field of each line. Maybe move
      it into the library and get rid of this very specific script?

usage: %prog sorted_file start_pos stop_pos
12"""
13
14import sys
15
# Thinning threshold for the script's output: when the span between the first
# and last selected positions exceeds this value, only positions divisible by
# span // max_cats are printed.
max_cats = 1000
17
class Finder:
    """
    Sparse index over a seekable file of whitespace-separated lines whose
    first field is an integer position, sorted in ascending order.

    The index is built once, on construction, by sampling the file at evenly
    spaced byte offsets. `scores_in_range` then uses it to seek close to the
    requested start instead of scanning the file from the beginning.

    file:     open, seekable file object; it is read through `self.file`
              and is never closed by this class.
    segments: number of segments to divide the file into; the index samples
              max( segments - 1, 1 ) offsets.
    """

    def __init__( self, file, segments ):
        self.file = file
        self.segments = segments
        self.make_index()

    def make_index( self ):
        """
        Fill `self.values` (first field of each sampled line, as int) and
        `self.positions` (offset at which that line starts), both in file
        order.
        """
        self.values = []
        self.positions = []

        self.file.seek( 0, 2 )
        end = self.file.tell()

        # Never fewer than one sample, which also avoids dividing by zero
        # when segments == 1. Floor division keeps offsets integral.
        samples = max( self.segments - 1, 1 )
        step = end // samples

        for i in range( 0, samples ):
            offset = i * step
            self.file.seek( offset, 0 )
            if offset > 0:
                # We most likely landed mid-line: drop the partial line so
                # that the next read starts on a line boundary. Offset 0 is
                # already a boundary, so the first line is kept and indexed.
                self.file.readline()
            position = self.file.tell()
            fields = self.file.readline().split()
            if not fields:
                # End of file or a blank line: nothing to index here.
                continue
            if self.positions and position == self.positions[ -1 ]:
                # Short files make several samples resolve to the same line.
                continue
            self.values.append( int( fields[ 0 ] ) )
            self.positions.append( position )

    def scores_in_range( self, start, end ):
        """
        Return a list of ( position, score ) tuples for every line whose
        position lies in [ start, end ], in file order. `position` is an
        int; `score` is the second field of the line, left as read.

        Blank lines are skipped. A non-blank line with fewer than two fields
        raises IndexError.
        """
        # Start from the last indexed line that is strictly below `start`,
        # so that no line equal to `start` can be skipped; fall back to the
        # beginning of the file when there is no such index entry.
        position = 0
        for value, offset in zip( self.values, self.positions ):
            if value >= start:
                break
            position = offset
        self.file.seek( position, 0 )
        result = []
        while 1:
            line = self.file.readline()
            if not line: break
            fields = line.split()
            if not fields: continue

            pos = int( fields[ 0 ] )

            if pos < start: continue
            # The file is sorted, so nothing past this point can match.
            if pos > end: break

            result.append( ( pos, fields[1] ) )

        return result
61
# Command-line driver: sorted_file start_pos stop_pos
if len( sys.argv ) < 4:
    # Exit with a readable message instead of a bare IndexError.
    sys.exit( "usage: %s sorted_file start_pos stop_pos" % sys.argv[ 0 ] )

# The handle stays bound to the module-level name `file`, as in the original
# script, so existing code that refers to that global keeps working.
file = open( sys.argv[ 1 ] )
try:
    finder = Finder( file, 100 )
    scores = finder.scores_in_range( int( sys.argv[2] ), int( sys.argv[3] ) )
finally:
    # All reads are done once scores_in_range returns.
    file.close()

# Span between the first and last selected positions; an empty selection has
# no span (and prints nothing below).
if scores:
    rng = scores[-1][0] - scores[0][0]
else:
    rng = 0

# Thin the output when the span is larger than max_cats.
if rng > max_cats:
    stride = rng // max_cats
else:
    stride = 1

for score in scores:
    if score[0] % stride == 0:
        # Same text as the Python 2 statement `print score[0], score[1]`,
        # written so that it also runs on Python 3.
        sys.stdout.write( "%s %s\n" % ( score[0], score[1] ) )
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。