1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
"""
Extract ranges of scores from a sorted file in which each line contains a
position followed by a score.

TODO: The Finder class might actually be useful: it strides through a file
      and builds an index from the first field of the line found at each
      stride. Maybe move it into the library and get rid of this very
      specific script?

usage: %prog score_file start_pos stop_pos
"""
13 | |
---|
14 | import sys |
---|
15 | |
---|
# Thinning threshold for the output: when the span between the first and last
# returned position exceeds this value, the script prints only positions that
# are multiples of span // max_cats.
max_cats = 1000
17 | |
---|
class Finder( object ):
    """
    Sparse index over a position-sorted file, used for fast range lookups.

    Every non-blank line of the file must start with an integer position
    followed by a score field, and lines must be sorted by position. The
    file is probed at evenly spaced offsets; for each probe the position
    value and file offset of the first complete line found there are
    recorded in `values` and `positions` (parallel lists).

    file     -- an open, seekable file object supporting seek/tell/readline
    segments -- controls index density: `segments - 1` probes are made
                (at least one)
    """

    def __init__( self, file, segments ):
        self.file = file
        self.segments = segments
        self.make_index()

    def make_index( self ):
        """
        (Re)build `self.values` / `self.positions` by probing `self.file`.
        """
        self.values = []
        self.positions = []

        self.file.seek( 0, 2 )
        end = self.file.tell()

        # Always probe at least once so that segments == 1 does not divide
        # by zero; floor division keeps the seek offsets integral.
        probes = max( self.segments - 1, 1 )
        step = end // probes

        for i in range( probes ):
            offset = i * step
            self.file.seek( offset, 0 )
            if offset > 0:
                # We most likely landed in the middle of a line: discard the
                # remainder so the next read starts on a line boundary.
                # Offset 0 is already a boundary, so the first line of the
                # file is kept and indexed.
                self.file.readline()
            position = self.file.tell()
            if self.positions and position <= self.positions[ -1 ]:
                # Several probes resolved to the same line (small file).
                continue
            fields = self.file.readline().split()
            if not fields:
                # End of file or a blank line: nothing to index here.
                continue
            self.values.append( int( fields[ 0 ] ) )
            self.positions.append( position )

    def scores_in_range( self, start, end ):
        """
        Return a list of `( position, score )` tuples for every line whose
        position lies in the inclusive range [ start, end ].

        `position` is an int; `score` is the second whitespace-separated
        field of the line, returned unparsed. Blank lines are ignored.
        """
        # Default: begin at the last indexed line (or the start of the file
        # when the index is empty).
        position = 0
        if self.positions:
            position = self.positions[ -1 ]
        # Begin scanning at the index entry just before the first one whose
        # value reaches `start`; `>=` rather than `>` so that repeated
        # positions straddling an index entry are not skipped.
        for i in range( 1, len( self.values ) ):
            if self.values[ i ] >= start:
                position = self.positions[ i - 1 ]
                break
        self.file.seek( position, 0 )
        result = []
        while True:
            line = self.file.readline()
            if not line:
                break
            fields = line.split()
            if not fields:
                continue

            pos = int( fields[ 0 ] )

            if pos < start:
                continue
            if pos > end:
                # File is sorted, so nothing further can be in range.
                break

            result.append( ( pos, fields[ 1 ] ) )

        return result
61 | |
---|
# Command-line entry point: print the "position score" pairs found between
# start_pos and stop_pos (inclusive), thinned when the range is very wide.
if __name__ == "__main__":
    if len( sys.argv ) < 4:
        sys.exit( "usage: %s score_file start_pos stop_pos" % sys.argv[ 0 ] )

    # The module-level name `file` is kept on purpose: code elsewhere in this
    # file may refer to it. The `with` block guarantees the handle is closed.
    with open( sys.argv[ 1 ] ) as file:
        finder = Finder( file, 100 )
        scores = finder.scores_in_range( int( sys.argv[2] ), int( sys.argv[3] ) )

    # Nothing in range: print nothing instead of failing on scores[-1].
    if scores:
        rng = scores[-1][0] - scores[0][0]

        # Wide ranges are thinned: only positions that are multiples of
        # `stride` are printed. The stride is 1 (print everything) unless the
        # span exceeds max_cats.
        stride = max( rng // max_cats, 1 )

        for score in scores:
            if score[0] % stride == 0:
                # Single pre-formatted string so the output is identical
                # under both the print statement and the print function.
                print( "%s %s" % ( score[0], score[1] ) )