1 | """ |
---|
2 | Support for scores in the `wiggle`_ file format used by the UCSC Genome |
---|
3 | Browser. |
---|
4 | |
---|
5 | The positions in the wiggle format are 1-relative, however, |
---|
6 | the positions returned match the BED/interval format which is zero-based, half-open. |
---|
7 | |
---|
8 | .. _wiggle: http://genome.ucsc.edu/goldenPath/help/wiggle.html |
---|
9 | """ |
---|
10 | |
---|
def parse_header( line ):
    """
    Parse the ``key=value`` settings of a wiggle declaration line.

    The first whitespace-delimited token of `line` (the declaration keyword,
    e.g. ``fixedStep``) is skipped.  Every remaining token is split on its
    first ``=`` only, so a value may itself contain ``=``.

    Returns a dict mapping each key to its value; values are left as strings.
    Raises ValueError if a token contains no ``=`` at all.
    """
    return dict( field.split( '=', 1 ) for field in line.split()[1:] )
---|
13 | |
---|
def IntervalReader( f ):
    """
    Iterator yielding chrom, start, end, strand, value.

    `f` is any iterable of text lines (typically an open wiggle file).  Three
    kinds of data line are understood:

    - BED-style lines ``chrom start end value [... strand]``, which is the
      mode in effect until a declaration line is seen.  Coordinates are used
      as given; a sixth column, when present, is reported as the strand.
    - ``variableStep`` blocks, whose data lines are ``position value``.
    - ``fixedStep`` blocks, whose data lines hold a single value and advance
      the position by the declared step.

    Yielded coordinates are zero-based, half-open; `value` is a float.
    Blank lines, ``#`` comments and ``track`` / ``browser`` lines are skipped.
    Regions which lack a score are ignored.

    Raises KeyError when a declaration line lacks a required setting, and
    ValueError when a number cannot be parsed.
    """
    current_chrom = None
    current_pos = None
    current_step = None

    # Step-format data lines carry no strand, so '+' is always reported for them
    strand = '+'

    # Until a declaration line is seen, data lines are read as BED-style records
    mode = "bed"

    for line in f:
        # `not line.strip()` also catches the empty string, which isspace()
        # does not; that matters when `f` is a list of pre-stripped lines.
        if not line.strip() or line.startswith( ( "track", "#", "browser" ) ):
            continue
        elif line.startswith( "variableStep" ):
            header = parse_header( line )
            current_chrom = header['chrom']
            # Each variableStep data line names its own position
            current_pos = None
            current_step = None
            current_span = int( header.get( 'span', 1 ) )
            mode = "variableStep"
        elif line.startswith( "fixedStep" ):
            header = parse_header( line )
            current_chrom = header['chrom']
            # Declared start is 1-relative; convert to zero-based
            current_pos = int( header['start'] ) - 1
            current_step = int( header['step'] )
            current_span = int( header.get( 'span', 1 ) )
            mode = "fixedStep"
        elif mode == "bed":
            fields = line.split()
            # Records without a fourth (score) column are silently dropped
            if len( fields ) > 3:
                if len( fields ) > 5:
                    record_strand = fields[5]
                else:
                    record_strand = strand
                yield fields[0], int( fields[1] ), int( fields[2] ), record_strand, float( fields[3] )
        elif mode == "variableStep":
            fields = line.split()
            # Data positions are 1-relative; convert to zero-based
            pos = int( fields[0] ) - 1
            yield current_chrom, pos, pos + current_span, strand, float( fields[1] )
        elif mode == "fixedStep":
            yield current_chrom, current_pos, current_pos + current_span, strand, float( line.split()[0] )
            current_pos += current_step
        else:
            # Defensive: `mode` only ever holds one of the three values above.
            # A real exception is raised here because string exceptions are
            # not valid in modern Python.
            raise ValueError( "Unexpected input line: %s" % line.strip() )
---|
64 | |
---|
65 | |
---|
class Reader( object ):
    """
    Iterator yielding chrom, position, value.

    Wraps an iterable of wiggle lines `f` and expands every interval produced
    by `IntervalReader` into one ( chrom, position, value ) tuple per base.
    Positions are zero-based.
    Regions which lack a score are ignored.
    """
    def __init__( self, f ):
        # Any iterable of wiggle-formatted lines (typically an open file)
        self.file = f

    def __iter__( self ):
        # The strand reported by IntervalReader is not part of this output
        for chrom, start, end, strand, val in IntervalReader( self.file ):
            # An explicit counter replaces xrange, which does not exist on
            # Python 3; it behaves the same on Python 2 and never builds a
            # list for large spans.
            pos = start
            while pos < end:
                yield chrom, pos, val
                pos += 1
---|