| 1 | """ |
|---|
| 2 | Support for scores in the `wiggle`_ file format used by the UCSC Genome |
|---|
| 3 | Browser. |
|---|
| 4 | |
|---|
| 5 | Positions in the wiggle format are 1-based; however, |
|---|
| 6 | the positions returned match the BED/interval format, which is zero-based and half-open. |
|---|
| 7 | |
|---|
| 8 | .. _wiggle: http://genome.ucsc.edu/goldenPath/help/wiggle.html |
|---|
| 9 | """ |
|---|
| 10 | |
|---|
def parse_header(line):
    """
    Parse a wiggle declaration line into a dict of its ``key=value`` fields.

    The first whitespace-separated token (the declaration keyword, e.g.
    ``fixedStep``) is skipped. Every remaining token is split on its first
    ``=`` only, so a value that itself contains ``=`` is kept intact.
    Keys and values are returned as strings; callers convert as needed.

    Raises ValueError if a token after the keyword contains no ``=``.
    """
    # maxsplit=1: splitting on every '=' would produce 3+ element items for
    # values containing '=' and make dict() fail.
    return dict(field.split('=', 1) for field in line.split()[1:])
|---|
| 13 | |
|---|
def IntervalReader(f):
    """
    Iterator yielding chrom, start, end, strand, value.
    Values are zero-based, half-open.
    Regions which lack a score are ignored.

    `f` is any iterable of text lines. Three kinds of data lines are
    handled, selected by the most recent declaration line seen:

    - BED-style lines (the initial mode): ``chrom start end value``. Lines
      with more than five fields take the strand from the sixth field;
      lines with fewer than four fields are skipped.
    - ``variableStep`` data: ``position value``; the position is converted
      from 1-based to zero-based.
    - ``fixedStep`` data: ``value``; positions advance by the declared
      ``step`` starting at the declared ``start`` (converted to zero-based).

    For both step modes the interval length is the declared ``span``
    (1 if absent). Blank lines and lines starting with ``track``, ``#`` or
    ``browser`` are skipped.

    Raises ValueError if a line cannot be dispatched to any mode.
    """
    current_chrom = None
    current_pos = None
    current_step = None
    current_span = 1

    # always for wiggle data
    strand = '+'

    mode = "bed"

    for line in f:
        # `not line.strip()` also covers the empty string, which
        # `line.isspace()` does not; such lines would otherwise be treated
        # as data and fail with IndexError in the step modes.
        if not line.strip() or line.startswith(("track", "#", "browser")):
            continue
        elif line.startswith("variableStep"):
            header = parse_header(line)
            current_chrom = header['chrom']
            # Positions come from each data line in this mode.
            current_pos = None
            current_step = None
            current_span = int(header.get('span', 1))
            mode = "variableStep"
        elif line.startswith("fixedStep"):
            header = parse_header(line)
            current_chrom = header['chrom']
            # Header start is 1-based; convert to zero-based.
            current_pos = int(header['start']) - 1
            current_step = int(header['step'])
            current_span = int(header.get('span', 1))
            mode = "fixedStep"
        elif mode == "bed":
            fields = line.split()
            if len(fields) > 3:
                if len(fields) > 5:
                    yield fields[0], int(fields[1]), int(fields[2]), fields[5], float(fields[3])
                else:
                    yield fields[0], int(fields[1]), int(fields[2]), strand, float(fields[3])
        elif mode == "variableStep":
            fields = line.split()
            # Data-line position is 1-based; convert to zero-based.
            pos = int(fields[0]) - 1
            yield current_chrom, pos, pos + current_span, strand, float(fields[1])
        elif mode == "fixedStep":
            yield current_chrom, current_pos, current_pos + current_span, strand, float(line.split()[0])
            current_pos += current_step
        else:
            # Defensive: `mode` is only ever set to the three values above.
            # Raise a real exception; raising a bare string is not valid.
            raise ValueError("Unexpected input line: %s" % line.strip())
|---|
| 64 | |
|---|
| 65 | |
|---|
class Reader(object):
    """
    Iterator yielding chrom, position, value.
    Values are zero-based.
    Regions which lack a score are ignored.

    Wraps `IntervalReader` and expands each interval it yields into one
    (chrom, position, value) tuple per position in ``[start, end)``; the
    strand is dropped.
    """

    def __init__(self, f):
        # Iterable of wiggle text lines; it is consumed when iterating.
        self.file = f

    def __iter__(self):
        for chrom, start, end, strand, val in IntervalReader(self.file):
            # Explicit counting loop instead of `xrange`, which does not
            # exist on Python 3; this stays lazy on both Python 2 and 3.
            pos = start
            while pos < end:
                yield chrom, pos, val
                pos += 1
|---|