| 1 | from __future__ import division |
|---|
| 2 | |
|---|
| 3 | import sys |
|---|
| 4 | from bx_extras.lrucache import LRUCache |
|---|
| 5 | from cStringIO import StringIO |
|---|
| 6 | |
|---|
# Default capacity handed to `LRUCache` by `FileCache` (presumably the
# maximum number of blocks held in memory at once -- confirm against
# `bx_extras.lrucache`).
DEFAULT_CACHE_SIZE=10
# Default size of a single cached block, i.e. the amount requested from the
# wrapped file per `read` call (2 * 1024 * 1024).
DEFAULT_BLOCK_SIZE=1024*1024*2
|---|
| 9 | |
|---|
| 10 | class FileCache( object ): |
|---|
| 11 | """ |
|---|
| 12 | Wrapper for a file that cache blocks of data in memory. |
|---|
| 13 | |
|---|
| 14 | **NOTE:** this is currently an incomplete file-like object, it only |
|---|
| 15 | supports seek, tell, and readline (plus iteration). Reading bytes is |
|---|
| 16 | currently not implemented. |
|---|
| 17 | """ |
|---|
| 18 | def __init__( self, file, size, cache_size=DEFAULT_CACHE_SIZE, |
|---|
| 19 | block_size=DEFAULT_BLOCK_SIZE ): |
|---|
| 20 | """ |
|---|
| 21 | Create a new `FileCache` wrapping the file-like object `file` that |
|---|
| 22 | has total size `size` and caching blocks of size `block_size`. |
|---|
| 23 | """ |
|---|
| 24 | self.file = file |
|---|
| 25 | self.size = size |
|---|
| 26 | self.cache_size = cache_size |
|---|
| 27 | self.block_size = block_size |
|---|
| 28 | # Setup the cache |
|---|
| 29 | self.nblocks = ( self.size // self.block_size ) + 1 |
|---|
| 30 | self.cache = LRUCache( self.cache_size ) |
|---|
| 31 | # Position in file |
|---|
| 32 | self.dirty = True |
|---|
| 33 | self.at_eof = False |
|---|
| 34 | self.file_pos = 0 |
|---|
| 35 | self.current_block_index = -1 |
|---|
| 36 | self.current_block = None |
|---|
| 37 | def fix_dirty( self ): |
|---|
| 38 | chunk, offset = self.get_block_and_offset( self.file_pos ) |
|---|
| 39 | if self.current_block_index != chunk: |
|---|
| 40 | self.current_block = StringIO( self.load_block( chunk ) ) |
|---|
| 41 | self.current_block.read( offset ) |
|---|
| 42 | self.current_block_index = chunk |
|---|
| 43 | else: |
|---|
| 44 | self.current_block.seek( offset ) |
|---|
| 45 | self.dirty = False |
|---|
| 46 | def get_block_and_offset( self, index ): |
|---|
| 47 | return int( index // self.block_size ), int( index % self.block_size ) |
|---|
| 48 | def load_block( self, index ): |
|---|
| 49 | if index in self.cache: |
|---|
| 50 | return self.cache[index] |
|---|
| 51 | else: |
|---|
| 52 | real_offset = index * self.block_size |
|---|
| 53 | self.file.seek( real_offset ) |
|---|
| 54 | block = self.file.read( self.block_size ) |
|---|
| 55 | self.cache[index] = block |
|---|
| 56 | return block |
|---|
| 57 | def seek( self, offset, whence=0 ): |
|---|
| 58 | """ |
|---|
| 59 | Move the file pointer to a particular offset. |
|---|
| 60 | """ |
|---|
| 61 | # Determine absolute target position |
|---|
| 62 | if whence == 0: |
|---|
| 63 | target_pos = offset |
|---|
| 64 | elif whence == 1: |
|---|
| 65 | target_pos = self.file_pos + offset |
|---|
| 66 | elif whence == 2: |
|---|
| 67 | target_pos = self.size - offset |
|---|
| 68 | else: |
|---|
| 69 | raise Exception( "Invalid `whence` argument: %r", whence ) |
|---|
| 70 | # Check if this is a noop |
|---|
| 71 | if target_pos == self.file_pos: |
|---|
| 72 | return |
|---|
| 73 | # Verify it is valid |
|---|
| 74 | assert 0 <= target_pos < self.size, "Attempt to seek outside file" |
|---|
| 75 | # Move the position |
|---|
| 76 | self.file_pos = target_pos |
|---|
| 77 | # Mark as dirty, the next time a read is done we need to actually |
|---|
| 78 | # move the position in the bzip2 file |
|---|
| 79 | self.dirty = True |
|---|
| 80 | def readline( self ): |
|---|
| 81 | if self.dirty: |
|---|
| 82 | self.fix_dirty() |
|---|
| 83 | if self.at_eof: |
|---|
| 84 | return "" |
|---|
| 85 | rval = [] |
|---|
| 86 | while 1: |
|---|
| 87 | line = self.current_block.readline() |
|---|
| 88 | rval.append( line ) |
|---|
| 89 | if len( line ) > 0 and line[-1] == '\n': |
|---|
| 90 | break |
|---|
| 91 | elif self.current_block_index == self.nblocks - 1: |
|---|
| 92 | self.at_eof = True |
|---|
| 93 | break |
|---|
| 94 | else: |
|---|
| 95 | self.current_block_index += 1 |
|---|
| 96 | self.current_block = StringIO( self.load_block( self.current_block_index ) ) |
|---|
| 97 | return "".join( rval ) |
|---|
| 98 | def next( self ): |
|---|
| 99 | line = self.readline() |
|---|
| 100 | if line == "": |
|---|
| 101 | raise StopIteration |
|---|
| 102 | def __iter__( self ): |
|---|
| 103 | return self |
|---|
| 104 | def close( self ): |
|---|
| 105 | self.file.close() |
|---|