| 1 | # lrucache.py -- a simple LRU (Least-Recently-Used) cache class |
|---|
| 2 | |
|---|
| 3 | # Copyright 2004 Evan Prodromou <evan@bad.dynu.ca> |
|---|
| 4 | # Licensed under the Academic Free License 2.1 |
|---|
| 5 | |
|---|
| 6 | # arch-tag: LRU cache main module |
|---|
| 7 | |
|---|
| 8 | """a simple LRU (Least-Recently-Used) cache module |
|---|
| 9 | |
|---|
| 10 | This module provides very simple LRU (Least-Recently-Used) cache |
|---|
| 11 | functionality. |
|---|
| 12 | |
|---|
| 13 | An *in-memory cache* is useful for storing the results of an |
|---|
| 14 | 'expensive' process (one that takes a lot of time or resources) for |
|---|
| 15 | later re-use. Typical examples are accessing data from the filesystem, |
|---|
| 16 | a database, or a network location. If you know you'll need to re-read |
|---|
| 17 | the data again, it can help to keep it in a cache. |
|---|
| 18 | |
|---|
| 19 | You *can* use a Python dictionary as a cache for some purposes. |
|---|
| 20 | However, if the results you're caching are large, or you have a lot of |
|---|
| 21 | possible results, this can be impractical memory-wise. |
|---|
| 22 | |
|---|
| 23 | An *LRU cache*, on the other hand, only keeps _some_ of the results in |
|---|
| 24 | memory, which keeps you from overusing resources. The cache is bounded |
|---|
| 25 | by a maximum size; if you try to add more values to the cache, it will |
|---|
| 26 | automatically discard the values that you haven't read or written to |
|---|
| 27 | in the longest time. In other words, the least-recently-used items are |
|---|
| 28 | discarded. [1]_ |
|---|
| 29 | |
|---|
| 30 | .. [1]: 'Discarded' here means 'removed from the cache'. |
|---|
| 31 | |
|---|
| 32 | """ |
|---|
| 33 | |
|---|
| 34 | from __future__ import generators |
|---|
| 35 | import time |
|---|
| 36 | from heapq import heappush, heappop, heapify |
|---|
| 37 | |
|---|
| 38 | __version__ = "0.2" |
|---|
| 39 | __all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE'] |
|---|
| 40 | __docformat__ = 'reStructuredText en' |
|---|
| 41 | |
|---|
| 42 | DEFAULT_SIZE = 16 |
|---|
| 43 | """Default size of a new LRUCache object, if no 'size' argument is given.""" |
|---|
| 44 | |
|---|
| 45 | class CacheKeyError(KeyError): |
|---|
| 46 | """Error raised when cache requests fail |
|---|
| 47 | |
|---|
| 48 | When a cache record is accessed which no longer exists (or never did), |
|---|
| 49 | this error is raised. To avoid it, you may want to check for the existence |
|---|
| 50 | of a cache record before reading or deleting it.""" |
|---|
| 51 | pass |
|---|
| 52 | |
|---|
| 53 | class LRUCache(object): |
|---|
| 54 | """Least-Recently-Used (LRU) cache. |
|---|
| 55 | |
|---|
| 56 | Instances of this class provide a least-recently-used (LRU) cache. They |
|---|
| 57 | emulate a Python mapping type. You can use an LRU cache more or less like |
|---|
| 58 | a Python dictionary, with the exception that objects you put into the |
|---|
| 59 | cache may be discarded before you take them out. |
|---|
| 60 | |
|---|
| 61 | Some example usage:: |
|---|
| 62 | |
|---|
| 63 | cache = LRUCache(32) # new cache |
|---|
| 64 | cache['foo'] = get_file_contents('foo') # or whatever |
|---|
| 65 | |
|---|
| 66 | if 'foo' in cache: # if it's still in cache... |
|---|
| 67 | # use cached version |
|---|
| 68 | contents = cache['foo'] |
|---|
| 69 | else: |
|---|
| 70 | # recalculate |
|---|
| 71 | contents = get_file_contents('foo') |
|---|
| 72 | # store in cache for next time |
|---|
| 73 | cache['foo'] = contents |
|---|
| 74 | |
|---|
| 75 | print cache.size # Maximum size |
|---|
| 76 | |
|---|
| 77 | print len(cache) # 0 <= len(cache) <= cache.size |
|---|
| 78 | |
|---|
| 79 | cache.size = 10 # Auto-shrink on size assignment |
|---|
| 80 | |
|---|
| 81 | for i in range(50): # note: larger than cache size |
|---|
| 82 | cache[i] = i |
|---|
| 83 | |
|---|
| 84 | if 0 not in cache: print 'Zero was discarded.' |
|---|
| 85 | |
|---|
| 86 | if 42 in cache: |
|---|
| 87 | del cache[42] # Manual deletion |
|---|
| 88 | |
|---|
| 89 | for j in cache: # iterate (in LRU order) |
|---|
| 90 | print j, cache[j] # iterator produces keys, not values |
|---|
| 91 | """ |
|---|
| 92 | |
|---|
| 93 | class __Node(object): |
|---|
| 94 | """Record of a cached value. Not for public consumption.""" |
|---|
| 95 | |
|---|
| 96 | def __init__(self, key, obj, timestamp): |
|---|
| 97 | object.__init__(self) |
|---|
| 98 | self.key = key |
|---|
| 99 | self.obj = obj |
|---|
| 100 | self.atime = timestamp |
|---|
| 101 | self.mtime = self.atime |
|---|
| 102 | |
|---|
| 103 | def __cmp__(self, other): |
|---|
| 104 | return cmp(self.atime, other.atime) |
|---|
| 105 | |
|---|
| 106 | def __repr__(self): |
|---|
| 107 | return "<%s %s => %s (%s)>" % \ |
|---|
| 108 | (self.__class__, self.key, self.obj, \ |
|---|
| 109 | time.asctime(time.localtime(self.atime))) |
|---|
| 110 | |
|---|
| 111 | def __init__(self, size=DEFAULT_SIZE): |
|---|
| 112 | # Check arguments |
|---|
| 113 | if size <= 0: |
|---|
| 114 | raise ValueError, size |
|---|
| 115 | elif type(size) is not type(0): |
|---|
| 116 | raise TypeError, size |
|---|
| 117 | object.__init__(self) |
|---|
| 118 | self.__heap = [] |
|---|
| 119 | self.__dict = {} |
|---|
| 120 | self.size = size |
|---|
| 121 | """Maximum size of the cache. |
|---|
| 122 | If more than 'size' elements are added to the cache, |
|---|
| 123 | the least-recently-used ones will be discarded.""" |
|---|
| 124 | |
|---|
| 125 | def __len__(self): |
|---|
| 126 | return len(self.__heap) |
|---|
| 127 | |
|---|
| 128 | def __contains__(self, key): |
|---|
| 129 | return self.__dict.has_key(key) |
|---|
| 130 | |
|---|
| 131 | def __setitem__(self, key, obj): |
|---|
| 132 | if self.__dict.has_key(key): |
|---|
| 133 | node = self.__dict[key] |
|---|
| 134 | node.obj = obj |
|---|
| 135 | node.atime = time.time() |
|---|
| 136 | node.mtime = node.atime |
|---|
| 137 | heapify(self.__heap) |
|---|
| 138 | else: |
|---|
| 139 | # size may have been reset, so we loop |
|---|
| 140 | while len(self.__heap) >= self.size: |
|---|
| 141 | lru = heappop(self.__heap) |
|---|
| 142 | del self.__dict[lru.key] |
|---|
| 143 | node = self.__Node(key, obj, time.time()) |
|---|
| 144 | self.__dict[key] = node |
|---|
| 145 | heappush(self.__heap, node) |
|---|
| 146 | |
|---|
| 147 | def __getitem__(self, key): |
|---|
| 148 | if not self.__dict.has_key(key): |
|---|
| 149 | raise CacheKeyError(key) |
|---|
| 150 | else: |
|---|
| 151 | node = self.__dict[key] |
|---|
| 152 | node.atime = time.time() |
|---|
| 153 | heapify(self.__heap) |
|---|
| 154 | return node.obj |
|---|
| 155 | |
|---|
| 156 | def __delitem__(self, key): |
|---|
| 157 | if not self.__dict.has_key(key): |
|---|
| 158 | raise CacheKeyError(key) |
|---|
| 159 | else: |
|---|
| 160 | node = self.__dict[key] |
|---|
| 161 | del self.__dict[key] |
|---|
| 162 | self.__heap.remove(node) |
|---|
| 163 | heapify(self.__heap) |
|---|
| 164 | return node.obj |
|---|
| 165 | |
|---|
| 166 | def __iter__(self): |
|---|
| 167 | copy = self.__heap[:] |
|---|
| 168 | while len(copy) > 0: |
|---|
| 169 | node = heappop(copy) |
|---|
| 170 | yield node.key |
|---|
| 171 | raise StopIteration |
|---|
| 172 | |
|---|
| 173 | def __setattr__(self, name, value): |
|---|
| 174 | object.__setattr__(self, name, value) |
|---|
| 175 | # automagically shrink heap on resize |
|---|
| 176 | if name == 'size': |
|---|
| 177 | while len(self.__heap) > value: |
|---|
| 178 | lru = heappop(self.__heap) |
|---|
| 179 | del self.__dict[lru.key] |
|---|
| 180 | |
|---|
| 181 | def __repr__(self): |
|---|
| 182 | return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap)) |
|---|
| 183 | |
|---|
| 184 | def mtime(self, key): |
|---|
| 185 | """Return the last modification time for the cache record with key. |
|---|
| 186 | May be useful for cache instances where the stored values can get |
|---|
| 187 | 'stale', such as caching file or network resource contents.""" |
|---|
| 188 | if not self.__dict.has_key(key): |
|---|
| 189 | raise CacheKeyError(key) |
|---|
| 190 | else: |
|---|
| 191 | node = self.__dict[key] |
|---|
| 192 | return node.mtime |
|---|
| 193 | |
|---|
| 194 | if __name__ == "__main__": |
|---|
| 195 | cache = LRUCache(25) |
|---|
| 196 | print cache |
|---|
| 197 | for i in range(50): |
|---|
| 198 | cache[i] = str(i) |
|---|
| 199 | print cache |
|---|
| 200 | if 46 in cache: |
|---|
| 201 | del cache[46] |
|---|
| 202 | print cache |
|---|
| 203 | cache.size = 10 |
|---|
| 204 | print cache |
|---|
| 205 | cache[46] = '46' |
|---|
| 206 | print cache |
|---|
| 207 | print len(cache) |
|---|
| 208 | for c in cache: |
|---|
| 209 | print c |
|---|
| 210 | print cache |
|---|
| 211 | print cache.mtime(46) |
|---|
| 212 | for c in cache: |
|---|
| 213 | print c |
|---|