| 1 | # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
|---|
| 2 | # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
|---|
| 3 | # (c) 2005 Ian Bicking, Clark C. Evans and contributors |
|---|
| 4 | # This module is part of the Python Paste Project and is released under |
|---|
| 5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php |
|---|
| 6 | """ |
|---|
| 7 | This module handles sending static content such as in-memory data or |
|---|
| 8 | files. At this time it has cache helpers and understands the |
|---|
| 9 | if-modified-since request header. |
|---|
| 10 | """ |
|---|
| 11 | |
|---|
| 12 | import os, time, mimetypes, zipfile, tarfile |
|---|
| 13 | from paste.httpexceptions import * |
|---|
| 14 | from paste.httpheaders import * |
|---|
| 15 | |
|---|
# Files strictly smaller than this many bytes are read fully into memory
# by ``FileApp.update()``; larger files are streamed from disk.
CACHE_SIZE = 4096

# Default number of bytes ``_FileIter`` reads per chunk when streaming.
BLOCK_SIZE = 16 * 4096

# Public names exported by ``from <module> import *``.
__all__ = ['DataApp', 'FileApp', 'ArchiveStore']
|---|
| 20 | |
|---|
class DataApp(object):
    """
    Returns an application that will send content in a single chunk,
    this application has support for setting cache-control and for
    responding to conditional (or HEAD) requests.

    Constructor Arguments:

        ``content``     the content being sent to the client

        ``headers``     the headers to send with the response

        The remaining ``kwargs`` correspond to headers, where the
        underscore is replaced with a dash.  These values are only
        added to the headers if they are not already provided; thus,
        they can be used for default values.  Examples include, but
        are not limited to:

            ``content_type``
            ``content_encoding``
            ``content_location``

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.

    ``set_content()``

        This method provides a mechanism to set the content after the
        application has been constructed.  This method does things
        like changing ``Last-Modified`` and ``Content-Length`` headers.

    """

    # Request methods answered by ``__call__``; anything else gets a
    # "method not allowed" response.  May be overridden per instance.
    allowed_methods = ('GET', 'HEAD')

    def __init__(self, content, headers=None, allowed_methods=None,
                 **kwargs):
        """
        Build the application.

        ``content`` may be ``None``, in which case ``set_content()`` is
        not called and the body must be supplied later.  ``headers`` must
        be ``None`` or a list; the list is used (and mutated) directly.
        """
        assert isinstance(headers, (type(None), list))
        self.expires = None
        self.content = None
        self.content_length = None
        self.last_modified = 0
        if allowed_methods is not None:
            self.allowed_methods = allowed_methods
        self.headers = headers or []
        for (k, v) in kwargs.items():
            header = get_header(k)
            header.update(self.headers, v)
        ACCEPT_RANGES.update(self.headers, bytes=True)
        if not CONTENT_TYPE(self.headers):
            CONTENT_TYPE.update(self.headers)
        if content is not None:
            self.set_content(content)

    def cache_control(self, **kwargs):
        """
        Apply ``Cache-Control`` settings to the stored headers and remember
        the value returned by ``CACHE_CONTROL.apply`` (or ``None`` when it
        is falsy) as ``self.expires``.  Returns ``self`` for chaining.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def set_content(self, content, last_modified=None):
        """
        Replace the body served by this application.

        ``last_modified`` defaults to the current time.  Updates the
        recorded content length and the ``Last-Modified`` header, and
        returns ``self`` for chaining.
        """
        assert content is not None
        if last_modified is None:
            self.last_modified = time.time()
        else:
            self.last_modified = last_modified
        self.content = content
        self.content_length = len(content)
        LAST_MODIFIED.update(self.headers, time=self.last_modified)
        return self

    def content_disposition(self, **kwargs):
        """
        Apply ``Content-Disposition`` settings to the stored headers.
        Returns ``self`` for chaining.
        """
        CONTENT_DISPOSITION.apply(self.headers, **kwargs)
        return self

    def __call__(self, environ, start_response):
        """
        WSGI entry point: reject methods outside ``allowed_methods`` and
        delegate everything else to ``get()``.
        """
        method = environ['REQUEST_METHOD'].upper()
        if method not in self.allowed_methods:
            exc = HTTPMethodNotAllowed(
                'You cannot %s a file' % method,
                headers=[('Allow', ','.join(self.allowed_methods))])
            return exc(environ, start_response)
        return self.get(environ, start_response)

    def calculate_etag(self):
        """
        Return the entity tag: ``"<last_modified>-<content_length>"``.
        """
        return str(self.last_modified) + '-' + str(self.content_length)

    def _not_modified(self, headers, start_response):
        """
        Send a ``304 Not Modified`` response with an empty body after
        removing the entity headers from ``headers``.
        """
        # horribly inefficient, n^2 performance, yuck!
        for head in list_headers(entity=True):
            head.delete(headers)
        start_response('304 Not Modified', headers)
        return ['']  # empty body

    def _byte_span(self, requested):
        """
        Resolve a parsed ``Range`` value to an inclusive ``(lower, upper)``
        byte span.

        ``requested`` is the value produced by ``RANGE.parse``: either a
        false value or ``(units, [(lower, upper), ...])``.  Anything other
        than a single ``bytes`` range selects the whole content.  Returns
        ``None`` when the requested span cannot be satisfied.
        """
        last = self.content_length - 1
        if not (requested and 'bytes' == requested[0]
                and 1 == len(requested[1])):
            return (0, last)
        (lower, upper) = requested[1][0]
        # NOTE(review): assumes an open-ended range ("bytes=5-") is reported
        # with ``upper`` set to None -- confirm against RANGE.parse.  The
        # test must be ``is None`` rather than truthiness so that an explicit
        # upper bound of 0 ("bytes=0-0") selects just the first byte.
        if upper is None:
            upper = last
        if upper >= self.content_length or lower > upper:
            return None
        return (lower, upper)

    def get(self, environ, start_response):
        """
        Answer a GET/HEAD request, honouring ``If-None-Match``,
        ``If-Modified-Since`` and single ``bytes`` ranges.

        Returns a one-element list holding the (possibly sliced) body when
        the content is held in memory, or a ``(lower, content_length)``
        tuple when ``self.content`` is ``None`` so that a subclass can
        stream the bytes itself.  Conditional hits and error responses are
        also returned as WSGI response iterables.
        """
        # Work on a copy so per-request headers never leak into the
        # headers shared by all requests.
        headers = self.headers[:]
        current_etag = self.calculate_etag()
        ETAG.update(headers, current_etag)
        if self.expires is not None:
            EXPIRES.update(headers, delta=self.expires)

        try:
            client_etags = IF_NONE_MATCH.parse(environ)
        except HTTPBadRequest as exce:
            return exce.wsgi_application(environ, start_response)
        if client_etags:
            for etag in client_etags:
                if etag == current_etag or etag == '*':
                    return self._not_modified(headers, start_response)

        # If we get If-None-Match and If-Modified-Since, and
        # If-None-Match doesn't match, then we should not try to
        # figure out If-Modified-Since (which has 1-second granularity
        # and just isn't as accurate)
        if not client_etags:
            try:
                client_clock = IF_MODIFIED_SINCE.parse(environ)
            except HTTPBadRequest as exce:
                return exce.wsgi_application(environ, start_response)
            # Presumably parse() yields None when the header is absent;
            # guard explicitly instead of relying on None/int ordering.
            if (client_clock is not None
                    and client_clock >= int(self.last_modified)):
                return self._not_modified(headers, start_response)

        span = self._byte_span(RANGE.parse(environ))
        if span is None:
            return HTTPRequestRangeNotSatisfiable((
                "Range request was made beyond the end of the content,\r\n"
                "which is %s long.\r\n Range: %s\r\n") % (
                    self.content_length, RANGE(environ))
            ).wsgi_application(environ, start_response)
        (lower, upper) = span

        content_length = upper - lower + 1
        CONTENT_RANGE.update(headers, first_byte=lower, last_byte=upper,
                             total_length=self.content_length)
        CONTENT_LENGTH.update(headers, content_length)
        if content_length == self.content_length:
            start_response('200 OK', headers)
        else:
            start_response('206 Partial Content', headers)
        if self.content is not None:
            return [self.content[lower:upper + 1]]
        return (lower, content_length)
|---|
| 168 | |
|---|
class FileApp(DataApp):
    """
    Returns an application that will send the file at the given
    filename.  Adds a mime type based on ``mimetypes.guess_type()``.
    See DataApp for the arguments beyond ``filename``.
    """

    def __init__(self, filename, headers=None, **kwargs):
        """
        Remember ``filename`` and initialise the base class without
        content; guessed ``content_type`` / ``content_encoding`` are only
        used when the caller did not pass them explicitly.
        """
        self.filename = filename
        content_type, content_encoding = self.guess_type()
        if content_type and 'content_type' not in kwargs:
            kwargs['content_type'] = content_type
        if content_encoding and 'content_encoding' not in kwargs:
            kwargs['content_encoding'] = content_encoding
        DataApp.__init__(self, None, headers, **kwargs)

    def guess_type(self):
        """Return ``mimetypes.guess_type()`` for the served filename."""
        return mimetypes.guess_type(self.filename)

    def update(self, force=False):
        """
        Refresh the cached metadata (and, for small files, the cached
        content) from disk.

        Nothing is done when the file's mtime equals the recorded
        ``last_modified`` unless ``force`` is true.  Files smaller than
        ``CACHE_SIZE`` are read into memory; larger ones only have their
        length and ``Last-Modified`` header recorded.  Raises ``OSError``
        / ``IOError`` if the file cannot be stat'ed or read.
        """
        stat = os.stat(self.filename)
        if not force and stat.st_mtime == self.last_modified:
            return
        self.last_modified = stat.st_mtime
        if stat.st_size < CACHE_SIZE:
            fh = open(self.filename, "rb")
            try:
                self.set_content(fh.read(), stat.st_mtime)
            finally:
                # Close the handle even when the read fails.
                fh.close()
        else:
            self.content = None
            self.content_length = stat.st_size
            # This is updated automatically if self.set_content() is
            # called
            LAST_MODIFIED.update(self.headers, time=self.last_modified)

    def get(self, environ, start_response):
        """
        Serve the file, streaming it through ``_FileIter`` when it is not
        cached in memory.

        A missing file yields ``HTTPNotFound``; any other failure to stat
        or open it yields ``HTTPForbidden``.  The open handle is closed on
        every path that does not hand it to the returned iterator.
        """
        is_head = environ['REQUEST_METHOD'].upper() == 'HEAD'
        # RFC 2616 13.2.6: "max-age=0" forces revalidation against disk.
        force = 'max-age=0' in CACHE_CONTROL(environ).lower()
        handle = None
        try:
            self.update(force=force)
            if not self.content:
                handle = open(self.filename, 'rb')
        except (IOError, OSError) as e:
            # update() stats the file, so a missing file surfaces here
            # rather than as an unhandled exception.
            if not os.path.exists(self.filename):
                exc = HTTPNotFound(
                    'The resource does not exist',
                    comment="No file at %r" % self.filename)
                return exc(environ, start_response)
            exc = HTTPForbidden(
                'You are not permitted to view this file (%s)' % e)
            return exc.wsgi_application(
                environ, start_response)

        handed_off = False
        try:
            retval = DataApp.get(self, environ, start_response)
            if isinstance(retval, list):
                # cached content, exception, or not-modified
                if is_head:
                    return ['']
                return retval
            (lower, content_length) = retval
            if is_head:
                return ['']
            handle.seek(lower)
            # From here on the iterator owns the handle; the caller is
            # expected to close it via the iterator's close().
            handed_off = True
            return _FileIter(handle, size=content_length)
        finally:
            if handle is not None and not handed_off:
                handle.close()
|---|
| 234 | |
|---|
| 235 | class _FileIter(object): |
|---|
| 236 | |
|---|
| 237 | def __init__(self, file, block_size=None, size=None): |
|---|
| 238 | self.file = file |
|---|
| 239 | self.size = size |
|---|
| 240 | self.block_size = block_size or BLOCK_SIZE |
|---|
| 241 | |
|---|
| 242 | def __iter__(self): |
|---|
| 243 | return self |
|---|
| 244 | |
|---|
| 245 | def next(self): |
|---|
| 246 | chunk_size = self.block_size |
|---|
| 247 | if self.size is not None: |
|---|
| 248 | if chunk_size > self.size: |
|---|
| 249 | chunk_size = self.size |
|---|
| 250 | self.size -= chunk_size |
|---|
| 251 | data = self.file.read(chunk_size) |
|---|
| 252 | if not data: |
|---|
| 253 | raise StopIteration |
|---|
| 254 | return data |
|---|
| 255 | |
|---|
| 256 | def close(self): |
|---|
| 257 | self.file.close() |
|---|
| 258 | |
|---|
class ArchiveStore(object):
    """
    Returns an application that serves up a DataApp for items requested
    in a given zip or tar archive.

    Constructor Arguments:

        ``filepath``    the path to the archive being served

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.
    """

    def __init__(self, filepath):
        """
        Open ``filepath`` as a zip or tar archive.

        Raises ``AssertionError`` when the path is neither.
        """
        if zipfile.is_zipfile(filepath):
            self.archive = zipfile.ZipFile(filepath, "r")
        elif tarfile.is_tarfile(filepath):
            # NOTE(review): TarFileCompat is not available in every Python
            # version -- confirm against the interpreters being targeted.
            self.archive = tarfile.TarFileCompat(filepath, "r")
        else:
            raise AssertionError("filepath '%s' is not a zip or tar " % filepath)
        # Headers shared by every member served; cache_control() writes
        # into this list, so it must exist before that method is called.
        self.headers = []
        self.expires = None
        self.last_modified = time.time()
        # Maps archive member path -> DataApp already built for it.
        self.cache = {}

    def cache_control(self, **kwargs):
        """
        Apply ``Cache-Control`` settings to the shared headers and record
        the resulting expiry as ``self.expires``.  Returns ``self``.

        Only applications built after this call pick up the new settings;
        entries already in ``self.cache`` keep the headers they were
        created with.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def __call__(self, environ, start_response):
        """
        WSGI entry point: serve the archive member named by ``PATH_INFO``
        (without its leading slash), building and caching a ``DataApp``
        for it on first request.  Unknown members and directory entries
        produce ``HTTPNotFound``.
        """
        path = environ.get("PATH_INFO", "")
        if path.startswith("/"):
            path = path[1:]
        application = self.cache.get(path)
        if application:
            return application(environ, start_response)
        try:
            info = self.archive.getinfo(path)
        except KeyError:
            exc = HTTPNotFound("The file requested, '%s', was not found." % path)
            return exc.wsgi_application(environ, start_response)
        if info.filename.endswith("/"):
            exc = HTTPNotFound("Path requested, '%s', is not a file." % path)
            return exc.wsgi_application(environ, start_response)
        content_type, content_encoding = mimetypes.guess_type(info.filename)
        # Each app gets its own copy of the shared headers so that the
        # per-member headers DataApp adds do not accumulate on the store.
        app = DataApp(None, headers=list(self.headers),
                      content_type=content_type,
                      content_encoding=content_encoding)
        app.set_content(self.archive.read(path),
                        time.mktime(info.date_time + (0, 0, 0)))
        self.cache[path] = app
        app.expires = self.expires
        return app(environ, start_response)
|---|
| 313 | |
|---|