1 | # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
---|
2 | # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
---|
3 | # (c) 2005 Ian Bicking, Clark C. Evans and contributors |
---|
4 | # This module is part of the Python Paste Project and is released under |
---|
5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php |
---|
6 | """ |
---|
7 | This module handles sending static content such as in-memory data or |
---|
8 | files. At this time it has cache helpers and understands the |
---|
9 | if-modified-since request header. |
---|
10 | """ |
---|
11 | |
---|
12 | import os, time, mimetypes, zipfile, tarfile |
---|
13 | from paste.httpexceptions import * |
---|
14 | from paste.httpheaders import * |
---|
15 | |
---|
# Files strictly smaller than this many bytes are read fully into memory
# by FileApp.update(); larger files are streamed from disk on each request.
CACHE_SIZE = 4096
# Default number of bytes _FileIter reads from the file per chunk.
BLOCK_SIZE = 4096 * 16

# Names exported by a star-import of this module.
__all__ = ['DataApp', 'FileApp', 'ArchiveStore']
---|
20 | |
---|
class DataApp(object):
    """
    WSGI application that sends in-memory content in a single chunk.

    The application supports setting cache-control headers and answers
    conditional requests (``If-None-Match`` / ``If-Modified-Since``) as
    well as single-span ``Range: bytes=...`` requests.

    Constructor Arguments:

        ``content``     the content being sent to the client; may be
                        ``None`` and supplied later via ``set_content()``

        ``headers``     the headers to send with the response (a list,
                        or ``None``)

        ``allowed_methods``  optional override for the request methods
                        this application accepts

        The remaining ``kwargs`` correspond to headers, where the
        underscore is replaced with a dash.  These values are only
        added to the headers if they are not already provided; thus,
        they can be used for default values.  Examples include, but
        are not limited to:

            ``content_type``
            ``content_encoding``
            ``content_location``

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.

    ``set_content()``

        This method provides a mechanism to set the content after the
        application has been constructed.  It also refreshes the
        ``Last-Modified`` header and the stored content length.

    """

    allowed_methods = ('GET', 'HEAD')

    def __init__(self, content, headers=None, allowed_methods=None,
                 **kwargs):
        assert isinstance(headers, (type(None), list))
        self.expires = None
        self.content = None
        self.content_length = None
        self.last_modified = 0
        if allowed_methods is not None:
            self.allowed_methods = allowed_methods
        self.headers = headers or []
        # Each keyword names a header; the header object decides how the
        # value is merged into the header list.
        for (k, v) in kwargs.items():
            header = get_header(k)
            header.update(self.headers, v)
        ACCEPT_RANGES.update(self.headers, bytes=True)
        if not CONTENT_TYPE(self.headers):
            CONTENT_TYPE.update(self.headers)
        if content is not None:
            self.set_content(content)

    def cache_control(self, **kwargs):
        """
        Apply ``Cache-Control`` settings to the stored headers.

        The value returned by ``CACHE_CONTROL.apply`` is remembered in
        ``self.expires`` (``None`` when falsy) and later used by ``get()``
        as the delta for the ``Expires`` header.  Returns ``self`` so
        calls can be chained.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def set_content(self, content, last_modified=None):
        """
        Replace the content served by this application.

        ``last_modified`` defaults to the current time.  The stored
        content length and the ``Last-Modified`` header are updated to
        match.  Returns ``self`` so calls can be chained.
        """
        assert content is not None
        if last_modified is None:
            self.last_modified = time.time()
        else:
            self.last_modified = last_modified
        self.content = content
        self.content_length = len(content)
        LAST_MODIFIED.update(self.headers, time=self.last_modified)
        return self

    def content_disposition(self, **kwargs):
        """
        Apply ``Content-Disposition`` settings to the stored headers.

        Returns ``self`` so calls can be chained.
        """
        CONTENT_DISPOSITION.apply(self.headers, **kwargs)
        return self

    def __call__(self, environ, start_response):
        """
        WSGI entry point.

        Requests whose method is not in ``allowed_methods`` are answered
        with ``HTTPMethodNotAllowed`` (including an ``Allow`` header);
        everything else, HEAD included, is dispatched to ``get()``.
        """
        method = environ['REQUEST_METHOD'].upper()
        if method not in self.allowed_methods:
            exc = HTTPMethodNotAllowed(
                'You cannot %s a file' % method,
                headers=[('Allow', ','.join(self.allowed_methods))])
            return exc(environ, start_response)
        return self.get(environ, start_response)

    def calculate_etag(self):
        """Return the entity tag: ``<last_modified>-<content_length>``."""
        return str(self.last_modified) + '-' + str(self.content_length)

    def _not_modified(self, headers, start_response):
        """Strip entity headers, start a 304 response, return an empty body."""
        # NOTE: the original author flagged this header removal as
        # quadratic; it is kept as-is because the header API is external.
        for head in list_headers(entity=True):
            head.delete(headers)
        start_response('304 Not Modified', headers)
        return [b'']  # empty body

    def _resolve_range(self, parsed_range):
        """
        Turn a parsed ``Range`` header into an inclusive byte span.

        ``parsed_range`` is whatever ``RANGE.parse`` returned: a falsy
        value, or ``(units, [(first, last), ...])``.  Only a single
        ``bytes`` span is honoured; anything else selects the whole
        content.  Returns ``(lower, upper)``, or ``None`` when the span
        cannot be satisfied by the current content.
        """
        last_index = self.content_length - 1
        if not (parsed_range and 'bytes' == parsed_range[0]
                and 1 == len(parsed_range[1])):
            return (0, last_index)
        (lower, upper) = parsed_range[1][0]
        # An explicit last byte of 0 ("bytes=0-0") is a valid one-byte
        # request, so only a missing end (assumed to be reported as None
        # by RANGE.parse -- confirm against paste.httpheaders) is
        # replaced by the end of the content.
        if upper is None:
            upper = last_index
        if upper >= self.content_length or lower > upper:
            return None
        return (lower, upper)

    def get(self, environ, start_response):
        """
        Answer a GET (or HEAD) request.

        Handles, in order: ``If-None-Match``, ``If-Modified-Since`` (only
        when no entity tags were supplied) and a single-span byte
        ``Range``.  Returns a one-element list holding the selected slice
        when content is held in memory; otherwise returns the tuple
        ``(lower, content_length)`` so a subclass can stream the bytes
        itself after the response has been started.
        """
        headers = self.headers[:]
        current_etag = self.calculate_etag()
        ETAG.update(headers, current_etag)
        if self.expires is not None:
            EXPIRES.update(headers, delta=self.expires)

        try:
            client_etags = IF_NONE_MATCH.parse(environ)
        except HTTPBadRequest as exce:
            return exce.wsgi_application(environ, start_response)
        for etag in client_etags or ():
            if etag == current_etag or etag == '*':
                return self._not_modified(headers, start_response)

        # If we get If-None-Match and If-Modified-Since, and
        # If-None-Match doesn't match, then we should not try to
        # figure out If-Modified-Since (which has 1-second granularity
        # and just isn't as accurate)
        if not client_etags:
            try:
                client_clock = IF_MODIFIED_SINCE.parse(environ)
            except HTTPBadRequest as exce:
                return exce.wsgi_application(environ, start_response)
            # Guard against a missing header value explicitly instead of
            # relying on Python 2's ordering of None against integers.
            if (client_clock is not None
                    and client_clock >= int(self.last_modified)):
                return self._not_modified(headers, start_response)

        span = self._resolve_range(RANGE.parse(environ))
        if span is None:
            return HTTPRequestRangeNotSatisfiable((
                "Range request was made beyond the end of the content,\r\n"
                "which is %s long.\r\n  Range: %s\r\n") % (
                    self.content_length, RANGE(environ))
            ).wsgi_application(environ, start_response)
        (lower, upper) = span

        content_length = upper - lower + 1
        CONTENT_RANGE.update(headers, first_byte=lower, last_byte=upper,
                             total_length=self.content_length)
        CONTENT_LENGTH.update(headers, content_length)
        if content_length == self.content_length:
            start_response('200 OK', headers)
        else:
            start_response('206 Partial Content', headers)
        if self.content is not None:
            return [self.content[lower:upper + 1]]
        return (lower, content_length)
---|
168 | |
---|
class FileApp(DataApp):
    """
    Returns an application that will send the file at the given
    filename.  Adds a mime type based on ``mimetypes.guess_type()``.
    See DataApp for the arguments beyond ``filename``.

    Files smaller than ``CACHE_SIZE`` are kept in memory and refreshed
    when the file's modification time changes; larger files are streamed
    from disk on every request.
    """

    def __init__(self, filename, headers=None, **kwargs):
        self.filename = filename
        content_type, content_encoding = self.guess_type()
        # Guessed values are only defaults; explicit keywords win.
        if content_type and 'content_type' not in kwargs:
            kwargs['content_type'] = content_type
        if content_encoding and 'content_encoding' not in kwargs:
            kwargs['content_encoding'] = content_encoding
        DataApp.__init__(self, None, headers, **kwargs)

    def guess_type(self):
        """Return ``(content_type, content_encoding)`` guessed from the filename."""
        return mimetypes.guess_type(self.filename)

    def update(self, force=False):
        """
        Re-read file metadata (and, for small files, the content).

        Nothing happens when the modification time is unchanged unless
        ``force`` is true.  Raises ``OSError`` from ``os.stat`` when the
        file cannot be examined.
        """
        stat = os.stat(self.filename)
        if not force and stat.st_mtime == self.last_modified:
            return
        self.last_modified = stat.st_mtime
        if stat.st_size < CACHE_SIZE:
            # The context manager closes the handle even if read() raises.
            with open(self.filename, "rb") as fh:
                data = fh.read()
            self.set_content(data, stat.st_mtime)
        else:
            self.content = None
            self.content_length = stat.st_size
            # This is updated automatically if self.set_content() is
            # called
            LAST_MODIFIED.update(self.headers, time=self.last_modified)

    def get(self, environ, start_response):
        """
        Answer a GET or HEAD request for the file.

        Returns a 404 response when the file does not exist and a 403
        response when it cannot be examined or opened.  Otherwise the
        work is delegated to ``DataApp.get``; when that reports a byte
        span instead of a body, the span is streamed from disk through
        ``_FileIter``.  HEAD requests always receive an empty body.
        """
        is_head = environ['REQUEST_METHOD'].upper() == 'HEAD'
        # RFC 2616 13.2.6: a max-age=0 request forces revalidation.
        force = 'max-age=0' in CACHE_CONTROL(environ).lower()
        try:
            self.update(force=force)
        except (IOError, OSError) as e:
            # os.stat() fails for a missing file, so the not-found case
            # has to be recognised here rather than after update().
            if not os.path.exists(self.filename):
                exc = HTTPNotFound(
                    'The resource does not exist',
                    comment="No file at %r" % self.filename)
                return exc(environ, start_response)
            exc = HTTPForbidden(
                'You are not permitted to view this file (%s)' % e)
            return exc.wsgi_application(environ, start_response)

        stream = None
        if not self.content:
            if not os.path.exists(self.filename):
                exc = HTTPNotFound(
                    'The resource does not exist',
                    comment="No file at %r" % self.filename)
                return exc(environ, start_response)
            # Open before the response is started so that a permission
            # problem can still be reported as a 403.
            try:
                stream = open(self.filename, 'rb')
            except (IOError, OSError) as e:
                exc = HTTPForbidden(
                    'You are not permitted to view this file (%s)' % e)
                return exc.wsgi_application(
                    environ, start_response)
        try:
            retval = DataApp.get(self, environ, start_response)
            if isinstance(retval, list):
                # cached content, exception, or not-modified
                if is_head:
                    return [b'']
                return retval
            (lower, content_length) = retval
            if is_head:
                return [b'']
            stream.seek(lower)
            body = _FileIter(stream, size=content_length)
            # The iterator now owns the handle and closes it in close().
            stream = None
            return body
        finally:
            # Every path that does not hand the handle to _FileIter
            # (304, errors, HEAD, in-memory content) must release it.
            if stream is not None:
                stream.close()
---|
234 | |
---|
235 | class _FileIter(object): |
---|
236 | |
---|
237 | def __init__(self, file, block_size=None, size=None): |
---|
238 | self.file = file |
---|
239 | self.size = size |
---|
240 | self.block_size = block_size or BLOCK_SIZE |
---|
241 | |
---|
242 | def __iter__(self): |
---|
243 | return self |
---|
244 | |
---|
245 | def next(self): |
---|
246 | chunk_size = self.block_size |
---|
247 | if self.size is not None: |
---|
248 | if chunk_size > self.size: |
---|
249 | chunk_size = self.size |
---|
250 | self.size -= chunk_size |
---|
251 | data = self.file.read(chunk_size) |
---|
252 | if not data: |
---|
253 | raise StopIteration |
---|
254 | return data |
---|
255 | |
---|
256 | def close(self): |
---|
257 | self.file.close() |
---|
258 | |
---|
class ArchiveStore(object):
    """
    Returns an application that serves up a DataApp for items requested
    in a given zip or tar archive.

    Constructor Arguments:

        ``filepath``    the path to the archive being served

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.  It only affects
        members served for the first time afterwards; applications
        already held in the cache keep the settings they were built with.
    """

    def __init__(self, filepath):
        if zipfile.is_zipfile(filepath):
            self.archive = zipfile.ZipFile(filepath, "r")
        elif tarfile.is_tarfile(filepath):
            # NOTE(review): TarFileCompat appears to be a Python 2-only
            # API -- confirm before running this branch on Python 3.
            self.archive = tarfile.TarFileCompat(filepath, "r")
        else:
            raise AssertionError("filepath '%s' is not a zip or tar " % filepath)
        self.expires = None
        self.last_modified = time.time()
        # Maps archive member path -> DataApp built for that member.
        self.cache = {}
        # Header list that cache_control() writes into; without it that
        # method failed with AttributeError.
        self.headers = []

    def cache_control(self, **kwargs):
        """
        Apply ``Cache-Control`` settings for members served from now on.

        The value returned by ``CACHE_CONTROL.apply`` is remembered in
        ``self.expires`` (``None`` when falsy).  Returns ``self`` so
        calls can be chained.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def __call__(self, environ, start_response):
        """
        WSGI entry point: serve the archive member named by ``PATH_INFO``.

        A single leading slash is removed from the path.  Unknown members
        and directory entries produce a 404 response.  The ``DataApp``
        built for a member is cached and reused for later requests.
        """
        path = environ.get("PATH_INFO", "")
        if path.startswith("/"):
            path = path[1:]
        application = self.cache.get(path)
        if application is not None:
            return application(environ, start_response)
        try:
            info = self.archive.getinfo(path)
        except KeyError:
            exc = HTTPNotFound("The file requested, '%s', was not found." % path)
            return exc.wsgi_application(environ, start_response)
        if info.filename.endswith("/"):
            exc = HTTPNotFound("Path requested, '%s', is not a file." % path)
            return exc.wsgi_application(environ, start_response)
        content_type, content_encoding = mimetypes.guess_type(info.filename)
        # Each member gets its own copy of the store-level headers so
        # per-member header changes cannot leak between applications.
        app = DataApp(None, headers=list(self.headers),
                      content_type=content_type,
                      content_encoding=content_encoding)
        # The member's archive timestamp becomes its Last-Modified time.
        app.set_content(self.archive.read(path),
                        time.mktime(info.date_time + (0, 0, 0)))
        self.cache[path] = app
        app.expires = self.expires
        return app(environ, start_response)
---|
313 | |
---|