1 | """Response classes. |
---|
2 | |
---|
3 | The seek_wrapper code is not used if you're using UserAgent with |
---|
4 | .set_seekable_responses(False), or if you're using the urllib2-level interface |
---|
5 | without SeekableProcessor or HTTPEquivProcessor. Class closeable_response is |
---|
6 | instantiated by some handlers (AbstractHTTPHandler), but the closeable_response |
---|
7 | interface is only depended upon by Browser-level code. Function |
---|
8 | upgrade_response is only used if you're using Browser or |
---|
9 | ResponseUpgradeProcessor. |
---|
10 | |
---|
11 | |
---|
12 | Copyright 2006 John J. Lee <jjl@pobox.com> |
---|
13 | |
---|
14 | This code is free software; you can redistribute it and/or modify it |
---|
15 | under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt |
---|
16 | included with the distribution). |
---|
17 | |
---|
18 | """ |
---|
19 | |
---|
20 | import copy, mimetools |
---|
21 | from cStringIO import StringIO |
---|
22 | import urllib2 |
---|
23 | |
---|
24 | # XXX Andrew Dalke kindly sent me a similar class in response to my request on |
---|
25 | # comp.lang.python, which I then proceeded to lose. I wrote this class |
---|
26 | # instead, but I think he's released his code publicly since, could pinch the |
---|
27 | # tests from it, at least... |
---|
28 | |
---|
29 | # For testing seek_wrapper invariant (note that |
---|
30 | # test_urllib2.HandlerTest.test_seekable is expected to fail when this |
---|
31 | # invariant checking is turned on). The invariant checking is done by module |
---|
32 | # ipdbc, which is available here: |
---|
33 | # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/436834 |
---|
34 | ## from ipdbc import ContractBase |
---|
35 | ## class seek_wrapper(ContractBase): |
---|
class seek_wrapper:
    """Adds a seek method to a file object.

    This is only designed for seeking on readonly file-like objects.

    Wrapped file-like object must have a read method.  The readline method is
    only supported if that method is present on the wrapped object.  The
    readlines method is always supported.  xreadlines and iteration are
    supported only for Python 2.2 and above.

    Public attributes:

    wrapped: the wrapped file object
    is_closed: true iff .close() has been called
    read_complete: true once a read of the wrapped object has hit end-of-file
     (assignments are ignored after .close())

    WARNING: All other attributes of the wrapped object (ie. those that are not
    one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
    are passed through unaltered, which may or may not make sense for your
    particular file object.

    """
    # General strategy is to check that cache is full enough, then delegate to
    # the cache (self.__cache, which is a cStringIO.StringIO instance).  A seek
    # position (self.__pos) is maintained independently of the cache, in order
    # that a single cache may be shared between multiple seek_wrapper objects.
    # Copying using module copy shares the cache in this way.

    def __init__(self, wrapped):
        """Wrap the file-like object `wrapped`, starting with an empty cache."""
        self.wrapped = wrapped
        # One-element lists so that copies made by __copy__ share the flags.
        self.__read_complete_state = [False]
        self.__is_closed_state = [False]
        self.__have_readline = hasattr(self.wrapped, "readline")
        self.__cache = StringIO()
        self.__pos = 0  # seek position

    def invariant(self):
        """Return true if the cache ends where the wrapped file's position is."""
        # The end of the cache is always at the same place as the end of the
        # wrapped file.
        return self.wrapped.tell() == len(self.__cache.getvalue())

    def close(self):
        """Close the wrapped object and record that fact in .is_closed."""
        self.wrapped.close()
        self.is_closed = True

    def __getattr__(self, name):
        """Serve the shared state flags, else delegate to the wrapped object."""
        if name == "is_closed":
            return self.__is_closed_state[0]
        elif name == "read_complete":
            return self.__read_complete_state[0]

        # Test identity rather than truthiness: a wrapped object that happens
        # to evaluate false (e.g. defines __len__) must still be delegated to.
        wrapped = self.__dict__.get("wrapped")
        if wrapped is not None:
            return getattr(wrapped, name)

        return getattr(self.__class__, name)

    def __setattr__(self, name, value):
        """Route the state flags to their shared cells; store the rest normally."""
        if name == "is_closed":
            self.__is_closed_state[0] = bool(value)
        elif name == "read_complete":
            # read_complete is frozen once the wrapper has been closed
            if not self.is_closed:
                self.__read_complete_state[0] = bool(value)
        else:
            self.__dict__[name] = value

    def seek(self, offset, whence=0):
        """Move the seek position, reading from the wrapped object as needed.

        whence 0: offset is absolute and must not be negative.
        whence 1: offset is relative to the current position; the resulting
         position must not be negative.
        whence 2: the wrapped object is read to its end and the position is
         set to (end - offset); offset must not be negative.

        Raises ValueError for an offset that is invalid as described above.

        """
        assert whence in [0, 1, 2]

        # how much data, if any, do we need to read?
        if whence == 2:  # 2: relative to end of *wrapped* file
            if offset < 0:
                raise ValueError("negative seek offset")
            # since we don't know yet where the end of that file is, we must
            # read everything
            to_read = None
        else:
            if whence == 0:  # 0: absolute
                if offset < 0:
                    raise ValueError("negative seek offset")
                dest = offset
            else:  # 1: relative to current position
                dest = self.__pos + offset
                # It is the *destination* that must not precede the start of
                # the file; comparing the position with the offset rejected
                # valid forward seeks and accepted invalid backward ones.
                if dest < 0:
                    raise ValueError("seek to before start of file")
            end = len(self.__cache.getvalue())
            to_read = max(dest - end, 0)

        if to_read != 0:
            self.__cache.seek(0, 2)
            if to_read is None:
                assert whence == 2
                self.__cache.write(self.wrapped.read())
                self.read_complete = True
                self.__pos = self.__cache.tell() - offset
            else:
                data = self.wrapped.read(to_read)
                if not data:
                    self.read_complete = True
                else:
                    self.__cache.write(data)
                # Don't raise an exception even if we've seek()ed past the end
                # of .wrapped, since fseek() doesn't complain in that case.
                # Also like fseek(), pretend we have seek()ed past the end,
                # i.e. not:
                #self.__pos = self.__cache.tell()
                # but rather:
                self.__pos = dest
        else:
            self.__pos = dest

    def tell(self):
        """Return the current seek position."""
        return self.__pos

    def __copy__(self):
        """Return a new wrapper sharing this one's cache and state flags.

        The copy has its own seek position, starting at 0.

        """
        cpy = self.__class__(self.wrapped)
        cpy.__cache = self.__cache
        cpy.__read_complete_state = self.__read_complete_state
        cpy.__is_closed_state = self.__is_closed_state
        return cpy

    def get_data(self):
        """Return all data from the start, leaving the seek position as it was."""
        pos = self.__pos
        try:
            self.seek(0)
            return self.read(-1)
        finally:
            self.__pos = pos

    def read(self, size=-1):
        """Read up to size characters from the seek position (all if negative)."""
        if size is None or size < 0:
            # any negative size (or None) means "read everything"
            size = -1
        pos = self.__pos
        end = len(self.__cache.getvalue())
        available = end - pos

        # enough data already cached?
        if size <= available and size != -1:
            self.__cache.seek(pos)
            self.__pos = pos + size
            return self.__cache.read(size)

        # no, so read sufficient data from wrapped file and cache it
        self.__cache.seek(0, 2)
        if size == -1:
            self.__cache.write(self.wrapped.read())
            self.read_complete = True
        else:
            to_read = size - available
            assert to_read > 0
            data = self.wrapped.read(to_read)
            if not data:
                self.read_complete = True
            else:
                self.__cache.write(data)
        self.__cache.seek(pos)

        data = self.__cache.read(size)
        self.__pos = self.__cache.tell()
        assert self.__pos == pos + len(data)
        return data

    def readline(self, size=-1):
        """Read one line from the seek position, at most size characters long.

        Raises NotImplementedError if the wrapped object has no readline.

        """
        if not self.__have_readline:
            raise NotImplementedError("no readline method on wrapped object")

        # line we're about to read might not be complete in the cache, so
        # read another line first
        pos = self.__pos
        self.__cache.seek(0, 2)
        data = self.wrapped.readline()
        if not data:
            self.read_complete = True
        else:
            self.__cache.write(data)
        self.__cache.seek(pos)

        data = self.__cache.readline()
        if size is not None and size >= 0:
            data = data[:size]
        # Advance by what was actually returned; advancing by `size` pushed
        # the position past the data whenever the line was shorter than size.
        self.__pos = pos + len(data)
        return data

    def readlines(self, sizehint=-1):
        """Read the rest of the wrapped object and return lines from the cache."""
        pos = self.__pos
        self.__cache.seek(0, 2)
        self.__cache.write(self.wrapped.read())
        self.read_complete = True
        self.__cache.seek(pos)
        data = self.__cache.readlines(sizehint)
        self.__pos = self.__cache.tell()
        return data

    def __iter__(self):
        return self

    def next(self):
        """Return the next line, raising StopIteration at end-of-file."""
        line = self.readline()
        if line == "":
            raise StopIteration
        return line

    xreadlines = __iter__

    def __repr__(self):
        return ("<%s at %s whose wrapped object = %r>" %
                (self.__class__.__name__, hex(abs(id(self))), self.wrapped))
---|
241 | |
---|
242 | |
---|
class response_seek_wrapper(seek_wrapper):
    """seek_wrapper for response objects.

    Supports copying response objects and setting response body data.

    """

    def __init__(self, wrapped):
        """Wrap `wrapped` and remember the headers its .info() returns."""
        seek_wrapper.__init__(self, wrapped)
        self._headers = self.wrapped.info()

    def __copy__(self):
        """Copy as seek_wrapper does, giving the copy its own headers copy."""
        duplicate = seek_wrapper.__copy__(self)
        # copy headers from delegate
        duplicate._headers = copy.copy(self.info())
        return duplicate

    # Note that .info() and .geturl() (the only two urllib2 response methods
    # that are not implemented by seek_wrapper) must be here explicitly rather
    # than by seek_wrapper's __getattr__ delegation) so that the nasty
    # dynamically-created HTTPError classes in get_seek_wrapper_class() get the
    # wrapped object's implementation, and not HTTPError's.

    def info(self):
        """Return the headers object held by this wrapper."""
        return self._headers

    def geturl(self):
        """Return the wrapped response's URL."""
        return self.wrapped.geturl()

    def set_data(self, data):
        """Replace the response body seen through this wrapper with `data`.

        The wrapped response is first read to its end and closed; the seek
        position ends up at 0.

        """
        self.seek(0)
        self.read()
        self.close()
        # swap in a fresh cache (seek_wrapper's name-mangled private attribute)
        replacement = StringIO()
        self._seek_wrapper__cache = replacement
        replacement.write(data)
        self.seek(0)
---|
279 | |
---|
280 | |
---|
281 | class eoffile: |
---|
282 | # file-like object that always claims to be at end-of-file... |
---|
283 | def read(self, size=-1): return "" |
---|
284 | def readline(self, size=-1): return "" |
---|
285 | def __iter__(self): return self |
---|
286 | def next(self): return "" |
---|
287 | def close(self): pass |
---|
288 | |
---|
class eofresponse(eoffile):
    """End-of-file stand-in that also carries response metadata.

    Stores the URL, headers, code and msg it is given and exposes the first
    two through .geturl() and .info().

    """

    def __init__(self, url, headers, code, msg):
        self.code, self.msg = code, msg
        self._url, self._headers = url, headers

    def geturl(self):
        """Return the URL passed to the constructor."""
        return self._url

    def info(self):
        """Return the headers object passed to the constructor."""
        return self._headers
---|
297 | |
---|
298 | |
---|
class closeable_response:
    """Avoids unnecessarily clobbering urllib.addinfourl methods on .close().

    Only supports responses returned by mechanize.HTTPHandler.

    After .close(), the following methods are supported:

    .read()
    .readline()
    .info()
    .geturl()
    .__iter__()
    .next()
    .close()

    and the following attributes are supported:

    .code
    .msg

    Also supports pickling (but the stdlib currently does something to prevent
    it: http://python.org/sf/1144636).

    """
    # presence of this attr indicates is useable after .close()
    closeable_response = None

    # Instance attributes installed by _set_fp() that merely delegate to the
    # current fp; they are rebuilt from the fp rather than pickled.
    _fp_delegates = ("read", "readline", "readlines", "fileno",
                     "__iter__", "next")

    def __init__(self, fp, headers, url, code, msg):
        """Build a response around file-like `fp` with the given metadata."""
        self._set_fp(fp)
        self._headers = headers
        self._url = url
        self.code = code
        self.msg = msg

    def _set_fp(self, fp):
        """Make `fp` the underlying file and bind its methods on the instance."""
        self.fp = fp
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"):
            self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            self.fileno = lambda: None
        self.__iter__ = self.fp.__iter__
        self.next = self.fp.next

    def __repr__(self):
        return '<%s at %s whose fp = %r>' % (
            self.__class__.__name__, hex(abs(id(self))), self.fp)

    def info(self):
        """Return the headers object passed to the constructor."""
        return self._headers

    def geturl(self):
        """Return the URL passed to the constructor."""
        return self._url

    def close(self):
        """Close the underlying file and switch to an always-at-EOF stand-in."""
        wrapped = self.fp
        wrapped.close()
        new_wrapped = eofresponse(
            self._url, self._headers, self.code, self.msg)
        self._set_fp(new_wrapped)

    def __getstate__(self):
        """Return picklable state in which the body is truncated to EOF."""
        # There are three obvious options here:
        # 1. truncate
        # 2. read to end
        # 3. close socket, pickle state including read position, then open
        #    again on unpickle and use Range header
        # XXXX um, 4. refuse to pickle unless .close()d.  This is better,
        # actually ("errors should never pass silently").  Pickling doesn't
        # work anyway ATM, because of http://python.org/sf/1144636 so fix
        # this later

        # 2 breaks pickle protocol, because one expects the original object
        # to be left unscathed by pickling.  3 is too complicated and
        # surprising (and too much work ;-) to happen in a sane __getstate__.
        # So we do 1.

        state = self.__dict__.copy()
        # The delegates are bound to the live fp (or are a lambda), so they
        # must not travel with the state; __setstate__ rebuilds them.
        for name in self._fp_delegates:
            state.pop(name, None)
        # The file object lives in attribute "fp"; storing the stand-in under
        # any other key would leave the live fp in the pickled state.
        state["fp"] = eofresponse(
            self._url, self._headers, self.code, self.msg)
        return state

    def __setstate__(self, state):
        """Restore attributes and re-bind the methods delegated to the fp."""
        self.__dict__.update(state)
        self._set_fp(state["fp"])
---|
383 | |
---|
def test_response(data='test data', headers=[],
                  url="http://example.com/", code=200, msg="OK"):
    """Return a response built by make_response() from the given arguments.

    Every argument has a default, so a bare call yields a canned response.
    The default headers list is only passed along, never modified here.

    """
    return make_response(
        data=data, headers=headers, url=url, code=code, msg=msg)
---|
387 | |
---|
def test_html_response(data='test data', headers=[],
                       url="http://example.com/", code=200, msg="OK"):
    """Return a make_response() response with a text/html Content-type added.

    data: string containing response body data
    headers: sequence of (name, value) pairs; it is not modified
    url: URL of response
    code: integer response code (e.g. 200)
    msg: string response code message (e.g. "OK")

    """
    # Build a new list instead of using "headers += [...]": the in-place form
    # mutates the caller's list and, when headers is omitted, the shared
    # default list, so one more Content-type header piled up on every call.
    html_headers = list(headers) + [("Content-type", "text/html")]
    return make_response(data, html_headers, url, code, msg)
---|
392 | |
---|
def make_response(data, headers, url, code, msg):
    """Convenient factory for objects implementing response interface.

    data: string containing response body data
    headers: sequence of (name, value) pairs
    url: URL of response
    code: integer response code (e.g. 200)
    msg: string response code message (e.g. "OK")

    The result is a closeable_response over an in-memory copy of data,
    wrapped in a response_seek_wrapper.

    """
    parsed_headers = make_headers(headers)
    return response_seek_wrapper(
        closeable_response(StringIO(data), parsed_headers, url, code, msg))
---|
406 | |
---|
407 | |
---|
def make_headers(headers):
    """Return a mimetools.Message parsed from the given header pairs.

    headers: sequence of (name, value) pairs

    Each pair is rendered as a "name: value" line and the lines are joined
    with newlines before parsing.

    """
    header_lines = ["%s: %s" % pair for pair in headers]
    return mimetools.Message(StringIO("\n".join(header_lines)))
---|
416 | |
---|
417 | |
---|
418 | # Rest of this module is especially horrible, but needed, at least until fork |
---|
419 | # urllib2. Even then, may want to preserve urllib2 compatibility. |
---|
420 | |
---|
def get_seek_wrapper_class(response):
    """Return the wrapper class to use for making `response` seekable.

    For a urllib2.HTTPError that has no seek attribute, a new class deriving
    from both response_seek_wrapper and the response's own class is created
    on each call; for anything else, response_seek_wrapper itself is returned.

    """
    # in order to wrap response objects that are also exceptions, we must
    # dynamically subclass the exception :-(((
    is_unwrapped_error = (isinstance(response, urllib2.HTTPError) and
                          not hasattr(response, "seek"))
    if not is_unwrapped_error:
        return response_seek_wrapper

    response_class = response.__class__
    if response_class.__module__ == "__builtin__":
        exc_class_name = response_class.__name__
    else:
        exc_class_name = "%s.%s" % (
            response_class.__module__, response_class.__name__)

    class httperror_seek_wrapper(response_seek_wrapper, response_class):
        # this only derives from HTTPError in order to be a subclass --
        # the HTTPError behaviour comes from delegation

        _exc_class_name = exc_class_name

        def __init__(self, wrapped):
            response_seek_wrapper.__init__(self, wrapped)
            # be compatible with undocumented HTTPError attributes :-(
            self.hdrs = wrapped.info()
            self.filename = wrapped.geturl()

        def __repr__(self):
            return (
                "<%s (%s instance) at %s "
                "whose wrapped object = %r>" % (
                    self.__class__.__name__, self._exc_class_name,
                    hex(abs(id(self))), self.wrapped)
                )

    return httperror_seek_wrapper
---|
455 | |
---|
def seek_wrapped_response(response):
    """Return a copy of response that supports seekable response interface.

    Accepts responses from both mechanize and urllib2 handlers.

    Copes with both ordinary response instances and HTTPError instances (which
    can't be simply wrapped due to the requirement of preserving the exception
    base class).

    A response that already has a seek attribute is returned as-is.
    """
    if hasattr(response, "seek"):
        return response
    seekable = get_seek_wrapper_class(response)(response)
    assert hasattr(seekable, "get_data")
    return seekable
---|
470 | |
---|
def upgrade_response(response):
    """Return a copy of response that supports Browser response interface.

    Browser response interface is that of "seekable responses"
    (response_seek_wrapper), plus the requirement that responses must be
    useable after .close() (closeable_response).

    Accepts responses from both mechanize and urllib2 handlers.

    Copes with both ordinary response instances and HTTPError instances (which
    can't be simply wrapped due to the requirement of preserving the exception
    base class).
    """
    wrapper_class = get_seek_wrapper_class(response)

    if hasattr(response, "closeable_response"):
        # already useable after .close(): make seekable if needed, then copy
        if not hasattr(response, "seek"):
            response = wrapper_class(response)
        assert hasattr(response, "get_data")
        return copy.copy(response)

    # a urllib2 handler constructed the response, i.e. the response is an
    # urllib.addinfourl or a urllib2.HTTPError, instead of a
    # _Util.closeable_response as returned by e.g. mechanize.HTTPHandler
    code = getattr(response, "code", None)
    msg = getattr(response, "msg", None)

    # may have already-.read() data from .seek() cache
    fetch_data = getattr(response, "get_data", None)
    if fetch_data:
        data = fetch_data()
    else:
        data = None

    closeable = closeable_response(
        response.fp, response.info(), response.geturl(), code, msg)
    upgraded = wrapper_class(closeable)
    if data:
        upgraded.set_data(data)
    return upgraded
---|