1 | import urllib2 |
---|
2 | from cStringIO import StringIO |
---|
3 | import _response |
---|
4 | |
---|
5 | # GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library |
---|
class GzipConsumer:
    """Incrementally strip a gzip header and inflate the stream behind it.

    Wraps a *consumer* object.  Raw gzip data is pushed in with feed();
    the decompressed output is forwarded to ``consumer.feed()``.  close()
    flushes the decoder and then calls ``consumer.close()``.  Any other
    attribute is looked up on the wrapped consumer.

    Input is handled as byte strings (``str`` on Python 2, ``bytes`` on
    Python 3); header bytes are read through one-byte slices so both work.
    """

    def __init__(self, consumer):
        self.__consumer = consumer
        # zlib decompress object; created once the gzip header is complete.
        self.__decoder = None
        # Header bytes received so far while the header is still incomplete.
        self.__data = ""

    def __getattr__(self, key):
        # Only reached for attributes not found on this object.  Refuse our
        # own private names so a missing one cannot recurse forever through
        # the self.__consumer lookup below.
        if key.startswith("_GzipConsumer__"):
            raise AttributeError(key)
        return getattr(self.__consumer, key)

    def __byte_at(self, data, index):
        """Return the integer value of data[index].

        Raises IndexError when *index* is past the end of *data*, which the
        header parser uses as its "need more data" signal.
        """
        if index >= len(data):
            raise IndexError("not enough data")
        # A one-byte slice is accepted by ord() for both str and bytes.
        return ord(data[index:index + 1])

    def __header_size(self, data):
        """Return the length of the gzip header at the start of *data*.

        Raises IndexError if *data* does not yet hold the complete header,
        and IOError if it does not start with the gzip/deflate magic.
        """
        byte_at = self.__byte_at
        flag = byte_at(data, 3)  # also guarantees the 3 magic bytes exist
        # Validate the magic before trusting any length field: otherwise
        # non-gzip input with flag bits set is buffered forever instead of
        # being rejected.
        magic = (byte_at(data, 0), byte_at(data, 1), byte_at(data, 2))
        if magic != (0x1f, 0x8b, 0x08):
            raise IOError("invalid gzip data")
        pos = 10  # fixed-size part of the header
        if flag & 4:  # extra: 2-byte little-endian length, then payload
            extra_len = byte_at(data, pos) + 256 * byte_at(data, pos + 1)
            pos = pos + 2 + extra_len
        if flag & 8:  # filename: zero-terminated
            while byte_at(data, pos):
                pos = pos + 1
            pos = pos + 1
        if flag & 16:  # comment: zero-terminated
            while byte_at(data, pos):
                pos = pos + 1
            pos = pos + 1
        if flag & 2:  # crc: 2 bytes
            pos = pos + 2
        if len(data) < pos:
            raise IndexError("not enough data")
        return pos

    def feed(self, data):
        """Accept the next chunk of gzip data.

        Until the header is complete, chunks are buffered and nothing is
        forwarded.  After that, each chunk is decompressed and any output
        is passed to the consumer's feed().

        Raises IOError if the stream does not start with the gzip magic.
        """
        if self.__decoder is None:
            # check if we have a full gzip header
            if self.__data:
                data = self.__data + data
            try:
                start = self.__header_size(data)
            except IndexError:
                self.__data = data
                return  # need more data
            import zlib
            self.__data = ""
            # Negative wbits: raw deflate stream, header already stripped.
            self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
            data = data[start:]
        data = self.__decoder.decompress(data)
        if data:
            self.__consumer.feed(data)

    def close(self):
        """Flush pending output to the consumer, then close the consumer.

        If the header was never completed, no decoder exists and the
        buffered header bytes are simply not forwarded.
        """
        if self.__decoder is not None:
            data = self.__decoder.flush()
            if data:
                self.__consumer.feed(data)
        self.__consumer.close()
---|
57 | |
---|
58 | |
---|
59 | # -------------------------------------------------------------------- |
---|
60 | |
---|
61 | # the rest of this module is John Lee's stupid code, not |
---|
62 | # Fredrik's nice code :-) |
---|
63 | |
---|
class stupid_gzip_consumer:
    """Minimal consumer that keeps every chunk it is fed, in order."""

    def __init__(self):
        # Chunks in arrival order, exposed as the public ``data`` list.
        self.data = list()

    def feed(self, data):
        """Record one chunk."""
        collected = self.data
        collected.append(data)
---|
67 | |
---|
class stupid_gzip_wrapper(_response.closeable_response):
    """Response wrapper that serves the gunzipped body of *response*.

    The whole body is read and decompressed eagerly in the constructor.
    read(), readline() and readlines() then operate on that in-memory
    copy; every other attribute is delegated to the wrapped response.
    """

    def __init__(self, response):
        # NOTE(review): the base class __init__ is not called here --
        # confirm that is intended.
        self._response = response

        collector = stupid_gzip_consumer()
        gzc = GzipConsumer(collector)
        gzc.feed(response.read())
        self.__data = StringIO("".join(collector.data))

    def read(self, size=-1):
        """Read up to *size* bytes of decompressed data (all if negative)."""
        return self.__data.read(size)

    def readline(self, size=-1):
        """Read one line of decompressed data."""
        return self.__data.readline(size)

    def readlines(self, sizehint=-1):
        """Return the remaining decompressed data as a list of lines."""
        # Pass the actual parameter; the previous code referenced an
        # undefined name here and raised NameError on every call.
        return self.__data.readlines(sizehint)

    def __getattr__(self, name):
        # delegate unknown methods/attributes
        return getattr(self._response, name)
---|
87 | |
---|
class HTTPGzipProcessor(urllib2.BaseHandler):
    """Handler that requests gzip content and decodes gzip responses.

    Outgoing requests get an ``Accept-Encoding: gzip`` header.  Responses
    whose Content-Encoding mentions a supported coding are wrapped in
    stupid_gzip_wrapper; all others are returned untouched.
    """

    handler_order = 200  # response processing before HTTPEquivProcessor

    def http_request(self, request):
        """Advertise gzip support on *request* and return it."""
        request.add_header("Accept-Encoding", "gzip")
        return request

    def http_response(self, request, response):
        """Return *response*, wrapped for decompression when it is encoded."""
        # post-process response
        for enc_hdr in response.info().getheaders("Content-encoding"):
            # Content-coding names are case-insensitive, so normalise
            # before matching ("GZIP", "X-Gzip", ...).
            coding = enc_hdr.lower()
            # NOTE(review): "compress" is the LZW coding in the HTTP spec,
            # which a gzip decoder presumably cannot handle -- confirm
            # whether matching it here is intended.
            if ("gzip" in coding) or ("compress" in coding):
                return stupid_gzip_wrapper(response)
        return response

    https_response = http_response
---|