"""
wsgi_intercept.WSGI_HTTPConnection is a replacement for
httplib.HTTPConnection that redirects certain HTTP connections into a
WSGI application instead of opening a network connection.

Use 'add_wsgi_intercept' and 'remove_wsgi_intercept' to control which
host/port combinations are intercepted.
"""
---|
8 | |
---|
9 | import sys |
---|
10 | from httplib import HTTPConnection |
---|
11 | import urllib |
---|
12 | from cStringIO import StringIO |
---|
13 | import traceback |
---|
14 | |
---|
# Diagnostic output level, checked throughout this module:
#   0  silent (the default)
#   1  basic
#   2  verbose
debuglevel = 0
---|
18 | |
---|
19 | #### |
---|
20 | |
---|
#
# Specify which hosts/ports to target for interception to a given WSGI app.
#
# For simplicity's sake, intercept ENTIRE host/port combinations;
# intercepting only specific URL subtrees gets complicated, because we don't
# have that information in the HTTPConnection.connect() function that does the
# redirection.
#
# format: key=(host, port), value=(app_create_fn, script_name)
#
# (app_create_fn is called to obtain the WSGI app; script_name becomes
#  the SCRIPT_NAME)

_wsgi_intercept = {}
---|
34 | |
---|
def add_wsgi_intercept(host, port, app_create_fn, script_name=''):
    """
    Register a WSGI intercept for host:port.

    Connections to that host/port are handed to the app returned by
    app_create_fn, using a SCRIPT_NAME of 'script_name' (default '').
    An existing registration for the same (host, port) is replaced.
    """
    registration = (app_create_fn, script_name)
    _wsgi_intercept[(host, port)] = registration
---|
41 | |
---|
def remove_wsgi_intercept(host, port):
    """
    Remove the WSGI intercept call for (host, port).

    Do nothing if no intercept is registered for that combination.
    """
    # pop() with a default replaces the deprecated has_key()/del pair
    # and needs only a single dictionary lookup.
    _wsgi_intercept.pop((host, port), None)
---|
49 | |
---|
#
# make_environ: behave like a Web server.  Take in 'inp', and behave
# as if you're bound to 'host' and 'port'; build an environment dict
# for the WSGI app.
#
# This is where the magic happens, folks.
#
---|
57 | |
---|
def make_environ(inp, host, port, script_name):
    """
    Take 'inp' as if it were HTTP-speak being received on host:port,
    and parse it into a WSGI-ok environment dictionary.  Return the
    dictionary.

    'inp' is a file-like object holding the request line, the headers,
    a blank line and (optionally) the request body.  Whatever follows
    the blank line is exposed to the application as 'wsgi.input'.

    Set 'SCRIPT_NAME' from the 'script_name' input, and, if present,
    remove it from the beginning of the PATH_INFO variable.  If the
    request URL does not start with 'script_name', SCRIPT_NAME is set
    to '' and the whole path becomes PATH_INFO.

    PATH_INFO is URL-decoded; QUERY_STRING is passed through still
    URL-encoded.

    Raise ValueError if the request line does not consist of exactly
    three fields (method, URL, protocol) or if a header line contains
    no ':'.
    """
    environ = {}

    # The request line comes first.  Strip the line terminator here so
    # that it does not end up glued onto SERVER_PROTOCOL.
    method_line = inp.readline().strip()

    content_type = None
    content_length = None
    cookies = []

    #
    # parse the headers up to the first blank line (or the end of input).
    #

    for line in inp:
        line = line.strip()
        if not line:
            break

        k, v = line.split(':', 1)
        v = v.lstrip()
        name = k.lower()

        #
        # take care of special headers, and for the rest, put them
        # into the environ with HTTP_ in front.
        #

        if name == 'content-type':
            content_type = v
        elif name == 'content-length':
            content_length = v
        elif name in ('cookie', 'cookie2'):
            cookies.append(v)
        else:
            environ['HTTP_' + k.upper().replace('-', '_')] = v

        if debuglevel >= 2:
            print('HEADER: %s %s' % (k, v))

    #
    # decode the method line
    #

    if debuglevel >= 2:
        print('METHOD LINE: %s' % (method_line,))

    fields = method_line.split()
    if len(fields) != 3:
        raise ValueError('malformed HTTP request line: %r' % (method_line,))
    method, url, protocol = fields

    # clean the script_name off of the url, if it's there.
    if url.startswith(script_name):
        url = url[len(script_name):]
    else:
        script_name = ''        # URL lies outside the mount point

    if '?' in url:
        raw_path, query_string = url.split('?', 1)
    else:
        raw_path, query_string = url, ''

    # '+' only stands for a space inside query strings, so the path is
    # decoded with plain unquote().  The query string is deliberately
    # NOT decoded: the application decodes it itself, and decoding it
    # here as well would corrupt escaped '&', '=' and '%' characters.
    path_info = urllib.unquote(raw_path)

    if debuglevel:
        print("method: %s; script_name: %s; path_info: %s; query_string: %s" % (method, script_name, path_info, query_string))

    # everything left after the blank line is the request body.
    inp = StringIO(inp.read())

    #
    # fill out our dictionary.
    #

    environ.update({
        "wsgi.version": (1, 0),
        "wsgi.url_scheme": "http",
        "wsgi.input": inp,              # to read for POSTs
        "wsgi.errors": StringIO(),
        "wsgi.multithread": 0,
        "wsgi.multiprocess": 0,
        "wsgi.run_once": 0,

        "REQUEST_METHOD": method,
        "SCRIPT_NAME": script_name,
        "PATH_INFO": path_info,
        "QUERY_STRING": query_string,   # always present, possibly ''

        "SERVER_NAME": host,
        "SERVER_PORT": str(port),
        "SERVER_PROTOCOL": protocol,

        "REMOTE_ADDR": '127.0.0.1',
    })

    #
    # content_type & length are optional.
    #

    if content_type:
        environ['CONTENT_TYPE'] = content_type
        if debuglevel >= 2:
            print('CONTENT-TYPE: %s' % (content_type,))
    if content_length:
        environ['CONTENT_LENGTH'] = content_length
        if debuglevel >= 2:
            print('CONTENT-LENGTH: %s' % (content_length,))

    #
    # handle cookies: all Cookie/Cookie2 headers are merged into one.
    #
    if cookies:
        environ['HTTP_COOKIE'] = "; ".join(cookies)

    if debuglevel:
        print('WSGI environ dictionary: %s' % (environ,))

    return environ
---|
183 | |
---|
184 | # |
---|
185 | # fake socket for WSGI intercept stuff. |
---|
186 | # |
---|
187 | |
---|
class wsgi_fake_socket:
    """
    Handle HTTP traffic and stuff into a WSGI application object instead.

    Only the parts of the socket interface used here are provided:
    'sendall' collects the request, 'makefile' runs the application and
    returns its response, 'close' does nothing.

    Note that this class assumes:

    1. 'makefile' is called (by the response class) only after all of the
       data has been sent to the socket by the request class;
    2. non-persistent (i.e. non-HTTP/1.1) connections.
    """
    def __init__(self, app, host, port, script_name):
        self.app = app                  # WSGI app object
        self.host = host
        self.port = port
        self.script_name = script_name  # SCRIPT_NAME (app mount point)

        self.inp = StringIO()           # stuff written into this "socket"
        self.write_results = []         # pending data from the 'write_fn'
        self.result = None              # iterator over the app's result
        self.output = StringIO()        # all output from the app, incl headers

    def _flush_writes(self):
        "Move data handed to the write() callable into the output buffer."
        for data in self.write_results:
            self.output.write(data)
        del self.write_results[:]

    def makefile(self, *args, **kwargs):
        """
        'makefile' is called by the HTTPResponse class once all of the
        data has been written.  So, in this interceptor class, we need to:

        1. build a start_response function that grabs all the headers
           returned by the WSGI app;
        2. create a wsgi.input file object 'inp', containing all of the
           traffic;
        3. build an environment dict out of the traffic in inp;
        4. run the WSGI app & grab the result object;
        5. concatenate & return the result(s) read from the result object.

        The arguments are accepted for socket compatibility and ignored.
        Return a new file-like object holding the complete response
        (status line, headers and body).  The result object's close()
        method, if it has one, is called even when the app raises.
        """

        def start_response(status, headers, exc_info=None):
            # construct the head of the HTTP response.  'exc_info' is
            # accepted for WSGI compatibility but is not acted upon.
            self.output.write("HTTP/1.0 " + status + "\n")

            for k, v in headers:
                self.output.write('%s: %s\n' % (k, v,))
            self.output.write('\n')

            def write_fn(s):
                self.write_results.append(s)
            return write_fn

        # construct the wsgi.input file from everything that's been
        # written to this "socket".
        inp = StringIO(self.inp.getvalue())

        # build the environ dictionary.
        environ = make_environ(inp, self.host, self.port, self.script_name)

        # run the application.
        app_result = self.app(environ, start_response)

        # read all of the results.  the trick here is to ask the app for
        # a chunk of data *first*, *then* emit whatever was passed to the
        # 'write' function in the meantime, and only then emit the chunk
        # itself.  this is because the 'write' fn doesn't necessarily
        # get called until the first result is requested from the app.
        #
        # see twill tests, 'test_wrapper_intercept' for a test that breaks
        # if this is done incorrectly.
        try:
            self.result = iter(app_result)
            try:
                for data in self.result:
                    self._flush_writes()
                    if data:
                        self.output.write(data)
            finally:
                # covers apps that yield nothing, that fail part-way,
                # or that call write() after their last chunk.
                self._flush_writes()
        finally:
            # the result object must be closed whether or not the
            # iteration completed.
            if hasattr(app_result, 'close'):
                app_result.close()

        if debuglevel >= 2:
            print("*** %s ***" % (self.output.getvalue(),))

        # return the concatenated results.
        return StringIO(self.output.getvalue())

    def sendall(self, str):
        """
        Save all the traffic to self.inp.
        """
        if debuglevel >= 2:
            print(">>> %s >>>" % (str,))

        self.inp.write(str)

    def close(self):
        "Do nothing, for now."
        pass
---|
300 | |
---|
301 | # |
---|
302 | # WSGI_HTTPConnection |
---|
303 | # |
---|
304 | |
---|
class WSGI_HTTPConnection(HTTPConnection):
    """
    Intercept all traffic to certain hosts & redirect into a WSGI
    application object.
    """
    def get_app(self, host, port):
        """
        Return the app object for the given (host, port).

        The result is an (app, script_name) pair, where 'app' is freshly
        obtained by calling the registered app-creation function.  If
        nothing is registered for (host, port), return (None, None).
        'port' is converted with int() before the lookup.
        """
        # a single get() replaces the deprecated has_key() + second lookup.
        entry = _wsgi_intercept.get((host, int(port)))
        if entry is None:
            return None, None

        app_fn, script_name = entry
        return app_fn(), script_name

    def connect(self):
        """
        Override the connect() function to intercept calls to certain
        host/ports.

        If an app is registered for (self.host, self.port), install a
        wsgi_fake_socket as self.sock; otherwise fall back to the normal
        HTTPConnection.connect().  Exceptions are always re-raised; when
        debugging is enabled their traceback is printed first.
        """
        if debuglevel:
            sys.stderr.write('connect: %s, %s\n' % (self.host, self.port,))

        try:
            (app, script_name) = self.get_app(self.host, self.port)
            if app:
                if debuglevel:
                    sys.stderr.write('INTERCEPTING call to %s:%s\n' %
                                     (self.host, self.port,))
                self.sock = wsgi_fake_socket(app, self.host, self.port,
                                             script_name)
            else:
                HTTPConnection.connect(self)

        except Exception:
            if debuglevel:              # intercept & print out tracebacks
                traceback.print_exc()
            raise
---|
347 | |
---|
348 | ### DEBUGGING CODE -- to help me figure out communications stuff. ### |
---|
349 | |
---|
350 | # (ignore me, please) |
---|
351 | |
---|
352 | ''' |
---|
353 | import socket |
---|
354 | |
---|
355 | class file_wrapper: |
---|
356 | def __init__(self, fp): |
---|
357 | self.fp = fp |
---|
358 | |
---|
359 | def readline(self): |
---|
360 | d = self.fp.readline() |
---|
361 | if debuglevel: |
---|
362 | print 'file_wrapper readline:', d |
---|
363 | return d |
---|
364 | |
---|
365 | def __iter__(self): |
---|
366 | return self |
---|
367 | |
---|
368 | def next(self): |
---|
369 | d = self.fp.next() |
---|
370 | if debuglevel: |
---|
371 | print 'file_wrapper next:', d |
---|
372 | return d |
---|
373 | |
---|
374 | def read(self, *args): |
---|
375 | d = self.fp.read(*args) |
---|
376 | if debuglevel: |
---|
377 | print 'file_wrapper read:', d |
---|
378 | return d |
---|
379 | |
---|
380 | def close(self): |
---|
381 | if debuglevel: |
---|
382 | print 'file_wrapper close' |
---|
383 | self.fp.close() |
---|
384 | |
---|
385 | class intercept_socket: |
---|
386 | """ |
---|
387 | A socket that intercepts everything written to it & read from it. |
---|
388 | """ |
---|
389 | |
---|
390 | def __init__(self): |
---|
391 | for res in socket.getaddrinfo("floating.caltech.edu", 80, 0, |
---|
392 | socket.SOCK_STREAM): |
---|
393 | af, socktype, proto, canonname, sa = res |
---|
394 | self.sock = socket.socket(af, socktype, proto) |
---|
395 | self._open = True |
---|
396 | self.sock.connect(sa) |
---|
397 | break |
---|
398 | |
---|
399 | def makefile(self, *args, **kwargs): |
---|
400 | fp = self.sock.makefile('rb', 0) |
---|
401 | return file_wrapper(fp) |
---|
402 | |
---|
403 | def sendall(self, str): |
---|
404 | if not self._open: |
---|
405 | raise Exception |
---|
406 | |
---|
407 | return self.sock.sendall(str) |
---|
408 | |
---|
409 | def close(self): |
---|
410 | self._open = False |
---|
411 | ''' |
---|