[3] | 1 | """ |
---|
| 2 | wsgi_intercept.WSGI_HTTPConnection is a replacement for |
---|
| 3 | httplib.HTTPConnection that intercepts certain HTTP connections into a |
---|
| 4 | WSGI application. |
---|
| 5 | |
---|
| 6 | Use 'add_wsgi_intercept' and 'remove_wsgi_intercept' to control this behavior. |
---|
| 7 | """ |
---|
| 8 | |
---|
| 9 | import sys |
---|
| 10 | from httplib import HTTPConnection |
---|
| 11 | import urllib |
---|
| 12 | from cStringIO import StringIO |
---|
| 13 | import traceback |
---|
| 14 | |
---|
# Module-wide debug verbosity.  Every diagnostic in this module is guarded
# by a check of this value, so 0 (the default) keeps the module silent.
debuglevel = 0
#   1   basic    (connect/intercept notices, parsed request summary, environ)
#   2   verbose  (additionally: raw headers, request line, traffic in and out)
---|
| 18 | |
---|
| 19 | #### |
---|
| 20 | |
---|
#
# Specify which hosts/ports to target for interception to a given WSGI app.
#
# For simplicity's sake, intercept ENTIRE host/port combinations;
# intercepting only specific URL subtrees gets complicated, because we don't
# have that information in the HTTPConnection.connect() function that does the
# redirection.
#
# format: key=(host, port), value=(app_create_fn, script_name)
#
# (app_create_fn is called to build the app for each intercepted connection;
#  script_name becomes the SCRIPT_NAME)

_wsgi_intercept = {}
---|
| 34 | |
---|
def add_wsgi_intercept(host, port, app_create_fn, script_name=''):
    """
    Register a WSGI intercept for host:port.

    'app_create_fn' is stored as given (it is not called here), paired
    with 'script_name' (default ''), which is used as the SCRIPT_NAME.
    Registering the same (host, port) again replaces the earlier entry.
    """
    target = (host, port)
    registration = (app_create_fn, script_name)
    _wsgi_intercept[target] = registration
---|
| 41 | |
---|
def remove_wsgi_intercept(host, port):
    """
    Remove the WSGI intercept call for (host, port).

    Does nothing if no intercept is registered for that pair.
    """
    # pop() with a default does the membership test and the removal in a
    # single lookup, and avoids the deprecated dict.has_key().
    _wsgi_intercept.pop((host, port), None)
---|
| 49 | |
---|
| 50 | # |
---|
| 51 | # make_environ: behave like a Web server. Take in 'input', and behave |
---|
| 52 | # as if you're bound to 'host' and 'port'; build an environment dict |
---|
| 53 | # for the WSGI app. |
---|
| 54 | # |
---|
| 55 | # This is where the magic happens, folks. |
---|
| 56 | # |
---|
| 57 | |
---|
def make_environ(inp, host, port, script_name):
    """
    Take 'inp' as if it were HTTP-speak being received on host:port,
    and parse it into a WSGI-ok environment dictionary.  Return the
    dictionary.

    Set 'SCRIPT_NAME' from the 'script_name' input, and, if present,
    remove it from the beginning of the PATH_INFO variable.  If the
    request URL does not start with 'script_name', SCRIPT_NAME is set
    to '' and the whole path becomes PATH_INFO.

    'inp' is a file-like object positioned at the start of the request
    line.  PATH_INFO is URL-decoded; QUERY_STRING is passed through
    still encoded, so that the application can split it on '&' and '='
    before decoding the pieces.  SERVER_PORT is str(port).  Whatever
    follows the blank line that ends the headers is handed to the
    application as 'wsgi.input'.

    Raises ValueError if the request line does not consist of exactly
    three whitespace-separated fields, or if a header line has no ':'.
    """
    environ = {}

    # the request line comes first; it is decoded after the headers.
    method_line = inp.readline()

    content_type = None
    content_length = None
    cookies = []

    #
    # parse the headers up to the first blank line (or the end of input).
    # this sticks to readline() rather than iterating over 'inp', so that
    # the read() of the body further down is not mixed with iteration.
    #

    while 1:
        line = inp.readline().strip()
        if not line:
            break

        k, v = line.split(':', 1)
        v = v.lstrip()

        #
        # take care of special headers, and for the rest, put them
        # into the environ with HTTP_ in front.
        #

        name = k.lower()
        if name == 'content-type':
            content_type = v
        elif name == 'content-length':
            content_length = v
        elif name in ('cookie', 'cookie2'):
            cookies.append(v)
        else:
            environ['HTTP_' + k.upper().replace('-', '_')] = v

        if debuglevel >= 2:
            sys.stdout.write('HEADER: %s %s\n' % (k, v))

    #
    # decode the method line
    #

    if debuglevel >= 2:
        sys.stdout.write('METHOD LINE: %s\n' % (method_line,))

    # split on any whitespace, so that the line terminator does not end
    # up glued onto the protocol field (and from there SERVER_PROTOCOL).
    method, url, protocol = method_line.split()

    # clean the script_name off of the url, if it's there.
    if url.startswith(script_name):
        url = url[len(script_name):]
    else:
        script_name = ''                # @CTB what to do -- bad URL.  scrap?

    if '?' in url:
        path, query_string = url.split('?', 1)
    else:
        path, query_string = url, ''

    # only the path is decoded, and with unquote() rather than
    # unquote_plus(): a '+' in a path is a literal plus sign.  the query
    # string stays encoded -- decoding it here would make an escaped
    # '&' or '=' indistinguishable from a real separator.
    path_info = urllib.unquote(path)

    if debuglevel:
        sys.stdout.write("method: %s; script_name: %s; path_info: %s; query_string: %s\n" % (method, script_name, path_info, query_string))

    # whatever is left is the request body; give the app its own file
    # object, positioned at the start of that body.
    inp = StringIO(inp.read())

    #
    # fill out our dictionary.
    #

    environ.update({ "wsgi.version" : (1,0),
                     "wsgi.url_scheme": "http",
                     "wsgi.input" : inp,           # to read for POSTs
                     "wsgi.errors" : StringIO(),
                     "wsgi.multithread" : 0,
                     "wsgi.multiprocess" : 0,
                     "wsgi.run_once" : 0,

                     "REQUEST_METHOD" : method,
                     "SCRIPT_NAME" : script_name,
                     "PATH_INFO" : path_info,
                     "QUERY_STRING" : query_string,

                     "SERVER_NAME" : host,
                     "SERVER_PORT" : str(port),
                     "SERVER_PROTOCOL" : protocol,

                     "REMOTE_ADDR" : '127.0.0.1',
                     })

    #
    # content_type & length are optional.
    #

    if content_type:
        environ['CONTENT_TYPE'] = content_type
        if debuglevel >= 2:
            sys.stdout.write('CONTENT-TYPE: %s\n' % (content_type,))
    if content_length:
        environ['CONTENT_LENGTH'] = content_length
        if debuglevel >= 2:
            sys.stdout.write('CONTENT-LENGTH: %s\n' % (content_length,))

    #
    # handle cookies: all Cookie/Cookie2 headers are folded into one value.
    #
    if cookies:
        environ['HTTP_COOKIE'] = "; ".join(cookies)

    if debuglevel:
        sys.stdout.write('WSGI environ dictionary: %s\n' % (environ,))

    return environ
---|
| 183 | |
---|
| 184 | # |
---|
| 185 | # fake socket for WSGI intercept stuff. |
---|
| 186 | # |
---|
| 187 | |
---|
class wsgi_fake_socket:
    """
    Handle HTTP traffic and stuff into a WSGI application object instead.

    Everything passed to 'sendall' is buffered; when 'makefile' is called
    the buffered request is parsed, the WSGI app is run, and its complete
    response is returned as a file-like object.

    Note that this class assumes:

     1. 'makefile' is called (by the response class) only after all of the
        data has been sent to the socket by the request class;
     2. non-persistent (i.e. non-HTTP/1.1) connections.
    """
    def __init__(self, app, host, port, script_name):
        self.app = app                  # WSGI app object
        self.host = host
        self.port = port
        self.script_name = script_name  # SCRIPT_NAME (app mount point)

        self.inp = StringIO()           # stuff written into this "socket"
        self.write_results = []         # results from the 'write_fn'
        self.results = None             # never assigned elsewhere in this class; kept as-is
        self.result = None              # iterator over the app's result (set by makefile)
        self.output = StringIO()        # all output from the app, incl headers

    def makefile(self, *args, **kwargs):
        """
        'makefile' is called by the HTTPResponse class once all of the
        data has been written.  So, in this interceptor class, we need to:

          1. build a start_response function that grabs all the headers
             returned by the WSGI app;
          2. create a wsgi.input file object 'inp', containing all of the
             traffic;
          3. build an environment dict out of the traffic in inp;
          4. run the WSGI app & grab the result object;
          5. concatenate & return the result(s) read from the result object.

        The arguments are accepted for compatibility with socket.makefile
        and ignored.  Returns a new file-like object holding the status
        line, headers and body.  Any exception raised by the app while it
        is being iterated propagates to the caller, but only after the
        result object's close() (if it has one) has been called.
        """

        # dynamically construct the start_response function for no good reason.

        def start_response(status, headers, exc_info=None):
            # construct the head of the HTTP response.
            self.output.write("HTTP/1.0 " + status + "\n")

            for k, v in headers:
                self.output.write('%s: %s\n' % (k, v,))
            self.output.write('\n')

            def write_fn(s):
                self.write_results.append(s)
            return write_fn

        # construct the wsgi.input file from everything that's been
        # written to this "socket".
        inp = StringIO(self.inp.getvalue())

        # build the environ dictionary.
        environ = make_environ(inp, self.host, self.port, self.script_name)

        # run the application.
        app_result = self.app(environ, start_response)

        ###

        # read all of the results.  the trick here is to get the *first*
        # bit of data from the app via the generator, *then* grab & return
        # the data passed back from the 'write' function, and then return
        # the generator data.  this is because the 'write' fn doesn't
        # necessarily get called until the first result is requested from
        # the app function.
        #
        # see twill tests, 'test_wrapper_intercept' for a test that breaks
        # if this is done incorrectly.

        try:
            self.result = iter(app_result)

            generator_data = None
            try:
                # pull exactly one item; the iterator stays open for the
                # second loop below.  an empty result leaves None here.
                for generator_data in self.result:
                    break
            finally:
                for data in self.write_results:
                    self.output.write(data)

            if generator_data:
                self.output.write(generator_data)

            for data in self.result:
                self.output.write(data)
        finally:
            # the result object must be closed whether or not iterating
            # it succeeded, so that the app can release its resources.
            if hasattr(app_result, 'close'):
                app_result.close()

        if debuglevel >= 2:
            sys.stdout.write('*** %s ***\n' % (self.output.getvalue(),))

        # return the concatenated results.
        return StringIO(self.output.getvalue())

    def sendall(self, str):
        """
        Save all the traffic to self.inp.
        """
        if debuglevel >= 2:
            sys.stdout.write('>>> %s >>>\n' % (str,))

        self.inp.write(str)

    def close(self):
        "Do nothing, for now."
        pass
---|
| 300 | |
---|
| 301 | # |
---|
| 302 | # WSGI_HTTPConnection |
---|
| 303 | # |
---|
| 304 | |
---|
class WSGI_HTTPConnection(HTTPConnection):
    """
    Intercept all traffic to certain hosts & redirect into a WSGI
    application object.
    """
    def get_app(self, host, port):
        """
        Return (app, script_name) for the given (host, port).

        'port' is converted with int() before the lookup.  When an
        intercept is registered, its app-creation function is called
        (on every call to get_app) and the resulting app is returned
        along with the registered script name; otherwise (None, None)
        is returned.
        """
        registered = _wsgi_intercept.get((host, int(port)))
        if registered is None:
            return None, None

        app_fn, script_name = registered
        return app_fn(), script_name

    def connect(self):
        """
        Override the connect() function to intercept calls to certain
        host/ports.

        If an app is registered for (self.host, self.port), self.sock is
        set to a wsgi_fake_socket wrapping it; otherwise the regular
        HTTPConnection.connect() is used.  Exceptions are re-raised
        unchanged, after printing a traceback if debugging is on.
        """
        if debuglevel:
            sys.stderr.write('connect: %s, %s\n' % (self.host, self.port,))

        try:
            (app, script_name) = self.get_app(self.host, self.port)
            if app:
                if debuglevel:
                    sys.stderr.write('INTERCEPTING call to %s:%s\n' % \
                                     (self.host, self.port,))
                self.sock = wsgi_fake_socket(app, self.host, self.port,
                                             script_name)
            else:
                HTTPConnection.connect(self)

        except Exception:
            if debuglevel:              # intercept & print out tracebacks
                traceback.print_exc()
            raise
---|
| 347 | |
---|
| 348 | ### DEBUGGING CODE -- to help me figure out communications stuff. ### |
---|
| 349 | |
---|
| 350 | # (ignore me, please) |
---|
| 351 | |
---|
| 352 | ''' |
---|
| 353 | import socket |
---|
| 354 | |
---|
| 355 | class file_wrapper: |
---|
| 356 | def __init__(self, fp): |
---|
| 357 | self.fp = fp |
---|
| 358 | |
---|
| 359 | def readline(self): |
---|
| 360 | d = self.fp.readline() |
---|
| 361 | if debuglevel: |
---|
| 362 | print 'file_wrapper readline:', d |
---|
| 363 | return d |
---|
| 364 | |
---|
| 365 | def __iter__(self): |
---|
| 366 | return self |
---|
| 367 | |
---|
| 368 | def next(self): |
---|
| 369 | d = self.fp.next() |
---|
| 370 | if debuglevel: |
---|
| 371 | print 'file_wrapper next:', d |
---|
| 372 | return d |
---|
| 373 | |
---|
| 374 | def read(self, *args): |
---|
| 375 | d = self.fp.read(*args) |
---|
| 376 | if debuglevel: |
---|
| 377 | print 'file_wrapper read:', d |
---|
| 378 | return d |
---|
| 379 | |
---|
| 380 | def close(self): |
---|
| 381 | if debuglevel: |
---|
| 382 | print 'file_wrapper close' |
---|
| 383 | self.fp.close() |
---|
| 384 | |
---|
| 385 | class intercept_socket: |
---|
| 386 | """ |
---|
| 387 | A socket that intercepts everything written to it & read from it. |
---|
| 388 | """ |
---|
| 389 | |
---|
| 390 | def __init__(self): |
---|
| 391 | for res in socket.getaddrinfo("floating.caltech.edu", 80, 0, |
---|
| 392 | socket.SOCK_STREAM): |
---|
| 393 | af, socktype, proto, canonname, sa = res |
---|
| 394 | self.sock = socket.socket(af, socktype, proto) |
---|
| 395 | self._open = True |
---|
| 396 | self.sock.connect(sa) |
---|
| 397 | break |
---|
| 398 | |
---|
| 399 | def makefile(self, *args, **kwargs): |
---|
| 400 | fp = self.sock.makefile('rb', 0) |
---|
| 401 | return file_wrapper(fp) |
---|
| 402 | |
---|
| 403 | def sendall(self, str): |
---|
| 404 | if not self._open: |
---|
| 405 | raise Exception |
---|
| 406 | |
---|
| 407 | return self.sock.sendall(str) |
---|
| 408 | |
---|
| 409 | def close(self): |
---|
| 410 | self._open = False |
---|
| 411 | ''' |
---|