| 1 | """ |
|---|
| 2 | wsgi_intercept.WSGI_HTTPConnection is a replacement for |
|---|
| 3 | httplib.HTTPConnection that intercepts certain HTTP connections into a |
|---|
| 4 | WSGI application. |
|---|
| 5 | |
|---|
| 6 | Use 'add_wsgi_intercept' and 'remove_wsgi_intercept' to control this behavior. |
|---|
| 7 | """ |
|---|
| 8 | |
|---|
| 9 | import sys |
|---|
| 10 | from httplib import HTTPConnection |
|---|
| 11 | import urllib |
|---|
| 12 | from cStringIO import StringIO |
|---|
| 13 | import traceback |
|---|
| 14 | |
|---|
# Module-wide debug verbosity, checked by the functions and classes below.
debuglevel = 0
# 0 silent (nothing is printed)
# 1 basic: connect/intercept notices, parsed request summary, environ dict,
#   and tracebacks for exceptions raised while connecting
# 2 verbose: additionally each header, the raw request line, the raw traffic
#   written to the fake socket and the full response produced by the app
| 18 | |
|---|
| 19 | #### |
|---|
| 20 | |
|---|
#
# Specify which hosts/ports to target for interception to a given WSGI app.
#
# For simplicity's sake, intercept ENTIRE host/port combinations;
# intercepting only specific URL subtrees gets complicated, because we don't
# have that information in the HTTPConnection.connect() function that does the
# redirection.
#
# format: key=(host, port), value=(app_create_fn, script_name)
#
# (script_name becomes the SCRIPT_NAME)
#
# Entries are added by add_wsgi_intercept() and removed by
# remove_wsgi_intercept().  WSGI_HTTPConnection.get_app() converts the port
# with int() before looking it up, so only entries registered with an
# integer port can match.

_wsgi_intercept = {}
| 34 | |
|---|
def add_wsgi_intercept(host, port, app_create_fn, script_name=''):
    """
    Register a WSGI intercept for host:port.

    'app_create_fn' is stored here, not called; it is called later to
    obtain the WSGI app.  'script_name' (default '') is stored alongside
    it and becomes the app's SCRIPT_NAME.  An existing entry for the same
    (host, port) is replaced.
    """
    target = (host, port)
    handler = (app_create_fn, script_name)
    _wsgi_intercept[target] = handler
| 41 | |
|---|
def remove_wsgi_intercept(host, port):
    """
    Remove the WSGI intercept call for (host, port).

    Does nothing if no intercept is registered for that pair.
    """
    # pop() with a default replaces the deprecated has_key() + del pair and
    # needs only one dictionary lookup.
    _wsgi_intercept.pop((host, port), None)
| 49 | |
|---|
| 50 | # |
|---|
| 51 | # make_environ: behave like a Web server. Take in 'input', and behave |
|---|
| 52 | # as if you're bound to 'host' and 'port'; build an environment dict |
|---|
| 53 | # for the WSGI app. |
|---|
| 54 | # |
|---|
| 55 | # This is where the magic happens, folks. |
|---|
| 56 | # |
|---|
| 57 | |
|---|
def make_environ(inp, host, port, script_name):
    """
    Take 'inp' as if it were HTTP-speak being received on host:port,
    and parse it into a WSGI-ok environment dictionary.  Return the
    dictionary.

    'inp' is a file-like object positioned at the start of the request.
    The request line and the headers are consumed; whatever remains (the
    request body) is handed to the app as 'wsgi.input'.

    Set 'SCRIPT_NAME' from the 'script_name' input, and, if present,
    remove it from the beginning of the PATH_INFO variable.  If the URL
    does not start with 'script_name', SCRIPT_NAME is set to ''.

    PATH_INFO is URL-decoded; QUERY_STRING is passed through still
    URL-encoded, as CGI/WSGI require.

    Raises ValueError if the request line does not consist of exactly a
    method, a URL and a protocol, or if a header line contains no ':'.
    """
    environ = {}

    method_line = inp.readline()

    content_type = None
    content_length = None
    cookies = []

    #
    # parse the headers, up to the first blank line (or the end of input).
    #

    for line in inp:
        line = line.strip()
        if not line:
            break

        k, v = line.split(':', 1)
        v = v.lstrip()
        name = k.lower()

        #
        # take care of special headers, and for the rest, put them
        # into the environ with HTTP_ in front.
        #

        if name == 'content-type':
            content_type = v
        elif name == 'content-length':
            content_length = v
        elif name in ('cookie', 'cookie2'):
            cookies.append(v)
        else:
            environ['HTTP_' + k.upper().replace('-', '_')] = v

        if debuglevel >= 2:
            print('HEADER: %s %s' % (k, v))

    #
    # decode the method line
    #

    if debuglevel >= 2:
        print('METHOD LINE: %s' % (method_line,))

    # split() without an argument also discards the trailing CRLF, which
    # would otherwise end up inside SERVER_PROTOCOL.
    method, url, protocol = method_line.split()

    # clean the script_name off of the url, if it's there.
    if url.startswith(script_name):
        url = url[len(script_name):]
    else:
        script_name = ''                # @CTB what to do -- bad URL.  scrap?

    url = url.split('?', 1)

    # unquote(), not unquote_plus(): a '+' in the path is a literal plus;
    # only form-encoded query data uses '+' for a space.
    path_info = urllib.unquote(url[0])

    # QUERY_STRING must stay URL-encoded; decoding it here would make an
    # encoded '&', '=' or '%' indistinguishable from a real separator.
    query_string = ""
    if len(url) == 2:
        query_string = url[1]

    if debuglevel:
        print("method: %s; script_name: %s; path_info: %s; query_string: %s"
              % (method, script_name, path_info, query_string))

    # whatever is left of the input is the request body.
    inp = StringIO(inp.read())

    #
    # fill out our dictionary.
    #

    environ.update({
        "wsgi.version": (1, 0),
        "wsgi.url_scheme": "http",
        "wsgi.input": inp,              # to read for POSTs
        "wsgi.errors": StringIO(),
        "wsgi.multithread": 0,
        "wsgi.multiprocess": 0,
        "wsgi.run_once": 0,

        "REQUEST_METHOD": method,
        "SCRIPT_NAME": script_name,
        "PATH_INFO": path_info,
        "QUERY_STRING": query_string,

        "SERVER_NAME": host,
        "SERVER_PORT": str(port),
        "SERVER_PROTOCOL": protocol,

        "REMOTE_ADDR": '127.0.0.1',
    })

    #
    # content_type & length are optional.
    #

    if content_type:
        environ['CONTENT_TYPE'] = content_type
        if debuglevel >= 2:
            print('CONTENT-TYPE: %s' % (content_type,))
    if content_length:
        environ['CONTENT_LENGTH'] = content_length
        if debuglevel >= 2:
            print('CONTENT-LENGTH: %s' % (content_length,))

    #
    # handle cookies.
    #
    if cookies:
        environ['HTTP_COOKIE'] = "; ".join(cookies)

    if debuglevel:
        print('WSGI environ dictionary: %s' % (environ,))

    return environ
| 183 | |
|---|
| 184 | # |
|---|
| 185 | # fake socket for WSGI intercept stuff. |
|---|
| 186 | # |
|---|
| 187 | |
|---|
class wsgi_fake_socket:
    """
    Handle HTTP traffic and stuff into a WSGI application object instead.

    Everything passed to 'sendall' is buffered; 'makefile' then runs the
    WSGI app on the buffered request and returns the complete response
    (status line, headers and body) as a file-like object.

    Note that this class assumes:

     1. 'makefile' is called (by the response class) only after all of the
        data has been sent to the socket by the request class;
     2. non-persistent (i.e. non-HTTP/1.1) connections.
    """
    def __init__(self, app, host, port, script_name):
        self.app = app                  # WSGI app object
        self.host = host
        self.port = port
        self.script_name = script_name  # SCRIPT_NAME (app mount point)

        self.inp = StringIO()           # stuff written into this "socket"
        self.write_results = []         # results from the 'write_fn'
        self.results = None             # never assigned elsewhere; kept so
                                        # existing readers don't break
        self.result = None              # iterator over the app's result,
                                        # set by makefile()
        self.output = StringIO()        # all output from the app, incl headers

    def makefile(self, *args, **kwargs):
        """
        'makefile' is called by the HTTPResponse class once all of the
        data has been written.  So, in this interceptor class, we need to:

          1. build a start_response function that grabs all the headers
             returned by the WSGI app;
          2. create a wsgi.input file object 'inp', containing all of the
             traffic;
          3. build an environment dict out of the traffic in inp;
          4. run the WSGI app & grab the result object;
          5. concatenate & return the result(s) read from the result object.

        The arguments are accepted for socket compatibility and ignored.
        The app result's close() method, if it has one, is called even
        when the app or its iterator raises; the exception then propagates.
        """

        def start_response(status, headers, exc_info=None):
            # write the status line and headers of the HTTP response.
            # NOTE: exc_info is accepted but not acted upon.
            self.output.write("HTTP/1.0 " + status + "\n")

            for k, v in headers:
                self.output.write('%s: %s\n' % (k, v,))
            self.output.write('\n')

            def write_fn(s):
                self.write_results.append(s)
            return write_fn

        # construct the wsgi.input file from everything that's been
        # written to this "socket".
        inp = StringIO(self.inp.getvalue())

        # build the environ dictionary.
        environ = make_environ(inp, self.host, self.port, self.script_name)

        # run the application.
        app_result = self.app(environ, start_response)

        # read all of the results.  the trick here is to get the *first*
        # bit of data from the app via the generator, *then* grab & return
        # the data passed back from the 'write' function, and then return
        # the generator data.  this is because the 'write' fn doesn't
        # necessarily get called until the first result is requested from
        # the app function.
        #
        # see twill tests, 'test_wrapper_intercept' for a test that breaks
        # if this is done incorrectly.

        try:
            self.result = iter(app_result)

            first_chunk = None
            try:
                # pull exactly one item; an empty result simply leaves
                # first_chunk as None.
                for first_chunk in self.result:
                    break
            finally:
                # flush write() data even if fetching the first item failed.
                for data in self.write_results:
                    self.output.write(data)

            if first_chunk:
                self.output.write(first_chunk)

            for data in self.result:
                self.output.write(data)
        finally:
            # PEP 333: close() must be called whether or not the request
            # completed normally.
            if hasattr(app_result, 'close'):
                app_result.close()

        if debuglevel >= 2:
            print("*** %s ***" % (self.output.getvalue(),))

        # return the concatenated results.
        return StringIO(self.output.getvalue())

    def sendall(self, str):
        """
        Save all the traffic to self.inp.
        """
        if debuglevel >= 2:
            print(">>> %s >>>" % (str,))

        self.inp.write(str)

    def close(self):
        "Do nothing, for now."
        pass
| 300 | |
|---|
| 301 | # |
|---|
| 302 | # WSGI_HTTPConnection |
|---|
| 303 | # |
|---|
| 304 | |
|---|
class WSGI_HTTPConnection(HTTPConnection):
    """
    Intercept all traffic to certain hosts & redirect into a WSGI
    application object.
    """
    def get_app(self, host, port):
        """
        Return (app, script_name) for the given (host, port).

        'port' is converted with int() before the lookup.  If an intercept
        is registered, its app_create_fn is called to build the app;
        otherwise (None, None) is returned.
        """
        # a single get() replaces the deprecated has_key() + index pair.
        entry = _wsgi_intercept.get((host, int(port)))
        if entry is None:
            return None, None

        app_fn, script_name = entry
        return app_fn(), script_name

    def connect(self):
        """
        Override the connect() function to intercept calls to certain
        host/ports.

        If an app is registered for (self.host, self.port), self.sock
        becomes a wsgi_fake_socket around it; otherwise the normal
        HTTPConnection.connect() is used.  Exceptions are re-raised
        unchanged, after printing a traceback when debugging is on.
        """
        if debuglevel:
            sys.stderr.write('connect: %s, %s\n' % (self.host, self.port,))

        try:
            (app, script_name) = self.get_app(self.host, self.port)
            if app:
                if debuglevel:
                    sys.stderr.write('INTERCEPTING call to %s:%s\n' %
                                     (self.host, self.port,))
                self.sock = wsgi_fake_socket(app, self.host, self.port,
                                             script_name)
            else:
                HTTPConnection.connect(self)

        except Exception:
            if debuglevel:              # intercept & print out tracebacks
                traceback.print_exc()
            raise
| 347 | |
|---|
| 348 | ### DEBUGGING CODE -- to help me figure out communications stuff. ### |
|---|
| 349 | |
|---|
| 350 | # (ignore me, please) |
|---|
| 351 | |
|---|
| 352 | ''' |
|---|
| 353 | import socket |
|---|
| 354 | |
|---|
| 355 | class file_wrapper: |
|---|
| 356 | def __init__(self, fp): |
|---|
| 357 | self.fp = fp |
|---|
| 358 | |
|---|
| 359 | def readline(self): |
|---|
| 360 | d = self.fp.readline() |
|---|
| 361 | if debuglevel: |
|---|
| 362 | print 'file_wrapper readline:', d |
|---|
| 363 | return d |
|---|
| 364 | |
|---|
| 365 | def __iter__(self): |
|---|
| 366 | return self |
|---|
| 367 | |
|---|
| 368 | def next(self): |
|---|
| 369 | d = self.fp.next() |
|---|
| 370 | if debuglevel: |
|---|
| 371 | print 'file_wrapper next:', d |
|---|
| 372 | return d |
|---|
| 373 | |
|---|
| 374 | def read(self, *args): |
|---|
| 375 | d = self.fp.read(*args) |
|---|
| 376 | if debuglevel: |
|---|
| 377 | print 'file_wrapper read:', d |
|---|
| 378 | return d |
|---|
| 379 | |
|---|
| 380 | def close(self): |
|---|
| 381 | if debuglevel: |
|---|
| 382 | print 'file_wrapper close' |
|---|
| 383 | self.fp.close() |
|---|
| 384 | |
|---|
| 385 | class intercept_socket: |
|---|
| 386 | """ |
|---|
| 387 | A socket that intercepts everything written to it & read from it. |
|---|
| 388 | """ |
|---|
| 389 | |
|---|
| 390 | def __init__(self): |
|---|
| 391 | for res in socket.getaddrinfo("floating.caltech.edu", 80, 0, |
|---|
| 392 | socket.SOCK_STREAM): |
|---|
| 393 | af, socktype, proto, canonname, sa = res |
|---|
| 394 | self.sock = socket.socket(af, socktype, proto) |
|---|
| 395 | self._open = True |
|---|
| 396 | self.sock.connect(sa) |
|---|
| 397 | break |
|---|
| 398 | |
|---|
| 399 | def makefile(self, *args, **kwargs): |
|---|
| 400 | fp = self.sock.makefile('rb', 0) |
|---|
| 401 | return file_wrapper(fp) |
|---|
| 402 | |
|---|
| 403 | def sendall(self, str): |
|---|
| 404 | if not self._open: |
|---|
| 405 | raise Exception |
|---|
| 406 | |
|---|
| 407 | return self.sock.sendall(str) |
|---|
| 408 | |
|---|
| 409 | def close(self): |
|---|
| 410 | self._open = False |
|---|
| 411 | ''' |
|---|