| 1 | # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
|---|
| 2 | # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
|---|
| 3 | """ |
|---|
| 4 | An application that proxies WSGI requests to a remote server. |
|---|
| 5 | |
|---|
| 6 | TODO: |
|---|
| 7 | |
|---|
| 8 | * Send ``Via`` header? It's not clear to me this is a Via in the |
|---|
| 9 | style of a typical proxy. |
|---|
| 10 | |
|---|
| 11 | * Other headers or metadata? I put in X-Forwarded-For, but that's it. |
|---|
| 12 | |
|---|
| 13 | * Signed data of non-HTTP keys? This would be for things like |
|---|
| 14 | REMOTE_USER. |
|---|
| 15 | |
|---|
| 16 | * Something to indicate what the original URL was? The original host, |
|---|
| 17 | scheme, and base path. |
|---|
| 18 | |
|---|
| 19 | * Rewriting ``Location`` headers? mod_proxy does this. |
|---|
| 20 | |
|---|
| 21 | * Rewriting body? (Probably not on this one -- that can be done with |
|---|
| 22 | a different middleware that wraps this middleware) |
|---|
| 23 | |
|---|
| 24 | * Example:: |
|---|
| 25 | |
|---|
| 26 | use = egg:Paste#proxy |
|---|
| 27 | address = http://server3:8680/exist/rest/db/orgs/sch/config/ |
|---|
| 28 | allowed_request_methods = GET |
|---|
| 29 | |
|---|
| 30 | """ |
|---|
| 31 | |
|---|
| 32 | import httplib |
|---|
| 33 | import urlparse |
|---|
| 34 | import urllib |
|---|
| 35 | |
|---|
| 36 | from paste import httpexceptions |
|---|
| 37 | from paste.util.converters import aslist |
|---|
| 38 | |
|---|
# Remove these headers from the response (specify lower case header
# names).  These are the hop-by-hop headers of RFC 2616 section
# 13.5.1: they describe a single transport-level connection rather
# than the message itself, so a proxy must not relay them, and the
# WSGI specification forbids applications from returning them:
filtered_headers = (
    'connection',
    'keep-alive',
    'proxy-authenticate',
    'proxy-authorization',
    'te',
    'trailers',
    'transfer-encoding',
    'upgrade',
)
|---|
| 44 | |
|---|
class Proxy(object):

    """
    A WSGI application that forwards every request to one fixed
    remote address and relays the remote server's response.

    ``address``
        the URL to proxy to.  Its scheme, host and path are split out
        once, at construction time; the request's ``PATH_INFO`` is
        joined onto the address path for each request.

    ``allowed_request_methods``
        an iterable of request methods that may be proxied, compared
        case-insensitively.  When it is empty every method is allowed.

    ``suppress_http_headers``
        an iterable of request header names (e.g. ``cookie``),
        compared case-insensitively, that are not passed on to the
        remote server.
    """

    def __init__(self, address, allowed_request_methods=(),
                 suppress_http_headers=()):
        self.address = address
        self.parsed = urlparse.urlsplit(address)
        self.scheme = self.parsed[0].lower()
        self.host = self.parsed[1]
        self.path = self.parsed[2]
        # Empty entries are dropped so that a blank configuration
        # value does not turn into a restriction.
        self.allowed_request_methods = [
            x.lower() for x in allowed_request_methods if x]

        self.suppress_http_headers = [
            x.lower() for x in suppress_http_headers if x]

    def __call__(self, environ, start_response):
        """
        Proxy a single WSGI request and return the response body as a
        one-element list.

        A request whose method is not allowed is answered with
        ``HTTPBadRequest`` without contacting the remote server.
        Raises ``ValueError`` when the configured address uses a
        scheme other than ``http`` or ``https``.
        """
        if (self.allowed_request_methods and
            environ['REQUEST_METHOD'].lower() not in self.allowed_request_methods):
            return httpexceptions.HTTPBadRequest("Disallowed")(environ, start_response)

        if self.scheme == 'http':
            ConnClass = httplib.HTTPConnection
        elif self.scheme == 'https':
            ConnClass = httplib.HTTPSConnection
        else:
            raise ValueError(
                "Unknown scheme for %r: %r" % (self.address, self.scheme))
        conn = ConnClass(self.host)

        # Turn the HTTP_* CGI keys back into request headers.
        headers = {}
        for key, value in environ.items():
            if key.startswith('HTTP_'):
                key = key[5:].lower().replace('_', '-')
                if key == 'host' or key in self.suppress_http_headers:
                    continue
                headers[key] = value
        # The remote server must see its own host name, not ours.
        headers['host'] = self.host
        if 'REMOTE_ADDR' in environ:
            headers['x-forwarded-for'] = environ['REMOTE_ADDR']
        if environ.get('CONTENT_TYPE'):
            headers['content-type'] = environ['CONTENT_TYPE']
        if environ.get('CONTENT_LENGTH'):
            length = int(environ['CONTENT_LENGTH'])
            body = environ['wsgi.input'].read(length)
        else:
            body = ''

        # PATH_INFO may legitimately be empty or absent.
        path_info = urllib.quote(environ.get('PATH_INFO', ''))
        if self.path:
            request_path = path_info
            # Make the request path relative so that urljoin appends
            # it to the address path instead of replacing that path.
            if request_path.startswith('/'):
                request_path = request_path[1:]

            path = urlparse.urljoin(self.path, request_path)
        else:
            path = path_info
        if environ.get('QUERY_STRING'):
            # Without this the remote server never sees the query
            # parameters.  An empty path is spelled out as "/" so the
            # request line stays well formed once "?" is appended.
            path = (path or '/') + '?' + environ['QUERY_STRING']

        try:
            conn.request(environ['REQUEST_METHOD'],
                         path,
                         body, headers)
            res = conn.getresponse()
            headers_out = parse_headers(res.msg)

            status = '%s %s' % (res.status, res.reason)
            start_response(status, headers_out)
            # @@: Default?
            length = res.getheader('content-length')
            if length is not None:
                body = res.read(int(length))
            else:
                body = res.read()
        finally:
            # Release the socket even when the request or the read
            # of the response fails part-way through.
            conn.close()
        return [body]
|---|
| 117 | |
|---|
def make_proxy(global_conf, address, allowed_request_methods="",
               suppress_http_headers=""):
    """
    Build a WSGI application that proxies requests to another address.

    ``global_conf``
        accepted as the first argument but not used here

    ``address``
        the full URL ending with a trailing ``/``

    ``allowed_request_methods``
        a space separated list of request methods (e.g., ``GET POST``)

    ``suppress_http_headers``
        a space separated list of http headers (lower case, without
        the leading ``http_``) that should not be passed on to the
        target host
    """
    # Both list-valued options are run through ``aslist`` before
    # they are handed to Proxy.
    return Proxy(
        address,
        allowed_request_methods=aslist(allowed_request_methods),
        suppress_http_headers=aslist(suppress_http_headers))
|---|
| 140 | |
|---|
| 141 | |
|---|
class TransparentProxy(object):

    """
    A proxy that sends the request just as it was given, including
    respecting HTTP_HOST, wsgi.url_scheme, etc.

    This is a way of translating WSGI requests directly to real HTTP
    requests.  All information goes in the environment; modify it to
    modify the way the request is made.

    If you specify ``force_host`` (and optionally ``force_scheme``)
    then HTTP_HOST won't be used to determine where to connect to;
    instead a specific host will be connected to, but the ``Host``
    header in the request will remain intact.
    """

    def __init__(self, force_host=None,
                 force_scheme='http'):
        self.force_host = force_host
        self.force_scheme = force_scheme

    def __repr__(self):
        return '<%s %s force_host=%r force_scheme=%r>' % (
            self.__class__.__name__,
            hex(id(self)),
            self.force_host, self.force_scheme)

    def __call__(self, environ, start_response):
        """
        Replay the WSGI request in ``environ`` as a real HTTP request
        and return the response body as a one-element list.

        Raises ``ValueError`` when the scheme used for the connection
        is neither ``http`` nor ``https``, or when ``environ`` has no
        ``HTTP_HOST`` key.
        """
        scheme = environ['wsgi.url_scheme']
        # force_scheme only takes effect together with force_host.
        if self.force_host is None:
            conn_scheme = scheme
        else:
            conn_scheme = self.force_scheme
        if conn_scheme == 'http':
            ConnClass = httplib.HTTPConnection
        elif conn_scheme == 'https':
            ConnClass = httplib.HTTPSConnection
        else:
            # Report the scheme that was actually rejected, which is
            # force_scheme rather than wsgi.url_scheme when a host
            # is forced.
            raise ValueError(
                "Unknown scheme %r" % conn_scheme)
        if 'HTTP_HOST' not in environ:
            raise ValueError(
                "WSGI environ must contain an HTTP_HOST key")
        host = environ['HTTP_HOST']
        if self.force_host is None:
            conn_host = host
        else:
            conn_host = self.force_host
        conn = ConnClass(conn_host)

        # Turn the HTTP_* CGI keys back into request headers.
        headers = {}
        for key, value in environ.items():
            if key.startswith('HTTP_'):
                key = key[5:].lower().replace('_', '-')
                headers[key] = value
        headers['host'] = host
        # Only add X-Forwarded-For when the request does not already
        # carry one.
        if 'REMOTE_ADDR' in environ and 'HTTP_X_FORWARDED_FOR' not in environ:
            headers['x-forwarded-for'] = environ['REMOTE_ADDR']
        if environ.get('CONTENT_TYPE'):
            headers['content-type'] = environ['CONTENT_TYPE']
        if environ.get('CONTENT_LENGTH'):
            length = int(environ['CONTENT_LENGTH'])
            body = environ['wsgi.input'].read(length)
        else:
            # Missing or empty CONTENT_LENGTH: no request body.
            body = ''

        path = (environ.get('SCRIPT_NAME', '')
                + environ.get('PATH_INFO', ''))
        path = urllib.quote(path)
        # Skip an empty QUERY_STRING so the URL does not gain a
        # dangling "?".
        if environ.get('QUERY_STRING'):
            path += '?' + environ['QUERY_STRING']

        try:
            conn.request(environ['REQUEST_METHOD'],
                         path, body, headers)
            res = conn.getresponse()
            headers_out = parse_headers(res.msg)

            status = '%s %s' % (res.status, res.reason)
            start_response(status, headers_out)
            # @@: Default?
            length = res.getheader('content-length')
            if length is not None:
                body = res.read(int(length))
            else:
                body = res.read()
        finally:
            # Release the socket even when the request or the read
            # of the response fails part-way through.
            conn.close()
        return [body]
|---|
| 231 | |
|---|
def parse_headers(message):
    """
    Turn a Message object into a list of WSGI-style headers.

    ``message`` must have a ``headers`` attribute: a sequence of raw
    header lines, where a line starting with whitespace continues
    the header on the line before it.

    Returns a list of ``(name, value)`` tuples in their original
    order, with surrounding whitespace stripped from the values and
    continuation lines folded into their header with a single space.
    Headers whose lower-cased name is in ``filtered_headers`` are
    left out, together with their continuation lines.

    Raises ``ValueError`` for a line without a ``:`` separator, or
    when the very first header line is a continuation line.
    """
    headers_out = []
    # True while the most recent header was removed by the filter, so
    # that its continuation lines are removed along with it instead
    # of being glued onto the previous header that was kept.
    dropped_last = False
    for full_header in message.headers:
        if not full_header:
            # Shouldn't happen, but we'll just ignore
            continue
        if full_header[0].isspace():
            # Continuation line, add to the last header
            if dropped_last:
                continue
            if not headers_out:
                raise ValueError(
                    "First header starts with a space (%r)" % full_header)
            last_header, last_value = headers_out.pop()
            value = last_value + ' ' + full_header.strip()
            headers_out.append((last_header, value))
            continue
        try:
            header, value = full_header.split(':', 1)
        except ValueError:
            # Only the failed unpacking of a line without ':' is
            # translated; anything else propagates unchanged.
            raise ValueError("Invalid header: %r" % full_header)
        value = value.strip()
        dropped_last = header.lower() in filtered_headers
        if not dropped_last:
            headers_out.append((header, value))
    return headers_out
|---|
| 258 | |
|---|
def make_transparent_proxy(
    global_conf, force_host=None, force_scheme='http'):
    """
    Build a ``TransparentProxy`` that connects to a specific host but
    performs absolutely no other filtering, including of the Host
    header.

    ``global_conf`` is accepted as the first argument but is not used
    here; ``force_host`` and ``force_scheme`` are handed to
    ``TransparentProxy`` unchanged.
    """
    settings = dict(force_host=force_host, force_scheme=force_scheme)
    return TransparentProxy(**settings)
|---|