[3] | 1 | # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
---|
| 2 | # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
---|
| 3 | # (c) 2005 Ian Bicking and contributors |
---|
| 4 | # This module is part of the Python Paste Project and is released under |
---|
| 5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php |
---|
| 6 | """ |
---|
| 7 | This module provides helper routines which work directly on a WSGI |
---|
| 8 | environment to solve common requirements. |
---|
| 9 | |
---|
| 10 | * get_cookies(environ) |
---|
| 11 | * parse_querystring(environ) |
---|
| 12 | * parse_formvars(environ, include_get_vars=True) |
---|
| 13 | * construct_url(environ, with_query_string=True, with_path_info=True, |
---|
| 14 | script_name=None, path_info=None, querystring=None) |
---|
| 15 | * path_info_split(path_info) |
---|
| 16 | * path_info_pop(environ) |
---|
| 17 | * resolve_relative_url(url, environ) |
---|
| 18 | |
---|
| 19 | """ |
---|
| 20 | import cgi |
---|
| 21 | from Cookie import SimpleCookie |
---|
| 22 | from StringIO import StringIO |
---|
| 23 | import urlparse |
---|
| 24 | import urllib |
---|
| 25 | try: |
---|
| 26 | from UserDict import DictMixin |
---|
| 27 | except ImportError: |
---|
| 28 | from paste.util.UserDict24 import DictMixin |
---|
| 29 | from paste.util.multidict import MultiDict |
---|
| 30 | |
---|
| 31 | __all__ = ['get_cookies', 'get_cookie_dict', 'parse_querystring', |
---|
| 32 | 'parse_formvars', 'construct_url', 'path_info_split', |
---|
| 33 | 'path_info_pop', 'resolve_relative_url', 'EnvironHeaders'] |
---|
| 34 | |
---|
def get_cookies(environ):
    """
    Gets a cookie object (which is a dictionary-like object) from the
    request environment; caches this value in case get_cookies is
    called again for the same request.

    The parsed ``SimpleCookie`` is stored in ``environ['paste.cookies']``
    as a ``(cookies, header)`` tuple.  The cached object is only reused
    while ``HTTP_COOKIE`` still equals the header it was parsed from;
    otherwise the header is parsed again and the cache entry replaced.

    A missing ``HTTP_COOKIE`` is treated as an empty header, so an empty
    ``SimpleCookie`` is returned (and cached) in that case.
    """
    header = environ.get('HTTP_COOKIE', '')
    # Use the ``in`` operator rather than ``has_key``: it matches the
    # other cache checks in this module and works on any mapping.
    if 'paste.cookies' in environ:
        cookies, check_header = environ['paste.cookies']
        if check_header == header:
            return cookies
    cookies = SimpleCookie()
    cookies.load(header)
    environ['paste.cookies'] = (cookies, header)
    return cookies
---|
| 51 | |
---|
def get_cookie_dict(environ):
    """Return a *plain* dictionary of cookies as found in the request.

    Unlike ``get_cookies`` this returns a dictionary, not a
    ``SimpleCookie`` object.  For incoming cookies a dictionary fully
    represents the information.  Like ``get_cookies`` this caches and
    checks the cache.

    The result maps each cookie name to its string value.  It is cached
    in ``environ['paste.cookies.dict']`` as ``(result, header)`` and is
    only reused while ``HTTP_COOKIE`` is unchanged.  When the header is
    missing or empty a fresh empty dictionary is returned and nothing is
    cached.
    """
    header = environ.get('HTTP_COOKIE')
    if not header:
        return {}
    # Use the ``in`` operator rather than ``has_key``: it matches the
    # other cache checks in this module and works on any mapping.
    if 'paste.cookies.dict' in environ:
        cookies, check_header = environ['paste.cookies.dict']
        if check_header == header:
            return cookies
    cookies = SimpleCookie()
    cookies.load(header)
    result = {}
    for name in cookies:
        # Keep only the value; the other Morsel attributes are dropped.
        result[name] = cookies[name].value
    environ['paste.cookies.dict'] = (result, header)
    return result
---|
| 74 | |
---|
def parse_querystring(environ):
    """
    Parses the query string into a list of ``(name, value)`` pairs.

    The parsed list is remembered in
    ``environ['paste.parsed_querystring']`` alongside the raw
    ``QUERY_STRING`` it came from, and is handed back unchanged on later
    calls for as long as the query string stays the same.  An empty or
    missing query string yields a new empty list and is not cached.

    The result may be passed to ``dict()``, but a name that occurs more
    than once then keeps only its last value.

    """
    query = environ.get('QUERY_STRING', '')
    if not query:
        return []
    try:
        pairs, cached_query = environ['paste.parsed_querystring']
    except KeyError:
        # Nothing cached yet for this request.
        pass
    else:
        if cached_query == query:
            return pairs
    # Blank values are kept and malformed fields are tolerated.
    pairs = cgi.parse_qsl(query, keep_blank_values=True,
                          strict_parsing=False)
    environ['paste.parsed_querystring'] = (pairs, query)
    return pairs
---|
| 97 | |
---|
def parse_dict_querystring(environ):
    """Parses the query string like parse_querystring, but into a MultiDict

    The MultiDict is remembered in
    ``environ['paste.parsed_dict_querystring']`` together with the raw
    ``QUERY_STRING`` and reused on later calls while the query string is
    unchanged.  An empty or missing query string gives a new, empty
    MultiDict that is not cached.

    Example::

        >>> environ = {'QUERY_STRING': 'day=Monday&user=fred&user=jane'}
        >>> parsed = parse_dict_querystring(environ)

        >>> parsed['day']
        'Monday'
        >>> parsed['user']
        'fred'
        >>> parsed.getall('user')
        ['fred', 'jane']

    """
    cache_key = 'paste.parsed_dict_querystring'
    query = environ.get('QUERY_STRING', '')
    if not query:
        return MultiDict()
    if cache_key in environ:
        multi, cached_query = environ[cache_key]
        if cached_query == query:
            return multi
    # Blank values are kept and malformed fields are tolerated.
    pairs = cgi.parse_qsl(query, keep_blank_values=True,
                          strict_parsing=False)
    multi = MultiDict(pairs)
    environ[cache_key] = (multi, query)
    return multi
---|
| 129 | |
---|
def parse_formvars(environ, include_get_vars=True):
    """Parses the request, returning a MultiDict of form variables.

    If ``include_get_vars`` is true then GET (query string) variables
    will also be folded into the MultiDict.

    All values should be strings, except for file uploads which are
    left as ``FieldStorage`` instances.

    If the request was not a normal form request (e.g., a POST with an
    XML body) then ``environ['wsgi.input']`` won't be read.

    The result is cached in ``environ['paste.parsed_formvars']`` as
    ``(formvars, wsgi_input)`` and reused while ``environ['wsgi.input']``
    still compares equal to the cached input object.

    Side effects on ``environ``: ``CONTENT_LENGTH`` is set to ``'0'`` when
    it is missing or empty, and ``QUERY_STRING`` is set to ``''`` when it
    is missing.  ``QUERY_STRING``, ``CONTENT_TYPE`` and ``CONTENT_LENGTH``
    are changed temporarily while ``cgi.FieldStorage`` runs and are
    restored afterwards, even if parsing raises.
    """
    source = environ['wsgi.input']
    if 'paste.parsed_formvars' in environ:
        parsed, check_source = environ['paste.parsed_formvars']
        if check_source == source:
            if include_get_vars:
                parsed.update(parse_querystring(environ))
            return parsed
    # @@: Shouldn't bother FieldStorage parsing during GET/HEAD and
    # fake_out_cgi requests
    content_type = environ.get('CONTENT_TYPE', '').lower()
    if ';' in content_type:
        # Drop parameters such as the charset or multipart boundary.
        content_type = content_type.split(';', 1)[0]
    fake_out_cgi = content_type not in (
        '', 'application/x-www-form-urlencoded', 'multipart/form-data')
    # FieldStorage assumes a default CONTENT_LENGTH of -1, but a
    # default of 0 is better:
    if not environ.get('CONTENT_LENGTH'):
        environ['CONTENT_LENGTH'] = '0'
    # Prevent FieldStorage from parsing QUERY_STRING during GET/HEAD
    # requests
    old_query_string = environ.get('QUERY_STRING', '')
    environ['QUERY_STRING'] = ''
    if fake_out_cgi:
        # Not a form body: give FieldStorage an empty stream so that the
        # real wsgi.input is left untouched.
        body = StringIO('')
        old_content_type = environ.get('CONTENT_TYPE')
        old_content_length = environ.get('CONTENT_LENGTH')
        environ['CONTENT_LENGTH'] = '0'
        environ['CONTENT_TYPE'] = ''
    else:
        body = environ['wsgi.input']
    try:
        fs = cgi.FieldStorage(fp=body,
                              environ=environ,
                              keep_blank_values=1)
    finally:
        # Always undo the temporary overrides, so that a parsing error
        # does not leave the request environment blanked out.
        environ['QUERY_STRING'] = old_query_string
        if fake_out_cgi:
            environ['CONTENT_TYPE'] = old_content_type
            environ['CONTENT_LENGTH'] = old_content_length
    formvars = MultiDict()
    if isinstance(fs.value, list):
        for name in fs.keys():
            values = fs[name]
            if not isinstance(values, list):
                values = [values]
            for value in values:
                # File uploads stay as FieldStorage objects; plain
                # fields are reduced to their value.
                if not value.filename:
                    value = value.value
                formvars.add(name, value)
    environ['paste.parsed_formvars'] = (formvars, source)
    # NOTE(review): the object updated below is the same one that was
    # just cached, so the cache entry also receives the GET variables.
    if include_get_vars:
        formvars.update(parse_querystring(environ))
    return formvars
---|
| 193 | |
---|
def construct_url(environ, with_query_string=True, with_path_info=True,
                  script_name=None, path_info=None, querystring=None):
    """Reconstructs the URL from the WSGI environment.

    You may override SCRIPT_NAME, PATH_INFO, and QUERY_STRING with
    the keyword arguments.

    The host comes from ``HTTP_HOST`` when that is set and non-empty,
    otherwise from ``SERVER_NAME``/``SERVER_PORT``.  The port is left
    out when it is the default for the scheme.  SCRIPT_NAME and
    PATH_INFO are URL-quoted; the query string is appended as-is.

    """
    scheme = environ['wsgi.url_scheme']
    pieces = [scheme + '://']

    http_host = environ.get('HTTP_HOST')
    if http_host:
        port = None
        if ':' in http_host:
            http_host, port = http_host.split(':', 1)
            # Only the well-known http/https defaults are dropped here.
            if (scheme, port) in (('https', '443'), ('http', '80')):
                port = None
        pieces.append(http_host)
        if port:
            pieces.append(':%s' % port)
    else:
        pieces.append(environ['SERVER_NAME'])
        # Any scheme other than https is compared against port 80.
        if scheme == 'https':
            default_port = '443'
        else:
            default_port = '80'
        if environ['SERVER_PORT'] != default_port:
            pieces.append(':' + environ['SERVER_PORT'])

    if script_name is None:
        script_name = environ.get('SCRIPT_NAME', '')
    pieces.append(urllib.quote(script_name))
    if with_path_info:
        if path_info is None:
            path_info = environ.get('PATH_INFO', '')
        pieces.append(urllib.quote(path_info))
    if with_query_string:
        if querystring is None:
            querystring = environ.get('QUERY_STRING')
        # An empty query string adds nothing, not even the '?'.
        if querystring:
            pieces.append('?' + querystring)
    return ''.join(pieces)
---|
| 243 | |
---|
def resolve_relative_url(url, environ):
    """
    Joins ``url`` onto the URL of the current request.

    The base is the URL rebuilt from ``environ`` without its query
    string.  This is handy for redirecting to a relative path.  An
    ``url`` that is already absolute is (intentionally) left as it is.

    """
    return urlparse.urljoin(
        construct_url(environ, with_query_string=False), url)
---|
| 255 | |
---|
def path_info_split(path_info):
    """
    Separates the leading segment of a path from the remainder.

    Returns ``(first_part, rest_of_path)``.  ``first_part`` is None when
    PATH_INFO is empty, ``''`` when PATH_INFO is ``'/'``, and otherwise
    a name containing no ``/``.  ``rest_of_path`` is either ``''`` or a
    string that starts with ``/``.  All leading slashes are discarded
    before the split.

    """
    if not path_info:
        return None, ''
    assert path_info.startswith('/'), (
        "PATH_INFO should start with /: %r" % path_info)
    pieces = path_info.lstrip('/').split('/', 1)
    if len(pieces) == 1:
        # No further slash: the whole remainder is the first segment.
        return pieces[0], ''
    return pieces[0], '/' + pieces[1]
---|
| 274 | |
---|
def path_info_pop(environ):
    """
    'Pops' off the next segment of PATH_INFO, pushing it onto
    SCRIPT_NAME, and returning that segment.

    Returns None (and leaves ``environ`` untouched) when PATH_INFO is
    missing or empty.  A missing SCRIPT_NAME is treated as ``''``, the
    same way a missing PATH_INFO is.  Leading slashes of PATH_INFO are
    moved onto SCRIPT_NAME along with the segment.

    For instance::

        >>> def call_it(script_name, path_info):
        ...     env = {'SCRIPT_NAME': script_name, 'PATH_INFO': path_info}
        ...     result = path_info_pop(env)
        ...     print('SCRIPT_NAME=%r; PATH_INFO=%r; returns=%r' % (
        ...         env['SCRIPT_NAME'], env['PATH_INFO'], result))
        >>> call_it('/foo', '/bar')
        SCRIPT_NAME='/foo/bar'; PATH_INFO=''; returns='bar'
        >>> call_it('/foo/bar', '')
        SCRIPT_NAME='/foo/bar'; PATH_INFO=''; returns=None
        >>> call_it('/foo/bar', '/')
        SCRIPT_NAME='/foo/bar/'; PATH_INFO=''; returns=''
        >>> call_it('', '/1/2/3')
        SCRIPT_NAME='/1'; PATH_INFO='/2/3'; returns='1'
        >>> call_it('', '//1/2')
        SCRIPT_NAME='//1'; PATH_INFO='/2'; returns='1'

    """
    path = environ.get('PATH_INFO', '')
    if not path:
        return None
    # Read SCRIPT_NAME with a default so that an environ which omits
    # the (empty) key does not raise KeyError.
    script_name = environ.get('SCRIPT_NAME', '')
    while path.startswith('/'):
        script_name += '/'
        path = path[1:]
    if '/' not in path:
        # Last segment: everything moves over to SCRIPT_NAME.
        environ['SCRIPT_NAME'] = script_name + path
        environ['PATH_INFO'] = ''
        return path
    segment, rest = path.split('/', 1)
    environ['PATH_INFO'] = '/' + rest
    environ['SCRIPT_NAME'] = script_name + segment
    return segment
---|
| 314 | |
---|
# CGI variables whose header name cannot be derived by stripping an
# ``HTTP_`` prefix.
_parse_headers_special = {
    # This is a Zope convention, but we'll allow it here:
    'HTTP_CGI_AUTHORIZATION': 'Authorization',
    'CONTENT_LENGTH': 'Content-Length',
    'CONTENT_TYPE': 'Content-Type',
    }

def parse_headers(environ):
    """
    Parse the headers in the environment (like ``HTTP_HOST``) and
    yield a sequence of those (header_name, value) tuples.

    Keys listed in ``_parse_headers_special`` are yielded under their
    mapped name.  Any other key starting with ``HTTP_`` has the prefix
    removed, is title-cased and has ``_`` replaced by ``-`` (so
    ``HTTP_X_FOO`` becomes ``X-Foo``).  All other keys are skipped.
    """
    # @@: Maybe should parse out comma-separated headers?
    # ``items()`` rather than ``iteritems()`` so that this also works on
    # mappings that do not provide the Python 2 only iterator method.
    for cgi_var, value in environ.items():
        if cgi_var in _parse_headers_special:
            yield _parse_headers_special[cgi_var], value
        elif cgi_var.startswith('HTTP_'):
            yield cgi_var[5:].title().replace('_', '-'), value
---|
| 333 | |
---|
class EnvironHeaders(DictMixin):
    """A dictionary-style view of the request headers held in a WSGI
    environment.

    The object keeps no state of its own beyond a reference to the
    environment: every operation is translated to the matching
    CGI-style key (``HTTP_*``, ``CONTENT_TYPE`` or ``CONTENT_LENGTH``)
    and applied to the environment directly.  Because a CGI environment
    can only hold one value for each key, this dictionary is
    single-valued (unlike outgoing headers).
    """

    def __init__(self, environ):
        self.environ = environ

    def _trans_name(self, name):
        """Turn a header name (``Content-Type``) into its environ key."""
        cgi_key = 'HTTP_' + name.replace('-', '_').upper()
        # These two headers are stored without the HTTP_ prefix.
        if cgi_key == 'HTTP_CONTENT_TYPE':
            return 'CONTENT_TYPE'
        if cgi_key == 'HTTP_CONTENT_LENGTH':
            return 'CONTENT_LENGTH'
        return cgi_key

    def _trans_key(self, key):
        """Turn an environ key into a header name, or None if the key
        does not represent a header."""
        if key == 'CONTENT_LENGTH':
            return 'Content-Length'
        if key == 'CONTENT_TYPE':
            return 'Content-Type'
        if not key.startswith('HTTP_'):
            return None
        return key[5:].replace('_', '-').title()

    def __getitem__(self, item):
        cgi_key = self._trans_name(item)
        return self.environ[cgi_key]

    def __setitem__(self, item, value):
        # @@: Should this dictionary be writable at all?
        cgi_key = self._trans_name(item)
        self.environ[cgi_key] = value

    def __delitem__(self, item):
        cgi_key = self._trans_name(item)
        del self.environ[cgi_key]

    def __iter__(self):
        for cgi_key in self.environ:
            header = self._trans_key(cgi_key)
            if header is not None:
                yield header

    def keys(self):
        # Built by plain iteration over __iter__ only.
        return [header for header in iter(self)]

    def __contains__(self, item):
        cgi_key = self._trans_name(item)
        return cgi_key in self.environ
---|
| 387 | |
---|
| 388 | def _cgi_FieldStorage__repr__patch(self): |
---|
| 389 | """ monkey patch for FieldStorage.__repr__ |
---|
| 390 | |
---|
| 391 | Unbelievely, the default __repr__ on FieldStorage reads |
---|
| 392 | the entire file content instead of being sane about it. |
---|
| 393 | This is a simple replacement that doesn't do that |
---|
| 394 | """ |
---|
| 395 | if self.file: |
---|
| 396 | return "FieldStorage(%r, %r)" % ( |
---|
| 397 | self.name, self.filename) |
---|
| 398 | return "FieldStorage(%r, %r, %r)" % ( |
---|
| 399 | self.name, self.filename, self.value) |
---|
| 400 | |
---|
# Install the replacement __repr__ on cgi.FieldStorage.  This happens as
# a side effect of importing this module and applies to every
# FieldStorage instance in the process.
cgi.FieldStorage.__repr__ = _cgi_FieldStorage__repr__patch

# When run as a script, execute the doctests embedded in this module.
if __name__ == '__main__':
    import doctest
    doctest.testmod()
---|