1 | # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
---|
2 | # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
---|
3 | # (c) 2005 Ian Bicking and contributors |
---|
4 | # This module is part of the Python Paste Project and is released under |
---|
5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php |
---|
6 | """ |
---|
7 | This module provides helper routines which work directly on a WSGI |
---|
8 | environment to solve common requirements. |
---|
9 | |
---|
10 | * get_cookies(environ) |
---|
11 | * parse_querystring(environ) |
---|
12 | * parse_formvars(environ, include_get_vars=True) |
---|
13 | * construct_url(environ, with_query_string=True, with_path_info=True, |
---|
14 | script_name=None, path_info=None, querystring=None) |
---|
15 | * path_info_split(path_info) |
---|
16 | * path_info_pop(environ) |
---|
17 | * resolve_relative_url(url, environ) |
---|
18 | |
---|
19 | """ |
---|
20 | import cgi |
---|
21 | from Cookie import SimpleCookie |
---|
22 | from StringIO import StringIO |
---|
23 | import urlparse |
---|
24 | import urllib |
---|
25 | try: |
---|
26 | from UserDict import DictMixin |
---|
27 | except ImportError: |
---|
28 | from paste.util.UserDict24 import DictMixin |
---|
29 | from paste.util.multidict import MultiDict |
---|
30 | |
---|
# Public names exported by a star-import of this module.
# NOTE: parse_dict_querystring and parse_headers are defined in this
# module but are not listed here, so a star-import does not expose them.
__all__ = ['get_cookies', 'get_cookie_dict', 'parse_querystring',
           'parse_formvars', 'construct_url', 'path_info_split',
           'path_info_pop', 'resolve_relative_url', 'EnvironHeaders']
---|
34 | |
---|
def get_cookies(environ):
    """
    Gets a cookie object (which is a dictionary-like object) from the
    request environment; caches this value in case get_cookies is
    called again for the same request.

    The parsed ``SimpleCookie`` is stored in ``environ['paste.cookies']``
    together with the ``HTTP_COOKIE`` header it was built from.  The
    cached object is only reused while the header is unchanged; if the
    header differs, it is parsed again and the cache entry replaced.

    A missing ``HTTP_COOKIE`` header is treated as an empty string, so
    an (empty) ``SimpleCookie`` is always returned.
    """
    header = environ.get('HTTP_COOKIE', '')
    # ``in`` rather than the deprecated ``has_key``: this matches the
    # other cache checks in this module and works with any mapping.
    if 'paste.cookies' in environ:
        cookies, check_header = environ['paste.cookies']
        if check_header == header:
            return cookies
    cookies = SimpleCookie()
    cookies.load(header)
    environ['paste.cookies'] = (cookies, header)
    return cookies
---|
51 | |
---|
def get_cookie_dict(environ):
    """Return a *plain* dictionary of cookies as found in the request.

    Unlike ``get_cookies`` this returns a dictionary, not a
    ``SimpleCookie`` object.  For incoming cookies a dictionary fully
    represents the information.  Like ``get_cookies`` this caches and
    checks the cache.

    The result maps each cookie name to its string value and is stored
    in ``environ['paste.cookies.dict']`` together with the header it
    was parsed from.  When there is no ``HTTP_COOKIE`` header (or it is
    empty) a new empty dictionary is returned and nothing is cached.
    """
    header = environ.get('HTTP_COOKIE')
    if not header:
        return {}
    # ``in`` rather than the deprecated ``has_key``: this matches the
    # other cache checks in this module and works with any mapping.
    if 'paste.cookies.dict' in environ:
        cookies, check_header = environ['paste.cookies.dict']
        if check_header == header:
            return cookies
    cookies = SimpleCookie()
    cookies.load(header)
    # Keep only the value of each morsel; attributes such as path or
    # expiry are not copied into the plain dictionary.
    result = dict([(name, cookies[name].value) for name in cookies])
    environ['paste.cookies.dict'] = (result, header)
    return result
---|
74 | |
---|
def parse_querystring(environ):
    """
    Parse ``QUERY_STRING`` into a list of ``(name, value)`` tuples.

    The list is cached in ``environ['paste.parsed_querystring']``
    alongside the query string it came from, and is reused on later
    calls as long as the query string has not changed.  An empty or
    missing query string yields a new empty list and is not cached.

    Blank values are kept.  You can pass the result to ``dict()``, but
    be aware that keys that appear multiple times will be lost (only
    the last value will be preserved).

    """
    query = environ.get('QUERY_STRING', '')
    if not query:
        return []
    cache_key = 'paste.parsed_querystring'
    if cache_key in environ:
        cached_pairs, cached_query = environ[cache_key]
        if cached_query == query:
            return cached_pairs
    pairs = cgi.parse_qsl(query, keep_blank_values=True,
                          strict_parsing=False)
    environ[cache_key] = (pairs, query)
    return pairs
---|
97 | |
---|
def parse_dict_querystring(environ):
    """Parse ``QUERY_STRING`` like parse_querystring, but into a MultiDict.

    The MultiDict is cached in ``environ['paste.parsed_dict_querystring']``
    alongside the query string it came from, and is reused on later
    calls as long as the query string has not changed.  An empty or
    missing query string yields a new empty MultiDict and is not cached.

    Example::

        >>> environ = {'QUERY_STRING': 'day=Monday&user=fred&user=jane'}
        >>> parsed = parse_dict_querystring(environ)

        >>> parsed['day']
        'Monday'
        >>> parsed['user']
        'fred'
        >>> parsed.getall('user')
        ['fred', 'jane']

    """
    query = environ.get('QUERY_STRING', '')
    if not query:
        return MultiDict()
    cache_key = 'paste.parsed_dict_querystring'
    if cache_key in environ:
        cached_multi, cached_query = environ[cache_key]
        if cached_query == query:
            return cached_multi
    pairs = cgi.parse_qsl(query, keep_blank_values=True,
                          strict_parsing=False)
    result = MultiDict(pairs)
    environ[cache_key] = (result, query)
    return result
---|
129 | |
---|
def parse_formvars(environ, include_get_vars=True):
    """Parses the request, returning a MultiDict of form variables.

    If ``include_get_vars`` is true then GET (query string) variables
    will also be folded into the MultiDict.

    All values should be strings, except for file uploads which are
    left as ``FieldStorage`` instances.

    If the request was not a normal form request (e.g., a POST with an
    XML body) then ``environ['wsgi.input']`` won't be read.

    The parsed result is cached in ``environ['paste.parsed_formvars']``
    together with the ``wsgi.input`` object it was read from, and is
    reused while ``wsgi.input`` compares equal to that object.

    While ``cgi.FieldStorage`` runs, ``QUERY_STRING`` (and, for
    non-form content types, ``CONTENT_TYPE`` and ``CONTENT_LENGTH``) are
    temporarily overridden in ``environ``; they are restored afterwards
    even if parsing raises.  A missing or empty ``CONTENT_LENGTH`` is
    set to ``'0'`` and left that way.

    NOTE(review): the cached MultiDict is the very object that gets
    ``update()``-ed with the query string variables, so after one call
    with ``include_get_vars=True`` a later call with
    ``include_get_vars=False`` presumably still sees the GET variables.
    Confirm whether callers rely on this before changing it.
    """
    source = environ['wsgi.input']
    if 'paste.parsed_formvars' in environ:
        parsed, check_source = environ['paste.parsed_formvars']
        if check_source == source:
            if include_get_vars:
                parsed.update(parse_querystring(environ))
            return parsed
    # @@: Shouldn't bother FieldStorage parsing during GET/HEAD and
    # fake_out_cgi requests
    content_type = environ.get('CONTENT_TYPE', '').lower()
    if ';' in content_type:
        # Drop parameters such as "; charset=..." or "; boundary=..."
        content_type = content_type.split(';', 1)[0]
    fake_out_cgi = content_type not in ('',
                                        'application/x-www-form-urlencoded',
                                        'multipart/form-data')
    # FieldStorage assumes a default CONTENT_LENGTH of -1, but a
    # default of 0 is better:
    if not environ.get('CONTENT_LENGTH'):
        environ['CONTENT_LENGTH'] = '0'
    # Prevent FieldStorage from parsing QUERY_STRING during GET/HEAD
    # requests
    old_query_string = environ.get('QUERY_STRING', '')
    environ['QUERY_STRING'] = ''
    if fake_out_cgi:
        # Not a form submission: hand FieldStorage an empty body so the
        # real wsgi.input is left unread for the application.
        body = StringIO('')
        old_content_type = environ.get('CONTENT_TYPE')
        old_content_length = environ.get('CONTENT_LENGTH')
        environ['CONTENT_LENGTH'] = '0'
        environ['CONTENT_TYPE'] = ''
    else:
        body = source
    try:
        fs = cgi.FieldStorage(fp=body,
                              environ=environ,
                              keep_blank_values=1)
    finally:
        # Always undo the temporary overrides, so a parsing error does
        # not leave the request environment with a blanked query string
        # or content headers.
        environ['QUERY_STRING'] = old_query_string
        if fake_out_cgi:
            environ['CONTENT_TYPE'] = old_content_type
            environ['CONTENT_LENGTH'] = old_content_length
    formvars = MultiDict()
    if isinstance(fs.value, list):
        for name in fs.keys():
            values = fs[name]
            if not isinstance(values, list):
                values = [values]
            for value in values:
                # Plain fields collapse to their string value; uploads
                # (fields with a filename) stay as FieldStorage objects.
                if not value.filename:
                    value = value.value
                formvars.add(name, value)
    environ['paste.parsed_formvars'] = (formvars, source)
    if include_get_vars:
        formvars.update(parse_querystring(environ))
    return formvars
---|
193 | |
---|
def construct_url(environ, with_query_string=True, with_path_info=True,
                  script_name=None, path_info=None, querystring=None):
    """Reconstructs the URL from the WSGI environment.

    The host part comes from ``HTTP_HOST`` when that is set and
    non-empty, otherwise from ``SERVER_NAME``/``SERVER_PORT``.  The
    port is left out when it is the default for the scheme.

    You may override SCRIPT_NAME, PATH_INFO, and QUERY_STRING with the
    ``script_name``, ``path_info`` and ``querystring`` keyword
    arguments.  ``with_path_info`` and ``with_query_string`` control
    whether those parts are included at all.  The script name and path
    info are URL-quoted; the query string is appended as given.

    """
    scheme = environ['wsgi.url_scheme']
    pieces = [scheme, '://']

    http_host = environ.get('HTTP_HOST')
    if http_host:
        hostname, port = http_host, None
        if ':' in http_host:
            hostname, port = http_host.split(':', 1)
            # Only http and https have a default port to strip here.
            if (scheme, port) in (('https', '443'), ('http', '80')):
                port = None
        pieces.append(hostname)
        if port:
            pieces.append(':%s' % port)
    else:
        pieces.append(environ['SERVER_NAME'])
        server_port = environ['SERVER_PORT']
        # Any scheme other than https is treated like http here.
        if scheme == 'https':
            default_port = '443'
        else:
            default_port = '80'
        if server_port != default_port:
            pieces.append(':' + server_port)

    if script_name is None:
        script_name = environ.get('SCRIPT_NAME', '')
    pieces.append(urllib.quote(script_name))

    if with_path_info:
        if path_info is None:
            path_info = environ.get('PATH_INFO', '')
        pieces.append(urllib.quote(path_info))

    if with_query_string:
        if querystring is None:
            querystring = environ.get('QUERY_STRING')
        # An empty query string adds nothing, not even the '?'.
        if querystring:
            pieces.append('?' + querystring)

    return ''.join(pieces)
---|
243 | |
---|
def resolve_relative_url(url, environ):
    """
    Resolve ``url`` against the location the environment represents.

    The base is the current request URL without its query string, as
    built by ``construct_url``; ``url`` is joined onto it with
    ``urlparse.urljoin``.  This can be used for redirecting to a
    relative path.  Note: if url is already absolute, this function
    will (intentionally) have no effect on it.

    """
    return urlparse.urljoin(
        construct_url(environ, with_query_string=False), url)
---|
255 | |
---|
def path_info_split(path_info):
    """
    Split the first segment off a path, returning ``(first_part,
    rest_of_path)``.

    ``first_part`` is None when ``path_info`` is empty, ``''`` when it
    consists only of slashes, and otherwise a name without any /'s.
    ``rest_of_path`` is either ``''`` or a string starting with /.
    Leading slashes are all stripped before the first segment is taken.

    A non-empty ``path_info`` that does not start with / trips an
    assertion.

    """
    if not path_info:
        return None, ''
    assert path_info.startswith('/'), (
        "PATH_INFO should start with /: %r" % path_info)
    stripped = path_info.lstrip('/')
    slash_at = stripped.find('/')
    if slash_at == -1:
        # Only one segment left: nothing remains after it.
        return stripped, ''
    # The remainder keeps its leading slash.
    return stripped[:slash_at], stripped[slash_at:]
---|
274 | |
---|
def path_info_pop(environ):
    """
    'Pops' off the next segment of PATH_INFO, pushing it onto
    SCRIPT_NAME, and returning that segment.

    Both ``environ['SCRIPT_NAME']`` and ``environ['PATH_INFO']`` are
    updated in place.  Returns None (and changes nothing) when
    PATH_INFO is missing or empty.  A missing SCRIPT_NAME is treated
    as an empty string, just like a missing PATH_INFO.

    For instance::

        >>> def call_it(script_name, path_info):
        ...     env = {'SCRIPT_NAME': script_name, 'PATH_INFO': path_info}
        ...     result = path_info_pop(env)
        ...     print('SCRIPT_NAME=%r; PATH_INFO=%r; returns=%r' % (
        ...         env['SCRIPT_NAME'], env['PATH_INFO'], result))
        >>> call_it('/foo', '/bar')
        SCRIPT_NAME='/foo/bar'; PATH_INFO=''; returns='bar'
        >>> call_it('/foo/bar', '')
        SCRIPT_NAME='/foo/bar'; PATH_INFO=''; returns=None
        >>> call_it('/foo/bar', '/')
        SCRIPT_NAME='/foo/bar/'; PATH_INFO=''; returns=''
        >>> call_it('', '/1/2/3')
        SCRIPT_NAME='/1'; PATH_INFO='/2/3'; returns='1'
        >>> call_it('', '//1/2')
        SCRIPT_NAME='//1'; PATH_INFO='/2'; returns='1'

    """
    path = environ.get('PATH_INFO', '')
    if not path:
        return None
    # WSGI allows SCRIPT_NAME to be omitted when it would be empty, so
    # do not assume the key is present.
    script_name = environ.get('SCRIPT_NAME', '')
    # Every leading slash moves over to SCRIPT_NAME unchanged.
    while path.startswith('/'):
        script_name += '/'
        path = path[1:]
    if '/' in path:
        segment, remainder = path.split('/', 1)
        remainder = '/' + remainder
    else:
        segment, remainder = path, ''
    environ['SCRIPT_NAME'] = script_name + segment
    environ['PATH_INFO'] = remainder
    return segment
---|
314 | |
---|
# CGI variables whose header name cannot be derived by stripping an
# ``HTTP_`` prefix, mapped to the header name to report for them.
_parse_headers_special = {
    # This is a Zope convention, but we'll allow it here:
    'HTTP_CGI_AUTHORIZATION': 'Authorization',
    'CONTENT_LENGTH': 'Content-Length',
    'CONTENT_TYPE': 'Content-Type',
    }

def parse_headers(environ):
    """
    Parse the headers in the environment (like ``HTTP_HOST``) and
    yield a sequence of those (header_name, value) tuples.

    Keys listed in ``_parse_headers_special`` are reported under their
    mapped name.  Any other key starting with ``HTTP_`` has the prefix
    removed, is title-cased, and has underscores turned into dashes
    (``HTTP_X_FORWARDED_FOR`` becomes ``X-Forwarded-For``).  All other
    keys are skipped.  This is a generator; headers come out in the
    iteration order of ``environ``.
    """
    # @@: Maybe should parse out comma-separated headers?
    # items() rather than the Python-2-only iteritems(), so any mapping
    # that provides items() can be used as the environment.
    for cgi_var, value in environ.items():
        if cgi_var in _parse_headers_special:
            yield _parse_headers_special[cgi_var], value
        elif cgi_var.startswith('HTTP_'):
            yield cgi_var[5:].title().replace('_', '-'), value
---|
333 | |
---|
class EnvironHeaders(DictMixin):
    """A dictionary-style view of the headers in a WSGI environment.

    This object is a wrapper (with no internal state) for a WSGI
    request object, representing the CGI-style HTTP_* keys as a
    dictionary.  Reads, writes and deletes go straight through to the
    wrapped ``environ``.  Because a CGI environment can only hold one
    value for each key, this dictionary is single-valued (unlike
    outgoing headers).

    Lookups accept header names in any case (``'content-type'``);
    iteration yields them title-cased (``'Content-Type'``).
    """

    def __init__(self, environ):
        self.environ = environ

    def _trans_name(self, name):
        """Translate a header name into the matching environ key."""
        cgi_key = 'HTTP_' + name.replace('-', '_').upper()
        if cgi_key in ('HTTP_CONTENT_LENGTH', 'HTTP_CONTENT_TYPE'):
            # These two headers are stored without the HTTP_ prefix.
            return cgi_key[5:]
        return cgi_key

    def _trans_key(self, key):
        """Translate an environ key into a header name.

        Returns None for keys that do not represent a header.
        """
        if key == 'CONTENT_TYPE':
            return 'Content-Type'
        if key == 'CONTENT_LENGTH':
            return 'Content-Length'
        if not key.startswith('HTTP_'):
            return None
        return key[5:].replace('_', '-').title()

    def __getitem__(self, item):
        cgi_key = self._trans_name(item)
        return self.environ[cgi_key]

    def __setitem__(self, item, value):
        # @@: Should this dictionary be writable at all?
        cgi_key = self._trans_name(item)
        self.environ[cgi_key] = value

    def __delitem__(self, item):
        cgi_key = self._trans_name(item)
        del self.environ[cgi_key]

    def __iter__(self):
        for key in self.environ:
            header = self._trans_key(key)
            if header is None:
                continue
            yield header

    def keys(self):
        # Build the list from the iterator and never from ``self``
        # directly: list(self) may ask for len(self), and DictMixin
        # implements __len__ by calling keys() again.
        return [header for header in iter(self)]

    def __contains__(self, item):
        cgi_key = self._trans_name(item)
        return cgi_key in self.environ
---|
387 | |
---|
def _cgi_FieldStorage__repr__patch(self):
    """ monkey patch for FieldStorage.__repr__

    Unbelievably, the default __repr__ on FieldStorage reads
    the entire file content instead of being sane about it.
    This is a simple replacement that doesn't do that: when the
    field has a file attached only its name and filename are shown,
    otherwise the value is included as well.
    """
    if not self.file:
        return "FieldStorage(%r, %r, %r)" % (
            self.name, self.filename, self.value)
    # A file is attached: leave the value out so it is never read.
    return "FieldStorage(%r, %r)" % (
        self.name, self.filename)
---|
400 | |
---|
# Module-level side effect: importing this module replaces the __repr__
# of cgi.FieldStorage itself, so the replacement applies to every
# FieldStorage instance, not only those created by this module.
cgi.FieldStorage.__repr__ = _cgi_FieldStorage__repr__patch
---|
402 | |
---|
# When executed as a script, run the doctests embedded in this module's
# docstrings.
if __name__ == '__main__':
    import doctest
    doctest.testmod()
---|