1 | """HTTP cookie handling for web clients. |
---|
2 | |
---|
3 | This module originally developed from my port of Gisle Aas' Perl module |
---|
4 | HTTP::Cookies, from the libwww-perl library. |
---|
5 | |
---|
6 | Docstrings, comments and debug strings in this code refer to the |
---|
7 | attributes of the HTTP cookie system as cookie-attributes, to distinguish |
---|
8 | them clearly from Python attributes. |
---|
9 | |
---|
                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar
---|
21 | |
---|
22 | Comments to John J Lee <jjl@pobox.com>. |
---|
23 | |
---|
24 | |
---|
25 | Copyright 2002-2006 John J Lee <jjl@pobox.com> |
---|
26 | Copyright 1997-1999 Gisle Aas (original libwww-perl code) |
---|
27 | Copyright 2002-2003 Johnny Lee (original MSIE Perl code) |
---|
28 | |
---|
29 | This code is free software; you can redistribute it and/or modify it |
---|
30 | under the terms of the BSD or ZPL 2.1 licenses (see the file |
---|
31 | COPYING.txt included with the distribution). |
---|
32 | |
---|
33 | """ |
---|
34 | |
---|
35 | import sys, re, copy, time, struct, urllib, types, logging |
---|
36 | try: |
---|
37 | import threading |
---|
38 | _threading = threading; del threading |
---|
39 | except ImportError: |
---|
40 | import dummy_threading |
---|
41 | _threading = dummy_threading; del dummy_threading |
---|
42 | import httplib # only for the default HTTP port |
---|
43 | |
---|
44 | MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " |
---|
45 | "instance initialised with one)") |
---|
46 | DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT) |
---|
47 | |
---|
48 | from _headersutil import split_header_words, parse_ns_headers |
---|
49 | from _util import isstringlike |
---|
50 | import _rfc3986 |
---|
51 | |
---|
52 | debug = logging.getLogger("mechanize.cookies").debug |
---|
53 | |
---|
54 | |
---|
def reraise_unmasked_exceptions(unmasked=()):
    """Re-raise the exception being handled unless it should be swallowed.

    Intended to be called from inside the module's catch-all ``except:``
    clauses, which exist to survive input that is bad in unexpected ways.

    unmasked: tuple of extra exception classes that must always propagate,
      in addition to KeyboardInterrupt, SystemExit and MemoryError.

    If mechanize.USE_BARE_EXCEPT is false, every exception is re-raised.
    Otherwise an exception that is not re-raised is reported as a warning
    carrying the formatted traceback.

    """
    import mechanize, warnings
    if not mechanize.USE_BARE_EXCEPT:
        raise
    always_propagate = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
    if issubclass(sys.exc_info()[0], always_propagate):
        raise
    # The exception is being swallowed: surface it as a warning instead.
    import traceback, StringIO
    buf = StringIO.StringIO()
    traceback.print_exc(None, buf)
    warnings.warn("mechanize bug!\n%s" % buf.getvalue(), stacklevel=2)
---|
72 | |
---|
73 | |
---|
# Matches text ending in a dot followed by digits; used throughout this
# module as the "looks like an IPv4 address" test.
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name.

    Text is rejected if it ends in ".<digits>" (treated as an IP address),
    is empty, or begins or ends with a dot.

    """
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text):
        return False
    if text == "":
        return False
    return text[0] != "." and text[-1] != "."
---|
85 | |
---|
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # "A has the form NB" means B is a *proper suffix* of A.  Searching for B
    # anywhere inside A (as rfind does) would wrongly accept hosts such as
    # "x.y.com.evil" for the domain ".y.com".
    has_form_nb = len(A) > len(B) and A.endswith(B)
    return (
        has_form_nb and
        B.startswith(".") and
        is_HDN(B[1:])
        )
---|
122 | |
---|
def liberal_is_HDN(text):
    """Return True if text looks roughly like a host domain name.

    Used when matching user-supplied block/allow lists: anything that does
    not end in ".<digits>" (i.e. does not look like an IP address) counts.

    """
    return IPV4_RE.search(text) is None
---|
130 | |
---|
def user_domain_match(A, B):
    """Return True if host A matches block/allow-list entry B.

    A and B may be host domain names or IP addresses.  Comparison is
    case-insensitive.  If either looks like an IP address, or B has no
    leading dot, the two must be equal; an entry with a leading dot matches
    any A that ends with it.

    """
    host = A.lower()
    entry = B.lower()
    if liberal_is_HDN(host) and liberal_is_HDN(entry):
        if entry.startswith("."):
            # dotted entry: matched by any host ending with it
            return host.endswith(entry)
        return host == entry
    # at least one side looks like an IP address: exact match only
    return host == entry
---|
150 | |
---|
# Matches a trailing ":<digits>" port suffix.
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the authority component of the request's full
    URL; if that component is None, the request's Host header is used (or
    "" when there is none).  Any trailing port is removed.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    authority = _rfc3986.urlsplit(request.get_full_url())[1]
    if authority is None:
        authority = request.get_header("Host", "")
    # remove port, if present
    return cut_port_re.sub("", authority, 1).lower()
---|
167 | |
---|
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective name
    is the request-host with ".local" appended when the request-host
    contains no dot and does not look like an IP address.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
---|
178 | |
---|
def request_path(request):
    """request-URI, as defined by RFC 2965.

    Built from the path (escaped with escape_path), query and fragment of
    the request's full URL; a leading "/" is added if it is missing.

    """
    components = _rfc3986.urlsplit(request.get_full_url())
    path, query, frag = components[2:]
    req_path = _rfc3986.urlunsplit(
        (None, None, escape_path(path), query, frag))
    if req_path.startswith("/"):
        return req_path
    return "/"+req_path
---|
188 | |
---|
def request_port(request):
    """Return the port of the request's host as a string.

    Returns DEFAULT_HTTP_PORT when the host contains no colon, and None
    (after logging a debug message) when the text after the first colon is
    not an integer.

    """
    host = request.get_host()
    colon = host.find(':')
    if colon < 0:
        return DEFAULT_HTTP_PORT
    port = host[colon+1:]
    try:
        # only validating: the port is returned as the original string
        int(port)
    except ValueError:
        debug("nonnumeric port: '%s'", port)
        return None
    return port
---|
202 | |
---|
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
# A %-escape: percent sign followed by two hex digits (captured as group 1).
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the %-escape matched by ESCAPED_CHAR_RE with uppercase hex."""
    hex_digits = match.group(1)
    return "%" + hex_digits.upper()
---|
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes.

    Unicode paths are encoded as UTF-8 before quoting; characters listed in
    HTTP_PATH_SAFE are left unescaped.

    """
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, types.UnicodeType):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
---|
224 | |
---|
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    first_dot = h.find(".")
    if first_dot < 0:
        # no dot at all: h cannot have the form A.B
        return h
    # h == A.B, where A is everything before the first dot
    b = h[first_dot+1:]
    if is_HDN(h) and ("." in b or b == "local"):
        return "."+b
    return h
---|
259 | |
---|
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    host = request_host(request)
    # the origin request's request-host was stuffed into request by
    # _urllib2_support.AbstractHTTPHandler
    origin_reach = reach(request.origin_req_host)
    return not domain_match(host, origin_reach)
---|
274 | |
---|
275 | |
---|
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    version: integer;
    name: string;
    value: string (may be None);
    port: string; None indicates no attribute was supplied (eg. "Port", rather
     than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
     string (eg. "80,8080")
    port_specified: boolean; true if a value was supplied with the Port
     cookie-attribute
    domain: string;
    domain_specified: boolean; true if Domain was explicitly set
    domain_initial_dot: boolean; true if Domain as set in HTTP header by server
     started with a dot (yes, this really is necessary!)
    path: string;
    path_specified: boolean; true if Path was explicitly set
    secure:  boolean; true if should only be returned over secure connection
    expires: integer; seconds since epoch (RFC 2965 cookies should calculate
     this value from the Max-Age attribute)
    discard: boolean, true if this is a session cookie; (if no expires value,
     this should be true)
    comment: string;
    comment_url: string;
    rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
     Set-Cookie2:) header, but had a version cookie-attribute of 1
    rest: mapping of other cookie-attributes

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than"Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes, normalising a few of them.

        version and expires are converted with int() unless None; domain is
        lowercased; rest is shallow-copied.

        Raises ValueError if port is None while port_specified is True.

        """
        if version is not None: version = int(version)
        if expires is not None: expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        # copy, so later set_nonstandard_attr calls don't mutate the
        # caller's mapping
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return True if the non-standard cookie-attribute name is set."""
        # "in" rather than dict.has_key(): has_key is deprecated and absent
        # from Python 3 dicts.
        return name in self._rest
    def get_nonstandard_attr(self, name, default=None):
        """Return non-standard cookie-attribute name, or default if unset."""
        return self._rest.get(name, default)
    def set_nonstandard_attr(self, name, value):
        """Set non-standard cookie-attribute name to value."""
        self._rest[name] = value
    def nonstandard_attr_keys(self):
        """Return the names of all non-standard cookie-attributes."""
        return self._rest.keys()

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time at or before now.

        now: seconds since epoch; defaults to the current time.  A cookie
        whose expires is None never counts as expired.

        """
        if now is None: now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __str__(self):
        if self.port is None: p = ""
        else: p = ":"+self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        # Same order as the constructor's positional arguments.
        names = ["version", "name", "value",
                 "port", "port_specified",
                 "domain", "domain_specified", "domain_initial_dot",
                 "path", "path_specified",
                 "secure", "expires", "discard", "comment", "comment_url",
                 ]
        args = ["%s=%s" % (name, repr(getattr(self, name))) for name in names]
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
---|
397 | |
---|
398 | |
---|
class CookiePolicy:
    """Interface deciding which cookies are accepted from and sent to servers.

    A policy may also modify cookies.

    DefaultCookiePolicy is the subclass implementing the standard Netscape
    and RFC 2965 rules; derive from that class for a customised policy.

    Besides set_ok and return_ok, implementations must provide the
    attributes below, which say which protocols to use and how.  They may be
    read and assigned at any time, although whether changing them
    mid-session makes sense from the protocol point of view is doubtful.

    Public attributes:

    netscape: implement netscape protocol
    rfc2965: implement RFC 2965 protocol
    rfc2109_as_netscape:
       WARNING: This argument will change or go away if is not accepted into
                the Python standard library in this form!
     If true, treat RFC 2109 cookies as though they were Netscape cookies.  The
     default is for this attribute to be None, which means treat 2109 cookies
     as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is,
     by default), and as Netscape cookies otherwise.
    hide_cookie2: don't add Cookie2 header to requests (the presence of
     this header indicates to the server that we understand RFC 2965
     cookies)

    """
    def set_ok(self, cookie, request):
        """Decide whether cookie should be accepted from the server.

        Must return true if, and only if, the cookie is acceptable.
        Pre-expired cookies currently never reach this method: the
        CookieJar class deletes them itself.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.extract_cookies.__doc__

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Decide whether cookie should be sent back to the server.

        Must return true if, and only if, the cookie should be returned.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.add_cookie_header.__doc__

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if no cookie with this cookie domain may be returned.

        An optimization hook: it avoids checking every single cookie of a
        given domain (which may involve reading many files).  The default
        domain_return_ok and path_return_ok both return True, leaving all
        the work to return_ok.

        When this returns true for a cookie domain, path_return_ok is called
        for the cookie path; otherwise neither path_return_ok nor return_ok
        is ever called for that domain.  When path_return_ok returns true,
        return_ok is called with the Cookie object for the full check;
        otherwise return_ok is never called for that path.

        The method is called for every *cookie* domain, not only for the
        *request* domain: with a request domain of "www.acme.com" it may be
        called with both ".acme.com" and "www.acme.com".  The same holds for
        path_return_ok.

        For argument documentation, see the docstring for return_ok.

        """
        return True

    def path_return_ok(self, path, request):
        """Return false if no cookie with this cookie path may be returned.

        See the docstring for domain_return_ok.

        """
        return True
---|
483 | |
---|
484 | |
---|
485 | class DefaultCookiePolicy(CookiePolicy): |
---|
486 | """Implements the standard rules for accepting and returning cookies. |
---|
487 | |
---|
488 | Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is |
---|
489 | switched off by default. |
---|
490 | |
---|
491 | The easiest way to provide your own policy is to override this class and |
---|
492 | call its methods in your overriden implementations before adding your own |
---|
493 | additional checks. |
---|
494 | |
---|
495 | import mechanize |
---|
496 | class MyCookiePolicy(mechanize.DefaultCookiePolicy): |
---|
497 | def set_ok(self, cookie, request): |
---|
498 | if not mechanize.DefaultCookiePolicy.set_ok( |
---|
499 | self, cookie, request): |
---|
500 | return False |
---|
501 | if i_dont_want_to_store_this_cookie(): |
---|
502 | return False |
---|
503 | return True |
---|
504 | |
---|
505 | In addition to the features required to implement the CookiePolicy |
---|
506 | interface, this class allows you to block and allow domains from setting |
---|
507 | and receiving cookies. There are also some strictness switches that allow |
---|
508 | you to tighten up the rather loose Netscape protocol rules a little bit (at |
---|
509 | the cost of blocking some benign cookies). |
---|
510 | |
---|
511 | A domain blacklist and whitelist is provided (both off by default). Only |
---|
512 | domains not in the blacklist and present in the whitelist (if the whitelist |
---|
513 | is active) participate in cookie setting and returning. Use the |
---|
514 | blocked_domains constructor argument, and blocked_domains and |
---|
515 | set_blocked_domains methods (and the corresponding argument and methods for |
---|
516 | allowed_domains). If you set a whitelist, you can turn it off again by |
---|
517 | setting it to None. |
---|
518 | |
---|
519 | Domains in block or allow lists that do not start with a dot must |
---|
520 | string-compare equal. For example, "acme.com" matches a blacklist entry of |
---|
521 | "acme.com", but "www.acme.com" does not. Domains that do start with a dot |
---|
522 | are matched by more specific domains too. For example, both "www.acme.com" |
---|
523 | and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does |
---|
524 | not). IP addresses are an exception, and must match exactly. For example, |
---|
525 | if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is |
---|
526 | blocked, but 193.168.1.2 is not. |
---|
527 | |
---|
528 | Additional Public Attributes: |
---|
529 | |
---|
530 | General strictness switches |
---|
531 | |
---|
532 | strict_domain: don't allow sites to set two-component domains with |
---|
533 | country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc. |
---|
534 | This is far from perfect and isn't guaranteed to work! |
---|
535 | |
---|
536 | RFC 2965 protocol strictness switches |
---|
537 | |
---|
538 | strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable |
---|
539 | transactions (usually, an unverifiable transaction is one resulting from |
---|
540 | a redirect or an image hosted on another site); if this is false, cookies |
---|
541 | are NEVER blocked on the basis of verifiability |
---|
542 | |
---|
543 | Netscape protocol strictness switches |
---|
544 | |
---|
545 | strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions |
---|
546 | even to Netscape cookies |
---|
547 | strict_ns_domain: flags indicating how strict to be with domain-matching |
---|
548 | rules for Netscape cookies: |
---|
549 | DomainStrictNoDots: when setting cookies, host prefix must not contain a |
---|
550 | dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because |
---|
551 | www.foo contains a dot) |
---|
552 | DomainStrictNonDomain: cookies that did not explicitly specify a Domain |
---|
553 | cookie-attribute can only be returned to a domain that string-compares |
---|
554 | equal to the domain that set the cookie (eg. rockets.acme.com won't |
---|
555 | be returned cookies from acme.com that had no Domain cookie-attribute) |
---|
556 | DomainRFC2965Match: when setting cookies, require a full RFC 2965 |
---|
557 | domain-match |
---|
558 | DomainLiberal and DomainStrict are the most useful combinations of the |
---|
559 | above flags, for convenience |
---|
560 | strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that |
---|
561 | have names starting with '$' |
---|
562 | strict_ns_set_path: don't allow setting cookies whose path doesn't |
---|
563 | path-match request URI |
---|
564 | |
---|
565 | """ |
---|
566 | |
---|
567 | DomainStrictNoDots = 1 |
---|
568 | DomainStrictNonDomain = 2 |
---|
569 | DomainRFC2965Match = 4 |
---|
570 | |
---|
571 | DomainLiberal = 0 |
---|
572 | DomainStrict = DomainStrictNoDots|DomainStrictNonDomain |
---|
573 | |
---|
def __init__(self,
             blocked_domains=None, allowed_domains=None,
             netscape=True, rfc2965=False,
             # WARNING: this argument will change or go away if is not
             # accepted into the Python standard library in this form!
             # default, ie. treat 2109 as netscape iff not rfc2965
             rfc2109_as_netscape=None,
             hide_cookie2=False,
             strict_domain=False,
             strict_rfc2965_unverifiable=True,
             strict_ns_unverifiable=False,
             strict_ns_domain=DomainLiberal,
             strict_ns_set_initial_dollar=False,
             strict_ns_set_path=False,
             ):
    """
    Constructor arguments should be used as keyword arguments only.

    blocked_domains: sequence of domain names that we never accept cookies
     from, nor return cookies to; stored as a tuple (empty when None)
    allowed_domains: if not None, this is a sequence of the only domains
     for which we accept and return cookies; stored as a tuple, or None

    For other arguments, see CookiePolicy.__doc__ and
    DefaultCookiePolicy.__doc__.

    """
    # User block/allow lists are kept as immutable tuples.
    if blocked_domains is None:
        blocked_domains = ()
    self._blocked_domains = tuple(blocked_domains)

    if allowed_domains is None:
        # None means "no allow-list": every non-blocked domain is allowed
        self._allowed_domains = None
    else:
        self._allowed_domains = tuple(allowed_domains)

    # Protocol selection.
    self.netscape = netscape
    self.rfc2965 = rfc2965
    self.rfc2109_as_netscape = rfc2109_as_netscape
    self.hide_cookie2 = hide_cookie2

    # Strictness switches.
    self.strict_domain = strict_domain
    self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
    self.strict_ns_unverifiable = strict_ns_unverifiable
    self.strict_ns_domain = strict_ns_domain
    self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
    self.strict_ns_set_path = strict_ns_set_path
---|
620 | |
---|
def blocked_domains(self):
    """Return the current block-list as a tuple of domain strings."""
    return self._blocked_domains
---|
def set_blocked_domains(self, blocked_domains):
    """Replace the block-list with a tuple copy of blocked_domains."""
    new_block_list = tuple(blocked_domains)
    self._blocked_domains = new_block_list
---|
627 | |
---|
def is_blocked(self, domain):
    """Return True if domain matches any entry of the block-list.

    Matching is done with user_domain_match.

    """
    for entry in self._blocked_domains:
        if not user_domain_match(domain, entry):
            continue
        return True
    return False
---|
633 | |
---|
def allowed_domains(self):
    """Return the allow-list as a tuple, or None if no allow-list is set."""
    return self._allowed_domains
---|
def set_allowed_domains(self, allowed_domains):
    """Set the allow-list from a sequence of domains, or clear it with None.

    A sequence is stored as a tuple copy.

    """
    if allowed_domains is None:
        self._allowed_domains = None
    else:
        self._allowed_domains = tuple(allowed_domains)
---|
642 | |
---|
def is_not_allowed(self, domain):
    """Return True if an allow-list is set and domain matches none of it.

    With no allow-list (None) every domain is allowed, so the result is
    False.  Matching is done with user_domain_match.

    """
    allow_list = self._allowed_domains
    if allow_list is None:
        return False
    for entry in allow_list:
        if user_domain_match(domain, entry):
            return False
    return True
---|
650 | |
---|
def set_ok(self, cookie, request):
    """
    Return True if cookie passes every set_ok_* check, else False.

    The checks run in a fixed order (version, verifiability, name, path,
    domain, port) and stop at the first failure.

    If you override set_ok, be sure to call this method.  If it returns
    false, so should your subclass (assuming your subclass wants to be more
    strict about which cookies to accept).

    """
    debug(" - checking cookie %s", cookie)

    assert cookie.name is not None

    check_names = ("version", "verifiability", "name", "path", "domain",
                   "port")
    for suffix in check_names:
        # looked up on self so subclasses can override individual checks
        check = getattr(self, "set_ok_"+suffix)
        if not check(cookie, request):
            return False

    return True
---|
669 | |
---|
def set_ok_version(self, cookie, request):
    """Return False if the cookie's protocol version is not acceptable.

    Rejects cookies with no version, version > 0 cookies when RFC 2965
    handling is off, and version 0 cookies when Netscape handling is off.

    """
    version = cookie.version
    if version is None:
        # Version is always set to 0 by parse_ns_headers if it's a Netscape
        # cookie, so this must be an invalid RFC 2965 cookie.
        debug(" Set-Cookie2 without version attribute (%s)", cookie)
        return False
    if version > 0:
        if not self.rfc2965:
            debug(" RFC 2965 cookies are switched off")
            return False
    elif version == 0 and not self.netscape:
        debug(" Netscape cookies are switched off")
        return False
    return True
---|
683 | |
---|
def set_ok_verifiability(self, cookie, request):
    """Return False if cookie comes from a blocked unverifiable transaction.

    Only unverifiable requests to a third-party host are considered, and
    only when the strictness switch for the cookie's version is on.

    """
    if not (request.unverifiable and is_third_party(request)):
        return True
    if cookie.version > 0 and self.strict_rfc2965_unverifiable:
        debug(" third-party RFC 2965 cookie during "
              "unverifiable transaction")
        return False
    if cookie.version == 0 and self.strict_ns_unverifiable:
        debug(" third-party Netscape cookie during "
              "unverifiable transaction")
        return False
    return True
---|
695 | |
---|
def set_ok_name(self, cookie, request):
    """Return False for a version 0 cookie whose name starts with '$'.

    Only enforced when strict_ns_set_initial_dollar is set.

    """
    # Try and stop servers setting V0 cookies designed to hack other
    # servers that know both V0 and V1 protocols.
    if cookie.version != 0 or not self.strict_ns_set_initial_dollar:
        return True
    if cookie.name.startswith("$"):
        debug(" illegal name (starts with '$'): '%s'", cookie.name)
        return False
    return True
---|
704 | |
---|
def set_ok_path(self, cookie, request):
    """Return False if an explicit cookie path doesn't prefix the request path.

    Always checked for version > 0 cookies; for version 0 cookies only
    when strict_ns_set_path is set.  Cookies without an explicit Path are
    always accepted.

    """
    if not cookie.path_specified:
        return True
    req_path = request_path(request)
    must_prefix = (cookie.version > 0 or
                   (cookie.version == 0 and self.strict_ns_set_path))
    if must_prefix and not req_path.startswith(cookie.path):
        debug(" path attribute %s is not a prefix of request "
              "path %s", cookie.path, req_path)
        return False
    return True
---|
715 | |
---|
def set_ok_countrycode_domain(self, cookie, request):
    """Return False if explicit cookie domain is not acceptable.

    With strict_domain set, an explicitly specified domain of the form
    ".<sld>.<tld>" is rejected when <tld> has two characters and <sld> is
    one of a fixed list of generic labels (eg. ".co.uk").

    Called by set_ok_domain, for convenience of overriding by
    subclasses.

    """
    if not (cookie.domain_specified and self.strict_domain):
        return True
    domain = cookie.domain
    # since domain was specified, we know that:
    assert domain.startswith(".")
    if domain.count(".") != 2:
        return True
    # domain like .foo.bar
    last_dot = domain.rfind(".")
    tld = domain[last_dot+1:]
    sld = domain[1:last_dot]
    generic_labels = [
        "co", "ac",
        "com", "edu", "org", "net", "gov", "mil", "int",
        "aero", "biz", "cat", "coop", "info", "jobs", "mobi",
        "museum", "name", "pro", "travel",
        ]
    if sld.lower() in generic_labels and len(tld) == 2:
        # domain like .co.uk
        return False
    return True
---|
742 | |
---|
def set_ok_domain(self, cookie, request):
    """Return True if the cookie's domain is acceptable for this request.

    Checks run in this order, and the first failure is logged with
    debug() and returns False:

     - the domain must pass is_blocked / is_not_allowed;
     - set_ok_countrycode_domain must accept the cookie;
     - if the domain was explicitly specified:
        - it must contain an embedded dot (".local" is exempt);
        - version 0: the effective request-host, with or without an
          added initial dot, must end with the domain;
        - version > 0, or DomainRFC2965Match set in strict_ns_domain:
          the effective request-host must domain-match the domain;
        - version > 0, or DomainStrictNoDots set in strict_ns_domain:
          the part of the request-host left of the domain must not
          contain a dot (unless the host matches IPV4_RE).

    """
    if self.is_blocked(cookie.domain):
        debug(" domain %s is in user block-list", cookie.domain)
        return False
    if self.is_not_allowed(cookie.domain):
        debug(" domain %s is not in user allow-list", cookie.domain)
        return False
    if not self.set_ok_countrycode_domain(cookie, request):
        debug(" country-code second level domain %s", cookie.domain)
        return False
    if cookie.domain_specified:
        req_host, erhn = eff_request_host(request)
        domain = cookie.domain
        if domain.startswith("."):
            undotted_domain = domain[1:]
        else:
            undotted_domain = domain
        embedded_dots = (undotted_domain.find(".") >= 0)
        if not embedded_dots and domain != ".local":
            debug(" non-local domain %s contains no embedded dot",
                  domain)
            return False
        if cookie.version == 0:
            if (not erhn.endswith(domain) and
                (not erhn.startswith(".") and
                 not ("."+erhn).endswith(domain))):
                # (message previously read "does not end end with")
                debug(" effective request-host %s (even with added "
                      "initial dot) does not end with %s",
                      erhn, domain)
                return False
        if (cookie.version > 0 or
            (self.strict_ns_domain & self.DomainRFC2965Match)):
            if not domain_match(erhn, domain):
                debug(" effective request-host %s does not domain-match "
                      "%s", erhn, domain)
                return False
        if (cookie.version > 0 or
            (self.strict_ns_domain & self.DomainStrictNoDots)):
            # whatever precedes the cookie domain in the request-host
            host_prefix = req_host[:-len(domain)]
            if (host_prefix.find(".") >= 0 and
                not IPV4_RE.search(req_host)):
                debug(" host prefix %s for domain %s contains a dot",
                      host_prefix, domain)
                return False
    return True
---|
788 | |
---|
def set_ok_port(self, cookie, request):
    """Return False if an explicit cookie port list excludes the request.

    Cookies without an explicit port list are always accepted.  Otherwise
    the comma-separated list is scanned in order: a non-numeric entry
    rejects the cookie, and an entry equal to the request port (as a
    string, "80" if the request port is unknown) accepts it.

    """
    if not cookie.port_specified:
        return True

    req_port = request_port(request)
    if req_port is None:
        req_port = "80"
    else:
        req_port = str(req_port)

    for candidate in cookie.port.split(","):
        try:
            int(candidate)
        except ValueError:
            debug(" bad port %s (not numeric)", candidate)
            return False
        if candidate == req_port:
            return True

    debug(" request port (%s) not found in %s",
          req_port, cookie.port)
    return False
---|
809 | |
---|
def return_ok(self, cookie, request):
    """Return True if the cookie may be sent back with this request.

    Runs the return_ok_version, return_ok_verifiability, return_ok_secure,
    return_ok_expires, return_ok_port and return_ok_domain checks in that
    order and stops at the first one that fails.

    If you override return_ok, be sure to call this method.  If it returns
    false, so should your subclass (assuming your subclass wants to be more
    strict about which cookies to return).

    """
    # Path has already been checked by path_return_ok, and domain blocking
    # done by domain_return_ok.
    debug(" - checking cookie %s", cookie)

    checks = ("version", "verifiability", "secure", "expires", "port",
              "domain")
    for suffix in checks:
        check = getattr(self, "return_ok_" + suffix)
        if not check(cookie, request):
            return False
    return True
---|
827 | |
---|
def return_ok_version(self, cookie, request):
    """Return False if the cookie's protocol version is switched off.

    Version > 0 cookies need self.rfc2965; version 0 cookies need
    self.netscape.

    """
    version = cookie.version
    if version > 0:
        if not self.rfc2965:
            debug(" RFC 2965 cookies are switched off")
            return False
    elif version == 0 and not self.netscape:
        debug(" Netscape cookies are switched off")
        return False
    return True
---|
836 | |
---|
def return_ok_verifiability(self, cookie, request):
    """Return False for a third-party cookie in an unverifiable transaction.

    Only applies when request.unverifiable is true and
    is_third_party(request) agrees; then strict_rfc2965_unverifiable
    governs version > 0 cookies and strict_ns_unverifiable governs
    version 0 cookies.

    """
    if not (request.unverifiable and is_third_party(request)):
        return True

    version = cookie.version
    if version > 0:
        if self.strict_rfc2965_unverifiable:
            debug(" third-party RFC 2965 cookie during unverifiable "
                  "transaction")
            return False
    elif version == 0 and self.strict_ns_unverifiable:
        debug(" third-party Netscape cookie during unverifiable "
              "transaction")
        return False
    return True
---|
848 | |
---|
def return_ok_secure(self, cookie, request):
    """Return False if a secure cookie would go out on a non-https request."""
    if not cookie.secure:
        return True
    if request.get_type() == "https":
        return True
    debug(" secure cookie with non-secure request")
    return False
---|
854 | |
---|
def return_ok_expires(self, cookie, request):
    """Return False if the cookie has expired as of self._now."""
    expired = cookie.is_expired(self._now)
    if not expired:
        return True
    debug(" cookie expired")
    return False
---|
860 | |
---|
def return_ok_port(self, cookie, request):
    """Return False if the cookie's port list excludes the request port.

    A cookie with no port restriction (false cookie.port) always passes.
    Otherwise the request port ("80" if unknown) must equal one of the
    comma-separated entries of cookie.port.

    """
    if not cookie.port:
        return True

    req_port = request_port(request)
    if req_port is None:
        req_port = "80"

    if req_port in cookie.port.split(","):
        return True

    debug(" request port %s does not match cookie port %s",
          req_port, cookie.port)
    return False
---|
874 | |
---|
def return_ok_domain(self, cookie, request):
    """Return False if the cookie's domain does not fit the request host.

    Three rules are applied in order:

     - version 0 cookie with no explicit domain, when DomainStrictNonDomain
       is set in strict_ns_domain: the domain must string-compare equal to
       the effective request-host;
     - version > 0: the effective request-host must domain-match;
     - version 0: "." + effective request-host must end with the domain.

    """
    req_host, erhn = eff_request_host(request)
    domain = cookie.domain
    version = cookie.version

    # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
    if version == 0 and (self.strict_ns_domain & self.DomainStrictNonDomain):
        if not cookie.domain_specified and domain != erhn:
            debug(" cookie with unspecified domain does not string-compare "
                  "equal to request domain")
            return False

    if version > 0:
        if not domain_match(erhn, domain):
            debug(" effective request-host name %s does not domain-match "
                  "RFC 2965 cookie domain %s", erhn, domain)
            return False
    if version == 0:
        if not ("." + erhn).endswith(domain):
            debug(" request-host %s does not match Netscape cookie domain "
                  "%s", req_host, domain)
            return False
    return True
---|
896 | |
---|
def domain_return_ok(self, domain, request):
    """Cheap pre-filter: could cookies stored under domain match request?

    Returns False when neither the request-host nor the effective
    request-host (each given a leading dot if it lacks one) ends with
    domain, or when is_blocked / is_not_allowed rejects the domain.

    """
    # Liberal check of domain.  This is here as an optimization to avoid
    # having to load lots of MSIE cookie files unless necessary.

    # Both host names are forced to start with a dot, so as to err on the
    # side of letting cookies through.
    dotted_hosts = []
    for host in eff_request_host(request):
        if not host.startswith("."):
            host = "." + host
        dotted_hosts.append(host)
    dotted_req_host, dotted_erhn = dotted_hosts

    if (not dotted_req_host.endswith(domain) and
        not dotted_erhn.endswith(domain)):
        return False

    if self.is_blocked(domain):
        debug(" domain %s is in user block-list", domain)
        return False
    if self.is_not_allowed(domain):
        debug(" domain %s is not in user allow-list", domain)
        return False

    return True
---|
922 | |
---|
def path_return_ok(self, path, request):
    """Return True if the request path starts with the cookie path."""
    debug("- checking cookie path=%s", path)
    req_path = request_path(request)
    if req_path.startswith(path):
        return True
    debug(" %s does not path-match %s", req_path, path)
    return False
---|
930 | |
---|
931 | |
---|
def vals_sorted_by_key(adict):
    """Return the values of mapping adict as a list, in ascending key order.

    The keys are copied into a fresh list before sorting, so the mapping's
    keys() method may return any iterable (not only a mutable list), and
    the result is always a real list.

    """
    keys = list(adict.keys())
    keys.sort()
    return [adict.get(key) for key in keys]
---|
936 | |
---|
class MappingIterator:
    """Iterates over nested mapping, depth-first, in sorted order by key.

    Only the non-mapping leaves are yielded.  An object counts as a mapping
    if it has an "items" attribute.

    """
    def __init__(self, mapping):
        # LIFO stack of (values, index of next value, owning mapping) frames
        self._s = [(vals_sorted_by_key(mapping), 0, None)]

    def __iter__(self):
        return self

    def next(self):
        # An explicit stack stands in for a generator here.
        stack = self._s
        while True:
            if not stack:
                raise StopIteration()
            values, index, owner = stack.pop()
            if index >= len(values):
                # frame exhausted: drop it and resume the parent frame
                continue
            candidate = values[index]
            stack.append((values, index + 1, owner))
            try:
                candidate.items
            except AttributeError:
                # non-mapping: this is a leaf
                return candidate
            # mapping: descend into it
            stack.append((vals_sorted_by_key(candidate), 0, candidate))
---|
965 | |
---|
966 | |
---|
# Sentinel passed as the default (second) argument of dict.get, so that a
# key that is missing can be told apart from a key stored with value None.
class Absent:
    """Marker meaning "no such key"; used as the class itself, never instantiated here."""
---|
970 | |
---|
971 | class CookieJar: |
---|
972 | """Collection of HTTP cookies. |
---|
973 | |
---|
974 | You may not need to know about this class: try mechanize.urlopen(). |
---|
975 | |
---|
976 | The major methods are extract_cookies and add_cookie_header; these are all |
---|
977 | you are likely to need. |
---|
978 | |
---|
979 | CookieJar supports the iterator protocol: |
---|
980 | |
---|
981 | for cookie in cookiejar: |
---|
982 | # do something with cookie |
---|
983 | |
---|
984 | Methods: |
---|
985 | |
---|
986 | add_cookie_header(request) |
---|
987 | extract_cookies(response, request) |
---|
988 | make_cookies(response, request) |
---|
989 | set_cookie_if_ok(cookie, request) |
---|
990 | set_cookie(cookie) |
---|
991 | clear_session_cookies() |
---|
992 | clear_expired_cookies() |
---|
993 | clear(domain=None, path=None, name=None) |
---|
994 | |
---|
995 | Public attributes |
---|
996 | |
---|
997 | policy: CookiePolicy object |
---|
998 | |
---|
999 | """ |
---|
1000 | |
---|
1001 | non_word_re = re.compile(r"\W") |
---|
1002 | quote_re = re.compile(r"([\"\\])") |
---|
1003 | strict_domain_re = re.compile(r"\.?[^.]*") |
---|
1004 | domain_re = re.compile(r"[^.]*") |
---|
1005 | dots_re = re.compile(r"^\.+") |
---|
1006 | |
---|
def __init__(self, policy=None):
    """Create an empty cookie jar.

    policy: CookiePolicy object used to decide which cookies are accepted
     and returned; a DefaultCookiePolicy is created if None is given.

    """
    # cookies are stored as {domain: {path: {name: cookie}}}
    self._cookies = {}

    # for __getitem__ iteration in pre-2.2 Pythons
    self._prev_getitem_index = 0

    if policy is None:
        self._policy = DefaultCookiePolicy()
    else:
        self._policy = policy
---|
1020 | |
---|
def set_policy(self, policy):
    """Replace the CookiePolicy object consulted by this jar."""
    self._policy = policy
---|
1023 | |
---|
def _cookies_for_domain(self, domain, request):
    """Return the cookies stored under domain that may go with request.

    The policy is consulted at three levels: domain_return_ok for the
    domain as a whole, path_return_ok for each stored path, and return_ok
    for each individual cookie.

    """
    policy = self._policy
    if not policy.domain_return_ok(domain, request):
        return []
    debug("Checking %s for cookies to return", domain)

    matches = []
    for path, cookies_by_name in self._cookies[domain].items():
        if policy.path_return_ok(path, request):
            for cookie in cookies_by_name.values():
                if policy.return_ok(cookie, request):
                    debug(" it's a match")
                    matches.append(cookie)
                else:
                    debug(" not returning cookie")
    return matches
---|
1041 | |
---|
def _cookies_for_request(self, request):
    """Return a list of cookies to be returned to server.

    Concatenates the results of _cookies_for_domain for every domain
    currently held in the jar.

    """
    matching = []
    for domain in self._cookies.keys():
        matching += self._cookies_for_domain(domain, request)
    return matching
---|
1048 | |
---|
def _cookie_attrs(self, cookies):
    """Return a list of cookie-attributes to be returned to server.

    like ['foo="bar"; $Path="/"', ...]

    The cookies list is sorted in place, longest path first.  The $Version
    attribute is also added when appropriate (currently only once per
    request, taken from the first cookie after sorting).

    """
    # add cookies in order of most specific (ie. longest) path first;
    # the original position breaks ties, which keeps the sort stable
    decorated = [(-len(cookie.path), position, cookie)
                 for position, cookie in enumerate(cookies)]
    decorated.sort()
    cookies[:] = [entry[2] for entry in decorated]

    attrs = []
    for position, cookie in enumerate(cookies):
        # set version of Cookie header
        # XXX
        # What should it be if multiple matching Set-Cookie headers have
        #  different versions themselves?
        # Answer: there is no answer; was supposed to be settled by
        #  RFC 2965 errata, but that may never appear...
        version = cookie.version
        if position == 0 and version > 0:
            attrs.append("$Version=%s" % version)

        # add cookie-attributes to be returned in Cookie header
        value = cookie.value
        if value is None:
            attrs.append(cookie.name)
        else:
            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            #  intact, due to the poorly-specified Netscape Cookie: syntax)
            if version > 0 and self.non_word_re.search(value):
                value = self.quote_re.sub(r"\\\1", value)
            attrs.append("%s=%s" % (cookie.name, value))

        if version > 0:
            if cookie.path_specified:
                attrs.append('$Path="%s"' % cookie.path)
            domain = cookie.domain
            if domain.startswith("."):
                if not cookie.domain_initial_dot:
                    # the server did not send the dot: strip the added one
                    domain = domain[1:]
                attrs.append('$Domain="%s"' % domain)
            if cookie.port is not None:
                if cookie.port_specified:
                    attrs.append('$Port="%s"' % cookie.port)
                else:
                    attrs.append("$Port")

    return attrs
---|
1108 | |
---|
def add_cookie_header(self, request):
    """Add correct Cookie: header to request (urllib2.Request object).

    The Cookie2 header is also added unless policy.hide_cookie2 is true.

    Neither header is added if the request already carries one of the same
    name.  Expired cookies are cleared from the jar afterwards.

    The request object (usually a urllib2.Request instance) must support
    the methods get_full_url, get_host, get_type, has_header, get_header,
    header_items and add_unredirected_header, as documented by urllib2, and
    the port attribute (the port number).  Actually,
    RequestUpgradeProcessor will automatically upgrade your Request object
    to one with has_header, get_header, header_items and
    add_unredirected_header, if it lacks those methods, for compatibility
    with pre-2.4 versions of urllib2.

    """
    debug("add_cookie_header")
    # the policy and the jar must agree on the current time
    self._policy._now = self._now = int(time.time())

    # Only the attributes rfc2965 and hide_cookie2 are read from the policy
    # here.  The unused lookups of strict_ns_domain / DomainStrictNonDomain
    # that used to live here made policies without those attributes fail.
    cookies = self._cookies_for_request(request)

    attrs = self._cookie_attrs(cookies)
    if attrs:
        if not request.has_header("Cookie"):
            request.add_unredirected_header("Cookie", "; ".join(attrs))

    # if necessary, advertise that we know RFC 2965
    if self._policy.rfc2965 and not self._policy.hide_cookie2:
        for cookie in cookies:
            if cookie.version != 1 and not request.has_header("Cookie2"):
                request.add_unredirected_header("Cookie2", '$Version="1"')
                break

    self.clear_expired_cookies()
---|
1146 | |
---|
def _normalized_cookie_tuples(self, attrs_set):
    """Return list of tuples containing normalised cookie information.

    attrs_set is the list of lists of key,value pairs extracted from
    the Set-Cookie or Set-Cookie2 headers.

    Tuples are name, value, standard, rest, where name and value are the
    cookie name and value, standard is a dictionary containing the standard
    cookie-attributes (discard, secure, version, expires or max-age,
    domain, path and port) and rest is a dictionary containing the rest of
    the cookie-attributes.

    A cookie whose standard attributes are unusable (missing domain value,
    missing or non-numeric max-age, missing value for any attribute other
    than port, comment or commenturl) is logged and left out of the result;
    the other cookies are unaffected.

    """
    cookie_tuples = []

    boolean_attrs = "discard", "secure"
    value_attrs = ("version",
                   "expires", "max-age",
                   "domain", "path", "port",
                   "comment", "commenturl")

    for cookie_attrs in attrs_set:
        name, value = cookie_attrs[0]

        # Build dictionary of standard cookie-attributes (standard) and
        # dictionary of other cookie-attributes (rest).

        # Note: expiry time is normalised to seconds since epoch.  V0
        # cookies should have the Expires cookie-attribute, and V1 cookies
        # should have Max-Age, but since V1 includes RFC 2109 cookies (and
        # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
        # accept either (but prefer Max-Age).
        max_age_set = False

        bad_cookie = False

        standard = {}
        rest = {}
        for k, v in cookie_attrs[1:]:
            lc = k.lower()
            # don't lose case distinction for unknown fields
            if lc in value_attrs or lc in boolean_attrs:
                k = lc
            if k in boolean_attrs and v is None:
                # boolean cookie-attribute is present, but has no value
                # (like "discard", rather than "port=80")
                v = True
            if k in standard:
                # only first value is significant
                continue
            if k == "domain":
                if v is None:
                    debug(" missing value for domain attribute")
                    bad_cookie = True
                    break
                # RFC 2965 section 3.3.3
                v = v.lower()
            if k == "expires":
                if max_age_set:
                    # Prefer max-age to expires (like Mozilla)
                    continue
                if v is None:
                    debug(" missing or invalid value for expires "
                          "attribute: treating as session cookie")
                    continue
            if k == "max-age":
                max_age_set = True
                try:
                    v = int(v)
                except (TypeError, ValueError):
                    # TypeError: attribute present without a value (v is
                    # None); ValueError: value is not an integer
                    debug(" missing or invalid (non-numeric) value for "
                          "max-age attribute")
                    bad_cookie = True
                    break
                # convert RFC 2965 Max-Age to seconds since epoch
                # XXX Strictly you're supposed to follow RFC 2616
                #   age-calculation rules.  Remember that zero Max-Age is
                #   a request to discard (old and new) cookie, though.
                k = "expires"
                v = self._now + v
            if (k in value_attrs) or (k in boolean_attrs):
                if (v is None and
                    k not in ["port", "comment", "commenturl"]):
                    debug(" missing value for %s attribute", k)
                    bad_cookie = True
                    break
                standard[k] = v
            else:
                rest[k] = v

        if bad_cookie:
            continue

        cookie_tuples.append((name, value, standard, rest))

    return cookie_tuples
---|
1243 | |
---|
def _cookie_from_cookie_tuple(self, tup, request):
    """Build a Cookie from one normalised (name, value, standard, rest) tuple.

    standard is the dict of standard cookie-attributes and rest the dict of
    all others, as produced by _normalized_cookie_tuples.  Attributes that
    are absent are defaulted from the request.

    Returns None instead of a Cookie when the version cookie-attribute is
    not an integer (the cookie is ignored), or when the expiry time is not
    in the future (any stored cookie with the same domain, path and name is
    removed from the jar).

    """
    name, value, standard, rest = tup

    version = standard.get("version", None)
    if version is not None:
        try:
            version = int(version)
        except (TypeError, ValueError):
            # invalid version: ignore this cookie only, rather than letting
            # the exception discard every cookie in the response
            return None

    domain = standard.get("domain", Absent)
    path = standard.get("path", Absent)
    port = standard.get("port", Absent)
    expires = standard.get("expires", Absent)

    # set the easy defaults
    secure = standard.get("secure", False)
    # (discard is also set if expires is Absent)
    discard = standard.get("discard", False)
    comment = standard.get("comment", None)
    comment_url = standard.get("commenturl", None)

    # set default path
    if path is not Absent and path != "":
        path_specified = True
        path = escape_path(path)
    else:
        path_specified = False
        path = request_path(request)
        i = path.rfind("/")
        if i != -1:
            if version == 0:
                # Netscape spec parts company from reality here
                path = path[:i]
            else:
                path = path[:i+1]
        if len(path) == 0: path = "/"

    # set default domain
    domain_specified = domain is not Absent
    # but first we have to remember whether it starts with a dot
    domain_initial_dot = False
    if domain_specified:
        domain_initial_dot = bool(domain.startswith("."))
    if domain is Absent:
        req_host, erhn = eff_request_host(request)
        domain = erhn
    elif not domain.startswith("."):
        domain = "."+domain

    # set default port
    port_specified = False
    if port is not Absent:
        if port is None:
            # Port attr present, but has no value: default to request port.
            # Cookie should then only be sent back on that port.
            port = request_port(request)
        else:
            port_specified = True
            port = re.sub(r"\s+", "", port)
    else:
        # No port attr present.  Cookie can be sent back on any port.
        port = None

    # set default expires and discard
    if expires is Absent:
        expires = None
        discard = True
    elif expires <= self._now:
        # Expiry date in past is request to delete cookie.  This can't be
        # in DefaultCookiePolicy, because can't delete cookies there.
        try:
            self.clear(domain, path, name)
        except KeyError:
            # nothing stored under that key: nothing to delete
            pass
        debug("Expiring cookie, domain='%s', path='%s', name='%s'",
              domain, path, name)
        return None

    return Cookie(version,
                  name, value,
                  port, port_specified,
                  domain, domain_specified, domain_initial_dot,
                  path, path_specified,
                  secure,
                  expires,
                  discard,
                  comment,
                  comment_url,
                  rest)
---|
1331 | |
---|
def _cookies_from_attrs_set(self, attrs_set, request):
    """Return the Cookie objects that can be built from attrs_set.

    attrs_set is normalised with _normalized_cookie_tuples, then each tuple
    is passed to _cookie_from_cookie_tuple; false results (such as None)
    are dropped.

    """
    built = [self._cookie_from_cookie_tuple(tup, request)
             for tup in self._normalized_cookie_tuples(attrs_set)]
    return [cookie for cookie in built if cookie]
---|
1340 | |
---|
def _process_rfc2109_cookies(self, cookies):
    """Flag version 1 cookies as RFC 2109, optionally downgrading them.

    Every cookie with version 1 gets rfc2109 = True.  Its version is also
    reset to 0 when policy.rfc2109_as_netscape is true, or, if that setting
    is None, when policy.rfc2965 is false.

    """
    as_netscape = self._policy.rfc2109_as_netscape
    if as_netscape is None:
        as_netscape = not self._policy.rfc2965

    for cookie in cookies:
        if cookie.version != 1:
            continue
        cookie.rfc2109 = True
        if as_netscape:
            # treat 2109 cookies as Netscape cookies rather than
            # as RFC2965 cookies
            cookie.version = 0
---|
1353 | |
---|
def make_cookies(self, response, request):
    """Return sequence of Cookie objects extracted from response object.

    Cookies are built from the Set-Cookie2 headers first, then from the
    Set-Cookie headers (if the policy allows Netscape cookies).  A failure
    while processing either kind of header yields no cookies of that kind.

    See extract_cookies.__doc__ for the interfaces required of the
    response and request arguments.

    """
    # get cookie-attributes for RFC 2965 and Netscape protocols
    headers = response.info()
    rfc2965_hdrs = headers.getheaders("Set-Cookie2")
    ns_hdrs = headers.getheaders("Set-Cookie")

    policy = self._policy
    rfc2965 = policy.rfc2965
    netscape = policy.netscape

    # quick exit unless some header is present whose protocol is enabled
    if not ((rfc2965_hdrs and rfc2965) or (ns_hdrs and netscape)):
        return []

    try:
        cookies = self._cookies_from_attrs_set(
            split_header_words(rfc2965_hdrs), request)
    except:
        reraise_unmasked_exceptions()
        cookies = []

    if not (ns_hdrs and netscape):
        return cookies

    try:
        # RFC 2109 and Netscape cookies
        ns_cookies = self._cookies_from_attrs_set(
            parse_ns_headers(ns_hdrs), request)
    except:
        reraise_unmasked_exceptions()
        ns_cookies = []
    self._process_rfc2109_cookies(ns_cookies)

    # Look for Netscape cookies (from Set-Cookie headers) that match
    # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
    # For each match, keep the RFC 2965 cookie and ignore the Netscape
    # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
    # bundled in with the Netscape cookies for this purpose, which is
    # reasonable behaviour.
    if rfc2965:
        seen = {}
        for cookie in cookies:
            seen[(cookie.domain, cookie.path, cookie.name)] = None
        ns_cookies = [
            ns_cookie for ns_cookie in ns_cookies
            if (ns_cookie.domain, ns_cookie.path, ns_cookie.name)
            not in seen]

    if ns_cookies:
        cookies.extend(ns_cookies)

    return cookies
---|
1412 | |
---|
def set_cookie_if_ok(self, cookie, request):
    """Set a cookie if policy says it's OK to do so.

    The current time is recorded on both the policy and the jar before the
    policy's set_ok method is consulted.

    cookie: mechanize.Cookie instance
    request: see extract_cookies.__doc__ for the required interface

    """
    now = int(time.time())
    self._policy._now = now
    self._now = now

    if not self._policy.set_ok(cookie, request):
        return
    self.set_cookie(cookie)
---|
1424 | |
---|
def set_cookie(self, cookie):
    """Set a cookie, without checking whether or not it should be set.

    The cookie is filed under its domain, path and name, replacing any
    cookie already stored under the same three keys.

    cookie: mechanize.Cookie instance
    """
    by_path = self._cookies.setdefault(cookie.domain, {})
    by_name = by_path.setdefault(cookie.path, {})
    by_name[cookie.name] = cookie
---|
1436 | |
---|
def extract_cookies(self, response, request):
    """Extract cookies from response, where allowable given the request.

    Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
    object passed as argument.  Any of these headers that are found are
    used to update the state of the object (subject to the policy.set_ok
    method's approval).

    The response object (usually the result of a call to
    mechanize.urlopen, or similar) should support an info method, which
    returns a mimetools.Message object (in fact, the 'mimetools.Message
    object' may be any object that provides a getallmatchingheaders
    method).

    The request object (usually a urllib2.Request instance) must support
    the methods get_full_url and get_host, as documented by urllib2, and
    the port attribute (the port number).  The request is used to set
    default values for cookie-attributes as well as for checking that the
    cookie is OK to be set.

    """
    debug("extract_cookies: %s", response.info())
    now = int(time.time())
    self._policy._now = now
    self._now = now

    for cookie in self.make_cookies(response, request):
        if not self._policy.set_ok(cookie, request):
            continue
        debug(" setting cookie: %s", cookie)
        self.set_cookie(cookie)
---|
1465 | |
---|
def clear(self, domain=None, path=None, name=None):
    """Clear some cookies.

    With no arguments, every cookie is removed.  With domain only, all
    cookies of that domain go; with domain and path, all cookies stored
    under that path of the domain; with all three, the single cookie of
    that name, path and domain.

    Raises ValueError if name is given without both domain and path, or
    path without domain.  Raises KeyError if no matching cookie exists.

    """
    if name is not None:
        if domain is None or path is None:
            raise ValueError(
                "domain and path must be given to remove a cookie by name")
        del self._cookies[domain][path][name]
        return

    if path is not None:
        if domain is None:
            raise ValueError(
                "domain must be given to remove cookies by path")
        del self._cookies[domain][path]
        return

    if domain is not None:
        del self._cookies[domain]
        return

    self._cookies = {}
---|
1492 | |
---|
def clear_session_cookies(self):
    """Remove every session cookie from the jar.

    A cookie counts as a session cookie when its discard attribute is
    true: it arrived with neither a Max-Age nor an Expires
    cookie-attribute, or with an explicit Discard cookie-attribute, or
    ended up flagged for discard some other way.  In an interactive
    browser the session normally ends when the browser window is closed.

    Note that the save method skips session cookies in any case, unless it
    is passed a true ignore_discard argument.

    """
    remove = self.clear
    # NOTE(review): cookies are removed while the jar is being iterated;
    # this relies on the jar's iterator tolerating removals -- confirm
    # against MappingIterator.
    for candidate in self:
        if not candidate.discard:
            continue
        remove(candidate.domain, candidate.path, candidate.name)
---|
1509 | |
---|
def clear_expired_cookies(self):
    """Remove every cookie that has expired.

    Calling this yourself is rarely necessary: expired cookies are never
    returned to the server (provided DefaultCookiePolicy is in use),
    CookieJar calls this method itself every so often, and the save method
    leaves expired cookies out anyway (unless it is passed a true
    ignore_expires argument).

    """
    # One timestamp for the whole sweep, so every cookie is judged
    # against the same instant.
    reference_time = time.time()
    remove = self.clear
    for candidate in self:
        if not candidate.is_expired(reference_time):
            continue
        remove(candidate.domain, candidate.path, candidate.name)
---|
1524 | |
---|
def __getitem__(self, i):
    """Return the i-th cookie, for strictly sequential access only.

    Index 0 starts a fresh pass over the jar; every later call must ask
    for exactly the index following the previous one.

    Raises IndexError if an index is requested out of sequence, or once
    the underlying iterator has run out of cookies.

    """
    if i == 0:
        # Asking for the first item (re)starts the walk.
        self._getitem_iterator = self.__iter__()
    else:
        if self._prev_getitem_index != i-1:
            raise IndexError(
                "CookieJar.__getitem__ only supports sequential iteration")
    self._prev_getitem_index = i
    try:
        item = self._getitem_iterator.next()
    except StopIteration:
        # Running off the end is reported the way a sequence would.
        raise IndexError()
    return item
---|
1535 | |
---|
def __iter__(self):
    """Return a MappingIterator built over the jar's cookie store.

    The store (self._cookies) is the nested mapping that clear() indexes
    as [domain][path][name].

    """
    store = self._cookies
    return MappingIterator(store)
---|
1538 | |
---|
def __len__(self):
    """Return number of contained cookies."""
    # There is no stored count: the jar is walked once and each cookie
    # the iterator yields is tallied.
    total = 0
    for _cookie in self:
        total += 1
    return total
---|
1544 | |
---|
def __repr__(self):
    """Return "<CLASS[COOKIE, ...]>", using repr() of each cookie."""
    rendered = [repr(cookie) for cookie in self]
    return "<%s[%s]>" % (self.__class__, ", ".join(rendered))
---|
1549 | |
---|
def __str__(self):
    """Return "<CLASS[COOKIE, ...]>", using str() of each cookie."""
    rendered = [str(cookie) for cookie in self]
    return "<%s[%s]>" % (self.__class__, ", ".join(rendered))
---|
1554 | |
---|
1555 | |
---|
class LoadError(Exception):
    """Raised when a cookie file is not in the format the jar understands."""
---|
1557 | |
---|
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    Additional methods

    save(filename=None, ignore_discard=False, ignore_expires=False)
    load(filename=None, ignore_discard=False, ignore_expires=False)
    revert(filename=None, ignore_discard=False, ignore_expires=False)

    Additional public attributes

    filename: filename for loading and saving cookies

    Additional public readable attributes

    delayload: request that cookies are lazily loaded from disk; this is only
     a hint since this only affects performance, not behaviour (unless the
     cookies on disk are changing); a CookieJar object may ignore it (in fact,
     only MSIECookieJar lazily loads cookies at the moment)

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        See FileCookieJar.__doc__ for argument documentation.

        Cookies are NOT loaded from the named file until either the load or
        revert method is called.

        Raises ValueError if filename is neither None nor string-like.

        """
        CookieJar.__init__(self, policy)
        if filename is not None and not isstringlike(filename):
            raise ValueError("filename must be string-like")
        self.filename = filename
        # Stored as a real boolean whatever truthy/falsy value was passed.
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        filename: name of file in which to save cookies
        ignore_discard: save even cookies set to be discarded
        ignore_expires: save even cookies that have expired

        The file is overwritten if it already exists, thus wiping all its
        cookies.  Saved cookies can be restored later using the load or revert
        methods.  If filename is not specified, self.filename is used; if
        self.filename is None, ValueError is raised.

        This base class has no file format of its own: the method always
        raises NotImplementedError here.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Old cookies are kept unless overwritten by newly loaded ones.

        Arguments are as for .save().

        If filename is not specified, self.filename is used; if self.filename
        is None, ValueError is raised.  The named file must be in the format
        understood by the class, or LoadError will be raised.  This format will
        be identical to that written by the save method, unless the load format
        is not sufficiently well understood (as is the case for MSIECookieJar).

        The file is opened here and handed to self._really_load(), which does
        the actual parsing; the file is closed again whether or not parsing
        succeeds.

        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        f = open(filename)
        try:
            # NOTE(review): _really_load is not defined in this class as
            # shown here; presumably each concrete jar supplies it.
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Arguments are as for .load().  If filename is not specified,
        self.filename is used; if self.filename is None, ValueError is raised.

        Raises LoadError (or IOError) if reversion is not successful.  Whatever
        the reason for the failure, the cookies held before the call are put
        back, so the object's state is not altered; the original exception is
        propagated unchanged.

        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        # Snapshot taken before the jar is emptied, so it can be put back.
        old_state = copy.deepcopy(self._cookies)
        self._cookies = {}
        reverted = False
        try:
            self.load(filename, ignore_discard, ignore_expires)
            reverted = True
        finally:
            # Restore on *any* failure, not only LoadError / IOError:
            # otherwise an unexpected error while loading would leave the
            # jar empty, contrary to the documented guarantee.
            if not reverted:
                self._cookies = old_state
---|