"""HTTP cookie handling for web clients.

This module originally developed from my port of Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

Comments to John J Lee <jjl@pobox.com>.


Copyright 2002-2006 John J Lee <jjl@pobox.com>
Copyright 1997-1999 Gisle Aas (original libwww-perl code)
Copyright 2002-2003 Johnny Lee (original MSIE Perl code)

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).

"""
---|
| 34 | |
---|
| 35 | import sys, re, copy, time, struct, urllib, types, logging |
---|
| 36 | try: |
---|
| 37 | import threading |
---|
| 38 | _threading = threading; del threading |
---|
| 39 | except ImportError: |
---|
| 40 | import dummy_threading |
---|
| 41 | _threading = dummy_threading; del dummy_threading |
---|
| 42 | import httplib # only for the default HTTP port |
---|
| 43 | |
---|
# Error text for the case where no filename was passed in and the CookieJar
# instance was not initialised with one either.
# NOTE(review): no user of this constant is visible in this part of the file;
# presumably the file-backed jars use it -- confirm.
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
# The standard HTTP port, kept as a string because ports are handled as
# strings in this module (request_port() returns this value when the request
# host carries no explicit port).
DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
---|
| 47 | |
---|
| 48 | from _headersutil import split_header_words, parse_ns_headers |
---|
| 49 | from _util import isstringlike |
---|
| 50 | import _rfc3986 |
---|
| 51 | |
---|
# Module-wide shortcut: debug(fmt, *args) logs at DEBUG level on the
# "mechanize.cookies" logger.
debug = logging.getLogger("mechanize.cookies").debug
---|
| 53 | |
---|
| 54 | |
---|
def reraise_unmasked_exceptions(unmasked=()):
    """Re-raise the exception being handled unless it is safe to swallow.

    Must be called from inside an ``except`` clause: it relies on a bare
    ``raise`` and on ``sys.exc_info()`` to reach the active exception.

    unmasked: tuple of extra exception classes that must always propagate,
     in addition to KeyboardInterrupt, SystemExit and MemoryError.

    If the exception is swallowed, its traceback is reported through
    ``warnings.warn`` so that the problem is not completely silent.

    """
    # There are a few catch-all except: statements in this module, for
    # catching input that's bad in unexpected ways.
    # This function re-raises some exceptions we don't want to trap.
    import mechanize, warnings
    # When bare-except masking is switched off globally, nothing is swallowed.
    if not mechanize.USE_BARE_EXCEPT:
        raise
    unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
    etype = sys.exc_info()[0]
    if issubclass(etype, unmasked):
        raise
    # swallowed an exception
    import traceback, StringIO
    # Capture the formatted traceback into a string for the warning text.
    f = StringIO.StringIO()
    traceback.print_exc(None, f)
    msg = f.getvalue()
    # stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2)
---|
| 72 | |
---|
| 73 | |
---|
# Matches text ending in ".<digits>"; used as a cheap "looks like an IPv4
# address" test throughout this module.
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name.

    Text is rejected if it is empty, starts or ends with a dot, or ends in
    ".<digits>" (i.e. looks like an IPv4 address).

    """
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    return not (IPV4_RE.search(text) or
                text == "" or
                text[0] == "." or text[-1] == ".")

def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.  The comparison is
    case-insensitive.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # "A has the form NB with N non-empty" means B is a *proper suffix* of A.
    # Merely finding B somewhere inside A is not enough: otherwise
    # "www.acme.com.evil.net" would domain-match ".acme.com".
    has_form_nb = len(A) > len(B) and A.endswith(B)
    return (
        has_form_nb and
        B.startswith(".") and
        is_HDN(B[1:])
        )
---|
| 122 | |
---|
def liberal_is_HDN(text):
    """Return True if text looks roughly like a host domain name.

    Used when accepting/blocking domains: anything that does not end in
    ".<digits>" (the IPV4_RE test) is treated as a domain name.

    """
    looks_like_ip = IPV4_RE.search(text) is not None
    return not looks_like_ip
---|
| 130 | |
---|
def user_domain_match(A, B):
    """Domain comparison used for the block/allow lists.

    A and B may be host domain names or IP addresses; the comparison is
    case-insensitive.  If B starts with a dot, A matches when it ends with B;
    otherwise (and whenever either side looks like an IP address) the two
    must be equal.

    """
    A = A.lower()
    B = B.lower()
    both_are_names = liberal_is_HDN(A) and liberal_is_HDN(B)
    if both_are_names and B.startswith("."):
        # dotted pattern: any host ending in the pattern matches
        return A.endswith(B)
    # IP addresses, and patterns without an initial dot, must match exactly
    return A == B
---|
| 150 | |
---|
# Trailing ":<digits>" port suffix of a host string.
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the request's full URL, falling back to its
    "Host" header when the URL has no authority part.  Any trailing port is
    removed.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    authority = _rfc3986.urlsplit(request.get_full_url())[1]
    if authority is None:
        authority = request.get_header("Host", "")

    # remove port, if present
    without_port = cut_port_re.sub("", authority, 1)
    return without_port.lower()
---|
| 167 | |
---|
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective name
    is the request-host with ".local" appended when the host contains no dot
    (and does not match IPV4_RE); otherwise it equals the request-host.

    """
    req_host = request_host(request)
    if "." in req_host or IPV4_RE.search(req_host):
        return req_host, req_host
    return req_host, req_host + ".local"
---|
| 178 | |
---|
def request_path(request):
    """request-URI, as defined by RFC 2965.

    Built from the path, query and fragment of the request's full URL, with
    the path passed through escape_path().  The result always starts with
    "/".

    """
    full_url = request.get_full_url()
    path, query, frag = _rfc3986.urlsplit(full_url)[2:]
    escaped = escape_path(path)
    req_path = _rfc3986.urlunsplit((None, None, escaped, query, frag))
    if req_path.startswith("/"):
        return req_path
    return "/"+req_path
---|
| 188 | |
---|
def request_port(request):
    """Return the port of the request's host, as a string.

    Returns DEFAULT_HTTP_PORT when the host has no ":port" part, and None
    (after logging a debug message) when the text after the first colon is
    not an integer.

    """
    host = request.get_host()
    colon = host.find(':')
    if colon < 0:
        # no explicit port
        return DEFAULT_HTTP_PORT
    port = host[colon+1:]
    try:
        int(port)
    except ValueError:
        debug("nonnumeric port: '%s'", port)
        return None
    return port
---|
| 202 | |
---|
# Characters, beyond A-Z, a-z, 0-9, '_', '.', and '-', that may appear
# unescaped in a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
# A %-escape: percent sign followed by two hex digits (captured).
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Substitution callback: return the matched %-escape with uppercase hex."""
    hex_digits = match.group(1)
    return "%%%s" % hex_digits.upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # We cannot know which character encoding produced any %-escapes already
    # present in the URL, but one has to be chosen in order to escape invalid
    # path characters.  UTF-8 is used, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, types.UnicodeType):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
---|
| 224 | |
---|
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    first_dot = h.find(".")
    if first_dot < 0:
        # h does not have the form A.B
        return h
    # A would be h[:first_dot]; only B is needed
    b = h[first_dot+1:]
    if is_HDN(h) and ("." in b or b == "local"):
        return "."+b
    return h
---|
| 259 | |
---|
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    req_host = request_host(request)
    # request.origin_req_host holds the origin request's request-host; it was
    # stuffed into request by _urllib2_support.AbstractHTTPHandler
    origin_reach = reach(request.origin_req_host)
    if domain_match(req_host, origin_reach):
        return False
    return True
---|
| 274 | |
---|
| 275 | |
---|
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    version: integer;
    name: string;
    value: string (may be None);
    port: string; None indicates no attribute was supplied (eg. "Port", rather
     than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
     string (eg. "80,8080")
    port_specified: boolean; true if a value was supplied with the Port
     cookie-attribute
    domain: string;
    domain_specified: boolean; true if Domain was explicitly set
    domain_initial_dot: boolean; true if Domain as set in HTTP header by server
     started with a dot (yes, this really is necessary!)
    path: string;
    path_specified: boolean; true if Path was explicitly set
    secure:  boolean; true if should only be returned over secure connection
    expires: integer; seconds since epoch (RFC 2965 cookies should calculate
     this value from the Max-Age attribute)
    discard: boolean, true if this is a session cookie; (if no expires value,
     this should be true)
    comment: string;
    comment_url: string;
    rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
     Set-Cookie2:) header, but had a version cookie-attribute of 1
    rest: mapping of other cookie-attributes

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes on the instance.

        version and expires are converted with int() unless None; domain is
        lowercased; rest is shallow-copied.

        Raises ValueError if port is None while port_specified is True.

        """
        if version is not None: version = int(version)
        if expires is not None: expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        # private copy, so later changes to the caller's mapping (or to ours
        # through set_nonstandard_attr) do not affect each other
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return true if the non-standard cookie-attribute name is present."""
        # "in" rather than dict.has_key(), which is deprecated and no longer
        # exists on Python 3 mappings
        return name in self._rest
    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard cookie-attribute name, or default."""
        return self._rest.get(name, default)
    def set_nonstandard_attr(self, name, value):
        """Set the non-standard cookie-attribute name to value."""
        self._rest[name] = value
    def nonstandard_attr_keys(self):
        """Return the names of all non-standard cookie-attributes."""
        return self._rest.keys()

    def is_expired(self, now=None):
        """Return true if the cookie has an expiry time that is <= now.

        now: seconds since epoch; defaults to the current time.  A cookie
        whose expires is None never counts as expired.

        """
        if now is None: now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __str__(self):
        if self.port is None: p = ""
        else: p = ":"+self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = []
        for name in ["version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ]:
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
---|
| 397 | |
---|
| 398 | |
---|
class CookiePolicy:
    """Interface deciding which cookies are accepted from and returned to servers.

    A policy may also modify cookies.

    DefaultCookiePolicy, a subclass, implements the standard rules for
    Netscape and RFC 2965 cookies -- subclass that one if you want a
    customised policy.

    Besides set_ok and return_ok, implementations of this interface must
    provide the attributes below, which say which protocols are used, and
    how.  They can be read and set at any time, though whether that makes
    complete sense from the protocol point of view is doubtful.

    Public attributes:

    netscape: implement netscape protocol
    rfc2965: implement RFC 2965 protocol
    rfc2109_as_netscape:
       WARNING: This argument will change or go away if it is not accepted
                into the Python standard library in this form!
     If true, treat RFC 2109 cookies as though they were Netscape cookies.  The
     default is for this attribute to be None, which means treat 2109 cookies
     as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is,
     by default), and as Netscape cookies otherwise.
    hide_cookie2: don't add Cookie2 header to requests (the presence of
     this header indicates to the server that we understand RFC 2965
     cookies)

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Pre-expired cookies currently never reach this method -- the
        CookieJar class deletes such cookies itself.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.extract_cookies.__doc__

        Must be overridden; this base implementation raises
        NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.add_cookie_header.__doc__

        Must be overridden; this base implementation raises
        NotImplementedError.

        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This exists as an optimization: it removes the need to check every
        cookie with a particular domain (which may involve reading many
        files).  The default implementations of domain_return_ok and
        path_return_ok (return True) leave all the work to return_ok.

        When domain_return_ok returns true for a cookie domain,
        path_return_ok is called for the cookie path; otherwise neither
        path_return_ok nor return_ok is ever called for that cookie domain.
        When path_return_ok returns true, return_ok is called with the Cookie
        object itself for a full check; otherwise return_ok is never called
        for that cookie path.

        Note that domain_return_ok is called for every *cookie* domain, not
        just for the *request* domain.  For example, the function might be
        called with both ".acme.com" and "www.acme.com" if the request domain
        is "www.acme.com".  The same goes for path_return_ok.

        For argument documentation, see the docstring for return_ok.

        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        See the docstring for domain_return_ok.

        """
        return True
---|
| 483 | |
---|
| 484 | |
---|
| 485 | class DefaultCookiePolicy(CookiePolicy): |
---|
| 486 | """Implements the standard rules for accepting and returning cookies. |
---|
| 487 | |
---|
| 488 | Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is |
---|
| 489 | switched off by default. |
---|
| 490 | |
---|
| 491 | The easiest way to provide your own policy is to override this class and |
---|
| 492 | call its methods in your overriden implementations before adding your own |
---|
| 493 | additional checks. |
---|
| 494 | |
---|
| 495 | import mechanize |
---|
| 496 | class MyCookiePolicy(mechanize.DefaultCookiePolicy): |
---|
| 497 | def set_ok(self, cookie, request): |
---|
| 498 | if not mechanize.DefaultCookiePolicy.set_ok( |
---|
| 499 | self, cookie, request): |
---|
| 500 | return False |
---|
| 501 | if i_dont_want_to_store_this_cookie(): |
---|
| 502 | return False |
---|
| 503 | return True |
---|
| 504 | |
---|
| 505 | In addition to the features required to implement the CookiePolicy |
---|
| 506 | interface, this class allows you to block and allow domains from setting |
---|
| 507 | and receiving cookies. There are also some strictness switches that allow |
---|
| 508 | you to tighten up the rather loose Netscape protocol rules a little bit (at |
---|
| 509 | the cost of blocking some benign cookies). |
---|
| 510 | |
---|
| 511 | A domain blacklist and whitelist is provided (both off by default). Only |
---|
| 512 | domains not in the blacklist and present in the whitelist (if the whitelist |
---|
| 513 | is active) participate in cookie setting and returning. Use the |
---|
| 514 | blocked_domains constructor argument, and blocked_domains and |
---|
| 515 | set_blocked_domains methods (and the corresponding argument and methods for |
---|
| 516 | allowed_domains). If you set a whitelist, you can turn it off again by |
---|
| 517 | setting it to None. |
---|
| 518 | |
---|
| 519 | Domains in block or allow lists that do not start with a dot must |
---|
| 520 | string-compare equal. For example, "acme.com" matches a blacklist entry of |
---|
| 521 | "acme.com", but "www.acme.com" does not. Domains that do start with a dot |
---|
| 522 | are matched by more specific domains too. For example, both "www.acme.com" |
---|
| 523 | and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does |
---|
| 524 | not). IP addresses are an exception, and must match exactly. For example, |
---|
| 525 | if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is |
---|
| 526 | blocked, but 193.168.1.2 is not. |
---|
| 527 | |
---|
| 528 | Additional Public Attributes: |
---|
| 529 | |
---|
| 530 | General strictness switches |
---|
| 531 | |
---|
| 532 | strict_domain: don't allow sites to set two-component domains with |
---|
| 533 | country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc. |
---|
| 534 | This is far from perfect and isn't guaranteed to work! |
---|
| 535 | |
---|
| 536 | RFC 2965 protocol strictness switches |
---|
| 537 | |
---|
| 538 | strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable |
---|
| 539 | transactions (usually, an unverifiable transaction is one resulting from |
---|
| 540 | a redirect or an image hosted on another site); if this is false, cookies |
---|
| 541 | are NEVER blocked on the basis of verifiability |
---|
| 542 | |
---|
| 543 | Netscape protocol strictness switches |
---|
| 544 | |
---|
| 545 | strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions |
---|
| 546 | even to Netscape cookies |
---|
| 547 | strict_ns_domain: flags indicating how strict to be with domain-matching |
---|
| 548 | rules for Netscape cookies: |
---|
| 549 | DomainStrictNoDots: when setting cookies, host prefix must not contain a |
---|
| 550 | dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because |
---|
| 551 | www.foo contains a dot) |
---|
| 552 | DomainStrictNonDomain: cookies that did not explicitly specify a Domain |
---|
| 553 | cookie-attribute can only be returned to a domain that string-compares |
---|
| 554 | equal to the domain that set the cookie (eg. rockets.acme.com won't |
---|
| 555 | be returned cookies from acme.com that had no Domain cookie-attribute) |
---|
| 556 | DomainRFC2965Match: when setting cookies, require a full RFC 2965 |
---|
| 557 | domain-match |
---|
| 558 | DomainLiberal and DomainStrict are the most useful combinations of the |
---|
| 559 | above flags, for convenience |
---|
| 560 | strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that |
---|
| 561 | have names starting with '$' |
---|
| 562 | strict_ns_set_path: don't allow setting cookies whose path doesn't |
---|
| 563 | path-match request URI |
---|
| 564 | |
---|
| 565 | """ |
---|
| 566 | |
---|
| 567 | DomainStrictNoDots = 1 |
---|
| 568 | DomainStrictNonDomain = 2 |
---|
| 569 | DomainRFC2965Match = 4 |
---|
| 570 | |
---|
| 571 | DomainLiberal = 0 |
---|
| 572 | DomainStrict = DomainStrictNoDots|DomainStrictNonDomain |
---|
| 573 | |
---|
| 574 | def __init__(self, |
---|
| 575 | blocked_domains=None, allowed_domains=None, |
---|
| 576 | netscape=True, rfc2965=False, |
---|
| 577 | # WARNING: this argument will change or go away if is not |
---|
| 578 | # accepted into the Python standard library in this form! |
---|
| 579 | # default, ie. treat 2109 as netscape iff not rfc2965 |
---|
| 580 | rfc2109_as_netscape=None, |
---|
| 581 | hide_cookie2=False, |
---|
| 582 | strict_domain=False, |
---|
| 583 | strict_rfc2965_unverifiable=True, |
---|
| 584 | strict_ns_unverifiable=False, |
---|
| 585 | strict_ns_domain=DomainLiberal, |
---|
| 586 | strict_ns_set_initial_dollar=False, |
---|
| 587 | strict_ns_set_path=False, |
---|
| 588 | ): |
---|
| 589 | """ |
---|
| 590 | Constructor arguments should be used as keyword arguments only. |
---|
| 591 | |
---|
| 592 | blocked_domains: sequence of domain names that we never accept cookies |
---|
| 593 | from, nor return cookies to |
---|
| 594 | allowed_domains: if not None, this is a sequence of the only domains |
---|
| 595 | for which we accept and return cookies |
---|
| 596 | |
---|
| 597 | For other arguments, see CookiePolicy.__doc__ and |
---|
| 598 | DefaultCookiePolicy.__doc__.. |
---|
| 599 | |
---|
| 600 | """ |
---|
| 601 | self.netscape = netscape |
---|
| 602 | self.rfc2965 = rfc2965 |
---|
| 603 | self.rfc2109_as_netscape = rfc2109_as_netscape |
---|
| 604 | self.hide_cookie2 = hide_cookie2 |
---|
| 605 | self.strict_domain = strict_domain |
---|
| 606 | self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable |
---|
| 607 | self.strict_ns_unverifiable = strict_ns_unverifiable |
---|
| 608 | self.strict_ns_domain = strict_ns_domain |
---|
| 609 | self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar |
---|
| 610 | self.strict_ns_set_path = strict_ns_set_path |
---|
| 611 | |
---|
| 612 | if blocked_domains is not None: |
---|
| 613 | self._blocked_domains = tuple(blocked_domains) |
---|
| 614 | else: |
---|
| 615 | self._blocked_domains = () |
---|
| 616 | |
---|
| 617 | if allowed_domains is not None: |
---|
| 618 | allowed_domains = tuple(allowed_domains) |
---|
| 619 | self._allowed_domains = allowed_domains |
---|
| 620 | |
---|
| 621 | def blocked_domains(self): |
---|
| 622 | """Return the sequence of blocked domains (as a tuple).""" |
---|
| 623 | return self._blocked_domains |
---|
| 624 | def set_blocked_domains(self, blocked_domains): |
---|
| 625 | """Set the sequence of blocked domains.""" |
---|
| 626 | self._blocked_domains = tuple(blocked_domains) |
---|
| 627 | |
---|
| 628 | def is_blocked(self, domain): |
---|
| 629 | for blocked_domain in self._blocked_domains: |
---|
| 630 | if user_domain_match(domain, blocked_domain): |
---|
| 631 | return True |
---|
| 632 | return False |
---|
| 633 | |
---|
| 634 | def allowed_domains(self): |
---|
| 635 | """Return None, or the sequence of allowed domains (as a tuple).""" |
---|
| 636 | return self._allowed_domains |
---|
| 637 | def set_allowed_domains(self, allowed_domains): |
---|
| 638 | """Set the sequence of allowed domains, or None.""" |
---|
| 639 | if allowed_domains is not None: |
---|
| 640 | allowed_domains = tuple(allowed_domains) |
---|
| 641 | self._allowed_domains = allowed_domains |
---|
| 642 | |
---|
| 643 | def is_not_allowed(self, domain): |
---|
| 644 | if self._allowed_domains is None: |
---|
| 645 | return False |
---|
| 646 | for allowed_domain in self._allowed_domains: |
---|
| 647 | if user_domain_match(domain, allowed_domain): |
---|
| 648 | return False |
---|
| 649 | return True |
---|
| 650 | |
---|
| 651 | def set_ok(self, cookie, request): |
---|
| 652 | """ |
---|
| 653 | If you override set_ok, be sure to call this method. If it returns |
---|
| 654 | false, so should your subclass (assuming your subclass wants to be more |
---|
| 655 | strict about which cookies to accept). |
---|
| 656 | |
---|
| 657 | """ |
---|
| 658 | debug(" - checking cookie %s", cookie) |
---|
| 659 | |
---|
| 660 | assert cookie.name is not None |
---|
| 661 | |
---|
| 662 | for n in "version", "verifiability", "name", "path", "domain", "port": |
---|
| 663 | fn_name = "set_ok_"+n |
---|
| 664 | fn = getattr(self, fn_name) |
---|
| 665 | if not fn(cookie, request): |
---|
| 666 | return False |
---|
| 667 | |
---|
| 668 | return True |
---|
| 669 | |
---|
| 670 | def set_ok_version(self, cookie, request): |
---|
| 671 | if cookie.version is None: |
---|
| 672 | # Version is always set to 0 by parse_ns_headers if it's a Netscape |
---|
| 673 | # cookie, so this must be an invalid RFC 2965 cookie. |
---|
| 674 | debug(" Set-Cookie2 without version attribute (%s)", cookie) |
---|
| 675 | return False |
---|
| 676 | if cookie.version > 0 and not self.rfc2965: |
---|
| 677 | debug(" RFC 2965 cookies are switched off") |
---|
| 678 | return False |
---|
| 679 | elif cookie.version == 0 and not self.netscape: |
---|
| 680 | debug(" Netscape cookies are switched off") |
---|
| 681 | return False |
---|
| 682 | return True |
---|
| 683 | |
---|
def set_ok_verifiability(self, cookie, request):
    """Return False for a third-party cookie the strict settings forbid.

    Only applies when the request is unverifiable and is_third_party()
    says it is a third-party request; otherwise the cookie is accepted.

    """
    if not (request.unverifiable and is_third_party(request)):
        return True
    if cookie.version > 0 and self.strict_rfc2965_unverifiable:
        debug(" third-party RFC 2965 cookie during unverifiable transaction")
        return False
    if cookie.version == 0 and self.strict_ns_unverifiable:
        debug(" third-party Netscape cookie during unverifiable transaction")
        return False
    return True
---|
| 695 | |
---|
def set_ok_name(self, cookie, request):
    """Return False for a Netscape cookie whose name starts with '$'.

    The check only applies to version 0 cookies, and only when
    self.strict_ns_set_initial_dollar is set.

    """
    # Try and stop servers setting V0 cookies designed to hack other
    # servers that know both V0 and V1 protocols.
    is_netscape = cookie.version == 0
    if (is_netscape and self.strict_ns_set_initial_dollar and
        cookie.name.startswith("$")):
        debug(" illegal name (starts with '$'): '%s'", cookie.name)
        return False
    return True
---|
| 704 | |
---|
def set_ok_path(self, cookie, request):
    """Return False if an explicit cookie path is not a prefix of the
    request path.

    Applied to cookies with version > 0, and to version 0 cookies when
    self.strict_ns_set_path is set.  Cookies without an explicit path are
    always accepted.

    """
    if not cookie.path_specified:
        return True
    req_path = request_path(request)
    must_match = (cookie.version > 0 or
                  (cookie.version == 0 and self.strict_ns_set_path))
    if must_match and not req_path.startswith(cookie.path):
        debug(" path attribute %s is not a prefix of request path %s",
              cookie.path, req_path)
        return False
    return True
---|
| 715 | |
---|
def set_ok_countrycode_domain(self, cookie, request):
    """Return False if explicit cookie domain is not acceptable.

    With self.strict_domain set, an explicit two-label domain is rejected
    when its first label is one of a fixed list of generic names ("co",
    "com", ...) and its last label is two characters long (like .co.uk).

    Called by set_ok_domain, for convenience of overriding by
    subclasses.

    """
    if not (cookie.domain_specified and self.strict_domain):
        return True
    domain = cookie.domain
    # since domain was specified, we know that:
    assert domain.startswith(".")
    if domain.count(".") != 2:
        return True
    # domain like .foo.bar
    last_dot = domain.rfind(".")
    top_level = domain[last_dot+1:]
    second_level = domain[1:last_dot].lower()
    generic_labels = (
        "co", "ac",
        "com", "edu", "org", "net", "gov", "mil", "int",
        "aero", "biz", "cat", "coop", "info", "jobs", "mobi",
        "museum", "name", "pro", "travel",
        )
    if second_level in generic_labels and len(top_level) == 2:
        # domain like .co.uk
        return False
    return True
---|
| 742 | |
---|
def set_ok_domain(self, cookie, request):
    """Return True if the cookie's domain is acceptable for this request.

    The cookie is rejected when its domain is on the block-list, is missing
    from a configured allow-list, or fails set_ok_countrycode_domain.  For
    an explicitly specified domain the following are also checked:

     - the domain has an embedded dot (or is ".local");
     - for version 0 cookies, the effective request-host ends with the
       domain (with or without an added initial dot);
     - for version > 0, or when strict_ns_domain has DomainRFC2965Match
       set, the effective request-host domain-matches the domain;
     - for version > 0, or when strict_ns_domain has DomainStrictNoDots
       set, the part of the request host in front of the domain has no
       dot (unless the host matches IPV4_RE).

    """
    if self.is_blocked(cookie.domain):
        debug(" domain %s is in user block-list", cookie.domain)
        return False
    if self.is_not_allowed(cookie.domain):
        debug(" domain %s is not in user allow-list", cookie.domain)
        return False
    if not self.set_ok_countrycode_domain(cookie, request):
        debug(" country-code second level domain %s", cookie.domain)
        return False
    if cookie.domain_specified:
        req_host, erhn = eff_request_host(request)
        domain = cookie.domain
        if domain.startswith("."):
            undotted_domain = domain[1:]
        else:
            undotted_domain = domain
        embedded_dots = (undotted_domain.find(".") >= 0)
        if not embedded_dots and domain != ".local":
            debug(" non-local domain %s contains no embedded dot",
                  domain)
            return False
        if cookie.version == 0:
            if (not erhn.endswith(domain) and
                (not erhn.startswith(".") and
                 not ("."+erhn).endswith(domain))):
                # (message previously read "does not end end with")
                debug(" effective request-host %s (even with added "
                      "initial dot) does not end with %s",
                      erhn, domain)
                return False
        if (cookie.version > 0 or
            (self.strict_ns_domain & self.DomainRFC2965Match)):
            if not domain_match(erhn, domain):
                debug(" effective request-host %s does not domain-match "
                      "%s", erhn, domain)
                return False
        if (cookie.version > 0 or
            (self.strict_ns_domain & self.DomainStrictNoDots)):
            # what is left of the request host once the domain is cut off
            host_prefix = req_host[:-len(domain)]
            if (host_prefix.find(".") >= 0 and
                not IPV4_RE.search(req_host)):
                debug(" host prefix %s for domain %s contains a dot",
                      host_prefix, domain)
                return False
    return True
---|
| 788 | |
---|
def set_ok_port(self, cookie, request):
    """Return False if an explicit port list is invalid or excludes the
    request port.

    Every entry of the comma-separated cookie.port is checked to be numeric
    until one equals the request port ("80" when request_port() returns
    None).  Cookies without an explicit port list are always accepted.

    """
    if not cookie.port_specified:
        return True
    req_port = request_port(request)
    if req_port is not None:
        req_port = str(req_port)
    else:
        req_port = "80"
    for candidate in cookie.port.split(","):
        try:
            int(candidate)
        except ValueError:
            debug(" bad port %s (not numeric)", candidate)
            return False
        if candidate == req_port:
            return True
    debug(" request port (%s) not found in %s",
          req_port, cookie.port)
    return False
---|
| 809 | |
---|
def return_ok(self, cookie, request):
    """Return True if the cookie may be sent back with the request.

    The return_ok_<check> methods are consulted in a fixed order (version,
    verifiability, secure, expires, port, domain); the first one that
    rejects the cookie ends the search.

    If you override return_ok, be sure to call this method.  If it returns
    false, so should your subclass (assuming your subclass wants to be more
    strict about which cookies to return).

    """
    # Path has already been checked by path_return_ok, and domain blocking
    # done by domain_return_ok.
    debug(" - checking cookie %s", cookie)

    for check in ("version", "verifiability", "secure", "expires", "port",
                  "domain"):
        accepts = getattr(self, "return_ok_" + check)
        if not accepts(cookie, request):
            return False
    return True
---|
| 827 | |
---|
def return_ok_version(self, cookie, request):
    """Return False if the cookie's protocol is switched off.

    A version greater than zero needs self.rfc2965; version zero needs
    self.netscape.

    """
    version = cookie.version
    if version > 0:
        if not self.rfc2965:
            debug(" RFC 2965 cookies are switched off")
            return False
    elif version == 0 and not self.netscape:
        debug(" Netscape cookies are switched off")
        return False
    return True
---|
| 836 | |
---|
def return_ok_verifiability(self, cookie, request):
    """Return False for a third-party cookie the strict settings forbid.

    Only applies when the request is unverifiable and is_third_party()
    says it is a third-party request; otherwise the cookie may be returned.

    """
    if not (request.unverifiable and is_third_party(request)):
        return True
    if cookie.version > 0 and self.strict_rfc2965_unverifiable:
        debug(" third-party RFC 2965 cookie during unverifiable transaction")
        return False
    if cookie.version == 0 and self.strict_ns_unverifiable:
        debug(" third-party Netscape cookie during unverifiable transaction")
        return False
    return True
---|
| 848 | |
---|
def return_ok_secure(self, cookie, request):
    """Return False if a secure cookie would go out on a non-https request."""
    if not cookie.secure:
        return True
    if request.get_type() == "https":
        return True
    debug(" secure cookie with non-secure request")
    return False
---|
| 854 | |
---|
def return_ok_expires(self, cookie, request):
    """Return False if the cookie has expired as of self._now."""
    expired = cookie.is_expired(self._now)
    if not expired:
        return True
    debug(" cookie expired")
    return False
---|
| 860 | |
---|
def return_ok_port(self, cookie, request):
    """Return False if the cookie's port list excludes the request port.

    cookie.port is a comma-separated list; the request port defaults to
    "80" when request_port() returns None.  Cookies with no port
    restriction are always returned.

    """
    if not cookie.port:
        return True
    req_port = request_port(request)
    if req_port is None:
        req_port = "80"
    if req_port in cookie.port.split(","):
        return True
    debug(" request port %s does not match cookie port %s",
          req_port, cookie.port)
    return False
---|
| 874 | |
---|
def return_ok_domain(self, cookie, request):
    """Return True if the cookie's domain matches the request host.

    Version 0 cookies must have a domain that is a suffix of "." plus the
    effective request-host name; with DomainStrictNonDomain set, a cookie
    without an explicit domain must equal that name exactly.  Cookies with
    version > 0 must domain-match the effective request-host name.

    """
    req_host, erhn = eff_request_host(request)
    domain = cookie.domain
    version = cookie.version

    if version == 0:
        # strict check of non-domain cookies: Mozilla does this, MSIE5
        # doesn't
        strict_non_domain = (
            self.strict_ns_domain & self.DomainStrictNonDomain)
        if (strict_non_domain and
            not cookie.domain_specified and domain != erhn):
            debug(" cookie with unspecified domain does not string-compare "
                  "equal to request domain")
            return False
        if not ("."+erhn).endswith(domain):
            debug(" request-host %s does not match Netscape cookie domain "
                  "%s", req_host, domain)
            return False
    elif version > 0:
        if not domain_match(erhn, domain):
            debug(" effective request-host name %s does not domain-match "
                  "RFC 2965 cookie domain %s", erhn, domain)
            return False
    return True
---|
| 896 | |
---|
def domain_return_ok(self, domain, request):
    """Return False if no cookie stored under `domain` can go with `request`.

    This is a liberal pre-check of the domain, followed by the user
    block-list and allow-list checks.  It is here as an optimization to
    avoid having to load lots of MSIE cookie files unless necessary.

    """
    # Munge req_host and erhn to always start with a dot, so as to err on
    # the side of letting cookies through.
    dotted_req_host, dotted_erhn = eff_request_host(request)
    if not dotted_req_host.startswith("."):
        dotted_req_host = "."+dotted_req_host
    if not dotted_erhn.startswith("."):
        dotted_erhn = "."+dotted_erhn
    # Compare on a label boundary.  A bare suffix test would let a cookie
    # stored under "pypi.org" be offered to the host "fakepypi.org".
    if domain and not domain.startswith("."):
        dotted_domain = "."+domain
    else:
        dotted_domain = domain
    if not (dotted_req_host.endswith(dotted_domain) or
            dotted_erhn.endswith(dotted_domain)):
        return False

    if self.is_blocked(domain):
        debug(" domain %s is in user block-list", domain)
        return False
    if self.is_not_allowed(domain):
        debug(" domain %s is not in user allow-list", domain)
        return False

    return True
---|
| 922 | |
---|
def path_return_ok(self, path, request):
    """Return True if cookies stored under `path` may go with `request`.

    The request path must either equal the cookie path or extend it at a
    "/" boundary.  A bare string-prefix test is not enough: it would send a
    cookie stored for "/foo" along with a request for "/foobar".

    """
    debug("- checking cookie path=%s", path)
    req_path = request_path(request)
    if req_path == path:
        return True
    if (req_path.startswith(path) and
        (path.endswith("/") or
         req_path[len(path):len(path)+1] == "/")):
        return True
    debug(" %s does not path-match %s", req_path, path)
    return False
---|
| 930 | |
---|
| 931 | |
---|
def vals_sorted_by_key(adict):
    """Return a list of the values of adict, ordered by sorted key."""
    return [adict.get(key) for key in sorted(adict.keys())]
---|
| 936 | |
---|
class MappingIterator:
    """Iterates over nested mapping, depth-first, in sorted order by key.

    Values that have an `items` attribute are treated as nested mappings
    and descended into; every other value is yielded as a leaf.

    """

    def __init__(self, mapping):
        # LIFO stack of frames: (values sorted by key, index of the next
        # value to visit, mapping the values came from -- None at the root)
        self._s = [(self._sorted_vals(mapping), 0, None)]

    def __iter__(self):
        return self

    def _sorted_vals(self, mapping):
        # values of mapping, ordered by sorted key
        return [mapping.get(key) for key in sorted(mapping.keys())]

    def next(self):
        stack = self._s
        while stack:
            values, index, owner = stack.pop()
            if index >= len(values):
                # this level is exhausted: resume the enclosing one
                continue
            current = values[index]
            stack.append((values, index + 1, owner))
            try:
                current.items
            except AttributeError:
                # non-mapping
                return current
            # mapping: descend into it before continuing at this level
            stack.append((self._sorted_vals(current), 0, current))
        raise StopIteration()
---|
| 965 | |
---|
| 966 | |
---|
# Sentinel class: passed as the second argument to dict.get so that a key
# which is absent can be told apart from a key whose value is None.
class Absent:
    pass
---|
| 970 | |
---|
| 971 | class CookieJar: |
---|
| 972 | """Collection of HTTP cookies. |
---|
| 973 | |
---|
| 974 | You may not need to know about this class: try mechanize.urlopen(). |
---|
| 975 | |
---|
| 976 | The major methods are extract_cookies and add_cookie_header; these are all |
---|
| 977 | you are likely to need. |
---|
| 978 | |
---|
| 979 | CookieJar supports the iterator protocol: |
---|
| 980 | |
---|
| 981 | for cookie in cookiejar: |
---|
| 982 | # do something with cookie |
---|
| 983 | |
---|
| 984 | Methods: |
---|
| 985 | |
---|
| 986 | add_cookie_header(request) |
---|
| 987 | extract_cookies(response, request) |
---|
| 988 | make_cookies(response, request) |
---|
| 989 | set_cookie_if_ok(cookie, request) |
---|
| 990 | set_cookie(cookie) |
---|
| 991 | clear_session_cookies() |
---|
| 992 | clear_expired_cookies() |
---|
| 993 | clear(domain=None, path=None, name=None) |
---|
| 994 | |
---|
| 995 | Public attributes |
---|
| 996 | |
---|
| 997 | policy: CookiePolicy object |
---|
| 998 | |
---|
| 999 | """ |
---|
| 1000 | |
---|
| 1001 | non_word_re = re.compile(r"\W") |
---|
| 1002 | quote_re = re.compile(r"([\"\\])") |
---|
| 1003 | strict_domain_re = re.compile(r"\.?[^.]*") |
---|
| 1004 | domain_re = re.compile(r"[^.]*") |
---|
| 1005 | dots_re = re.compile(r"^\.+") |
---|
| 1006 | |
---|
def __init__(self, policy=None):
    """Create an empty jar.

    policy: CookiePolicy object; a DefaultCookiePolicy is created when
     none is given.

    """
    if policy is not None:
        self._policy = policy
    else:
        self._policy = DefaultCookiePolicy()

    # nested mapping of stored cookies: domain -> path -> name -> cookie
    self._cookies = {}

    # for __getitem__ iteration in pre-2.2 Pythons
    self._prev_getitem_index = 0
---|
| 1020 | |
---|
def set_policy(self, policy):
    """Replace the CookiePolicy object consulted by this jar."""
    self._policy = policy
---|
| 1023 | |
---|
def _cookies_for_domain(self, domain, request):
    """Return a list of the cookies stored under domain that the policy
    allows to be returned with request.

    The policy is asked about the domain first, then about each stored
    path, and finally about each cookie under an acceptable path.

    """
    policy = self._policy
    if not policy.domain_return_ok(domain, request):
        return []
    debug("Checking %s for cookies to return", domain)
    matches = []
    by_path = self._cookies[domain]
    for path in by_path.keys():
        if policy.path_return_ok(path, request):
            for cookie in by_path[path].values():
                if policy.return_ok(cookie, request):
                    debug(" it's a match")
                    matches.append(cookie)
                else:
                    debug(" not returning cookie")
    return matches
---|
| 1041 | |
---|
def _cookies_for_request(self, request):
    """Return a list of cookies to be returned to server.

    Gathers the results of _cookies_for_domain for every stored domain.

    """
    found = []
    for domain in self._cookies.keys():
        found += self._cookies_for_domain(domain, request)
    return found
---|
| 1048 | |
---|
def _cookie_attrs(self, cookies):
    """Return a list of cookie-attributes to be returned to server.

    like ['foo="bar"; $Path="/"', ...]

    The $Version attribute is also added when appropriate (currently only
    once per request).

    Note that the cookies list passed in is sorted in place.

    """
    # add cookies in order of most specific (ie. longest) path first; the
    # sort is stable, so cookies with equally long paths keep their order
    cookies.sort(key=lambda c: len(c.path), reverse=True)

    attrs = []
    for position, cookie in enumerate(cookies):
        # set version of Cookie header
        # XXX
        # What should it be if multiple matching Set-Cookie headers have
        # different versions themselves?
        # Answer: there is no answer; was supposed to be settled by
        # RFC 2965 errata, but that may never appear...
        version = cookie.version
        if position == 0 and version > 0:
            attrs.append("$Version=%s" % version)

        raw = cookie.value
        if raw is None:
            attrs.append(cookie.name)
        else:
            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            # intact, due to the poorly-specified Netscape Cookie: syntax)
            if version > 0 and self.non_word_re.search(raw):
                raw = self.quote_re.sub(r"\\\1", raw)
            attrs.append("%s=%s" % (cookie.name, raw))

        if version <= 0:
            continue

        # remaining cookie-attributes are only sent for version > 0
        if cookie.path_specified:
            attrs.append('$Path="%s"' % cookie.path)
        if cookie.domain.startswith("."):
            domain = cookie.domain
            if not cookie.domain_initial_dot:
                # the dot was added by us, not sent by the server
                domain = domain[1:]
            attrs.append('$Domain="%s"' % domain)
        if cookie.port is not None:
            port_attr = "$Port"
            if cookie.port_specified:
                port_attr = port_attr + ('="%s"' % cookie.port)
            attrs.append(port_attr)

    return attrs
---|
| 1108 | |
---|
def add_cookie_header(self, request):
    """Add correct Cookie: header to request (urllib2.Request object).

    The Cookie2 header is also added unless policy.hide_cookie2 is true.
    Neither header is added if the request already carries one of that
    name.  Expired cookies are cleared from the jar afterwards.

    The request object (usually a urllib2.Request instance) must support
    the methods get_full_url, get_host, get_type, has_header, get_header,
    header_items and add_unredirected_header, as documented by urllib2, and
    the port attribute (the port number).  Actually,
    RequestUpgradeProcessor will automatically upgrade your Request object
    to one with has_header, get_header, header_items and
    add_unredirected_header, if it lacks those methods, for compatibility
    with pre-2.4 versions of urllib2.

    """
    debug("add_cookie_header")
    # the policy and the jar share one notion of "now" for expiry checks
    self._policy._now = self._now = int(time.time())

    cookies = self._cookies_for_request(request)

    attrs = self._cookie_attrs(cookies)
    if attrs and not request.has_header("Cookie"):
        request.add_unredirected_header("Cookie", "; ".join(attrs))

    # if necessary, advertise that we know RFC 2965
    if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
        not request.has_header("Cookie2")):
        for cookie in cookies:
            if cookie.version != 1:
                request.add_unredirected_header("Cookie2", '$Version="1"')
                break

    self.clear_expired_cookies()
---|
| 1146 | |
---|
def _normalized_cookie_tuples(self, attrs_set):
    """Return list of tuples containing normalised cookie information.

    attrs_set is the list of lists of key,value pairs extracted from
    the Set-Cookie or Set-Cookie2 headers.

    Tuples are name, value, standard, rest, where name and value are the
    cookie name and value, standard is a dictionary containing the standard
    cookie-attributes (discard, secure, version, expires or max-age,
    domain, path and port) and rest is a dictionary containing the rest of
    the cookie-attributes.

    A cookie with a bad standard cookie-attribute (missing domain value,
    missing or non-numeric max-age, or a missing value for an attribute
    that requires one) is left out of the result.

    """
    cookie_tuples = []

    boolean_attrs = "discard", "secure"
    value_attrs = ("version",
                   "expires", "max-age",
                   "domain", "path", "port",
                   "comment", "commenturl")

    for cookie_attrs in attrs_set:
        name, value = cookie_attrs[0]

        # Build dictionary of standard cookie-attributes (standard) and
        # dictionary of other cookie-attributes (rest).

        # Note: expiry time is normalised to seconds since epoch.  V0
        # cookies should have the Expires cookie-attribute, and V1 cookies
        # should have Max-Age, but since V1 includes RFC 2109 cookies (and
        # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
        # accept either (but prefer Max-Age).
        max_age_set = False

        bad_cookie = False

        standard = {}
        rest = {}
        for k, v in cookie_attrs[1:]:
            lc = k.lower()
            # don't lose case distinction for unknown fields
            if lc in value_attrs or lc in boolean_attrs:
                k = lc
            if k in boolean_attrs and v is None:
                # boolean cookie-attribute is present, but has no value
                # (like "discard", rather than "port=80")
                v = True
            if k in standard:
                # only first value is significant
                continue
            if k == "domain":
                if v is None:
                    debug(" missing value for domain attribute")
                    bad_cookie = True
                    break
                # RFC 2965 section 3.3.3
                v = v.lower()
            if k == "expires":
                if max_age_set:
                    # Prefer max-age to expires (like Mozilla)
                    continue
                if v is None:
                    debug(" missing or invalid value for expires "
                          "attribute: treating as session cookie")
                    continue
            if k == "max-age":
                max_age_set = True
                try:
                    v = int(v)
                except (TypeError, ValueError):
                    # TypeError covers a Max-Age with no value at all
                    # (v is None); ValueError a non-numeric string
                    debug(" missing or invalid (non-numeric) value for "
                          "max-age attribute")
                    bad_cookie = True
                    break
                # convert RFC 2965 Max-Age to seconds since epoch
                # XXX Strictly you're supposed to follow RFC 2616
                # age-calculation rules.  Remember that zero Max-Age is
                # a request to discard (old and new) cookie, though.
                k = "expires"
                v = self._now + v
            if (k in value_attrs) or (k in boolean_attrs):
                if (v is None and
                    k not in ["port", "comment", "commenturl"]):
                    debug(" missing value for %s attribute" % k)
                    bad_cookie = True
                    break
                standard[k] = v
            else:
                rest[k] = v

        if bad_cookie:
            continue

        cookie_tuples.append((name, value, standard, rest))

    return cookie_tuples
---|
| 1243 | |
---|
def _cookie_from_cookie_tuple(self, tup, request):
    """Build a Cookie from one normalised cookie tuple.

    tup is a (name, value, standard, rest) tuple as produced by
    _normalized_cookie_tuples; request supplies the defaults for the path,
    domain and port cookie-attributes.

    Returns None, instead of a Cookie, when the version cookie-attribute
    is not an integer, or when the expiry time is not in the future.  In
    the latter case any stored cookie with the same domain, path and name
    is cleared from the jar.

    """
    # standard is dict of standard cookie-attributes, rest is dict of the
    # rest of them
    name, value, standard, rest = tup

    version = standard.get("version", None)
    if version is not None:
        try:
            version = int(version)
        except ValueError:
            # Invalid version: ignore just this cookie, rather than let the
            # exception escape and discard every cookie from the header.
            return None

    domain = standard.get("domain", Absent)
    path = standard.get("path", Absent)
    port = standard.get("port", Absent)
    expires = standard.get("expires", Absent)

    # set the easy defaults
    secure = standard.get("secure", False)
    # (discard is also set if expires is Absent)
    discard = standard.get("discard", False)
    comment = standard.get("comment", None)
    comment_url = standard.get("commenturl", None)

    # set default path
    if path is not Absent and path != "":
        path_specified = True
        path = escape_path(path)
    else:
        path_specified = False
        path = request_path(request)
        i = path.rfind("/")
        if i != -1:
            if version == 0:
                # Netscape spec parts company from reality here
                path = path[:i]
            else:
                path = path[:i+1]
        if len(path) == 0: path = "/"

    # set default domain
    domain_specified = domain is not Absent
    # but first we have to remember whether it starts with a dot
    domain_initial_dot = False
    if domain_specified:
        domain_initial_dot = bool(domain.startswith("."))
    if domain is Absent:
        req_host, erhn = eff_request_host(request)
        domain = erhn
    elif not domain.startswith("."):
        domain = "."+domain

    # set default port
    port_specified = False
    if port is not Absent:
        if port is None:
            # Port attr present, but has no value: default to request port.
            # Cookie should then only be sent back on that port.
            port = request_port(request)
        else:
            port_specified = True
            port = re.sub(r"\s+", "", port)
    else:
        # No port attr present.  Cookie can be sent back on any port.
        port = None

    # set default expires and discard
    if expires is Absent:
        expires = None
        discard = True
    elif expires <= self._now:
        # Expiry date in past is request to delete cookie.  This can't be
        # in DefaultCookiePolicy, because can't delete cookies there.
        try:
            self.clear(domain, path, name)
        except KeyError:
            pass
        debug("Expiring cookie, domain='%s', path='%s', name='%s'",
              domain, path, name)
        return None

    return Cookie(version,
                  name, value,
                  port, port_specified,
                  domain, domain_specified, domain_initial_dot,
                  path, path_specified,
                  secure,
                  expires,
                  discard,
                  comment,
                  comment_url,
                  rest)
---|
| 1331 | |
---|
def _cookies_from_attrs_set(self, attrs_set, request):
    """Return a list of the Cookie objects built from attrs_set.

    attrs_set is normalised with _normalized_cookie_tuples; each resulting
    tuple is passed to _cookie_from_cookie_tuple, and false results (such
    as None) are left out.

    """
    built = [self._cookie_from_cookie_tuple(tup, request)
             for tup in self._normalized_cookie_tuples(attrs_set)]
    return [cookie for cookie in built if cookie]
---|
| 1340 | |
---|
def _process_rfc2109_cookies(self, cookies):
    """Flag version 1 cookies as RFC 2109, optionally downgrading them.

    Every cookie with version 1 gets rfc2109 set to True.  Its version is
    also reset to 0 when policy.rfc2109_as_netscape is true, or when that
    setting is None and policy.rfc2965 is false.

    """
    policy = self._policy
    downgrade = policy.rfc2109_as_netscape
    if downgrade is None:
        downgrade = not policy.rfc2965
    for cookie in cookies:
        if cookie.version != 1:
            continue
        cookie.rfc2109 = True
        if downgrade:
            # treat 2109 cookies as Netscape cookies rather than
            # as RFC2965 cookies
            cookie.version = 0
---|
| 1353 | |
---|
def make_cookies(self, response, request):
    """Return sequence of Cookie objects extracted from response object.

    See extract_cookies.__doc__ for the interfaces required of the
    response and request arguments.

    """
    # get cookie-attributes for RFC 2965 and Netscape protocols
    headers = response.info()
    rfc2965_hdrs = headers.getheaders("Set-Cookie2")
    ns_hdrs = headers.getheaders("Set-Cookie")

    rfc2965 = self._policy.rfc2965
    netscape = self._policy.netscape

    # A header is only relevant if its protocol is switched on.
    usable_rfc2965 = rfc2965_hdrs and rfc2965
    usable_ns = ns_hdrs and netscape
    if not (usable_rfc2965 or usable_ns):
        return []  # no relevant cookie headers: quick exit

    try:
        cookies = self._cookies_from_attrs_set(
            split_header_words(rfc2965_hdrs), request)
    except:
        reraise_unmasked_exceptions()
        cookies = []

    if usable_ns:
        try:
            # RFC 2109 and Netscape cookies
            ns_cookies = self._cookies_from_attrs_set(
                parse_ns_headers(ns_hdrs), request)
        except:
            reraise_unmasked_exceptions()
            ns_cookies = []
        self._process_rfc2109_cookies(ns_cookies)

        # Look for Netscape cookies (from Set-Cookie headers) that match
        # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
        # For each match, keep the RFC 2965 cookie and ignore the Netscape
        # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
        # bundled in with the Netscape cookies for this purpose, which is
        # reasonable behaviour.
        if rfc2965:
            taken = {}
            for cookie in cookies:
                taken[(cookie.domain, cookie.path, cookie.name)] = None
            ns_cookies = [
                ns_cookie for ns_cookie in ns_cookies
                if (ns_cookie.domain, ns_cookie.path,
                    ns_cookie.name) not in taken]

        if ns_cookies:
            cookies.extend(ns_cookies)

    return cookies
---|
| 1412 | |
---|
def set_cookie_if_ok(self, cookie, request):
    """Set a cookie if policy says it's OK to do so.

    cookie: mechanize.Cookie instance
    request: see extract_cookies.__doc__ for the required interface

    """
    # the policy and the jar share one notion of "now"
    now = int(time.time())
    self._policy._now = now
    self._now = now

    if not self._policy.set_ok(cookie, request):
        return
    self.set_cookie(cookie)
---|
| 1424 | |
---|
def set_cookie(self, cookie):
    """Set a cookie, without checking whether or not it should be set.

    The cookie is stored by domain, then path, then name, replacing any
    cookie already stored under the same three keys.

    cookie: mechanize.Cookie instance
    """
    by_path = self._cookies.setdefault(cookie.domain, {})
    by_name = by_path.setdefault(cookie.path, {})
    by_name[cookie.name] = cookie
---|
| 1436 | |
---|
def extract_cookies(self, response, request):
    """Extract cookies from response, where allowable given the request.

    Allowable Set-Cookie: and Set-Cookie2: headers found in the response
    are used to update the state of the object, subject to the approval
    of the policy's set_ok method.

    The response object (usually the result of a call to
    mechanize.urlopen, or similar) should support an info method, which
    returns a mimetools.Message object (in fact, the 'mimetools.Message
    object' may be any object that provides a getallmatchingheaders
    method).

    The request object (usually a urllib2.Request instance) must support
    the methods get_full_url and get_host, as documented by urllib2, and
    the port attribute (the port number).  The request is used to set
    default values for cookie-attributes as well as for checking that the
    cookie is OK to be set.

    """
    debug("extract_cookies: %s", response.info())

    # The policy and the jar are stamped with the same whole-second time
    # before any cookie is examined.
    now = int(time.time())
    self._policy._now = now
    self._now = now

    for candidate in self.make_cookies(response, request):
        if not self._policy.set_ok(candidate, request):
            continue
        debug(" setting cookie: %s", candidate)
        self.set_cookie(candidate)
---|
| 1465 | |
---|
def clear(self, domain=None, path=None, name=None):
    """Clear some cookies.

    With no arguments, every cookie is removed.  With only a domain, all
    cookies belonging to that domain are removed.  With a domain and a
    path, the cookies under that path within the domain are removed.
    With all three arguments, the single cookie with that name, path and
    domain is removed.

    Raises ValueError if name is given without both domain and path, or
    path is given without domain.

    Raises KeyError if no matching cookie exists.

    """
    # Most specific request first: a single named cookie.
    if name is not None:
        if not (domain is not None and path is not None):
            raise ValueError(
                "domain and path must be given to remove a cookie by name")
        del self._cookies[domain][path][name]
        return

    # Everything under one path of one domain.
    if path is not None:
        if domain is None:
            raise ValueError(
                "domain must be given to remove cookies by path")
        del self._cookies[domain][path]
        return

    # Everything belonging to one domain.
    if domain is not None:
        del self._cookies[domain]
        return

    # No selector at all: start over with an empty store.
    self._cookies = {}
---|
| 1492 | |
---|
def clear_session_cookies(self):
    """Discard all session cookies.

    Discards all cookies held by object which had either no Max-Age or
    Expires cookie-attribute or an explicit Discard cookie-attribute, or
    which otherwise have ended up with a true discard attribute.  For
    interactive browsers, the end of a session usually corresponds to
    closing the browser window.

    Note that the save method won't save session cookies anyway, unless you
    ask otherwise by passing a true ignore_discard argument.

    """
    # Collect the matches before deleting anything.  Whether the jar's
    # iterator tolerates deletions during traversal cannot be seen from
    # this method, so the traversal is finished before the store changes.
    doomed = [cookie for cookie in self if cookie.discard]
    for cookie in doomed:
        self.clear(cookie.domain, cookie.path, cookie.name)
---|
| 1509 | |
---|
def clear_expired_cookies(self):
    """Discard all expired cookies.

    You probably don't need to call this method: expired cookies are never
    sent back to the server (provided you're using DefaultCookiePolicy),
    this method is called by CookieJar itself every so often, and the save
    method won't save expired cookies anyway (unless you ask otherwise by
    passing a true ignore_expires argument).

    """
    # One timestamp is used for every cookie examined in this pass.
    now = time.time()
    # Collect the matches before deleting anything.  Whether the jar's
    # iterator tolerates deletions during traversal cannot be seen from
    # this method, so the traversal is finished before the store changes.
    expired = [cookie for cookie in self if cookie.is_expired(now)]
    for cookie in expired:
        self.clear(cookie.domain, cookie.path, cookie.name)
---|
| 1524 | |
---|
def __getitem__(self, i):
    """Return the i-th cookie, for strictly sequential index access only.

    Index 0 starts a fresh iterator over the jar; any other index must be
    exactly one more than the previous index requested, otherwise
    IndexError is raised.  IndexError is also raised once the iterator is
    exhausted.
    """
    if i == 0:
        # Asking for the first item (re)starts the underlying iteration.
        self._getitem_iterator = self.__iter__()
    else:
        if self._prev_getitem_index != i-1:
            raise IndexError(
                "CookieJar.__getitem__ only supports sequential iteration")
    self._prev_getitem_index = i

    try:
        item = self._getitem_iterator.next()
    except StopIteration:
        raise IndexError()
    return item
---|
| 1535 | |
---|
def __iter__(self):
    """Return a MappingIterator over the jar's cookie store."""
    store = self._cookies
    return MappingIterator(store)
---|
| 1538 | |
---|
def __len__(self):
    """Return number of contained cookies."""
    # The count is obtained by walking the whole jar once.
    total = 0
    for _ in self:
        total += 1
    return total
---|
| 1544 | |
---|
def __repr__(self):
    """Return '<CLASS[...]>' listing the repr() of every cookie in the jar."""
    rendered = ", ".join([repr(cookie) for cookie in self])
    return "<%s[%s]>" % (self.__class__, rendered)
---|
| 1549 | |
---|
def __str__(self):
    """Return '<CLASS[...]>' listing the str() of every cookie in the jar."""
    rendered = ", ".join([str(cookie) for cookie in self])
    return "<%s[%s]>" % (self.__class__, rendered)
---|
| 1554 | |
---|
| 1555 | |
---|
class LoadError(Exception):
    """Raised when a cookie file is not in the format the jar understands."""
---|
| 1557 | |
---|
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    Additional methods

    save(filename=None, ignore_discard=False, ignore_expires=False)
    load(filename=None, ignore_discard=False, ignore_expires=False)
    revert(filename=None, ignore_discard=False, ignore_expires=False)

    Additional public attributes

    filename: filename for loading and saving cookies

    Additional public readable attributes

    delayload: request that cookies are lazily loaded from disk; this is only
     a hint since this only affects performance, not behaviour (unless the
     cookies on disk are changing); a CookieJar object may ignore it (in fact,
     only MSIECookieJar lazily loads cookies at the moment)

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        See FileCookieJar.__doc__ for argument documentation.

        Nothing is read from the named file here; cookies are only loaded
        when the load or revert method is called.

        Raises ValueError if filename is neither None nor string-like.

        """
        CookieJar.__init__(self, policy)
        if not (filename is None or isstringlike(filename)):
            raise ValueError("filename must be string-like")
        self.filename = filename
        # Stored as a real boolean whatever truthy/falsy value was passed.
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        filename: name of file in which to save cookies
        ignore_discard: save even cookies set to be discarded
        ignore_expires: save even cookies that have expired

        The file is overwritten if it already exists, thus wiping all its
        cookies.  Saved cookies can be restored later using the load or revert
        methods.  If filename is not specified, self.filename is used; if
        self.filename is None, ValueError is raised.

        This implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Old cookies are kept unless overwritten by newly loaded ones.

        Arguments are as for .save().

        If filename is not specified, self.filename is used; if self.filename
        is None, ValueError is raised.  The named file must be in the format
        understood by the class, or LoadError will be raised.  This format will
        be identical to that written by the save method, unless the load format
        is not sufficiently well understood (as is the case for MSIECookieJar).

        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        # Parsing is delegated to self._really_load; the file is closed
        # whether or not parsing succeeds.
        stream = open(filename)
        try:
            self._really_load(stream, filename, ignore_discard, ignore_expires)
        finally:
            stream.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        If filename is not specified, self.filename is used; if self.filename
        is None, ValueError is raised before any cookie is touched.

        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        # Keep a copy of the current cookies so they can be put back if the
        # reload fails.
        saved_cookies = copy.deepcopy(self._cookies)
        self._cookies = {}
        try:
            self.load(filename, ignore_discard, ignore_expires)
        except (LoadError, IOError):
            self._cookies = saved_cookies
            raise
---|