root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/other_packages/_mechanize_dist/_clientcookie.py @ 3

リビジョン 3, 61.3 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""HTTP cookie handling for web clients.
2
3This module originally developed from my port of Gisle Aas' Perl module
4HTTP::Cookies, from the libwww-perl library.
5
6Docstrings, comments and debug strings in this code refer to the
7attributes of the HTTP cookie system as cookie-attributes, to distinguish
8them clearly from Python attributes.
9
10                        CookieJar____
11                        /     \      \
12            FileCookieJar      \      \
13             /    |   \         \      \
14 MozillaCookieJar | LWPCookieJar \      \
15                  |               |      \
16                  |   ---MSIEBase |       \
17                  |  /      |     |        \
18                  | /   MSIEDBCookieJar BSDDBCookieJar
19                  |/   
20               MSIECookieJar
21
22Comments to John J Lee <jjl@pobox.com>.
23
24
25Copyright 2002-2006 John J Lee <jjl@pobox.com>
26Copyright 1997-1999 Gisle Aas (original libwww-perl code)
27Copyright 2002-2003 Johnny Lee (original MSIE Perl code)
28
29This code is free software; you can redistribute it and/or modify it
30under the terms of the BSD or ZPL 2.1 licenses (see the file
31COPYING.txt included with the distribution).
32
33"""
34
35import sys, re, copy, time, struct, urllib, types, logging
36try:
37    import threading
38    _threading = threading; del threading
39except ImportError:
40    import dummy_threading
41    _threading = dummy_threading; del dummy_threading
42import httplib  # only for the default HTTP port
43
44MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
45                         "instance initialised with one)")
46DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
47
48from _headersutil import split_header_words, parse_ns_headers
49from _util import isstringlike
50import _rfc3986
51
52debug = logging.getLogger("mechanize.cookies").debug
53
54
def reraise_unmasked_exceptions(unmasked=()):
    """Re-raise the exception being handled unless it may be swallowed.

    This is meant to be called from inside one of this module's catch-all
    ``except:`` clauses, which exist to cope with input that is bad in
    unexpected ways.  The bare ``raise`` statements below re-raise the
    exception that is currently being handled.

    unmasked: tuple of extra exception classes that must never be swallowed;
     KeyboardInterrupt, SystemExit and MemoryError are always added to it

    If mechanize.USE_BARE_EXCEPT is false, the exception is always re-raised.
    When the exception is swallowed, its traceback is reported through
    warnings.warn instead.

    """
    import mechanize, warnings
    if not mechanize.USE_BARE_EXCEPT:
        raise
    never_masked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
    if issubclass(sys.exc_info()[0], never_masked):
        raise
    # The exception is being swallowed: report it as a warning so that the
    # problem is still visible to the user.
    import traceback, StringIO
    report = StringIO.StringIO()
    traceback.print_exc(None, report)
    warnings.warn("mechanize bug!\n%s" % report.getvalue(), stacklevel=2)
72
73
# Matches text ending in a dot followed by digits; used throughout this
# module as a cheap "looks like an IPv4 address" test.
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name.

    The text is rejected if it is empty, looks like an IPv4 address (see
    IPV4_RE), or starts or ends with a dot.

    """
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    return not (IPV4_RE.search(text) or
                text == "" or
                text[0] == "." or text[-1] == ".")

def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # B must have the form .B' where B' is a HDN string.
    if not (B.startswith(".") and is_HDN(B[1:])):
        return False
    # "A has the form NB" with non-empty N: B must be a *proper suffix* of A.
    # Merely finding B somewhere inside A is not enough, otherwise
    # "www.acme.com.evil.org" would domain-match ".acme.com".
    return len(A) > len(B) and A.endswith(B)
122
def liberal_is_HDN(text):
    """Return True if text loosely resembles a host domain name.

    The only thing ruled out is text that looks like an IPv4 address
    (according to IPV4_RE).  Used when accepting/blocking domains.

    """
    looks_like_ip = IPV4_RE.search(text) is not None
    return not looks_like_ip
130
def user_domain_match(A, B):
    """Return True if domain A matches block/allow-list entry B.

    A and B may be host domain names or IP addresses; the comparison is
    case-insensitive.

    If either argument looks like an IP address, only an exact match counts.
    Otherwise, an entry B starting with a dot matches any A that ends with
    B, and an entry without an initial dot must equal A exactly.

    """
    A = A.lower()
    B = B.lower()
    both_hdn = liberal_is_HDN(A) and liberal_is_HDN(B)
    if not both_hdn:
        # IP addresses must be identical
        return A == B
    if B.startswith("."):
        return A.endswith(B)
    return A == B
150
# Trailing ":<digits>" port suffix of a host string.
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the request's full URL; if the URL has none, the
    request's "Host" header is used instead (defaulting to "").

    Variation from RFC: any port is stripped and the returned value is
    lowercased, for convenient comparison.

    """
    netloc = _rfc3986.urlsplit(request.get_full_url())[1]
    if netloc is None:
        netloc = request.get_header("Host", "")

    # drop the port, if there is one, then normalise case
    return cut_port_re.sub("", netloc, 1).lower()
167
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective name
    is the request-host with ".local" appended when the request-host
    contains no dot and does not look like an IPv4 address; otherwise the
    two values are the same.

    """
    req_host = request_host(request)
    is_bare_name = "." not in req_host and not IPV4_RE.search(req_host)
    if is_bare_name:
        return req_host, req_host + ".local"
    return req_host, req_host
178
def request_path(request):
    """Return the request-URI, as defined by RFC 2965.

    The path component of the request's URL is passed through escape_path,
    then recombined with the query and fragment.  The result always starts
    with "/".

    """
    split_url = _rfc3986.urlsplit(request.get_full_url())
    path, query, frag = split_url[2:]
    escaped = escape_path(path)
    req_path = _rfc3986.urlunsplit((None, None, escaped, query, frag))
    if req_path.startswith("/"):
        return req_path
    return "/" + req_path
188
def request_port(request):
    """Return the port of the request's host, as a string.

    The port is whatever follows the first ":" in request.get_host().  If
    there is no ":", DEFAULT_HTTP_PORT is returned.  If the text after the
    ":" is not an integer, the problem is logged and None is returned.

    """
    host = request.get_host()
    unused_name, colon, port = host.partition(":")
    if not colon:
        return DEFAULT_HTTP_PORT
    try:
        int(port)
    except ValueError:
        debug("nonnumeric port: '%s'", port)
        return None
    return port
202
203# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
204# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
# A %-escape: "%" followed by two hexadecimal digits (captured as group 1).
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the %-escape matched by ESCAPED_CHAR_RE with uppercase digits."""
    hex_digits = match.group(1)
    return "%" + hex_digits.upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes.

    Unicode paths are encoded as UTF-8 before quoting.  Characters listed in
    HTTP_PATH_SAFE are left unquoted.

    """
    # We cannot know which character encoding produced a URL's existing
    # %-escapes, but one has to be chosen to escape invalid path characters.
    # UTF-8 is used, as recommended in the HTML 4.0 specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, types.UnicodeType):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
224
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    # Split H into A (discarded) and B at the first dot.
    unused_a, dot, b = h.partition(".")
    if not dot:
        return h
    if is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h
259
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    host_u = request_host(request)
    # the origin request's request-host was stuffed into request by
    # _urllib2_support.AbstractHTTPHandler
    reach_r = reach(request.origin_req_host)
    if domain_match(host_u, reach_r):
        return False
    return True
274
275
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    version: integer;
    name: string;
    value: string (may be None);
    port: string; None indicates no attribute was supplied (eg. "Port", rather
     than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
     string (eg. "80,8080")
    port_specified: boolean; true if a value was supplied with the Port
     cookie-attribute
    domain: string;
    domain_specified: boolean; true if Domain was explicitly set
    domain_initial_dot: boolean; true if Domain as set in HTTP header by server
     started with a dot (yes, this really is necessary!)
    path: string;
    path_specified: boolean; true if Path was explicitly set
    secure:  boolean; true if should only be returned over secure connection
    expires: integer; seconds since epoch (RFC 2965 cookies should calculate
     this value from the Max-Age attribute)
    discard: boolean, true if this is a session cookie; (if no expires value,
     this should be true)
    comment: string;
    comment_url: string;
    rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
     Set-Cookie2:) header, but had a version cookie-attribute of 1
    rest: mapping of other cookie-attributes

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes on the instance.

        version and expires are converted with int() unless they are None,
        domain is lowercased, and rest is shallow-copied.

        Raises ValueError if port is None while port_specified is True.

        """
        if version is not None: version = int(version)
        if expires is not None: expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        # copy, so that later changes to the caller's mapping don't leak in
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return True if the non-standard cookie-attribute name is present."""
        # "in" rather than the deprecated dict.has_key()
        return name in self._rest
    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard cookie-attribute name, or default."""
        return self._rest.get(name, default)
    def set_nonstandard_attr(self, name, value):
        """Set the non-standard cookie-attribute name to value."""
        self._rest[name] = value
    def nonstandard_attr_keys(self):
        """Return the names of the non-standard cookie-attributes."""
        return self._rest.keys()

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time that is <= now.

        now: seconds since epoch; defaults to the current time

        A cookie whose expires attribute is None never expires.

        """
        if now is None: now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __str__(self):
        if self.port is None: p = ""
        else: p = ":"+self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = []
        for name in ["version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ]:
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        # rest is stored under a private name, so it is handled separately
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
397
398
class CookiePolicy:
    """Interface deciding which cookies are accepted from and returned to
    a server.

    A policy may also modify cookies.

    DefaultCookiePolicy is the subclass that defines the standard rules for
    Netscape and RFC 2965 cookies -- derive from that one if you want a
    customised policy.

    Besides implementing set_ok and return_ok, implementations of this
    interface must supply the attributes below, which say which protocols
    should be used, and how.  They may be read and set at any time, though
    whether that makes complete sense from the protocol point of view is
    doubtful.

    Public attributes:

    netscape: implement netscape protocol
    rfc2965: implement RFC 2965 protocol
    rfc2109_as_netscape:
       WARNING: This argument will change or go away if is not accepted into
                the Python standard library in this form!
     If true, treat RFC 2109 cookies as though they were Netscape cookies.  The
     default is for this attribute to be None, which means treat 2109 cookies
     as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is,
     by default), and as Netscape cookies otherwise.
    hide_cookie2: don't add Cookie2 header to requests (the presence of
     this header indicates to the server that we understand RFC 2965
     cookies)

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Pre-expired cookies currently never reach this method -- the
        CookieJar class deletes such cookies itself.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.extract_cookies.__doc__

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.add_cookie_header.__doc__

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This exists as an optimization: it removes the need to check every
        cookie with a particular domain (which may involve reading many
        files).  The default implementations of domain_return_ok and
        path_return_ok (return True) leave all the work to return_ok.

        If domain_return_ok returns true for the cookie domain, path_return_ok
        is called for the cookie path.  Otherwise, path_return_ok and return_ok
        are never called for that cookie domain.  If path_return_ok returns
        true, return_ok is called with the Cookie object itself for a full
        check.  Otherwise, return_ok is never called for that cookie path.

        Note that domain_return_ok is called for every *cookie* domain, not
        just for the *request* domain.  For example, the function might be
        called with both ".acme.com" and "www.acme.com" if the request domain is
        "www.acme.com".  The same goes for path_return_ok.

        For argument documentation, see the docstring for return_ok.

        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        The default implementation always returns True.  See the docstring
        for domain_return_ok.

        """
        return True
483
484
485class DefaultCookiePolicy(CookiePolicy):
486    """Implements the standard rules for accepting and returning cookies.
487
488    Both RFC 2965 and Netscape cookies are covered.  RFC 2965 handling is
489    switched off by default.
490
491    The easiest way to provide your own policy is to override this class and
492    call its methods in your overriden implementations before adding your own
493    additional checks.
494
495    import mechanize
496    class MyCookiePolicy(mechanize.DefaultCookiePolicy):
497        def set_ok(self, cookie, request):
498            if not mechanize.DefaultCookiePolicy.set_ok(
499                self, cookie, request):
500                return False
501            if i_dont_want_to_store_this_cookie():
502                return False
503            return True
504
505    In addition to the features required to implement the CookiePolicy
506    interface, this class allows you to block and allow domains from setting
507    and receiving cookies.  There are also some strictness switches that allow
508    you to tighten up the rather loose Netscape protocol rules a little bit (at
509    the cost of blocking some benign cookies).
510
511    A domain blacklist and whitelist is provided (both off by default).  Only
512    domains not in the blacklist and present in the whitelist (if the whitelist
513    is active) participate in cookie setting and returning.  Use the
514    blocked_domains constructor argument, and blocked_domains and
515    set_blocked_domains methods (and the corresponding argument and methods for
516    allowed_domains).  If you set a whitelist, you can turn it off again by
517    setting it to None.
518
519    Domains in block or allow lists that do not start with a dot must
520    string-compare equal.  For example, "acme.com" matches a blacklist entry of
521    "acme.com", but "www.acme.com" does not.  Domains that do start with a dot
522    are matched by more specific domains too.  For example, both "www.acme.com"
523    and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
524    not).  IP addresses are an exception, and must match exactly.  For example,
525    if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is
526    blocked, but 193.168.1.2 is not.
527
528    Additional Public Attributes:
529
530    General strictness switches
531
532    strict_domain: don't allow sites to set two-component domains with
533     country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc.
534     This is far from perfect and isn't guaranteed to work!
535
536    RFC 2965 protocol strictness switches
537
538    strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
539     transactions (usually, an unverifiable transaction is one resulting from
540     a redirect or an image hosted on another site); if this is false, cookies
541     are NEVER blocked on the basis of verifiability
542
543    Netscape protocol strictness switches
544
545    strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
546     even to Netscape cookies
547    strict_ns_domain: flags indicating how strict to be with domain-matching
548     rules for Netscape cookies:
549      DomainStrictNoDots: when setting cookies, host prefix must not contain a
550       dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because
551       www.foo contains a dot)
552      DomainStrictNonDomain: cookies that did not explicitly specify a Domain
553       cookie-attribute can only be returned to a domain that string-compares
554       equal to the domain that set the cookie (eg. rockets.acme.com won't
555       be returned cookies from acme.com that had no Domain cookie-attribute)
556      DomainRFC2965Match: when setting cookies, require a full RFC 2965
557       domain-match
558      DomainLiberal and DomainStrict are the most useful combinations of the
559       above flags, for convenience
560    strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
561     have names starting with '$'
562    strict_ns_set_path: don't allow setting cookies whose path doesn't
563     path-match request URI
564
565    """
566
567    DomainStrictNoDots = 1
568    DomainStrictNonDomain = 2
569    DomainRFC2965Match = 4
570
571    DomainLiberal = 0
572    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
573
574    def __init__(self,
575                 blocked_domains=None, allowed_domains=None,
576                 netscape=True, rfc2965=False,
577                 # WARNING: this argument will change or go away if is not
578                 # accepted into the Python standard library in this form!
579                 # default, ie. treat 2109 as netscape iff not rfc2965
580                 rfc2109_as_netscape=None,
581                 hide_cookie2=False,
582                 strict_domain=False,
583                 strict_rfc2965_unverifiable=True,
584                 strict_ns_unverifiable=False,
585                 strict_ns_domain=DomainLiberal,
586                 strict_ns_set_initial_dollar=False,
587                 strict_ns_set_path=False,
588                 ):
589        """
590        Constructor arguments should be used as keyword arguments only.
591
592        blocked_domains: sequence of domain names that we never accept cookies
593         from, nor return cookies to
594        allowed_domains: if not None, this is a sequence of the only domains
595         for which we accept and return cookies
596
597        For other arguments, see CookiePolicy.__doc__ and
598        DefaultCookiePolicy.__doc__..
599
600        """
601        self.netscape = netscape
602        self.rfc2965 = rfc2965
603        self.rfc2109_as_netscape = rfc2109_as_netscape
604        self.hide_cookie2 = hide_cookie2
605        self.strict_domain = strict_domain
606        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
607        self.strict_ns_unverifiable = strict_ns_unverifiable
608        self.strict_ns_domain = strict_ns_domain
609        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
610        self.strict_ns_set_path = strict_ns_set_path
611
612        if blocked_domains is not None:
613            self._blocked_domains = tuple(blocked_domains)
614        else:
615            self._blocked_domains = ()
616
617        if allowed_domains is not None:
618            allowed_domains = tuple(allowed_domains)
619        self._allowed_domains = allowed_domains
620
621    def blocked_domains(self):
622        """Return the sequence of blocked domains (as a tuple)."""
623        return self._blocked_domains
624    def set_blocked_domains(self, blocked_domains):
625        """Set the sequence of blocked domains."""
626        self._blocked_domains = tuple(blocked_domains)
627
628    def is_blocked(self, domain):
629        for blocked_domain in self._blocked_domains:
630            if user_domain_match(domain, blocked_domain):
631                return True
632        return False
633
634    def allowed_domains(self):
635        """Return None, or the sequence of allowed domains (as a tuple)."""
636        return self._allowed_domains
637    def set_allowed_domains(self, allowed_domains):
638        """Set the sequence of allowed domains, or None."""
639        if allowed_domains is not None:
640            allowed_domains = tuple(allowed_domains)
641        self._allowed_domains = allowed_domains
642
643    def is_not_allowed(self, domain):
644        if self._allowed_domains is None:
645            return False
646        for allowed_domain in self._allowed_domains:
647            if user_domain_match(domain, allowed_domain):
648                return False
649        return True
650
651    def set_ok(self, cookie, request):
652        """
653        If you override set_ok, be sure to call this method.  If it returns
654        false, so should your subclass (assuming your subclass wants to be more
655        strict about which cookies to accept).
656
657        """
658        debug(" - checking cookie %s", cookie)
659
660        assert cookie.name is not None
661
662        for n in "version", "verifiability", "name", "path", "domain", "port":
663            fn_name = "set_ok_"+n
664            fn = getattr(self, fn_name)
665            if not fn(cookie, request):
666                return False
667
668        return True
669
670    def set_ok_version(self, cookie, request):
671        if cookie.version is None:
672            # Version is always set to 0 by parse_ns_headers if it's a Netscape
673            # cookie, so this must be an invalid RFC 2965 cookie.
674            debug("   Set-Cookie2 without version attribute (%s)", cookie)
675            return False
676        if cookie.version > 0 and not self.rfc2965:
677            debug("   RFC 2965 cookies are switched off")
678            return False
679        elif cookie.version == 0 and not self.netscape:
680            debug("   Netscape cookies are switched off")
681            return False
682        return True
683
684    def set_ok_verifiability(self, cookie, request):
685        if request.unverifiable and is_third_party(request):
686            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
687                debug("   third-party RFC 2965 cookie during "
688                             "unverifiable transaction")
689                return False
690            elif cookie.version == 0 and self.strict_ns_unverifiable:
691                debug("   third-party Netscape cookie during "
692                             "unverifiable transaction")
693                return False
694        return True
695
696    def set_ok_name(self, cookie, request):
697        # Try and stop servers setting V0 cookies designed to hack other
698        # servers that know both V0 and V1 protocols.
699        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
700            cookie.name.startswith("$")):
701            debug("   illegal name (starts with '$'): '%s'", cookie.name)
702            return False
703        return True
704
705    def set_ok_path(self, cookie, request):
706        if cookie.path_specified:
707            req_path = request_path(request)
708            if ((cookie.version > 0 or
709                 (cookie.version == 0 and self.strict_ns_set_path)) and
710                not req_path.startswith(cookie.path)):
711                debug("   path attribute %s is not a prefix of request "
712                      "path %s", cookie.path, req_path)
713                return False
714        return True
715
716    def set_ok_countrycode_domain(self, cookie, request):
717        """Return False if explicit cookie domain is not acceptable.
718
719        Called by set_ok_domain, for convenience of overriding by
720        subclasses.
721
722        """
723        if cookie.domain_specified and self.strict_domain:
724            domain = cookie.domain
725            # since domain was specified, we know that:
726            assert domain.startswith(".")
727            if domain.count(".") == 2:
728                # domain like .foo.bar
729                i = domain.rfind(".")
730                tld = domain[i+1:]
731                sld = domain[1:i]
732                if (sld.lower() in [
733                    "co", "ac",
734                    "com", "edu", "org", "net", "gov", "mil", "int",
735                    "aero", "biz", "cat", "coop", "info", "jobs", "mobi",
736                    "museum", "name", "pro", "travel",
737                    ] and
738                    len(tld) == 2):
739                    # domain like .co.uk
740                    return False
741        return True
742
743    def set_ok_domain(self, cookie, request):
744        if self.is_blocked(cookie.domain):
745            debug("   domain %s is in user block-list", cookie.domain)
746            return False
747        if self.is_not_allowed(cookie.domain):
748            debug("   domain %s is not in user allow-list", cookie.domain)
749            return False
750        if not self.set_ok_countrycode_domain(cookie, request):
751            debug("   country-code second level domain %s", cookie.domain)
752            return False
753        if cookie.domain_specified:
754            req_host, erhn = eff_request_host(request)
755            domain = cookie.domain
756            if domain.startswith("."):
757                undotted_domain = domain[1:]
758            else:
759                undotted_domain = domain
760            embedded_dots = (undotted_domain.find(".") >= 0)
761            if not embedded_dots and domain != ".local":
762                debug("   non-local domain %s contains no embedded dot",
763                      domain)
764                return False
765            if cookie.version == 0:
766                if (not erhn.endswith(domain) and
767                    (not erhn.startswith(".") and
768                     not ("."+erhn).endswith(domain))):
769                    debug("   effective request-host %s (even with added "
770                          "initial dot) does not end end with %s",
771                          erhn, domain)
772                    return False
773            if (cookie.version > 0 or
774                (self.strict_ns_domain & self.DomainRFC2965Match)):
775                if not domain_match(erhn, domain):
776                    debug("   effective request-host %s does not domain-match "
777                          "%s", erhn, domain)
778                    return False
779            if (cookie.version > 0 or
780                (self.strict_ns_domain & self.DomainStrictNoDots)):
781                host_prefix = req_host[:-len(domain)]
782                if (host_prefix.find(".") >= 0 and
783                    not IPV4_RE.search(req_host)):
784                    debug("   host prefix %s for domain %s contains a dot",
785                          host_prefix, domain)
786                    return False
787        return True
788
789    def set_ok_port(self, cookie, request):
790        if cookie.port_specified:
791            req_port = request_port(request)
792            if req_port is None:
793                req_port = "80"
794            else:
795                req_port = str(req_port)
796            for p in cookie.port.split(","):
797                try:
798                    int(p)
799                except ValueError:
800                    debug("   bad port %s (not numeric)", p)
801                    return False
802                if p == req_port:
803                    break
804            else:
805                debug("   request port (%s) not found in %s",
806                      req_port, cookie.port)
807                return False
808        return True
809
810    def return_ok(self, cookie, request):
811        """
812        If you override return_ok, be sure to call this method.  If it returns
813        false, so should your subclass (assuming your subclass wants to be more
814        strict about which cookies to return).
815
816        """
817        # Path has already been checked by path_return_ok, and domain blocking
818        # done by domain_return_ok.
819        debug(" - checking cookie %s", cookie)
820
821        for n in "version", "verifiability", "secure", "expires", "port", "domain":
822            fn_name = "return_ok_"+n
823            fn = getattr(self, fn_name)
824            if not fn(cookie, request):
825                return False
826        return True
827
828    def return_ok_version(self, cookie, request):
829        if cookie.version > 0 and not self.rfc2965:
830            debug("   RFC 2965 cookies are switched off")
831            return False
832        elif cookie.version == 0 and not self.netscape:
833            debug("   Netscape cookies are switched off")
834            return False
835        return True
836
837    def return_ok_verifiability(self, cookie, request):
838        if request.unverifiable and is_third_party(request):
839            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
840                debug("   third-party RFC 2965 cookie during unverifiable "
841                      "transaction")
842                return False
843            elif cookie.version == 0 and self.strict_ns_unverifiable:
844                debug("   third-party Netscape cookie during unverifiable "
845                      "transaction")
846                return False
847        return True
848
849    def return_ok_secure(self, cookie, request):
850        if cookie.secure and request.get_type() != "https":
851            debug("   secure cookie with non-secure request")
852            return False
853        return True
854
855    def return_ok_expires(self, cookie, request):
856        if cookie.is_expired(self._now):
857            debug("   cookie expired")
858            return False
859        return True
860
861    def return_ok_port(self, cookie, request):
862        if cookie.port:
863            req_port = request_port(request)
864            if req_port is None:
865                req_port = "80"
866            for p in cookie.port.split(","):
867                if p == req_port:
868                    break
869            else:
870                debug("   request port %s does not match cookie port %s",
871                      req_port, cookie.port)
872                return False
873        return True
874
875    def return_ok_domain(self, cookie, request):
876        req_host, erhn = eff_request_host(request)
877        domain = cookie.domain
878
879        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
880        if (cookie.version == 0 and
881            (self.strict_ns_domain & self.DomainStrictNonDomain) and
882            not cookie.domain_specified and domain != erhn):
883            debug("   cookie with unspecified domain does not string-compare "
884                  "equal to request domain")
885            return False
886
887        if cookie.version > 0 and not domain_match(erhn, domain):
888            debug("   effective request-host name %s does not domain-match "
889                  "RFC 2965 cookie domain %s", erhn, domain)
890            return False
891        if cookie.version == 0 and not ("."+erhn).endswith(domain):
892            debug("   request-host %s does not match Netscape cookie domain "
893                  "%s", req_host, domain)
894            return False
895        return True
896
897    def domain_return_ok(self, domain, request):
898        # Liberal check of domain.  This is here as an optimization to avoid
899        # having to load lots of MSIE cookie files unless necessary.
900
901        # Munge req_host and erhn to always start with a dot, so as to err on
902        # the side of letting cookies through.
903        dotted_req_host, dotted_erhn = eff_request_host(request)
904        if not dotted_req_host.startswith("."):
905            dotted_req_host = "."+dotted_req_host
906        if not dotted_erhn.startswith("."):
907            dotted_erhn = "."+dotted_erhn
908        if not (dotted_req_host.endswith(domain) or
909                dotted_erhn.endswith(domain)):
910            #debug("   request domain %s does not match cookie domain %s",
911            #      req_host, domain)
912            return False
913
914        if self.is_blocked(domain):
915            debug("   domain %s is in user block-list", domain)
916            return False
917        if self.is_not_allowed(domain):
918            debug("   domain %s is not in user allow-list", domain)
919            return False
920
921        return True
922
923    def path_return_ok(self, path, request):
924        debug("- checking cookie path=%s", path)
925        req_path = request_path(request)
926        if not req_path.startswith(path):
927            debug("  %s does not path-match %s", req_path, path)
928            return False
929        return True
930
931
def vals_sorted_by_key(adict):
    """Return a list of adict's values, ordered by their sorted keys."""
    # sorted() accepts any iterable of keys, so this does not depend on
    # keys() returning a list that can be sorted in place.
    return [adict.get(key) for key in sorted(adict.keys())]
936
class MappingIterator:
    """Iterates over nested mapping, depth-first, in sorted order by key.

    Any value that has an "items" attribute is treated as a nested mapping
    and descended into; every other value is yielded.  The iterator works
    under both iterator protocols (next and __next__).

    """
    def __init__(self, mapping):
        # LIFO stack of (values, index of next value, owning mapping)
        self._s = [(vals_sorted_by_key(mapping), 0, None)]

    def __iter__(self):
        return self

    def next(self):
        """Return the next non-mapping value, or raise StopIteration."""
        while self._s:
            vals, i, prev_item = self._s.pop()
            if i >= len(vals):
                # this level is exhausted: resume the enclosing one
                continue
            item = vals[i]
            # remember where to resume at this level
            self._s.append((vals, i + 1, prev_item))
            if hasattr(item, "items"):
                # mapping: descend into it before continuing this level
                self._s.append((vals_sorted_by_key(item), 0, item))
                continue
            # non-mapping
            return item
        raise StopIteration()

    __next__ = next
965
966
967# Used as second parameter to dict.get method, to distinguish absent
968# dict key from one with a None value.
class Absent:
    """Sentinel passed as the dict.get default to detect a missing key."""
970
971class CookieJar:
972    """Collection of HTTP cookies.
973
974    You may not need to know about this class: try mechanize.urlopen().
975
976    The major methods are extract_cookies and add_cookie_header; these are all
977    you are likely to need.
978
979    CookieJar supports the iterator protocol:
980
981    for cookie in cookiejar:
982        # do something with cookie
983
984    Methods:
985
986    add_cookie_header(request)
987    extract_cookies(response, request)
988    make_cookies(response, request)
989    set_cookie_if_ok(cookie, request)
990    set_cookie(cookie)
991    clear_session_cookies()
992    clear_expired_cookies()
993    clear(domain=None, path=None, name=None)
994
995    Public attributes
996
997    policy: CookiePolicy object
998
999    """
1000
1001    non_word_re = re.compile(r"\W")
1002    quote_re = re.compile(r"([\"\\])")
1003    strict_domain_re = re.compile(r"\.?[^.]*")
1004    domain_re = re.compile(r"[^.]*")
1005    dots_re = re.compile(r"^\.+")
1006
1007    def __init__(self, policy=None):
1008        """
1009        See CookieJar.__doc__ for argument documentation.
1010
1011        """
1012        if policy is None:
1013            policy = DefaultCookiePolicy()
1014        self._policy = policy
1015
1016        self._cookies = {}
1017
1018        # for __getitem__ iteration in pre-2.2 Pythons
1019        self._prev_getitem_index = 0
1020
1021    def set_policy(self, policy):
1022        self._policy = policy
1023
1024    def _cookies_for_domain(self, domain, request):
1025        cookies = []
1026        if not self._policy.domain_return_ok(domain, request):
1027            return []
1028        debug("Checking %s for cookies to return", domain)
1029        cookies_by_path = self._cookies[domain]
1030        for path in cookies_by_path.keys():
1031            if not self._policy.path_return_ok(path, request):
1032                continue
1033            cookies_by_name = cookies_by_path[path]
1034            for cookie in cookies_by_name.values():
1035                if not self._policy.return_ok(cookie, request):
1036                    debug("   not returning cookie")
1037                    continue
1038                debug("   it's a match")
1039                cookies.append(cookie)
1040        return cookies
1041
1042    def _cookies_for_request(self, request):
1043        """Return a list of cookies to be returned to server."""
1044        cookies = []
1045        for domain in self._cookies.keys():
1046            cookies.extend(self._cookies_for_domain(domain, request))
1047        return cookies
1048
1049    def _cookie_attrs(self, cookies):
1050        """Return a list of cookie-attributes to be returned to server.
1051
1052        like ['foo="bar"; $Path="/"', ...]
1053
1054        The $Version attribute is also added when appropriate (currently only
1055        once per request).
1056
1057        """
1058        # add cookies in order of most specific (ie. longest) path first
1059        def decreasing_size(a, b): return cmp(len(b.path), len(a.path))
1060        cookies.sort(decreasing_size)
1061
1062        version_set = False
1063
1064        attrs = []
1065        for cookie in cookies:
1066            # set version of Cookie header
1067            # XXX
1068            # What should it be if multiple matching Set-Cookie headers have
1069            #  different versions themselves?
1070            # Answer: there is no answer; was supposed to be settled by
1071            #  RFC 2965 errata, but that may never appear...
1072            version = cookie.version
1073            if not version_set:
1074                version_set = True
1075                if version > 0:
1076                    attrs.append("$Version=%s" % version)
1077
1078            # quote cookie value if necessary
1079            # (not for Netscape protocol, which already has any quotes
1080            #  intact, due to the poorly-specified Netscape Cookie: syntax)
1081            if ((cookie.value is not None) and
1082                self.non_word_re.search(cookie.value) and version > 0):
1083                value = self.quote_re.sub(r"\\\1", cookie.value)
1084            else:
1085                value = cookie.value
1086
1087            # add cookie-attributes to be returned in Cookie header
1088            if cookie.value is None:
1089                attrs.append(cookie.name)
1090            else:
1091                attrs.append("%s=%s" % (cookie.name, value))
1092            if version > 0:
1093                if cookie.path_specified:
1094                    attrs.append('$Path="%s"' % cookie.path)
1095                if cookie.domain.startswith("."):
1096                    domain = cookie.domain
1097                    if (not cookie.domain_initial_dot and
1098                        domain.startswith(".")):
1099                        domain = domain[1:]
1100                    attrs.append('$Domain="%s"' % domain)
1101                if cookie.port is not None:
1102                    p = "$Port"
1103                    if cookie.port_specified:
1104                        p = p + ('="%s"' % cookie.port)
1105                    attrs.append(p)
1106
1107        return attrs
1108
1109    def add_cookie_header(self, request):
1110        """Add correct Cookie: header to request (urllib2.Request object).
1111
1112        The Cookie2 header is also added unless policy.hide_cookie2 is true.
1113
1114        The request object (usually a urllib2.Request instance) must support
1115        the methods get_full_url, get_host, get_type, has_header, get_header,
1116        header_items and add_unredirected_header, as documented by urllib2, and
1117        the port attribute (the port number).  Actually,
1118        RequestUpgradeProcessor will automatically upgrade your Request object
1119        to one with has_header, get_header, header_items and
1120        add_unredirected_header, if it lacks those methods, for compatibility
1121        with pre-2.4 versions of urllib2.
1122
1123        """
1124        debug("add_cookie_header")
1125        self._policy._now = self._now = int(time.time())
1126
1127        req_host, erhn = eff_request_host(request)
1128        strict_non_domain = (
1129            self._policy.strict_ns_domain & self._policy.DomainStrictNonDomain)
1130
1131        cookies = self._cookies_for_request(request)
1132
1133        attrs = self._cookie_attrs(cookies)
1134        if attrs:
1135            if not request.has_header("Cookie"):
1136                request.add_unredirected_header("Cookie", "; ".join(attrs))
1137
1138        # if necessary, advertise that we know RFC 2965
1139        if self._policy.rfc2965 and not self._policy.hide_cookie2:
1140            for cookie in cookies:
1141                if cookie.version != 1 and not request.has_header("Cookie2"):
1142                    request.add_unredirected_header("Cookie2", '$Version="1"')
1143                    break
1144
1145        self.clear_expired_cookies()
1146
1147    def _normalized_cookie_tuples(self, attrs_set):
1148        """Return list of tuples containing normalised cookie information.
1149
1150        attrs_set is the list of lists of key,value pairs extracted from
1151        the Set-Cookie or Set-Cookie2 headers.
1152
1153        Tuples are name, value, standard, rest, where name and value are the
1154        cookie name and value, standard is a dictionary containing the standard
1155        cookie-attributes (discard, secure, version, expires or max-age,
1156        domain, path and port) and rest is a dictionary containing the rest of
1157        the cookie-attributes.
1158
1159        """
1160        cookie_tuples = []
1161
1162        boolean_attrs = "discard", "secure"
1163        value_attrs = ("version",
1164                       "expires", "max-age",
1165                       "domain", "path", "port",
1166                       "comment", "commenturl")
1167
1168        for cookie_attrs in attrs_set:
1169            name, value = cookie_attrs[0]
1170
1171            # Build dictionary of standard cookie-attributes (standard) and
1172            # dictionary of other cookie-attributes (rest).
1173
1174            # Note: expiry time is normalised to seconds since epoch.  V0
1175            # cookies should have the Expires cookie-attribute, and V1 cookies
1176            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
1177            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
1178            # accept either (but prefer Max-Age).
1179            max_age_set = False
1180
1181            bad_cookie = False
1182
1183            standard = {}
1184            rest = {}
1185            for k, v in cookie_attrs[1:]:
1186                lc = k.lower()
1187                # don't lose case distinction for unknown fields
1188                if lc in value_attrs or lc in boolean_attrs:
1189                    k = lc
1190                if k in boolean_attrs and v is None:
1191                    # boolean cookie-attribute is present, but has no value
1192                    # (like "discard", rather than "port=80")
1193                    v = True
1194                if standard.has_key(k):
1195                    # only first value is significant
1196                    continue
1197                if k == "domain":
1198                    if v is None:
1199                        debug("   missing value for domain attribute")
1200                        bad_cookie = True
1201                        break
1202                    # RFC 2965 section 3.3.3
1203                    v = v.lower()
1204                if k == "expires":
1205                    if max_age_set:
1206                        # Prefer max-age to expires (like Mozilla)
1207                        continue
1208                    if v is None:
1209                        debug("   missing or invalid value for expires "
1210                              "attribute: treating as session cookie")
1211                        continue
1212                if k == "max-age":
1213                    max_age_set = True
1214                    try:
1215                        v = int(v)
1216                    except ValueError:
1217                        debug("   missing or invalid (non-numeric) value for "
1218                              "max-age attribute")
1219                        bad_cookie = True
1220                        break
1221                    # convert RFC 2965 Max-Age to seconds since epoch
1222                    # XXX Strictly you're supposed to follow RFC 2616
1223                    #   age-calculation rules.  Remember that zero Max-Age is a
1224                    #   is a request to discard (old and new) cookie, though.
1225                    k = "expires"
1226                    v = self._now + v
1227                if (k in value_attrs) or (k in boolean_attrs):
1228                    if (v is None and
1229                        k not in ["port", "comment", "commenturl"]):
1230                        debug("   missing value for %s attribute" % k)
1231                        bad_cookie = True
1232                        break
1233                    standard[k] = v
1234                else:
1235                    rest[k] = v
1236
1237            if bad_cookie:
1238                continue
1239
1240            cookie_tuples.append((name, value, standard, rest))
1241
1242        return cookie_tuples
1243
1244    def _cookie_from_cookie_tuple(self, tup, request):
1245        # standard is dict of standard cookie-attributes, rest is dict of the
1246        # rest of them
1247        name, value, standard, rest = tup
1248
1249        domain = standard.get("domain", Absent)
1250        path = standard.get("path", Absent)
1251        port = standard.get("port", Absent)
1252        expires = standard.get("expires", Absent)
1253
1254        # set the easy defaults
1255        version = standard.get("version", None)
1256        if version is not None: version = int(version)
1257        secure = standard.get("secure", False)
1258        # (discard is also set if expires is Absent)
1259        discard = standard.get("discard", False)
1260        comment = standard.get("comment", None)
1261        comment_url = standard.get("commenturl", None)
1262
1263        # set default path
1264        if path is not Absent and path != "":
1265            path_specified = True
1266            path = escape_path(path)
1267        else:
1268            path_specified = False
1269            path = request_path(request)
1270            i = path.rfind("/")
1271            if i != -1:
1272                if version == 0:
1273                    # Netscape spec parts company from reality here
1274                    path = path[:i]
1275                else:
1276                    path = path[:i+1]
1277            if len(path) == 0: path = "/"
1278
1279        # set default domain
1280        domain_specified = domain is not Absent
1281        # but first we have to remember whether it starts with a dot
1282        domain_initial_dot = False
1283        if domain_specified:
1284            domain_initial_dot = bool(domain.startswith("."))
1285        if domain is Absent:
1286            req_host, erhn = eff_request_host(request)
1287            domain = erhn
1288        elif not domain.startswith("."):
1289            domain = "."+domain
1290
1291        # set default port
1292        port_specified = False
1293        if port is not Absent:
1294            if port is None:
1295                # Port attr present, but has no value: default to request port.
1296                # Cookie should then only be sent back on that port.
1297                port = request_port(request)
1298            else:
1299                port_specified = True
1300                port = re.sub(r"\s+", "", port)
1301        else:
1302            # No port attr present.  Cookie can be sent back on any port.
1303            port = None
1304
1305        # set default expires and discard
1306        if expires is Absent:
1307            expires = None
1308            discard = True
1309        elif expires <= self._now:
1310            # Expiry date in past is request to delete cookie.  This can't be
1311            # in DefaultCookiePolicy, because can't delete cookies there.
1312            try:
1313                self.clear(domain, path, name)
1314            except KeyError:
1315                pass
1316            debug("Expiring cookie, domain='%s', path='%s', name='%s'",
1317                  domain, path, name)
1318            return None
1319
1320        return Cookie(version,
1321                      name, value,
1322                      port, port_specified,
1323                      domain, domain_specified, domain_initial_dot,
1324                      path, path_specified,
1325                      secure,
1326                      expires,
1327                      discard,
1328                      comment,
1329                      comment_url,
1330                      rest)
1331
1332    def _cookies_from_attrs_set(self, attrs_set, request):
1333        cookie_tuples = self._normalized_cookie_tuples(attrs_set)
1334
1335        cookies = []
1336        for tup in cookie_tuples:
1337            cookie = self._cookie_from_cookie_tuple(tup, request)
1338            if cookie: cookies.append(cookie)
1339        return cookies
1340
1341    def _process_rfc2109_cookies(self, cookies):
1342        if self._policy.rfc2109_as_netscape is None:
1343            rfc2109_as_netscape = not self._policy.rfc2965
1344        else:
1345            rfc2109_as_netscape = self._policy.rfc2109_as_netscape
1346        for cookie in cookies:
1347            if cookie.version == 1:
1348                cookie.rfc2109 = True
1349                if rfc2109_as_netscape:
1350                    # treat 2109 cookies as Netscape cookies rather than
1351                    # as RFC2965 cookies
1352                    cookie.version = 0
1353
1354    def make_cookies(self, response, request):
1355        """Return sequence of Cookie objects extracted from response object.
1356
1357        See extract_cookies.__doc__ for the interfaces required of the
1358        response and request arguments.
1359
1360        """
1361        # get cookie-attributes for RFC 2965 and Netscape protocols
1362        headers = response.info()
1363        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
1364        ns_hdrs = headers.getheaders("Set-Cookie")
1365
1366        rfc2965 = self._policy.rfc2965
1367        netscape = self._policy.netscape
1368
1369        if ((not rfc2965_hdrs and not ns_hdrs) or
1370            (not ns_hdrs and not rfc2965) or
1371            (not rfc2965_hdrs and not netscape) or
1372            (not netscape and not rfc2965)):
1373            return []  # no relevant cookie headers: quick exit
1374
1375        try:
1376            cookies = self._cookies_from_attrs_set(
1377                split_header_words(rfc2965_hdrs), request)
1378        except:
1379            reraise_unmasked_exceptions()
1380            cookies = []
1381
1382        if ns_hdrs and netscape:
1383            try:
1384                # RFC 2109 and Netscape cookies
1385                ns_cookies = self._cookies_from_attrs_set(
1386                    parse_ns_headers(ns_hdrs), request)
1387            except:
1388                reraise_unmasked_exceptions()
1389                ns_cookies = []
1390            self._process_rfc2109_cookies(ns_cookies)
1391
1392            # Look for Netscape cookies (from Set-Cookie headers) that match
1393            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
1394            # For each match, keep the RFC 2965 cookie and ignore the Netscape
1395            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
1396            # bundled in with the Netscape cookies for this purpose, which is
1397            # reasonable behaviour.
1398            if rfc2965:
1399                lookup = {}
1400                for cookie in cookies:
1401                    lookup[(cookie.domain, cookie.path, cookie.name)] = None
1402
1403                def no_matching_rfc2965(ns_cookie, lookup=lookup):
1404                    key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
1405                    return not lookup.has_key(key)
1406                ns_cookies = filter(no_matching_rfc2965, ns_cookies)
1407
1408            if ns_cookies:
1409                cookies.extend(ns_cookies)
1410
1411        return cookies
1412
1413    def set_cookie_if_ok(self, cookie, request):
1414        """Set a cookie if policy says it's OK to do so.
1415
1416        cookie: mechanize.Cookie instance
1417        request: see extract_cookies.__doc__ for the required interface
1418
1419        """
1420        self._policy._now = self._now = int(time.time())
1421
1422        if self._policy.set_ok(cookie, request):
1423            self.set_cookie(cookie)
1424
1425    def set_cookie(self, cookie):
1426        """Set a cookie, without checking whether or not it should be set.
1427
1428        cookie: mechanize.Cookie instance
1429        """
1430        c = self._cookies
1431        if not c.has_key(cookie.domain): c[cookie.domain] = {}
1432        c2 = c[cookie.domain]
1433        if not c2.has_key(cookie.path): c2[cookie.path] = {}
1434        c3 = c2[cookie.path]
1435        c3[cookie.name] = cookie
1436
1437    def extract_cookies(self, response, request):
1438        """Extract cookies from response, where allowable given the request.
1439
1440        Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
1441        object passed as argument.  Any of these headers that are found are
1442        used to update the state of the object (subject to the policy.set_ok
1443        method's approval).
1444
1445        The response object (usually be the result of a call to
1446        mechanize.urlopen, or similar) should support an info method, which
1447        returns a mimetools.Message object (in fact, the 'mimetools.Message
1448        object' may be any object that provides a getallmatchingheaders
1449        method).
1450
1451        The request object (usually a urllib2.Request instance) must support
1452        the methods get_full_url and get_host, as documented by urllib2, and
1453        the port attribute (the port number).  The request is used to set
1454        default values for cookie-attributes as well as for checking that the
1455        cookie is OK to be set.
1456
1457        """
1458        debug("extract_cookies: %s", response.info())
1459        self._policy._now = self._now = int(time.time())
1460
1461        for cookie in self.make_cookies(response, request):
1462            if self._policy.set_ok(cookie, request):
1463                debug(" setting cookie: %s", cookie)
1464                self.set_cookie(cookie)
1465
1466    def clear(self, domain=None, path=None, name=None):
1467        """Clear some cookies.
1468
1469        Invoking this method without arguments will clear all cookies.  If
1470        given a single argument, only cookies belonging to that domain will be
1471        removed.  If given two arguments, cookies belonging to the specified
1472        path within that domain are removed.  If given three arguments, then
1473        the cookie with the specified name, path and domain is removed.
1474
1475        Raises KeyError if no matching cookie exists.
1476
1477        """
1478        if name is not None:
1479            if (domain is None) or (path is None):
1480                raise ValueError(
1481                    "domain and path must be given to remove a cookie by name")
1482            del self._cookies[domain][path][name]
1483        elif path is not None:
1484            if domain is None:
1485                raise ValueError(
1486                    "domain must be given to remove cookies by path")
1487            del self._cookies[domain][path]
1488        elif domain is not None:
1489            del self._cookies[domain]
1490        else:
1491            self._cookies = {}
1492
1493    def clear_session_cookies(self):
1494        """Discard all session cookies.
1495
1496        Discards all cookies held by object which had either no Max-Age or
1497        Expires cookie-attribute or an explicit Discard cookie-attribute, or
1498        which otherwise have ended up with a true discard attribute.  For
1499        interactive browsers, the end of a session usually corresponds to
1500        closing the browser window.
1501
1502        Note that the save method won't save session cookies anyway, unless you
1503        ask otherwise by passing a true ignore_discard argument.
1504
1505        """
1506        for cookie in self:
1507            if cookie.discard:
1508                self.clear(cookie.domain, cookie.path, cookie.name)
1509
1510    def clear_expired_cookies(self):
1511        """Discard all expired cookies.
1512
1513        You probably don't need to call this method: expired cookies are never
1514        sent back to the server (provided you're using DefaultCookiePolicy),
1515        this method is called by CookieJar itself every so often, and the save
1516        method won't save expired cookies anyway (unless you ask otherwise by
1517        passing a true ignore_expires argument).
1518
1519        """
1520        now = time.time()
1521        for cookie in self:
1522            if cookie.is_expired(now):
1523                self.clear(cookie.domain, cookie.path, cookie.name)
1524
1525    def __getitem__(self, i):
1526        if i == 0:
1527            self._getitem_iterator = self.__iter__()
1528        elif self._prev_getitem_index != i-1: raise IndexError(
1529            "CookieJar.__getitem__ only supports sequential iteration")
1530        self._prev_getitem_index = i
1531        try:
1532            return self._getitem_iterator.next()
1533        except StopIteration:
1534            raise IndexError()
1535
1536    def __iter__(self):
1537        return MappingIterator(self._cookies)
1538
1539    def __len__(self):
1540        """Return number of contained cookies."""
1541        i = 0
1542        for cookie in self: i = i + 1
1543        return i
1544
1545    def __repr__(self):
1546        r = []
1547        for cookie in self: r.append(repr(cookie))
1548        return "<%s[%s]>" % (self.__class__, ", ".join(r))
1549
1550    def __str__(self):
1551        r = []
1552        for cookie in self: r.append(str(cookie))
1553        return "<%s[%s]>" % (self.__class__, ", ".join(r))
1554
1555
class LoadError(Exception):
    """Raised when a cookie file is not in the format a jar's load expects."""
1557
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    This class does not itself define a file format: save raises
    NotImplementedError, and load delegates the actual parsing to a
    _really_load(f, filename, ignore_discard, ignore_expires) method that is
    not defined here.

    Additional methods

    save(filename=None, ignore_discard=False, ignore_expires=False)
    load(filename=None, ignore_discard=False, ignore_expires=False)
    revert(filename=None, ignore_discard=False, ignore_expires=False)

    Additional public attributes

    filename: filename for loading and saving cookies

    Additional public readable attributes

    delayload: request that cookies are lazily loaded from disk; this is only
     a hint since this only affects performance, not behaviour (unless the
     cookies on disk are changing); a CookieJar object may ignore it (in fact,
     only MSIECookieJar lazily loads cookies at the moment)

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        See FileCookieJar.__doc__ for argument documentation.

        Cookies are NOT loaded from the named file until either the load or
        revert method is called.

        Raises ValueError if filename is neither None nor string-like.

        """
        CookieJar.__init__(self, policy)
        if filename is not None and not isstringlike(filename):
            raise ValueError("filename must be string-like")
        self.filename = filename
        # Normalised to a real boolean; it is only a hint (see class docs).
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        filename: name of file in which to save cookies
        ignore_discard: save even cookies set to be discarded
        ignore_expires: save even cookies that have expired

        The file is overwritten if it already exists, thus wiping all its
        cookies.  Saved cookies can be restored later using the load or revert
        methods.  If filename is not specified, self.filename is used; if
        self.filename is None, ValueError is raised.

        This base implementation only documents the contract: it always
        raises NotImplementedError.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Old cookies are kept unless overwritten by newly loaded ones.

        Arguments are as for .save().

        If filename is not specified, self.filename is used; if self.filename
        is None, ValueError is raised.  The named file must be in the format
        understood by the class, or LoadError will be raised.  This format will
        be identical to that written by the save method, unless the load format
        is not sufficiently well understood (as is the case for MSIECookieJar).

        The file is opened here and always closed again, whether or not
        parsing succeeds; the parsing itself is done by self._really_load.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Arguments are as for .load().  If filename is not specified,
        self.filename is used; if self.filename is None, ValueError is raised
        before anything is cleared.

        Raises whatever load raises (LoadError, IOError, ...) if reversion is
        not successful; the exception propagates unchanged and the cookies
        held before the call are put back, so the object's state will not be
        altered if this happens.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        # Keep a copy of the current cookies so they can be reinstated if the
        # reload fails part-way through.
        old_state = copy.deepcopy(self._cookies)
        self._cookies = {}
        reverted = False
        try:
            self.load(filename, ignore_discard, ignore_expires)
            reverted = True
        finally:
            if not reverted:
                # Restore on ANY failure, not just LoadError/IOError:
                # otherwise an unexpected exception from load would leave the
                # jar emptied (or half-filled) despite the documented
                # guarantee.  The original exception keeps propagating.
                self._cookies = old_state
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。