root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/other_packages/_mechanize_dist/_msiecookiejar.py @ 3

リビジョン 3, 14.3 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""Microsoft Internet Explorer cookie loading on Windows.
2
3Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code)
4Copyright 2002-2006 John J Lee <jjl@pobox.com> (The Python port)
5
6This code is free software; you can redistribute it and/or modify it
7under the terms of the BSD or ZPL 2.1 licenses (see the file
8COPYING.txt included with the distribution).
9
10"""
11
12# XXX names and comments are not great here
13
14import os, re, time, struct, logging
15if os.name == "nt":
16    import _winreg
17
18from _clientcookie import FileCookieJar, CookieJar, Cookie, \
19     MISSING_FILENAME_TEXT, LoadError
20
# Shorthand used throughout this module: forwards to the debug() method of
# the "mechanize" logger.
debug = logging.getLogger("mechanize").debug
22
23
def regload(path, leaf):
    """Read a single value from the registry under HKEY_CURRENT_USER.

    path: registry key path, relative to HKEY_CURRENT_USER
    leaf: name of the value to read from that key

    Returns the data stored in the value, or None if querying the value
    fails with WindowsError.  A failure to open the key itself is not
    caught here and propagates to the caller.

    """
    # The key is only ever read, so request read access rather than
    # KEY_ALL_ACCESS, which can be refused on keys the user may not modify.
    key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0,
                          _winreg.KEY_READ)
    try:
        return _winreg.QueryValueEx(key, leaf)[0]
    except WindowsError:
        return None
    finally:
        # release the handle explicitly rather than leaving it to the GC
        _winreg.CloseKey(key)
32
# 1970 Jan 01 00:00:00 in Win32 FILETIME.  No "L" suffix: Python promotes
# large integer literals automatically, and the suffix is a syntax error
# outside Python 2.
WIN32_EPOCH = 0x019db1ded53e8000

def epoch_time_offset_from_win32_filetime(filetime):
    """Convert from win32 filetime to seconds-since-epoch value.

    MSIE stores create and expire times as Win32 FILETIME, which is 64
    bits of 100 nanosecond intervals since Jan 01 1601.

    mechanize expects time in 32-bit value expressed in seconds since the
    epoch (Jan 01 1970).

    filetime: integer FILETIME value

    Returns the whole number of seconds between the epoch and filetime
    (any fraction of a second is dropped).

    Raises ValueError if filetime lies before the epoch.

    """
    if filetime < WIN32_EPOCH:
        raise ValueError("filetime (%d) is before epoch (%d)" %
                         (filetime, WIN32_EPOCH))

    # FILETIME counts 100 ns ticks, so there are 10**7 of them per second
    ticks_per_second = 10000000
    return (filetime - WIN32_EPOCH) // ticks_per_second
50
def binary_to_char(c):
    """Return the two-digit upper-case hex representation of one byte.

    c: a one-character string, or an integer byte value (which is what
       iterating over a bytes object yields on Python 3)

    """
    if not isinstance(c, int):
        c = ord(c)
    return "%02X" % c

def binary_to_str(d):
    """Return the bytes of d as a string of upper-case hex digit pairs."""
    return "".join(map(binary_to_char, d))
53
class MSIEBase:
    """Reader for MSIE's cookie index (index.dat) and per-site cookie files.

    This class only holds the parsing code and the delayed-load
    bookkeeping.  Its methods also use self.delayload, self.load() and
    CookieJar.set_cookie(self, ...), none of which are defined here, so it
    has to be combined with a class that supplies them.

    """
    # signature expected at the very start of a cookies index.dat file
    magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*")
    # 4-byte signature of a padding record in index.dat
    padding = "\x0d\xf0\xad\x0b"

    # splits "<domain>/<path>" as stored in a cookie file
    msie_domain_re = re.compile(r"^([^/]+)(/.*)$")
    cookie_re = re.compile(r"Cookie\:.+\@([\x21-\xFF]+).*?"
                           r"(.+\@[\x21-\xFF]+\.txt)")

    # path under HKEY_CURRENT_USER from which to get location of index.dat
    reg_path = r"software\microsoft\windows" \
               r"\currentversion\explorer\shell folders"
    reg_key = "Cookies"

    def __init__(self):
        # maps domain -> (cookie_file, ignore_discard, ignore_expires) for
        # cookie files that have been found but not yet read
        self._delayload_domains = {}

    def _delayload_domain(self, domain):
        """If necessary, lazily load the cookies recorded for domain.

        Does nothing when no delayed load is pending for domain.  When the
        cookie file cannot be read the failure is logged and the pending
        entry is kept; the entry is removed only after a successful load.

        """
        delayload_info = self._delayload_domains.get(domain)
        if delayload_info is None:
            return

        cookie_file, ignore_discard, ignore_expires = delayload_info
        try:
            self.load_cookie_data(cookie_file,
                                  ignore_discard, ignore_expires)
        except (LoadError, IOError):
            debug("error reading cookie file, skipping: %s", cookie_file)
        else:
            del self._delayload_domains[domain]

    def _load_cookies_from_file(self, filename):
        """Parse one MSIE cookie file into a list of dicts.

        Each record in the file is nine lines long: key, value,
        domain/path, flags, low and high words of the expiry time, low and
        high words of the creation time, and a "*" separator line.

        Returns a list of dicts with the keys KEY, VALUE, DOMAIN, PATH,
        FLAGS, HIXP, LOXP, HICREATE and LOCREATE.  Parsing stops at the
        first incomplete or malformed record; records read before it are
        still returned.  Records whose domain/path line has no "/" are
        dropped.

        """
        debug("Loading MSIE cookies file: %s", filename)
        cookies = []

        cookies_fh = open(filename)

        try:
            readline = cookies_fh.readline

            def getstr():
                return readline().rstrip()

            def getlong():
                # int() promotes to long by itself where that matters
                return int(getstr())

            while 1:
                key = readline()
                if key == "":
                    # end of file
                    break
                key = key.rstrip()

                try:
                    value = getstr()
                    domain_path = getstr()
                    flags = getlong()  # 0x2000 bit is for secure I think
                    lo_expire = getlong()
                    hi_expire = getlong()
                    lo_create = getlong()
                    hi_create = getlong()
                    sep = getstr()
                except ValueError:
                    # A numeric field was empty (truncated file) or not a
                    # number: treat it like any other malformed record
                    # instead of letting the ValueError escape to callers
                    # that only expect LoadError / IOError.
                    break

                # the numeric fields can never equal "", so only the text
                # fields and the separator need checking
                if "" in (key, value, domain_path) or sep != "*":
                    break

                m = self.msie_domain_re.search(domain_path)
                if m:
                    cookies.append({"KEY": key, "VALUE": value,
                                    "DOMAIN": m.group(1),
                                    "PATH": m.group(2),
                                    "FLAGS": flags, "HIXP": hi_expire,
                                    "LOXP": lo_expire, "HICREATE": hi_create,
                                    "LOCREATE": lo_create})
        finally:
            cookies_fh.close()

        return cookies

    def load_cookie_data(self, filename,
                         ignore_discard=False, ignore_expires=False):
        r"""Load cookies from file containing actual cookie data.

        Old cookies are kept unless overwritten by newly loaded ones.

        You should not call this method if the delayload attribute is set.

        I think each of these files contain all cookies for one user, domain,
        and path.

        filename: file containing cookies -- usually found in a file like
         C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt
        ignore_discard: if true, also keep cookies whose expiry time has
         already passed (these are marked as discard cookies here)
        ignore_expires: if true, also keep cookies for which is_expired()
         is true

        A ValueError raised by epoch_time_offset_from_win32_filetime for an
        expiry time before 1970 is not caught here.

        """
        now = int(time.time())

        for cookie in self._load_cookies_from_file(filename):
            flags = cookie["FLAGS"]
            secure = ((flags & 0x2000) != 0)
            # reassemble the 64-bit FILETIME from its two 32-bit halves
            filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
            expires = epoch_time_offset_from_win32_filetime(filetime)
            discard = expires < now
            domain = cookie["DOMAIN"]
            initial_dot = domain.startswith(".")
            # MSIE 5 does not record whether the domain cookie-attribute
            # was specified.
            # Assuming it wasn't (unless there is a leading dot) is
            # conservative, because with strict domain matching this will
            # match less frequently; with regular Netscape tail-matching,
            # this will match at exactly the same times that
            # domain_specified = True would.  It also means we don't have
            # to prepend a dot to achieve consistency with our own &
            # Mozilla's domain-munging scheme.
            domain_specified = initial_dot

            # assume path_specified is false
            # XXX is there other stuff in here? -- eg. comment, commentURL?
            c = Cookie(0,
                       cookie["KEY"], cookie["VALUE"],
                       None, False,
                       domain, domain_specified, initial_dot,
                       cookie["PATH"], False,
                       secure,
                       expires,
                       discard,
                       None,
                       None,
                       {"flags": flags})
            if not ignore_discard and c.discard:
                continue
            if not ignore_expires and c.is_expired(now):
                continue
            CookieJar.set_cookie(self, c)

    def load_from_registry(self, ignore_discard=False, ignore_expires=False,
                           username=None):
        """Find the cookies index file via the registry and load it.

        username: only required on win9x

        Raises LoadError if the registry does not give the location of the
        cookies directory.

        """
        cookies_dir = regload(self.reg_path, self.reg_key)
        if cookies_dir is None:
            # regload() returns None when the value cannot be queried;
            # fail with a meaningful error rather than inside os.path.join
            raise LoadError("could not find location of MSIE cookies "
                            "directory in the registry")
        filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT"))
        self.load(filename, ignore_discard, ignore_expires, username)

    def _really_load(self, index, filename, ignore_discard, ignore_expires,
                     username):
        """Walk the records of an open index.dat file and load its cookies.

        index: open file object for the index file; record signatures are
         compared against plain string literals, so read() must return str
        filename: path of the index file; cookie files are looked up in the
         same directory
        username: user whose cookies are wanted; defaults to the lower-cased
         USERNAME environment variable

        When self.delayload is false every referenced cookie file is read
        straight away (unreadable files are logged and skipped); otherwise
        the files are only recorded in self._delayload_domains.

        Raises LoadError if the file is too short, does not carry the
        expected signature, or contains a malformed or unrecognized record.

        """
        if username is None:
            username = os.environ['USERNAME'].lower()

        cookie_dir = os.path.dirname(filename)

        header = index.read(256)
        if len(header) != 256:
            raise LoadError("%s file is too short" % filename)

        # Cookies' index.dat file starts with 32 bytes of signature
        # followed by an offset to the first record, stored as a little-
        # endian DWORD.
        sig = header[:32]
        size = struct.unpack("<L", header[32:36])[0]

        # check that sig is valid
        if not self.magic_re.match(sig) or size != 0x4000:
            raise LoadError("%s ['%s' %s] does not seem to contain cookies" %
                          (str(filename), sig, size))

        # skip to start of first record
        index.seek(size, 0)

        sector = 128  # size of sector in bytes

        # The username is literal text, not a pattern, so escape it; the
        # regex does not change between records, so compile it only once.
        user = re.escape(username)
        cookie_re = re.compile(
            r"Cookie\:%s\@([\x21-\xFF]+).*?"
            r"(%s\@[\x21-\xFF]+\.txt)" % (user, user),
            re.I)

        while 1:
            # Cookies are usually in two contiguous sectors, so read in two
            # sectors and adjust if not a Cookie.
            data = index.read(2 * sector)
            if len(data) != 2 * sector:
                break

            # Each record starts with a 4-byte signature and a count
            # (little-endian DWORD) of sectors for the record.
            sig, size, data = data[:4], data[4:8], data[8:]
            size = struct.unpack("<L", size)[0]

            # bytes of this record that lie beyond the two sectors read
            to_read = (size - 2) * sector

            if sig != "URL ":
                # These checks used to be assert statements.  The first one
                # asserted a (condition, message) tuple and so could never
                # fail; all of them disappear under -O.  They validate file
                # contents, so raise LoadError explicitly instead.
                if sig not in ("HASH", "LEAK",
                               self.padding, "\x00\x00\x00\x00"):
                    raise LoadError(
                        "unrecognized MSIE index.dat record: %s" %
                        binary_to_str(sig))
                if sig == "\x00\x00\x00\x00":
                    # assume we've got all the cookies, and stop
                    break
                if sig == self.padding:
                    continue
                # skip the rest of this record
                if to_read < 0:
                    # a record shorter than the two sectors already read
                    # would mean seeking backwards
                    raise LoadError(
                        "malformed MSIE index.dat record: %s has a size "
                        "of %d sectors" % (binary_to_str(sig), size))
                if to_read > 0:
                    index.seek(to_read, 1)
                continue

            # read in rest of record if necessary
            if size > 2:
                more_data = index.read(to_read)
                if len(more_data) != to_read:
                    break
                data = data + more_data

            m = cookie_re.search(data)
            if not m:
                continue

            cookie_file = os.path.join(cookie_dir, m.group(2))
            if not self.delayload:
                try:
                    self.load_cookie_data(cookie_file,
                                          ignore_discard, ignore_expires)
                except (LoadError, IOError):
                    debug("error reading cookie file, skipping: %s",
                          cookie_file)
            else:
                # remember where this domain's cookies live; anything from
                # the first "/" onwards is not part of the domain
                domain = m.group(1)
                i = domain.find("/")
                if i != -1:
                    domain = domain[:i]

                self._delayload_domains[domain] = (
                    cookie_file, ignore_discard, ignore_expires)
297
298
class MSIECookieJar(MSIEBase, FileCookieJar):
    """FileCookieJar that reads from the Windows MSIE cookies database.

    MSIECookieJar can read the cookie files of Microsoft Internet Explorer
    (MSIE) for Windows version 5 on Windows NT and version 6 on Windows XP and
    Windows 98.  Other configurations may also work, but are untested.  Saving
    cookies in MSIE format is NOT supported.  If you save cookies, they'll be
    in the usual Set-Cookie3 format, which you can read back in using an
    instance of the plain old CookieJar class.  Don't save using the same
    filename that you loaded cookies from, because you may succeed in
    clobbering your MSIE cookies index file!

    You should be able to have mechanize share Internet Explorer's cookies
    like this (note you need to supply a username to load_from_registry if
    you're on Windows 9x or Windows ME):

    cj = MSIECookieJar(delayload=1)
    # find cookies index file in registry and load cookies from it
    cj.load_from_registry()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
    response = opener.open("http://example.com/")

    Iterating over a delayloaded MSIECookieJar instance will not cause any
    cookies to be read from disk.  To force reading of all cookies from disk,
    call read_all_cookies.  Note that the following methods iterate over self:
    clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
    and as_string.

    Additional methods:

    load_from_registry(ignore_discard=False, ignore_expires=False,
                       username=None)
    load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
    read_all_cookies()

    """
    def __init__(self, filename=None, delayload=False, policy=None):
        MSIEBase.__init__(self)
        FileCookieJar.__init__(self, filename, delayload, policy)

    def set_cookie(self, cookie):
        """Store cookie, first loading any pending cookies for its domain."""
        if self.delayload:
            # load the on-disk cookies first so that the new cookie
            # overwrites them rather than the other way round
            self._delayload_domain(cookie.domain)
        CookieJar.set_cookie(self, cookie)

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        # consider the domains already in memory as well as those whose
        # cookie files have not been read yet
        domains = self._cookies.copy()
        domains.update(self._delayload_domains)

        cookies = []
        for domain in list(domains):
            cookies.extend(self._cookies_for_domain(domain, request))
        return cookies

    def _cookies_for_domain(self, domain, request):
        """Return the cookies stored for domain that apply to request.

        The domain's cookie file is only read (when delayloading) if the
        policy accepts the domain for this request.

        """
        if not self._policy.domain_return_ok(domain, request):
            return []
        debug("Checking %s for cookies to return", domain)
        if self.delayload:
            self._delayload_domain(domain)
        return CookieJar._cookies_for_domain(self, domain, request)

    def read_all_cookies(self):
        """Eagerly read in all cookies."""
        if self.delayload:
            # _delayload_domain() deletes entries from the dict as it goes,
            # so iterate over a snapshot of the keys
            for domain in list(self._delayload_domains):
                self._delayload_domain(domain)

    def load(self, filename, ignore_discard=False, ignore_expires=False,
             username=None):
        """Load cookies from an MSIE 'index.dat' cookies index file.

        filename: full path to cookie index file; if None, self.filename
         is used
        username: only required on win9x

        Raises ValueError if neither filename nor self.filename is set.

        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        index = open(filename, "rb")

        try:
            self._really_load(index, filename, ignore_discard, ignore_expires,
                              username)
        finally:
            index.close()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。