root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/other_packages/_mechanize_dist/_opener.py @ 3

リビジョン 3, 13.4 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""Integration with Python standard library module urllib2: OpenerDirector
2class.
3
4Copyright 2004-2006 John J Lee <jjl@pobox.com>
5
6This code is free software; you can redistribute it and/or modify it
7under the terms of the BSD or ZPL 2.1 licenses (see the file
8COPYING.txt included with the distribution).
9
10"""
11
12import os, urllib2, bisect, urllib, httplib, types, tempfile
13try:
14    import threading as _threading
15except ImportError:
16    import dummy_threading as _threading
17try:
18    set
19except NameError:
20    import sets
21    set = sets.Set
22
23import _http
24import _upgrade
25import _rfc3986
26import _response
27from _util import isstringlike
28from _request import Request
29
30
class ContentTooShortError(urllib2.URLError):
    """Raised by retrieve() when fewer bytes were read than the
    Content-Length header promised.

    The .result attribute holds the (filename, headers) pair that
    retrieve() would otherwise have returned.
    """
    def __init__(self, reason, result):
        self.result = result
        urllib2.URLError.__init__(self, reason)
35
36
class OpenerDirector(urllib2.OpenerDirector):
    """urllib2.OpenerDirector extended with request/response processing.

    Handlers may define scheme-specific pre-processors (e.g. http_request)
    and post-processors (e.g. http_response), or any_request / any_response
    methods applied to every scheme.  Handler methods are indexed lazily by
    _maybe_reindex_handlers().  Also adds a urllib.urlretrieve-style
    .retrieve() method whose temporary files are unlinked by .close().
    """

    def __init__(self):
        urllib2.OpenerDirector.__init__(self)
        # really none of these are (sanely) public -- the lack of initial
        # underscore on some is just due to following urllib2
        self.process_response = {}
        self.process_request = {}
        self._any_request = {}
        self._any_response = {}
        self._handler_index_valid = True
        # filenames created by retrieve(); unlinked in close()
        self._tempfiles = []

    def add_handler(self, handler):
        """Register handler with this opener (no-op if already present)."""
        if handler in self.handlers:
            return
        # XXX why does self.handlers need to be sorted?
        bisect.insort(self.handlers, handler)
        handler.add_parent(self)
        # force a rebuild of the dispatch tables on next open()
        self._handler_index_valid = False

    def _maybe_reindex_handlers(self):
        """Rebuild the scheme/method dispatch tables if they are stale.

        Scans every handler's method names and classifies them into
        handle_open, handle_error, process_request, process_response and
        the any_request/any_response sets.  Handlers contributing no
        recognized method are removed from self.handlers.
        """
        if self._handler_index_valid:
            return

        handle_error = {}
        handle_open = {}
        process_request = {}
        process_response = {}
        any_request = set()
        any_response = set()
        unwanted = []

        for handler in self.handlers:
            added = False
            for meth in dir(handler):
                if meth in ["redirect_request", "do_open", "proxy_open"]:
                    # oops, coincidental match
                    continue

                if meth == "any_request":
                    any_request.add(handler)
                    added = True
                    continue
                elif meth == "any_response":
                    any_response.add(handler)
                    added = True
                    continue

                # split e.g. "http_error_404" into scheme/condition/kind
                ii = meth.find("_")
                scheme = meth[:ii]
                condition = meth[ii+1:]

                if condition.startswith("error"):
                    jj = meth[ii+1:].find("_") + ii + 1
                    kind = meth[jj+1:]
                    try:
                        # numeric HTTP status codes are indexed as ints
                        kind = int(kind)
                    except ValueError:
                        pass
                    lookup = handle_error.setdefault(scheme, {})
                elif condition == "open":
                    kind = scheme
                    lookup = handle_open
                elif condition == "request":
                    kind = scheme
                    lookup = process_request
                elif condition == "response":
                    kind = scheme
                    lookup = process_response
                else:
                    continue

                lookup.setdefault(kind, set()).add(handler)
                added = True

            if not added:
                unwanted.append(handler)

        for handler in unwanted:
            self.handlers.remove(handler)

        # sort indexed methods
        # XXX could be cleaned up
        # NOTE(review): the process_request/process_response entries are
        # left as sets here (not sorted lists); open() sorts them per call.
        for lookup in [process_request, process_response]:
            for scheme, handlers in lookup.iteritems():
                lookup[scheme] = handlers
        for scheme, lookup in handle_error.iteritems():
            for code, handlers in lookup.iteritems():
                handlers = list(handlers)
                handlers.sort()
                lookup[code] = handlers
        for scheme, handlers in handle_open.iteritems():
            handlers = list(handlers)
            handlers.sort()
            handle_open[scheme] = handlers

        # cache the indexes
        self.handle_error = handle_error
        self.handle_open = handle_open
        self.process_request = process_request
        self.process_response = process_response
        self._any_request = any_request
        self._any_response = any_response

    def _request(self, url_or_req, data, visit):
        """Coerce url_or_req into a Request carrying data and a .visit flag."""
        if isstringlike(url_or_req):
            req = Request(url_or_req, data, visit=visit)
        else:
            # already a urllib2.Request or mechanize.Request instance
            req = url_or_req
            if data is not None:
                req.add_data(data)
            # XXX yuck, give request a .visit attribute if it doesn't have one
            try:
                req.visit
            except AttributeError:
                req.visit = None
            if visit is not None:
                req.visit = visit
        return req

    def open(self, fullurl, data=None):
        """Open fullurl (URL string or Request object).

        Runs scheme-specific and any_request pre-processors on the request,
        performs the fetch via the base class, then runs any_response and
        scheme-specific post-processors on the response.
        """
        req = self._request(fullurl, data, None)
        req_scheme = req.get_type()

        self._maybe_reindex_handlers()

        # pre-process request
        # XXX should we allow a Processor to change the URL scheme
        #   of the request?
        request_processors = set(self.process_request.get(req_scheme, []))
        request_processors.update(self._any_request)
        request_processors = list(request_processors)
        request_processors.sort()
        for processor in request_processors:
            for meth_name in ["any_request", req_scheme+"_request"]:
                meth = getattr(processor, meth_name, None)
                if meth:
                    req = meth(req)

        # In Python >= 2.4, .open() supports processors already, so we must
        # call ._open() instead.
        urlopen = getattr(urllib2.OpenerDirector, "_open",
                          urllib2.OpenerDirector.open)
        response = urlopen(self, req, data)

        # post-process response
        response_processors = set(self.process_response.get(req_scheme, []))
        response_processors.update(self._any_response)
        response_processors = list(response_processors)
        response_processors.sort()
        for processor in response_processors:
            for meth_name in ["any_response", req_scheme+"_response"]:
                meth = getattr(processor, meth_name, None)
                if meth:
                    response = meth(req, response)

        return response

    def error(self, proto, *args):
        """Dispatch an error to the registered <proto>_error handlers.

        For http/https the specific http_error_<code> chain is tried first,
        then http_error_default as a fallback.
        """
        if proto in ['http', 'https']:
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = apply(self._call_chain, args)
        if result:
            return result

        if http_err:
            # no handler dealt with the specific code; try the default chain
            args = (dict, 'default', 'http_error_default') + orig_args
            return apply(self._call_chain, args)

    # chunk size used by retrieve() when copying the response body
    BLOCK_SIZE = 1024*8
    def retrieve(self, fullurl, filename=None, reporthook=None, data=None):
        """Returns (filename, headers).

        For remote objects, the default filename will refer to a temporary
        file.  Temporary files are removed when the OpenerDirector.close()
        method is called.

        For file: URLs, at present the returned filename is None.  This may
        change in future.

        If the actual number of bytes read is less than indicated by the
        Content-Length header, raises ContentTooShortError (a URLError
        subclass).  The exception's .result attribute contains the (filename,
        headers) that would have been returned.

        """
        req = self._request(fullurl, data, False)
        scheme = req.get_type()
        fp = self.open(req)
        headers = fp.info()
        if filename is None and scheme == 'file':
            # XXX req.get_selector() seems broken here, return None,
            #   pending sanity :-/
            return None, headers
            #return urllib.url2pathname(req.get_selector()), headers
        if filename:
            tfp = open(filename, 'wb')
        else:
            # NOTE(review): fullurl may be a Request object here, but
            # urlsplit expects a URL string -- TODO confirm against callers
            path = _rfc3986.urlsplit(fullurl)[2]
            suffix = os.path.splitext(path)[1]
            fd, filename = tempfile.mkstemp(suffix)
            self._tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')

        result = filename, headers
        bs = self.BLOCK_SIZE
        size = -1
        read = 0
        blocknum = 0
        if reporthook:
            # headers behaves like a case-insensitive mapping here
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            reporthook(blocknum, bs, size)
        while 1:
            block = fp.read(bs)
            if block == "":
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            if reporthook:
                reporthook(blocknum, bs, size)
        fp.close()
        tfp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: "
                "got only %i out of %i bytes" % (read, size),
                result
                )

        return result

    def close(self):
        """Close the opener and delete any temporary files from retrieve()."""
        urllib2.OpenerDirector.close(self)

        # make it very obvious this object is no longer supposed to be used
        self.open = self.error = self.retrieve = self.add_handler = None

        if self._tempfiles:
            for filename in self._tempfiles:
                try:
                    os.unlink(filename)
                except OSError:
                    # best-effort cleanup; the file may already be gone
                    pass
            del self._tempfiles[:]
298
299
def wrapped_open(urlopen, process_response_object, fullurl, data=None):
    """Call urlopen(fullurl, data) and pass the response through
    process_response_object().

    An HTTPError that carries a response body (error.fp) is also
    post-processed, then re-raised; an HTTPError without one propagates
    untouched.
    """
    ok = True
    try:
        response = urlopen(fullurl, data)
    except urllib2.HTTPError as error:
        if error.fp is None:  # not a response object, nothing to process
            raise
        ok = False
        response = error

    if response is not None:
        response = process_response_object(response)

    if ok:
        return response
    raise response
316
class ResponseProcessingOpener(OpenerDirector):
    """OpenerDirector that routes every response (including HTTPError
    responses) through the process_response_object() hook."""

    def open(self, fullurl, data=None):
        """Open fullurl, post-processing the response via the hook."""
        base_open = OpenerDirector.open
        def bound_open(url, data=None):
            return base_open(self, url, data)
        return wrapped_open(bound_open, self.process_response_object,
                            fullurl, data)

    def process_response_object(self, response):
        """Hook for subclasses; the default implementation is the identity."""
        return response
327
328
class SeekableResponseOpener(ResponseProcessingOpener):
    # Opener whose responses are wrapped so their bodies support .seek(),
    # allowing callers to re-read response data.
    def process_response_object(self, response):
        """Wrap response so that its body can be rewound with .seek()."""
        return _response.seek_wrapped_response(response)
332
333
class OpenerFactory:
    """Builds opener objects from handler classes/instances.

    This class's interface is quite likely to change."""

    default_classes = [
        # handlers
        urllib2.ProxyHandler,
        urllib2.UnknownHandler,
        _http.HTTPHandler,  # derived from new AbstractHTTPHandler
        _http.HTTPDefaultErrorHandler,
        _http.HTTPRedirectHandler,  # bugfixed
        urllib2.FTPHandler,
        urllib2.FileHandler,
        # processors
        _upgrade.HTTPRequestUpgradeProcessor,
        _http.HTTPCookieProcessor,
        _http.HTTPErrorProcessor,
        ]
    if hasattr(httplib, 'HTTPS'):
        # only offer HTTPS handling when Python was built with SSL support
        default_classes.append(_http.HTTPSHandler)
    handlers = []
    replacement_handlers = []

    def __init__(self, klass=OpenerDirector):
        # klass: the OpenerDirector (sub)class to instantiate
        self.klass = klass

    def build_opener(self, *handlers):
        """Create an opener object from a list of handlers and processors.

        The opener will use several default handlers and processors, including
        support for HTTP and FTP.

        If any of the handlers passed as arguments are subclasses of the
        default handlers, the default handlers will not be used.

        """
        opener = self.klass()
        default_classes = list(self.default_classes)
        # use a set: two handlers subclassing the same default must not
        # queue the same class for removal twice (list.remove would raise
        # ValueError on the second attempt)
        skip = set()
        for klass in default_classes:
            for check in handlers:
                # accept both old-style (classobj) and new-style (type)
                # classes; the former "type(check) == types.ClassType" test
                # silently missed new-style handler classes (cf. the
                # equivalent fix in Python 2.6's urllib2.build_opener)
                if isinstance(check, (types.ClassType, type)):
                    if issubclass(check, klass):
                        skip.add(klass)
                elif isinstance(check, klass):
                    # an instance of a subclass also displaces the default
                    skip.add(klass)
        for klass in skip:
            default_classes.remove(klass)

        for klass in default_classes:
            opener.add_handler(klass())
        for h in handlers:
            # instantiate handlers passed as classes
            if isinstance(h, (types.ClassType, type)):
                h = h()
            opener.add_handler(h)

        return opener
391
392
# module-level convenience: build an opener using the default factory
build_opener = OpenerFactory().build_opener

# shared opener used by urlopen()/urlretrieve(); created lazily under
# urlopen_lock, or replaced explicitly via install_opener()
_opener = None
urlopen_lock = _threading.Lock()
def urlopen(url, data=None):
    """Open url with the shared module-level opener, building it on
    first use (thread-safely, via double-checked locking)."""
    global _opener
    if _opener is not None:
        return _opener.open(url, data)
    urlopen_lock.acquire()
    try:
        # re-check: another thread may have built it while we waited
        if _opener is None:
            _opener = build_opener()
    finally:
        urlopen_lock.release()
    return _opener.open(url, data)
407
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url to a local file via the shared module-level opener,
    building the opener on first use (thread-safely)."""
    global _opener
    if _opener is not None:
        return _opener.retrieve(url, filename, reporthook, data)
    urlopen_lock.acquire()
    try:
        # re-check: another thread may have built it while we waited
        if _opener is None:
            _opener = build_opener()
    finally:
        urlopen_lock.release()
    return _opener.retrieve(url, filename, reporthook, data)
418
def install_opener(opener):
    """Install opener as the module-level default used by urlopen()
    and urlretrieve()."""
    global _opener
    _opener = opener
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。