root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/other_packages/_mechanize_dist/_useragent.py

リビジョン 3, 13.1 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""Convenient HTTP UserAgent class.
2
3This is a subclass of urllib2.OpenerDirector.
4
5
6Copyright 2003-2006 John J. Lee <jjl@pobox.com>
7
8This code is free software; you can redistribute it and/or modify it under
9the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
10included with the distribution).
11
12"""
13
14import sys, warnings, urllib2
15
16import _opener
17import _urllib2
18import _auth
19import _gzip
20import _response
21
22
class UserAgentBase(_opener.OpenerDirector):
    """Convenient user-agent class.

    Do not use .add_handler() to add a handler for something already dealt with
    by this code.

    The only reason at present for the distinction between UserAgent and
    UserAgentBase is so that classes that depend on .seek()able responses
    (e.g. mechanize.Browser) can inherit from UserAgentBase.  The subclass
    UserAgent exposes a .set_seekable_responses() method that allows switching
    off the adding of a .seek() method to responses.

    Public attributes:

    addheaders: list of (name, value) pairs specifying headers to send with
     every request, unless they are overridden in the Request instance.

     >>> ua = UserAgentBase()
     >>> ua.addheaders = [
     ...  ("User-agent", "Mozilla/5.0 (compatible)"),
     ...  ("From", "responsible.person@example.com")]

    """

    # Maps a handler name to the class used to build it.  Names without a
    # leading underscore are URL schemes; names with one are internal
    # "other", "feature" or "debug" handlers.
    handler_classes = {
        # scheme handlers
        "http": _urllib2.HTTPHandler,
        # CacheFTPHandler is buggy, at least in 2.3, so we don't use it
        "ftp": _urllib2.FTPHandler,
        "file": _urllib2.FileHandler,

        # other handlers
        "_unknown": _urllib2.UnknownHandler,
        # HTTP{S,}Handler depend on HTTPErrorProcessor too
        "_http_error": _urllib2.HTTPErrorProcessor,
        "_http_request_upgrade": _urllib2.HTTPRequestUpgradeProcessor,
        "_http_default_error": _urllib2.HTTPDefaultErrorHandler,

        # feature handlers
        "_basicauth": _urllib2.HTTPBasicAuthHandler,
        "_digestauth": _urllib2.HTTPDigestAuthHandler,
        "_redirect": _urllib2.HTTPRedirectHandler,
        "_cookies": _urllib2.HTTPCookieProcessor,
        "_refresh": _urllib2.HTTPRefreshProcessor,
        "_equiv": _urllib2.HTTPEquivProcessor,
        "_proxy": _urllib2.ProxyHandler,
        "_proxy_basicauth": _urllib2.ProxyBasicAuthHandler,
        "_proxy_digestauth": _urllib2.ProxyDigestAuthHandler,
        "_robots": _urllib2.HTTPRobotRulesProcessor,
        "_gzip": _gzip.HTTPGzipProcessor,  # experimental!

        # debug handlers
        "_debug_redirect": _urllib2.HTTPRedirectDebugProcessor,
        "_debug_response_body": _urllib2.HTTPResponseDebugProcessor,
        }

    # Names of the handlers instantiated by __init__().
    default_schemes = ["http", "ftp", "file"]
    default_others = ["_unknown", "_http_error", "_http_request_upgrade",
                      "_http_default_error",
                      ]
    default_features = ["_redirect", "_cookies",
                        "_refresh", "_equiv",
                        "_basicauth", "_digestauth",
                        "_proxy", "_proxy_basicauth", "_proxy_digestauth",
                        "_robots",
                        ]
    # HTTPS is only registered when _urllib2 provides a handler for it.
    if hasattr(_urllib2, 'HTTPSHandler'):
        handler_classes["https"] = _urllib2.HTTPSHandler
        default_schemes.append("https")

    def __init__(self):
        """Create the default handlers and install the default managers."""
        _opener.OpenerDirector.__init__(self)

        # Registry of the handlers managed by this class, keyed by the same
        # names as handler_classes.
        ua_handlers = self._ua_handlers = {}
        for scheme in (self.default_schemes+
                       self.default_others+
                       self.default_features):
            klass = self.handler_classes[scheme]
            ua_handlers[scheme] = klass()
        for handler in ua_handlers.itervalues():
            self.add_handler(handler)

        # Yuck.
        # Ensure correct default constructor args were passed to
        # HTTPRefreshProcessor and HTTPEquivProcessor.
        if "_refresh" in ua_handlers:
            self.set_handle_refresh(True)
        if "_equiv" in ua_handlers:
            self.set_handle_equiv(True)
        # Ensure default password managers are installed.
        pm = ppm = None
        if "_basicauth" in ua_handlers or "_digestauth" in ua_handlers:
            pm = _urllib2.HTTPPasswordMgrWithDefaultRealm()
        if ("_proxy_basicauth" in ua_handlers or
            "_proxy_digestauth" in ua_handlers):
            ppm = _auth.HTTPProxyPasswordMgr()
        self.set_password_manager(pm)
        self.set_proxy_password_manager(ppm)
        # set default certificate manager
        if "https" in ua_handlers:
            cm = _urllib2.HTTPSClientCertMgr()
            self.set_client_cert_manager(cm)

    def close(self):
        """Close the underlying opener and drop the handler registry."""
        _opener.OpenerDirector.close(self)
        self._ua_handlers = None

    # XXX
##     def set_timeout(self, timeout):
##         self._timeout = timeout
##     def set_http_connection_cache(self, conn_cache):
##         self._http_conn_cache = conn_cache
##     def set_ftp_connection_cache(self, conn_cache):
##         # XXX ATM, FTP has cache as part of handler; should it be separate?
##         self._ftp_conn_cache = conn_cache

    def set_handled_schemes(self, schemes):
        """Set sequence of URL scheme (protocol) strings.

        For example: ua.set_handled_schemes(["http", "ftp"])

        If this fails (with ValueError) because you've passed an unknown
        scheme, the set of handled schemes will not be changed.

        """
        # Validate everything first so that a bad name changes nothing.
        want = {}
        for scheme in schemes:
            if scheme.startswith("_"):
                raise ValueError("not a scheme '%s'" % scheme)
            if scheme not in self.handler_classes:
                raise ValueError("unknown scheme '%s'" % scheme)
            want[scheme] = None

        # get rid of scheme handlers we don't want
        # (iterate over a copy: _replace_handler() edits the registry)
        for scheme in list(self._ua_handlers.keys()):
            if scheme.startswith("_"):
                continue  # not a scheme handler
            if scheme not in want:
                self._replace_handler(scheme, None)
            else:
                del want[scheme]  # already got it
        # add the scheme handlers that are missing
        for scheme in want.keys():
            self._set_handler(scheme, True)

    def set_cookiejar(self, cookiejar):
        """Set a mechanize.CookieJar, or None."""
        self._set_handler("_cookies", obj=cookiejar)

    # XXX could use Greg Stein's httpx for some of this instead?
    # or httplib2??
    def set_proxies(self, proxies):
        """Set a dictionary mapping URL scheme to proxy specification, or None.

        e.g. {"http": "joe:password@myproxy.example.com:3128",
              "ftp": "proxy.example.com"}

        """
        self._set_handler("_proxy", obj=proxies)

    def add_password(self, url, user, password, realm=None):
        """Register credentials for url with the current password manager."""
        self._password_manager.add_password(realm, url, user, password)

    def add_proxy_password(self, user, password, hostport=None, realm=None):
        """Register credentials with the current proxy password manager."""
        self._proxy_password_manager.add_password(
            realm, hostport, user, password)

    def add_client_certificate(self, url, key_file, cert_file):
        """Add an SSL client certificate, for HTTPS client auth.

        key_file and cert_file must be filenames of the key and certificate
        files, in PEM format.  You can use e.g. OpenSSL to convert a p12 (PKCS
        12) file to PEM format:

        openssl pkcs12 -clcerts -nokeys -in cert.p12 -out cert.pem
        openssl pkcs12 -nocerts -in cert.p12 -out key.pem


        Note that client certificate password input is very inflexible ATM.  At
        the moment this seems to be console only, which is presumably the
        default behaviour of libopenssl.  In future mechanize may support
        third-party libraries that (I assume) allow more options here.

        """
        self._client_cert_manager.add_key_cert(url, key_file, cert_file)

    # the following are rarely useful -- use add_password / add_proxy_password
    # instead
    def set_password_manager(self, password_manager):
        """Set a mechanize.HTTPPasswordMgrWithDefaultRealm, or None."""
        self._password_manager = password_manager
        self._set_handler("_basicauth", obj=password_manager)
        self._set_handler("_digestauth", obj=password_manager)

    def set_proxy_password_manager(self, password_manager):
        """Set a mechanize.HTTPProxyPasswordMgr, or None."""
        self._proxy_password_manager = password_manager
        self._set_handler("_proxy_basicauth", obj=password_manager)
        self._set_handler("_proxy_digestauth", obj=password_manager)

    def set_client_cert_manager(self, cert_manager):
        """Set a mechanize.HTTPClientCertMgr, or None.

        The manager is always remembered on this object.  It is attached to
        the HTTPS handler only if one is currently registered.

        """
        self._client_cert_manager = cert_manager
        # There may be no HTTPS handler (no HTTPSHandler available, or the
        # scheme was removed), so look it up without raising KeyError.
        handler = self._ua_handlers.get("https")
        if handler is not None:
            handler.client_cert_manager = cert_manager

    # these methods all take a boolean parameter
    def set_handle_robots(self, handle):
        """Set whether to observe rules from robots.txt."""
        self._set_handler("_robots", handle)

    def set_handle_redirect(self, handle):
        """Set whether to handle HTTP 30x redirections."""
        self._set_handler("_redirect", handle)

    def set_handle_refresh(self, handle, max_time=None, honor_time=True):
        """Set whether to handle HTTP Refresh headers.

        max_time and honor_time are passed as keyword arguments to the
        refresh handler's constructor.

        """
        self._set_handler("_refresh", handle, constructor_kwds=
                          {"max_time": max_time, "honor_time": honor_time})

    def set_handle_equiv(self, handle, head_parser_class=None):
        """Set whether to treat HTML http-equiv headers like HTTP headers.

        Response objects may be .seek()able if this is set (currently returned
        responses are, raised HTTPError exception responses are not).

        """
        # Only pass head_parser_class on when the caller supplied one, so
        # that the handler's own default applies otherwise.
        if head_parser_class is not None:
            constructor_kwds = {"head_parser_class": head_parser_class}
        else:
            constructor_kwds = {}
        self._set_handler("_equiv", handle, constructor_kwds=constructor_kwds)

    def set_handle_gzip(self, handle):
        """Handle gzip transfer encoding.

        Enabling this emits a warning, because the feature is experimental.

        """
        if handle:
            warnings.warn(
                "gzip transfer encoding is experimental!", stacklevel=2)
        self._set_handler("_gzip", handle)

    def set_debug_redirects(self, handle):
        """Log information about HTTP redirects (including refreshes).

        Logging is performed using module logging.  The logger name is
        "mechanize.http_redirects".  To actually print some debug output,
        eg:

        import sys, logging
        logger = logging.getLogger("mechanize.http_redirects")
        logger.addHandler(logging.StreamHandler(sys.stdout))
        logger.setLevel(logging.INFO)

        Other logger names relevant to this module:

        "mechanize.http_responses"
        "mechanize.cookies" (or "cookielib" if running Python 2.4)

        To turn on everything:

        import sys, logging
        logger = logging.getLogger("mechanize")
        logger.addHandler(logging.StreamHandler(sys.stdout))
        logger.setLevel(logging.INFO)

        """
        self._set_handler("_debug_redirect", handle)

    def set_debug_responses(self, handle):
        """Log HTTP response bodies.

        See docstring for .set_debug_redirects() for details of logging.

        Response objects may be .seek()able if this is set (currently returned
        responses are, raised HTTPError exception responses are not).

        """
        self._set_handler("_debug_response_body", handle)

    def set_debug_http(self, handle):
        """Print HTTP headers to sys.stdout."""
        level = int(bool(handle))
        for scheme in "http", "https":
            h = self._ua_handlers.get(scheme)
            if h is not None:
                h.set_http_debuglevel(level)

    def _set_handler(self, name, handle=None, obj=None,
                     constructor_args=(), constructor_kwds=None):
        """Install, replace or remove the handler registered under name.

        name: key into handler_classes.
        handle: true to install a handler, false to remove it.  If None, it
         defaults to whether obj was supplied.
        obj: if not None, passed as the only constructor argument of the
         handler class.
        constructor_args, constructor_kwds: used to construct the handler
         when obj is None.

        """
        if constructor_kwds is None:
            constructor_kwds = {}
        if handle is None:
            handle = obj is not None
        if handle:
            handler_class = self.handler_classes[name]
            if obj is not None:
                newhandler = handler_class(obj)
            else:
                newhandler = handler_class(*constructor_args, **constructor_kwds)
        else:
            newhandler = None
        self._replace_handler(name, newhandler)

    def _replace_handler(self, name, newhandler=None):
        """Swap the handler registered under name for newhandler.

        If newhandler is None the old handler is removed and name is dropped
        from the registry, so that later lookups do not find a handler which
        is no longer installed.

        """
        # first, if handler was previously added, remove it
        if name is not None:
            handler = self._ua_handlers.get(name)
            if handler:
                try:
                    self.handlers.remove(handler)
                except ValueError:
                    # already absent from self.handlers
                    pass
        # then add the replacement, if any
        if newhandler is not None:
            self.add_handler(newhandler)
            self._ua_handlers[name] = newhandler
        elif name is not None:
            # Forget the removed handler; otherwise set_handled_schemes()
            # would treat the scheme as still handled and never re-add it.
            self._ua_handlers.pop(name, None)
327
328
class UserAgent(UserAgentBase):
    """UserAgentBase variant whose responses can optionally be .seek()able."""

    def __init__(self):
        UserAgentBase.__init__(self)
        # Seekable responses are off until set_seekable_responses() is called.
        self._seekable = False

    def set_seekable_responses(self, handle):
        """Make response objects .seek()able."""
        self._seekable = bool(handle)

    def open(self, fullurl, data=None):
        """Open fullurl, wrapping the response when seekable mode is on."""
        if not self._seekable:
            return UserAgentBase.open(self, fullurl, data)

        # Give wrapped_open() a plain callable bound to this instance's
        # base-class open().
        def base_open(fullurl, data=None):
            return UserAgentBase.open(self, fullurl, data)

        return _opener.wrapped_open(
            base_open, _response.seek_wrapped_response, fullurl, data)
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。