root/galaxy-central/eggs/Paste-1.6-py2.6.egg/paste/urlparser.py

リビジョン 3, 26.4 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
2# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3"""
4WSGI applications that parse the URL and dispatch to on-disk resources
5"""
6
7import os
8import sys
9import imp
10import mimetypes
11try:
12    import pkg_resources
13except ImportError:
14    pkg_resources = None
15from paste import request
16from paste import fileapp
17from paste.util import import_string
18from paste import httpexceptions
19from httpheaders import ETAG
20from paste.util import converters
21
class NoDefault(object):
    """Sentinel type used to mean "no value was supplied"."""
24
25__all__ = ['URLParser', 'StaticURLParser', 'PkgResourcesParser']
26
class URLParser(object):

    """
    WSGI middleware

    Application dispatching, based on URL.  An instance of `URLParser` is
    an application that loads and delegates to other applications.  It
    looks for files in its directory that match the first part of
    PATH_INFO; these may have an extension, but are not required to have
    one, in which case the available files are searched to find the
    appropriate file.  If it is ambiguous, a 404 is returned and an error
    logged.

    By default there is a constructor for .py files that loads the module,
    and looks for an attribute ``application``, which is a ready
    application object, or an attribute that matches the module name,
    which is a factory for building applications, and is called with no
    arguments.

    URLParser will also look in __init__.py for special overrides.
    These overrides are:

    ``urlparser_hook(environ)``
        This can modify the environment.  Its return value is ignored,
        and it cannot be used to change the response in any way.  You
        *can* use this, for example, to manipulate SCRIPT_NAME/PATH_INFO
        (try to keep them consistent with the original URL -- but
        consuming PATH_INFO and moving that to SCRIPT_NAME is ok).

    ``urlparser_wrap(environ, start_response, app)``:
        After URLParser finds the application, it calls this function
        (if present).  If this function doesn't call
        ``app(environ, start_response)`` then the application won't be
        called at all!  This can be used to allocate resources (with
        ``try:finally:``) or otherwise filter the output of the
        application.

    ``not_found_hook(environ, start_response)``:
        If no file can be found (*in this directory*) to match the
        request, then this WSGI application will be called.  You can
        use this to change the URL and pass the request back to
        URLParser again, or on to some other application.  This
        doesn't catch all ``404 Not Found`` responses, just missing
        files.

    ``application(environ, start_response)``:
        This basically overrides URLParser completely, and the given
        application is used for all requests.  ``urlparser_wrap`` and
        ``urlparser_hook`` are still called, but the filesystem isn't
        searched in any way.
    """

    # Parsers created by get_parser(), keyed by
    # (directory, base_python_name).  Class-level, so shared by every
    # instance.
    parsers_by_directory = {}

    # This is lazily initialized (on the first request, see __call__)
    init_module = NoDefault

    # Constructors added through register_constructor(), keyed by
    # extension; copied into each instance by __init__.
    global_constructors = {}

    def __init__(self, global_conf,
                 directory, base_python_name,
                 index_names=NoDefault,
                 hide_extensions=NoDefault,
                 ignore_extensions=NoDefault,
                 constructors=None,
                 **constructor_conf):
        """
        Create a URLParser object that looks at `directory`.
        `base_python_name` is the package that this directory
        represents, thus any Python modules in this directory will
        be given names under this package.

        `global_conf` is deprecated; a non-empty value triggers a
        ``DeprecationWarning`` and is only consulted for defaults of
        `index_names`, `hide_extensions` and `ignore_extensions`.

        `constructors` is an optional mapping of extension to
        constructor that overrides the globally registered ones for
        this instance.  Extra keyword arguments must be named
        ``'constructor .ext'``; a string value is resolved with
        ``import_string.eval_import``.  Any other extra keyword raises
        ``ValueError``.
        """
        if global_conf:
            import warnings
            warnings.warn(
                'The global_conf argument to URLParser is deprecated; '
                'either pass in None or {}, or use make_url_parser',
                DeprecationWarning)
        else:
            global_conf = {}
        # Paths are kept with forward slashes regardless of platform
        if os.path.sep != '/':
            directory = directory.replace(os.path.sep, '/')
        self.directory = directory
        self.base_python_name = base_python_name
        # This logic here should be deprecated since it is in
        # make_url_parser
        if index_names is NoDefault:
            index_names = global_conf.get(
                'index_names', ('index', 'Index', 'main', 'Main'))
        self.index_names = converters.aslist(index_names)
        if hide_extensions is NoDefault:
            hide_extensions = global_conf.get(
                'hide_extensions', ('.pyc', '.bak', '.py~', '.pyo'))
        self.hide_extensions = converters.aslist(hide_extensions)
        if ignore_extensions is NoDefault:
            ignore_extensions = global_conf.get(
                'ignore_extensions', ())
        self.ignore_extensions = converters.aslist(ignore_extensions)
        # Start from a copy so per-instance overrides never leak into
        # the class-wide registry.
        self.constructors = self.global_constructors.copy()
        if constructors:
            self.constructors.update(constructors)
        # @@: Should we also check the global options for constructors?
        prefix = 'constructor '
        for name, value in constructor_conf.items():
            if not name.startswith(prefix):
                raise ValueError(
                    "Only extra configuration keys allowed are "
                    "'constructor .ext = import_expr'; you gave %r "
                    "(=%r)" % (name, value))
            ext = name[len(prefix):].strip()
            if isinstance(value, (str, unicode)):
                value = import_string.eval_import(value)
            self.constructors[ext] = value

    def __call__(self, environ, start_response):
        """
        WSGI entry point: locate the application for the next segment
        of PATH_INFO and delegate to it, honouring the
        ``urlparser_hook``, ``not_found_hook`` and ``urlparser_wrap``
        overrides of the directory's ``__init__.py`` when present.
        """
        environ['paste.urlparser.base_python_name'] = self.base_python_name
        if self.init_module is NoDefault:
            self.init_module = self.find_init_module(environ)
        init_module = self.init_module
        if not environ.get('PATH_INFO', ''):
            return self.add_slash(environ, start_response)
        if init_module and getattr(init_module, 'urlparser_hook', None):
            init_module.urlparser_hook(environ)
        # Snapshot after the hook ran: find_application() consumes a
        # segment of PATH_INFO and the not-found paths need the
        # pre-dispatch values.  WSGI allows empty CGI variables to be
        # omitted, hence .get() rather than indexing.
        orig_path_info = environ.get('PATH_INFO', '')
        orig_script_name = environ.get('SCRIPT_NAME', '')
        application, filename = self.find_application(environ)
        if not application:
            if (init_module
                and getattr(init_module, 'not_found_hook', None)
                and environ.get('paste.urlparser.not_found_parser') is not self):
                # The marker in environ keeps the hook from being
                # re-entered if it hands the request back to this parser.
                not_found_hook = init_module.not_found_hook
                environ['paste.urlparser.not_found_parser'] = self
                environ['PATH_INFO'] = orig_path_info
                environ['SCRIPT_NAME'] = orig_script_name
                return not_found_hook(environ, start_response)
            if filename is None:
                # Report the segment that was actually looked up, i.e.
                # the first segment of the path as it was *before*
                # find_application() consumed it.
                name, rest_of_path = request.path_info_split(orig_path_info)
                if not name:
                    name = 'one of %s' % ', '.join(
                        self.index_names or
                        ['(no index_names defined)'])
                return self.not_found(
                    environ, start_response,
                    'Tried to load %s from directory %s'
                    % (name, self.directory))
            environ['wsgi.errors'].write(
                'Found resource %s, but could not construct application\n'
                % filename)
            return self.not_found(
                environ, start_response,
                'Tried to load %s from directory %s'
                % (filename, self.directory))
        if init_module and getattr(init_module, 'urlparser_wrap', None):
            return init_module.urlparser_wrap(
                environ, start_response, application)
        return application(environ, start_response)

    def find_application(self, environ):
        """
        Return ``(application, filename)`` for the request.

        If ``__init__.py`` defines ``application`` it is returned with
        a filename of ``None`` (once per SCRIPT_NAME, tracked through
        ``environ``).  Otherwise the first segment of PATH_INFO is
        moved onto SCRIPT_NAME and looked up with `find_file`; an
        empty segment tries each of ``index_names`` in order.
        ``(None, None)`` means no file matched; ``(None, filename)``
        means a file matched but no application could be built.
        """
        script_name = environ.get('SCRIPT_NAME', '')
        if (self.init_module
            and getattr(self.init_module, 'application', None)
            and environ.get('paste.urlparser.init_application') != script_name):
            environ['paste.urlparser.init_application'] = script_name
            return self.init_module.application, None
        name, rest_of_path = request.path_info_split(
            environ.get('PATH_INFO', ''))
        environ['PATH_INFO'] = rest_of_path
        if name is not None:
            environ['SCRIPT_NAME'] = script_name + '/' + name
        if name:
            filename = self.find_file(environ, name)
        else:
            filename = None
            for index_name in self.index_names:
                filename = self.find_file(environ, index_name)
                if filename:
                    break
        if filename is None:
            return None, None
        return self.get_application(environ, filename), filename

    def not_found(self, environ, start_response, debug_message=None):
        """
        Respond with ``HTTPNotFound``; `debug_message` is included in
        the comment passed to the exception.
        """
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment='SCRIPT_NAME=%r; PATH_INFO=%r; looking in %r; debug: %s'
            % (environ.get('SCRIPT_NAME'), environ.get('PATH_INFO'),
               self.directory, debug_message or '(none)'))
        return exc.wsgi_application(environ, start_response)

    def add_slash(self, environ, start_response):
        """
        This happens when you try to get to a directory
        without a trailing /
        """
        url = request.construct_url(environ, with_query_string=False)
        url += '/'
        if environ.get('QUERY_STRING'):
            url += '?' + environ['QUERY_STRING']
        exc = httpexceptions.HTTPMovedPermanently(
            'The resource has moved to %s - you should be redirected '
            'automatically.' % url,
            headers=[('location', url)])
        return exc.wsgi_application(environ, start_response)

    def find_file(self, environ, base_filename):
        """
        Return the full path of the entry of ``self.directory`` that
        matches `base_filename`, or ``None``.

        An entry matches when its whole name equals `base_filename`,
        or when its name without extension does and the extension is
        not in ``ignore_extensions``.  Entries whose extension is in
        ``hide_extensions`` never match.  When several entries match,
        the exact-name match wins; without one the URL is ambiguous,
        which is logged to ``wsgi.errors`` and yields ``None``.
        """
        exact_match = None
        possible = []
        for filename in os.listdir(self.directory):
            base, ext = os.path.splitext(filename)
            if ext in self.hide_extensions or not base:
                continue
            full_filename = os.path.join(self.directory, filename)
            if filename == base_filename:
                exact_match = full_filename
                possible.append(full_filename)
                continue
            if ext in self.ignore_extensions:
                continue
            if base == base_filename:
                possible.append(full_filename)
        if not possible:
            return None
        if len(possible) == 1:
            return possible[0]
        # If there is an exact match, this isn't 'ambiguous'
        # per se; it might mean foo.gif and foo.gif.back for
        # instance
        if exact_match is not None:
            return exact_match
        environ['wsgi.errors'].write(
            'Ambiguous URL: %s; matches files %s\n'
            % (request.construct_url(environ),
               ', '.join(possible)))
        return None

    def get_application(self, environ, filename):
        """
        Build the application for `filename` with the constructor
        registered for its type: ``'dir'`` for directories, otherwise
        the file extension, falling back to the ``'*'`` constructor.
        Returns ``None`` when there is no constructor or when the
        constructor itself returns ``None``.
        """
        if os.path.isdir(filename):
            kind = 'dir'
        else:
            kind = os.path.splitext(filename)[1]
        constructor = self.constructors.get(kind, self.constructors.get('*'))
        if constructor is None:
            return None
        return constructor(self, environ, filename)

    @classmethod
    def register_constructor(cls, extension, constructor):
        """
        Register a function as a constructor.  Registered constructors
        apply to all instances of `URLParser`.

        The extension should have a leading ``.``, or the special
        extensions ``dir`` (for directories) and ``*`` (a catch-all).

        `constructor` must be a callable that takes three arguments:
        the parser, ``environ`` and ``filename``, and returns a WSGI
        application (or ``None``).

        Registering an extension twice fails with ``AssertionError``.
        """
        registry = cls.global_constructors
        assert extension not in registry, (
            "A constructor already exists for the extension %r (%r) "
            "when attempting to register constructor %r"
            % (extension, registry[extension], constructor))
        registry[extension] = constructor

    def get_parser(self, directory, base_python_name):
        """
        Get a parser for the given directory, or create one if
        necessary.  This way parsers can be cached and reused.

        # @@: settings are inherited from the first caller
        """
        key = (directory, base_python_name)
        try:
            return self.parsers_by_directory[key]
        except KeyError:
            parser = self.__class__(
                {},
                directory, base_python_name,
                index_names=self.index_names,
                hide_extensions=self.hide_extensions,
                ignore_extensions=self.ignore_extensions,
                constructors=self.constructors)
            self.parsers_by_directory[key] = parser
            return parser

    def find_init_module(self, environ):
        """
        Load and return this directory's ``__init__.py`` module, or
        ``None`` when the directory has no such file.
        """
        filename = os.path.join(self.directory, '__init__.py')
        if not os.path.exists(filename):
            return None
        return load_module(environ, filename)

    def __repr__(self):
        return '<%s directory=%r; module=%s at %s>' % (
            self.__class__.__name__,
            self.directory,
            self.base_python_name,
            hex(abs(id(self))))
342
def make_directory(parser, environ, filename):
    """
    Constructor for directories: return the (cached) parser for the
    sub-directory `filename`, named as a sub-package of the package
    recorded in ``environ['paste.urlparser.base_python_name']``.
    """
    package = environ['paste.urlparser.base_python_name']
    leaf = os.path.basename(filename)
    if package:
        child_name = package + "." + leaf
    else:
        child_name = leaf
    return parser.get_parser(filename, child_name)

URLParser.register_constructor('dir', make_directory)
352
def make_unknown(parser, environ, filename):
    """
    Catch-all constructor: wrap `filename` in a ``fileapp.FileApp``.
    `parser` and `environ` are accepted for the constructor signature
    but not used.
    """
    app = fileapp.FileApp(filename)
    return app

URLParser.register_constructor('*', make_unknown)
357
def load_module(environ, filename):
    """
    Load the Python file `filename` as a module.

    The module name is the file's base name without extension,
    prefixed with ``environ['paste.urlparser.base_python_name']`` (and
    a dot) when that value is non-empty.  Loading is delegated to
    `load_module_from_name`, with ``environ['wsgi.errors']`` as the
    error stream.
    """
    package = environ['paste.urlparser.base_python_name']
    stem = os.path.splitext(os.path.basename(filename))[0]
    if package:
        qualified_name = package + '.' + stem
    else:
        qualified_name = stem
    error_stream = environ['wsgi.errors']
    return load_module_from_name(environ, filename, qualified_name,
                                 error_stream)
365
def load_module_from_name(environ, filename, module_name, errors):
    """
    Import `filename` under the dotted name `module_name` and return
    the module.

    A module already present in ``sys.modules`` is returned as is.
    Otherwise an ``__init__.py`` is created next to `filename` if the
    directory has none, the parent packages of `module_name` are
    loaded first (each from the directory one level further up), and
    the module itself is loaded with ``imp``.

    `errors` is a file-like object; if ``__init__.py`` cannot be
    created a message is written to it and ``None`` is returned.
    Exceptions raised while finding or executing the module propagate
    to the caller.  `environ` is only passed along to the recursive
    call.
    """
    if module_name in sys.modules:
        return sys.modules[module_name]
    directory = os.path.dirname(filename)
    init_filename = os.path.join(directory, '__init__.py')
    if not os.path.exists(init_filename):
        try:
            f = open(init_filename, 'w')
        except (OSError, IOError) as e:
            errors.write(
                'Cannot write __init__.py file into directory %s (%s)\n'
                % (directory, e))
            return None
        try:
            f.write('#\n')
        finally:
            # Close the handle even if the write itself fails
            f.close()
    if '.' in module_name:
        parent_name, base_name = module_name.rsplit('.', 1)
        # Called for its side effect: the parent package must be in
        # sys.modules before the sub-module is loaded.
        load_module_from_name(environ, directory, parent_name, errors)
    else:
        base_name = module_name
    fp = None
    try:
        fp, pathname, stuff = imp.find_module(base_name, [directory])
        module = imp.load_module(module_name, fp, pathname, stuff)
    finally:
        # find_module may hand back an open file; always release it
        if fp is not None:
            fp.close()
    return module
399
def make_py(parser, environ, filename):
    """
    Constructor for ``.py`` files: load the module and return its
    WSGI application, or ``None`` if there is none.

    Lookup order:

    1. a truthy module attribute ``application`` -- its
       ``wsgi_application`` attribute if it has one, else the object
       itself;
    2. a module attribute named like the module -- its
       ``wsgi_application`` attribute if it has one, else the result
       of calling it with no arguments.

    When neither exists a message is written to
    ``environ['wsgi.errors']``.  `parser` is not used.
    """
    module = load_module(environ, filename)
    if not module:
        return None
    application = getattr(module, 'application', None)
    if application:
        return getattr(application, 'wsgi_application', application)
    base_name = module.__name__.split('.')[-1]
    if hasattr(module, base_name):
        obj = getattr(module, base_name)
        if hasattr(obj, 'wsgi_application'):
            return obj.wsgi_application
        # @@: Old behavior; should probably be deprecated eventually:
        return obj()
    environ['wsgi.errors'].write(
        "Could not find application or %s in %s\n"
        % (base_name, module))
    return None

URLParser.register_constructor('.py', make_py)
420
class StaticURLParser(object):

    """
    Like ``URLParser`` but only serves static files.

    ``directory``:
      the directory whose entries are served by this instance

    ``root_directory``:
      the directory that requests may never leave; defaults to
      ``directory``

    ``cache_max_age``:
      integer specifies Cache-Control max_age in seconds
    """
    # @@: Should URLParser subclass from this?

    def __init__(self, directory, root_directory=None,
                 cache_max_age=None):
        # self.directory always uses forward slashes
        if os.path.sep != '/':
            directory = directory.replace(os.path.sep, '/')
        self.directory = directory
        if root_directory is None:
            root_directory = directory
        else:
            root_directory = os.path.normpath(root_directory)
        # self.root_directory uses the platform separator
        if os.path.sep != '/':
            root_directory = root_directory.replace('/', os.path.sep)
        self.root_directory = root_directory
        self.cache_max_age = cache_max_age

    def _is_within_root(self, path):
        """
        Tell whether `path` is ``root_directory`` itself or lies below
        it.

        Both sides are made absolute and case-normalised first, so a
        root spelled ``./static`` or ``static/`` still matches, and the
        comparison is made on whole path components, so ``/srv/www``
        does not contain ``/srv/www-secret``.
        """
        if self.root_directory is None:
            return True
        root = os.path.normcase(os.path.abspath(self.root_directory))
        path = os.path.normcase(os.path.abspath(path))
        if path == root:
            return True
        # rstrip so a root that already ends in a separator (e.g. '/')
        # does not end up with two of them
        return path.startswith(root.rstrip(os.path.sep) + os.path.sep)

    def __call__(self, environ, start_response):
        """
        WSGI entry point: serve the file named by the next segment of
        PATH_INFO (``index.html`` for ``/``), recursing into
        sub-directories with a fresh parser that keeps the same root.
        """
        path_info = environ.get('PATH_INFO', '')
        if not path_info:
            return self.add_slash(environ, start_response)
        if path_info == '/':
            # @@: This should obviously be configurable
            filename = 'index.html'
        else:
            filename = request.path_info_pop(environ)
        full = os.path.normpath(os.path.join(self.directory, filename))
        if os.path.sep != '/':
            full = full.replace('/', os.path.sep)
        if not self._is_within_root(full):
            # Out of bounds
            return self.not_found(environ, start_response)
        if not os.path.exists(full):
            return self.not_found(environ, start_response)
        if os.path.isdir(full):
            # @@: Cache?
            child_root = self.root_directory or self.directory
            child = self.__class__(full, root_directory=child_root,
                                   cache_max_age=self.cache_max_age)
            return child(environ, start_response)
        remaining = environ.get('PATH_INFO')
        if remaining and remaining != '/':
            # A file cannot have anything below it
            return self.error_extra_path(environ, start_response)
        if_none_match = environ.get('HTTP_IF_NONE_MATCH')
        if if_none_match:
            # The modification time is what is compared against the
            # client's If-None-Match value
            mytime = os.stat(full).st_mtime
            if str(mytime) == if_none_match:
                headers = []
                ETAG.update(headers, mytime)
                start_response('304 Not Modified', headers)
                return ['']  # empty body

        fa = self.make_app(full)
        if self.cache_max_age:
            fa.cache_control(max_age=self.cache_max_age)
        return fa(environ, start_response)

    def make_app(self, filename):
        """Return the application that serves `filename`."""
        return fileapp.FileApp(filename)

    def add_slash(self, environ, start_response):
        """
        This happens when you try to get to a directory
        without a trailing /
        """
        url = request.construct_url(environ, with_query_string=False)
        url += '/'
        if environ.get('QUERY_STRING'):
            url += '?' + environ['QUERY_STRING']
        exc = httpexceptions.HTTPMovedPermanently(
            'The resource has moved to %s - you should be redirected '
            'automatically.' % url,
            headers=[('location', url)])
        return exc.wsgi_application(environ, start_response)

    def not_found(self, environ, start_response, debug_message=None):
        """
        Respond with ``HTTPNotFound``; `debug_message` is included in
        the comment passed to the exception.
        """
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment='SCRIPT_NAME=%r; PATH_INFO=%r; looking in %r; debug: %s'
            % (environ.get('SCRIPT_NAME'), environ.get('PATH_INFO'),
               self.directory, debug_message or '(none)'))
        return exc.wsgi_application(environ, start_response)

    def error_extra_path(self, environ, start_response):
        """
        Respond with ``HTTPNotFound`` for a request that has path
        segments left over after a file was reached.
        """
        exc = httpexceptions.HTTPNotFound(
            'The trailing path %r is not allowed' % environ['PATH_INFO'])
        return exc.wsgi_application(environ, start_response)

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.directory)
520
def make_static(global_conf, document_root, cache_max_age=None):
    """
    Return a WSGI application that serves a directory (configured
    with document_root)

    cache_max_age - integer specifies CACHE_CONTROL max_age in seconds;
    any non-None value is converted with ``int()``.  `global_conf` is
    accepted but not used.
    """
    max_age = None if cache_max_age is None else int(cache_max_age)
    return StaticURLParser(document_root, cache_max_age=max_age)
532
class PkgResourcesParser(StaticURLParser):

    """
    Like ``StaticURLParser``, but serves resources out of an egg
    through ``pkg_resources`` instead of files from a directory.

    ``egg_or_spec``:
      a distribution object, or a string passed to
      ``pkg_resources.get_distribution``

    ``resource_name``:
      the resource path inside the egg that this instance serves

    ``manager``:
      the resource manager handed to ``get_resource_stream``; a new
      ``pkg_resources.ResourceManager`` by default

    ``root_resource``:
      the resource path that requests may never leave; defaults to
      ``resource_name``
    """

    def __init__(self, egg_or_spec, resource_name, manager=None, root_resource=None):
        # NOTE: StaticURLParser.__init__ is deliberately not called
        # here (the original never did); this class keeps its own
        # attributes and overrides every method that would read the
        # parent's.
        if pkg_resources is None:
            raise NotImplementedError("This class requires pkg_resources.")
        if isinstance(egg_or_spec, (str, unicode)):
            self.egg = pkg_resources.get_distribution(egg_or_spec)
        else:
            self.egg = egg_or_spec
        self.resource_name = resource_name
        if manager is None:
            manager = pkg_resources.ResourceManager()
        self.manager = manager
        if root_resource is None:
            root_resource = resource_name
        # normpath('') is '.', so an empty root means "top of the egg"
        self.root_resource = os.path.normpath(root_resource)

    def __repr__(self):
        return '<%s for %s:%r>' % (
            self.__class__.__name__,
            self.egg.project_name,
            self.resource_name)

    def _resource_within_root(self, resource):
        """
        Tell whether the normalised path `resource` is
        ``root_resource`` itself or lies below it.

        The comparison is made on whole path components, so a root of
        ``static`` does not contain ``static-private``.  A root of
        ``.`` (what an empty resource name normalises to) contains
        everything except paths that climb out through ``..``.
        """
        root = self.root_resource
        if root is None:
            return True
        if root == os.curdir:
            return (resource != os.pardir
                    and not resource.startswith(os.pardir + os.path.sep))
        if resource == root:
            return True
        # rstrip so a root that already ends in a separator (e.g. '/')
        # does not end up with two of them
        return resource.startswith(root.rstrip(os.path.sep) + os.path.sep)

    def __call__(self, environ, start_response):
        """
        WSGI entry point: serve the resource named by the next segment
        of PATH_INFO (``index.html`` for ``/``), recursing into
        resource directories with a fresh parser that keeps the same
        root.
        """
        path_info = environ.get('PATH_INFO', '')
        if not path_info:
            return self.add_slash(environ, start_response)
        if path_info == '/':
            # @@: This should obviously be configurable
            filename = 'index.html'
        else:
            filename = request.path_info_pop(environ)
        resource = os.path.normpath(self.resource_name + '/' + filename)
        if not self._resource_within_root(resource):
            # Out of bounds
            return self.not_found(environ, start_response)
        if not self.egg.has_resource(resource):
            return self.not_found(environ, start_response)
        if self.egg.resource_isdir(resource):
            # @@: Cache?
            child_root = self.root_resource or self.resource_name
            child = self.__class__(self.egg, resource, self.manager,
                                   root_resource=child_root)
            return child(environ, start_response)
        remaining = environ.get('PATH_INFO')
        if remaining and remaining != '/':
            # A resource file cannot have anything below it
            return self.error_extra_path(environ, start_response)

        content_type, encoding = mimetypes.guess_type(resource)
        if not content_type:
            content_type = 'application/octet-stream'
        # @@: I don't know what to do with the encoding.
        try:
            stream = self.egg.get_resource_stream(self.manager, resource)
        except (IOError, OSError) as e:
            exc = httpexceptions.HTTPForbidden(
                'You are not permitted to view this file (%s)' % e)
            return exc.wsgi_application(environ, start_response)
        start_response('200 OK',
                       [('content-type', content_type)])
        return fileapp._FileIter(stream)

    def not_found(self, environ, start_response, debug_message=None):
        """
        Respond with ``HTTPNotFound``; `debug_message` is included in
        the comment passed to the exception.
        """
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment='SCRIPT_NAME=%r; PATH_INFO=%r; looking in egg:%s#%r; debug: %s'
            % (environ.get('SCRIPT_NAME'), environ.get('PATH_INFO'),
               self.egg, self.resource_name, debug_message or '(none)'))
        return exc.wsgi_application(environ, start_response)
602
def make_pkg_resources(global_conf, egg, resource_name=''):
    """
    A static file parser that loads data from an egg using
    ``pkg_resources``.  Takes a configuration value ``egg``, which is
    an egg spec, and a base ``resource_name`` (default empty string)
    which is the path in the egg that this starts at.

    Raises ``NotImplementedError`` when ``pkg_resources`` could not be
    imported.  `global_conf` is accepted but not used.
    """
    if pkg_resources is not None:
        return PkgResourcesParser(egg, resource_name)
    raise NotImplementedError("This function requires pkg_resources.")
613
def make_url_parser(global_conf, directory, base_python_name,
                    index_names=None, hide_extensions=None,
                    ignore_extensions=None,
                    **constructor_conf):
    """
    Create a URLParser application that looks in ``directory``, which
    should be the directory for the Python package named in
    ``base_python_name``.  ``index_names`` are used when viewing the
    directory (like ``'index'`` for ``'index.html'``).
    ``hide_extensions`` are extensions that are not viewable (like
    ``'.pyc'``) and ``ignore_extensions`` are viewable but only if an
    explicit extension is given.

    Each of the three settings, when left as ``None``, is read from
    ``global_conf`` under the same key before falling back to a
    built-in default.  Extensions include their leading dot, because
    they are compared against the output of ``os.path.splitext``.
    Extra keyword arguments are passed through to ``URLParser``.
    """
    if index_names is None:
        index_names = global_conf.get(
            'index_names', ('index', 'Index', 'main', 'Main'))
    index_names = converters.aslist(index_names)

    if hide_extensions is None:
        # Same defaults as URLParser.__init__
        hide_extensions = global_conf.get(
            'hide_extensions', ('.pyc', '.bak', '.py~', '.pyo'))
    hide_extensions = converters.aslist(hide_extensions)

    if ignore_extensions is None:
        ignore_extensions = global_conf.get(
            'ignore_extensions', ())
    ignore_extensions = converters.aslist(ignore_extensions)
    # There's no real way to set constructors currently...

    # global_conf has been fully consumed above, so the parser gets an
    # empty one (a non-empty value would trigger its deprecation warning)
    return URLParser({}, directory, base_python_name,
                     index_names=index_names,
                     hide_extensions=hide_extensions,
                     ignore_extensions=ignore_extensions,
                     **constructor_conf)
648
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。