Context Navigation

urlparser.py

リビジョン 3, 26.4 KB (コミッタ: kohda, 15 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
2	# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3	"""
4	WSGI applications that parse the URL and dispatch to on-disk resources
5	"""
6
7	import os
8	import sys
9	import imp
10	import mimetypes
11	try:
12	import pkg_resources
13	except ImportError:
14	pkg_resources = None
15	from paste import request
16	from paste import fileapp
17	from paste.util import import_string
18	from paste import httpexceptions
19	from httpheaders import ETAG
20	from paste.util import converters
21
class NoDefault(object):
    """
    Sentinel type meaning "no value was supplied".

    The class object itself (never an instance) is used as a default
    and compared by identity, so that ``None`` and empty values stay
    usable as real arguments.
    """
24
25	__all__ = ['URLParser', 'StaticURLParser', 'PkgResourcesParser']
26
class URLParser(object):

    """
    WSGI middleware

    Application dispatching, based on URL.  An instance of `URLParser` is
    an application that loads and delegates to other applications.  It
    looks for files in its directory that match the first part of
    PATH_INFO; these may have an extension, but are not required to have
    one, in which case the available files are searched to find the
    appropriate file.  If it is ambiguous, a 404 is returned and an error
    logged.

    By default there is a constructor for .py files that loads the module,
    and looks for an attribute ``application``, which is a ready
    application object, or an attribute that matches the module name,
    which is a factory for building applications, and is called with no
    arguments.

    URLParser will also look in __init__.py for special overrides.
    These overrides are:

    ``urlparser_hook(environ)``
        This can modify the environment.  Its return value is ignored,
        and it cannot be used to change the response in any way.  You
        can use this, for example, to manipulate SCRIPT_NAME/PATH_INFO
        (try to keep them consistent with the original URL -- but
        consuming PATH_INFO and moving that to SCRIPT_NAME is ok).

    ``urlparser_wrap(environ, start_response, app)``:
        After URLParser finds the application, it calls this function
        (if present).  If this function doesn't call
        ``app(environ, start_response)`` then the application won't be
        called at all!  This can be used to allocate resources (with
        ``try:finally:``) or otherwise filter the output of the
        application.

    ``not_found_hook(environ, start_response)``:
        If no file can be found (in this directory) to match the
        request, then this WSGI application will be called.  You can
        use this to change the URL and pass the request back to
        URLParser again, or on to some other application.  This
        doesn't catch all ``404 Not Found`` responses, just missing
        files.

    ``application(environ, start_response)``:
        This basically overrides URLParser completely, and the given
        application is used for all requests.  ``urlparser_wrap`` and
        ``urlparser_hook`` are still called, but the filesystem isn't
        searched in any way.
    """

    # Cache of parsers keyed by (directory, base_python_name); it lives
    # on the class, so it is shared by every instance.
    parsers_by_directory = {}

    # This is lazily initialized (on the first request, see __call__)
    init_module = NoDefault

    # Constructors registered through register_constructor(); each
    # instance starts from a copy of this mapping.
    global_constructors = {}

    def __init__(self, global_conf,
                 directory, base_python_name,
                 index_names=NoDefault,
                 hide_extensions=NoDefault,
                 ignore_extensions=NoDefault,
                 constructors=None,
                 **constructor_conf):
        """
        Create a URLParser object that looks at `directory`.
        `base_python_name` is the package that this directory
        represents, thus any Python modules in this directory will
        be given names under this package.

        `index_names`, `hide_extensions` and `ignore_extensions`
        fall back to the same-named keys of `global_conf` and then to
        built-in defaults.  `constructors` is an optional mapping of
        extension to constructor layered over the globally registered
        ones.  Every extra keyword must be named
        ``'constructor .ext'``; a string value is resolved with
        ``import_string.eval_import``.

        Raises ``ValueError`` for any other extra keyword.
        """
        if global_conf:
            import warnings
            warnings.warn(
                'The global_conf argument to URLParser is deprecated; '
                'either pass in None or {}, or use make_url_parser',
                DeprecationWarning)
        else:
            global_conf = {}
        if os.path.sep != '/':
            directory = directory.replace(os.path.sep, '/')
        self.directory = directory
        self.base_python_name = base_python_name
        # This logic here should be deprecated since it is in
        # make_url_parser
        if index_names is NoDefault:
            index_names = global_conf.get(
                'index_names', ('index', 'Index', 'main', 'Main'))
        self.index_names = converters.aslist(index_names)
        if hide_extensions is NoDefault:
            hide_extensions = global_conf.get(
                'hide_extensions', ('.pyc', '.bak', '.py~', '.pyo'))
        self.hide_extensions = converters.aslist(hide_extensions)
        if ignore_extensions is NoDefault:
            ignore_extensions = global_conf.get(
                'ignore_extensions', ())
        self.ignore_extensions = converters.aslist(ignore_extensions)
        self.constructors = self.global_constructors.copy()
        if constructors:
            self.constructors.update(constructors)
        # @@: Should we also check the global options for constructors?
        for name, value in constructor_conf.items():
            if not name.startswith('constructor '):
                raise ValueError(
                    "Only extra configuration keys allowed are "
                    "'constructor .ext = import_expr'; you gave %r "
                    "(=%r)" % (name, value))
            ext = name[len('constructor '):].strip()
            # ``unicode`` is the Python 2 builtin text type
            if isinstance(value, (str, unicode)):
                value = import_string.eval_import(value)
            self.constructors[ext] = value

    def __call__(self, environ, start_response):
        """
        WSGI entry point: find the application for the next PATH_INFO
        segment and delegate to it, honouring the ``__init__.py``
        hooks described in the class docstring.
        """
        environ['paste.urlparser.base_python_name'] = self.base_python_name
        if self.init_module is NoDefault:
            self.init_module = self.find_init_module(environ)
        path_info = environ.get('PATH_INFO', '')
        if not path_info:
            return self.add_slash(environ, start_response)
        if (self.init_module
            and getattr(self.init_module, 'urlparser_hook', None)):
            self.init_module.urlparser_hook(environ)
        # find_application() rewrites these two keys; remember them so
        # not_found_hook sees the request as it arrived here.
        orig_path_info = environ['PATH_INFO']
        orig_script_name = environ['SCRIPT_NAME']
        application, filename = self.find_application(environ)
        if not application:
            if (self.init_module
                and getattr(self.init_module, 'not_found_hook', None)
                and environ.get('paste.urlparser.not_found_parser') is not self):
                not_found_hook = self.init_module.not_found_hook
                # Mark this parser so the hook is not invoked a second
                # time if it passes the request back to us.
                environ['paste.urlparser.not_found_parser'] = self
                environ['PATH_INFO'] = orig_path_info
                environ['SCRIPT_NAME'] = orig_script_name
                return not_found_hook(environ, start_response)
            if filename is None:
                name, rest_of_path = request.path_info_split(environ['PATH_INFO'])
                if not name:
                    name = 'one of %s' % ', '.join(
                        self.index_names or
                        ['(no index_names defined)'])

                return self.not_found(
                    environ, start_response,
                    'Tried to load %s from directory %s'
                    % (name, self.directory))
            else:
                environ['wsgi.errors'].write(
                    'Found resource %s, but could not construct application\n'
                    % filename)
                return self.not_found(
                    environ, start_response,
                    'Tried to load %s from directory %s'
                    % (filename, self.directory))
        if (self.init_module
            and getattr(self.init_module, 'urlparser_wrap', None)):
            return self.init_module.urlparser_wrap(
                environ, start_response, application)
        else:
            return application(environ, start_response)

    def find_application(self, environ):
        """
        Return ``(application, filename)`` for the request.

        Moves the first PATH_INFO segment onto SCRIPT_NAME (mutating
        `environ`).  ``application`` is None when nothing matched or
        no application could be constructed; ``filename`` is None when
        no file matched, or when ``__init__.py`` supplied the
        application.
        """
        if (self.init_module
            and getattr(self.init_module, 'application', None)
            and environ.get('paste.urlparser.init_application')
                != environ['SCRIPT_NAME']):
            # Record SCRIPT_NAME so the override is used only once for
            # this location.
            environ['paste.urlparser.init_application'] = environ['SCRIPT_NAME']
            return self.init_module.application, None
        name, rest_of_path = request.path_info_split(environ['PATH_INFO'])
        environ['PATH_INFO'] = rest_of_path
        if name is not None:
            environ['SCRIPT_NAME'] = environ.get('SCRIPT_NAME', '') + '/' + name
        if not name:
            # Directory request: try each index name in order.
            filename = None
            for index_name in self.index_names:
                filename = self.find_file(environ, index_name)
                if filename:
                    break
            else:
                # None of the index files found
                filename = None
        else:
            filename = self.find_file(environ, name)
        if filename is None:
            return None, filename
        return self.get_application(environ, filename), filename

    def not_found(self, environ, start_response, debug_message=None):
        """Respond with a 404 carrying lookup details in its comment."""
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment='SCRIPT_NAME=%r; PATH_INFO=%r; looking in %r; debug: %s'
            % (environ.get('SCRIPT_NAME'), environ.get('PATH_INFO'),
               self.directory, debug_message or '(none)'))
        return exc.wsgi_application(environ, start_response)

    def add_slash(self, environ, start_response):
        """
        This happens when you try to get to a directory
        without a trailing /
        """
        url = request.construct_url(environ, with_query_string=False)
        url += '/'
        if environ.get('QUERY_STRING'):
            url += '?' + environ['QUERY_STRING']
        exc = httpexceptions.HTTPMovedPermanently(
            'The resource has moved to %s - you should be redirected '
            'automatically.' % url,
            headers=[('location', url)])
        return exc.wsgi_application(environ, start_response)

    def find_file(self, environ, base_filename):
        """
        Return the full path of the entry in ``self.directory`` that
        matches `base_filename`, or None.

        An entry matches when its whole name equals `base_filename`,
        or when its name without extension does and the extension is
        not in ``ignore_extensions``.  Entries with an extension in
        ``hide_extensions`` never match.  If several entries match,
        the exact-name one wins; otherwise the ambiguity is written to
        ``wsgi.errors`` and None is returned.
        """
        possible = []
        for filename in os.listdir(self.directory):
            base, ext = os.path.splitext(filename)
            full_filename = os.path.join(self.directory, filename)
            if ext in self.hide_extensions or not base:
                continue
            if filename == base_filename:
                possible.append(full_filename)
                continue
            if ext in self.ignore_extensions:
                continue
            if base == base_filename:
                possible.append(full_filename)
        if not possible:
            return None
        if len(possible) > 1:
            # If there is an exact match, this isn't 'ambiguous'
            # per se; it might mean foo.gif and foo.gif.back for
            # instance.  Compare against the exact name itself: the
            # loop variable only holds whichever entry was listed last.
            exact_filename = os.path.join(self.directory, base_filename)
            if exact_filename in possible:
                return exact_filename
            environ['wsgi.errors'].write(
                'Ambiguous URL: %s; matches files %s\n'
                % (request.construct_url(environ),
                   ', '.join(possible)))
            return None
        return possible[0]

    def get_application(self, environ, filename):
        """
        Build the application for `filename` with the constructor
        registered for its extension (``'dir'`` for directories,
        falling back to ``'*'``).  Returns None when no constructor
        applies or the constructor itself returns None.
        """
        if os.path.isdir(filename):
            kind = 'dir'
        else:
            kind = os.path.splitext(filename)[1]
        constructor = self.constructors.get(kind, self.constructors.get('*'))
        if constructor is None:
            return None
        return constructor(self, environ, filename)

    @classmethod
    def register_constructor(cls, extension, constructor):
        """
        Register a function as a constructor.  Registered constructors
        apply to all instances of `URLParser`.

        The extension should have a leading ``.``, or the special
        extensions ``dir`` (for directories) and ``*`` (a catch-all).

        `constructor` must be a callable that takes three arguments:
        ``parser`` (the `URLParser` instance), ``environ`` and
        ``filename``, and returns a WSGI application.

        Raises ``AssertionError`` if `extension` is already registered.
        """
        d = cls.global_constructors
        assert extension not in d, (
            "A constructor already exists for the extension %r (%r) "
            "when attempting to register constructor %r"
            % (extension, d[extension], constructor))
        d[extension] = constructor

    def get_parser(self, directory, base_python_name):
        """
        Get a parser for the given directory, or create one if
        necessary.  This way parsers can be cached and reused.

        # @@: settings are inherited from the first caller
        """
        key = (directory, base_python_name)
        try:
            return self.parsers_by_directory[key]
        except KeyError:
            parser = self.__class__(
                {},
                directory, base_python_name,
                index_names=self.index_names,
                hide_extensions=self.hide_extensions,
                ignore_extensions=self.ignore_extensions,
                constructors=self.constructors)
            self.parsers_by_directory[key] = parser
            return parser

    def find_init_module(self, environ):
        """Load this directory's ``__init__.py``; None if it has none."""
        filename = os.path.join(self.directory, '__init__.py')
        if not os.path.exists(filename):
            return None
        return load_module(environ, filename)

    def __repr__(self):
        return '<%s directory=%r; module=%s at %s>' % (
            self.__class__.__name__,
            self.directory,
            self.base_python_name,
            hex(abs(id(self))))
342
def make_directory(parser, environ, filename):
    """
    Constructor for directories: return the (possibly cached) parser
    for the sub-directory `filename`, whose package name is the
    current base package name extended with the directory's basename.
    """
    package = environ['paste.urlparser.base_python_name']
    leaf = os.path.basename(filename)
    # A falsy base name means there is no parent package to prefix.
    dotted_name = (package + "." + leaf) if package else leaf
    return parser.get_parser(filename, dotted_name)
350
351	URLParser.register_constructor('dir', make_directory)
352
def make_unknown(parser, environ, filename):
    """
    Catch-all constructor: wrap `filename` in a ``fileapp.FileApp``.
    `parser` and `environ` are accepted for the constructor signature
    but not used.
    """
    file_application = fileapp.FileApp(filename)
    return file_application
355
356	URLParser.register_constructor('*', make_unknown)
357
def load_module(environ, filename):
    """
    Load the Python file `filename` as a module named after its
    basename, prefixed with the package name stored in
    ``environ['paste.urlparser.base_python_name']`` when that is set.
    Load problems are reported to ``environ['wsgi.errors']``.
    """
    package = environ['paste.urlparser.base_python_name']
    stem = os.path.splitext(os.path.basename(filename))[0]
    if package:
        dotted_name = package + '.' + stem
    else:
        dotted_name = stem
    return load_module_from_name(
        environ, filename, dotted_name, environ['wsgi.errors'])
365
def load_module_from_name(environ, filename, module_name, errors):
    """
    Import `filename` under the dotted name `module_name` and return
    the module.

    An already-imported module is returned straight from
    ``sys.modules``.  If the file's directory has no ``__init__.py``
    one is created; when that fails the problem is written to `errors`
    (a file-like object) and None is returned.  For a dotted name the
    parent package is loaded first, recursively, from the parent
    directory.

    Exceptions raised by ``imp.find_module`` / ``imp.load_module``
    propagate to the caller.
    """
    if module_name in sys.modules:
        return sys.modules[module_name]
    directory = os.path.dirname(filename)
    init_filename = os.path.join(directory, '__init__.py')
    if not os.path.exists(init_filename):
        try:
            f = open(init_filename, 'w')
        except (OSError, IOError) as e:
            errors.write(
                'Cannot write __init__.py file into directory %s (%s)\n'
                % (directory, e))
            return None
        # Close the handle even if the write itself fails.
        try:
            f.write('#\n')
        finally:
            f.close()
    # Re-check after the filesystem work above (presumably in case the
    # module got imported in the meantime).
    if module_name in sys.modules:
        return sys.modules[module_name]
    if '.' in module_name:
        parent_name, base_name = module_name.rsplit('.', 1)
        # Loaded only for its side effect of importing the parent
        # package; the return value is not needed here.
        load_module_from_name(environ, directory, parent_name, errors)
    else:
        base_name = module_name
    fp = None
    try:
        fp, pathname, stuff = imp.find_module(base_name, [directory])
        module = imp.load_module(module_name, fp, pathname, stuff)
    finally:
        if fp is not None:
            fp.close()
    return module
399
def make_py(parser, environ, filename):
    """
    Constructor for ``.py`` files: load the module and return its WSGI
    application, or None if none can be found.

    Lookup order:

    1. a truthy ``application`` attribute (its ``wsgi_application``
       attribute is preferred when present);
    2. an attribute named like the module: its ``wsgi_application``
       if it has one, otherwise the result of calling it with no
       arguments.

    When neither exists a message is written to
    ``environ['wsgi.errors']``.
    """
    module = load_module(environ, filename)
    if not module:
        return None
    if hasattr(module, 'application') and module.application:
        return getattr(module.application, 'wsgi_application', module.application)
    base_name = module.__name__.split('.')[-1]
    if hasattr(module, base_name):
        obj = getattr(module, base_name)
        if hasattr(obj, 'wsgi_application'):
            return obj.wsgi_application
        # @@: Old behavior; should probably be deprecated eventually:
        return obj()
    environ['wsgi.errors'].write(
        "Could not find application or %s in %s\n"
        % (base_name, module))
    return None
418
419	URLParser.register_constructor('.py', make_py)
420
class StaticURLParser(object):

    """
    Like ``URLParser`` but only serves static files.

    ``cache_max_age``:
      integer specifies Cache-Control max_age in seconds
    """
    # @@: Should URLParser subclass from this?

    def __init__(self, directory, root_directory=None,
                 cache_max_age=None):
        """
        Serve files below `directory`.

        `root_directory` is the boundary no request may escape; it
        defaults to `directory`.  It is stored normalised
        (``os.path.normpath``) so it can be compared with the
        normalised paths built in ``__call__``.
        """
        if os.path.sep != '/':
            directory = directory.replace(os.path.sep, '/')
        self.directory = directory
        if root_directory is None:
            root_directory = directory
        # Normalise in both cases: an un-normalised default such as
        # './static' could never prefix-match a normalised full path.
        root_directory = os.path.normpath(root_directory)
        if os.path.sep != '/':
            root_directory = root_directory.replace('/', os.path.sep)
        self.root_directory = root_directory
        self.cache_max_age = cache_max_age

    def _is_within_root(self, full):
        """True if the path `full` is the root directory or lies below it."""
        if self.root_directory is None:
            return True
        root = os.path.abspath(self.root_directory)
        target = os.path.abspath(full)
        if target == root:
            return True
        # Compare with a trailing separator so a sibling such as
        # '/srv/static-private' does not pass for root '/srv/static'.
        if not root.endswith(os.path.sep):
            root += os.path.sep
        return target.startswith(root)

    def __call__(self, environ, start_response):
        """
        WSGI entry point: serve the file named by the next PATH_INFO
        segment (``index.html`` for ``/``), recursing into
        sub-directories and answering 404 for anything missing or
        outside the root directory.
        """
        path_info = environ.get('PATH_INFO', '')
        if not path_info:
            return self.add_slash(environ, start_response)
        if path_info == '/':
            # @@: This should obviously be configurable
            filename = 'index.html'
        else:
            filename = request.path_info_pop(environ)
        full = os.path.normpath(os.path.join(self.directory, filename))
        if os.path.sep != '/':
            full = full.replace('/', os.path.sep)
        if not self._is_within_root(full):
            # Out of bounds
            return self.not_found(environ, start_response)
        if not os.path.exists(full):
            return self.not_found(environ, start_response)
        if os.path.isdir(full):
            # @@: Cache?
            child_root = self.root_directory or self.directory
            child = self.__class__(full, root_directory=child_root,
                                   cache_max_age=self.cache_max_age)
            return child(environ, start_response)
        if environ.get('PATH_INFO') and environ.get('PATH_INFO') != '/':
            return self.error_extra_path(environ, start_response)
        if_none_match = environ.get('HTTP_IF_NONE_MATCH')
        if if_none_match:
            # The file's modification time is used as the ETag value.
            mytime = os.stat(full).st_mtime
            if str(mytime) == if_none_match:
                headers = []
                ETAG.update(headers, mytime)
                start_response('304 Not Modified', headers)
                return ['']  # empty body

        fa = self.make_app(full)
        if self.cache_max_age:
            fa.cache_control(max_age=self.cache_max_age)
        return fa(environ, start_response)

    def make_app(self, filename):
        """Return the WSGI application that serves `filename`."""
        return fileapp.FileApp(filename)

    def add_slash(self, environ, start_response):
        """
        This happens when you try to get to a directory
        without a trailing /
        """
        url = request.construct_url(environ, with_query_string=False)
        url += '/'
        if environ.get('QUERY_STRING'):
            url += '?' + environ['QUERY_STRING']
        exc = httpexceptions.HTTPMovedPermanently(
            'The resource has moved to %s - you should be redirected '
            'automatically.' % url,
            headers=[('location', url)])
        return exc.wsgi_application(environ, start_response)

    def not_found(self, environ, start_response, debug_message=None):
        """Respond with a 404 carrying lookup details in its comment."""
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment='SCRIPT_NAME=%r; PATH_INFO=%r; looking in %r; debug: %s'
            % (environ.get('SCRIPT_NAME'), environ.get('PATH_INFO'),
               self.directory, debug_message or '(none)'))
        return exc.wsgi_application(environ, start_response)

    def error_extra_path(self, environ, start_response):
        """Respond with a 404 because a file was given a trailing path."""
        exc = httpexceptions.HTTPNotFound(
            'The trailing path %r is not allowed' % environ['PATH_INFO'])
        return exc.wsgi_application(environ, start_response)

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.directory)
520
def make_static(global_conf, document_root, cache_max_age=None):
    """
    Return a WSGI application that serves a directory (configured
    with document_root)

    cache_max_age - integer specifies CACHE_CONTROL max_age in seconds
    (converted with ``int()`` when given; `global_conf` is not used)
    """
    max_age = None if cache_max_age is None else int(cache_max_age)
    return StaticURLParser(document_root, cache_max_age=max_age)
532
class PkgResourcesParser(StaticURLParser):

    """
    Static-file parser that serves resources out of an egg through
    ``pkg_resources`` instead of reading the filesystem directly.
    """

    def __init__(self, egg_or_spec, resource_name, manager=None, root_resource=None):
        """
        `egg_or_spec` is a distribution object or a string spec that
        is resolved with ``pkg_resources.get_distribution``.
        `resource_name` is the path inside the egg to serve from;
        `root_resource` (defaulting to `resource_name`) is the boundary
        requests may not leave.  A fresh ``ResourceManager`` is created
        when `manager` is not given.

        Raises ``NotImplementedError`` if pkg_resources is unavailable.
        """
        if pkg_resources is None:
            raise NotImplementedError("This class requires pkg_resources.")
        # ``unicode`` is the Python 2 builtin text type
        if isinstance(egg_or_spec, (str, unicode)):
            self.egg = pkg_resources.get_distribution(egg_or_spec)
        else:
            self.egg = egg_or_spec
        self.resource_name = resource_name
        if manager is None:
            manager = pkg_resources.ResourceManager()
        self.manager = manager
        if root_resource is None:
            root_resource = resource_name
        self.root_resource = os.path.normpath(root_resource)

    def __repr__(self):
        return '<%s for %s:%r>' % (
            self.__class__.__name__,
            self.egg.project_name,
            self.resource_name)

    def __call__(self, environ, start_response):
        """
        WSGI entry point: serve the resource named by the next
        PATH_INFO segment (``index.html`` for ``/``), recursing into
        resource directories.
        """
        path_info = environ.get('PATH_INFO', '')
        if not path_info:
            return self.add_slash(environ, start_response)
        if path_info == '/':
            # @@: This should obviously be configurable
            filename = 'index.html'
        else:
            filename = request.path_info_pop(environ)
        resource = os.path.normpath(self.resource_name + '/' + filename)
        # NOTE(review): with resource_name='' the root becomes '.'
        # (os.path.normpath('')) while resources normalise to '/name',
        # so this check looks like it would reject every request --
        # confirm against real usage.
        if self.root_resource is not None and not resource.startswith(self.root_resource):
            # Out of bounds
            return self.not_found(environ, start_response)
        if not self.egg.has_resource(resource):
            return self.not_found(environ, start_response)
        if self.egg.resource_isdir(resource):
            # @@: Cache?
            child_root = self.root_resource or self.resource_name
            child = self.__class__(self.egg, resource, self.manager,
                                   root_resource=child_root)
            return child(environ, start_response)
        remaining = environ.get('PATH_INFO')
        if remaining and remaining != '/':
            return self.error_extra_path(environ, start_response)

        content_type, _encoding = mimetypes.guess_type(resource)
        if not content_type:
            content_type = 'application/octet-stream'
        # @@: I don't know what to do with the encoding.
        try:
            stream = self.egg.get_resource_stream(self.manager, resource)
        except (IOError, OSError) as e:
            exc = httpexceptions.HTTPForbidden(
                'You are not permitted to view this file (%s)' % e)
            return exc.wsgi_application(environ, start_response)
        start_response('200 OK',
                       [('content-type', content_type)])
        return fileapp._FileIter(stream)

    def not_found(self, environ, start_response, debug_message=None):
        """Respond with a 404 naming the egg and resource searched."""
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment='SCRIPT_NAME=%r; PATH_INFO=%r; looking in egg:%s#%r; debug: %s'
            % (environ.get('SCRIPT_NAME'), environ.get('PATH_INFO'),
               self.egg, self.resource_name, debug_message or '(none)'))
        return exc.wsgi_application(environ, start_response)
602
def make_pkg_resources(global_conf, egg, resource_name=''):
    """
    A static file parser that loads data from an egg using
    ``pkg_resources``.  Takes a configuration value ``egg``, which is
    an egg spec, and a base ``resource_name`` (default empty string)
    which is the path in the egg that this starts at.

    Raises ``NotImplementedError`` when pkg_resources is not available.
    """
    if pkg_resources is not None:
        return PkgResourcesParser(egg, resource_name)
    raise NotImplementedError("This function requires pkg_resources.")
613
def make_url_parser(global_conf, directory, base_python_name,
                    index_names=None, hide_extensions=None,
                    ignore_extensions=None,
                    **constructor_conf):
    """
    Create a URLParser application that looks in ``directory``, which
    should be the directory for the Python package named in
    ``base_python_name``.  ``index_names`` are used when viewing the
    directory (like ``'index'`` for ``'index.html'``).
    ``hide_extensions`` are extensions that are not viewable (like
    ``'.pyc'``) and ``ignore_extensions`` are viewable but only if an
    explicit extension is given.

    Each option left as None is read from the same-named key of
    ``global_conf`` and then falls back to a built-in default.  Extra
    ``'constructor .ext'`` keywords are passed through to ``URLParser``.
    """
    # Tolerate a missing/None global_conf, as URLParser itself does.
    global_conf = global_conf or {}

    if index_names is None:
        index_names = global_conf.get(
            'index_names', ('index', 'Index', 'main', 'Main'))
    index_names = converters.aslist(index_names)

    if hide_extensions is None:
        # Extensions are compared with os.path.splitext() output, which
        # keeps the leading dot, so every default needs one; the set
        # matches the default used by URLParser.__init__.
        hide_extensions = global_conf.get(
            'hide_extensions', ('.pyc', '.bak', '.py~', '.pyo'))
    hide_extensions = converters.aslist(hide_extensions)

    if ignore_extensions is None:
        ignore_extensions = global_conf.get(
            'ignore_extensions', ())
    ignore_extensions = converters.aslist(ignore_extensions)
    # There's no real way to set constructors currently...

    return URLParser({}, directory, base_python_name,
                     index_names=index_names,
                     hide_extensions=hide_extensions,
                     ignore_extensions=ignore_extensions,
                     **constructor_conf)
648

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/Paste-1.6-py2.6.egg/paste/urlparser.py

異なるフォーマットでダウンロード: