Context Navigation

extract.py

リビジョン 3, 22.0 KB (コミッタ: kohda, 15 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	# -*- coding: utf-8 -*-
2	#
3	# Copyright (C) 2007 Edgewall Software
4	# All rights reserved.
5	#
6	# This software is licensed as described in the file COPYING, which
7	# you should have received as part of this distribution. The terms
8	# are also available at http://babel.edgewall.org/wiki/License.
9	#
10	# This software consists of voluntary contributions made by many
11	# individuals. For the exact contribution history, see the revision
12	# history and logs, available at http://babel.edgewall.org/log/.
13
14	"""Basic infrastructure for extracting localizable messages from source files.
15
16	This module defines an extensible system for collecting localizable message
17	strings from a variety of sources. A native extractor for Python source files
18	is builtin, extractors for other sources can be added using very simple plugins.
19
20	The main entry points into the extraction functionality are the functions
21	`extract_from_dir` and `extract_from_file`.
22	"""
23
24	import os
25	try:
26	set
27	except NameError:
28	from sets import Set as set
29	import sys
30	from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
31
32	from babel.util import parse_encoding, pathmatch, relpath
33	from textwrap import dedent
34
# Names exported as the public API of this module.
__all__ = ['extract', 'extract_from_dir', 'extract_from_file']
__docformat__ = 'restructuredtext en'

# Entry point group that `extract` queries through pkg_resources to resolve
# an extraction method name to an extractor function.
GROUP_NAME = 'babel.extractors'

# Maps the name of a translation function to a tuple of 1-based argument
# positions that hold localizable strings. `extract` treats a value of
# ``None`` the same as ``(1,)``, i.e. only the first argument is a message.
DEFAULT_KEYWORDS = {
    '_': None,
    'gettext': None,
    'ngettext': (1, 2),
    'ugettext': None,
    'ungettext': (1, 2),
    'dgettext': (2,),
    'dngettext': (2, 3),
    'N_': None
}

# Default ``(pattern, method)`` list used by `extract_from_dir`: every file
# matching ``**.py`` is handled by the "python" extraction method.
DEFAULT_MAPPING = [('**.py', 'python')]

# Template for the warning `extract` writes to stderr when the first message
# of a call is empty; the single ``%s`` is filled with "<file name>:<lineno>".
empty_msgid_warning = (
'%s: warning: Empty msgid.  It is reserved by GNU gettext: gettext("") '
'returns the header entry with meta information, not the empty string.')
56
57
58	def _strip_comment_tags(comments, tags):
59	"""Helper function for `extract` that strips comment tags from strings
60	in a list of comment lines. This functions operates in-place.
61	"""
62	def _strip(line):
63	for tag in tags:
64	if line.startswith(tag):
65	return line[len(tag):].strip()
66	return line
67	comments[:] = map(_strip, comments)
68
69
def extract_from_dir(dirname=None, method_map=DEFAULT_MAPPING,
                     options_map=None, keywords=DEFAULT_KEYWORDS,
                     comment_tags=(), callback=None, strip_comment_tags=False):
    """Extract messages from any source files found in the given directory.

    This function generates tuples of the form:

        ``(filename, lineno, message, comments)``

    Which extraction method is used per file is determined by the `method_map`
    parameter, which maps extended glob patterns to extraction method names.
    For example, the following is the default mapping:

    >>> method_map = [
    ...     ('**.py', 'python')
    ... ]

    This basically says that files with the filename extension ".py" at any
    level inside the directory should be processed by the "python" extraction
    method. Files that don't match any of the mapping patterns are ignored. See
    the documentation of the `pathmatch` function for details on the pattern
    syntax.

    The following extended mapping would also use the "genshi" extraction
    method on any file in "templates" subdirectory:

    >>> method_map = [
    ...     ('**/templates/**.*', 'genshi'),
    ...     ('**.py', 'python')
    ... ]

    The dictionary provided by the optional `options_map` parameter augments
    these mappings. It uses extended glob patterns as keys, and the values are
    dictionaries mapping options names to option values (both strings).

    The glob patterns of the `options_map` do not necessarily need to be the
    same as those used in the method mapping. For example, while all files in
    the ``templates`` folders in an application may be Genshi applications, the
    options for those files may differ based on extension:

    >>> options_map = {
    ...     '**/templates/**.txt': {
    ...         'template_class': 'genshi.template:TextTemplate',
    ...         'encoding': 'latin-1'
    ...     },
    ...     '**/templates/**.html': {
    ...         'include_attrs': ''
    ...     }
    ... }

    Sub-directories whose name starts with ``.`` or ``_`` are not descended
    into. Directories and files are visited in sorted order.

    :param dirname: the path to the directory to extract messages from; when
                    omitted (or `None`) the current working directory at the
                    time the extraction runs is used
    :param method_map: a list of ``(pattern, method)`` tuples that maps of
                       extraction method names to extended glob patterns; the
                       first matching pattern wins
    :param options_map: a dictionary of additional options (optional)
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of tags of translator comments to search for
                         and include in the results
    :param callback: a function that is called for every file that message are
                     extracted from, just before the extraction itself is
                     performed; the function is passed the filename, the name
                     of the extraction method and and the options dictionary as
                     positional arguments, in that order
    :param strip_comment_tags: a flag that if set to `True` causes all comment
                               tags to be removed from the collected comments.
    :return: an iterator over ``(filename, lineno, message, comments)`` tuples
    :rtype: ``iterator``
    :see: `pathmatch`
    """
    # Resolve the default lazily: a default of ``os.getcwd()`` in the
    # signature would be frozen at import time and ignore later chdir() calls.
    if dirname is None:
        dirname = os.getcwd()
    if options_map is None:
        options_map = {}

    absname = os.path.abspath(dirname)
    for root, dirnames, filenames in os.walk(absname):
        # Prune hidden/private directories by replacing the list contents in
        # place; os.walk only honours in-place changes, and removing entries
        # while iterating over the same list would skip neighbours.
        dirnames[:] = [subdir for subdir in dirnames
                       if not (subdir.startswith('.') or
                               subdir.startswith('_'))]
        dirnames.sort()
        filenames.sort()
        for filename in filenames:
            filename = relpath(
                os.path.join(root, filename).replace(os.sep, '/'),
                dirname
            )
            for pattern, method in method_map:
                if pathmatch(pattern, filename):
                    filepath = os.path.join(absname, filename)
                    options = {}
                    # If several option patterns match, the one iterated last
                    # is the one that ends up being used.
                    for opattern, odict in options_map.items():
                        if pathmatch(opattern, filename):
                            options = odict
                    if callback:
                        callback(filename, method, options)
                    for lineno, message, comments in \
                            extract_from_file(method, filepath,
                                              keywords=keywords,
                                              comment_tags=comment_tags,
                                              options=options,
                                              strip_comment_tags=
                                                  strip_comment_tags):
                        yield filename, lineno, message, comments
                    # Only the first matching method pattern is applied.
                    break
174
175
def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
                      comment_tags=(), options=None, strip_comment_tags=False):
    """Open a file and extract its messages with the given method.

    The file is opened in universal-newline mode, handed to `extract`, and
    closed again before this function returns. The result is a list of the
    tuples produced by `extract`, which have the form:

        ``(lineno, message, comments)``

    :param method: a string specifying the extraction method (.e.g. "python")
    :param filename: the path to the file to extract messages from
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :param strip_comment_tags: a flag that if set to `True` causes all comment
                               tags to be removed from the collected comments.
    :return: the list of extracted messages
    :rtype: `list`
    """
    source = open(filename, 'U')
    try:
        found = extract(method, source, keywords, comment_tags, options,
                        strip_comment_tags)
        # `extract` is lazy, so it must be drained while the file is open.
        return list(found)
    finally:
        source.close()
204
205
def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
            options=None, strip_comment_tags=False):
    """Extract messages from the given file-like object using the specified
    extraction method.

    This function generates tuples of the form:

        ``(lineno, message, comments)``

    where ``message`` is a single string when the keyword specification
    selects one argument, and a tuple of strings otherwise.

    The implementation dispatches the actual extraction to plugins, based on the
    value of the ``method`` parameter.

    >>> source = '''# foo module
    ... def run(argv):
    ...    print _('Hello, world!')
    ... '''

    >>> from StringIO import StringIO
    >>> for message in extract('python', StringIO(source)):
    ...     print message
    (3, u'Hello, world!', [])

    Calls whose arguments do not satisfy the keyword specification are
    skipped silently; calls whose first message is empty are skipped after a
    warning has been written to standard error.

    :param method: a string specifying the extraction method (.e.g. "python");
                   if this is a simple name, the extraction function will be
                   looked up by entry point; if it is an explicit reference
                   to a function (of the form ``package.module:funcname`` or
                   ``package.module.funcname``), the corresponding function
                   will be imported and used
    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :param strip_comment_tags: a flag that if set to `True` causes all comment
                               tags to be removed from the collected comments.
    :return: an iterator over the extracted messages
    :rtype: ``iterator``
    :raise ValueError: if the extraction method is not registered
    """
    extractor = None
    if ':' in method or '.' in method:
        # Explicit reference: "pkg.mod:func" or "pkg.mod.func"
        if ':' in method:
            module, attrname = method.split(':', 1)
        else:
            cut = method.rfind('.')
            module, attrname = method[:cut], method[cut + 1:]
        extractor = getattr(__import__(module, {}, {}, [attrname]), attrname)
    else:
        try:
            from pkg_resources import working_set
        except ImportError:
            # Without pkg_resources only the extractors defined in this
            # module can be resolved by name
            builtin = {'ignore': extract_nothing, 'python': extract_python}
            extractor = builtin.get(method)
        else:
            # Use the first entry point registered under that name, if any
            for entry_point in working_set.iter_entry_points(GROUP_NAME,
                                                             method):
                extractor = entry_point.load(require=True)
                break
    if extractor is None:
        raise ValueError('Unknown extraction method %r' % method)

    results = extractor(fileobj, keywords.keys(), comment_tags,
                        options=options or {})

    for lineno, funcname, messages, comments in results:
        # A missing function name or a `None` spec both mean "first argument"
        spec = (1,)
        if funcname:
            spec = keywords[funcname] or (1,)
        if not isinstance(messages, (list, tuple)):
            messages = [messages]
        if not messages:
            continue

        # Pick the arguments named by the (1-based) keyword specification;
        # give up on the call if one is missing or is not a string literal
        available = len(messages)
        selected = []
        for position in spec:
            if available < position or messages[position - 1] is None:
                selected = None
                break
            selected.append(messages[position - 1])
        if selected is None:
            continue

        if not messages[spec[0] - 1]:
            # An empty string msgid isn't valid, emit a warning
            source_name = hasattr(fileobj, 'name') and fileobj.name \
                or '(unknown)'
            where = '%s:%i' % (source_name, lineno)
            print >> sys.stderr, empty_msgid_warning % where
            continue

        if len(selected) == 1:
            messages = selected[0]
        else:
            messages = tuple(selected)

        if strip_comment_tags:
            _strip_comment_tags(comments, comment_tags)
        yield lineno, messages, comments
318
319
def extract_nothing(fileobj, keywords, comment_tags, options):
    """No-op extractor: accepts the standard extractor arguments, ignores
    all of them and reports no messages.

    :return: a new, empty list
    :rtype: `list`
    """
    return list()
325
326
def extract_python(fileobj, keywords, comment_tags, options):
    """Extract messages from Python source code.

    The source is tokenized and scanned with a small state machine: once a
    name listed in `keywords` is followed by an opening parenthesis, the
    string literals of each top-level argument are collected until the
    matching closing parenthesis, at which point one result is yielded.
    Arguments without any string literal are reported as `None`.

    :param fileobj: the seekable, file-like object the messages should be
                    extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional); the
                    ``encoding`` entry is used when `parse_encoding` returns
                    nothing, with ``iso-8859-1`` as the final fallback
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    funcname = lineno = message_lineno = None
    # -1: not inside a tracked call; 0: directly inside the argument list of
    # the keyword call; > 0: inside a nested parenthesis of that call
    call_stack = -1
    # String literal pieces of the argument currently being read
    buf = []
    # One entry per completed argument of the current call
    messages = []
    # (lineno, text) pairs of the translator comments collected so far
    translator_comments = []
    in_def = in_translator_comments = False
    comment_tag = None

    # NOTE(review): presumably parse_encoding reads a coding declaration from
    # the file -- confirm against babel.util
    encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1')

    tokens = generate_tokens(fileobj.readline)
    for tok, value, (lineno, _), _, _ in tokens:
        if call_stack == -1 and tok == NAME and value in ('def', 'class'):
            in_def = True
        elif tok == OP and value == '(':
            if in_def:
                # Avoid false positives for declarations such as:
                # def gettext(arg='message'):
                in_def = False
                continue
            if funcname:
                # Opening paren after a keyword (or nested inside its
                # arguments): remember the line and go one level deeper
                message_lineno = lineno
                call_stack += 1
        elif in_def and tok == OP and value == ':':
            # End of a class definition without parens
            in_def = False
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Strip the comment token from the line
            value = value.decode(encoding)[1:].strip()
            if in_translator_comments and \
                    translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue appending
                translator_comments.append((lineno, value))
                continue
            # If execution reaches this point, let's see if comment line
            # starts with one of the comment tags
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.append((lineno, value))
                    break
        elif funcname and call_stack == 0:
            if tok == OP and value == ')':
                # End of the keyword call: flush the last argument
                if buf:
                    messages.append(''.join(buf))
                    del buf[:]
                else:
                    messages.append(None)

                if len(messages) > 1:
                    messages = tuple(messages)
                else:
                    messages = messages[0]
                # Comments don't apply unless they immediately precede the
                # message
                if translator_comments and \
                        translator_comments[-1][0] < message_lineno - 1:
                    translator_comments = []

                yield (message_lineno, funcname, messages,
                       [comment[1] for comment in translator_comments])

                # Reset the state machine for the next call
                funcname = lineno = message_lineno = None
                call_stack = -1
                messages = []
                translator_comments = []
                in_translator_comments = False
            elif tok == STRING:
                # Unwrap quotes in a safe manner, maintaining the string's
                # encoding
                # https://sourceforge.net/tracker/?func=detail&atid=355470&
                # aid=617979&group_id=5470
                # NOTE(review): this evaluates text taken from the scanned
                # file; only STRING tokens reach it and builtins are emptied,
                # but it is still `eval` on external input
                value = eval('# coding=%s\n%s' % (encoding, value),
                             {'__builtins__':{}}, {})
                if isinstance(value, str):
                    value = value.decode(encoding)
                buf.append(value)
            elif tok == OP and value == ',':
                # Argument separator: flush the argument read so far
                if buf:
                    messages.append(''.join(buf))
                    del buf[:]
                else:
                    messages.append(None)
                if translator_comments:
                    # We have translator comments, and since we're on a
                    # comma(,) user is allowed to break into a new line
                    # Let's increase the last comment's lineno in order
                    # for the comment to still be a valid one
                    old_lineno, old_comment = translator_comments.pop()
                    translator_comments.append((old_lineno+1, old_comment))
        elif call_stack > 0 and tok == OP and value == ')':
            # Closing paren of a nested call inside the keyword's arguments
            call_stack -= 1
        elif funcname and call_stack == -1:
            # The keyword was not followed by an opening paren: not a call
            funcname = None
        elif tok == NAME and value in keywords:
            funcname = value
437
438
def extract_javascript(fileobj, keywords, comment_tags, options):
    """Extract messages from JavaScript source code.

    The whole file is read, decoded and tokenized; the tokens are then
    scanned with a small state machine that collects the string arguments of
    calls to the functions named in `keywords`. String literals joined with
    ``+`` are concatenated into one argument. Calls that end up with no
    arguments at all are not reported.

    :param fileobj: the seekable, file-like object the messages should be
                    extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional); the
                    ``encoding`` entry (default ``utf-8``) is used to decode
                    the file contents
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    from babel.messages.jslexer import tokenize, unquote_string
    funcname = message_lineno = None
    # One entry per completed argument of the current call
    messages = []
    # String value of the argument currently being read, if any
    last_argument = None
    # (lineno, text) pairs of the translator comments collected so far
    translator_comments = []
    # Set after a ``+`` so the next string is appended to `last_argument`
    concatenate_next = False
    encoding = options.get('encoding', 'utf-8')
    last_token = None
    # -1: not inside a tracked call; 0: directly inside the argument list of
    # the keyword call; > 0: inside a nested parenthesis of that call
    call_stack = -1

    for token in tokenize(fileobj.read().decode(encoding)):
        if token.type == 'operator' and token.value == '(':
            if funcname:
                message_lineno = token.lineno
                call_stack += 1

        elif call_stack == -1 and token.type == 'linecomment':
            # Drop the two-character comment marker
            value = token.value[2:].strip()
            if translator_comments and \
               translator_comments[-1][0] == token.lineno - 1:
                # Directly follows a collected comment line: continuation
                translator_comments.append((token.lineno, value))
                continue

            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    translator_comments.append((token.lineno, value.strip()))
                    break

        elif token.type == 'multilinecomment':
            # only one multi-line comment may precede a translation
            translator_comments = []
            # Drop the two-character opening and closing comment markers
            value = token.value[2:-2].strip()
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    lines = value.splitlines()
                    if lines:
                        lines[0] = lines[0].strip()
                        # Remove the common indentation of the remaining lines
                        lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
                        for offset, line in enumerate(lines):
                            translator_comments.append((token.lineno + offset,
                                                        line))
                    break

        elif funcname and call_stack == 0:
            if token.type == 'operator' and token.value == ')':
                # End of the keyword call: flush the last argument
                if last_argument is not None:
                    messages.append(last_argument)
                if len(messages) > 1:
                    messages = tuple(messages)
                elif messages:
                    messages = messages[0]
                else:
                    messages = None

                # Comments don't apply unless they immediately precede the
                # message
                if translator_comments and \
                   translator_comments[-1][0] < message_lineno - 1:
                    translator_comments = []

                if messages is not None:
                    yield (message_lineno, funcname, messages,
                           [comment[1] for comment in translator_comments])

                # Reset the state machine for the next call
                funcname = message_lineno = last_argument = None
                concatenate_next = False
                translator_comments = []
                messages = []
                call_stack = -1

            elif token.type == 'string':
                new_value = unquote_string(token.value)
                if concatenate_next:
                    last_argument = (last_argument or '') + new_value
                    concatenate_next = False
                else:
                    last_argument = new_value

            elif token.type == 'operator':
                if token.value == ',':
                    # Argument separator: flush the argument read so far
                    if last_argument is not None:
                        messages.append(last_argument)
                        last_argument = None
                    else:
                        messages.append(None)
                    concatenate_next = False
                elif token.value == '+':
                    concatenate_next = True

        elif call_stack > 0 and token.type == 'operator' \
             and token.value == ')':
            # Closing paren of a nested call inside the keyword's arguments
            call_stack -= 1

        elif funcname and call_stack == -1:
            # The keyword was not followed by an opening paren: not a call
            funcname = None

        elif call_stack == -1 and token.type == 'name' and \
             token.value in keywords and \
             (last_token is None or last_token.type != 'name' or
              last_token.value != 'function'):
            # A keyword name, unless it directly follows ``function`` (i.e.
            # it is the name of a function being declared)
            funcname = token.value

        last_token = token

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/Babel-0.9.4-py2.6.egg/babel/messages/extract.py

異なるフォーマットでダウンロード: