Context Navigation

extract.py @ 3

リビジョン 3, 22.0 KB (コミッタ: kohda, 14 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

Rev	行番号
[3]	1	# -*- coding: utf-8 -*-
	2	#
	3	# Copyright (C) 2007 Edgewall Software
	4	# All rights reserved.
	5	#
	6	# This software is licensed as described in the file COPYING, which
	7	# you should have received as part of this distribution. The terms
	8	# are also available at http://babel.edgewall.org/wiki/License.
	9	#
	10	# This software consists of voluntary contributions made by many
	11	# individuals. For the exact contribution history, see the revision
	12	# history and logs, available at http://babel.edgewall.org/log/.
	13
	14	"""Basic infrastructure for extracting localizable messages from source files.
	15
	16	This module defines an extensible system for collecting localizable message
	17	strings from a variety of sources. A native extractor for Python source files
	18	is builtin, extractors for other sources can be added using very simple plugins.
	19
	20	The main entry points into the extraction functionality are the functions
	21	`extract_from_dir` and `extract_from_file`.
	22	"""
	23
	24	import os
	25	try:
	26	set
	27	except NameError:
	28	from sets import Set as set
	29	import sys
	30	from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
	31
	32	from babel.util import parse_encoding, pathmatch, relpath
	33	from textwrap import dedent
	34
	35	__all__ = ['extract', 'extract_from_dir', 'extract_from_file']
	36	__docformat__ = 'restructuredtext en'
	37
	38	GROUP_NAME = 'babel.extractors'
	39
	40	DEFAULT_KEYWORDS = {
	41	'_': None,
	42	'gettext': None,
	43	'ngettext': (1, 2),
	44	'ugettext': None,
	45	'ungettext': (1, 2),
	46	'dgettext': (2,),
	47	'dngettext': (2, 3),
	48	'N_': None
	49	}
	50
	51	DEFAULT_MAPPING = [('**.py', 'python')]
	52
	53	empty_msgid_warning = (
	54	'%s: warning: Empty msgid. It is reserved by GNU gettext: gettext("") '
	55	'returns the header entry with meta information, not the empty string.')
	56
	57
	58	def _strip_comment_tags(comments, tags):
	59	"""Helper function for `extract` that strips comment tags from strings
	60	in a list of comment lines. This functions operates in-place.
	61	"""
	62	def _strip(line):
	63	for tag in tags:
	64	if line.startswith(tag):
	65	return line[len(tag):].strip()
	66	return line
	67	comments[:] = map(_strip, comments)
	68
	69
	70	def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING,
	71	options_map=None, keywords=DEFAULT_KEYWORDS,
	72	comment_tags=(), callback=None, strip_comment_tags=False):
	73	"""Extract messages from any source files found in the given directory.
	74
	75	This function generates tuples of the form:
	76
	77	``(filename, lineno, message, comments)``
	78
	79	Which extraction method is used per file is determined by the `method_map`
	80	parameter, which maps extended glob patterns to extraction method names.
	81	For example, the following is the default mapping:
	82
	83	>>> method_map = [
	84	... ('**.py', 'python')
	85	... ]
	86
	87	This basically says that files with the filename extension ".py" at any
	88	level inside the directory should be processed by the "python" extraction
	89	method. Files that don't match any of the mapping patterns are ignored. See
	90	the documentation of the `pathmatch` function for details on the pattern
	91	syntax.
	92
	93	The following extended mapping would also use the "genshi" extraction
	94	method on any file in "templates" subdirectory:
	95
	96	>>> method_map = [
	97	... ('/templates/.*', 'genshi'),
	98	... ('**.py', 'python')
	99	... ]
	100
	101	The dictionary provided by the optional `options_map` parameter augments
	102	these mappings. It uses extended glob patterns as keys, and the values are
	103	dictionaries mapping options names to option values (both strings).
	104
	105	The glob patterns of the `options_map` do not necessarily need to be the
	106	same as those used in the method mapping. For example, while all files in
	107	the ``templates`` folders in an application may be Genshi applications, the
	108	options for those files may differ based on extension:
	109
	110	>>> options_map = {
	111	... '/templates/.txt': {
	112	... 'template_class': 'genshi.template:TextTemplate',
	113	... 'encoding': 'latin-1'
	114	... },
	115	... '/templates/.html': {
	116	... 'include_attrs': ''
	117	... }
	118	... }
	119
	120	:param dirname: the path to the directory to extract messages from
	121	:param method_map: a list of ``(pattern, method)`` tuples that maps of
	122	extraction method names to extended glob patterns
	123	:param options_map: a dictionary of additional options (optional)
	124	:param keywords: a dictionary mapping keywords (i.e. names of functions
	125	that should be recognized as translation functions) to
	126	tuples that specify which of their arguments contain
	127	localizable strings
	128	:param comment_tags: a list of tags of translator comments to search for
	129	and include in the results
	130	:param callback: a function that is called for every file that message are
	131	extracted from, just before the extraction itself is
	132	performed; the function is passed the filename, the name
	133	of the extraction method and and the options dictionary as
	134	positional arguments, in that order
	135	:param strip_comment_tags: a flag that if set to `True` causes all comment
	136	tags to be removed from the collected comments.
	137	:return: an iterator over ``(filename, lineno, funcname, message)`` tuples
	138	:rtype: ``iterator``
	139	:see: `pathmatch`
	140	"""
	141	if options_map is None:
	142	options_map = {}
	143
	144	absname = os.path.abspath(dirname)
	145	for root, dirnames, filenames in os.walk(absname):
	146	for subdir in dirnames:
	147	if subdir.startswith('.') or subdir.startswith('_'):
	148	dirnames.remove(subdir)
	149	dirnames.sort()
	150	filenames.sort()
	151	for filename in filenames:
	152	filename = relpath(
	153	os.path.join(root, filename).replace(os.sep, '/'),
	154	dirname
	155	)
	156	for pattern, method in method_map:
	157	if pathmatch(pattern, filename):
	158	filepath = os.path.join(absname, filename)
	159	options = {}
	160	for opattern, odict in options_map.items():
	161	if pathmatch(opattern, filename):
	162	options = odict
	163	if callback:
	164	callback(filename, method, options)
	165	for lineno, message, comments in \
	166	extract_from_file(method, filepath,
	167	keywords=keywords,
	168	comment_tags=comment_tags,
	169	options=options,
	170	strip_comment_tags=
	171	strip_comment_tags):
	172	yield filename, lineno, message, comments
	173	break
	174
	175
	176	def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
	177	comment_tags=(), options=None, strip_comment_tags=False):
	178	"""Extract messages from a specific file.
	179
	180	This function returns a list of tuples of the form:
	181
	182	``(lineno, funcname, message)``
	183
	184	:param filename: the path to the file to extract messages from
	185	:param method: a string specifying the extraction method (.e.g. "python")
	186	:param keywords: a dictionary mapping keywords (i.e. names of functions
	187	that should be recognized as translation functions) to
	188	tuples that specify which of their arguments contain
	189	localizable strings
	190	:param comment_tags: a list of translator tags to search for and include
	191	in the results
	192	:param strip_comment_tags: a flag that if set to `True` causes all comment
	193	tags to be removed from the collected comments.
	194	:param options: a dictionary of additional options (optional)
	195	:return: the list of extracted messages
	196	:rtype: `list`
	197	"""
	198	fileobj = open(filename, 'U')
	199	try:
	200	return list(extract(method, fileobj, keywords, comment_tags, options,
	201	strip_comment_tags))
	202	finally:
	203	fileobj.close()
	204
	205
	206	def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
	207	options=None, strip_comment_tags=False):
	208	"""Extract messages from the given file-like object using the specified
	209	extraction method.
	210
	211	This function returns a list of tuples of the form:
	212
	213	``(lineno, message, comments)``
	214
	215	The implementation dispatches the actual extraction to plugins, based on the
	216	value of the ``method`` parameter.
	217
	218	>>> source = '''# foo module
	219	... def run(argv):
	220	... print _('Hello, world!')
	221	... '''
	222
	223	>>> from StringIO import StringIO
	224	>>> for message in extract('python', StringIO(source)):
	225	... print message
	226	(3, u'Hello, world!', [])
	227
	228	:param method: a string specifying the extraction method (.e.g. "python");
	229	if this is a simple name, the extraction function will be
	230	looked up by entry point; if it is an explicit reference
	231	to a function (of the form ``package.module:funcname`` or
	232	``package.module.funcname``), the corresponding function
	233	will be imported and used
	234	:param fileobj: the file-like object the messages should be extracted from
	235	:param keywords: a dictionary mapping keywords (i.e. names of functions
	236	that should be recognized as translation functions) to
	237	tuples that specify which of their arguments contain
	238	localizable strings
	239	:param comment_tags: a list of translator tags to search for and include
	240	in the results
	241	:param options: a dictionary of additional options (optional)
	242	:param strip_comment_tags: a flag that if set to `True` causes all comment
	243	tags to be removed from the collected comments.
	244	:return: the list of extracted messages
	245	:rtype: `list`
	246	:raise ValueError: if the extraction method is not registered
	247	"""
	248	func = None
	249	if ':' in method or '.' in method:
	250	if ':' not in method:
	251	lastdot = method.rfind('.')
	252	module, attrname = method[:lastdot], method[lastdot + 1:]
	253	else:
	254	module, attrname = method.split(':', 1)
	255	func = getattr(__import__(module, {}, {}, [attrname]), attrname)
	256	else:
	257	try:
	258	from pkg_resources import working_set
	259	except ImportError:
	260	# pkg_resources is not available, so we resort to looking up the
	261	# builtin extractors directly
	262	builtin = {'ignore': extract_nothing, 'python': extract_python}
	263	func = builtin.get(method)
	264	else:
	265	for entry_point in working_set.iter_entry_points(GROUP_NAME,
	266	method):
	267	func = entry_point.load(require=True)
	268	break
	269	if func is None:
	270	raise ValueError('Unknown extraction method %r' % method)
	271
	272	results = func(fileobj, keywords.keys(), comment_tags,
	273	options=options or {})
	274
	275	for lineno, funcname, messages, comments in results:
	276	if funcname:
	277	spec = keywords[funcname] or (1,)
	278	else:
	279	spec = (1,)
	280	if not isinstance(messages, (list, tuple)):
	281	messages = [messages]
	282	if not messages:
	283	continue
	284
	285	# Validate the messages against the keyword's specification
	286	msgs = []
	287	invalid = False
	288	# last_index is 1 based like the keyword spec
	289	last_index = len(messages)
	290	for index in spec:
	291	if last_index < index:
	292	# Not enough arguments
	293	invalid = True
	294	break
	295	message = messages[index - 1]
	296	if message is None:
	297	invalid = True
	298	break
	299	msgs.append(message)
	300	if invalid:
	301	continue
	302
	303	first_msg_index = spec[0] - 1
	304	if not messages[first_msg_index]:
	305	# An empty string msgid isn't valid, emit a warning
	306	where = '%s:%i' % (hasattr(fileobj, 'name') and \
	307	fileobj.name or '(unknown)', lineno)
	308	print >> sys.stderr, empty_msgid_warning % where
	309	continue
	310
	311	messages = tuple(msgs)
	312	if len(messages) == 1:
	313	messages = messages[0]
	314
	315	if strip_comment_tags:
	316	_strip_comment_tags(comments, comment_tags)
	317	yield lineno, messages, comments
	318
	319
	320	def extract_nothing(fileobj, keywords, comment_tags, options):
	321	"""Pseudo extractor that does not actually extract anything, but simply
	322	returns an empty list.
	323	"""
	324	return []
	325
	326
	327	def extract_python(fileobj, keywords, comment_tags, options):
	328	"""Extract messages from Python source code.
	329
	330	:param fileobj: the seekable, file-like object the messages should be
	331	extracted from
	332	:param keywords: a list of keywords (i.e. function names) that should be
	333	recognized as translation functions
	334	:param comment_tags: a list of translator tags to search for and include
	335	in the results
	336	:param options: a dictionary of additional options (optional)
	337	:return: an iterator over ``(lineno, funcname, message, comments)`` tuples
	338	:rtype: ``iterator``
	339	"""
	340	funcname = lineno = message_lineno = None
	341	call_stack = -1
	342	buf = []
	343	messages = []
	344	translator_comments = []
	345	in_def = in_translator_comments = False
	346	comment_tag = None
	347
	348	encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1')
	349
	350	tokens = generate_tokens(fileobj.readline)
	351	for tok, value, (lineno, _), _, _ in tokens:
	352	if call_stack == -1 and tok == NAME and value in ('def', 'class'):
	353	in_def = True
	354	elif tok == OP and value == '(':
	355	if in_def:
	356	# Avoid false positives for declarations such as:
	357	# def gettext(arg='message'):
	358	in_def = False
	359	continue
	360	if funcname:
	361	message_lineno = lineno
	362	call_stack += 1
	363	elif in_def and tok == OP and value == ':':
	364	# End of a class definition without parens
	365	in_def = False
	366	continue
	367	elif call_stack == -1 and tok == COMMENT:
	368	# Strip the comment token from the line
	369	value = value.decode(encoding)[1:].strip()
	370	if in_translator_comments and \
	371	translator_comments[-1][0] == lineno - 1:
	372	# We're already inside a translator comment, continue appending
	373	translator_comments.append((lineno, value))
	374	continue
	375	# If execution reaches this point, let's see if comment line
	376	# starts with one of the comment tags
	377	for comment_tag in comment_tags:
	378	if value.startswith(comment_tag):
	379	in_translator_comments = True
	380	translator_comments.append((lineno, value))
	381	break
	382	elif funcname and call_stack == 0:
	383	if tok == OP and value == ')':
	384	if buf:
	385	messages.append(''.join(buf))
	386	del buf[:]
	387	else:
	388	messages.append(None)
	389
	390	if len(messages) > 1:
	391	messages = tuple(messages)
	392	else:
	393	messages = messages[0]
	394	# Comments don't apply unless they immediately preceed the
	395	# message
	396	if translator_comments and \
	397	translator_comments[-1][0] < message_lineno - 1:
	398	translator_comments = []
	399
	400	yield (message_lineno, funcname, messages,
	401	[comment[1] for comment in translator_comments])
	402
	403	funcname = lineno = message_lineno = None
	404	call_stack = -1
	405	messages = []
	406	translator_comments = []
	407	in_translator_comments = False
	408	elif tok == STRING:
	409	# Unwrap quotes in a safe manner, maintaining the string's
	410	# encoding
	411	# https://sourceforge.net/tracker/?func=detail&atid=355470&
	412	# aid=617979&group_id=5470
	413	value = eval('# coding=%s\n%s' % (encoding, value),
	414	{'__builtins__':{}}, {})
	415	if isinstance(value, str):
	416	value = value.decode(encoding)
	417	buf.append(value)
	418	elif tok == OP and value == ',':
	419	if buf:
	420	messages.append(''.join(buf))
	421	del buf[:]
	422	else:
	423	messages.append(None)
	424	if translator_comments:
	425	# We have translator comments, and since we're on a
	426	# comma(,) user is allowed to break into a new line
	427	# Let's increase the last comment's lineno in order
	428	# for the comment to still be a valid one
	429	old_lineno, old_comment = translator_comments.pop()
	430	translator_comments.append((old_lineno+1, old_comment))
	431	elif call_stack > 0 and tok == OP and value == ')':
	432	call_stack -= 1
	433	elif funcname and call_stack == -1:
	434	funcname = None
	435	elif tok == NAME and value in keywords:
	436	funcname = value
	437
	438
	439	def extract_javascript(fileobj, keywords, comment_tags, options):
	440	"""Extract messages from JavaScript source code.
	441
	442	:param fileobj: the seekable, file-like object the messages should be
	443	extracted from
	444	:param keywords: a list of keywords (i.e. function names) that should be
	445	recognized as translation functions
	446	:param comment_tags: a list of translator tags to search for and include
	447	in the results
	448	:param options: a dictionary of additional options (optional)
	449	:return: an iterator over ``(lineno, funcname, message, comments)`` tuples
	450	:rtype: ``iterator``
	451	"""
	452	from babel.messages.jslexer import tokenize, unquote_string
	453	funcname = message_lineno = None
	454	messages = []
	455	last_argument = None
	456	translator_comments = []
	457	concatenate_next = False
	458	encoding = options.get('encoding', 'utf-8')
	459	last_token = None
	460	call_stack = -1
	461
	462	for token in tokenize(fileobj.read().decode(encoding)):
	463	if token.type == 'operator' and token.value == '(':
	464	if funcname:
	465	message_lineno = token.lineno
	466	call_stack += 1
	467
	468	elif call_stack == -1 and token.type == 'linecomment':
	469	value = token.value[2:].strip()
	470	if translator_comments and \
	471	translator_comments[-1][0] == token.lineno - 1:
	472	translator_comments.append((token.lineno, value))
	473	continue
	474
	475	for comment_tag in comment_tags:
	476	if value.startswith(comment_tag):
	477	translator_comments.append((token.lineno, value.strip()))
	478	break
	479
	480	elif token.type == 'multilinecomment':
	481	# only one multi-line comment may preceed a translation
	482	translator_comments = []
	483	value = token.value[2:-2].strip()
	484	for comment_tag in comment_tags:
	485	if value.startswith(comment_tag):
	486	lines = value.splitlines()
	487	if lines:
	488	lines[0] = lines[0].strip()
	489	lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
	490	for offset, line in enumerate(lines):
	491	translator_comments.append((token.lineno + offset,
	492	line))
	493	break
	494
	495	elif funcname and call_stack == 0:
	496	if token.type == 'operator' and token.value == ')':
	497	if last_argument is not None:
	498	messages.append(last_argument)
	499	if len(messages) > 1:
	500	messages = tuple(messages)
	501	elif messages:
	502	messages = messages[0]
	503	else:
	504	messages = None
	505
	506	# Comments don't apply unless they immediately preceed the
	507	# message
	508	if translator_comments and \
	509	translator_comments[-1][0] < message_lineno - 1:
	510	translator_comments = []
	511
	512	if messages is not None:
	513	yield (message_lineno, funcname, messages,
	514	[comment[1] for comment in translator_comments])
	515
	516	funcname = message_lineno = last_argument = None
	517	concatenate_next = False
	518	translator_comments = []
	519	messages = []
	520	call_stack = -1
	521
	522	elif token.type == 'string':
	523	new_value = unquote_string(token.value)
	524	if concatenate_next:
	525	last_argument = (last_argument or '') + new_value
	526	concatenate_next = False
	527	else:
	528	last_argument = new_value
	529
	530	elif token.type == 'operator':
	531	if token.value == ',':
	532	if last_argument is not None:
	533	messages.append(last_argument)
	534	last_argument = None
	535	else:
	536	messages.append(None)
	537	concatenate_next = False
	538	elif token.value == '+':
	539	concatenate_next = True
	540
	541	elif call_stack > 0 and token.type == 'operator' \
	542	and token.value == ')':
	543	call_stack -= 1
	544
	545	elif funcname and call_stack == -1:
	546	funcname = None
	547
	548	elif call_stack == -1 and token.type == 'name' and \
	549	token.value in keywords and \
	550	(last_token is None or last_token.type != 'name' or
	551	last_token.value != 'function'):
	552	funcname = token.value
	553
	554	last_token = token

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/Babel-0.9.4-py2.6.egg/babel/messages/extract.py @ 3

異なるフォーマットでダウンロード: