Context Navigation

pofile.py @ 3

リビジョン 3, 15.5 KB (コミッタ: kohda, 14 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

Rev	行番号
[3]	1	# -*- coding: utf-8 -*-
	2	#
	3	# Copyright (C) 2007 Edgewall Software
	4	# All rights reserved.
	5	#
	6	# This software is licensed as described in the file COPYING, which
	7	# you should have received as part of this distribution. The terms
	8	# are also available at http://babel.edgewall.org/wiki/License.
	9	#
	10	# This software consists of voluntary contributions made by many
	11	# individuals. For the exact contribution history, see the revision
	12	# history and logs, available at http://babel.edgewall.org/log/.
	13
	14	"""Reading and writing of files in the ``gettext`` PO (portable object)
	15	format.
	16
	17	:see: `The Format of PO Files
	18	<http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
	19	"""
	20
	21	from datetime import date, datetime
	22	import os
	23	import re
	24	try:
	25	set
	26	except NameError:
	27	from sets import Set as set
	28
	29	from babel import __version__ as VERSION
	30	from babel.messages.catalog import Catalog, Message
	31	from babel.util import wraptext, LOCALTZ
	32
	33	__all__ = ['read_po', 'write_po']
	34	__docformat__ = 'restructuredtext en'
	35
	36	def unescape(string):
	37	r"""Reverse `escape` the given string.
	38
	39	>>> print unescape('"Say:\\n \\"hello, world!\\"\\n"')
	40	Say:
	41	"hello, world!"
	42	<BLANKLINE>
	43
	44	:param string: the string to unescape
	45	:return: the unescaped string
	46	:rtype: `str` or `unicode`
	47	"""
	48	return string[1:-1].replace('\\\\', '\\') \
	49	.replace('\\t', '\t') \
	50	.replace('\\r', '\r') \
	51	.replace('\\n', '\n') \
	52	.replace('\\"', '\"')
	53
	54	def denormalize(string):
	55	r"""Reverse the normalization done by the `normalize` function.
	56
	57	>>> print denormalize(r'''""
	58	... "Say:\n"
	59	... " \"hello, world!\"\n"''')
	60	Say:
	61	"hello, world!"
	62	<BLANKLINE>
	63
	64	>>> print denormalize(r'''""
	65	... "Say:\n"
	66	... " \"Lorem ipsum dolor sit "
	67	... "amet, consectetur adipisicing"
	68	... " elit, \"\n"''')
	69	Say:
	70	"Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
	71	<BLANKLINE>
	72
	73	:param string: the string to denormalize
	74	:return: the denormalized string
	75	:rtype: `unicode` or `str`
	76	"""
	77	if string.startswith('""'):
	78	lines = []
	79	for line in string.splitlines()[1:]:
	80	lines.append(unescape(line))
	81	return ''.join(lines)
	82	else:
	83	return unescape(string)
	84
	85	def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
	86	"""Read messages from a ``gettext`` PO (portable object) file from the given
	87	file-like object and return a `Catalog`.
	88
	89	>>> from StringIO import StringIO
	90	>>> buf = StringIO('''
	91	... #: main.py:1
	92	... #, fuzzy, python-format
	93	... msgid "foo %(name)s"
	94	... msgstr ""
	95	...
	96	... # A user comment
	97	... #. An auto comment
	98	... #: main.py:3
	99	... msgid "bar"
	100	... msgid_plural "baz"
	101	... msgstr[0] ""
	102	... msgstr[1] ""
	103	... ''')
	104	>>> catalog = read_po(buf)
	105	>>> catalog.revision_date = datetime(2007, 04, 01)
	106
	107	>>> for message in catalog:
	108	... if message.id:
	109	... print (message.id, message.string)
	110	... print ' ', (message.locations, message.flags)
	111	... print ' ', (message.user_comments, message.auto_comments)
	112	(u'foo %(name)s', '')
	113	([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
	114	([], [])
	115	((u'bar', u'baz'), ('', ''))
	116	([(u'main.py', 3)], set([]))
	117	([u'A user comment'], [u'An auto comment'])
	118
	119	:param fileobj: the file-like object to read the PO file from
	120	:param locale: the locale identifier or `Locale` object, or `None`
	121	if the catalog is not bound to a locale (which basically
	122	means it's a template)
	123	:param domain: the message domain
	124	:param ignore_obsolete: whether to ignore obsolete messages in the input
	125	:return: an iterator over ``(message, translation, location)`` tuples
	126	:rtype: ``iterator``
	127	"""
	128	catalog = Catalog(locale=locale, domain=domain)
	129
	130	counter = [0]
	131	offset = [0]
	132	messages = []
	133	translations = []
	134	locations = []
	135	flags = []
	136	user_comments = []
	137	auto_comments = []
	138	obsolete = [False]
	139	in_msgid = [False]
	140	in_msgstr = [False]
	141
	142	def _add_message():
	143	translations.sort()
	144	if len(messages) > 1:
	145	msgid = tuple([denormalize(m) for m in messages])
	146	else:
	147	msgid = denormalize(messages[0])
	148	if isinstance(msgid, (list, tuple)):
	149	string = []
	150	for idx in range(catalog.num_plurals):
	151	try:
	152	string.append(translations[idx])
	153	except IndexError:
	154	string.append((idx, ''))
	155	string = tuple([denormalize(t[1]) for t in string])
	156	else:
	157	string = denormalize(translations[0][1])
	158	message = Message(msgid, string, list(locations), set(flags),
	159	auto_comments, user_comments, lineno=offset[0] + 1)
	160	if obsolete[0]:
	161	if not ignore_obsolete:
	162	catalog.obsolete[msgid] = message
	163	else:
	164	catalog[msgid] = message
	165	del messages[:]; del translations[:]; del locations[:];
	166	del flags[:]; del auto_comments[:]; del user_comments[:]
	167	obsolete[0] = False
	168	counter[0] += 1
	169
	170	def _process_message_line(lineno, line):
	171	if line.startswith('msgid_plural'):
	172	in_msgid[0] = True
	173	msg = line[12:].lstrip()
	174	messages.append(msg)
	175	elif line.startswith('msgid'):
	176	in_msgid[0] = True
	177	offset[0] = lineno
	178	txt = line[5:].lstrip()
	179	if messages:
	180	_add_message()
	181	messages.append(txt)
	182	elif line.startswith('msgstr'):
	183	in_msgid[0] = False
	184	in_msgstr[0] = True
	185	msg = line[6:].lstrip()
	186	if msg.startswith('['):
	187	idx, msg = msg[1:].split(']')
	188	translations.append([int(idx), msg.lstrip()])
	189	else:
	190	translations.append([0, msg])
	191	elif line.startswith('"'):
	192	if in_msgid[0]:
	193	messages[-1] += u'\n' + line.rstrip()
	194	elif in_msgstr[0]:
	195	translations[-1][1] += u'\n' + line.rstrip()
	196
	197	for lineno, line in enumerate(fileobj.readlines()):
	198	line = line.strip().decode(catalog.charset)
	199	if line.startswith('#'):
	200	in_msgid[0] = in_msgstr[0] = False
	201	if messages and translations:
	202	_add_message()
	203	if line[1:].startswith(':'):
	204	for location in line[2:].lstrip().split():
	205	pos = location.rfind(':')
	206	if pos >= 0:
	207	try:
	208	lineno = int(location[pos + 1:])
	209	except ValueError:
	210	continue
	211	locations.append((location[:pos], lineno))
	212	elif line[1:].startswith(','):
	213	for flag in line[2:].lstrip().split(','):
	214	flags.append(flag.strip())
	215	elif line[1:].startswith('~'):
	216	obsolete[0] = True
	217	_process_message_line(lineno, line[2:].lstrip())
	218	elif line[1:].startswith('.'):
	219	# These are called auto-comments
	220	comment = line[2:].strip()
	221	if comment: # Just check that we're not adding empty comments
	222	auto_comments.append(comment)
	223	else:
	224	# These are called user comments
	225	user_comments.append(line[1:].strip())
	226	else:
	227	_process_message_line(lineno, line)
	228
	229	if messages:
	230	_add_message()
	231
	232	# No actual messages found, but there was some info in comments, from which
	233	# we'll construct an empty header message
	234	elif not counter[0] and (flags or user_comments or auto_comments):
	235	messages.append(u'')
	236	translations.append([0, u''])
	237	_add_message()
	238
	239	return catalog
	240
	241	WORD_SEP = re.compile('('
	242	r'\s+\|' # any whitespace
	243	r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])\|' # hyphenated words
	244	r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)' # em-dash
	245	')')
	246
	247	def escape(string):
	248	r"""Escape the given string so that it can be included in double-quoted
	249	strings in ``PO`` files.
	250
	251	>>> escape('''Say:
	252	... "hello, world!"
	253	... ''')
	254	'"Say:\\n \\"hello, world!\\"\\n"'
	255
	256	:param string: the string to escape
	257	:return: the escaped string
	258	:rtype: `str` or `unicode`
	259	"""
	260	return '"%s"' % string.replace('\\', '\\\\') \
	261	.replace('\t', '\\t') \
	262	.replace('\r', '\\r') \
	263	.replace('\n', '\\n') \
	264	.replace('\"', '\\"')
	265
	266	def normalize(string, prefix='', width=76):
	267	r"""Convert a string into a format that is appropriate for .po files.
	268
	269	>>> print normalize('''Say:
	270	... "hello, world!"
	271	... ''', width=None)
	272	""
	273	"Say:\n"
	274	" \"hello, world!\"\n"
	275
	276	>>> print normalize('''Say:
	277	... "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
	278	... ''', width=32)
	279	""
	280	"Say:\n"
	281	" \"Lorem ipsum dolor sit "
	282	"amet, consectetur adipisicing"
	283	" elit, \"\n"
	284
	285	:param string: the string to normalize
	286	:param prefix: a string that should be prepended to every line
	287	:param width: the maximum line width; use `None`, 0, or a negative number
	288	to completely disable line wrapping
	289	:return: the normalized string
	290	:rtype: `unicode`
	291	"""
	292	if width and width > 0:
	293	prefixlen = len(prefix)
	294	lines = []
	295	for idx, line in enumerate(string.splitlines(True)):
	296	if len(escape(line)) + prefixlen > width:
	297	chunks = WORD_SEP.split(line)
	298	chunks.reverse()
	299	while chunks:
	300	buf = []
	301	size = 2
	302	while chunks:
	303	l = len(escape(chunks[-1])) - 2 + prefixlen
	304	if size + l < width:
	305	buf.append(chunks.pop())
	306	size += l
	307	else:
	308	if not buf:
	309	# handle long chunks by putting them on a
	310	# separate line
	311	buf.append(chunks.pop())
	312	break
	313	lines.append(u''.join(buf))
	314	else:
	315	lines.append(line)
	316	else:
	317	lines = string.splitlines(True)
	318
	319	if len(lines) <= 1:
	320	return escape(string)
	321
	322	# Remove empty trailing line
	323	if lines and not lines[-1]:
	324	del lines[-1]
	325	lines[-1] += '\n'
	326	return u'""\n' + u'\n'.join([(prefix + escape(l)) for l in lines])
	327
	328	def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
	329	sort_output=False, sort_by_file=False, ignore_obsolete=False,
	330	include_previous=False):
	331	r"""Write a ``gettext`` PO (portable object) template file for a given
	332	message catalog to the provided file-like object.
	333
	334	>>> catalog = Catalog()
	335	>>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
	336	... flags=('fuzzy',))
	337	>>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
	338	>>> from StringIO import StringIO
	339	>>> buf = StringIO()
	340	>>> write_po(buf, catalog, omit_header=True)
	341	>>> print buf.getvalue()
	342	#: main.py:1
	343	#, fuzzy, python-format
	344	msgid "foo %(name)s"
	345	msgstr ""
	346	<BLANKLINE>
	347	#: main.py:3
	348	msgid "bar"
	349	msgid_plural "baz"
	350	msgstr[0] ""
	351	msgstr[1] ""
	352	<BLANKLINE>
	353	<BLANKLINE>
	354
	355	:param fileobj: the file-like object to write to
	356	:param catalog: the `Catalog` instance
	357	:param width: the maximum line width for the generated output; use `None`,
	358	0, or a negative number to completely disable line wrapping
	359	:param no_location: do not emit a location comment for every message
	360	:param omit_header: do not include the ``msgid ""`` entry at the top of the
	361	output
	362	:param sort_output: whether to sort the messages in the output by msgid
	363	:param sort_by_file: whether to sort the messages in the output by their
	364	locations
	365	:param ignore_obsolete: whether to ignore obsolete messages and not include
	366	them in the output; by default they are included as
	367	comments
	368	:param include_previous: include the old msgid as a comment when
	369	updating the catalog
	370	"""
	371	def _normalize(key, prefix=''):
	372	return normalize(key, prefix=prefix, width=width) \
	373	.encode(catalog.charset, 'backslashreplace')
	374
	375	def _write(text):
	376	if isinstance(text, unicode):
	377	text = text.encode(catalog.charset)
	378	fileobj.write(text)
	379
	380	def _write_comment(comment, prefix=''):
	381	lines = comment
	382	if width and width > 0:
	383	lines = wraptext(comment, width)
	384	for line in lines:
	385	_write('#%s %s\n' % (prefix, line.strip()))
	386
	387	def _write_message(message, prefix=''):
	388	if isinstance(message.id, (list, tuple)):
	389	_write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
	390	_write('%smsgid_plural %s\n' % (
	391	prefix, _normalize(message.id[1], prefix)
	392	))
	393
	394	for idx in range(catalog.num_plurals):
	395	try:
	396	string = message.string[idx]
	397	except IndexError:
	398	string = ''
	399	_write('%smsgstr[%d] %s\n' % (
	400	prefix, idx, _normalize(string, prefix)
	401	))
	402	else:
	403	_write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
	404	_write('%smsgstr %s\n' % (
	405	prefix, _normalize(message.string or '', prefix)
	406	))
	407
	408	messages = list(catalog)
	409	if sort_output:
	410	messages.sort()
	411	elif sort_by_file:
	412	messages.sort(lambda x,y: cmp(x.locations, y.locations))
	413
	414	for message in messages:
	415	if not message.id: # This is the header "message"
	416	if omit_header:
	417	continue
	418	comment_header = catalog.header_comment
	419	if width and width > 0:
	420	lines = []
	421	for line in comment_header.splitlines():
	422	lines += wraptext(line, width=width,
	423	subsequent_indent='# ')
	424	comment_header = u'\n'.join(lines) + u'\n'
	425	_write(comment_header)
	426
	427	for comment in message.user_comments:
	428	_write_comment(comment)
	429	for comment in message.auto_comments:
	430	_write_comment(comment, prefix='.')
	431
	432	if not no_location:
	433	locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno)
	434	for filename, lineno in message.locations])
	435	_write_comment(locs, prefix=':')
	436	if message.flags:
	437	_write('#%s\n' % ', '.join([''] + list(message.flags)))
	438
	439	if message.previous_id and include_previous:
	440	_write_comment('msgid %s' % _normalize(message.previous_id[0]),
	441	prefix='\|')
	442	if len(message.previous_id) > 1:
	443	_write_comment('msgid_plural %s' % _normalize(
	444	message.previous_id[1]
	445	), prefix='\|')
	446
	447	_write_message(message)
	448	_write('\n')
	449
	450	if not ignore_obsolete:
	451	for message in catalog.obsolete.values():
	452	for comment in message.user_comments:
	453	_write_comment(comment)
	454	_write_message(message, prefix='#~ ')
	455	_write('\n')

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/Babel-0.9.4-py2.6.egg/babel/messages/pofile.py @ 3

異なるフォーマットでダウンロード: