Context Navigation

pofile.py

リビジョン 3, 15.5 KB (コミッタ: kohda, 15 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	# -- coding: utf-8 --
2	#
3	# Copyright (C) 2007 Edgewall Software
4	# All rights reserved.
5	#
6	# This software is licensed as described in the file COPYING, which
7	# you should have received as part of this distribution. The terms
8	# are also available at http://babel.edgewall.org/wiki/License.
9	#
10	# This software consists of voluntary contributions made by many
11	# individuals. For the exact contribution history, see the revision
12	# history and logs, available at http://babel.edgewall.org/log/.
13
14	"""Reading and writing of files in the ``gettext`` PO (portable object)
15	format.
16
17	:see: `The Format of PO Files
18	<http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
19	"""
20
21	from datetime import date, datetime
22	import os
23	import re
24	try:
25	set
26	except NameError:
27	from sets import Set as set
28
29	from babel import __version__ as VERSION
30	from babel.messages.catalog import Catalog, Message
31	from babel.util import wraptext, LOCALTZ
32
33	__all__ = ['read_po', 'write_po']
34	__docformat__ = 'restructuredtext en'
35
def unescape(string):
    r"""Reverse `escape` the given string.

    The enclosing double quotes are dropped and the escape sequences
    ``\\``, ``\t``, ``\r``, ``\n`` and ``\"`` are decoded in one
    left-to-right pass.  Decoding in a single pass matters: an escaped
    backslash that happens to be followed by ``n``, ``t``, ``r`` or ``"``
    must not be re-read as the start of another escape sequence.  Any other
    backslash sequence is left untouched.

    >>> unescape('"Say:\\n  \\"hello, world!\\"\\n"')
    'Say:\n  "hello, world!"\n'
    >>> unescape('"C:\\\\new"')
    'C:\\new'

    :param string: the quoted, escaped string (including the enclosing
                   double quotes)
    :return: the unescaped string
    :rtype: `str` or `unicode`
    """
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def _decode(match):
        char = match.group(1)
        # A backslash or double quote after the backslash stands for itself.
        return control_chars.get(char, char)

    # string[1:-1] strips the enclosing quotes before decoding.
    return re.sub(r'\\([\\trn"])', _decode, string[1:-1])
53
def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    A value spread over several physical lines is unescaped line by line and
    the pieces are concatenated.  A leading empty ``""`` line, if present, is
    skipped.

    >>> denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"''')
    'Say:\n  "hello, world!"\n'

    Multi-line values that do not start with the empty ``""`` line are
    handled the same way:

    >>> denormalize(r'''"Lorem ipsum "
    ... "dolor sit amet"''')
    'Lorem ipsum dolor sit amet'

    :param string: the string to denormalize
    :return: the denormalized string
    :rtype: `unicode` or `str`
    """
    if '\n' not in string:
        return unescape(string)

    escaped_lines = string.splitlines()
    if string.startswith('""'):
        # The first physical line is only the empty placeholder.
        escaped_lines = escaped_lines[1:]
    # Each physical line carries its own pair of quotes, so every line has
    # to be unescaped separately before joining.
    return ''.join([unescape(line) for line in escaped_lines])
84
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 4, 1)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    (u'foo %(name)s', '')
      ([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
      ([], [])
    ((u'bar', u'baz'), ('', ''))
      ([(u'main.py', 3)], set([]))
      ([u'A user comment'], [u'An auto comment'])

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :return: the catalog holding the messages that were read
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain)

    # Parser state shared with the nested helpers.  Scalar values live in
    # one-element lists so that the helpers can update them in place.
    counter = [0]        # number of messages added so far
    offset = [0]         # index of the line holding the current ``msgid``
    messages = []        # raw msgid / msgid_plural of the current entry
    translations = []    # [plural index, raw msgstr] pairs of the entry
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]
    in_msgid = [False]
    in_msgstr = [False]

    def _add_message():
        # Turn the buffered entry into a `Message`, store it, reset buffers.
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    # Pad plural forms missing from the file.
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        # Hand over copies: the shared buffers are emptied right below.
        message = Message(msgid, string, list(locations), set(flags),
                          list(auto_comments), list(user_comments),
                          lineno=offset[0] + 1)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        del messages[:]
        del translations[:]
        del locations[:]
        del flags[:]
        del auto_comments[:]
        del user_comments[:]
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line):
        # Handle one ``msgid``/``msgid_plural``/``msgstr``/continuation line.
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            msg = line[12:].lstrip()
            messages.append(msg)
        elif line.startswith('msgid'):
            in_msgid[0] = True
            offset[0] = lineno
            txt = line[5:].lstrip()
            if messages:
                # A new entry starts; flush the one collected so far.
                _add_message()
            messages.append(txt)
        elif line.startswith('msgstr'):
            in_msgid[0] = False
            in_msgstr[0] = True
            msg = line[6:].lstrip()
            if msg.startswith('['):
                # Split on the first ``]`` only: the translation text itself
                # may contain closing brackets.
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('"'):
            # Continuation line of the preceding msgid or msgstr.
            if in_msgid[0]:
                messages[-1] += u'\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += u'\n' + line.rstrip()

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip().decode(catalog.charset)
        if line.startswith('#'):
            in_msgid[0] = in_msgstr[0] = False
            if messages and translations:
                _add_message()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            location_lineno = int(location[pos + 1:])
                        except ValueError:
                            # Not a ``file:line`` reference; skip it.
                            continue
                        locations.append((location[:pos], location_lineno))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('~'):
                obsolete[0] = True
                # NOTE(review): in_msgid/in_msgstr were reset above, so
                # quoted continuation lines of obsolete entries are ignored.
                _process_message_line(lineno, line[2:].lstrip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment: # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append(u'')
        translations.append([0, u''])
        _add_message()

    return catalog
240
# Pattern on which `normalize` splits an over-long line into chunks.  The
# three parts are alternatives, so they must be joined with an unescaped
# ``|``; an escaped ``\|`` would demand a literal pipe character and the
# pattern would never match ordinary text.
WORD_SEP = re.compile('('
    r'\s+|'                                  # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'  # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)'    # em-dash
')')
246
def escape(string):
    r"""Quote the given string for use as a double-quoted ``PO`` file string.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their backslash escape sequences, and the result is wrapped
    in double quotes.

    >>> escape('Say:\n  "hello, world!"\n')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string
    :rtype: `str` or `unicode`
    """
    # The backslash has to come first, otherwise the backslashes introduced
    # by the later substitutions would be doubled again.
    substitutions = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('\"', '\\"'),
    )
    escaped = string
    for raw, encoded in substitutions:
        escaped = escaped.replace(raw, encoded)
    return '"%s"' % escaped
265
def normalize(string, prefix='', width=76):
    r"""Render a string the way it is laid out in .po files.

    A value that fits on one line is simply escaped.  Otherwise the result
    starts with an empty ``""`` line followed by one escaped string per line.

    >>> print normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None)
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32)
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if not width or width <= 0:
        # Wrapping disabled: only honour the line breaks already present.
        lines = string.splitlines(True)
    else:
        indent = len(prefix)
        lines = []
        for raw_line in string.splitlines(True):
            if len(escape(raw_line)) + indent <= width:
                lines.append(raw_line)
                continue
            # Too long: cut into chunks and refill output lines greedily.
            # The list is reversed so the next chunk is always at the end.
            pending = WORD_SEP.split(raw_line)
            pending.reverse()
            while pending:
                current = []
                used = 2  # presumably accounts for the two enclosing quotes
                while pending:
                    cost = len(escape(pending[-1])) - 2 + indent
                    if used + cost >= width:
                        if not current:
                            # A single chunk wider than the limit still gets
                            # a line of its own.
                            current.append(pending.pop())
                        break
                    current.append(pending.pop())
                    used += cost
                lines.append(u''.join(current))

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if not lines[-1]:
        lines.pop()
        lines[-1] += '\n'
    rendered = [prefix + escape(piece) for piece in lines]
    return u'""\n' + u'\n'.join(rendered)
327
def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
             sort_output=False, sort_by_file=False, ignore_obsolete=False,
             include_previous=False):
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    """
    wrap = bool(width and width > 0)

    def _normalize(key, prefix=''):
        # Normalized, then encoded to the catalog charset.
        return normalize(key, prefix=prefix, width=width) \
            .encode(catalog.charset, 'backslashreplace')

    def _write(text):
        if isinstance(text, unicode):
            text = text.encode(catalog.charset)
        fileobj.write(text)

    def _write_comment(comment, prefix=''):
        if wrap:
            lines = wraptext(comment, width)
        else:
            # Iterate over whole lines; looping over the string itself would
            # emit one comment line per character.
            lines = comment.splitlines()
        for line in lines:
            _write('#%s %s\n' % (prefix, line.strip()))

    def _write_message(message, prefix=''):
        if isinstance(message.id, (list, tuple)):
            _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
            _write('%smsgid_plural %s\n' % (
                prefix, _normalize(message.id[1], prefix)
            ))

            for idx in range(catalog.num_plurals):
                try:
                    string = message.string[idx]
                except IndexError:
                    # Emit an empty entry for plural forms without a string.
                    string = ''
                _write('%smsgstr[%d] %s\n' % (
                    prefix, idx, _normalize(string, prefix)
                ))
        else:
            _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
            _write('%smsgstr %s\n' % (
                prefix, _normalize(message.string or '', prefix)
            ))

    messages = list(catalog)
    if sort_output:
        messages.sort()
    elif sort_by_file:
        messages.sort(key=lambda message: message.locations)

    for message in messages:
        if not message.id: # This is the header "message"
            if omit_header:
                continue
            header_lines = []
            for line in catalog.header_comment.splitlines():
                if wrap:
                    header_lines += wraptext(line, width=width,
                                             subsequent_indent='# ')
                else:
                    header_lines.append(line)
            # Terminate the header the same way in both modes so that the
            # following line never ends up glued to the header comment.
            _write(u'\n'.join(header_lines) + u'\n')

        for comment in message.user_comments:
            _write_comment(comment)
        for comment in message.auto_comments:
            _write_comment(comment, prefix='.')

        if not no_location:
            locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno)
                              for filename, lineno in message.locations])
            _write_comment(locs, prefix=':')
        if message.flags:
            # The leading empty item yields the "#, flag, flag" layout.
            _write('#%s\n' % ', '.join([''] + list(message.flags)))

        if message.previous_id and include_previous:
            # Previous msgids are written as "#|" comments.
            _write_comment('msgid %s' % _normalize(message.previous_id[0]),
                           prefix='|')
            if len(message.previous_id) > 1:
                _write_comment('msgid_plural %s' % _normalize(
                    message.previous_id[1]
                ), prefix='|')

        _write_message(message)
        _write('\n')

    if not ignore_obsolete:
        for message in catalog.obsolete.values():
            for comment in message.user_comments:
                _write_comment(comment)
            _write_message(message, prefix='#~ ')
            _write('\n')

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/Babel-0.9.4-py2.6.egg/babel/messages/pofile.py

異なるフォーマットでダウンロード: