1 | # -*- coding: utf-8 -*- |
---|
2 | # |
---|
3 | # Copyright (C) 2007 Edgewall Software |
---|
4 | # All rights reserved. |
---|
5 | # |
---|
6 | # This software is licensed as described in the file COPYING, which |
---|
7 | # you should have received as part of this distribution. The terms |
---|
8 | # are also available at http://babel.edgewall.org/wiki/License. |
---|
9 | # |
---|
10 | # This software consists of voluntary contributions made by many |
---|
11 | # individuals. For the exact contribution history, see the revision |
---|
12 | # history and logs, available at http://babel.edgewall.org/log/. |
---|
13 | |
---|
14 | """Reading and writing of files in the ``gettext`` PO (portable object) |
---|
15 | format. |
---|
16 | |
---|
17 | :see: `The Format of PO Files |
---|
18 | <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_ |
---|
19 | """ |
---|
20 | |
---|
21 | from datetime import date, datetime |
---|
22 | import os |
---|
23 | import re |
---|
24 | try: |
---|
25 | set |
---|
26 | except NameError: |
---|
27 | from sets import Set as set |
---|
28 | |
---|
29 | from babel import __version__ as VERSION |
---|
30 | from babel.messages.catalog import Catalog, Message |
---|
31 | from babel.util import wraptext, LOCALTZ |
---|
32 | |
---|
33 | __all__ = ['read_po', 'write_po'] |
---|
34 | __docformat__ = 'restructuredtext en' |
---|
35 | |
---|
def unescape(string):
    r"""Reverse `escape` the given string.

    >>> print unescape('"Say:\\n  \\"hello, world!\\"\\n"')
    Say:
      "hello, world!"
    <BLANKLINE>

    Escape sequences are decoded in one left-to-right pass, so an escaped
    backslash that happens to be followed by ``n``, ``t``, ``r`` or ``"`` is
    not mistaken for a second escape sequence:

    >>> print unescape(r'"C:\\new"')
    C:\new

    :param string: the string to unescape, including its surrounding double
                   quotes
    :return: the unescaped string
    :rtype: `str` or `unicode`
    """
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def _decode(match):
        # A backslash or a double quote stands for itself; the letters map to
        # the corresponding control character.
        char = match.group(1)
        return control_chars.get(char, char)

    # string[1:-1] drops the enclosing double quotes. Decoding with a single
    # substitution (instead of one replace() per sequence) keeps the output of
    # one replacement from being re-interpreted by the next.
    return re.sub(r'\\([\\trn"])', _decode, string[1:-1])
---|
53 | |
---|
def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"''')
    Say:
      "hello, world!"
    <BLANKLINE>

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"''')
    Say:
      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    A multi-line value does not have to start with an empty ``""`` line:

    >>> print denormalize(r'''"Say: "
    ... "hello"''')
    Say: hello

    :param string: the string to denormalize; one double-quoted segment per
                   line
    :return: the denormalized string
    :rtype: `unicode` or `str`
    """
    if '\n' in string:
        segments = string.splitlines()
        # The leading ``""`` line only exists for layout and carries no text.
        if string.startswith('""'):
            segments = segments[1:]
        # Every remaining line is a separately quoted segment, so each one has
        # to be unescaped on its own before the pieces are concatenated.
        return ''.join([unescape(segment) for segment in segments])
    return unescape(string)
---|
84 | |
---|
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 04, 01)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    (u'foo %(name)s', '')
      ([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
      ([], [])
    ((u'bar', u'baz'), ('', ''))
      ([(u'main.py', 3)], set([]))
      ([u'A user comment'], [u'An auto comment'])

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :return: the catalog populated with the messages found in the file
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain)

    # Parser state shared with the nested helpers. Scalars are kept in
    # one-element lists so that the helpers can update them in place.
    counter = [0]           # number of entries added so far
    offset = [0]            # zero-based line index of the current ``msgid``
    messages = []           # raw msgid / msgid_plural of the current entry
    translations = []       # [plural index, raw msgstr] pairs of the entry
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]      # whether the current entry came from ``#~`` lines
    in_msgid = [False]      # continuation lines extend the msgid
    in_msgstr = [False]     # continuation lines extend the msgstr

    def _add_message():
        # Turn the collected state into a `Message`, store it in the catalog
        # and reset the state for the next entry.
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            # Plural entry: pad missing plural forms with empty strings.
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        # The comment lists are copied because they are cleared right below.
        message = Message(msgid, string, list(locations), set(flags),
                          list(auto_comments), list(user_comments),
                          lineno=offset[0] + 1)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        del messages[:]; del translations[:]; del locations[:]
        del flags[:]; del auto_comments[:]; del user_comments[:]
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line):
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            messages.append(line[12:].lstrip())
        elif line.startswith('msgid'):
            in_msgid[0] = True
            # Flush the previous entry *before* recording the new offset, so
            # that the previous entry keeps its own line number.
            if messages:
                _add_message()
            offset[0] = lineno
            messages.append(line[5:].lstrip())
        elif line.startswith('msgstr'):
            in_msgid[0] = False
            in_msgstr[0] = True
            msg = line[6:].lstrip()
            if msg.startswith('['):
                # Split on the first ']' only: the translation text itself
                # may legitimately contain closing brackets.
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('"'):
            # Continuation line: append to whichever part is being read.
            if in_msgid[0]:
                messages[-1] += u'\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += u'\n' + line.rstrip()

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip()
        if not isinstance(line, unicode):
            line = line.decode(catalog.charset)
        if line.startswith('#'):
            # NOTE(review): every '#' line resets the continuation state and
            # flushes a complete entry, so quoted continuation lines of an
            # obsolete ('#~') entry appear to be dropped -- confirm before
            # relying on multi-line obsolete entries.
            in_msgid[0] = in_msgstr[0] = False
            if messages and translations:
                _add_message()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        # Use a dedicated name so the enumerate() counter of
                        # the outer loop is not overwritten.
                        try:
                            loc_lineno = int(location[pos + 1:])
                        except ValueError:
                            continue
                        locations.append((location[:pos], loc_lineno))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('~'):
                obsolete[0] = True
                _process_message_line(lineno, line[2:].lstrip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment: # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append(u'')
        translations.append([0, u''])
        _add_message()

    return catalog
---|
240 | |
---|
# Pattern used by `normalize` to cut a line into chunks at which it may be
# wrapped. The alternatives are enclosed in one capturing group, so
# ``WORD_SEP.split()`` returns the separators together with the text.
WORD_SEP = re.compile('(%s)' % '|'.join((
    r'\s+',                                    # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',    # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',     # em-dash
)))
---|
246 | |
---|
def escape(string):
    r"""Quote a string for use in a ``PO`` file.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their backslash escape sequences, and the result is wrapped
    in double quotes.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string
    :rtype: `str` or `unicode`
    """
    # The backslash has to come first; otherwise the backslashes introduced
    # by the later replacements would get doubled as well.
    substitutions = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('"', '\\"'),
    )
    escaped = string
    for raw, sequence in substitutions:
        escaped = escaped.replace(raw, sequence)
    return '"%s"' % escaped
---|
265 | |
---|
def normalize(string, prefix='', width=76):
    r"""Convert a string into a format that is appropriate for .po files.

    A value that ends up as a single line is simply escaped. Otherwise the
    result starts with an empty ``""`` line, followed by one quoted line per
    piece, each of them preceded by `prefix`.

    >>> print normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None)
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32)
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if width and width > 0:
        prefixlen = len(prefix)
        lines = []
        for line in string.splitlines(True):
            if len(escape(line)) + prefixlen <= width:
                # Short enough once escaped: keep the line as it is.
                lines.append(line)
                continue
            # The chunks are reversed so that pop() hands them out in their
            # original order.
            pending = WORD_SEP.split(line)
            pending.reverse()
            while pending:
                current = []
                used = 2    # the two enclosing double quotes
                while pending:
                    chunk_len = len(escape(pending[-1])) - 2 + prefixlen
                    if used + chunk_len >= width:
                        if not current:
                            # handle long chunks by putting them on a
                            # separate line
                            current.append(pending.pop())
                        break
                    current.append(pending.pop())
                    used += chunk_len
                lines.append(u''.join(current))
    else:
        lines = string.splitlines(True)

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if lines and not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    quoted = [(prefix + escape(piece)) for piece in lines]
    return u'""\n' + u'\n'.join(quoted)
---|
327 | |
---|
def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
             sort_output=False, sort_by_file=False, ignore_obsolete=False,
             include_previous=False):
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    """
    def _normalize(key, prefix=''):
        # Normalize and encode in the catalog charset; characters that cannot
        # be encoded are written as backslash escapes.
        return normalize(key, prefix=prefix, width=width) \
            .encode(catalog.charset, 'backslashreplace')

    def _write(text):
        if isinstance(text, unicode):
            text = text.encode(catalog.charset)
        fileobj.write(text)

    def _write_comment(comment, prefix=''):
        if width and width > 0:
            lines = wraptext(comment, width)
        else:
            # Wrapping is disabled: emit the comment line by line. Iterating
            # over the string itself would yield single characters.
            lines = comment.splitlines()
        for line in lines:
            _write('#%s %s\n' % (prefix, line.strip()))

    def _write_message(message, prefix=''):
        if isinstance(message.id, (list, tuple)):
            _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
            _write('%smsgid_plural %s\n' % (
                prefix, _normalize(message.id[1], prefix)
            ))

            # Write one msgstr per plural form, padding missing translations
            # with empty strings.
            for idx in range(catalog.num_plurals):
                try:
                    string = message.string[idx]
                except IndexError:
                    string = ''
                _write('%smsgstr[%d] %s\n' % (
                    prefix, idx, _normalize(string, prefix)
                ))
        else:
            _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
            _write('%smsgstr %s\n' % (
                prefix, _normalize(message.string or '', prefix)
            ))

    messages = list(catalog)
    if sort_output:
        messages.sort()
    elif sort_by_file:
        messages.sort(lambda x,y: cmp(x.locations, y.locations))

    for message in messages:
        if not message.id: # This is the header "message"
            if omit_header:
                continue
            comment_header = catalog.header_comment
            if width and width > 0:
                lines = []
                for line in comment_header.splitlines():
                    lines += wraptext(line, width=width,
                                      subsequent_indent='# ')
                comment_header = u'\n'.join(lines) + u'\n'
            elif not comment_header.endswith('\n'):
                # Without wrapping the header is written verbatim; make sure
                # it is still terminated, so the next line starts on its own.
                comment_header += u'\n'
            _write(comment_header)

        for comment in message.user_comments:
            _write_comment(comment)
        for comment in message.auto_comments:
            _write_comment(comment, prefix='.')

        if not no_location:
            locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno)
                              for filename, lineno in message.locations])
            _write_comment(locs, prefix=':')
        if message.flags:
            # Sort the flags so that the output does not depend on the
            # iteration order of the flag set.
            flag_names = list(message.flags)
            flag_names.sort()
            _write('#%s\n' % ', '.join([''] + flag_names))

        if message.previous_id and include_previous:
            _write_comment('msgid %s' % _normalize(message.previous_id[0]),
                           prefix='|')
            if len(message.previous_id) > 1:
                _write_comment('msgid_plural %s' % _normalize(
                    message.previous_id[1]
                ), prefix='|')

        _write_message(message)
        _write('\n')

    if not ignore_obsolete:
        # Obsolete entries are written as '#~ ' comments, preceded by their
        # user comments only.
        for message in catalog.obsolete.values():
            for comment in message.user_comments:
                _write_comment(comment)
            _write_message(message, prefix='#~ ')
            _write('\n')
---|
455 | _write('\n') |
---|