| 1 | # -*- coding: utf-8 -*- |
|---|
| 2 | # |
|---|
| 3 | # Copyright (C) 2007 Edgewall Software |
|---|
| 4 | # All rights reserved. |
|---|
| 5 | # |
|---|
| 6 | # This software is licensed as described in the file COPYING, which |
|---|
| 7 | # you should have received as part of this distribution. The terms |
|---|
| 8 | # are also available at http://babel.edgewall.org/wiki/License. |
|---|
| 9 | # |
|---|
| 10 | # This software consists of voluntary contributions made by many |
|---|
| 11 | # individuals. For the exact contribution history, see the revision |
|---|
| 12 | # history and logs, available at http://babel.edgewall.org/log/. |
|---|
| 13 | |
|---|
| 14 | """Reading and writing of files in the ``gettext`` PO (portable object) |
|---|
| 15 | format. |
|---|
| 16 | |
|---|
| 17 | :see: `The Format of PO Files |
|---|
| 18 | <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_ |
|---|
| 19 | """ |
|---|
| 20 | |
|---|
| 21 | from datetime import date, datetime |
|---|
| 22 | import os |
|---|
| 23 | import re |
|---|
| 24 | try: |
|---|
| 25 | set |
|---|
| 26 | except NameError: |
|---|
| 27 | from sets import Set as set |
|---|
| 28 | |
|---|
| 29 | from babel import __version__ as VERSION |
|---|
| 30 | from babel.messages.catalog import Catalog, Message |
|---|
| 31 | from babel.util import wraptext, LOCALTZ |
|---|
| 32 | |
|---|
| 33 | __all__ = ['read_po', 'write_po'] |
|---|
| 34 | __docformat__ = 'restructuredtext en' |
|---|
| 35 | |
|---|
def unescape(string):
    r"""Reverse `escape` the given string.

    The surrounding double quotes are removed and the escape sequences
    ``\\``, ``\t``, ``\r``, ``\n`` and ``\"`` are converted back to the
    characters they stand for.

    >>> print unescape('"Say:\\n  \\"hello, world!\\"\\n"')
    Say:
      "hello, world!"
    <BLANKLINE>

    An escaped backslash is not mistaken for the start of another escape
    sequence:

    >>> print unescape('"C:\\\\new"')
    C:\new

    :param string: the string to unescape, including the enclosing quotes
    :return: the unescaped string
    :rtype: `str` or `unicode`
    """
    def _replace_escape(match):
        char = match.group(1)
        if char == 'n':
            return '\n'
        elif char == 't':
            return '\t'
        elif char == 'r':
            return '\r'
        # a backslash or a double quote stands for itself
        return char

    # All escape sequences are resolved in a single left-to-right pass.
    # Chaining `str.replace` calls would re-scan already unescaped text and
    # turn e.g. an escaped backslash followed by "n" into a newline.
    return re.sub(r'\\([\\trn"])', _replace_escape, string[1:-1])
|---|
| 53 | |
|---|
def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"''')
    Say:
      "hello, world!"
    <BLANKLINE>

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"''')
    Say:
      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    Continuation lines are also joined when the first line is not the
    empty string:

    >>> print denormalize(r'''"Say: "
    ... "hello"''')
    Say: hello

    :param string: the string to denormalize
    :return: the denormalized string
    :rtype: `unicode` or `str`
    """
    if '\n' in string:
        escaped_lines = string.splitlines()
        if string.startswith('""'):
            # The leading empty string only introduces the continuation
            # lines and carries no content of its own
            escaped_lines = escaped_lines[1:]
        return ''.join([unescape(line) for line in escaped_lines])
    else:
        return unescape(string)
|---|
| 84 | |
|---|
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 4, 1)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    (u'foo %(name)s', '')
      ([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
      ([], [])
    ((u'bar', u'baz'), ('', ''))
      ([(u'main.py', 3)], set([]))
      ([u'A user comment'], [u'An auto comment'])

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :return: the catalog holding the messages that were read
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain)

    # The parser state lives in lists so that the nested helper functions
    # below can modify it in place.
    counter = [0]       # number of messages added so far
    offset = [0]        # index of the line holding the current ``msgid``
    messages = []       # raw msgid (and msgid_plural) of the current entry
    translations = []   # ``[index, raw msgstr]`` pairs of the current entry
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]
    in_msgid = [False]
    in_msgstr = [False]

    def _add_message():
        # Turn the collected state into a `Message`, store it in the catalog
        # and reset the state for the next entry.
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    # pad missing plural forms with empty translations
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        message = Message(msgid, string, list(locations), set(flags),
                          auto_comments, user_comments, lineno=offset[0] + 1)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        del messages[:]
        del translations[:]
        del locations[:]
        del flags[:]
        del auto_comments[:]
        del user_comments[:]
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line):
        # Handle a ``msgid``, ``msgid_plural``, ``msgstr`` or continuation
        # line; anything else (such as a blank line) is ignored.
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            msg = line[12:].lstrip()
            messages.append(msg)
        elif line.startswith('msgid'):
            in_msgid[0] = True
            offset[0] = lineno
            txt = line[5:].lstrip()
            if messages:
                # a new entry starts, so flush the previous one
                _add_message()
            messages.append(txt)
        elif line.startswith('msgstr'):
            in_msgid[0] = False
            in_msgstr[0] = True
            msg = line[6:].lstrip()
            if msg.startswith('['):
                # Split on the first "]" only: the translation itself may
                # contain closing brackets
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('"'):
            # continuation of the preceding msgid or msgstr
            if in_msgid[0]:
                messages[-1] += u'\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += u'\n' + line.rstrip()

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip().decode(catalog.charset)
        if line.startswith('#'):
            # NOTE(review): every "#" line flushes a pending entry that
            # already has a translation, so an obsolete entry spanning
            # several "#~ msgstr[n]" or "#~" continuation lines looks like
            # it gets split up here -- confirm and address separately.
            in_msgid[0] = in_msgstr[0] = False
            if messages and translations:
                _add_message()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            location_lineno = int(location[pos + 1:])
                        except ValueError:
                            # not a "filename:lineno" reference
                            continue
                        locations.append((location[:pos], location_lineno))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('~'):
                obsolete[0] = True
                _process_message_line(lineno, line[2:].lstrip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment: # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append(u'')
        translations.append([0, u''])
        _add_message()

    return catalog
|---|
| 240 | |
|---|
# Pattern that `normalize` splits long lines with; the enclosing group makes
# `split` return the separators along with the text between them.
WORD_SEP = re.compile('(%s)' % '|'.join([
    r'\s+',                                   # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',   # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',    # em-dash
]))
|---|
| 246 | |
|---|
def escape(string):
    r"""Quote the given string for use as a double-quoted string in a ``PO``
    file.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their escape sequences, and the result is enclosed in
    double quotes.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string
    :rtype: `str` or `unicode`
    """
    # The backslash has to come first, otherwise the backslashes introduced
    # by the other replacements would get escaped a second time.
    replacements = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('\"', '\\"'),
    )
    escaped = string
    for char, sequence in replacements:
        escaped = escaped.replace(char, sequence)
    return '"%s"' % escaped
|---|
| 265 | |
|---|
def normalize(string, prefix='', width=76):
    r"""Bring a string into the form it takes inside a .po file.

    Text made up of a single line is merely escaped.  Anything longer is
    written as an empty string followed by one escaped string per output
    line, with overlong lines broken at word boundaries.

    >>> print normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None)
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32)
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if not (width and width > 0):
        # wrapping is disabled, only honour the existing line breaks
        lines = string.splitlines(True)
    else:
        lines = []
        prefix_width = len(prefix)
        for source_line in string.splitlines(True):
            if len(escape(source_line)) + prefix_width <= width:
                lines.append(source_line)
                continue

            # The line is too long: fill output lines chunk by chunk,
            # consuming the chunks from the end of the reversed list.
            pending = WORD_SEP.split(source_line)
            pending.reverse()
            while pending:
                taken = []
                used = 2  # the two enclosing quotes
                while pending:
                    cost = len(escape(pending[-1])) - 2 + prefix_width
                    if used + cost >= width:
                        if not taken:
                            # a chunk too long for any line gets a line
                            # of its own
                            taken.append(pending.pop())
                        break
                    taken.append(pending.pop())
                    used += cost
                lines.append(u''.join(taken))

    if len(lines) <= 1:
        return escape(string)

    # Drop an empty trailing line
    if not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    escaped_lines = [prefix + escape(piece) for piece in lines]
    return u'""\n' + u'\n'.join(escaped_lines)
|---|
| 327 | |
|---|
def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
             sort_output=False, sort_by_file=False, ignore_obsolete=False,
             include_previous=False):
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    """
    def _normalize(key, prefix=''):
        return normalize(key, prefix=prefix, width=width) \
            .encode(catalog.charset, 'backslashreplace')

    def _write(text):
        if isinstance(text, unicode):
            text = text.encode(catalog.charset)
        fileobj.write(text)

    def _write_comment(comment, prefix=''):
        if width and width > 0:
            lines = wraptext(comment, width)
        else:
            # Wrapping is disabled: emit the comment line by line.  Looping
            # over the string itself would yield one comment line per
            # character.
            lines = comment.splitlines()
        for line in lines:
            _write('#%s %s\n' % (prefix, line.strip()))

    def _write_message(message, prefix=''):
        if isinstance(message.id, (list, tuple)):
            _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
            _write('%smsgid_plural %s\n' % (
                prefix, _normalize(message.id[1], prefix)
            ))

            for idx in range(catalog.num_plurals):
                try:
                    string = message.string[idx]
                except IndexError:
                    # no translation for this plural form yet
                    string = ''
                _write('%smsgstr[%d] %s\n' % (
                    prefix, idx, _normalize(string, prefix)
                ))
        else:
            _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
            _write('%smsgstr %s\n' % (
                prefix, _normalize(message.string or '', prefix)
            ))

    messages = list(catalog)
    if sort_output:
        messages.sort()
    elif sort_by_file:
        messages.sort(lambda x,y: cmp(x.locations, y.locations))

    for message in messages:
        if not message.id: # This is the header "message"
            if omit_header:
                continue
            lines = []
            for line in catalog.header_comment.splitlines():
                if width and width > 0:
                    lines += wraptext(line, width=width,
                                      subsequent_indent='# ')
                else:
                    lines.append(line)
            # Terminate the header comment with a newline whether or not it
            # was wrapped, so that what follows starts on a line of its own
            _write(u'\n'.join(lines) + u'\n')

        for comment in message.user_comments:
            _write_comment(comment)
        for comment in message.auto_comments:
            _write_comment(comment, prefix='.')

        if not no_location:
            locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno)
                              for filename, lineno in message.locations])
            _write_comment(locs, prefix=':')
        if message.flags:
            # the leading empty item yields the "#, flag, flag" form
            _write('#%s\n' % ', '.join([''] + list(message.flags)))

        if message.previous_id and include_previous:
            _write_comment('msgid %s' % _normalize(message.previous_id[0]),
                           prefix='|')
            if len(message.previous_id) > 1:
                _write_comment('msgid_plural %s' % _normalize(
                    message.previous_id[1]
                ), prefix='|')

        _write_message(message)
        _write('\n')

    if not ignore_obsolete:
        for message in catalog.obsolete.values():
            for comment in message.user_comments:
                _write_comment(comment)
            _write_message(message, prefix='#~ ')
            _write('\n')
|---|