[3] | 1 | # -*- coding: utf-8 -*- |
---|
| 2 | # |
---|
| 3 | # Copyright (C) 2007 Edgewall Software |
---|
| 4 | # All rights reserved. |
---|
| 5 | # |
---|
| 6 | # This software is licensed as described in the file COPYING, which |
---|
| 7 | # you should have received as part of this distribution. The terms |
---|
| 8 | # are also available at http://babel.edgewall.org/wiki/License. |
---|
| 9 | # |
---|
| 10 | # This software consists of voluntary contributions made by many |
---|
| 11 | # individuals. For the exact contribution history, see the revision |
---|
| 12 | # history and logs, available at http://babel.edgewall.org/log/. |
---|
| 13 | |
---|
| 14 | """Reading and writing of files in the ``gettext`` PO (portable object) |
---|
| 15 | format. |
---|
| 16 | |
---|
| 17 | :see: `The Format of PO Files |
---|
| 18 | <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_ |
---|
| 19 | """ |
---|
| 20 | |
---|
| 21 | from datetime import date, datetime |
---|
| 22 | import os |
---|
| 23 | import re |
---|
| 24 | try: |
---|
| 25 | set |
---|
| 26 | except NameError: |
---|
| 27 | from sets import Set as set |
---|
| 28 | |
---|
| 29 | from babel import __version__ as VERSION |
---|
| 30 | from babel.messages.catalog import Catalog, Message |
---|
| 31 | from babel.util import wraptext, LOCALTZ |
---|
| 32 | |
---|
| 33 | __all__ = ['read_po', 'write_po'] |
---|
| 34 | __docformat__ = 'restructuredtext en' |
---|
| 35 | |
---|
def unescape(string):
    r"""Reverse `escape` the given string.

    The first and last character (the enclosing double quotes) are dropped
    and the escape sequences for backslash, tab, carriage return, newline
    and double quote are decoded.  Decoding happens in a single
    left-to-right pass so that the output of one substitution is never
    re-interpreted as the start of another escape sequence (an escaped
    backslash followed by the letter ``n`` stays a backslash and an ``n``).
    Any other backslash sequence is left untouched.

    >>> print unescape('"Say:\\n  \\"hello, world!\\"\\n"')
    Say:
      "hello, world!"
    <BLANKLINE>

    :param string: the string to unescape, including its enclosing quotes
    :return: the unescaped string
    :rtype: `str` or `unicode`
    """
    control_chars = {'t': '\t', 'r': '\r', 'n': '\n'}

    def _decode(match):
        char = match.group(1)
        # A backslash or double quote simply stands for itself.
        return control_chars.get(char, char)

    return re.sub(r'\\([\\trn"])', _decode, string[1:-1])
---|
| 53 | |
---|
def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    A value spanning several physical lines is unescaped line by line and
    the pieces are concatenated.  A leading empty ``""`` line, as emitted by
    `normalize`, is skipped.

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"''')
    Say:
      "hello, world!"
    <BLANKLINE>

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"''')
    Say:
      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    Multi-line values that do not start with an empty ``""`` line are
    handled as well:

    >>> print denormalize(r'''"Lorem ipsum "
    ... "dolor sit amet"''')
    Lorem ipsum dolor sit amet

    :param string: the string to denormalize
    :return: the denormalized string
    :rtype: `unicode` or `str`
    """
    if '\n' not in string:
        return unescape(string)

    escaped_lines = string.splitlines()
    if string.startswith('""'):
        # The first line carries no content, it only opens the block.
        escaped_lines = escaped_lines[1:]
    return ''.join([unescape(line) for line in escaped_lines])
---|
| 84 | |
---|
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 4, 1)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    (u'foo %(name)s', '')
      ([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
      ([], [])
    ((u'bar', u'baz'), ('', ''))
      ([(u'main.py', 3)], set([]))
      ([u'A user comment'], [u'An auto comment'])

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :return: the catalog populated with the messages read from the file
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain)

    # Parser state shared with the nested helpers below.  Scalar values are
    # kept in one-element lists so that the helpers can update them in place.
    counter = [0]           # number of messages added so far
    offset = [0]            # index of the line holding the current ``msgid``
    messages = []           # raw msgid / msgid_plural of the current entry
    translations = []       # ``[index, raw msgstr]`` pairs of the current entry
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]
    in_msgid = [False]
    in_msgstr = [False]

    def _add_message():
        # Turn the accumulated state into a `Message`, store it in the
        # catalog and reset the accumulators for the next entry.
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    # Pad missing plural forms with empty translations
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        # The comment lists are copied because the accumulators are emptied
        # in place right below.
        # NOTE(review): `Message` may already copy them -- the copy is cheap.
        message = Message(msgid, string, list(locations), set(flags),
                          list(auto_comments), list(user_comments),
                          lineno=offset[0] + 1)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        del messages[:]
        del translations[:]
        del locations[:]
        del flags[:]
        del auto_comments[:]
        del user_comments[:]
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line):
        # Handle a ``msgid``/``msgid_plural``/``msgstr`` keyword line or a
        # quoted continuation line.
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            msg = line[12:].lstrip()
            messages.append(msg)
        elif line.startswith('msgid'):
            in_msgid[0] = True
            offset[0] = lineno
            txt = line[5:].lstrip()
            if messages:
                # A new entry starts, flush the previous one
                _add_message()
            messages.append(txt)
        elif line.startswith('msgstr'):
            in_msgid[0] = False
            in_msgstr[0] = True
            msg = line[6:].lstrip()
            if msg.startswith('['):
                # Split on the first ``]`` only: the translation text itself
                # may contain closing brackets.
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('"'):
            # Continuation lines are joined with newlines; `denormalize`
            # splits them apart again.
            if in_msgid[0]:
                messages[-1] += u'\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += u'\n' + line.rstrip()

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip().decode(catalog.charset)
        if line.startswith('#'):
            in_msgid[0] = in_msgstr[0] = False
            if messages and translations:
                _add_message()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            # Separate name so the loop's own ``lineno`` is
                            # not clobbered.
                            location_lineno = int(location[pos + 1:])
                        except ValueError:
                            continue
                        locations.append((location[:pos], location_lineno))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('~'):
                obsolete[0] = True
                _process_message_line(lineno, line[2:].lstrip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment: # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append(u'')
        translations.append([0, u''])
        _add_message()

    return catalog
---|
| 240 | |
---|
# Pattern used by `normalize` to cut a line into chunks at which it may be
# wrapped.  The alternatives are wrapped in one capturing group so that
# ``WORD_SEP.split()`` also returns the separators it matched.
WORD_SEP = re.compile('(%s)' % '|'.join([
    r'\s+',                                  # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',  # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',   # em-dash
]))
---|
| 246 | |
---|
def escape(string):
    r"""Quote a string for use as a double-quoted value in a ``PO`` file.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their backslash escape sequences and the result is enclosed
    in double quotes.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string
    :rtype: `str` or `unicode`
    """
    # The backslash has to come first, otherwise the backslashes introduced
    # by the later substitutions would be escaped a second time.
    substitutions = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('\"', '\\"'),
    )
    escaped = string
    for raw, replacement in substitutions:
        escaped = escaped.replace(raw, replacement)
    return '"%s"' % escaped
---|
| 265 | |
---|
def normalize(string, prefix='', width=76):
    r"""Render a string the way it is written to a .po file.

    A value consisting of at most one line is returned as a single escaped
    string.  Anything longer becomes an empty ``""`` followed by one escaped
    string per line, each of them preceded by `prefix`.  When wrapping is
    enabled, lines that would exceed `width` are broken at the positions
    matched by `WORD_SEP`.

    >>> print normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None)
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32)
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if not (width and width > 0):
        # Wrapping disabled, just keep the physical lines
        lines = string.splitlines(True)
    else:
        indent = len(prefix)
        lines = []
        for physical_line in string.splitlines(True):
            if len(escape(physical_line)) + indent <= width:
                lines.append(physical_line)
                continue

            # Reversed so that the next chunk can be taken with ``pop()``
            pending = WORD_SEP.split(physical_line)
            pending.reverse()
            while pending:
                current = []
                used = 2  # the two enclosing quotes
                while pending:
                    cost = len(escape(pending[-1])) - 2 + indent
                    if used + cost >= width:
                        if not current:
                            # a chunk too long for any line gets a line
                            # of its own
                            current.append(pending.pop())
                        break
                    current.append(pending.pop())
                    used += cost
                lines.append(u''.join(current))

    if len(lines) <= 1:
        return escape(string)

    # Drop an empty trailing line; ``lines`` holds at least two entries here
    if not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    escaped_lines = [prefix + escape(entry) for entry in lines]
    return u'""\n' + u'\n'.join(escaped_lines)
---|
| 327 | |
---|
def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
             sort_output=False, sort_by_file=False, ignore_obsolete=False,
             include_previous=False):
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    """
    def _normalize(key, prefix=''):
        # Escape/wrap the value and encode it to the catalog charset
        return normalize(key, prefix=prefix, width=width) \
            .encode(catalog.charset, 'backslashreplace')

    def _write(text):
        if isinstance(text, unicode):
            text = text.encode(catalog.charset)
        fileobj.write(text)

    def _write_comment(comment, prefix=''):
        if width and width > 0:
            lines = wraptext(comment, width)
        else:
            # Wrapping is disabled: emit every physical line as it is.
            # Iterating over the bare string here would write one comment
            # line per character.
            lines = comment.splitlines()
        for line in lines:
            _write('#%s %s\n' % (prefix, line.strip()))

    def _write_message(message, prefix=''):
        if isinstance(message.id, (list, tuple)):
            _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
            _write('%smsgid_plural %s\n' % (
                prefix, _normalize(message.id[1], prefix)
            ))

            for idx in range(catalog.num_plurals):
                try:
                    string = message.string[idx]
                except IndexError:
                    # Fewer translations than plural forms, pad with ""
                    string = ''
                _write('%smsgstr[%d] %s\n' % (
                    prefix, idx, _normalize(string, prefix)
                ))
        else:
            _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
            _write('%smsgstr %s\n' % (
                prefix, _normalize(message.string or '', prefix)
            ))

    messages = list(catalog)
    if sort_output:
        messages.sort()
    elif sort_by_file:
        messages.sort(lambda x,y: cmp(x.locations, y.locations))

    for message in messages:
        if not message.id: # This is the header "message"
            if omit_header:
                continue
            comment_header = catalog.header_comment
            if width and width > 0:
                lines = []
                for line in comment_header.splitlines():
                    lines += wraptext(line, width=width,
                                      subsequent_indent='# ')
            else:
                lines = comment_header.splitlines()
            # Terminate the header comment with exactly one newline in both
            # cases, so that whatever is written next starts on its own line.
            _write(u'\n'.join(lines) + u'\n')

        for comment in message.user_comments:
            _write_comment(comment)
        for comment in message.auto_comments:
            _write_comment(comment, prefix='.')

        if not no_location:
            locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno)
                              for filename, lineno in message.locations])
            _write_comment(locs, prefix=':')
        if message.flags:
            # The leading '' makes the joined text start with ", "
            _write('#%s\n' % ', '.join([''] + list(message.flags)))

        if message.previous_id and include_previous:
            _write_comment('msgid %s' % _normalize(message.previous_id[0]),
                           prefix='|')
            if len(message.previous_id) > 1:
                _write_comment('msgid_plural %s' % _normalize(
                    message.previous_id[1]
                ), prefix='|')

        _write_message(message)
        _write('\n')

    if not ignore_obsolete:
        for message in catalog.obsolete.values():
            for comment in message.user_comments:
                _write_comment(comment)
            _write_message(message, prefix='#~ ')
            _write('\n')
---|