[3] | 1 | # -*- coding: utf-8 -*- |
---|
| 2 | # |
---|
| 3 | # Copyright (C) 2007 Edgewall Software |
---|
| 4 | # All rights reserved. |
---|
| 5 | # |
---|
| 6 | # This software is licensed as described in the file COPYING, which |
---|
| 7 | # you should have received as part of this distribution. The terms |
---|
| 8 | # are also available at http://babel.edgewall.org/wiki/License. |
---|
| 9 | # |
---|
| 10 | # This software consists of voluntary contributions made by many |
---|
| 11 | # individuals. For the exact contribution history, see the revision |
---|
| 12 | # history and logs, available at http://babel.edgewall.org/log/. |
---|
| 13 | |
---|
| 14 | """Writing of files in the ``gettext`` MO (machine object) format. |
---|
| 15 | |
---|
| 16 | :since: version 0.9 |
---|
| 17 | :see: `The Format of MO Files |
---|
| 18 | <http://www.gnu.org/software/gettext/manual/gettext.html#MO-Files>`_ |
---|
| 19 | """ |
---|
| 20 | |
---|
| 21 | import array |
---|
| 22 | import struct |
---|
| 23 | |
---|
| 24 | __all__ = ['write_mo'] |
---|
| 25 | __docformat__ = 'restructuredtext en' |
---|
| 26 | |
---|
def write_mo(fileobj, catalog, use_fuzzy=False):
    """Write a catalog to the specified file-like object using the GNU MO file
    format.

    >>> from babel.messages import Catalog
    >>> from gettext import GNUTranslations
    >>> from StringIO import StringIO

    >>> catalog = Catalog(locale='en_US')
    >>> catalog.add('foo', 'Voh')
    >>> catalog.add((u'bar', u'baz'), (u'Bahr', u'Batz'))
    >>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
    >>> catalog.add('Fizz', '')
    >>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
    >>> buf = StringIO()

    >>> write_mo(buf, catalog)
    >>> buf.seek(0)
    >>> translations = GNUTranslations(fp=buf)
    >>> translations.ugettext('foo')
    u'Voh'
    >>> translations.ungettext('bar', 'baz', 1)
    u'Bahr'
    >>> translations.ungettext('bar', 'baz', 2)
    u'Batz'
    >>> translations.ugettext('fuz')
    u'fuz'
    >>> translations.ugettext('Fizz')
    u'Fizz'
    >>> translations.ugettext('Fuzz')
    u'Fuzz'
    >>> translations.ugettext('Fuzzes')
    u'Fuzzes'

    The whole file is assembled in memory and emitted with a single
    ``fileobj.write()`` call.  The data written is binary, so `fileobj` must
    accept byte strings.

    Messages without a translation are written with their message ID as the
    translation; for pluralizable messages an empty plural form falls back to
    the singular ID (form 0) or the plural ID (any other form).

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param use_fuzzy: whether translations marked as "fuzzy" should be included
                      in the output
    """
    charset = catalog.charset
    messages = list(catalog)
    if not use_fuzzy:
        # The first entry is exempt from the fuzzy filter (presumably it is
        # the catalog header -- confirm against `Catalog.__iter__`).
        messages[1:] = [m for m in messages[1:] if not m.fuzzy]
    messages.sort()

    nul = b'\x00'
    id_chunks = []
    str_chunks = []
    offsets = []
    # Running positions inside the key table and the value table.  Tracking
    # them here lets us collect the chunks in lists and join once, instead of
    # re-copying an ever-growing string for every message.
    id_pos = str_pos = 0

    for message in messages:
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        if message.pluralizable:
            msgid = nul.join([part.encode(charset) for part in message.id])
            msgstr = nul.join([
                (string or message.id[min(idx, 1)]).encode(charset)
                for idx, string in enumerate(message.string)
            ])
        else:
            msgid = message.id.encode(charset)
            msgstr = (message.string or message.id).encode(charset)
        offsets.append((id_pos, len(msgid), str_pos, len(msgstr)))
        id_chunks.append(msgid)
        str_chunks.append(msgstr)
        id_pos += len(msgid) + 1
        str_pos += len(msgstr) + 1

    ids = b''.join([chunk + nul for chunk in id_chunks])
    strs = b''.join([chunk + nul for chunk in str_chunks])

    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables.
    count = len(messages)
    header_size = 7 * 4
    keystart = header_size + 16 * count
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    for id_off, id_len, str_off, str_len in offsets:
        koffsets += [id_len, id_off + keystart]
        voffsets += [str_len, str_off + valuestart]
    index = koffsets + voffsets

    # '=' selects native byte order with *standard* sizes, so every field is
    # exactly 4 bytes as the offset arithmetic above assumes, regardless of
    # the platform's C ``int`` width.
    header = struct.pack(
        '=Iiiiiii',
        0x950412de,                 # magic
        0,                          # version
        count,                      # number of entries
        header_size,                # start of key index
        header_size + count * 8,    # start of value index
        0, 0                        # size and offset of hash table
    )
    index_table = struct.pack('=%di' % len(index), *index)
    fileobj.write(header + index_table + ids + strs)
---|