| 1 | # -*- coding: utf-8 -*- |
|---|
| 2 | # |
|---|
| 3 | # Copyright (C) 2007 Edgewall Software |
|---|
| 4 | # All rights reserved. |
|---|
| 5 | # |
|---|
| 6 | # This software is licensed as described in the file COPYING, which |
|---|
| 7 | # you should have received as part of this distribution. The terms |
|---|
| 8 | # are also available at http://babel.edgewall.org/wiki/License. |
|---|
| 9 | # |
|---|
| 10 | # This software consists of voluntary contributions made by many |
|---|
| 11 | # individuals. For the exact contribution history, see the revision |
|---|
| 12 | # history and logs, available at http://babel.edgewall.org/log/. |
|---|
| 13 | |
|---|
| 14 | """Writing of files in the ``gettext`` MO (machine object) format. |
|---|
| 15 | |
|---|
| 16 | :since: version 0.9 |
|---|
| 17 | :see: `The Format of MO Files |
|---|
| 18 | <http://www.gnu.org/software/gettext/manual/gettext.html#MO-Files>`_ |
|---|
| 19 | """ |
|---|
| 20 | |
|---|
| 21 | import array |
|---|
| 22 | import struct |
|---|
| 23 | |
|---|
| 24 | __all__ = ['write_mo'] |
|---|
| 25 | __docformat__ = 'restructuredtext en' |
|---|
| 26 | |
|---|
def write_mo(fileobj, catalog, use_fuzzy=False):
    """Write a catalog to the specified file-like object using the GNU MO file
    format.

    >>> from babel.messages import Catalog
    >>> from gettext import GNUTranslations
    >>> from StringIO import StringIO

    >>> catalog = Catalog(locale='en_US')
    >>> catalog.add('foo', 'Voh')
    >>> catalog.add((u'bar', u'baz'), (u'Bahr', u'Batz'))
    >>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
    >>> catalog.add('Fizz', '')
    >>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
    >>> buf = StringIO()

    >>> write_mo(buf, catalog)
    >>> buf.seek(0)
    >>> translations = GNUTranslations(fp=buf)
    >>> translations.ugettext('foo')
    u'Voh'
    >>> translations.ungettext('bar', 'baz', 1)
    u'Bahr'
    >>> translations.ungettext('bar', 'baz', 2)
    u'Batz'
    >>> translations.ugettext('fuz')
    u'fuz'
    >>> translations.ugettext('Fizz')
    u'Fizz'
    >>> translations.ugettext('Fuzz')
    u'Fuzz'
    >>> translations.ugettext('Fuzzes')
    u'Fuzzes'

    The whole file is emitted with a single ``fileobj.write()`` call that is
    passed a byte string, so `fileobj` must accept binary data.  Integers are
    packed in the platform's native byte order.

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance; it is iterated for its messages
                    and its ``charset`` attribute is used to encode all
                    message ids and translations
    :param use_fuzzy: whether translations marked as "fuzzy" should be included
                      in the output
    """
    messages = list(catalog)
    if not use_fuzzy:
        # The first entry is exempt from fuzzy filtering (presumably it is the
        # catalog header -- confirm against ``Catalog.__iter__``).
        messages[1:] = [m for m in messages[1:] if not m.fuzzy]
    # Ordering is whatever the message objects themselves define.
    messages.sort()

    charset = catalog.charset
    nul = b'\x00'

    # Encoded ids/translations are collected as chunks and joined once at the
    # end; growing a single string with ``+=`` per message is quadratic.
    id_chunks = []
    str_chunks = []
    offsets = []
    id_offset = str_offset = 0

    for message in messages:
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        if message.pluralizable:
            msgid = nul.join([part.encode(charset) for part in message.id])
            # An empty plural form falls back to the message id: the singular
            # id for form 0, the plural id for every further form.
            msgstr = nul.join([
                (string or message.id[min(idx, 1)]).encode(charset)
                for idx, string in enumerate(message.string)
            ])
        else:
            msgid = message.id.encode(charset)
            # An empty translation falls back to the message id itself.
            msgstr = (message.string or message.id).encode(charset)
        offsets.append((id_offset, len(msgid), str_offset, len(msgstr)))
        id_chunks.append(msgid)
        id_chunks.append(nul)
        str_chunks.append(msgstr)
        str_chunks.append(nul)
        id_offset += len(msgid) + 1
        str_offset += len(msgstr) + 1

    ids = b''.join(id_chunks)
    strs = b''.join(str_chunks)

    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables.
    keystart = 7 * 4 + 16 * len(messages)
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    key_index = []
    value_index = []
    for key_offset, key_len, value_offset, value_len in offsets:
        key_index += [key_len, key_offset + keystart]
        value_index += [value_len, value_offset + valuestart]

    index = array.array("i", key_index + value_index)
    # ``array.tostring()`` was removed in Python 3.9 and ``tobytes()`` does not
    # exist on Python 2, so use whichever this interpreter provides.
    if hasattr(index, 'tobytes'):
        index_bytes = index.tobytes()
    else:
        index_bytes = index.tostring()

    fileobj.write(struct.pack('Iiiiiii',
        0x950412de,                 # magic
        0,                          # version
        len(messages),              # number of entries
        7 * 4,                      # start of key index
        7 * 4 + len(messages) * 8,  # start of value index
        0, 0                        # size and offset of hash table
    ) + index_bytes + ids + strs)