# -*- coding: utf-8 -*- # # Copyright (C) 2007 Edgewall Software # All rights reserved. # # This software is licensed as described in the file COPYING, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. # # This software consists of voluntary contributions made by many # individuals. For the exact contribution history, see the revision # history and logs, available at http://babel.edgewall.org/log/. """Writing of files in the ``gettext`` MO (machine object) format. :since: version 0.9 :see: `The Format of MO Files `_ """ import array import struct __all__ = ['write_mo'] __docformat__ = 'restructuredtext en' def write_mo(fileobj, catalog, use_fuzzy=False): """Write a catalog to the specified file-like object using the GNU MO file format. >>> from babel.messages import Catalog >>> from gettext import GNUTranslations >>> from StringIO import StringIO >>> catalog = Catalog(locale='en_US') >>> catalog.add('foo', 'Voh') >>> catalog.add((u'bar', u'baz'), (u'Bahr', u'Batz')) >>> catalog.add('fuz', 'Futz', flags=['fuzzy']) >>> catalog.add('Fizz', '') >>> catalog.add(('Fuzz', 'Fuzzes'), ('', '')) >>> buf = StringIO() >>> write_mo(buf, catalog) >>> buf.seek(0) >>> translations = GNUTranslations(fp=buf) >>> translations.ugettext('foo') u'Voh' >>> translations.ungettext('bar', 'baz', 1) u'Bahr' >>> translations.ungettext('bar', 'baz', 2) u'Batz' >>> translations.ugettext('fuz') u'fuz' >>> translations.ugettext('Fizz') u'Fizz' >>> translations.ugettext('Fuzz') u'Fuzz' >>> translations.ugettext('Fuzzes') u'Fuzzes' :param fileobj: the file-like object to write to :param catalog: the `Catalog` instance :param use_fuzzy: whether translations marked as "fuzzy" should be included in the output """ messages = list(catalog) if not use_fuzzy: messages[1:] = [m for m in messages[1:] if not m.fuzzy] messages.sort() ids = strs = '' offsets = [] for message in messages: # For each string, we need size and file offset. Each string is NUL # terminated; the NUL does not count into the size. if message.pluralizable: msgid = '\x00'.join([ msgid.encode(catalog.charset) for msgid in message.id ]) msgstrs = [] for idx, string in enumerate(message.string): if not string: msgstrs.append(message.id[min(int(idx), 1)]) else: msgstrs.append(string) msgstr = '\x00'.join([ msgstr.encode(catalog.charset) for msgstr in msgstrs ]) else: msgid = message.id.encode(catalog.charset) if not message.string: msgstr = message.id.encode(catalog.charset) else: msgstr = message.string.encode(catalog.charset) offsets.append((len(ids), len(msgid), len(strs), len(msgstr))) ids += msgid + '\x00' strs += msgstr + '\x00' # The header is 7 32-bit unsigned integers. We don't use hash tables, so # the keys start right after the index tables. keystart = 7 * 4 + 16 * len(messages) valuestart = keystart + len(ids) # The string table first has the list of keys, then the list of values. # Each entry has first the size of the string, then the file offset. koffsets = [] voffsets = [] for o1, l1, o2, l2 in offsets: koffsets += [l1, o1 + keystart] voffsets += [l2, o2 + valuestart] offsets = koffsets + voffsets fileobj.write(struct.pack('Iiiiiii', 0x950412deL, # magic 0, # version len(messages), # number of entries 7 * 4, # start of key index 7 * 4 + len(messages) * 8, # start of value index 0, 0 # size and offset of hash table ) + array.array("i", offsets).tostring() + ids + strs)