# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2011 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.

"""Reading and writing of files in the ``gettext`` MO (machine object) format.

:since: version 0.9
:see: `The Format of MO Files
      <http://www.gnu.org/software/gettext/manual/gettext.html#MO-Files>`_
"""

import array
import struct

from babel.messages.catalog import Catalog, Message

__all__ = ['read_mo', 'write_mo']
__docformat__ = 'restructuredtext en'


# Magic number of an MO file as seen when the first 32-bit word is decoded
# as little endian: LE_MAGIC for a little endian file, BE_MAGIC (the same
# bytes reversed) for a big endian file.
LE_MAGIC = 0x950412de
BE_MAGIC = 0xde120495


def read_mo(fileobj):
    """Read a binary MO file from the given file-like object and return a
    corresponding `Catalog` object.

    :param fileobj: the file-like object to read the MO file from
    :return: a catalog object representing the parsed MO file
    :rtype: `Catalog`
    :raise IOError: if the magic number is not recognized, or if a string
                    table entry points outside of the file contents

    :note: The implementation of this function is heavily based on the
           ``GNUTranslations._parse`` method of the ``gettext`` module in the
           standard library.
    """
    catalog = Catalog()
    headers = {}

    # Only used to give error messages some context
    filename = getattr(fileobj, 'name', '')

    buf = fileobj.read()
    buflen = len(buf)
    unpack = struct.unpack

    # Parse the .mo file header, which consists of 5 32-bit words. The
    # magic number is always decoded as little endian; which of the two
    # magic values comes out tells us the byte order of the rest of the file.
    magic = unpack('<I', buf[:4])[0]
    if magic == LE_MAGIC:
        version, msgcount, origidx, transidx = unpack('<4I', buf[4:20])
        ii = '<II'
    elif magic == BE_MAGIC:
        version, msgcount, origidx, transidx = unpack('>4I', buf[4:20])
        ii = '>II'
    else:
        raise IOError(0, 'Bad magic number', filename)

    # Now put all messages from the .mo file buffer into the catalog
    # dictionary
    for i in xrange(0, msgcount):
        # Each index table entry is a (length, offset) pair
        mlen, moff = unpack(ii, buf[origidx:origidx + 8])
        mend = moff + mlen
        tlen, toff = unpack(ii, buf[transidx:transidx + 8])
        tend = toff + tlen
        # Strict comparison: every string is followed by a NUL terminator
        # that is not included in its length
        if mend < buflen and tend < buflen:
            msg = buf[moff:mend]
            tmsg = buf[toff:tend]
        else:
            raise IOError(0, 'File is corrupt', filename)

        # See if we're looking at GNU .mo conventions for metadata
        if mlen == 0:
            # Catalog description
            lastkey = key = None
            for item in tmsg.splitlines():
                item = item.strip()
                if not item:
                    continue
                if ':' in item:
                    key, value = item.split(':', 1)
                    lastkey = key = key.strip().lower()
                    headers[key] = value.strip()
                elif lastkey:
                    # Continuation line of the previous header
                    headers[lastkey] += '\n' + item

        if '\x04' in msg: # context
            # Split on the first EOT only, so that a message ID which itself
            # contains that byte does not break the unpacking
            ctxt, msg = msg.split('\x04', 1)
        else:
            ctxt = None

        if '\x00' in msg: # plural forms
            msg = msg.split('\x00')
            tmsg = tmsg.split('\x00')
            if catalog.charset:
                msg = [x.decode(catalog.charset) for x in msg]
                tmsg = [x.decode(catalog.charset) for x in tmsg]
        else:
            if catalog.charset:
                msg = msg.decode(catalog.charset)
                tmsg = tmsg.decode(catalog.charset)
        catalog[msg] = Message(msg, tmsg, context=ctxt)

        # advance to next entry in the seek tables
        origidx += 8
        transidx += 8

    catalog.mime_headers = headers.items()
    return catalog


def write_mo(fileobj, catalog, use_fuzzy=False):
    """Write a catalog to the specified file-like object using the GNU MO file
    format.

    >>> from babel.messages import Catalog
    >>> from gettext import GNUTranslations
    >>> from StringIO import StringIO

    >>> catalog = Catalog(locale='en_US')
    >>> catalog.add('foo', 'Voh')
    <Message ...>
    >>> catalog.add((u'bar', u'baz'), (u'Bahr', u'Batz'))
    <Message ...>
    >>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
    <Message ...>
    >>> catalog.add('Fizz', '')
    <Message ...>
    >>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
    <Message ...>
    >>> buf = StringIO()

    >>> write_mo(buf, catalog)
    >>> buf.seek(0)
    >>> translations = GNUTranslations(fp=buf)
    >>> translations.ugettext('foo')
    u'Voh'
    >>> translations.ungettext('bar', 'baz', 1)
    u'Bahr'
    >>> translations.ungettext('bar', 'baz', 2)
    u'Batz'
    >>> translations.ugettext('fuz')
    u'fuz'
    >>> translations.ugettext('Fizz')
    u'Fizz'
    >>> translations.ugettext('Fuzz')
    u'Fuzz'
    >>> translations.ugettext('Fuzzes')
    u'Fuzzes'

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param use_fuzzy: whether translations marked as "fuzzy" should be included
                      in the output
    """
    messages = list(catalog)
    if not use_fuzzy:
        # The first entry is left alone regardless of its fuzzy flag
        # NOTE(review): presumably this is the catalog header -- confirm
        # against `Catalog.__iter__`
        messages[1:] = [m for m in messages[1:] if not m.fuzzy]
    messages.sort()

    charset = catalog.charset

    # Chunks of the key and value string tables; they are joined once at the
    # end instead of growing a string with ``+=`` for every message.
    id_chunks = []
    str_chunks = []
    ids_size = strs_size = 0
    offsets = []

    for message in messages:
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        if message.pluralizable:
            msgid = '\x00'.join([
                part.encode(charset) for part in message.id
            ])
            msgstrs = []
            for idx, string in enumerate(message.string):
                if not string:
                    # Untranslated form: fall back to the singular ID for
                    # index 0 and to the plural ID for all other indexes
                    msgstrs.append(message.id[min(int(idx), 1)])
                else:
                    msgstrs.append(string)
            msgstr = '\x00'.join([
                part.encode(charset) for part in msgstrs
            ])
        else:
            msgid = message.id.encode(charset)
            if not message.string:
                # Untranslated message: fall back to the message ID
                msgstr = message.id.encode(charset)
            else:
                msgstr = message.string.encode(charset)
        if message.context:
            # The context is stored in front of the ID, separated by EOT
            msgid = '\x04'.join([message.context.encode(charset), msgid])
        offsets.append((ids_size, len(msgid), strs_size, len(msgstr)))
        id_chunks.append(msgid)
        id_chunks.append('\x00')
        ids_size += len(msgid) + 1
        str_chunks.append(msgstr)
        str_chunks.append('\x00')
        strs_size += len(msgstr) + 1

    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables.
    keystart = 7 * 4 + 16 * len(messages)
    valuestart = keystart + ids_size

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    offsets = koffsets + voffsets

    # Both the header and the index tables are written in the byte order of
    # the machine running this code (no byte order prefix in the format).
    fileobj.write(struct.pack('Iiiiiii',
        LE_MAGIC,                   # magic
        0,                          # version
        len(messages),              # number of entries
        7 * 4,                      # start of key index
        7 * 4 + len(messages) * 8,  # start of value index
        0, 0                        # size and offset of hash table
    ) + array.array("i", offsets).tostring()
      + ''.join(id_chunks) + ''.join(str_chunks))