# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2011 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.

"""Reading and writing of files in the ``gettext`` MO (machine object) format.

:since: version 0.9
:see: `The Format of MO Files
       <http://www.gnu.org/software/gettext/manual/gettext.html#MO-Files>`_
"""

import array
import struct

from babel.messages.catalog import Catalog, Message

__all__ = ['read_mo', 'write_mo']
__docformat__ = 'restructuredtext en'


# Magic number of an MO file as seen by a little endian reader, and the same
# four bytes as seen when the file was written with the opposite byte order.
LE_MAGIC = 0x950412de
BE_MAGIC = 0xde120495


def read_mo(fileobj):
    """Read a binary MO file from the given file-like object and return a
    corresponding `Catalog` object.

    :param fileobj: the file-like object to read the MO file from
    :return: a catalog object representing the parsed MO file
    :rtype: `Catalog`
    :raise IOError: if the magic number is not recognized, or if a string
                    referenced by the index tables lies outside the file

    :note: The implementation of this function is heavily based on the
           ``GNUTranslations._parse`` method of the ``gettext`` module in the
           standard library.
    """
    catalog = Catalog()
    headers = {}

    filename = getattr(fileobj, 'name', '')

    buf = fileobj.read()
    buflen = len(buf)
    unpack = struct.unpack

    # Parse the .mo file header, which consists of 5 32 bit words. The first
    # word is the magic number; reading it as little endian tells us which
    # byte order the remaining words (and the index tables) are stored in.
    magic = unpack('<I', buf[:4])[0]
    if magic == LE_MAGIC:
        version, msgcount, origidx, transidx = unpack('<4I', buf[4:20])
        ii = '<II'
    elif magic == BE_MAGIC:
        version, msgcount, origidx, transidx = unpack('>4I', buf[4:20])
        ii = '>II'
    else:
        raise IOError(0, 'Bad magic number', filename)

    # Now put all messages from the .mo file buffer into the catalog
    # dictionary
    for i in xrange(0, msgcount):
        # Each index table entry is a (length, offset) pair.
        mlen, moff = unpack(ii, buf[origidx:origidx + 8])
        mend = moff + mlen
        tlen, toff = unpack(ii, buf[transidx:transidx + 8])
        tend = toff + tlen
        if mend < buflen and tend < buflen:
            msg = buf[moff:mend]
            tmsg = buf[toff:tend]
        else:
            raise IOError(0, 'File is corrupt', filename)

        # See if we're looking at GNU .mo conventions for metadata
        if mlen == 0:
            # Catalog description: "Key: value" lines, where a line without
            # a colon continues the value of the previous key.
            lastkey = key = None
            for item in tmsg.splitlines():
                item = item.strip()
                if not item:
                    continue
                if ':' in item:
                    key, value = item.split(':', 1)
                    lastkey = key = key.strip().lower()
                    headers[key] = value.strip()
                elif lastkey:
                    headers[lastkey] += '\n' + item

        if '\x04' in msg: # context
            # Split on the first EOT only, so that any further EOT byte stays
            # part of the message id instead of breaking the unpacking.
            ctxt, msg = msg.split('\x04', 1)
        else:
            ctxt = None

        if '\x00' in msg: # plural forms
            msg = msg.split('\x00')
            tmsg = tmsg.split('\x00')
            if catalog.charset:
                msg = [x.decode(catalog.charset) for x in msg]
                tmsg = [x.decode(catalog.charset) for x in tmsg]
        else:
            if catalog.charset:
                msg = msg.decode(catalog.charset)
                tmsg = tmsg.decode(catalog.charset)
        catalog[msg] = Message(msg, tmsg, context=ctxt)

        # advance to next entry in the seek tables
        origidx += 8
        transidx += 8

    catalog.mime_headers = headers.items()
    return catalog


def write_mo(fileobj, catalog, use_fuzzy=False):
    """Write a catalog to the specified file-like object using the GNU MO file
    format.

    >>> from babel.messages import Catalog
    >>> from gettext import GNUTranslations
    >>> from StringIO import StringIO

    >>> catalog = Catalog(locale='en_US')
    >>> catalog.add('foo', 'Voh')
    <Message ...>
    >>> catalog.add((u'bar', u'baz'), (u'Bahr', u'Batz'))
    <Message ...>
    >>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
    <Message ...>
    >>> catalog.add('Fizz', '')
    <Message ...>
    >>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
    <Message ...>
    >>> buf = StringIO()

    >>> write_mo(buf, catalog)
    >>> buf.seek(0)
    >>> translations = GNUTranslations(fp=buf)
    >>> translations.ugettext('foo')
    u'Voh'
    >>> translations.ungettext('bar', 'baz', 1)
    u'Bahr'
    >>> translations.ungettext('bar', 'baz', 2)
    u'Batz'
    >>> translations.ugettext('fuz')
    u'fuz'
    >>> translations.ugettext('Fizz')
    u'Fizz'
    >>> translations.ugettext('Fuzz')
    u'Fuzz'
    >>> translations.ugettext('Fuzzes')
    u'Fuzzes'

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param use_fuzzy: whether translations marked as "fuzzy" should be included
                      in the output
    """
    messages = list(catalog)
    if not use_fuzzy:
        # The first entry is never filtered (presumably it is the catalog
        # header message -- confirm against `Catalog.__iter__`).
        messages[1:] = [m for m in messages[1:] if not m.fuzzy]
    messages.sort()

    charset = catalog.charset

    # Collect the NUL terminated keys and values as chunks and join them once
    # at the end, tracking the running table sizes separately.
    id_chunks = []
    str_chunks = []
    ids_len = strs_len = 0
    offsets = []

    for message in messages:
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        if message.pluralizable:
            msgid = '\x00'.join([part.encode(charset) for part in message.id])
            msgstrs = []
            for idx, string in enumerate(message.string):
                if not string:
                    # Untranslated form: fall back to the singular id for the
                    # first form and to the plural id for all others.
                    msgstrs.append(message.id[min(int(idx), 1)])
                else:
                    msgstrs.append(string)
            msgstr = '\x00'.join([part.encode(charset) for part in msgstrs])
        else:
            msgid = message.id.encode(charset)
            if not message.string:
                # Untranslated message: store the id as its own translation.
                msgstr = message.id.encode(charset)
            else:
                msgstr = message.string.encode(charset)
        if message.context:
            # The context is stored in front of the id, separated by EOT.
            msgid = '\x04'.join([message.context.encode(charset), msgid])
        offsets.append((ids_len, len(msgid), strs_len, len(msgstr)))
        id_chunks.append(msgid + '\x00')
        str_chunks.append(msgstr + '\x00')
        ids_len += len(msgid) + 1
        strs_len += len(msgstr) + 1

    ids = ''.join(id_chunks)
    strs = ''.join(str_chunks)

    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables.
    keystart = 7 * 4 + 16 * len(messages)
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    offsets = koffsets + voffsets

    fileobj.write(struct.pack('Iiiiiii',
        LE_MAGIC,                   # magic
        0,                          # version
        len(messages),              # number of entries
        7 * 4,                      # start of key index
        7 * 4 + len(messages) * 8,  # start of value index
        0, 0                        # size and offset of hash table
    ) + array.array("i", offsets).tostring() + ids + strs)