# HG changeset patch # User cmlenz # Date 1213117552 0 # Node ID 6e86b862af57c0da112df95e2da476cdff0920e0 # Parent 355a977c92aa755077e7259046ce084e71e2e99f Add basic MO file reading in preparation for #54. diff --git a/babel/messages/mofile.py b/babel/messages/mofile.py --- a/babel/messages/mofile.py +++ b/babel/messages/mofile.py @@ -21,9 +21,109 @@ import array import struct -__all__ = ['write_mo'] +from babel.messages.catalog import Catalog, Message + +__all__ = ['read_mo', 'write_mo'] __docformat__ = 'restructuredtext en' + +LE_MAGIC = 0x950412deL +BE_MAGIC = 0xde120495L + +def read_mo(fileobj): + """Read a binary MO file from the given file-like object and return a + corresponding `Catalog` object. + + :param fileobj: the file-like object to read the MO file from + :return: a catalog object representing the parsed MO file + :rtype: `Catalog` + + :note: The implementation of this function is heavily based on the + ``GNUTranslations._parse`` method of the ``gettext`` module in the + standard library. + """ + catalog = Catalog() + headers = {} + + unpack = struct.unpack + filename = getattr(fileobj, 'name', '') + charset = None + + buf = fileobj.read() + buflen = len(buf) + + # Parse the .mo file header, which consists of 5 little endian 32 + # bit words. 
+ magic = unpack('4I', buf[4:20]) + ii = '>II' + else: + raise IOError(0, 'Bad magic number', filename) + + # Now put all messages from the .mo file buffer into the catalog + # dictionary + for i in xrange(0, msgcount): + mlen, moff = unpack(ii, buf[masteridx:masteridx + 8]) + mend = moff + mlen + tlen, toff = unpack(ii, buf[transidx:transidx + 8]) + tend = toff + tlen + if mend < buflen and tend < buflen: + msg = buf[moff:mend] + tmsg = buf[toff:tend] + else: + raise IOError(0, 'File is corrupt', filename) + + # See if we're looking at GNU .mo conventions for metadata + if mlen == 0: + # Catalog description + lastkey = key = None + for item in tmsg.splitlines(): + item = item.strip() + if not item: + continue + if ':' in item: + key, value = item.split(':', 1) + lastkey = key = key.strip().lower() + value = value.strip() + headers[key] = value + if key == 'content-type': + charset = value.split('charset=')[1] + elif lastkey: + self._info[lastkey] += '\n' + item + + # Note: we unconditionally convert both msgids and msgstrs to + # Unicode using the character encoding specified in the charset + # parameter of the Content-Type header. The gettext documentation + # strongly encourages msgids to be us-ascii, but some applications + # require alternative encodings (e.g. Zope's ZCML and ZPT). For + # traditional gettext applications, the msgid conversion will + # cause no problems since us-ascii should always be a subset of + # the charset encoding. We may want to fall back to 8-bit msgids + # if the Unicode conversion fails. 
+ if '\x00' in msg: + # Plural forms + msg = msg.split('\x00') + tmsg = tmsg.split('\x00') + if charset: + msg = [unicode(x, charset) for x in msg] + tmsg = [unicode(x, charset) for x in tmsg] + else: + if charset: + msg = unicode(msg, charset) + tmsg = unicode(tmsg, charset) + catalog[msg] = Message(msg, tmsg) + + # advance to next entry in the seek tables + masteridx += 8 + transidx += 8 + + catalog.mime_headers = headers.items() + return catalog + def write_mo(fileobj, catalog, use_fuzzy=False): """Write a catalog to the specified file-like object using the GNU MO file format. @@ -112,7 +212,7 @@ offsets = koffsets + voffsets fileobj.write(struct.pack('Iiiiiii', - 0x950412deL, # magic + LE_MAGIC, # magic 0, # version len(messages), # number of entries 7 * 4, # start of key index diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -122,8 +122,8 @@ means it's a template) :param domain: the message domain :param ignore_obsolete: whether to ignore obsolete messages in the input - :return: an iterator over ``(message, translation, location)`` tuples - :rtype: ``iterator`` + :return: a catalog object representing the parsed PO file + :rtype: `Catalog` """ catalog = Catalog(locale=locale, domain=domain) diff --git a/babel/messages/tests/data/project/i18n/de/LC_MESSAGES/messages.mo b/babel/messages/tests/data/project/i18n/de/LC_MESSAGES/messages.mo new file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..21b1727014d7301c78245ccf10b82e484b40c36b GIT binary patch literal 547 zc${63&u-H|5XQIVPbD8XAdVb{8>kwyc2hNMNtLv5P$V0o#Bha8JaLVj-Dr1R(bquY z^>`L0q*A1g^wZ4Fe7l6{sSXY%J?}xO}O&f={ie_I*ym6!hKZRo*NI@#)12-YBT( z`(#!s?ctq%i2h8v6&`Zsj#O%*@JGjj0v%mraO1ks-(bWh?I$T, 2007. 
+# +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" +"PO-Revision-Date: 2007-07-30 22:18+0200\n" +"Last-Translator: FULL NAME \n" +"Language-Team: de_DE \n" +"Plural-Forms: nplurals=2; plural=(n != 1)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 0.9dev-r245\n" + +#. This will be a translator coment, +#. that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "Stange" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "Fuhstange" +msgstr[1] "Fuhstangen" + diff --git a/babel/messages/tests/mofile.py b/babel/messages/tests/mofile.py --- a/babel/messages/tests/mofile.py +++ b/babel/messages/tests/mofile.py @@ -13,11 +13,33 @@ import doctest import gettext +import os import unittest from StringIO import StringIO from babel.messages import mofile, Catalog + +class ReadMoTestCase(unittest.TestCase): + + def setUp(self): + self.datadir = os.path.join(os.path.dirname(__file__), 'data') + + def test_basics(self): + mo_file = open(os.path.join(self.datadir, 'project', 'i18n', 'de', + 'LC_MESSAGES', 'messages.mo')) + try: + catalog = mofile.read_mo(mo_file) + self.assertEqual(2, len(catalog)) + self.assertEqual('TestProject', catalog.project) + self.assertEqual('0.1', catalog.version) + self.assertEqual('Stange', catalog['bar'].string) + self.assertEqual(['Fuhstange', 'Fuhstangen'], + catalog['foobar'].string) + finally: + mo_file.close() + + class WriteMoTestCase(unittest.TestCase): def test_sorting(self): @@ -57,6 +79,7 @@ def suite(): suite = unittest.TestSuite() suite.addTest(doctest.DocTestSuite(mofile)) + suite.addTest(unittest.makeSuite(ReadMoTestCase)) suite.addTest(unittest.makeSuite(WriteMoTestCase)) return suite