changeset 336:6e86b862af57

Add basic MO file reading in preparation for #54.
author cmlenz
date Tue, 10 Jun 2008 17:05:52 +0000
parents 355a977c92aa
children 662d332c0a2b
files babel/messages/mofile.py babel/messages/pofile.py babel/messages/tests/data/project/i18n/de/LC_MESSAGES/messages.mo babel/messages/tests/data/project/i18n/de/LC_MESSAGES/messages.po babel/messages/tests/mofile.py
diffstat 5 files changed, 159 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/babel/messages/mofile.py
+++ b/babel/messages/mofile.py
@@ -21,9 +21,109 @@
 import array
 import struct
 
-__all__ = ['write_mo']
+from babel.messages.catalog import Catalog, Message
+
+__all__ = ['read_mo', 'write_mo']
 __docformat__ = 'restructuredtext en'
 
+
+LE_MAGIC = 0x950412deL  # magic word as read from a little endian MO file
+BE_MAGIC = 0xde120495L  # same word, byte-swapped: the file is big endian
+
+def read_mo(fileobj):
+    """Read a binary MO file from the given file-like object and return a
+    corresponding `Catalog` object.
+
+    :param fileobj: the file-like object to read the MO file from
+    :return: a catalog object representing the parsed MO file
+    :rtype: `Catalog`
+    :raise IOError: if the magic number is unknown or the file is corrupt
+
+    :note: The implementation of this function is heavily based on the
+           ``GNUTranslations._parse`` method of the ``gettext`` module in the
+           standard library.
+    """
+    catalog = Catalog()
+    headers = {}
+
+    unpack = struct.unpack
+    filename = getattr(fileobj, 'name', '')
+    charset = None
+
+    buf = fileobj.read()
+    buflen = len(buf)
+
+    # Parse the .mo file header: the magic number, followed by the version,
+    # the message count and the offsets of the two index tables.
+    magic = unpack('<I', buf[:4])[0] # Are we big endian or little endian?
+    if magic == LE_MAGIC:
+        version, msgcount, masteridx, transidx = unpack('<4I', buf[4:20])
+        ii = '<II'
+    elif magic == BE_MAGIC:
+        version, msgcount, masteridx, transidx = unpack('>4I', buf[4:20])
+        ii = '>II'
+    else:
+        raise IOError(0, 'Bad magic number', filename)
+
+    # Now put all messages from the .mo file buffer into the catalog
+    for i in xrange(0, msgcount):
+        mlen, moff = unpack(ii, buf[masteridx:masteridx + 8])
+        mend = moff + mlen
+        tlen, toff = unpack(ii, buf[transidx:transidx + 8])
+        tend = toff + tlen
+        if mend < buflen and tend < buflen:
+            msg = buf[moff:mend]
+            tmsg = buf[toff:tend]
+        else:
+            raise IOError(0, 'File is corrupt', filename)
+
+        # See if we're looking at GNU .mo conventions for metadata
+        if mlen == 0:
+            # Catalog description: one "Key: value" header per line
+            lastkey = None
+            for item in tmsg.splitlines():
+                item = item.strip()
+                if not item:
+                    continue
+                if ':' in item:
+                    key, value = item.split(':', 1)
+                    lastkey = key = key.strip().lower()
+                    value = value.strip()
+                    headers[key] = value
+                    if key == 'content-type' and 'charset=' in value:
+                        charset = value.split('charset=')[1]
+                elif lastkey:
+                    headers[lastkey] += '\n' + item
+
+        # Note: we unconditionally convert both msgids and msgstrs to
+        # Unicode using the character encoding specified in the charset
+        # parameter of the Content-Type header.  The gettext documentation
+        # strongly encourages msgids to be us-ascii, but some applications
+        # require alternative encodings (e.g. Zope's ZCML and ZPT).  For
+        # traditional gettext applications, the msgid conversion will
+        # cause no problems since us-ascii should always be a subset of
+        # the charset encoding.  We may want to fall back to 8-bit msgids
+        # if the Unicode conversion fails.
+        if '\x00' in msg:
+            # Plural forms
+            msg = msg.split('\x00')
+            tmsg = tmsg.split('\x00')
+            if charset:
+                msg = [unicode(x, charset) for x in msg]
+                tmsg = [unicode(x, charset) for x in tmsg]
+        else:
+            if charset:
+                msg = unicode(msg, charset)
+                tmsg = unicode(tmsg, charset)
+        catalog[msg] = Message(msg, tmsg)
+
+        # advance to next entry in the seek tables
+        masteridx += 8
+        transidx += 8
+
+    catalog.mime_headers = headers.items()
+    return catalog
+
 def write_mo(fileobj, catalog, use_fuzzy=False):
     """Write a catalog to the specified file-like object using the GNU MO file
     format.
@@ -112,7 +212,7 @@
     offsets = koffsets + voffsets
 
     fileobj.write(struct.pack('Iiiiiii',
-        0x950412deL,                # magic
+        LE_MAGIC,                   # magic
         0,                          # version
         len(messages),              # number of entries
         7 * 4,                      # start of key index
--- a/babel/messages/pofile.py
+++ b/babel/messages/pofile.py
@@ -122,8 +122,8 @@
                    means it's a template)
     :param domain: the message domain
     :param ignore_obsolete: whether to ignore obsolete messages in the input
-    :return: an iterator over ``(message, translation, location)`` tuples
-    :rtype: ``iterator``
+    :return: a catalog object representing the parsed PO file
+    :rtype: `Catalog`
     """
     catalog = Catalog(locale=locale, domain=domain)
 
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..21b1727014d7301c78245ccf10b82e484b40c36b
GIT binary patch
literal 547
zc${63&u-H|5XQIVPbD8XAdVb{8>kwyc2hNMNtLv5P$V0o#Bha8JaLVj-Dr1R(bquY
z^>`L0q*A1g^wZ4Fe7l<c{=R(rM{qo0UNb%B8Iy~B{CJpY<^@B{?;{}s=jY7Hu@J9z
zGtNii5W73ayJD%GSX+BYUd){RNvnX<3cr!_jWr5N^5LNmGESm7ZLJGf_`0d^x~n~w
zR>6{sSXY%J?}xO}O&f={ie_I*ym6!hKZRo*NI@#)1<D@E1d?-gCQl`IScr4FYy4x~
z1)fPrQ<e1hx?KB!rPIc@I#{P*@-@%l^Qg$+V^)l&`EWEI&$GqiilwIRbWK>2-YBT(
z`<O*}K5U!Vy80@bwVl%~PORH_1!nvI=m+T~?90Jh18)Y9yo-uyksTaKVj1076Ugu?
z_gmgI(68`*+v>(#!s?ctq%i2h8v6&`Zsj#O%*@JGjj0v%mraO1ks-(bWh?I$T<ayZ
a?DB_-?$D*Z^GGZLe}lxNyY-JMBmMz*ew2m)
new file mode 100644
--- /dev/null
+++ b/babel/messages/tests/data/project/i18n/de/LC_MESSAGES/messages.po
@@ -0,0 +1,32 @@
+# German (Germany) translations for TestProject.
+# Copyright (C) 2007 FooBar, Inc.
+# This file is distributed under the same license as the TestProject
+# project.
+# FIRST AUTHOR <EMAIL@ADDRESS>, 2007.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: TestProject 0.1\n"
+"Report-Msgid-Bugs-To: bugs.address@email.tld\n"
+"POT-Creation-Date: 2007-04-01 15:30+0200\n"
+"PO-Revision-Date: 2007-07-30 22:18+0200\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: de_DE <LL@li.org>\n"
+"Plural-Forms: nplurals=2; plural=(n != 1)\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: Babel 0.9dev-r245\n"
+
+#. This will be a translator coment,
+#. that will include several lines
+#: project/file1.py:8
+msgid "bar"
+msgstr "Stange"
+
+#: project/file2.py:9
+msgid "foobar"
+msgid_plural "foobars"
+msgstr[0] "Fuhstange"
+msgstr[1] "Fuhstangen"
+
--- a/babel/messages/tests/mofile.py
+++ b/babel/messages/tests/mofile.py
@@ -13,11 +13,33 @@
 
 import doctest
 import gettext
+import os
 import unittest
 from StringIO import StringIO
 
 from babel.messages import mofile, Catalog
 
+
+class ReadMoTestCase(unittest.TestCase):
+    """Check ``read_mo`` against the binary ``messages.mo`` test fixture."""
+    def setUp(self):
+        self.datadir = os.path.join(os.path.dirname(__file__), 'data')
+
+    def test_basics(self):
+        mo_file = open(os.path.join(self.datadir, 'project', 'i18n', 'de',
+                                    'LC_MESSAGES', 'messages.mo'), 'rb')
+        try:
+            catalog = mofile.read_mo(mo_file)
+            self.assertEqual(2, len(catalog))
+            self.assertEqual('TestProject', catalog.project)
+            self.assertEqual('0.1', catalog.version)
+            self.assertEqual('Stange', catalog['bar'].string)
+            self.assertEqual(['Fuhstange', 'Fuhstangen'],
+                             catalog['foobar'].string)
+        finally:
+            mo_file.close()
+
+
 class WriteMoTestCase(unittest.TestCase):
 
     def test_sorting(self):
@@ -57,6 +79,7 @@
 def suite():
     suite = unittest.TestSuite()
     suite.addTest(doctest.DocTestSuite(mofile))
+    suite.addTest(unittest.makeSuite(ReadMoTestCase))  # read_mo tests
     suite.addTest(unittest.makeSuite(WriteMoTestCase))
     return suite
 
Copyright (C) 2012-2017 Edgewall Software