diff babel/messages/mofile.py @ 336:6e86b862af57

Add basic MO file reading in preparation for #54.
author cmlenz
date Tue, 10 Jun 2008 17:05:52 +0000
parents a7dff175b14f
children 662d332c0a2b
line wrap: on
line diff
--- a/babel/messages/mofile.py
+++ b/babel/messages/mofile.py
@@ -21,9 +21,109 @@
 import array
 import struct
 
-__all__ = ['write_mo']
+from babel.messages.catalog import Catalog, Message
+
+__all__ = ['read_mo', 'write_mo']
 __docformat__ = 'restructuredtext en'
 
+
+LE_MAGIC = 0x950412deL  # magic number of a little endian MO file
+BE_MAGIC = 0xde120495L  # LE_MAGIC byte-swapped: big endian MO file
+
+def read_mo(fileobj):
+    """Read a binary MO file from the given file-like object and return a
+    corresponding `Catalog` object.
+
+    :param fileobj: the file-like object to read the MO file from
+    :return: a catalog object representing the parsed MO file
+    :rtype: `Catalog`
+    :raise IOError: if the magic number is unknown or the file is corrupt
+
+    :note: The implementation of this function is heavily based on the
+           ``GNUTranslations._parse`` method of the ``gettext`` module in the
+           standard library.
+    """
+    catalog = Catalog()
+    headers = {}
+
+    unpack = struct.unpack
+    filename = getattr(fileobj, 'name', '')
+    charset = None
+
+    buf = fileobj.read()
+    buflen = len(buf)
+
+    # Parse the .mo file header, which consists of 5 32 bit words: the magic
+    # number, the version, the message count and the two table offsets.
+    magic = unpack('<I', buf[:4])[0] # Are we big endian or little endian?
+    if magic == LE_MAGIC:
+        version, msgcount, masteridx, transidx = unpack('<4I', buf[4:20])
+        ii = '<II'
+    elif magic == BE_MAGIC:
+        version, msgcount, masteridx, transidx = unpack('>4I', buf[4:20])
+        ii = '>II'
+    else:
+        raise IOError(0, 'Bad magic number', filename)
+
+    # Now put all messages from the .mo file buffer into the catalog. Every
+    # entry of the two index tables is a (length, offset) pair.
+    for i in xrange(0, msgcount):
+        mlen, moff = unpack(ii, buf[masteridx:masteridx + 8])
+        mend = moff + mlen
+        tlen, toff = unpack(ii, buf[transidx:transidx + 8])
+        tend = toff + tlen
+        if mend < buflen and tend < buflen:
+            msg = buf[moff:mend]
+            tmsg = buf[toff:tend]
+        else:
+            raise IOError(0, 'File is corrupt', filename)
+
+        # See if we're looking at GNU .mo conventions for metadata
+        if mlen == 0:
+            # Catalog description
+            lastkey = None
+            for item in tmsg.splitlines():
+                item = item.strip()
+                if not item:
+                    continue
+                if ':' in item:
+                    key, value = item.split(':', 1)
+                    lastkey = key = key.strip().lower()
+                    value = value.strip()
+                    headers[key] = value
+                    # The charset parameter is optional in Content-Type
+                    if key == 'content-type' and 'charset=' in value:
+                        charset = value.split('charset=')[1]
+                elif lastkey:
+                    # Continuation line: append to the previous header
+                    headers[lastkey] += '\n' + item
+
+        # Note: both msgids and msgstrs are converted to Unicode using the
+        # character encoding specified in the charset parameter of the
+        # Content-Type header, if any.  The gettext documentation strongly
+        # encourages msgids to be us-ascii, but some applications require
+        # alternative encodings (e.g. Zope's ZCML and ZPT).  We may want to
+        # fall back to 8-bit msgids if the Unicode conversion fails.
+        if '\x00' in msg:
+            # Plural forms
+            msg = msg.split('\x00')
+            tmsg = tmsg.split('\x00')
+            if charset:
+                msg = [unicode(x, charset) for x in msg]
+                tmsg = [unicode(x, charset) for x in tmsg]
+        else:
+            if charset:
+                msg = unicode(msg, charset)
+                tmsg = unicode(tmsg, charset)
+        catalog[msg] = Message(msg, tmsg)
+
+        # advance to next entry in the seek tables
+        masteridx += 8
+        transidx += 8
+
+    catalog.mime_headers = headers.items()
+    return catalog
+
 def write_mo(fileobj, catalog, use_fuzzy=False):
     """Write a catalog to the specified file-like object using the GNU MO file
     format.
@@ -112,7 +212,7 @@
     offsets = koffsets + voffsets
 
     fileobj.write(struct.pack('Iiiiiii',
-        0x950412deL,                # magic
+        LE_MAGIC,                   # magic
         0,                          # version
         len(messages),              # number of entries
         7 * 4,                      # start of key index
Copyright (C) 2012-2017 Edgewall Software