changeset 106:2cd83f77cc98 trunk

Fix for #16: the header message (`msgid = ""`) is now treated specially by `read_po` and `Catalog`.
author cmlenz
date Thu, 14 Jun 2007 09:49:00 +0000
parents c62b68a0b65e
children fadbba1d89c8
files babel/messages/catalog.py babel/messages/pofile.py babel/messages/tests/pofile.py babel/util.py
diffstat 4 files changed, 170 insertions(+), 81 deletions(-) [+]
line wrap: on
line diff
--- a/babel/messages/catalog.py
+++ b/babel/messages/catalog.py
@@ -14,6 +14,7 @@
 """Data structures for message catalogs."""
 
 from datetime import datetime
+from email import message_from_string
 import re
 try:
     set
@@ -24,7 +25,7 @@
 from babel import __version__ as VERSION
 from babel.core import Locale
 from babel.messages.plurals import PLURALS
-from babel.util import odict, LOCALTZ, UTC
+from babel.util import odict, LOCALTZ, UTC, FixedOffsetTimezone
 
 __all__ = ['Message', 'Catalog']
 __docformat__ = 'restructuredtext en'
@@ -45,7 +46,8 @@
                        ``(singular, plural)`` tuple for pluralizable messages
         :param locations: a sequence of ``(filenname, lineno)`` tuples
         :param flags: a set or sequence of flags
-        :param comments: a sequence of translator comments for the message
+        :param auto_comments: a sequence of automatic comments for the message
+        :param user_comments: a sequence of user comments for the message
         """
         self.id = id
         if not string and self.pluralizable:
@@ -149,7 +151,10 @@
         self.version = version or 'VERSION' #: the project version
         self.copyright_holder = copyright_holder or 'ORGANIZATION'
         self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'
-        self.last_translator = last_translator #: last translator name + email
+
+        self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
+        """Name and email address of the last translator."""
+
         self.charset = charset or 'utf-8'
 
         if creation_date is None:
@@ -186,11 +191,11 @@
     # This file is distributed under the same license as the Foobar project.
     # FIRST AUTHOR <EMAIL@ADDRESS>, 2007.
     #
-
+    
     :type: `unicode`
     """)
 
-    def mime_headers(self):
+    def _get_mime_headers(self):
         headers = []
         headers.append(('Project-Id-Version',
                         '%s %s' % (self.project, self.version)))
@@ -213,7 +218,46 @@
         headers.append(('Content-Transfer-Encoding', '8bit'))
         headers.append(('Generated-By', 'Babel %s\n' % VERSION))
         return headers
-    mime_headers = property(mime_headers, doc="""\
+
+    def _set_mime_headers(self, headers):
+        """Update the catalog metadata from parsed header fields.
+
+        Only ``Project-Id-Version``, ``Report-Msgid-Bugs-To``,
+        ``Last-Translator`` and ``POT-Creation-Date`` are interpreted (the
+        names are matched case-insensitively); all other fields are ignored.
+
+        :param headers: an iterable of ``(name, value)`` pairs
+        """
+        for name, value in headers:
+            name = name.lower()
+            if name == 'project-id-version':
+                # everything before the last space is the project name
+                parts = value.split(' ')
+                self.project = ' '.join(parts[:-1])
+                self.version = parts[-1]
+            elif name == 'report-msgid-bugs-to':
+                self.msgid_bugs_address = value
+            elif name == 'last-translator':
+                self.last_translator = value
+            elif name == 'pot-creation-date':
+                # FIXME: this should use dates.parse_datetime as soon as that
+                #        is ready
+                # NOTE(review): ``time`` does not appear among the module-level
+                # imports of this file, so it is imported locally to be safe
+                from time import strptime
+                value, sign, hhmm, _ = re.split(r'([+-])(\d{4})$', value, 1)
+                # the zone is written as +HHMM/-HHMM, while
+                # FixedOffsetTimezone expects signed minutes east of UTC
+                minutes = int(hhmm[:2]) * 60 + int(hhmm[2:])
+                if sign == '-':
+                    minutes = -minutes
+                tzoffset = FixedOffsetTimezone(minutes)
+                # the parsed fields are already wall-clock time in that zone,
+                # so attach the zone instead of converting a local timestamp
+                tt = strptime(value, '%Y-%m-%d %H:%M')
+                self.creation_date = datetime(*tt[:5]).replace(tzinfo=tzoffset)
+
+    mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
     The MIME headers of the catalog, used for the special ``msgid ""`` entry.
     
     The behavior of this property changes slightly depending on whether a locale
@@ -380,6 +406,10 @@
             current.user_comments.extend(message.user_comments)
             current.flags |= message.flags
             message = current
+        elif id == '':
+            # special treatment for the header message
+            headers = message_from_string(message.string.encode(self.charset))
+            self.mime_headers = headers.items()
         else:
             if isinstance(id, (list, tuple)):
                 assert isinstance(message.string, (list, tuple))
@@ -403,7 +433,8 @@
                        ``(singular, plural)`` tuple for pluralizable messages
         :param locations: a sequence of ``(filenname, lineno)`` tuples
         :param flags: a set or sequence of flags
-        :param comments: a list of translator comments
+        :param auto_comments: a sequence of automatic comments
+        :param user_comments: a sequence of user comments
         """
         self[id] = Message(id, string, list(locations), flags, auto_comments,
                            user_comments)
--- a/babel/messages/pofile.py
+++ b/babel/messages/pofile.py
@@ -37,12 +37,7 @@
     file-like object and return a `Catalog`.
     
     >>> from StringIO import StringIO
-    >>> buf = StringIO('''# Translations template for PROJECT.
-    ... # Copyright (C) YEAR COPYRIGHT HOLDER
-    ... # This file is distributed under the same license as the PROJECT project.
-    ... # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
-    ... #
-    ... 
+    >>> buf = StringIO('''
     ... #: main.py:1
     ... #, fuzzy, python-format
     ... msgid "foo %(name)s"
@@ -59,12 +54,6 @@
     >>> catalog = read_po(buf)
     >>> catalog.revision_date = datetime(2007, 04, 01)
     
-    >>> print catalog.header_comment
-    # Translations template for PROJECT.
-    # Copyright (C) 2007 ORGANIZATION
-    # This file is distributed under the same license as the PROJECT project.
-    # FIRST AUTHOR <EMAIL@ADDRESS>, 2007.
-    
     >>> for message in catalog:
     ...     if message.id:
     ...         print (message.id, message.string)
@@ -90,19 +79,17 @@
     user_comments = []
     auto_comments = []
     in_msgid = in_msgstr = False
-    in_header = True
-    header_lines = []
 
     def _add_message():
         translations.sort()
         if len(messages) > 1:
-            msgid = tuple(messages)
+            msgid = tuple([denormalize(m) for m in messages])
         else:
-            msgid = messages[0]
+            msgid = denormalize(messages[0])
         if len(translations) > 1:
-            string = tuple([t[1] for t in translations])
+            string = tuple([denormalize(t[1]) for t in translations])
         else:
-            string = translations[0][1]
+            string = denormalize(translations[0][1])
         catalog.add(msgid, string, list(locations), set(flags),
                     list(user_comments), list(auto_comments))
         del messages[:]; del translations[:]; del locations[:];
@@ -111,59 +98,53 @@
     for line in fileobj.readlines():
         line = line.strip()
         if line.startswith('#'):
-            if in_header and line[1:].startswith(' '):
-                header_lines.append(line)
-            else:
-                in_header = in_msgid = in_msgstr = False
-                if messages:
-                    _add_message()
-                if line[1:].startswith(':'):
-                    for location in line[2:].lstrip().split():
-                        filename, lineno = location.split(':', 1)
-                        locations.append((filename, int(lineno)))
-                elif line[1:].startswith(','):
-                    for flag in line[2:].lstrip().split(','):
-                        flags.append(flag.strip())
-                elif line[1:].startswith('.'):
-                    # These are called auto-comments
-                    comment = line[2:].strip()
-                    if comment:
-                        # Just check that we're not adding empty comments
-                        auto_comments.append(comment)
-                elif line[1:].startswith(' '):
-                    # These are called user comments
-                    comment = line[1:].strip()
-                    if comment:
-                        # Just check that we're not adding empty comments
-                        user_comments.append(comment)
+            in_msgid = in_msgstr = False
+            if messages:
+                _add_message()
+            if line[1:].startswith(':'):
+                for location in line[2:].lstrip().split():
+                    filename, lineno = location.split(':', 1)
+                    locations.append((filename, int(lineno)))
+            elif line[1:].startswith(','):
+                for flag in line[2:].lstrip().split(','):
+                    flags.append(flag.strip())
+            elif line[1:].startswith('.'):
+                # These are called auto-comments
+                comment = line[2:].strip()
+                if comment:
+                    # Just check that we're not adding empty comments
+                    auto_comments.append(comment)
+            elif line[1:].startswith(' '):
+                # These are called user comments
+                comment = line[1:].strip()
+                if comment:
+                    # Just check that we're not adding empty comments
+                    user_comments.append(comment)
         else:
-            in_header = False
             if line.startswith('msgid_plural'):
                 in_msgid = True
                 msg = line[12:].lstrip()
-                messages.append(msg[1:-1])
+                messages.append(msg)
             elif line.startswith('msgid'):
                 in_msgid = True
                 if messages:
                     _add_message()
-                msg = line[5:].lstrip()
-                messages.append(msg[1:-1])
+                messages.append(line[5:].lstrip())
             elif line.startswith('msgstr'):
                 in_msgid = False
                 in_msgstr = True
                 msg = line[6:].lstrip()
                 if msg.startswith('['):
                     idx, msg = msg[1:].split(']')
-                    translations.append([int(idx), msg.lstrip()[1:-1]])
+                    translations.append([int(idx), msg.lstrip()])
                 else:
-                    translations.append([0, msg[1:-1]])
+                    translations.append([0, msg])
             elif line.startswith('"'):
                 if in_msgid:
-                    messages[-1] += line.rstrip()[1:-1]
+                    messages[-1] += u'\n' + line.rstrip()
                 elif in_msgstr:
-                    translations[-1][1] += line.rstrip()[1:-1]
+                    translations[-1][1] += u'\n' + line.rstrip()
 
-    catalog.header_comment = '\n'.join(header_lines)
     if messages:
         _add_message()
     return catalog
@@ -193,8 +174,26 @@
                           .replace('\n', '\\n') \
                           .replace('\"', '\\"')
 
+def unescape(string):
+    r"""Strip the enclosing quotes and decode the escapes made by `escape`.
+    
+    >>> print unescape('"Say:\\n  \\"hello, world!\\"\\n"')
+    Say:
+      "hello, world!"
+    <BLANKLINE>
+    
+    :param string: the quoted, escaped string to unescape
+    :return: the unescaped string
+    :rtype: `str` or `unicode`
+    """
+    import re  # local import keeps this function self-contained
+    simple = {'n': '\n', 't': '\t', 'r': '\r'}
+    # single pass: an escaped backslash must not start a new escape sequence
+    return re.sub(r'\\([\\trn"])',
+                  lambda m: simple.get(m.group(1), m.group(1)), string[1:-1])
+
 def normalize(string, width=76):
-    r"""This converts a string into a format that is appropriate for .po files.
+    r"""Convert a string into a format that is appropriate for .po files.
     
     >>> print normalize('''Say:
     ...   "hello, world!"
@@ -253,6 +252,37 @@
         lines[-1] += '\n'
     return u'""\n' + u'\n'.join([escape(l) for l in lines])
 
+def denormalize(string):
+    r"""Reverse the normalization done by the `normalize` function.
+    
+    >>> print denormalize(r'''""
+    ... "Say:\n"
+    ... "  \"hello, world!\"\n"''')
+    Say:
+      "hello, world!"
+    <BLANKLINE>
+    
+    >>> print denormalize(r'''""
+    ... "Say:\n"
+    ... "  \"Lorem ipsum dolor sit "
+    ... "amet, consectetur adipisicing"
+    ... " elit, \"\n"''')
+    Say:
+      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
+    <BLANKLINE>
+    
+    The first line may already carry text (``msgid "foo "`` then ``"bar"``);
+    only a leading empty ``""`` line is skipped before the lines are joined.
+    
+    :param string: the string to denormalize
+    :return: the denormalized string
+    :rtype: `unicode` or `str`
+    """
+    lines = string.splitlines()
+    if string.startswith('""'):
+        lines = lines[1:]
+    return ''.join([unescape(line) for line in lines])
+
 def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
              sort_output=False, sort_by_file=False):
     r"""Write a ``gettext`` PO (portable object) template file for a given
--- a/babel/messages/tests/pofile.py
+++ b/babel/messages/tests/pofile.py
@@ -20,6 +20,23 @@
 from babel.messages import pofile
 
 
+class ReadPoTestCase(unittest.TestCase):
+
+    def test_read_multiline(self):
+        # A msgid split over several quoted lines must be read back as one id.
+        expected_id = ("Here's some text that\nincludesareallylongwordthat"
+                       "mightbutshouldnt throw us into an infinite loop\n")
+        po_source = StringIO(r'''msgid ""
+"Here's some text that\n"
+"includesareallylongwordthatmightbutshouldnt"
+" throw us into an infinite "
+"loop\n"
+msgstr ""''')
+        catalog = pofile.read_po(po_source)
+        self.assertEqual(1, len(catalog))
+        self.assertEqual(expected_id, list(catalog)[1].id)
+
+
 class WritePoTestCase(unittest.TestCase):
 
     def test_join_locations(self):
@@ -110,6 +127,7 @@
 def suite():
     suite = unittest.TestSuite()
     suite.addTest(doctest.DocTestSuite(pofile))
+    suite.addTest(unittest.makeSuite(ReadPoTestCase))
     suite.addTest(unittest.makeSuite(WritePoTestCase))
     return suite
 
--- a/babel/util.py
+++ b/babel/util.py
@@ -142,30 +142,47 @@
         rel_list = [os.path.pardir] * (len(start_list) - i) + path_list[i:]
         return os.path.join(*rel_list)
 
+ZERO = timedelta(0)
+
+
+class FixedOffsetTimezone(tzinfo):
+    """Fixed offset in minutes east from UTC.
+
+    :param offset: the offset from UTC in minutes; negative values are west
+                   of UTC
+    :param name: the zone name reported by `tzname` and `str`; when omitted
+                 a name of the form ``Etc/GMT<signed offset>`` is generated
+    """
+
+    def __init__(self, offset, name=None):
+        self._offset = timedelta(minutes=offset)
+        if name is None:
+            # ``%+d`` always emits exactly one sign, so a negative offset
+            # yields e.g. "Etc/GMT-60" rather than the malformed "+-60"
+            name = 'Etc/GMT%+d' % offset
+        self.zone = name
+
+    def __str__(self):
+        return self.zone
+
+    def __repr__(self):
+        return '<FixedOffset "%s" %s>' % (self.zone, self._offset)
+
+    def utcoffset(self, dt):
+        return self._offset
+
+    def tzname(self, dt):
+        return self.zone
+
+    def dst(self, dt):
+        # a fixed offset never observes daylight saving time
+        return ZERO
+
+
 try:
     from pytz import UTC
 except ImportError:
-    ZERO = timedelta(0)
-
-    class UTC(tzinfo):
-        """Simple `tzinfo` implementation for UTC."""
-
-        def __repr__(self):
-            return '<UTC>'
-
-        def __str__(self):
-            return 'UTC'
-
-        def utcoffset(self, dt):
-            return ZERO
-
-        def tzname(self, dt):
-            return 'UTC'
-
-        def dst(self, dt):
-            return ZERO
-
-    UTC = UTC()
+    UTC = FixedOffsetTimezone(0, 'UTC')
     """`tzinfo` object for UTC (Universal Time).
     
     :type: `tzinfo`
@@ -179,6 +187,7 @@
 
 DSTDIFF = DSTOFFSET - STDOFFSET
 
+
 class LocalTimezone(tzinfo):
 
     def utcoffset(self, dt):
@@ -204,6 +213,7 @@
         tt = time.localtime(stamp)
         return tt.tm_isdst > 0
 
+
 LOCALTZ = LocalTimezone()
 """`tzinfo` object for local time-zone.
 
Copyright (C) 2012-2017 Edgewall Software