changeset 545:afdab04b8527

Catalog class should not do decoding of input strings (fixes #256)
author fschwarz
date Sat, 19 Mar 2011 19:34:40 +0000
parents 030ddf3f5b13
children b33c36615fe9
files babel/messages/catalog.py babel/messages/tests/pofile.py
diffstat 2 files changed, 45 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/babel/messages/catalog.py
+++ b/babel/messages/catalog.py
@@ -324,14 +324,7 @@
 
     def _set_mime_headers(self, headers):
         for name, value in headers:
-            if name.lower() == 'content-type':
-                mimetype, params = parse_header(value)
-                if 'charset' in params:
-                    self.charset = params['charset'].lower()
-                break
-        for name, value in headers:
-            name = name.lower().decode(self.charset)
-            value = value.decode(self.charset)
+            name = name.lower()
             if name == 'project-id-version':
                 parts = value.split(' ')
                 self.project = u' '.join(parts[:-1])
@@ -342,6 +335,10 @@
                 self.last_translator = value
             elif name == 'language-team':
                 self.language_team = value
+            elif name == 'content-type':
+                mimetype, params = parse_header(value)
+                if 'charset' in params:
+                    self.charset = params['charset'].lower()
             elif name == 'plural-forms':
                 _, params = parse_header(' ;' + value)
                 self._num_plurals = int(params.get('nplurals', 2))
@@ -590,8 +587,16 @@
             message = current
         elif id == '':
             # special treatment for the header message
-            headers = message_from_string(message.string.encode(self.charset))
-            self.mime_headers = headers.items()
+            def _parse_header(header_string):
+                # message_from_string only works for str, not for unicode
+                headers = message_from_string(header_string.encode('utf8'))
+                decoded_headers = {}
+                for name, value in headers.items():
+                    name = name.decode('utf8')
+                    value = value.decode('utf8')
+                    decoded_headers[name] = value
+                return decoded_headers
+            self.mime_headers = _parse_header(message.string).items()
             self.header_comment = '\n'.join(['# %s' % comment for comment
                                              in message.user_comments])
             self.fuzzy = message.fuzzy
--- a/babel/messages/tests/pofile.py
+++ b/babel/messages/tests/pofile.py
@@ -35,6 +35,27 @@
         catalog = pofile.read_po(buf, domain='mydomain')
         self.assertEqual('mydomain', catalog.domain)
 
+    def test_applies_specified_encoding_during_read(self):
+        buf = StringIO(u'''
+msgid ""
+msgstr ""
+"Project-Id-Version:  3.15\\n"
+"Report-Msgid-Bugs-To: Fliegender Zirkus <fliegender@zirkus.de>\\n"
+"POT-Creation-Date: 2007-09-27 11:19+0700\\n"
+"PO-Revision-Date: 2007-09-27 21:42-0700\\n"
+"Last-Translator: John <cleese@bavaria.de>\\n"
+"Language-Team: German Lang <de@babel.org>\\n"
+"Plural-Forms: nplurals=2; plural=(n != 1)\\n"
+"MIME-Version: 1.0\\n"
+"Content-Type: text/plain; charset=iso-8859-1\\n"
+"Content-Transfer-Encoding: 8bit\\n"
+"Generated-By: Babel 1.0dev-r313\\n"
+
+msgid "foo"
+msgstr "bär"'''.encode('iso-8859-1'))
+        catalog = pofile.read_po(buf, locale='de_DE')
+        self.assertEqual(u'bär', catalog.get('foo').string)
+
     def test_read_multiline(self):
         buf = StringIO(r'''msgid ""
 "Here's some text that\n"
@@ -248,6 +269,15 @@
 msgid "foo"
 msgstr ""''', buf.getvalue().strip())
 
+    def test_write_po_file_with_specified_charset(self):
+        catalog = Catalog(charset='iso-8859-1')
+        catalog.add('foo', u'äöü', locations=[('main.py', 1)])
+        buf = StringIO()
+        pofile.write_po(buf, catalog, omit_header=False)
+        po_file = buf.getvalue().strip()
+        assert r'"Content-Type: text/plain; charset=iso-8859-1\n"' in po_file
+        assert u'msgstr "äöü"'.encode('iso-8859-1') in po_file
+
     def test_duplicate_comments(self):
         catalog = Catalog()
         catalog.add(u'foo', auto_comments=['A comment'])
Copyright (C) 2012-2017 Edgewall Software