# HG changeset patch # User fschwarz # Date 1300563280 0 # Node ID afdab04b85274e6cf01c7e17759056e554999d25 # Parent 030ddf3f5b138c43eac7187a31cce035c9d5912a Catalog class should not do decoding of input strings (fixes #256) diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -324,14 +324,7 @@ def _set_mime_headers(self, headers): for name, value in headers: - if name.lower() == 'content-type': - mimetype, params = parse_header(value) - if 'charset' in params: - self.charset = params['charset'].lower() - break - for name, value in headers: - name = name.lower().decode(self.charset) - value = value.decode(self.charset) + name = name.lower() if name == 'project-id-version': parts = value.split(' ') self.project = u' '.join(parts[:-1]) @@ -342,6 +335,10 @@ self.last_translator = value elif name == 'language-team': self.language_team = value + elif name == 'content-type': + mimetype, params = parse_header(value) + if 'charset' in params: + self.charset = params['charset'].lower() elif name == 'plural-forms': _, params = parse_header(' ;' + value) self._num_plurals = int(params.get('nplurals', 2)) @@ -590,8 +587,16 @@ message = current elif id == '': # special treatment for the header message - headers = message_from_string(message.string.encode(self.charset)) - self.mime_headers = headers.items() + def _parse_header(header_string): + # message_from_string only works for str, not for unicode + headers = message_from_string(header_string.encode('utf8')) + decoded_headers = {} + for name, value in headers.items(): + name = name.decode('utf8') + value = value.decode('utf8') + decoded_headers[name] = value + return decoded_headers + self.mime_headers = _parse_header(message.string).items() self.header_comment = '\n'.join(['# %s' % comment for comment in message.user_comments]) self.fuzzy = message.fuzzy diff --git a/babel/messages/tests/pofile.py b/babel/messages/tests/pofile.py --- a/babel/messages/tests/pofile.py +++ b/babel/messages/tests/pofile.py @@ -35,6 +35,27 @@ catalog = pofile.read_po(buf, domain='mydomain') self.assertEqual('mydomain', catalog.domain) + def test_applies_specified_encoding_during_read(self): + buf = StringIO(u''' +msgid "" +msgstr "" +"Project-Id-Version: 3.15\\n" +"Report-Msgid-Bugs-To: Fliegender Zirkus \\n" +"POT-Creation-Date: 2007-09-27 11:19+0700\\n" +"PO-Revision-Date: 2007-09-27 21:42-0700\\n" +"Last-Translator: John \\n" +"Language-Team: German Lang \\n" +"Plural-Forms: nplurals=2; plural=(n != 1)\\n" +"MIME-Version: 1.0\\n" +"Content-Type: text/plain; charset=iso-8859-1\\n" +"Content-Transfer-Encoding: 8bit\\n" +"Generated-By: Babel 1.0dev-r313\\n" + +msgid "foo" +msgstr "bär"'''.encode('iso-8859-1')) + catalog = pofile.read_po(buf, locale='de_DE') + self.assertEqual(u'bär', catalog.get('foo').string) + def test_read_multiline(self): buf = StringIO(r'''msgid "" "Here's some text that\n" @@ -248,6 +269,15 @@ msgid "foo" msgstr ""''', buf.getvalue().strip()) + def test_write_po_file_with_specified_charset(self): + catalog = Catalog(charset='iso-8859-1') + catalog.add('foo', u'äöü', locations=[('main.py', 1)]) + buf = StringIO() + pofile.write_po(buf, catalog, omit_header=False) + po_file = buf.getvalue().strip() + assert r'"Content-Type: text/plain; charset=iso-8859-1\n"' in po_file + assert u'msgstr "äöü"'.encode('iso-8859-1') in po_file + def test_duplicate_comments(self): catalog = Catalog() catalog.add(u'foo', auto_comments=['A comment'])