# HG changeset patch # User cmlenz # Date 1161535458 0 # Node ID cb7326367f915b193a39ffb17fd1e34abb21cc1c # Parent 8de1ff534d22add7a9616ded78b39849945c8503 Follow-up to [385]: also decode attribute values in the `HTMLParser`. diff --git a/genshi/input.py b/genshi/input.py --- a/genshi/input.py +++ b/genshi/input.py @@ -309,8 +309,10 @@ fixed_attrib = [] for name, value in attrib: # Fixup minimized attributes if value is None: - value = name - fixed_attrib.append((name, unicode(stripentities(value)))) + value = unicode(name) + elif not isinstance(value, unicode): + value = value.decode(self.encoding, 'replace') + fixed_attrib.append((name, stripentities(value))) self._enqueue(START, (QName(tag), Attrs(fixed_attrib))) if tag in self._EMPTY_ELEMS: diff --git a/genshi/tests/input.py b/genshi/tests/input.py --- a/genshi/tests/input.py +++ b/genshi/tests/input.py @@ -120,13 +120,20 @@ if sys.version_info[:2] >= (2, 4): self.assertEqual((None, 1, 6), pos) - def test_input_encoding(self): + def test_input_encoding_text(self): text = u'
<div>\xf6</div>
'.encode('iso-8859-1') events = list(HTMLParser(StringIO(text), encoding='iso-8859-1')) kind, data, pos = events[1] self.assertEqual(Stream.TEXT, kind) self.assertEqual(u'\xf6', data) + def test_input_encoding_attribute(self): + text = u'
<div title="\xf6"></div>'.encode('iso-8859-1') + events = list(HTMLParser(StringIO(text), encoding='iso-8859-1')) + kind, (tag, attrib), pos = events[0] + self.assertEqual(Stream.START, kind) + self.assertEqual(u'\xf6', attrib.get('title')) + + def test_unicode_input(self): text = u'
<div>\u2013</div>
' events = list(HTMLParser(StringIO(text)))