Mercurial > genshi > mirror
changeset 312:cb7326367f91 trunk
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
author | cmlenz |
---|---|
date | Sun, 22 Oct 2006 16:44:18 +0000 |
parents | 8de1ff534d22 |
children | d72d842e1083 |
files | genshi/input.py genshi/tests/input.py |
diffstat | 2 files changed, 12 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -309,8 +309,10 @@
         fixed_attrib = []
         for name, value in attrib: # Fixup minimized attributes
             if value is None:
-                value = name
-            fixed_attrib.append((name, unicode(stripentities(value))))
+                value = unicode(name)
+            elif not isinstance(value, unicode):
+                value = value.decode(self.encoding, 'replace')
+            fixed_attrib.append((name, stripentities(value)))
 
         self._enqueue(START, (QName(tag), Attrs(fixed_attrib)))
         if tag in self._EMPTY_ELEMS:
```
```diff
--- a/genshi/tests/input.py
+++ b/genshi/tests/input.py
@@ -120,13 +120,20 @@
         if sys.version_info[:2] >= (2, 4):
             self.assertEqual((None, 1, 6), pos)
 
-    def test_input_encoding(self):
+    def test_input_encoding_text(self):
         text = u'<div>\xf6</div>'.encode('iso-8859-1')
         events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
         kind, data, pos = events[1]
         self.assertEqual(Stream.TEXT, kind)
         self.assertEqual(u'\xf6', data)
 
+    def test_input_encoding_attribute(self):
+        text = u'<div title="\xf6"></div>'.encode('iso-8859-1')
+        events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
+        kind, (tag, attrib), pos = events[0]
+        self.assertEqual(Stream.START, kind)
+        self.assertEqual(u'\xf6', attrib.get('title'))
+
     def test_unicode_input(self):
         text = u'<div>\u2013</div>'
         events = list(HTMLParser(StringIO(text)))
```