changeset 312:cb7326367f91 trunk

Follow-up to [385]: also decode attribute values in the `HTMLParser`, using the parser's configured input encoding (undecodable bytes are replaced).
author cmlenz
date Sun, 22 Oct 2006 16:44:18 +0000
parents 8de1ff534d22
children d72d842e1083
files genshi/input.py genshi/tests/input.py
diffstat 2 files changed, 12 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -309,8 +309,10 @@
         fixed_attrib = []
         for name, value in attrib: # Fixup minimized attributes
             if value is None:
-                value = name
-            fixed_attrib.append((name, unicode(stripentities(value))))
+                value = unicode(name)
+            elif not isinstance(value, unicode):
+                value = value.decode(self.encoding, 'replace')
+            fixed_attrib.append((name, stripentities(value)))
 
         self._enqueue(START, (QName(tag), Attrs(fixed_attrib)))
         if tag in self._EMPTY_ELEMS:
--- a/genshi/tests/input.py
+++ b/genshi/tests/input.py
@@ -120,13 +120,20 @@
         if sys.version_info[:2] >= (2, 4):
             self.assertEqual((None, 1, 6), pos)
 
-    def test_input_encoding(self):
+    def test_input_encoding_text(self):
         text = u'<div>\xf6</div>'.encode('iso-8859-1')
         events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
         kind, data, pos = events[1]
         self.assertEqual(Stream.TEXT, kind)
         self.assertEqual(u'\xf6', data)
 
+    def test_input_encoding_attribute(self):
+        text = u'<div title="\xf6"></div>'.encode('iso-8859-1')
+        events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
+        kind, (tag, attrib), pos = events[0]
+        self.assertEqual(Stream.START, kind)
+        self.assertEqual(u'\xf6', attrib.get('title'))
+
     def test_unicode_input(self):
         text = u'<div>\u2013</div>'
         events = list(HTMLParser(StringIO(text)))
Copyright (C) 2012-2017 Edgewall Software