changeset 207:28bfc6aafab7 trunk

The `XMLParser` now correctly handles unicode input. Closes #43.
author cmlenz
date Tue, 29 Aug 2006 12:14:36 +0000
parents 75c9c019de88
children bc146e63c159
files markup/input.py markup/tests/input.py
diffstat 2 files changed, 16 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/markup/input.py
+++ b/markup/input.py
@@ -103,6 +103,8 @@
                                 del self.expat # get rid of circular references
                             done = True
                         else:
+                            if isinstance(data, unicode):
+                                data = data.encode('utf-8')
                             self.expat.Parse(data, False)
                     for event in self._queue:
                         yield event
--- a/markup/tests/input.py
+++ b/markup/tests/input.py
@@ -52,6 +52,13 @@
         self.assertEqual((u'id', u'foo'), attrib[1])
         self.assertEqual((u'class', u'bar'), attrib[2])
 
+    def test_unicode_input(self):
+        text = u'<div>\u2013</div>'
+        events = list(XMLParser(StringIO(text)))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\u2013', data)
+
 
 class HTMLParserTestCase(unittest.TestCase):
 
@@ -74,6 +81,13 @@
         if sys.version_info[:2] >= (2, 4):
             self.assertEqual((None, 1, 6), pos)
 
+    def test_unicode_input(self):
+        text = u'<div>\u2013</div>'
+        events = list(HTMLParser(StringIO(text)))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\u2013', data)
+
 
 def suite():
     suite = unittest.TestSuite()
Copyright (C) 2012-2017 Edgewall Software