# HG changeset patch # User cmlenz # Date 1156853676 0 # Node ID 0619a27f5e673ec84d55520fc4745cb0c51ce56e # Parent d122ff386411c781936a83874da3a2a1925fcd2d The `XMLParser` now correctly handles unicode input. Closes #43. diff --git a/markup/input.py b/markup/input.py --- a/markup/input.py +++ b/markup/input.py @@ -103,6 +103,8 @@ del self.expat # get rid of circular references done = True else: + if isinstance(data, unicode): + data = data.encode('utf-8') self.expat.Parse(data, False) for event in self._queue: yield event diff --git a/markup/tests/input.py b/markup/tests/input.py --- a/markup/tests/input.py +++ b/markup/tests/input.py @@ -52,6 +52,13 @@ self.assertEqual((u'id', u'foo'), attrib[1]) self.assertEqual((u'class', u'bar'), attrib[2]) + def test_unicode_input(self): + text = u'<div>\u2013</div>' + events = list(XMLParser(StringIO(text))) + kind, data, pos = events[1] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\u2013', data) + class HTMLParserTestCase(unittest.TestCase): @@ -74,6 +81,13 @@ if sys.version_info[:2] >= (2, 4): self.assertEqual((None, 1, 6), pos) + def test_unicode_input(self): + text = u'<div>\u2013</div>' + events = list(HTMLParser(StringIO(text))) + kind, data, pos = events[1] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\u2013', data) + def suite(): suite = unittest.TestSuite()