# HG changeset patch # User cmlenz # Date 1156853676 0 # Node ID 28bfc6aafab75cb635b40390011285da7bf4da7b # Parent 75c9c019de88040ad9fb497f3894ce03d5df02d6 The `XMLParser` now correctly handles unicode input. Closes #43. diff --git a/markup/input.py b/markup/input.py --- a/markup/input.py +++ b/markup/input.py @@ -103,6 +103,8 @@ del self.expat # get rid of circular references done = True else: + if isinstance(data, unicode): + data = data.encode('utf-8') self.expat.Parse(data, False) for event in self._queue: yield event diff --git a/markup/tests/input.py b/markup/tests/input.py --- a/markup/tests/input.py +++ b/markup/tests/input.py @@ -52,6 +52,13 @@ self.assertEqual((u'id', u'foo'), attrib[1]) self.assertEqual((u'class', u'bar'), attrib[2]) + def test_unicode_input(self): + text = u'
<div>\u2013</div>
' + events = list(XMLParser(StringIO(text))) + kind, data, pos = events[1] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\u2013', data) + class HTMLParserTestCase(unittest.TestCase): @@ -74,6 +81,13 @@ if sys.version_info[:2] >= (2, 4): self.assertEqual((None, 1, 6), pos) + def test_unicode_input(self): + text = u'
<div>\u2013</div>
' + events = list(HTMLParser(StringIO(text))) + kind, data, pos = events[1] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\u2013', data) + def suite(): suite = unittest.TestSuite()