Mercurial > genshi > mirror
changeset 207:28bfc6aafab7 trunk
The `XMLParser` now correctly handles unicode input. Closes #43.
author | cmlenz |
---|---|
date | Tue, 29 Aug 2006 12:14:36 +0000 |
parents | 75c9c019de88 |
children | bc146e63c159 |
files | markup/input.py markup/tests/input.py |
diffstat | 2 files changed, 16 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
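(Leading whitespace was lost when this page was extracted; the indentation in the hunk below is reconstructed and approximate.)

```diff
--- a/markup/input.py
+++ b/markup/input.py
@@ -103,6 +103,8 @@
                             del self.expat # get rid of circular references
                         done = True
                     else:
+                        if isinstance(data, unicode):
+                            data = data.encode('utf-8')
                         self.expat.Parse(data, False)
                 for event in self._queue:
                     yield event
```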
```diff
--- a/markup/tests/input.py
+++ b/markup/tests/input.py
@@ -52,6 +52,13 @@
         self.assertEqual((u'id', u'foo'), attrib[1])
         self.assertEqual((u'class', u'bar'), attrib[2])

+    def test_unicode_input(self):
+        text = u'<div>\u2013</div>'
+        events = list(XMLParser(StringIO(text)))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\u2013', data)
+

 class HTMLParserTestCase(unittest.TestCase):

@@ -74,6 +81,13 @@
         if sys.version_info[:2] >= (2, 4):
             self.assertEqual((None, 1, 6), pos)

+    def test_unicode_input(self):
+        text = u'<div>\u2013</div>'
+        events = list(HTMLParser(StringIO(text)))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\u2013', data)
+

 def suite():
     suite = unittest.TestSuite()
```