Mercurial > genshi > mirror
diff genshi/tests/input.py @ 965:2bfd8f8d241c trunk
Fix parsing of multi-byte characters that occur on 4K boundaries of HTML files (fixes #538).
author | hodgestar |
---|---|
date | Sat, 29 Dec 2012 13:02:20 +0000 |
parents | 18209925c54e |
children | 0f4b2e892a48 |
line wrap: on
line diff
--- a/genshi/tests/input.py
+++ b/genshi/tests/input.py
@@ -253,6 +253,13 @@
         self.assertEqual((Stream.TEXT, "'"), events[1][:2])
         self.assertEqual((Stream.END, 'span'), events[2][:2])
 
+    def test_multibyte_character_on_chunk_boundary(self):
+        text = u'a' * ((4 * 1024) - 1) + u'\xe6'
+        events = list(HTMLParser(BytesIO(text.encode('utf-8')),
+                                 encoding='utf-8'))
+        self.assertEqual(1, len(events))
+        self.assertEqual((Stream.TEXT, text), events[0][:2])
+
 
 def suite():
     suite = unittest.TestSuite()