diff genshi/tests/input.py @ 965:2bfd8f8d241c trunk

Fix parsing of multi-byte characters that occur on 4K boundaries of HTML files (fixes #538).
author hodgestar
date Sat, 29 Dec 2012 13:02:20 +0000
parents 18209925c54e
children 0f4b2e892a48
line wrap: on
line diff
--- a/genshi/tests/input.py
+++ b/genshi/tests/input.py
@@ -253,6 +253,13 @@
         self.assertEqual((Stream.TEXT, "'"), events[1][:2])
         self.assertEqual((Stream.END, 'span'), events[2][:2])
 
+    def test_multibyte_character_on_chunk_boundary(self):  # regression test for ticket #538
+        text = u'a' * ((4 * 1024) - 1) + u'\xe6'  # 4095 one-byte chars, then u'\xe6' (two bytes in UTF-8) spanning the 4K mark
+        events = list(HTMLParser(BytesIO(text.encode('utf-8')),  # hand the parser the UTF-8 encoded bytes as a stream
+                                 encoding='utf-8'))
+        self.assertEqual(1, len(events))  # the whole input must come back as exactly one event
+        self.assertEqual((Stream.TEXT, text), events[0][:2])  # ...a TEXT event carrying the original, intact text
+
 
 def suite():
     suite = unittest.TestSuite()
Copyright (C) 2012-2017 Edgewall Software