# HG changeset patch
# User cmlenz
# Date 1164281295 0
# Node ID 0e0952d85d97b390595b781fd9deb215e0fbe1e0
# Parent f3a8686b80d7c33934b9eba23e7336ce63dc3cba
Fix parsing of processing instructions in HTML input.

diff --git a/genshi/input.py b/genshi/input.py
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -349,8 +349,9 @@
         self._enqueue(TEXT, text)
 
     def handle_pi(self, data):
-        target, data = data.split(maxsplit=1)
-        data = data.rstrip('?')
+        target, data = data.split(None, 1)
+        if data.endswith('?'):
+            data = data[:-1]
         self._enqueue(PI, (target.strip(), data.strip()))
 
     def handle_comment(self, text):
diff --git a/genshi/tests/input.py b/genshi/tests/input.py
--- a/genshi/tests/input.py
+++ b/genshi/tests/input.py
@@ -173,6 +173,22 @@
         self.assertEqual(Stream.TEXT, kind)
         self.assertEqual(u'\xa0', data)
 
+    def test_processing_instruction(self):
+        text = '<?php echo "Foobar" ?>'
+        events = list(HTMLParser(StringIO(text)))
+        kind, (target, data), pos = events[0]
+        self.assertEqual(Stream.PI, kind)
+        self.assertEqual(u'php', target)
+        self.assertEqual(u'echo "Foobar"', data)
+
+    def test_processing_instruction_trailing_qmark(self):
+        text = '<?php echo "Foobar" ??>'
+        events = list(HTMLParser(StringIO(text)))
+        kind, (target, data), pos = events[0]
+        self.assertEqual(Stream.PI, kind)
+        self.assertEqual(u'php', target)
+        self.assertEqual(u'echo "Foobar" ?', data)
+
 
 def suite():
     suite = unittest.TestSuite()