# HG changeset patch # User cmlenz # Date 1164281295 0 # Node ID 74b6bf92f0cd2bf445291dc0b431a4093b71dc30 # Parent 5b859df8b1841210ee235ae0f49d90b558bbf4b8 Fix parsing of processing instructions in HTML input. diff --git a/genshi/input.py b/genshi/input.py --- a/genshi/input.py +++ b/genshi/input.py @@ -349,8 +349,9 @@ self._enqueue(TEXT, text) def handle_pi(self, data): - target, data = data.split(maxsplit=1) - data = data.rstrip('?') + target, data = data.split(None, 1) + if data.endswith('?'): + data = data[:-1] self._enqueue(PI, (target.strip(), data.strip())) def handle_comment(self, text): diff --git a/genshi/tests/input.py b/genshi/tests/input.py --- a/genshi/tests/input.py +++ b/genshi/tests/input.py @@ -173,6 +173,22 @@ self.assertEqual(Stream.TEXT, kind) self.assertEqual(u'\xa0', data) + def test_processing_instruction(self): + text = '<?php echo "Foobar" ?>' + events = list(HTMLParser(StringIO(text))) + kind, (target, data), pos = events[0] + self.assertEqual(Stream.PI, kind) + self.assertEqual(u'php', target) + self.assertEqual(u'echo "Foobar"', data) + + def test_processing_instruction_trailing_qmark(self): + text = '<?php echo "Foobar" ??>' + events = list(HTMLParser(StringIO(text))) + kind, (target, data), pos = events[0] + self.assertEqual(Stream.PI, kind) + self.assertEqual(u'php', target) + self.assertEqual(u'echo "Foobar" ?', data) + def suite(): suite = unittest.TestSuite()