Mercurial > genshi > mirror
changeset 378:873ca2a7ec05 trunk
Improve handling of incorrectly nested tags in the HTML parser.
author | cmlenz |
---|---|
date | Thu, 23 Nov 2006 17:41:52 +0000 |
parents | 9aa6aa18fa35 |
children | e1d659c87ddf |
files | ChangeLog genshi/input.py genshi/tests/input.py |
diffstat | 3 files changed, 36 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -29,6 +29,7 @@
    subclasses of `tuple` instead of `list`).
  * `MarkupTemplate`s can now be instantiated from markup streams, in addition
    to strings and file-like objects (ticket #69).
+ * Improve handling of incorrectly nested tags in the HTML parser.
 
 
 Version 0.3.5
```
```diff
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -327,10 +327,9 @@
         if tag not in self._EMPTY_ELEMS:
             while self._open_tags:
                 open_tag = self._open_tags.pop()
+                self._enqueue(END, QName(open_tag))
                 if open_tag.lower() == tag.lower():
                     break
-                self._enqueue(END, QName(open_tag))
-            self._enqueue(END, QName(tag))
 
     def handle_data(self, text):
         if not isinstance(text, unicode):
```
```diff
--- a/genshi/tests/input.py
+++ b/genshi/tests/input.py
@@ -16,7 +16,7 @@
 import sys
 import unittest
 
-from genshi.core import Stream
+from genshi.core import Attrs, Stream
 from genshi.input import XMLParser, HTMLParser, ParseError
 
 
@@ -189,6 +189,39 @@
         self.assertEqual(u'php', target)
         self.assertEqual(u'echo "Foobar" ?', data)
 
+    def test_out_of_order_tags1(self):
+        text = '<span><b>Foobar</span></b>'
+        events = list(HTMLParser(StringIO(text)))
+        self.assertEqual(5, len(events))
+        self.assertEqual((Stream.START, ('span', ())), events[0][:2])
+        self.assertEqual((Stream.START, ('b', ())), events[1][:2])
+        self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2])
+        self.assertEqual((Stream.END, 'b'), events[3][:2])
+        self.assertEqual((Stream.END, 'span'), events[4][:2])
+
+    def test_out_of_order_tags2(self):
+        text = '<span class="baz"><b><i>Foobar</span></b></i>'
+        events = list(HTMLParser(StringIO(text)))
+        self.assertEqual(7, len(events))
+        self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))),
+                         events[0][:2])
+        self.assertEqual((Stream.START, ('b', ())), events[1][:2])
+        self.assertEqual((Stream.START, ('i', ())), events[2][:2])
+        self.assertEqual((Stream.TEXT, 'Foobar'), events[3][:2])
+        self.assertEqual((Stream.END, 'i'), events[4][:2])
+        self.assertEqual((Stream.END, 'b'), events[5][:2])
+        self.assertEqual((Stream.END, 'span'), events[6][:2])
+
+    def test_out_of_order_tags1(self):
+        text = '<span><b>Foobar</i>'
+        events = list(HTMLParser(StringIO(text)))
+        self.assertEqual(5, len(events))
+        self.assertEqual((Stream.START, ('span', ())), events[0][:2])
+        self.assertEqual((Stream.START, ('b', ())), events[1][:2])
+        self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2])
+        self.assertEqual((Stream.END, 'b'), events[3][:2])
+        self.assertEqual((Stream.END, 'span'), events[4][:2])
+
 
 def suite():
     suite = unittest.TestSuite()
```