Mercurial > genshi > mirror
changeset 212:0141f45c18e1 trunk
Refactored the handling of empty tags in the serializers: an `EmptyTagFilter` now combines each adjacent `START`/`END` event pair into a single `EMPTY` event, replacing the look-ahead that was previously done with the generic `_PushbackIterator`.
author | cmlenz |
---|---|
date | Wed, 30 Aug 2006 12:40:44 +0000 |
parents | e5151983df0d |
children | 13d2d4420628 |
files | markup/output.py markup/tests/core.py markup/tests/output.py |
diffstat | 3 files changed, 75 insertions(+), 63 deletions(-) [+] |
line wrap: on
line diff
--- a/markup/output.py +++ b/markup/output.py @@ -22,7 +22,7 @@ from sets import ImmutableSet as frozenset import re -from markup.core import escape, Markup, Namespace, QName +from markup.core import escape, Markup, Namespace, QName, StreamEventKind from markup.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \ END_CDATA, PI, COMMENT, XML_NAMESPACE @@ -69,7 +69,7 @@ self.preamble = [] if doctype: self.preamble.append((DOCTYPE, doctype, (None, -1, -1))) - self.filters = [] + self.filters = [EmptyTagFilter()] if strip_whitespace: self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) @@ -82,11 +82,9 @@ stream = chain(self.preamble, stream) for filter_ in self.filters: stream = filter_(stream) - stream = _PushbackIterator(stream) - pushback = stream.pushback for kind, data, pos in stream: - if kind is START: + if kind is START or kind is EMPTY: tag, attrib = data tagname = tag.localname @@ -109,12 +107,10 @@ buf += [' ', attrname, '="', escape(value), '"'] ns_attrib = [] - kind, data, pos = stream.next() - if kind is END: + if kind is EMPTY: buf += ['/>'] else: buf += ['>'] - pushback((kind, data, pos)) yield Markup(''.join(buf)) @@ -201,11 +197,9 @@ stream = chain(self.preamble, stream) for filter_ in self.filters: stream = filter_(stream) - stream = _PushbackIterator(stream) - pushback = stream.pushback for kind, data, pos in stream: - if kind is START: + if kind is START or kind is EMPTY: tag, attrib = data tagname = tag.localname @@ -232,13 +226,12 @@ buf += [' ', attrname, '="', escape(value), '"'] ns_attrib = [] - if (tagns and tagns != namespace.uri) or tagname in empty_elems: - kind, data, pos = stream.next() - if kind is END: + if kind is EMPTY: + if (tagns and tagns != namespace.uri) \ + or tag.localname in empty_elems: buf += [' />'] else: - buf += ['>'] - pushback((kind, data, pos)) + buf += ['></%s>' % tagname] else: buf += ['>'] @@ -333,11 +326,9 @@ stream = chain(self.preamble, stream) for filter_ in self.filters: stream = 
filter_(stream) - stream = _PushbackIterator(stream) - pushback = stream.pushback for kind, data, pos in stream: - if kind is START: + if kind is START or kind is EMPTY: tag, attrib = data if not tag.namespace or tag in namespace: tagname = tag.localname @@ -352,12 +343,12 @@ else: buf += [' ', attrname, '="', escape(value), '"'] - if tagname in empty_elems: - kind, data, pos = stream.next() - if kind is not END: - pushback((kind, data, pos)) + buf += ['>'] - buf += ['>'] + if kind is EMPTY: + if tagname not in empty_elems: + buf.append('</%s>' % tagname) + yield Markup(''.join(buf)) if tagname in noescape_elems: @@ -430,6 +421,31 @@ yield unicode(data) +class EmptyTagFilter(object): + """Combines `START` and `STOP` events into `EMPTY` events for elements that + have no contents. + """ + + EMPTY = StreamEventKind('EMPTY') + + def __call__(self, stream): + prev = (None, None, None) + for kind, data, pos in stream: + if prev[0] is START: + if kind is END: + prev = EMPTY, prev[1], prev[2] + yield prev + continue + else: + yield prev + if kind is not START: + yield kind, data, pos + prev = kind, data, pos + + +EMPTY = EmptyTagFilter.EMPTY + + class WhitespaceFilter(object): """A filter that removes extraneous ignorable white space from the stream.""" @@ -507,26 +523,3 @@ if kind: yield kind, data, pos - - -class _PushbackIterator(object): - """A simple wrapper for iterators that allows pushing items back on the - queue via the `pushback()` method. - - That can effectively be used to peek at the next item.""" - __slots__ = ['iterable', 'buf'] - - def __init__(self, iterable): - self.iterable = iter(iterable) - self.buf = [] - - def __iter__(self): - return self - - def next(self): - if self.buf: - return self.buf.pop(0) - return self.iterable.next() - - def pushback(self, item): - self.buf.append(item)
--- a/markup/tests/core.py +++ b/markup/tests/core.py @@ -41,78 +41,78 @@ def test_escape(self): markup = escape('<b>"&"</b>') - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('<b>"&"</b>', markup) def test_escape_noquotes(self): markup = escape('<b>"&"</b>', quotes=False) - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('<b>"&"</b>', markup) def test_unescape_markup(self): string = '<b>"&"</b>' markup = Markup.escape(string) - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals(string, unescape(markup)) def test_add_str(self): markup = Markup('<b>foo</b>') + '<br/>' - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('<b>foo</b><br/>', markup) def test_add_markup(self): markup = Markup('<b>foo</b>') + Markup('<br/>') - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('<b>foo</b><br/>', markup) def test_add_reverse(self): markup = '<br/>' + Markup('<b>bar</b>') - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('<br/><b>bar</b>', markup) def test_mod(self): markup = Markup('<b>%s</b>') % '&' - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('<b>&</b>', markup) def test_mod_multi(self): markup = Markup('<b>%s</b> %s') % ('&', 'boo') - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('<b>&</b> boo', markup) def test_mul(self): markup = Markup('<b>foo</b>') * 2 - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('<b>foo</b><b>foo</b>', markup) def test_mul_reverse(self): markup = 2 * Markup('<b>foo</b>') - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('<b>foo</b><b>foo</b>', markup) def test_join(self): markup = Markup('<br />').join(['foo', '<bar />', Markup('<baz />')]) - assert isinstance(markup, Markup) + assert 
type(markup) is Markup self.assertEquals('foo<br /><bar /><br /><baz />', markup) def test_stripentities_all(self): markup = Markup('& j').stripentities() - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('& j', markup) def test_stripentities_keepxml(self): markup = Markup('& j').stripentities(keepxmlentities=True) - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('& j', markup) def test_striptags_empty(self): markup = Markup('<br />').striptags() - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('', markup) def test_striptags_mid(self): markup = Markup('<a href="#">fo<br />o</a>').striptags() - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('foo', markup)
--- a/markup/tests/output.py +++ b/markup/tests/output.py @@ -18,7 +18,7 @@ from markup.core import Stream from markup.input import HTML, XML from markup.output import DocType, XMLSerializer, XHTMLSerializer, \ - HTMLSerializer + HTMLSerializer, EmptyTagFilter class XMLSerializerTestCase(unittest.TestCase): @@ -163,11 +163,30 @@ output) +class EmptyTagFilterTestCase(unittest.TestCase): + + def test_empty(self): + stream = XML('<elem></elem>') | EmptyTagFilter() + self.assertEqual([EmptyTagFilter.EMPTY], [ev[0] for ev in stream]) + + def test_text_content(self): + stream = XML('<elem>foo</elem>') | EmptyTagFilter() + self.assertEqual([Stream.START, Stream.TEXT, Stream.END], + [ev[0] for ev in stream]) + + def test_elem_content(self): + stream = XML('<elem><sub /><sub /></elem>') | EmptyTagFilter() + self.assertEqual([Stream.START, EmptyTagFilter.EMPTY, + EmptyTagFilter.EMPTY, Stream.END], + [ev[0] for ev in stream]) + + def suite(): suite = unittest.TestSuite() suite.addTest(unittest.makeSuite(XMLSerializerTestCase, 'test')) suite.addTest(unittest.makeSuite(XHTMLSerializerTestCase, 'test')) suite.addTest(unittest.makeSuite(HTMLSerializerTestCase, 'test')) + suite.addTest(unittest.makeSuite(EmptyTagFilterTestCase, 'test')) suite.addTest(doctest.DocTestSuite(XMLSerializer.__module__)) return suite