# HG changeset patch # User cmlenz # Date 1156941644 0 # Node ID e8c43127d9a994f66681395e06dc4c0eb390e951 # Parent 0a14c2a06be3185317221dfaf7caf7a64a1ca414 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator. diff --git a/markup/output.py b/markup/output.py --- a/markup/output.py +++ b/markup/output.py @@ -22,7 +22,7 @@ from sets import ImmutableSet as frozenset import re -from markup.core import escape, Markup, Namespace, QName +from markup.core import escape, Markup, Namespace, QName, StreamEventKind from markup.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \ END_CDATA, PI, COMMENT, XML_NAMESPACE @@ -69,7 +69,7 @@ self.preamble = [] if doctype: self.preamble.append((DOCTYPE, doctype, (None, -1, -1))) - self.filters = [] + self.filters = [EmptyTagFilter()] if strip_whitespace: self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) @@ -82,11 +82,9 @@ stream = chain(self.preamble, stream) for filter_ in self.filters: stream = filter_(stream) - stream = _PushbackIterator(stream) - pushback = stream.pushback for kind, data, pos in stream: - if kind is START: + if kind is START or kind is EMPTY: tag, attrib = data tagname = tag.localname @@ -109,12 +107,10 @@ buf += [' ', attrname, '="', escape(value), '"'] ns_attrib = [] - kind, data, pos = stream.next() - if kind is END: + if kind is EMPTY: buf += ['/>'] else: buf += ['>'] - pushback((kind, data, pos)) yield Markup(''.join(buf)) @@ -201,11 +197,9 @@ stream = chain(self.preamble, stream) for filter_ in self.filters: stream = filter_(stream) - stream = _PushbackIterator(stream) - pushback = stream.pushback for kind, data, pos in stream: - if kind is START: + if kind is START or kind is EMPTY: tag, attrib = data tagname = tag.localname @@ -232,13 +226,12 @@ buf += [' ', attrname, '="', escape(value), '"'] ns_attrib = [] - if (tagns and tagns != namespace.uri) or tagname in empty_elems: - kind, data, pos = stream.next() - if kind is END: + if kind is EMPTY: + if (tagns and tagns != namespace.uri) \ + or tag.localname in empty_elems: buf += [' />'] else: - buf += ['>'] - pushback((kind, data, pos)) + buf += ['>' % tagname] else: buf += ['>'] @@ -333,11 +326,9 @@ stream = chain(self.preamble, stream) for filter_ in self.filters: stream = filter_(stream) - stream = _PushbackIterator(stream) - pushback = stream.pushback for kind, data, pos in stream: - if kind is START: + if kind is START or kind is EMPTY: tag, attrib = data if not tag.namespace or tag in namespace: tagname = tag.localname @@ -352,12 +343,12 @@ else: buf += [' ', attrname, '="', escape(value), '"'] - if tagname in empty_elems: - kind, data, pos = stream.next() - if kind is not END: - pushback((kind, data, pos)) + buf += ['>'] - buf += ['>'] + if kind is EMPTY: + if tagname not in empty_elems: + buf.append('' % tagname) + yield Markup(''.join(buf)) if tagname in noescape_elems: @@ -430,6 +421,31 @@ yield unicode(data) +class EmptyTagFilter(object): + """Combines `START` and `STOP` events into `EMPTY` events for elements that + have no contents. + """ + + EMPTY = StreamEventKind('EMPTY') + + def __call__(self, stream): + prev = (None, None, None) + for kind, data, pos in stream: + if prev[0] is START: + if kind is END: + prev = EMPTY, prev[1], prev[2] + yield prev + continue + else: + yield prev + if kind is not START: + yield kind, data, pos + prev = kind, data, pos + + +EMPTY = EmptyTagFilter.EMPTY + + class WhitespaceFilter(object): """A filter that removes extraneous ignorable white space from the stream.""" @@ -507,26 +523,3 @@ if kind: yield kind, data, pos - - -class _PushbackIterator(object): - """A simple wrapper for iterators that allows pushing items back on the - queue via the `pushback()` method. - - That can effectively be used to peek at the next item.""" - __slots__ = ['iterable', 'buf'] - - def __init__(self, iterable): - self.iterable = iter(iterable) - self.buf = [] - - def __iter__(self): - return self - - def next(self): - if self.buf: - return self.buf.pop(0) - return self.iterable.next() - - def pushback(self, item): - self.buf.append(item) diff --git a/markup/tests/core.py b/markup/tests/core.py --- a/markup/tests/core.py +++ b/markup/tests/core.py @@ -41,78 +41,78 @@ def test_escape(self): markup = escape('"&"') - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('<b>"&"</b>', markup) def test_escape_noquotes(self): markup = escape('"&"', quotes=False) - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('<b>"&"</b>', markup) def test_unescape_markup(self): string = '"&"' markup = Markup.escape(string) - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals(string, unescape(markup)) def test_add_str(self): markup = Markup('foo') + '
' - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('foo<br/>', markup) def test_add_markup(self): markup = Markup('foo') + Markup('
') - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('foo
', markup) def test_add_reverse(self): markup = '
' + Markup('bar') - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('<br/>bar', markup) def test_mod(self): markup = Markup('%s') % '&' - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('&', markup) def test_mod_multi(self): markup = Markup('%s %s') % ('&', 'boo') - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('& boo', markup) def test_mul(self): markup = Markup('foo') * 2 - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('foofoo', markup) def test_mul_reverse(self): markup = 2 * Markup('foo') - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('foofoo', markup) def test_join(self): markup = Markup('
').join(['foo', '', Markup('')]) - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('foo
<bar />
', markup) def test_stripentities_all(self): markup = Markup('& j').stripentities() - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('& j', markup) def test_stripentities_keepxml(self): markup = Markup('& j').stripentities(keepxmlentities=True) - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('& j', markup) def test_striptags_empty(self): markup = Markup('
').striptags() - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('', markup) def test_striptags_mid(self): markup = Markup('fo
o
').striptags() - assert isinstance(markup, Markup) + assert type(markup) is Markup self.assertEquals('foo', markup) diff --git a/markup/tests/output.py b/markup/tests/output.py --- a/markup/tests/output.py +++ b/markup/tests/output.py @@ -18,7 +18,7 @@ from markup.core import Stream from markup.input import HTML, XML from markup.output import DocType, XMLSerializer, XHTMLSerializer, \ - HTMLSerializer + HTMLSerializer, EmptyTagFilter class XMLSerializerTestCase(unittest.TestCase): @@ -163,11 +163,30 @@ output) +class EmptyTagFilterTestCase(unittest.TestCase): + + def test_empty(self): + stream = XML('') | EmptyTagFilter() + self.assertEqual([EmptyTagFilter.EMPTY], [ev[0] for ev in stream]) + + def test_text_content(self): + stream = XML('foo') | EmptyTagFilter() + self.assertEqual([Stream.START, Stream.TEXT, Stream.END], + [ev[0] for ev in stream]) + + def test_elem_content(self): + stream = XML('') | EmptyTagFilter() + self.assertEqual([Stream.START, EmptyTagFilter.EMPTY, + EmptyTagFilter.EMPTY, Stream.END], + [ev[0] for ev in stream]) + + def suite(): suite = unittest.TestSuite() suite.addTest(unittest.makeSuite(XMLSerializerTestCase, 'test')) suite.addTest(unittest.makeSuite(XHTMLSerializerTestCase, 'test')) suite.addTest(unittest.makeSuite(HTMLSerializerTestCase, 'test')) + suite.addTest(unittest.makeSuite(EmptyTagFilterTestCase, 'test')) suite.addTest(doctest.DocTestSuite(XMLSerializer.__module__)) return suite