# HG changeset patch
# User cmlenz
# Date 1156941644 0
# Node ID e8c43127d9a994f66681395e06dc4c0eb390e951
# Parent 0a14c2a06be3185317221dfaf7caf7a64a1ca414
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
diff --git a/markup/output.py b/markup/output.py
--- a/markup/output.py
+++ b/markup/output.py
@@ -22,7 +22,7 @@
from sets import ImmutableSet as frozenset
import re
-from markup.core import escape, Markup, Namespace, QName
+from markup.core import escape, Markup, Namespace, QName, StreamEventKind
from markup.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \
END_CDATA, PI, COMMENT, XML_NAMESPACE
@@ -69,7 +69,7 @@
self.preamble = []
if doctype:
self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
- self.filters = []
+ self.filters = [EmptyTagFilter()]
if strip_whitespace:
self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
@@ -82,11 +82,9 @@
stream = chain(self.preamble, stream)
for filter_ in self.filters:
stream = filter_(stream)
- stream = _PushbackIterator(stream)
- pushback = stream.pushback
for kind, data, pos in stream:
- if kind is START:
+ if kind is START or kind is EMPTY:
tag, attrib = data
tagname = tag.localname
@@ -109,12 +107,10 @@
buf += [' ', attrname, '="', escape(value), '"']
ns_attrib = []
- kind, data, pos = stream.next()
- if kind is END:
+ if kind is EMPTY:
buf += ['/>']
else:
buf += ['>']
- pushback((kind, data, pos))
yield Markup(''.join(buf))
@@ -201,11 +197,9 @@
stream = chain(self.preamble, stream)
for filter_ in self.filters:
stream = filter_(stream)
- stream = _PushbackIterator(stream)
- pushback = stream.pushback
for kind, data, pos in stream:
- if kind is START:
+ if kind is START or kind is EMPTY:
tag, attrib = data
tagname = tag.localname
@@ -232,13 +226,12 @@
buf += [' ', attrname, '="', escape(value), '"']
ns_attrib = []
- if (tagns and tagns != namespace.uri) or tagname in empty_elems:
- kind, data, pos = stream.next()
- if kind is END:
+ if kind is EMPTY:
+ if (tagns and tagns != namespace.uri) \
+ or tag.localname in empty_elems:
buf += [' />']
else:
- buf += ['>']
- pushback((kind, data, pos))
+ buf += ['></%s>' % tagname]
else:
buf += ['>']
@@ -333,11 +326,9 @@
stream = chain(self.preamble, stream)
for filter_ in self.filters:
stream = filter_(stream)
- stream = _PushbackIterator(stream)
- pushback = stream.pushback
for kind, data, pos in stream:
- if kind is START:
+ if kind is START or kind is EMPTY:
tag, attrib = data
if not tag.namespace or tag in namespace:
tagname = tag.localname
@@ -352,12 +343,12 @@
else:
buf += [' ', attrname, '="', escape(value), '"']
- if tagname in empty_elems:
- kind, data, pos = stream.next()
- if kind is not END:
- pushback((kind, data, pos))
+ buf += ['>']
- buf += ['>']
+ if kind is EMPTY:
+ if tagname not in empty_elems:
+ buf.append('</%s>' % tagname)
+
yield Markup(''.join(buf))
if tagname in noescape_elems:
@@ -430,6 +421,31 @@
yield unicode(data)
+class EmptyTagFilter(object):
+ """Combines `START` and `END` events into `EMPTY` events for elements that
+ have no contents.
+ """
+
+ EMPTY = StreamEventKind('EMPTY')
+
+ def __call__(self, stream):
+ prev = (None, None, None)
+ for kind, data, pos in stream:
+ if prev[0] is START:
+ if kind is END:
+ prev = EMPTY, prev[1], prev[2]
+ yield prev
+ continue
+ else:
+ yield prev
+ if kind is not START:
+ yield kind, data, pos
+ prev = kind, data, pos
+
+
+EMPTY = EmptyTagFilter.EMPTY
+
+
class WhitespaceFilter(object):
"""A filter that removes extraneous ignorable white space from the
stream."""
@@ -507,26 +523,3 @@
if kind:
yield kind, data, pos
-
-
-class _PushbackIterator(object):
- """A simple wrapper for iterators that allows pushing items back on the
- queue via the `pushback()` method.
-
- That can effectively be used to peek at the next item."""
- __slots__ = ['iterable', 'buf']
-
- def __init__(self, iterable):
- self.iterable = iter(iterable)
- self.buf = []
-
- def __iter__(self):
- return self
-
- def next(self):
- if self.buf:
- return self.buf.pop(0)
- return self.iterable.next()
-
- def pushback(self, item):
- self.buf.append(item)
diff --git a/markup/tests/core.py b/markup/tests/core.py
--- a/markup/tests/core.py
+++ b/markup/tests/core.py
@@ -41,78 +41,78 @@
def test_escape(self):
markup = escape('"&"')
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('<b>"&"</b>', markup)
def test_escape_noquotes(self):
markup = escape('"&"', quotes=False)
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('<b>"&"</b>', markup)
def test_unescape_markup(self):
string = '"&"'
markup = Markup.escape(string)
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals(string, unescape(markup))
def test_add_str(self):
markup = Markup('foo') + '<br/>'
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('foo<br/>', markup)
def test_add_markup(self):
markup = Markup('foo') + Markup('<br/>')
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('foo<br/>', markup)
def test_add_reverse(self):
markup = '<br/>' + Markup('bar')
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('<br/>bar', markup)
def test_mod(self):
markup = Markup('%s') % '&'
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('&', markup)
def test_mod_multi(self):
markup = Markup('%s %s') % ('&', 'boo')
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('& boo', markup)
def test_mul(self):
markup = Markup('foo') * 2
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('foofoo', markup)
def test_mul_reverse(self):
markup = 2 * Markup('foo')
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('foofoo', markup)
def test_join(self):
markup = Markup('<br />').join(['foo', '', Markup('')])
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('foo<br /><bar /><br />', markup)
def test_stripentities_all(self):
markup = Markup('& j').stripentities()
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('& j', markup)
def test_stripentities_keepxml(self):
markup = Markup('& j').stripentities(keepxmlentities=True)
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('& j', markup)
def test_striptags_empty(self):
markup = Markup('<br />').striptags()
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('', markup)
def test_striptags_mid(self):
markup = Markup('fo<br />o').striptags()
- assert isinstance(markup, Markup)
+ assert type(markup) is Markup
self.assertEquals('foo', markup)
diff --git a/markup/tests/output.py b/markup/tests/output.py
--- a/markup/tests/output.py
+++ b/markup/tests/output.py
@@ -18,7 +18,7 @@
from markup.core import Stream
from markup.input import HTML, XML
from markup.output import DocType, XMLSerializer, XHTMLSerializer, \
- HTMLSerializer
+ HTMLSerializer, EmptyTagFilter
class XMLSerializerTestCase(unittest.TestCase):
@@ -163,11 +163,30 @@
output)
+class EmptyTagFilterTestCase(unittest.TestCase):
+
+ def test_empty(self):
+ stream = XML('<elem></elem>') | EmptyTagFilter()
+ self.assertEqual([EmptyTagFilter.EMPTY], [ev[0] for ev in stream])
+
+ def test_text_content(self):
+ stream = XML('<elem>foo</elem>') | EmptyTagFilter()
+ self.assertEqual([Stream.START, Stream.TEXT, Stream.END],
+ [ev[0] for ev in stream])
+
+ def test_elem_content(self):
+ stream = XML('<elem><child/><child/></elem>') | EmptyTagFilter()
+ self.assertEqual([Stream.START, EmptyTagFilter.EMPTY,
+ EmptyTagFilter.EMPTY, Stream.END],
+ [ev[0] for ev in stream])
+
+
def suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(XMLSerializerTestCase, 'test'))
suite.addTest(unittest.makeSuite(XHTMLSerializerTestCase, 'test'))
suite.addTest(unittest.makeSuite(HTMLSerializerTestCase, 'test'))
+ suite.addTest(unittest.makeSuite(EmptyTagFilterTestCase, 'test'))
suite.addTest(doctest.DocTestSuite(XMLSerializer.__module__))
return suite