changeset 212:0141f45c18e1 trunk

Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
author cmlenz
date Wed, 30 Aug 2006 12:40:44 +0000
parents e5151983df0d
children 13d2d4420628
files markup/output.py markup/tests/core.py markup/tests/output.py
diffstat 3 files changed, 75 insertions(+), 63 deletions(-) [+]
line wrap: on
line diff
--- a/markup/output.py
+++ b/markup/output.py
@@ -22,7 +22,7 @@
     from sets import ImmutableSet as frozenset
 import re
 
-from markup.core import escape, Markup, Namespace, QName
+from markup.core import escape, Markup, Namespace, QName, StreamEventKind
 from markup.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \
                         END_CDATA, PI, COMMENT, XML_NAMESPACE
 
@@ -69,7 +69,7 @@
         self.preamble = []
         if doctype:
             self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
-        self.filters = []
+        self.filters = [EmptyTagFilter()]
         if strip_whitespace:
             self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
 
@@ -82,11 +82,9 @@
         stream = chain(self.preamble, stream)
         for filter_ in self.filters:
             stream = filter_(stream)
-        stream = _PushbackIterator(stream)
-        pushback = stream.pushback
         for kind, data, pos in stream:
 
-            if kind is START:
+            if kind is START or kind is EMPTY:
                 tag, attrib = data
 
                 tagname = tag.localname
@@ -109,12 +107,10 @@
                     buf += [' ', attrname, '="', escape(value), '"']
                 ns_attrib = []
 
-                kind, data, pos = stream.next()
-                if kind is END:
+                if kind is EMPTY:
                     buf += ['/>']
                 else:
                     buf += ['>']
-                    pushback((kind, data, pos))
 
                 yield Markup(''.join(buf))
 
@@ -201,11 +197,9 @@
         stream = chain(self.preamble, stream)
         for filter_ in self.filters:
             stream = filter_(stream)
-        stream = _PushbackIterator(stream)
-        pushback = stream.pushback
         for kind, data, pos in stream:
 
-            if kind is START:
+            if kind is START or kind is EMPTY:
                 tag, attrib = data
 
                 tagname = tag.localname
@@ -232,13 +226,12 @@
                         buf += [' ', attrname, '="', escape(value), '"']
                 ns_attrib = []
 
-                if (tagns and tagns != namespace.uri) or tagname in empty_elems:
-                    kind, data, pos = stream.next()
-                    if kind is END:
+                if kind is EMPTY:
+                    if (tagns and tagns != namespace.uri) \
+                            or tag.localname in empty_elems:
                         buf += [' />']
                     else:
-                        buf += ['>']
-                        pushback((kind, data, pos))
+                        buf += ['></%s>' % tagname]
                 else:
                     buf += ['>']
 
@@ -333,11 +326,9 @@
         stream = chain(self.preamble, stream)
         for filter_ in self.filters:
             stream = filter_(stream)
-        stream = _PushbackIterator(stream)
-        pushback = stream.pushback
         for kind, data, pos in stream:
 
-            if kind is START:
+            if kind is START or kind is EMPTY:
                 tag, attrib = data
                 if not tag.namespace or tag in namespace:
                     tagname = tag.localname
@@ -352,12 +343,12 @@
                             else:
                                 buf += [' ', attrname, '="', escape(value), '"']
 
-                    if tagname in empty_elems:
-                        kind, data, pos = stream.next()
-                        if kind is not END:
-                            pushback((kind, data, pos))
+                    buf += ['>']
 
-                    buf += ['>']
+                    if kind is EMPTY:
+                        if tagname not in empty_elems:
+                            buf.append('</%s>' % tagname)
+
                     yield Markup(''.join(buf))
 
                     if tagname in noescape_elems:
@@ -430,6 +421,31 @@
                 yield unicode(data)
 
 
+class EmptyTagFilter(object):
+    """Combines `START` and `END` events into `EMPTY` events for elements that
+    have no contents.
+    """
+
+    EMPTY = StreamEventKind('EMPTY')
+
+    def __call__(self, stream):
+        prev = (None, None, None)
+        for kind, data, pos in stream:
+            if prev[0] is START:
+                if kind is END:
+                    prev = EMPTY, prev[1], prev[2]
+                    yield prev
+                    continue
+                else:
+                    yield prev
+            if kind is not START:
+                yield kind, data, pos
+            prev = kind, data, pos
+
+
+EMPTY = EmptyTagFilter.EMPTY
+
+
 class WhitespaceFilter(object):
     """A filter that removes extraneous ignorable white space from the
     stream."""
@@ -507,26 +523,3 @@
 
                 if kind:
                     yield kind, data, pos
-
-
-class _PushbackIterator(object):
-    """A simple wrapper for iterators that allows pushing items back on the
-    queue via the `pushback()` method.
-    
-    That can effectively be used to peek at the next item."""
-    __slots__ = ['iterable', 'buf']
-
-    def __init__(self, iterable):
-        self.iterable = iter(iterable)
-        self.buf = []
-
-    def __iter__(self):
-        return self
-
-    def next(self):
-        if self.buf:
-            return self.buf.pop(0)
-        return self.iterable.next()
-
-    def pushback(self, item):
-        self.buf.append(item)
--- a/markup/tests/core.py
+++ b/markup/tests/core.py
@@ -41,78 +41,78 @@
 
     def test_escape(self):
         markup = escape('<b>"&"</b>')
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('&lt;b&gt;&#34;&amp;&#34;&lt;/b&gt;', markup)
 
     def test_escape_noquotes(self):
         markup = escape('<b>"&"</b>', quotes=False)
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('&lt;b&gt;"&amp;"&lt;/b&gt;', markup)
 
     def test_unescape_markup(self):
         string = '<b>"&"</b>'
         markup = Markup.escape(string)
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals(string, unescape(markup))
 
     def test_add_str(self):
         markup = Markup('<b>foo</b>') + '<br/>'
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('<b>foo</b>&lt;br/&gt;', markup)
 
     def test_add_markup(self):
         markup = Markup('<b>foo</b>') + Markup('<br/>')
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('<b>foo</b><br/>', markup)
 
     def test_add_reverse(self):
         markup = '<br/>' + Markup('<b>bar</b>')
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('&lt;br/&gt;<b>bar</b>', markup)
 
     def test_mod(self):
         markup = Markup('<b>%s</b>') % '&'
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('<b>&amp;</b>', markup)
 
     def test_mod_multi(self):
         markup = Markup('<b>%s</b> %s') % ('&', 'boo')
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('<b>&amp;</b> boo', markup)
 
     def test_mul(self):
         markup = Markup('<b>foo</b>') * 2
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('<b>foo</b><b>foo</b>', markup)
 
     def test_mul_reverse(self):
         markup = 2 * Markup('<b>foo</b>')
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('<b>foo</b><b>foo</b>', markup)
 
     def test_join(self):
         markup = Markup('<br />').join(['foo', '<bar />', Markup('<baz />')])
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('foo<br />&lt;bar /&gt;<br /><baz />', markup)
 
     def test_stripentities_all(self):
         markup = Markup('&amp; &#106;').stripentities()
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('& j', markup)
 
     def test_stripentities_keepxml(self):
         markup = Markup('&amp; &#106;').stripentities(keepxmlentities=True)
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('&amp; j', markup)
 
     def test_striptags_empty(self):
         markup = Markup('<br />').striptags()
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('', markup)
 
     def test_striptags_mid(self):
         markup = Markup('<a href="#">fo<br />o</a>').striptags()
-        assert isinstance(markup, Markup)
+        assert type(markup) is Markup
         self.assertEquals('foo', markup)
 
 
--- a/markup/tests/output.py
+++ b/markup/tests/output.py
@@ -18,7 +18,7 @@
 from markup.core import Stream
 from markup.input import HTML, XML
 from markup.output import DocType, XMLSerializer, XHTMLSerializer, \
-                          HTMLSerializer
+                          HTMLSerializer, EmptyTagFilter
 
 
 class XMLSerializerTestCase(unittest.TestCase):
@@ -163,11 +163,30 @@
                          output)
 
 
+class EmptyTagFilterTestCase(unittest.TestCase):
+
+    def test_empty(self):
+        stream = XML('<elem></elem>') | EmptyTagFilter()
+        self.assertEqual([EmptyTagFilter.EMPTY], [ev[0] for ev in stream])
+
+    def test_text_content(self):
+        stream = XML('<elem>foo</elem>') | EmptyTagFilter()
+        self.assertEqual([Stream.START, Stream.TEXT, Stream.END],
+                         [ev[0] for ev in stream])
+
+    def test_elem_content(self):
+        stream = XML('<elem><sub /><sub /></elem>') | EmptyTagFilter()
+        self.assertEqual([Stream.START, EmptyTagFilter.EMPTY,
+                          EmptyTagFilter.EMPTY, Stream.END],
+                         [ev[0] for ev in stream])
+
+
 def suite():
     suite = unittest.TestSuite()
     suite.addTest(unittest.makeSuite(XMLSerializerTestCase, 'test'))
     suite.addTest(unittest.makeSuite(XHTMLSerializerTestCase, 'test'))
     suite.addTest(unittest.makeSuite(HTMLSerializerTestCase, 'test'))
+    suite.addTest(unittest.makeSuite(EmptyTagFilterTestCase, 'test'))
     suite.addTest(doctest.DocTestSuite(XMLSerializer.__module__))
     return suite
 
Copyright (C) 2012-2017 Edgewall Software