changeset 940:303af96ec546 stable-0.6.x

Merge r1163 and r1164 from trunk (fix Markup event caching issue in serializers, issue #429).
author hodgestar
date Sun, 12 Jun 2011 00:45:56 +0000
parents a1fa93e72d69
children 5c7d69204287
files genshi/output.py genshi/tests/output.py
diffstat 2 files changed, 68 insertions(+), 48 deletions(-)
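
For context on issue #429: the serializers cache already-serialized output keyed on (kind, data). Because Markup is a unicode subclass, a Markup literal and a plain string with the same characters hash and compare equal, so they land on the same cache slot and the output cached for one could be replayed for the other (escaped where it should be verbatim, or vice versa). The merged fix simply bypasses the cache for Markup text events. A minimal illustrative sketch of the key collision (not part of the changeset):

    # Markup subclasses unicode, so it hashes and compares equal to a plain
    # string with the same characters and therefore hits the same cache key.
    from genshi.core import Markup

    cache = {}
    cache[('TEXT', u'&hellip;')] = u'&amp;hellip;'  # escaped output for plain text

    # Before this fix the serializer would find the plain string's entry here
    # and wrongly yield the escaped form for the Markup value.
    assert ('TEXT', Markup('&hellip;')) in cache
    assert cache[('TEXT', Markup('&hellip;'))] == u'&amp;hellip;'
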
--- a/genshi/output.py
+++ b/genshi/output.py
@@ -79,6 +79,30 @@
     return method(**kwargs)
 
 
+def _prepare_cache(use_cache=True):
+    """Prepare a private token serialization cache.
+
+    :param use_cache: boolean indicating whether a real cache should
+                      be used or not. If not, the returned functions
+                      are no-ops.
+
+    :return: emit and get functions, for storing and retrieving
+             serialized values from the cache.
+    """
+    cache = {}
+    if use_cache:
+        def _emit(kind, input, output):
+            cache[kind, input] = output
+            return output
+        _get = cache.get
+    else:
+        def _emit(kind, input, output):
+            return output
+        def _get(key):
+            pass
+    return _emit, _get, cache
+
+
 class DocType(object):
     """Defines a number of commonly used DOCTYPE declarations as constants."""
 
@@ -204,27 +228,23 @@
             self.filters.append(DocTypeInserter(doctype))
         self.cache = cache
 
+    def _prepare_cache(self):
+        return _prepare_cache(self.cache)[:2]
+
     def __call__(self, stream):
         have_decl = have_doctype = False
         in_cdata = False
-
-        cache = {}
-        cache_get = cache.get
-        if self.cache:
-            def _emit(kind, input, output):
-                cache[kind, input] = output
-                return output
-        else:
-            def _emit(kind, input, output):
-                return output
+        _emit, _get = self._prepare_cache()
 
         for filter_ in self.filters:
             stream = filter_(stream)
         for kind, data, pos in stream:
-            cached = cache_get((kind, data))
+            if kind is TEXT and isinstance(data, Markup):
+                yield data
+                continue
+            cached = _get((kind, data))
             if cached is not None:
                 yield cached
-
             elif kind is START or kind is EMPTY:
                 tag, attrib = data
                 buf = ['<', tag]
@@ -323,21 +343,15 @@
         drop_xml_decl = self.drop_xml_decl
         have_decl = have_doctype = False
         in_cdata = False
-
-        cache = {}
-        cache_get = cache.get
-        if self.cache:
-            def _emit(kind, input, output):
-                cache[kind, input] = output
-                return output
-        else:
-            def _emit(kind, input, output):
-                return output
+        _emit, _get = self._prepare_cache()
 
         for filter_ in self.filters:
             stream = filter_(stream)
         for kind, data, pos in stream:
-            cached = cache_get((kind, data))
+            if kind is TEXT and isinstance(data, Markup):
+                yield data
+                continue
+            cached = _get((kind, data))
             if cached is not None:
                 yield cached
 
@@ -454,21 +468,15 @@
         noescape_elems = self._NOESCAPE_ELEMS
         have_doctype = False
         noescape = False
-
-        cache = {}
-        cache_get = cache.get
-        if self.cache:
-            def _emit(kind, input, output):
-                cache[kind, input] = output
-                return output
-        else:
-            def _emit(kind, input, output):
-                return output
+        _emit, _get = self._prepare_cache()
 
         for filter_ in self.filters:
             stream = filter_(stream)
         for kind, data, _ in stream:
-            output = cache_get((kind, data))
+            if kind is TEXT and isinstance(data, Markup):
+                yield data
+                continue
+            output = _get((kind, data))
             if output is not None:
                 yield output
                 if (kind is START or kind is EMPTY) \
@@ -626,18 +634,9 @@
         self.cache = cache
 
     def __call__(self, stream):
-        cache = {}
-        cache_get = cache.get
-        if self.cache:
-            def _emit(kind, input, output, pos):
-                cache[kind, input] = output
-                return kind, output, pos
-        else:
-            def _emit(kind, input, output, pos):
-                return output
-
         prefixes = dict([(v, [k]) for k, v in self.prefixes.items()])
         namespaces = {XML_NAMESPACE.uri: ['xml']}
+        _emit, _get, cache = _prepare_cache(self.cache)
         def _push_ns(prefix, uri):
             namespaces.setdefault(uri, []).append(prefix)
             prefixes.setdefault(prefix, []).append(uri)
@@ -668,7 +667,10 @@
         _gen_prefix = _gen_prefix().next
 
         for kind, data, pos in stream:
-            output = cache_get((kind, data))
+            if kind is TEXT and isinstance(data, Markup):
+                yield kind, data, pos
+                continue
+            output = _get((kind, data))
             if output is not None:
                 yield kind, output, pos
 
@@ -701,7 +703,8 @@
                             attrname = '%s:%s' % (prefix, attrname)
                     new_attrs.append((attrname, value))
 
-                yield _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs)), pos)
+                data = _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs)))
+                yield kind, data, pos
                 del ns_attrs[:]
 
             elif kind is END:
@@ -711,7 +714,7 @@
                     prefix = namespaces[tagns][-1]
                     if prefix:
                         tagname = '%s:%s' % (prefix, tagname)
-                yield _emit(kind, data, tagname, pos)
+                yield kind, _emit(kind, data, tagname), pos
 
             elif kind is START_NS:
                 prefix, uri = data
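
The closure setup that was previously duplicated across the serializers now lives in the module-level _prepare_cache helper added above. A small sketch of its contract on this branch (the returned triple is (_emit, _get, cache); with use_cache=False both functions degrade to no-ops):

    from genshi.output import _prepare_cache

    _emit, _get, cache = _prepare_cache(use_cache=True)
    _emit('START', 'foo', '<foo>')            # stores the value and returns it
    assert _get(('START', 'foo')) == '<foo>'

    _emit, _get, cache = _prepare_cache(use_cache=False)
    _emit('START', 'foo', '<foo>')            # still returns '<foo>', stores nothing
    assert _get(('START', 'foo')) is None
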
--- a/genshi/tests/output.py
+++ b/genshi/tests/output.py
@@ -15,7 +15,7 @@
 import unittest
 import sys
 
-from genshi.core import Attrs, Stream, QName
+from genshi.core import Attrs, Markup, QName, Stream
 from genshi.input import HTML, XML
 from genshi.output import DocType, XMLSerializer, XHTMLSerializer, \
                           HTMLSerializer, EmptyTagFilter
@@ -361,6 +361,23 @@
                                encoding=None)
         self.assertEqual('<!DOCTYPE html>\n<html></html>', output)
 
+    def test_ignorable_space(self):
+        text = '<foo> Mess  \n\n\n with me!  </foo>'
+        output = XML(text).render(XMLSerializer, encoding=None)
+        self.assertEqual('<foo> Mess\n with me!  </foo>', output)
+
+    def test_cache_markup(self):
+        loc = (None, -1, -1)
+        stream = Stream([(Stream.START, (QName('foo'), Attrs()), loc),
+                         (Stream.TEXT, u'&hellip;', loc),
+                         (Stream.END, QName('foo'), loc),
+                         (Stream.START, (QName('bar'), Attrs()), loc),
+                         (Stream.TEXT, Markup('&hellip;'), loc),
+                         (Stream.END, QName('bar'), loc)])
+        output = stream.render(XMLSerializer, encoding=None, 
+                               strip_whitespace=False)
+        self.assertEqual('<foo>&amp;hellip;</foo><bar>&hellip;</bar>', output)
+
 
 class HTMLSerializerTestCase(unittest.TestCase):
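
The serializers keep their existing cache constructor flag; this merge only changes how the caching closures are built and skips the cache for Markup text events. As a rough sanity check (class names as in Genshi 0.6.x), caching can still be disabled per serializer instance:

    from genshi.input import XML
    from genshi.output import XMLSerializer

    # cache=False wires in the no-op _emit/_get pair, so every event is
    # serialized afresh; the rendered output is identical either way.
    print(''.join(XMLSerializer(cache=False)(XML('<p>hello</p>'))))  # <p>hello</p>
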
 