# HG changeset patch
# User hodgestar
# Date 1307839556 0
# Node ID 303af96ec546f88fcc6010935fe774f11cf51258
# Parent a1fa93e72d6948f549c89e2e6f692cf685bf6dbf
Merge r1163 and r1164 from trunk (fix Markup event caching issue in serializers, issue #429).
diff --git a/genshi/output.py b/genshi/output.py
--- a/genshi/output.py
+++ b/genshi/output.py
@@ -79,6 +79,30 @@
return method(**kwargs)
+def _prepare_cache(use_cache=True):
+ """Prepare a private token serialization cache.
+
+ :param use_cache: boolean indicating whether a real cache should
+ be used or not. If not, the returned functions
+ are no-ops.
+
+ :return: emit and get functions, for storing and retrieving
+ serialized values from the cache.
+ """
+ cache = {}
+ if use_cache:
+ def _emit(kind, input, output):
+ cache[kind, input] = output
+ return output
+ _get = cache.get
+ else:
+ def _emit(kind, input, output):
+ return output
+ def _get(key):
+ pass
+ return _emit, _get, cache
+
+
class DocType(object):
"""Defines a number of commonly used DOCTYPE declarations as constants."""
@@ -204,27 +228,23 @@
self.filters.append(DocTypeInserter(doctype))
self.cache = cache
+ def _prepare_cache(self):
+ return _prepare_cache(self.cache)[:2]
+
def __call__(self, stream):
have_decl = have_doctype = False
in_cdata = False
-
- cache = {}
- cache_get = cache.get
- if self.cache:
- def _emit(kind, input, output):
- cache[kind, input] = output
- return output
- else:
- def _emit(kind, input, output):
- return output
+ _emit, _get = self._prepare_cache()
for filter_ in self.filters:
stream = filter_(stream)
for kind, data, pos in stream:
- cached = cache_get((kind, data))
+ if kind is TEXT and isinstance(data, Markup):
+ yield data
+ continue
+ cached = _get((kind, data))
if cached is not None:
yield cached
-
elif kind is START or kind is EMPTY:
tag, attrib = data
buf = ['<', tag]
@@ -323,21 +343,15 @@
drop_xml_decl = self.drop_xml_decl
have_decl = have_doctype = False
in_cdata = False
-
- cache = {}
- cache_get = cache.get
- if self.cache:
- def _emit(kind, input, output):
- cache[kind, input] = output
- return output
- else:
- def _emit(kind, input, output):
- return output
+ _emit, _get = self._prepare_cache()
for filter_ in self.filters:
stream = filter_(stream)
for kind, data, pos in stream:
- cached = cache_get((kind, data))
+ if kind is TEXT and isinstance(data, Markup):
+ yield data
+ continue
+ cached = _get((kind, data))
if cached is not None:
yield cached
@@ -454,21 +468,15 @@
noescape_elems = self._NOESCAPE_ELEMS
have_doctype = False
noescape = False
-
- cache = {}
- cache_get = cache.get
- if self.cache:
- def _emit(kind, input, output):
- cache[kind, input] = output
- return output
- else:
- def _emit(kind, input, output):
- return output
+ _emit, _get = self._prepare_cache()
for filter_ in self.filters:
stream = filter_(stream)
for kind, data, _ in stream:
- output = cache_get((kind, data))
+ if kind is TEXT and isinstance(data, Markup):
+ yield data
+ continue
+ output = _get((kind, data))
if output is not None:
yield output
if (kind is START or kind is EMPTY) \
@@ -626,18 +634,9 @@
self.cache = cache
def __call__(self, stream):
- cache = {}
- cache_get = cache.get
- if self.cache:
- def _emit(kind, input, output, pos):
- cache[kind, input] = output
- return kind, output, pos
- else:
- def _emit(kind, input, output, pos):
- return output
-
prefixes = dict([(v, [k]) for k, v in self.prefixes.items()])
namespaces = {XML_NAMESPACE.uri: ['xml']}
+ _emit, _get, cache = _prepare_cache(self.cache)
def _push_ns(prefix, uri):
namespaces.setdefault(uri, []).append(prefix)
prefixes.setdefault(prefix, []).append(uri)
@@ -668,7 +667,10 @@
_gen_prefix = _gen_prefix().next
for kind, data, pos in stream:
- output = cache_get((kind, data))
+ if kind is TEXT and isinstance(data, Markup):
+ yield kind, data, pos
+ continue
+ output = _get((kind, data))
if output is not None:
yield kind, output, pos
@@ -701,7 +703,8 @@
attrname = '%s:%s' % (prefix, attrname)
new_attrs.append((attrname, value))
- yield _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs)), pos)
+ data = _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs)))
+ yield kind, data, pos
del ns_attrs[:]
elif kind is END:
@@ -711,7 +714,7 @@
prefix = namespaces[tagns][-1]
if prefix:
tagname = '%s:%s' % (prefix, tagname)
- yield _emit(kind, data, tagname, pos)
+ yield kind, _emit(kind, data, tagname), pos
elif kind is START_NS:
prefix, uri = data
diff --git a/genshi/tests/output.py b/genshi/tests/output.py
--- a/genshi/tests/output.py
+++ b/genshi/tests/output.py
@@ -15,7 +15,7 @@
import unittest
import sys
-from genshi.core import Attrs, Stream, QName
+from genshi.core import Attrs, Markup, QName, Stream
from genshi.input import HTML, XML
from genshi.output import DocType, XMLSerializer, XHTMLSerializer, \
HTMLSerializer, EmptyTagFilter
@@ -361,6 +361,23 @@
encoding=None)
self.assertEqual('\n', output)
+ def test_ignorable_space(self):
+ text = '<foo> Mess \n\n\n with me! </foo>'
+ output = XML(text).render(XMLSerializer, encoding=None)
+ self.assertEqual('<foo> Mess\n with me! </foo>', output)
+
+ def test_cache_markup(self):
+ loc = (None, -1, -1)
+ stream = Stream([(Stream.START, (QName('foo'), Attrs()), loc),
+ (Stream.TEXT, u'&hellip;', loc),
+ (Stream.END, QName('foo'), loc),
+ (Stream.START, (QName('bar'), Attrs()), loc),
+ (Stream.TEXT, Markup('&hellip;'), loc),
+ (Stream.END, QName('bar'), loc)])
+ output = stream.render(XMLSerializer, encoding=None,
+ strip_whitespace=False)
+ self.assertEqual('<foo>&amp;hellip;</foo><bar>&hellip;</bar>', output)
+
class HTMLSerializerTestCase(unittest.TestCase):