# HG changeset patch # User hodgestar # Date 1307839556 0 # Node ID 303af96ec546f88fcc6010935fe774f11cf51258 # Parent a1fa93e72d6948f549c89e2e6f692cf685bf6dbf Merge r1163 and r1164 from trunk (fix Markup event caching issue in serializers, issue #429). diff --git a/genshi/output.py b/genshi/output.py --- a/genshi/output.py +++ b/genshi/output.py @@ -79,6 +79,30 @@ return method(**kwargs) +def _prepare_cache(use_cache=True): + """Prepare a private token serialization cache. + + :param use_cache: boolean indicating whether a real cache should + be used or not. If not, the returned functions + are no-ops. + + :return: emit and get functions, for storing and retrieving + serialized values from the cache. + """ + cache = {} + if use_cache: + def _emit(kind, input, output): + cache[kind, input] = output + return output + _get = cache.get + else: + def _emit(kind, input, output): + return output + def _get(key): + pass + return _emit, _get, cache + + class DocType(object): """Defines a number of commonly used DOCTYPE declarations as constants.""" @@ -204,27 +228,23 @@ self.filters.append(DocTypeInserter(doctype)) self.cache = cache + def _prepare_cache(self): + return _prepare_cache(self.cache)[:2] + def __call__(self, stream): have_decl = have_doctype = False in_cdata = False - - cache = {} - cache_get = cache.get - if self.cache: - def _emit(kind, input, output): - cache[kind, input] = output - return output - else: - def _emit(kind, input, output): - return output + _emit, _get = self._prepare_cache() for filter_ in self.filters: stream = filter_(stream) for kind, data, pos in stream: - cached = cache_get((kind, data)) + if kind is TEXT and isinstance(data, Markup): + yield data + continue + cached = _get((kind, data)) if cached is not None: yield cached - elif kind is START or kind is EMPTY: tag, attrib = data buf = ['<', tag] @@ -323,21 +343,15 @@ drop_xml_decl = self.drop_xml_decl have_decl = have_doctype = False in_cdata = False - - cache = {} - cache_get = cache.get - if self.cache: - def _emit(kind, input, output): - cache[kind, input] = output - return output - else: - def _emit(kind, input, output): - return output + _emit, _get = self._prepare_cache() for filter_ in self.filters: stream = filter_(stream) for kind, data, pos in stream: - cached = cache_get((kind, data)) + if kind is TEXT and isinstance(data, Markup): + yield data + continue + cached = _get((kind, data)) if cached is not None: yield cached @@ -454,21 +468,15 @@ noescape_elems = self._NOESCAPE_ELEMS have_doctype = False noescape = False - - cache = {} - cache_get = cache.get - if self.cache: - def _emit(kind, input, output): - cache[kind, input] = output - return output - else: - def _emit(kind, input, output): - return output + _emit, _get = self._prepare_cache() for filter_ in self.filters: stream = filter_(stream) for kind, data, _ in stream: - output = cache_get((kind, data)) + if kind is TEXT and isinstance(data, Markup): + yield data + continue + output = _get((kind, data)) if output is not None: yield output if (kind is START or kind is EMPTY) \ @@ -626,18 +634,9 @@ self.cache = cache def __call__(self, stream): - cache = {} - cache_get = cache.get - if self.cache: - def _emit(kind, input, output, pos): - cache[kind, input] = output - return kind, output, pos - else: - def _emit(kind, input, output, pos): - return output - prefixes = dict([(v, [k]) for k, v in self.prefixes.items()]) namespaces = {XML_NAMESPACE.uri: ['xml']} + _emit, _get, cache = _prepare_cache(self.cache) def _push_ns(prefix, uri): namespaces.setdefault(uri, []).append(prefix) prefixes.setdefault(prefix, []).append(uri) @@ -668,7 +667,10 @@ _gen_prefix = _gen_prefix().next for kind, data, pos in stream: - output = cache_get((kind, data)) + if kind is TEXT and isinstance(data, Markup): + yield kind, data, pos + continue + output = _get((kind, data)) if output is not None: yield kind, output, pos @@ -701,7 +703,8 @@ attrname = '%s:%s' % (prefix, attrname) new_attrs.append((attrname, value)) - yield _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs)), pos) + data = _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs))) + yield kind, data, pos del ns_attrs[:] elif kind is END: @@ -711,7 +714,7 @@ prefix = namespaces[tagns][-1] if prefix: tagname = '%s:%s' % (prefix, tagname) - yield _emit(kind, data, tagname, pos) + yield kind, _emit(kind, data, tagname), pos elif kind is START_NS: prefix, uri = data diff --git a/genshi/tests/output.py b/genshi/tests/output.py --- a/genshi/tests/output.py +++ b/genshi/tests/output.py @@ -15,7 +15,7 @@ import unittest import sys -from genshi.core import Attrs, Stream, QName +from genshi.core import Attrs, Markup, QName, Stream from genshi.input import HTML, XML from genshi.output import DocType, XMLSerializer, XHTMLSerializer, \ HTMLSerializer, EmptyTagFilter @@ -361,6 +361,23 @@ encoding=None) self.assertEqual('\n', output) + def test_ignorable_space(self): + text = ' Mess \n\n\n with me! ' + output = XML(text).render(XMLSerializer, encoding=None) + self.assertEqual(' Mess\n with me! ', output) + + def test_cache_markup(self): + loc = (None, -1, -1) + stream = Stream([(Stream.START, (QName('foo'), Attrs()), loc), + (Stream.TEXT, u'…', loc), + (Stream.END, QName('foo'), loc), + (Stream.START, (QName('bar'), Attrs()), loc), + (Stream.TEXT, Markup('…'), loc), + (Stream.END, QName('bar'), loc)]) + output = stream.render(XMLSerializer, encoding=None, + strip_whitespace=False) + self.assertEqual('&hellip;', output) + class HTMLSerializerTestCase(unittest.TestCase):