# HG changeset patch # User cmlenz # Date 1206713666 0 # Node ID 08f22328303d4e05bd7186e5c8fe6257fdc18214 # Parent cfe3b4f02d77ef9a482fb49822dcd6f0a17b4dee Add option for unbuffered match template processing, which could cause excessive memory usage. Closes #190. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -65,6 +65,10 @@ and some metadata. New load functions are supplied for loading from egg package data, and loading from different loaders depending on the path prefix of the requested filename (ticket #182). + * Match templates can now be processed without keeping the complete matched + content in memory, which could cause excessive memory use on long pages. + The buffering can be disabled using the new `buffer` optimization hint on + the `<py:match>` directive. Version 0.4.4 diff --git a/doc/xml-templates.txt b/doc/xml-templates.txt --- a/doc/xml-templates.txt +++ b/doc/xml-templates.txt @@ -358,6 +358,17 @@ +---------------+-----------+-----------------------------------------------+ | Attribute | Default | Description | +===============+===========+===============================================+ +| ``buffer`` | ``true`` | Whether the matched content should be | +| | | buffered in memory. Buffering can improve | +| | | performance a bit at the cost of needing more | +| | | memory during rendering. Buffering is | +| | | ''required'' for match templates that contain | +| | | more than one invocation of the ``select()`` | +| | | function. If there is only one call, and the | +| | | matched content can potentially be very long, | +| | | consider disabling buffering to avoid | +| | | excessive memory use. | ++---------------+-----------+-----------------------------------------------+ | ``once`` | ``false`` | Whether the engine should stop looking for | | | | more matching elements after the first match. 
| | | | Use this on match templates that match | diff --git a/genshi/template/base.py b/genshi/template/base.py --- a/genshi/template/base.py +++ b/genshi/template/base.py @@ -254,18 +254,53 @@ """Pop the top-most scope from the stack.""" -def _apply_directives(stream, ctxt, directives): +def _apply_directives(stream, directives, ctxt, **vars): """Apply the given directives to the stream. :param stream: the stream the directives should be applied to + :param directives: the list of directives to apply :param ctxt: the `Context` - :param directives: the list of directives to apply + :param vars: additional variables that should be available when Python + code is executed :return: the stream with the given directives applied """ if directives: - stream = directives[0](iter(stream), ctxt, directives[1:]) + stream = directives[0](iter(stream), directives[1:], ctxt, **vars) return stream +def _eval_expr(expr, ctxt, **vars): + """Evaluate the given `Expression` object. + + :param expr: the expression to evaluate + :param ctxt: the `Context` + :param vars: additional variables that should be available to the + expression + :return: the result of the evaluation + """ + if vars: + ctxt.push(vars) + retval = expr.evaluate(ctxt) + if vars: + ctxt.pop() + return retval + +def _exec_suite(suite, ctxt, **vars): + """Execute the given `Suite` object. + + :param suite: the code suite to execute + :param ctxt: the `Context` + :param vars: additional variables that should be available to the + code + """ + if vars: + ctxt.push(vars) + ctxt.push({}) + suite.execute(_ctxt2dict(ctxt)) + if vars: + top = ctxt.pop() + ctxt.pop() + ctxt.frames[0].update(top) + class TemplateMeta(type): """Meta class for templates.""" @@ -426,21 +461,24 @@ :return: a markup event stream representing the result of applying the template to the context data. 
""" + vars = {} if args: assert len(args) == 1 ctxt = args[0] if ctxt is None: ctxt = Context(**kwargs) + else: + vars = kwargs assert isinstance(ctxt, Context) else: ctxt = Context(**kwargs) stream = self.stream for filter_ in self.filters: - stream = filter_(iter(stream), ctxt) + stream = filter_(iter(stream), ctxt, **vars) return Stream(stream, self.serializer) - def _eval(self, stream, ctxt): + def _eval(self, stream, ctxt, **vars): """Internal stream filter that evaluates any expressions in `START` and `TEXT` events. """ @@ -460,7 +498,8 @@ else: values = [] for subkind, subdata, subpos in self._eval(substream, - ctxt): + ctxt, + **vars): if subkind is TEXT: values.append(subdata) value = [x for x in values if x is not None] @@ -470,7 +509,7 @@ yield kind, (tag, Attrs(new_attrs)), pos elif kind is EXPR: - result = data.evaluate(ctxt) + result = _eval_expr(data, ctxt, **vars) if result is not None: # First check for a string, otherwise the iterable test # below succeeds, and the string will be chopped up into @@ -482,7 +521,7 @@ elif hasattr(result, '__iter__'): substream = _ensure(result) for filter_ in filters: - substream = filter_(substream, ctxt) + substream = filter_(substream, ctxt, **vars) for event in substream: yield event else: @@ -491,28 +530,29 @@ else: yield kind, data, pos - def _exec(self, stream, ctxt): + def _exec(self, stream, ctxt, **vars): """Internal stream filter that executes Python code blocks.""" for event in stream: if event[0] is EXEC: - event[1].execute(_ctxt2dict(ctxt)) + _exec_suite(event[1], ctxt, **vars) else: yield event - def _flatten(self, stream, ctxt): + def _flatten(self, stream, ctxt, **vars): """Internal stream filter that expands `SUB` events in the stream.""" for event in stream: if event[0] is SUB: # This event is a list of directives and a list of nested # events to which those directives should be applied directives, substream = event[1] - substream = _apply_directives(substream, ctxt, directives) - for event in 
self._flatten(substream, ctxt): + substream = _apply_directives(substream, directives, ctxt, + **vars) + for event in self._flatten(substream, ctxt, **vars): yield event else: yield event - def _include(self, stream, ctxt): + def _include(self, stream, ctxt, **vars): """Internal stream filter that performs inclusion of external template files. """ @@ -523,20 +563,21 @@ href, cls, fallback = event[1] if not isinstance(href, basestring): parts = [] - for subkind, subdata, subpos in self._eval(href, ctxt): + for subkind, subdata, subpos in self._eval(href, ctxt, + **vars): if subkind is TEXT: parts.append(subdata) href = u''.join([x for x in parts if x is not None]) try: tmpl = self.loader.load(href, relative_to=event[2][0], cls=cls or self.__class__) - for event in tmpl.generate(ctxt): + for event in tmpl.generate(ctxt, **vars): yield event except TemplateNotFound: if fallback is None: raise for filter_ in self.filters: - fallback = filter_(iter(fallback), ctxt) + fallback = filter_(iter(fallback), ctxt, **vars) for event in fallback: yield event else: diff --git a/genshi/template/directives.py b/genshi/template/directives.py --- a/genshi/template/directives.py +++ b/genshi/template/directives.py @@ -22,7 +22,8 @@ from genshi.core import QName, Stream from genshi.path import Path from genshi.template.base import TemplateRuntimeError, TemplateSyntaxError, \ - EXPR, _apply_directives, _ctxt2dict + EXPR, _apply_directives, _eval_expr, \ + _exec_suite from genshi.template.eval import Expression, Suite, ExpressionASTTransformer, \ _parse @@ -88,13 +89,15 @@ return cls(value, template, namespaces, *pos[1:]), stream attach = classmethod(attach) - def __call__(self, stream, ctxt, directives): + def __call__(self, stream, directives, ctxt, **vars): """Apply the directive to the given stream. 
:param stream: the event stream - :param ctxt: the context data :param directives: a list of the remaining directives that should process the stream + :param ctxt: the context data + :param vars: additional variables that should be made available when + Python code is executed """ raise NotImplementedError @@ -167,10 +170,10 @@ """ __slots__ = [] - def __call__(self, stream, ctxt, directives): + def __call__(self, stream, directives, ctxt, **vars): def _generate(): kind, (tag, attrib), pos = stream.next() - attrs = self.expr.evaluate(ctxt) + attrs = _eval_expr(self.expr, ctxt, **vars) if attrs: if isinstance(attrs, Stream): try: @@ -186,7 +189,7 @@ for event in stream: yield event - return _apply_directives(_generate(), ctxt, directives) + return _apply_directives(_generate(), directives, ctxt, **vars) class ContentDirective(Directive): @@ -291,7 +294,7 @@ namespaces, pos) attach = classmethod(attach) - def __call__(self, stream, ctxt, directives): + def __call__(self, stream, directives, ctxt, **vars): stream = list(stream) def function(*args, **kwargs): @@ -304,14 +307,14 @@ if name in kwargs: val = kwargs.pop(name) else: - val = self.defaults.get(name).evaluate(ctxt) + val = _eval_expr(self.defaults.get(name), ctxt, **vars) scope[name] = val if not self.star_args is None: scope[self.star_args] = args if not self.dstar_args is None: scope[self.dstar_args] = kwargs ctxt.push(scope) - for event in _apply_directives(stream, ctxt, directives): + for event in _apply_directives(stream, directives, ctxt, **vars): yield event ctxt.pop() try: @@ -364,8 +367,8 @@ namespaces, pos) attach = classmethod(attach) - def __call__(self, stream, ctxt, directives): - iterable = self.expr.evaluate(ctxt) + def __call__(self, stream, directives, ctxt, **vars): + iterable = _eval_expr(self.expr, ctxt, **vars) if iterable is None: return @@ -375,7 +378,7 @@ for item in iterable: assign(scope, item) ctxt.push(scope) - for event in _apply_directives(stream, ctxt, directives): + for event 
in _apply_directives(stream, directives, ctxt, **vars): yield event ctxt.pop() @@ -405,9 +408,10 @@ namespaces, pos) attach = classmethod(attach) - def __call__(self, stream, ctxt, directives): - if self.expr.evaluate(ctxt): - return _apply_directives(stream, ctxt, directives) + def __call__(self, stream, directives, ctxt, **vars): + value = _eval_expr(self.expr, ctxt, **vars) + if value: + return _apply_directives(stream, directives, ctxt, **vars) return [] @@ -440,6 +444,8 @@ def attach(cls, template, stream, value, namespaces, pos): hints = [] if type(value) is dict: + if value.get('buffer', '').lower() == 'false': + hints.append('not_buffered') if value.get('once', '').lower() == 'true': hints.append('match_once') if value.get('recursive', '').lower() == 'false': @@ -449,7 +455,7 @@ stream attach = classmethod(attach) - def __call__(self, stream, ctxt, directives): + def __call__(self, stream, directives, ctxt, **vars): ctxt._match_templates.append((self.path.test(ignore_context=True), self.path, list(stream), self.hints, self.namespaces, directives)) @@ -531,9 +537,9 @@ """ __slots__ = [] - def __call__(self, stream, ctxt, directives): + def __call__(self, stream, directives, ctxt, **vars): def _generate(): - if self.expr.evaluate(ctxt): + if _eval_expr(self.expr, ctxt, **vars): stream.next() # skip start tag previous = stream.next() for event in stream: @@ -542,7 +548,7 @@ else: for event in stream: yield event - return _apply_directives(_generate(), ctxt, directives) + return _apply_directives(_generate(), directives, ctxt, **vars) def attach(cls, template, stream, value, namespaces, pos): if not value: @@ -600,12 +606,12 @@ namespaces, pos) attach = classmethod(attach) - def __call__(self, stream, ctxt, directives): + def __call__(self, stream, directives, ctxt, **vars): info = [False, bool(self.expr), None] if self.expr: - info[2] = self.expr.evaluate(ctxt) + info[2] = _eval_expr(self.expr, ctxt, **vars) ctxt._choice_stack.append(info) - for event in 
_apply_directives(stream, ctxt, directives): + for event in _apply_directives(stream, directives, ctxt, **vars): yield event ctxt._choice_stack.pop() @@ -629,7 +635,7 @@ namespaces, pos) attach = classmethod(attach) - def __call__(self, stream, ctxt, directives): + def __call__(self, stream, directives, ctxt, **vars): info = ctxt._choice_stack and ctxt._choice_stack[-1] if not info: raise TemplateRuntimeError('"when" directives can only be used ' @@ -644,16 +650,16 @@ if info[1]: value = info[2] if self.expr: - matched = value == self.expr.evaluate(ctxt) + matched = value == _eval_expr(self.expr, ctxt, **vars) else: matched = bool(value) else: - matched = bool(self.expr.evaluate(ctxt)) + matched = bool(_eval_expr(self.expr, ctxt, **vars)) info[0] = matched if not matched: return [] - return _apply_directives(stream, ctxt, directives) + return _apply_directives(stream, directives, ctxt, **vars) class OtherwiseDirective(Directive): @@ -668,7 +674,7 @@ Directive.__init__(self, None, template, namespaces, lineno, offset) self.filename = template.filepath - def __call__(self, stream, ctxt, directives): + def __call__(self, stream, directives, ctxt, **vars): info = ctxt._choice_stack and ctxt._choice_stack[-1] if not info: raise TemplateRuntimeError('an "otherwise" directive can only be ' @@ -678,7 +684,7 @@ return [] info[0] = True - return _apply_directives(stream, ctxt, directives) + return _apply_directives(stream, directives, ctxt, **vars) class WithDirective(Directive): @@ -722,11 +728,10 @@ namespaces, pos) attach = classmethod(attach) - def __call__(self, stream, ctxt, directives): - frame = {} - ctxt.push(frame) - self.suite.execute(_ctxt2dict(ctxt)) - for event in _apply_directives(stream, ctxt, directives): + def __call__(self, stream, directives, ctxt, **vars): + ctxt.push({}) + _exec_suite(self.suite, ctxt, **vars) + for event in _apply_directives(stream, directives, ctxt, **vars): yield event ctxt.pop() diff --git a/genshi/template/markup.py 
b/genshi/template/markup.py --- a/genshi/template/markup.py +++ b/genshi/template/markup.py @@ -225,7 +225,7 @@ assert len(streams) == 1 return streams[0] - def _match(self, stream, ctxt, match_templates=None): + def _match(self, stream, ctxt, match_templates=None, **vars): """Internal stream filter that applies any defined match templates to the stream. """ @@ -271,32 +271,38 @@ # Consume and store all events until an end event # corresponding to this start event is encountered - inner = _strip(stream) pre_match_templates = match_templates[:idx + 1] if 'match_once' not in hints and 'not_recursive' in hints: pre_match_templates.pop() - inner = self._match(inner, ctxt, pre_match_templates) - content = list(self._include(chain([event], inner, tail), - ctxt)) + inner = _strip(stream) + if pre_match_templates: + inner = self._match(inner, ctxt, pre_match_templates) + content = self._include(chain([event], inner, tail), ctxt) + if 'not_buffered' not in hints: + content = list(content) - for test in [mt[0] for mt in match_templates]: - test(tail[0], namespaces, ctxt, updateonly=True) + if tail: + for test in [mt[0] for mt in match_templates]: + test(tail[0], namespaces, ctxt, updateonly=True) # Make the select() function available in the body of the # match template def select(path): return Stream(content).select(path, namespaces, ctxt) - ctxt.push(dict(select=select)) + vars = dict(select=select) # Recursively process the output - template = _apply_directives(template, ctxt, directives) - remaining = match_templates - for event in self._match(self._exec( - self._eval(self._flatten(template, ctxt), - ctxt), ctxt), ctxt, match_templates[idx + 1:]): + template = _apply_directives(template, directives, ctxt, + **vars) + for event in self._match( + self._exec( + self._eval( + self._flatten(template, ctxt, **vars), + ctxt, **vars), + ctxt, **vars), + ctxt, match_templates[idx + 1:], **vars): yield event - ctxt.pop() break else: # no matches diff --git 
a/genshi/template/tests/markup.py b/genshi/template/tests/markup.py --- a/genshi/template/tests/markup.py +++ b/genshi/template/tests/markup.py @@ -611,6 +611,22 @@ """, tmpl.generate().render()) + def test_with_in_match(self): + xml = ("""<html xmlns:py="http://genshi.edgewall.org/"> + <py:match path="body/p"> + <h1>${select('text()')}</h1> + ${select('.')} + </py:match> + <body><p py:with="foo='bar'">${foo}</p></body> + </html>""") + tmpl = MarkupTemplate(xml, filename='test.html') + self.assertEqual("""<html> + <body> + <h1>bar</h1> + <p>bar</p> + </body> + </html>""", tmpl.generate().render()) + def test_nested_include_matches(self): # See ticket #157 dirname = tempfile.mkdtemp(suffix='genshi_test') diff --git a/genshi/template/text.py b/genshi/template/text.py --- a/genshi/template/text.py +++ b/genshi/template/text.py @@ -33,7 +33,7 @@ TemplateSyntaxError, EXEC, INCLUDE, SUB from genshi.template.eval import Suite from genshi.template.directives import * -from genshi.template.directives import Directive, _apply_directives +from genshi.template.directives import Directive from genshi.template.interpolation import interpolate __all__ = ['NewTextTemplate', 'OldTextTemplate', 'TextTemplate']