changeset 700:08f22328303d trunk

Add option for unbuffered match template processing, because buffering the complete matched content could cause excessive memory usage. Closes #190.
author cmlenz
date Fri, 28 Mar 2008 14:14:26 +0000
parents cfe3b4f02d77
children e5b0d9d6b406
files ChangeLog doc/xml-templates.txt genshi/template/base.py genshi/template/directives.py genshi/template/markup.py genshi/template/tests/markup.py genshi/template/text.py
diffstat 7 files changed, 148 insertions(+), 65 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -65,6 +65,10 @@
    and some metadata. New load functions are supplied for loading from egg
    package data, and loading from different loaders depending on the path
    prefix of the requested filename (ticket #182).
+ * Match templates can now be processed without keeping the complete matched
+   content in memory, which could cause excessive memory use on long pages.
+   The buffering can be disabled using the new `buffer` optimization hint on
+   the `<py:match>` directive.
 
 
 Version 0.4.4
--- a/doc/xml-templates.txt
+++ b/doc/xml-templates.txt
@@ -358,6 +358,17 @@
 +---------------+-----------+-----------------------------------------------+
 | Attribute     | Default   | Description                                   |
 +===============+===========+===============================================+
+| ``buffer``    | ``true``  | Whether the matched content should be         |
+|               |           | buffered in memory. Buffering can improve     |
+|               |           | performance a bit at the cost of needing more |
+|               |           | memory during rendering. Buffering is         |
+|               |           | *required* for match templates that contain   |
+|               |           | more than one invocation of the ``select()``  |
+|               |           | function. If there is only one call, and the  |
+|               |           | matched content can potentially be very long, |
+|               |           | consider disabling buffering to avoid         |
+|               |           | excessive memory use.                         |
++---------------+-----------+-----------------------------------------------+
 | ``once``      | ``false`` | Whether the engine should stop looking for    |
 |               |           | more matching elements after the first match. |
 |               |           | Use this on match templates that match        |
--- a/genshi/template/base.py
+++ b/genshi/template/base.py
@@ -254,18 +254,53 @@
         """Pop the top-most scope from the stack."""
 
 
-def _apply_directives(stream, ctxt, directives):
+def _apply_directives(stream, directives, ctxt, **vars):
     """Apply the given directives to the stream.
     
     :param stream: the stream the directives should be applied to
+    :param directives: the list of directives to apply
     :param ctxt: the `Context`
-    :param directives: the list of directives to apply
+    :param vars: additional variables that should be available when Python
+                 code is executed
     :return: the stream with the given directives applied
     """
     if directives:
-        stream = directives[0](iter(stream), ctxt, directives[1:])
+        stream = directives[0](iter(stream), directives[1:], ctxt, **vars)
     return stream
 
+def _eval_expr(expr, ctxt, **vars):
+    """Evaluate the given `Expression` object.
+    
+    :param expr: the expression to evaluate
+    :param ctxt: the `Context`
+    :param vars: additional variables that should be available to the
+                 expression
+    :return: the result of the evaluation
+    """
+    if vars:
+        ctxt.push(vars)
+    retval = expr.evaluate(ctxt)
+    if vars:
+        ctxt.pop()
+    return retval
+
+def _exec_suite(suite, ctxt, **vars):
+    """Execute the given `Suite` object.
+    
+    :param suite: the code suite to execute
+    :param ctxt: the `Context`
+    :param vars: additional variables that should be available to the
+                 code
+    """
+    if vars:
+        ctxt.push(vars)
+        ctxt.push({})
+    suite.execute(_ctxt2dict(ctxt))
+    if vars:
+        top = ctxt.pop()
+        ctxt.pop()
+        ctxt.frames[0].update(top)
+
 
 class TemplateMeta(type):
     """Meta class for templates."""
@@ -426,21 +461,24 @@
         :return: a markup event stream representing the result of applying
                  the template to the context data.
         """
+        vars = {}
         if args:
             assert len(args) == 1
             ctxt = args[0]
             if ctxt is None:
                 ctxt = Context(**kwargs)
+            else:
+                vars = kwargs
             assert isinstance(ctxt, Context)
         else:
             ctxt = Context(**kwargs)
 
         stream = self.stream
         for filter_ in self.filters:
-            stream = filter_(iter(stream), ctxt)
+            stream = filter_(iter(stream), ctxt, **vars)
         return Stream(stream, self.serializer)
 
-    def _eval(self, stream, ctxt):
+    def _eval(self, stream, ctxt, **vars):
         """Internal stream filter that evaluates any expressions in `START` and
         `TEXT` events.
         """
@@ -460,7 +498,8 @@
                     else:
                         values = []
                         for subkind, subdata, subpos in self._eval(substream,
-                                                                   ctxt):
+                                                                   ctxt,
+                                                                   **vars):
                             if subkind is TEXT:
                                 values.append(subdata)
                         value = [x for x in values if x is not None]
@@ -470,7 +509,7 @@
                 yield kind, (tag, Attrs(new_attrs)), pos
 
             elif kind is EXPR:
-                result = data.evaluate(ctxt)
+                result = _eval_expr(data, ctxt, **vars)
                 if result is not None:
                     # First check for a string, otherwise the iterable test
                     # below succeeds, and the string will be chopped up into
@@ -482,7 +521,7 @@
                     elif hasattr(result, '__iter__'):
                         substream = _ensure(result)
                         for filter_ in filters:
-                            substream = filter_(substream, ctxt)
+                            substream = filter_(substream, ctxt, **vars)
                         for event in substream:
                             yield event
                     else:
@@ -491,28 +530,29 @@
             else:
                 yield kind, data, pos
 
-    def _exec(self, stream, ctxt):
+    def _exec(self, stream, ctxt, **vars):
         """Internal stream filter that executes Python code blocks."""
         for event in stream:
             if event[0] is EXEC:
-                event[1].execute(_ctxt2dict(ctxt))
+                _exec_suite(event[1], ctxt, **vars)
             else:
                 yield event
 
-    def _flatten(self, stream, ctxt):
+    def _flatten(self, stream, ctxt, **vars):
         """Internal stream filter that expands `SUB` events in the stream."""
         for event in stream:
             if event[0] is SUB:
                 # This event is a list of directives and a list of nested
                 # events to which those directives should be applied
                 directives, substream = event[1]
-                substream = _apply_directives(substream, ctxt, directives)
-                for event in self._flatten(substream, ctxt):
+                substream = _apply_directives(substream, directives, ctxt,
+                                              **vars)
+                for event in self._flatten(substream, ctxt, **vars):
                     yield event
             else:
                 yield event
 
-    def _include(self, stream, ctxt):
+    def _include(self, stream, ctxt, **vars):
         """Internal stream filter that performs inclusion of external
         template files.
         """
@@ -523,20 +563,21 @@
                 href, cls, fallback = event[1]
                 if not isinstance(href, basestring):
                     parts = []
-                    for subkind, subdata, subpos in self._eval(href, ctxt):
+                    for subkind, subdata, subpos in self._eval(href, ctxt,
+                                                               **vars):
                         if subkind is TEXT:
                             parts.append(subdata)
                     href = u''.join([x for x in parts if x is not None])
                 try:
                     tmpl = self.loader.load(href, relative_to=event[2][0],
                                             cls=cls or self.__class__)
-                    for event in tmpl.generate(ctxt):
+                    for event in tmpl.generate(ctxt, **vars):
                         yield event
                 except TemplateNotFound:
                     if fallback is None:
                         raise
                     for filter_ in self.filters:
-                        fallback = filter_(iter(fallback), ctxt)
+                        fallback = filter_(iter(fallback), ctxt, **vars)
                     for event in fallback:
                         yield event
             else:
--- a/genshi/template/directives.py
+++ b/genshi/template/directives.py
@@ -22,7 +22,8 @@
 from genshi.core import QName, Stream
 from genshi.path import Path
 from genshi.template.base import TemplateRuntimeError, TemplateSyntaxError, \
-                                 EXPR, _apply_directives, _ctxt2dict
+                                 EXPR, _apply_directives, _eval_expr, \
+                                 _exec_suite
 from genshi.template.eval import Expression, Suite, ExpressionASTTransformer, \
                                  _parse
 
@@ -88,13 +89,15 @@
         return cls(value, template, namespaces, *pos[1:]), stream
     attach = classmethod(attach)
 
-    def __call__(self, stream, ctxt, directives):
+    def __call__(self, stream, directives, ctxt, **vars):
         """Apply the directive to the given stream.
         
         :param stream: the event stream
-        :param ctxt: the context data
         :param directives: a list of the remaining directives that should
                            process the stream
+        :param ctxt: the context data
+        :param vars: additional variables that should be made available when
+                     Python code is executed
         """
         raise NotImplementedError
 
@@ -167,10 +170,10 @@
     """
     __slots__ = []
 
-    def __call__(self, stream, ctxt, directives):
+    def __call__(self, stream, directives, ctxt, **vars):
         def _generate():
             kind, (tag, attrib), pos  = stream.next()
-            attrs = self.expr.evaluate(ctxt)
+            attrs = _eval_expr(self.expr, ctxt, **vars)
             if attrs:
                 if isinstance(attrs, Stream):
                     try:
@@ -186,7 +189,7 @@
             for event in stream:
                 yield event
 
-        return _apply_directives(_generate(), ctxt, directives)
+        return _apply_directives(_generate(), directives, ctxt, **vars)
 
 
 class ContentDirective(Directive):
@@ -291,7 +294,7 @@
                                                namespaces, pos)
     attach = classmethod(attach)
 
-    def __call__(self, stream, ctxt, directives):
+    def __call__(self, stream, directives, ctxt, **vars):
         stream = list(stream)
 
         def function(*args, **kwargs):
@@ -304,14 +307,14 @@
                     if name in kwargs:
                         val = kwargs.pop(name)
                     else:
-                        val = self.defaults.get(name).evaluate(ctxt)
+                        val = _eval_expr(self.defaults.get(name), ctxt, **vars)
                     scope[name] = val
             if not self.star_args is None:
                 scope[self.star_args] = args
             if not self.dstar_args is None:
                 scope[self.dstar_args] = kwargs
             ctxt.push(scope)
-            for event in _apply_directives(stream, ctxt, directives):
+            for event in _apply_directives(stream, directives, ctxt, **vars):
                 yield event
             ctxt.pop()
         try:
@@ -364,8 +367,8 @@
                                                namespaces, pos)
     attach = classmethod(attach)
 
-    def __call__(self, stream, ctxt, directives):
-        iterable = self.expr.evaluate(ctxt)
+    def __call__(self, stream, directives, ctxt, **vars):
+        iterable = _eval_expr(self.expr, ctxt, **vars)
         if iterable is None:
             return
 
@@ -375,7 +378,7 @@
         for item in iterable:
             assign(scope, item)
             ctxt.push(scope)
-            for event in _apply_directives(stream, ctxt, directives):
+            for event in _apply_directives(stream, directives, ctxt, **vars):
                 yield event
             ctxt.pop()
 
@@ -405,9 +408,10 @@
                                               namespaces, pos)
     attach = classmethod(attach)
 
-    def __call__(self, stream, ctxt, directives):
-        if self.expr.evaluate(ctxt):
-            return _apply_directives(stream, ctxt, directives)
+    def __call__(self, stream, directives, ctxt, **vars):
+        value = _eval_expr(self.expr, ctxt, **vars)
+        if value:
+            return _apply_directives(stream, directives, ctxt, **vars)
         return []
 
 
@@ -440,6 +444,8 @@
     def attach(cls, template, stream, value, namespaces, pos):
         hints = []
         if type(value) is dict:
+            if value.get('buffer', '').lower() == 'false':
+                hints.append('not_buffered')
             if value.get('once', '').lower() == 'true':
                 hints.append('match_once')
             if value.get('recursive', '').lower() == 'false':
@@ -449,7 +455,7 @@
                stream
     attach = classmethod(attach)
 
-    def __call__(self, stream, ctxt, directives):
+    def __call__(self, stream, directives, ctxt, **vars):
         ctxt._match_templates.append((self.path.test(ignore_context=True),
                                       self.path, list(stream), self.hints,
                                       self.namespaces, directives))
@@ -531,9 +537,9 @@
     """
     __slots__ = []
 
-    def __call__(self, stream, ctxt, directives):
+    def __call__(self, stream, directives, ctxt, **vars):
         def _generate():
-            if self.expr.evaluate(ctxt):
+            if _eval_expr(self.expr, ctxt, **vars):
                 stream.next() # skip start tag
                 previous = stream.next()
                 for event in stream:
@@ -542,7 +548,7 @@
             else:
                 for event in stream:
                     yield event
-        return _apply_directives(_generate(), ctxt, directives)
+        return _apply_directives(_generate(), directives, ctxt, **vars)
 
     def attach(cls, template, stream, value, namespaces, pos):
         if not value:
@@ -600,12 +606,12 @@
                                                   namespaces, pos)
     attach = classmethod(attach)
 
-    def __call__(self, stream, ctxt, directives):
+    def __call__(self, stream, directives, ctxt, **vars):
         info = [False, bool(self.expr), None]
         if self.expr:
-            info[2] = self.expr.evaluate(ctxt)
+            info[2] = _eval_expr(self.expr, ctxt, **vars)
         ctxt._choice_stack.append(info)
-        for event in _apply_directives(stream, ctxt, directives):
+        for event in _apply_directives(stream, directives, ctxt, **vars):
             yield event
         ctxt._choice_stack.pop()
 
@@ -629,7 +635,7 @@
                                                 namespaces, pos)
     attach = classmethod(attach)
 
-    def __call__(self, stream, ctxt, directives):
+    def __call__(self, stream, directives, ctxt, **vars):
         info = ctxt._choice_stack and ctxt._choice_stack[-1]
         if not info:
             raise TemplateRuntimeError('"when" directives can only be used '
@@ -644,16 +650,16 @@
         if info[1]:
             value = info[2]
             if self.expr:
-                matched = value == self.expr.evaluate(ctxt)
+                matched = value == _eval_expr(self.expr, ctxt, **vars)
             else:
                 matched = bool(value)
         else:
-            matched = bool(self.expr.evaluate(ctxt))
+            matched = bool(_eval_expr(self.expr, ctxt, **vars))
         info[0] = matched
         if not matched:
             return []
 
-        return _apply_directives(stream, ctxt, directives)
+        return _apply_directives(stream, directives, ctxt, **vars)
 
 
 class OtherwiseDirective(Directive):
@@ -668,7 +674,7 @@
         Directive.__init__(self, None, template, namespaces, lineno, offset)
         self.filename = template.filepath
 
-    def __call__(self, stream, ctxt, directives):
+    def __call__(self, stream, directives, ctxt, **vars):
         info = ctxt._choice_stack and ctxt._choice_stack[-1]
         if not info:
             raise TemplateRuntimeError('an "otherwise" directive can only be '
@@ -678,7 +684,7 @@
             return []
         info[0] = True
 
-        return _apply_directives(stream, ctxt, directives)
+        return _apply_directives(stream, directives, ctxt, **vars)
 
 
 class WithDirective(Directive):
@@ -722,11 +728,10 @@
                                                 namespaces, pos)
     attach = classmethod(attach)
 
-    def __call__(self, stream, ctxt, directives):
-        frame = {}
-        ctxt.push(frame)
-        self.suite.execute(_ctxt2dict(ctxt))
-        for event in _apply_directives(stream, ctxt, directives):
+    def __call__(self, stream, directives, ctxt, **vars):
+        ctxt.push({})
+        _exec_suite(self.suite, ctxt, **vars)
+        for event in _apply_directives(stream, directives, ctxt, **vars):
             yield event
         ctxt.pop()
 
--- a/genshi/template/markup.py
+++ b/genshi/template/markup.py
@@ -225,7 +225,7 @@
         assert len(streams) == 1
         return streams[0]
 
-    def _match(self, stream, ctxt, match_templates=None):
+    def _match(self, stream, ctxt, match_templates=None, **vars):
         """Internal stream filter that applies any defined match templates
         to the stream.
         """
@@ -271,32 +271,38 @@
 
                     # Consume and store all events until an end event
                     # corresponding to this start event is encountered
-                    inner = _strip(stream)
                     pre_match_templates = match_templates[:idx + 1]
                     if 'match_once' not in hints and 'not_recursive' in hints:
                         pre_match_templates.pop()
-                    inner = self._match(inner, ctxt, pre_match_templates)
-                    content = list(self._include(chain([event], inner, tail),
-                                                 ctxt))
+                    inner = _strip(stream)
+                    if pre_match_templates:
+                        inner = self._match(inner, ctxt, pre_match_templates)
+                    content = self._include(chain([event], inner, tail), ctxt)
+                    if 'not_buffered' not in hints:
+                        content = list(content)
 
-                    for test in [mt[0] for mt in match_templates]:
-                        test(tail[0], namespaces, ctxt, updateonly=True)
+                    if tail:
+                        for test in [mt[0] for mt in match_templates]:
+                            test(tail[0], namespaces, ctxt, updateonly=True)
 
                     # Make the select() function available in the body of the
                     # match template
                     def select(path):
                         return Stream(content).select(path, namespaces, ctxt)
-                    ctxt.push(dict(select=select))
+                    vars = dict(select=select)
 
                     # Recursively process the output
-                    template = _apply_directives(template, ctxt, directives)
-                    remaining = match_templates
-                    for event in self._match(self._exec(
-                                    self._eval(self._flatten(template, ctxt),
-                                    ctxt), ctxt), ctxt, match_templates[idx + 1:]):
+                    template = _apply_directives(template, directives, ctxt,
+                                                 **vars)
+                    for event in self._match(
+                            self._exec(
+                                self._eval(
+                                    self._flatten(template, ctxt, **vars),
+                                    ctxt, **vars),
+                                ctxt, **vars),
+                            ctxt, match_templates[idx + 1:], **vars):
                         yield event
 
-                    ctxt.pop()
                     break
 
             else: # no matches
--- a/genshi/template/tests/markup.py
+++ b/genshi/template/tests/markup.py
@@ -611,6 +611,22 @@
           </body>
         </html>""", tmpl.generate().render())
 
+    def test_with_in_match(self): 
+        xml = ("""<html xmlns:py="http://genshi.edgewall.org/">
+          <py:match path="body/p">
+            <h1>${select('text()')}</h1>
+            ${select('.')}
+          </py:match>
+          <body><p py:with="foo='bar'">${foo}</p></body>
+        </html>""")
+        tmpl = MarkupTemplate(xml, filename='test.html')
+        self.assertEqual("""<html>
+          <body>
+            <h1>bar</h1>
+            <p>bar</p>
+          </body>
+        </html>""", tmpl.generate().render())
+
     def test_nested_include_matches(self):
         # See ticket #157
         dirname = tempfile.mkdtemp(suffix='genshi_test')
--- a/genshi/template/text.py
+++ b/genshi/template/text.py
@@ -33,7 +33,7 @@
                                  TemplateSyntaxError, EXEC, INCLUDE, SUB
 from genshi.template.eval import Suite
 from genshi.template.directives import *
-from genshi.template.directives import Directive, _apply_directives
+from genshi.template.directives import Directive
 from genshi.template.interpolation import interpolate
 
 __all__ = ['NewTextTemplate', 'OldTextTemplate', 'TextTemplate']
Copyright (C) 2012-2017 Edgewall Software