changeset 17:74cc70129d04 trunk

Refactoring to address #6: all match templates are now processed by a single filter, which means that match templates added by included templates are properly applied. A side effect of this refactoring is that `Context` objects can no longer be reused across multiple template processing runs. Also, output filters are now applied in the `Stream.serialize()` method instead of by the `Template.generate()` method, which just makes more sense.
author cmlenz
date Sun, 18 Jun 2006 22:33:33 +0000
parents bcba0181049c
children 5420cfe42d36
files examples/basic/run.py markup/builder.py markup/core.py markup/filters.py markup/template.py
diffstat 5 files changed, 181 insertions(+), 205 deletions(-) [+]
line wrap: on
line diff
--- a/examples/basic/run.py
+++ b/examples/basic/run.py
@@ -17,16 +17,16 @@
     tmpl = loader.load('test.html')
     print ' --> parse stage: ', datetime.now() - start
 
-    ctxt = Context(hello='<world>', skin='default', hey='ZYX', bozz=None,
-                   items=['Number %d' % num for num in range(1, 15)],
-                   prefix='#')
+    data = dict(hello='<world>', skin='default', hey='ZYX', bozz=None,
+                items=['Number %d' % num for num in range(1, 15)],
+                prefix='#')
 
-    print tmpl.generate(ctxt).render(method='html')
+    print tmpl.generate(Context(**data)).render(method='html')
 
     times = []
     for i in range(100):
         start = datetime.now()
-        list(tmpl.generate(ctxt))
+        list(tmpl.generate(Context(**data)))
         sys.stdout.write('.')
         sys.stdout.flush()
         times.append(datetime.now() - start)
--- a/markup/builder.py
+++ b/markup/builder.py
@@ -55,7 +55,7 @@
                     for event in child.generate():
                         yield event
                 else:
-                    yield Stream.TEXT, child, (-1, -1)
+                    yield Stream.TEXT, unicode(child), (-1, -1)
         return Stream(_generate())
 
     def __iter__(self):
--- a/markup/core.py
+++ b/markup/core.py
@@ -20,17 +20,9 @@
 __all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Namespace', 'QName']
 
 
-class StreamEventKind(object):
+class StreamEventKind(str):
     """A kind of event on an XML stream."""
 
-    __slots__ = ['name']
-
-    def __init__(self, name):
-        self.name = name
-
-    def __repr__(self):
-        return self.name
-
 
 class Stream(object):
     """Represents a stream of markup events.
@@ -52,15 +44,15 @@
     """
     __slots__ = ['events']
 
-    START = StreamEventKind('start') # a start tag
-    END = StreamEventKind('end') # an end tag
-    TEXT = StreamEventKind('text') # literal text
-    PROLOG = StreamEventKind('prolog') # XML prolog
-    DOCTYPE = StreamEventKind('doctype') # doctype declaration
-    START_NS = StreamEventKind('start-ns') # start namespace mapping
-    END_NS = StreamEventKind('end-ns') # end namespace mapping
-    PI = StreamEventKind('pi') # processing instruction
-    COMMENT = StreamEventKind('comment') # comment
+    START = StreamEventKind('START') # a start tag
+    END = StreamEventKind('END') # an end tag
+    TEXT = StreamEventKind('TEXT') # literal text
+    PROLOG = StreamEventKind('PROLOG') # XML prolog
+    DOCTYPE = StreamEventKind('DOCTYPE') # doctype declaration
+    START_NS = StreamEventKind('START-NS') # start namespace mapping
+    END_NS = StreamEventKind('END-NS') # end namespace mapping
+    PI = StreamEventKind('PI') # processing instruction
+    COMMENT = StreamEventKind('COMMENT') # comment
 
     def __init__(self, events):
         """Initialize the stream with a sequence of markup events.
@@ -72,7 +64,7 @@
     def __iter__(self):
         return iter(self.events)
 
-    def render(self, method='xml', encoding='utf-8', **kwargs):
+    def render(self, method='xml', encoding='utf-8', filters=None, **kwargs):
         """Return a string representation of the stream.
         
         @param method: determines how the stream is serialized; can be either
@@ -83,7 +75,8 @@
         Any additional keyword arguments are passed to the serializer, and thus
         depend on the `method` parameter value.
         """
-        output = u''.join(list(self.serialize(method=method, **kwargs)))
+        generator = self.serialize(method=method, filters=filters, **kwargs)
+        output = u''.join(list(generator))
         if encoding is not None:
             return output.encode(encoding)
         return output
@@ -95,10 +88,9 @@
         @param path: a string containing the XPath expression
         """
         from markup.path import Path
-        path = Path(path)
-        return path.select(self)
+        return Path(path).select(self)
 
-    def serialize(self, method='xml', **kwargs):
+    def serialize(self, method='xml', filters=None, **kwargs):
         """Generate strings corresponding to a specific serialization of the
         stream.
         
@@ -109,6 +101,7 @@
         @param method: determines how the stream is serialized; can be either
                        'xml' or 'html', or a custom `Serializer` subclass
         """
+        from markup.filters import WhitespaceFilter
         from markup import output
         cls = method
         if isinstance(method, basestring):
@@ -117,7 +110,14 @@
         else:
             assert issubclass(cls, serializers.Serializer)
         serializer = cls(**kwargs)
-        return serializer.serialize(self)
+
+        stream = self
+        if filters is None:
+            filters = [WhitespaceFilter()]
+        for filter_ in filters:
+            stream = filter_(iter(stream))
+
+        return serializer.serialize(stream)
 
     def __str__(self):
         return self.render()
@@ -176,6 +176,9 @@
     def __mul__(self, num):
         return Markup(unicode(self) * num)
 
+    def __repr__(self):
+        return '<%s "%s">' % (self.__class__.__name__, self)
+
     def join(self, seq):
         return Markup(unicode(self).join([Markup.escape(item) for item in seq]))
 
@@ -184,7 +187,8 @@
         replaced by the equivalent UTF-8 characters.
         
         If the `keepxmlentities` parameter is provided and evaluates to `True`,
-        the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;).
+        the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not
+        stripped.
         """
         def _replace_entity(match):
             if match.group(1): # numeric entity
@@ -255,9 +259,8 @@
     def sanitize(self):
         from markup.filters import HTMLSanitizer
         from markup.input import HTMLParser
-        sanitize = HTMLSanitizer()
-        text = self.stripentities(keepxmlentities=True)
-        return Stream(sanitize(HTMLParser(StringIO(text)), None))
+        text = StringIO(self.stripentities(keepxmlentities=True))
+        return Stream(HTMLSanitizer()(HTMLParser(text)))
 
 
 escape = Markup.escape
--- a/markup/filters.py
+++ b/markup/filters.py
@@ -22,61 +22,7 @@
 from markup.core import Attributes, Markup, Stream
 from markup.path import Path
 
-__all__ = ['EvalFilter', 'IncludeFilter', 'WhitespaceFilter', 'HTMLSanitizer']
-
-
-class EvalFilter(object):
-    """Responsible for evaluating expressions in a template."""
-
-    def __call__(self, stream, ctxt=None):
-        from markup.template import Template
-
-        for kind, data, pos in stream:
-
-            if kind is Stream.START:
-                # Attributes may still contain expressions in start tags at
-                # this point, so do some evaluation
-                tag, attrib = data
-                new_attrib = []
-                for name, substream in attrib:
-                    if isinstance(substream, basestring):
-                        value = substream
-                    else:
-                        values = []
-                        for subkind, subdata, subpos in substream:
-                            if subkind is Template.EXPR:
-                                values.append(subdata.evaluate(ctxt))
-                            else:
-                                values.append(subdata)
-                        value = filter(lambda x: x is not None, values)
-                        if not value:
-                            continue
-                    new_attrib.append((name, ''.join(value)))
-                yield kind, (tag, Attributes(new_attrib)), pos
-
-            elif kind is Template.EXPR:
-                result = data.evaluate(ctxt)
-                if result is None:
-                    continue
-
-                # First check for a string, otherwise the iterable
-                # test below succeeds, and the string will be
-                # chopped up into characters
-                if isinstance(result, basestring):
-                    yield Stream.TEXT, result, pos
-                else:
-                    # Test if the expression evaluated to an
-                    # iterable, in which case we yield the
-                    # individual items
-                    try:
-                        yield Template.SUB, ([], iter(result)), pos
-                    except TypeError:
-                        # Neither a string nor an iterable, so just
-                        # pass it through
-                        yield Stream.TEXT, unicode(result), pos
-
-            else:
-                yield kind, data, pos
+__all__ = ['IncludeFilter', 'WhitespaceFilter', 'HTMLSanitizer']
 
 
 class IncludeFilter(object):
@@ -86,16 +32,13 @@
 
     _NAMESPACE = 'http://www.w3.org/2001/XInclude'
 
-    def __init__(self, loader, template):
+    def __init__(self, loader):
         """Initialize the filter.
         
         @param loader: the `TemplateLoader` to use for resolving references to
             external template files
         """
         self.loader = loader
-        self.template = template
-        if not hasattr(template, '_included_filters'):
-            template._included_filters = []
 
     def __call__(self, stream, ctxt=None, ns_prefixes=None):
         """Filter the stream, processing any XInclude directives it may
@@ -131,21 +74,8 @@
                             raise TemplateError('Include misses required '
                                                 'attribute "href"')
                         template = self.loader.load(include_href)
-                        for ikind, idata, ipos in template.generate(ctxt):
-                            # Fixup indentation of included markup
-                            if ikind is Stream.TEXT:
-                                idata = idata.replace('\n', '\n' + ' ' * indent)
-                            yield ikind, idata, ipos
-
-                        # If the included template defines any filters added at
-                        # runtime (such as py:match templates), those need to be
-                        # applied to the including template, too.
-                        filters = template._included_filters + template.filters
-                        for filter_ in filters:
-                            stream = filter_(stream, ctxt)
-
-                        # Runtime filters included need to be propagated up
-                        self.template._included_filters += filters
+                        for event in template.generate(ctxt):
+                            yield event
 
                     except TemplateNotFound:
                         if fallback_stream is None:
@@ -156,7 +86,7 @@
                     include_href = None
                     fallback_stream = None
                     indent = 0
-                    break
+
                 elif data.localname == 'fallback':
                     in_fallback = False
 
@@ -165,21 +95,12 @@
 
             elif kind is Stream.START_NS and data[1] == self._NAMESPACE:
                 ns_prefixes.append(data[0])
-                continue
 
             elif kind is Stream.END_NS and data in ns_prefixes:
                 ns_prefixes.pop()
-                continue
 
             else:
                 yield kind, data, pos
-        else:
-            # The loop exited normally, so there shouldn't be further events to
-            # process
-            return
-
-        for event in self(stream, ctxt, ns_prefixes=ns_prefixes):
-            yield event
 
 
 class WhitespaceFilter(object):
@@ -199,10 +120,10 @@
             if kind is Stream.TEXT:
                 textbuf.append(data)
             elif prev_kind is Stream.TEXT:
-                text = ''.join(textbuf)
+                text = Markup('').join(textbuf)
                 text = self._TRAILING_SPACE.sub('', text)
                 text = self._LINE_COLLAPSE.sub('\n', text)
-                yield Stream.TEXT, text, pos
+                yield Stream.TEXT, Markup(text), pos
                 del textbuf[:]
             prev_kind = kind
             if kind is not Stream.TEXT:
@@ -210,7 +131,7 @@
 
         if textbuf:
             text = self._LINE_COLLAPSE.sub('\n', ''.join(textbuf))
-            yield Stream.TEXT, text, pos
+            yield Stream.TEXT, Markup(text), pos
 
 
 class HTMLSanitizer(object):
--- a/markup/template.py
+++ b/markup/template.py
@@ -42,14 +42,13 @@
 """
 
 import compiler
-from itertools import chain
 import os
 import re
 from StringIO import StringIO
 
 from markup.core import Attributes, Stream, StreamEventKind
 from markup.eval import Expression
-from markup.filters import EvalFilter, IncludeFilter, WhitespaceFilter
+from markup.filters import IncludeFilter
 from markup.input import HTML, XMLParser, XML
 from markup.path import Path
 
@@ -388,7 +387,7 @@
     """Implementation of the `py:match` template directive.
 
     >>> tmpl = Template('''<div xmlns:py="http://purl.org/kid/ns#">
-    ...   <span py:match="div/greeting">
+    ...   <span py:match="greeting">
     ...     Hello ${select('@name')}
     ...   </span>
     ...   <greeting name="Dude" />
@@ -451,48 +450,15 @@
         Directive.__init__(self, template, None, pos)
         self.path = Path(value)
         self.stream = []
-        template.filters.append(self._filter)
 
     def __call__(self, stream, ctxt):
         self.stream = list(stream)
+        ctxt._match_templates.append((self.path.test(), self.path, self.stream))
         return []
 
     def __repr__(self):
         return '<%s "%s">' % (self.__class__.__name__, self.path.source)
 
-    def _filter(self, stream, ctxt=None):
-        test = self.path.test()
-        for event in stream:
-            if self.stream and event in self.stream[::len(self.stream)]:
-                # This is the event this filter produced itself, so matching it
-                # again would result in an infinite loop
-                yield event
-                continue
-            result = test(*event)
-            if result is True:
-                content = [event]
-                depth = 1
-                while depth > 0:
-                    ev = stream.next()
-                    if ev[0] is Stream.START:
-                        depth += 1
-                    elif ev[0] is Stream.END:
-                        depth -= 1
-                    content.append(ev)
-                    test(*ev)
-
-                yield (Template.SUB,
-                       ([lambda stream, ctxt: self._apply(content, ctxt)], []),
-                       content[0][-1])
-            else:
-                yield event
-
-    def _apply(self, orig_stream, ctxt):
-        ctxt.push(select=lambda path: Stream(orig_stream).select(path))
-        for event in self.stream:
-            yield event
-        ctxt.pop()
-
 
 class ReplaceDirective(Directive):
     """Implementation of the `py:replace` template directive.
@@ -594,8 +560,8 @@
     """
     NAMESPACE = 'http://purl.org/kid/ns#'
 
-    EXPR = StreamEventKind('expr') # an expression
-    SUB = StreamEventKind('sub') # a "subprogram"
+    EXPR = StreamEventKind('EXPR') # an expression
+    SUB = StreamEventKind('SUB') # a "subprogram"
 
     directives = [('def', DefDirective),
                   ('match', MatchDirective),
@@ -616,9 +582,7 @@
             self.source = source
         self.filename = filename or '<string>'
 
-        self.input_filters = [EvalFilter()]
-        self.filters = []
-        self.output_filters = [WhitespaceFilter()]
+        self.filters = [self._eval, self._match]
         self.parse()
 
     def __repr__(self):
@@ -699,46 +663,6 @@
 
         self.stream = stream
 
-    def generate(self, ctxt=None):
-        """Transform the template based on the given context data."""
-
-        if ctxt is None:
-            ctxt = Context()
-
-        def _transform(stream):
-            # Apply input filters
-            for filter_ in chain(self.input_filters, self.filters):
-                stream = filter_(iter(stream), ctxt)
-
-            try:
-                for kind, data, pos in stream:
-
-                    if kind is Template.SUB:
-                        # This event is a list of directives and a list of
-                        # nested events to which those directives should be
-                        # applied
-                        directives, substream = data
-                        directives.reverse()
-                        for directive in directives:
-                            substream = directive(iter(substream), ctxt)
-                        for event in _transform(iter(substream)):
-                            yield event
-
-                    else:
-                        yield kind, data, pos
-
-            except SyntaxError, err:
-                raise TemplateSyntaxError(err, self.filename, pos[0],
-                                          pos[1] + (err.offset or 0))
-
-        stream = _transform(self.stream)
-
-        # Apply output filters
-        for filter_ in self.output_filters:
-            stream = filter_(iter(stream), ctxt)
-
-        return Stream(stream)
-
     _FULL_EXPR_RE = re.compile(r'(?<!\$)\$\{(.+?)\}')
     _SHORT_EXPR_RE = re.compile(r'(?<!\$)\$([a-zA-Z][a-zA-Z0-9_\.]*)')
 
@@ -753,7 +677,7 @@
         @param offset: the column number at which the text starts in the source
             (optional)
         """
-        patterns = [cls._FULL_EXPR_RE, cls._SHORT_EXPR_RE]
+        patterns = [Template._FULL_EXPR_RE, Template._SHORT_EXPR_RE]
         def _interpolate(text):
             for idx, group in enumerate(patterns.pop(0).split(text)):
                 if idx % 2:
@@ -768,6 +692,134 @@
         return _interpolate(text)
     _interpolate = classmethod(_interpolate)
 
+    def generate(self, ctxt=None):
+        """Transform the template based on the given context data."""
+        if ctxt is None:
+            ctxt = Context()
+        if not hasattr(ctxt, '_match_templates'):
+            ctxt._match_templates = []
+
+        return Stream(self._flatten(self.stream, ctxt))
+
+    def _eval(self, stream, ctxt=None):
+        for kind, data, pos in stream:
+
+            if kind is Stream.START:
+                # Attributes may still contain expressions in start tags at
+                # this point, so do some evaluation
+                tag, attrib = data
+                new_attrib = []
+                for name, substream in attrib:
+                    if isinstance(substream, basestring):
+                        value = substream
+                    else:
+                        values = []
+                        for subkind, subdata, subpos in substream:
+                            if subkind is Template.EXPR:
+                                values.append(subdata.evaluate(ctxt))
+                            else:
+                                values.append(subdata)
+                        value = filter(lambda x: x is not None, values)
+                        if not value:
+                            continue
+                    new_attrib.append((name, ''.join(value)))
+                yield kind, (tag, Attributes(new_attrib)), pos
+
+            elif kind is Template.EXPR:
+                result = data.evaluate(ctxt)
+                if result is None:
+                    continue
+
+                # First check for a string, otherwise the iterable test below
+                # succeeds, and the string will be chopped up into individual
+                # characters
+                if isinstance(result, basestring):
+                    yield Stream.TEXT, result, pos
+                else:
+                    # Test if the expression evaluated to an iterable, in which
+                    # case we yield the individual items
+                    try:
+                        yield (Template.SUB, ([], iter(result)), pos)
+                    except TypeError:
+                        # Neither a string nor an iterable, so just pass it
+                        # through
+                        yield Stream.TEXT, unicode(result), pos
+
+            else:
+                yield kind, data, pos
+
+    def _flatten(self, stream, ctxt=None, apply_filters=True):
+        if apply_filters:
+            for filter_ in self.filters:
+                stream = filter_(iter(stream), ctxt)
+        try:
+            for kind, data, pos in stream:
+                if kind is Template.SUB:
+                    # This event is a list of directives and a list of
+                    # nested events to which those directives should be
+                    # applied
+                    directives, substream = data
+                    directives.reverse()
+                    for directive in directives:
+                        substream = directive(iter(substream), ctxt)
+                    for event in self._flatten(substream, ctxt):
+                        yield event
+                        continue
+                else:
+                    yield kind, data, pos
+        except SyntaxError, err:
+            raise TemplateSyntaxError(err, self.filename, pos[0],
+                                      pos[1] + (err.offset or 0))
+
+    def _match(self, stream, ctxt=None):
+        for kind, data, pos in stream:
+
+            # We (currently) only care about start and end events for matching
+            # We might care about namespace events in the future, though
+            if kind not in (Stream.START, Stream.END):
+                yield kind, data, pos
+                continue
+
+            for idx, (test, path, template) in enumerate(ctxt._match_templates):
+                if (kind, data, pos) in template[::len(template)]:
+                    # This is the event this match template produced itself, so
+                    # matching it again would result in an infinite loop
+                    continue
+
+                result = test(kind, data, pos)
+
+                if result:
+                    # Consume and store all events until an end event
+                    # corresponding to this start event is encountered
+                    content = [(kind, data, pos)]
+                    depth = 1
+                    while depth > 0:
+                        event = stream.next()
+                        if event[0] is Stream.START:
+                            depth += 1
+                        elif event[0] is Stream.END:
+                            depth -= 1
+                        content.append(event)
+
+                        # enable the path to keep track of the stream state
+                        test(*event)
+
+                    content = list(self._flatten(content, ctxt, apply_filters=False))
+
+                    def _apply(stream, ctxt):
+                        stream = list(stream)
+                        ctxt.push(select=lambda path: Stream(stream).select(path))
+                        for event in template:
+                            yield event
+                        ctxt.pop()
+
+                    yield (Template.SUB,
+                           ([lambda stream, ctxt: _apply(content, ctxt)],
+                            []), content[0][-1])
+                    break
+            else:
+                yield kind, data, pos
+
 
 class TemplateLoader(object):
     """Responsible for loading templates from files on the specified search
@@ -841,7 +893,7 @@
                 fileobj = file(filepath, 'rt')
                 try:
                     tmpl = Template(fileobj, filename=filepath)
-                    tmpl.input_filters.append(IncludeFilter(self, tmpl))
+                    tmpl.filters.append(IncludeFilter(self))
                 finally:
                     fileobj.close()
                 self._cache[filename] = tmpl
Copyright (C) 2012-2017 Edgewall Software