view genshi/template/markup.py @ 717:0e8b92905741 experimental-match-fastpaths

a performance breakthrough - bring this branch in line with the bigtable benchmark by lazily creating ctxt._match_set in a way that doesn't barf
author aflett
date Tue, 08 Apr 2008 23:36:20 +0000
parents 422d0607ba85
children d143dd73789b
line wrap: on
line source
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.

"""Markup templating engine."""

from itertools import chain

from genshi.core import Attrs, Markup, Namespace, Stream, StreamEventKind
from genshi.core import START, END, START_NS, END_NS, TEXT, PI, COMMENT
from genshi.input import XMLParser
from genshi.template.base import BadDirectiveError, Template, \
                                 TemplateSyntaxError, _apply_directives, \
                                 EXEC, INCLUDE, SUB
from genshi.template.eval import Suite
from genshi.template.interpolation import interpolate
from genshi.template.directives import *
from genshi.template.text import NewTextTemplate

__all__ = ['MarkupTemplate']
__docformat__ = 'restructuredtext en'


class MarkupTemplate(Template):
    """Implementation of the template language for XML-based templates.
    
    >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
    ...   <li py:for="item in items">${item}</li>
    ... </ul>''')
    >>> print tmpl.generate(items=[1, 2, 3])
    <ul>
      <li>1</li><li>2</li><li>3</li>
    </ul>
    """

    # Namespace of template directive elements and attributes. Declarations
    # of this namespace are stripped from the compiled stream by `_parse`.
    DIRECTIVE_NAMESPACE = Namespace('http://genshi.edgewall.org/')
    # Namespace of the XInclude elements (`include`, `fallback`) that `_parse`
    # turns into INCLUDE events.
    XINCLUDE_NAMESPACE = Namespace('http://www.w3.org/2001/XInclude')

    # (name, directive class) pairs for the directives this template language
    # understands.
    # NOTE(review): `_parse` looks directives up via `self._dir_by_name` and
    # orders them via `self._dir_order`; both are presumably derived from this
    # list by the `Template` base class -- confirm there.
    directives = [('def', DefDirective),
                  ('match', MatchDirective),
                  ('when', WhenDirective),
                  ('otherwise', OtherwiseDirective),
                  ('for', ForDirective),
                  ('if', IfDirective),
                  ('choose', ChooseDirective),
                  ('with', WithDirective),
                  ('replace', ReplaceDirective),
                  ('content', ContentDirective),
                  ('attrs', AttrsDirective),
                  ('strip', StripDirective)]
    # NOTE(review): presumably the default serialization method used when
    # rendering -- confirm against the `Template` base class.
    serializer = 'xml'
    # NOTE(review): presumably the callable used to convert numbers produced
    # by expressions into output -- confirm against the `Template` base class.
    _number_conv = Markup

    def __init__(self, source, basedir=None, filename=None, loader=None,
                 encoding=None, lookup='strict', allow_exec=True):
        """Initialize the template and register the match filter.

        All arguments are forwarded unchanged to `Template.__init__`.
        Afterwards the `_match` filter is added to `self.filters`; when a
        `loader` was given, the `_include` filter is moved so that it runs
        after the match filter.
        """
        Template.__init__(self, source, basedir=basedir, filename=filename,
                          loader=loader, encoding=encoding, lookup=lookup,
                          allow_exec=allow_exec)

        if not loader:
            # No include filter to reorder: just add the match filter
            self.filters += [self._match]
            return

        # Make sure the include filter comes after the match filter: take it
        # out of its current position and re-add it behind `_match`
        self.filters.remove(self._include)
        self.filters += [self._match, self._include]

    def _parse(self, source, encoding):
        streams = [[]] # stacked lists of events of the "compiled" template
        dirmap = {} # temporary mapping of directives to elements
        ns_prefix = {}
        depth = 0
        fallbacks = []
        includes = []

        if not isinstance(source, Stream):
            source = XMLParser(source, filename=self.filename,
                               encoding=encoding)

        for kind, data, pos in source:
            stream = streams[-1]

            if kind is START_NS:
                # Strip out the namespace declaration for template directives
                prefix, uri = data
                ns_prefix[prefix] = uri
                if uri not in (self.DIRECTIVE_NAMESPACE,
                               self.XINCLUDE_NAMESPACE):
                    stream.append((kind, data, pos))

            elif kind is END_NS:
                uri = ns_prefix.pop(data, None)
                if uri and uri not in (self.DIRECTIVE_NAMESPACE,
                                       self.XINCLUDE_NAMESPACE):
                    stream.append((kind, data, pos))

            elif kind is START:
                # Record any directive attributes in start tags
                tag, attrs = data
                directives = []
                strip = False

                if tag in self.DIRECTIVE_NAMESPACE:
                    cls = self._dir_by_name.get(tag.localname)
                    if cls is None:
                        raise BadDirectiveError(tag.localname, self.filepath,
                                                pos[1])
                    args = dict([(name.localname, value) for name, value
                                 in attrs if not name.namespace])
                    directives.append((cls, args, ns_prefix.copy(), pos))
                    strip = True

                new_attrs = []
                for name, value in attrs:
                    if name in self.DIRECTIVE_NAMESPACE:
                        cls = self._dir_by_name.get(name.localname)
                        if cls is None:
                            raise BadDirectiveError(name.localname,
                                                    self.filepath, pos[1])
                        directives.append((cls, value, ns_prefix.copy(), pos))
                    else:
                        if value:
                            value = list(interpolate(value, self.basedir,
                                                     pos[0], pos[1], pos[2],
                                                     lookup=self.lookup))
                            if len(value) == 1 and value[0][0] is TEXT:
                                value = value[0][1]
                        else:
                            value = [(TEXT, u'', pos)]
                        new_attrs.append((name, value))
                new_attrs = Attrs(new_attrs)

                if directives:
                    index = self._dir_order.index
                    directives.sort(lambda a, b: cmp(index(a[0]), index(b[0])))
                    dirmap[(depth, tag)] = (directives, len(stream), strip)

                if tag in self.XINCLUDE_NAMESPACE:
                    if tag.localname == 'include':
                        include_href = new_attrs.get('href')
                        if not include_href:
                            raise TemplateSyntaxError('Include misses required '
                                                      'attribute "href"',
                                                      self.filepath, *pos[1:])
                        includes.append((include_href, new_attrs.get('parse')))
                        streams.append([])
                    elif tag.localname == 'fallback':
                        streams.append([])
                        fallbacks.append(streams[-1])

                else:
                    stream.append((kind, (tag, new_attrs), pos))

                depth += 1

            elif kind is END:
                depth -= 1

                if fallbacks and data == self.XINCLUDE_NAMESPACE['fallback']:
                    assert streams.pop() is fallbacks[-1]
                elif data == self.XINCLUDE_NAMESPACE['include']:
                    fallback = None
                    if len(fallbacks) == len(includes):
                        fallback = fallbacks.pop()
                    streams.pop() # discard anything between the include tags
                                  # and the fallback element
                    stream = streams[-1]
                    href, parse = includes.pop()
                    try:
                        cls = {
                            'xml': MarkupTemplate,
                            'text': NewTextTemplate
                        }[parse or 'xml']
                    except KeyError:
                        raise TemplateSyntaxError('Invalid value for "parse" '
                                                  'attribute of include',
                                                  self.filepath, *pos[1:])
                    stream.append((INCLUDE, (href, cls, fallback), pos))
                else:
                    stream.append((kind, data, pos))

                # If there have have directive attributes with the corresponding
                # start tag, move the events inbetween into a "subprogram"
                if (depth, data) in dirmap:
                    directives, start_offset, strip = dirmap.pop((depth, data))
                    substream = stream[start_offset:]
                    if strip:
                        substream = substream[1:-1]
                    stream[start_offset:] = [(SUB, (directives, substream),
                                              pos)]

            elif kind is PI and data[0] == 'python':
                if not self.allow_exec:
                    raise TemplateSyntaxError('Python code blocks not allowed',
                                              self.filepath, *pos[1:])
                try:
                    suite = Suite(data[1], self.filepath, pos[1],
                                  lookup=self.lookup)
                except SyntaxError, err:
                    raise TemplateSyntaxError(err, self.filepath,
                                              pos[1] + (err.lineno or 1) - 1,
                                              pos[2] + (err.offset or 0))
                stream.append((EXEC, suite, pos))

            elif kind is TEXT:
                for kind, data, pos in interpolate(data, self.basedir, pos[0],
                                                   pos[1], pos[2],
                                                   lookup=self.lookup):
                    stream.append((kind, data, pos))

            elif kind is COMMENT:
                if not data.lstrip().startswith('!'):
                    stream.append((kind, data, pos))

            else:
                stream.append((kind, data, pos))

        assert len(streams) == 1
        return streams[0]

    def _match(self, stream, ctxt, match_set=None, **vars):
        """Internal stream filter that applies any defined match templates
        to the stream.

        This is a generator. Events that are neither ``START`` nor ``END``,
        and all events while there is no (or an empty) match set, are passed
        through unchanged. For a ``START``/``END`` event the candidates
        returned by ``match_set.find_matches(event)`` are tested in order;
        for the first one whose test returns ``True`` the matched element is
        consumed from `stream` and the output of the match template is
        yielded in its place. If no candidate matches, the event is yielded
        as is.

        :param stream: the event stream to filter
        :param ctxt: the template context; its ``_match_set`` attribute is
                     used while `match_set` is ``None``
        :param match_set: the match set to apply; when ``None``,
                          ``ctxt._match_set`` is looked up again for every
                          event until it yields something other than ``None``
        :param vars: accepted but never read; when a match template is
                     applied the name is rebound to ``dict(select=select)``
        :return: a generator over the resulting events
        """
        # Shared with `_strip`: receives the event that closes the matched
        # element, so it can be chained after the inner events below
        tail = []
        def _strip(stream):
            # Yield the events nested inside an already-opened element
            # (hence depth starts at 1). The event that brings the depth
            # back to 0 is not yielded but stored in `tail`.
            depth = 1
            while 1:
                event = stream.next()
                if event[0] is START:
                    depth += 1
                elif event[0] is END:
                    depth -= 1
                if depth > 0:
                    yield event
                else:
                    tail[:] = [event]
                    break

        for event in stream:

            # we may have discovered a py:match while processing the
            # stream.. so keep checking for ctxt._match_set
            if match_set is None:
                match_set = ctxt._match_set

            # We (currently) only care about start and end events for matching
            # We might care about namespace events in the future, though
            if not match_set or (event[0] is not START and
                                 event[0] is not END):
                yield event
                continue

            # Materialized as a list because it is indexed, sliced and
            # possibly modified below
            match_candidates = list(match_set.find_matches(event))
            for idx, match_template in enumerate(match_candidates):
                
                (test, path, template, hints, namespaces, directives) = \
                    match_template
                if test(event, namespaces, ctxt) is True:
                    # NOTE(review): presumably the match set restricted to
                    # the templates following this one; it is applied to the
                    # output of the match template below -- confirm against
                    # the match set implementation
                    post_match_templates = \
                        match_set.after_template(match_template)
                    
                    if 'match_once' in hints:

                        # need to save this before we nuke
                        # match_template from match_set
                        pre_match_templates = \
                            match_set.before_template(match_template, False)
                        
                        # forcibly remove this template from this and
                        # all child match sets
                        match_set.remove(match_template)
                        # Modifying the list under enumerate() is only safe
                        # because the loop is left via `break` below; idx is
                        # adjusted so that `idx + 1` still refers to the
                        # first candidate after the removed one
                        del match_candidates[idx]
                        idx -= 1
                    else:
                        inclusive = True
                        if 'not_recursive' in hints:
                            inclusive=False
                        pre_match_templates = match_set.before_template(match_template, inclusive)

                    # Let the remaining match templates know about the event so
                    # they get a chance to update their internal state
                    for test in [mt[0] for mt in match_candidates[idx + 1:]]:
                        test(event, namespaces, ctxt, updateonly=True)

                    # Consume and store all events until an end event
                    # corresponding to this start event is encountered
                    inner = _strip(stream)
                    if pre_match_templates:
                        inner = self._match(inner, ctxt, pre_match_templates)
                    content = self._include(chain([event], inner, tail), ctxt)
                    # Unless the 'not_buffered' hint is given, the matched
                    # content is read completely before the template runs
                    if 'not_buffered' not in hints:
                        content = list(content)

                    # Now tell all the match templates about the
                    # END event (tail[0])
                    if tail:
                        for test in [mt[0] for mt in match_candidates]:
                            test(tail[0], namespaces, ctxt, updateonly=True)

                    # Make the select() function available in the body of the
                    # match template
                    def select(path):
                        return Stream(content).select(path, namespaces, ctxt)
                    # Note: this replaces whatever was passed in as **vars
                    vars = dict(select=select)

                    # Recursively process the output
                    template = _apply_directives(template, directives, ctxt,
                                                 **vars)
                    for event in self._match(
                            self._exec(
                                self._eval(
                                    self._flatten(template, ctxt, **vars),
                                    ctxt, **vars),
                                ctxt, **vars),
                            ctxt, post_match_templates,
                            **vars):
                        yield event

                    # Only the first matching template is applied to an event
                    break

            else: # no matches
                yield event
Copyright (C) 2012-2017 Edgewall Software