Mercurial > genshi > mirror
view genshi/template/markup.py @ 704:422d0607ba85 experimental-match-fastpaths
further performance improvements to MatchSet functionality - factor out MatchSet's State so that we dont' have to keep copying over the state every time we create a new child MatchSet.
Also fixes a bug where we could get duplicate match indexes when a new match is created while evaluating an existing match
Also, try to be lazy about creating the first MatchSet so that we don't hurt bigtable performance (i.e. when there are no py:matches in play at all)
author | aflett |
---|---|
date | Fri, 04 Apr 2008 16:57:27 +0000 |
parents | af57b12e3dd2 |
children | 0e8b92905741 |
line wrap: on
line source
# -*- coding: utf-8 -*- # # Copyright (C) 2006-2007 Edgewall Software # All rights reserved. # # This software is licensed as described in the file COPYING, which # you should have received as part of this distribution. The terms # are also available at http://genshi.edgewall.org/wiki/License. # # This software consists of voluntary contributions made by many # individuals. For the exact contribution history, see the revision # history and logs, available at http://genshi.edgewall.org/log/. """Markup templating engine.""" from itertools import chain from genshi.core import Attrs, Markup, Namespace, Stream, StreamEventKind from genshi.core import START, END, START_NS, END_NS, TEXT, PI, COMMENT from genshi.input import XMLParser from genshi.template.base import BadDirectiveError, Template, \ TemplateSyntaxError, _apply_directives, \ EXEC, INCLUDE, SUB from genshi.template.eval import Suite from genshi.template.interpolation import interpolate from genshi.template.directives import * from genshi.template.text import NewTextTemplate __all__ = ['MarkupTemplate'] __docformat__ = 'restructuredtext en' class MarkupTemplate(Template): """Implementation of the template language for XML-based templates. >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/"> ... <li py:for="item in items">${item}</li> ... </ul>''') >>> print tmpl.generate(items=[1, 2, 3]) <ul> <li>1</li><li>2</li><li>3</li> </ul> """ DIRECTIVE_NAMESPACE = Namespace('http://genshi.edgewall.org/') XINCLUDE_NAMESPACE = Namespace('http://www.w3.org/2001/XInclude') directives = [('def', DefDirective), ('match', MatchDirective), ('when', WhenDirective), ('otherwise', OtherwiseDirective), ('for', ForDirective), ('if', IfDirective), ('choose', ChooseDirective), ('with', WithDirective), ('replace', ReplaceDirective), ('content', ContentDirective), ('attrs', AttrsDirective), ('strip', StripDirective)] serializer = 'xml' _number_conv = Markup def __init__(self, source, basedir=None, filename=None, loader=None, encoding=None, lookup='strict', allow_exec=True): Template.__init__(self, source, basedir=basedir, filename=filename, loader=loader, encoding=encoding, lookup=lookup, allow_exec=allow_exec) # Make sure the include filter comes after the match filter if loader: self.filters.remove(self._include) self.filters += [self._match] if loader: self.filters.append(self._include) def _parse(self, source, encoding): streams = [[]] # stacked lists of events of the "compiled" template dirmap = {} # temporary mapping of directives to elements ns_prefix = {} depth = 0 fallbacks = [] includes = [] if not isinstance(source, Stream): source = XMLParser(source, filename=self.filename, encoding=encoding) for kind, data, pos in source: stream = streams[-1] if kind is START_NS: # Strip out the namespace declaration for template directives prefix, uri = data ns_prefix[prefix] = uri if uri not in (self.DIRECTIVE_NAMESPACE, self.XINCLUDE_NAMESPACE): stream.append((kind, data, pos)) elif kind is END_NS: uri = ns_prefix.pop(data, None) if uri and uri not in (self.DIRECTIVE_NAMESPACE, self.XINCLUDE_NAMESPACE): stream.append((kind, data, pos)) elif kind is START: # Record any directive attributes in start tags tag, attrs = data directives = [] strip = False if tag in self.DIRECTIVE_NAMESPACE: cls = self._dir_by_name.get(tag.localname) if cls is None: raise BadDirectiveError(tag.localname, self.filepath, pos[1]) args = dict([(name.localname, value) for name, value in attrs if not name.namespace]) directives.append((cls, args, ns_prefix.copy(), pos)) strip = True new_attrs = [] for name, value in attrs: if name in self.DIRECTIVE_NAMESPACE: cls = self._dir_by_name.get(name.localname) if cls is None: raise BadDirectiveError(name.localname, self.filepath, pos[1]) directives.append((cls, value, ns_prefix.copy(), pos)) else: if value: value = list(interpolate(value, self.basedir, pos[0], pos[1], pos[2], lookup=self.lookup)) if len(value) == 1 and value[0][0] is TEXT: value = value[0][1] else: value = [(TEXT, u'', pos)] new_attrs.append((name, value)) new_attrs = Attrs(new_attrs) if directives: index = self._dir_order.index directives.sort(lambda a, b: cmp(index(a[0]), index(b[0]))) dirmap[(depth, tag)] = (directives, len(stream), strip) if tag in self.XINCLUDE_NAMESPACE: if tag.localname == 'include': include_href = new_attrs.get('href') if not include_href: raise TemplateSyntaxError('Include misses required ' 'attribute "href"', self.filepath, *pos[1:]) includes.append((include_href, new_attrs.get('parse'))) streams.append([]) elif tag.localname == 'fallback': streams.append([]) fallbacks.append(streams[-1]) else: stream.append((kind, (tag, new_attrs), pos)) depth += 1 elif kind is END: depth -= 1 if fallbacks and data == self.XINCLUDE_NAMESPACE['fallback']: assert streams.pop() is fallbacks[-1] elif data == self.XINCLUDE_NAMESPACE['include']: fallback = None if len(fallbacks) == len(includes): fallback = fallbacks.pop() streams.pop() # discard anything between the include tags # and the fallback element stream = streams[-1] href, parse = includes.pop() try: cls = { 'xml': MarkupTemplate, 'text': NewTextTemplate }[parse or 'xml'] except KeyError: raise TemplateSyntaxError('Invalid value for "parse" ' 'attribute of include', self.filepath, *pos[1:]) stream.append((INCLUDE, (href, cls, fallback), pos)) else: stream.append((kind, data, pos)) # If there have have directive attributes with the corresponding # start tag, move the events inbetween into a "subprogram" if (depth, data) in dirmap: directives, start_offset, strip = dirmap.pop((depth, data)) substream = stream[start_offset:] if strip: substream = substream[1:-1] stream[start_offset:] = [(SUB, (directives, substream), pos)] elif kind is PI and data[0] == 'python': if not self.allow_exec: raise TemplateSyntaxError('Python code blocks not allowed', self.filepath, *pos[1:]) try: suite = Suite(data[1], self.filepath, pos[1], lookup=self.lookup) except SyntaxError, err: raise TemplateSyntaxError(err, self.filepath, pos[1] + (err.lineno or 1) - 1, pos[2] + (err.offset or 0)) stream.append((EXEC, suite, pos)) elif kind is TEXT: for kind, data, pos in interpolate(data, self.basedir, pos[0], pos[1], pos[2], lookup=self.lookup): stream.append((kind, data, pos)) elif kind is COMMENT: if not data.lstrip().startswith('!'): stream.append((kind, data, pos)) else: stream.append((kind, data, pos)) assert len(streams) == 1 return streams[0] def _match(self, stream, ctxt, match_set=None, **vars): """Internal stream filter that applies any defined match templates to the stream. """ if match_set is None: match_set = ctxt._match_set tail = [] def _strip(stream): depth = 1 while 1: event = stream.next() if event[0] is START: depth += 1 elif event[0] is END: depth -= 1 if depth > 0: yield event else: tail[:] = [event] break for event in stream: # We (currently) only care about start and end events for matching # We might care about namespace events in the future, though if not match_set or (event[0] is not START and event[0] is not END): yield event continue match_candidates = list(match_set.find_matches(event)) for idx, match_template in enumerate(match_candidates): (test, path, template, hints, namespaces, directives) = \ match_template if test(event, namespaces, ctxt) is True: post_match_templates = \ match_set.after_template(match_template) if 'match_once' in hints: # need to save this before we nuke # match_template from match_set pre_match_templates = \ match_set.before_template(match_template, False) # forcibly remove this template from this and # all child match sets match_set.remove(match_template) del match_candidates[idx] idx -= 1 else: inclusive = True if 'not_recursive' in hints: inclusive=False pre_match_templates = match_set.before_template(match_template, inclusive) # Let the remaining match templates know about the event so # they get a chance to update their internal state for test in [mt[0] for mt in match_candidates[idx + 1:]]: test(event, namespaces, ctxt, updateonly=True) # Consume and store all events until an end event # corresponding to this start event is encountered inner = _strip(stream) if pre_match_templates: inner = self._match(inner, ctxt, pre_match_templates) content = self._include(chain([event], inner, tail), ctxt) if 'not_buffered' not in hints: content = list(content) # Now tell all the match templates about the # END event (tail[0]) if tail: for test in [mt[0] for mt in match_candidates]: test(tail[0], namespaces, ctxt, updateonly=True) # Make the select() function available in the body of the # match template def select(path): return Stream(content).select(path, namespaces, ctxt) vars = dict(select=select) # Recursively process the output template = _apply_directives(template, directives, ctxt, **vars) for event in self._match( self._exec( self._eval( self._flatten(template, ctxt, **vars), ctxt, **vars), ctxt, **vars), ctxt, post_match_templates, **vars): yield event break else: # no matches yield event