# HG changeset patch # User aflett # Date 1203972244 0 # Node ID 3d7288f373bd9959c05b6a262177c21325906d0d # Parent 92efe764ec819679f586cc99abb86adcb5b1696b land first cut at fast-path matching - needs some cleanup diff --git a/genshi/path.py b/genshi/path.py --- a/genshi/path.py +++ b/genshi/path.py @@ -181,9 +181,15 @@ stream against the path :rtype: ``function`` """ - paths = [(p, len(p), [0], [], [0] * len(p)) for p in [ - (ignore_context and [_DOTSLASHSLASH] or []) + p for p in self.paths - ]] + + # paths is path state that is maintained across calls to _test() + # paths is a list of tuples, one for each segment in the path + paths = [] + for p in self.paths: + if ignore_context: + p = [_DOTSLASHSLASH] + p + path = (p, len(p), [0], [], [0] * len(p)) + paths.append(path) def _test(event, namespaces, variables, updateonly=False): kind, data, pos = event[:3] @@ -279,7 +285,8 @@ if ctxtnode and axis is DESCENDANT_OR_SELF: ctxtnode = False - if (retval or not matched) and kind is START and \ + if (retval or not matched) and \ + kind is START and \ not (axis is DESCENDANT or axis is DESCENDANT_OR_SELF): # If this step is not a closure, it cannot be matched until # the current element is closed... so we need to move the diff --git a/genshi/template/base.py b/genshi/template/base.py --- a/genshi/template/base.py +++ b/genshi/template/base.py @@ -25,6 +25,7 @@ from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure from genshi.input import ParseError +from genshi.template.match import MatchSet __all__ = ['Context', 'Template', 'TemplateError', 'TemplateRuntimeError', 'TemplateSyntaxError', 'BadDirectiveError'] @@ -136,7 +137,7 @@ self.frames = deque([data]) self.pop = self.frames.popleft self.push = self.frames.appendleft - self._match_templates = [] + self._match_set = MatchSet() self._choice_stack = [] # Helper functions for use in expressions diff --git a/genshi/template/directives.py b/genshi/template/directives.py --- a/genshi/template/directives.py +++ b/genshi/template/directives.py @@ -450,9 +450,9 @@ attach = classmethod(attach) def __call__(self, stream, ctxt, directives): - ctxt._match_templates.append((self.path.test(ignore_context=True), - self.path, list(stream), self.hints, - self.namespaces, directives)) + ctxt._match_set.add((self.path.test(ignore_context=True), + self.path, list(stream), self.hints, + self.namespaces, directives)) return [] def __repr__(self): diff --git a/genshi/template/markup.py b/genshi/template/markup.py --- a/genshi/template/markup.py +++ b/genshi/template/markup.py @@ -25,6 +25,7 @@ from genshi.template.interpolation import interpolate from genshi.template.directives import * from genshi.template.text import NewTextTemplate +from genshi.template.match import MatchSet __all__ = ['MarkupTemplate'] __docformat__ = 'restructuredtext en' @@ -225,12 +226,12 @@ assert len(streams) == 1 return streams[0] - def _match(self, stream, ctxt, match_templates=None): + def _match(self, stream, ctxt, match_set=None): """Internal stream filter that applies any defined match templates to the stream. """ - if match_templates is None: - match_templates = ctxt._match_templates + if match_set is None: + match_set = ctxt._match_set tail = [] def _strip(stream): @@ -251,22 +252,25 @@ # We (currently) only care about start and end events for matching # We might care about namespace events in the future, though - if not match_templates or (event[0] is not START and - event[0] is not END): + if not match_set or (event[0] is not START and + event[0] is not END): yield event continue - for idx, (test, path, template, hints, namespaces, directives) \ - in enumerate(match_templates): - + match_candidates = list(match_set.find_matches(event)) + for idx, match_template in enumerate(match_candidates): + + (test, path, template, hints, namespaces, directives) = \ + match_template if test(event, namespaces, ctxt) is True: if 'match_once' in hints: - del match_templates[idx] + match_set.remove(match_template) + del match_candidates[idx] idx -= 1 # Let the remaining match templates know about the event so # they get a chance to update their internal state - for test in [mt[0] for mt in match_templates[idx + 1:]]: + for test in [mt[0] for mt in match_candidates[idx + 1:]]: test(event, namespaces, ctxt, updateonly=True) # Consume and store all events until an end event @@ -274,11 +278,14 @@ inner = _strip(stream) if 'match_once' not in hints \ and 'not_recursive' not in hints: - inner = self._match(inner, ctxt, [match_templates[idx]]) + inner = self._match(inner, ctxt, + MatchSet.single_match(match_template)) content = list(self._include(chain([event], inner, tail), ctxt)) - for test in [mt[0] for mt in match_templates]: + # Now tell all the match templates about the + # END event (tail[0]) + for test in [mt[0] for mt in match_candidates]: test(tail[0], namespaces, ctxt, updateonly=True) # Make the select() function available in the body of the @@ -289,12 +296,14 @@ # Recursively process the output template = _apply_directives(template, ctxt, directives) - remaining = match_templates + remaining = match_set if 'match_once' not in hints: - remaining = remaining[:idx] + remaining[idx + 1:] - for event in self._match(self._exec( - self._eval(self._flatten(template, ctxt), - ctxt), ctxt), ctxt, remaining): + # match has not been removed, so we need an exclusion matchset + remaining = match_set.with_exclusion(match_template) + + body = self._exec(self._eval(self._flatten(template, ctxt), + ctxt), ctxt) + for event in self._match(body, ctxt, remaining): yield event ctxt.pop() diff --git a/genshi/template/match.py b/genshi/template/match.py new file mode 100644 --- /dev/null +++ b/genshi/template/match.py @@ -0,0 +1,140 @@ +from genshi.core import START +from genshi.path import CHILD, LocalNameTest + +from copy import copy + +def is_simple_path(path): + """ + Is the path merely a tag match like "foo"? + """ + if len(path.paths) == 1 and len(path.paths[0]) == 1: + axis, nodetest, predicates = path.paths[0][0] + if (axis is CHILD and + not predicates and + isinstance(nodetest, LocalNameTest)): + return True + + return False + + +class MatchSet(object): + + def __init__(self, parent=None, exclude=None): + """ + If a parent is given, it means this is a wrapper around another + set. Just copy references to member variables in parent, but + also set exclude + """ + self.parent = parent + if parent is None: + self.tag_templates = {} + self.other_templates = [] + self.exclude = [] + if exclude is not None: + self.exclude.append(exclude) + else: + self.tag_templates = parent.tag_templates + self.other_templates = parent.other_templates + self.exclude = copy(parent.exclude) + if exclude is not None: + self.exclude.append(exclude) + + def add(self, match_template): + """ + match_template is a tuple the form + test, path, template, hints, namespace, directives + """ + path = match_template[1] + + if is_simple_path(path): + # special cache of tag + tag_name = path.paths[0][0][1].name + # setdefault is wasteful + if tag_name not in self.tag_templates: + self.tag_templates[tag_name] = [match_template] + else: + self.tag_templates[tag_name].append(match_template) + + else: + self.other_templates.append(match_template) + + def remove(self, match_template): + """ + Permanently remove a match_template - mainly for match_once + """ + path = match_template[1] + + if is_simple_path(path): + tag_name = path.paths[0][0][1].name + if tag_name in self.tag_templates: + template_list = self.tag_templates[tag_name] + template_list.remove(match_template) + if not template_list: + del self.tag_templates[tag_name] + + else: + self.other_templates.remove(match_template) + + def single_match(cls, match_template): + """ + Factory for creating a MatchSet with just one match + """ + match_set = cls() + match_set.add(match_template) + return match_set + single_match = classmethod(single_match) + + def with_exclusion(self, exclude): + """ + Factory for creating a MatchSet based on another MatchSet, but + with certain templates excluded + """ + cls = self.__class__ + new_match_set = cls(parent=self, exclude=exclude) + return new_match_set + + def find_matches(self, event): + """ + Return a list of all valid templates that can be used for the given event. + """ + kind, data, pos = event[:3] + + # todo: get the order right + if kind is START: + tag, attrs = data + if tag.localname in self.tag_templates: + for template in self.tag_templates[tag.localname]: + if template not in self.exclude: + yield template + + for template in self.other_templates: + if template not in self.exclude: + yield template + + + def __nonzero__(self): + """ + allow this to behave as a list + """ + return bool(self.tag_templates or self.other_templates) + + def __iter__(self): + """ + I don't think we really need this, but it lets us behave like a list + """ + for template_list in self.tag_templates.iteritems(): + for template in template_list: + yield template + for template in self.other_templates: + yield template + + def __str__(self): + parent = "" + if self.parent: + parent = ": child of 0x%x" % id(self.parent) + + exclude = "" + if self.exclude: + exclude = " / excluding %d items" % len(self.exclude) + + return "" % (id(self), len(self.tag_templates), len(self.other_templates), parent, exclude)