# HG changeset patch # User aflett # Date 1207328247 0 # Node ID 422d0607ba85219e92b258cff041ddef60b5f0f3 # Parent af57b12e3dd2e0f7a9450893a0c6527689cf3da6 further performance improvements to MatchSet functionality - factor out MatchSet's State so that we dont' have to keep copying over the state every time we create a new child MatchSet. Also fixes a bug where we could get duplicate match indexes when a new match is created while evaluating an existing match Also, try to be lazy about creating the first MatchSet so that we don't hurt bigtable performance (i.e. when there are no py:matches in play at all) diff --git a/genshi/template/base.py b/genshi/template/base.py --- a/genshi/template/base.py +++ b/genshi/template/base.py @@ -25,7 +25,6 @@ from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure from genshi.input import ParseError -from genshi.template.match import MatchSet __all__ = ['Context', 'Template', 'TemplateError', 'TemplateRuntimeError', 'TemplateSyntaxError', 'BadDirectiveError'] @@ -137,7 +136,7 @@ self.frames = deque([data]) self.pop = self.frames.popleft self.push = self.frames.appendleft - self._match_set = MatchSet() + self._match_set = None self._choice_stack = [] # Helper functions for use in expressions diff --git a/genshi/template/directives.py b/genshi/template/directives.py --- a/genshi/template/directives.py +++ b/genshi/template/directives.py @@ -26,6 +26,7 @@ _exec_suite from genshi.template.eval import Expression, Suite, ExpressionASTTransformer, \ _parse +from genshi.template.match import MatchSet __all__ = ['AttrsDirective', 'ChooseDirective', 'ContentDirective', 'DefDirective', 'ForDirective', 'IfDirective', 'MatchDirective', @@ -456,6 +457,10 @@ attach = classmethod(attach) def __call__(self, stream, directives, ctxt, **vars): + if ctx._match_set is None: + # lazily create MatchSet so that it doesn't burden the + # _match filter when there are no py:matches defined. + ctx._match_set = MatchSet() ctxt._match_set.add((self.path.test(ignore_context=True), self.path, list(stream), self.hints, self.namespaces, directives)) diff --git a/genshi/template/markup.py b/genshi/template/markup.py --- a/genshi/template/markup.py +++ b/genshi/template/markup.py @@ -25,7 +25,6 @@ from genshi.template.interpolation import interpolate from genshi.template.directives import * from genshi.template.text import NewTextTemplate -from genshi.template.match import MatchSet __all__ = ['MarkupTemplate'] __docformat__ = 'restructuredtext en' diff --git a/genshi/template/match.py b/genshi/template/match.py --- a/genshi/template/match.py +++ b/genshi/template/match.py @@ -17,6 +17,92 @@ return False +class MatchState(object): + """ This is a container for all py:match's found during parsing. It + does not provide a way to slice and dice which templates + match. The main interfaces are ``add``, ``remove``, and + ``find_raw_matches`` + + this class maintains a hash, ``self.match_order`` which maps a + template to its order, to make sure that events can be returned in + order + + """ + def __init__(self): + # merely for indexing. Note that this is shared between + # all MatchSets that share the same root parent. We don't + # have to worry about exclusions here + self.match_order = {} + + # tag_templates are match templates whose path are simply + # a tag, like "body" or "img" + self.tag_templates = {} + + # other_templates include all other match templates, such + # as ones with complex paths like "[class=container]" + self.other_templates = [] + + self.current_index = 0 + + def add(self, match_template): + """ + Add a template to the match state + """ + # match_templates are currently tuples that contain unhashable + # objects. So we'll use id() for now. + self.match_order[id(match_template)] = self.current_index + self.current_index += 1 + + path = match_template[1] + + if is_simple_path(path): + # special cache of tag + tag_name = path.paths[0][0][1].name + # setdefault is wasteful + if tag_name not in self.tag_templates: + self.tag_templates[tag_name] = [match_template] + else: + self.tag_templates[tag_name].append(match_template) + + else: + self.other_templates.append(match_template) + + def remove(self, match_template): + """ + Remove the template permanently + """ + path = match_template[1] + + if is_simple_path(path): + tag_name = path.paths[0][0][1].name + if tag_name in self.tag_templates: + template_list = self.tag_templates[tag_name] + template_list.remove(match_template) + if not template_list: + del self.tag_templates[tag_name] + + else: + self.other_templates.remove(match_template) + + # clean up match_order + del self.match_order[id(match_template)] + + def find_raw_matches(self, event): + """ Return a list of all valid templates that can be used for the + given event. Ordering is funky because we first check + self.tag_templates, then check self.other_templates. + """ + kind, data, pos = event[:3] + + # todo: get the order right + if kind is START: + tag, attrs = data + if tag.localname in self.tag_templates: + for template in self.tag_templates[tag.localname]: + yield template + + for template in self.other_templates: + yield template class MatchSet(object): """ A MatchSet is a set of matches discovered by the parser. This @@ -44,34 +130,21 @@ set. """ - self.parent = parent + if parent is None: + self.state = MatchState() + else: + self.state = parent.state if parent is None: - # merely for indexing. Note that this is shared between - # all MatchSets that share the same root parent. We don't - # have to worry about exclusions here - self.match_order = {} - self.min_index = None self.max_index = None - - # tag_templates are match templates whose path are simply - # a tag, like "body" or "img" - self.tag_templates = {} - - # other_templates include all other match templates, such - # as ones with complex paths like "[class=container]" - self.other_templates = [] - else: # We have a parent: Just copy references to member # variables in parent so that there's no performance loss self.max_index = parent.max_index self.min_index = parent.min_index - self.match_order = parent.match_order - self.tag_templates = parent.tag_templates - self.other_templates = parent.other_templates - + + # sub-MatchSets can only be further constrained, not expanded if max_index is not None: assert self.max_index is None or max_index <= self.max_index self.max_index = max_index @@ -80,63 +153,28 @@ assert self.min_index is None or min_index > self.min_index self.min_index = min_index - def add(self, match_template): """ match_template is a tuple the form test, path, template, hints, namespace, directives """ - - # match_templates are currently tuples that contain unhashable - # objects. So we'll use id() for now. - self.match_order[id(match_template)] = len(self.match_order) - - path = match_template[1] + self.state.add(match_template) - if is_simple_path(path): - # special cache of tag - tag_name = path.paths[0][0][1].name - # setdefault is wasteful - if tag_name not in self.tag_templates: - self.tag_templates[tag_name] = [match_template] - else: - self.tag_templates[tag_name].append(match_template) - - else: - self.other_templates.append(match_template) def remove(self, match_template): """ Permanently remove a match_template - mainly for match_once - """ - path = match_template[1] - - if is_simple_path(path): - tag_name = path.paths[0][0][1].name - if tag_name in self.tag_templates: - template_list = self.tag_templates[tag_name] - template_list.remove(match_template) - if not template_list: - del self.tag_templates[tag_name] - - else: - self.other_templates.remove(match_template) - - # clean up match_order - del self.match_order[id(match_template)] - - def single_match(cls, match_template): - """ - Factory for creating a MatchSet with just one match - """ - match_set = cls() - match_set.add(match_template) - return match_set - single_match = classmethod(single_match) + """ + self.state.remove(match_template) def before_template(self, match_template, inclusive): + """ Return a new MatchSet where only the templates that were declared + before ``match_template`` are available. If ``inclusive`` is + true, then it will also include match_template itself. + + """ cls = type(self) - max_index = self.match_order[id(match_template)] + max_index = self.state.match_order[id(match_template)] if not inclusive: max_index -= 1 return cls(parent=self, max_index=max_index) @@ -147,25 +185,9 @@ the given match """ cls = type(self) - min_index = self.match_order[id(match_template)] + 1 + min_index = self.state.match_order[id(match_template)] + 1 return cls(parent=self, min_index=min_index) - def find_raw_matches(self, event): - """ Return a list of all valid templates that can be used for the - given event. Ordering is funky because we first check - self.tag_templates, then check self.other_templates. - """ - kind, data, pos = event[:3] - - # todo: get the order right - if kind is START: - tag, attrs = data - if tag.localname in self.tag_templates: - for template in self.tag_templates[tag.localname]: - yield template - - for template in self.other_templates: - yield template def find_matches(self, event): """ Return a list of all valid templates that can be used for the @@ -173,46 +195,48 @@ The basic work here is sorting the result of find_raw_matches """ - # remove exclusions def can_match(template): # make sure that if (self.min_index is not None and - self.match_order[id(template)] < self.min_index): + self.state.match_order[id(template)] < self.min_index): return False if (self.max_index is not None and - self.match_order[id(template)] > self.max_index): + self.state.match_order[id(template)] > self.max_index): return False return True matches = ifilter(can_match, - self.find_raw_matches(event)) + self.state.find_raw_matches(event)) # sort the results according to the order they were added - return sorted(matches, key=lambda v: self.match_order[id(v)]) + return sorted(matches, key=lambda v: self.state.match_order[id(v)]) def __nonzero__(self): """ - allow this to behave as a list + allow this to behave as a list - and at least try to act empty if + the list is likely to be empty """ - + # dirt-simple case: no py:match templates at all + if not (self.state.tag_templates or self.state.other_templates): + return False # this is easy - before the first element there is nothing if self.max_index == -1: return False # this isn't always right because match_order may shrink, but # you'll never get a false-negative - if self.min_index == len(self.match_order): + if self.min_index == self.state.current_index: return False # check for a range that is completely constrained if self.min_index is not None and self.max_index is not None: if self.min_index >= self.max_index: return False - - return bool(self.tag_templates or self.other_templates) + + return True def __str__(self): parent = "" @@ -220,6 +244,6 @@ parent = ": child of 0x%x" % id(self.parent) return "" % ( - id(self), len(self.tag_templates), len(self.other_templates), + id(self), len(self.state.tag_templates), len(self.other_templates), self.min_index, self.max_index, parent)