Mercurial > genshi > mirror
view genshi/template/match.py @ 704:422d0607ba85 experimental-match-fastpaths
further performance improvements to MatchSet functionality - factor out MatchSet's State so that we dont' have to keep copying over the state every time we create a new child MatchSet.
Also fixes a bug where we could get duplicate match indexes when a new match is created while evaluating an existing match
Also, try to be lazy about creating the first MatchSet so that we don't hurt bigtable performance (i.e. when there are no py:matches in play at all)
author | aflett |
---|---|
date | Fri, 04 Apr 2008 16:57:27 +0000 |
parents | af57b12e3dd2 |
children | b33c14a1f579 |
line wrap: on
line source
from genshi.core import START from genshi.path import CHILD, LocalNameTest from copy import copy from itertools import ifilter def is_simple_path(path): """ Is the path merely a tag match like "foo"? """ if len(path.paths) == 1 and len(path.paths[0]) == 1: axis, nodetest, predicates = path.paths[0][0] if (axis is CHILD and not predicates and isinstance(nodetest, LocalNameTest)): return True return False class MatchState(object): """ This is a container for all py:match's found during parsing. It does not provide a way to slice and dice which templates match. The main interfaces are ``add``, ``remove``, and ``find_raw_matches`` this class maintains a hash, ``self.match_order`` which maps a template to its order, to make sure that events can be returned in order """ def __init__(self): # merely for indexing. Note that this is shared between # all MatchSets that share the same root parent. We don't # have to worry about exclusions here self.match_order = {} # tag_templates are match templates whose path are simply # a tag, like "body" or "img" self.tag_templates = {} # other_templates include all other match templates, such # as ones with complex paths like "[class=container]" self.other_templates = [] self.current_index = 0 def add(self, match_template): """ Add a template to the match state """ # match_templates are currently tuples that contain unhashable # objects. So we'll use id() for now. self.match_order[id(match_template)] = self.current_index self.current_index += 1 path = match_template[1] if is_simple_path(path): # special cache of tag tag_name = path.paths[0][0][1].name # setdefault is wasteful if tag_name not in self.tag_templates: self.tag_templates[tag_name] = [match_template] else: self.tag_templates[tag_name].append(match_template) else: self.other_templates.append(match_template) def remove(self, match_template): """ Remove the template permanently """ path = match_template[1] if is_simple_path(path): tag_name = path.paths[0][0][1].name if tag_name in self.tag_templates: template_list = self.tag_templates[tag_name] template_list.remove(match_template) if not template_list: del self.tag_templates[tag_name] else: self.other_templates.remove(match_template) # clean up match_order del self.match_order[id(match_template)] def find_raw_matches(self, event): """ Return a list of all valid templates that can be used for the given event. Ordering is funky because we first check self.tag_templates, then check self.other_templates. """ kind, data, pos = event[:3] # todo: get the order right if kind is START: tag, attrs = data if tag.localname in self.tag_templates: for template in self.tag_templates[tag.localname]: yield template for template in self.other_templates: yield template class MatchSet(object): """ A MatchSet is a set of matches discovered by the parser. This class encapsulates the matching of a particular event to a set of matches. It is optimized for basic tag matches, since that is by far the most common use of py:match. The two primary entry points into MatchSet are ``add``, which adds a new py:match, and ``find_matches``, which returns all /candidate/ match templates. The consumer of ``find_matches`` still must call each candidates' match() to ensure the event really matches, and to maintain state within the match. If a given py:match's path is simply a node name match, (LocalNameTest) like "xyz", then MatchSet indexes that in a dictionary that maps tag names to matches. If the path is more complex like "xyz[k=z]" then then that match will always be returned by ``find_matches``. """ def __init__(self, parent=None, min_index=None, max_index=None): """ If a parent is given, it means this is a wrapper around another set. """ if parent is None: self.state = MatchState() else: self.state = parent.state if parent is None: self.min_index = None self.max_index = None else: # We have a parent: Just copy references to member # variables in parent so that there's no performance loss self.max_index = parent.max_index self.min_index = parent.min_index # sub-MatchSets can only be further constrained, not expanded if max_index is not None: assert self.max_index is None or max_index <= self.max_index self.max_index = max_index if min_index is not None: assert self.min_index is None or min_index > self.min_index self.min_index = min_index def add(self, match_template): """ match_template is a tuple the form test, path, template, hints, namespace, directives """ self.state.add(match_template) def remove(self, match_template): """ Permanently remove a match_template - mainly for match_once """ self.state.remove(match_template) def before_template(self, match_template, inclusive): """ Return a new MatchSet where only the templates that were declared before ``match_template`` are available. If ``inclusive`` is true, then it will also include match_template itself. """ cls = type(self) max_index = self.state.match_order[id(match_template)] if not inclusive: max_index -= 1 return cls(parent=self, max_index=max_index) def after_template(self, match_template): """ Factory for creating a MatchSet that only matches templates after the given match """ cls = type(self) min_index = self.state.match_order[id(match_template)] + 1 return cls(parent=self, min_index=min_index) def find_matches(self, event): """ Return a list of all valid templates that can be used for the given event. The basic work here is sorting the result of find_raw_matches """ # remove exclusions def can_match(template): # make sure that if (self.min_index is not None and self.state.match_order[id(template)] < self.min_index): return False if (self.max_index is not None and self.state.match_order[id(template)] > self.max_index): return False return True matches = ifilter(can_match, self.state.find_raw_matches(event)) # sort the results according to the order they were added return sorted(matches, key=lambda v: self.state.match_order[id(v)]) def __nonzero__(self): """ allow this to behave as a list - and at least try to act empty if the list is likely to be empty """ # dirt-simple case: no py:match templates at all if not (self.state.tag_templates or self.state.other_templates): return False # this is easy - before the first element there is nothing if self.max_index == -1: return False # this isn't always right because match_order may shrink, but # you'll never get a false-negative if self.min_index == self.state.current_index: return False # check for a range that is completely constrained if self.min_index is not None and self.max_index is not None: if self.min_index >= self.max_index: return False return True def __str__(self): parent = "" if self.parent: parent = ": child of 0x%x" % id(self.parent) return "<MatchSet 0x%x %d tag templates, %d other templates, range=[%s:%s]%s>" % ( id(self), len(self.state.tag_templates), len(self.other_templates), self.min_index, self.max_index, parent)