view genshi/template/match.py @ 704:422d0607ba85 experimental-match-fastpaths

further performance improvements to MatchSet functionality - factor out MatchSet's State so that we dont' have to keep copying over the state every time we create a new child MatchSet. Also fixes a bug where we could get duplicate match indexes when a new match is created while evaluating an existing match Also, try to be lazy about creating the first MatchSet so that we don't hurt bigtable performance (i.e. when there are no py:matches in play at all)
author aflett
date Fri, 04 Apr 2008 16:57:27 +0000
parents af57b12e3dd2
children b33c14a1f579
line wrap: on
line source
from genshi.core import START
from genshi.path import CHILD, LocalNameTest

from copy import copy
from itertools import ifilter

def is_simple_path(path):
    """
    Is the path merely a tag match like "foo"?
    """
    if len(path.paths) == 1 and len(path.paths[0]) == 1:
        axis, nodetest, predicates = path.paths[0][0]
        if (axis is CHILD and
            not predicates and
            isinstance(nodetest, LocalNameTest)):
            return True

    return False

class MatchState(object):
    """ This is a container for all py:match's found during parsing. It
    does not provide a way to slice and dice which templates
    match. The main interfaces are ``add``, ``remove``, and
    ``find_raw_matches``

    this class maintains a hash, ``self.match_order`` which maps a
    template to its order, to make sure that events can be returned in
    order

    """
    def __init__(self):
        # merely for indexing. Note that this is shared between
        # all MatchSets that share the same root parent. We don't
        # have to worry about exclusions here
        self.match_order = {}

        # tag_templates are match templates whose path are simply
        # a tag, like "body" or "img"
        self.tag_templates = {}

        # other_templates include all other match templates, such
        # as ones with complex paths like "[class=container]"
        self.other_templates = []

        self.current_index = 0

    def add(self, match_template):
        """
        Add a template to the match state
        """
        # match_templates are currently tuples that contain unhashable
        # objects. So we'll use id() for now. 
        self.match_order[id(match_template)] = self.current_index
        self.current_index += 1
        
        path = match_template[1]

        if is_simple_path(path):
            # special cache of tag
            tag_name = path.paths[0][0][1].name
            # setdefault is wasteful
            if tag_name not in self.tag_templates:
                self.tag_templates[tag_name] = [match_template]
            else:
                self.tag_templates[tag_name].append(match_template)
                
        else:
            self.other_templates.append(match_template)

    def remove(self, match_template):
        """
        Remove the template permanently 
        """
        path = match_template[1]
        
        if is_simple_path(path):
            tag_name = path.paths[0][0][1].name
            if tag_name in self.tag_templates:
                template_list = self.tag_templates[tag_name]
                template_list.remove(match_template)
                if not template_list:
                    del self.tag_templates[tag_name]

        else:
            self.other_templates.remove(match_template)

        # clean up match_order
        del self.match_order[id(match_template)]

    def find_raw_matches(self, event):
        """ Return a list of all valid templates that can be used for the
        given event. Ordering is funky because we first check
        self.tag_templates, then check self.other_templates.
        """
        kind, data, pos = event[:3]

        # todo: get the order right
        if kind is START:
            tag, attrs = data
            if tag.localname in self.tag_templates:
                for template in self.tag_templates[tag.localname]:
                    yield template

        for template in self.other_templates:
            yield template

class MatchSet(object):
    """ A MatchSet is a set of matches discovered by the parser. This
    class encapsulates the matching of a particular event to a set of
    matches. It is optimized for basic tag matches, since that is by
    far the most common use of py:match.

    The two primary entry points into MatchSet are ``add``, which adds
    a new py:match, and ``find_matches``, which returns all
    /candidate/ match templates. The consumer of ``find_matches``
    still must call each candidates' match() to ensure the event
    really matches, and to maintain state within the match.

    If a given py:match's path is simply a node name match,
    (LocalNameTest) like "xyz", then MatchSet indexes that in a
    dictionary that maps tag names to matches.

    If the path is more complex like "xyz[k=z]" then then that match
    will always be returned by ``find_matches``.  """
    def __init__(self, parent=None,
                 min_index=None,
                 max_index=None):
        """
        If a parent is given, it means this is a wrapper around another
        set.
        
        """
        if parent is None:
            self.state = MatchState()
        else:
            self.state = parent.state

        if parent is None:
            self.min_index = None
            self.max_index = None
        else:
            # We have a parent: Just copy references to member
            # variables in parent so that there's no performance loss
            self.max_index = parent.max_index
            self.min_index = parent.min_index
            
        # sub-MatchSets can only be further constrained, not expanded
        if max_index is not None:
            assert self.max_index is None or max_index <= self.max_index
            self.max_index = max_index

        if min_index is not None:
            assert self.min_index is None or min_index > self.min_index
            self.min_index = min_index
        
    def add(self, match_template):
        """
        match_template is a tuple the form
        test, path, template, hints, namespace, directives
        """
        self.state.add(match_template)


    def remove(self, match_template):
        """
        Permanently remove a match_template - mainly for match_once
       """
        self.state.remove(match_template)

    def before_template(self, match_template, inclusive):
        """ Return a new MatchSet where only the templates that were declared
        before ``match_template`` are available. If ``inclusive`` is
        true, then it will also include match_template itself.
        
        """
        cls = type(self)
        max_index = self.state.match_order[id(match_template)]
        if not inclusive:
            max_index -= 1
        return cls(parent=self, max_index=max_index)
    
    def after_template(self, match_template):
        """
        Factory for creating a MatchSet that only matches templates after
        the given match
        """
        cls = type(self)
        min_index = self.state.match_order[id(match_template)] + 1
        return cls(parent=self, min_index=min_index)
    

    def find_matches(self, event):
        """ Return a list of all valid templates that can be used for the
        given event.

        The basic work here is sorting the result of find_raw_matches
        """
        # remove exclusions
        def can_match(template):
            # make sure that 
            if (self.min_index is not None and
                self.state.match_order[id(template)] < self.min_index):
                return False

            if (self.max_index is not None and 
                self.state.match_order[id(template)] > self.max_index):
                return False

            return True
        
        matches = ifilter(can_match,
                          self.state.find_raw_matches(event))

        # sort the results according to the order they were added
        return sorted(matches, key=lambda v: self.state.match_order[id(v)])

    def __nonzero__(self):
        """
        allow this to behave as a list - and at least try to act empty if
        the list is likely to be empty
        """
        # dirt-simple case: no py:match templates at all
        if not (self.state.tag_templates or self.state.other_templates):
            return False
        # this is easy - before the first element there is nothing
        if self.max_index == -1:
            return False

        # this isn't always right because match_order may shrink, but
        # you'll never get a false-negative
        if self.min_index == self.state.current_index:
            return False

        # check for a range that is completely constrained
        if self.min_index is not None and self.max_index is not None:
            if self.min_index >= self.max_index:
                return False

        return True

    def __str__(self):
        parent = ""
        if self.parent:
            parent = ": child of 0x%x" % id(self.parent)

        return "<MatchSet 0x%x %d tag templates, %d other templates, range=[%s:%s]%s>" % (
            id(self), len(self.state.tag_templates), len(self.other_templates),
            self.min_index, self.max_index,
            parent)
Copyright (C) 2012-2017 Edgewall Software