changeset 687:3d7288f373bd experimental-match-fastpaths

land first cut at fast-path matching - needs some cleanup
author aflett
date Mon, 25 Feb 2008 20:44:04 +0000
parents 92efe764ec81
children 1240ada13334
files genshi/path.py genshi/template/base.py genshi/template/directives.py genshi/template/markup.py genshi/template/match.py
diffstat 5 files changed, 182 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/genshi/path.py
+++ b/genshi/path.py
@@ -181,9 +181,15 @@
                  stream against the path
         :rtype: ``function``
         """
-        paths = [(p, len(p), [0], [], [0] * len(p)) for p in [
-            (ignore_context and [_DOTSLASHSLASH] or []) + p for p in self.paths
-        ]]
+
+        # paths is path state that is maintained across calls to _test()
+        # paths is a list of tuples, one for each segment in the path
+        paths = []
+        for p in self.paths:
+            if ignore_context:
+                p = [_DOTSLASHSLASH] + p
+            path = (p, len(p), [0], [], [0] * len(p))
+            paths.append(path)
 
         def _test(event, namespaces, variables, updateonly=False):
             kind, data, pos = event[:3]
@@ -279,7 +285,8 @@
                     if ctxtnode and axis is DESCENDANT_OR_SELF:
                         ctxtnode = False
 
-                if (retval or not matched) and kind is START and \
+                if (retval or not matched) and \
+                        kind is START and \
                         not (axis is DESCENDANT or axis is DESCENDANT_OR_SELF):
                     # If this step is not a closure, it cannot be matched until
                     # the current element is closed... so we need to move the
--- a/genshi/template/base.py
+++ b/genshi/template/base.py
@@ -25,6 +25,7 @@
 
 from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure
 from genshi.input import ParseError
+from genshi.template.match import MatchSet
 
 __all__ = ['Context', 'Template', 'TemplateError', 'TemplateRuntimeError',
            'TemplateSyntaxError', 'BadDirectiveError']
@@ -136,7 +137,7 @@
         self.frames = deque([data])
         self.pop = self.frames.popleft
         self.push = self.frames.appendleft
-        self._match_templates = []
+        self._match_set = MatchSet()
         self._choice_stack = []
 
         # Helper functions for use in expressions
--- a/genshi/template/directives.py
+++ b/genshi/template/directives.py
@@ -450,9 +450,9 @@
     attach = classmethod(attach)
 
     def __call__(self, stream, ctxt, directives):
-        ctxt._match_templates.append((self.path.test(ignore_context=True),
-                                      self.path, list(stream), self.hints,
-                                      self.namespaces, directives))
+        ctxt._match_set.add((self.path.test(ignore_context=True),
+                             self.path, list(stream), self.hints,
+                             self.namespaces, directives))
         return []
 
     def __repr__(self):
--- a/genshi/template/markup.py
+++ b/genshi/template/markup.py
@@ -25,6 +25,7 @@
 from genshi.template.interpolation import interpolate
 from genshi.template.directives import *
 from genshi.template.text import NewTextTemplate
+from genshi.template.match import MatchSet
 
 __all__ = ['MarkupTemplate']
 __docformat__ = 'restructuredtext en'
@@ -225,12 +226,12 @@
         assert len(streams) == 1
         return streams[0]
 
-    def _match(self, stream, ctxt, match_templates=None):
+    def _match(self, stream, ctxt, match_set=None):
         """Internal stream filter that applies any defined match templates
         to the stream.
         """
-        if match_templates is None:
-            match_templates = ctxt._match_templates
+        if match_set is None:
+            match_set = ctxt._match_set
 
         tail = []
         def _strip(stream):
@@ -251,22 +252,25 @@
 
             # We (currently) only care about start and end events for matching
             # We might care about namespace events in the future, though
-            if not match_templates or (event[0] is not START and
-                                       event[0] is not END):
+            if not match_set or (event[0] is not START and
+                                 event[0] is not END):
                 yield event
                 continue
 
-            for idx, (test, path, template, hints, namespaces, directives) \
-                    in enumerate(match_templates):
-
+            match_candidates = list(match_set.find_matches(event))
+            for idx, match_template in enumerate(match_candidates):
+                
+                (test, path, template, hints, namespaces, directives) = \
+                    match_template
                 if test(event, namespaces, ctxt) is True:
                     if 'match_once' in hints:
-                        del match_templates[idx]
+                        match_set.remove(match_template)
+                        del match_candidates[idx]
                         idx -= 1
 
                     # Let the remaining match templates know about the event so
                     # they get a chance to update their internal state
-                    for test in [mt[0] for mt in match_templates[idx + 1:]]:
+                    for test in [mt[0] for mt in match_candidates[idx + 1:]]:
                         test(event, namespaces, ctxt, updateonly=True)
 
                     # Consume and store all events until an end event
@@ -274,11 +278,14 @@
                     inner = _strip(stream)
                     if 'match_once' not in hints \
                             and 'not_recursive' not in hints:
-                        inner = self._match(inner, ctxt, [match_templates[idx]])
+                        inner = self._match(inner, ctxt,
+                                            MatchSet.single_match(match_template))
                     content = list(self._include(chain([event], inner, tail),
                                                  ctxt))
 
-                    for test in [mt[0] for mt in match_templates]:
+                    # Now tell all the match templates about the
+                    # END event (tail[0])
+                    for test in [mt[0] for mt in match_candidates]:
                         test(tail[0], namespaces, ctxt, updateonly=True)
 
                     # Make the select() function available in the body of the
@@ -289,12 +296,14 @@
 
                     # Recursively process the output
                     template = _apply_directives(template, ctxt, directives)
-                    remaining = match_templates
+                    remaining = match_set
                     if 'match_once' not in hints:
-                        remaining = remaining[:idx] + remaining[idx + 1:]
-                    for event in self._match(self._exec(
-                                    self._eval(self._flatten(template, ctxt),
-                                    ctxt), ctxt), ctxt, remaining):
+                        # match has not been removed, so we need an exclusion matchset
+                        remaining = match_set.with_exclusion(match_template)
+                        
+                    body = self._exec(self._eval(self._flatten(template, ctxt),
+                                                 ctxt), ctxt)
+                    for event in self._match(body, ctxt, remaining):
                         yield event
 
                     ctxt.pop()
new file mode 100644
--- /dev/null
+++ b/genshi/template/match.py
@@ -0,0 +1,140 @@
+from genshi.core import START
+from genshi.path import CHILD, LocalNameTest
+
+from copy import copy
+
+def is_simple_path(path):
+    """
+    Is the path merely a tag match like "foo"?
+    """
+    if len(path.paths) == 1 and len(path.paths[0]) == 1:
+        axis, nodetest, predicates = path.paths[0][0]
+        if (axis is CHILD and
+            not predicates and
+            isinstance(nodetest, LocalNameTest)):
+            return True
+
+    return False
+
+
+class MatchSet(object):
+
+    def __init__(self, parent=None, exclude=None):
+        """
+        If a parent is given, it means this is a wrapper around another
+        set. Just copy references to member variables in parent, but
+        also set exclude
+        """
+        self.parent = parent
+        if parent is None:
+            self.tag_templates = {}
+            self.other_templates = []
+            self.exclude = []
+            if exclude is not None:
+                self.exclude.append(exclude)
+        else:
+            self.tag_templates = parent.tag_templates
+            self.other_templates = parent.other_templates
+            self.exclude = copy(parent.exclude)
+            if exclude is not None:
+                self.exclude.append(exclude)
+    
+    def add(self, match_template):
+        """
+        match_template is a tuple the form
+        test, path, template, hints, namespace, directives
+        """
+        path = match_template[1]
+        
+        if is_simple_path(path):
+            # special cache of tag
+            tag_name = path.paths[0][0][1].name
+            # setdefault is wasteful
+            if tag_name not in self.tag_templates:
+                self.tag_templates[tag_name] = [match_template]
+            else:
+                self.tag_templates[tag_name].append(match_template)
+                
+        else:
+            self.other_templates.append(match_template)
+
+    def remove(self, match_template):
+        """
+        Permanently remove a match_template - mainly for match_once
+        """
+        path = match_template[1]
+        
+        if is_simple_path(path):
+            tag_name = path.paths[0][0][1].name
+            if tag_name in self.tag_templates:
+                template_list = self.tag_templates[tag_name]
+                template_list.remove(match_template)
+                if not template_list:
+                    del self.tag_templates[tag_name]
+
+        else:
+            self.other_templates.remove(match_template)
+
+    def single_match(cls, match_template):
+        """
+        Factory for creating a MatchSet with just one match
+        """
+        match_set = cls()
+        match_set.add(match_template)
+        return match_set
+    single_match = classmethod(single_match)
+
+    def with_exclusion(self, exclude):
+        """
+        Factory for creating a MatchSet based on another MatchSet, but
+        with certain templates excluded
+        """
+        cls = self.__class__
+        new_match_set = cls(parent=self, exclude=exclude)
+        return new_match_set
+            
+    def find_matches(self, event):
+        """
+        Return a list of all valid templates that can be used for the given event.
+        """
+        kind, data, pos = event[:3]
+
+        # todo: get the order right
+        if kind is START:
+            tag, attrs = data
+            if tag.localname in self.tag_templates:
+                for template in self.tag_templates[tag.localname]:
+                    if template not in self.exclude:
+                        yield template
+
+        for template in self.other_templates:
+            if template not in self.exclude:
+                yield template
+
+
+    def __nonzero__(self):
+        """
+        allow this to behave as a list
+        """
+        return bool(self.tag_templates or self.other_templates)
+
+    def __iter__(self):
+        """
+        I don't think we really need this, but it lets us behave like a list
+        """
+        for template_list in self.tag_templates.iteritems():
+            for template in template_list:
+                yield template
+        for template in self.other_templates:
+            yield template
+
+    def __str__(self):
+        parent = ""
+        if self.parent:
+            parent = ": child of 0x%x" % id(self.parent)
+
+        exclude = ""
+        if self.exclude:
+            exclude = " / excluding %d items" % len(self.exclude)
+            
+        return "<MatchSet 0x%x %d tag templates, %d other templates%s%s>" % (id(self), len(self.tag_templates), len(self.other_templates), parent, exclude)
Copyright (C) 2012-2017 Edgewall Software