changeset 704:422d0607ba85 experimental-match-fastpaths

further performance improvements to MatchSet functionality - factor out MatchSet's State so that we dont' have to keep copying over the state every time we create a new child MatchSet. Also fixes a bug where we could get duplicate match indexes when a new match is created while evaluating an existing match Also, try to be lazy about creating the first MatchSet so that we don't hurt bigtable performance (i.e. when there are no py:matches in play at all)
author aflett
date Fri, 04 Apr 2008 16:57:27 +0000
parents af57b12e3dd2
children 2a7a19861a89
files genshi/template/base.py genshi/template/directives.py genshi/template/markup.py genshi/template/match.py
diffstat 4 files changed, 121 insertions(+), 94 deletions(-) [+]
line wrap: on
line diff
--- a/genshi/template/base.py
+++ b/genshi/template/base.py
@@ -25,7 +25,6 @@
 
 from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure
 from genshi.input import ParseError
-from genshi.template.match import MatchSet
 
 __all__ = ['Context', 'Template', 'TemplateError', 'TemplateRuntimeError',
            'TemplateSyntaxError', 'BadDirectiveError']
@@ -137,7 +136,7 @@
         self.frames = deque([data])
         self.pop = self.frames.popleft
         self.push = self.frames.appendleft
-        self._match_set = MatchSet()
+        self._match_set = None
         self._choice_stack = []
 
         # Helper functions for use in expressions
--- a/genshi/template/directives.py
+++ b/genshi/template/directives.py
@@ -26,6 +26,7 @@
                                  _exec_suite
 from genshi.template.eval import Expression, Suite, ExpressionASTTransformer, \
                                  _parse
+from genshi.template.match import MatchSet
 
 __all__ = ['AttrsDirective', 'ChooseDirective', 'ContentDirective',
            'DefDirective', 'ForDirective', 'IfDirective', 'MatchDirective',
@@ -456,6 +457,10 @@
     attach = classmethod(attach)
 
     def __call__(self, stream, directives, ctxt, **vars):
+        if ctx._match_set is None:
+            # lazily create MatchSet so that it doesn't burden the
+            # _match filter when there are no py:matches defined.
+            ctx._match_set = MatchSet()
         ctxt._match_set.add((self.path.test(ignore_context=True),
                              self.path, list(stream), self.hints,
                              self.namespaces, directives))
--- a/genshi/template/markup.py
+++ b/genshi/template/markup.py
@@ -25,7 +25,6 @@
 from genshi.template.interpolation import interpolate
 from genshi.template.directives import *
 from genshi.template.text import NewTextTemplate
-from genshi.template.match import MatchSet
 
 __all__ = ['MarkupTemplate']
 __docformat__ = 'restructuredtext en'
--- a/genshi/template/match.py
+++ b/genshi/template/match.py
@@ -17,6 +17,92 @@
 
     return False
 
+class MatchState(object):
+    """ This is a container for all py:match's found during parsing. It
+    does not provide a way to slice and dice which templates
+    match. The main interfaces are ``add``, ``remove``, and
+    ``find_raw_matches``
+
+    this class maintains a hash, ``self.match_order`` which maps a
+    template to its order, to make sure that events can be returned in
+    order
+
+    """
+    def __init__(self):
+        # merely for indexing. Note that this is shared between
+        # all MatchSets that share the same root parent. We don't
+        # have to worry about exclusions here
+        self.match_order = {}
+
+        # tag_templates are match templates whose path are simply
+        # a tag, like "body" or "img"
+        self.tag_templates = {}
+
+        # other_templates include all other match templates, such
+        # as ones with complex paths like "[class=container]"
+        self.other_templates = []
+
+        self.current_index = 0
+
+    def add(self, match_template):
+        """
+        Add a template to the match state
+        """
+        # match_templates are currently tuples that contain unhashable
+        # objects. So we'll use id() for now. 
+        self.match_order[id(match_template)] = self.current_index
+        self.current_index += 1
+        
+        path = match_template[1]
+
+        if is_simple_path(path):
+            # special cache of tag
+            tag_name = path.paths[0][0][1].name
+            # setdefault is wasteful
+            if tag_name not in self.tag_templates:
+                self.tag_templates[tag_name] = [match_template]
+            else:
+                self.tag_templates[tag_name].append(match_template)
+                
+        else:
+            self.other_templates.append(match_template)
+
+    def remove(self, match_template):
+        """
+        Remove the template permanently 
+        """
+        path = match_template[1]
+        
+        if is_simple_path(path):
+            tag_name = path.paths[0][0][1].name
+            if tag_name in self.tag_templates:
+                template_list = self.tag_templates[tag_name]
+                template_list.remove(match_template)
+                if not template_list:
+                    del self.tag_templates[tag_name]
+
+        else:
+            self.other_templates.remove(match_template)
+
+        # clean up match_order
+        del self.match_order[id(match_template)]
+
+    def find_raw_matches(self, event):
+        """ Return a list of all valid templates that can be used for the
+        given event. Ordering is funky because we first check
+        self.tag_templates, then check self.other_templates.
+        """
+        kind, data, pos = event[:3]
+
+        # todo: get the order right
+        if kind is START:
+            tag, attrs = data
+            if tag.localname in self.tag_templates:
+                for template in self.tag_templates[tag.localname]:
+                    yield template
+
+        for template in self.other_templates:
+            yield template
 
 class MatchSet(object):
     """ A MatchSet is a set of matches discovered by the parser. This
@@ -44,34 +130,21 @@
         set.
         
         """
-        self.parent = parent
+        if parent is None:
+            self.state = MatchState()
+        else:
+            self.state = parent.state
 
         if parent is None:
-            # merely for indexing. Note that this is shared between
-            # all MatchSets that share the same root parent. We don't
-            # have to worry about exclusions here
-            self.match_order = {}
-            
             self.min_index = None
             self.max_index = None
-
-            # tag_templates are match templates whose path are simply
-            # a tag, like "body" or "img"
-            self.tag_templates = {}
-
-            # other_templates include all other match templates, such
-            # as ones with complex paths like "[class=container]"
-            self.other_templates = []
-
         else:
             # We have a parent: Just copy references to member
             # variables in parent so that there's no performance loss
             self.max_index = parent.max_index
             self.min_index = parent.min_index
-            self.match_order = parent.match_order
-            self.tag_templates = parent.tag_templates
-            self.other_templates = parent.other_templates
-
+            
+        # sub-MatchSets can only be further constrained, not expanded
         if max_index is not None:
             assert self.max_index is None or max_index <= self.max_index
             self.max_index = max_index
@@ -80,63 +153,28 @@
             assert self.min_index is None or min_index > self.min_index
             self.min_index = min_index
         
-    
     def add(self, match_template):
         """
         match_template is a tuple the form
         test, path, template, hints, namespace, directives
         """
-
-        # match_templates are currently tuples that contain unhashable
-        # objects. So we'll use id() for now. 
-        self.match_order[id(match_template)] = len(self.match_order)
-        
-        path = match_template[1]
+        self.state.add(match_template)
 
-        if is_simple_path(path):
-            # special cache of tag
-            tag_name = path.paths[0][0][1].name
-            # setdefault is wasteful
-            if tag_name not in self.tag_templates:
-                self.tag_templates[tag_name] = [match_template]
-            else:
-                self.tag_templates[tag_name].append(match_template)
-                
-        else:
-            self.other_templates.append(match_template)
 
     def remove(self, match_template):
         """
         Permanently remove a match_template - mainly for match_once
-        """
-        path = match_template[1]
-        
-        if is_simple_path(path):
-            tag_name = path.paths[0][0][1].name
-            if tag_name in self.tag_templates:
-                template_list = self.tag_templates[tag_name]
-                template_list.remove(match_template)
-                if not template_list:
-                    del self.tag_templates[tag_name]
-
-        else:
-            self.other_templates.remove(match_template)
-
-        # clean up match_order
-        del self.match_order[id(match_template)]
-
-    def single_match(cls, match_template):
-        """
-        Factory for creating a MatchSet with just one match
-        """
-        match_set = cls()
-        match_set.add(match_template)
-        return match_set
-    single_match = classmethod(single_match)
+       """
+        self.state.remove(match_template)
 
     def before_template(self, match_template, inclusive):
+        """ Return a new MatchSet where only the templates that were declared
+        before ``match_template`` are available. If ``inclusive`` is
+        true, then it will also include match_template itself.
+        
+        """
         cls = type(self)
-        max_index = self.match_order[id(match_template)]
+        max_index = self.state.match_order[id(match_template)]
         if not inclusive:
             max_index -= 1
         return cls(parent=self, max_index=max_index)
@@ -147,25 +185,9 @@
         the given match
         """
         cls = type(self)
-        min_index = self.match_order[id(match_template)] + 1
+        min_index = self.state.match_order[id(match_template)] + 1
         return cls(parent=self, min_index=min_index)
     
-    def find_raw_matches(self, event):
-        """ Return a list of all valid templates that can be used for the
-        given event. Ordering is funky because we first check
-        self.tag_templates, then check self.other_templates.
-        """
-        kind, data, pos = event[:3]
-
-        # todo: get the order right
-        if kind is START:
-            tag, attrs = data
-            if tag.localname in self.tag_templates:
-                for template in self.tag_templates[tag.localname]:
-                    yield template
-
-        for template in self.other_templates:
-            yield template
 
     def find_matches(self, event):
         """ Return a list of all valid templates that can be used for the
@@ -173,46 +195,48 @@
 
         The basic work here is sorting the result of find_raw_matches
         """
-
         # remove exclusions
         def can_match(template):
             # make sure that 
             if (self.min_index is not None and
-                self.match_order[id(template)] < self.min_index):
+                self.state.match_order[id(template)] < self.min_index):
                 return False
 
             if (self.max_index is not None and 
-                self.match_order[id(template)] > self.max_index):
+                self.state.match_order[id(template)] > self.max_index):
                 return False
 
             return True
         
         matches = ifilter(can_match,
-                          self.find_raw_matches(event))
+                          self.state.find_raw_matches(event))
 
         # sort the results according to the order they were added
-        return sorted(matches, key=lambda v: self.match_order[id(v)])
+        return sorted(matches, key=lambda v: self.state.match_order[id(v)])
 
     def __nonzero__(self):
         """
-        allow this to behave as a list
+        allow this to behave as a list - and at least try to act empty if
+        the list is likely to be empty
         """
-
+        # dirt-simple case: no py:match templates at all
+        if not (self.state.tag_templates or self.state.other_templates):
+            return False
         # this is easy - before the first element there is nothing
         if self.max_index == -1:
             return False
 
         # this isn't always right because match_order may shrink, but
         # you'll never get a false-negative
-        if self.min_index == len(self.match_order):
+        if self.min_index == self.state.current_index:
             return False
 
         # check for a range that is completely constrained
         if self.min_index is not None and self.max_index is not None:
             if self.min_index >= self.max_index:
                 return False
-            
-        return bool(self.tag_templates or self.other_templates)
+
+        return True
 
     def __str__(self):
         parent = ""
@@ -220,6 +244,6 @@
             parent = ": child of 0x%x" % id(self.parent)
 
         return "<MatchSet 0x%x %d tag templates, %d other templates, range=[%s:%s]%s>" % (
-            id(self), len(self.tag_templates), len(self.other_templates),
+            id(self), len(self.state.tag_templates), len(self.other_templates),
             self.min_index, self.max_index,
             parent)
Copyright (C) 2012-2017 Edgewall Software