comparison genshi/template/match.py @ 704:422d0607ba85 experimental-match-fastpaths

further performance improvements to MatchSet functionality - factor out MatchSet's State so that we dont' have to keep copying over the state every time we create a new child MatchSet. Also fixes a bug where we could get duplicate match indexes when a new match is created while evaluating an existing match Also, try to be lazy about creating the first MatchSet so that we don't hurt bigtable performance (i.e. when there are no py:matches in play at all)
author aflett
date Fri, 04 Apr 2008 16:57:27 +0000
parents af57b12e3dd2
children b33c14a1f579
comparison
equal deleted inserted replaced
703:af57b12e3dd2 704:422d0607ba85
15 isinstance(nodetest, LocalNameTest)): 15 isinstance(nodetest, LocalNameTest)):
16 return True 16 return True
17 17
18 return False 18 return False
19 19
20 20 class MatchState(object):
21 class MatchSet(object): 21 """ This is a container for all py:match's found during parsing. It
22 """ A MatchSet is a set of matches discovered by the parser. This 22 does not provide a way to slice and dice which templates
23 class encapsulates the matching of a particular event to a set of 23 match. The main interfaces are ``add``, ``remove``, and
24 matches. It is optimized for basic tag matches, since that is by 24 ``find_raw_matches``
25 far the most common use of py:match. 25
26 26 this class maintains a hash, ``self.match_order`` which maps a
27 The two primary entry points into MatchSet are ``add``, which adds 27 template to its order, to make sure that events can be returned in
28 a new py:match, and ``find_matches``, which returns all 28 order
29 /candidate/ match templates. The consumer of ``find_matches`` 29
30 still must call each candidates' match() to ensure the event 30 """
31 really matches, and to maintain state within the match. 31 def __init__(self):
32 32 # merely for indexing. Note that this is shared between
33 If a given py:match's path is simply a node name match, 33 # all MatchSets that share the same root parent. We don't
34 (LocalNameTest) like "xyz", then MatchSet indexes that in a 34 # have to worry about exclusions here
35 dictionary that maps tag names to matches. 35 self.match_order = {}
36 36
37 If the path is more complex like "xyz[k=z]" then then that match 37 # tag_templates are match templates whose path are simply
38 will always be returned by ``find_matches``. """ 38 # a tag, like "body" or "img"
39 def __init__(self, parent=None, 39 self.tag_templates = {}
40 min_index=None, 40
41 max_index=None): 41 # other_templates include all other match templates, such
42 """ 42 # as ones with complex paths like "[class=container]"
43 If a parent is given, it means this is a wrapper around another 43 self.other_templates = []
44 set. 44
45 45 self.current_index = 0
46 """ 46
47 self.parent = parent
48
49 if parent is None:
50 # merely for indexing. Note that this is shared between
51 # all MatchSets that share the same root parent. We don't
52 # have to worry about exclusions here
53 self.match_order = {}
54
55 self.min_index = None
56 self.max_index = None
57
58 # tag_templates are match templates whose path are simply
59 # a tag, like "body" or "img"
60 self.tag_templates = {}
61
62 # other_templates include all other match templates, such
63 # as ones with complex paths like "[class=container]"
64 self.other_templates = []
65
66 else:
67 # We have a parent: Just copy references to member
68 # variables in parent so that there's no performance loss
69 self.max_index = parent.max_index
70 self.min_index = parent.min_index
71 self.match_order = parent.match_order
72 self.tag_templates = parent.tag_templates
73 self.other_templates = parent.other_templates
74
75 if max_index is not None:
76 assert self.max_index is None or max_index <= self.max_index
77 self.max_index = max_index
78
79 if min_index is not None:
80 assert self.min_index is None or min_index > self.min_index
81 self.min_index = min_index
82
83
84 def add(self, match_template): 47 def add(self, match_template):
85 """ 48 """
86 match_template is a tuple the form 49 Add a template to the match state
87 test, path, template, hints, namespace, directives 50 """
88 """
89
90 # match_templates are currently tuples that contain unhashable 51 # match_templates are currently tuples that contain unhashable
91 # objects. So we'll use id() for now. 52 # objects. So we'll use id() for now.
92 self.match_order[id(match_template)] = len(self.match_order) 53 self.match_order[id(match_template)] = self.current_index
54 self.current_index += 1
93 55
94 path = match_template[1] 56 path = match_template[1]
95 57
96 if is_simple_path(path): 58 if is_simple_path(path):
97 # special cache of tag 59 # special cache of tag
105 else: 67 else:
106 self.other_templates.append(match_template) 68 self.other_templates.append(match_template)
107 69
108 def remove(self, match_template): 70 def remove(self, match_template):
109 """ 71 """
110 Permanently remove a match_template - mainly for match_once 72 Remove the template permanently
111 """ 73 """
112 path = match_template[1] 74 path = match_template[1]
113 75
114 if is_simple_path(path): 76 if is_simple_path(path):
115 tag_name = path.paths[0][0][1].name 77 tag_name = path.paths[0][0][1].name
123 self.other_templates.remove(match_template) 85 self.other_templates.remove(match_template)
124 86
125 # clean up match_order 87 # clean up match_order
126 del self.match_order[id(match_template)] 88 del self.match_order[id(match_template)]
127 89
128 def single_match(cls, match_template):
129 """
130 Factory for creating a MatchSet with just one match
131 """
132 match_set = cls()
133 match_set.add(match_template)
134 return match_set
135 single_match = classmethod(single_match)
136
137 def before_template(self, match_template, inclusive):
138 cls = type(self)
139 max_index = self.match_order[id(match_template)]
140 if not inclusive:
141 max_index -= 1
142 return cls(parent=self, max_index=max_index)
143
144 def after_template(self, match_template):
145 """
146 Factory for creating a MatchSet that only matches templates after
147 the given match
148 """
149 cls = type(self)
150 min_index = self.match_order[id(match_template)] + 1
151 return cls(parent=self, min_index=min_index)
152
153 def find_raw_matches(self, event): 90 def find_raw_matches(self, event):
154 """ Return a list of all valid templates that can be used for the 91 """ Return a list of all valid templates that can be used for the
155 given event. Ordering is funky because we first check 92 given event. Ordering is funky because we first check
156 self.tag_templates, then check self.other_templates. 93 self.tag_templates, then check self.other_templates.
157 """ 94 """
165 yield template 102 yield template
166 103
167 for template in self.other_templates: 104 for template in self.other_templates:
168 yield template 105 yield template
169 106
107 class MatchSet(object):
108 """ A MatchSet is a set of matches discovered by the parser. This
109 class encapsulates the matching of a particular event to a set of
110 matches. It is optimized for basic tag matches, since that is by
111 far the most common use of py:match.
112
113 The two primary entry points into MatchSet are ``add``, which adds
114 a new py:match, and ``find_matches``, which returns all
115 /candidate/ match templates. The consumer of ``find_matches``
116 still must call each candidates' match() to ensure the event
117 really matches, and to maintain state within the match.
118
119 If a given py:match's path is simply a node name match,
120 (LocalNameTest) like "xyz", then MatchSet indexes that in a
121 dictionary that maps tag names to matches.
122
123 If the path is more complex like "xyz[k=z]" then then that match
124 will always be returned by ``find_matches``. """
125 def __init__(self, parent=None,
126 min_index=None,
127 max_index=None):
128 """
129 If a parent is given, it means this is a wrapper around another
130 set.
131
132 """
133 if parent is None:
134 self.state = MatchState()
135 else:
136 self.state = parent.state
137
138 if parent is None:
139 self.min_index = None
140 self.max_index = None
141 else:
142 # We have a parent: Just copy references to member
143 # variables in parent so that there's no performance loss
144 self.max_index = parent.max_index
145 self.min_index = parent.min_index
146
147 # sub-MatchSets can only be further constrained, not expanded
148 if max_index is not None:
149 assert self.max_index is None or max_index <= self.max_index
150 self.max_index = max_index
151
152 if min_index is not None:
153 assert self.min_index is None or min_index > self.min_index
154 self.min_index = min_index
155
156 def add(self, match_template):
157 """
158 match_template is a tuple the form
159 test, path, template, hints, namespace, directives
160 """
161 self.state.add(match_template)
162
163
164 def remove(self, match_template):
165 """
166 Permanently remove a match_template - mainly for match_once
167 """
168 self.state.remove(match_template)
169
170 def before_template(self, match_template, inclusive):
171 """ Return a new MatchSet where only the templates that were declared
172 before ``match_template`` are available. If ``inclusive`` is
173 true, then it will also include match_template itself.
174
175 """
176 cls = type(self)
177 max_index = self.state.match_order[id(match_template)]
178 if not inclusive:
179 max_index -= 1
180 return cls(parent=self, max_index=max_index)
181
182 def after_template(self, match_template):
183 """
184 Factory for creating a MatchSet that only matches templates after
185 the given match
186 """
187 cls = type(self)
188 min_index = self.state.match_order[id(match_template)] + 1
189 return cls(parent=self, min_index=min_index)
190
191
170 def find_matches(self, event): 192 def find_matches(self, event):
171 """ Return a list of all valid templates that can be used for the 193 """ Return a list of all valid templates that can be used for the
172 given event. 194 given event.
173 195
174 The basic work here is sorting the result of find_raw_matches 196 The basic work here is sorting the result of find_raw_matches
175 """ 197 """
176
177 # remove exclusions 198 # remove exclusions
178 def can_match(template): 199 def can_match(template):
179 # make sure that 200 # make sure that
180 if (self.min_index is not None and 201 if (self.min_index is not None and
181 self.match_order[id(template)] < self.min_index): 202 self.state.match_order[id(template)] < self.min_index):
182 return False 203 return False
183 204
184 if (self.max_index is not None and 205 if (self.max_index is not None and
185 self.match_order[id(template)] > self.max_index): 206 self.state.match_order[id(template)] > self.max_index):
186 return False 207 return False
187 208
188 return True 209 return True
189 210
190 matches = ifilter(can_match, 211 matches = ifilter(can_match,
191 self.find_raw_matches(event)) 212 self.state.find_raw_matches(event))
192 213
193 # sort the results according to the order they were added 214 # sort the results according to the order they were added
194 return sorted(matches, key=lambda v: self.match_order[id(v)]) 215 return sorted(matches, key=lambda v: self.state.match_order[id(v)])
195 216
196 def __nonzero__(self): 217 def __nonzero__(self):
197 """ 218 """
198 allow this to behave as a list 219 allow this to behave as a list - and at least try to act empty if
199 """ 220 the list is likely to be empty
200 221 """
222 # dirt-simple case: no py:match templates at all
223 if not (self.state.tag_templates or self.state.other_templates):
224 return False
201 # this is easy - before the first element there is nothing 225 # this is easy - before the first element there is nothing
202 if self.max_index == -1: 226 if self.max_index == -1:
203 return False 227 return False
204 228
205 # this isn't always right because match_order may shrink, but 229 # this isn't always right because match_order may shrink, but
206 # you'll never get a false-negative 230 # you'll never get a false-negative
207 if self.min_index == len(self.match_order): 231 if self.min_index == self.state.current_index:
208 return False 232 return False
209 233
210 # check for a range that is completely constrained 234 # check for a range that is completely constrained
211 if self.min_index is not None and self.max_index is not None: 235 if self.min_index is not None and self.max_index is not None:
212 if self.min_index >= self.max_index: 236 if self.min_index >= self.max_index:
213 return False 237 return False
214 238
215 return bool(self.tag_templates or self.other_templates) 239 return True
216 240
217 def __str__(self): 241 def __str__(self):
218 parent = "" 242 parent = ""
219 if self.parent: 243 if self.parent:
220 parent = ": child of 0x%x" % id(self.parent) 244 parent = ": child of 0x%x" % id(self.parent)
221 245
222 return "<MatchSet 0x%x %d tag templates, %d other templates, range=[%s:%s]%s>" % ( 246 return "<MatchSet 0x%x %d tag templates, %d other templates, range=[%s:%s]%s>" % (
223 id(self), len(self.tag_templates), len(self.other_templates), 247 id(self), len(self.state.tag_templates), len(self.other_templates),
224 self.min_index, self.max_index, 248 self.min_index, self.max_index,
225 parent) 249 parent)
Copyright (C) 2012-2017 Edgewall Software