Mercurial > genshi > mirror
comparison genshi/template/match.py @ 704:422d0607ba85 experimental-match-fastpaths
further performance improvements to MatchSet functionality - factor out MatchSet's State so that we dont' have to keep copying over the state every time we create a new child MatchSet.
Also fixes a bug where we could get duplicate match indexes when a new match is created while evaluating an existing match
Also, try to be lazy about creating the first MatchSet so that we don't hurt bigtable performance (i.e. when there are no py:matches in play at all)
author | aflett |
---|---|
date | Fri, 04 Apr 2008 16:57:27 +0000 |
parents | af57b12e3dd2 |
children | b33c14a1f579 |
comparison
equal
deleted
inserted
replaced
703:af57b12e3dd2 | 704:422d0607ba85 |
---|---|
15 isinstance(nodetest, LocalNameTest)): | 15 isinstance(nodetest, LocalNameTest)): |
16 return True | 16 return True |
17 | 17 |
18 return False | 18 return False |
19 | 19 |
20 | 20 class MatchState(object): |
21 class MatchSet(object): | 21 """ This is a container for all py:match's found during parsing. It |
22 """ A MatchSet is a set of matches discovered by the parser. This | 22 does not provide a way to slice and dice which templates |
23 class encapsulates the matching of a particular event to a set of | 23 match. The main interfaces are ``add``, ``remove``, and |
24 matches. It is optimized for basic tag matches, since that is by | 24 ``find_raw_matches`` |
25 far the most common use of py:match. | 25 |
26 | 26 this class maintains a hash, ``self.match_order`` which maps a |
27 The two primary entry points into MatchSet are ``add``, which adds | 27 template to its order, to make sure that events can be returned in |
28 a new py:match, and ``find_matches``, which returns all | 28 order |
29 /candidate/ match templates. The consumer of ``find_matches`` | 29 |
30 still must call each candidates' match() to ensure the event | 30 """ |
31 really matches, and to maintain state within the match. | 31 def __init__(self): |
32 | 32 # merely for indexing. Note that this is shared between |
33 If a given py:match's path is simply a node name match, | 33 # all MatchSets that share the same root parent. We don't |
34 (LocalNameTest) like "xyz", then MatchSet indexes that in a | 34 # have to worry about exclusions here |
35 dictionary that maps tag names to matches. | 35 self.match_order = {} |
36 | 36 |
37 If the path is more complex like "xyz[k=z]" then then that match | 37 # tag_templates are match templates whose path are simply |
38 will always be returned by ``find_matches``. """ | 38 # a tag, like "body" or "img" |
39 def __init__(self, parent=None, | 39 self.tag_templates = {} |
40 min_index=None, | 40 |
41 max_index=None): | 41 # other_templates include all other match templates, such |
42 """ | 42 # as ones with complex paths like "[class=container]" |
43 If a parent is given, it means this is a wrapper around another | 43 self.other_templates = [] |
44 set. | 44 |
45 | 45 self.current_index = 0 |
46 """ | 46 |
47 self.parent = parent | |
48 | |
49 if parent is None: | |
50 # merely for indexing. Note that this is shared between | |
51 # all MatchSets that share the same root parent. We don't | |
52 # have to worry about exclusions here | |
53 self.match_order = {} | |
54 | |
55 self.min_index = None | |
56 self.max_index = None | |
57 | |
58 # tag_templates are match templates whose path are simply | |
59 # a tag, like "body" or "img" | |
60 self.tag_templates = {} | |
61 | |
62 # other_templates include all other match templates, such | |
63 # as ones with complex paths like "[class=container]" | |
64 self.other_templates = [] | |
65 | |
66 else: | |
67 # We have a parent: Just copy references to member | |
68 # variables in parent so that there's no performance loss | |
69 self.max_index = parent.max_index | |
70 self.min_index = parent.min_index | |
71 self.match_order = parent.match_order | |
72 self.tag_templates = parent.tag_templates | |
73 self.other_templates = parent.other_templates | |
74 | |
75 if max_index is not None: | |
76 assert self.max_index is None or max_index <= self.max_index | |
77 self.max_index = max_index | |
78 | |
79 if min_index is not None: | |
80 assert self.min_index is None or min_index > self.min_index | |
81 self.min_index = min_index | |
82 | |
83 | |
84 def add(self, match_template): | 47 def add(self, match_template): |
85 """ | 48 """ |
86 match_template is a tuple the form | 49 Add a template to the match state |
87 test, path, template, hints, namespace, directives | 50 """ |
88 """ | |
89 | |
90 # match_templates are currently tuples that contain unhashable | 51 # match_templates are currently tuples that contain unhashable |
91 # objects. So we'll use id() for now. | 52 # objects. So we'll use id() for now. |
92 self.match_order[id(match_template)] = len(self.match_order) | 53 self.match_order[id(match_template)] = self.current_index |
54 self.current_index += 1 | |
93 | 55 |
94 path = match_template[1] | 56 path = match_template[1] |
95 | 57 |
96 if is_simple_path(path): | 58 if is_simple_path(path): |
97 # special cache of tag | 59 # special cache of tag |
105 else: | 67 else: |
106 self.other_templates.append(match_template) | 68 self.other_templates.append(match_template) |
107 | 69 |
108 def remove(self, match_template): | 70 def remove(self, match_template): |
109 """ | 71 """ |
110 Permanently remove a match_template - mainly for match_once | 72 Remove the template permanently |
111 """ | 73 """ |
112 path = match_template[1] | 74 path = match_template[1] |
113 | 75 |
114 if is_simple_path(path): | 76 if is_simple_path(path): |
115 tag_name = path.paths[0][0][1].name | 77 tag_name = path.paths[0][0][1].name |
123 self.other_templates.remove(match_template) | 85 self.other_templates.remove(match_template) |
124 | 86 |
125 # clean up match_order | 87 # clean up match_order |
126 del self.match_order[id(match_template)] | 88 del self.match_order[id(match_template)] |
127 | 89 |
128 def single_match(cls, match_template): | |
129 """ | |
130 Factory for creating a MatchSet with just one match | |
131 """ | |
132 match_set = cls() | |
133 match_set.add(match_template) | |
134 return match_set | |
135 single_match = classmethod(single_match) | |
136 | |
137 def before_template(self, match_template, inclusive): | |
138 cls = type(self) | |
139 max_index = self.match_order[id(match_template)] | |
140 if not inclusive: | |
141 max_index -= 1 | |
142 return cls(parent=self, max_index=max_index) | |
143 | |
144 def after_template(self, match_template): | |
145 """ | |
146 Factory for creating a MatchSet that only matches templates after | |
147 the given match | |
148 """ | |
149 cls = type(self) | |
150 min_index = self.match_order[id(match_template)] + 1 | |
151 return cls(parent=self, min_index=min_index) | |
152 | |
153 def find_raw_matches(self, event): | 90 def find_raw_matches(self, event): |
154 """ Return a list of all valid templates that can be used for the | 91 """ Return a list of all valid templates that can be used for the |
155 given event. Ordering is funky because we first check | 92 given event. Ordering is funky because we first check |
156 self.tag_templates, then check self.other_templates. | 93 self.tag_templates, then check self.other_templates. |
157 """ | 94 """ |
165 yield template | 102 yield template |
166 | 103 |
167 for template in self.other_templates: | 104 for template in self.other_templates: |
168 yield template | 105 yield template |
169 | 106 |
107 class MatchSet(object): | |
108 """ A MatchSet is a set of matches discovered by the parser. This | |
109 class encapsulates the matching of a particular event to a set of | |
110 matches. It is optimized for basic tag matches, since that is by | |
111 far the most common use of py:match. | |
112 | |
113 The two primary entry points into MatchSet are ``add``, which adds | |
114 a new py:match, and ``find_matches``, which returns all | |
115 /candidate/ match templates. The consumer of ``find_matches`` | |
116 still must call each candidates' match() to ensure the event | |
117 really matches, and to maintain state within the match. | |
118 | |
119 If a given py:match's path is simply a node name match, | |
120 (LocalNameTest) like "xyz", then MatchSet indexes that in a | |
121 dictionary that maps tag names to matches. | |
122 | |
123 If the path is more complex like "xyz[k=z]" then then that match | |
124 will always be returned by ``find_matches``. """ | |
125 def __init__(self, parent=None, | |
126 min_index=None, | |
127 max_index=None): | |
128 """ | |
129 If a parent is given, it means this is a wrapper around another | |
130 set. | |
131 | |
132 """ | |
133 if parent is None: | |
134 self.state = MatchState() | |
135 else: | |
136 self.state = parent.state | |
137 | |
138 if parent is None: | |
139 self.min_index = None | |
140 self.max_index = None | |
141 else: | |
142 # We have a parent: Just copy references to member | |
143 # variables in parent so that there's no performance loss | |
144 self.max_index = parent.max_index | |
145 self.min_index = parent.min_index | |
146 | |
147 # sub-MatchSets can only be further constrained, not expanded | |
148 if max_index is not None: | |
149 assert self.max_index is None or max_index <= self.max_index | |
150 self.max_index = max_index | |
151 | |
152 if min_index is not None: | |
153 assert self.min_index is None or min_index > self.min_index | |
154 self.min_index = min_index | |
155 | |
156 def add(self, match_template): | |
157 """ | |
158 match_template is a tuple the form | |
159 test, path, template, hints, namespace, directives | |
160 """ | |
161 self.state.add(match_template) | |
162 | |
163 | |
164 def remove(self, match_template): | |
165 """ | |
166 Permanently remove a match_template - mainly for match_once | |
167 """ | |
168 self.state.remove(match_template) | |
169 | |
170 def before_template(self, match_template, inclusive): | |
171 """ Return a new MatchSet where only the templates that were declared | |
172 before ``match_template`` are available. If ``inclusive`` is | |
173 true, then it will also include match_template itself. | |
174 | |
175 """ | |
176 cls = type(self) | |
177 max_index = self.state.match_order[id(match_template)] | |
178 if not inclusive: | |
179 max_index -= 1 | |
180 return cls(parent=self, max_index=max_index) | |
181 | |
182 def after_template(self, match_template): | |
183 """ | |
184 Factory for creating a MatchSet that only matches templates after | |
185 the given match | |
186 """ | |
187 cls = type(self) | |
188 min_index = self.state.match_order[id(match_template)] + 1 | |
189 return cls(parent=self, min_index=min_index) | |
190 | |
191 | |
170 def find_matches(self, event): | 192 def find_matches(self, event): |
171 """ Return a list of all valid templates that can be used for the | 193 """ Return a list of all valid templates that can be used for the |
172 given event. | 194 given event. |
173 | 195 |
174 The basic work here is sorting the result of find_raw_matches | 196 The basic work here is sorting the result of find_raw_matches |
175 """ | 197 """ |
176 | |
177 # remove exclusions | 198 # remove exclusions |
178 def can_match(template): | 199 def can_match(template): |
179 # make sure that | 200 # make sure that |
180 if (self.min_index is not None and | 201 if (self.min_index is not None and |
181 self.match_order[id(template)] < self.min_index): | 202 self.state.match_order[id(template)] < self.min_index): |
182 return False | 203 return False |
183 | 204 |
184 if (self.max_index is not None and | 205 if (self.max_index is not None and |
185 self.match_order[id(template)] > self.max_index): | 206 self.state.match_order[id(template)] > self.max_index): |
186 return False | 207 return False |
187 | 208 |
188 return True | 209 return True |
189 | 210 |
190 matches = ifilter(can_match, | 211 matches = ifilter(can_match, |
191 self.find_raw_matches(event)) | 212 self.state.find_raw_matches(event)) |
192 | 213 |
193 # sort the results according to the order they were added | 214 # sort the results according to the order they were added |
194 return sorted(matches, key=lambda v: self.match_order[id(v)]) | 215 return sorted(matches, key=lambda v: self.state.match_order[id(v)]) |
195 | 216 |
196 def __nonzero__(self): | 217 def __nonzero__(self): |
197 """ | 218 """ |
198 allow this to behave as a list | 219 allow this to behave as a list - and at least try to act empty if |
199 """ | 220 the list is likely to be empty |
200 | 221 """ |
222 # dirt-simple case: no py:match templates at all | |
223 if not (self.state.tag_templates or self.state.other_templates): | |
224 return False | |
201 # this is easy - before the first element there is nothing | 225 # this is easy - before the first element there is nothing |
202 if self.max_index == -1: | 226 if self.max_index == -1: |
203 return False | 227 return False |
204 | 228 |
205 # this isn't always right because match_order may shrink, but | 229 # this isn't always right because match_order may shrink, but |
206 # you'll never get a false-negative | 230 # you'll never get a false-negative |
207 if self.min_index == len(self.match_order): | 231 if self.min_index == self.state.current_index: |
208 return False | 232 return False |
209 | 233 |
210 # check for a range that is completely constrained | 234 # check for a range that is completely constrained |
211 if self.min_index is not None and self.max_index is not None: | 235 if self.min_index is not None and self.max_index is not None: |
212 if self.min_index >= self.max_index: | 236 if self.min_index >= self.max_index: |
213 return False | 237 return False |
214 | 238 |
215 return bool(self.tag_templates or self.other_templates) | 239 return True |
216 | 240 |
217 def __str__(self): | 241 def __str__(self): |
218 parent = "" | 242 parent = "" |
219 if self.parent: | 243 if self.parent: |
220 parent = ": child of 0x%x" % id(self.parent) | 244 parent = ": child of 0x%x" % id(self.parent) |
221 | 245 |
222 return "<MatchSet 0x%x %d tag templates, %d other templates, range=[%s:%s]%s>" % ( | 246 return "<MatchSet 0x%x %d tag templates, %d other templates, range=[%s:%s]%s>" % ( |
223 id(self), len(self.tag_templates), len(self.other_templates), | 247 id(self), len(self.state.tag_templates), len(self.other_templates), |
224 self.min_index, self.max_index, | 248 self.min_index, self.max_index, |
225 parent) | 249 parent) |