Mercurial > genshi > mirror
annotate genshi/template/match.py @ 703:af57b12e3dd2 experimental-match-fastpaths
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
author | aflett |
---|---|
date | Mon, 31 Mar 2008 22:47:50 +0000 |
parents | 52a597419c0d |
children | 422d0607ba85 |
rev | line source |
---|---|
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
1 from genshi.core import START |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
2 from genshi.path import CHILD, LocalNameTest |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
3 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
4 from copy import copy |
703
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
5 from itertools import ifilter |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
6 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
7 def is_simple_path(path): |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
8 """ |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
9 Is the path merely a tag match like "foo"? |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
10 """ |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
11 if len(path.paths) == 1 and len(path.paths[0]) == 1: |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
12 axis, nodetest, predicates = path.paths[0][0] |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
13 if (axis is CHILD and |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
14 not predicates and |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
15 isinstance(nodetest, LocalNameTest)): |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
16 return True |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
17 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
18 return False |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
19 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
20 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
21 class MatchSet(object): |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
22 """ A MatchSet is a set of matches discovered by the parser. This |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
23 class encapsulates the matching of a particular event to a set of |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
24 matches. It is optimized for basic tag matches, since that is by |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
25 far the most common use of py:match. |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
26 |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
27 The two primary entry points into MatchSet are ``add``, which adds |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
28 a new py:match, and ``find_matches``, which returns all |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
29 /candidate/ match templates. The consumer of ``find_matches`` |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
30 still must call each candidate's match() to ensure the event |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
31 really matches, and to maintain state within the match. |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
32 |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
33 If a given py:match's path is simply a node name match, |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
34 (LocalNameTest) like "xyz", then MatchSet indexes that in a |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
35 dictionary that maps tag names to matches. |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
36 |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
37 If the path is more complex like "xyz[k=z]" then that match |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
38 will always be returned by ``find_matches``. """ |
703
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
39 def __init__(self, parent=None, |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
40 min_index=None, |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
41 max_index=None): |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
42 """ |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
43 If a parent is given, it means this is a wrapper around another |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
44 set. |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
45 |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
46 """ |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
47 self.parent = parent |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
48 |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
49 if parent is None: |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
50 # merely for indexing. Note that this is shared between |
701
52a597419c0d
minor speed cleanups to match_order - use a dict rather than a list, because d[id(k)] is faster than l.index(k)
aflett
parents:
690
diff
changeset
|
51 # all MatchSets that share the same root parent. We don't |
52a597419c0d
minor speed cleanups to match_order - use a dict rather than a list, because d[id(k)] is faster than l.index(k)
aflett
parents:
690
diff
changeset
|
52 # have to worry about exclusions here |
52a597419c0d
minor speed cleanups to match_order - use a dict rather than a list, because d[id(k)] is faster than l.index(k)
aflett
parents:
690
diff
changeset
|
53 self.match_order = {} |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
54 |
703
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
55 self.min_index = None |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
56 self.max_index = None |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
57 |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
58 # tag_templates are match templates whose path is simply |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
59 # a tag, like "body" or "img" |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
60 self.tag_templates = {} |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
61 |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
62 # other_templates include all other match templates, such |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
63 # as ones with complex paths like "[class=container]" |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
64 self.other_templates = [] |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
65 |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
66 else: |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
67 # We have a parent: Just copy references to member |
703
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
68 # variables in parent so that there's no performance loss |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
69 self.max_index = parent.max_index |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
70 self.min_index = parent.min_index |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
71 self.match_order = parent.match_order |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
72 self.tag_templates = parent.tag_templates |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
73 self.other_templates = parent.other_templates |
703
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
74 |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
75 if max_index is not None: |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
76 assert self.max_index is None or max_index <= self.max_index |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
77 self.max_index = max_index |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
78 |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
79 if min_index is not None: |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
80 assert self.min_index is None or min_index > self.min_index |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
81 self.min_index = min_index |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
82 |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
83 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
84 def add(self, match_template): |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
85 """ |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
86 match_template is a tuple of the form |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
87 test, path, template, hints, namespace, directives |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
88 """ |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
89 |
701
52a597419c0d
minor speed cleanups to match_order - use a dict rather than a list, because d[id(k)] is faster than l.index(k)
aflett
parents:
690
diff
changeset
|
90 # match_templates are currently tuples that contain unhashable |
52a597419c0d
minor speed cleanups to match_order - use a dict rather than a list, because d[id(k)] is faster than l.index(k)
aflett
parents:
690
diff
changeset
|
91 # objects. So we'll use id() for now. |
52a597419c0d
minor speed cleanups to match_order - use a dict rather than a list, because d[id(k)] is faster than l.index(k)
aflett
parents:
690
diff
changeset
|
92 self.match_order[id(match_template)] = len(self.match_order) |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
93 |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
94 path = match_template[1] |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
95 |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
96 if is_simple_path(path): |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
97 # special cache of tag |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
98 tag_name = path.paths[0][0][1].name |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
99 # setdefault is wasteful |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
100 if tag_name not in self.tag_templates: |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
101 self.tag_templates[tag_name] = [match_template] |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
102 else: |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
103 self.tag_templates[tag_name].append(match_template) |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
104 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
105 else: |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
106 self.other_templates.append(match_template) |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
107 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
108 def remove(self, match_template): |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
109 """ |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
110 Permanently remove a match_template - mainly for match_once |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
111 """ |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
112 path = match_template[1] |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
113 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
114 if is_simple_path(path): |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
115 tag_name = path.paths[0][0][1].name |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
116 if tag_name in self.tag_templates: |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
117 template_list = self.tag_templates[tag_name] |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
118 template_list.remove(match_template) |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
119 if not template_list: |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
120 del self.tag_templates[tag_name] |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
121 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
122 else: |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
123 self.other_templates.remove(match_template) |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
124 |
701
52a597419c0d
minor speed cleanups to match_order - use a dict rather than a list, because d[id(k)] is faster than l.index(k)
aflett
parents:
690
diff
changeset
|
125 # clean up match_order |
52a597419c0d
minor speed cleanups to match_order - use a dict rather than a list, because d[id(k)] is faster than l.index(k)
aflett
parents:
690
diff
changeset
|
126 del self.match_order[id(match_template)] |
52a597419c0d
minor speed cleanups to match_order - use a dict rather than a list, because d[id(k)] is faster than l.index(k)
aflett
parents:
690
diff
changeset
|
127 |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
128 def single_match(cls, match_template): |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
129 """ |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
130 Factory for creating a MatchSet with just one match |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
131 """ |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
132 match_set = cls() |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
133 match_set.add(match_template) |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
134 return match_set |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
135 single_match = classmethod(single_match) |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
136 |
703
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
137 def before_template(self, match_template, inclusive): |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
138 cls = type(self) |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
139 max_index = self.match_order[id(match_template)] |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
140 if not inclusive: |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
141 max_index -= 1 |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
142 return cls(parent=self, max_index=max_index) |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
143 |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
144 def after_template(self, match_template): |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
145 """ |
703
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
146 Factory for creating a MatchSet that only matches templates after |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
147 the given match |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
148 """ |
703
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
149 cls = type(self) |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
150 min_index = self.match_order[id(match_template)] + 1 |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
151 return cls(parent=self, min_index=min_index) |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
152 |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
153 def find_raw_matches(self, event): |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
154 """ Yield each valid template that can be used for the |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
155 given event. Ordering is funky because we first check |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
156 self.tag_templates, then check self.other_templates. |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
157 """ |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
158 kind, data, pos = event[:3] |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
159 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
160 # todo: get the order right |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
161 if kind is START: |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
162 tag, attrs = data |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
163 if tag.localname in self.tag_templates: |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
164 for template in self.tag_templates[tag.localname]: |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
165 yield template |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
166 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
167 for template in self.other_templates: |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
168 yield template |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
169 |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
170 def find_matches(self, event): |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
171 """ Return a list of all valid templates that can be used for the |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
172 given event. |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
173 |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
174 The basic work here is sorting the result of find_raw_matches |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
175 """ |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
176 |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
177 # remove exclusions |
703
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
178 def can_match(template): |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
179 # make sure that the template's index is within [min_index, max_index] |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
180 if (self.min_index is not None and |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
181 self.match_order[id(template)] < self.min_index): |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
182 return False |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
183 |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
184 if (self.max_index is not None and |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
185 self.match_order[id(template)] > self.max_index): |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
186 return False |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
187 |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
188 return True |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
189 |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
190 matches = ifilter(can_match, |
af57b12e3dd2
merge in trunk up through r818 - fundamentally changed the way MatchSet works, but actually is more consistent now
aflett
parents:
701
diff
changeset
|
191 self.find_raw_matches(event)) |
690
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
192 |
1240ada13334
more code/comment clean up - make sure to retain match order
aflett
parents:
687
diff
changeset
|
193 # sort the results according to the order they were added |
701
52a597419c0d
minor speed cleanups to match_order - use a dict rather than a list, because d[id(k)] is faster than l.index(k)
aflett
parents:
690
diff
changeset
|
194 return sorted(matches, key=lambda v: self.match_order[id(v)]) |
687
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
195 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
def __nonzero__(self):
    """
    Truth value of the set, so it can be tested the way a list would be.

    Returns False when the index range rules out any match, otherwise
    whether either template container holds anything.
    """
    # an upper bound of -1 means "before the first element" - nothing
    # can be there.  A lower bound equal to the number of entries in
    # match_order is likewise treated as empty; match_order may shrink,
    # so this test is not exact, but it never gives a false-negative.
    if self.max_index == -1 or self.min_index == len(self.match_order):
        return False

    # both bounds set: the range must not be empty or inverted
    fully_bounded = (self.min_index is not None
                     and self.max_index is not None)
    if fully_bounded and self.min_index >= self.max_index:
        return False

    # otherwise truth follows whatever the template containers hold
    if self.tag_templates:
        return True
    return bool(self.other_templates)
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
216 |
3d7288f373bd
land first cut at fast-path matching - needs some cleanup
aflett
parents:
diff
changeset
|
def __str__(self):
    """
    Return a one-line debugging summary of this match set.

    The summary shows the object's id, the sizes of tag_templates and
    other_templates, the [min_index:max_index] range and, when a parent
    is set, the id of that parent.
    """
    # Compare against None instead of relying on truthiness: this class
    # defines __nonzero__, so a parent that is (presumably) another
    # instance of it and happens to be empty would evaluate as false and
    # the "child of" suffix would be silently dropped.
    # NOTE(review): assumes "no parent" is represented by None - confirm
    # against the constructor.
    parent = ""
    if self.parent is not None:
        parent = ": child of 0x%x" % id(self.parent)

    return "<MatchSet 0x%x %d tag templates, %d other templates, range=[%s:%s]%s>" % (
        id(self), len(self.tag_templates), len(self.other_templates),
        self.min_index, self.max_index,
        parent)