Mercurial > genshi > genshi-test
comparison markup/path.py @ 1:821114ec4f69
Initial import.
author | cmlenz |
---|---|
date | Sat, 03 Jun 2006 07:16:01 +0000 |
parents | |
children | 00835401c8cc |
comparison
equal
deleted
inserted
replaced
0:20f3417d4171 | 1:821114ec4f69 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 # | |
3 # Copyright (C) 2006 Edgewall Software | |
4 # All rights reserved. | |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://trac.edgewall.com/license.html. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://projects.edgewall.com/trac/. | |
13 | |
14 """Basic support for evaluating XPath expressions against streams.""" | |
15 | |
16 import re | |
17 | |
18 from markup.core import QName, Stream | |
19 | |
# Names exported by a star-import of this module.
__all__ = ['Path']

# (opening, closing) delimiter pairs; Path.__init__ treats a predicate token
# whose first and last characters form one of these pairs as a string literal.
_QUOTES = (("'", "'"), ('"', '"'))
23 | |
class Path(object):
    """Basic XPath support on markup event streams.

    Only a small subset of XPath is handled: element names, the ``*``
    wildcard, ``@name`` / ``@*`` attribute tests, the ``text()`` function,
    ``/`` and ``//`` separators, and predicates that test an attribute for
    presence or compare it with a quoted string using ``=`` or ``!=``.

    >>> from markup.input import XML

    Selecting specific tags:

    >>> Path('root').select(XML('<root/>')).render()
    '<root/>'
    >>> Path('//root').select(XML('<root/>')).render()
    '<root/>'

    Using wildcards for tag names:

    >>> Path('*').select(XML('<root/>')).render()
    '<root/>'
    >>> Path('//*').select(XML('<root/>')).render()
    '<root/>'

    Selecting attribute values:

    >>> Path('@foo').select(XML('<root/>')).render()
    ''
    >>> Path('@foo').select(XML('<root foo="bar"/>')).render()
    'bar'

    Selecting descendants:

    >>> Path("root/*").select(XML('<root><foo/><bar/></root>')).render()
    '<foo/><bar/>'
    >>> Path("root/bar").select(XML('<root><foo/><bar/></root>')).render()
    '<bar/>'
    >>> Path("root/baz").select(XML('<root><foo/><bar/></root>')).render()
    ''
    >>> Path("root/foo/*").select(XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'

    Selecting text nodes:

    >>> Path("item/text()").select(XML('<root><item>Foo</item></root>')).render()
    'Foo'
    >>> Path("item/text()").select(XML('<root><item>Foo</item><item>Bar</item></root>')).render()
    'FooBar'

    Skipping ancestors:

    >>> Path("foo/bar").select(XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'
    >>> Path("foo/*").select(XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'
    >>> Path("root/bar").select(XML('<root><foo><bar/></foo></root>')).render()
    ''
    >>> Path("root/bar").select(XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')).render()
    '<bar id="2"/>'
    >>> Path("root/*/bar").select(XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'
    >>> Path("root//bar").select(XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')).render()
    '<bar id="1"/><bar id="2"/>'
    >>> Path("root//bar").select(XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')).render()
    '<bar id="1"/><bar id="2"/>'

    Using simple attribute predicates:

    >>> Path("root/item[@important]").select(XML('<root><item/><item important="very"/></root>')).render()
    '<item important="very"/>'
    >>> Path('root/item[@important="very"]').select(XML('<root><item/><item important="very"/></root>')).render()
    '<item important="very"/>'
    >>> Path("root/item[@important='very']").select(XML('<root><item/><item important="notso"/></root>')).render()
    ''
    >>> Path("root/item[@important!='very']").select(
    ...     XML('<root><item/><item important="notso"/></root>')).render()
    '<item/><item important="notso"/>'
    """

    # Group 1 captures operators/punctuation, group 2 captures names and
    # literals; the third alternative matches whitespace with both groups
    # empty.  Raw strings keep the backslash escapes intact without relying
    # on Python passing unknown escape sequences through unchanged.
    _TOKEN_RE = re.compile(r'(::|\.\.|\(\)|[/.:\[\]\(\)@=!])|'
                           r'([^/:\[\]\(\)@=!\s]+)|'
                           r'\s+')

    def __init__(self, text):
        """Parse the path expression `text` into location steps.

        The result is stored in `self.steps`.  Each step is a sequence
        ``(closure, node_test, predicates)``: `closure` is true when the
        step was introduced by no operator or by ``//``, `node_test` is a
        callable taking ``(kind, data, pos)``, and `predicates` is a list of
        further callables with the same signature.

        :param text: the path expression
        :raises NotImplementedError: for a function other than ``text()``,
            or for a predicate operand that is neither an attribute test,
            ``*`` nor a quoted string
        """
        self.source = text

        steps = []
        cur_op = ''
        cur_tag = ''
        in_predicate = False
        for op, tag in self._TOKEN_RE.findall(text):
            if not op and not tag:
                # Whitespace match: both groups are empty.  Skip it so that
                # it neither becomes an empty element name nor resets the
                # pending operator.
                continue
            if op:
                if op == '[':
                    in_predicate = True
                elif op == ']':
                    in_predicate = False
                elif op.startswith('('):
                    if cur_tag == 'text':
                        # The name "text" was already added as an element
                        # step; replace it with the text() node test.
                        steps[-1] = (False, self.fn_text(), [])
                    else:
                        raise NotImplementedError('XPath function "%s" not '
                                                  'supported' % cur_tag)
                else:
                    # Accumulate multi-character operators: '//' and '!='
                    # arrive as two single-character tokens.
                    cur_op += op
                cur_tag = ''
            else:
                closure = cur_op in ('', '//')
                if cur_op == '@':
                    if tag == '*':
                        node_test = self.any_attribute()
                    else:
                        node_test = self.attribute_by_name(tag)
                else:
                    if tag == '*':
                        node_test = self.any_element()
                    elif in_predicate:
                        if len(tag) > 1 and (tag[0], tag[-1]) in _QUOTES:
                            node_test = self.literal_string(tag[1:-1])
                        else:
                            # Without this branch the node test left over
                            # from the previous token would be silently
                            # reused as the predicate.
                            raise NotImplementedError('XPath predicate '
                                                      'expression "%s" not '
                                                      'supported' % tag)
                        if cur_op == '=':
                            # Combine with the previously parsed operand,
                            # which is then replaced by the comparison.
                            node_test = self.op_eq(steps[-1][2][-1],
                                                   node_test)
                            steps[-1][2].pop()
                        elif cur_op == '!=':
                            node_test = self.op_neq(steps[-1][2][-1],
                                                    node_test)
                            steps[-1][2].pop()
                    else:
                        node_test = self.element_by_name(tag)
                if in_predicate:
                    steps[-1][2].append(node_test)
                else:
                    steps.append([closure, node_test, []])
                cur_op = ''
                cur_tag = tag
        self.steps = steps

    def __repr__(self):
        """Return ``<ClassName "source expression">``."""
        return '<%s "%s">' % (self.__class__.__name__, self.source)

    def select(self, stream):
        """Return a new `Stream` of the events in `stream` matched by this
        path.

        When the test returns ``True`` for an event, that event and all
        following events up to the balancing END event are emitted.  Any
        other truthy test result is itself emitted as an event.

        :param stream: an iterable of ``(kind, data, pos)`` events
        """
        stream = iter(stream)

        def _generate():
            test = self.test()
            for kind, data, pos in stream:
                result = test(kind, data, pos)
                if result is True:
                    yield kind, data, pos
                    depth = 1
                    while depth > 0:
                        # next() works on Python 2.6+ and 3.  The explicit
                        # guard ends selection quietly if the stream stops
                        # inside a matched element, instead of leaking
                        # StopIteration out of the generator.
                        try:
                            ev = next(stream)
                        except StopIteration:
                            return
                        if ev[0] is Stream.START:
                            depth += 1
                        elif ev[0] is Stream.END:
                            depth -= 1
                        yield ev
                        # Keep the tester's cursor stack in sync with the
                        # events consumed here.
                        test(*ev)
                elif result:
                    yield result

        return Stream(_generate())

    def test(self):
        """Return a stateful function that tests events against this path.

        The returned function takes ``(kind, data, pos)`` and must be fed
        the events of a stream in order.  It returns the result of the last
        step's node test when the event matches the complete path, ``False``
        once its cursor stack is empty, and ``None`` otherwise.
        """
        stack = [0] # stack of cursors into the location path

        def _test(kind, data, pos):
            if not stack:
                return False

            if kind is Stream.END:
                # Leaving an element: drop the cursor pushed at its START.
                stack.pop()
                return None

            if kind is Stream.START:
                # Entering an element: it starts from its parent's cursor.
                stack.append(stack[-1])

            matched = False
            closure, node_test, predicates = self.steps[stack[-1]]

            matched = node_test(kind, data, pos)
            if matched and predicates:
                for predicate in predicates:
                    if not predicate(kind, data, pos):
                        matched = None
                        break

            if matched:
                if stack[-1] == len(self.steps) - 1:
                    # Last step matched: the whole path matched.
                    return matched

                # Intermediate step matched: advance to the next step.
                stack[-1] += 1

            elif kind is Stream.START and not closure:
                # FIXME: If this step is not a closure, it cannot be matched
                #        until the current element is closed... so we need to
                #        move the cursor back to the last closure and retest
                #        that against the current element
                closures = [step for step in self.steps[:stack[-1]] if step[0]]
                closures.reverse()
                for closure, node_test, predicates in closures:
                    stack[-1] -= 1
                    if closure:
                        matched = node_test(kind, data, pos)
                        if matched:
                            stack[-1] += 1
                        break

            return None

        return _test

    class any_element(object):
        """Node test for ``*``: true for every START event."""
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                return True
            return None
        def __repr__(self):
            return '<%s>' % self.__class__.__name__

    class element_by_name(object):
        """Node test comparing a START event's local tag name with `name`."""
        def __init__(self, name):
            self.name = QName(name)
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                return data[0].localname == self.name
            return None
        def __repr__(self):
            return '<%s "%s">' % (self.__class__.__name__, self.name)

    class any_attribute(object):
        """Node test for ``@*``.

        For a START event, returns a TEXT event holding all attribute values
        joined together, or ``None`` if that text is empty.
        """
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                text = ''.join([val for name, val in data[1]])
                if text:
                    return Stream.TEXT, text, pos
                return None
            return None
        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class attribute_by_name(object):
        """Node test for ``@name``.

        For a START event carrying the attribute, returns a TEXT event with
        the attribute's value; otherwise returns ``None``.
        """
        def __init__(self, name):
            self.name = QName(name)
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                if self.name in data[1]:
                    return Stream.TEXT, data[1].get(self.name), pos
                return None
            return None
        def __repr__(self):
            return '<%s "%s">' % (self.__class__.__name__, self.name)

    class fn_text(object):
        """Node test for ``text()``: passes TEXT events through unchanged."""
        def __call__(self, kind, data, pos):
            if kind is Stream.TEXT:
                return kind, data, pos
            return None
        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class literal_string(object):
        """Predicate operand yielding a fixed string as a TEXT event."""
        def __init__(self, value):
            self.value = value
        def __call__(self, kind, data, pos):
            # A literal has no source position, hence (-1, -1).
            return Stream.TEXT, self.value, (-1, -1)
        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class op_eq(object):
        """Predicate for ``lval = rval``.

        Both operands are callables taking ``(kind, data, pos)``.  The second
        item of each truthy result is compared; a falsy result is compared
        as-is.
        """
        def __init__(self, lval, rval):
            self.lval = lval
            self.rval = rval
        def __call__(self, kind, data, pos):
            lval = self.lval(kind, data, pos)
            rval = self.rval(kind, data, pos)
            return (lval and lval[1]) == (rval and rval[1])
        def __repr__(self):
            return '<%s %r = %r>' % (self.__class__.__name__, self.lval,
                                     self.rval)

    class op_neq(object):
        """Predicate for ``lval != rval``.

        Operands are evaluated as in `op_eq`.  An operand with a falsy result
        (e.g. a missing attribute) therefore compares as unequal to any
        non-empty string.
        """
        def __init__(self, lval, rval):
            self.lval = lval
            self.rval = rval
        def __call__(self, kind, data, pos):
            lval = self.lval(kind, data, pos)
            rval = self.rval(kind, data, pos)
            return (lval and lval[1]) != (rval and rval[1])
        def __repr__(self):
            return '<%s %r != %r>' % (self.__class__.__name__, self.lval,
                                      self.rval)