# -*- coding: utf-8 -*-
#
# Copyright (C) 2006 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://trac.edgewall.com/license.html.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://projects.edgewall.com/trac/.

"""Basic support for evaluating XPath expressions against streams."""

import re

from markup.core import QName, Stream

__all__ = ['Path']

# Pairs of (opening, closing) quote characters that delimit a string literal
# inside a predicate.
_QUOTES = (("'", "'"), ('"', '"'))


class Path(object):
    """Basic XPath support on markup event streams.

    >>> from markup.input import XML

    Selecting specific tags:

    >>> Path('root').select(XML('<root/>')).render()
    '<root/>'
    >>> Path('//root').select(XML('<root/>')).render()
    '<root/>'

    Using wildcards for tag names:

    >>> Path('*').select(XML('<root/>')).render()
    '<root/>'
    >>> Path('//*').select(XML('<root/>')).render()
    '<root/>'

    Selecting attribute values:

    >>> Path('@foo').select(XML('<root/>')).render()
    ''
    >>> Path('@foo').select(XML('<root foo="bar"/>')).render()
    'bar'

    Selecting descendants:

    >>> Path("root/*").select(XML('<root><foo/><bar/></root>')).render()
    '<foo/><bar/>'
    >>> Path("root/bar").select(XML('<root><foo/><bar/></root>')).render()
    '<bar/>'
    >>> Path("root/baz").select(XML('<root><foo/><bar/></root>')).render()
    ''
    >>> Path("root/foo/*").select(
    ...     XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'

    Selecting text nodes:

    >>> Path("item/text()").select(
    ...     XML('<root><item>Foo</item></root>')).render()
    'Foo'
    >>> Path("item/text()").select(
    ...     XML('<root><item>Foo</item><item>Bar</item></root>')).render()
    'FooBar'

    Skipping ancestors:

    >>> Path("foo/bar").select(
    ...     XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'
    >>> Path("foo/*").select(
    ...     XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'
    >>> Path("root/bar").select(
    ...     XML('<root><foo><bar/></foo></root>')).render()
    ''
    >>> Path("root/bar").select(
    ...     XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')).render()
    '<bar id="2"/>'
    >>> Path("root/*/bar").select(
    ...     XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'
    >>> Path("root//bar").select(
    ...     XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')).render()
    '<bar id="1"/><bar id="2"/>'

    Using simple attribute predicates:

    >>> Path("root/item[@important]").select(
    ...     XML('<root><item/><item important="very"/></root>')).render()
    '<item important="very"/>'
    >>> Path('root/item[@important="very"]').select(
    ...     XML('<root><item/><item important="very"/></root>')).render()
    '<item important="very"/>'
    >>> Path("root/item[@important='very']").select(
    ...     XML('<root><item/><item important="notso"/></root>')).render()
    ''
    >>> Path("root/item[@important!='very']").select(
    ...     XML('<root><item/><item important="notso"/></root>')).render()
    '<item/><item important="notso"/>'
    """

    # Tokenizer for path expressions.  Group 1 captures operators, group 2
    # captures names and quoted literals; runs of whitespace match the third
    # (non-capturing) alternative and therefore come back as ('', '').
    # Raw strings keep the backslashes for the regex engine without relying on
    # Python passing unknown string escapes through unchanged.
    _TOKEN_RE = re.compile(r'(::|\.\.|\(\)|[/.:\[\]\(\)@=!])|'
                           r'([^/:\[\]\(\)@=!\s]+)|'
                           r'\s+')

    def __init__(self, text):
        """Parse the path expression `text` into a list of location steps.

        The result is stored in `self.steps`; every step is a three-item list
        `[closure, node_test, predicates]`:

         * `closure` is true when the step was not introduced by a single
           operator such as `/` (i.e. it is the first step or follows `//`),
         * `node_test` is a callable taking `(kind, data, pos)`,
         * `predicates` is a list of further callables with that signature.

        The original expression is kept in `self.source`.

        Raises `NotImplementedError` for any function call other than
        `text()`.
        """
        self.source = text

        steps = []
        cur_op = ''
        cur_tag = ''
        in_predicate = False
        for op, tag in self._TOKEN_RE.findall(text):
            if not op and not tag:
                # Whitespace between tokens: skip it, so that it neither
                # creates an empty-name step nor resets the pending operator.
                continue

            if op:
                if op == '[':
                    in_predicate = True
                elif op == ']':
                    in_predicate = False
                elif op.startswith('('):
                    if cur_tag == 'text':
                        # The name "text" was already added as an element
                        # step; replace that step by the text() node test.
                        steps[-1] = [False, self.fn_text(), []]
                    else:
                        raise NotImplementedError('XPath function "%s" not '
                                                  'supported' % cur_tag)
                else:
                    # Operators may span several tokens ('//', '!=').
                    cur_op += op
                cur_tag = ''
            else:
                closure = cur_op in ('', '//')
                if cur_op == '@':
                    if tag == '*':
                        node_test = self.any_attribute()
                    else:
                        node_test = self.attribute_by_name(tag)
                else:
                    if tag == '*':
                        node_test = self.any_element()
                    elif in_predicate:
                        # NOTE(review): only quoted string literals are
                        # recognized here; for any other operand `node_test`
                        # silently keeps the value of the previous token.
                        if len(tag) > 1 and (tag[0], tag[-1]) in _QUOTES:
                            node_test = self.literal_string(tag[1:-1])
                        if cur_op == '=':
                            # Combine with the left operand, which is the
                            # predicate added last.
                            node_test = self.op_eq(steps[-1][2][-1], node_test)
                            steps[-1][2].pop()
                        elif cur_op == '!=':
                            node_test = self.op_neq(steps[-1][2][-1], node_test)
                            steps[-1][2].pop()
                    else:
                        node_test = self.element_by_name(tag)
                if in_predicate:
                    steps[-1][2].append(node_test)
                else:
                    steps.append([closure, node_test, []])
                cur_op = ''
                cur_tag = tag
        self.steps = steps

    def __repr__(self):
        return '<%s "%s">' % (self.__class__.__name__, self.source)

    def select(self, stream):
        """Return a `Stream` of the events in `stream` matched by this path.

        Every event is passed to a fresh tester obtained from `test()`:

         * if the tester returns `True`, the event and all following events up
           to and including the corresponding END event are emitted,
         * any other true result (an event tuple produced by an attribute or
           text node test) is emitted in place of the event,
         * everything else is dropped.
        """
        stream = iter(stream)

        def _generate():
            test = self.test()
            for kind, data, pos in stream:
                result = test(kind, data, pos)
                if result is True:
                    yield kind, data, pos
                    # Emit the complete subtree.  Iterating with `for` (rather
                    # than calling next()) ends cleanly if the stream stops
                    # before the element is closed.
                    depth = 1
                    for ev in stream:
                        if ev[0] is Stream.START:
                            depth += 1
                        elif ev[0] is Stream.END:
                            depth -= 1
                        yield ev
                        # Keep the tester's cursor stack in sync with the
                        # events consumed here; the result is irrelevant.
                        test(*ev)
                        if depth <= 0:
                            break
                elif result:
                    yield result

        return Stream(_generate())

    def test(self):
        """Return a stateful function for testing events against this path.

        The returned function takes `(kind, data, pos)` and must be called
        with every event of a stream, in order.  It returns the (true) result
        of the last step's node test once the whole path has matched, `False`
        when more END events than START events have been seen, and `None`
        otherwise.
        """
        stack = [0] # stack of cursors into the location path

        def _test(kind, data, pos):
            if not stack:
                return False

            if kind is Stream.END:
                stack.pop()
                return None

            if kind is Stream.START:
                # Children start out at the step their parent has reached.
                stack.append(stack[-1])

            closure, node_test, predicates = self.steps[stack[-1]]

            matched = node_test(kind, data, pos)
            if matched and predicates:
                for predicate in predicates:
                    if not predicate(kind, data, pos):
                        matched = None
                        break

            if matched:
                if stack[-1] == len(self.steps) - 1:
                    # The last step matched, so the whole path did.
                    return matched

                stack[-1] += 1

            elif kind is Stream.START and not closure:
                # If this step is not a closure, it cannot be matched until the
                # current element is closed... so we need to move the cursor
                # back to the last closure and retest that against the current
                # element
                closures = [step for step in self.steps[:stack[-1]] if step[0]]
                closures.reverse()
                # NOTE(review): the list only contains closure steps, so the
                # loop body runs at most once and moves the cursor back by
                # exactly one step.
                for closure, node_test, predicates in closures:
                    stack[-1] -= 1
                    if closure:
                        matched = node_test(kind, data, pos)
                        if matched:
                            stack[-1] += 1
                        break

            return None

        return _test

    class any_element(object):
        """Node test that is true for every START event."""
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                return True
            return None
        def __repr__(self):
            return '<%s>' % self.__class__.__name__

    class element_by_name(object):
        """Node test comparing the local name of a START event's first data
        item with the given name."""
        def __init__(self, name):
            self.name = QName(name)
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                return data[0].localname == self.name
            return None
        def __repr__(self):
            return '<%s "%s">' % (self.__class__.__name__, self.name)

    class any_attribute(object):
        """Node test producing a TEXT event made of all attribute values of a
        START event joined together, or `None` if that text is empty."""
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                # data[1] is iterated as (name, value) pairs
                text = ''.join([val for name, val in data[1]])
                if text:
                    return Stream.TEXT, text, pos
                return None
            return None
        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class attribute_by_name(object):
        """Node test producing a TEXT event with the value of the named
        attribute of a START event, or `None` if it is not present."""
        def __init__(self, name):
            self.name = QName(name)
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                if self.name in data[1]:
                    return Stream.TEXT, data[1].get(self.name), pos
                return None
            return None
        def __repr__(self):
            return '<%s "%s">' % (self.__class__.__name__, self.name)

    class fn_text(object):
        """Node test for `text()`: passes TEXT events through unchanged."""
        def __call__(self, kind, data, pos):
            if kind is Stream.TEXT:
                return kind, data, pos
            return None
        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class literal_string(object):
        """Predicate operand that evaluates to a TEXT event carrying a fixed
        string, whatever event it is called with."""
        def __init__(self, value):
            self.value = value
        def __call__(self, kind, data, pos):
            # A literal has no position in the source, hence (-1, -1).
            return Stream.TEXT, self.value, (-1, -1)
        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class op_eq(object):
        """Predicate for `=`: true if both operands evaluate to the same
        value for the event."""
        def __init__(self, lval, rval):
            self.lval = lval
            self.rval = rval
        def __call__(self, kind, data, pos):
            lval = self.lval(kind, data, pos)
            rval = self.rval(kind, data, pos)
            # Operands yield an event tuple (compare its data item) or a
            # false value such as None (compare that value itself).
            return (lval and lval[1]) == (rval and rval[1])
        def __repr__(self):
            return '<%s %r = %r>' % (self.__class__.__name__, self.lval,
                                     self.rval)

    class op_neq(object):
        """Predicate for `!=`: true if the operands evaluate to different
        values for the event."""
        def __init__(self, lval, rval):
            self.lval = lval
            self.rval = rval
        def __call__(self, kind, data, pos):
            lval = self.lval(kind, data, pos)
            rval = self.rval(kind, data, pos)
            # Same operand handling as for op_eq.
            return (lval and lval[1]) != (rval and rval[1])
        def __repr__(self):
            return '<%s %r != %r>' % (self.__class__.__name__, self.lval,
                                      self.rval)