1
|
1 # -*- coding: utf-8 -*-
|
|
2 #
|
|
3 # Copyright (C) 2006 Edgewall Software
|
|
4 # All rights reserved.
|
|
5 #
|
|
6 # This software is licensed as described in the file COPYING, which
|
|
7 # you should have received as part of this distribution. The terms
|
|
8 # are also available at http://trac.edgewall.com/license.html.
|
|
9 #
|
|
10 # This software consists of voluntary contributions made by many
|
|
11 # individuals. For the exact contribution history, see the revision
|
|
12 # history and logs, available at http://projects.edgewall.com/trac/.
|
|
13
|
|
14 """Basic support for evaluating XPath expressions against streams."""
|
|
15
|
|
16 import re
|
|
17
|
|
18 from markup.core import QName, Stream
|
|
19
|
|
# Names exported by ``from <this module> import *``.
__all__ = ['Path']

# (opening, closing) character pairs; a predicate token whose first and last
# characters form one of these pairs is treated as a quoted string literal.
_QUOTES = (("'", "'"), ('"', '"'))
|
|
23
|
|
class Path(object):
    """Basic XPath support on markup event streams.

    Only a small subset of XPath is understood: element names, the ``*``
    wildcard, ``@attribute`` tests, the ``/`` and ``//`` separators, the
    ``text()`` function, and simple attribute predicates that use ``=`` or
    ``!=`` against a quoted string. Whitespace between tokens is ignored.

    >>> from markup.input import XML

    Selecting specific tags:

    >>> Path('root').select(XML('<root/>')).render()
    '<root/>'
    >>> Path('//root').select(XML('<root/>')).render()
    '<root/>'

    Using wildcards for tag names:

    >>> Path('*').select(XML('<root/>')).render()
    '<root/>'
    >>> Path('//*').select(XML('<root/>')).render()
    '<root/>'

    Selecting attribute values:

    >>> Path('@foo').select(XML('<root/>')).render()
    ''
    >>> Path('@foo').select(XML('<root foo="bar"/>')).render()
    'bar'

    Selecting descendants:

    >>> Path("root/*").select(XML('<root><foo/><bar/></root>')).render()
    '<foo/><bar/>'
    >>> Path("root/bar").select(XML('<root><foo/><bar/></root>')).render()
    '<bar/>'
    >>> Path("root/baz").select(XML('<root><foo/><bar/></root>')).render()
    ''
    >>> Path("root/foo/*").select(XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'

    Selecting text nodes:

    >>> Path("item/text()").select(XML('<root><item>Foo</item></root>')).render()
    'Foo'
    >>> Path("item/text()").select(XML('<root><item>Foo</item><item>Bar</item></root>')).render()
    'FooBar'

    Skipping ancestors:

    >>> Path("foo/bar").select(XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'
    >>> Path("foo/*").select(XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'
    >>> Path("root/bar").select(XML('<root><foo><bar/></foo></root>')).render()
    ''
    >>> Path("root/bar").select(XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')).render()
    '<bar id="2"/>'
    >>> Path("root/*/bar").select(XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'
    >>> Path("root//bar").select(XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')).render()
    '<bar id="1"/><bar id="2"/>'

    Using simple attribute predicates:

    >>> Path("root/item[@important]").select(XML('<root><item/><item important="very"/></root>')).render()
    '<item important="very"/>'
    >>> Path('root/item[@important="very"]').select(XML('<root><item/><item important="very"/></root>')).render()
    '<item important="very"/>'
    >>> Path("root/item[@important='very']").select(XML('<root><item/><item important="notso"/></root>')).render()
    ''
    >>> Path("root/item[@important!='very']").select(
    ...     XML('<root><item/><item important="notso"/></root>')).render()
    '<item/><item important="notso"/>'

    Whitespace around the tokens of a predicate is ignored:

    >>> Path("root/item[@important = 'very']").select(XML('<root><item/><item important="very"/></root>')).render()
    '<item important="very"/>'
    """

    # Tokenizer. Group 1 captures an operator, group 2 captures a name or a
    # quoted literal; the last alternative consumes whitespace and captures
    # nothing, so it shows up in ``findall`` as an ``('', '')`` pair.
    _TOKEN_RE = re.compile(r'(::|\.\.|\(\)|[/.:\[\]\(\)@=!])|'
                           r'([^/:\[\]\(\)@=!\s]+)|'
                           r'\s+')

    def __init__(self, text):
        """Parse the path expression `text` into a list of location steps.

        The original expression is kept in ``self.source``. The parsed form
        is stored in ``self.steps``; every entry is a
        ``(closure, node_test, predicates)`` triple where:

        * ``closure`` is true when the step was introduced by ``//`` or by no
          operator at all (i.e. it may match at any depth),
        * ``node_test`` is a callable taking ``(kind, data, pos)``,
        * ``predicates`` is a list of further callables with that signature.

        Raises `NotImplementedError` for any function call other than
        ``text()``.
        """
        self.source = text

        steps = []
        cur_op = ''
        cur_tag = ''
        in_predicate = False
        for op, tag in self._TOKEN_RE.findall(text):
            if not op and not tag:
                # Pure whitespace match: it separates tokens but must neither
                # create a step nor reset the operator collected so far.
                continue

            if op:
                if op == '[':
                    in_predicate = True
                elif op == ']':
                    in_predicate = False
                elif op.startswith('('):
                    if cur_tag == 'text':
                        # The name preceding "()" was already added as an
                        # element test; replace it with a text-node test.
                        steps[-1] = (False, self.fn_text(), [])
                    else:
                        raise NotImplementedError('XPath function "%s" not '
                                                  'supported' % cur_tag)
                else:
                    # Accumulate, so that "/" "/" becomes "//" and "!" "="
                    # becomes "!=".
                    cur_op += op
                cur_tag = ''
            else:
                closure = cur_op in ('', '//')
                if cur_op == '@':
                    if tag == '*':
                        node_test = self.any_attribute()
                    else:
                        node_test = self.attribute_by_name(tag)
                else:
                    if tag == '*':
                        node_test = self.any_element()
                    elif in_predicate:
                        # NOTE(review): when the operand is not a quoted
                        # string, ``node_test`` still holds whatever the
                        # previous token produced and is reused below.
                        # NOTE(review): the nesting of the ``if cur_op``
                        # chain relative to the quote check was reconstructed
                        # from a listing that had lost its indentation --
                        # confirm against version control.
                        if len(tag) > 1 and (tag[0], tag[-1]) in _QUOTES:
                            node_test = self.literal_string(tag[1:-1])
                        if cur_op == '=':
                            # Fold the previous predicate (the left operand)
                            # and this operand into one comparison.
                            node_test = self.op_eq(steps[-1][2][-1], node_test)
                            steps[-1][2].pop()
                        elif cur_op == '!=':
                            node_test = self.op_neq(steps[-1][2][-1], node_test)
                            steps[-1][2].pop()
                    else:
                        node_test = self.element_by_name(tag)
                if in_predicate:
                    steps[-1][2].append(node_test)
                else:
                    steps.append([closure, node_test, []])
                cur_op = ''
                cur_tag = tag
        self.steps = steps

    def __repr__(self):
        """Return ``<ClassName "source expression">``."""
        return '<%s "%s">' % (self.__class__.__name__, self.source)

    def select(self, stream):
        """Return a `Stream` of the events in `stream` matched by this path.

        `stream` is iterated as ``(kind, data, pos)`` event tuples. When the
        path test returns ``True`` for an event, that event and every
        following event up to and including the END that balances it are
        emitted. When the test returns any other true value (the attribute
        and text tests return a replacement event tuple), that value is
        emitted instead.
        """
        stream = iter(stream)

        def _generate():
            test = self.test()
            for kind, data, pos in stream:
                result = test(kind, data, pos)
                if result is True:
                    yield kind, data, pos
                    # Copy the matched element's subtree. Iterating the same
                    # iterator again simply continues where the outer loop
                    # stopped, and ends quietly if the stream is truncated.
                    depth = 1
                    for event in stream:
                        if event[0] is Stream.START:
                            depth += 1
                        elif event[0] is Stream.END:
                            depth -= 1
                        yield event
                        # Keep the tester's cursor stack in step with the
                        # events consumed here.
                        test(*event)
                        if depth <= 0:
                            break
                elif result:
                    yield result

        return Stream(_generate())

    def test(self):
        """Return a stateful function that tests events against this path.

        The returned callable takes ``(kind, data, pos)`` and must be fed
        the events of a stream in order, because it keeps a stack of cursors
        into ``self.steps``: one is pushed per START event and popped per END
        event. It returns the (true) result of the final step's node test
        when the whole path has matched on the current event, ``False`` once
        the stack has been emptied, and ``None`` otherwise.
        """
        stack = [0] # stack of cursors into the location path

        def _test(kind, data, pos):
            if not stack:
                return False

            if kind is Stream.END:
                stack.pop()
                return None

            if kind is Stream.START:
                # Children start out at the step their parent has reached.
                stack.append(stack[-1])

            closure, node_test, predicates = self.steps[stack[-1]]

            matched = node_test(kind, data, pos)
            if matched and predicates:
                for predicate in predicates:
                    if not predicate(kind, data, pos):
                        matched = None
                        break

            if matched:
                if stack[-1] == len(self.steps) - 1:
                    # The last location step matched: report the result.
                    return matched

                # An intermediate step matched: advance to the next one.
                stack[-1] += 1

            elif kind is Stream.START and not closure:
                # FIXME: If this step is not a closure, it cannot be matched
                #        until the current element is closed... so we need to
                #        move the cursor back to the last closure and retest
                #        that against the current element
                closures = [step for step in self.steps[:stack[-1]] if step[0]]
                closures.reverse()
                for closure, node_test, predicates in closures:
                    stack[-1] -= 1
                    if closure:
                        matched = node_test(kind, data, pos)
                        if matched:
                            stack[-1] += 1
                        # NOTE(review): the nesting of this ``break`` was
                        # reconstructed from a listing that had lost its
                        # indentation -- confirm against version control.
                        break

            return None

        return _test

    class any_element(object):
        """Node test that accepts every START event."""

        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                return True
            return None

        def __repr__(self):
            return '<%s>' % self.__class__.__name__

    class element_by_name(object):
        """Node test that accepts START events whose tag has a given local
        name."""

        def __init__(self, name):
            self.name = QName(name)

        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                # data[0] is the tag name of the element being opened.
                return data[0].localname == self.name
            return None

        def __repr__(self):
            return '<%s "%s">' % (self.__class__.__name__, self.name)

    class any_attribute(object):
        """Node test producing a TEXT event with all attribute values of a
        START event joined together, or ``None`` if that text is empty."""

        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                # data[1] iterates as (name, value) pairs.
                text = ''.join([val for name, val in data[1]])
                if text:
                    return Stream.TEXT, text, pos
                return None
            return None

        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class attribute_by_name(object):
        """Node test producing a TEXT event with the value of one named
        attribute of a START event, or ``None`` if it is not present."""

        def __init__(self, name):
            self.name = QName(name)

        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                if self.name in data[1]:
                    return Stream.TEXT, data[1].get(self.name), pos
                return None
            return None

        def __repr__(self):
            return '<%s "%s">' % (self.__class__.__name__, self.name)

    class fn_text(object):
        """Node test for ``text()``: passes TEXT events through unchanged."""

        def __call__(self, kind, data, pos):
            if kind is Stream.TEXT:
                return kind, data, pos
            return None

        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class literal_string(object):
        """Predicate operand that always yields a TEXT event carrying a
        fixed string, regardless of the event being tested."""

        def __init__(self, value):
            self.value = value

        def __call__(self, kind, data, pos):
            return Stream.TEXT, self.value, (-1, -1)

        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class op_eq(object):
        """Predicate that is true when both operands yield the same value.

        Each operand is a callable taking ``(kind, data, pos)``; the value
        compared is item 1 of its result, or the result itself when that is
        false (e.g. ``None``).
        """

        def __init__(self, lval, rval):
            self.lval = lval
            self.rval = rval

        def __call__(self, kind, data, pos):
            lval = self.lval(kind, data, pos)
            rval = self.rval(kind, data, pos)
            return (lval and lval[1]) == (rval and rval[1])

        def __repr__(self):
            return '<%s %r = %r>' % (self.__class__.__name__, self.lval,
                                     self.rval)

    class op_neq(object):
        """Predicate that is true when the operands yield different values.

        Operands are evaluated exactly as for `op_eq`.
        """

        def __init__(self, lval, rval):
            self.lval = lval
            self.rval = rval

        def __call__(self, kind, data, pos):
            lval = self.lval(kind, data, pos)
            rval = self.rval(kind, data, pos)
            return (lval and lval[1]) != (rval and rval[1])

        def __repr__(self):
            return '<%s %r != %r>' % (self.__class__.__name__, self.lval,
                                      self.rval)