comparison markup/path.py @ 1:821114ec4f69

Initial import.
author cmlenz
date Sat, 03 Jun 2006 07:16:01 +0000
parents
children 00835401c8cc
comparison
equal deleted inserted replaced
0:20f3417d4171 1:821114ec4f69
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2006 Edgewall Software
4 # All rights reserved.
5 #
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://trac.edgewall.com/license.html.
9 #
10 # This software consists of voluntary contributions made by many
11 # individuals. For the exact contribution history, see the revision
12 # history and logs, available at http://projects.edgewall.com/trac/.
13
14 """Basic support for evaluating XPath expressions against streams."""
15
16 import re
17
18 from markup.core import QName, Stream
19
# Only the `Path` class is part of this module's public interface.
__all__ = ['Path']

# (opening, closing) delimiter pairs accepted around string literals in
# predicates; a token counts as a literal when its first and last characters
# form one of these pairs.
_QUOTES = tuple((mark, mark) for mark in ("'", '"'))
23
class Path(object):
    """Basic XPath support on markup event streams.

    Only a small subset of XPath is understood: element name tests, the
    ``*`` wildcard, attribute tests (``@name`` and ``@*``), the ``text()``
    function, the ``/`` and ``//`` separators, and predicates that test an
    attribute, optionally compared against a quoted string literal using
    ``=`` or ``!=``.  Unsupported functions and unsupported predicate
    operands raise `NotImplementedError` when the path is parsed.

    >>> from markup.input import XML

    Selecting specific tags:

    >>> Path('root').select(XML('<root/>')).render()
    '<root/>'
    >>> Path('//root').select(XML('<root/>')).render()
    '<root/>'

    Using wildcards for tag names:

    >>> Path('*').select(XML('<root/>')).render()
    '<root/>'
    >>> Path('//*').select(XML('<root/>')).render()
    '<root/>'

    Selecting attribute values:

    >>> Path('@foo').select(XML('<root/>')).render()
    ''
    >>> Path('@foo').select(XML('<root foo="bar"/>')).render()
    'bar'

    Selecting descendants:

    >>> Path("root/*").select(XML('<root><foo/><bar/></root>')).render()
    '<foo/><bar/>'
    >>> Path("root/bar").select(XML('<root><foo/><bar/></root>')).render()
    '<bar/>'
    >>> Path("root/baz").select(XML('<root><foo/><bar/></root>')).render()
    ''
    >>> Path("root/foo/*").select(XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'

    Selecting text nodes:

    >>> Path("item/text()").select(XML('<root><item>Foo</item></root>')).render()
    'Foo'
    >>> Path("item/text()").select(XML('<root><item>Foo</item><item>Bar</item></root>')).render()
    'FooBar'

    Skipping ancestors:

    >>> Path("foo/bar").select(XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'
    >>> Path("foo/*").select(XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'
    >>> Path("root/bar").select(XML('<root><foo><bar/></foo></root>')).render()
    ''
    >>> Path("root/bar").select(XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')).render()
    '<bar id="2"/>'
    >>> Path("root/*/bar").select(XML('<root><foo><bar/></foo></root>')).render()
    '<bar/>'
    >>> Path("root//bar").select(XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')).render()
    '<bar id="1"/><bar id="2"/>'

    Using simple attribute predicates:

    >>> Path("root/item[@important]").select(XML('<root><item/><item important="very"/></root>')).render()
    '<item important="very"/>'
    >>> Path('root/item[@important="very"]').select(XML('<root><item/><item important="very"/></root>')).render()
    '<item important="very"/>'
    >>> Path("root/item[@important='very']").select(XML('<root><item/><item important="notso"/></root>')).render()
    ''
    >>> Path("root/item[@important!='very']").select(
    ...     XML('<root><item/><item important="notso"/></root>')).render()
    '<item/><item important="notso"/>'
    """

    # Tokenizer for path expressions.  Group 1 captures operators and
    # punctuation, group 2 captures names and quoted literals.  Runs of
    # whitespace match the third alternative, which leaves both groups empty.
    # Raw strings keep the backslashes from being read as (invalid) string
    # escape sequences.
    _TOKEN_RE = re.compile(r'(::|\.\.|\(\)|[/.:\[\]\(\)@=!])|'
                           r'([^/:\[\]\(\)@=!\s]+)|'
                           r'\s+')

    def __init__(self, text):
        """Parse the path expression `text` into a list of steps.

        The result is stored in `self.steps`; every entry is a
        ``[closure, node_test, predicates]`` list where `closure` is true for
        a step at the start of the path or one preceded by ``//``,
        `node_test` is a callable taking ``(kind, data, pos)``, and
        `predicates` is a list of further such callables that must all hold.

        Raises `NotImplementedError` for any function other than ``text()``
        and for predicate operands that are neither an attribute test, ``*``
        nor a quoted string literal.
        """
        self.source = text

        steps = []
        cur_op = ''
        cur_tag = ''
        in_predicate = False
        for op, tag in self._TOKEN_RE.findall(text):
            if not op and not tag:
                # Whitespace yields an all-empty match; without this check it
                # would be handled as an element test for the name ''.
                continue

            if op:
                if op == '[':
                    in_predicate = True
                elif op == ']':
                    in_predicate = False
                elif op.startswith('('):
                    if cur_tag == 'text':
                        # The name "text" was already added as an element
                        # test; replace that step with the text() node test.
                        steps[-1] = [False, self.fn_text(), []]
                    else:
                        raise NotImplementedError('XPath function "%s" not '
                                                  'supported' % cur_tag)
                else:
                    # Operators made of several tokens ("//", "!=") are
                    # accumulated until the next name token consumes them.
                    cur_op += op
                cur_tag = ''
            else:
                closure = cur_op in ('', '//')
                if cur_op == '@':
                    if tag == '*':
                        node_test = self.any_attribute()
                    else:
                        node_test = self.attribute_by_name(tag)
                elif tag == '*':
                    node_test = self.any_element()
                elif in_predicate:
                    if len(tag) > 1 and (tag[0], tag[-1]) in _QUOTES:
                        node_test = self.literal_string(tag[1:-1])
                    else:
                        # Previously this fell through with `node_test`
                        # unbound or left over from an earlier token.
                        raise NotImplementedError('XPath predicate operand '
                                                  '"%s" not supported' % tag)
                    if cur_op == '=':
                        # Combine with the predicate on the left-hand side.
                        node_test = self.op_eq(steps[-1][2][-1], node_test)
                        steps[-1][2].pop()
                    elif cur_op == '!=':
                        node_test = self.op_neq(steps[-1][2][-1], node_test)
                        steps[-1][2].pop()
                else:
                    node_test = self.element_by_name(tag)

                if in_predicate:
                    steps[-1][2].append(node_test)
                else:
                    steps.append([closure, node_test, []])
                cur_op = ''
                cur_tag = tag
        self.steps = steps

    def __repr__(self):
        return '<%s "%s">' % (self.__class__.__name__, self.source)

    def select(self, stream):
        """Return a new `Stream` with the parts of `stream` matching the path.

        `stream` is any iterable of ``(kind, data, pos)`` events.  When the
        path matches an element, the element's start event and every event
        up to and including its matching end event are emitted.  When the
        match is a value (attribute or text), the event returned by the node
        test is emitted instead.
        """
        stream = iter(stream)

        def _generate():
            test = self.test()
            for kind, data, pos in stream:
                result = test(kind, data, pos)
                if result is True:
                    yield kind, data, pos
                    # Copy the whole subtree of the matched element.  The
                    # inner loop consumes the same iterator as the outer one
                    # and simply ends if the stream is truncated.
                    depth = 1
                    for event in stream:
                        if event[0] is Stream.START:
                            depth += 1
                        elif event[0] is Stream.END:
                            depth -= 1
                        yield event
                        # Keep the matcher's cursor stack in step with the
                        # events consumed here.
                        test(*event)
                        if depth <= 0:
                            break
                elif result:
                    yield result

        return Stream(_generate())

    def test(self):
        """Return a stateful matcher function for this path.

        The returned function takes ``(kind, data, pos)`` and must be called
        with every event of a stream, in order.  It returns the result of the
        last step's node test when the whole path matches at that event
        (``True`` for elements, an event tuple for attribute and text
        matches), ``False`` once its cursor stack has been emptied, and
        ``None`` otherwise.
        """
        stack = [0] # stack of cursors into the location path

        def _test(kind, data, pos):
            if not stack:
                return False

            if kind is Stream.END:
                stack.pop()
                return None

            if kind is Stream.START:
                # Children start matching at the step reached by the parent.
                stack.append(stack[-1])

            closure, node_test, predicates = self.steps[stack[-1]]

            matched = node_test(kind, data, pos)
            if matched and predicates:
                for predicate in predicates:
                    if not predicate(kind, data, pos):
                        matched = None
                        break

            if matched:
                if stack[-1] == len(self.steps) - 1:
                    # The last step matched: report the match.
                    return matched
                # An intermediate step matched: advance to the next step.
                stack[-1] += 1

            elif kind is Stream.START and not closure:
                # FIXME: If this step is not a closure, it cannot be matched
                #        until the current element is closed... so we need to
                #        move the cursor back to the last closure and retest
                #        that against the current element
                closures = [step for step in self.steps[:stack[-1]] if step[0]]
                closures.reverse()
                for _closure, prev_test, _predicates in closures:
                    stack[-1] -= 1
                    if prev_test(kind, data, pos):
                        stack[-1] += 1
                        break

            return None

        return _test

    class any_element(object):
        """Node test that matches the start event of any element."""
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                return True
            return None
        def __repr__(self):
            return '<%s>' % self.__class__.__name__

    class element_by_name(object):
        """Node test that matches start events by element local name."""
        def __init__(self, name):
            self.name = QName(name)
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                return data[0].localname == self.name
            return None
        def __repr__(self):
            return '<%s "%s">' % (self.__class__.__name__, self.name)

    class any_attribute(object):
        """Node test yielding all attribute values of an element, joined.

        Returns a ``TEXT`` event for start events carrying at least one
        non-empty attribute value, and ``None`` otherwise.
        """
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                text = ''.join([val for name, val in data[1]])
                if text:
                    return Stream.TEXT, text, pos
                return None
            return None
        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class attribute_by_name(object):
        """Node test yielding the value of one named attribute.

        Returns a ``TEXT`` event holding the attribute value for start events
        that carry the attribute, and ``None`` otherwise.
        """
        def __init__(self, name):
            self.name = QName(name)
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                if self.name in data[1]:
                    return Stream.TEXT, data[1].get(self.name), pos
                return None
            return None
        def __repr__(self):
            return '<%s "%s">' % (self.__class__.__name__, self.name)

    class fn_text(object):
        """Node test for ``text()``: passes ``TEXT`` events through as-is."""
        def __call__(self, kind, data, pos):
            if kind is Stream.TEXT:
                return kind, data, pos
            return None
        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class literal_string(object):
        """Predicate operand that always yields a fixed string.

        The value is wrapped in a ``TEXT`` event with the position
        ``(-1, -1)``, whatever event it is called with.
        """
        def __init__(self, value):
            self.value = value
        def __call__(self, kind, data, pos):
            return Stream.TEXT, self.value, (-1, -1)
        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class op_eq(object):
        """Predicate comparing the values of two operands for equality.

        Each operand is a callable returning either a false value or an
        event tuple whose second item is the value to compare.  A false
        operand result is compared as-is, so a missing attribute never
        equals a string literal.
        """
        def __init__(self, lval, rval):
            self.lval = lval
            self.rval = rval
        def __call__(self, kind, data, pos):
            lval = self.lval(kind, data, pos)
            rval = self.rval(kind, data, pos)
            return (lval and lval[1]) == (rval and rval[1])
        def __repr__(self):
            return '<%s %r = %r>' % (self.__class__.__name__, self.lval,
                                     self.rval)

    class op_neq(object):
        """Predicate comparing the values of two operands for inequality.

        Operands follow the same convention as for `op_eq`.  A false operand
        result is compared as-is, so a missing attribute is unequal to any
        string literal.
        """
        def __init__(self, lval, rval):
            self.lval = lval
            self.rval = rval
        def __call__(self, kind, data, pos):
            lval = self.lval(kind, data, pos)
            rval = self.rval(kind, data, pos)
            return (lval and lval[1]) != (rval and rval[1])
        def __repr__(self):
            return '<%s %r != %r>' % (self.__class__.__name__, self.lval,
                                      self.rval)
Copyright (C) 2012-2017 Edgewall Software