Mercurial > genshi > mirror
annotate markup/path.py @ 37:37557b8fb925 trunk
Moved some of the tests for the strip directive to a new unittest test case to not clutter up the documentation.
author | cmlenz |
---|---|
date | Sun, 02 Jul 2006 23:10:27 +0000 |
parents | 1fdb7054fb42 |
children | ee669cb9cccc |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
27 | 3 # Copyright (C) 2006 Christopher Lenz |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
27 | 8 # are also available at http://markup.cmlenz.net/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
27 | 12 # history and logs, available at http://markup.cmlenz.net/log/. |
1 | 13 |
14 """Basic support for evaluating XPath expressions against streams.""" | |
15 | |
16 import re | |
17 | |
18 from markup.core import QName, Stream | |
19 | |
20 __all__ = ['Path'] | |
21 | |
22 | |
class Path(object):
    """Implements basic XPath support on streams.

    Instances of this class represent a "compiled" XPath expression, and provide
    methods for testing the path against a stream, as well as extracting a
    substream matching that path.

    The compiled form is stored in `steps`, a list of
    `[closure, node_test, predicates]` entries, one per location step.
    """
    # Tokenizer: group 1 captures operators and punctuation, group 2 captures
    # names and literals.  Whitespace is matched by the third (ungrouped)
    # alternative, so `findall` reports it as a pair of empty strings.
    _TOKEN_RE = re.compile(r'(::|\.\.|\(\)|[/.:\[\]\(\)@=!])|'
                           r'([^/:\[\]\(\)@=!\s]+)|'
                           r'\s+')
    _QUOTES = (("'", "'"), ('"', '"'))

    def __init__(self, text):
        """Create the path object from a string.

        @param text: the path expression
        @raise NotImplementedError: if the expression uses an XPath function
            other than `text()`
        """
        self.source = text

        steps = []
        cur_op = ''
        cur_tag = ''
        in_predicate = False
        for op, tag in self._TOKEN_RE.findall(text):
            if not op and not tag:
                # Whitespace between tokens: both groups are empty.  It must
                # not be treated as a name, otherwise an expression such as
                # "a / b" would gain bogus steps with an empty tag name.
                continue

            if op:
                if op == '[':
                    in_predicate = True
                elif op == ']':
                    in_predicate = False
                elif op.startswith('('):
                    if cur_tag == 'text':
                        # The name "text" was already added as an element
                        # step; replace it with the text() function.
                        steps[-1] = [False, self._FunctionText(), []]
                    else:
                        raise NotImplementedError('XPath function "%s" not '
                                                  'supported' % cur_tag)
                else:
                    # Accumulate multi-character operators ("//", "!=").
                    cur_op += op
                cur_tag = ''
            else:
                closure = cur_op in ('', '//')
                if cur_op == '@':
                    if tag == '*':
                        node_test = self._AnyAttribute()
                    else:
                        node_test = self._AttributeByName(tag)
                else:
                    if tag == '*':
                        node_test = self._AnyElement()
                    elif in_predicate:
                        # Only quoted string literals are recognised as
                        # operands inside a predicate.
                        if len(tag) > 1 and (tag[0], tag[-1]) in self._QUOTES:
                            node_test = self._LiteralString(tag[1:-1])
                        if cur_op == '=':
                            # The left operand is the predicate added last;
                            # it is replaced by the comparison.
                            node_test = self._OperatorEq(steps[-1][2][-1],
                                                         node_test)
                            steps[-1][2].pop()
                        elif cur_op == '!=':
                            node_test = self._OperatorNeq(steps[-1][2][-1],
                                                          node_test)
                            steps[-1][2].pop()
                    else:
                        node_test = self._ElementByName(tag)
                if in_predicate:
                    steps[-1][2].append(node_test)
                else:
                    steps.append([closure, node_test, []])
                cur_op = ''
                cur_tag = tag
        self.steps = steps

    def __repr__(self):
        return '<%s "%s">' % (self.__class__.__name__, self.source)

    def select(self, stream):
        """Returns a substream of the given stream that matches the path.

        If there are no matches, this method returns an empty stream.

        >>> from markup.input import XML
        >>> xml = XML('<root><elem><child>Text</child></elem></root>')

        >>> print Path('child').select(xml)
        <child>Text</child>

        >>> print Path('child/text()').select(xml)
        Text

        @param stream: the stream to select from
        @return: the substream matching the path, or an empty stream
        """
        stream = iter(stream)
        def _generate():
            test = self.test()
            for kind, data, pos in stream:
                result = test(kind, data, pos)
                if result is True:
                    # An element matched: emit it together with everything up
                    # to and including its matching END event.
                    yield kind, data, pos
                    depth = 1
                    while depth > 0:
                        ev = stream.next()
                        if ev[0] is Stream.START:
                            depth += 1
                        elif ev[0] is Stream.END:
                            depth -= 1
                        yield ev
                        # Keep the matcher's cursor stack in step with the
                        # events consumed here.
                        test(*ev)
                elif result:
                    # The test produced an event of its own (for example a
                    # TEXT event for an attribute value); emit that instead.
                    yield result
        return Stream(_generate())

    def test(self):
        """Returns a function that can be used to track whether the path matches
        a specific stream event.

        The function returned expects the positional arguments `kind`, `data`,
        and `pos`, i.e. basically an unpacked stream event. If the path matches
        the event, the function returns the match (for example, a `START` or
        `TEXT` event.) Otherwise, it returns `None` or `False`.

        >>> from markup.input import XML
        >>> xml = XML('<root><elem><child id="1"/></elem><child id="2"/></root>')
        >>> test = Path('child').test()
        >>> for kind, data, pos in xml:
        ...     if test(kind, data, pos):
        ...         print kind, data
        START (u'child', [(u'id', u'1')])
        START (u'child', [(u'id', u'2')])
        """
        stack = [0] # stack of cursors into the location path

        def _test(kind, data, pos):
            if not stack:
                return False

            if kind is Stream.END:
                stack.pop()
                return None

            if kind is Stream.START:
                # Entering an element: it starts out with its parent's cursor.
                stack.append(stack[-1])

            closure, node_test, predicates = self.steps[stack[-1]]

            matched = node_test(kind, data, pos)
            if matched and predicates:
                for predicate in predicates:
                    if not predicate(kind, data, pos):
                        matched = None
                        break

            if matched:
                if stack[-1] == len(self.steps) - 1:
                    # The last step matched, so the whole path matched.
                    return matched

                stack[-1] += 1

            elif kind is Stream.START and not closure:
                # If this step is not a closure, it cannot be matched until the
                # current element is closed... so we need to move the cursor
                # back to the last closure and retest that against the current
                # element
                closures = [step for step in self.steps[:stack[-1]] if step[0]]
                closures.reverse()
                for is_closure, closure_test, _ in closures:
                    stack[-1] -= 1
                    if is_closure:
                        matched = closure_test(kind, data, pos)
                        if matched:
                            stack[-1] += 1
                        break

            return None

        return _test

    class _AnyElement(object):
        """Node test that matches any element."""
        def __call__(self, kind, *_):
            if kind is Stream.START:
                return True
            return None
        def __repr__(self):
            return '<%s>' % self.__class__.__name__

    class _ElementByName(object):
        """Node test that matches an element with a specific tag name."""
        def __init__(self, name):
            self.name = QName(name)
        def __call__(self, kind, data, _):
            if kind is Stream.START:
                return data[0].localname == self.name
            return None
        def __repr__(self):
            return '<%s "%s">' % (self.__class__.__name__, self.name)

    class _AnyAttribute(object):
        """Node test that matches any attribute.

        On a match the result is a `TEXT` event holding the concatenated
        values of all attributes of the element.
        """
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                text = ''.join([val for _, val in data[1]])
                if text:
                    return Stream.TEXT, text, pos
                return None
            return None
        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class _AttributeByName(object):
        """Node test that matches an attribute with a specific name.

        On a match the result is a `TEXT` event holding the attribute value.
        """
        def __init__(self, name):
            self.name = QName(name)
        def __call__(self, kind, data, pos):
            if kind is Stream.START:
                if self.name in data[1]:
                    return Stream.TEXT, data[1].get(self.name), pos
                return None
            return None
        def __repr__(self):
            return '<%s "%s">' % (self.__class__.__name__, self.name)

    class _FunctionText(object):
        """Function that returns text content."""
        def __call__(self, kind, data, pos):
            if kind is Stream.TEXT:
                return kind, data, pos
            return None
        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class _LiteralString(object):
        """Always returns a literal string."""
        def __init__(self, value):
            self.value = value
        def __call__(self, *_):
            # A literal has no source location, hence the (-1, -1) position.
            return Stream.TEXT, self.value, (-1, -1)
        def __repr__(self):
            return '<%s>' % (self.__class__.__name__)

    class _OperatorEq(object):
        """Equality comparison operator."""
        def __init__(self, lval, rval):
            self.lval = lval
            self.rval = rval
        def __call__(self, kind, data, pos):
            lval = self.lval(kind, data, pos)
            rval = self.rval(kind, data, pos)
            # Compare the event payloads; a falsy operand result (no match) is
            # compared as-is.
            return (lval and lval[1]) == (rval and rval[1])
        def __repr__(self):
            return '<%s %r = %r>' % (self.__class__.__name__, self.lval,
                                     self.rval)

    class _OperatorNeq(object):
        """Inequality comparison operator."""
        def __init__(self, lval, rval):
            self.lval = lval
            self.rval = rval
        def __call__(self, kind, data, pos):
            lval = self.lval(kind, data, pos)
            rval = self.rval(kind, data, pos)
            # Compare the event payloads; a falsy operand result (no match) is
            # compared as-is.
            return (lval and lval[1]) != (rval and rval[1])
        def __repr__(self):
            return '<%s %r != %r>' % (self.__class__.__name__, self.lval,
                                      self.rval)