Mercurial > genshi > mirror
annotate markup/path.py @ 150:d35688d16831 trunk
Removed too many classes from the `__all__` list of `markup.template` in [191].
author | cmlenz |
---|---|
date | Tue, 15 Aug 2006 22:01:01 +0000 |
parents | 47bbd9d2a5af |
children | 9a5aedda1099 |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
61
diff
changeset
|
3 # Copyright (C) 2006 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
61
diff
changeset
|
8 # are also available at http://markup.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
61
diff
changeset
|
12 # history and logs, available at http://markup.edgewall.org/log/. |
1 | 13 |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
14 """Basic support for evaluating XPath expressions against streams. |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
15 |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
16 >>> from markup.input import XML |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
17 >>> doc = XML('''<doc> |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
18 ... <items count="2"> |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
19 ... <item status="new"> |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
20 ... <summary>Foo</summary> |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
21 ... </item> |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
22 ... <item status="closed"> |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
23 ... <summary>Bar</summary> |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
24 ... </item> |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
25 ... </items> |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
26 ... </doc>''') |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
27 >>> print doc.select('items/item[@status="closed"]/summary/text()') |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
28 Bar |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
29 |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
30 Because the XPath engine operates on markup streams (as opposed to tree |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
31 structures), it only implements a subset of the full XPath 1.0 language. |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
32 """ |
1 | 33 |
34 import re | |
35 | |
145
47bbd9d2a5af
* Fix error in expression evaluation when the expression evaluates to an iterable that does not produce event tuples.
cmlenz
parents:
139
diff
changeset
|
36 from markup.core import Stream, START, END, TEXT, COMMENT, PI |
1 | 37 |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
38 __all__ = ['Path', 'PathSyntaxError'] |
1 | 39 |
40 | |
114 | 41 class Axis(object): |
42 """Defines constants for the various supported XPath axes.""" | |
43 | |
44 ATTRIBUTE = 'attribute' | |
45 CHILD = 'child' | |
46 DESCENDANT = 'descendant' | |
47 DESCENDANT_OR_SELF = 'descendant-or-self' | |
48 NAMESPACE = 'namespace' | |
49 SELF = 'self' | |
50 | |
51 def forname(cls, name): | |
52 """Return the axis constant for the given name, or `None` if no such | |
53 axis was defined. | |
54 """ | |
55 return getattr(cls, name.upper().replace('-', '_'), None) | |
56 forname = classmethod(forname) | |
57 | |
58 | |
59 ATTRIBUTE = Axis.ATTRIBUTE | |
60 CHILD = Axis.CHILD | |
61 DESCENDANT = Axis.DESCENDANT | |
62 DESCENDANT_OR_SELF = Axis.DESCENDANT_OR_SELF | |
63 NAMESPACE = Axis.NAMESPACE | |
64 SELF = Axis.SELF | |
65 | |
66 | |
1 | 67 class Path(object): |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
68 """Implements basic XPath support on streams. |
1 | 69 |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
70 Instances of this class represent a "compiled" XPath expression, and provide |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
71 methods for testing the path against a stream, as well as extracting a |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
72 substream matching that path. |
1 | 73 """ |
74 | |
139
8332287b5508
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
75 def __init__(self, text, filename=None, lineno=-1): |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
76 """Create the path object from a string. |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
77 |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
78 @param text: the path expression |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
79 """ |
1 | 80 self.source = text |
139
8332287b5508
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
81 self.paths = PathParser(text, filename, lineno).parse() |
1 | 82 |
83 def __repr__(self): | |
137 | 84 paths = [] |
85 for path in self.paths: | |
86 steps = [] | |
87 for axis, nodetest, predicates in path: | |
88 steps.append('%s::%s' % (axis, nodetest)) | |
89 for predicate in predicates: | |
90 steps.append('[%s]' % predicate) | |
91 paths.append('/'.join(steps)) | |
92 return '<%s "%s">' % (self.__class__.__name__, '|'.join(paths)) | |
1 | 93 |
94 def select(self, stream): | |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
95 """Returns a substream of the given stream that matches the path. |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
96 |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
97 If there are no matches, this method returns an empty stream. |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
98 |
33 | 99 >>> from markup.input import XML |
100 >>> xml = XML('<root><elem><child>Text</child></elem></root>') | |
61 | 101 |
33 | 102 >>> print Path('child').select(xml) |
103 <child>Text</child> | |
104 | |
105 >>> print Path('child/text()').select(xml) | |
106 Text | |
107 | |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
108 @param stream: the stream to select from |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
109 @return: the substream matching the path, or an empty stream |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
110 """ |
1 | 111 stream = iter(stream) |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
112 def _generate(): |
1 | 113 test = self.test() |
114 for kind, data, pos in stream: | |
115 result = test(kind, data, pos) | |
116 if result is True: | |
117 yield kind, data, pos | |
118 depth = 1 | |
119 while depth > 0: | |
73 | 120 subkind, subdata, subpos = stream.next() |
121 if subkind is START: | |
122 depth += 1 | |
123 elif subkind is END: | |
124 depth -= 1 | |
125 yield subkind, subdata, subpos | |
126 test(subkind, subdata, subpos) | |
1 | 127 elif result: |
128 yield result | |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
129 return Stream(_generate()) |
1 | 130 |
38
ee669cb9cccc
Fix for #2 (incorrect context node in path expressions). Still some paths that produce incorrect results, but the common case seems to work now.
cmlenz
parents:
37
diff
changeset
|
131 def test(self, ignore_context=False): |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
132 """Returns a function that can be used to track whether the path matches |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
133 a specific stream event. |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
134 |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
135 The function returned expects the positional arguments `kind`, `data`, |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
136 and `pos`, i.e. basically an unpacked stream event. If the path matches |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
137 the event, the function returns the match (for example, a `START` or |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
138 `TEXT` event.) Otherwise, it returns `None`. |
33 | 139 |
140 >>> from markup.input import XML | |
141 >>> xml = XML('<root><elem><child id="1"/></elem><child id="2"/></root>') | |
142 >>> test = Path('child').test() | |
143 >>> for kind, data, pos in xml: | |
144 ... if test(kind, data, pos): | |
145 ... print kind, data | |
146 START (u'child', [(u'id', u'1')]) | |
147 START (u'child', [(u'id', u'2')]) | |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
148 """ |
137 | 149 paths = [(steps, len(steps), [0]) for steps in self.paths] |
1 | 150 |
151 def _test(kind, data, pos): | |
137 | 152 for steps, size, stack in paths: |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
153 if not stack: |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
154 continue |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
155 cursor = stack[-1] |
1 | 156 |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
157 if kind is END: |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
158 stack.pop() |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
159 continue |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
160 elif kind is START: |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
161 stack.append(cursor) |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
162 |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
163 while 1: |
137 | 164 axis, nodetest, predicates = steps[cursor] |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
165 |
137 | 166 matched = nodetest(kind, data, pos) |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
167 if matched and predicates: |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
168 for predicate in predicates: |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
169 if not predicate(kind, data, pos): |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
170 matched = None |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
171 break |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
172 |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
173 if matched: |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
174 if cursor + 1 == size: # the last location step |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
175 if ignore_context or \ |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
176 kind is not START or \ |
137 | 177 axis in (ATTRIBUTE, NAMESPACE, SELF) or \ |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
178 len(stack) > 2: |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
179 return matched |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
180 else: |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
181 cursor += 1 |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
182 stack[-1] = cursor |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
183 |
114 | 184 if axis is not SELF: |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
185 break |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
186 |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
187 if not matched and kind is START \ |
114 | 188 and axis not in (DESCENDANT, DESCENDANT_OR_SELF): |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
189 # If this step is not a closure, it cannot be matched until |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
190 # the current element is closed... so we need to move the |
114 | 191 # cursor back to the previous closure and retest that |
192 # against the current element | |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
193 backsteps = [step for step in steps[:cursor] |
114 | 194 if step[0] in (DESCENDANT, DESCENDANT_OR_SELF)] |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
195 backsteps.reverse() |
137 | 196 for axis, nodetest, predicates in backsteps: |
197 matched = nodetest(kind, data, pos) | |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
198 if not matched: |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
199 cursor -= 1 |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
200 break |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
201 stack[-1] = cursor |
1 | 202 |
203 return None | |
204 | |
205 return _test | |
206 | |
207 | |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
208 class PathSyntaxError(Exception): |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
209 """Exception raised when an XPath expression is syntactically incorrect.""" |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
210 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
211 def __init__(self, message, filename=None, lineno=-1, offset=-1): |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
212 if filename: |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
213 message = '%s (%s, line %d)' % (message, filename, lineno) |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
214 Exception.__init__(self, message) |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
215 self.filename = filename |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
216 self.lineno = lineno |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
217 self.offset = offset |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
218 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
219 |
137 | 220 class PathParser(object): |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
221 """Tokenizes and parses an XPath expression.""" |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
222 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
223 _QUOTES = (("'", "'"), ('"', '"')) |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
224 _TOKENS = ('::', ':', '..', '.', '//', '/', '[', ']', '()', '(', ')', '@', |
121
062e51ad7b19
Added support for the XPath functions `name()`, `namespace-uri()`, `local-name()`, and `not()`.
cmlenz
parents:
114
diff
changeset
|
225 '=', '!=', '!', '|', ',') |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
226 _tokenize = re.compile('(%s)|([^%s\s]+)|\s+' % ( |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
227 '|'.join([re.escape(t) for t in _TOKENS]), |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
228 ''.join([re.escape(t[0]) for t in _TOKENS]))).findall |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
229 |
139
8332287b5508
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
230 def __init__(self, text, filename=None, lineno=-1): |
8332287b5508
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
231 self.filename = filename |
8332287b5508
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
232 self.lineno = lineno |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
233 self.tokens = filter(None, [a or b for a, b in self._tokenize(text)]) |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
234 self.pos = 0 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
235 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
236 # Tokenizer |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
237 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
238 at_end = property(lambda self: self.pos == len(self.tokens) - 1) |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
239 cur_token = property(lambda self: self.tokens[self.pos]) |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
240 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
241 def next_token(self): |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
242 self.pos += 1 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
243 return self.tokens[self.pos] |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
244 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
245 def peek_token(self): |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
246 if not self.at_end: |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
247 return self.tokens[self.pos + 1] |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
248 return None |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
249 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
250 # Recursive descent parser |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
251 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
252 def parse(self): |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
253 """Parses the XPath expression and returns a list of location path |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
254 tests. |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
255 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
256 For union expressions (such as `*|text()`), this function returns one |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
257 test for each operand in the union. For path expressions that don't |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
258 use the union operator, the function always returns a list of size 1. |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
259 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
260 Each path test in turn is a sequence of tests that correspond to the |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
261 location steps, each a tuple of the form `(axis, testfunc, predicates)` |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
262 """ |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
263 paths = [self._location_path()] |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
264 while self.cur_token == '|': |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
265 self.next_token() |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
266 paths.append(self._location_path()) |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
267 if not self.at_end: |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
268 raise PathSyntaxError('Unexpected token %r after end of expression' |
139
8332287b5508
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
269 % self.cur_token, self.filename, self.lineno) |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
270 return paths |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
271 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
272 def _location_path(self): |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
273 steps = [] |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
274 while True: |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
275 if self.cur_token == '//': |
137 | 276 steps.append((DESCENDANT_OR_SELF, NodeTest(), [])) |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
277 self.next_token() |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
278 elif self.cur_token == '/' and not steps: |
139
8332287b5508
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
279 raise PathSyntaxError('Absolute location paths not supported', |
8332287b5508
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
280 self.filename, self.lineno) |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
281 |
137 | 282 axis, nodetest, predicates = self._location_step() |
283 if not axis: | |
145
47bbd9d2a5af
* Fix error in expression evaluation when the expression evaluates to an iterable that does not produce event tuples.
cmlenz
parents:
139
diff
changeset
|
284 axis = CHILD |
137 | 285 steps.append((axis, nodetest, predicates)) |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
286 |
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
287 if self.at_end or not self.cur_token.startswith('/'): |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
288 break |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
289 self.next_token() |
111
2368c3becc52
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
290 |
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
291 return steps |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
292 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
def _location_step(self):
    """Parse one location step starting at the current token.

    The step may begin with ``@`` (attribute axis), ``.`` (self axis), an
    explicit ``axisname::`` prefix, or no axis at all.

    Returns an ``(axis, nodetest, predicates)`` tuple. ``axis`` is ``None``
    when the step names no axis; the node test is then built with ``CHILD``
    as its principal type.

    Raises ``PathSyntaxError`` for ``..`` (the parent axis) and for an axis
    name that ``Axis.forname`` does not recognise.
    """
    if self.cur_token == '@':
        axis = ATTRIBUTE
        self.next_token()
    elif self.cur_token == '.':
        # The '.' token is deliberately left in place: _node_test() maps it
        # to a NodeTest and steps past it.
        axis = SELF
    elif self.cur_token == '..':
        raise PathSyntaxError('Unsupported axis "parent"', self.filename,
                              self.lineno)
    elif self.peek_token() == '::':
        axis_name = self.cur_token
        axis = Axis.forname(axis_name)
        if axis is None:
            # Report the name as written in the expression; `axis` itself
            # is None on this branch and would render as "None".
            raise PathSyntaxError('Unsupported axis "%s"' % axis_name,
                                  self.filename, self.lineno)
        self.next_token()  # skip the axis name
        self.next_token()  # skip the '::' separator
    else:
        axis = None
    nodetest = self._node_test(axis or CHILD)
    predicates = []
    while self.cur_token == '[':
        predicates.append(self._predicate())
    return axis, nodetest, predicates
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
316 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
def _node_test(self, axis=None):
    """Parse the node test at the current token and return the test object.

    When the following token is ``(`` or ``()`` the test is a node type
    test and is delegated to ``_node_type()``. Otherwise it is a name test:
    ``*`` yields a ``PrincipalTypeTest``, ``.`` a ``NodeTest`` and any other
    token a ``LocalNameTest``. ``axis`` is handed to the name tests as their
    principal type.

    The parser is advanced by one token afterwards unless ``self.at_end``
    is true.
    """
    if self.peek_token() not in ('(', '()'):
        # Name test
        token = self.cur_token
        if token == '*':
            result = PrincipalTypeTest(axis)
        elif token == '.':
            result = NodeTest()
        else:
            result = LocalNameTest(axis, token)
    else:
        # Node type test
        result = self._node_type()

    if not self.at_end:
        self.next_token()
    return result
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
333 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
def _node_type(self):
    """Parse a node type test of the form ``name()`` or ``name(arg)``.

    The current token is taken as the test name and looked up in
    ``_nodetest_map``. A single argument is collected when the parentheses
    are not the combined ``()`` token and the token after ``(`` is not
    ``)``; its surrounding quotes are stripped if its first and last
    characters form a pair listed in ``self._QUOTES``.

    Raises ``PathSyntaxError`` when the name is not in ``_nodetest_map``.
    """
    type_name = self.cur_token
    self.next_token()

    arguments = []
    if self.cur_token != '()':
        # The processing-instruction() function optionally accepts the
        # name of the PI as argument, which must be a literal string
        self.next_token()  # (
        literal = self.cur_token
        if literal != ')':
            delimiters = (literal[0], literal[-1])
            if delimiters in self._QUOTES:
                literal = literal[1:-1]
            arguments.append(literal)

    test_class = _nodetest_map.get(type_name)
    if test_class:
        return test_class(*arguments)
    raise PathSyntaxError('%s() not allowed here' % type_name, self.filename,
                          self.lineno)
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
354 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
def _predicate(self):
    """Parse a bracketed predicate and return its expression object.

    The current token must be ``[``. After the inner expression has been
    parsed the current token has to be ``]``, otherwise a
    ``PathSyntaxError`` is raised. The closing bracket is stepped over
    unless ``self.at_end`` is true.
    """
    assert self.cur_token == '['
    self.next_token()
    inner = self._or_expr()
    closing = self.cur_token
    if closing != ']':
        raise PathSyntaxError('Expected "]" to close predicate, '
                              'but found "%s"' % closing,
                              self.filename, self.lineno)
    if not self.at_end:
        self.next_token()
    return inner
106
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
366 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
def _or_expr(self):
    """Parse a chain of ``or``-separated operands.

    Each operand is parsed by ``_and_expr()``; successive operands are
    folded left to right into nested ``OrOperator`` objects.
    """
    result = self._and_expr()
    while True:
        if self.cur_token != 'or':
            return result
        self.next_token()
        result = OrOperator(result, self._and_expr())
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
373 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
def _and_expr(self):
    """Parse a chain of ``and``-separated operands.

    Each operand is parsed by ``_equality_expr()``; successive operands are
    folded left to right into nested ``AndOperator`` objects.
    """
    result = self._equality_expr()
    while True:
        if self.cur_token != 'and':
            return result
        self.next_token()
        result = AndOperator(result, self._equality_expr())
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
380 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
def _equality_expr(self):
    """Parse a chain of operands joined by ``=`` or ``!=``.

    Operands are parsed by ``_primary_expr()``. The operator class for each
    token is looked up in ``_operator_map`` and the chain is folded left to
    right.
    """
    left = self._primary_expr()
    while self.cur_token in ('=', '!='):
        operator_class = _operator_map.get(self.cur_token)
        self.next_token()
        right = self._primary_expr()
        left = operator_class(left, right)
    return left
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
388 |
f9473bdc93b2
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
def _primary_expr(self):
    """Parse a primary expression at the current token.

    Handled forms, tried in this order:

    * a quoted string (first and last characters form a pair listed in
      ``self._QUOTES``), returned as a ``StringLiteral``
    * a token starting with a digit, returned as a ``NumberLiteral``
    * a function call (the next token starts with ``(``), returned as an
      instance of the class registered in ``_function_map``
    * anything else is parsed as a node test, optionally prefixed by ``@``
      to select the attribute axis

    Raises ``PathSyntaxError`` when the function name is not registered or
    when a function's argument list is not closed by ``)``.
    """
    token = self.cur_token
    if len(token) > 1 and (token[0], token[-1]) in self._QUOTES:
        self.next_token()
        return StringLiteral(token[1:-1])
    elif token[0].isdigit():
        self.next_token()
        return NumberLiteral(float(token))
    elif not self.at_end and self.peek_token().startswith('('):
        if self.next_token() == '()':
            args = []
        else:
            self.next_token()  # step past '(' onto the first argument
            args = [self._or_expr()]
            # Every further argument must be introduced by a comma.
            while self.cur_token == ',':
                self.next_token()
                args.append(self._or_expr())
            if self.cur_token != ')':
                raise PathSyntaxError('Expected ")" to close function '
                                      'argument list, but found "%s"'
                                      % self.cur_token,
                                      self.filename, self.lineno)
        self.next_token()  # step past '()' or the closing ')'
        cls = _function_map.get(token)
        if not cls:
            raise PathSyntaxError('Unsupported function "%s"' % token,
                                  self.filename, self.lineno)
        return cls(*args)
    else:
        axis = None
        if token == '@':
            axis = ATTRIBUTE
            self.next_token()
        return self._node_test(axis)
137 | 417 |
418 | |
419 # Node tests | |
420 | |
class PrincipalTypeTest(object):
    """Node test displayed as ``*``.

    For ``START`` events the test yields ``True``, except when the principal
    type is ``ATTRIBUTE``: then it yields ``data[1]`` (presumably the
    element's attributes) if that is truthy and ``None`` otherwise. Events
    of any other kind yield ``None``.
    """
    __slots__ = ['principal_type']

    def __init__(self, principal_type):
        self.principal_type = principal_type

    def __call__(self, kind, data, pos):
        if kind is not START:
            return None
        if self.principal_type is not ATTRIBUTE:
            return True
        return data[1] or None

    def __repr__(self):
        return '*'
433 | |
class LocalNameTest(object):
    """Node test that matches by local name.

    Only ``START`` events are considered; other kinds yield ``None``. When
    the principal type is ``ATTRIBUTE`` and ``name`` is a key of ``data[1]``,
    a ``TEXT`` event carrying that entry's value is returned. In every other
    case (including an attribute test whose name is absent from ``data[1]``)
    the result is whether ``data[0].localname`` equals ``name``.
    """
    __slots__ = ['principal_type', 'name']

    def __init__(self, principal_type, name):
        self.principal_type = principal_type
        self.name = name

    def __call__(self, kind, data, pos):
        if kind is not START:
            return None
        if self.principal_type is ATTRIBUTE and self.name in data[1]:
            return TEXT, data[1].get(self.name), pos
        return data[0].localname == self.name

    def __repr__(self):
        return self.name
447 | |
class CommentNodeTest(object):
    """Node test for ``comment()``.

    Returns the ``(kind, data, pos)`` event when ``kind`` is ``COMMENT`` and
    ``False`` otherwise.
    """
    __slots__ = []

    def __call__(self, kind, data, pos):
        if kind is COMMENT:
            return kind, data, pos
        return False

    def __repr__(self):
        return 'comment()'
454 | |
class NodeTest(object):
    """Node test for ``node()``.

    Returns ``True`` for ``START`` events and the ``(kind, data, pos)``
    event itself for every other kind.
    """
    __slots__ = []

    def __call__(self, kind, data, pos):
        return True if kind is START else (kind, data, pos)

    def __repr__(self):
        return 'node()'
463 | |
class ProcessingInstructionNodeTest(object):
    """Node test for ``processing-instruction()``.

    Returns the ``(kind, data, pos)`` event when ``kind`` is ``PI`` and
    either no target was given or ``data[0]`` equals the target; returns
    ``None`` otherwise.
    """
    __slots__ = ['target']

    def __init__(self, target=None):
        self.target = target

    def __call__(self, kind, data, pos):
        if kind is PI:
            if not self.target or data[0] == self.target:
                return (kind, data, pos)
        return None

    def __repr__(self):
        quoted = ('"' + self.target + '"') if self.target else ''
        return 'processing-instruction(%s)' % quoted
476 | |
class TextNodeTest(object):
    """Node test for ``text()``.

    Returns the ``(kind, data, pos)`` event when ``kind`` is ``TEXT`` and
    ``False`` otherwise.
    """
    __slots__ = []

    def __call__(self, kind, data, pos):
        if kind is TEXT:
            return kind, data, pos
        return False

    def __repr__(self):
        return 'text()'
483 | |
# Node type test names (as written before the parentheses) mapped to the
# classes that implement them.
_nodetest_map = {
    'comment': CommentNodeTest,
    'node': NodeTest,
    'processing-instruction': ProcessingInstructionNodeTest,
    'text': TextNodeTest,
}
487 | |
488 # Functions | |
489 | |
class LocalNameFunction(object):
    """Implementation of ``local-name()``.

    For ``START`` events returns a ``TEXT`` event carrying
    ``data[0].localname``; returns ``None`` for other kinds.
    """
    __slots__ = []

    def __call__(self, kind, data, pos):
        if kind is not START:
            return None
        return TEXT, data[0].localname, pos

    def __repr__(self):
        return 'local-name()'
497 | |
class NameFunction(object):
    """Implementation of ``name()``.

    For ``START`` events returns a ``TEXT`` event carrying ``data[0]``;
    returns ``None`` for other kinds.
    """
    __slots__ = []

    def __call__(self, kind, data, pos):
        if kind is not START:
            return None
        return TEXT, data[0], pos

    def __repr__(self):
        return 'name()'
505 | |
class NamespaceUriFunction(object):
    """Implementation of ``namespace-uri()``.

    For ``START`` events returns a ``TEXT`` event carrying
    ``data[0].namespace``; returns ``None`` for other kinds.
    """
    __slots__ = []

    def __call__(self, kind, data, pos):
        if kind is not START:
            return None
        return TEXT, data[0].namespace, pos

    def __repr__(self):
        return 'namespace-uri()'
513 | |
class NotFunction(object):
    """Implementation of ``not(expr)``.

    Calling the instance evaluates the wrapped expression with the same
    arguments and returns the boolean negation of its result.
    """
    __slots__ = ['expr']

    def __init__(self, expr):
        self.expr = expr

    def __call__(self, kind, data, pos):
        outcome = self.expr(kind, data, pos)
        return not outcome

    def __repr__(self):
        return 'not(%s)' % self.expr
522 | |
# Function names (as written in a path expression) mapped to the classes
# that implement them.
_function_map = {
    'local-name': LocalNameFunction,
    'name': NameFunction,
    'namespace-uri': NamespaceUriFunction,
    'not': NotFunction,
}
525 | |
526 # Literals | |
527 | |
class StringLiteral(object):
    """A string literal in a path expression.

    Calling the instance ignores its arguments and returns a ``TEXT`` event
    carrying the literal text with the position ``(None, -1, -1)``.
    """
    __slots__ = ['text']

    def __init__(self, text):
        self.text = text

    def __call__(self, kind, data, pos):
        no_position = (None, -1, -1)
        return TEXT, self.text, no_position

    def __repr__(self):
        return '"%s"' % self.text
536 | |
class NumberLiteral(object):
    """A numeric literal in a path expression.

    Calling the instance ignores its arguments and returns a ``TEXT`` event
    carrying the number converted with ``unicode()`` and the position
    ``(None, -1, -1)``.
    """
    __slots__ = ['number']

    def __init__(self, number):
        self.number = number

    def __call__(self, kind, data, pos):
        no_position = (None, -1, -1)
        return TEXT, unicode(self.number), no_position

    def __repr__(self):
        return str(self.number)
545 | |
546 # Operators | |
547 | |
class AndOperator(object):
    """The ``and`` operator.

    Each operand is called with ``(kind, data, pos)``; a result that is
    exactly a tuple is reduced to its second item. The right operand is
    only evaluated when the left value is truthy. The result is always a
    ``bool``.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos):
        left = self.lval(kind, data, pos)
        if type(left) is tuple:
            left = left[1]
        if left:
            right = self.rval(kind, data, pos)
            if type(right) is tuple:
                right = right[1]
            return bool(right)
        return False

    def __repr__(self):
        return '%s and %s' % (self.lval, self.rval)
565 | |
class EqualsOperator(object):
    """The ``=`` operator.

    Both operands are called with ``(kind, data, pos)``, left first; a
    result that is exactly a tuple is reduced to its second item. The two
    values are then compared with ``==``.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos):
        values = []
        for operand in (self.lval, self.rval):
            value = operand(kind, data, pos)
            if type(value) is tuple:
                value = value[1]
            values.append(value)
        return values[0] == values[1]

    def __repr__(self):
        return '%s=%s' % (self.lval, self.rval)
581 | |
class NotEqualsOperator(object):
    """The ``!=`` operator.

    Both operands are called with ``(kind, data, pos)``, left first; a
    result that is exactly a tuple is reduced to its second item. The two
    values are then compared with ``!=``.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos):
        values = []
        for operand in (self.lval, self.rval):
            value = operand(kind, data, pos)
            if type(value) is tuple:
                value = value[1]
            values.append(value)
        return values[0] != values[1]

    def __repr__(self):
        return '%s!=%s' % (self.lval, self.rval)
597 | |
class OrOperator(object):
    """The ``or`` operator.

    Each operand is called with ``(kind, data, pos)``; a result that is
    exactly a tuple is reduced to its second item. The right operand is
    only evaluated when the left value is falsy. The result is always a
    ``bool``.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos):
        left = self.lval(kind, data, pos)
        if type(left) is tuple:
            left = left[1]
        if not left:
            right = self.rval(kind, data, pos)
            if type(right) is tuple:
                right = right[1]
            return bool(right)
        return True

    def __repr__(self):
        return '%s or %s' % (self.lval, self.rval)
615 | |
# Comparison operator tokens mapped to the classes that implement them.
_operator_map = {
    '=': EqualsOperator,
    '!=': NotEqualsOperator,
}