Mercurial > genshi > genshi-test
annotate markup/path.py @ 161:a25f9fc5787d
Various docstring additions and other cosmetic changes.
author | cmlenz |
---|---|
date | Wed, 16 Aug 2006 22:32:31 +0000 |
parents | cc707c5873f3 |
children | f767cf98e3e3 |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
61
diff
changeset
|
3 # Copyright (C) 2006 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
61
diff
changeset
|
8 # are also available at http://markup.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
61
diff
changeset
|
12 # history and logs, available at http://markup.edgewall.org/log/. |
1 | 13 |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
14 """Basic support for evaluating XPath expressions against streams. |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
15 |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
16 >>> from markup.input import XML |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
17 >>> doc = XML('''<doc> |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
18 ... <items count="2"> |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
19 ... <item status="new"> |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
20 ... <summary>Foo</summary> |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
21 ... </item> |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
22 ... <item status="closed"> |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
23 ... <summary>Bar</summary> |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
24 ... </item> |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
25 ... </items> |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
26 ... </doc>''') |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
27 >>> print doc.select('items/item[@status="closed"]/summary/text()') |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
28 Bar |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
29 |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
30 Because the XPath engine operates on markup streams (as opposed to tree |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
31 structures), it only implements a subset of the full XPath 1.0 language. |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
32 """ |
1 | 33 |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
34 from math import ceil, floor |
1 | 35 import re |
36 | |
145
56d534eb53f9
* Fix error in expression evaluation when the expression evaluates to an iterable that does not produce event tuples.
cmlenz
parents:
139
diff
changeset
|
37 from markup.core import Stream, START, END, TEXT, COMMENT, PI |
1 | 38 |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
39 __all__ = ['Path', 'PathSyntaxError'] |
1 | 40 |
41 | |
114 | 42 class Axis(object): |
43 """Defines constants for the various supported XPath axes.""" | |
44 | |
45 ATTRIBUTE = 'attribute' | |
46 CHILD = 'child' | |
47 DESCENDANT = 'descendant' | |
48 DESCENDANT_OR_SELF = 'descendant-or-self' | |
49 NAMESPACE = 'namespace' | |
50 SELF = 'self' | |
51 | |
52 def forname(cls, name): | |
53 """Return the axis constant for the given name, or `None` if no such | |
54 axis was defined. | |
55 """ | |
56 return getattr(cls, name.upper().replace('-', '_'), None) | |
57 forname = classmethod(forname) | |
58 | |
59 | |
60 ATTRIBUTE = Axis.ATTRIBUTE | |
61 CHILD = Axis.CHILD | |
62 DESCENDANT = Axis.DESCENDANT | |
63 DESCENDANT_OR_SELF = Axis.DESCENDANT_OR_SELF | |
64 NAMESPACE = Axis.NAMESPACE | |
65 SELF = Axis.SELF | |
66 | |
67 | |
1 | 68 class Path(object): |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
69 """Implements basic XPath support on streams. |
1 | 70 |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
71 Instances of this class represent a "compiled" XPath expression, and provide |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
72 methods for testing the path against a stream, as well as extracting a |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
73 substream matching that path. |
1 | 74 """ |
75 | |
139
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
76 def __init__(self, text, filename=None, lineno=-1): |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
77 """Create the path object from a string. |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
78 |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
79 @param text: the path expression |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
80 """ |
1 | 81 self.source = text |
139
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
82 self.paths = PathParser(text, filename, lineno).parse() |
1 | 83 |
84 def __repr__(self): | |
137 | 85 paths = [] |
86 for path in self.paths: | |
87 steps = [] | |
88 for axis, nodetest, predicates in path: | |
89 steps.append('%s::%s' % (axis, nodetest)) | |
90 for predicate in predicates: | |
91 steps.append('[%s]' % predicate) | |
92 paths.append('/'.join(steps)) | |
93 return '<%s "%s">' % (self.__class__.__name__, '|'.join(paths)) | |
1 | 94 |
95 def select(self, stream): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
96 """Returns a substream of the given stream that matches the path. |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
97 |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
98 If there are no matches, this method returns an empty stream. |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
99 |
33 | 100 >>> from markup.input import XML |
101 >>> xml = XML('<root><elem><child>Text</child></elem></root>') | |
61 | 102 |
33 | 103 >>> print Path('child').select(xml) |
104 <child>Text</child> | |
105 | |
106 >>> print Path('child/text()').select(xml) | |
107 Text | |
108 | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
109 @param stream: the stream to select from |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
110 @return: the substream matching the path, or an empty stream |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
111 """ |
1 | 112 stream = iter(stream) |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
113 def _generate(): |
1 | 114 test = self.test() |
115 for kind, data, pos in stream: | |
116 result = test(kind, data, pos) | |
117 if result is True: | |
118 yield kind, data, pos | |
119 depth = 1 | |
120 while depth > 0: | |
73 | 121 subkind, subdata, subpos = stream.next() |
122 if subkind is START: | |
123 depth += 1 | |
124 elif subkind is END: | |
125 depth -= 1 | |
126 yield subkind, subdata, subpos | |
127 test(subkind, subdata, subpos) | |
1 | 128 elif result: |
129 yield result | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
130 return Stream(_generate()) |
1 | 131 |
38
fec9f4897415
Fix for #2 (incorrect context node in path expressions). Still some paths that produce incorrect results, but the common case seems to work now.
cmlenz
parents:
37
diff
changeset
|
132 def test(self, ignore_context=False): |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
133 """Returns a function that can be used to track whether the path matches |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
134 a specific stream event. |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
135 |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
136 The function returned expects the positional arguments `kind`, `data`, |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
137 and `pos`, i.e. basically an unpacked stream event. If the path matches |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
138 the event, the function returns the match (for example, a `START` or |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
139 `TEXT` event.) Otherwise, it returns `None`. |
33 | 140 |
141 >>> from markup.input import XML | |
142 >>> xml = XML('<root><elem><child id="1"/></elem><child id="2"/></root>') | |
143 >>> test = Path('child').test() | |
144 >>> for kind, data, pos in xml: | |
145 ... if test(kind, data, pos): | |
146 ... print kind, data | |
147 START (u'child', [(u'id', u'1')]) | |
148 START (u'child', [(u'id', u'2')]) | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
25
diff
changeset
|
149 """ |
137 | 150 paths = [(steps, len(steps), [0]) for steps in self.paths] |
1 | 151 |
152 def _test(kind, data, pos): | |
137 | 153 for steps, size, stack in paths: |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
154 if not stack: |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
155 continue |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
156 cursor = stack[-1] |
1 | 157 |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
158 if kind is END: |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
159 stack.pop() |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
160 continue |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
161 elif kind is START: |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
162 stack.append(cursor) |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
163 |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
164 while 1: |
137 | 165 axis, nodetest, predicates = steps[cursor] |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
166 |
137 | 167 matched = nodetest(kind, data, pos) |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
168 if matched and predicates: |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
169 for predicate in predicates: |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
170 if not predicate(kind, data, pos): |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
171 matched = None |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
172 break |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
173 |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
174 if matched: |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
175 if cursor + 1 == size: # the last location step |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
176 if ignore_context or \ |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
177 kind is not START or \ |
137 | 178 axis in (ATTRIBUTE, NAMESPACE, SELF) or \ |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
179 len(stack) > 2: |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
180 return matched |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
181 else: |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
182 cursor += 1 |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
183 stack[-1] = cursor |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
184 |
114 | 185 if axis is not SELF: |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
186 break |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
187 |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
188 if not matched and kind is START \ |
114 | 189 and axis not in (DESCENDANT, DESCENDANT_OR_SELF): |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
190 # If this step is not a closure, it cannot be matched until |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
191 # the current element is closed... so we need to move the |
114 | 192 # cursor back to the previous closure and retest that |
193 # against the current element | |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
194 backsteps = [step for step in steps[:cursor] |
114 | 195 if step[0] in (DESCENDANT, DESCENDANT_OR_SELF)] |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
196 backsteps.reverse() |
137 | 197 for axis, nodetest, predicates in backsteps: |
198 matched = nodetest(kind, data, pos) | |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
199 if not matched: |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
200 cursor -= 1 |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
201 break |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
202 stack[-1] = cursor |
1 | 203 |
204 return None | |
205 | |
206 return _test | |
207 | |
208 | |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
209 class PathSyntaxError(Exception): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
210 """Exception raised when an XPath expression is syntactically incorrect.""" |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
211 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
212 def __init__(self, message, filename=None, lineno=-1, offset=-1): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
213 if filename: |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
214 message = '%s (%s, line %d)' % (message, filename, lineno) |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
215 Exception.__init__(self, message) |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
216 self.filename = filename |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
217 self.lineno = lineno |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
218 self.offset = offset |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
219 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
220 |
137 | 221 class PathParser(object): |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
222 """Tokenizes and parses an XPath expression.""" |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
223 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
224 _QUOTES = (("'", "'"), ('"', '"')) |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
225 _TOKENS = ('::', ':', '..', '.', '//', '/', '[', ']', '()', '(', ')', '@', |
121
22a7080ed242
Added support for the XPath functions `name()`, `namespace-uri()`, `local-name()`, and `not()`.
cmlenz
parents:
114
diff
changeset
|
226 '=', '!=', '!', '|', ',') |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
227 _tokenize = re.compile('("[^"]*")|(\'[^\']*\')|(%s)|([^%s\s]+)|\s+' % ( |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
228 '|'.join([re.escape(t) for t in _TOKENS]), |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
229 ''.join([re.escape(t[0]) for t in _TOKENS]))).findall |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
230 |
139
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
231 def __init__(self, text, filename=None, lineno=-1): |
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
232 self.filename = filename |
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
233 self.lineno = lineno |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
234 self.tokens = filter(None, [a or b or c or d for a, b, c, d in |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
235 self._tokenize(text)]) |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
236 self.pos = 0 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
237 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
238 # Tokenizer |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
239 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
240 at_end = property(lambda self: self.pos == len(self.tokens) - 1) |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
241 cur_token = property(lambda self: self.tokens[self.pos]) |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
242 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
243 def next_token(self): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
244 self.pos += 1 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
245 return self.tokens[self.pos] |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
246 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
247 def peek_token(self): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
248 if not self.at_end: |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
249 return self.tokens[self.pos + 1] |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
250 return None |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
251 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
252 # Recursive descent parser |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
253 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
254 def parse(self): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
255 """Parses the XPath expression and returns a list of location path |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
256 tests. |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
257 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
258 For union expressions (such as `*|text()`), this function returns one |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
259 test for each operand in the union. For patch expressions that don't |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
260 use the union operator, the function always returns a list of size 1. |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
261 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
262 Each path test in turn is a sequence of tests that correspond to the |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
263 location steps, each tuples of the form `(axis, testfunc, predicates)` |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
264 """ |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
265 paths = [self._location_path()] |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
266 while self.cur_token == '|': |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
267 self.next_token() |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
268 paths.append(self._location_path()) |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
269 if not self.at_end: |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
270 raise PathSyntaxError('Unexpected token %r after end of expression' |
139
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
271 % self.cur_token, self.filename, self.lineno) |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
272 return paths |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
273 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
274 def _location_path(self): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
275 steps = [] |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
276 while True: |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
277 if self.cur_token == '//': |
137 | 278 steps.append((DESCENDANT_OR_SELF, NodeTest(), [])) |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
279 self.next_token() |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
280 elif self.cur_token == '/' and not steps: |
139
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
281 raise PathSyntaxError('Absolute location paths not supported', |
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
282 self.filename, self.lineno) |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
283 |
137 | 284 axis, nodetest, predicates = self._location_step() |
285 if not axis: | |
145
56d534eb53f9
* Fix error in expression evaluation when the expression evaluates to an iterable that does not produce event tuples.
cmlenz
parents:
139
diff
changeset
|
286 axis = CHILD |
137 | 287 steps.append((axis, nodetest, predicates)) |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
288 |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
289 if self.at_end or not self.cur_token.startswith('/'): |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
290 break |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
291 self.next_token() |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
292 |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
293 return steps |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
294 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
295 def _location_step(self): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
296 if self.cur_token == '@': |
114 | 297 axis = ATTRIBUTE |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
298 self.next_token() |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
299 elif self.cur_token == '.': |
114 | 300 axis = SELF |
137 | 301 elif self.cur_token == '..': |
139
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
302 raise PathSyntaxError('Unsupported axis "parent"', self.filename, |
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
303 self.lineno) |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
304 elif self.peek_token() == '::': |
114 | 305 axis = Axis.forname(self.cur_token) |
306 if axis is None: | |
139
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
307 raise PathSyntaxError('Unsupport axis "%s"' % axis, |
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
308 self.filename, self.lineno) |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
309 self.next_token() |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
310 self.next_token() |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
311 else: |
137 | 312 axis = None |
313 nodetest = self._node_test(axis or CHILD) | |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
314 predicates = [] |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
315 while self.cur_token == '[': |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
316 predicates.append(self._predicate()) |
137 | 317 return axis, nodetest, predicates |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
318 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
319 def _node_test(self, axis=None): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
320 test = None |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
321 if self.peek_token() in ('(', '()'): # Node type test |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
322 test = self._node_type() |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
323 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
324 else: # Name test |
137 | 325 if self.cur_token == '*': |
326 test = PrincipalTypeTest(axis) | |
327 elif self.cur_token == '.': | |
328 test = NodeTest() | |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
329 else: |
137 | 330 test = LocalNameTest(axis, self.cur_token) |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
331 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
332 if not self.at_end: |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
333 self.next_token() |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
334 return test |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
335 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
336 def _node_type(self): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
337 name = self.cur_token |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
338 self.next_token() |
137 | 339 |
340 args = [] | |
341 if self.cur_token != '()': | |
342 # The processing-instruction() function optionally accepts the | |
343 # name of the PI as argument, which must be a literal string | |
344 self.next_token() # ( | |
345 if self.cur_token != ')': | |
346 string = self.cur_token | |
347 if (string[0], string[-1]) in self._QUOTES: | |
348 string = string[1:-1] | |
349 args.append(string) | |
350 | |
351 cls = _nodetest_map.get(name) | |
352 if not cls: | |
139
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
353 raise PathSyntaxError('%s() not allowed here' % name, self.filename, |
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
354 self.lineno) |
137 | 355 return cls(*args) |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
356 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
357 def _predicate(self): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
358 assert self.cur_token == '[' |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
359 self.next_token() |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
360 expr = self._or_expr() |
121
22a7080ed242
Added support for the XPath functions `name()`, `namespace-uri()`, `local-name()`, and `not()`.
cmlenz
parents:
114
diff
changeset
|
361 if self.cur_token != ']': |
22a7080ed242
Added support for the XPath functions `name()`, `namespace-uri()`, `local-name()`, and `not()`.
cmlenz
parents:
114
diff
changeset
|
362 raise PathSyntaxError('Expected "]" to close predicate, ' |
139
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
363 'but found "%s"' % self.cur_token, |
54131cbb91a5
Implement position reporting for XPath syntax errors. Closes #20.
cmlenz
parents:
137
diff
changeset
|
364 self.filename, self.lineno) |
111
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
365 if not self.at_end: |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
366 self.next_token() |
8a4d9064f363
Some fixes and more unit tests for the XPath engine.
cmlenz
parents:
106
diff
changeset
|
367 return expr |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
368 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
369 def _or_expr(self): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
370 expr = self._and_expr() |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
371 while self.cur_token == 'or': |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
372 self.next_token() |
137 | 373 expr = OrOperator(expr, self._and_expr()) |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
374 return expr |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
375 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
376 def _and_expr(self): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
377 expr = self._equality_expr() |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
378 while self.cur_token == 'and': |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
379 self.next_token() |
137 | 380 expr = AndOperator(expr, self._equality_expr()) |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
381 return expr |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
382 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
383 def _equality_expr(self): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
384 expr = self._primary_expr() |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
385 while self.cur_token in ('=', '!='): |
137 | 386 op = _operator_map.get(self.cur_token) |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
387 self.next_token() |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
388 expr = op(expr, self._primary_expr()) |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
389 return expr |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
390 |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
391 def _primary_expr(self): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
392 token = self.cur_token |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
393 if len(token) > 1 and (token[0], token[-1]) in self._QUOTES: |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
394 self.next_token() |
137 | 395 return StringLiteral(token[1:-1]) |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
396 elif token[0].isdigit(): |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
397 self.next_token() |
137 | 398 return NumberLiteral(float(token)) |
121
22a7080ed242
Added support for the XPath functions `name()`, `namespace-uri()`, `local-name()`, and `not()`.
cmlenz
parents:
114
diff
changeset
|
399 elif not self.at_end and self.peek_token().startswith('('): |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
400 return self._function_call() |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
401 else: |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
402 axis = None |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
403 if token == '@': |
114 | 404 axis = ATTRIBUTE |
106
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
405 self.next_token() |
61fa4cadb766
Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents:
77
diff
changeset
|
406 return self._node_test(axis) |
137 | 407 |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
408 def _function_call(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
409 name = self.cur_token |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
410 if self.next_token() == '()': |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
411 args = [] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
412 else: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
413 assert self.cur_token == '(' |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
414 self.next_token() |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
415 args = [self._or_expr()] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
416 while self.cur_token == ',': |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
417 self.next_token() |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
418 args.append(self._or_expr()) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
419 if not self.cur_token == ')': |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
420 raise PathSyntaxError('Expected ")" to close function argument ' |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
421 'list, but found "%s"' % self.cur_token, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
422 self.filename, self.lineno) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
423 self.next_token() |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
424 cls = _function_map.get(name) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
425 if not cls: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
426 raise PathSyntaxError('Unsupported function "%s"' % name, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
427 self.filename, self.lineno) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
428 return cls(*args) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
429 |
137 | 430 |
431 # Node tests | |
432 | |
433 class PrincipalTypeTest(object): | |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
434 """Node test that matches any event with the given principal type.""" |
137 | 435 __slots__ = ['principal_type'] |
436 def __init__(self, principal_type): | |
437 self.principal_type = principal_type | |
438 def __call__(self, kind, data, pos): | |
439 if kind is START: | |
440 if self.principal_type is ATTRIBUTE: | |
441 return data[1] or None | |
442 else: | |
443 return True | |
444 def __repr__(self): | |
445 return '*' | |
446 | |
447 class LocalNameTest(object): | |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
448 """Node test that matches any event with the given prinipal type and |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
449 local name. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
450 """ |
137 | 451 __slots__ = ['principal_type', 'name'] |
452 def __init__(self, principal_type, name): | |
453 self.principal_type = principal_type | |
454 self.name = name | |
455 def __call__(self, kind, data, pos): | |
456 if kind is START: | |
457 if self.principal_type is ATTRIBUTE and self.name in data[1]: | |
458 return TEXT, data[1].get(self.name), pos | |
459 else: | |
460 return data[0].localname == self.name | |
461 def __repr__(self): | |
462 return self.name | |
463 | |
464 class CommentNodeTest(object): | |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
465 """Node test that matches any comment events.""" |
137 | 466 __slots__ = [] |
467 def __call__(self, kind, data, pos): | |
468 return kind is COMMENT and (kind, data, pos) | |
469 def __repr__(self): | |
470 return 'comment()' | |
471 | |
472 class NodeTest(object): | |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
473 """Node test that matches any node.""" |
137 | 474 __slots__ = [] |
475 def __call__(self, kind, data, pos): | |
476 if kind is START: | |
477 return True | |
478 return kind, data, pos | |
479 def __repr__(self): | |
480 return 'node()' | |
481 | |
482 class ProcessingInstructionNodeTest(object): | |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
483 """Node test that matches any processing instruction event.""" |
137 | 484 __slots__ = ['target'] |
485 def __init__(self, target=None): | |
486 self.target = target | |
487 def __call__(self, kind, data, pos): | |
488 if kind is PI and (not self.target or data[0] == self.target): | |
489 return (kind, data, pos) | |
490 def __repr__(self): | |
491 arg = '' | |
492 if self.target: | |
493 arg = '"' + self.target + '"' | |
494 return 'processing-instruction(%s)' % arg | |
495 | |
496 class TextNodeTest(object): | |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
497 """Node test that matches any text event.""" |
137 | 498 __slots__ = [] |
499 def __call__(self, kind, data, pos): | |
500 return kind is TEXT and (kind, data, pos) | |
501 def __repr__(self): | |
502 return 'text()' | |
503 | |
504 _nodetest_map = {'comment': CommentNodeTest, 'node': NodeTest, | |
505 'processing-instruction': ProcessingInstructionNodeTest, | |
506 'text': TextNodeTest} | |
507 | |
508 # Functions | |
509 | |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
510 class Function(object): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
511 """Base class for function nodes in XPath expressions.""" |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
512 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
513 class BooleanFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
514 """The `boolean` function, which converts its argument to a boolean |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
515 value. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
516 """ |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
517 __slots__ = ['expr'] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
518 def __init__(self, expr): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
519 self.expr = expr |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
520 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
521 val = self.expr(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
522 if type(val) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
523 val = val[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
524 return bool(val) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
525 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
526 return 'boolean(%r)' % self.expr |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
527 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
528 class CeilingFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
529 """The `ceiling` function, which returns the nearest lower integer number |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
530 for the given number. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
531 """ |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
532 __slots__ = ['number'] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
533 def __init__(self, number): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
534 self.number = number |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
535 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
536 number = self.number(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
537 if type(number) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
538 number = number[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
539 return ceil(float(number)) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
540 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
541 return 'ceiling(%r)' % self.number |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
542 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
543 class ConcatFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
544 """The `concat` function, which concatenates (joins) the variable number of |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
545 strings it gets as arguments. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
546 """ |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
547 __slots__ = ['exprs'] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
548 def __init__(self, *exprs): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
549 self.exprs = exprs |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
550 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
551 strings = [] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
552 for item in [expr(kind, data, pos) for expr in self.exprs]: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
553 if type(item) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
554 assert item[0] is TEXT |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
555 item = item[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
556 strings.append(item) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
557 return u''.join(strings) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
558 def __repr__(self): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
559 return 'concat(%s)' % [repr(expr for expr in self.exprs)] |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
560 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
561 class ContainsFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
562 """The `contains` function, which returns whether a string contains a given |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
563 substring. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
564 """ |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
565 __slots__ = ['string1', 'string2'] |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
566 def __init__(self, string1, string2): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
567 self.string1 = string1 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
568 self.string2 = string2 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
569 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
570 string1 = self.string1(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
571 if type(string1) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
572 string1 = string1[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
573 string2 = self.string2(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
574 if type(string2) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
575 string2 = string2[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
576 return string2 in string1 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
577 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
578 return 'contains(%r, %r)' % (self.string1, self.string2) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
579 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
580 class FalseFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
581 """The `false` function, which always returns the boolean `false` value.""" |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
582 __slots__ = [] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
583 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
584 return False |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
585 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
586 return 'false()' |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
587 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
588 class FloorFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
589 """The `ceiling` function, which returns the nearest higher integer number |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
590 for the given number. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
591 """ |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
592 __slots__ = ['number'] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
593 def __init__(self, number): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
594 self.number = number |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
595 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
596 number = self.number(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
597 if type(number) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
598 number = number[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
599 return floor(float(number)) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
600 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
601 return 'floor(%r)' % self.number |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
602 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
603 class LocalNameFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
604 """The `local-name` function, which returns the local name of the current |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
605 element. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
606 """ |
137 | 607 __slots__ = [] |
608 def __call__(self, kind, data, pos): | |
609 if kind is START: | |
610 return TEXT, data[0].localname, pos | |
611 def __repr__(self): | |
612 return 'local-name()' | |
613 | |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
614 class NameFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
615 """The `name` function, which returns the qualified name of the current |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
616 element. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
617 """ |
137 | 618 __slots__ = [] |
619 def __call__(self, kind, data, pos): | |
620 if kind is START: | |
621 return TEXT, data[0], pos | |
622 def __repr__(self): | |
623 return 'name()' | |
624 | |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
625 class NamespaceUriFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
626 """The `namespace-uri` function, which returns the namespace URI of the |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
627 current element. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
628 """ |
137 | 629 __slots__ = [] |
630 def __call__(self, kind, data, pos): | |
631 if kind is START: | |
632 return TEXT, data[0].namespace, pos | |
633 def __repr__(self): | |
634 return 'namespace-uri()' | |
635 | |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
636 class NotFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
637 """The `not` function, which returns the negated boolean value of its |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
638 argument. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
639 """ |
137 | 640 __slots__ = ['expr'] |
641 def __init__(self, expr): | |
642 self.expr = expr | |
643 def __call__(self, kind, data, pos): | |
644 return not self.expr(kind, data, pos) | |
645 def __repr__(self): | |
646 return 'not(%s)' % self.expr | |
647 | |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
648 class NormalizeSpaceFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
649 """The `normalize-space` function, which removes leading and trailing |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
650 whitespace in the given string, and replaces multiple adjacent whitespace |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
651 characters inside the string with a single space. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
652 """ |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
653 __slots__ = ['expr'] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
654 _normalize = re.compile(r'\s{2,}').sub |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
655 def __init__(self, expr): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
656 self.expr = expr |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
657 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
658 string = self.expr(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
659 if type(string) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
660 string = string[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
661 return self._normalize(' ', string.strip()) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
662 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
663 return 'normalize-space(%s)' % repr(self.expr) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
664 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
665 class NumberFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
666 """The `number` function that converts its argument to a number.""" |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
667 __slots__ = ['expr'] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
668 def __init__(self, expr): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
669 self.expr = expr |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
670 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
671 val = self.expr(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
672 if type(val) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
673 val = val[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
674 return float(val) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
675 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
676 return 'number(%r)' % self.expr |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
677 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
678 class StartsWithFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
679 """The `starts-with` function that returns whether one string starts with |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
680 a given substring. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
681 """ |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
682 __slots__ = ['string1', 'string2'] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
683 def __init__(self, string1, string2): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
684 self.string1 = string2 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
685 self.string2 = string2 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
686 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
687 string1 = self.string1(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
688 if type(string1) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
689 string1 = string1[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
690 string2 = self.string2(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
691 if type(string2) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
692 string2 = string2[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
693 return string1.startswith(string2) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
694 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
695 return 'starts-with(%r, %r)' % (self.string1, self.string2) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
696 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
697 class StringLengthFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
698 """The `string-length` function that returns the length of the given |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
699 string. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
700 """ |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
701 __slots__ = ['expr'] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
702 def __init__(self, expr): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
703 self.expr = expr |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
704 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
705 string = self.expr(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
706 if type(string) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
707 string = string[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
708 return len(string) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
709 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
710 return 'string-length(%r)' % self.expr |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
711 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
712 class SubstringFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
713 """The `substring` function that returns the part of a string that starts |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
714 at the given offset, and optionally limited to the given length. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
715 """ |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
716 __slots__ = ['string', 'start', 'length'] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
717 def __init__(self, string, start, length=None): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
718 self.string = string |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
719 self.start = start |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
720 self.length = length |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
721 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
722 string = self.string(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
723 if type(string) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
724 string = string[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
725 start = self.start(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
726 if type(start) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
727 start = start[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
728 length = 0 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
729 if self.length is not None: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
730 length = self.length(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
731 if type(length) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
732 length = length[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
733 return string[int(start):len(string) - int(length)] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
734 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
735 if self.length is not None: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
736 return 'substring(%r, %r, %r)' % (self.string, self.start, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
737 self.length) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
738 else: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
739 return 'substring(%r, %r)' % (self.string, self.start) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
740 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
741 class SubstringAfterFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
742 """The `substring-after` function that returns the part of a string that |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
743 is found after the given substring. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
744 """ |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
745 __slots__ = ['string1', 'string2'] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
746 def __init__(self, string1, string2): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
747 self.string1 = string1 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
748 self.string2 = string2 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
749 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
750 string1 = self.string1(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
751 if type(string1) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
752 string1 = string1[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
753 string2 = self.string2(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
754 if type(string2) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
755 string2 = string2[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
756 index = string1.find(string2) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
757 if index >= 0: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
758 return string1[index + len(string2):] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
759 return u'' |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
760 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
761 return 'substring-after(%r, %r)' % (self.string1, self.string2) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
762 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
763 class SubstringBeforeFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
764 """The `substring-before` function that returns the part of a string that |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
765 is found before the given substring. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
766 """ |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
767 __slots__ = ['string1', 'string2'] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
768 def __init__(self, string1, string2): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
769 self.string1 = string1 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
770 self.string2 = string2 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
771 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
772 string1 = self.string1(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
773 if type(string1) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
774 string1 = string1[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
775 string2 = self.string2(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
776 if type(string2) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
777 string2 = string2[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
778 index = string1.find(string2) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
779 if index >= 0: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
780 return string1[:index] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
781 return u'' |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
782 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
783 return 'substring-after(%r, %r)' % (self.string1, self.string2) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
784 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
785 class TranslateFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
786 """The `translate` function that translates a set of characters in a |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
787 string to target set of characters. |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
788 """ |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
789 __slots__ = ['string', 'fromchars', 'tochars'] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
790 def __init__(self, string, fromchars, tochars): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
791 self.string = string |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
792 self.fromchars = fromchars |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
793 self.tochars = tochars |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
794 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
795 string = self.string(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
796 if type(string) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
797 string = string[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
798 fromchars = self.fromchars(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
799 if type(fromchars) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
800 fromchars = fromchars[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
801 tochars = self.tochars(kind, data, pos) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
802 if type(tochars) is tuple: |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
803 tochars = tochars[1] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
804 table = dict(zip([ord(c) for c in fromchars], |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
805 [ord(c) for c in tochars])) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
806 return string.translate(table) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
807 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
808 return 'translate(%r, %r, %r)' % (self.string, self.fromchars, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
809 self.tochars) |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
810 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
811 class TrueFunction(Function): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
812 """The `true` function, which always returns the boolean `true` value.""" |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
813 __slots__ = [] |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
814 def __call__(self, kind, data, pos): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
815 return True |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
816 def __repr__(self): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
817 return 'true()' |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
818 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
819 _function_map = {'boolean': BooleanFunction, 'ceiling': CeilingFunction, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
820 'concat': ConcatFunction, 'contains': ContainsFunction, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
821 'false': FalseFunction, 'floor': FloorFunction, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
822 'local-name': LocalNameFunction, 'name': NameFunction, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
823 'namespace-uri': NamespaceUriFunction, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
824 'normalize-space': NormalizeSpaceFunction, 'not': NotFunction, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
825 'number': NumberFunction, 'starts-with': StartsWithFunction, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
826 'string-length': StringLengthFunction, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
827 'substring': SubstringFunction, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
828 'substring-after': SubstringAfterFunction, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
829 'substring-before': SubstringBeforeFunction, |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
830 'translate': TranslateFunction, 'true': TrueFunction} |
137 | 831 |
832 # Literals | |
833 | |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
834 class Literal(object): |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
835 """Abstract base class for literal nodes.""" |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
836 |
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
837 class StringLiteral(Literal): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
838 """A string literal node.""" |
137 | 839 __slots__ = ['text'] |
840 def __init__(self, text): | |
841 self.text = text | |
842 def __call__(self, kind, data, pos): | |
843 return TEXT, self.text, (None, -1, -1) | |
844 def __repr__(self): | |
845 return '"%s"' % self.text | |
846 | |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
847 class NumberLiteral(Literal): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
848 """A number literal node.""" |
137 | 849 __slots__ = ['number'] |
850 def __init__(self, number): | |
851 self.number = number | |
852 def __call__(self, kind, data, pos): | |
155
50d4b08017df
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
cmlenz
parents:
145
diff
changeset
|
853 return TEXT, self.number, (None, -1, -1) |
137 | 854 def __repr__(self): |
855 return str(self.number) | |
856 | |
857 # Operators | |
858 | |
859 class AndOperator(object): | |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
860 """The boolean operator `and`.""" |
137 | 861 __slots__ = ['lval', 'rval'] |
862 def __init__(self, lval, rval): | |
863 self.lval = lval | |
864 self.rval = rval | |
865 def __call__(self, kind, data, pos): | |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
866 lval = self.lval(kind, data, pos) |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
867 if type(lval) is tuple: |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
868 lval = lval[1] |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
869 if not lval: |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
870 return False |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
871 rval = self.rval(kind, data, pos) |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
872 if type(rval) is tuple: |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
873 rval = rval[1] |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
874 return bool(rval) |
137 | 875 def __repr__(self): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
876 return '%s and %s' % (self.lval, self.rval) |
137 | 877 |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
878 class EqualsOperator(object): |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
879 """The equality operator `=`.""" |
137 | 880 __slots__ = ['lval', 'rval'] |
881 def __init__(self, lval, rval): | |
882 self.lval = lval | |
883 self.rval = rval | |
884 def __call__(self, kind, data, pos): | |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
885 lval = self.lval(kind, data, pos) |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
886 if type(lval) is tuple: |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
887 lval = lval[1] |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
888 rval = self.rval(kind, data, pos) |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
889 if type(rval) is tuple: |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
890 rval = rval[1] |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
891 return lval == rval |
137 | 892 def __repr__(self): |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
893 return '%s=%s' % (self.lval, self.rval) |
137 | 894 |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
895 class NotEqualsOperator(object): |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
896 """The equality operator `!=`.""" |
137 | 897 __slots__ = ['lval', 'rval'] |
898 def __init__(self, lval, rval): | |
899 self.lval = lval | |
900 self.rval = rval | |
901 def __call__(self, kind, data, pos): | |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
902 lval = self.lval(kind, data, pos) |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
903 if type(lval) is tuple: |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
904 lval = lval[1] |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
905 rval = self.rval(kind, data, pos) |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
906 if type(rval) is tuple: |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
907 rval = rval[1] |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
908 return lval != rval |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
909 def __repr__(self): |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
910 return '%s!=%s' % (self.lval, self.rval) |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
911 |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
912 class OrOperator(object): |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
913 """The boolean operator `or`.""" |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
914 __slots__ = ['lval', 'rval'] |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
915 def __init__(self, lval, rval): |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
916 self.lval = lval |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
917 self.rval = rval |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
918 def __call__(self, kind, data, pos): |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
919 lval = self.lval(kind, data, pos) |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
920 if type(lval) is tuple: |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
921 lval = lval[1] |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
922 if lval: |
137 | 923 return True |
161
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
924 rval = self.rval(kind, data, pos) |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
925 if type(rval) is tuple: |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
926 rval = rval[1] |
a25f9fc5787d
Various docstring additions and other cosmetic changes.
cmlenz
parents:
156
diff
changeset
|
927 return bool(rval) |
137 | 928 def __repr__(self): |
929 return '%s or %s' % (self.lval, self.rval) | |
930 | |
931 _operator_map = {'=': EqualsOperator, '!=': NotEqualsOperator} |