annotate markup/path.py @ 106:61fa4cadb766

Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
author cmlenz
date Fri, 28 Jul 2006 16:51:35 +0000
parents f1aa49c759b2
children 8a4d9064f363
rev   line source
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
1 # -*- coding: utf-8 -*-
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
2 #
66
822089ae65ce Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents: 61
diff changeset
3 # Copyright (C) 2006 Edgewall Software
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
4 # All rights reserved.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
5 #
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
6 # This software is licensed as described in the file COPYING, which
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
7 # you should have received as part of this distribution. The terms
66
822089ae65ce Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents: 61
diff changeset
8 # are also available at http://markup.edgewall.org/wiki/License.
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
9 #
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
10 # This software consists of voluntary contributions made by many
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
11 # individuals. For the exact contribution history, see the revision
66
822089ae65ce Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents: 61
diff changeset
12 # history and logs, available at http://markup.edgewall.org/log/.
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
13
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
14 """Basic support for evaluating XPath expressions against streams."""
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
15
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
16 import re
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
17
106
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
18 from markup.core import QName, Stream, START, END, TEXT, COMMENT, PI
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
19
106
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
20 __all__ = ['Path', 'PathSyntaxError']
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
21
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
22
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
23 class Path(object):
26
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
24 """Implements basic XPath support on streams.
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
25
26
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
26 Instances of this class represent a "compiled" XPath expression, and provide
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
27 methods for testing the path against a stream, as well as extracting a
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
28 substream matching that path.
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
29 """
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
30
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
31 def __init__(self, text):
26
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
32 """Create the path object from a string.
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
33
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
34 @param text: the path expression
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
35 """
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
36 self.source = text
106
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
37 self.paths = _PathParser(text).parse()
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
38
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
39 def __repr__(self):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
40 return '<%s "%s">' % (self.__class__.__name__, self.source)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
41
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
42 def select(self, stream):
26
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
43 """Returns a substream of the given stream that matches the path.
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
44
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
45 If there are no matches, this method returns an empty stream.
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
46
33
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
47 >>> from markup.input import XML
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
48 >>> xml = XML('<root><elem><child>Text</child></elem></root>')
61
33c2702cf6da Use a different namespace than Kid uses.
cmlenz
parents: 38
diff changeset
49
33
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
50 >>> print Path('child').select(xml)
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
51 <child>Text</child>
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
52
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
53 >>> print Path('child/text()').select(xml)
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
54 Text
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
55
26
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
56 @param stream: the stream to select from
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
57 @return: the substream matching the path, or an empty stream
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
58 """
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
59 stream = iter(stream)
26
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
60 def _generate():
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
61 test = self.test()
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
62 for kind, data, pos in stream:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
63 result = test(kind, data, pos)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
64 if result is True:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
65 yield kind, data, pos
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
66 depth = 1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
67 while depth > 0:
73
b0fd16111f2e Some more performance tweaks.
cmlenz
parents: 70
diff changeset
68 subkind, subdata, subpos = stream.next()
b0fd16111f2e Some more performance tweaks.
cmlenz
parents: 70
diff changeset
69 if subkind is START:
b0fd16111f2e Some more performance tweaks.
cmlenz
parents: 70
diff changeset
70 depth += 1
b0fd16111f2e Some more performance tweaks.
cmlenz
parents: 70
diff changeset
71 elif subkind is END:
b0fd16111f2e Some more performance tweaks.
cmlenz
parents: 70
diff changeset
72 depth -= 1
b0fd16111f2e Some more performance tweaks.
cmlenz
parents: 70
diff changeset
73 yield subkind, subdata, subpos
b0fd16111f2e Some more performance tweaks.
cmlenz
parents: 70
diff changeset
74 test(subkind, subdata, subpos)
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
75 elif result:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
76 yield result
26
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
77 return Stream(_generate())
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
78
38
fec9f4897415 Fix for #2 (incorrect context node in path expressions). Still some paths that produce incorrect results, but the common case seems to work now.
cmlenz
parents: 37
diff changeset
79 def test(self, ignore_context=False):
26
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
80 """Returns a function that can be used to track whether the path matches
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
81 a specific stream event.
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
82
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
83 The function returned expects the positional arguments `kind`, `data`,
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
84 and `pos`, i.e. basically an unpacked stream event. If the path matches
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
85 the event, the function returns the match (for example, a `START` or
106
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
86 `TEXT` event.) Otherwise, it returns `None`.
33
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
87
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
88 >>> from markup.input import XML
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
89 >>> xml = XML('<root><elem><child id="1"/></elem><child id="2"/></root>')
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
90 >>> test = Path('child').test()
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
91 >>> for kind, data, pos in xml:
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
92 ... if test(kind, data, pos):
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
93 ... print kind, data
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
94 START (u'child', [(u'id', u'1')])
0e1fc0211416 Add doctests for path module.
cmlenz
parents: 27
diff changeset
95 START (u'child', [(u'id', u'2')])
26
039fc5b87405 * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 25
diff changeset
96 """
106
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
97 paths = [(idx, steps, len(steps), [0])
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
98 for idx, steps in enumerate(self.paths)]
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
99
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
100 def _test(kind, data, pos):
106
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
101 for idx, steps, size, stack in paths:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
102 if not stack:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
103 continue
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
104 cursor = stack[-1]
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
105
106
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
106 if kind is END:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
107 stack.pop()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
108 continue
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
109
106
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
110 elif kind is START:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
111 stack.append(cursor)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
112
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
113 matched = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
114 closure, node_test, predicates = steps[cursor]
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
115
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
116 matched = node_test(kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
117 if matched and predicates:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
118 for predicate in predicates:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
119 if not predicate(kind, data, pos):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
120 matched = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
121 break
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
122
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
123 if matched:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
124 if cursor + 1 == size: # the last location step
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
125 if ignore_context or len(stack) > 2 \
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
126 or node_test.axis != 'child':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
127 return matched
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
128 else:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
129 stack[-1] += 1
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
130
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
131 elif kind is START and not closure:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
132 # If this step is not a closure, it cannot be matched until
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
133 # the current element is closed... so we need to move the
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
134 # cursor back to the last closure and retest that against
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
135 # the current element
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
136 closures = [step for step in steps[:cursor] if step[0]]
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
137 closures.reverse()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
138 for closure, node_test, predicates in closures:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
139 cursor -= 1
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
140 if closure:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
141 matched = node_test(kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
142 if matched:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
143 cursor += 1
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
144 break
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
145 stack[-1] = cursor
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
146
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
147 return None
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
148
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
149 return _test
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
150
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
151
106
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
152 def _node_test_current_element():
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
153 def _node_test_current_element(kind, *_):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
154 return kind is START
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
155 _node_test_current_element.axis = 'self'
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
156 return _node_test_current_element
77
f1aa49c759b2 * Simplify implementation of the individual XPath tests (use closures instead of callable classes)
cmlenz
parents: 73
diff changeset
157
106
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
158 def _node_test_any_child_element():
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
159 def _node_test_any_child_element(kind, *_):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
160 return kind is START
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
161 _node_test_any_child_element.axis = 'child'
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
162 return _node_test_any_child_element
77
f1aa49c759b2 * Simplify implementation of the individual XPath tests (use closures instead of callable classes)
cmlenz
parents: 73
diff changeset
163
106
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
164 def _node_test_child_element_by_name(name):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
165 def _node_test_child_element_by_name(kind, data, _):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
166 return kind is START and data[0].localname == name
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
167 _node_test_child_element_by_name.axis = 'child'
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
168 return _node_test_child_element_by_name
77
f1aa49c759b2 * Simplify implementation of the individual XPath tests (use closures instead of callable classes)
cmlenz
parents: 73
diff changeset
169
106
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
170 def _node_test_any_attribute():
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
171 def _node_test_any_attribute(kind, data, _):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
172 if kind is START and data[1]:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
173 return data[1]
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
174 _node_test_any_attribute.axis = 'attribute'
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
175 return _node_test_any_attribute
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
176
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
177 def _node_test_attribute_by_name(name):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
178 def _node_test_attribute_by_name(kind, data, pos):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
179 if kind is START and name in data[1]:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
180 return TEXT, data[1].get(name), pos
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
181 _node_test_attribute_by_name.axis = 'attribute'
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
182 return _node_test_attribute_by_name
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
183
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
184 def _function_comment():
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
185 def _function_comment(kind, data, pos):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
186 return kind is COMMENT and (kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
187 _function_comment.axis = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
188 return _function_comment
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
189
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
190 def _function_node():
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
191 def _function_node(kind, data, pos):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
192 return True
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
193 _function_node.axis = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
194 return _function_node
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
195
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
196 def _function_processing_instruction(name=None):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
197 def _function_processing_instruction(kind, data, pos):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
198 if kind is PI and (not name or data[0] == name):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
199 return (kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
200 _function_processing_instruction.axis = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
201 return _function_processing_instruction
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
202
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
203 def _function_text():
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
204 def _function_text(kind, data, pos):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
205 return kind is TEXT and (kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
206 _function_text.axis = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
207 return _function_text
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
208
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
209 def _literal_string(text):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
210 def _literal_string(*_):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
211 return TEXT, text, (None, -1, -1)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
212 _literal_string.axis = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
213 return _literal_string
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
214
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
215 def _operator_eq(lval, rval):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
216 def _operator_eq(kind, data, pos):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
217 lv = lval(kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
218 rv = rval(kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
219 return (lv and lv[1]) == (rv and rv[1])
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
220 _operator_eq.axis = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
221 return _operator_eq
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
222
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
223 def _operator_neq(lval, rval):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
224 def _operator_neq(kind, data, pos):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
225 lv = lval(kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
226 rv = rval(kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
227 return (lv and lv[1]) != (rv and rv[1])
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
228 _operator_neq.axis = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
229 return _operator_neq
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
230
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
231 def _operator_and(lval, rval):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
232 def _operator_and(kind, data, pos):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
233 lv = lval(kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
234 if not lv or (lv is not True and not lv[1]):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
235 return False
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
236 rv = rval(kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
237 if not rv or (rv is not True and not rv[1]):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
238 return False
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
239 return True
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
240 _operator_and.axis = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
241 return _operator_and
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
242
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
243 def _operator_or(lval, rval):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
244 def _operator_or(kind, data, pos):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
245 lv = lval(kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
246 if lv and (lv is True or lv[1]):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
247 return True
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
248 rv = rval(kind, data, pos)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
249 if rv and (rv is True or rv[1]):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
250 return True
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
251 return False
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
252 _operator_or.axis = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
253 return _operator_or
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
254
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
255
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
256 class PathSyntaxError(Exception):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
257 """Exception raised when an XPath expression is syntactically incorrect."""
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
258
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
259 def __init__(self, message, filename=None, lineno=-1, offset=-1):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
260 if filename:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
261 message = '%s (%s, line %d)' % (message, filename, lineno)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
262 Exception.__init__(self, message)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
263 self.filename = filename
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
264 self.lineno = lineno
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
265 self.offset = offset
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
266
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
267
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
268 class _PathParser(object):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
269 """Tokenizes and parses an XPath expression."""
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
270
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
271 _QUOTES = (("'", "'"), ('"', '"'))
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
272 _TOKENS = ('::', ':', '..', '.', '//', '/', '[', ']', '()', '(', ')', '@',
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
273 '=', '!=', '!', '|')
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
274 _tokenize = re.compile('(%s)|([^%s\s]+)|\s+' % (
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
275 '|'.join([re.escape(t) for t in _TOKENS]),
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
276 ''.join([re.escape(t[0]) for t in _TOKENS]))).findall
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
277
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
278 def __init__(self, text):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
279 self.tokens = filter(None, [a or b for a, b in self._tokenize(text)])
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
280 self.pos = 0
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
281
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
282 # Tokenizer
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
283
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
284 at_end = property(lambda self: self.pos == len(self.tokens) - 1)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
285 cur_token = property(lambda self: self.tokens[self.pos])
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
286
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
287 def next_token(self):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
288 self.pos += 1
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
289 return self.tokens[self.pos]
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
290
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
291 def peek_token(self):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
292 if not self.at_end:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
293 return self.tokens[self.pos + 1]
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
294 return None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
295
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
296 # Recursive descent parser
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
297
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
298 def parse(self):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
299 """Parses the XPath expression and returns a list of location path
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
300 tests.
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
301
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
302 For union expressions (such as `*|text()`), this function returns one
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
303 test for each operand in the union. For patch expressions that don't
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
304 use the union operator, the function always returns a list of size 1.
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
305
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
306 Each path test in turn is a sequence of tests that correspond to the
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
307 location steps, each tuples of the form `(closure, testfunc, predicates)`
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
308 """
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
309 paths = [self._location_path()]
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
310 while self.cur_token == '|':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
311 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
312 paths.append(self._location_path())
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
313 if not self.at_end:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
314 raise PathSyntaxError('Unexpected token %r after end of expression'
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
315 % self.cur_token)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
316 return paths
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
317
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
318 def _location_path(self):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
319 next_is_closure = True
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
320 if self.cur_token.startswith('/'):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
321 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
322
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
323 steps = []
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
324 while True:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
325 step = self._location_step()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
326 steps.append((next_is_closure, step[1], step[2]))
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
327 next_is_closure = False
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
328 if self.cur_token == '//':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
329 next_is_closure = True
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
330 elif self.at_end or self.cur_token != '/':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
331 break
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
332 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
333 return steps
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
334
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
335 def _location_step(self):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
336 step = [False, None, []]
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
337 if self.cur_token == '@':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
338 axis = 'attribute'
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
339 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
340 else:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
341 # FIXME: support full axis specifiers (name followed by ::)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
342 axis = 'child'
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
343 step[1] = self._node_test(axis)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
344 while self.cur_token == '[':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
345 step[2].append(self._predicate())
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
346 return step
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
347
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
348 def _node_test(self, axis=None):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
349 test = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
350 if self.peek_token() in ('(', '()'): # Node type test
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
351 test = self._node_type()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
352
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
353 else: # Name test
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
354 if axis == 'attribute':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
355 if self.cur_token == '*':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
356 test = _node_test_any_attribute()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
357 else:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
358 test = _node_test_attribute_by_name(self.cur_token)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
359 else:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
360 if self.cur_token == '.':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
361 test = _node_test_current_element()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
362 elif self.cur_token == '*':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
363 test = _node_test_any_child_element()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
364 else:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
365 test = _node_test_child_element_by_name(self.cur_token)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
366
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
367 if not self.at_end:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
368 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
369 return test
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
370
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
371 def _node_type(self):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
372 name = self.cur_token
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
373 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
374 if name == 'comment':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
375 return _function_comment()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
376 elif name == 'node':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
377 return _function_node()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
378 elif name == 'processing-instruction':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
379 args = []
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
380 if self.cur_token != '()':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
381 # The processing-instruction() function optionally accepts the
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
382 # name of the PI as argument, which must be a literal string
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
383 self.next_token() # (
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
384 if self.cur_token != ')':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
385 string = self.cur_token
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
386 if (string[0], string[-1]) in self._QUOTES:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
387 string = string[1:-1]
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
388 args.append(string)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
389 return _function_processing_instruction(*args)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
390 elif name == 'text':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
391 return _function_text()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
392 else:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
393 raise PathSyntaxError('%s() not allowed here' % name)
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
394
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
395 def _predicate(self):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
396 assert self.cur_token == '['
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
397 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
398 return self._or_expr()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
399 assert self.cur_token == ']'
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
400 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
401
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
402 def _or_expr(self):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
403 expr = self._and_expr()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
404 while self.cur_token == 'or':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
405 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
406 expr = _operator_or(expr, self._and_expr())
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
407 return expr
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
408
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
409 def _and_expr(self):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
410 expr = self._equality_expr()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
411 while self.cur_token == 'and':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
412 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
413 expr = _operator_and(expr, self._equality_expr())
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
414 return expr
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
415
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
416 def _equality_expr(self):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
417 expr = self._primary_expr()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
418 while self.cur_token in ('=', '!='):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
419 op = {'=': _operator_eq, '!=': _operator_neq}[self.cur_token]
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
420 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
421 expr = op(expr, self._primary_expr())
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
422 return expr
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
423
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
424 def _primary_expr(self):
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
425 token = self.cur_token
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
426 if len(token) > 1 and (token[0], token[-1]) in self._QUOTES:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
427 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
428 return _literal_string(token[1:-1])
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
429 elif token[0].isdigit():
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
430 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
431 return _literal_number(float(token))
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
432 else:
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
433 axis = None
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
434 if token == '@':
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
435 axis = 'attribute'
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
436 self.next_token()
61fa4cadb766 Complete rewrite of the XPath parsing, which was a mess before. Closes #19.
cmlenz
parents: 77
diff changeset
437 return self._node_test(axis)
Copyright (C) 2012-2017 Edgewall Software