Mercurial > genshi > genshi-test
comparison markup/path.py @ 111:8a4d9064f363
Some fixes and more unit tests for the XPath engine.
author | cmlenz |
---|---|
date | Mon, 31 Jul 2006 17:25:43 +0000 |
parents | 61fa4cadb766 |
children | 8f53c3ad385c |
comparison
equal
deleted
inserted
replaced
110:44fbc30d78cd | 111:8a4d9064f363 |
---|---|
9 # | 9 # |
10 # This software consists of voluntary contributions made by many | 10 # This software consists of voluntary contributions made by many |
11 # individuals. For the exact contribution history, see the revision | 11 # individuals. For the exact contribution history, see the revision |
12 # history and logs, available at http://markup.edgewall.org/log/. | 12 # history and logs, available at http://markup.edgewall.org/log/. |
13 | 13 |
14 """Basic support for evaluating XPath expressions against streams.""" | 14 """Basic support for evaluating XPath expressions against streams. |
15 | |
16 >>> from markup.input import XML | |
17 >>> doc = XML('''<doc> | |
18 ... <items count="2"> | |
19 ... <item status="new"> | |
20 ... <summary>Foo</summary> | |
21 ... </item> | |
22 ... <item status="closed"> | |
23 ... <summary>Bar</summary> | |
24 ... </item> | |
25 ... </items> | |
26 ... </doc>''') | |
27 >>> print doc.select('items/item[@status="closed"]/summary/text()') | |
28 Bar | |
29 | |
30 Because the XPath engine operates on markup streams (as opposed to tree | |
31 structures), it only implements a subset of the full XPath 1.0 language. | |
32 """ | |
15 | 33 |
16 import re | 34 import re |
17 | 35 |
18 from markup.core import QName, Stream, START, END, TEXT, COMMENT, PI | 36 from markup.core import QName, Stream, START, END, TEXT, COMMENT, PI |
19 | 37 |
109 | 127 |
110 elif kind is START: | 128 elif kind is START: |
111 stack.append(cursor) | 129 stack.append(cursor) |
112 | 130 |
113 matched = None | 131 matched = None |
114 closure, node_test, predicates = steps[cursor] | 132 while 1: |
115 | 133 axis, node_test, predicates = steps[cursor] |
116 matched = node_test(kind, data, pos) | 134 |
117 if matched and predicates: | 135 matched = node_test(kind, data, pos) |
118 for predicate in predicates: | 136 if matched and predicates: |
119 if not predicate(kind, data, pos): | 137 for predicate in predicates: |
120 matched = None | 138 if not predicate(kind, data, pos): |
121 break | 139 matched = None |
122 | 140 break |
123 if matched: | 141 |
124 if cursor + 1 == size: # the last location step | 142 if matched: |
125 if ignore_context or len(stack) > 2 \ | 143 if cursor + 1 == size: # the last location step |
126 or node_test.axis != 'child': | 144 if ignore_context or \ |
127 return matched | 145 kind is not START or \ |
128 else: | 146 axis in ('attribute', 'self') or \ |
129 stack[-1] += 1 | 147 len(stack) > 2: |
130 | 148 return matched |
131 elif kind is START and not closure: | 149 else: |
150 cursor += 1 | |
151 stack[-1] = cursor | |
152 | |
153 if axis != 'self': | |
154 break | |
155 | |
156 if not matched and kind is START \ | |
157 and not axis.startswith('descendant'): | |
132 # If this step is not a closure, it cannot be matched until | 158 # If this step is not a closure, it cannot be matched until |
133 # the current element is closed... so we need to move the | 159 # the current element is closed... so we need to move the |
134 # cursor back to the last closure and retest that against | 160 # cursor back to the last closure and retest that against |
135 # the current element | 161 # the current element |
136 closures = [step for step in steps[:cursor] if step[0]] | 162 backsteps = [step for step in steps[:cursor] |
137 closures.reverse() | 163 if step[0].startswith('descendant')] |
138 for closure, node_test, predicates in closures: | 164 backsteps.reverse() |
139 cursor -= 1 | 165 for axis, node_test, predicates in backsteps: |
140 if closure: | 166 matched = node_test(kind, data, pos) |
141 matched = node_test(kind, data, pos) | 167 if not matched: |
142 if matched: | 168 cursor -= 1 |
143 cursor += 1 | 169 break |
144 break | |
145 stack[-1] = cursor | 170 stack[-1] = cursor |
146 | 171 |
147 return None | 172 return None |
148 | 173 |
149 return _test | 174 return _test |
187 _function_comment.axis = None | 212 _function_comment.axis = None |
188 return _function_comment | 213 return _function_comment |
189 | 214 |
190 def _function_node(): | 215 def _function_node(): |
191 def _function_node(kind, data, pos): | 216 def _function_node(kind, data, pos): |
192 return True | 217 if kind is START: |
218 return True | |
219 return kind, data, pos | |
193 _function_node.axis = None | 220 _function_node.axis = None |
194 return _function_node | 221 return _function_node |
195 | 222 |
196 def _function_processing_instruction(name=None): | 223 def _function_processing_instruction(name=None): |
197 def _function_processing_instruction(kind, data, pos): | 224 def _function_processing_instruction(kind, data, pos): |
302 For union expressions (such as `*|text()`), this function returns one | 329 For union expressions (such as `*|text()`), this function returns one |
303 test for each operand in the union. For patch expressions that don't | 330 test for each operand in the union. For patch expressions that don't |
304 use the union operator, the function always returns a list of size 1. | 331 use the union operator, the function always returns a list of size 1. |
305 | 332 |
306 Each path test in turn is a sequence of tests that correspond to the | 333 Each path test in turn is a sequence of tests that correspond to the |
307 location steps, each tuples of the form `(closure, testfunc, predicates)` | 334 location steps, each tuples of the form `(axis, testfunc, predicates)` |
308 """ | 335 """ |
309 paths = [self._location_path()] | 336 paths = [self._location_path()] |
310 while self.cur_token == '|': | 337 while self.cur_token == '|': |
311 self.next_token() | 338 self.next_token() |
312 paths.append(self._location_path()) | 339 paths.append(self._location_path()) |
315 % self.cur_token) | 342 % self.cur_token) |
316 return paths | 343 return paths |
317 | 344 |
318 def _location_path(self): | 345 def _location_path(self): |
319 next_is_closure = True | 346 next_is_closure = True |
320 if self.cur_token.startswith('/'): | |
321 self.next_token() | |
322 | |
323 steps = [] | 347 steps = [] |
324 while True: | 348 while True: |
325 step = self._location_step() | |
326 steps.append((next_is_closure, step[1], step[2])) | |
327 next_is_closure = False | |
328 if self.cur_token == '//': | 349 if self.cur_token == '//': |
329 next_is_closure = True | 350 next_is_closure = True |
330 elif self.at_end or self.cur_token != '/': | 351 self.next_token() |
352 elif self.cur_token == '/' and not steps: | |
353 raise PathSyntaxError('Absolute location paths not supported') | |
354 | |
355 axis, node_test, predicates = self._location_step() | |
356 if axis == 'child' and next_is_closure: | |
357 axis = 'descendant-or-self' | |
358 steps.append((axis, node_test, predicates)) | |
359 next_is_closure = False | |
360 | |
361 if self.at_end or not self.cur_token.startswith('/'): | |
331 break | 362 break |
332 self.next_token() | 363 self.next_token() |
364 | |
333 return steps | 365 return steps |
334 | 366 |
335 def _location_step(self): | 367 def _location_step(self): |
336 step = [False, None, []] | |
337 if self.cur_token == '@': | 368 if self.cur_token == '@': |
338 axis = 'attribute' | 369 axis = 'attribute' |
339 self.next_token() | 370 self.next_token() |
371 elif self.cur_token == '.': | |
372 axis = 'self' | |
373 elif self.peek_token() == '::': | |
374 axis = self.cur_token | |
375 if axis not in ('attribute', 'child', 'descendant', | |
376 'descendant-or-self', 'namespace', 'self'): | |
377 raise PathSyntaxError('Unsupport axis "%s"' % axis) | |
378 self.next_token() | |
379 self.next_token() | |
340 else: | 380 else: |
341 # FIXME: support full axis specifiers (name followed by ::) | |
342 axis = 'child' | 381 axis = 'child' |
343 step[1] = self._node_test(axis) | 382 node_test = self._node_test(axis) |
383 predicates = [] | |
344 while self.cur_token == '[': | 384 while self.cur_token == '[': |
345 step[2].append(self._predicate()) | 385 predicates.append(self._predicate()) |
346 return step | 386 return axis, node_test, predicates |
347 | 387 |
348 def _node_test(self, axis=None): | 388 def _node_test(self, axis=None): |
349 test = None | 389 test = None |
350 if self.peek_token() in ('(', '()'): # Node type test | 390 if self.peek_token() in ('(', '()'): # Node type test |
351 test = self._node_type() | 391 test = self._node_type() |
354 if axis == 'attribute': | 394 if axis == 'attribute': |
355 if self.cur_token == '*': | 395 if self.cur_token == '*': |
356 test = _node_test_any_attribute() | 396 test = _node_test_any_attribute() |
357 else: | 397 else: |
358 test = _node_test_attribute_by_name(self.cur_token) | 398 test = _node_test_attribute_by_name(self.cur_token) |
399 elif axis == 'self': | |
400 test = _node_test_current_element() | |
359 else: | 401 else: |
360 if self.cur_token == '.': | 402 if self.cur_token == '*': |
361 test = _node_test_current_element() | |
362 elif self.cur_token == '*': | |
363 test = _node_test_any_child_element() | 403 test = _node_test_any_child_element() |
364 else: | 404 else: |
365 test = _node_test_child_element_by_name(self.cur_token) | 405 test = _node_test_child_element_by_name(self.cur_token) |
366 | 406 |
367 if not self.at_end: | 407 if not self.at_end: |
393 raise PathSyntaxError('%s() not allowed here' % name) | 433 raise PathSyntaxError('%s() not allowed here' % name) |
394 | 434 |
395 def _predicate(self): | 435 def _predicate(self): |
396 assert self.cur_token == '[' | 436 assert self.cur_token == '[' |
397 self.next_token() | 437 self.next_token() |
398 return self._or_expr() | 438 expr = self._or_expr() |
399 assert self.cur_token == ']' | 439 assert self.cur_token == ']' |
400 self.next_token() | 440 if not self.at_end: |
441 self.next_token() | |
442 return expr | |
401 | 443 |
402 def _or_expr(self): | 444 def _or_expr(self): |
403 expr = self._and_expr() | 445 expr = self._and_expr() |
404 while self.cur_token == 'or': | 446 while self.cur_token == 'or': |
405 self.next_token() | 447 self.next_token() |