genshi/genshi-test: markup/path.py comparison

comparison markup/path.py @ 111:8a4d9064f363

Some fixes and more unit tests for the XPath engine.

author	cmlenz
date	Mon, 31 Jul 2006 17:25:43 +0000
parents	61fa4cadb766
children	8f53c3ad385c

comparison

equal deleted inserted replaced

-:44fbc30d78cd
+:8a4d9064f363
 #
 # This software consists of voluntary contributions made by many
 # individuals. For the exact contribution history, see the revision
 # history and logs, available at http://markup.edgewall.org/log/.
-"""Basic support for evaluating XPath expressions against streams."""
+"""Basic support for evaluating XPath expressions against streams.
+>>> from markup.input import XML
+>>> doc = XML('''<doc>
+...  <items count="2">
+...       <item status="new">
+...         <summary>Foo</summary>
+...       </item>
+...       <item status="closed">
+...         <summary>Bar</summary>
+...       </item>
+...   </items>
+... </doc>''')
+>>> print doc.select('items/item[@status="closed"]/summary/text()')
+Bar
+Because the XPath engine operates on markup streams (as opposed to tree
+structures), it only implements a subset of the full XPath 1.0 language.
+"""
 import re
 from markup.core import QName, Stream, START, END, TEXT, COMMENT, PI
 elif kind is START:
 stack.append(cursor)
 matched = None
-closure, node_test, predicates = steps[cursor]
+while 1:
+axis, node_test, predicates = steps[cursor]
-matched = node_test(kind, data, pos)
-if matched and predicates:
+matched = node_test(kind, data, pos)
-for predicate in predicates:
+if matched and predicates:
-if not predicate(kind, data, pos):
+for predicate in predicates:
-matched = None
+if not predicate(kind, data, pos):
-break
+matched = None
+break
-if matched:
-if cursor + 1 == size: # the last location step
+if matched:
-if ignore_context or len(stack) > 2 \
+if cursor + 1 == size: # the last location step
-or node_test.axis != 'child':
+if ignore_context or \
-return matched
+kind is not START or \
-else:
+axis in ('attribute', 'self') or \
-stack[-1] += 1
+len(stack) > 2:
+return matched
-elif kind is START and not closure:
+else:
+cursor += 1
+stack[-1] = cursor
+if axis != 'self':
+break
+if not matched and kind is START \
+and not axis.startswith('descendant'):
 # If this step is not a closure, it cannot be matched until
 # the current element is closed... so we need to move the
 # cursor back to the last closure and retest that against
 # the current element
-closures = [step for step in steps[:cursor] if step[0]]
+backsteps = [step for step in steps[:cursor]
-closures.reverse()
+if step[0].startswith('descendant')]
-for closure, node_test, predicates in closures:
+backsteps.reverse()
-cursor -= 1
+for axis, node_test, predicates in backsteps:
-if closure:
+matched = node_test(kind, data, pos)
-matched = node_test(kind, data, pos)
+if not matched:
-if matched:
+cursor -= 1
-cursor += 1
+break
-break
 stack[-1] = cursor
 return None
 return _test
 _function_comment.axis = None
 return _function_comment
 def _function_node():
 def _function_node(kind, data, pos):
-return True
+if kind is START:
+return True
+return kind, data, pos
 _function_node.axis = None
 return _function_node
 def _function_processing_instruction(name=None):
 def _function_processing_instruction(kind, data, pos):
 For union expressions (such as `*|text()`), this function returns one
 test for each operand in the union. For path expressions that don't
 use the union operator, the function always returns a list of size 1.
 Each path test in turn is a sequence of tests that correspond to the
-location steps, each tuples of the form `(closure, testfunc, predicates)`
+location steps, each a tuple of the form `(axis, testfunc, predicates)`
 """
 paths = [self._location_path()]
 while self.cur_token == '|':
 self.next_token()
 paths.append(self._location_path())
 % self.cur_token)
 return paths
 def _location_path(self):
 next_is_closure = True
-if self.cur_token.startswith('/'):
-self.next_token()
 steps = []
 while True:
-step = self._location_step()
-steps.append((next_is_closure, step[1], step[2]))
-next_is_closure = False
 if self.cur_token == '//':
 next_is_closure = True
-elif self.at_end or self.cur_token != '/':
+self.next_token()
+elif self.cur_token == '/' and not steps:
+raise PathSyntaxError('Absolute location paths not supported')
+axis, node_test, predicates = self._location_step()
+if axis == 'child' and next_is_closure:
+axis = 'descendant-or-self'
+steps.append((axis, node_test, predicates))
+next_is_closure = False
+if self.at_end or not self.cur_token.startswith('/'):
 break
 self.next_token()
 return steps
 def _location_step(self):
-step = [False, None, []]
 if self.cur_token == '@':
 axis = 'attribute'
 self.next_token()
+elif self.cur_token == '.':
+axis = 'self'
+elif self.peek_token() == '::':
+axis = self.cur_token
+if axis not in ('attribute', 'child', 'descendant',
+'descendant-or-self', 'namespace', 'self'):
+raise PathSyntaxError('Unsupport axis "%s"' % axis)
+self.next_token()
+self.next_token()
 else:
-# FIXME: support full axis specifiers (name followed by ::)
 axis = 'child'
-step[1] = self._node_test(axis)
+node_test = self._node_test(axis)
+predicates = []
 while self.cur_token == '[':
-step[2].append(self._predicate())
+predicates.append(self._predicate())
-return step
+return axis, node_test, predicates
 def _node_test(self, axis=None):
 test = None
 if self.peek_token() in ('(', '()'): # Node type test
 test = self._node_type()
 if axis == 'attribute':
 if self.cur_token == '*':
 test = _node_test_any_attribute()
 else:
 test = _node_test_attribute_by_name(self.cur_token)
+elif axis == 'self':
+test = _node_test_current_element()
 else:
-if self.cur_token == '.':
+if self.cur_token == '*':
-test = _node_test_current_element()
-elif self.cur_token == '*':
 test = _node_test_any_child_element()
 else:
 test = _node_test_child_element_by_name(self.cur_token)
 if not self.at_end:
 raise PathSyntaxError('%s() not allowed here' % name)
 def _predicate(self):
 assert self.cur_token == '['
 self.next_token()
-return self._or_expr()
+expr = self._or_expr()
 assert self.cur_token == ']'
-self.next_token()
+if not self.at_end:
+self.next_token()
+return expr
 def _or_expr(self):
 expr = self._and_expr()
 while self.cur_token == 'or':
 self.next_token()

Mercurial > genshi > genshi-test

comparison markup/path.py @ 111:8a4d9064f363