diff markup/path.py @ 111:8a4d9064f363

Some fixes and more unit tests for the XPath engine.
author cmlenz
date Mon, 31 Jul 2006 17:25:43 +0000
parents 61fa4cadb766
children 8f53c3ad385c
line wrap: on
line diff
--- a/markup/path.py
+++ b/markup/path.py
@@ -11,7 +11,25 @@
 # individuals. For the exact contribution history, see the revision
 # history and logs, available at http://markup.edgewall.org/log/.
 
-"""Basic support for evaluating XPath expressions against streams."""
+"""Basic support for evaluating XPath expressions against streams.
+
+>>> from markup.input import XML
+>>> doc = XML('''<doc>
+...  <items count="2">
+...       <item status="new">
+...         <summary>Foo</summary>
+...       </item>
+...       <item status="closed">
+...         <summary>Bar</summary>
+...       </item>
+...   </items>
+... </doc>''')
+>>> print doc.select('items/item[@status="closed"]/summary/text()')
+Bar
+
+Because the XPath engine operates on markup streams (as opposed to tree
+structures), it only implements a subset of the full XPath 1.0 language.
+"""
 
 import re
 
@@ -111,37 +129,44 @@
                     stack.append(cursor)
 
                 matched = None
-                closure, node_test, predicates = steps[cursor]
-
-                matched = node_test(kind, data, pos)
-                if matched and predicates:
-                    for predicate in predicates:
-                        if not predicate(kind, data, pos):
-                            matched = None
-                            break
+                while 1:
+                    axis, node_test, predicates = steps[cursor]
 
-                if matched:
-                    if cursor + 1 == size: # the last location step
-                        if ignore_context or len(stack) > 2 \
-                                          or node_test.axis != 'child':
-                            return matched
-                    else:
-                        stack[-1] += 1
+                    matched = node_test(kind, data, pos)
+                    if matched and predicates:
+                        for predicate in predicates:
+                            if not predicate(kind, data, pos):
+                                matched = None
+                                break
 
-                elif kind is START and not closure:
+                    if matched:
+                        if cursor + 1 == size: # the last location step
+                            if ignore_context or \
+                                    kind is not START or \
+                                    axis in ('attribute', 'self') or \
+                                    len(stack) > 2:
+                                return matched
+                        else:
+                            cursor += 1
+                            stack[-1] = cursor
+
+                    if axis != 'self':
+                        break
+
+                if not matched and kind is START \
+                               and not axis.startswith('descendant'):
                     # If this step is not a closure, it cannot be matched until
                     # the current element is closed... so we need to move the
                     # cursor back to the last closure and retest that against
                     # the current element
-                    closures = [step for step in steps[:cursor] if step[0]]
-                    closures.reverse()
-                    for closure, node_test, predicates in closures:
-                        cursor -= 1
-                        if closure:
-                            matched = node_test(kind, data, pos)
-                            if matched:
-                                cursor += 1
-                            break
+                    backsteps = [step for step in steps[:cursor]
+                                 if step[0].startswith('descendant')]
+                    backsteps.reverse()
+                    for axis, node_test, predicates in backsteps:
+                        matched = node_test(kind, data, pos)
+                        if not matched:
+                            cursor -= 1
+                        break
                     stack[-1] = cursor
 
             return None
@@ -189,7 +214,9 @@
 
 def _function_node():
     def _function_node(kind, data, pos):
-        return True
+        if kind is START:
+            return True
+        return kind, data, pos
     _function_node.axis = None
     return _function_node
 
@@ -304,7 +331,7 @@
         use the union operator, the function always returns a list of size 1.
         
         Each path test in turn is a sequence of tests that correspond to the
-        location steps, each tuples of the form `(closure, testfunc, predicates)`
+        location steps, each tuples of the form `(axis, testfunc, predicates)`
         """
         paths = [self._location_path()]
         while self.cur_token == '|':
@@ -317,33 +344,46 @@
 
     def _location_path(self):
         next_is_closure = True
-        if self.cur_token.startswith('/'):
-            self.next_token()
-
         steps = []
         while True:
-            step = self._location_step()
-            steps.append((next_is_closure, step[1], step[2]))
-            next_is_closure = False
             if self.cur_token == '//':
                 next_is_closure = True
-            elif self.at_end or self.cur_token != '/':
+                self.next_token()
+            elif self.cur_token == '/' and not steps:
+                raise PathSyntaxError('Absolute location paths not supported')
+
+            axis, node_test, predicates = self._location_step()
+            if axis == 'child' and next_is_closure:
+                axis = 'descendant-or-self'
+            steps.append((axis, node_test, predicates))
+            next_is_closure = False
+
+            if self.at_end or not self.cur_token.startswith('/'):
                 break
             self.next_token()
+
         return steps
 
     def _location_step(self):
-        step = [False, None, []]
         if self.cur_token == '@':
             axis = 'attribute'
             self.next_token()
+        elif self.cur_token == '.':
+            axis = 'self'
+        elif self.peek_token() == '::':
+            axis = self.cur_token
+            if axis not in ('attribute', 'child', 'descendant',
+                            'descendant-or-self', 'namespace', 'self'):
+                raise PathSyntaxError('Unsupport axis "%s"' % axis)
+            self.next_token()
+            self.next_token()
         else:
-            # FIXME: support full axis specifiers (name followed by ::)
             axis = 'child'
-        step[1] = self._node_test(axis)
+        node_test = self._node_test(axis)
+        predicates = []
         while self.cur_token == '[':
-            step[2].append(self._predicate())
-        return step
+            predicates.append(self._predicate())
+        return axis, node_test, predicates
 
     def _node_test(self, axis=None):
         test = None
@@ -356,10 +396,10 @@
                     test = _node_test_any_attribute()
                 else:
                     test = _node_test_attribute_by_name(self.cur_token)
+            elif axis == 'self':
+                test = _node_test_current_element()
             else:
-                if self.cur_token == '.':
-                    test = _node_test_current_element()
-                elif self.cur_token == '*':
+                if self.cur_token == '*':
                     test = _node_test_any_child_element()
                 else:
                     test = _node_test_child_element_by_name(self.cur_token)
@@ -395,9 +435,11 @@
     def _predicate(self):
         assert self.cur_token == '['
         self.next_token()
-        return self._or_expr()
+        expr = self._or_expr()
         assert self.cur_token == ']'
-        self.next_token()
+        if not self.at_end:
+            self.next_token()
+        return expr
 
     def _or_expr(self):
         expr = self._and_expr()
Copyright (C) 2012-2017 Edgewall Software