changeset 111:2368c3becc52 trunk

Some fixes and more unit tests for the XPath engine.
author cmlenz
date Mon, 31 Jul 2006 17:25:43 +0000
parents 64ff134868c4
children 5f9af749341c
files examples/transform/template.xml markup/core.py markup/path.py markup/template.py markup/tests/path.py
diffstat 5 files changed, 131 insertions(+), 74 deletions(-) [+]
line wrap: on
line diff
--- a/examples/transform/template.xml
+++ b/examples/transform/template.xml
@@ -4,7 +4,7 @@
 <html xmlns:py="http://markup.edgewall.org/" py:strip="">
 
   <!--! Add a header DIV on top of every page with a logo image -->
-  <body py:match="body">
+  <body py:match="BODY|body">
     <div id="header">
       <img src="logo.png" alt="Bad Style"/>
     </div>
--- a/markup/core.py
+++ b/markup/core.py
@@ -117,7 +117,7 @@
             assert issubclass(cls, output.Serializer)
         serializer = cls(**kwargs)
 
-        stream = self
+        stream = _ensure(self)
         if filters is None:
             filters = [WhitespaceFilter()]
         for filter_ in filters:
@@ -142,6 +142,15 @@
 PI = Stream.PI
 COMMENT = Stream.COMMENT
 
+def _ensure(stream):
+    """Ensure that every item on the stream is actually a markup event."""
+    for event in stream:
+        try:
+            kind, data, pos = event
+        except ValueError:
+            kind, data, pos = event.totuple()
+        yield kind, data, pos
+
 
 class Attributes(list):
     """Sequence type that stores the attributes of an element.
--- a/markup/path.py
+++ b/markup/path.py
@@ -11,7 +11,25 @@
 # individuals. For the exact contribution history, see the revision
 # history and logs, available at http://markup.edgewall.org/log/.
 
-"""Basic support for evaluating XPath expressions against streams."""
+"""Basic support for evaluating XPath expressions against streams.
+
+>>> from markup.input import XML
+>>> doc = XML('''<doc>
+...  <items count="2">
+...       <item status="new">
+...         <summary>Foo</summary>
+...       </item>
+...       <item status="closed">
+...         <summary>Bar</summary>
+...       </item>
+...   </items>
+... </doc>''')
+>>> print doc.select('items/item[@status="closed"]/summary/text()')
+Bar
+
+Because the XPath engine operates on markup streams (as opposed to tree
+structures), it only implements a subset of the full XPath 1.0 language.
+"""
 
 import re
 
@@ -111,37 +129,44 @@
                     stack.append(cursor)
 
                 matched = None
-                closure, node_test, predicates = steps[cursor]
-
-                matched = node_test(kind, data, pos)
-                if matched and predicates:
-                    for predicate in predicates:
-                        if not predicate(kind, data, pos):
-                            matched = None
-                            break
+                while 1:
+                    axis, node_test, predicates = steps[cursor]
 
-                if matched:
-                    if cursor + 1 == size: # the last location step
-                        if ignore_context or len(stack) > 2 \
-                                          or node_test.axis != 'child':
-                            return matched
-                    else:
-                        stack[-1] += 1
+                    matched = node_test(kind, data, pos)
+                    if matched and predicates:
+                        for predicate in predicates:
+                            if not predicate(kind, data, pos):
+                                matched = None
+                                break
 
-                elif kind is START and not closure:
+                    if matched:
+                        if cursor + 1 == size: # the last location step
+                            if ignore_context or \
+                                    kind is not START or \
+                                    axis in ('attribute', 'self') or \
+                                    len(stack) > 2:
+                                return matched
+                        else:
+                            cursor += 1
+                            stack[-1] = cursor
+
+                    if axis != 'self':
+                        break
+
+                if not matched and kind is START \
+                               and not axis.startswith('descendant'):
                     # If this step is not a closure, it cannot be matched until
                     # the current element is closed... so we need to move the
                     # cursor back to the last closure and retest that against
                     # the current element
-                    closures = [step for step in steps[:cursor] if step[0]]
-                    closures.reverse()
-                    for closure, node_test, predicates in closures:
-                        cursor -= 1
-                        if closure:
-                            matched = node_test(kind, data, pos)
-                            if matched:
-                                cursor += 1
-                            break
+                    backsteps = [step for step in steps[:cursor]
+                                 if step[0].startswith('descendant')]
+                    backsteps.reverse()
+                    for axis, node_test, predicates in backsteps:
+                        matched = node_test(kind, data, pos)
+                        if not matched:
+                            cursor -= 1
+                        break
                     stack[-1] = cursor
 
             return None
@@ -189,7 +214,9 @@
 
 def _function_node():
     def _function_node(kind, data, pos):
-        return True
+        if kind is START:
+            return True
+        return kind, data, pos
     _function_node.axis = None
     return _function_node
 
@@ -304,7 +331,7 @@
         use the union operator, the function always returns a list of size 1.
         
         Each path test in turn is a sequence of tests that correspond to the
-        location steps, each tuples of the form `(closure, testfunc, predicates)`
+        location steps, each a tuple of the form `(axis, testfunc, predicates)`
         """
         paths = [self._location_path()]
         while self.cur_token == '|':
@@ -317,33 +344,46 @@
 
     def _location_path(self):
         next_is_closure = True
-        if self.cur_token.startswith('/'):
-            self.next_token()
-
         steps = []
         while True:
-            step = self._location_step()
-            steps.append((next_is_closure, step[1], step[2]))
-            next_is_closure = False
             if self.cur_token == '//':
                 next_is_closure = True
-            elif self.at_end or self.cur_token != '/':
+                self.next_token()
+            elif self.cur_token == '/' and not steps:
+                raise PathSyntaxError('Absolute location paths not supported')
+
+            axis, node_test, predicates = self._location_step()
+            if axis == 'child' and next_is_closure:
+                axis = 'descendant-or-self'
+            steps.append((axis, node_test, predicates))
+            next_is_closure = False
+
+            if self.at_end or not self.cur_token.startswith('/'):
                 break
             self.next_token()
+
         return steps
 
     def _location_step(self):
-        step = [False, None, []]
         if self.cur_token == '@':
             axis = 'attribute'
             self.next_token()
+        elif self.cur_token == '.':
+            axis = 'self'
+        elif self.peek_token() == '::':
+            axis = self.cur_token
+            if axis not in ('attribute', 'child', 'descendant',
+                            'descendant-or-self', 'namespace', 'self'):
+                raise PathSyntaxError('Unsupport axis "%s"' % axis)
+            self.next_token()
+            self.next_token()
         else:
-            # FIXME: support full axis specifiers (name followed by ::)
             axis = 'child'
-        step[1] = self._node_test(axis)
+        node_test = self._node_test(axis)
+        predicates = []
         while self.cur_token == '[':
-            step[2].append(self._predicate())
-        return step
+            predicates.append(self._predicate())
+        return axis, node_test, predicates
 
     def _node_test(self, axis=None):
         test = None
@@ -356,10 +396,10 @@
                     test = _node_test_any_attribute()
                 else:
                     test = _node_test_attribute_by_name(self.cur_token)
+            elif axis == 'self':
+                test = _node_test_current_element()
             else:
-                if self.cur_token == '.':
-                    test = _node_test_current_element()
-                elif self.cur_token == '*':
+                if self.cur_token == '*':
                     test = _node_test_any_child_element()
                 else:
                     test = _node_test_child_element_by_name(self.cur_token)
@@ -395,9 +435,11 @@
     def _predicate(self):
         assert self.cur_token == '['
         self.next_token()
-        return self._or_expr()
+        expr = self._or_expr()
         assert self.cur_token == ']'
-        self.next_token()
+        if not self.at_end:
+            self.next_token()
+        return expr
 
     def _or_expr(self):
         expr = self._and_expr()
--- a/markup/template.py
+++ b/markup/template.py
@@ -26,7 +26,7 @@
 from StringIO import StringIO
 
 from markup.core import Attributes, Namespace, Stream, StreamEventKind
-from markup.core import START, END, START_NS, END_NS, TEXT, COMMENT
+from markup.core import _ensure, START, END, START_NS, END_NS, TEXT, COMMENT
 from markup.eval import Expression
 from markup.input import XMLParser
 from markup.path import Path
@@ -848,20 +848,11 @@
             stream = filter_(iter(stream), ctxt)
         return Stream(stream)
 
-    def _ensure(self, stream, ctxt=None):
-        """Ensure that every item on the stream is actually a markup event."""
-        for event in stream:
-            try:
-                kind, data, pos = event
-            except ValueError:
-                kind, data, pos = event.totuple()
-            yield kind, data, pos
-
     def _eval(self, stream, ctxt=None):
         """Internal stream filter that evaluates any expressions in `START` and
         `TEXT` events.
         """
-        filters = (self._ensure, self._eval, self._match)
+        filters = (self._eval, self._match)
 
         for kind, data, pos in stream:
 
@@ -900,7 +891,7 @@
                     # Test if the expression evaluated to an iterable, in which
                     # case we yield the individual items
                     try:
-                        substream = iter(result)
+                        substream = _ensure(result)
                         for filter_ in filters:
                             substream = filter_(substream, ctxt)
                         for event in substream:
--- a/markup/tests/path.py
+++ b/markup/tests/path.py
@@ -15,53 +15,68 @@
 import unittest
 
 from markup.input import XML
-from markup.path import Path
+from markup.path import Path, PathSyntaxError
 
 
 class PathTestCase(unittest.TestCase):
 
+    def test_error_no_absolute_path(self):
+        self.assertRaises(PathSyntaxError, Path, '/root')
+
+    def test_error_unsupported_axis(self):
+        self.assertRaises(PathSyntaxError, Path, 'parent::ma')
+
     def test_1step(self):
         xml = XML('<root><elem/></root>')
         self.assertEqual('<elem/>', Path('elem').select(xml).render())
+        self.assertEqual('<elem/>', Path('child::elem').select(xml).render())
         self.assertEqual('<elem/>', Path('//elem').select(xml).render())
+        self.assertEqual('<elem/>', Path('descendant::elem').select(xml).render())
 
     def test_1step_self(self):
         xml = XML('<root><elem/></root>')
         self.assertEqual('<root><elem/></root>', Path('.').select(xml).render())
+        #self.assertEqual('<root><elem/></root>', Path('self::node()').select(xml).render())
 
     def test_1step_wildcard(self):
         xml = XML('<root><elem/></root>')
         self.assertEqual('<elem/>', Path('*').select(xml).render())
+        self.assertEqual('<elem/>', Path('child::node()').select(xml).render())
         self.assertEqual('<elem/>', Path('//*').select(xml).render())
 
     def test_1step_attribute(self):
-        path = Path('@foo')
-        self.assertEqual('', path.select(XML('<root/>')).render())
-        self.assertEqual('bar', path.select(XML('<root foo="bar"/>')).render())
+        self.assertEqual('', Path('@foo').select(XML('<root/>')).render())
+        xml = XML('<root foo="bar"/>')
+        self.assertEqual('bar', Path('@foo').select(xml).render())
+        self.assertEqual('bar', Path('./@foo').select(xml).render())
 
-    def test_1step_attribute(self):
-        path = Path('@foo')
-        self.assertEqual('', path.select(XML('<root/>')).render())
-        self.assertEqual('bar', path.select(XML('<root foo="bar"/>')).render())
+    def test_1step_text(self):
+        xml = XML('<root>Hey</root>')
+        self.assertEqual('Hey', Path('text()').select(xml).render())
+        self.assertEqual('Hey', Path('./text()').select(xml).render())
+        self.assertEqual('Hey', Path('//text()').select(xml).render())
+        self.assertEqual('Hey', Path('.//text()').select(xml).render())
 
     def test_2step(self):
         xml = XML('<root><foo/><bar/></root>')
-        self.assertEqual('<foo/><bar/>', Path('root/*').select(xml).render())
-        self.assertEqual('<bar/>', Path('root/bar').select(xml).render())
-        self.assertEqual('', Path('root/baz').select(xml).render())
+        self.assertEqual('<foo/><bar/>', Path('*').select(xml).render())
+        self.assertEqual('<bar/>', Path('bar').select(xml).render())
+        self.assertEqual('', Path('baz').select(xml).render())
 
     def test_2step_complex(self):
         xml = XML('<root><foo><bar/></foo></root>')
         self.assertEqual('<bar/>', Path('foo/bar').select(xml).render())
         self.assertEqual('<bar/>', Path('foo/*').select(xml).render())
-        self.assertEqual('', Path('root/bar').select(xml).render())
 
         xml = XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')
-        self.assertEqual('<bar id="2"/>', Path('root/bar').select(xml).render())
+        self.assertEqual('<bar id="1"/><bar id="2"/>',
+                         Path('bar').select(xml).render())
 
     def test_2step_text(self):
         xml = XML('<root><item>Foo</item></root>')
         self.assertEqual('Foo', Path('item/text()').select(xml).render())
+        self.assertEqual('Foo', Path('*/text()').select(xml).render())
+        self.assertEqual('Foo', Path('//text()').select(xml).render())
         xml = XML('<root><item>Foo</item><item>Bar</item></root>')
         self.assertEqual('FooBar', Path('item/text()').select(xml).render())
 
@@ -88,7 +103,7 @@
 
     def test_node_type_node(self):
         xml = XML('<root>Some text <br/>in here.</root>')
-        self.assertEqual('<root>Some text <br/>in here.</root>',
+        self.assertEqual('Some text <br/>in here.',
                          Path('node()').select(xml).render())
 
     def test_node_type_processing_instruction(self):
@@ -134,7 +149,7 @@
 
 def suite():
     suite = unittest.TestSuite()
-    #suite.addTest(doctest.DocTestSuite(Path.__module__))
+    suite.addTest(doctest.DocTestSuite(Path.__module__))
     suite.addTest(unittest.makeSuite(PathTestCase, 'test'))
     return suite
 
Copyright (C) 2012-2017 Edgewall Software