# HG changeset patch
# User cmlenz
# Date 1154366743 0
# Node ID 2368c3becc5210105678e846888019a424dfdf40
# Parent 64ff134868c4dbd95fedb307ca56a56c9c128288
Some fixes and more unit tests for the XPath engine.
diff --git a/examples/transform/template.xml b/examples/transform/template.xml
--- a/examples/transform/template.xml
+++ b/examples/transform/template.xml
@@ -4,7 +4,7 @@
-
+
diff --git a/markup/core.py b/markup/core.py
--- a/markup/core.py
+++ b/markup/core.py
@@ -117,7 +117,7 @@
assert issubclass(cls, output.Serializer)
serializer = cls(**kwargs)
- stream = self
+ stream = _ensure(self)
if filters is None:
filters = [WhitespaceFilter()]
for filter_ in filters:
@@ -142,6 +142,15 @@
PI = Stream.PI
COMMENT = Stream.COMMENT
+def _ensure(stream):
+ """Ensure that every item on the stream is actually a markup event."""
+ for event in stream:
+ try:
+ kind, data, pos = event
+ except ValueError:
+ kind, data, pos = event.totuple()
+ yield kind, data, pos
+
class Attributes(list):
"""Sequence type that stores the attributes of an element.
diff --git a/markup/path.py b/markup/path.py
--- a/markup/path.py
+++ b/markup/path.py
@@ -11,7 +11,25 @@
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://markup.edgewall.org/log/.
-"""Basic support for evaluating XPath expressions against streams."""
+"""Basic support for evaluating XPath expressions against streams.
+
+>>> from markup.input import XML
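+>>> doc = XML('''<doc>
+...  <items>
+...   <item status="new">
+...     <summary>Foo</summary>
+...   </item>
+...   <item status="closed">
+...     <summary>Bar</summary>
+...   </item>
+...  </items>
+... </doc>''')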
+>>> print doc.select('items/item[@status="closed"]/summary/text()')
+Bar
+
+Because the XPath engine operates on markup streams (as opposed to tree
+structures), it only implements a subset of the full XPath 1.0 language.
+"""
import re
@@ -111,37 +129,44 @@
stack.append(cursor)
matched = None
- closure, node_test, predicates = steps[cursor]
-
- matched = node_test(kind, data, pos)
- if matched and predicates:
- for predicate in predicates:
- if not predicate(kind, data, pos):
- matched = None
- break
+ while 1:
+ axis, node_test, predicates = steps[cursor]
- if matched:
- if cursor + 1 == size: # the last location step
- if ignore_context or len(stack) > 2 \
- or node_test.axis != 'child':
- return matched
- else:
- stack[-1] += 1
+ matched = node_test(kind, data, pos)
+ if matched and predicates:
+ for predicate in predicates:
+ if not predicate(kind, data, pos):
+ matched = None
+ break
- elif kind is START and not closure:
+ if matched:
+ if cursor + 1 == size: # the last location step
+ if ignore_context or \
+ kind is not START or \
+ axis in ('attribute', 'self') or \
+ len(stack) > 2:
+ return matched
+ else:
+ cursor += 1
+ stack[-1] = cursor
+
+ if axis != 'self':
+ break
+
+ if not matched and kind is START \
+ and not axis.startswith('descendant'):
# If this step is not a closure, it cannot be matched until
# the current element is closed... so we need to move the
# cursor back to the last closure and retest that against
# the current element
- closures = [step for step in steps[:cursor] if step[0]]
- closures.reverse()
- for closure, node_test, predicates in closures:
- cursor -= 1
- if closure:
- matched = node_test(kind, data, pos)
- if matched:
- cursor += 1
- break
+ backsteps = [step for step in steps[:cursor]
+ if step[0].startswith('descendant')]
+ backsteps.reverse()
+ for axis, node_test, predicates in backsteps:
+ matched = node_test(kind, data, pos)
+ if not matched:
+ cursor -= 1
+ break
stack[-1] = cursor
return None
@@ -189,7 +214,9 @@
def _function_node():
def _function_node(kind, data, pos):
- return True
+ if kind is START:
+ return True
+ return kind, data, pos
_function_node.axis = None
return _function_node
@@ -304,7 +331,7 @@
use the union operator, the function always returns a list of size 1.
Each path test in turn is a sequence of tests that correspond to the
- location steps, each tuples of the form `(closure, testfunc, predicates)`
+ location steps, each a tuple of the form `(axis, testfunc, predicates)`
"""
paths = [self._location_path()]
while self.cur_token == '|':
@@ -317,33 +344,46 @@
def _location_path(self):
next_is_closure = True
- if self.cur_token.startswith('/'):
- self.next_token()
-
steps = []
while True:
- step = self._location_step()
- steps.append((next_is_closure, step[1], step[2]))
- next_is_closure = False
if self.cur_token == '//':
next_is_closure = True
- elif self.at_end or self.cur_token != '/':
+ self.next_token()
+ elif self.cur_token == '/' and not steps:
+ raise PathSyntaxError('Absolute location paths not supported')
+
+ axis, node_test, predicates = self._location_step()
+ if axis == 'child' and next_is_closure:
+ axis = 'descendant-or-self'
+ steps.append((axis, node_test, predicates))
+ next_is_closure = False
+
+ if self.at_end or not self.cur_token.startswith('/'):
break
self.next_token()
+
return steps
def _location_step(self):
- step = [False, None, []]
if self.cur_token == '@':
axis = 'attribute'
self.next_token()
+ elif self.cur_token == '.':
+ axis = 'self'
+ elif self.peek_token() == '::':
+ axis = self.cur_token
+ if axis not in ('attribute', 'child', 'descendant',
+ 'descendant-or-self', 'namespace', 'self'):
+ raise PathSyntaxError('Unsupport axis "%s"' % axis)
+ self.next_token()
+ self.next_token()
else:
- # FIXME: support full axis specifiers (name followed by ::)
axis = 'child'
- step[1] = self._node_test(axis)
+ node_test = self._node_test(axis)
+ predicates = []
while self.cur_token == '[':
- step[2].append(self._predicate())
- return step
+ predicates.append(self._predicate())
+ return axis, node_test, predicates
def _node_test(self, axis=None):
test = None
@@ -356,10 +396,10 @@
test = _node_test_any_attribute()
else:
test = _node_test_attribute_by_name(self.cur_token)
+ elif axis == 'self':
+ test = _node_test_current_element()
else:
- if self.cur_token == '.':
- test = _node_test_current_element()
- elif self.cur_token == '*':
+ if self.cur_token == '*':
test = _node_test_any_child_element()
else:
test = _node_test_child_element_by_name(self.cur_token)
@@ -395,9 +435,11 @@
def _predicate(self):
assert self.cur_token == '['
self.next_token()
- return self._or_expr()
+ expr = self._or_expr()
assert self.cur_token == ']'
- self.next_token()
+ if not self.at_end:
+ self.next_token()
+ return expr
def _or_expr(self):
expr = self._and_expr()
diff --git a/markup/template.py b/markup/template.py
--- a/markup/template.py
+++ b/markup/template.py
@@ -26,7 +26,7 @@
from StringIO import StringIO
from markup.core import Attributes, Namespace, Stream, StreamEventKind
-from markup.core import START, END, START_NS, END_NS, TEXT, COMMENT
+from markup.core import _ensure, START, END, START_NS, END_NS, TEXT, COMMENT
from markup.eval import Expression
from markup.input import XMLParser
from markup.path import Path
@@ -848,20 +848,11 @@
stream = filter_(iter(stream), ctxt)
return Stream(stream)
- def _ensure(self, stream, ctxt=None):
- """Ensure that every item on the stream is actually a markup event."""
- for event in stream:
- try:
- kind, data, pos = event
- except ValueError:
- kind, data, pos = event.totuple()
- yield kind, data, pos
-
def _eval(self, stream, ctxt=None):
"""Internal stream filter that evaluates any expressions in `START` and
`TEXT` events.
"""
- filters = (self._ensure, self._eval, self._match)
+ filters = (self._eval, self._match)
for kind, data, pos in stream:
@@ -900,7 +891,7 @@
# Test if the expression evaluated to an iterable, in which
# case we yield the individual items
try:
- substream = iter(result)
+ substream = _ensure(result)
for filter_ in filters:
substream = filter_(substream, ctxt)
for event in substream:
diff --git a/markup/tests/path.py b/markup/tests/path.py
--- a/markup/tests/path.py
+++ b/markup/tests/path.py
@@ -15,53 +15,68 @@
import unittest
from markup.input import XML
-from markup.path import Path
+from markup.path import Path, PathSyntaxError
class PathTestCase(unittest.TestCase):
+ def test_error_no_absolute_path(self):
+ self.assertRaises(PathSyntaxError, Path, '/root')
+
+ def test_error_unsupported_axis(self):
+ self.assertRaises(PathSyntaxError, Path, 'parent::ma')
+
def test_1step(self):
xml = XML('')
self.assertEqual('', Path('elem').select(xml).render())
+ self.assertEqual('', Path('child::elem').select(xml).render())
self.assertEqual('', Path('//elem').select(xml).render())
+ self.assertEqual('', Path('descendant::elem').select(xml).render())
def test_1step_self(self):
xml = XML('')
self.assertEqual('', Path('.').select(xml).render())
+ #self.assertEqual('', Path('self::node()').select(xml).render())
def test_1step_wildcard(self):
xml = XML('')
self.assertEqual('', Path('*').select(xml).render())
+ self.assertEqual('', Path('child::node()').select(xml).render())
self.assertEqual('', Path('//*').select(xml).render())
def test_1step_attribute(self):
- path = Path('@foo')
- self.assertEqual('', path.select(XML('')).render())
- self.assertEqual('bar', path.select(XML('')).render())
+ self.assertEqual('', Path('@foo').select(XML('')).render())
+ xml = XML('')
+ self.assertEqual('bar', Path('@foo').select(xml).render())
+ self.assertEqual('bar', Path('./@foo').select(xml).render())
- def test_1step_attribute(self):
- path = Path('@foo')
- self.assertEqual('', path.select(XML('')).render())
- self.assertEqual('bar', path.select(XML('')).render())
+ def test_1step_text(self):
+ xml = XML('Hey')
+ self.assertEqual('Hey', Path('text()').select(xml).render())
+ self.assertEqual('Hey', Path('./text()').select(xml).render())
+ self.assertEqual('Hey', Path('//text()').select(xml).render())
+ self.assertEqual('Hey', Path('.//text()').select(xml).render())
def test_2step(self):
xml = XML('')
- self.assertEqual('', Path('root/*').select(xml).render())
- self.assertEqual('', Path('root/bar').select(xml).render())
- self.assertEqual('', Path('root/baz').select(xml).render())
+ self.assertEqual('', Path('*').select(xml).render())
+ self.assertEqual('', Path('bar').select(xml).render())
+ self.assertEqual('', Path('baz').select(xml).render())
def test_2step_complex(self):
xml = XML('')
self.assertEqual('', Path('foo/bar').select(xml).render())
self.assertEqual('', Path('foo/*').select(xml).render())
- self.assertEqual('', Path('root/bar').select(xml).render())
xml = XML('')
- self.assertEqual('', Path('root/bar').select(xml).render())
+ self.assertEqual('',
+ Path('bar').select(xml).render())
def test_2step_text(self):
xml = XML('- Foo
')
self.assertEqual('Foo', Path('item/text()').select(xml).render())
+ self.assertEqual('Foo', Path('*/text()').select(xml).render())
+ self.assertEqual('Foo', Path('//text()').select(xml).render())
xml = XML('- Foo
- Bar
')
self.assertEqual('FooBar', Path('item/text()').select(xml).render())
@@ -88,7 +103,7 @@
def test_node_type_node(self):
xml = XML('Some text
in here.')
- self.assertEqual('Some text
in here.',
+ self.assertEqual('Some text
in here.',
Path('node()').select(xml).render())
def test_node_type_processing_instruction(self):
@@ -134,7 +149,7 @@
def suite():
suite = unittest.TestSuite()
- #suite.addTest(doctest.DocTestSuite(Path.__module__))
+ suite.addTest(doctest.DocTestSuite(Path.__module__))
suite.addTest(unittest.makeSuite(PathTestCase, 'test'))
return suite