# HG changeset patch # User cmlenz # Date 1154366743 0 # Node ID 8a4d9064f36375b4012636ca3387ca04cd03358b # Parent 44fbc30d78cd310401619728c3711d06a8915db1 Some fixes and more unit tests for the XPath engine. diff --git a/examples/transform/template.xml b/examples/transform/template.xml --- a/examples/transform/template.xml +++ b/examples/transform/template.xml @@ -4,7 +4,7 @@ - + diff --git a/markup/core.py b/markup/core.py --- a/markup/core.py +++ b/markup/core.py @@ -117,7 +117,7 @@ assert issubclass(cls, output.Serializer) serializer = cls(**kwargs) - stream = self + stream = _ensure(self) if filters is None: filters = [WhitespaceFilter()] for filter_ in filters: @@ -142,6 +142,15 @@ PI = Stream.PI COMMENT = Stream.COMMENT +def _ensure(stream): + """Ensure that every item on the stream is actually a markup event.""" + for event in stream: + try: + kind, data, pos = event + except ValueError: + kind, data, pos = event.totuple() + yield kind, data, pos + class Attributes(list): """Sequence type that stores the attributes of an element. diff --git a/markup/path.py b/markup/path.py --- a/markup/path.py +++ b/markup/path.py @@ -11,7 +11,25 @@ # individuals. For the exact contribution history, see the revision # history and logs, available at http://markup.edgewall.org/log/. -"""Basic support for evaluating XPath expressions against streams.""" +"""Basic support for evaluating XPath expressions against streams. + +>>> from markup.input import XML +>>> doc = XML(''' +... +... +... Foo +... +... +... Bar +... +... +... ''') +>>> print doc.select('items/item[@status="closed"]/summary/text()') +Bar + +Because the XPath engine operates on markup streams (as opposed to tree +structures), it only implements a subset of the full XPath 1.0 language. +""" import re @@ -111,37 +129,44 @@ stack.append(cursor) matched = None - closure, node_test, predicates = steps[cursor] - - matched = node_test(kind, data, pos) - if matched and predicates: - for predicate in predicates: - if not predicate(kind, data, pos): - matched = None - break + while 1: + axis, node_test, predicates = steps[cursor] - if matched: - if cursor + 1 == size: # the last location step - if ignore_context or len(stack) > 2 \ - or node_test.axis != 'child': - return matched - else: - stack[-1] += 1 + matched = node_test(kind, data, pos) + if matched and predicates: + for predicate in predicates: + if not predicate(kind, data, pos): + matched = None + break - elif kind is START and not closure: + if matched: + if cursor + 1 == size: # the last location step + if ignore_context or \ + kind is not START or \ + axis in ('attribute', 'self') or \ + len(stack) > 2: + return matched + else: + cursor += 1 + stack[-1] = cursor + + if axis != 'self': + break + + if not matched and kind is START \ + and not axis.startswith('descendant'): # If this step is not a closure, it cannot be matched until # the current element is closed... so we need to move the # cursor back to the last closure and retest that against # the current element - closures = [step for step in steps[:cursor] if step[0]] - closures.reverse() - for closure, node_test, predicates in closures: - cursor -= 1 - if closure: - matched = node_test(kind, data, pos) - if matched: - cursor += 1 - break + backsteps = [step for step in steps[:cursor] + if step[0].startswith('descendant')] + backsteps.reverse() + for axis, node_test, predicates in backsteps: + matched = node_test(kind, data, pos) + if not matched: + cursor -= 1 + break stack[-1] = cursor return None @@ -189,7 +214,9 @@ def _function_node(): def _function_node(kind, data, pos): - return True + if kind is START: + return True + return kind, data, pos _function_node.axis = None return _function_node @@ -304,7 +331,7 @@ use the union operator, the function always returns a list of size 1. Each path test in turn is a sequence of tests that correspond to the - location steps, each tuples of the form `(closure, testfunc, predicates)` + location steps, each tuples of the form `(axis, testfunc, predicates)` """ paths = [self._location_path()] while self.cur_token == '|': @@ -317,33 +344,46 @@ def _location_path(self): next_is_closure = True - if self.cur_token.startswith('/'): - self.next_token() - steps = [] while True: - step = self._location_step() - steps.append((next_is_closure, step[1], step[2])) - next_is_closure = False if self.cur_token == '//': next_is_closure = True - elif self.at_end or self.cur_token != '/': + self.next_token() + elif self.cur_token == '/' and not steps: + raise PathSyntaxError('Absolute location paths not supported') + + axis, node_test, predicates = self._location_step() + if axis == 'child' and next_is_closure: + axis = 'descendant-or-self' + steps.append((axis, node_test, predicates)) + next_is_closure = False + + if self.at_end or not self.cur_token.startswith('/'): break self.next_token() + return steps def _location_step(self): - step = [False, None, []] if self.cur_token == '@': axis = 'attribute' self.next_token() + elif self.cur_token == '.': + axis = 'self' + elif self.peek_token() == '::': + axis = self.cur_token + if axis not in ('attribute', 'child', 'descendant', + 'descendant-or-self', 'namespace', 'self'): + raise PathSyntaxError('Unsupport axis "%s"' % axis) + self.next_token() + self.next_token() else: - # FIXME: support full axis specifiers (name followed by ::) axis = 'child' - step[1] = self._node_test(axis) + node_test = self._node_test(axis) + predicates = [] while self.cur_token == '[': - step[2].append(self._predicate()) - return step + predicates.append(self._predicate()) + return axis, node_test, predicates def _node_test(self, axis=None): test = None @@ -356,10 +396,10 @@ test = _node_test_any_attribute() else: test = _node_test_attribute_by_name(self.cur_token) + elif axis == 'self': + test = _node_test_current_element() else: - if self.cur_token == '.': - test = _node_test_current_element() - elif self.cur_token == '*': + if self.cur_token == '*': test = _node_test_any_child_element() else: test = _node_test_child_element_by_name(self.cur_token) @@ -395,9 +435,11 @@ def _predicate(self): assert self.cur_token == '[' self.next_token() - return self._or_expr() + expr = self._or_expr() assert self.cur_token == ']' - self.next_token() + if not self.at_end: + self.next_token() + return expr def _or_expr(self): expr = self._and_expr() diff --git a/markup/template.py b/markup/template.py --- a/markup/template.py +++ b/markup/template.py @@ -26,7 +26,7 @@ from StringIO import StringIO from markup.core import Attributes, Namespace, Stream, StreamEventKind -from markup.core import START, END, START_NS, END_NS, TEXT, COMMENT +from markup.core import _ensure, START, END, START_NS, END_NS, TEXT, COMMENT from markup.eval import Expression from markup.input import XMLParser from markup.path import Path @@ -848,20 +848,11 @@ stream = filter_(iter(stream), ctxt) return Stream(stream) - def _ensure(self, stream, ctxt=None): - """Ensure that every item on the stream is actually a markup event.""" - for event in stream: - try: - kind, data, pos = event - except ValueError: - kind, data, pos = event.totuple() - yield kind, data, pos - def _eval(self, stream, ctxt=None): """Internal stream filter that evaluates any expressions in `START` and `TEXT` events. """ - filters = (self._ensure, self._eval, self._match) + filters = (self._eval, self._match) for kind, data, pos in stream: @@ -900,7 +891,7 @@ # Test if the expression evaluated to an iterable, in which # case we yield the individual items try: - substream = iter(result) + substream = _ensure(result) for filter_ in filters: substream = filter_(substream, ctxt) for event in substream: diff --git a/markup/tests/path.py b/markup/tests/path.py --- a/markup/tests/path.py +++ b/markup/tests/path.py @@ -15,53 +15,68 @@ import unittest from markup.input import XML -from markup.path import Path +from markup.path import Path, PathSyntaxError class PathTestCase(unittest.TestCase): + def test_error_no_absolute_path(self): + self.assertRaises(PathSyntaxError, Path, '/root') + + def test_error_unsupported_axis(self): + self.assertRaises(PathSyntaxError, Path, 'parent::ma') + def test_1step(self): xml = XML('') self.assertEqual('', Path('elem').select(xml).render()) + self.assertEqual('', Path('child::elem').select(xml).render()) self.assertEqual('', Path('//elem').select(xml).render()) + self.assertEqual('', Path('descendant::elem').select(xml).render()) def test_1step_self(self): xml = XML('') self.assertEqual('', Path('.').select(xml).render()) + #self.assertEqual('', Path('self::node()').select(xml).render()) def test_1step_wildcard(self): xml = XML('') self.assertEqual('', Path('*').select(xml).render()) + self.assertEqual('', Path('child::node()').select(xml).render()) self.assertEqual('', Path('//*').select(xml).render()) def test_1step_attribute(self): - path = Path('@foo') - self.assertEqual('', path.select(XML('')).render()) - self.assertEqual('bar', path.select(XML('')).render()) + self.assertEqual('', Path('@foo').select(XML('')).render()) + xml = XML('') + self.assertEqual('bar', Path('@foo').select(xml).render()) + self.assertEqual('bar', Path('./@foo').select(xml).render()) - def test_1step_attribute(self): - path = Path('@foo') - self.assertEqual('', path.select(XML('')).render()) - self.assertEqual('bar', path.select(XML('')).render()) + def test_1step_text(self): + xml = XML('Hey') + self.assertEqual('Hey', Path('text()').select(xml).render()) + self.assertEqual('Hey', Path('./text()').select(xml).render()) + self.assertEqual('Hey', Path('//text()').select(xml).render()) + self.assertEqual('Hey', Path('.//text()').select(xml).render()) def test_2step(self): xml = XML('') - self.assertEqual('', Path('root/*').select(xml).render()) - self.assertEqual('', Path('root/bar').select(xml).render()) - self.assertEqual('', Path('root/baz').select(xml).render()) + self.assertEqual('', Path('*').select(xml).render()) + self.assertEqual('', Path('bar').select(xml).render()) + self.assertEqual('', Path('baz').select(xml).render()) def test_2step_complex(self): xml = XML('') self.assertEqual('', Path('foo/bar').select(xml).render()) self.assertEqual('', Path('foo/*').select(xml).render()) - self.assertEqual('', Path('root/bar').select(xml).render()) xml = XML('') - self.assertEqual('', Path('root/bar').select(xml).render()) + self.assertEqual('', + Path('bar').select(xml).render()) def test_2step_text(self): xml = XML('Foo') self.assertEqual('Foo', Path('item/text()').select(xml).render()) + self.assertEqual('Foo', Path('*/text()').select(xml).render()) + self.assertEqual('Foo', Path('//text()').select(xml).render()) xml = XML('FooBar') self.assertEqual('FooBar', Path('item/text()').select(xml).render()) @@ -88,7 +103,7 @@ def test_node_type_node(self): xml = XML('Some text
in here.
') - self.assertEqual('Some text
in here.
', + self.assertEqual('Some text
in here.', Path('node()').select(xml).render()) def test_node_type_processing_instruction(self): @@ -134,7 +149,7 @@ def suite(): suite = unittest.TestSuite() - #suite.addTest(doctest.DocTestSuite(Path.__module__)) + suite.addTest(doctest.DocTestSuite(Path.__module__)) suite.addTest(unittest.makeSuite(PathTestCase, 'test')) return suite