# HG changeset patch # User cmlenz # Date 1154973267 0 # Node ID ac0bc4a6aeba655ff12c3537b080da904d2d846f # Parent b86f496f6035bb575c91ac00a5c27c210a56b086 Further cleanup of XPath engine. diff --git a/markup/path.py b/markup/path.py --- a/markup/path.py +++ b/markup/path.py @@ -78,10 +78,18 @@ @param text: the path expression """ self.source = text - self.paths = _PathParser(text).parse() + self.paths = PathParser(text).parse() def __repr__(self): - return '<%s "%s">' % (self.__class__.__name__, self.source) + paths = [] + for path in self.paths: + steps = [] + for axis, nodetest, predicates in path: + steps.append('%s::%s' % (axis, nodetest)) + for predicate in predicates: + steps.append('[%s]' % predicate) + paths.append('/'.join(steps)) + return '<%s "%s">' % (self.__class__.__name__, '|'.join(paths)) def select(self, stream): """Returns a substream of the given stream that matches the path. @@ -138,11 +146,10 @@ START (u'child', [(u'id', u'1')]) START (u'child', [(u'id', u'2')]) """ - paths = [(idx, steps, len(steps), [0]) - for idx, steps in enumerate(self.paths)] + paths = [(steps, len(steps), [0]) for steps in self.paths] def _test(kind, data, pos): - for idx, steps, size, stack in paths: + for steps, size, stack in paths: if not stack: continue cursor = stack[-1] @@ -150,15 +157,13 @@ if kind is END: stack.pop() continue - elif kind is START: stack.append(cursor) - matched = None while 1: - axis, node_test, predicates = steps[cursor] + axis, nodetest, predicates = steps[cursor] - matched = node_test(kind, data, pos) + matched = nodetest(kind, data, pos) if matched and predicates: for predicate in predicates: if not predicate(kind, data, pos): @@ -169,7 +174,7 @@ if cursor + 1 == size: # the last location step if ignore_context or \ kind is not START or \ - axis in (ATTRIBUTE, SELF) or \ + axis in (ATTRIBUTE, NAMESPACE, SELF) or \ len(stack) > 2: return matched else: @@ -188,8 +193,8 @@ backsteps = [step for step in steps[:cursor] if step[0] in (DESCENDANT, DESCENDANT_OR_SELF)] backsteps.reverse() - for axis, node_test, predicates in backsteps: - matched = node_test(kind, data, pos) + for axis, nodetest, predicates in backsteps: + matched = nodetest(kind, data, pos) if not matched: cursor -= 1 break @@ -200,133 +205,6 @@ return _test -def _node_test_current_element(): - def _node_test_current_element(kind, *_): - return kind is START - return _node_test_current_element - -def _node_test_any_child_element(): - def _node_test_any_child_element(kind, *_): - return kind is START - return _node_test_any_child_element - -def _node_test_child_element_by_name(name): - def _node_test_child_element_by_name(kind, data, _): - return kind is START and data[0].localname == name - return _node_test_child_element_by_name - -def _node_test_any_attribute(): - def _node_test_any_attribute(kind, data, _): - if kind is START and data[1]: - return data[1] - return _node_test_any_attribute - -def _node_test_attribute_by_name(name): - def _node_test_attribute_by_name(kind, data, pos): - if kind is START and name in data[1]: - return TEXT, data[1].get(name), pos - return _node_test_attribute_by_name - -def _function_comment(): - def _function_comment(kind, data, pos): - return kind is COMMENT and (kind, data, pos) - return _function_comment - -def _function_local_name(): - def _function_local_name(kind, data, pos): - if kind is START: - return TEXT, data[0].localname, pos - return _function_local_name - -def _function_name(): - def _function_name(kind, data, pos): - if kind is START: - return TEXT, data[0], pos - return _function_name - -def _function_namespace_uri(): - def _function_namespace_uri(kind, data, pos): - if kind is START: - return TEXT, data[0].namespace, pos - return _function_namespace_uri - -def _function_node(): - def _function_node(kind, data, pos): - if kind is START: - return True - return kind, data, pos - return _function_node - -def _function_not(expr): - def _function_not(kind, data, pos): - return not expr(kind, data, pos) - return _function_not - -def _function_processing_instruction(name=None): - def _function_processing_instruction(kind, data, pos): - if kind is PI and (not name or data[0] == name): - return (kind, data, pos) - return _function_processing_instruction - -def _function_text(): - def _function_text(kind, data, pos): - return kind is TEXT and (kind, data, pos) - return _function_text - -def _literal_string(text): - def _literal_string(*_): - return TEXT, text, (None, -1, -1) - return _literal_string - -def _operator_eq(lval, rval): - def _operator_eq(kind, data, pos): - lv = lval(kind, data, pos) - if type(lv) is tuple: - lv = lv[1] - rv = rval(kind, data, pos) - if type(rv) is tuple: - rv = rv[1] - return lv == rv - return _operator_eq - -def _operator_neq(lval, rval): - def _operator_neq(kind, data, pos): - lv = lval(kind, data, pos) - if type(lv) is tuple: - lv = lv[1] - rv = rval(kind, data, pos) - if type(rv) is tuple: - rv = rv[1] - return lv != rv - return _operator_neq - -def _operator_and(lval, rval): - def _operator_and(kind, data, pos): - lv = lval(kind, data, pos) - if type(lv) is tuple: - lv = lv[1] - if not lv: - return False - rv = rval(kind, data, pos) - if type(rv) is tuple: - rv = rv[1] - return bool(rv) - return _operator_and - -def _operator_or(lval, rval): - def _operator_or(kind, data, pos): - lv = lval(kind, data, pos) - if type(lv) is tuple: - lv = lv[1] - if lv: - return True - rv = rval(kind, data, pos) - if type(rv) is tuple: - rv = rv[1] - return bool(rv) - return _operator_or - - class PathSyntaxError(Exception): """Exception raised when an XPath expression is syntactically incorrect.""" @@ -339,7 +217,7 @@ self.offset = offset -class _PathParser(object): +class PathParser(object): """Tokenizes and parses an XPath expression.""" _QUOTES = (("'", "'"), ('"', '"')) @@ -390,20 +268,20 @@ return paths def _location_path(self): - next_is_closure = True steps = [] while True: if self.cur_token == '//': - next_is_closure = True + steps.append((DESCENDANT_OR_SELF, NodeTest(), [])) self.next_token() elif self.cur_token == '/' and not steps: raise PathSyntaxError('Absolute location paths not supported') - axis, node_test, predicates = self._location_step() - if axis is CHILD and next_is_closure: - axis = DESCENDANT_OR_SELF - steps.append((axis, node_test, predicates)) - next_is_closure = False + axis, nodetest, predicates = self._location_step() + if not axis: + # The default axis for the first step is "descendant", for other + # steps it's "child" + axis = steps and CHILD or DESCENDANT + steps.append((axis, nodetest, predicates)) if self.at_end or not self.cur_token.startswith('/'): break @@ -417,6 +295,8 @@ self.next_token() elif self.cur_token == '.': axis = SELF + elif self.cur_token == '..': + raise PathSyntaxError('Unsupported axis "parent"') elif self.peek_token() == '::': axis = Axis.forname(self.cur_token) if axis is None: @@ -424,12 +304,12 @@ self.next_token() self.next_token() else: - axis = CHILD - node_test = self._node_test(axis) + axis = None + nodetest = self._node_test(axis or CHILD) predicates = [] while self.cur_token == '[': predicates.append(self._predicate()) - return axis, node_test, predicates + return axis, nodetest, predicates def _node_test(self, axis=None): test = None @@ -437,18 +317,12 @@ test = self._node_type() else: # Name test - if axis is ATTRIBUTE: - if self.cur_token == '*': - test = _node_test_any_attribute() - else: - test = _node_test_attribute_by_name(self.cur_token) - elif axis is SELF: - test = _node_test_current_element() + if self.cur_token == '*': + test = PrincipalTypeTest(axis) + elif self.cur_token == '.': + test = NodeTest() else: - if self.cur_token == '*': - test = _node_test_any_child_element() - else: - test = _node_test_child_element_by_name(self.cur_token) + test = LocalNameTest(axis, self.cur_token) if not self.at_end: self.next_token() @@ -457,26 +331,22 @@ def _node_type(self): name = self.cur_token self.next_token() - if name == 'comment': - return _function_comment() - elif name == 'node': - return _function_node() - elif name == 'processing-instruction': - args = [] - if self.cur_token != '()': - # The processing-instruction() function optionally accepts the - # name of the PI as argument, which must be a literal string - self.next_token() # ( - if self.cur_token != ')': - string = self.cur_token - if (string[0], string[-1]) in self._QUOTES: - string = string[1:-1] - args.append(string) - return _function_processing_instruction(*args) - elif name == 'text': - return _function_text() - else: + + args = [] + if self.cur_token != '()': + # The processing-instruction() function optionally accepts the + # name of the PI as argument, which must be a literal string + self.next_token() # ( + if self.cur_token != ')': + string = self.cur_token + if (string[0], string[-1]) in self._QUOTES: + string = string[1:-1] + args.append(string) + + cls = _nodetest_map.get(name) + if not cls: raise PathSyntaxError('%s() not allowed here' % name) + return cls(*args) def _predicate(self): assert self.cur_token == '[' @@ -493,20 +363,20 @@ expr = self._and_expr() while self.cur_token == 'or': self.next_token() - expr = _operator_or(expr, self._and_expr()) + expr = OrOperator(expr, self._and_expr()) return expr def _and_expr(self): expr = self._equality_expr() while self.cur_token == 'and': self.next_token() - expr = _operator_and(expr, self._equality_expr()) + expr = AndOperator(expr, self._equality_expr()) return expr def _equality_expr(self): expr = self._primary_expr() while self.cur_token in ('=', '!='): - op = {'=': _operator_eq, '!=': _operator_neq}[self.cur_token] + op = _operator_map.get(self.cur_token) self.next_token() expr = op(expr, self._primary_expr()) return expr @@ -515,10 +385,10 @@ token = self.cur_token if len(token) > 1 and (token[0], token[-1]) in self._QUOTES: self.next_token() - return _literal_string(token[1:-1]) + return StringLiteral(token[1:-1]) elif token[0].isdigit(): self.next_token() - return _literal_number(float(token)) + return NumberLiteral(float(token)) elif not self.at_end and self.peek_token().startswith('('): if self.next_token() == '()': args = [] @@ -528,19 +398,213 @@ while self.cur_token not in (',', ')'): args.append(self._or_expr()) self.next_token() - if token == 'local-name': - return _function_local_name(*args) - elif token == 'name': - return _function_name(*args) - elif token == 'namespace-uri': - return _function_namespace_uri(*args) - elif token == 'not': - return _function_not(*args) - else: + cls = _function_map.get(token) + if not cls: raise PathSyntaxError('Unsupported function "%s"' % token) + return cls(*args) else: axis = None if token == '@': axis = ATTRIBUTE self.next_token() return self._node_test(axis) + + +# Node tests + +class PrincipalTypeTest(object): + __slots__ = ['principal_type'] + def __init__(self, principal_type): + self.principal_type = principal_type + def __call__(self, kind, data, pos): + if kind is START: + if self.principal_type is ATTRIBUTE: + return data[1] or None + else: + return True + def __repr__(self): + return '*' + +class LocalNameTest(object): + __slots__ = ['principal_type', 'name'] + def __init__(self, principal_type, name): + self.principal_type = principal_type + self.name = name + def __call__(self, kind, data, pos): + if kind is START: + if self.principal_type is ATTRIBUTE and self.name in data[1]: + return TEXT, data[1].get(self.name), pos + else: + return data[0].localname == self.name + def __repr__(self): + return self.name + +class CommentNodeTest(object): + __slots__ = [] + def __call__(self, kind, data, pos): + return kind is COMMENT and (kind, data, pos) + def __repr__(self): + return 'comment()' + +class NodeTest(object): + __slots__ = [] + def __call__(self, kind, data, pos): + if kind is START: + return True + return kind, data, pos + def __repr__(self): + return 'node()' + +class ProcessingInstructionNodeTest(object): + __slots__ = ['target'] + def __init__(self, target=None): + self.target = target + def __call__(self, kind, data, pos): + if kind is PI and (not self.target or data[0] == self.target): + return (kind, data, pos) + def __repr__(self): + arg = '' + if self.target: + arg = '"' + self.target + '"' + return 'processing-instruction(%s)' % arg + +class TextNodeTest(object): + __slots__ = [] + def __call__(self, kind, data, pos): + return kind is TEXT and (kind, data, pos) + def __repr__(self): + return 'text()' + +_nodetest_map = {'comment': CommentNodeTest, 'node': NodeTest, + 'processing-instruction': ProcessingInstructionNodeTest, + 'text': TextNodeTest} + +# Functions + +class LocalNameFunction(object): + __slots__ = [] + def __call__(self, kind, data, pos): + if kind is START: + return TEXT, data[0].localname, pos + def __repr__(self): + return 'local-name()' + +class NameFunction(object): + __slots__ = [] + def __call__(self, kind, data, pos): + if kind is START: + return TEXT, data[0], pos + def __repr__(self): + return 'name()' + +class NamespaceUriFunction(object): + __slots__ = [] + def __call__(self, kind, data, pos): + if kind is START: + return TEXT, data[0].namespace, pos + def __repr__(self): + return 'namespace-uri()' + +class NotFunction(object): + __slots__ = ['expr'] + def __init__(self, expr): + self.expr = expr + def __call__(self, kind, data, pos): + return not self.expr(kind, data, pos) + def __repr__(self): + return 'not(%s)' % self.expr + +_function_map = {'local-name': LocalNameFunction, 'name': NameFunction, + 'namespace-uri': NamespaceUriFunction, 'not': NotFunction} + +# Literals + +class StringLiteral(object): + __slots__ = ['text'] + def __init__(self, text): + self.text = text + def __call__(self, kind, data, pos): + return TEXT, self.text, (None, -1, -1) + def __repr__(self): + return '"%s"' % self.text + +class NumberLiteral(object): + __slots__ = ['number'] + def __init__(self, number): + self.number = number + def __call__(self, kind, data, pos): + return TEXT, unicode(self.number), (None, -1, -1) + def __repr__(self): + return str(self.number) + +# Operators + +class AndOperator(object): + __slots__ = ['lval', 'rval'] + def __init__(self, lval, rval): + self.lval = lval + self.rval = rval + def __call__(self, kind, data, pos): + lv = self.lval(kind, data, pos) + if type(lv) is tuple: + lv = lv[1] + if not lv: + return False + rv = self.rval(kind, data, pos) + if type(rv) is tuple: + rv = rv[1] + return bool(rv) + def __repr__(self): + return '%s and %s' % (self.lval, self.rval) + +class EqualsOperator(object): + __slots__ = ['lval', 'rval'] + def __init__(self, lval, rval): + self.lval = lval + self.rval = rval + def __call__(self, kind, data, pos): + lv = self.lval(kind, data, pos) + if type(lv) is tuple: + lv = lv[1] + rv = self.rval(kind, data, pos) + if type(rv) is tuple: + rv = rv[1] + return lv == rv + def __repr__(self): + return '%s=%s' % (self.lval, self.rval) + +class NotEqualsOperator(object): + __slots__ = ['lval', 'rval'] + def __init__(self, lval, rval): + self.lval = lval + self.rval = rval + def __call__(self, kind, data, pos): + lv = self.lval(kind, data, pos) + if type(lv) is tuple: + lv = lv[1] + rv = self.rval(kind, data, pos) + if type(rv) is tuple: + rv = rv[1] + return lv != rv + def __repr__(self): + return '%s!=%s' % (self.lval, self.rval) + +class OrOperator(object): + __slots__ = ['lval', 'rval'] + def __init__(self, lval, rval): + self.lval = lval + self.rval = rval + def __call__(self, kind, data, pos): + lv = self.lval(kind, data, pos) + if type(lv) is tuple: + lv = lv[1] + if lv: + return True + rv = self.rval(kind, data, pos) + if type(rv) is tuple: + rv = rv[1] + return bool(rv) + def __repr__(self): + return '%s or %s' % (self.lval, self.rval) + +_operator_map = {'=': EqualsOperator, '!=': NotEqualsOperator} diff --git a/markup/tests/path.py b/markup/tests/path.py --- a/markup/tests/path.py +++ b/markup/tests/path.py @@ -24,25 +24,59 @@ self.assertRaises(PathSyntaxError, Path, '/root') def test_error_unsupported_axis(self): + self.assertRaises(PathSyntaxError, Path, '..') self.assertRaises(PathSyntaxError, Path, 'parent::ma') def test_1step(self): xml = XML('') - self.assertEqual('', Path('elem').select(xml).render()) - self.assertEqual('', Path('child::elem').select(xml).render()) - self.assertEqual('', Path('//elem').select(xml).render()) - self.assertEqual('', Path('descendant::elem').select(xml).render()) + + path = Path('elem') + self.assertEqual('', repr(path)) + self.assertEqual('', path.select(xml).render()) + + path = Path('child::elem') + self.assertEqual('', repr(path)) + self.assertEqual('', path.select(xml).render()) + + path = Path('//elem') + self.assertEqual('', + repr(path)) + self.assertEqual('', path.select(xml).render()) + + path = Path('descendant::elem') + self.assertEqual('', repr(path)) + self.assertEqual('', path.select(xml).render()) def test_1step_self(self): xml = XML('') - self.assertEqual('', Path('.').select(xml).render()) - #self.assertEqual('', Path('self::node()').select(xml).render()) + + path = Path('.') + self.assertEqual('', repr(path)) + self.assertEqual('', path.select(xml).render()) + + path = Path('self::node()') + self.assertEqual('', repr(path)) + self.assertEqual('', path.select(xml).render()) def test_1step_wildcard(self): xml = XML('') - self.assertEqual('', Path('*').select(xml).render()) + + path = Path('*') + self.assertEqual('', repr(path)) + self.assertEqual('', path.select(xml).render()) + + path = Path('child::*') + self.assertEqual('', repr(path)) + self.assertEqual('', path.select(xml).render()) + + path = Path('child::node()') + self.assertEqual('', repr(path)) self.assertEqual('', Path('child::node()').select(xml).render()) - self.assertEqual('', Path('//*').select(xml).render()) + + path = Path('//*') + self.assertEqual('', + repr(path)) + self.assertEqual('', path.select(xml).render()) def test_1step_attribute(self): self.assertEqual('', Path('@foo').select(XML('')).render()) @@ -69,8 +103,8 @@ self.assertEqual('', Path('foo/*').select(xml).render()) xml = XML('') - self.assertEqual('', - Path('bar').select(xml).render()) + self.assertEqual('', + Path('bar').select(xml).render()) def test_2step_text(self): xml = XML('Foo')