cmlenz@1: # -*- coding: utf-8 -*- cmlenz@1: # cmlenz@66: # Copyright (C) 2006 Edgewall Software cmlenz@1: # All rights reserved. cmlenz@1: # cmlenz@1: # This software is licensed as described in the file COPYING, which cmlenz@1: # you should have received as part of this distribution. The terms cmlenz@66: # are also available at http://markup.edgewall.org/wiki/License. cmlenz@1: # cmlenz@1: # This software consists of voluntary contributions made by many cmlenz@1: # individuals. For the exact contribution history, see the revision cmlenz@66: # history and logs, available at http://markup.edgewall.org/log/. cmlenz@1: cmlenz@111: """Basic support for evaluating XPath expressions against streams. cmlenz@111: cmlenz@111: >>> from markup.input import XML cmlenz@111: >>> doc = XML(''' cmlenz@111: ... cmlenz@111: ... cmlenz@111: ... Foo cmlenz@111: ... cmlenz@111: ... cmlenz@111: ... Bar cmlenz@111: ... cmlenz@111: ... cmlenz@111: ... ''') cmlenz@111: >>> print doc.select('items/item[@status="closed"]/summary/text()') cmlenz@111: Bar cmlenz@111: cmlenz@111: Because the XPath engine operates on markup streams (as opposed to tree cmlenz@111: structures), it only implements a subset of the full XPath 1.0 language. cmlenz@111: """ cmlenz@1: cmlenz@155: from math import ceil, floor cmlenz@1: import re cmlenz@1: cmlenz@145: from markup.core import Stream, START, END, TEXT, COMMENT, PI cmlenz@1: cmlenz@106: __all__ = ['Path', 'PathSyntaxError'] cmlenz@1: cmlenz@1: cmlenz@114: class Axis(object): cmlenz@114: """Defines constants for the various supported XPath axes.""" cmlenz@114: cmlenz@114: ATTRIBUTE = 'attribute' cmlenz@114: CHILD = 'child' cmlenz@114: DESCENDANT = 'descendant' cmlenz@114: DESCENDANT_OR_SELF = 'descendant-or-self' cmlenz@114: NAMESPACE = 'namespace' cmlenz@114: SELF = 'self' cmlenz@114: cmlenz@114: def forname(cls, name): cmlenz@114: """Return the axis constant for the given name, or `None` if no such cmlenz@114: axis was defined. cmlenz@114: """ cmlenz@114: return getattr(cls, name.upper().replace('-', '_'), None) cmlenz@114: forname = classmethod(forname) cmlenz@114: cmlenz@114: cmlenz@114: ATTRIBUTE = Axis.ATTRIBUTE cmlenz@114: CHILD = Axis.CHILD cmlenz@114: DESCENDANT = Axis.DESCENDANT cmlenz@114: DESCENDANT_OR_SELF = Axis.DESCENDANT_OR_SELF cmlenz@114: NAMESPACE = Axis.NAMESPACE cmlenz@114: SELF = Axis.SELF cmlenz@114: cmlenz@114: cmlenz@1: class Path(object): cmlenz@26: """Implements basic XPath support on streams. cmlenz@1: cmlenz@26: Instances of this class represent a "compiled" XPath expression, and provide cmlenz@26: methods for testing the path against a stream, as well as extracting a cmlenz@26: substream matching that path. cmlenz@1: """ cmlenz@1: cmlenz@139: def __init__(self, text, filename=None, lineno=-1): cmlenz@26: """Create the path object from a string. cmlenz@26: cmlenz@26: @param text: the path expression cmlenz@26: """ cmlenz@1: self.source = text cmlenz@139: self.paths = PathParser(text, filename, lineno).parse() cmlenz@1: cmlenz@1: def __repr__(self): cmlenz@137: paths = [] cmlenz@137: for path in self.paths: cmlenz@137: steps = [] cmlenz@137: for axis, nodetest, predicates in path: cmlenz@137: steps.append('%s::%s' % (axis, nodetest)) cmlenz@137: for predicate in predicates: cmlenz@137: steps.append('[%s]' % predicate) cmlenz@137: paths.append('/'.join(steps)) cmlenz@137: return '<%s "%s">' % (self.__class__.__name__, '|'.join(paths)) cmlenz@1: cmlenz@179: def select(self, stream, variables=None): cmlenz@26: """Returns a substream of the given stream that matches the path. cmlenz@26: cmlenz@26: If there are no matches, this method returns an empty stream. cmlenz@26: cmlenz@33: >>> from markup.input import XML cmlenz@33: >>> xml = XML('Text') cmlenz@61: cmlenz@33: >>> print Path('child').select(xml) cmlenz@33: Text cmlenz@33: cmlenz@33: >>> print Path('child/text()').select(xml) cmlenz@33: Text cmlenz@33: cmlenz@26: @param stream: the stream to select from cmlenz@26: @return: the substream matching the path, or an empty stream cmlenz@26: """ cmlenz@1: stream = iter(stream) cmlenz@26: def _generate(): cmlenz@1: test = self.test() cmlenz@1: for kind, data, pos in stream: cmlenz@179: result = test(kind, data, pos, variables) cmlenz@1: if result is True: cmlenz@1: yield kind, data, pos cmlenz@1: depth = 1 cmlenz@1: while depth > 0: cmlenz@73: subkind, subdata, subpos = stream.next() cmlenz@73: if subkind is START: cmlenz@73: depth += 1 cmlenz@73: elif subkind is END: cmlenz@73: depth -= 1 cmlenz@73: yield subkind, subdata, subpos cmlenz@179: test(subkind, subdata, subpos, variables) cmlenz@1: elif result: cmlenz@1: yield result cmlenz@26: return Stream(_generate()) cmlenz@1: cmlenz@38: def test(self, ignore_context=False): cmlenz@26: """Returns a function that can be used to track whether the path matches cmlenz@26: a specific stream event. cmlenz@26: cmlenz@26: The function returned expects the positional arguments `kind`, `data`, cmlenz@26: and `pos`, i.e. basically an unpacked stream event. If the path matches cmlenz@26: the event, the function returns the match (for example, a `START` or cmlenz@106: `TEXT` event.) Otherwise, it returns `None`. cmlenz@33: cmlenz@33: >>> from markup.input import XML cmlenz@33: >>> xml = XML('') cmlenz@33: >>> test = Path('child').test() cmlenz@33: >>> for kind, data, pos in xml: cmlenz@179: ... if test(kind, data, pos, {}): cmlenz@33: ... print kind, data cmlenz@33: START (u'child', [(u'id', u'1')]) cmlenz@33: START (u'child', [(u'id', u'2')]) cmlenz@26: """ cmlenz@137: paths = [(steps, len(steps), [0]) for steps in self.paths] cmlenz@1: cmlenz@179: def _test(kind, data, pos, variables): cmlenz@137: for steps, size, stack in paths: cmlenz@106: if not stack: cmlenz@106: continue cmlenz@106: cursor = stack[-1] cmlenz@1: cmlenz@106: if kind is END: cmlenz@106: stack.pop() cmlenz@106: continue cmlenz@106: elif kind is START: cmlenz@106: stack.append(cursor) cmlenz@106: cmlenz@111: while 1: cmlenz@137: axis, nodetest, predicates = steps[cursor] cmlenz@106: cmlenz@179: matched = nodetest(kind, data, pos, variables) cmlenz@111: if matched and predicates: cmlenz@111: for predicate in predicates: cmlenz@179: if not predicate(kind, data, pos, variables): cmlenz@111: matched = None cmlenz@111: break cmlenz@106: cmlenz@111: if matched: cmlenz@111: if cursor + 1 == size: # the last location step cmlenz@111: if ignore_context or \ cmlenz@111: kind is not START or \ cmlenz@137: axis in (ATTRIBUTE, NAMESPACE, SELF) or \ cmlenz@111: len(stack) > 2: cmlenz@111: return matched cmlenz@111: else: cmlenz@111: cursor += 1 cmlenz@111: stack[-1] = cursor cmlenz@111: cmlenz@114: if axis is not SELF: cmlenz@111: break cmlenz@111: cmlenz@111: if not matched and kind is START \ cmlenz@114: and axis not in (DESCENDANT, DESCENDANT_OR_SELF): cmlenz@106: # If this step is not a closure, it cannot be matched until cmlenz@106: # the current element is closed... so we need to move the cmlenz@114: # cursor back to the previous closure and retest that cmlenz@114: # against the current element cmlenz@111: backsteps = [step for step in steps[:cursor] cmlenz@114: if step[0] in (DESCENDANT, DESCENDANT_OR_SELF)] cmlenz@111: backsteps.reverse() cmlenz@137: for axis, nodetest, predicates in backsteps: cmlenz@179: matched = nodetest(kind, data, pos, variables) cmlenz@111: if not matched: cmlenz@111: cursor -= 1 cmlenz@111: break cmlenz@106: stack[-1] = cursor cmlenz@1: cmlenz@1: return None cmlenz@1: cmlenz@1: return _test cmlenz@1: cmlenz@1: cmlenz@106: class PathSyntaxError(Exception): cmlenz@106: """Exception raised when an XPath expression is syntactically incorrect.""" cmlenz@106: cmlenz@106: def __init__(self, message, filename=None, lineno=-1, offset=-1): cmlenz@106: if filename: cmlenz@106: message = '%s (%s, line %d)' % (message, filename, lineno) cmlenz@106: Exception.__init__(self, message) cmlenz@106: self.filename = filename cmlenz@106: self.lineno = lineno cmlenz@106: self.offset = offset cmlenz@106: cmlenz@106: cmlenz@137: class PathParser(object): cmlenz@106: """Tokenizes and parses an XPath expression.""" cmlenz@106: cmlenz@106: _QUOTES = (("'", "'"), ('"', '"')) cmlenz@106: _TOKENS = ('::', ':', '..', '.', '//', '/', '[', ']', '()', '(', ')', '@', cmlenz@179: '=', '!=', '!', '|', ',', '>=', '>', '<=', '<', '$') cmlenz@163: _tokenize = re.compile('("[^"]*")|(\'[^\']*\')|((?:\d+)?\.\d+)|(%s)|([^%s\s]+)|\s+' % ( cmlenz@106: '|'.join([re.escape(t) for t in _TOKENS]), cmlenz@106: ''.join([re.escape(t[0]) for t in _TOKENS]))).findall cmlenz@106: cmlenz@139: def __init__(self, text, filename=None, lineno=-1): cmlenz@139: self.filename = filename cmlenz@139: self.lineno = lineno cmlenz@163: self.tokens = filter(None, [dqstr or sqstr or number or token or name cmlenz@163: for dqstr, sqstr, number, token, name in cmlenz@155: self._tokenize(text)]) cmlenz@106: self.pos = 0 cmlenz@106: cmlenz@106: # Tokenizer cmlenz@106: cmlenz@106: at_end = property(lambda self: self.pos == len(self.tokens) - 1) cmlenz@106: cur_token = property(lambda self: self.tokens[self.pos]) cmlenz@106: cmlenz@106: def next_token(self): cmlenz@106: self.pos += 1 cmlenz@106: return self.tokens[self.pos] cmlenz@106: cmlenz@106: def peek_token(self): cmlenz@106: if not self.at_end: cmlenz@106: return self.tokens[self.pos + 1] cmlenz@106: return None cmlenz@106: cmlenz@106: # Recursive descent parser cmlenz@106: cmlenz@106: def parse(self): cmlenz@106: """Parses the XPath expression and returns a list of location path cmlenz@106: tests. cmlenz@106: cmlenz@106: For union expressions (such as `*|text()`), this function returns one cmlenz@106: test for each operand in the union. For patch expressions that don't cmlenz@106: use the union operator, the function always returns a list of size 1. cmlenz@106: cmlenz@106: Each path test in turn is a sequence of tests that correspond to the cmlenz@111: location steps, each tuples of the form `(axis, testfunc, predicates)` cmlenz@106: """ cmlenz@106: paths = [self._location_path()] cmlenz@106: while self.cur_token == '|': cmlenz@106: self.next_token() cmlenz@106: paths.append(self._location_path()) cmlenz@106: if not self.at_end: cmlenz@106: raise PathSyntaxError('Unexpected token %r after end of expression' cmlenz@139: % self.cur_token, self.filename, self.lineno) cmlenz@106: return paths cmlenz@106: cmlenz@106: def _location_path(self): cmlenz@106: steps = [] cmlenz@106: while True: cmlenz@106: if self.cur_token == '//': cmlenz@137: steps.append((DESCENDANT_OR_SELF, NodeTest(), [])) cmlenz@111: self.next_token() cmlenz@111: elif self.cur_token == '/' and not steps: cmlenz@139: raise PathSyntaxError('Absolute location paths not supported', cmlenz@139: self.filename, self.lineno) cmlenz@111: cmlenz@137: axis, nodetest, predicates = self._location_step() cmlenz@137: if not axis: cmlenz@145: axis = CHILD cmlenz@137: steps.append((axis, nodetest, predicates)) cmlenz@111: cmlenz@111: if self.at_end or not self.cur_token.startswith('/'): cmlenz@106: break cmlenz@106: self.next_token() cmlenz@111: cmlenz@106: return steps cmlenz@106: cmlenz@106: def _location_step(self): cmlenz@106: if self.cur_token == '@': cmlenz@114: axis = ATTRIBUTE cmlenz@106: self.next_token() cmlenz@111: elif self.cur_token == '.': cmlenz@114: axis = SELF cmlenz@137: elif self.cur_token == '..': cmlenz@139: raise PathSyntaxError('Unsupported axis "parent"', self.filename, cmlenz@139: self.lineno) cmlenz@111: elif self.peek_token() == '::': cmlenz@114: axis = Axis.forname(self.cur_token) cmlenz@114: if axis is None: cmlenz@139: raise PathSyntaxError('Unsupport axis "%s"' % axis, cmlenz@139: self.filename, self.lineno) cmlenz@111: self.next_token() cmlenz@111: self.next_token() cmlenz@106: else: cmlenz@137: axis = None cmlenz@137: nodetest = self._node_test(axis or CHILD) cmlenz@111: predicates = [] cmlenz@106: while self.cur_token == '[': cmlenz@111: predicates.append(self._predicate()) cmlenz@137: return axis, nodetest, predicates cmlenz@106: cmlenz@106: def _node_test(self, axis=None): cmlenz@106: test = None cmlenz@106: if self.peek_token() in ('(', '()'): # Node type test cmlenz@106: test = self._node_type() cmlenz@106: cmlenz@106: else: # Name test cmlenz@137: if self.cur_token == '*': cmlenz@137: test = PrincipalTypeTest(axis) cmlenz@137: elif self.cur_token == '.': cmlenz@137: test = NodeTest() cmlenz@106: else: cmlenz@137: test = LocalNameTest(axis, self.cur_token) cmlenz@106: cmlenz@106: if not self.at_end: cmlenz@106: self.next_token() cmlenz@106: return test cmlenz@106: cmlenz@106: def _node_type(self): cmlenz@106: name = self.cur_token cmlenz@106: self.next_token() cmlenz@137: cmlenz@137: args = [] cmlenz@137: if self.cur_token != '()': cmlenz@137: # The processing-instruction() function optionally accepts the cmlenz@137: # name of the PI as argument, which must be a literal string cmlenz@137: self.next_token() # ( cmlenz@137: if self.cur_token != ')': cmlenz@137: string = self.cur_token cmlenz@137: if (string[0], string[-1]) in self._QUOTES: cmlenz@137: string = string[1:-1] cmlenz@137: args.append(string) cmlenz@137: cmlenz@137: cls = _nodetest_map.get(name) cmlenz@137: if not cls: cmlenz@139: raise PathSyntaxError('%s() not allowed here' % name, self.filename, cmlenz@139: self.lineno) cmlenz@137: return cls(*args) cmlenz@106: cmlenz@106: def _predicate(self): cmlenz@106: assert self.cur_token == '[' cmlenz@106: self.next_token() cmlenz@111: expr = self._or_expr() cmlenz@164: if isinstance(expr, NumberLiteral): cmlenz@164: raise PathSyntaxError('Position predicates not yet supported') cmlenz@121: if self.cur_token != ']': cmlenz@121: raise PathSyntaxError('Expected "]" to close predicate, ' cmlenz@139: 'but found "%s"' % self.cur_token, cmlenz@139: self.filename, self.lineno) cmlenz@111: if not self.at_end: cmlenz@111: self.next_token() cmlenz@111: return expr cmlenz@106: cmlenz@106: def _or_expr(self): cmlenz@106: expr = self._and_expr() cmlenz@106: while self.cur_token == 'or': cmlenz@106: self.next_token() cmlenz@137: expr = OrOperator(expr, self._and_expr()) cmlenz@106: return expr cmlenz@106: cmlenz@106: def _and_expr(self): cmlenz@106: expr = self._equality_expr() cmlenz@106: while self.cur_token == 'and': cmlenz@106: self.next_token() cmlenz@137: expr = AndOperator(expr, self._equality_expr()) cmlenz@106: return expr cmlenz@106: cmlenz@106: def _equality_expr(self): cmlenz@162: expr = self._relational_expr() cmlenz@162: while self.cur_token in ('=', '!='): cmlenz@162: op = _operator_map[self.cur_token] cmlenz@162: self.next_token() cmlenz@162: expr = op(expr, self._relational_expr()) cmlenz@162: return expr cmlenz@162: cmlenz@162: def _relational_expr(self): cmlenz@106: expr = self._primary_expr() cmlenz@162: while self.cur_token in ('>', '>=', '<', '>='): cmlenz@162: op = _operator_map[self.cur_token] cmlenz@106: self.next_token() cmlenz@106: expr = op(expr, self._primary_expr()) cmlenz@106: return expr cmlenz@106: cmlenz@106: def _primary_expr(self): cmlenz@106: token = self.cur_token cmlenz@106: if len(token) > 1 and (token[0], token[-1]) in self._QUOTES: cmlenz@106: self.next_token() cmlenz@137: return StringLiteral(token[1:-1]) cmlenz@163: elif token[0].isdigit() or token[0] == '.': cmlenz@106: self.next_token() cmlenz@137: return NumberLiteral(float(token)) cmlenz@179: elif token == '$': cmlenz@179: token = self.next_token() cmlenz@179: self.next_token() cmlenz@179: return VariableReference(token) cmlenz@121: elif not self.at_end and self.peek_token().startswith('('): cmlenz@155: return self._function_call() cmlenz@106: else: cmlenz@106: axis = None cmlenz@106: if token == '@': cmlenz@114: axis = ATTRIBUTE cmlenz@106: self.next_token() cmlenz@106: return self._node_test(axis) cmlenz@137: cmlenz@155: def _function_call(self): cmlenz@155: name = self.cur_token cmlenz@155: if self.next_token() == '()': cmlenz@155: args = [] cmlenz@155: else: cmlenz@155: assert self.cur_token == '(' cmlenz@155: self.next_token() cmlenz@155: args = [self._or_expr()] cmlenz@155: while self.cur_token == ',': cmlenz@155: self.next_token() cmlenz@155: args.append(self._or_expr()) cmlenz@155: if not self.cur_token == ')': cmlenz@155: raise PathSyntaxError('Expected ")" to close function argument ' cmlenz@155: 'list, but found "%s"' % self.cur_token, cmlenz@155: self.filename, self.lineno) cmlenz@155: self.next_token() cmlenz@155: cls = _function_map.get(name) cmlenz@155: if not cls: cmlenz@155: raise PathSyntaxError('Unsupported function "%s"' % name, cmlenz@155: self.filename, self.lineno) cmlenz@155: return cls(*args) cmlenz@155: cmlenz@137: cmlenz@137: # Node tests cmlenz@137: cmlenz@137: class PrincipalTypeTest(object): cmlenz@161: """Node test that matches any event with the given principal type.""" cmlenz@137: __slots__ = ['principal_type'] cmlenz@137: def __init__(self, principal_type): cmlenz@137: self.principal_type = principal_type cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@137: if kind is START: cmlenz@137: if self.principal_type is ATTRIBUTE: cmlenz@137: return data[1] or None cmlenz@137: else: cmlenz@137: return True cmlenz@137: def __repr__(self): cmlenz@137: return '*' cmlenz@137: cmlenz@137: class LocalNameTest(object): cmlenz@161: """Node test that matches any event with the given prinipal type and cmlenz@161: local name. cmlenz@161: """ cmlenz@137: __slots__ = ['principal_type', 'name'] cmlenz@137: def __init__(self, principal_type, name): cmlenz@137: self.principal_type = principal_type cmlenz@137: self.name = name cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@137: if kind is START: cmlenz@137: if self.principal_type is ATTRIBUTE and self.name in data[1]: cmlenz@137: return TEXT, data[1].get(self.name), pos cmlenz@137: else: cmlenz@137: return data[0].localname == self.name cmlenz@137: def __repr__(self): cmlenz@137: return self.name cmlenz@137: cmlenz@137: class CommentNodeTest(object): cmlenz@161: """Node test that matches any comment events.""" cmlenz@137: __slots__ = [] cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@137: return kind is COMMENT and (kind, data, pos) cmlenz@137: def __repr__(self): cmlenz@137: return 'comment()' cmlenz@137: cmlenz@137: class NodeTest(object): cmlenz@161: """Node test that matches any node.""" cmlenz@137: __slots__ = [] cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@137: if kind is START: cmlenz@137: return True cmlenz@137: return kind, data, pos cmlenz@137: def __repr__(self): cmlenz@137: return 'node()' cmlenz@137: cmlenz@137: class ProcessingInstructionNodeTest(object): cmlenz@161: """Node test that matches any processing instruction event.""" cmlenz@137: __slots__ = ['target'] cmlenz@137: def __init__(self, target=None): cmlenz@137: self.target = target cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@137: if kind is PI and (not self.target or data[0] == self.target): cmlenz@137: return (kind, data, pos) cmlenz@137: def __repr__(self): cmlenz@137: arg = '' cmlenz@137: if self.target: cmlenz@137: arg = '"' + self.target + '"' cmlenz@137: return 'processing-instruction(%s)' % arg cmlenz@137: cmlenz@137: class TextNodeTest(object): cmlenz@161: """Node test that matches any text event.""" cmlenz@137: __slots__ = [] cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@137: return kind is TEXT and (kind, data, pos) cmlenz@137: def __repr__(self): cmlenz@137: return 'text()' cmlenz@137: cmlenz@137: _nodetest_map = {'comment': CommentNodeTest, 'node': NodeTest, cmlenz@137: 'processing-instruction': ProcessingInstructionNodeTest, cmlenz@137: 'text': TextNodeTest} cmlenz@137: cmlenz@137: # Functions cmlenz@137: cmlenz@155: class Function(object): cmlenz@155: """Base class for function nodes in XPath expressions.""" cmlenz@155: cmlenz@155: class BooleanFunction(Function): cmlenz@161: """The `boolean` function, which converts its argument to a boolean cmlenz@161: value. cmlenz@161: """ cmlenz@155: __slots__ = ['expr'] cmlenz@155: def __init__(self, expr): cmlenz@155: self.expr = expr cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: val = self.expr(kind, data, pos, variables) cmlenz@155: if type(val) is tuple: cmlenz@155: val = val[1] cmlenz@155: return bool(val) cmlenz@155: def __repr__(self): cmlenz@155: return 'boolean(%r)' % self.expr cmlenz@155: cmlenz@155: class CeilingFunction(Function): cmlenz@161: """The `ceiling` function, which returns the nearest lower integer number cmlenz@161: for the given number. cmlenz@161: """ cmlenz@155: __slots__ = ['number'] cmlenz@155: def __init__(self, number): cmlenz@155: self.number = number cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: number = self.number(kind, data, pos, variables) cmlenz@155: if type(number) is tuple: cmlenz@155: number = number[1] cmlenz@155: return ceil(float(number)) cmlenz@155: def __repr__(self): cmlenz@155: return 'ceiling(%r)' % self.number cmlenz@155: cmlenz@155: class ConcatFunction(Function): cmlenz@161: """The `concat` function, which concatenates (joins) the variable number of cmlenz@161: strings it gets as arguments. cmlenz@161: """ cmlenz@155: __slots__ = ['exprs'] cmlenz@155: def __init__(self, *exprs): cmlenz@155: self.exprs = exprs cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@155: strings = [] cmlenz@179: for item in [expr(kind, data, pos, variables) for expr in self.exprs]: cmlenz@155: if type(item) is tuple: cmlenz@155: assert item[0] is TEXT cmlenz@155: item = item[1] cmlenz@155: strings.append(item) cmlenz@155: return u''.join(strings) cmlenz@155: def __repr__(self): cmlenz@169: return 'concat(%s)' % ', '.join([repr(expr) for expr in self.exprs]) cmlenz@155: cmlenz@155: class ContainsFunction(Function): cmlenz@161: """The `contains` function, which returns whether a string contains a given cmlenz@161: substring. cmlenz@161: """ cmlenz@161: __slots__ = ['string1', 'string2'] cmlenz@155: def __init__(self, string1, string2): cmlenz@155: self.string1 = string1 cmlenz@155: self.string2 = string2 cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: string1 = self.string1(kind, data, pos, variables) cmlenz@155: if type(string1) is tuple: cmlenz@155: string1 = string1[1] cmlenz@179: string2 = self.string2(kind, data, pos, variables) cmlenz@155: if type(string2) is tuple: cmlenz@155: string2 = string2[1] cmlenz@155: return string2 in string1 cmlenz@155: def __repr__(self): cmlenz@155: return 'contains(%r, %r)' % (self.string1, self.string2) cmlenz@155: cmlenz@155: class FalseFunction(Function): cmlenz@161: """The `false` function, which always returns the boolean `false` value.""" cmlenz@155: __slots__ = [] cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@155: return False cmlenz@155: def __repr__(self): cmlenz@155: return 'false()' cmlenz@155: cmlenz@155: class FloorFunction(Function): cmlenz@161: """The `ceiling` function, which returns the nearest higher integer number cmlenz@161: for the given number. cmlenz@161: """ cmlenz@155: __slots__ = ['number'] cmlenz@155: def __init__(self, number): cmlenz@155: self.number = number cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: number = self.number(kind, data, pos, variables) cmlenz@155: if type(number) is tuple: cmlenz@155: number = number[1] cmlenz@155: return floor(float(number)) cmlenz@155: def __repr__(self): cmlenz@155: return 'floor(%r)' % self.number cmlenz@155: cmlenz@155: class LocalNameFunction(Function): cmlenz@161: """The `local-name` function, which returns the local name of the current cmlenz@161: element. cmlenz@161: """ cmlenz@137: __slots__ = [] cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@137: if kind is START: cmlenz@137: return TEXT, data[0].localname, pos cmlenz@137: def __repr__(self): cmlenz@137: return 'local-name()' cmlenz@137: cmlenz@155: class NameFunction(Function): cmlenz@161: """The `name` function, which returns the qualified name of the current cmlenz@161: element. cmlenz@161: """ cmlenz@137: __slots__ = [] cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@137: if kind is START: cmlenz@137: return TEXT, data[0], pos cmlenz@137: def __repr__(self): cmlenz@137: return 'name()' cmlenz@137: cmlenz@155: class NamespaceUriFunction(Function): cmlenz@161: """The `namespace-uri` function, which returns the namespace URI of the cmlenz@161: current element. cmlenz@161: """ cmlenz@137: __slots__ = [] cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@137: if kind is START: cmlenz@137: return TEXT, data[0].namespace, pos cmlenz@137: def __repr__(self): cmlenz@137: return 'namespace-uri()' cmlenz@137: cmlenz@155: class NotFunction(Function): cmlenz@161: """The `not` function, which returns the negated boolean value of its cmlenz@161: argument. cmlenz@161: """ cmlenz@137: __slots__ = ['expr'] cmlenz@137: def __init__(self, expr): cmlenz@137: self.expr = expr cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: return not self.expr(kind, data, pos, variables) cmlenz@137: def __repr__(self): cmlenz@137: return 'not(%s)' % self.expr cmlenz@137: cmlenz@155: class NormalizeSpaceFunction(Function): cmlenz@161: """The `normalize-space` function, which removes leading and trailing cmlenz@161: whitespace in the given string, and replaces multiple adjacent whitespace cmlenz@161: characters inside the string with a single space. cmlenz@161: """ cmlenz@155: __slots__ = ['expr'] cmlenz@155: _normalize = re.compile(r'\s{2,}').sub cmlenz@155: def __init__(self, expr): cmlenz@155: self.expr = expr cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: string = self.expr(kind, data, pos, variables) cmlenz@155: if type(string) is tuple: cmlenz@155: string = string[1] cmlenz@155: return self._normalize(' ', string.strip()) cmlenz@155: def __repr__(self): cmlenz@155: return 'normalize-space(%s)' % repr(self.expr) cmlenz@155: cmlenz@155: class NumberFunction(Function): cmlenz@161: """The `number` function that converts its argument to a number.""" cmlenz@155: __slots__ = ['expr'] cmlenz@155: def __init__(self, expr): cmlenz@155: self.expr = expr cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: val = self.expr(kind, data, pos, variables) cmlenz@155: if type(val) is tuple: cmlenz@155: val = val[1] cmlenz@155: return float(val) cmlenz@155: def __repr__(self): cmlenz@155: return 'number(%r)' % self.expr cmlenz@155: cmlenz@162: class RoundFunction(Function): cmlenz@162: """The `round` function, which returns the nearest integer number for the cmlenz@162: given number. cmlenz@162: """ cmlenz@162: __slots__ = ['number'] cmlenz@162: def __init__(self, number): cmlenz@162: self.number = number cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: number = self.number(kind, data, pos, variables) cmlenz@162: if type(number) is tuple: cmlenz@162: number = number[1] cmlenz@162: return round(float(number)) cmlenz@162: def __repr__(self): cmlenz@162: return 'round(%r)' % self.number cmlenz@162: cmlenz@155: class StartsWithFunction(Function): cmlenz@161: """The `starts-with` function that returns whether one string starts with cmlenz@161: a given substring. cmlenz@161: """ cmlenz@155: __slots__ = ['string1', 'string2'] cmlenz@155: def __init__(self, string1, string2): cmlenz@155: self.string1 = string2 cmlenz@155: self.string2 = string2 cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: string1 = self.string1(kind, data, pos, variables) cmlenz@155: if type(string1) is tuple: cmlenz@155: string1 = string1[1] cmlenz@179: string2 = self.string2(kind, data, pos, variables) cmlenz@155: if type(string2) is tuple: cmlenz@155: string2 = string2[1] cmlenz@155: return string1.startswith(string2) cmlenz@155: def __repr__(self): cmlenz@155: return 'starts-with(%r, %r)' % (self.string1, self.string2) cmlenz@155: cmlenz@155: class StringLengthFunction(Function): cmlenz@161: """The `string-length` function that returns the length of the given cmlenz@161: string. cmlenz@161: """ cmlenz@155: __slots__ = ['expr'] cmlenz@155: def __init__(self, expr): cmlenz@155: self.expr = expr cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: string = self.expr(kind, data, pos, variables) cmlenz@155: if type(string) is tuple: cmlenz@155: string = string[1] cmlenz@155: return len(string) cmlenz@155: def __repr__(self): cmlenz@155: return 'string-length(%r)' % self.expr cmlenz@155: cmlenz@155: class SubstringFunction(Function): cmlenz@161: """The `substring` function that returns the part of a string that starts cmlenz@161: at the given offset, and optionally limited to the given length. cmlenz@161: """ cmlenz@155: __slots__ = ['string', 'start', 'length'] cmlenz@155: def __init__(self, string, start, length=None): cmlenz@155: self.string = string cmlenz@155: self.start = start cmlenz@155: self.length = length cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: string = self.string(kind, data, pos, variables) cmlenz@155: if type(string) is tuple: cmlenz@155: string = string[1] cmlenz@179: start = self.start(kind, data, pos, variables) cmlenz@155: if type(start) is tuple: cmlenz@155: start = start[1] cmlenz@155: length = 0 cmlenz@155: if self.length is not None: cmlenz@179: length = self.length(kind, data, pos, variables) cmlenz@155: if type(length) is tuple: cmlenz@155: length = length[1] cmlenz@155: return string[int(start):len(string) - int(length)] cmlenz@155: def __repr__(self): cmlenz@155: if self.length is not None: cmlenz@155: return 'substring(%r, %r, %r)' % (self.string, self.start, cmlenz@155: self.length) cmlenz@155: else: cmlenz@155: return 'substring(%r, %r)' % (self.string, self.start) cmlenz@155: cmlenz@155: class SubstringAfterFunction(Function): cmlenz@161: """The `substring-after` function that returns the part of a string that cmlenz@161: is found after the given substring. cmlenz@161: """ cmlenz@155: __slots__ = ['string1', 'string2'] cmlenz@155: def __init__(self, string1, string2): cmlenz@155: self.string1 = string1 cmlenz@155: self.string2 = string2 cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: string1 = self.string1(kind, data, pos, variables) cmlenz@155: if type(string1) is tuple: cmlenz@155: string1 = string1[1] cmlenz@179: string2 = self.string2(kind, data, pos, variables) cmlenz@155: if type(string2) is tuple: cmlenz@155: string2 = string2[1] cmlenz@155: index = string1.find(string2) cmlenz@155: if index >= 0: cmlenz@155: return string1[index + len(string2):] cmlenz@155: return u'' cmlenz@155: def __repr__(self): cmlenz@155: return 'substring-after(%r, %r)' % (self.string1, self.string2) cmlenz@155: cmlenz@155: class SubstringBeforeFunction(Function): cmlenz@161: """The `substring-before` function that returns the part of a string that cmlenz@161: is found before the given substring. cmlenz@161: """ cmlenz@155: __slots__ = ['string1', 'string2'] cmlenz@155: def __init__(self, string1, string2): cmlenz@155: self.string1 = string1 cmlenz@155: self.string2 = string2 cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: string1 = self.string1(kind, data, pos, variables) cmlenz@155: if type(string1) is tuple: cmlenz@155: string1 = string1[1] cmlenz@179: string2 = self.string2(kind, data, pos, variables) cmlenz@155: if type(string2) is tuple: cmlenz@155: string2 = string2[1] cmlenz@155: index = string1.find(string2) cmlenz@155: if index >= 0: cmlenz@155: return string1[:index] cmlenz@155: return u'' cmlenz@155: def __repr__(self): cmlenz@155: return 'substring-after(%r, %r)' % (self.string1, self.string2) cmlenz@155: cmlenz@155: class TranslateFunction(Function): cmlenz@161: """The `translate` function that translates a set of characters in a cmlenz@161: string to target set of characters. cmlenz@161: """ cmlenz@155: __slots__ = ['string', 'fromchars', 'tochars'] cmlenz@155: def __init__(self, string, fromchars, tochars): cmlenz@155: self.string = string cmlenz@155: self.fromchars = fromchars cmlenz@155: self.tochars = tochars cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: string = self.string(kind, data, pos, variables) cmlenz@155: if type(string) is tuple: cmlenz@155: string = string[1] cmlenz@179: fromchars = self.fromchars(kind, data, pos, variables) cmlenz@155: if type(fromchars) is tuple: cmlenz@155: fromchars = fromchars[1] cmlenz@179: tochars = self.tochars(kind, data, pos, variables) cmlenz@155: if type(tochars) is tuple: cmlenz@155: tochars = tochars[1] cmlenz@155: table = dict(zip([ord(c) for c in fromchars], cmlenz@155: [ord(c) for c in tochars])) cmlenz@155: return string.translate(table) cmlenz@155: def __repr__(self): cmlenz@155: return 'translate(%r, %r, %r)' % (self.string, self.fromchars, cmlenz@155: self.tochars) cmlenz@155: cmlenz@155: class TrueFunction(Function): cmlenz@161: """The `true` function, which always returns the boolean `true` value.""" cmlenz@155: __slots__ = [] cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@155: return True cmlenz@155: def __repr__(self): cmlenz@155: return 'true()' cmlenz@155: cmlenz@155: _function_map = {'boolean': BooleanFunction, 'ceiling': CeilingFunction, cmlenz@155: 'concat': ConcatFunction, 'contains': ContainsFunction, cmlenz@155: 'false': FalseFunction, 'floor': FloorFunction, cmlenz@155: 'local-name': LocalNameFunction, 'name': NameFunction, cmlenz@155: 'namespace-uri': NamespaceUriFunction, cmlenz@155: 'normalize-space': NormalizeSpaceFunction, 'not': NotFunction, cmlenz@162: 'number': NumberFunction, 'round': RoundFunction, cmlenz@162: 'starts-with': StartsWithFunction, cmlenz@155: 'string-length': StringLengthFunction, cmlenz@155: 'substring': SubstringFunction, cmlenz@155: 'substring-after': SubstringAfterFunction, cmlenz@155: 'substring-before': SubstringBeforeFunction, cmlenz@155: 'translate': TranslateFunction, 'true': TrueFunction} cmlenz@137: cmlenz@179: # Literals & Variables cmlenz@137: cmlenz@155: class Literal(object): cmlenz@155: """Abstract base class for literal nodes.""" cmlenz@155: cmlenz@155: class StringLiteral(Literal): cmlenz@161: """A string literal node.""" cmlenz@137: __slots__ = ['text'] cmlenz@137: def __init__(self, text): cmlenz@137: self.text = text cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@137: return TEXT, self.text, (None, -1, -1) cmlenz@137: def __repr__(self): cmlenz@137: return '"%s"' % self.text cmlenz@137: cmlenz@155: class NumberLiteral(Literal): cmlenz@161: """A number literal node.""" cmlenz@137: __slots__ = ['number'] cmlenz@137: def __init__(self, number): cmlenz@137: self.number = number cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@155: return TEXT, self.number, (None, -1, -1) cmlenz@137: def __repr__(self): cmlenz@137: return str(self.number) cmlenz@137: cmlenz@179: class VariableReference(Literal): cmlenz@179: """A variable reference node.""" cmlenz@179: __slots__ = ['name'] cmlenz@179: def __init__(self, name): cmlenz@179: self.name = name cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: return TEXT, variables.get(self.name), (None, -1, -1) cmlenz@179: def __repr__(self): cmlenz@179: return str(self.number) cmlenz@179: cmlenz@137: # Operators cmlenz@137: cmlenz@137: class AndOperator(object): cmlenz@161: """The boolean operator `and`.""" cmlenz@137: __slots__ = ['lval', 'rval'] cmlenz@137: def __init__(self, lval, rval): cmlenz@137: self.lval = lval cmlenz@137: self.rval = rval cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: lval = self.lval(kind, data, pos, variables) cmlenz@161: if type(lval) is tuple: cmlenz@161: lval = lval[1] cmlenz@161: if not lval: cmlenz@161: return False cmlenz@179: rval = self.rval(kind, data, pos, variables) cmlenz@161: if type(rval) is tuple: cmlenz@161: rval = rval[1] cmlenz@161: return bool(rval) cmlenz@137: def __repr__(self): cmlenz@161: return '%s and %s' % (self.lval, self.rval) cmlenz@137: cmlenz@161: class EqualsOperator(object): cmlenz@161: """The equality operator `=`.""" cmlenz@137: __slots__ = ['lval', 'rval'] cmlenz@137: def __init__(self, lval, rval): cmlenz@137: self.lval = lval cmlenz@137: self.rval = rval cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: lval = self.lval(kind, data, pos, variables) cmlenz@161: if type(lval) is tuple: cmlenz@161: lval = lval[1] cmlenz@179: rval = self.rval(kind, data, pos, variables) cmlenz@161: if type(rval) is tuple: cmlenz@161: rval = rval[1] cmlenz@161: return lval == rval cmlenz@137: def __repr__(self): cmlenz@161: return '%s=%s' % (self.lval, self.rval) cmlenz@137: cmlenz@161: class NotEqualsOperator(object): cmlenz@161: """The equality operator `!=`.""" cmlenz@137: __slots__ = ['lval', 'rval'] cmlenz@137: def __init__(self, lval, rval): cmlenz@137: self.lval = lval cmlenz@137: self.rval = rval cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: lval = self.lval(kind, data, pos, variables) cmlenz@161: if type(lval) is tuple: cmlenz@161: lval = lval[1] cmlenz@179: rval = self.rval(kind, data, pos, variables) cmlenz@161: if type(rval) is tuple: cmlenz@161: rval = rval[1] cmlenz@161: return lval != rval cmlenz@161: def __repr__(self): cmlenz@161: return '%s!=%s' % (self.lval, self.rval) cmlenz@161: cmlenz@161: class OrOperator(object): cmlenz@161: """The boolean operator `or`.""" cmlenz@161: __slots__ = ['lval', 'rval'] cmlenz@161: def __init__(self, lval, rval): cmlenz@161: self.lval = lval cmlenz@161: self.rval = rval cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: lval = self.lval(kind, data, pos, variables) cmlenz@161: if type(lval) is tuple: cmlenz@161: lval = lval[1] cmlenz@161: if lval: cmlenz@137: return True cmlenz@179: rval = self.rval(kind, data, pos, variables) cmlenz@161: if type(rval) is tuple: cmlenz@161: rval = rval[1] cmlenz@161: return bool(rval) cmlenz@137: def __repr__(self): cmlenz@137: return '%s or %s' % (self.lval, self.rval) cmlenz@137: cmlenz@162: class GreaterThanOperator(object): cmlenz@162: """The relational operator `>` (greater than).""" cmlenz@162: __slots__ = ['lval', 'rval'] cmlenz@162: def __init__(self, lval, rval): cmlenz@162: self.lval = lval cmlenz@162: self.rval = rval cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: lval = self.lval(kind, data, pos, variables) cmlenz@162: if type(lval) is tuple: cmlenz@162: lval = lval[1] cmlenz@179: rval = self.rval(kind, data, pos, variables) cmlenz@162: if type(rval) is tuple: cmlenz@162: rval = rval[1] cmlenz@162: return float(lval) > float(rval) cmlenz@162: def __repr__(self): cmlenz@162: return '%s>%s' % (self.lval, self.rval) cmlenz@162: cmlenz@162: class GreaterThanOperator(object): cmlenz@162: """The relational operator `>` (greater than).""" cmlenz@162: __slots__ = ['lval', 'rval'] cmlenz@162: def __init__(self, lval, rval): cmlenz@162: self.lval = lval cmlenz@162: self.rval = rval cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: lval = self.lval(kind, data, pos, variables) cmlenz@162: if type(lval) is tuple: cmlenz@162: lval = lval[1] cmlenz@179: rval = self.rval(kind, data, pos, variables) cmlenz@162: if type(rval) is tuple: cmlenz@162: rval = rval[1] cmlenz@162: return float(lval) > float(rval) cmlenz@162: def __repr__(self): cmlenz@162: return '%s>%s' % (self.lval, self.rval) cmlenz@162: cmlenz@162: class GreaterThanOrEqualOperator(object): cmlenz@162: """The relational operator `>=` (greater than or equal).""" cmlenz@162: __slots__ = ['lval', 'rval'] cmlenz@162: def __init__(self, lval, rval): cmlenz@162: self.lval = lval cmlenz@162: self.rval = rval cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: lval = self.lval(kind, data, pos, variables) cmlenz@162: if type(lval) is tuple: cmlenz@162: lval = lval[1] cmlenz@179: rval = self.rval(kind, data, pos, variables) cmlenz@162: if type(rval) is tuple: cmlenz@162: rval = rval[1] cmlenz@162: return float(lval) >= float(rval) cmlenz@162: def __repr__(self): cmlenz@162: return '%s>=%s' % (self.lval, self.rval) cmlenz@162: cmlenz@162: class LessThanOperator(object): cmlenz@162: """The relational operator `<` (less than).""" cmlenz@162: __slots__ = ['lval', 'rval'] cmlenz@162: def __init__(self, lval, rval): cmlenz@162: self.lval = lval cmlenz@162: self.rval = rval cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: lval = self.lval(kind, data, pos, variables) cmlenz@162: if type(lval) is tuple: cmlenz@162: lval = lval[1] cmlenz@179: rval = self.rval(kind, data, pos, variables) cmlenz@162: if type(rval) is tuple: cmlenz@162: rval = rval[1] cmlenz@162: return float(lval) < float(rval) cmlenz@162: def __repr__(self): cmlenz@162: return '%s<%s' % (self.lval, self.rval) cmlenz@162: cmlenz@162: class LessThanOrEqualOperator(object): cmlenz@162: """The relational operator `<=` (less than or equal).""" cmlenz@162: __slots__ = ['lval', 'rval'] cmlenz@162: def __init__(self, lval, rval): cmlenz@162: self.lval = lval cmlenz@162: self.rval = rval cmlenz@179: def __call__(self, kind, data, pos, variables): cmlenz@179: lval = self.lval(kind, data, pos, variables) cmlenz@162: if type(lval) is tuple: cmlenz@162: lval = lval[1] cmlenz@179: rval = self.rval(kind, data, pos, variables) cmlenz@162: if type(rval) is tuple: cmlenz@162: rval = rval[1] cmlenz@162: return float(lval) <= float(rval) cmlenz@162: def __repr__(self): cmlenz@162: return '%s<=%s' % (self.lval, self.rval) cmlenz@162: cmlenz@162: _operator_map = {'=': EqualsOperator, '!=': NotEqualsOperator, cmlenz@162: '>': GreaterThanOperator, '>=': GreaterThanOrEqualOperator, cmlenz@162: '<': LessThanOperator, '>=': LessThanOrEqualOperator}