cmlenz@1: # -*- coding: utf-8 -*-
cmlenz@1: #
cmlenz@719: # Copyright (C) 2006-2008 Edgewall Software
cmlenz@1: # All rights reserved.
cmlenz@1: #
cmlenz@1: # This software is licensed as described in the file COPYING, which
cmlenz@1: # you should have received as part of this distribution. The terms
cmlenz@230: # are also available at http://genshi.edgewall.org/wiki/License.
cmlenz@1: #
cmlenz@1: # This software consists of voluntary contributions made by many
cmlenz@1: # individuals. For the exact contribution history, see the revision
cmlenz@230: # history and logs, available at http://genshi.edgewall.org/log/.
cmlenz@1:
cmlenz@111: """Basic support for evaluating XPath expressions against streams.
cmlenz@111:
cmlenz@230: >>> from genshi.input import XML
>>> doc = XML('''<doc>
...  <items count="4">
...       <item status="new">
...         <summary>Foo</summary>
...       </item>
...       <item status="closed">
...         <summary>Bar</summary>
...       </item>
...       <item status="closed" resolution="invalid">
...         <summary>Baz</summary>
...       </item>
...       <item status="closed" resolution="fixed">
...         <summary>Waz</summary>
...       </item>
...   </items>
... </doc>''')
athomas@516: >>> print doc.select('items/item[@status="closed" and '
athomas@516: ... '(@resolution="invalid" or not(@resolution))]/summary/text()')
athomas@516: BarBaz
cmlenz@111:
cmlenz@111: Because the XPath engine operates on markup streams (as opposed to tree
cmlenz@111: structures), it only implements a subset of the full XPath 1.0 language.
cmlenz@111: """
cmlenz@1:
cmlenz@818: from collections import deque
cmlenz@792: try:
cmlenz@792: from functools import reduce
cmlenz@792: except ImportError:
cmlenz@792: pass # builtin in Python <= 2.5
cmlenz@155: from math import ceil, floor
cmlenz@593: import operator
cmlenz@1: import re
cmlenz@818: from itertools import chain
cmlenz@1:
cmlenz@230: from genshi.core import Stream, Attrs, Namespace, QName
cmlenz@638: from genshi.core import START, END, TEXT, START_NS, END_NS, COMMENT, PI, \
cmlenz@638: START_CDATA, END_CDATA
cmlenz@1:
cmlenz@106: __all__ = ['Path', 'PathSyntaxError']
cmlenz@425: __docformat__ = 'restructuredtext en'
cmlenz@1:
cmlenz@1:
class Axis(object):
    """Namespace holding the names of the XPath axes this module supports."""

    ATTRIBUTE = 'attribute'
    CHILD = 'child'
    DESCENDANT = 'descendant'
    DESCENDANT_OR_SELF = 'descendant-or-self'
    SELF = 'self'

    @classmethod
    def forname(cls, name):
        """Look up an axis constant by its XPath name.

        The name is upper-cased and its dashes are turned into underscores
        to obtain the attribute to read (``descendant-or-self`` becomes
        ``DESCENDANT_OR_SELF``).

        :param name: the axis name as written in the expression
        :return: the matching constant, or `None` if this class has no
                 attribute of that name
        """
        constant_name = name.upper().replace('-', '_')
        return getattr(cls, constant_name, None)
cmlenz@114:
cmlenz@114:
# Module-level aliases for the axis constants; the strategies and the parser
# refer to the axes through these names.
ATTRIBUTE, CHILD, DESCENDANT, DESCENDANT_OR_SELF, SELF = (
    Axis.ATTRIBUTE,
    Axis.CHILD,
    Axis.DESCENDANT,
    Axis.DESCENDANT_OR_SELF,
    Axis.SELF,
)
cmlenz@114:
cmlenz@114:
class GenericStrategy(object):
    """Matching strategy that accepts any parsed location path.

    `supports` unconditionally returns `True`. The matcher produced by
    `test` handles predicates, including position-based ones, by tracking
    per-context counters while walking the event stream.
    """

    @classmethod
    def supports(cls, path):
        """Return `True`: this strategy does not reject any path."""
        return True

    def __init__(self, path):
        """Remember the path to match.

        :param path: a list of ``(axis, nodetest, predicates)`` steps
        """
        self.path = path

    def test(self, ignore_context):
        """Build a stateful function that tests stream events against the
        path.

        :param ignore_context: if true, the first step is re-anchored on the
                               descendant-or-self axis so the path can match
                               at any depth
        :return: a function ``_test(event, namespaces, variables,
                 updateonly=False)``. It returns `None` when the event does
                 not complete a match, `True` when it does, or the result of
                 the attribute node test when the path ends in an attribute
                 step. The ``updateonly`` argument is accepted but not
                 consulted here.
        """
        p = self.path
        if ignore_context:
            if p[0][0] is ATTRIBUTE:
                steps = [_DOTSLASHSLASH] + p
            else:
                steps = [(DESCENDANT_OR_SELF, p[0][1], p[0][2])] + p[1:]
        elif p[0][0] is CHILD or p[0][0] is ATTRIBUTE \
                or p[0][0] is DESCENDANT:
            steps = [_DOTSLASH] + p
        else:
            steps = p

        # length of real part of path - we omit a trailing attribute step,
        # which is evaluated against the node matched by the step before it
        real_len = len(steps)
        if steps[-1][0] == ATTRIBUTE:
            real_len -= 1

        # for node it contains all positions of xpath expression
        # where its child should start checking for matches
        # with list of corresponding context counters
        # there can be many of them, because position that is from
        # descendant-like axis can be achieved from different nodes
        # for example <a><a><b/></a></a> should match both //a//b[1]
        # and //a//b[2]
        # positions always form increasing sequence (invariant)
        stack = [[(0, [[]])]]

        def _test(event, namespaces, variables, updateonly=False):
            kind, data, pos = event[:3]
            retval = None

            # Manage the stack that tells us "where we are" in the stream
            if kind is END:
                if stack:
                    stack.pop()
                return None
            if kind is START_NS or kind is END_NS \
                    or kind is START_CDATA or kind is END_CDATA:
                # should we make namespaces work?
                return None

            # The comprehension deliberately avoids the name `pos`: on
            # Python 2 comprehension variables leak into the enclosing
            # scope and would overwrite the event position used below.
            pos_queue = deque([(start, counters, [])
                               for start, counters in stack[-1]])
            next_pos = []

            # places where we have to check for match, are these
            # provided by parent
            while pos_queue:
                # x: index into steps; pcou: counter packs inherited from
                # the parent; mcou: counter packs created for this node
                x, pcou, mcou = pos_queue.popleft()
                axis, nodetest, predicates = steps[x]

                # we need to push descendant-like positions from parent
                # further
                if (axis is DESCENDANT or axis is DESCENDANT_OR_SELF) and pcou:
                    if next_pos and next_pos[-1][0] == x:
                        next_pos[-1][1].extend(pcou)
                    else:
                        next_pos.append((x, pcou))

                # nodetest first
                if not nodetest(kind, data, pos, namespaces, variables):
                    continue

                # counters packs that were already bad
                missed = set()
                counters_len = len(pcou) + len(mcou)

                # number of counters - we have to create one
                # for every context position based predicate
                cnum = 0

                # tells if we have match with position x
                matched = True

                for predicate in predicates:
                    pretval = predicate(kind, data, pos, namespaces,
                                        variables)
                    if type(pretval) is float: # FIXME <- need to check
                                               # this for other types that
                                               # can be coerced to float

                        # each counter pack needs to be checked
                        for i, cou in enumerate(chain(pcou, mcou)):
                            # it was bad before
                            if i in missed:
                                continue

                            if len(cou) < cnum + 1:
                                cou.append(0)
                            cou[cnum] += 1

                            # it is bad now
                            if cou[cnum] != int(pretval):
                                missed.add(i)

                        # none of counters pack was good
                        if len(missed) == counters_len:
                            pretval = False
                        cnum += 1

                    if not pretval:
                        matched = False
                        break

                if not matched:
                    continue

                # counter for next position with current node as context node
                child_counter = []

                if x + 1 == real_len:
                    # we reached end of expression, because x + 1
                    # is equal to the length of expression
                    matched = True
                    axis, nodetest, predicates = steps[-1]
                    if axis is ATTRIBUTE:
                        matched = nodetest(kind, data, pos, namespaces,
                                           variables)
                    if matched:
                        retval = matched
                else:
                    next_axis = steps[x + 1][0]

                    # if next axis allows matching self we have
                    # to add next position to our queue
                    if next_axis is DESCENDANT_OR_SELF or next_axis is SELF:
                        if not pos_queue or pos_queue[0][0] > x + 1:
                            pos_queue.appendleft((x + 1, [], [child_counter]))
                        else:
                            pos_queue[0][2].append(child_counter)

                    # if axis is not self we have to add it to child's list
                    if next_axis is not SELF:
                        next_pos.append((x + 1, [child_counter]))

            if kind is START:
                stack.append(next_pos)

            return retval

        return _test
cmlenz@818:
cmlenz@818:
class SimplePathStrategy(object):
    """Strategy for path with only local names, attributes and text nodes."""

    @classmethod
    def supports(cls, path):
        """Tell whether this strategy can handle the given path.

        A path is rejected if its first step is on the attribute axis, if
        any step carries predicates, or if any node test is not a local
        name, comment or text node test.
        """
        if path[0][0] is ATTRIBUTE:
            return False
        allowed_tests = (LocalNameTest, CommentNodeTest, TextNodeTest)
        for _, nodetest, predicates in path:
            if predicates:
                return False
            if not isinstance(nodetest, allowed_tests):
                return False
        return True

    def __init__(self, path):
        """Split the path into fragments of steps joined by child axes.

        After construction ``self.fragments`` is either a list of fragment
        tuples (see the comment below) or `None` if the expression was found
        to be unable to match anything.

        :param path: a list of ``(axis, nodetest, predicates)`` steps
        """
        # fragments is list of tuples (fragment, pi, attr, self_beginning)
        # fragment is list of nodetests for fragment of path with only
        # child:: axes between
        # pi is KMP partial match table for this fragment
        # attr is attribute nodetest if fragment ends with @ and None otherwise
        # self_beginning is True if axis for first fragment element
        # was self (first fragment) or descendant-or-self (farther fragment)
        self.fragments = []

        self_beginning = False
        fragment = []

        def nodes_equal(node1, node2):
            """Tests if two node tests are equal"""
            if node1.__class__ is not node2.__class__:
                return False
            if node1.__class__ == LocalNameTest:
                return node1.name == node2.name
            return True

        def calculate_pi(f):
            """KMP prefix calculation for table"""
            # the indexes in prefix table are shifted by one
            # in comparision with common implementations
            # pi[i] = NORMAL_PI[i + 1]
            if len(f) == 0:
                return []
            pi = [0]
            s = 0
            for i in range(1, len(f)):
                while s > 0 and not nodes_equal(f[s], f[i]):
                    s = pi[s-1]
                if nodes_equal(f[s], f[i]):
                    s += 1
                pi.append(s)
            return pi

        for step in path:
            axis, nodetest = step[0], step[1]
            if axis is SELF:
                if len(fragment) != 0:
                    # if element is not first in fragment it has to be
                    # the same as previous one
                    # for example child::a/self::b is always wrong
                    # (fragment holds node test objects, so they are
                    # compared with nodes_equal rather than by indexing)
                    if not nodes_equal(nodetest, fragment[-1]):
                        self.fragments = None
                        return
                else:
                    self_beginning = True
                    fragment.append(nodetest)
            elif axis is CHILD:
                fragment.append(nodetest)
            elif axis is ATTRIBUTE:
                pi = calculate_pi(fragment)
                self.fragments.append((fragment, pi, nodetest, self_beginning))
                # attribute has always to be at the end, so we can jump out
                return
            else:
                pi = calculate_pi(fragment)
                self.fragments.append((fragment, pi, None, self_beginning))
                fragment = [nodetest]
                if axis is DESCENDANT:
                    self_beginning = False
                else: # DESCENDANT_OR_SELF
                    self_beginning = True
        pi = calculate_pi(fragment)
        self.fragments.append((fragment, pi, None, self_beginning))

    def test(self, ignore_context):
        """Build a stateful function that tests stream events against the
        path.

        :param ignore_context: if true, the first fragment may match at any
                               depth instead of only at the context node
        :return: a function ``_test(event, namespaces, variables,
                 updateonly=False)`` returning `None` for no match, `True`
                 for a match, or the result of the attribute node test when
                 the path ends in an attribute step. ``updateonly`` is
                 accepted but not consulted here.
        """
        frags = self.fragments

        if frags is None:
            # expression found impossible during init: nothing can match,
            # so there is no state to track at all
            def _never(event, namespaces, variables, updateonly=False):
                return None
            return _never

        # stack of triples (fid, p, ic)
        # fid is index of current fragment
        # p is position in this fragment
        # ic is if we ignore context in this fragment
        stack = []
        stack_push = stack.append
        stack_pop = stack.pop
        frags_len = len(frags)

        def _test(event, namespaces, variables, updateonly=False):
            kind, data, pos = event[:3]

            # skip events we don't care about
            if kind is END:
                if stack:
                    stack_pop()
                return None
            if kind is START_NS or kind is END_NS \
                    or kind is START_CDATA or kind is END_CDATA:
                return None

            if not stack:
                # root node, nothing on stack, special case
                fid = 0
                # skip empty fragments (there can be actually only one)
                while not frags[fid][0]:
                    fid += 1
                p = 0
                # empty fragment means descendant node at beginning
                ic = ignore_context or (fid > 0)

                # expression can match first node, if first axis is self::,
                # descendant-or-self:: or if ignore_context is True and
                # axis is not descendant::
                if not frags[fid][3] and (not ignore_context or fid > 0):
                    # axis is not self-beginning, we have to skip this node
                    stack_push((fid, p, ic))
                    return None
            else:
                # take position of parent
                fid, p, ic = stack[-1]

            if fid is not None and not ic:
                # fragment not ignoring context - we can't jump back
                frag, pi, attrib, _ = frags[fid]
                frag_len = len(frag)

                if p == frag_len:
                    # that probably means empty first fragment
                    pass
                elif frag[p](kind, data, pos, namespaces, variables):
                    # match, so we can go further
                    p += 1
                else:
                    # not matched, so there will be no match in subtree
                    fid, p = None, None

                if p == frag_len and fid + 1 != frags_len:
                    # we made it to end of fragment, we can go to following
                    fid += 1
                    p = 0
                    ic = True

            if fid is None:
                # there was no match in fragment not ignoring context
                if kind is START:
                    stack_push((fid, p, ic))
                return None

            if ic:
                # we are in fragment ignoring context
                while True:
                    frag, pi, attrib, _ = frags[fid]
                    frag_len = len(frag)

                    # KMP new "character"
                    while p > 0 and (p >= frag_len or not \
                            frag[p](kind, data, pos, namespaces, variables)):
                        p = pi[p-1]
                    if frag[p](kind, data, pos, namespaces, variables):
                        p += 1

                    if p == frag_len:
                        # end of fragment reached
                        if fid + 1 == frags_len:
                            # that was last fragment
                            break
                        else:
                            fid += 1
                            p = 0
                            ic = True
                            if not frags[fid][3]:
                                # next fragment not self-beginning
                                break
                    else:
                        break

            if kind is START:
                # we have to put new position on stack, for children

                if not ic and fid + 1 == frags_len and p == frag_len:
                    # it is end of the only, not context ignoring fragment
                    # so there will be no matches in subtree
                    stack_push((None, None, ic))
                else:
                    stack_push((fid, p, ic))

            # have we reached the end of the last fragment?
            if fid + 1 == frags_len and p == frag_len:
                if attrib: # attribute ended path, return value
                    return attrib(kind, data, pos, namespaces, variables)
                return True

            return None

        return _test
cmlenz@818:
cmlenz@818:
class SingleStepStrategy(object):
    """Matching strategy for paths that consist of exactly one step."""

    @classmethod
    def supports(cls, path):
        """Accept only paths with a single location step."""
        return len(path) == 1

    def __init__(self, path):
        """Remember the path to match.

        :param path: a list holding one ``(axis, nodetest, predicates)`` step
        """
        self.path = path

    def test(self, ignore_context):
        """Build a stateful function that tests stream events against the
        step.

        :param ignore_context: if true, nesting depth is not tracked and the
                               step may match at any depth
        :return: a function ``_test(event, namespaces, variables,
                 updateonly=False)`` returning `None` for no match, `True`
                 for a match, or the result of the attribute node test when
                 the step is on the attribute axis. ``updateonly`` is
                 accepted but not consulted here.
        """
        steps = self.path
        if steps[0][0] is ATTRIBUTE:
            # an attribute step is evaluated against the context node itself
            steps = [_DOTSLASH] + steps

        select_attr = None
        if steps[-1][0] is ATTRIBUTE:
            select_attr = steps[-1][1] or None

        # the step that is tested against every event
        first_axis, first_nodetest, first_predicates = steps[0]

        # one running counter per position based predicate
        counters = []
        # current nesting depth, boxed in a list so the closure can update it
        depth = [0]

        def _test(event, namespaces, variables, updateonly=False):
            kind, data, pos = event[:3]

            # Keep track of "where we are" in the stream
            if kind is END:
                if not ignore_context:
                    depth[0] -= 1
                return None
            if kind is START_NS or kind is END_NS \
                    or kind is START_CDATA or kind is END_CDATA:
                # should we make namespaces work?
                return None

            if not ignore_context:
                level = depth[0]
                if first_axis is SELF and level != 0:
                    outside = True
                elif first_axis is CHILD and level != 1:
                    outside = True
                elif first_axis is DESCENDANT and level < 1:
                    outside = True
                else:
                    outside = False
                if kind is START:
                    depth[0] += 1
                if outside:
                    return None

            if not first_nodetest(kind, data, pos, namespaces, variables):
                return None

            counter_index = 0
            for predicate in first_predicates:
                outcome = predicate(kind, data, pos, namespaces, variables)
                if type(outcome) is float: # FIXME <- need to check this
                                           # for other types that can be
                                           # coerced to float
                    if len(counters) < counter_index + 1:
                        counters.append(0)
                    counters[counter_index] += 1
                    if counters[counter_index] != int(outcome):
                        outcome = False
                    counter_index += 1
                if not outcome:
                    return None

            if select_attr:
                return select_attr(kind, data, pos, namespaces, variables)

            return True

        return _test
cmlenz@818:
cmlenz@818:
class Path(object):
    """Implements basic XPath support on streams.

    Instances of this class represent a "compiled" XPath expression, and
    provide methods for testing the path against a stream, as well as
    extracting a substream matching that path.
    """

    # Tried in order for every parsed path; the first class whose
    # `supports()` accepts the path is used.
    STRATEGIES = (SingleStepStrategy, SimplePathStrategy, GenericStrategy)

    def __init__(self, text, filename=None, lineno=-1):
        """Create the path object from a string.

        :param text: the path expression
        :param filename: the name of the file in which the path expression was
                         found (used in error messages)
        :param lineno: the line on which the expression was found
        :raise NotImplementedError: if none of the `STRATEGIES` supports one
                                    of the parsed paths
        """
        self.source = text
        self.paths = PathParser(text, filename, lineno).parse()
        self.strategies = []
        for path in self.paths:
            for strategy_class in self.STRATEGIES:
                if strategy_class.supports(path):
                    self.strategies.append(strategy_class(path))
                    break
            else:
                # `NotImplemented` is a comparison sentinel, not an
                # exception; raising it would only produce a TypeError.
                raise NotImplementedError('This path is not implemented')

    def __repr__(self):
        paths = []
        for path in self.paths:
            steps = []
            for axis, nodetest, predicates in path:
                steps.append('%s::%s' % (axis, nodetest))
                for predicate in predicates:
                    steps[-1] += '[%s]' % predicate
            paths.append('/'.join(steps))
        return '<%s "%s">' % (self.__class__.__name__, '|'.join(paths))

    def select(self, stream, namespaces=None, variables=None):
        """Returns a substream of the given stream that matches the path.

        If there are no matches, this method returns an empty stream.

        >>> from genshi.input import XML
        >>> xml = XML('<root><elem><child>Text</child></elem></root>')

        >>> print Path('.//child').select(xml)
        <child>Text</child>

        >>> print Path('.//child/text()').select(xml)
        Text

        :param stream: the stream to select from
        :param namespaces: (optional) a mapping of namespace prefixes to URIs
        :param variables: (optional) a mapping of variable names to values
        :return: the substream matching the path, or an empty stream
        :rtype: `Stream`
        """
        if namespaces is None:
            namespaces = {}
        if variables is None:
            variables = {}
        # Read the serializer from the object that was passed in, before the
        # name is rebound to an iterator that need not expose it.
        serializer = getattr(stream, 'serializer', None)
        stream = iter(stream)
        def _generate(stream=stream, ns=namespaces, vs=variables):
            test = self.test()
            for event in stream:
                result = test(event, ns, vs)
                if result is True:
                    yield event
                    if event[0] is START:
                        # Emit the whole subtree of the matched element.
                        # Looping over the shared iterator (rather than
                        # calling its next method) ends cleanly if the
                        # stream stops before the element is closed.
                        depth = 1
                        for subevent in stream:
                            if subevent[0] is START:
                                depth += 1
                            elif subevent[0] is END:
                                depth -= 1
                            yield subevent
                            test(subevent, ns, vs, updateonly=True)
                            if depth <= 0:
                                break
                elif result:
                    yield result
        return Stream(_generate(), serializer=serializer)

    def test(self, ignore_context=False):
        """Returns a function that can be used to track whether the path matches
        a specific stream event.

        The function returned expects the positional arguments ``event``,
        ``namespaces`` and ``variables``. The first is a stream event, while the
        latter two are a mapping of namespace prefixes to URIs, and a mapping
        of variable names to values, respectively. In addition, the function
        accepts an ``updateonly`` keyword argument that defaults to ``False``.
        If it is set to ``True``, the function only updates its internal state,
        but does not perform any tests or return a result.

        If the path matches the event, the function returns the match (for
        example, a `START` or `TEXT` event.) Otherwise, it returns ``None``.

        >>> from genshi.input import XML
        >>> xml = XML('<root><elem><child id="1"/></elem><child id="2"/></root>')
        >>> test = Path('child').test()
        >>> namespaces, variables = {}, {}
        >>> for event in xml:
        ...     if test(event, namespaces, variables):
        ...         print event[0], repr(event[1])
        START (QName(u'child'), Attrs([(QName(u'id'), u'2')]))

        :param ignore_context: if `True`, the path is interpreted like a pattern
                               in XSLT, meaning for example that it will match
                               at any depth
        :return: a function that can be used to test individual events in a
                 stream against the path
        :rtype: ``function``
        """
        tests = [s.test(ignore_context) for s in self.strategies]
        if len(tests) == 1:
            return tests[0]

        def _multi(event, namespaces, variables, updateonly=False):
            # Every alternative of a union must see every event to keep its
            # state current; the first non-None result wins.
            retval = None
            for test in tests:
                val = test(event, namespaces, variables, updateonly=updateonly)
                if retval is None:
                    retval = val
            return retval
        return _multi
cmlenz@1:
cmlenz@1:
class PathSyntaxError(Exception):
    """Error signalling that an XPath expression could not be parsed."""

    def __init__(self, message, filename=None, lineno=-1, offset=-1):
        """Create the exception.

        :param message: the error message
        :param filename: the file the expression came from; when given, the
                         file name and line number are appended to the message
        :param lineno: the line on which the expression was found
        :param offset: the offset stored on the exception
        """
        self.filename = filename
        self.lineno = lineno
        self.offset = offset
        if filename:
            message = '%s (%s, line %d)' % (message, filename, lineno)
        Exception.__init__(self, message)
cmlenz@106:
cmlenz@106:
cmlenz@137: class PathParser(object):
cmlenz@106: """Tokenizes and parses an XPath expression."""
cmlenz@106:
    # Pairs of (first, last) characters that delimit a string literal.
    _QUOTES = (("'", "'"), ('"', '"'))
    # Operator and punctuation tokens. The order matters: the tuple is joined
    # into a regex alternation below, and alternatives are tried left to
    # right, so every longer token is listed before its own prefix
    # ('::' before ':', '//' before '/', '>=' before '>', ...).
    _TOKENS = ('::', ':', '..', '.', '//', '/', '[', ']', '()', '(', ')', '@',
               '=', '!=', '!', '|', ',', '>=', '>', '<=', '<', '$')
    # Bound `findall` of the tokenizer pattern. It yields one 5-tuple per
    # match: (double-quoted string, single-quoted string, number containing
    # a decimal point, token from _TOKENS, name). A name is a run of
    # characters that are neither whitespace nor the first character of any
    # token. Whitespace matches the last alternative and leaves all five
    # groups empty.
    _tokenize = re.compile('("[^"]*")|(\'[^\']*\')|((?:\d+)?\.\d+)|(%s)|([^%s\s]+)|\s+' % (
                           '|'.join([re.escape(t) for t in _TOKENS]),
                           ''.join([re.escape(t[0]) for t in _TOKENS]))).findall
cmlenz@106:
cmlenz@139: def __init__(self, text, filename=None, lineno=-1):
cmlenz@139: self.filename = filename
cmlenz@139: self.lineno = lineno
cmlenz@163: self.tokens = filter(None, [dqstr or sqstr or number or token or name
cmlenz@163: for dqstr, sqstr, number, token, name in
cmlenz@155: self._tokenize(text)])
cmlenz@106: self.pos = 0
cmlenz@106:
cmlenz@106: # Tokenizer
cmlenz@106:
cmlenz@822: @property
cmlenz@822: def at_end(self):
cmlenz@822: return self.pos == len(self.tokens) - 1
cmlenz@822:
cmlenz@822: @property
cmlenz@822: def cur_token(self):
cmlenz@822: return self.tokens[self.pos]
cmlenz@106:
cmlenz@106: def next_token(self):
cmlenz@106: self.pos += 1
cmlenz@106: return self.tokens[self.pos]
cmlenz@106:
cmlenz@106: def peek_token(self):
cmlenz@106: if not self.at_end:
cmlenz@106: return self.tokens[self.pos + 1]
cmlenz@106: return None
cmlenz@106:
cmlenz@106: # Recursive descent parser
cmlenz@106:
cmlenz@106: def parse(self):
cmlenz@106: """Parses the XPath expression and returns a list of location path
cmlenz@106: tests.
cmlenz@106:
cmlenz@106: For union expressions (such as `*|text()`), this function returns one
cmlenz@106: test for each operand in the union. For patch expressions that don't
cmlenz@106: use the union operator, the function always returns a list of size 1.
cmlenz@106:
cmlenz@106: Each path test in turn is a sequence of tests that correspond to the
cmlenz@111: location steps, each tuples of the form `(axis, testfunc, predicates)`
cmlenz@106: """
cmlenz@106: paths = [self._location_path()]
cmlenz@106: while self.cur_token == '|':
cmlenz@106: self.next_token()
cmlenz@106: paths.append(self._location_path())
cmlenz@106: if not self.at_end:
cmlenz@106: raise PathSyntaxError('Unexpected token %r after end of expression'
cmlenz@139: % self.cur_token, self.filename, self.lineno)
cmlenz@106: return paths
cmlenz@106:
cmlenz@106: def _location_path(self):
cmlenz@106: steps = []
cmlenz@106: while True:
cmlenz@215: if self.cur_token.startswith('/'):
cmlenz@818: if not steps:
cmlenz@818: if self.cur_token == '//':
cmlenz@818: # hack to make //* match every node - also root
cmlenz@818: self.next_token()
cmlenz@818: axis, nodetest, predicates = self._location_step()
cmlenz@818: steps.append((DESCENDANT_OR_SELF, nodetest,
cmlenz@818: predicates))
cmlenz@818: if self.at_end or not self.cur_token.startswith('/'):
cmlenz@818: break
cmlenz@818: continue
cmlenz@818: else:
cmlenz@818: raise PathSyntaxError('Absolute location paths not '
cmlenz@818: 'supported', self.filename,
cmlenz@818: self.lineno)
cmlenz@818: elif self.cur_token == '//':
cmlenz@215: steps.append((DESCENDANT_OR_SELF, NodeTest(), []))
cmlenz@111: self.next_token()
cmlenz@111:
cmlenz@137: axis, nodetest, predicates = self._location_step()
cmlenz@137: if not axis:
cmlenz@145: axis = CHILD
cmlenz@137: steps.append((axis, nodetest, predicates))
cmlenz@111: if self.at_end or not self.cur_token.startswith('/'):
cmlenz@106: break
cmlenz@111:
cmlenz@106: return steps
cmlenz@106:
cmlenz@106: def _location_step(self):
cmlenz@106: if self.cur_token == '@':
cmlenz@114: axis = ATTRIBUTE
cmlenz@106: self.next_token()
cmlenz@111: elif self.cur_token == '.':
cmlenz@114: axis = SELF
cmlenz@137: elif self.cur_token == '..':
cmlenz@139: raise PathSyntaxError('Unsupported axis "parent"', self.filename,
cmlenz@139: self.lineno)
cmlenz@111: elif self.peek_token() == '::':
cmlenz@114: axis = Axis.forname(self.cur_token)
cmlenz@114: if axis is None:
cmlenz@139: raise PathSyntaxError('Unsupport axis "%s"' % axis,
cmlenz@139: self.filename, self.lineno)
cmlenz@111: self.next_token()
cmlenz@111: self.next_token()
cmlenz@106: else:
cmlenz@137: axis = None
cmlenz@137: nodetest = self._node_test(axis or CHILD)
cmlenz@111: predicates = []
cmlenz@106: while self.cur_token == '[':
cmlenz@111: predicates.append(self._predicate())
cmlenz@137: return axis, nodetest, predicates
cmlenz@106:
cmlenz@106: def _node_test(self, axis=None):
cmlenz@224: test = prefix = None
cmlenz@224: next_token = self.peek_token()
cmlenz@224: if next_token in ('(', '()'): # Node type test
cmlenz@106: test = self._node_type()
cmlenz@106:
cmlenz@224: elif next_token == ':': # Namespace prefix
cmlenz@224: prefix = self.cur_token
cmlenz@224: self.next_token()
cmlenz@224: localname = self.next_token()
cmlenz@224: if localname == '*':
cmlenz@224: test = QualifiedPrincipalTypeTest(axis, prefix)
cmlenz@224: else:
cmlenz@224: test = QualifiedNameTest(axis, prefix, localname)
cmlenz@224:
cmlenz@106: else: # Name test
cmlenz@137: if self.cur_token == '*':
cmlenz@137: test = PrincipalTypeTest(axis)
cmlenz@137: elif self.cur_token == '.':
cmlenz@137: test = NodeTest()
cmlenz@106: else:
cmlenz@137: test = LocalNameTest(axis, self.cur_token)
cmlenz@106:
cmlenz@106: if not self.at_end:
cmlenz@106: self.next_token()
cmlenz@106: return test
cmlenz@106:
cmlenz@106: def _node_type(self):
cmlenz@106: name = self.cur_token
cmlenz@106: self.next_token()
cmlenz@137:
cmlenz@137: args = []
cmlenz@137: if self.cur_token != '()':
cmlenz@137: # The processing-instruction() function optionally accepts the
cmlenz@137: # name of the PI as argument, which must be a literal string
cmlenz@137: self.next_token() # (
cmlenz@137: if self.cur_token != ')':
cmlenz@137: string = self.cur_token
cmlenz@137: if (string[0], string[-1]) in self._QUOTES:
cmlenz@137: string = string[1:-1]
cmlenz@137: args.append(string)
cmlenz@137:
cmlenz@137: cls = _nodetest_map.get(name)
cmlenz@137: if not cls:
cmlenz@139: raise PathSyntaxError('%s() not allowed here' % name, self.filename,
cmlenz@139: self.lineno)
cmlenz@137: return cls(*args)
cmlenz@106:
cmlenz@106: def _predicate(self):
cmlenz@106: assert self.cur_token == '['
cmlenz@106: self.next_token()
cmlenz@111: expr = self._or_expr()
cmlenz@121: if self.cur_token != ']':
cmlenz@121: raise PathSyntaxError('Expected "]" to close predicate, '
cmlenz@139: 'but found "%s"' % self.cur_token,
cmlenz@139: self.filename, self.lineno)
cmlenz@111: if not self.at_end:
cmlenz@111: self.next_token()
cmlenz@111: return expr
cmlenz@106:
cmlenz@106: def _or_expr(self):
cmlenz@106: expr = self._and_expr()
cmlenz@106: while self.cur_token == 'or':
cmlenz@106: self.next_token()
cmlenz@137: expr = OrOperator(expr, self._and_expr())
cmlenz@106: return expr
cmlenz@106:
cmlenz@106: def _and_expr(self):
cmlenz@106: expr = self._equality_expr()
cmlenz@106: while self.cur_token == 'and':
cmlenz@106: self.next_token()
cmlenz@137: expr = AndOperator(expr, self._equality_expr())
cmlenz@106: return expr
cmlenz@106:
cmlenz@106: def _equality_expr(self):
cmlenz@162: expr = self._relational_expr()
cmlenz@162: while self.cur_token in ('=', '!='):
cmlenz@162: op = _operator_map[self.cur_token]
cmlenz@162: self.next_token()
cmlenz@162: expr = op(expr, self._relational_expr())
cmlenz@162: return expr
cmlenz@162:
cmlenz@162: def _relational_expr(self):
athomas@516: expr = self._sub_expr()
cmlenz@162: while self.cur_token in ('>', '>=', '<', '>='):
cmlenz@162: op = _operator_map[self.cur_token]
cmlenz@106: self.next_token()
athomas@516: expr = op(expr, self._sub_expr())
athomas@516: return expr
athomas@516:
def _sub_expr(self):
    """Parse a parenthesized sub-expression or a primary expression.

    Without a leading ``(`` this simply delegates to `_primary_expr`.
    Otherwise the enclosed expression is parsed with `_or_expr` and the
    closing ``)`` is consumed.

    :raise PathSyntaxError: if the sub-expression is not closed by ``)``
    """
    if self.cur_token != '(':
        return self._primary_expr()
    self.next_token()
    inner = self._or_expr()
    if self.cur_token == ')':
        self.next_token()
        return inner
    raise PathSyntaxError('Expected ")" to close sub-expression, '
                          'but found "%s"' % self.cur_token,
                          self.filename, self.lineno)
cmlenz@106:
def _primary_expr(self):
    """Parse a primary expression and return the corresponding node.

    Depending on the current token the result is a `StringLiteral`, a
    `NumberLiteral`, a `VariableReference`, a function call, or a node
    test (on the attribute axis when introduced by ``@``).
    """
    token = self.cur_token

    # A token wrapped in matching quotes is a string literal.
    if len(token) > 1 and (token[0], token[-1]) in self._QUOTES:
        self.next_token()
        return StringLiteral(token[1:-1])

    # A leading digit or dot marks a number literal.
    if token[0].isdigit() or token[0] == '.':
        self.next_token()
        return NumberLiteral(as_float(token))

    # "$" is followed by the variable name, which is consumed as well.
    if token == '$':
        var_name = self.next_token()
        self.next_token()
        return VariableReference(var_name)

    # A name immediately followed by "(" is a function call.
    if not self.at_end and self.peek_token().startswith('('):
        return self._function_call()

    # Anything else is a node test.
    if token != '@':
        return self._node_test(None)
    self.next_token()
    return self._node_test(ATTRIBUTE)
cmlenz@137:
def _function_call(self):
    """Parse a function call and return the instantiated function node.

    The current token is the function name. It is followed either by the
    single token ``()`` (no arguments) or by ``(``, a comma-separated
    list of expressions and ``)``. The node class is looked up in
    `_function_map` and instantiated with the parsed arguments.

    :raise PathSyntaxError: if the argument list is not closed by ``)``
                            or the function name is unknown
    """
    func_name = self.cur_token
    arguments = []
    if self.next_token() != '()':
        assert self.cur_token == '('
        self.next_token()
        arguments.append(self._or_expr())
        while self.cur_token == ',':
            self.next_token()
            arguments.append(self._or_expr())
        if self.cur_token != ')':
            raise PathSyntaxError('Expected ")" to close function argument '
                                  'list, but found "%s"' % self.cur_token,
                                  self.filename, self.lineno)
    self.next_token()
    factory = _function_map.get(func_name)
    if factory:
        return factory(*arguments)
    raise PathSyntaxError('Unsupported function "%s"' % func_name,
                          self.filename, self.lineno)
cmlenz@155:
cmlenz@137:
athomas@518: # Type coercion
athomas@518:
def as_scalar(value):
    """Reduce `value` to a scalar.

    An `Attrs` object must contain exactly one attribute, whose value is
    returned. Any other kind of value is passed through untouched.
    """
    if not isinstance(value, Attrs):
        return value
    assert len(value) == 1
    _, attr_value = value[0][0], value[0][1]
    return attr_value
athomas@518:
def as_float(value):
    """Coerce `value` to a float after reducing it with `as_scalar`."""
    # FIXME - a bool is coerced to 0.0 or 1.0 here and then compared as a
    #         float, which is probably not ideal.
    scalar = as_scalar(value)
    return float(scalar)
athomas@518:
def as_long(value):
    """Coerce `value` to a long integer after reducing it with `as_scalar`."""
    scalar = as_scalar(value)
    return long(scalar)
athomas@518:
def as_string(value):
    """Coerce `value` to a unicode string after reducing it with `as_scalar`.

    The boolean ``False`` is rendered as the empty string instead of
    ``'False'``.
    """
    scalar = as_scalar(value)
    if scalar is not False:
        return unicode(scalar)
    return ''
athomas@518:
def as_bool(value):
    """Coerce `value` to a boolean after reducing it with `as_scalar`."""
    scalar = as_scalar(value)
    return bool(scalar)
athomas@518:
athomas@518:
cmlenz@137: # Node tests
cmlenz@137:
class PrincipalTypeTest(object):
    """Node test that matches any event of the given principal type (the
    ``*`` test).
    """
    __slots__ = ['principal_type']

    def __init__(self, principal_type):
        self.principal_type = principal_type

    def __call__(self, kind, data, pos, namespaces, variables):
        # Only START events can match; everything else yields None.
        if kind is not START:
            return None
        if self.principal_type is not ATTRIBUTE:
            return True
        # On the attribute axis the result is the attribute collection
        # (``data[1]``), or None when it is empty.
        return data[1] or None

    def __repr__(self):
        return '*'
cmlenz@137:
class QualifiedPrincipalTypeTest(object):
    """Node test that matches any event with the given principal type in a
    specific namespace (the ``prefix:*`` test).

    The prefix is resolved through the `namespaces` mapping passed to each
    call.
    """
    __slots__ = ['principal_type', 'prefix']

    def __init__(self, principal_type, prefix):
        self.principal_type = principal_type
        self.prefix = prefix

    def __call__(self, kind, data, pos, namespaces, variables):
        namespace = Namespace(namespaces.get(self.prefix))
        if kind is not START:
            return None
        if self.principal_type is ATTRIBUTE:
            # An attribute test must never fall back to testing the
            # element name: previously an element without attributes was
            # matched whenever the element itself was in the namespace.
            return Attrs([(name, value) for name, value in (data[1] or ())
                          if name in namespace]) or None
        return data[0] in namespace

    def __repr__(self):
        return '%s:*' % self.prefix
cmlenz@224:
class LocalNameTest(object):
    """Node test that matches any event with the given principal type and
    local name.

    For the attribute axis the result is an `Attrs` object holding the
    single matching attribute; for elements it is a boolean.
    """
    __slots__ = ['principal_type', 'name']

    def __init__(self, principal_type, name):
        self.principal_type = principal_type
        self.name = name

    def __call__(self, kind, data, pos, namespaces, variables):
        if kind is not START:
            return None
        if self.principal_type is ATTRIBUTE:
            if self.name in data[1]:
                return Attrs([(self.name, data[1].get(self.name))])
            # A missing attribute is a non-match. Previously this fell
            # through to the element-name comparison, so ``@foo`` matched
            # a ``<foo>`` element that had no such attribute. ``False``
            # (not None) keeps string coercion of the result empty.
            return False
        return data[0].localname == self.name

    def __repr__(self):
        return self.name
cmlenz@137:
class QualifiedNameTest(object):
    """Node test that matches any event with the given principal type and
    qualified name.

    The prefix is resolved through the `namespaces` mapping passed to each
    call. For the attribute axis the result is an `Attrs` object holding
    the single matching attribute; for elements it is a boolean.
    """
    __slots__ = ['principal_type', 'prefix', 'name']

    def __init__(self, principal_type, prefix, name):
        self.principal_type = principal_type
        self.prefix = prefix
        self.name = name

    def __call__(self, kind, data, pos, namespaces, variables):
        qname = QName('%s}%s' % (namespaces.get(self.prefix), self.name))
        if kind is not START:
            return None
        if self.principal_type is ATTRIBUTE:
            if qname in data[1]:
                # Fetch the value under the same qualified name that was
                # used for the membership test; looking it up by local
                # name did not find the namespaced attribute.
                return Attrs([(qname, data[1].get(qname))])
            # A missing attribute is a non-match; do not fall back to
            # comparing the element name. ``False`` (not None) keeps
            # string coercion of the result empty.
            return False
        return data[0] == qname

    def __repr__(self):
        return '%s:%s' % (self.prefix, self.name)
cmlenz@224:
class CommentNodeTest(object):
    """Node test for ``comment()``: true only for COMMENT events."""
    __slots__ = []

    def __repr__(self):
        return 'comment()'

    def __call__(self, kind, data, pos, namespaces, variables):
        is_comment = kind is COMMENT
        return is_comment
cmlenz@137:
class NodeTest(object):
    """Node test for ``node()``, which matches any node."""
    __slots__ = []

    def __repr__(self):
        return 'node()'

    def __call__(self, kind, data, pos, namespaces, variables):
        # START events simply match; any other event is handed back as
        # the ``(kind, data, pos)`` tuple itself.
        if kind is not START:
            return kind, data, pos
        return True
cmlenz@137:
class ProcessingInstructionNodeTest(object):
    """Node test for ``processing-instruction()``.

    Matches PI events; when a `target` was given, only those whose first
    data item equals that target.
    """
    __slots__ = ['target']

    def __init__(self, target=None):
        self.target = target

    def __call__(self, kind, data, pos, namespaces, variables):
        if kind is not PI:
            return False
        return not self.target or data[0] == self.target

    def __repr__(self):
        if self.target:
            arg = '"' + self.target + '"'
        else:
            arg = ''
        return 'processing-instruction(%s)' % arg
cmlenz@137:
class TextNodeTest(object):
    """Node test for ``text()``: true only for TEXT events."""
    __slots__ = []

    def __repr__(self):
        return 'text()'

    def __call__(self, kind, data, pos, namespaces, variables):
        is_text = kind is TEXT
        return is_text
cmlenz@137:
# Maps the name of a node type test as written in a path expression to the
# class implementing it.
_nodetest_map = {
    'comment': CommentNodeTest,
    'node': NodeTest,
    'processing-instruction': ProcessingInstructionNodeTest,
    'text': TextNodeTest,
}
cmlenz@137:
cmlenz@137: # Functions
cmlenz@137:
class Function(object):
    """Common base class of all function nodes in XPath expressions.

    It defines no behaviour of its own.
    """
cmlenz@155:
class BooleanFunction(Function):
    """The `boolean` function: evaluates its argument and converts the
    result to a boolean value.
    """
    __slots__ = ['expr']
    _return_type = bool

    def __init__(self, expr):
        self.expr = expr

    def __call__(self, kind, data, pos, namespaces, variables):
        return as_bool(self.expr(kind, data, pos, namespaces, variables))

    def __repr__(self):
        return 'boolean(%r)' % self.expr
cmlenz@155:
class CeilingFunction(Function):
    """The `ceiling` function, which returns the smallest integer number
    that is not less than the given number.
    """
    __slots__ = ['number']

    def __init__(self, number):
        self.number = number

    def __call__(self, kind, data, pos, namespaces, variables):
        value = self.number(kind, data, pos, namespaces, variables)
        as_number = as_float(value)
        return ceil(as_number)

    def __repr__(self):
        return 'ceiling(%r)' % self.number
cmlenz@155:
class ConcatFunction(Function):
    """The `concat` function, which joins the string values of an
    arbitrary number of argument expressions.
    """
    __slots__ = ['exprs']

    def __init__(self, *exprs):
        self.exprs = exprs

    def __call__(self, kind, data, pos, namespaces, variables):
        # Evaluate every argument first, then coerce each result.
        values = [expr(kind, data, pos, namespaces, variables)
                  for expr in self.exprs]
        return ''.join([as_string(value) for value in values])

    def __repr__(self):
        rendered = [repr(expr) for expr in self.exprs]
        return 'concat(%s)' % ', '.join(rendered)
cmlenz@155:
class ContainsFunction(Function):
    """The `contains` function, which tells whether the string value of
    its first argument contains the string value of its second argument.
    """
    __slots__ = ['string1', 'string2']

    def __init__(self, string1, string2):
        self.string1 = string1
        self.string2 = string2

    def __call__(self, kind, data, pos, namespaces, variables):
        haystack = self.string1(kind, data, pos, namespaces, variables)
        needle = self.string2(kind, data, pos, namespaces, variables)
        return as_string(needle) in as_string(haystack)

    def __repr__(self):
        return 'contains(%r, %r)' % (self.string1, self.string2)
cmlenz@155:
class MatchesFunction(Function):
    """The `matches` function, which returns whether a string matches a
    regular expression.

    The first argument is the string to search, the second the pattern.
    The optional third argument is a string of flag characters, each of
    which must be a key of `flag_mapping`; `re.U` is always enabled.
    """
    __slots__ = ['string1', 'string2', 'flags']
    flag_mapping = {'s': re.S, 'm': re.M, 'i': re.I, 'x': re.X}

    def __init__(self, string1, string2, flags=''):
        self.string1 = string1
        self.string2 = string2
        self.flags = self._map_flags(flags)

    def __call__(self, kind, data, pos, namespaces, variables):
        string1 = as_string(self.string1(kind, data, pos, namespaces,
                                         variables))
        string2 = as_string(self.string2(kind, data, pos, namespaces,
                                         variables))
        # Return a real boolean rather than the match object, in line
        # with the other string predicates.
        return re.search(string2, string1, self.flags) is not None

    def _map_flags(self, flags):
        """Translate a string of flag characters into `re` flags.

        :raise KeyError: if a flag character is not in `flag_mapping`
        """
        # Arguments are parsed expression nodes, so the flags normally
        # arrive as a string literal node; unwrap its text. A plain
        # string is accepted as-is.
        flags = getattr(flags, 'text', flags)
        return reduce(operator.or_,
                      [self.flag_mapping[flag] for flag in flags], re.U)

    def __repr__(self):
        return 'matches(%r, %r)' % (self.string1, self.string2)
athomas@534:
class FalseFunction(Function):
    """The `false` function, which takes no arguments and always evaluates
    to ``False``.
    """
    __slots__ = []

    def __repr__(self):
        return 'false()'

    def __call__(self, kind, data, pos, namespaces, variables):
        return False
cmlenz@155:
class FloorFunction(Function):
    """The `floor` function, which returns the largest integer number that
    is not greater than the given number.
    """
    __slots__ = ['number']

    def __init__(self, number):
        self.number = number

    def __call__(self, kind, data, pos, namespaces, variables):
        value = self.number(kind, data, pos, namespaces, variables)
        as_number = as_float(value)
        return floor(as_number)

    def __repr__(self):
        return 'floor(%r)' % self.number
cmlenz@155:
class LocalNameFunction(Function):
    """The `local-name` function, which returns the local name of the
    current element.

    Evaluates to None for anything but a START event.
    """
    __slots__ = []

    def __repr__(self):
        return 'local-name()'

    def __call__(self, kind, data, pos, namespaces, variables):
        if kind is not START:
            return None
        element_name = data[0]
        return element_name.localname
cmlenz@137:
class NameFunction(Function):
    """The `name` function, which returns the qualified name of the current
    element.

    Evaluates to None for anything but a START event.
    """
    __slots__ = []

    def __repr__(self):
        return 'name()'

    def __call__(self, kind, data, pos, namespaces, variables):
        if kind is not START:
            return None
        element_name = data[0]
        return element_name
cmlenz@137:
class NamespaceUriFunction(Function):
    """The `namespace-uri` function, which returns the namespace URI of
    the current element.

    Evaluates to None for anything but a START event.
    """
    __slots__ = []

    def __repr__(self):
        return 'namespace-uri()'

    def __call__(self, kind, data, pos, namespaces, variables):
        if kind is not START:
            return None
        element_name = data[0]
        return element_name.namespace
cmlenz@137:
class NotFunction(Function):
    """The `not` function, which evaluates its argument, converts the
    result to a boolean and returns the negation.
    """
    __slots__ = ['expr']

    def __init__(self, expr):
        self.expr = expr

    def __call__(self, kind, data, pos, namespaces, variables):
        outcome = self.expr(kind, data, pos, namespaces, variables)
        return not as_bool(outcome)

    def __repr__(self):
        return 'not(%s)' % self.expr
cmlenz@137:
class NormalizeSpaceFunction(Function):
    """The `normalize-space` function, which removes leading and trailing
    whitespace in the given string, and replaces every sequence of
    whitespace characters inside the string with a single space.
    """
    __slots__ = ['expr']
    # ``\s+`` rather than ``\s{2,}``: a lone tab or newline is a sequence
    # of whitespace too and has to become a space.
    _normalize = re.compile(r'\s+').sub

    def __init__(self, expr):
        self.expr = expr

    def __call__(self, kind, data, pos, namespaces, variables):
        string = self.expr(kind, data, pos, namespaces, variables)
        return self._normalize(' ', as_string(string).strip())

    def __repr__(self):
        return 'normalize-space(%s)' % repr(self.expr)
cmlenz@155:
class NumberFunction(Function):
    """The `number` function, which evaluates its argument and converts
    the result to a float.
    """
    __slots__ = ['expr']

    def __init__(self, expr):
        self.expr = expr

    def __call__(self, kind, data, pos, namespaces, variables):
        return as_float(self.expr(kind, data, pos, namespaces, variables))

    def __repr__(self):
        return 'number(%r)' % self.expr
cmlenz@155:
class RoundFunction(Function):
    """The `round` function, which rounds the given number to the nearest
    integer number using the built-in ``round``.
    """
    __slots__ = ['number']

    def __init__(self, number):
        self.number = number

    def __call__(self, kind, data, pos, namespaces, variables):
        value = self.number(kind, data, pos, namespaces, variables)
        as_number = as_float(value)
        return round(as_number)

    def __repr__(self):
        return 'round(%r)' % self.number
cmlenz@162:
class StartsWithFunction(Function):
    """The `starts-with` function, which tells whether the string value of
    its first argument begins with the string value of its second argument.
    """
    __slots__ = ['string1', 'string2']

    def __init__(self, string1, string2):
        self.string1 = string1
        self.string2 = string2

    def __call__(self, kind, data, pos, namespaces, variables):
        subject = self.string1(kind, data, pos, namespaces, variables)
        prefix = self.string2(kind, data, pos, namespaces, variables)
        subject_text = as_string(subject)
        return subject_text.startswith(as_string(prefix))

    def __repr__(self):
        return 'starts-with(%r, %r)' % (self.string1, self.string2)
cmlenz@155:
class StringLengthFunction(Function):
    """The `string-length` function, which returns the number of characters
    in the string value of its argument.
    """
    __slots__ = ['expr']

    def __init__(self, expr):
        self.expr = expr

    def __call__(self, kind, data, pos, namespaces, variables):
        text = as_string(self.expr(kind, data, pos, namespaces, variables))
        return len(text)

    def __repr__(self):
        return 'string-length(%r)' % self.expr
cmlenz@155:
class SubstringFunction(Function):
    """The `substring` function that returns the part of a string that starts
    at the given offset, and optionally limited to the given length.

    The offset is applied as a zero-based Python slice index.
    NOTE(review): XPath 1.0 counts positions from 1; the zero-based offset
    is kept here so existing expressions keep working -- confirm intent.
    """
    __slots__ = ['string', 'start', 'length']

    def __init__(self, string, start, length=None):
        self.string = string
        self.start = start
        self.length = length

    def __call__(self, kind, data, pos, namespaces, variables):
        # Slice the string value, not the raw result, so attribute
        # results are handled like in the other string functions.
        string = as_string(self.string(kind, data, pos, namespaces,
                                       variables))
        start = as_long(self.start(kind, data, pos, namespaces, variables))
        if self.length is None:
            return string[start:]
        length = as_long(self.length(kind, data, pos, namespaces, variables))
        if length <= 0:
            return string[:0]
        # The length limits the size of the result. It used to be
        # subtracted from the end of the string instead, which only gave
        # the right answer by coincidence.
        return string[start:start + length]

    def __repr__(self):
        if self.length is not None:
            return 'substring(%r, %r, %r)' % (self.string, self.start,
                                              self.length)
        else:
            return 'substring(%r, %r)' % (self.string, self.start)
cmlenz@155:
class SubstringAfterFunction(Function):
    """The `substring-after` function, which returns the part of the first
    string that follows the first occurrence of the second string, or the
    empty string when there is no such occurrence.
    """
    __slots__ = ['string1', 'string2']

    def __init__(self, string1, string2):
        self.string1 = string1
        self.string2 = string2

    def __call__(self, kind, data, pos, namespaces, variables):
        subject = as_string(self.string1(kind, data, pos, namespaces,
                                         variables))
        marker = as_string(self.string2(kind, data, pos, namespaces,
                                        variables))
        found_at = subject.find(marker)
        if found_at < 0:
            return ''
        return subject[found_at + len(marker):]

    def __repr__(self):
        return 'substring-after(%r, %r)' % (self.string1, self.string2)
cmlenz@155:
class SubstringBeforeFunction(Function):
    """The `substring-before` function that returns the part of a string that
    is found before the given substring.

    Evaluates to the empty string when the substring does not occur.
    """
    __slots__ = ['string1', 'string2']

    def __init__(self, string1, string2):
        self.string1 = string1
        self.string2 = string2

    def __call__(self, kind, data, pos, namespaces, variables):
        string1 = as_string(self.string1(kind, data, pos, namespaces,
                                         variables))
        string2 = as_string(self.string2(kind, data, pos, namespaces,
                                         variables))
        index = string1.find(string2)
        if index >= 0:
            return string1[:index]
        return ''

    def __repr__(self):
        # Used to print "substring-after", copied from the sibling class.
        return 'substring-before(%r, %r)' % (self.string1, self.string2)
cmlenz@155:
class TranslateFunction(Function):
    """The `translate` function that translates a set of characters in a
    string to target set of characters.

    Each character of the second argument is replaced by the character at
    the same position in the third argument. Characters without a
    counterpart in the third argument are removed, and if a character is
    listed more than once, its first occurrence determines the result.
    """
    __slots__ = ['string', 'fromchars', 'tochars']

    def __init__(self, string, fromchars, tochars):
        self.string = string
        self.fromchars = fromchars
        self.tochars = tochars

    def __call__(self, kind, data, pos, namespaces, variables):
        string = as_string(self.string(kind, data, pos, namespaces,
                                       variables))
        fromchars = as_string(self.fromchars(kind, data, pos, namespaces,
                                             variables))
        tochars = as_string(self.tochars(kind, data, pos, namespaces,
                                         variables))
        table = {}
        for index, char in enumerate(fromchars):
            code = ord(char)
            if code in table:
                # The first occurrence of a character wins.
                continue
            if index < len(tochars):
                table[code] = ord(tochars[index])
            else:
                # No replacement given: mapping to None deletes the
                # character. A plain zip() silently left it in place.
                table[code] = None
        return string.translate(table)

    def __repr__(self):
        return 'translate(%r, %r, %r)' % (self.string, self.fromchars,
                                          self.tochars)
cmlenz@155:
class TrueFunction(Function):
    """The `true` function, which takes no arguments and always evaluates
    to ``True``.
    """
    __slots__ = []

    def __repr__(self):
        return 'true()'

    def __call__(self, kind, data, pos, namespaces, variables):
        return True
cmlenz@155:
# Maps the name of a function as written in a path expression to the class
# implementing it.
_function_map = {
    'boolean': BooleanFunction,
    'ceiling': CeilingFunction,
    'concat': ConcatFunction,
    'contains': ContainsFunction,
    'matches': MatchesFunction,
    'false': FalseFunction,
    'floor': FloorFunction,
    'local-name': LocalNameFunction,
    'name': NameFunction,
    'namespace-uri': NamespaceUriFunction,
    'normalize-space': NormalizeSpaceFunction,
    'not': NotFunction,
    'number': NumberFunction,
    'round': RoundFunction,
    'starts-with': StartsWithFunction,
    'string-length': StringLengthFunction,
    'substring': SubstringFunction,
    'substring-after': SubstringAfterFunction,
    'substring-before': SubstringBeforeFunction,
    'translate': TranslateFunction,
    'true': TrueFunction,
}
cmlenz@137:
cmlenz@179: # Literals & Variables
cmlenz@137:
class Literal(object):
    """Common base class of literal nodes in XPath expressions.

    It defines no behaviour of its own.
    """
cmlenz@155:
class StringLiteral(Literal):
    """A string literal node, which always evaluates to its text."""
    __slots__ = ['text']

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        return '"%s"' % self.text

    def __call__(self, kind, data, pos, namespaces, variables):
        literal_text = self.text
        return literal_text
cmlenz@137:
class NumberLiteral(Literal):
    """A number literal node, which always evaluates to its number."""
    __slots__ = ['number']

    def __init__(self, number):
        self.number = number

    def __repr__(self):
        return str(self.number)

    def __call__(self, kind, data, pos, namespaces, variables):
        literal_number = self.number
        return literal_number
cmlenz@137:
class VariableReference(Literal):
    """A variable reference node.

    Evaluates to the entry of the `variables` mapping stored under the
    variable name, as returned by its ``get`` method.
    """
    __slots__ = ['name']

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return str(self.name)

    def __call__(self, kind, data, pos, namespaces, variables):
        lookup = variables.get
        return lookup(self.name)
cmlenz@179:
cmlenz@137: # Operators
cmlenz@137:
class AndOperator(object):
    """The boolean operator `and`.

    The right operand is only evaluated when the left one is true.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        left = self.lval(kind, data, pos, namespaces, variables)
        if not as_bool(left):
            return False
        right = self.rval(kind, data, pos, namespaces, variables)
        return as_bool(right)

    def __repr__(self):
        return '%s and %s' % (self.lval, self.rval)
cmlenz@137:
class EqualsOperator(object):
    """The equality operator `=`.

    Both operands are evaluated, reduced to scalars and compared with
    ``==``.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        left = as_scalar(self.lval(kind, data, pos, namespaces, variables))
        right = as_scalar(self.rval(kind, data, pos, namespaces, variables))
        are_equal = left == right
        return are_equal

    def __repr__(self):
        return '%s=%s' % (self.lval, self.rval)
cmlenz@137:
class NotEqualsOperator(object):
    """The inequality operator `!=`.

    Both operands are evaluated, reduced to scalars and compared with
    ``!=``.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        left = as_scalar(self.lval(kind, data, pos, namespaces, variables))
        right = as_scalar(self.rval(kind, data, pos, namespaces, variables))
        are_different = left != right
        return are_different

    def __repr__(self):
        return '%s!=%s' % (self.lval, self.rval)
cmlenz@161:
class OrOperator(object):
    """The boolean operator `or`.

    The right operand is only evaluated when the left one is false.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        left = self.lval(kind, data, pos, namespaces, variables)
        if as_bool(left):
            return True
        right = self.rval(kind, data, pos, namespaces, variables)
        return as_bool(right)

    def __repr__(self):
        return '%s or %s' % (self.lval, self.rval)
cmlenz@137:
class GreaterThanOperator(object):
    """The relational operator `>` (greater than).

    Both operands are evaluated and compared as floats.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        left = self.lval(kind, data, pos, namespaces, variables)
        right = self.rval(kind, data, pos, namespaces, variables)
        is_greater = as_float(left) > as_float(right)
        return is_greater

    def __repr__(self):
        return '%s>%s' % (self.lval, self.rval)
cmlenz@162:
class GreaterThanOrEqualOperator(object):
    """The relational operator `>=` (greater than or equal).

    Both operands are evaluated and compared as floats.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        left = self.lval(kind, data, pos, namespaces, variables)
        right = self.rval(kind, data, pos, namespaces, variables)
        is_greater_or_equal = as_float(left) >= as_float(right)
        return is_greater_or_equal

    def __repr__(self):
        return '%s>=%s' % (self.lval, self.rval)
cmlenz@162:
class LessThanOperator(object):
    """The relational operator `<` (less than).

    Both operands are evaluated and compared as floats.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        left = self.lval(kind, data, pos, namespaces, variables)
        right = self.rval(kind, data, pos, namespaces, variables)
        is_less = as_float(left) < as_float(right)
        return is_less

    def __repr__(self):
        return '%s<%s' % (self.lval, self.rval)
cmlenz@162:
class LessThanOrEqualOperator(object):
    """The relational operator `<=` (less than or equal).

    Both operands are evaluated and compared as floats.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        left = self.lval(kind, data, pos, namespaces, variables)
        right = self.rval(kind, data, pos, namespaces, variables)
        is_less_or_equal = as_float(left) <= as_float(right)
        return is_less_or_equal

    def __repr__(self):
        return '%s<=%s' % (self.lval, self.rval)
cmlenz@162:
# Maps an operator token to the class implementing it.
# The last entry used to repeat the '>=' key, which silently replaced the
# greater-than-or-equal operator with the less-than-or-equal one and left
# '<=' without any mapping.
_operator_map = {'=': EqualsOperator, '!=': NotEqualsOperator,
                 '>': GreaterThanOperator, '>=': GreaterThanOrEqualOperator,
                 '<': LessThanOperator, '<=': LessThanOrEqualOperator}
cmlenz@333:
cmlenz@333:
# Pre-built 3-tuples for the descendant-or-self and self axes, each paired
# with a wildcard node test and an empty tuple.
# NOTE(review): presumably (axis, node test, predicates) location steps;
# confirm against the code that consumes them.
_DOTSLASHSLASH = (
    DESCENDANT_OR_SELF,
    PrincipalTypeTest(None),
    (),
)
_DOTSLASH = (
    SELF,
    PrincipalTypeTest(None),
    (),
)