# HG changeset patch
# User cmlenz
# Date 1155751982 0
# Node ID 9a5aedda10990fe19f528dd8bd489715972843fc
# Parent 8bd5c8cd33e0e6dc4f8f9f7c1573484d3b4dfceb
* String literals in XPath expressions that contains spaces are now tokenizes correctly.
* Added implementation of all of the string functions, and most of the number functions.
diff --git a/markup/path.py b/markup/path.py
--- a/markup/path.py
+++ b/markup/path.py
@@ -31,6 +31,7 @@
structures), it only implements a subset of the full XPath 1.0 language.
"""
+from math import ceil, floor
import re
from markup.core import Stream, START, END, TEXT, COMMENT, PI
@@ -223,14 +224,15 @@
_QUOTES = (("'", "'"), ('"', '"'))
_TOKENS = ('::', ':', '..', '.', '//', '/', '[', ']', '()', '(', ')', '@',
'=', '!=', '!', '|', ',')
- _tokenize = re.compile('(%s)|([^%s\s]+)|\s+' % (
+ _tokenize = re.compile('("[^"]*")|(\'[^\']*\')|(%s)|([^%s\s]+)|\s+' % (
'|'.join([re.escape(t) for t in _TOKENS]),
''.join([re.escape(t[0]) for t in _TOKENS]))).findall
def __init__(self, text, filename=None, lineno=-1):
self.filename = filename
self.lineno = lineno
- self.tokens = filter(None, [a or b for a, b in self._tokenize(text)])
+ self.tokens = filter(None, [a or b or c or d for a, b, c, d in
+ self._tokenize(text)])
self.pos = 0
# Tokenizer
@@ -395,19 +397,7 @@
self.next_token()
return NumberLiteral(float(token))
elif not self.at_end and self.peek_token().startswith('('):
- if self.next_token() == '()':
- args = []
- else:
- self.next_token()
- args = [self._or_expr()]
- while self.cur_token not in (',', ')'):
- args.append(self._or_expr())
- self.next_token()
- cls = _function_map.get(token)
- if not cls:
- raise PathSyntaxError('Unsupported function "%s"' % token,
- self.filename, self.lineno)
- return cls(*args)
+ return self._function_call()
else:
axis = None
if token == '@':
@@ -415,6 +405,28 @@
self.next_token()
return self._node_test(axis)
+ def _function_call(self):
+ name = self.cur_token
+ if self.next_token() == '()':
+ args = []
+ else:
+ assert self.cur_token == '('
+ self.next_token()
+ args = [self._or_expr()]
+ while self.cur_token == ',':
+ self.next_token()
+ args.append(self._or_expr())
+ if not self.cur_token == ')':
+ raise PathSyntaxError('Expected ")" to close function argument '
+ 'list, but found "%s"' % self.cur_token,
+ self.filename, self.lineno)
+ self.next_token()
+ cls = _function_map.get(name)
+ if not cls:
+ raise PathSyntaxError('Unsupported function "%s"' % name,
+ self.filename, self.lineno)
+ return cls(*args)
+
# Node tests
@@ -487,7 +499,84 @@
# Functions
-class LocalNameFunction(object):
+class Function(object):
+ """Base class for function nodes in XPath expressions."""
+
+class BooleanFunction(Function):
+ __slots__ = ['expr']
+ def __init__(self, expr):
+ self.expr = expr
+ def __call__(self, kind, data, pos):
+ val = self.expr(kind, data, pos)
+ if type(val) is tuple:
+ val = val[1]
+ return bool(val)
+ def __repr__(self):
+ return 'boolean(%r)' % self.expr
+
+class CeilingFunction(Function):
+ __slots__ = ['number']
+ def __init__(self, number):
+ self.number = number
+ def __call__(self, kind, data, pos):
+ number = self.number(kind, data, pos)
+ if type(number) is tuple:
+ number = number[1]
+ return ceil(float(number))
+ def __repr__(self):
+ return 'ceiling(%r)' % self.number
+
+class ConcatFunction(Function):
+ __slots__ = ['exprs']
+ def __init__(self, *exprs):
+ self.exprs = exprs
+ def __call__(self, kind, data, pos):
+ strings = []
+ for item in [expr(kind, data, pos) for expr in self.exprs]:
+ if type(item) is tuple:
+ assert item[0] is TEXT
+ item = item[1]
+ strings.append(item)
+ return u''.join(strings)
+ def __repr__(self):
+ return 'concat(%s)' % [repr(expr for expr in self.expr)]
+
+class ContainsFunction(Function):
+ __slots__ = ['string1' ,'string2']
+ def __init__(self, string1, string2):
+ self.string1 = string1
+ self.string2 = string2
+ def __call__(self, kind, data, pos):
+ string1 = self.string1(kind, data, pos)
+ if type(string1) is tuple:
+ string1 = string1[1]
+ string2 = self.string2(kind, data, pos)
+ if type(string2) is tuple:
+ string2 = string2[1]
+ return string2 in string1
+ def __repr__(self):
+ return 'contains(%r, %r)' % (self.string1, self.string2)
+
+class FalseFunction(Function):
+ __slots__ = []
+ def __call__(self, kind, data, pos):
+ return False
+ def __repr__(self):
+ return 'false()'
+
+class FloorFunction(Function):
+ __slots__ = ['number']
+ def __init__(self, number):
+ self.number = number
+ def __call__(self, kind, data, pos):
+ number = self.number(kind, data, pos)
+ if type(number) is tuple:
+ number = number[1]
+ return floor(float(number))
+ def __repr__(self):
+ return 'floor(%r)' % self.number
+
+class LocalNameFunction(Function):
__slots__ = []
def __call__(self, kind, data, pos):
if kind is START:
@@ -495,7 +584,7 @@
def __repr__(self):
return 'local-name()'
-class NameFunction(object):
+class NameFunction(Function):
__slots__ = []
def __call__(self, kind, data, pos):
if kind is START:
@@ -503,7 +592,7 @@
def __repr__(self):
return 'name()'
-class NamespaceUriFunction(object):
+class NamespaceUriFunction(Function):
__slots__ = []
def __call__(self, kind, data, pos):
if kind is START:
@@ -511,7 +600,7 @@
def __repr__(self):
return 'namespace-uri()'
-class NotFunction(object):
+class NotFunction(Function):
__slots__ = ['expr']
def __init__(self, expr):
self.expr = expr
@@ -520,12 +609,173 @@
def __repr__(self):
return 'not(%s)' % self.expr
-_function_map = {'local-name': LocalNameFunction, 'name': NameFunction,
- 'namespace-uri': NamespaceUriFunction, 'not': NotFunction}
+class NormalizeSpaceFunction(Function):
+ __slots__ = ['expr']
+ _normalize = re.compile(r'\s{2,}').sub
+ def __init__(self, expr):
+ self.expr = expr
+ def __call__(self, kind, data, pos):
+ string = self.expr(kind, data, pos)
+ if type(string) is tuple:
+ string = string[1]
+ return self._normalize(' ', string.strip())
+ def __repr__(self):
+ return 'normalize-space(%s)' % repr(self.expr)
+
+class NumberFunction(Function):
+ __slots__ = ['expr']
+ def __init__(self, expr):
+ self.expr = expr
+ def __call__(self, kind, data, pos):
+ val = self.expr(kind, data, pos)
+ if type(val) is tuple:
+ val = val[1]
+ return float(val)
+ def __repr__(self):
+ return 'number(%r)' % self.expr
+
+class StartsWithFunction(Function):
+ __slots__ = ['string1', 'string2']
+ def __init__(self, string1, string2):
+ self.string1 = string2
+ self.string2 = string2
+ def __call__(self, kind, data, pos):
+ string1 = self.string1(kind, data, pos)
+ if type(string1) is tuple:
+ string1 = string1[1]
+ string2 = self.string2(kind, data, pos)
+ if type(string2) is tuple:
+ string2 = string2[1]
+ return string1.startswith(string2)
+ def __repr__(self):
+ return 'starts-with(%r, %r)' % (self.string1, self.string2)
+
+class StringLengthFunction(Function):
+ __slots__ = ['expr']
+ def __init__(self, expr):
+ self.expr = expr
+ def __call__(self, kind, data, pos):
+ string = self.expr(kind, data, pos)
+ if type(string) is tuple:
+ string = string[1]
+ return len(string)
+ def __repr__(self):
+ return 'string-length(%r)' % self.expr
+
+class SubstringFunction(Function):
+ __slots__ = ['string', 'start', 'length']
+ def __init__(self, string, start, length=None):
+ self.string = string
+ self.start = start
+ self.length = length
+ def __call__(self, kind, data, pos):
+ string = self.string(kind, data, pos)
+ if type(string) is tuple:
+ string = string[1]
+ start = self.start(kind, data, pos)
+ if type(start) is tuple:
+ start = start[1]
+ length = 0
+ if self.length is not None:
+ length = self.length(kind, data, pos)
+ if type(length) is tuple:
+ length = length[1]
+ return string[int(start):len(string) - int(length)]
+ def __repr__(self):
+ if self.length is not None:
+ return 'substring(%r, %r, %r)' % (self.string, self.start,
+ self.length)
+ else:
+ return 'substring(%r, %r)' % (self.string, self.start)
+
+class SubstringAfterFunction(Function):
+ __slots__ = ['string1', 'string2']
+ def __init__(self, string1, string2):
+ self.string1 = string1
+ self.string2 = string2
+ def __call__(self, kind, data, pos):
+ string1 = self.string1(kind, data, pos)
+ if type(string1) is tuple:
+ string1 = string1[1]
+ string2 = self.string2(kind, data, pos)
+ if type(string2) is tuple:
+ string2 = string2[1]
+ index = string1.find(string2)
+ if index >= 0:
+ return string1[index + len(string2):]
+ return u''
+ def __repr__(self):
+ return 'substring-after(%r, %r)' % (self.string1, self.string2)
+
+class SubstringBeforeFunction(Function):
+ __slots__ = ['string1', 'string2']
+ def __init__(self, string1, string2):
+ self.string1 = string1
+ self.string2 = string2
+ def __call__(self, kind, data, pos):
+ string1 = self.string1(kind, data, pos)
+ if type(string1) is tuple:
+ string1 = string1[1]
+ string2 = self.string2(kind, data, pos)
+ if type(string2) is tuple:
+ string2 = string2[1]
+ index = string1.find(string2)
+ if index >= 0:
+ return string1[:index]
+ return u''
+ def __repr__(self):
+ return 'substring-after(%r, %r)' % (self.string1, self.string2)
+
+class TranslateFunction(Function):
+ __slots__ = ['string', 'fromchars', 'tochars']
+ def __init__(self, string, fromchars, tochars):
+ self.string = string
+ self.fromchars = fromchars
+ self.tochars = tochars
+ def __call__(self, kind, data, pos):
+ string = self.string(kind, data, pos)
+ if type(string) is tuple:
+ string = string[1]
+ fromchars = self.fromchars(kind, data, pos)
+ if type(fromchars) is tuple:
+ fromchars = fromchars[1]
+ tochars = self.tochars(kind, data, pos)
+ if type(tochars) is tuple:
+ tochars = tochars[1]
+ table = dict(zip([ord(c) for c in fromchars],
+ [ord(c) for c in tochars]))
+ return string.translate(table)
+ def __repr__(self):
+ return 'translate(%r, %r, %r)' % (self.string, self.fromchars,
+ self.tochars)
+
+class TrueFunction(Function):
+ __slots__ = []
+ def __call__(self, kind, data, pos):
+ return True
+ def __repr__(self):
+ return 'true()'
+
+
+_function_map = {'boolean': BooleanFunction, 'ceiling': CeilingFunction,
+ 'concat': ConcatFunction, 'contains': ContainsFunction,
+ 'false': FalseFunction, 'floor': FloorFunction,
+ 'local-name': LocalNameFunction, 'name': NameFunction,
+ 'namespace-uri': NamespaceUriFunction,
+ 'normalize-space': NormalizeSpaceFunction, 'not': NotFunction,
+ 'number': NumberFunction, 'starts-with': StartsWithFunction,
+ 'string-length': StringLengthFunction,
+ 'substring': SubstringFunction,
+ 'substring-after': SubstringAfterFunction,
+ 'substring-before': SubstringBeforeFunction,
+ 'translate': TranslateFunction, 'true': TrueFunction}
# Literals
-class StringLiteral(object):
+class Literal(object):
+ """Abstract base class for literal nodes."""
+
+class StringLiteral(Literal):
__slots__ = ['text']
def __init__(self, text):
self.text = text
@@ -534,12 +784,12 @@
def __repr__(self):
return '"%s"' % self.text
-class NumberLiteral(object):
+class NumberLiteral(Literal):
__slots__ = ['number']
def __init__(self, number):
self.number = number
def __call__(self, kind, data, pos):
- return TEXT, unicode(self.number), (None, -1, -1)
+ return TEXT, self.number, (None, -1, -1)
def __repr__(self):
return str(self.number)
diff --git a/markup/tests/path.py b/markup/tests/path.py
--- a/markup/tests/path.py
+++ b/markup/tests/path.py
@@ -282,6 +282,93 @@
path = Path('root/item[@urgent or @notso]')
self.assertEqual('', path.select(xml).render())
+ def test_predicate_boolean_function(self):
+ xml = XML('bar')
+ path = Path('*[boolean("")]')
+ self.assertEqual('', path.select(xml).render())
+ path = Path('*[boolean("yo")]')
+ self.assertEqual('bar', path.select(xml).render())
+ path = Path('*[boolean(0)]')
+ self.assertEqual('', path.select(xml).render())
+ path = Path('*[boolean(42)]')
+ self.assertEqual('bar', path.select(xml).render())
+ path = Path('*[boolean(false())]')
+ self.assertEqual('', path.select(xml).render())
+ path = Path('*[boolean(true())]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_ceil_function(self):
+ xml = XML('bar')
+ path = Path('*[ceiling("4.5")=5]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_concat_function(self):
+ xml = XML('bar')
+ path = Path('*[name()=concat("f", "oo")]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_contains_function(self):
+ xml = XML('bar')
+ path = Path('*[contains(name(), "oo")]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_false_function(self):
+ xml = XML('bar')
+ path = Path('*[false()]')
+ self.assertEqual('', path.select(xml).render())
+
+ def test_predicate_floor_function(self):
+ xml = XML('bar')
+ path = Path('*[floor("4.5")=4]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_normalize_space_function(self):
+ xml = XML('bar')
+ path = Path('*[normalize-space(" foo bar ")="foo bar"]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_number_function(self):
+ xml = XML('bar')
+ path = Path('*[number("3.0")=3]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_starts_with_function(self):
+ xml = XML('bar')
+ path = Path('*[starts-with(name(), "f")]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_string_length_function(self):
+ xml = XML('bar')
+ path = Path('*[string-length(name())=3]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_substring_function(self):
+ xml = XML('bar')
+ path = Path('*[substring(name(), 1)="oo"]')
+ self.assertEqual('bar', path.select(xml).render())
+ path = Path('*[substring(name(), 1, 1)="o"]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_substring_after_function(self):
+ xml = XML('bar')
+ path = Path('*[substring-after(name(), "f")="oo"]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_substring_before_function(self):
+ xml = XML('bar')
+ path = Path('*[substring-before(name(), "oo")="f"]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_translate_function(self):
+ xml = XML('bar')
+ path = Path('*[translate(name(), "fo", "ba")="baa"]')
+ self.assertEqual('bar', path.select(xml).render())
+
+ def test_predicate_true_function(self):
+ xml = XML('bar')
+ path = Path('*[true()]')
+ self.assertEqual('bar', path.select(xml).render())
+
def suite():
suite = unittest.TestSuite()