comparison markup/path.py @ 111:8a4d9064f363

Some fixes and more unit tests for the XPath engine.
author cmlenz
date Mon, 31 Jul 2006 17:25:43 +0000
parents 61fa4cadb766
children 8f53c3ad385c
comparison
equal deleted inserted replaced
110:44fbc30d78cd 111:8a4d9064f363
9 # 9 #
10 # This software consists of voluntary contributions made by many 10 # This software consists of voluntary contributions made by many
11 # individuals. For the exact contribution history, see the revision 11 # individuals. For the exact contribution history, see the revision
12 # history and logs, available at http://markup.edgewall.org/log/. 12 # history and logs, available at http://markup.edgewall.org/log/.
13 13
14 """Basic support for evaluating XPath expressions against streams.""" 14 """Basic support for evaluating XPath expressions against streams.
15
16 >>> from markup.input import XML
17 >>> doc = XML('''<doc>
18 ... <items count="2">
19 ... <item status="new">
20 ... <summary>Foo</summary>
21 ... </item>
22 ... <item status="closed">
23 ... <summary>Bar</summary>
24 ... </item>
25 ... </items>
26 ... </doc>''')
27 >>> print doc.select('items/item[@status="closed"]/summary/text()')
28 Bar
29
30 Because the XPath engine operates on markup streams (as opposed to tree
31 structures), it only implements a subset of the full XPath 1.0 language.
32 """
15 33
16 import re 34 import re
17 35
18 from markup.core import QName, Stream, START, END, TEXT, COMMENT, PI 36 from markup.core import QName, Stream, START, END, TEXT, COMMENT, PI
19 37
109 127
110 elif kind is START: 128 elif kind is START:
111 stack.append(cursor) 129 stack.append(cursor)
112 130
113 matched = None 131 matched = None
114 closure, node_test, predicates = steps[cursor] 132 while 1:
115 133 axis, node_test, predicates = steps[cursor]
116 matched = node_test(kind, data, pos) 134
117 if matched and predicates: 135 matched = node_test(kind, data, pos)
118 for predicate in predicates: 136 if matched and predicates:
119 if not predicate(kind, data, pos): 137 for predicate in predicates:
120 matched = None 138 if not predicate(kind, data, pos):
121 break 139 matched = None
122 140 break
123 if matched: 141
124 if cursor + 1 == size: # the last location step 142 if matched:
125 if ignore_context or len(stack) > 2 \ 143 if cursor + 1 == size: # the last location step
126 or node_test.axis != 'child': 144 if ignore_context or \
127 return matched 145 kind is not START or \
128 else: 146 axis in ('attribute', 'self') or \
129 stack[-1] += 1 147 len(stack) > 2:
130 148 return matched
131 elif kind is START and not closure: 149 else:
150 cursor += 1
151 stack[-1] = cursor
152
153 if axis != 'self':
154 break
155
156 if not matched and kind is START \
157 and not axis.startswith('descendant'):
132 # If this step is not a closure, it cannot be matched until 158 # If this step is not a closure, it cannot be matched until
133 # the current element is closed... so we need to move the 159 # the current element is closed... so we need to move the
134 # cursor back to the last closure and retest that against 160 # cursor back to the last closure and retest that against
135 # the current element 161 # the current element
136 closures = [step for step in steps[:cursor] if step[0]] 162 backsteps = [step for step in steps[:cursor]
137 closures.reverse() 163 if step[0].startswith('descendant')]
138 for closure, node_test, predicates in closures: 164 backsteps.reverse()
139 cursor -= 1 165 for axis, node_test, predicates in backsteps:
140 if closure: 166 matched = node_test(kind, data, pos)
141 matched = node_test(kind, data, pos) 167 if not matched:
142 if matched: 168 cursor -= 1
143 cursor += 1 169 break
144 break
145 stack[-1] = cursor 170 stack[-1] = cursor
146 171
147 return None 172 return None
148 173
149 return _test 174 return _test
187 _function_comment.axis = None 212 _function_comment.axis = None
188 return _function_comment 213 return _function_comment
189 214
190 def _function_node(): 215 def _function_node():
191 def _function_node(kind, data, pos): 216 def _function_node(kind, data, pos):
192 return True 217 if kind is START:
218 return True
219 return kind, data, pos
193 _function_node.axis = None 220 _function_node.axis = None
194 return _function_node 221 return _function_node
195 222
196 def _function_processing_instruction(name=None): 223 def _function_processing_instruction(name=None):
197 def _function_processing_instruction(kind, data, pos): 224 def _function_processing_instruction(kind, data, pos):
302 For union expressions (such as `*|text()`), this function returns one 329 For union expressions (such as `*|text()`), this function returns one
303 test for each operand in the union. For patch expressions that don't 330 test for each operand in the union. For patch expressions that don't
304 use the union operator, the function always returns a list of size 1. 331 use the union operator, the function always returns a list of size 1.
305 332
306 Each path test in turn is a sequence of tests that correspond to the 333 Each path test in turn is a sequence of tests that correspond to the
307 location steps, each tuples of the form `(closure, testfunc, predicates)` 334 location steps, each tuples of the form `(axis, testfunc, predicates)`
308 """ 335 """
309 paths = [self._location_path()] 336 paths = [self._location_path()]
310 while self.cur_token == '|': 337 while self.cur_token == '|':
311 self.next_token() 338 self.next_token()
312 paths.append(self._location_path()) 339 paths.append(self._location_path())
315 % self.cur_token) 342 % self.cur_token)
316 return paths 343 return paths
317 344
318 def _location_path(self): 345 def _location_path(self):
319 next_is_closure = True 346 next_is_closure = True
320 if self.cur_token.startswith('/'):
321 self.next_token()
322
323 steps = [] 347 steps = []
324 while True: 348 while True:
325 step = self._location_step()
326 steps.append((next_is_closure, step[1], step[2]))
327 next_is_closure = False
328 if self.cur_token == '//': 349 if self.cur_token == '//':
329 next_is_closure = True 350 next_is_closure = True
330 elif self.at_end or self.cur_token != '/': 351 self.next_token()
352 elif self.cur_token == '/' and not steps:
353 raise PathSyntaxError('Absolute location paths not supported')
354
355 axis, node_test, predicates = self._location_step()
356 if axis == 'child' and next_is_closure:
357 axis = 'descendant-or-self'
358 steps.append((axis, node_test, predicates))
359 next_is_closure = False
360
361 if self.at_end or not self.cur_token.startswith('/'):
331 break 362 break
332 self.next_token() 363 self.next_token()
364
333 return steps 365 return steps
334 366
335 def _location_step(self): 367 def _location_step(self):
336 step = [False, None, []]
337 if self.cur_token == '@': 368 if self.cur_token == '@':
338 axis = 'attribute' 369 axis = 'attribute'
339 self.next_token() 370 self.next_token()
371 elif self.cur_token == '.':
372 axis = 'self'
373 elif self.peek_token() == '::':
374 axis = self.cur_token
375 if axis not in ('attribute', 'child', 'descendant',
376 'descendant-or-self', 'namespace', 'self'):
377 raise PathSyntaxError('Unsupport axis "%s"' % axis)
378 self.next_token()
379 self.next_token()
340 else: 380 else:
341 # FIXME: support full axis specifiers (name followed by ::)
342 axis = 'child' 381 axis = 'child'
343 step[1] = self._node_test(axis) 382 node_test = self._node_test(axis)
383 predicates = []
344 while self.cur_token == '[': 384 while self.cur_token == '[':
345 step[2].append(self._predicate()) 385 predicates.append(self._predicate())
346 return step 386 return axis, node_test, predicates
347 387
348 def _node_test(self, axis=None): 388 def _node_test(self, axis=None):
349 test = None 389 test = None
350 if self.peek_token() in ('(', '()'): # Node type test 390 if self.peek_token() in ('(', '()'): # Node type test
351 test = self._node_type() 391 test = self._node_type()
354 if axis == 'attribute': 394 if axis == 'attribute':
355 if self.cur_token == '*': 395 if self.cur_token == '*':
356 test = _node_test_any_attribute() 396 test = _node_test_any_attribute()
357 else: 397 else:
358 test = _node_test_attribute_by_name(self.cur_token) 398 test = _node_test_attribute_by_name(self.cur_token)
399 elif axis == 'self':
400 test = _node_test_current_element()
359 else: 401 else:
360 if self.cur_token == '.': 402 if self.cur_token == '*':
361 test = _node_test_current_element()
362 elif self.cur_token == '*':
363 test = _node_test_any_child_element() 403 test = _node_test_any_child_element()
364 else: 404 else:
365 test = _node_test_child_element_by_name(self.cur_token) 405 test = _node_test_child_element_by_name(self.cur_token)
366 406
367 if not self.at_end: 407 if not self.at_end:
393 raise PathSyntaxError('%s() not allowed here' % name) 433 raise PathSyntaxError('%s() not allowed here' % name)
394 434
395 def _predicate(self): 435 def _predicate(self):
396 assert self.cur_token == '[' 436 assert self.cur_token == '['
397 self.next_token() 437 self.next_token()
398 return self._or_expr() 438 expr = self._or_expr()
399 assert self.cur_token == ']' 439 assert self.cur_token == ']'
400 self.next_token() 440 if not self.at_end:
441 self.next_token()
442 return expr
401 443
402 def _or_expr(self): 444 def _or_expr(self):
403 expr = self._and_expr() 445 expr = self._and_expr()
404 while self.cur_token == 'or': 446 while self.cur_token == 'or':
405 self.next_token() 447 self.next_token()
Copyright (C) 2012-2017 Edgewall Software