diff markup/path.py @ 26:3c1a022be04c trunk

* Split out the XPath tests into a separate `unittest`-based file. * Added many more docstrings. * Cleaned up the implementation of the XML/HTML parsers a bit. * The HTML parser now correctly handles minimized attributes. * Added `COPYING` and `README` files.
author cmlenz
date Wed, 28 Jun 2006 08:55:04 +0000
parents e3be27f5bcf5
children b4f78c05e5c9
line wrap: on
line diff
--- a/markup/path.py
+++ b/markup/path.py
@@ -19,98 +19,24 @@
 
 __all__ = ['Path']
 
-_QUOTES = (("'", "'"), ('"', '"'))
 
 class Path(object):
-    """Basic XPath support on markup event streams.
-    
-    >>> from markup.input import XML
-    
-    Selecting specific tags:
-    
-    >>> Path('root').select(XML('<root/>')).render()
-    '<root/>'
-    >>> Path('//root').select(XML('<root/>')).render()
-    '<root/>'
-    
-    Using wildcards for tag names:
-    
-    >>> Path('*').select(XML('<root/>')).render()
-    '<root/>'
-    >>> Path('//*').select(XML('<root/>')).render()
-    '<root/>'
-    
-    Selecting attribute values:
-    
-    >>> Path('@foo').select(XML('<root/>')).render()
-    ''
-    >>> Path('@foo').select(XML('<root foo="bar"/>')).render()
-    'bar'
-    
-    Selecting descendants:
-    
-    >>> Path("root/*").select(XML('<root><foo/><bar/></root>')).render()
-    '<foo/><bar/>'
-    >>> Path("root/bar").select(XML('<root><foo/><bar/></root>')).render()
-    '<bar/>'
-    >>> Path("root/baz").select(XML('<root><foo/><bar/></root>')).render()
-    ''
-    >>> Path("root/foo/*").select(
-    ...      XML('<root><foo><bar/></foo></root>')).render()
-    '<bar/>'
+    """Implements basic XPath support on streams.
     
-    Selecting text nodes:
-    >>> Path("item/text()").select(
-    ...      XML('<root><item>Foo</item></root>')).render()
-    'Foo'
-    >>> Path("item/text()").select(
-    ...      XML('<root><item>Foo</item><item>Bar</item></root>')).render()
-    'FooBar'
-    
-    Skipping ancestors:
-    
-    >>> Path("foo/bar").select(
-    ...      XML('<root><foo><bar/></foo></root>')).render()
-    '<bar/>'
-    >>> Path("foo/*").select(
-    ...      XML('<root><foo><bar/></foo></root>')).render()
-    '<bar/>'
-    >>> Path("root/bar").select(
-    ...      XML('<root><foo><bar/></foo></root>')).render()
-    ''
-    >>> Path("root/bar").select(
-    ...      XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')).render()
-    '<bar id="2"/>'
-    >>> Path("root/*/bar").select(
-    ...      XML('<root><foo><bar/></foo></root>')).render()
-    '<bar/>'
-    >>> Path("root//bar").select(
-    ...      XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')).render()
-    '<bar id="1"/><bar id="2"/>'
-    >>> Path("root//bar").select(
-    ...      XML('<root><foo><bar id="1"/></foo><bar id="2"/></root>')).render()
-    '<bar id="1"/><bar id="2"/>'
-    
-    Using simple attribute predicates:
-    >>> Path("root/item[@important]").select(
-    ...      XML('<root><item/><item important="very"/></root>')).render()
-    '<item important="very"/>'
-    >>> Path('root/item[@important="very"]').select(
-    ...      XML('<root><item/><item important="very"/></root>')).render()
-    '<item important="very"/>'
-    >>> Path("root/item[@important='very']").select(
-    ...      XML('<root><item/><item important="notso"/></root>')).render()
-    ''
-    >>> Path("root/item[@important!='very']").select(
-    ...     XML('<root><item/><item important="notso"/></root>')).render()
-    '<item/><item important="notso"/>'
+    Instances of this class represent a "compiled" XPath expression, and provide
+    methods for testing the path against a stream, as well as extracting a
+    substream matching that path.
     """
-
     _TOKEN_RE = re.compile('(::|\.\.|\(\)|[/.:\[\]\(\)@=!])|'
                            '([^/:\[\]\(\)@=!\s]+)|'
                            '\s+')
+    _QUOTES = (("'", "'"), ('"', '"'))
 
     def __init__(self, text):
+        """Create the path object from a string.
+        
+        @param text: the path expression
+        """
         self.source = text
 
         steps = []
@@ -125,7 +51,7 @@
                     in_predicate = False
                 elif op.startswith('('):
                     if cur_tag == 'text':
-                        steps[-1] = (False, self.fn_text(), [])
+                        steps[-1] = (False, self._FunctionText(), [])
                     else:
                         raise NotImplementedError('XPath function "%s" not '
                                                   'supported' % cur_tag)
@@ -136,23 +62,25 @@
                 closure = cur_op in ('', '//')
                 if cur_op == '@':
                     if tag == '*':
-                        node_test = self.any_attribute()
+                        node_test = self._AnyAttribute()
                     else:
-                        node_test = self.attribute_by_name(tag)
+                        node_test = self._AttributeByName(tag)
                 else:
                     if tag == '*':
-                        node_test = self.any_element()
+                        node_test = self._AnyElement()
                     elif in_predicate:
-                        if len(tag) > 1 and (tag[0], tag[-1]) in _QUOTES:
-                            node_test = self.literal_string(tag[1:-1])
+                        if len(tag) > 1 and (tag[0], tag[-1]) in self._QUOTES:
+                            node_test = self._LiteralString(tag[1:-1])
                         if cur_op == '=':
-                            node_test = self.op_eq(steps[-1][2][-1], node_test)
+                            node_test = self._OperatorEq(steps[-1][2][-1],
+                                                         node_test)
                             steps[-1][2].pop()
                         elif cur_op == '!=':
-                            node_test = self.op_neq(steps[-1][2][-1], node_test)
+                            node_test = self._OperatorNeq(steps[-1][2][-1],
+                                                          node_test)
                             steps[-1][2].pop()
                     else:
-                        node_test = self.element_by_name(tag)
+                        node_test = self._ElementByName(tag)
                 if in_predicate:
                     steps[-1][2].append(node_test)
                 else:
@@ -165,8 +93,15 @@
         return '<%s "%s">' % (self.__class__.__name__, self.source)
 
     def select(self, stream):
+        """Returns a substream of the given stream that matches the path.
+        
+        If there are no matches, this method returns an empty stream.
+        
+        @param stream: the stream to select from
+        @return: the substream matching the path, or an empty stream
+        """
         stream = iter(stream)
-        def _generate(tests):
+        def _generate():
             test = self.test()
             for kind, data, pos in stream:
                 result = test(kind, data, pos)
@@ -183,9 +118,17 @@
                         test(*ev)
                 elif result:
                     yield result
-        return Stream(_generate(self.steps))
+        return Stream(_generate())
 
     def test(self):
+        """Returns a function that can be used to track whether the path matches
+        a specific stream event.
+        
+        The function returned expects the positional arguments `kind`, `data`,
+        and `pos`, i.e. basically an unpacked stream event. If the path matches
+        the event, the function returns the match (for example, a `START` or
+        `TEXT` event). Otherwise, it returns `None` or `False`.
+        """
         stack = [0] # stack of cursors into the location path
 
         def _test(kind, data, pos):
@@ -234,28 +177,31 @@
 
         return _test
 
-    class any_element(object):
-        def __call__(self, kind, data, pos):
+    class _AnyElement(object):
+        """Node test that matches any element."""
+        def __call__(self, kind, *_):
             if kind is Stream.START:
                 return True
             return None
         def __repr__(self):
             return '<%s>' % self.__class__.__name__
 
-    class element_by_name(object):
+    class _ElementByName(object):
+        """Node test that matches an element with a specific tag name."""
         def __init__(self, name):
             self.name = QName(name)
-        def __call__(self, kind, data, pos):
+        def __call__(self, kind, data, _):
             if kind is Stream.START:
                 return data[0].localname == self.name
             return None
         def __repr__(self):
             return '<%s "%s">' % (self.__class__.__name__, self.name)
 
-    class any_attribute(object):
+    class _AnyAttribute(object):
+        """Node test that matches any attribute."""
         def __call__(self, kind, data, pos):
             if kind is Stream.START:
-                text = ''.join([val for name, val in data[1]])
+                text = ''.join([val for _, val in data[1]])
                 if text:
                     return Stream.TEXT, text, pos
                 return None
@@ -263,7 +209,8 @@
         def __repr__(self):
             return '<%s>' % (self.__class__.__name__)
 
-    class attribute_by_name(object):
+    class _AttributeByName(object):
+        """Node test that matches an attribute with a specific name."""
         def __init__(self, name):
             self.name = QName(name)
         def __call__(self, kind, data, pos):
@@ -275,7 +222,8 @@
         def __repr__(self):
             return '<%s "%s">' % (self.__class__.__name__, self.name)
 
-    class fn_text(object):
+    class _FunctionText(object):
+        """Function that returns text content."""
         def __call__(self, kind, data, pos):
             if kind is Stream.TEXT:
                 return kind, data, pos
@@ -283,15 +231,17 @@
         def __repr__(self):
             return '<%s>' % (self.__class__.__name__)
 
-    class literal_string(object):
+    class _LiteralString(object):
+        """Always returns a literal string."""
         def __init__(self, value):
             self.value = value
-        def __call__(self, kind, data, pos):
+        def __call__(self, *_):
             return Stream.TEXT, self.value, (-1, -1)
         def __repr__(self):
             return '<%s>' % (self.__class__.__name__)
 
-    class op_eq(object):
+    class _OperatorEq(object):
+        """Equality comparison operator."""
         def __init__(self, lval, rval):
             self.lval = lval
             self.rval = rval
@@ -303,7 +253,8 @@
             return '<%s %r = %r>' % (self.__class__.__name__, self.lval,
                                      self.rval)
 
-    class op_neq(object):
+    class _OperatorNeq(object):
+        """Inequality comparison operator."""
         def __init__(self, lval, rval):
             self.lval = lval
             self.rval = rval
Copyright (C) 2012-2017 Edgewall Software