Mercurial > genshi > mirror

--- a/markup/core.py
+++ b/markup/core.py
@@ -94,12 +94,15 @@
         """Generate strings corresponding to a specific serialization of the
         stream.

-        Unlike the `render()` method, this method is a generator this returns
+        Unlike the `render()` method, this method is a generator that returns
         the serialized output incrementally, as opposed to returning a single
         string.

         @param method: determines how the stream is serialized; can be either
                        'xml' or 'html', or a custom `Serializer` subclass
+        @param filters: list of filters to apply to the stream before
+                        serialization. The default is to apply whitespace
+                        reduction using `markup.filters.WhitespaceFilter`.
         """
         from markup.filters import WhitespaceFilter
         from markup import output
@@ -127,26 +130,78 @@


 class Attributes(list):
+    """Sequence type that stores the attributes of an element.
+
+    The order of the attributes is preserved, while accessing and manipulating
+    attributes by name is also supported.
+
+    >>> attrs = Attributes([('href', '#'), ('title', 'Foo')])
+    >>> attrs
+    [(u'href', '#'), (u'title', 'Foo')]
+
+    >>> 'href' in attrs
+    True
+    >>> 'tabindex' in attrs
+    False
+
+    >>> attrs.get(u'title')
+    'Foo'
+    >>> attrs.set(u'title', 'Bar')
+    >>> attrs
+    [(u'href', '#'), (u'title', 'Bar')]
+    >>> attrs.remove(u'title')
+    >>> attrs
+    [(u'href', '#')]
+
+    New attributes added using the `set()` method are appended to the end of
+    the list:
+
+    >>> attrs.set(u'accesskey', 'k')
+    >>> attrs
+    [(u'href', '#'), (u'accesskey', 'k')]
+    """
+    __slots__ = []

     def __init__(self, attrib=None):
+        """Create the `Attributes` instance.
+
+        If the `attrib` parameter is provided, it is expected to be a sequence
+        of `(name, value)` tuples.
+        """
         list.__init__(self, map(lambda (k, v): (QName(k), v), attrib or []))

     def __contains__(self, name):
+        """Return whether the list includes an attribute with the specified
+        name.
+        """
         return name in [attr for attr, value in self]

     def get(self, name, default=None):
+        """Return the value of the attribute with the specified name, or the
+        value of the `default` parameter if no such attribute is found.
+        """
         for attr, value in self:
             if attr == name:
                 return value
         return default

     def remove(self, name):
+        """Removes the attribute with the specified name.
+
+        If no such attribute is found, this method does nothing.
+        """
         for idx, (attr, _) in enumerate(self):
             if attr == name:
                 del self[idx]
                 break

     def set(self, name, value):
+        """Sets the specified attribute to the given value.
+
+        If an attribute with the specified name is already in the list, the
+        value of the existing entry is updated. Otherwise, a new attribute is
+        appended to the end of the list.
+        """
         for idx, (attr, _) in enumerate(self):
             if attr == name:
                 self[idx] = (attr, value)
@@ -159,13 +214,15 @@
     """Marks a string as being safe for inclusion in HTML/XML output without
     needing to be escaped.
     """
+    __slots__ = []
+
     def __new__(self, text='', *args):
         if args:
             text %= tuple([escape(arg) for arg in args])
         return unicode.__new__(self, text)

     def __add__(self, other):
-        return Markup(unicode(self) + Markup.escape(other))
+        return Markup(unicode(self) + escape(other))

     def __mod__(self, args):
         if not isinstance(args, (list, tuple)):
@@ -180,7 +237,7 @@
         return '<%s "%s">' % (self.__class__.__name__, self)

     def join(self, seq):
-        return Markup(unicode(self).join([Markup.escape(item) for item in seq]))
+        return Markup(unicode(self).join([escape(item) for item in seq]))

     def stripentities(self, keepxmlentities=False):
         """Return a copy of the text with any character or numeric entities
@@ -228,7 +285,7 @@
         if isinstance(text, cls):
             return text
         text = unicode(text)
-        if not text:
+        if not text or isinstance(text, cls):
             return cls()
         text = text.replace('&', '&amp;') \
                    .replace('<', '&lt;') \
@@ -241,7 +298,7 @@
     def unescape(self):
         """Reverse-escapes &, <, > and \" and returns a `unicode` object."""
         if not self:
-            return ''
+            return u''
         return unicode(self).replace('&#34;', '"') \
                             .replace('&gt;', '>') \
                             .replace('&lt;', '<') \
@@ -253,7 +310,7 @@
         """
         text = unicode(self.striptags().stripentities())
         if not keeplinebreaks:
-            text = text.replace('\n', ' ')
+            text = text.replace(u'\n', u' ')
         return text

     def sanitize(self):
@@ -273,12 +330,58 @@


 class Namespace(object):
+    """Utility class for creating and testing elements with a namespace.
+
+    Internally, namespace URIs are encoded in the `QName` of any element or
+    attribute, the namespace URI being enclosed in curly braces. This class
+    helps create and test these strings.
+
+    A `Namespace` object is instantiated with the namespace URI.
+
+    >>> html = Namespace('http://www.w3.org/1999/xhtml')
+    >>> html
+    <Namespace "http://www.w3.org/1999/xhtml">
+    >>> html.uri
+    u'http://www.w3.org/1999/xhtml'
+
+    The `Namespace` object can then be used to generate `QName` objects with
+    that namespace:
+
+    >>> html.body
+    u'{http://www.w3.org/1999/xhtml}body'
+    >>> html.body.localname
+    u'body'
+    >>> html.body.namespace
+    u'http://www.w3.org/1999/xhtml'
+
+    The same works using item access notation, which is useful for element or
+    attribute names that are not valid Python identifiers:
+
+    >>> html['body']
+    u'{http://www.w3.org/1999/xhtml}body'
+
+    A `Namespace` object can also be used to test whether a specific `QName`
+    belongs to that namespace using the `in` operator:
+
+    >>> qname = html.body
+    >>> qname in html
+    True
+    >>> qname in Namespace('http://www.w3.org/2002/06/xhtml2')
+    False
+    """
+    def __init__(self, uri):
+        self.uri = unicode(uri)

-    def __init__(self, uri):
-        self.uri = uri
+    def __contains__(self, qname):
+        return qname.namespace == self.uri
+
+    def __eq__(self, other):
+        if isinstance(other, Namespace):
+            return self.uri == other.uri
+        return self.uri == other

     def __getitem__(self, name):
-        return QName(self.uri + '}' + name)
+        return QName(self.uri + u'}' + name)

     __getattr__ = __getitem__

@@ -286,10 +389,10 @@
         return '<Namespace "%s">' % self.uri

     def __str__(self):
-        return self.uri
+        return self.uri.encode('utf-8')

     def __unicode__(self):
-        return unicode(self.uri)
+        return self.uri


 class QName(unicode):
@@ -299,6 +402,21 @@
     the element or attribute, in the form `{namespace}localname`. The namespace
     URI can be obtained through the additional `namespace` attribute, while the
     local name can be accessed through the `localname` attribute.
+
+    >>> qname = QName('foo')
+    >>> qname
+    u'foo'
+    >>> qname.localname
+    u'foo'
+    >>> qname.namespace
+
+    >>> qname = QName('http://www.w3.org/1999/xhtml}body')
+    >>> qname
+    u'{http://www.w3.org/1999/xhtml}body'
+    >>> qname.localname
+    u'body'
+    >>> qname.namespace
+    u'http://www.w3.org/1999/xhtml'
     """
     __slots__ = ['namespace', 'localname']

@@ -306,13 +424,13 @@
         if isinstance(qname, QName):
             return qname

-        parts = qname.split('}', 1)
-        if qname.find('}') > 0:
-            self = unicode.__new__(cls, '{' + qname)
-            self.namespace = parts[0]
-            self.localname = parts[1]
+        parts = qname.split(u'}', 1)
+        if qname.find(u'}') > 0:
+            self = unicode.__new__(cls, u'{' + qname)
+            self.namespace = unicode(parts[0])
+            self.localname = unicode(parts[1])
         else:
             self = unicode.__new__(cls, qname)
             self.namespace = None
-            self.localname = qname
+            self.localname = unicode(qname)
         return self
--- a/markup/filters.py
+++ b/markup/filters.py
@@ -19,7 +19,7 @@
     from sets import ImmutableSet as frozenset
 import re

-from markup.core import Attributes, Markup, Stream
+from markup.core import Attributes, Markup, Namespace, Stream
 from markup.path import Path

 __all__ = ['IncludeFilter', 'WhitespaceFilter', 'HTMLSanitizer']
@@ -30,7 +30,7 @@
     (see http://www.w3.org/TR/xinclude/) in templates.
     """

-    _NAMESPACE = 'http://www.w3.org/2001/XInclude'
+    NAMESPACE = Namespace('http://www.w3.org/2001/XInclude')

     def __init__(self, loader):
         """Initialize the filter.
@@ -57,7 +57,7 @@

         for kind, data, pos in stream:

-            if kind is Stream.START and data[0].namespace == self._NAMESPACE \
+            if kind is Stream.START and data[0] in self.NAMESPACE \
                     and not in_fallback:
                 tag, attrib = data
                 if tag.localname == 'include':
@@ -67,7 +67,7 @@
                     in_fallback = True
                     fallback_stream = []

-            elif kind is Stream.END and data.namespace == self._NAMESPACE:
+            elif kind is Stream.END and data in self.NAMESPACE:
                 if data.localname == 'include':
                     try:
                         if not include_href:
@@ -93,7 +93,7 @@
             elif in_fallback:
                 fallback_stream.append((kind, data, pos))

-            elif kind is Stream.START_NS and data[1] == self._NAMESPACE:
+            elif kind is Stream.START_NS and data[1] == self.NAMESPACE:
                 ns_prefixes.append(data[0])

             elif kind is Stream.END_NS and data in ns_prefixes:
--- a/markup/output.py
+++ b/markup/output.py
@@ -20,7 +20,7 @@
 except NameError:
     from sets import ImmutableSet as frozenset

-from markup.core import Markup, QName, Stream
+from markup.core import Markup, Namespace, QName, Stream
 from markup.filters import WhitespaceFilter

 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer']
@@ -121,7 +121,7 @@
     <div><a href="foo"></a><br><hr noshade></div>
     """

-    NAMESPACE = 'http://www.w3.org/1999/xhtml'
+    NAMESPACE = Namespace('http://www.w3.org/1999/xhtml')

     _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
                               'hr', 'img', 'input', 'isindex', 'link', 'meta',
@@ -146,11 +146,11 @@

             elif kind is Stream.START:
                 tag, attrib = data
-                if tag.namespace and tag.namespace != self.NAMESPACE:
+                if tag.namespace and tag not in self.NAMESPACE:
                     continue # not in the HTML namespace, so don't emit
                 buf = ['<', tag.localname]
                 for attr, value in attrib:
-                    if attr.namespace and attr.namespace != self.NAMESPACE:
+                    if attr.namespace and attr not in self.NAMESPACE:
                         continue # not in the HTML namespace, so don't emit
                     if attr.localname in self._BOOLEAN_ATTRS:
                         if value:
@@ -168,7 +168,7 @@

             elif kind is Stream.END:
                 tag = data
-                if tag.namespace and tag.namespace != self.NAMESPACE:
+                if tag.namespace and tag not in self.NAMESPACE:
                     continue # not in the HTML namespace, so don't emit
                 yield Markup('</%s>' % tag.localname)
--- a/markup/template.py
+++ b/markup/template.py
@@ -46,7 +46,7 @@
 import re
 from StringIO import StringIO

-from markup.core import Attributes, Stream, StreamEventKind
+from markup.core import Attributes, Namespace, Stream, StreamEventKind
 from markup.eval import Expression
 from markup.filters import IncludeFilter
 from markup.input import HTML, XMLParser, XML
@@ -558,7 +558,7 @@
     """Can parse a template and transform it into the corresponding output
     based on context data.
     """
-    NAMESPACE = 'http://purl.org/kid/ns#'
+    NAMESPACE = Namespace('http://purl.org/kid/ns#')

     EXPR = StreamEventKind('EXPR') # an expression
     SUB = StreamEventKind('SUB') # a "subprogram"
@@ -625,7 +625,7 @@
                 directives = []
                 new_attrib = []
                 for name, value in attrib:
-                    if name.namespace == self.NAMESPACE:
+                    if name in self.NAMESPACE:
                         cls = self._dir_by_name.get(name.localname)
                         if cls is None:
                             raise BadDirectiveError(name, self.filename, pos[0])
--- a/markup/tests/core.py
+++ b/markup/tests/core.py
@@ -15,7 +15,7 @@
 from HTMLParser import HTMLParseError
 import unittest

-from markup.core import Markup, escape, unescape
+from markup.core import *


 class MarkupTestCase(unittest.TestCase):
@@ -183,6 +183,7 @@
 def suite():
     suite = unittest.TestSuite()
     suite.addTest(unittest.makeSuite(MarkupTestCase, 'test'))
+    suite.addTest(doctest.DocTestSuite(Markup.__module__))
     return suite

 if __name__ == '__main__':