# HG changeset patch # User cmlenz # Date 1150732048 0 # Node ID 5420cfe42d365680f88e28bae605095c60bc1b2d # Parent 74cc70129d046c885d0ffff7193806f62244e907 Actually make use of the `markup.core.Namespace` class, and add a couple of doctests. diff --git a/markup/core.py b/markup/core.py --- a/markup/core.py +++ b/markup/core.py @@ -94,12 +94,15 @@ """Generate strings corresponding to a specific serialization of the stream. - Unlike the `render()` method, this method is a generator this returns + Unlike the `render()` method, this method is a generator that returns the serialized output incrementally, as opposed to returning a single string. @param method: determines how the stream is serialized; can be either 'xml' or 'html', or a custom `Serializer` subclass + @param filters: list of filters to apply to the stream before + serialization. The default is to apply whitespace + reduction using `markup.filters.WhitespaceFilter`. """ from markup.filters import WhitespaceFilter from markup import output @@ -127,26 +130,78 @@ class Attributes(list): + """Sequence type that stores the attributes of an element. + + The order of the attributes is preserved, while accessing and manipulating + attributes by name is also supported. + + >>> attrs = Attributes([('href', '#'), ('title', 'Foo')]) + >>> attrs + [(u'href', '#'), (u'title', 'Foo')] + + >>> 'href' in attrs + True + >>> 'tabindex' in attrs + False + + >>> attrs.get(u'title') + 'Foo' + >>> attrs.set(u'title', 'Bar') + >>> attrs + [(u'href', '#'), (u'title', 'Bar')] + >>> attrs.remove(u'title') + >>> attrs + [(u'href', '#')] + + New attributes added using the `set()` method are appended to the end of + the list: + + >>> attrs.set(u'accesskey', 'k') + >>> attrs + [(u'href', '#'), (u'accesskey', 'k')] + """ + __slots__ = [] def __init__(self, attrib=None): + """Create the `Attributes` instance. + + If the `attrib` parameter is provided, it is expected to be a sequence + of `(name, value)` tuples. 
+ """ list.__init__(self, map(lambda (k, v): (QName(k), v), attrib or [])) def __contains__(self, name): + """Return whether the list includes an attribute with the specified + name. + """ return name in [attr for attr, value in self] def get(self, name, default=None): + """Return the value of the attribute with the specified name, or the + value of the `default` parameter if no such attribute is found. + """ for attr, value in self: if attr == name: return value return default def remove(self, name): + """Removes the attribute with the specified name. + + If no such attribute is found, this method does nothing. + """ for idx, (attr, _) in enumerate(self): if attr == name: del self[idx] break def set(self, name, value): + """Sets the specified attribute to the given value. + + If an attribute with the specified name is already in the list, the + value of the existing entry is updated. Otherwise, a new attribute is + appended to the end of the list. + """ for idx, (attr, _) in enumerate(self): if attr == name: self[idx] = (attr, value) @@ -159,13 +214,15 @@ """Marks a string as being safe for inclusion in HTML/XML output without needing to be escaped. 
""" + __slots__ = [] + def __new__(self, text='', *args): if args: text %= tuple([escape(arg) for arg in args]) return unicode.__new__(self, text) def __add__(self, other): - return Markup(unicode(self) + Markup.escape(other)) + return Markup(unicode(self) + escape(other)) def __mod__(self, args): if not isinstance(args, (list, tuple)): @@ -180,7 +237,7 @@ return '<%s "%s">' % (self.__class__.__name__, self) def join(self, seq): - return Markup(unicode(self).join([Markup.escape(item) for item in seq])) + return Markup(unicode(self).join([escape(item) for item in seq])) def stripentities(self, keepxmlentities=False): """Return a copy of the text with any character or numeric entities @@ -228,7 +285,7 @@ if isinstance(text, cls): return text text = unicode(text) - if not text: + if not text or isinstance(text, cls): return cls() text = text.replace('&', '&') \ .replace('<', '<') \ @@ -241,7 +298,7 @@ def unescape(self): """Reverse-escapes &, <, > and \" and returns a `unicode` object.""" if not self: - return '' + return u'' return unicode(self).replace('"', '"') \ .replace('>', '>') \ .replace('<', '<') \ @@ -253,7 +310,7 @@ """ text = unicode(self.striptags().stripentities()) if not keeplinebreaks: - text = text.replace('\n', ' ') + text = text.replace(u'\n', u' ') return text def sanitize(self): @@ -273,12 +330,58 @@ class Namespace(object): + """Utility class creating and testing elements with a namespace. + + Internally, namespace URIs are encoded in the `QName` of any element or + attribute, the namespace URI being enclosed in curly braces. This class + helps create and test these strings. + + A `Namespace` object is instantiated with the namespace URI. 
+ + >>> html = Namespace('http://www.w3.org/1999/xhtml') + >>> html + <Namespace "http://www.w3.org/1999/xhtml"> + >>> html.uri + u'http://www.w3.org/1999/xhtml' + + The `Namespace` object can then be used to generate `QName` objects with + that namespace: + + >>> html.body + u'{http://www.w3.org/1999/xhtml}body' + >>> html.body.localname + u'body' + >>> html.body.namespace + u'http://www.w3.org/1999/xhtml' + + The same works using item access notation, which is useful for element or + attribute names that are not valid Python identifiers: + + >>> html['body'] + u'{http://www.w3.org/1999/xhtml}body' + + A `Namespace` object can also be used to test whether a specific `QName` + belongs to that namespace using the `in` operator: + + >>> qname = html.body + >>> qname in html + True + >>> qname in Namespace('http://www.w3.org/2002/06/xhtml2') + False + """ + def __init__(self, uri): + self.uri = unicode(uri) - def __init__(self, uri): - self.uri = uri + def __contains__(self, qname): + return qname.namespace == self.uri + + def __eq__(self, other): + if isinstance(other, Namespace): + return self.uri == other.uri + return self.uri == other def __getitem__(self, name): - return QName(self.uri + '}' + name) + return QName(self.uri + u'}' + name) __getattr__ = __getitem__ @@ -286,10 +389,10 @@ return '<Namespace "%s">' % self.uri def __str__(self): - return self.uri + return self.uri.encode('utf-8') def __unicode__(self): - return unicode(self.uri) + return self.uri class QName(unicode): @@ -299,6 +402,21 @@ the element or attribute, in the form `{namespace}localname`. The namespace URI can be obtained through the additional `namespace` attribute, while the local name can be accessed through the `localname` attribute. 
+ + >>> qname = QName('foo') + >>> qname + u'foo' + >>> qname.localname + u'foo' + >>> qname.namespace + + >>> qname = QName('http://www.w3.org/1999/xhtml}body') + >>> qname + u'{http://www.w3.org/1999/xhtml}body' + >>> qname.localname + u'body' + >>> qname.namespace + u'http://www.w3.org/1999/xhtml' """ __slots__ = ['namespace', 'localname'] @@ -306,13 +424,13 @@ if isinstance(qname, QName): return qname - parts = qname.split('}', 1) - if qname.find('}') > 0: - self = unicode.__new__(cls, '{' + qname) - self.namespace = parts[0] - self.localname = parts[1] + parts = qname.split(u'}', 1) + if qname.find(u'}') > 0: + self = unicode.__new__(cls, u'{' + qname) + self.namespace = unicode(parts[0]) + self.localname = unicode(parts[1]) else: self = unicode.__new__(cls, qname) self.namespace = None - self.localname = qname + self.localname = unicode(qname) return self diff --git a/markup/filters.py b/markup/filters.py --- a/markup/filters.py +++ b/markup/filters.py @@ -19,7 +19,7 @@ from sets import ImmutableSet as frozenset import re -from markup.core import Attributes, Markup, Stream +from markup.core import Attributes, Markup, Namespace, Stream from markup.path import Path __all__ = ['IncludeFilter', 'WhitespaceFilter', 'HTMLSanitizer'] @@ -30,7 +30,7 @@ (see http://www.w3.org/TR/xinclude/) in templates. """ - _NAMESPACE = 'http://www.w3.org/2001/XInclude' + NAMESPACE = Namespace('http://www.w3.org/2001/XInclude') def __init__(self, loader): """Initialize the filter. 
@@ -57,7 +57,7 @@ for kind, data, pos in stream: - if kind is Stream.START and data[0].namespace == self._NAMESPACE \ + if kind is Stream.START and data[0] in self.NAMESPACE \ and not in_fallback: tag, attrib = data if tag.localname == 'include': @@ -67,7 +67,7 @@ in_fallback = True fallback_stream = [] - elif kind is Stream.END and data.namespace == self._NAMESPACE: + elif kind is Stream.END and data in self.NAMESPACE: if data.localname == 'include': try: if not include_href: @@ -93,7 +93,7 @@ elif in_fallback: fallback_stream.append((kind, data, pos)) - elif kind is Stream.START_NS and data[1] == self._NAMESPACE: + elif kind is Stream.START_NS and data[1] == self.NAMESPACE: ns_prefixes.append(data[0]) elif kind is Stream.END_NS and data in ns_prefixes: diff --git a/markup/output.py b/markup/output.py --- a/markup/output.py +++ b/markup/output.py @@ -20,7 +20,7 @@ except NameError: from sets import ImmutableSet as frozenset -from markup.core import Markup, QName, Stream +from markup.core import Markup, Namespace, QName, Stream from markup.filters import WhitespaceFilter __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer'] @@ -121,7 +121,7 @@


""" - NAMESPACE = 'http://www.w3.org/1999/xhtml' + NAMESPACE = Namespace('http://www.w3.org/1999/xhtml') _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', @@ -146,11 +146,11 @@ elif kind is Stream.START: tag, attrib = data - if tag.namespace and tag.namespace != self.NAMESPACE: + if tag.namespace and tag not in self.NAMESPACE: continue # not in the HTML namespace, so don't emit buf = ['<', tag.localname] for attr, value in attrib: - if attr.namespace and attr.namespace != self.NAMESPACE: + if attr.namespace and attr not in self.NAMESPACE: continue # not in the HTML namespace, so don't emit if attr.localname in self._BOOLEAN_ATTRS: if value: @@ -168,7 +168,7 @@ elif kind is Stream.END: tag = data - if tag.namespace and tag.namespace != self.NAMESPACE: + if tag.namespace and tag not in self.NAMESPACE: continue # not in the HTML namespace, so don't emit yield Markup('</%s>' % tag.localname) diff --git a/markup/template.py b/markup/template.py --- a/markup/template.py +++ b/markup/template.py @@ -46,7 +46,7 @@ import re from StringIO import StringIO -from markup.core import Attributes, Stream, StreamEventKind +from markup.core import Attributes, Namespace, Stream, StreamEventKind from markup.eval import Expression from markup.filters import IncludeFilter from markup.input import HTML, XMLParser, XML @@ -558,7 +558,7 @@ """Can parse a template and transform it into the corresponding output based on context data. 
""" - NAMESPACE = 'http://purl.org/kid/ns#' + NAMESPACE = Namespace('http://purl.org/kid/ns#') EXPR = StreamEventKind('EXPR') # an expression SUB = StreamEventKind('SUB') # a "subprogram" @@ -625,7 +625,7 @@ directives = [] new_attrib = [] for name, value in attrib: - if name.namespace == self.NAMESPACE: + if name in self.NAMESPACE: cls = self._dir_by_name.get(name.localname) if cls is None: raise BadDirectiveError(name, self.filename, pos[0]) diff --git a/markup/tests/core.py b/markup/tests/core.py --- a/markup/tests/core.py +++ b/markup/tests/core.py @@ -15,7 +15,7 @@ from HTMLParser import HTMLParseError import unittest -from markup.core import Markup, escape, unescape +from markup.core import * class MarkupTestCase(unittest.TestCase): @@ -183,6 +183,7 @@ def suite(): suite = unittest.TestSuite() suite.addTest(unittest.makeSuite(MarkupTestCase, 'test')) + suite.addTest(doctest.DocTestSuite(Markup.__module__)) return suite if __name__ == '__main__':