# -*- coding: utf-8 -*-
#
# Copyright (C) 2006 Christopher Lenz
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://trac.edgewall.com/license.html.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://projects.edgewall.com/trac/.

"""This module provides different kinds of serialization methods for XML event
streams.
"""

try:
    frozenset
except NameError:
    from sets import ImmutableSet as frozenset

from markup.core import Markup, Namespace, QName, Stream
from markup.filters import WhitespaceFilter

__all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer']


class Serializer(object):
    """Base class for serializers."""

    def serialize(self, stream):
        """Turn an event stream into output; subclasses must override this.

        Raises `NotImplementedError` when called on the base class.
        """
        raise NotImplementedError


class XMLSerializer(Serializer):
    """Produces XML text from an event stream.

    >>> from markup.builder import tag
    >>> elem = tag.DIV(tag.A(href='foo'), tag.BR, tag.HR(noshade=True))
    >>> print ''.join(XMLSerializer().serialize(elem.generate()))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    def serialize(self, stream):
        """Generate `Markup` fragments for the events in `stream`.

        `stream` is iterated as `(kind, data, pos)` triples. Only the
        `DOCTYPE`, `START_NS`, `START`, `END` and `TEXT` kinds produce
        output; every other kind is skipped.
        """
        # Namespace declarations seen since the last start tag; they are
        # written out as `xmlns` attributes on the next start tag.
        ns_attrib = []
        # Maps namespace URI -> prefix (an empty prefix means the default
        # namespace).
        ns_mapping = {}

        stream = PushbackIterator(stream)
        for kind, data, pos in stream:

            if kind is Stream.DOCTYPE:
                # FIXME: what if there's no system or public ID in the input?
                yield Markup('<!DOCTYPE %s "%s" "%s">\n' % data)

            elif kind is Stream.START_NS:
                prefix, uri = data
                if uri not in ns_mapping:
                    ns_mapping[uri] = prefix
                    if not prefix:
                        ns_attrib.append((QName('xmlns'), uri))
                    else:
                        ns_attrib.append((QName('xmlns:%s' % prefix), uri))

            elif kind is Stream.START:
                tag, attrib = data

                tagname = tag.localname
                if tag.namespace:
                    try:
                        prefix = ns_mapping[tag.namespace]
                        if prefix:
                            tagname = prefix + ':' + tag.localname
                    except KeyError:
                        # Namespace was never declared through a START_NS
                        # event: declare it as the default namespace here.
                        ns_attrib.append((QName('xmlns'), tag.namespace))
                buf = ['<', tagname]

                # Work on a copy so that the pending namespace declarations
                # are not written back into the event's own attribute list;
                # otherwise serializing the same events again would emit the
                # declarations twice.
                attrs = list(attrib)
                if ns_attrib:
                    attrs.extend(ns_attrib)
                    ns_attrib = []
                for attr, value in attrs:
                    attrname = attr.localname
                    if attr.namespace:
                        try:
                            prefix = ns_mapping[attr.namespace]
                        except KeyError:
                            # FIXME: synthesize a prefix for the attribute?
                            prefix = ''
                        if prefix:
                            attrname = prefix + ':' + attrname
                    buf.append(' %s="%s"' % (attrname, Markup.escape(value)))

                # Peek at the next event: a START immediately followed by an
                # END is collapsed into an empty-element tag and the END is
                # consumed; anything else is pushed back for the main loop.
                kind, data, pos = stream.next()
                if kind is Stream.END:
                    buf.append('/>')
                else:
                    buf.append('>')
                    stream.pushback((kind, data, pos))

                yield Markup(''.join(buf))

            elif kind is Stream.END:
                tag = data
                tagname = tag.localname
                if tag.namespace:
                    try:
                        prefix = ns_mapping[tag.namespace]
                        if prefix:
                            tagname = prefix + ':' + tag.localname
                    except KeyError:
                        # Unknown namespace: fall back to the bare local
                        # name, mirroring what the start tag used.
                        pass
                yield Markup('</%s>' % tagname)

            elif kind is Stream.TEXT:
                yield Markup.escape(data, quotes=False)


class HTMLSerializer(Serializer):
    """Produces HTML text from an event stream.

    >>> from markup.builder import tag
    >>> elem = tag.DIV(tag.A(href='foo'), tag.BR, tag.HR(noshade=True))
    >>> print ''.join(HTMLSerializer().serialize(elem.generate()))
    <div><a href="foo"></a><br><hr noshade></div>
    """

    NAMESPACE = Namespace('http://www.w3.org/1999/xhtml')

    # Elements that are written without a closing tag.
    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
                              'param'])
    # Attributes that are written as a bare name (or omitted when false).
    _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
                                'defer', 'disabled', 'ismap', 'multiple',
                                'nohref', 'noresize', 'noshade', 'nowrap'])

    def serialize(self, stream):
        """Generate `Markup` fragments for the events in `stream`.

        `stream` is iterated as `(kind, data, pos)` triples. Elements and
        attributes that carry a namespace other than `NAMESPACE` are not
        emitted; text content is always emitted.
        """
        ns_mapping = {}

        stream = PushbackIterator(stream)
        for kind, data, pos in stream:

            if kind is Stream.DOCTYPE:
                yield Markup('<!DOCTYPE %s "%s" "%s">\n' % data)

            elif kind is Stream.START_NS:
                prefix, uri = data
                if uri not in ns_mapping:
                    ns_mapping[uri] = prefix

            elif kind is Stream.START:
                tag, attrib = data
                if tag.namespace and tag not in self.NAMESPACE:
                    continue # not in the HTML namespace, so don't emit
                buf = ['<', tag.localname]
                for attr, value in attrib:
                    if attr.namespace and attr not in self.NAMESPACE:
                        continue # not in the HTML namespace, so don't emit
                    if attr.localname in self._BOOLEAN_ATTRS:
                        # Boolean attributes are minimized: the bare name
                        # when the value is true, nothing at all otherwise.
                        if value:
                            buf.append(' %s' % attr.localname)
                    else:
                        buf.append(' %s="%s"' % (attr.localname,
                                                 Markup.escape(value)))

                if tag.localname in self._EMPTY_ELEMS:
                    # Empty elements get no closing tag: swallow the END
                    # event if it follows directly, otherwise push the
                    # peeked event back for the main loop.
                    kind, data, pos = stream.next()
                    if kind is not Stream.END:
                        stream.pushback((kind, data, pos))

                yield Markup(''.join(buf + ['>']))

            elif kind is Stream.END:
                tag = data
                if tag.namespace and tag not in self.NAMESPACE:
                    continue # not in the HTML namespace, so don't emit
                yield Markup('</%s>' % tag.localname)

            elif kind is Stream.TEXT:
                yield Markup.escape(data, quotes=False)


class PushbackIterator(object):
    """A simple wrapper for iterators that allows pushing items back on the
    queue via the `pushback()` method.

    That can effectively be used to peek at the next item."""
    __slots__ = ['iterable', 'buf']

    def __init__(self, iterable):
        """Wrap `iterable`; the pushback buffer starts out empty."""
        self.iterable = iter(iterable)
        self.buf = []

    def __iter__(self):
        return self

    def next(self):
        """Return the oldest pushed-back item if there is one, otherwise the
        next item of the wrapped iterator."""
        if self.buf:
            return self.buf.pop(0)
        return self.iterable.next()

    def pushback(self, item):
        """Queue `item` so that it is returned by a later `next()` call,
        after any items pushed back before it."""
        self.buf.append(item)