diff genshi/output.py @ 820:1837f39efd6f experimental-inline

Sync (old) experimental inline branch with trunk@1027.
author cmlenz
date Wed, 11 Mar 2009 17:51:06 +0000
parents 0742f421caba
children de82830f8816
line wrap: on
line diff
--- a/genshi/output.py
+++ b/genshi/output.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2006-2007 Edgewall Software
+# Copyright (C) 2006-2008 Edgewall Software
 # All rights reserved.
 #
 # This software is licensed as described in the file COPYING, which
@@ -16,10 +16,6 @@
 """
 
 from itertools import chain
-try:
-    frozenset
-except NameError:
-    from sets import ImmutableSet as frozenset
 import re
 
 from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind
@@ -30,7 +26,7 @@
            'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer']
 __docformat__ = 'restructuredtext en'
 
-def encode(iterator, method='xml', encoding='utf-8'):
+def encode(iterator, method='xml', encoding='utf-8', out=None):
     """Encode serializer output into a string.
     
     :param iterator: the iterator returned from serializing a stream (basically
@@ -39,16 +35,27 @@
                    representable in the specified encoding are treated
     :param encoding: how the output string should be encoded; if set to `None`,
                      this method returns a `unicode` object
-    :return: a string or unicode object (depending on the `encoding` parameter)
+    :param out: a file-like object that the output should be written to
+                instead of being returned as one big string; note that if
+                this is a file or socket (or similar), the `encoding` must
+                not be `None` (that is, the output must be encoded)
+    :return: a `str` or `unicode` object (depending on the `encoding`
+             parameter), or `None` if the `out` parameter is provided
+    
     :since: version 0.4.1
+    :note: Changed in 0.5: added the `out` parameter
     """
-    output = u''.join(list(iterator))
     if encoding is not None:
         errors = 'replace'
         if method != 'text' and not isinstance(method, TextSerializer):
             errors = 'xmlcharrefreplace'
-        return output.encode(encoding, errors)
-    return output
+        _encode = lambda string: string.encode(encoding, errors)
+    else:
+        _encode = lambda string: string
+    if out is None:
+        return _encode(u''.join(list(iterator)))
+    for chunk in iterator:
+        out.write(_encode(chunk))
 
 def get_serializer(method='xml', **kwargs):
     """Return a serializer object for the given method.
@@ -103,6 +110,25 @@
     )
     XHTML = XHTML_STRICT
 
+    XHTML11 = (
+        'html', '-//W3C//DTD XHTML 1.1//EN',
+        'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
+    )
+
+    SVG_FULL = (
+        'svg', '-//W3C//DTD SVG 1.1//EN',
+        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd'
+    )
+    SVG_BASIC = (
+        'svg', '-//W3C//DTD SVG Basic 1.1//EN',
+        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd'
+    )
+    SVG_TINY = (
+        'svg', '-//W3C//DTD SVG Tiny 1.1//EN',
+        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd'
+    )
+    SVG = SVG_FULL
+
     def get(cls, name):
         """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE``
         declaration for the specified name.
@@ -110,11 +136,15 @@
         The following names are recognized in this version:
          * "html" or "html-strict" for the HTML 4.01 strict DTD
          * "html-transitional" for the HTML 4.01 transitional DTD
-         * "html-transitional" for the HTML 4.01 frameset DTD
+         * "html-frameset" for the HTML 4.01 frameset DTD
          * "html5" for the ``DOCTYPE`` proposed for HTML5
          * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD
          * "xhtml-transitional" for the XHTML 1.0 transitional DTD
          * "xhtml-frameset" for the XHTML 1.0 frameset DTD
+         * "xhtml11" for the XHTML 1.1 DTD
+         * "svg" or "svg-full" for the SVG 1.1 DTD
+         * "svg-basic" for the SVG Basic 1.1 DTD
+         * "svg-tiny" for the SVG Tiny 1.1 DTD
         
         :param name: the name of the ``DOCTYPE``
         :return: the ``(name, pubid, sysid)`` tuple for the requested
@@ -129,6 +159,10 @@
             'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT,
             'xhtml-transitional': cls.XHTML_TRANSITIONAL,
             'xhtml-frameset': cls.XHTML_FRAMESET,
+            'xhtml11': cls.XHTML11,
+            'svg': cls.SVG, 'svg-full': cls.SVG_FULL,
+            'svg-basic': cls.SVG_BASIC,
+            'svg-tiny': cls.SVG_TINY
         }.get(name.lower())
     get = classmethod(get)
 
@@ -156,21 +190,17 @@
                                  stripped from the output
         :note: Changed in 0.4.2: The  `doctype` parameter can now be a string.
         """
-        self.preamble = []
-        if doctype:
-            if isinstance(doctype, basestring):
-                doctype = DocType.get(doctype)
-            self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
         self.filters = [EmptyTagFilter()]
         if strip_whitespace:
             self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
         self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes))
+        if doctype:
+            self.filters.append(DocTypeInserter(doctype))
 
     def __call__(self, stream):
         have_decl = have_doctype = False
         in_cdata = False
 
-        stream = chain(self.preamble, stream)
         for filter_ in self.filters:
             stream = filter_(stream)
         for kind, data, pos in stream:
@@ -217,7 +247,7 @@
                 if sysid:
                     buf.append(' "%s"')
                 buf.append('>\n')
-                yield Markup(u''.join(buf), *filter(None, data))
+                yield Markup(u''.join(buf)) % filter(None, data)
                 have_doctype = True
 
             elif kind is START_CDATA:
@@ -253,7 +283,7 @@
     ])
 
     def __init__(self, doctype=None, strip_whitespace=True,
-                 namespace_prefixes=None):
+                 namespace_prefixes=None, drop_xml_decl=True):
         super(XHTMLSerializer, self).__init__(doctype, False)
         self.filters = [EmptyTagFilter()]
         if strip_whitespace:
@@ -261,14 +291,17 @@
         namespace_prefixes = namespace_prefixes or {}
         namespace_prefixes['http://www.w3.org/1999/xhtml'] = ''
         self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes))
+        if doctype:
+            self.filters.append(DocTypeInserter(doctype))
+        self.drop_xml_decl = drop_xml_decl
 
     def __call__(self, stream):
         boolean_attrs = self._BOOLEAN_ATTRS
         empty_elems = self._EMPTY_ELEMS
-        have_doctype = False
+        drop_xml_decl = self.drop_xml_decl
+        have_decl = have_doctype = False
         in_cdata = False
 
-        stream = chain(self.preamble, stream)
         for filter_ in self.filters:
             stream = filter_(stream)
         for kind, data, pos in stream:
@@ -279,6 +312,10 @@
                 for attr, value in attrib:
                     if attr in boolean_attrs:
                         value = attr
+                    elif attr == u'xml:lang' and u'lang' not in attrib:
+                        buf += [' lang="', escape(value), '"']
+                    elif attr == u'xml:space':
+                        continue
                     buf += [' ', attr, '="', escape(value), '"']
                 if kind is EMPTY:
                     if tag in empty_elems:
@@ -311,9 +348,21 @@
                 if sysid:
                     buf.append(' "%s"')
                 buf.append('>\n')
-                yield Markup(u''.join(buf), *filter(None, data))
+                yield Markup(u''.join(buf)) % filter(None, data)
                 have_doctype = True
 
+            elif kind is XML_DECL and not have_decl and not drop_xml_decl:
+                version, encoding, standalone = data
+                buf = ['<?xml version="%s"' % version]
+                if encoding:
+                    buf.append(' encoding="%s"' % encoding)
+                if standalone != -1:
+                    standalone = standalone and 'yes' or 'no'
+                    buf.append(' standalone="%s"' % standalone)
+                buf.append('?>\n')
+                yield Markup(u''.join(buf))
+                have_decl = True
+
             elif kind is START_CDATA:
                 yield Markup('<![CDATA[')
                 in_cdata = True
@@ -354,7 +403,11 @@
         if strip_whitespace:
             self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
                                                  self._NOESCAPE_ELEMS))
-        self.filters.append(NamespaceStripper('http://www.w3.org/1999/xhtml'))
+        self.filters.append(NamespaceFlattener(prefixes={
+            'http://www.w3.org/1999/xhtml': ''
+        }))
+        if doctype:
+            self.filters.append(DocTypeInserter(doctype))
 
     def __call__(self, stream):
         boolean_attrs = self._BOOLEAN_ATTRS
@@ -363,7 +416,6 @@
         have_doctype = False
         noescape = False
 
-        stream = chain(self.preamble, stream)
         for filter_ in self.filters:
             stream = filter_(stream)
         for kind, data, pos in stream:
@@ -375,7 +427,10 @@
                     if attr in boolean_attrs:
                         if value:
                             buf += [' ', attr]
-                    else:
+                    elif ':' in attr:
+                        if attr == 'xml:lang' and u'lang' not in attrib:
+                            buf += [' lang="', escape(value), '"']
+                    elif attr != 'xmlns':
                         buf += [' ', attr, '="', escape(value), '"']
                 buf.append('>')
                 if kind is EMPTY:
@@ -408,7 +463,7 @@
                 if sysid:
                     buf.append(' "%s"')
                 buf.append('>\n')
-                yield Markup(u''.join(buf), *filter(None, data))
+                yield Markup(u''.join(buf)) % filter(None, data)
                 have_doctype = True
 
             elif kind is PI:
@@ -429,20 +484,34 @@
     <Hello!>
 
     If text events contain literal markup (instances of the `Markup` class),
-    tags or entities are stripped from the output:
+    that markup is by default passed through unchanged:
     
-    >>> elem = tag.div(Markup('<a href="foo">Hello!</a><br/>'))
-    >>> print elem
-    <div><a href="foo">Hello!</a><br/></div>
-    >>> print ''.join(TextSerializer()(elem.generate()))
-    Hello!
+    >>> elem = tag.div(Markup('<a href="foo">Hello &amp; Bye!</a><br/>'))
+    >>> print elem.generate().render(TextSerializer)
+    <a href="foo">Hello &amp; Bye!</a><br/>
+    
+    You can use the ``strip_markup`` to change this behavior, so that tags and
+    entities are stripped from the output (or in the case of entities,
+    replaced with the equivalent character):
+
+    >>> print elem.generate().render(TextSerializer, strip_markup=True)
+    Hello & Bye!
     """
 
+    def __init__(self, strip_markup=False):
+        """Create the serializer.
+        
+        :param strip_markup: whether markup (tags and encoded characters) found
+                             in the text should be removed
+        """
+        self.strip_markup = strip_markup
+
     def __call__(self, stream):
+        strip_markup = self.strip_markup
         for event in stream:
             if event[0] is TEXT:
                 data = event[1]
-                if type(data) is Markup:
+                if strip_markup and type(data) is Markup:
                     data = data.striptags().stripentities()
                 yield unicode(data)
 
@@ -586,60 +655,6 @@
                 yield kind, data, pos
 
 
-class NamespaceStripper(object):
-    r"""Stream filter that removes all namespace information from a stream, and
-    optionally strips out all tags not in a given namespace.
-    
-    :param namespace: the URI of the namespace that should not be stripped. If
-                      not set, only elements with no namespace are included in
-                      the output.
-    
-    >>> from genshi.input import XML
-    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
-    ...   <two:item/>
-    ... </doc>''')
-    >>> for kind, data, pos in NamespaceStripper(Namespace('NS1'))(xml):
-    ...     print kind, repr(data)
-    START (u'doc', Attrs())
-    TEXT u'\n  '
-    TEXT u'\n'
-    END u'doc'
-    """
-
-    def __init__(self, namespace=None):
-        if namespace is not None:
-            self.namespace = Namespace(namespace)
-        else:
-            self.namespace = {}
-
-    def __call__(self, stream):
-        namespace = self.namespace
-
-        for kind, data, pos in stream:
-
-            if kind is START or kind is EMPTY:
-                tag, attrs = data
-                if tag.namespace and tag not in namespace:
-                    continue
-
-                new_attrs = []
-                for attr, value in attrs:
-                    if not attr.namespace or attr in namespace:
-                        new_attrs.append((attr, value))
-
-                data = tag.localname, Attrs(new_attrs)
-
-            elif kind is END:
-                if data.namespace and data not in namespace:
-                    continue
-                data = data.localname
-
-            elif kind is START_NS or kind is END_NS:
-                continue
-
-            yield kind, data, pos
-
-
 class WhitespaceFilter(object):
     """A filter that removes extraneous ignorable white space from the
     stream.
@@ -714,3 +729,33 @@
 
                 if kind:
                     yield kind, data, pos
+
+
+class DocTypeInserter(object):
+    """A filter that inserts the DOCTYPE declaration in the correct location,
+    after the XML declaration.
+    """
+    def __init__(self, doctype):
+        """Initialize the filter.
+
+        :param doctype: DOCTYPE as a string or DocType object.
+        """
+        if isinstance(doctype, basestring):
+            doctype = DocType.get(doctype)
+        self.doctype_event = (DOCTYPE, doctype, (None, -1, -1))
+
+    def __call__(self, stream):
+        doctype_inserted = False
+        for kind, data, pos in stream:
+            if not doctype_inserted:
+                doctype_inserted = True
+                if kind is XML_DECL:
+                    yield (kind, data, pos)
+                    yield self.doctype_event
+                    continue
+                yield self.doctype_event
+
+            yield (kind, data, pos)
+
+        if not doctype_inserted:
+            yield self.doctype_event
Copyright (C) 2012-2017 Edgewall Software