Mercurial > genshi > mirror

--- a/markup/core.py
+++ b/markup/core.py
@@ -68,7 +68,8 @@
         """Return a string representation of the stream.

         @param method: determines how the stream is serialized; can be either
-                       'xml' or 'html', or a custom `Serializer` subclass
+                       "xml", "xhtml", or "html", or a custom `Serializer`
+                       subclass
         @param encoding: how the output string should be encoded; if set to
                          `None`, this method returns a `unicode` object

@@ -99,7 +100,8 @@
         string.

         @param method: determines how the stream is serialized; can be either
-                       'xml' or 'html', or a custom `Serializer` subclass
+                       "xml", "xhtml", or "html", or a custom `Serializer`
+                       subclass
         @param filters: list of filters to apply to the stream before
                         serialization. The default is to apply whitespace
                         reduction using `markup.filters.WhitespaceFilter`.
@@ -108,8 +110,9 @@
         from markup import output
         cls = method
         if isinstance(method, basestring):
-            cls = {'xml': output.XMLSerializer,
-                   'html': output.HTMLSerializer}[method]
+            cls = {'xml':   output.XMLSerializer,
+                   'xhtml': output.XHTMLSerializer,
+                   'html':  output.HTMLSerializer}[method]
         else:
             assert issubclass(cls, output.Serializer)
         serializer = cls(**kwargs)
--- a/markup/output.py
+++ b/markup/output.py
@@ -155,13 +155,13 @@
                 yield Markup('<!--%s-->' % data)


-class HTMLSerializer(Serializer):
-    """Produces HTML text from an event stream.
+class XHTMLSerializer(XMLSerializer):
+    """Produces XHTML text from an event stream.

     >>> from markup.builder import tag
     >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
-    >>> print ''.join(HTMLSerializer().serialize(elem.generate()))
-    <div><a href="foo"></a><br><hr noshade></div>
+    >>> print ''.join(XHTMLSerializer().serialize(elem.generate()))
+    <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
     """

     NAMESPACE = Namespace('http://www.w3.org/1999/xhtml')
@@ -173,17 +173,6 @@
                                 'defer', 'disabled', 'ismap', 'multiple',
                                 'nohref', 'noresize', 'noshade', 'nowrap'])

-    def __init__(self, doctype=None):
-        """Initialize the HTML serializer.
-
-        @param doctype: a `(name, pubid, sysid)` tuple that represents the
-            DOCTYPE declaration that should be included at the top of the
-            generated output
-        """
-        self.preamble = []
-        if doctype:
-            self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
-
     def serialize(self, stream):
         have_doctype = False
         ns_mapping = {}
@@ -215,6 +204,82 @@
                 if tag.namespace and tag not in self.NAMESPACE:
                     continue # not in the HTML namespace, so don't emit
                 buf = ['<', tag.localname]
+
+                for attr, value in attrib:
+                    if attr.namespace and attr not in self.NAMESPACE:
+                        continue # not in the HTML namespace, so don't emit
+                    if attr.localname in self._BOOLEAN_ATTRS:
+                        if value:
+                            buf.append(' %s="%s"' % (attr.localname, attr.localname))
+                    else:
+                        buf.append(' %s="%s"' % (attr.localname, escape(value)))
+
+                if tag.localname in self._EMPTY_ELEMS:
+                    kind, data, pos = stream.next()
+                    if kind is END:
+                        buf.append(' />')
+                    else:
+                        buf.append('>')
+                        stream.pushback((kind, data, pos))
+                else:
+                    buf.append('>')
+
+                yield Markup(''.join(buf))
+
+            elif kind is END:
+                tag = data
+                if tag.namespace and tag not in self.NAMESPACE:
+                    continue # not in the HTML namespace, so don't emit
+                yield Markup('</%s>' % tag.localname)
+
+            elif kind is TEXT:
+                yield escape(data, quotes=False)
+
+            elif kind is COMMENT:
+                yield Markup('<!--%s-->' % data)
+
+
+class HTMLSerializer(XHTMLSerializer):
+    """Produces HTML text from an event stream.
+
+    >>> from markup.builder import tag
+    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
+    >>> print ''.join(HTMLSerializer().serialize(elem.generate()))
+    <div><a href="foo"></a><br><hr noshade></div>
+    """
+
+    def serialize(self, stream):
+        have_doctype = False
+        ns_mapping = {}
+
+        stream = _PushbackIterator(chain(self.preamble, stream))
+        for kind, data, pos in stream:
+
+            if kind is DOCTYPE:
+                if not have_doctype:
+                    name, pubid, sysid = data
+                    buf = ['<!DOCTYPE %s']
+                    if pubid:
+                        buf.append(' PUBLIC "%s"')
+                    elif sysid:
+                        buf.append(' SYSTEM')
+                    if sysid:
+                        buf.append(' "%s"')
+                    buf.append('>\n')
+                    yield Markup(''.join(buf), *filter(None, data))
+                    have_doctype = True
+
+            elif kind is START_NS:
+                prefix, uri = data
+                if uri not in ns_mapping:
+                    ns_mapping[uri] = prefix
+
+            elif kind is START:
+                tag, attrib = data
+                if tag.namespace and tag not in self.NAMESPACE:
+                    continue # not in the HTML namespace, so don't emit
+                buf = ['<', tag.localname]
+
                 for attr, value in attrib:
                     if attr.namespace and attr not in self.NAMESPACE:
                         continue # not in the HTML namespace, so don't emit