changeset 200:5861f4446c26 trunk

Add serialization to plain text, based on cboos' patch. Closes #41.
author cmlenz
date Fri, 25 Aug 2006 11:14:04 +0000
parents 71ce4b5335ba
children c5e0a1c86173
files ChangeLog markup/core.py markup/output.py
diffstat 3 files changed, 45 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -13,6 +13,7 @@
    to multiple names, and semicolons inside string literals are treated as
    expected.
  * Generator expressions can now be used in template expressions (ticket #16).
+ * Added serialization to plain text (ticket #41).
 
 
 Version 0.2
--- a/markup/core.py
+++ b/markup/core.py
@@ -81,8 +81,8 @@
         """Return a string representation of the stream.
         
         @param method: determines how the stream is serialized; can be either
-                       "xml", "xhtml", or "html", or a custom `Serializer`
-                       subclass
+                       "xml", "xhtml", "html", "text", or a custom serializer
+                       class
         @param encoding: how the output string should be encoded; if set to
                          `None`, this method returns a `unicode` object
 
@@ -92,7 +92,10 @@
         generator = self.serialize(method=method, **kwargs)
         output = u''.join(list(generator))
         if encoding is not None:
-            return output.encode(encoding, 'xmlcharrefreplace')
+            errors = 'replace'
+            if method != 'text':
+                errors = 'xmlcharrefreplace'
+            return output.encode(encoding, errors)
         return output
 
     def select(self, path):
@@ -113,7 +116,8 @@
         string.
         
         @param method: determines how the stream is serialized; can be either
-                       "xml", "xhtml", or "html", or a custom serializer class
+                       "xml", "xhtml", "html", "text", or a custom serializer
+                       class
 
         Any additional keyword arguments are passed to the serializer, and thus
         depend on the `method` parameter value.
@@ -123,7 +127,8 @@
         if isinstance(method, basestring):
             cls = {'xml':   output.XMLSerializer,
                    'xhtml': output.XHTMLSerializer,
-                   'html':  output.HTMLSerializer}[method]
+                   'html':  output.HTMLSerializer,
+                   'text':  output.TextSerializer}[method]
         serialize = cls(**kwargs)
         return serialize(_ensure(self))
 
@@ -300,8 +305,7 @@
             return unichr(ref)
         else: # character entity
             ref = match.group(2)
-            if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt',
-                                           'quot'):
+            if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
                 return '&%s;' % ref
             try:
                 codepoint = htmlentitydefs.name2codepoint[ref]
--- a/markup/output.py
+++ b/markup/output.py
@@ -26,7 +26,8 @@
 from markup.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \
                         END_CDATA, PI, COMMENT, XML_NAMESPACE
 
-__all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer']
+__all__ = ['DocType', 'XMLSerializer', 'XHTMLSerializer', 'HTMLSerializer',
+           'TextSerializer']
 
 
 class DocType(object):
@@ -398,6 +399,37 @@
                 yield Markup('<?%s %s?>' % data)
 
 
+class TextSerializer(object):
+    """Produces plain text from an event stream.
+    
+    Only text events are included in the output. Unlike the other serializers,
+    special XML characters are not escaped:
+    
+    >>> from markup.builder import tag
+    >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br)
+    >>> print elem
+    <div><a href="foo">&lt;Hello!&gt;</a><br/></div>
+    >>> print ''.join(TextSerializer()(elem.generate()))
+    <Hello!>
+
+    If text events contain literal markup (instances of the `Markup` class),
+    tags or entities are stripped from the output:
+    
+    >>> elem = tag.div(Markup('<a href="foo">Hello!</a><br/>'))
+    >>> print elem
+    <div><a href="foo">Hello!</a><br/></div>
+    >>> print ''.join(TextSerializer()(elem.generate()))
+    Hello!
+    """
+
+    def __call__(self, stream):
+        for kind, data, pos in stream:
+            if kind is TEXT:
+                if type(data) is Markup:
+                    data = data.striptags().stripentities()
+                yield data
+
+
 class WhitespaceFilter(object):
     """A filter that removes extraneous ignorable white space from the
     stream."""
Copyright (C) 2012-2017 Edgewall Software