diff markup/core.py @ 113:d10fbba1d5e0 trunk

Remove the `sanitize()` method from the `Markup` class, and migrate the existing unit tests to `markup.tests.filters`. Provide a `Stream.filter()` method instead, which can be used to conveniently apply a filter to a stream.
author cmlenz
date Mon, 31 Jul 2006 23:00:06 +0000
parents 2368c3becc52
children 4c4e81d12649
line wrap: on
line diff
--- a/markup/core.py
+++ b/markup/core.py
@@ -64,6 +64,14 @@
     def __iter__(self):
         return iter(self.events)
 
+    def filter(self, filter):
+        """Apply a filter to the stream.
+        
+        This method returns a new stream with the given filter applied. The
+        filter must be a callable that accepts the stream object as parameter.
+        """
+        return Stream(filter(html))
+
     def render(self, method='xml', encoding='utf-8', filters=None, **kwargs):
         """Return a string representation of the stream.
         
@@ -238,6 +246,39 @@
         return TEXT, u''.join([x[1] for x in self]), (None, -1, -1)
 
 
+def stripentities(text, keepxmlentities=False):
+    """Return a copy of the given text with any character or numeric entities
+    replaced by the equivalent UTF-8 characters.
+    
+    If the `keepxmlentities` parameter is provided and evaluates to `True`,
+    the core XML entities (&, ', >, < and ") are not
+    stripped.
+    """
+    def _replace_entity(match):
+        if match.group(1): # numeric entity
+            ref = match.group(1)
+            if ref.startswith('x'):
+                ref = int(ref[1:], 16)
+            else:
+                ref = int(ref, 10)
+            return unichr(ref)
+        else: # character entity
+            ref = match.group(2)
+            if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt',
+                                           'quot'):
+                return '&%s;' % ref
+            try:
+                codepoint = htmlentitydefs.name2codepoint[ref]
+                return unichr(codepoint)
+            except KeyError:
+                if keepxmlentities:
+                    return '&%s;' % ref
+                else:
+                    return ref
+    return re.sub(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)',
+                  _replace_entity, text)
+
+
 class Markup(unicode):
     """Marks a string as being safe for inclusion in HTML/XML output without
     needing to be escaped.
@@ -276,29 +317,7 @@
         the core XML entities (&, ', >, < and ") are not
         stripped.
         """
-        def _replace_entity(match):
-            if match.group(1): # numeric entity
-                ref = match.group(1)
-                if ref.startswith('x'):
-                    ref = int(ref[1:], 16)
-                else:
-                    ref = int(ref, 10)
-                return unichr(ref)
-            else: # character entity
-                ref = match.group(2)
-                if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt',
-                                               'quot'):
-                    return '&%s;' % ref
-                try:
-                    codepoint = htmlentitydefs.name2codepoint[ref]
-                    return unichr(codepoint)
-                except KeyError:
-                    if keepxmlentities:
-                        return '&%s;' % ref
-                    else:
-                        return ref
-        return Markup(re.sub(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)',
-                             _replace_entity, self))
+        return Markup(stripentities(self, keepxmlentities=keepxmlentities))
 
     def striptags(self):
         """Return a copy of the text with all XML/HTML tags removed."""
@@ -342,12 +361,6 @@
             text = text.replace(u'\n', u' ')
         return text
 
-    def sanitize(self):
-        from markup.filters import HTMLSanitizer
-        from markup.input import HTMLParser
-        text = StringIO(self.stripentities(keepxmlentities=True))
-        return Markup(Stream(HTMLSanitizer()(HTMLParser(text))))
-
 
 escape = Markup.escape
 
Copyright (C) 2012-2017 Edgewall Software