genshi/genshi-test: markup/core.py comparison

comparison markup/core.py @ 113:e815c2c07572

Removed the `sanitize()` method from the `Markup` class, and migrated the existing unit tests to `markup.tests.filters`. Provided a `Stream.filter()` method instead, which can be used to conveniently apply a filter to a stream.

author	cmlenz
date	Mon, 31 Jul 2006 23:00:06 +0000
parents	8a4d9064f363
children	8f53c3ad385c

comparison

equal deleted inserted replaced

-:a834a6669681
+:e815c2c07572
 self.events = events
 def __iter__(self):
 return iter(self.events)
def filter(self, filter):
    """Apply a filter to the stream.

    This method returns a new stream with the given filter applied. The
    filter must be a callable that accepts the stream object as parameter.

    @param filter: a callable that is invoked with this stream as its only
        argument; its result is wrapped in a new `Stream`
    @return: a new `Stream` built from whatever the filter returns
    """
    # Hand the stream itself to the filter. The previous body passed the
    # undefined name `html`, which raised NameError on every call.
    return Stream(filter(self))
 def render(self, method='xml', encoding='utf-8', filters=None, **kwargs):
 """Return a string representation of the stream.
 @param method: determines how the stream is serialized; can be either
 "xml", "xhtml", or "html", or a custom `Serializer`
 def totuple(self):
 return TEXT, u''.join([x[1] for x in self]), (None, -1, -1)
def stripentities(text, keepxmlentities=False):
    """Return a copy of the given text with any character or numeric entities
    replaced by the equivalent Unicode characters.

    If the `keepxmlentities` parameter is provided and evaluates to `True`,
    the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not
    stripped.

    Named entities that are not found in `htmlentitydefs.name2codepoint` are
    reduced to their bare name, or, when `keepxmlentities` is set, re-emitted
    with the ampersand escaped (`&amp;name;`).

    @param text: the string in which entity references should be replaced
    @param keepxmlentities: whether the five core XML entities should be left
        in place
    @return: the text with entity references substituted
    """
    def _replace_entity(match):
        if match.group(1): # numeric entity
            ref = match.group(1)
            # The pattern below accepts both `&#x..;` and `&#X..;`, so the
            # hexadecimal prefix has to be recognized in either case;
            # checking only for a lowercase 'x' sent `&#X41;` to the decimal
            # branch, where int() raised ValueError.
            if ref[0] in 'xX':
                ref = int(ref[1:], 16)
            else:
                ref = int(ref, 10)
            return unichr(ref)
        else: # character entity
            ref = match.group(2)
            if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt',
                                           'quot'):
                return '&%s;' % ref
            try:
                codepoint = htmlentitydefs.name2codepoint[ref]
                return unichr(codepoint)
            except KeyError:
                if keepxmlentities:
                    return '&amp;%s;' % ref
                else:
                    return ref
    return re.sub(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)',
                  _replace_entity, text)
 class Markup(unicode):
 """Marks a string as being safe for inclusion in HTML/XML output without
 needing to be escaped.
 """
 __slots__ = []
 If the `keepxmlentities` parameter is provided and evaluates to `True`,
 the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not
 stripped.
 """
-def _replace_entity(match):
+return Markup(stripentities(self, keepxmlentities=keepxmlentities))
-if match.group(1): # numeric entity
-ref = match.group(1)
-if ref.startswith('x'):
-ref = int(ref[1:], 16)
-else:
-ref = int(ref, 10)
-return unichr(ref)
-else: # character entity
-ref = match.group(2)
-if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt',
-'quot'):
-return '&%s;' % ref
-try:
-codepoint = htmlentitydefs.name2codepoint[ref]
-return unichr(codepoint)
-except KeyError:
-if keepxmlentities:
-return '&amp;%s;' % ref
-else:
-return ref
-return Markup(re.sub(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)',
-_replace_entity, self))
 def striptags(self):
 """Return a copy of the text with all XML/HTML tags removed."""
 return Markup(re.sub(r'<[^>]*?>', '', self))
 text = unicode(self.striptags().stripentities())
 if not keeplinebreaks:
 text = text.replace(u'\n', u' ')
 return text
-def sanitize(self):
-from markup.filters import HTMLSanitizer
-from markup.input import HTMLParser
-text = StringIO(self.stripentities(keepxmlentities=True))
-return Markup(Stream(HTMLSanitizer()(HTMLParser(text))))
 escape = Markup.escape
 def unescape(text):
 """Reverse-escapes &, <, > and \" and returns a `unicode` object."""

Mercurial > genshi > genshi-test

comparison markup/core.py @ 113:e815c2c07572