comparison markup/core.py @ 113:e815c2c07572

Removed the `sanitize()` method from the `Markup` class, and migrated the existing unit tests to `markup.tests.filters`. Provided a `Stream.filter()` method instead, which can be used to conveniently apply a filter to a stream.
author cmlenz
date Mon, 31 Jul 2006 23:00:06 +0000
parents 8a4d9064f363
children 8f53c3ad385c
comparison
equal deleted inserted replaced
112:a834a6669681 113:e815c2c07572
62 self.events = events 62 self.events = events
63 63
64 def __iter__(self): 64 def __iter__(self):
65 return iter(self.events) 65 return iter(self.events)
66 66
67 def filter(self, filter):
68 """Apply a filter to the stream.
69
70 This method returns a new stream with the given filter applied. The
71 filter must be a callable that accepts the stream object as parameter.
72 """
73 return Stream(filter(html))
74
67 def render(self, method='xml', encoding='utf-8', filters=None, **kwargs): 75 def render(self, method='xml', encoding='utf-8', filters=None, **kwargs):
68 """Return a string representation of the stream. 76 """Return a string representation of the stream.
69 77
70 @param method: determines how the stream is serialized; can be either 78 @param method: determines how the stream is serialized; can be either
71 "xml", "xhtml", or "html", or a custom `Serializer` 79 "xml", "xhtml", or "html", or a custom `Serializer`
236 244
237 def totuple(self): 245 def totuple(self):
238 return TEXT, u''.join([x[1] for x in self]), (None, -1, -1) 246 return TEXT, u''.join([x[1] for x in self]), (None, -1, -1)
239 247
240 248
249 def stripentities(text, keepxmlentities=False):
250 """Return a copy of the given text with any character or numeric entities
251 replaced by the equivalent UTF-8 characters.
252
253 If the `keepxmlentities` parameter is provided and evaluates to `True`,
254 the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not
255 stripped.
256 """
257 def _replace_entity(match):
258 if match.group(1): # numeric entity
259 ref = match.group(1)
260 if ref.startswith('x'):
261 ref = int(ref[1:], 16)
262 else:
263 ref = int(ref, 10)
264 return unichr(ref)
265 else: # character entity
266 ref = match.group(2)
267 if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt',
268 'quot'):
269 return '&%s;' % ref
270 try:
271 codepoint = htmlentitydefs.name2codepoint[ref]
272 return unichr(codepoint)
273 except KeyError:
274 if keepxmlentities:
275 return '&%s;' % ref
276 else:
277 return ref
278 return re.sub(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)',
279 _replace_entity, text)
280
281
241 class Markup(unicode): 282 class Markup(unicode):
242 """Marks a string as being safe for inclusion in HTML/XML output without 283 """Marks a string as being safe for inclusion in HTML/XML output without
243 needing to be escaped. 284 needing to be escaped.
244 """ 285 """
245 __slots__ = [] 286 __slots__ = []
274 315
275 If the `keepxmlentities` parameter is provided and evaluates to `True`, 316 If the `keepxmlentities` parameter is provided and evaluates to `True`,
276 the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not 317 the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not
277 stripped. 318 stripped.
278 """ 319 """
279 def _replace_entity(match): 320 return Markup(stripentities(self, keepxmlentities=keepxmlentities))
280 if match.group(1): # numeric entity
281 ref = match.group(1)
282 if ref.startswith('x'):
283 ref = int(ref[1:], 16)
284 else:
285 ref = int(ref, 10)
286 return unichr(ref)
287 else: # character entity
288 ref = match.group(2)
289 if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt',
290 'quot'):
291 return '&%s;' % ref
292 try:
293 codepoint = htmlentitydefs.name2codepoint[ref]
294 return unichr(codepoint)
295 except KeyError:
296 if keepxmlentities:
297 return '&%s;' % ref
298 else:
299 return ref
300 return Markup(re.sub(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)',
301 _replace_entity, self))
302 321
303 def striptags(self): 322 def striptags(self):
304 """Return a copy of the text with all XML/HTML tags removed.""" 323 """Return a copy of the text with all XML/HTML tags removed."""
305 return Markup(re.sub(r'<[^>]*?>', '', self)) 324 return Markup(re.sub(r'<[^>]*?>', '', self))
306 325
340 text = unicode(self.striptags().stripentities()) 359 text = unicode(self.striptags().stripentities())
341 if not keeplinebreaks: 360 if not keeplinebreaks:
342 text = text.replace(u'\n', u' ') 361 text = text.replace(u'\n', u' ')
343 return text 362 return text
344 363
345 def sanitize(self):
346 from markup.filters import HTMLSanitizer
347 from markup.input import HTMLParser
348 text = StringIO(self.stripentities(keepxmlentities=True))
349 return Markup(Stream(HTMLSanitizer()(HTMLParser(text))))
350
351 364
352 escape = Markup.escape 365 escape = Markup.escape
353 366
354 def unescape(text): 367 def unescape(text):
355 """Reverse-escapes &, <, > and \" and returns a `unicode` object.""" 368 """Reverse-escapes &, <, > and \" and returns a `unicode` object."""
Copyright (C) 2012-2017 Edgewall Software