Mercurial > genshi > genshi-test
comparison markup/core.py @ 113:e815c2c07572
Removed the `sanitize()` method from the `Markup` class, and migrated the existing unit tests to `markup.tests.filters`. Provided a `Stream.filter()` method instead, which can be used to conveniently apply a filter to a stream.
author | cmlenz |
---|---|
date | Mon, 31 Jul 2006 23:00:06 +0000 |
parents | 8a4d9064f363 |
children | 8f53c3ad385c |
comparison
equal
deleted
inserted
replaced
112:a834a6669681 | 113:e815c2c07572 |
---|---|
62 self.events = events | 62 self.events = events |
63 | 63 |
64 def __iter__(self): | 64 def __iter__(self): |
65 return iter(self.events) | 65 return iter(self.events) |
66 | 66 |
67 def filter(self, filter): | |
68 """Apply a filter to the stream. | |
69 | |
70 This method returns a new stream with the given filter applied. The | |
71 filter must be a callable that accepts the stream object as parameter. | |
72 """ | |
73 return Stream(filter(html)) | |
74 | |
67 def render(self, method='xml', encoding='utf-8', filters=None, **kwargs): | 75 def render(self, method='xml', encoding='utf-8', filters=None, **kwargs): |
68 """Return a string representation of the stream. | 76 """Return a string representation of the stream. |
69 | 77 |
70 @param method: determines how the stream is serialized; can be either | 78 @param method: determines how the stream is serialized; can be either |
71 "xml", "xhtml", or "html", or a custom `Serializer` | 79 "xml", "xhtml", or "html", or a custom `Serializer` |
236 | 244 |
237 def totuple(self): | 245 def totuple(self): |
238 return TEXT, u''.join([x[1] for x in self]), (None, -1, -1) | 246 return TEXT, u''.join([x[1] for x in self]), (None, -1, -1) |
239 | 247 |
240 | 248 |
249 def stripentities(text, keepxmlentities=False): | |
250 """Return a copy of the given text with any character or numeric entities | |
251 replaced by the equivalent UTF-8 characters. | |
252 | |
253 If the `keepxmlentities` parameter is provided and evaluates to `True`, | |
254 the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not | |
255 stripped. | |
256 """ | |
257 def _replace_entity(match): | |
258 if match.group(1): # numeric entity | |
259 ref = match.group(1) | |
260 if ref.startswith('x'): | |
261 ref = int(ref[1:], 16) | |
262 else: | |
263 ref = int(ref, 10) | |
264 return unichr(ref) | |
265 else: # character entity | |
266 ref = match.group(2) | |
267 if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', | |
268 'quot'): | |
269 return '&%s;' % ref | |
270 try: | |
271 codepoint = htmlentitydefs.name2codepoint[ref] | |
272 return unichr(codepoint) | |
273 except KeyError: | |
274 if keepxmlentities: | |
275 return '&%s;' % ref | |
276 else: | |
277 return ref | |
278 return re.sub(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)', | |
279 _replace_entity, text) | |
280 | |
281 | |
241 class Markup(unicode): | 282 class Markup(unicode): |
242 """Marks a string as being safe for inclusion in HTML/XML output without | 283 """Marks a string as being safe for inclusion in HTML/XML output without |
243 needing to be escaped. | 284 needing to be escaped. |
244 """ | 285 """ |
245 __slots__ = [] | 286 __slots__ = [] |
274 | 315 |
275 If the `keepxmlentities` parameter is provided and evaluates to `True`, | 316 If the `keepxmlentities` parameter is provided and evaluates to `True`, |
276 the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not | 317 the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not |
277 stripped. | 318 stripped. |
278 """ | 319 """ |
279 def _replace_entity(match): | 320 return Markup(stripentities(self, keepxmlentities=keepxmlentities)) |
280 if match.group(1): # numeric entity | |
281 ref = match.group(1) | |
282 if ref.startswith('x'): | |
283 ref = int(ref[1:], 16) | |
284 else: | |
285 ref = int(ref, 10) | |
286 return unichr(ref) | |
287 else: # character entity | |
288 ref = match.group(2) | |
289 if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', | |
290 'quot'): | |
291 return '&%s;' % ref | |
292 try: | |
293 codepoint = htmlentitydefs.name2codepoint[ref] | |
294 return unichr(codepoint) | |
295 except KeyError: | |
296 if keepxmlentities: | |
297 return '&%s;' % ref | |
298 else: | |
299 return ref | |
300 return Markup(re.sub(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)', | |
301 _replace_entity, self)) | |
302 | 321 |
303 def striptags(self): | 322 def striptags(self): |
304 """Return a copy of the text with all XML/HTML tags removed.""" | 323 """Return a copy of the text with all XML/HTML tags removed.""" |
305 return Markup(re.sub(r'<[^>]*?>', '', self)) | 324 return Markup(re.sub(r'<[^>]*?>', '', self)) |
306 | 325 |
340 text = unicode(self.striptags().stripentities()) | 359 text = unicode(self.striptags().stripentities()) |
341 if not keeplinebreaks: | 360 if not keeplinebreaks: |
342 text = text.replace(u'\n', u' ') | 361 text = text.replace(u'\n', u' ') |
343 return text | 362 return text |
344 | 363 |
345 def sanitize(self): | |
346 from markup.filters import HTMLSanitizer | |
347 from markup.input import HTMLParser | |
348 text = StringIO(self.stripentities(keepxmlentities=True)) | |
349 return Markup(Stream(HTMLSanitizer()(HTMLParser(text)))) | |
350 | |
351 | 364 |
352 escape = Markup.escape | 365 escape = Markup.escape |
353 | 366 |
354 def unescape(text): | 367 def unescape(text): |
355 """Reverse-escapes &, <, > and \" and returns a `unicode` object.""" | 368 """Reverse-escapes &, <, > and \" and returns a `unicode` object.""" |