Mercurial > genshi > mirror
diff genshi/core.py @ 397:31742fe6d47e trunk
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
* Minor performance tweaks for the serializers
author | cmlenz |
---|---|
date | Tue, 02 Jan 2007 17:48:06 +0000 |
parents | 2682dabbcd04 |
children | 228907abb726 |
line wrap: on
line diff
--- a/genshi/core.py +++ b/genshi/core.py @@ -13,16 +13,16 @@ """Core classes for markup processing.""" -import htmlentitydefs import operator -import re + +from genshi.util import plaintext, stripentities, striptags __all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace', 'QName'] class StreamEventKind(str): - """A kind of event on an XML stream.""" + """A kind of event on a markup stream.""" __slots__ = [] _instances = {} @@ -35,19 +35,19 @@ This class is basically an iterator over the events. + Stream events are tuples of the form: + + (kind, data, position) + + where `kind` is the event kind (such as `START`, `END`, `TEXT`, etc), `data` + depends on the kind of event, and `position` is a `(filename, line, offset)` + tuple that contains the location of the original element or text in the + input. If the original location is unknown, `position` is `(None, -1, -1)`. + Also provided are ways to serialize the stream to text. The `serialize()` method will return an iterator over generated strings, while `render()` returns the complete generated text at once. Both accept various parameters that impact the way the stream is serialized. - - Stream events are tuples of the form: - - (kind, data, position) - - where `kind` is the event kind (such as `START`, `END`, `TEXT`, etc), `data` - depends on the kind of event, and `position` is a `(filename, line, offset)` - tuple that contains the location of the original element or text in the - input. If the original location is unknown, `position` is `(None, -1, -1)`. """ __slots__ = ['events'] @@ -92,7 +92,7 @@ <p>Hello, world!</p> Filters can be any function that accepts and produces a stream (where - a stream is anything that iterators over events): + a stream is anything that iterates over events): >>> def uppercase(stream): ... 
for kind, data, pos in stream: @@ -326,51 +326,6 @@ return TEXT, u''.join([x[1] for x in self]), (None, -1, -1) -def plaintext(text, keeplinebreaks=True): - """Returns the text as a `unicode` string with all entities and tags - removed. - """ - text = stripentities(striptags(text)) - if not keeplinebreaks: - text = text.replace(u'\n', u' ') - return text - -def stripentities(text, keepxmlentities=False): - """Return a copy of the given text with any character or numeric entities - replaced by the equivalent UTF-8 characters. - - If the `keepxmlentities` parameter is provided and evaluates to `True`, - the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not - stripped. - """ - def _replace_entity(match): - if match.group(1): # numeric entity - ref = match.group(1) - if ref.startswith('x'): - ref = int(ref[1:], 16) - else: - ref = int(ref, 10) - return unichr(ref) - else: # character entity - ref = match.group(2) - if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'): - return '&%s;' % ref - try: - codepoint = htmlentitydefs.name2codepoint[ref] - return unichr(codepoint) - except KeyError: - if keepxmlentities: - return '&%s;' % ref - else: - return ref - return re.sub(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)', - _replace_entity, text) - -def striptags(text): - """Return a copy of the text with all XML/HTML tags removed.""" - return re.sub(r'<[^>]*?>', '', text) - - class Markup(unicode): """Marks a string as being safe for inclusion in HTML/XML output without needing to be escaped.