# HG changeset patch # User cmlenz # Date 1167761181 0 # Node ID 3c0a97ff3924660aa28e4d8f74c3f77a67ce56b6 # Parent a816755902581023180356b03d38231189e1dd46 inline branch: Merged [480:486/trunk]. diff --git a/UPGRADE.txt b/UPGRADE.txt --- a/UPGRADE.txt +++ b/UPGRADE.txt @@ -16,7 +16,7 @@ Instances of `genshi.core.Attrs` are now immutable. Filters manipulating the attributes in a stream may need to be updated. See -the docstring of the `Attrs` for more information. +the documentation of the `Attrs` class for more information. Upgrading from Markup diff --git a/genshi/core.py b/genshi/core.py --- a/genshi/core.py +++ b/genshi/core.py @@ -13,16 +13,16 @@ """Core classes for markup processing.""" -import htmlentitydefs import operator -import re + +from genshi.util import plaintext, stripentities, striptags __all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace', 'QName'] class StreamEventKind(str): - """A kind of event on an XML stream.""" + """A kind of event on a markup stream.""" __slots__ = [] _instances = {} @@ -35,19 +35,19 @@ This class is basically an iterator over the events. + Stream events are tuples of the form: + + (kind, data, position) + + where `kind` is the event kind (such as `START`, `END`, `TEXT`, etc), `data` + depends on the kind of event, and `position` is a `(filename, line, offset)` + tuple that contains the location of the original element or text in the + input. If the original location is unknown, `position` is `(None, -1, -1)`. + Also provided are ways to serialize the stream to text. The `serialize()` method will return an iterator over generated strings, while `render()` returns the complete generated text at once. Both accept various parameters that impact the way the stream is serialized. - - Stream events are tuples of the form: - - (kind, data, position) - - where `kind` is the event kind (such as `START`, `END`, `TEXT`, etc), `data` - depends on the kind of event, and `position` is a `(filename, line, offset)` - tuple that contains the location of the original element or text in the - input. If the original location is unknown, `position` is `(None, -1, -1)`. """ __slots__ = ['events'] @@ -92,7 +92,7 @@

Hello, world!

Filters can be any function that accepts and produces a stream (where - a stream is anything that iterators over events): + a stream is anything that iterates over events): >>> def uppercase(stream): ... for kind, data, pos in stream: @@ -326,51 +326,6 @@ return TEXT, u''.join([x[1] for x in self]), (None, -1, -1) -def plaintext(text, keeplinebreaks=True): - """Returns the text as a `unicode` string with all entities and tags - removed. - """ - text = stripentities(striptags(text)) - if not keeplinebreaks: - text = text.replace(u'\n', u' ') - return text - -def stripentities(text, keepxmlentities=False): - """Return a copy of the given text with any character or numeric entities - replaced by the equivalent UTF-8 characters. - - If the `keepxmlentities` parameter is provided and evaluates to `True`, - the core XML entities (&, ', >, < and ") are not - stripped. - """ - def _replace_entity(match): - if match.group(1): # numeric entity - ref = match.group(1) - if ref.startswith('x'): - ref = int(ref[1:], 16) - else: - ref = int(ref, 10) - return unichr(ref) - else: # character entity - ref = match.group(2) - if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'): - return '&%s;' % ref - try: - codepoint = htmlentitydefs.name2codepoint[ref] - return unichr(codepoint) - except KeyError: - if keepxmlentities: - return '&%s;' % ref - else: - return ref - return re.sub(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)', - _replace_entity, text) - -def striptags(text): - """Return a copy of the text with all XML/HTML tags removed.""" - return re.sub(r'<[^>]*?>', '', text) - - class Markup(unicode): """Marks a string as being safe for inclusion in HTML/XML output without needing to be escaped. diff --git a/genshi/output.py b/genshi/output.py --- a/genshi/output.py +++ b/genshi/output.py @@ -98,21 +98,21 @@ ns_attrib.append((QName('xmlns'), namespace)) buf = ['<', tagname] - for attr, value in attrib + tuple(ns_attrib): + if ns_attrib: + attrib += tuple(ns_attrib) + for attr, value in attrib: attrname = attr.localname - if attr.namespace: - prefix = ns_mapping.get(attr.namespace) + attrns = attr.namespace + if attrns: + prefix = ns_mapping.get(attrns) if prefix: attrname = '%s:%s' % (prefix, attrname) buf += [' ', attrname, '="', escape(value), '"'] ns_attrib = [] - if kind is EMPTY: - buf += ['/>'] - else: - buf += ['>'] + buf.append(kind is EMPTY and '/>' or '>') - yield Markup(''.join(buf)) + yield Markup(u''.join(buf)) elif kind is END: tag = data @@ -136,13 +136,13 @@ name, pubid, sysid = data buf = ['\n'] - yield Markup(''.join(buf), *filter(None, data)) + buf.append(' "%s"') + buf.append('>\n') + yield Markup(u''.join(buf), *filter(None, data)) have_doctype = True elif kind is START_NS: @@ -216,10 +216,13 @@ ns_attrib.append((QName('xmlns'), tagns)) buf = ['<', tagname] - for attr, value in chain(attrib, ns_attrib): + if ns_attrib: + attrib += tuple(ns_attrib) + for attr, value in attrib: attrname = attr.localname - if attr.namespace: - prefix = ns_mapping.get(attr.namespace) + attrns = attr.namespace + if attrns: + prefix = ns_mapping.get(attrns) if prefix: attrname = '%s:%s' % (prefix, attrname) if attrname in boolean_attrs: @@ -231,14 +234,14 @@ if kind is EMPTY: if (tagns and tagns != namespace.uri) \ - or tag.localname in empty_elems: - buf += [' />'] + or tagname in empty_elems: + buf.append(' />') else: - buf += ['>' % tagname] + buf.append('>' % tagname) else: - buf += ['>'] + buf.append('>') - yield Markup(''.join(buf)) + yield Markup(u''.join(buf)) elif kind is END: tag = data @@ -262,13 +265,13 @@ name, pubid, sysid = data buf = ['\n'] - yield Markup(''.join(buf), *filter(None, data)) + buf.append(' "%s"') + buf.append('>\n') + yield Markup(u''.join(buf), *filter(None, data)) have_doctype = True elif kind is START_NS: @@ -349,13 +352,13 @@ else: buf += [' ', attrname, '="', escape(value), '"'] - buf += ['>'] + buf.append('>') if kind is EMPTY: if tagname not in empty_elems: - buf += ['' % tagname] + buf.append('' % tagname) - yield Markup(''.join(buf)) + yield Markup(u''.join(buf)) if tagname in noescape_elems: noescape = True @@ -380,13 +383,13 @@ name, pubid, sysid = data buf = ['\n'] - yield Markup(''.join(buf), *filter(None, data)) + buf.append(' "%s"') + buf.append('>\n') + yield Markup(u''.join(buf), *filter(None, data)) have_doctype = True elif kind is START_NS and data[1] not in ns_mapping: @@ -460,7 +463,7 @@ """Initialize the filter. @param preserve: a set or sequence of tag names for which white-space - should be ignored. + should be preserved @param noescape: a set or sequence of tag names for which text content should not be escaped diff --git a/genshi/template/eval.py b/genshi/template/eval.py --- a/genshi/template/eval.py +++ b/genshi/template/eval.py @@ -289,7 +289,8 @@ def _visitBoolOp(self, node): node.nodes = [self.visit(x) for x in node.nodes] return node - visitAnd = visitOr = visitBitand = visitBitor = visitAssTuple = _visitBoolOp + visitAnd = visitOr = visitBitand = visitBitor = visitBitxor = _visitBoolOp + visitAssTuple = _visitBoolOp def _visitBinOp(self, node): node.left = self.visit(node.left) diff --git a/genshi/template/inline.py b/genshi/template/inline.py --- a/genshi/template/inline.py +++ b/genshi/template/inline.py @@ -68,7 +68,7 @@ for idx, child in enumerate(node.nodes): _build(child, indices + (idx,)) elif isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): - buf.append('"%s": v%s' % (node.name, ''.join(['[%s]' % i for i in indices]))) + buf.append('%r: v%s' % (node.name, ''.join(['[%s]' % i for i in indices]))) _build(ast, ()) return '{%s}' % ', '.join(buf) @@ -120,7 +120,7 @@ for line in _predecl_vars(substream): yield line - def _predecl_funcs(stream): + def _predecl_defs(stream): for kind, data, pos in stream: if kind is SUB: directives, substream = data @@ -144,28 +144,28 @@ yield line return - directive = directives[0] - directives = directives[1:] + d = directives[0] + rest = directives[1:] - if isinstance(directive, DefDirective): - return + if isinstance(d, DefDirective): + return # already added yield w() - yield w('# Applying %r', directive) + yield w('# Applying %r', d) - if isinstance(directive, ForDirective): - yield w('for v in e[%d].evaluate(ctxt):', index['E'][directive.expr]) + if isinstance(d, ForDirective): + yield w('for v in e[%d].evaluate(ctxt):', index['E'][d.expr]) w.shift() - yield w('ctxt.push(%s)', _assign(directive.target)) - for line in _apply(directives, stream): + yield w('ctxt.push(%s)', _assign(d.target)) + for line in _apply(rest, stream): yield line yield w('ctxt.pop()') w.unshift() - elif isinstance(directive, IfDirective): - yield w('if e[%d].evaluate(ctxt):', index['E'][directive.expr]) + elif isinstance(d, IfDirective): + yield w('if e[%d].evaluate(ctxt):', index['E'][d.expr]) w.shift() - for line in _apply(directives, stream): + for line in _apply(rest, stream): yield line w.unshift() @@ -248,7 +248,7 @@ # Define macro functions defs = [] - for line in _predecl_funcs(template.stream): + for line in _predecl_defs(template.stream): yield line if defs: yield w() diff --git a/genshi/template/tests/eval.py b/genshi/template/tests/eval.py --- a/genshi/template/tests/eval.py +++ b/genshi/template/tests/eval.py @@ -133,6 +133,10 @@ self.assertEqual(1, Expression("1 | 0").evaluate({})) self.assertEqual(1, Expression("x | y").evaluate({'x': 1, 'y': 0})) + def test_binop_xor(self): + self.assertEqual(1, Expression("1 ^ 0").evaluate({})) + self.assertEqual(1, Expression("x ^ y").evaluate({'x': 1, 'y': 0})) + def test_binop_contains(self): self.assertEqual(True, Expression("1 in (1, 2, 3)").evaluate({})) self.assertEqual(True, Expression("x in y").evaluate({'x': 1, diff --git a/genshi/util.py b/genshi/util.py --- a/genshi/util.py +++ b/genshi/util.py @@ -13,6 +13,9 @@ """Various utility classes and functions.""" +import htmlentitydefs +import re + class LRUCache(dict): """A dictionary-like object that stores only a certain number of items, and @@ -150,3 +153,66 @@ else: retval.append(item) return retval + +def plaintext(text, keeplinebreaks=True): + """Returns the text as a `unicode` string with all entities and tags + removed. + """ + text = stripentities(striptags(text)) + if not keeplinebreaks: + text = text.replace(u'\n', u' ') + return text + +_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)') +def stripentities(text, keepxmlentities=False): + """Return a copy of the given text with any character or numeric entities + replaced by the equivalent UTF-8 characters. + + >>> stripentities('1 < 2') + u'1 < 2' + >>> stripentities('more …') + u'more \u2026' + >>> stripentities('…') + u'\u2026' + >>> stripentities('…') + u'\u2026' + + If the `keepxmlentities` parameter is provided and is a truth value, the + core XML entities (&, ', >, < and ") are left intact. + + >>> stripentities('1 < 2 …', keepxmlentities=True) + u'1 < 2 \u2026' + """ + def _replace_entity(match): + if match.group(1): # numeric entity + ref = match.group(1) + if ref.startswith('x'): + ref = int(ref[1:], 16) + else: + ref = int(ref, 10) + return unichr(ref) + else: # character entity + ref = match.group(2) + if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'): + return u'&%s;' % ref + try: + return unichr(htmlentitydefs.name2codepoint[ref]) + except KeyError: + if keepxmlentities: + return u'&%s;' % ref + else: + return ref + return _STRIPENTITIES_RE.sub(_replace_entity, text) + +_STRIPTAGS_RE = re.compile(r'<[^>]*?>') +def striptags(text): + """Return a copy of the text with all XML/HTML tags removed. + + >>> striptags('Foo bar') + 'Foo bar' + >>> striptags('Foo') + 'Foo' + >>> striptags('Foo
') + 'Foo' + """ + return _STRIPTAGS_RE.sub('', text)