Mercurial > genshi > mirror

--- a/UPGRADE.txt
+++ b/UPGRADE.txt
@@ -16,7 +16,7 @@

 Instances of `genshi.core.Attrs` are now immutable. Filters
 manipulating the attributes in a stream may need to be updated. See
-the docstring of the `Attrs` for more information.
+the documentation of the `Attrs` class for more information.


 Upgrading from Markup
--- a/genshi/core.py
+++ b/genshi/core.py
@@ -13,16 +13,16 @@

 """Core classes for markup processing."""

-import htmlentitydefs
 import operator
-import re
+
+from genshi.util import plaintext, stripentities, striptags

 __all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace',
            'QName']


 class StreamEventKind(str):
-    """A kind of event on an XML stream."""
+    """A kind of event on a markup stream."""
     __slots__ = []
     _instances = {}

@@ -35,19 +35,19 @@

     This class is basically an iterator over the events.

+    Stream events are tuples of the form:
+
+      (kind, data, position)
+
+    where `kind` is the event kind (such as `START`, `END`, `TEXT`, etc), `data`
+    depends on the kind of event, and `position` is a `(filename, line, offset)`
+    tuple that contains the location of the original element or text in the
+    input. If the original location is unknown, `position` is `(None, -1, -1)`.
+
     Also provided are ways to serialize the stream to text. The `serialize()`
     method will return an iterator over generated strings, while `render()`
     returns the complete generated text at once. Both accept various parameters
     that impact the way the stream is serialized.
-
-    Stream events are tuples of the form:
-
-      (kind, data, position)
-
-    where `kind` is the event kind (such as `START`, `END`, `TEXT`, etc), `data`
-    depends on the kind of event, and `position` is a `(filename, line, offset)`
-    tuple that contains the location of the original element or text in the
-    input. If the original location is unknown, `position` is `(None, -1, -1)`.
     """
     __slots__ = ['events']

@@ -92,7 +92,7 @@
         <p>Hello, world!</p>

         Filters can be any function that accepts and produces a stream (where
-        a stream is anything that iterators over events):
+        a stream is anything that iterates over events):

         >>> def uppercase(stream):
         ...     for kind, data, pos in stream:
@@ -326,51 +326,6 @@
         return TEXT, u''.join([x[1] for x in self]), (None, -1, -1)


-def plaintext(text, keeplinebreaks=True):
-    """Returns the text as a `unicode` string with all entities and tags
-    removed.
-    """
-    text = stripentities(striptags(text))
-    if not keeplinebreaks:
-        text = text.replace(u'\n', u' ')
-    return text
-
-def stripentities(text, keepxmlentities=False):
-    """Return a copy of the given text with any character or numeric entities
-    replaced by the equivalent UTF-8 characters.
-
-    If the `keepxmlentities` parameter is provided and evaluates to `True`,
-    the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not
-    stripped.
-    """
-    def _replace_entity(match):
-        if match.group(1): # numeric entity
-            ref = match.group(1)
-            if ref.startswith('x'):
-                ref = int(ref[1:], 16)
-            else:
-                ref = int(ref, 10)
-            return unichr(ref)
-        else: # character entity
-            ref = match.group(2)
-            if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
-                return '&%s;' % ref
-            try:
-                codepoint = htmlentitydefs.name2codepoint[ref]
-                return unichr(codepoint)
-            except KeyError:
-                if keepxmlentities:
-                    return '&amp;%s;' % ref
-                else:
-                    return ref
-    return re.sub(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)',
-                  _replace_entity, text)
-
-def striptags(text):
-    """Return a copy of the text with all XML/HTML tags removed."""
-    return re.sub(r'<[^>]*?>', '', text)
-
-
 class Markup(unicode):
     """Marks a string as being safe for inclusion in HTML/XML output without
     needing to be escaped.
--- a/genshi/output.py
+++ b/genshi/output.py
@@ -98,21 +98,21 @@
                         ns_attrib.append((QName('xmlns'), namespace))
                 buf = ['<', tagname]

-                for attr, value in attrib + tuple(ns_attrib):
+                if ns_attrib:
+                    attrib += tuple(ns_attrib)
+                for attr, value in attrib:
                     attrname = attr.localname
-                    if attr.namespace:
-                        prefix = ns_mapping.get(attr.namespace)
+                    attrns = attr.namespace
+                    if attrns:
+                        prefix = ns_mapping.get(attrns)
                         if prefix:
                             attrname = '%s:%s' % (prefix, attrname)
                     buf += [' ', attrname, '="', escape(value), '"']
                 ns_attrib = []

-                if kind is EMPTY:
-                    buf += ['/>']
-                else:
-                    buf += ['>']
+                buf.append(kind is EMPTY and '/>' or '>')

-                yield Markup(''.join(buf))
+                yield Markup(u''.join(buf))

             elif kind is END:
                 tag = data
@@ -136,13 +136,13 @@
                 name, pubid, sysid = data
                 buf = ['<!DOCTYPE %s']
                 if pubid:
-                    buf += [' PUBLIC "%s"']
+                    buf.append(' PUBLIC "%s"')
                 elif sysid:
-                    buf += [' SYSTEM']
+                    buf.append(' SYSTEM')
                 if sysid:
-                    buf += [' "%s"']
-                buf += ['>\n']
-                yield Markup(''.join(buf), *filter(None, data))
+                    buf.append(' "%s"')
+                buf.append('>\n')
+                yield Markup(u''.join(buf), *filter(None, data))
                 have_doctype = True

             elif kind is START_NS:
@@ -216,10 +216,13 @@
                         ns_attrib.append((QName('xmlns'), tagns))
                 buf = ['<', tagname]

-                for attr, value in chain(attrib, ns_attrib):
+                if ns_attrib:
+                    attrib += tuple(ns_attrib)
+                for attr, value in attrib:
                     attrname = attr.localname
-                    if attr.namespace:
-                        prefix = ns_mapping.get(attr.namespace)
+                    attrns = attr.namespace
+                    if attrns:
+                        prefix = ns_mapping.get(attrns)
                         if prefix:
                             attrname = '%s:%s' % (prefix, attrname)
                     if attrname in boolean_attrs:
@@ -231,14 +234,14 @@

                 if kind is EMPTY:
                     if (tagns and tagns != namespace.uri) \
-                            or tag.localname in empty_elems:
-                        buf += [' />']
+                            or tagname in empty_elems:
+                        buf.append(' />')
                     else:
-                        buf += ['></%s>' % tagname]
+                        buf.append('></%s>' % tagname)
                 else:
-                    buf += ['>']
+                    buf.append('>')

-                yield Markup(''.join(buf))
+                yield Markup(u''.join(buf))

             elif kind is END:
                 tag = data
@@ -262,13 +265,13 @@
                 name, pubid, sysid = data
                 buf = ['<!DOCTYPE %s']
                 if pubid:
-                    buf += [' PUBLIC "%s"']
+                    buf.append(' PUBLIC "%s"')
                 elif sysid:
-                    buf += [' SYSTEM']
+                    buf.append(' SYSTEM')
                 if sysid:
-                    buf += [' "%s"']
-                buf += ['>\n']
-                yield Markup(''.join(buf), *filter(None, data))
+                    buf.append(' "%s"')
+                buf.append('>\n')
+                yield Markup(u''.join(buf), *filter(None, data))
                 have_doctype = True

             elif kind is START_NS:
@@ -349,13 +352,13 @@
                             else:
                                 buf += [' ', attrname, '="', escape(value), '"']

-                    buf += ['>']
+                    buf.append('>')

                     if kind is EMPTY:
                         if tagname not in empty_elems:
-                            buf += ['</%s>' % tagname]
+                            buf.append('</%s>' % tagname)

-                    yield Markup(''.join(buf))
+                    yield Markup(u''.join(buf))

                     if tagname in noescape_elems:
                         noescape = True
@@ -380,13 +383,13 @@
                 name, pubid, sysid = data
                 buf = ['<!DOCTYPE %s']
                 if pubid:
-                    buf += [' PUBLIC "%s"']
+                    buf.append(' PUBLIC "%s"')
                 elif sysid:
-                    buf += [' SYSTEM']
+                    buf.append(' SYSTEM')
                 if sysid:
-                    buf += [' "%s"']
-                buf += ['>\n']
-                yield Markup(''.join(buf), *filter(None, data))
+                    buf.append(' "%s"')
+                buf.append('>\n')
+                yield Markup(u''.join(buf), *filter(None, data))
                 have_doctype = True

             elif kind is START_NS and data[1] not in ns_mapping:
@@ -460,7 +463,7 @@
         """Initialize the filter.

         @param preserve: a set or sequence of tag names for which white-space
-            should be ignored.
+            should be preserved
         @param noescape: a set or sequence of tag names for which text content
             should not be escaped
--- a/genshi/template/eval.py
+++ b/genshi/template/eval.py
@@ -289,7 +289,8 @@
     def _visitBoolOp(self, node):
         node.nodes = [self.visit(x) for x in node.nodes]
         return node
-    visitAnd = visitOr = visitBitand = visitBitor = visitAssTuple = _visitBoolOp
+    visitAnd = visitOr = visitBitand = visitBitor = visitBitxor = _visitBoolOp
+    visitAssTuple = _visitBoolOp

     def _visitBinOp(self, node):
         node.left = self.visit(node.left)
--- a/genshi/template/inline.py
+++ b/genshi/template/inline.py
@@ -68,7 +68,7 @@
             for idx, child in enumerate(node.nodes):
                 _build(child, indices + (idx,))
         elif isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):
-            buf.append('"%s": v%s' % (node.name, ''.join(['[%s]' % i for i in indices])))
+            buf.append('%r: v%s' % (node.name, ''.join(['[%s]' % i for i in indices])))
     _build(ast, ())
     return '{%s}' % ', '.join(buf)

@@ -120,7 +120,7 @@
                 for line in _predecl_vars(substream):
                     yield line

-    def _predecl_funcs(stream):
+    def _predecl_defs(stream):
         for kind, data, pos in stream:
             if kind is SUB:
                 directives, substream = data
@@ -144,28 +144,28 @@
                 yield line
             return

-        directive = directives[0]
-        directives = directives[1:]
+        d = directives[0]
+        rest = directives[1:]

-        if isinstance(directive, DefDirective):
-            return
+        if isinstance(d, DefDirective):
+            return # already added

         yield w()
-        yield w('# Applying %r', directive)
+        yield w('# Applying %r', d)

-        if isinstance(directive, ForDirective):
-            yield w('for v in e[%d].evaluate(ctxt):', index['E'][directive.expr])
+        if isinstance(d, ForDirective):
+            yield w('for v in e[%d].evaluate(ctxt):', index['E'][d.expr])
             w.shift()
-            yield w('ctxt.push(%s)', _assign(directive.target))
-            for line in _apply(directives, stream):
+            yield w('ctxt.push(%s)', _assign(d.target))
+            for line in _apply(rest, stream):
                 yield line
             yield w('ctxt.pop()')
             w.unshift()

-        elif isinstance(directive, IfDirective):
-            yield w('if e[%d].evaluate(ctxt):', index['E'][directive.expr])
+        elif isinstance(d, IfDirective):
+            yield w('if e[%d].evaluate(ctxt):', index['E'][d.expr])
             w.shift()
-            for line in _apply(directives, stream):
+            for line in _apply(rest, stream):
                 yield line
             w.unshift()

@@ -248,7 +248,7 @@

     # Define macro functions
     defs = []
-    for line in _predecl_funcs(template.stream):
+    for line in _predecl_defs(template.stream):
         yield line
     if defs:
         yield w()
--- a/genshi/template/tests/eval.py
+++ b/genshi/template/tests/eval.py
@@ -133,6 +133,10 @@
         self.assertEqual(1, Expression("1 | 0").evaluate({}))
         self.assertEqual(1, Expression("x | y").evaluate({'x': 1, 'y': 0}))

+    def test_binop_xor(self):
+        self.assertEqual(0, Expression("1 ^ 1").evaluate({})) # 1 | 1 == 1
+        self.assertEqual(5, Expression("x ^ y").evaluate({'x': 6, 'y': 3}))
+
     def test_binop_contains(self):
         self.assertEqual(True, Expression("1 in (1, 2, 3)").evaluate({}))
         self.assertEqual(True, Expression("x in y").evaluate({'x': 1,
--- a/genshi/util.py
+++ b/genshi/util.py
@@ -13,6 +13,9 @@

 """Various utility classes and functions."""

+import htmlentitydefs
+import re
+

 class LRUCache(dict):
     """A dictionary-like object that stores only a certain number of items, and
@@ -150,3 +153,66 @@
         else:
             retval.append(item)
     return retval
+
+def plaintext(text, keeplinebreaks=True):
+    """Strip all tags and then all entities from `text` and return the result.
+
+    Line feeds are replaced by spaces unless `keeplinebreaks` is true.
+    """
+    stripped = stripentities(striptags(text))
+    if keeplinebreaks:
+        return stripped
+    return stripped.replace(u'\n', u' ')
+
+_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')
+def stripentities(text, keepxmlentities=False):
+    """Return a copy of the given text with any character or numeric entities
+    replaced by the equivalent Unicode characters.
+
+    >>> stripentities('1 &lt; 2')
+    u'1 < 2'
+    >>> stripentities('more &hellip;')
+    u'more \u2026'
+    >>> stripentities('&#8230;')
+    u'\u2026'
+    >>> stripentities('&#x2026;')
+    u'\u2026'
+
+    If the `keepxmlentities` parameter is provided and is a truth value, the
+    core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are left intact.
+
+    >>> stripentities('1 &lt; 2 &hellip;', keepxmlentities=True)
+    u'1 &lt; 2 \u2026'
+    """
+    def _replace_entity(match):
+        if match.group(1): # numeric entity
+            ref = match.group(1)
+            if ref[:1] in ('x', 'X'): # the pattern accepts `&#x..;` and `&#X..;`
+                ref = int(ref[1:], 16)
+            else:
+                ref = int(ref, 10)
+            return unichr(ref)
+        else: # character entity
+            ref = match.group(2)
+            if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
+                return u'&%s;' % ref
+            try:
+                return unichr(htmlentitydefs.name2codepoint[ref])
+            except KeyError: # unknown entity name
+                if keepxmlentities:
+                    return u'&amp;%s;' % ref
+                else:
+                    return ref
+    return _STRIPENTITIES_RE.sub(_replace_entity, text)
+
+_STRIPTAGS_RE = re.compile(r'<[^>]*?>') # a `<` up to the nearest `>`
+def striptags(text):
+    """Return a copy of the text with every `<...>` tag sequence removed.
+
+    >>> striptags('<span>Foo</span> bar')
+    'Foo bar'
+    >>> striptags('<span class="bar">Foo</span>')
+    'Foo'
+    >>> striptags('Foo<br />')
+    'Foo'
+    """
+    return _STRIPTAGS_RE.sub('', text)