Mercurial > genshi > genshi-test

--- a/genshi/__init__.py
+++ b/genshi/__init__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2006-2008 Edgewall Software
+# Copyright (C) 2006-2009 Edgewall Software
 # All rights reserved.
 #
 # This software is licensed as described in the file COPYING, which
--- a/genshi/core.py
+++ b/genshi/core.py
@@ -93,7 +93,7 @@

         >>> from genshi.input import HTML
         >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''')
-        >>> print html
+        >>> print(html)
         <p onclick="alert('Whoa')">Hello, world!</p>

         A filter such as the HTML sanitizer can be applied to that stream using
@@ -101,7 +101,7 @@

         >>> from genshi.filters import HTMLSanitizer
         >>> sanitizer = HTMLSanitizer()
-        >>> print html | sanitizer
+        >>> print(html | sanitizer)
         <p>Hello, world!</p>

         Filters can be any function that accepts and produces a stream (where
@@ -112,14 +112,14 @@
         ...         if kind is TEXT:
         ...             data = data.upper()
         ...         yield kind, data, pos
-        >>> print html | sanitizer | uppercase
+        >>> print(html | sanitizer | uppercase)
         <p>HELLO, WORLD!</p>

         Serializers can also be used with this notation:

         >>> from genshi.output import TextSerializer
         >>> output = TextSerializer()
-        >>> print html | sanitizer | uppercase | output
+        >>> print(html | sanitizer | uppercase | output)
         HELLO, WORLD!

         Commonly, serializers should be used at the end of the "pipeline";
@@ -188,9 +188,9 @@

         >>> from genshi import HTML
         >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
-        >>> print stream.select('elem')
+        >>> print(stream.select('elem'))
         <elem>foo</elem><elem>bar</elem>
-        >>> print stream.select('elem/text()')
+        >>> print(stream.select('elem/text()'))
         foobar

         Note that the outermost element of the stream becomes the *context
@@ -198,13 +198,13 @@
         not match anything in the example above, because it only tests against
         child elements of the outermost element:

-        >>> print stream.select('doc')
+        >>> print(stream.select('doc'))
         <BLANKLINE>

         You can use the "." expression to match the context node itself
         (although that usually makes little sense):

-        >>> print stream.select('.')
+        >>> print(stream.select('.'))
         <doc><elem>foo</elem><elem>bar</elem></doc>

         :param path: a string containing the XPath expression
@@ -354,6 +354,20 @@
             if attr == name:
                 return True

+    def __getitem__(self, i):
+        """Return an item or slice of the attributes list.
+
+        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
+        >>> attrs[1]
+        ('title', 'Foo')
+        >>> attrs[1:]
+        Attrs([('title', 'Foo')])
+        """
+        items = tuple.__getitem__(self, i)  # plain tuple indexing/slicing
+        if type(i) is slice:
+            return Attrs(items)  # re-wrap so a slice is still an `Attrs`
+        return items
+
     def __getslice__(self, i, j):
         """Return a slice of the attributes list.

@@ -413,12 +427,12 @@
         attributes joined together.

         >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple()
-        ('TEXT', u'#Foo', (None, -1, -1))
+        ('TEXT', '#Foo', (None, -1, -1))

         :return: a `TEXT` event
         :rtype: `tuple`
         """
-        return TEXT, u''.join([x[1] for x in self]), (None, -1, -1)
+        return TEXT, ''.join([x[1] for x in self]), (None, -1, -1)


 class Markup(unicode):
@@ -515,7 +529,7 @@
         :see: `genshi.core.unescape`
         """
         if not self:
-            return u''
+            return ''
         return unicode(self).replace('&#34;', '"') \
                             .replace('&gt;', '>') \
                             .replace('&lt;', '<') \
@@ -644,7 +658,7 @@
         return self.uri == other

     def __getitem__(self, name):
-        return QName(self.uri + u'}' + name)
+        return QName(self.uri + '}' + name)
     __getattr__ = __getitem__

     def __hash__(self):
@@ -699,9 +713,9 @@
         if type(qname) is cls:
             return qname

-        parts = qname.lstrip(u'{').split(u'}', 1)
+        parts = qname.lstrip('{').split('}', 1)
         if len(parts) > 1:
-            self = unicode.__new__(cls, u'{%s' % qname)
+            self = unicode.__new__(cls, '{%s' % qname)
             self.namespace, self.localname = map(unicode, parts)
         else:
             self = unicode.__new__(cls, qname)
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -434,7 +434,7 @@
                 textpos = pos
         else:
             if textbuf:
-                yield TEXT, u''.join(textbuf), textpos
+                yield TEXT, ''.join(textbuf), textpos
                 del textbuf[:]
                 textpos = None
             if kind:
--- a/genshi/output.py
+++ b/genshi/output.py
@@ -53,7 +53,7 @@
     else:
         _encode = lambda string: string
     if out is None:
-        return _encode(u''.join(list(iterator)))
+        return _encode(''.join(list(iterator)))
     for chunk in iterator:
         out.write(_encode(chunk))

@@ -229,7 +229,7 @@
                 for attr, value in attrib:
                     buf += [' ', attr, '="', escape(value), '"']
                 buf.append(kind is EMPTY and '/>' or '>')
-                yield _emit(kind, data, Markup(u''.join(buf)))
+                yield _emit(kind, data, Markup(''.join(buf)))

             elif kind is END:
                 yield _emit(kind, data, Markup('</%s>' % data))
@@ -252,7 +252,7 @@
                     standalone = standalone and 'yes' or 'no'
                     buf.append(' standalone="%s"' % standalone)
                 buf.append('?>\n')
-                yield Markup(u''.join(buf))
+                yield Markup(''.join(buf))
                 have_decl = True

             elif kind is DOCTYPE and not have_doctype:
@@ -265,7 +265,7 @@
                 if sysid:
                     buf.append(' "%s"')
                 buf.append('>\n')
-                yield Markup(u''.join(buf)) % filter(None, data)
+                yield Markup(''.join(buf)) % filter(None, data)
                 have_doctype = True

             elif kind is START_CDATA:
@@ -345,9 +345,9 @@
                 for attr, value in attrib:
                     if attr in boolean_attrs:
                         value = attr
-                    elif attr == u'xml:lang' and u'lang' not in attrib:
+                    elif attr == 'xml:lang' and 'lang' not in attrib:
                         buf += [' lang="', escape(value), '"']
-                    elif attr == u'xml:space':
+                    elif attr == 'xml:space':
                         continue
                     buf += [' ', attr, '="', escape(value), '"']
                 if kind is EMPTY:
@@ -357,7 +357,7 @@
                         buf.append('></%s>' % tag)
                 else:
                     buf.append('>')
-                yield _emit(kind, data, Markup(u''.join(buf)))
+                yield _emit(kind, data, Markup(''.join(buf)))

             elif kind is END:
                 yield _emit(kind, data, Markup('</%s>' % data))
@@ -381,7 +381,7 @@
                 if sysid:
                     buf.append(' "%s"')
                 buf.append('>\n')
-                yield Markup(u''.join(buf)) % filter(None, data)
+                yield Markup(''.join(buf)) % filter(None, data)
                 have_doctype = True

             elif kind is XML_DECL and not have_decl and not drop_xml_decl:
@@ -393,7 +393,7 @@
                     standalone = standalone and 'yes' or 'no'
                     buf.append(' standalone="%s"' % standalone)
                 buf.append('?>\n')
-                yield Markup(u''.join(buf))
+                yield Markup(''.join(buf))
                 have_decl = True

             elif kind is START_CDATA:
@@ -483,7 +483,7 @@
                         if value:
                             buf += [' ', attr]
                     elif ':' in attr:
-                        if attr == 'xml:lang' and u'lang' not in attrib:
+                        if attr == 'xml:lang' and 'lang' not in attrib:
                             buf += [' lang="', escape(value), '"']
                     elif attr != 'xmlns':
                         buf += [' ', attr, '="', escape(value), '"']
@@ -491,7 +491,7 @@
                 if kind is EMPTY:
                     if tag not in empty_elems:
                         buf.append('</%s>' % tag)
-                yield _emit(kind, data, Markup(u''.join(buf)))
+                yield _emit(kind, data, Markup(''.join(buf)))
                 if tag in noescape_elems:
                     noescape = True

@@ -518,7 +518,7 @@
                 if sysid:
                     buf.append(' "%s"')
                 buf.append('>\n')
-                yield Markup(u''.join(buf)) % filter(None, data)
+                yield Markup(''.join(buf)) % filter(None, data)
                 have_doctype = True

             elif kind is PI:
@@ -608,7 +608,7 @@
     ... </doc>''')
     >>> for kind, data, pos in NamespaceFlattener()(xml):
     ...     print kind, repr(data)
-    START (u'doc', Attrs([(u'xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
+    START (u'doc', Attrs([('xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
     TEXT u'\n  '
     START (u'two:item', Attrs())
     END u'two:item'
@@ -655,7 +655,7 @@
         ns_attrs = []
         _push_ns_attr = ns_attrs.append
         def _make_ns_attr(prefix, uri):
-            return u'xmlns%s' % (prefix and ':%s' % prefix or ''), uri
+            return 'xmlns%s' % (prefix and ':%s' % prefix or ''), uri

         def _gen_prefix():
             val = 0
@@ -678,9 +678,9 @@
                     if tagns in namespaces:
                         prefix = namespaces[tagns][-1]
                         if prefix:
-                            tagname = u'%s:%s' % (prefix, tagname)
+                            tagname = '%s:%s' % (prefix, tagname)
                     else:
-                        _push_ns_attr((u'xmlns', tagns))
+                        _push_ns_attr(('xmlns', tagns))
                         _push_ns('', tagns)

                 new_attrs = []
@@ -695,7 +695,7 @@
                         else:
                             prefix = namespaces[attrns][-1]
                         if prefix:
-                            attrname = u'%s:%s' % (prefix, attrname)
+                            attrname = '%s:%s' % (prefix, attrname)
                     new_attrs.append((attrname, value))

                 yield _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs)), pos)
@@ -707,7 +707,7 @@
                 if tagns:
                     prefix = namespaces[tagns][-1]
                     if prefix:
-                        tagname = u'%s:%s' % (prefix, tagname)
+                        tagname = '%s:%s' % (prefix, tagname)
                 yield _emit(kind, data, tagname, pos)

             elif kind is START_NS:
--- a/genshi/path.py
+++ b/genshi/path.py
@@ -937,7 +937,7 @@
 def as_string(value):
     value = as_scalar(value)
     if value is False:
-        return u''
+        return ''
     return unicode(value)

 def as_bool(value):
@@ -1101,7 +1101,7 @@
         for item in [expr(kind, data, pos, namespaces, variables)
                      for expr in self.exprs]:
             strings.append(as_string(item))
-        return u''.join(strings)
+        return ''.join(strings)
     def __repr__(self):
         return 'concat(%s)' % ', '.join([repr(expr) for expr in self.exprs])

@@ -1311,7 +1311,7 @@
         index = string1.find(string2)
         if index >= 0:
             return string1[index + len(string2):]
-        return u''
+        return ''
     def __repr__(self):
         return 'substring-after(%r, %r)' % (self.string1, self.string2)

@@ -1329,7 +1329,7 @@
         index = string1.find(string2)
         if index >= 0:
             return string1[:index]
-        return u''
+        return ''
     def __repr__(self):
         return 'substring-after(%r, %r)' % (self.string1, self.string2)
--- a/genshi/tests/core.py
+++ b/genshi/tests/core.py
@@ -175,6 +175,10 @@
         self.assertEquals("Attrs([('attr1', 'foo'), ('attr2', 'bar')])",
                           repr(unpickled))

+    def test_non_ascii(self):
+        attrs_tuple = Attrs([("attr1", u"föö"), ("attr2", u"bär")]).totuple()
+        self.assertEqual(u'fööbär', attrs_tuple[1])
+

 class NamespaceTestCase(unittest.TestCase):

@@ -212,6 +216,10 @@
         self.assertEquals('http://www.example.org/namespace', qname.namespace)
         self.assertEquals('elem', qname.localname)

+    def test_non_ascii(self):
+        qname = QName(u'http://www.example.org/namespace}gürü')
+        self.assertEqual(u'gürü', qname.localname)
+

 def suite():
     suite = unittest.TestSuite()
--- a/genshi/util.py
+++ b/genshi/util.py
@@ -158,6 +158,7 @@
             retval.append(item)
     return retval

+
 def plaintext(text, keeplinebreaks=True):
     """Returns the text as a `unicode` string with all entities and tags
     removed.
@@ -179,9 +180,10 @@
     """
     text = stripentities(striptags(text))
     if not keeplinebreaks:
-        text = text.replace(u'\n', u' ')
+        text = text.replace('\n', ' ')
     return text

+
 _STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')
 def stripentities(text, keepxmlentities=False):
     """Return a copy of the given text with any character or numeric entities
@@ -213,16 +215,17 @@
         else: # character entity
             ref = match.group(2)
             if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
-                return u'&%s;' % ref
+                return '&%s;' % ref
             try:
                 return unichr(htmlentitydefs.name2codepoint[ref])
             except KeyError:
                 if keepxmlentities:
-                    return u'&amp;%s;' % ref
+                    return '&amp;%s;' % ref
                 else:
                     return ref
     return _STRIPENTITIES_RE.sub(_replace_entity, text)

+
 _STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)')
 def striptags(text):
     """Return a copy of the text with any XML/HTML tags removed.