# HG changeset patch
# User cmlenz
# Date 1257887223 0
# Node ID 07f4339fecb034f915539c04602455d7e7477859
# Parent e04c544a2c412a7dab1db6b885cf59b4c7ee4f23
Remove usage of unicode literals in a couple of places where they were not strictly necessary.
diff --git a/genshi/__init__.py b/genshi/__init__.py
--- a/genshi/__init__.py
+++ b/genshi/__init__.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2006-2008 Edgewall Software
+# Copyright (C) 2006-2009 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
diff --git a/genshi/core.py b/genshi/core.py
--- a/genshi/core.py
+++ b/genshi/core.py
@@ -93,7 +93,7 @@
>>> from genshi.input import HTML
>>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''')
- >>> print html
+ >>> print(html)
<p onclick="alert('Whoa')">Hello, world!</p>
A filter such as the HTML sanitizer can be applied to that stream using
@@ -101,7 +101,7 @@
>>> from genshi.filters import HTMLSanitizer
>>> sanitizer = HTMLSanitizer()
- >>> print html | sanitizer
+ >>> print(html | sanitizer)
<p>Hello, world!</p>
Filters can be any function that accepts and produces a stream (where
@@ -112,14 +112,14 @@
... if kind is TEXT:
... data = data.upper()
... yield kind, data, pos
- >>> print html | sanitizer | uppercase
+ >>> print(html | sanitizer | uppercase)
<p>HELLO, WORLD!</p>
Serializers can also be used with this notation:
>>> from genshi.output import TextSerializer
>>> output = TextSerializer()
- >>> print html | sanitizer | uppercase | output
+ >>> print(html | sanitizer | uppercase | output)
HELLO, WORLD!
Commonly, serializers should be used at the end of the "pipeline";
@@ -188,9 +188,9 @@
>>> from genshi import HTML
>>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
- >>> print stream.select('elem')
+ >>> print(stream.select('elem'))
<elem>foo</elem><elem>bar</elem>
- >>> print stream.select('elem/text()')
+ >>> print(stream.select('elem/text()'))
foobar
Note that the outermost element of the stream becomes the *context
@@ -198,13 +198,13 @@
not match anything in the example above, because it only tests against
child elements of the outermost element:
- >>> print stream.select('doc')
+ >>> print(stream.select('doc'))
You can use the "." expression to match the context node itself
(although that usually makes little sense):
- >>> print stream.select('.')
+ >>> print(stream.select('.'))
<doc><elem>foo</elem><elem>bar</elem></doc>
:param path: a string containing the XPath expression
@@ -354,6 +354,20 @@
if attr == name:
return True
+ def __getitem__(self, i):
+ """Return an item or slice of the attributes list.
+
+ >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
+ >>> attrs[1]
+ ('title', 'Foo')
+ >>> attrs[1:]
+ Attrs([('title', 'Foo')])
+ """
+ items = tuple.__getitem__(self, i)
+ if type(i) is slice:
+ return Attrs(items)
+ return items
+
def __getslice__(self, i, j):
"""Return a slice of the attributes list.
@@ -413,12 +427,12 @@
attributes joined together.
>>> Attrs([('href', '#'), ('title', 'Foo')]).totuple()
- ('TEXT', u'#Foo', (None, -1, -1))
+ ('TEXT', '#Foo', (None, -1, -1))
:return: a `TEXT` event
:rtype: `tuple`
"""
- return TEXT, u''.join([x[1] for x in self]), (None, -1, -1)
+ return TEXT, ''.join([x[1] for x in self]), (None, -1, -1)
class Markup(unicode):
@@ -515,7 +529,7 @@
:see: `genshi.core.unescape`
"""
if not self:
- return u''
+ return ''
return unicode(self).replace('&#34;', '"') \
                    .replace('&gt;', '>') \
                    .replace('&lt;', '<') \
                    .replace('&amp;', '&')
@@ -644,7 +658,7 @@
return self.uri == other
def __getitem__(self, name):
- return QName(self.uri + u'}' + name)
+ return QName(self.uri + '}' + name)
__getattr__ = __getitem__
def __hash__(self):
@@ -699,9 +713,9 @@
if type(qname) is cls:
return qname
- parts = qname.lstrip(u'{').split(u'}', 1)
+ parts = qname.lstrip('{').split('}', 1)
if len(parts) > 1:
- self = unicode.__new__(cls, u'{%s' % qname)
+ self = unicode.__new__(cls, '{%s' % qname)
self.namespace, self.localname = map(unicode, parts)
else:
self = unicode.__new__(cls, qname)
diff --git a/genshi/input.py b/genshi/input.py
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -434,7 +434,7 @@
textpos = pos
else:
if textbuf:
- yield TEXT, u''.join(textbuf), textpos
+ yield TEXT, ''.join(textbuf), textpos
del textbuf[:]
textpos = None
if kind:
diff --git a/genshi/output.py b/genshi/output.py
--- a/genshi/output.py
+++ b/genshi/output.py
@@ -53,7 +53,7 @@
else:
_encode = lambda string: string
if out is None:
- return _encode(u''.join(list(iterator)))
+ return _encode(''.join(list(iterator)))
for chunk in iterator:
out.write(_encode(chunk))
@@ -229,7 +229,7 @@
for attr, value in attrib:
buf += [' ', attr, '="', escape(value), '"']
buf.append(kind is EMPTY and '/>' or '>')
- yield _emit(kind, data, Markup(u''.join(buf)))
+ yield _emit(kind, data, Markup(''.join(buf)))
elif kind is END:
yield _emit(kind, data, Markup('</%s>' % data))
@@ -252,7 +252,7 @@
standalone = standalone and 'yes' or 'no'
buf.append(' standalone="%s"' % standalone)
buf.append('?>\n')
- yield Markup(u''.join(buf))
+ yield Markup(''.join(buf))
have_decl = True
elif kind is DOCTYPE and not have_doctype:
@@ -265,7 +265,7 @@
if sysid:
buf.append(' "%s"')
buf.append('>\n')
- yield Markup(u''.join(buf)) % filter(None, data)
+ yield Markup(''.join(buf)) % filter(None, data)
have_doctype = True
elif kind is START_CDATA:
@@ -345,9 +345,9 @@
for attr, value in attrib:
if attr in boolean_attrs:
value = attr
- elif attr == u'xml:lang' and u'lang' not in attrib:
+ elif attr == 'xml:lang' and 'lang' not in attrib:
buf += [' lang="', escape(value), '"']
- elif attr == u'xml:space':
+ elif attr == 'xml:space':
continue
buf += [' ', attr, '="', escape(value), '"']
if kind is EMPTY:
@@ -357,7 +357,7 @@
buf.append('></%s>' % tag)
else:
buf.append('>')
- yield _emit(kind, data, Markup(u''.join(buf)))
+ yield _emit(kind, data, Markup(''.join(buf)))
elif kind is END:
yield _emit(kind, data, Markup('</%s>' % data))
@@ -381,7 +381,7 @@
if sysid:
buf.append(' "%s"')
buf.append('>\n')
- yield Markup(u''.join(buf)) % filter(None, data)
+ yield Markup(''.join(buf)) % filter(None, data)
have_doctype = True
elif kind is XML_DECL and not have_decl and not drop_xml_decl:
@@ -393,7 +393,7 @@
standalone = standalone and 'yes' or 'no'
buf.append(' standalone="%s"' % standalone)
buf.append('?>\n')
- yield Markup(u''.join(buf))
+ yield Markup(''.join(buf))
have_decl = True
elif kind is START_CDATA:
@@ -483,7 +483,7 @@
if value:
buf += [' ', attr]
elif ':' in attr:
- if attr == 'xml:lang' and u'lang' not in attrib:
+ if attr == 'xml:lang' and 'lang' not in attrib:
buf += [' lang="', escape(value), '"']
elif attr != 'xmlns':
buf += [' ', attr, '="', escape(value), '"']
@@ -491,7 +491,7 @@
if kind is EMPTY:
if tag not in empty_elems:
buf.append('></%s>' % tag)
- yield _emit(kind, data, Markup(u''.join(buf)))
+ yield _emit(kind, data, Markup(''.join(buf)))
if tag in noescape_elems:
noescape = True
@@ -518,7 +518,7 @@
if sysid:
buf.append(' "%s"')
buf.append('>\n')
- yield Markup(u''.join(buf)) % filter(None, data)
+ yield Markup(''.join(buf)) % filter(None, data)
have_doctype = True
elif kind is PI:
@@ -608,7 +608,7 @@
... ''')
>>> for kind, data, pos in NamespaceFlattener()(xml):
... print kind, repr(data)
- START (u'doc', Attrs([(u'xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
+ START (u'doc', Attrs([('xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
TEXT u'\n '
START (u'two:item', Attrs())
END u'two:item'
@@ -655,7 +655,7 @@
ns_attrs = []
_push_ns_attr = ns_attrs.append
def _make_ns_attr(prefix, uri):
- return u'xmlns%s' % (prefix and ':%s' % prefix or ''), uri
+ return 'xmlns%s' % (prefix and ':%s' % prefix or ''), uri
def _gen_prefix():
val = 0
@@ -678,9 +678,9 @@
if tagns in namespaces:
prefix = namespaces[tagns][-1]
if prefix:
- tagname = u'%s:%s' % (prefix, tagname)
+ tagname = '%s:%s' % (prefix, tagname)
else:
- _push_ns_attr((u'xmlns', tagns))
+ _push_ns_attr(('xmlns', tagns))
_push_ns('', tagns)
new_attrs = []
@@ -695,7 +695,7 @@
else:
prefix = namespaces[attrns][-1]
if prefix:
- attrname = u'%s:%s' % (prefix, attrname)
+ attrname = '%s:%s' % (prefix, attrname)
new_attrs.append((attrname, value))
yield _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs)), pos)
@@ -707,7 +707,7 @@
if tagns:
prefix = namespaces[tagns][-1]
if prefix:
- tagname = u'%s:%s' % (prefix, tagname)
+ tagname = '%s:%s' % (prefix, tagname)
yield _emit(kind, data, tagname, pos)
elif kind is START_NS:
diff --git a/genshi/path.py b/genshi/path.py
--- a/genshi/path.py
+++ b/genshi/path.py
@@ -937,7 +937,7 @@
def as_string(value):
value = as_scalar(value)
if value is False:
- return u''
+ return ''
return unicode(value)
def as_bool(value):
@@ -1101,7 +1101,7 @@
for item in [expr(kind, data, pos, namespaces, variables)
for expr in self.exprs]:
strings.append(as_string(item))
- return u''.join(strings)
+ return ''.join(strings)
def __repr__(self):
return 'concat(%s)' % ', '.join([repr(expr) for expr in self.exprs])
@@ -1311,7 +1311,7 @@
index = string1.find(string2)
if index >= 0:
return string1[index + len(string2):]
- return u''
+ return ''
def __repr__(self):
return 'substring-after(%r, %r)' % (self.string1, self.string2)
@@ -1329,7 +1329,7 @@
index = string1.find(string2)
if index >= 0:
return string1[:index]
- return u''
+ return ''
def __repr__(self):
return 'substring-after(%r, %r)' % (self.string1, self.string2)
diff --git a/genshi/tests/core.py b/genshi/tests/core.py
--- a/genshi/tests/core.py
+++ b/genshi/tests/core.py
@@ -175,6 +175,10 @@
self.assertEquals("Attrs([('attr1', 'foo'), ('attr2', 'bar')])",
repr(unpickled))
+ def test_non_ascii(self):
+ attrs_tuple = Attrs([("attr1", u"föö"), ("attr2", u"bär")]).totuple()
+ self.assertEqual(u'fööbär', attrs_tuple[1])
+
class NamespaceTestCase(unittest.TestCase):
@@ -212,6 +216,10 @@
self.assertEquals('http://www.example.org/namespace', qname.namespace)
self.assertEquals('elem', qname.localname)
+ def test_non_ascii(self):
+ qname = QName(u'http://www.example.org/namespace}gürü')
+ self.assertEqual(u'gürü', qname.localname)
+
def suite():
suite = unittest.TestSuite()
diff --git a/genshi/util.py b/genshi/util.py
--- a/genshi/util.py
+++ b/genshi/util.py
@@ -158,6 +158,7 @@
retval.append(item)
return retval
+
def plaintext(text, keeplinebreaks=True):
"""Returns the text as a `unicode` string with all entities and tags
removed.
@@ -179,9 +180,10 @@
"""
text = stripentities(striptags(text))
if not keeplinebreaks:
- text = text.replace(u'\n', u' ')
+ text = text.replace('\n', ' ')
return text
+
_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')
def stripentities(text, keepxmlentities=False):
"""Return a copy of the given text with any character or numeric entities
@@ -213,16 +215,17 @@
else: # character entity
ref = match.group(2)
if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
- return u'&%s;' % ref
+ return '&%s;' % ref
try:
return unichr(htmlentitydefs.name2codepoint[ref])
except KeyError:
if keepxmlentities:
- return u'&%s;' % ref
+ return '&%s;' % ref
else:
return ref
return _STRIPENTITIES_RE.sub(_replace_entity, text)
+
_STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)')
def striptags(text):
"""Return a copy of the text with any XML/HTML tags removed.