# HG changeset patch
# User cmlenz
# Date 1257887223 0
# Node ID 04945cd67dad2ca1a1e8887cb29a76df580f1b20
# Parent  354eec9b2bbcbddb44f31814876b1b544515983f
Remove usage of unicode literals in a couple of places where they were not strictly necessary.

diff --git a/genshi/__init__.py b/genshi/__init__.py
--- a/genshi/__init__.py
+++ b/genshi/__init__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2006-2008 Edgewall Software
+# Copyright (C) 2006-2009 Edgewall Software
 # All rights reserved.
 #
 # This software is licensed as described in the file COPYING, which
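The rationale behind the u''-literal removals in the hunks below: in Python 2, joining with a byte-string separator already promotes the result to unicode whenever the joined parts are unicode, so the u'' prefix on the empty joiner buys nothing. A minimal sketch of that semantics (not part of the patch; Python 2 assumed):

    parts = [u'f\xf6\xf6', u'b\xe4r']    # unicode attribute values
    joined = ''.join(parts)              # joiner is a byte string...
    assert type(joined) is unicode       # ...but the result is still unicode
    assert joined == u''.join(parts)     # identical to the u'' spelling

The new tests at the bottom of the patch pin down exactly this behavior for non-ASCII values.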

diff --git a/genshi/core.py b/genshi/core.py
--- a/genshi/core.py
+++ b/genshi/core.py
@@ -93,7 +93,7 @@
         >>> from genshi.input import HTML
         >>> html = HTML('''<p onclick="alert('cool')" class="stylish">
         ...   Hello, world!
         ... </p>''')
-        >>> print html
+        >>> print(html)
         <p onclick="alert('cool')" class="stylish">
           Hello, world!
         </p>
         A filter such as the HTML sanitizer can be applied to that stream using
@@ -101,7 +101,7 @@
         >>> from genshi.filters import HTMLSanitizer
         >>> sanitizer = HTMLSanitizer()
-        >>> print html | sanitizer
+        >>> print(html | sanitizer)
         <p class="stylish">
           Hello, world!
         </p>
         Filters can be any function that accepts and produces a stream (where
@@ -112,14 +112,14 @@
         ...     if kind is TEXT:
         ...         data = data.upper()
         ...     yield kind, data, pos
-        >>> print html | sanitizer | uppercase
+        >>> print(html | sanitizer | uppercase)
         <p class="stylish">
           HELLO, WORLD!
         </p>
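The docstring above states the filter contract: any callable that maps an event stream to an event stream can sit in the pipe. A self-contained script version of the same pipeline, assuming the era-appropriate Genshi API shown in the doctest:

    from genshi.core import TEXT
    from genshi.input import HTML
    from genshi.filters import HTMLSanitizer
    from genshi.output import TextSerializer

    def uppercase(stream):
        # A filter is just a callable from event stream to event stream.
        for kind, data, pos in stream:
            if kind is TEXT:
                data = data.upper()
            yield kind, data, pos

    html = HTML(u'<p onclick="alert(1)">Hello, world!</p>')
    print(html | HTMLSanitizer() | uppercase | TextSerializer())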

         Serializers can also be used with this notation:

         >>> from genshi.output import TextSerializer
         >>> output = TextSerializer()
-        >>> print html | sanitizer | uppercase | output
+        >>> print(html | sanitizer | uppercase | output)
           HELLO, WORLD!

         Commonly, serializers should be used at the end of the "pipeline";
@@ -188,9 +188,9 @@
         >>> from genshi import HTML
         >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
-        >>> print stream.select('elem')
+        >>> print(stream.select('elem'))
         <elem>foo</elem><elem>bar</elem>
-        >>> print stream.select('elem/text()')
+        >>> print(stream.select('elem/text()'))
         foobar

         Note that the outermost element of the stream becomes the *context
@@ -198,13 +198,13 @@
         not match anything in the example above, because it only tests against
         child elements of the outermost element:

-        >>> print stream.select('doc')
+        >>> print(stream.select('doc'))
         <BLANKLINE>

         You can use the "." expression to match the context node itself
         (although that usually makes little sense):

-        >>> print stream.select('.')
+        >>> print(stream.select('.'))
         <doc><elem>foo</elem><elem>bar</elem></doc>

         :param path: a string containing the XPath expression
@@ -354,6 +354,20 @@
         if attr == name:
             return True

+    def __getitem__(self, i):
+        """Return an item or slice of the attributes list.
+
+        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
+        >>> attrs[1]
+        ('title', 'Foo')
+        >>> attrs[1:]
+        Attrs([('title', 'Foo')])
+        """
+        items = tuple.__getitem__(self, i)
+        if type(i) is slice:
+            return Attrs(items)
+        return items
+
     def __getslice__(self, i, j):
         """Return a slice of the attributes list.

@@ -413,12 +427,12 @@
         attributes joined together.

         >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple()
-        ('TEXT', u'#Foo', (None, -1, -1))
+        ('TEXT', '#Foo', (None, -1, -1))

         :return: a `TEXT` event
         :rtype: `tuple`
         """
-        return TEXT, u''.join([x[1] for x in self]), (None, -1, -1)
+        return TEXT, ''.join([x[1] for x in self]), (None, -1, -1)


 class Markup(unicode):
@@ -515,7 +529,7 @@
         :see: `genshi.core.unescape`
         """
         if not self:
-            return u''
+            return ''
         return unicode(self).replace('&#34;', '"') \
                             .replace('&#62;', '>') \
                             .replace('&#60;', '<') \
@@ -644,7 +658,7 @@
         return self.uri == other

     def __getitem__(self, name):
-        return QName(self.uri + u'}' + name)
+        return QName(self.uri + '}' + name)
     __getattr__ = __getitem__

     def __hash__(self):
@@ -699,9 +713,9 @@
         if type(qname) is cls:
             return qname

-        parts = qname.lstrip(u'{').split(u'}', 1)
+        parts = qname.lstrip('{').split('}', 1)
         if len(parts) > 1:
-            self = unicode.__new__(cls, u'{%s' % qname)
+            self = unicode.__new__(cls, '{%s' % qname)
             self.namespace, self.localname = map(unicode, parts)
         else:
             self = unicode.__new__(cls, qname)
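The new Attrs.__getitem__ exists because tuple.__getitem__ with a slice returns a plain tuple, which would lose the Attrs type and its helpers; rewrapping the slice restores it. The separate __getslice__ stays because, under Python 2, tuple defines __getslice__ and simple i:j slicing would otherwise bypass __getitem__. A quick illustration, assuming this patch is applied:

    from genshi.core import Attrs

    attrs = Attrs([('href', '#'), ('title', 'Foo')])
    assert attrs[1] == ('title', 'Foo')     # single items stay plain tuples
    assert isinstance(attrs[1:], Attrs)     # slices are rewrapped as Attrs
    # the raw tuple slice, by contrast, is an untyped tuple of pairs
    assert tuple.__getitem__(attrs, slice(1, None)) == (('title', 'Foo'),)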
diff --git a/genshi/input.py b/genshi/input.py
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -434,7 +434,7 @@
                     textpos = pos
             else:
                 if textbuf:
-                    yield TEXT, u''.join(textbuf), textpos
+                    yield TEXT, ''.join(textbuf), textpos
                     del textbuf[:]
                     textpos = None
                 if kind:
diff --git a/genshi/output.py b/genshi/output.py
--- a/genshi/output.py
+++ b/genshi/output.py
@@ -53,7 +53,7 @@
     else:
         _encode = lambda string: string
     if out is None:
-        return _encode(u''.join(list(iterator)))
+        return _encode(''.join(list(iterator)))
     for chunk in iterator:
         out.write(_encode(chunk))
@@ -229,7 +229,7 @@
                 for attr, value in attrib:
                     buf += [' ', attr, '="', escape(value), '"']
                 buf.append(kind is EMPTY and '/>' or '>')
-                yield _emit(kind, data, Markup(u''.join(buf)))
+                yield _emit(kind, data, Markup(''.join(buf)))

             elif kind is END:
                 yield _emit(kind, data, Markup('</%s>' % data))
@@ -252,7 +252,7 @@
                     standalone = standalone and 'yes' or 'no'
                     buf.append(' standalone="%s"' % standalone)
                 buf.append('?>\n')
-                yield Markup(u''.join(buf))
+                yield Markup(''.join(buf))
                 have_decl = True

             elif kind is DOCTYPE and not have_doctype:
@@ -265,7 +265,7 @@
                     if sysid:
                         buf.append(' "%s"')
                 buf.append('>\n')
-                yield Markup(u''.join(buf)) % filter(None, data)
+                yield Markup(''.join(buf)) % filter(None, data)
                 have_doctype = True

             elif kind is START_CDATA:
@@ -345,9 +345,9 @@
                 for attr, value in attrib:
                     if attr in boolean_attrs:
                         value = attr
-                    elif attr == u'xml:lang' and u'lang' not in attrib:
+                    elif attr == 'xml:lang' and 'lang' not in attrib:
                         buf += [' lang="', escape(value), '"']
-                    elif attr == u'xml:space':
+                    elif attr == 'xml:space':
                         continue
                     buf += [' ', attr, '="', escape(value), '"']
                 if kind is EMPTY:
@@ -357,7 +357,7 @@
                         buf.append('></%s>' % tag)
                     else:
                         buf.append('>')
-                yield _emit(kind, data, Markup(u''.join(buf)))
+                yield _emit(kind, data, Markup(''.join(buf)))

             elif kind is END:
                 yield _emit(kind, data, Markup('</%s>' % data))
@@ -381,7 +381,7 @@
                     if sysid:
                         buf.append(' "%s"')
                 buf.append('>\n')
-                yield Markup(u''.join(buf)) % filter(None, data)
+                yield Markup(''.join(buf)) % filter(None, data)
                 have_doctype = True

             elif kind is XML_DECL and not have_decl and not drop_xml_decl:
@@ -393,7 +393,7 @@
                     standalone = standalone and 'yes' or 'no'
                     buf.append(' standalone="%s"' % standalone)
                 buf.append('?>\n')
-                yield Markup(u''.join(buf))
+                yield Markup(''.join(buf))
                 have_decl = True

             elif kind is START_CDATA:
@@ -483,7 +483,7 @@
                         if value:
                             buf += [' ', attr]
                     elif ':' in attr:
-                        if attr == 'xml:lang' and u'lang' not in attrib:
+                        if attr == 'xml:lang' and 'lang' not in attrib:
                             buf += [' lang="', escape(value), '"']
                     elif attr != 'xmlns':
                         buf += [' ', attr, '="', escape(value), '"']
@@ -491,7 +491,7 @@
                 if kind is EMPTY:
                     if tag not in empty_elems:
                         buf.append('</%s>' % tag)
-                yield _emit(kind, data, Markup(u''.join(buf)))
+                yield _emit(kind, data, Markup(''.join(buf)))

                 if tag in noescape_elems:
                     noescape = True
@@ -518,7 +518,7 @@
                     if sysid:
                         buf.append(' "%s"')
                 buf.append('>\n')
-                yield Markup(u''.join(buf)) % filter(None, data)
+                yield Markup(''.join(buf)) % filter(None, data)
                 have_doctype = True

             elif kind is PI:
@@ -608,7 +608,7 @@
     ... </doc>''')
     >>> for kind, data, pos in NamespaceFlattener()(xml):
     ...     print kind, repr(data)
-    START (u'doc', Attrs([(u'xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
+    START (u'doc', Attrs([('xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
     TEXT u'\n  '
     START (u'two:item', Attrs())
     END u'two:item'
@@ -655,7 +655,7 @@
         ns_attrs = []
         _push_ns_attr = ns_attrs.append
         def _make_ns_attr(prefix, uri):
-            return u'xmlns%s' % (prefix and ':%s' % prefix or ''), uri
+            return 'xmlns%s' % (prefix and ':%s' % prefix or ''), uri

         def _gen_prefix():
             val = 0
@@ -678,9 +678,9 @@
                 if tagns in namespaces:
                     prefix = namespaces[tagns][-1]
                     if prefix:
-                        tagname = u'%s:%s' % (prefix, tagname)
+                        tagname = '%s:%s' % (prefix, tagname)
                 else:
-                    _push_ns_attr((u'xmlns', tagns))
+                    _push_ns_attr(('xmlns', tagns))
                     _push_ns('', tagns)

             new_attrs = []
@@ -695,7 +695,7 @@
                     else:
                         prefix = namespaces[attrns][-1]
                         if prefix:
-                            attrname = u'%s:%s' % (prefix, attrname)
+                            attrname = '%s:%s' % (prefix, attrname)
                 new_attrs.append((attrname, value))

             yield _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs)), pos)
@@ -707,7 +707,7 @@
             if tagns:
                 prefix = namespaces[tagns][-1]
                 if prefix:
-                    tagname = u'%s:%s' % (prefix, tagname)
+                    tagname = '%s:%s' % (prefix, tagname)

             yield _emit(kind, data, tagname, pos)

         elif kind is START_NS:
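The Markup(''.join(buf)) % filter(None, data) pattern that recurs in the DOCTYPE branches above first assembles a format string from optional pieces, then interpolates only the fields that are present; filter(None, ...) drops the None entries so the number of "%s" slots matches. A simplified standalone sketch of the idiom (the PUBLIC/SYSTEM branches are paraphrased from the surrounding serializer code, not quoted from this patch; plain strings need an explicit tuple where Markup.__mod__ also accepts a list):

    def doctype_markup(name, pubid, sysid):
        # One '%s' slot is appended per field that will actually be present.
        buf = ['<!DOCTYPE %s']
        if pubid:
            buf.append(' PUBLIC "%s"')
        elif sysid:
            buf.append(' SYSTEM')
        if sysid:
            buf.append(' "%s"')
        buf.append('>\n')
        return ''.join(buf) % tuple(filter(None, (name, pubid, sysid)))

    print(doctype_markup('html', '-//W3C//DTD XHTML 1.0 Strict//EN',
                         'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'))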
diff --git a/genshi/path.py b/genshi/path.py
--- a/genshi/path.py
+++ b/genshi/path.py
@@ -937,7 +937,7 @@
 def as_string(value):
     value = as_scalar(value)
     if value is False:
-        return u''
+        return ''
     return unicode(value)

 def as_bool(value):
@@ -1101,7 +1101,7 @@
         for item in [expr(kind, data, pos, namespaces, variables)
                      for expr in self.exprs]:
             strings.append(as_string(item))
-        return u''.join(strings)
+        return ''.join(strings)

     def __repr__(self):
         return 'concat(%s)' % ', '.join([repr(expr) for expr in self.exprs])
@@ -1311,7 +1311,7 @@
         index = string1.find(string2)
         if index >= 0:
             return string1[index + len(string2):]
-        return u''
+        return ''

     def __repr__(self):
         return 'substring-after(%r, %r)' % (self.string1, self.string2)
@@ -1329,7 +1329,7 @@
         index = string1.find(string2)
         if index >= 0:
             return string1[:index]
-        return u''
+        return ''

     def __repr__(self):
         return 'substring-after(%r, %r)' % (self.string1, self.string2)
diff --git a/genshi/tests/core.py b/genshi/tests/core.py
--- a/genshi/tests/core.py
+++ b/genshi/tests/core.py
@@ -175,6 +175,10 @@
         self.assertEquals("Attrs([('attr1', 'foo'), ('attr2', 'bar')])",
                           repr(unpickled))

+    def test_non_ascii(self):
+        attrs_tuple = Attrs([("attr1", u"föö"), ("attr2", u"bär")]).totuple()
+        self.assertEqual(u'fööbär', attrs_tuple[1])
+

 class NamespaceTestCase(unittest.TestCase):

@@ -212,6 +216,10 @@
         self.assertEquals('http://www.example.org/namespace', qname.namespace)
         self.assertEquals('elem', qname.localname)

+    def test_non_ascii(self):
+        qname = QName(u'http://www.example.org/namespace}gürü')
+        self.assertEqual(u'gürü', qname.localname)
+

 def suite():
     suite = unittest.TestSuite()
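The two new tests pin down unicode handling after the literal removals: Attrs.totuple must keep non-ASCII attribute values intact through the join, and QName must split a Clark-notation name into namespace and localname without mangling non-ASCII. The QName anatomy, following the __new__ logic patched above (a hypothetical session, values per the test):

    from genshi.core import QName

    qname = QName(u'http://www.example.org/namespace}g\xfcr\xfc')
    # The missing leading '{' is re-added by __new__.
    assert unicode(qname) == u'{http://www.example.org/namespace}g\xfcr\xfc'
    assert qname.namespace == u'http://www.example.org/namespace'
    assert qname.localname == u'g\xfcr\xfc'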
diff --git a/genshi/util.py b/genshi/util.py
--- a/genshi/util.py
+++ b/genshi/util.py
@@ -158,6 +158,7 @@
             retval.append(item)
     return retval

+
 def plaintext(text, keeplinebreaks=True):
     """Returns the text as a `unicode` string with all entities and tags
     removed.
@@ -179,9 +180,10 @@
     """
     text = stripentities(striptags(text))
     if not keeplinebreaks:
-        text = text.replace(u'\n', u' ')
+        text = text.replace('\n', ' ')
     return text

+
 _STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')
 def stripentities(text, keepxmlentities=False):
     """Return a copy of the given text with any character or numeric entities
@@ -213,16 +215,17 @@
         else: # character entity
             ref = match.group(2)
             if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
-                return u'&%s;' % ref
+                return '&%s;' % ref
             try:
                 return unichr(htmlentitydefs.name2codepoint[ref])
             except KeyError:
                 if keepxmlentities:
-                    return u'&%s;' % ref
+                    return '&%s;' % ref
                 else:
                     return ref
     return _STRIPENTITIES_RE.sub(_replace_entity, text)

+
 _STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)')
 def striptags(text):
     """Return a copy of the text with any XML/HTML tags removed.
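For reference, how the three util helpers touched above compose: plaintext() is striptags() followed by stripentities(), with optional newline folding. A quick sketch (a hypothetical session; expected outputs follow the docstrings, not this patch):

    from genshi.util import plaintext, stripentities, striptags

    print(striptags('<b>1 &lt; 2</b>'))      # 1 &lt; 2  (tags gone, entities kept)
    print(stripentities('1 &lt; 2'))         # 1 < 2
    print(plaintext('<b>1\n&lt;\n2</b>', keeplinebreaks=False))  # 1 < 2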