genshi/genshi-test: genshi/core.py comparison

comparison genshi/core.py @ 902:09cc3627654c experimental-inline

Sync `experimental/inline` branch with [source:trunk@1126].

author	cmlenz
date	Fri, 23 Apr 2010 21:08:26 +0000
parents	de82830f8816
children	bb813ef5fe25

comparison

equal deleted inserted replaced

-:de82830f8816
+:09cc3627654c
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2006-2008 Edgewall Software
+# Copyright (C) 2006-2009 Edgewall Software
 # All rights reserved.
 #
 # This software is licensed as described in the file COPYING, which
 # you should have received as part of this distribution. The terms
 # are also available at http://genshi.edgewall.org/wiki/License.
 # history and logs, available at http://genshi.edgewall.org/log/.
 """Core classes for markup processing."""
 try:
+reduce # builtin in Python < 3
+except NameError:
 from functools import reduce
-except ImportError:
-pass # builtin in Python <= 2.5
 from itertools import chain
 import operator
-from genshi.util import plaintext, stripentities, striptags
+from genshi.util import plaintext, stripentities, striptags, stringrepr
 __all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace',
 'QName']
 __docformat__ = 'restructuredtext en'
 Assume the following stream produced by the `HTML` function:
 >>> from genshi.input import HTML
 >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''')
->>> print html
+>>> print(html)
 <p onclick="alert('Whoa')">Hello, world!</p>
 A filter such as the HTML sanitizer can be applied to that stream using
 the pipe notation as follows:
 >>> from genshi.filters import HTMLSanitizer
 >>> sanitizer = HTMLSanitizer()
->>> print html | sanitizer
+>>> print(html | sanitizer)
 <p>Hello, world!</p>
 Filters can be any function that accepts and produces a stream (where
 a stream is anything that iterates over events):
 >>> def uppercase(stream):
 ...     for kind, data, pos in stream:
 ...         if kind is TEXT:
 ...             data = data.upper()
 ...         yield kind, data, pos
->>> print html | sanitizer | uppercase
+>>> print(html | sanitizer | uppercase)
 <p>HELLO, WORLD!</p>
 Serializers can also be used with this notation:
 >>> from genshi.output import TextSerializer
 >>> output = TextSerializer()
->>> print html | sanitizer | uppercase | output
+>>> print(html | sanitizer | uppercase | output)
 HELLO, WORLD!
 Commonly, serializers should be used at the end of the "pipeline";
 using them somewhere in the middle may produce unexpected results.
 """Return a new stream that contains the events matching the given
 XPath expression.
 >>> from genshi import HTML
 >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
->>> print stream.select('elem')
+>>> print(stream.select('elem'))
 <elem>foo</elem><elem>bar</elem>
->>> print stream.select('elem/text()')
+>>> print(stream.select('elem/text()'))
 foobar
 Note that the outermost element of the stream becomes the *context
 node* for the XPath test. That means that the expression "doc" would
 not match anything in the example above, because it only tests against
 child elements of the outermost element:
->>> print stream.select('doc')
+>>> print(stream.select('doc'))
 <BLANKLINE>
 You can use the "." expression to match the context node itself
 (although that usually makes little sense):
->>> print stream.select('.')
+>>> print(stream.select('.'))
 <doc><elem>foo</elem><elem>bar</elem></doc>
 :param path: a string containing the XPath expression
 :param namespaces: mapping of namespace prefixes used in the path
 :param variables: mapping of variable names to values
 START_CDATA = Stream.START_CDATA
 END_CDATA = Stream.END_CDATA
 PI = Stream.PI
 COMMENT = Stream.COMMENT
 def _ensure(stream):
 """Ensure that every item on the stream is actually a markup event."""
 stream = iter(stream)
 event = stream.next()
 """
 for attr, _ in self:
 if attr == name:
 return True
+def __getitem__(self, i):
+"""Return an item or slice of the attributes list.
+>>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
+>>> attrs[1]
+('title', 'Foo')
+>>> attrs[1:]
+Attrs([('title', 'Foo')])
+"""
+items = tuple.__getitem__(self, i)
+if type(i) is slice:
+return Attrs(items)
+return items
 def __getslice__(self, i, j):
 """Return a slice of the attributes list.
 >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
 >>> attrs[1:]
 The returned event is a `TEXT` event, the data is the value of all
 attributes joined together.
 >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple()
-('TEXT', u'#Foo', (None, -1, -1))
+('TEXT', '#Foo', (None, -1, -1))
 :return: a `TEXT` event
 :rtype: `tuple`
 """
-return TEXT, u''.join([x[1] for x in self]), (None, -1, -1)
+return TEXT, ''.join([x[1] for x in self]), (None, -1, -1)
 class Markup(unicode):
 """Marks a string as being safe for inclusion in HTML/XML output without
 needing to be escaped.
 """
 __slots__ = []
 def __add__(self, other):
-return Markup(unicode(self) + unicode(escape(other)))
+return Markup(unicode.__add__(self, escape(other)))
 def __radd__(self, other):
-return Markup(unicode(escape(other)) + unicode(self))
+return Markup(unicode.__add__(escape(other), self))
 def __mod__(self, args):
 if isinstance(args, dict):
 args = dict(zip(args.keys(), map(escape, args.values())))
 elif isinstance(args, (list, tuple)):
 else:
 args = escape(args)
 return Markup(unicode.__mod__(self, args))
 def __mul__(self, num):
-return Markup(unicode(self) * num)
+return Markup(unicode.__mul__(self, num))
+__rmul__ = __mul__
-def __rmul__(self, num):
-return Markup(num * unicode(self))
 def __repr__(self):
-return '<%s %r>' % (self.__class__.__name__, unicode(self))
+return "<%s %s>" % (type(self).__name__, unicode.__repr__(self))
 def join(self, seq, escape_quotes=True):
 """Return a `Markup` object which is the concatenation of the strings
 in the given sequence, where this `Markup` object is the separator
 between the joined elements.
 should be escaped
 :return: the joined `Markup` object
 :rtype: `Markup`
 :see: `escape`
 """
-return Markup(unicode(self).join([escape(item, quotes=escape_quotes)
+return Markup(unicode.join(self, [escape(item, quotes=escape_quotes)
 for item in seq]))
 @classmethod
 def escape(cls, text, quotes=True):
 """Create a Markup instance from a string and escape special characters
 if type(text) is cls:
 return text
 if hasattr(text, '__html__'):
 return Markup(text.__html__())
-text = unicode(text).replace('&', '&amp;') \
+text = text.replace('&', '&amp;') \
 .replace('<', '&lt;') \
 .replace('>', '&gt;')
 if quotes:
 text = text.replace('"', '&#34;')
 return cls(text)
 def unescape(self):
 :return: the unescaped string
 :rtype: `unicode`
 :see: `genshi.core.unescape`
 """
 if not self:
-return u''
+return ''
 return unicode(self).replace('&#34;', '"') \
 .replace('&gt;', '>') \
 .replace('&lt;', '<') \
 .replace('&amp;', '&')
 try:
 from genshi._speedups import Markup
 except ImportError:
 pass # just use the Python implementation
 escape = Markup.escape
 def unescape(text):
 """Reverse-escapes &, <, >, and \" and returns a `unicode` object.
 >>> unescape(Markup('1 &lt; 2'))
 A `Namespace` object is instantiated with the namespace URI.
 >>> html = Namespace('http://www.w3.org/1999/xhtml')
 >>> html
-<Namespace "http://www.w3.org/1999/xhtml">
+Namespace('http://www.w3.org/1999/xhtml')
 >>> html.uri
 u'http://www.w3.org/1999/xhtml'
 The `Namespace` object can then be used to generate `QName` objects with
 that namespace:
 >>> html.body
-QName(u'http://www.w3.org/1999/xhtml}body')
+QName('http://www.w3.org/1999/xhtml}body')
 >>> html.body.localname
 u'body'
 >>> html.body.namespace
 u'http://www.w3.org/1999/xhtml'
 The same works using item access notation, which is useful for element or
 attribute names that are not valid Python identifiers:
 >>> html['body']
-QName(u'http://www.w3.org/1999/xhtml}body')
+QName('http://www.w3.org/1999/xhtml}body')
 A `Namespace` object can also be used to test whether a specific `QName`
 belongs to that namespace using the ``in`` operator:
 >>> qname = html.body
 if isinstance(other, Namespace):
 return self.uri == other.uri
 return self.uri == other
 def __getitem__(self, name):
-return QName(self.uri + u'}' + name)
+return QName(self.uri + '}' + name)
 __getattr__ = __getitem__
 def __hash__(self):
 return hash(self.uri)
 def __repr__(self):
-return '<Namespace "%s">' % self.uri
+return '%s(%s)' % (type(self).__name__, stringrepr(self.uri))
 def __str__(self):
 return self.uri.encode('utf-8')
 def __unicode__(self):
 namespace URI can be obtained through the additional `namespace` attribute,
 while the local name can be accessed through the `localname` attribute.
 >>> qname = QName('foo')
 >>> qname
-QName(u'foo')
+QName('foo')
 >>> qname.localname
 u'foo'
 >>> qname.namespace
 >>> qname = QName('http://www.w3.org/1999/xhtml}body')
 >>> qname
-QName(u'http://www.w3.org/1999/xhtml}body')
+QName('http://www.w3.org/1999/xhtml}body')
 >>> qname.localname
 u'body'
 >>> qname.namespace
 u'http://www.w3.org/1999/xhtml'
 """
 brace is optional
 """
 if type(qname) is cls:
 return qname
-parts = qname.lstrip(u'{').split(u'}', 1)
+parts = qname.lstrip('{').split('}', 1)
 if len(parts) > 1:
-self = unicode.__new__(cls, u'{%s' % qname)
+self = unicode.__new__(cls, '{%s' % qname)
 self.namespace, self.localname = map(unicode, parts)
 else:
 self = unicode.__new__(cls, qname)
 self.namespace, self.localname = None, unicode(qname)
 return self
 def __getnewargs__(self):
 return (self.lstrip('{'),)
 def __repr__(self):
-return 'QName(%s)' % unicode.__repr__(self.lstrip('{'))
+return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{')))

Mercurial > genshi > genshi-test

comparison genshi/core.py @ 902:09cc3627654c experimental-inline