comparison genshi/core.py @ 902:09cc3627654c experimental-inline

Sync `experimental/inline` branch with [source:trunk@1126].
author cmlenz
date Fri, 23 Apr 2010 21:08:26 +0000
parents de82830f8816
children bb813ef5fe25
comparison
equal deleted inserted replaced
830:de82830f8816 902:09cc3627654c
1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 # 2 #
3 # Copyright (C) 2006-2008 Edgewall Software 3 # Copyright (C) 2006-2009 Edgewall Software
4 # All rights reserved. 4 # All rights reserved.
5 # 5 #
6 # This software is licensed as described in the file COPYING, which 6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms 7 # you should have received as part of this distribution. The terms
8 # are also available at http://genshi.edgewall.org/wiki/License. 8 # are also available at http://genshi.edgewall.org/wiki/License.
12 # history and logs, available at http://genshi.edgewall.org/log/. 12 # history and logs, available at http://genshi.edgewall.org/log/.
13 13
14 """Core classes for markup processing.""" 14 """Core classes for markup processing."""
15 15
16 try: 16 try:
17 reduce # builtin in Python < 3
18 except NameError:
17 from functools import reduce 19 from functools import reduce
18 except ImportError:
19 pass # builtin in Python <= 2.5
20 from itertools import chain 20 from itertools import chain
21 import operator 21 import operator
22 22
23 from genshi.util import plaintext, stripentities, striptags 23 from genshi.util import plaintext, stripentities, striptags, stringrepr
24 24
25 __all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace', 25 __all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace',
26 'QName'] 26 'QName']
27 __docformat__ = 'restructuredtext en' 27 __docformat__ = 'restructuredtext en'
28 28
91 91
92 Assume the following stream produced by the `HTML` function: 92 Assume the following stream produced by the `HTML` function:
93 93
94 >>> from genshi.input import HTML 94 >>> from genshi.input import HTML
95 >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''') 95 >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''')
96 >>> print html 96 >>> print(html)
97 <p onclick="alert('Whoa')">Hello, world!</p> 97 <p onclick="alert('Whoa')">Hello, world!</p>
98 98
99 A filter such as the HTML sanitizer can be applied to that stream using 99 A filter such as the HTML sanitizer can be applied to that stream using
100 the pipe notation as follows: 100 the pipe notation as follows:
101 101
102 >>> from genshi.filters import HTMLSanitizer 102 >>> from genshi.filters import HTMLSanitizer
103 >>> sanitizer = HTMLSanitizer() 103 >>> sanitizer = HTMLSanitizer()
104 >>> print html | sanitizer 104 >>> print(html | sanitizer)
105 <p>Hello, world!</p> 105 <p>Hello, world!</p>
106 106
107 Filters can be any function that accepts and produces a stream (where 107 Filters can be any function that accepts and produces a stream (where
108 a stream is anything that iterates over events): 108 a stream is anything that iterates over events):
109 109
110 >>> def uppercase(stream): 110 >>> def uppercase(stream):
111 ... for kind, data, pos in stream: 111 ... for kind, data, pos in stream:
112 ... if kind is TEXT: 112 ... if kind is TEXT:
113 ... data = data.upper() 113 ... data = data.upper()
114 ... yield kind, data, pos 114 ... yield kind, data, pos
115 >>> print html | sanitizer | uppercase 115 >>> print(html | sanitizer | uppercase)
116 <p>HELLO, WORLD!</p> 116 <p>HELLO, WORLD!</p>
117 117
118 Serializers can also be used with this notation: 118 Serializers can also be used with this notation:
119 119
120 >>> from genshi.output import TextSerializer 120 >>> from genshi.output import TextSerializer
121 >>> output = TextSerializer() 121 >>> output = TextSerializer()
122 >>> print html | sanitizer | uppercase | output 122 >>> print(html | sanitizer | uppercase | output)
123 HELLO, WORLD! 123 HELLO, WORLD!
124 124
125 Commonly, serializers should be used at the end of the "pipeline"; 125 Commonly, serializers should be used at the end of the "pipeline";
126 using them somewhere in the middle may produce unexpected results. 126 using them somewhere in the middle may produce unexpected results.
127 127
186 """Return a new stream that contains the events matching the given 186 """Return a new stream that contains the events matching the given
187 XPath expression. 187 XPath expression.
188 188
189 >>> from genshi import HTML 189 >>> from genshi import HTML
190 >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>') 190 >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
191 >>> print stream.select('elem') 191 >>> print(stream.select('elem'))
192 <elem>foo</elem><elem>bar</elem> 192 <elem>foo</elem><elem>bar</elem>
193 >>> print stream.select('elem/text()') 193 >>> print(stream.select('elem/text()'))
194 foobar 194 foobar
195 195
196 Note that the outermost element of the stream becomes the *context 196 Note that the outermost element of the stream becomes the *context
197 node* for the XPath test. That means that the expression "doc" would 197 node* for the XPath test. That means that the expression "doc" would
198 not match anything in the example above, because it only tests against 198 not match anything in the example above, because it only tests against
199 child elements of the outermost element: 199 child elements of the outermost element:
200 200
201 >>> print stream.select('doc') 201 >>> print(stream.select('doc'))
202 <BLANKLINE> 202 <BLANKLINE>
203 203
204 You can use the "." expression to match the context node itself 204 You can use the "." expression to match the context node itself
205 (although that usually makes little sense): 205 (although that usually makes little sense):
206 206
207 >>> print stream.select('.') 207 >>> print(stream.select('.'))
208 <doc><elem>foo</elem><elem>bar</elem></doc> 208 <doc><elem>foo</elem><elem>bar</elem></doc>
209 209
210 :param path: a string containing the XPath expression 210 :param path: a string containing the XPath expression
211 :param namespaces: mapping of namespace prefixes used in the path 211 :param namespaces: mapping of namespace prefixes used in the path
212 :param variables: mapping of variable names to values 212 :param variables: mapping of variable names to values
263 START_CDATA = Stream.START_CDATA 263 START_CDATA = Stream.START_CDATA
264 END_CDATA = Stream.END_CDATA 264 END_CDATA = Stream.END_CDATA
265 PI = Stream.PI 265 PI = Stream.PI
266 COMMENT = Stream.COMMENT 266 COMMENT = Stream.COMMENT
267 267
268
268 def _ensure(stream): 269 def _ensure(stream):
269 """Ensure that every item on the stream is actually a markup event.""" 270 """Ensure that every item on the stream is actually a markup event."""
270 stream = iter(stream) 271 stream = iter(stream)
271 event = stream.next() 272 event = stream.next()
272 273
351 """ 352 """
352 for attr, _ in self: 353 for attr, _ in self:
353 if attr == name: 354 if attr == name:
354 return True 355 return True
355 356
357 def __getitem__(self, i):
358 """Return an item or slice of the attributes list.
359
360 >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
361 >>> attrs[1]
362 ('title', 'Foo')
363 >>> attrs[1:]
364 Attrs([('title', 'Foo')])
365 """
366 items = tuple.__getitem__(self, i)
367 if type(i) is slice:
368 return Attrs(items)
369 return items
370
356 def __getslice__(self, i, j): 371 def __getslice__(self, i, j):
357 """Return a slice of the attributes list. 372 """Return a slice of the attributes list.
358 373
359 >>> attrs = Attrs([('href', '#'), ('title', 'Foo')]) 374 >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
360 >>> attrs[1:] 375 >>> attrs[1:]
410 425
411 The returned event is a `TEXT` event, the data is the value of all 426 The returned event is a `TEXT` event, the data is the value of all
412 attributes joined together. 427 attributes joined together.
413 428
414 >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple() 429 >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple()
415 ('TEXT', u'#Foo', (None, -1, -1)) 430 ('TEXT', '#Foo', (None, -1, -1))
416 431
417 :return: a `TEXT` event 432 :return: a `TEXT` event
418 :rtype: `tuple` 433 :rtype: `tuple`
419 """ 434 """
420 return TEXT, u''.join([x[1] for x in self]), (None, -1, -1) 435 return TEXT, ''.join([x[1] for x in self]), (None, -1, -1)
421 436
422 437
423 class Markup(unicode): 438 class Markup(unicode):
424 """Marks a string as being safe for inclusion in HTML/XML output without 439 """Marks a string as being safe for inclusion in HTML/XML output without
425 needing to be escaped. 440 needing to be escaped.
426 """ 441 """
427 __slots__ = [] 442 __slots__ = []
428 443
429 def __add__(self, other): 444 def __add__(self, other):
430 return Markup(unicode(self) + unicode(escape(other))) 445 return Markup(unicode.__add__(self, escape(other)))
431 446
432 def __radd__(self, other): 447 def __radd__(self, other):
433 return Markup(unicode(escape(other)) + unicode(self)) 448 return Markup(unicode.__add__(escape(other), self))
434 449
435 def __mod__(self, args): 450 def __mod__(self, args):
436 if isinstance(args, dict): 451 if isinstance(args, dict):
437 args = dict(zip(args.keys(), map(escape, args.values()))) 452 args = dict(zip(args.keys(), map(escape, args.values())))
438 elif isinstance(args, (list, tuple)): 453 elif isinstance(args, (list, tuple)):
440 else: 455 else:
441 args = escape(args) 456 args = escape(args)
442 return Markup(unicode.__mod__(self, args)) 457 return Markup(unicode.__mod__(self, args))
443 458
444 def __mul__(self, num): 459 def __mul__(self, num):
445 return Markup(unicode(self) * num) 460 return Markup(unicode.__mul__(self, num))
446 461 __rmul__ = __mul__
447 def __rmul__(self, num):
448 return Markup(num * unicode(self))
449 462
450 def __repr__(self): 463 def __repr__(self):
451 return '<%s %r>' % (self.__class__.__name__, unicode(self)) 464 return "<%s %s>" % (type(self).__name__, unicode.__repr__(self))
452 465
453 def join(self, seq, escape_quotes=True): 466 def join(self, seq, escape_quotes=True):
454 """Return a `Markup` object which is the concatenation of the strings 467 """Return a `Markup` object which is the concatenation of the strings
455 in the given sequence, where this `Markup` object is the separator 468 in the given sequence, where this `Markup` object is the separator
456 between the joined elements. 469 between the joined elements.
463 should be escaped 476 should be escaped
464 :return: the joined `Markup` object 477 :return: the joined `Markup` object
465 :rtype: `Markup` 478 :rtype: `Markup`
466 :see: `escape` 479 :see: `escape`
467 """ 480 """
468 return Markup(unicode(self).join([escape(item, quotes=escape_quotes) 481 return Markup(unicode.join(self, [escape(item, quotes=escape_quotes)
469 for item in seq])) 482 for item in seq]))
470 483
471 @classmethod 484 @classmethod
472 def escape(cls, text, quotes=True): 485 def escape(cls, text, quotes=True):
473 """Create a Markup instance from a string and escape special characters 486 """Create a Markup instance from a string and escape special characters
494 if type(text) is cls: 507 if type(text) is cls:
495 return text 508 return text
496 if hasattr(text, '__html__'): 509 if hasattr(text, '__html__'):
497 return Markup(text.__html__()) 510 return Markup(text.__html__())
498 511
499 text = unicode(text).replace('&', '&amp;') \ 512 text = text.replace('&', '&amp;') \
500 .replace('<', '&lt;') \ 513 .replace('<', '&lt;') \
501 .replace('>', '&gt;') 514 .replace('>', '&gt;')
502 if quotes: 515 if quotes:
503 text = text.replace('"', '&#34;') 516 text = text.replace('"', '&#34;')
504 return cls(text) 517 return cls(text)
505 518
506 def unescape(self): 519 def unescape(self):
512 :return: the unescaped string 525 :return: the unescaped string
513 :rtype: `unicode` 526 :rtype: `unicode`
514 :see: `genshi.core.unescape` 527 :see: `genshi.core.unescape`
515 """ 528 """
516 if not self: 529 if not self:
517 return u'' 530 return ''
518 return unicode(self).replace('&#34;', '"') \ 531 return unicode(self).replace('&#34;', '"') \
519 .replace('&gt;', '>') \ 532 .replace('&gt;', '>') \
520 .replace('&lt;', '<') \ 533 .replace('&lt;', '<') \
521 .replace('&amp;', '&') 534 .replace('&amp;', '&')
522 535
547 try: 560 try:
548 from genshi._speedups import Markup 561 from genshi._speedups import Markup
549 except ImportError: 562 except ImportError:
550 pass # just use the Python implementation 563 pass # just use the Python implementation
551 564
565
552 escape = Markup.escape 566 escape = Markup.escape
567
553 568
554 def unescape(text): 569 def unescape(text):
555 """Reverse-escapes &, <, >, and \" and returns a `unicode` object. 570 """Reverse-escapes &, <, >, and \" and returns a `unicode` object.
556 571
557 >>> unescape(Markup('1 &lt; 2')) 572 >>> unescape(Markup('1 &lt; 2'))
581 596
582 A `Namespace` object is instantiated with the namespace URI. 597 A `Namespace` object is instantiated with the namespace URI.
583 598
584 >>> html = Namespace('http://www.w3.org/1999/xhtml') 599 >>> html = Namespace('http://www.w3.org/1999/xhtml')
585 >>> html 600 >>> html
586 <Namespace "http://www.w3.org/1999/xhtml"> 601 Namespace('http://www.w3.org/1999/xhtml')
587 >>> html.uri 602 >>> html.uri
588 u'http://www.w3.org/1999/xhtml' 603 u'http://www.w3.org/1999/xhtml'
589 604
590 The `Namespace` object can then be used to generate `QName` objects with 605 The `Namespace` object can then be used to generate `QName` objects with
591 that namespace: 606 that namespace:
592 607
593 >>> html.body 608 >>> html.body
594 QName(u'http://www.w3.org/1999/xhtml}body') 609 QName('http://www.w3.org/1999/xhtml}body')
595 >>> html.body.localname 610 >>> html.body.localname
596 u'body' 611 u'body'
597 >>> html.body.namespace 612 >>> html.body.namespace
598 u'http://www.w3.org/1999/xhtml' 613 u'http://www.w3.org/1999/xhtml'
599 614
600 The same works using item access notation, which is useful for element or 615 The same works using item access notation, which is useful for element or
601 attribute names that are not valid Python identifiers: 616 attribute names that are not valid Python identifiers:
602 617
603 >>> html['body'] 618 >>> html['body']
604 QName(u'http://www.w3.org/1999/xhtml}body') 619 QName('http://www.w3.org/1999/xhtml}body')
605 620
606 A `Namespace` object can also be used to test whether a specific `QName` 621 A `Namespace` object can also be used to test whether a specific `QName`
607 belongs to that namespace using the ``in`` operator: 622 belongs to that namespace using the ``in`` operator:
608 623
609 >>> qname = html.body 624 >>> qname = html.body
639 if isinstance(other, Namespace): 654 if isinstance(other, Namespace):
640 return self.uri == other.uri 655 return self.uri == other.uri
641 return self.uri == other 656 return self.uri == other
642 657
643 def __getitem__(self, name): 658 def __getitem__(self, name):
644 return QName(self.uri + u'}' + name) 659 return QName(self.uri + '}' + name)
645 __getattr__ = __getitem__ 660 __getattr__ = __getitem__
646 661
647 def __hash__(self): 662 def __hash__(self):
648 return hash(self.uri) 663 return hash(self.uri)
649 664
650 def __repr__(self): 665 def __repr__(self):
651 return '<Namespace "%s">' % self.uri 666 return '%s(%s)' % (type(self).__name__, stringrepr(self.uri))
652 667
653 def __str__(self): 668 def __str__(self):
654 return self.uri.encode('utf-8') 669 return self.uri.encode('utf-8')
655 670
656 def __unicode__(self): 671 def __unicode__(self):
669 namespace URI can be obtained through the additional `namespace` attribute, 684 namespace URI can be obtained through the additional `namespace` attribute,
670 while the local name can be accessed through the `localname` attribute. 685 while the local name can be accessed through the `localname` attribute.
671 686
672 >>> qname = QName('foo') 687 >>> qname = QName('foo')
673 >>> qname 688 >>> qname
674 QName(u'foo') 689 QName('foo')
675 >>> qname.localname 690 >>> qname.localname
676 u'foo' 691 u'foo'
677 >>> qname.namespace 692 >>> qname.namespace
678 693
679 >>> qname = QName('http://www.w3.org/1999/xhtml}body') 694 >>> qname = QName('http://www.w3.org/1999/xhtml}body')
680 >>> qname 695 >>> qname
681 QName(u'http://www.w3.org/1999/xhtml}body') 696 QName('http://www.w3.org/1999/xhtml}body')
682 >>> qname.localname 697 >>> qname.localname
683 u'body' 698 u'body'
684 >>> qname.namespace 699 >>> qname.namespace
685 u'http://www.w3.org/1999/xhtml' 700 u'http://www.w3.org/1999/xhtml'
686 """ 701 """
694 brace is optional 709 brace is optional
695 """ 710 """
696 if type(qname) is cls: 711 if type(qname) is cls:
697 return qname 712 return qname
698 713
699 parts = qname.lstrip(u'{').split(u'}', 1) 714 parts = qname.lstrip('{').split('}', 1)
700 if len(parts) > 1: 715 if len(parts) > 1:
701 self = unicode.__new__(cls, u'{%s' % qname) 716 self = unicode.__new__(cls, '{%s' % qname)
702 self.namespace, self.localname = map(unicode, parts) 717 self.namespace, self.localname = map(unicode, parts)
703 else: 718 else:
704 self = unicode.__new__(cls, qname) 719 self = unicode.__new__(cls, qname)
705 self.namespace, self.localname = None, unicode(qname) 720 self.namespace, self.localname = None, unicode(qname)
706 return self 721 return self
707 722
708 def __getnewargs__(self): 723 def __getnewargs__(self):
709 return (self.lstrip('{'),) 724 return (self.lstrip('{'),)
710 725
711 def __repr__(self): 726 def __repr__(self):
712 return 'QName(%s)' % unicode.__repr__(self.lstrip('{')) 727 return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{')))
Copyright (C) 2012-2017 Edgewall Software