annotate genshi/output.py @ 437:3d82c5bdbf46

Fix for #107.
author cmlenz
date Mon, 02 Apr 2007 15:52:21 +0000
parents 5b248708bbed
children 0407937b2853
rev   line source
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
1 # -*- coding: utf-8 -*-
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
2 #
408
49a3bae5a8bb Update copyright year for files modified this year.
cmlenz
parents: 402
diff changeset
3 # Copyright (C) 2006-2007 Edgewall Software
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
4 # All rights reserved.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
5 #
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
6 # This software is licensed as described in the file COPYING, which
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
7 # you should have received as part of this distribution. The terms
230
24757b771651 Renamed Markup to Genshi in repository.
cmlenz
parents: 219
diff changeset
8 # are also available at http://genshi.edgewall.org/wiki/License.
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
9 #
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
10 # This software consists of voluntary contributions made by many
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
11 # individuals. For the exact contribution history, see the revision
230
24757b771651 Renamed Markup to Genshi in repository.
cmlenz
parents: 219
diff changeset
12 # history and logs, available at http://genshi.edgewall.org/log/.
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
13
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
14 """This module provides different kinds of serialization methods for XML event
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
15 streams.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
16 """
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
17
123
93bbdcf9428b Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
18 from itertools import chain
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
19 try:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
20 frozenset
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
21 except NameError:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
22 from sets import ImmutableSet as frozenset
123
93bbdcf9428b Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
23 import re
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
24
410
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
25 from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind
402
cc7f5b3fbbed Fix output of namespace declarations for namespace URLs appearing more than once in a stream. Thanks to Jeff Cutsinger for reporting the problem.
cmlenz
parents: 397
diff changeset
26 from genshi.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \
cc7f5b3fbbed Fix output of namespace declarations for namespace URLs appearing more than once in a stream. Thanks to Jeff Cutsinger for reporting the problem.
cmlenz
parents: 397
diff changeset
27 START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
28
200
50eab0469148 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
29 __all__ = ['DocType', 'XMLSerializer', 'XHTMLSerializer', 'HTMLSerializer',
50eab0469148 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
30 'TextSerializer']
425
5b248708bbed Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents: 412
diff changeset
31 __docformat__ = 'restructuredtext en'
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
32
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
33
85
db8f2958c670 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
class DocType(object):
    """Defines a number of commonly used DOCTYPE declarations as constants.

    Every constant is a ``(name, pubid, sysid)`` tuple, i.e. the same shape
    that the serializers in this module accept for their ``doctype``
    parameter and unpack when they encounter a DOCTYPE event.
    """

    # HTML 4.01
    HTML_STRICT = (
        'html', '-//W3C//DTD HTML 4.01//EN',
        'http://www.w3.org/TR/html4/strict.dtd'
    )
    HTML_TRANSITIONAL = (
        'html', '-//W3C//DTD HTML 4.01 Transitional//EN',
        'http://www.w3.org/TR/html4/loose.dtd'
    )
    # Shorthand alias: plain ``HTML`` means the strict variant.
    HTML = HTML_STRICT

    # XHTML 1.0
    XHTML_STRICT = (
        'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
    )
    XHTML_TRANSITIONAL = (
        'html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
    )
    # Shorthand alias: plain ``XHTML`` means the strict variant.
    XHTML = XHTML_STRICT
db8f2958c670 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
56
db8f2958c670 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
57
123
93bbdcf9428b Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
class XMLSerializer(object):
    """Produces XML text from an event stream.
    
    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XMLSerializer()(elem.generate()))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    # Elements inside which whitespace must not be stripped. Empty for
    # generic XML; handed to `WhitespaceFilter` by `__init__`.
    _PRESERVE_SPACE = frozenset()

    def __init__(self, doctype=None, strip_whitespace=True,
                 namespace_prefixes=None):
        """Initialize the XML serializer.
        
        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param namespace_prefixes: passed through unchanged as the
                                   ``prefixes`` argument of the
                                   `NamespaceFlattener` filter; presumably a
                                   mapping of namespace URIs to the prefixes
                                   to use for them (confirm against
                                   `NamespaceFlattener`)
        """
        # Events that get chained in front of every stream being serialized.
        self.preamble = []
        if doctype:
            self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))

        # Filters are applied in list order by `__call__`.
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
        self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes))

    def __call__(self, stream):
        """Serialize the given event stream, yielding chunks of output text.

        The preamble set up in `__init__` is chained in front of `stream`
        and the result is run through every filter in ``self.filters``
        before being rendered event by event. Events of a kind that is not
        handled by any branch below produce no output.

        :param stream: an iterable of ``(kind, data, pos)`` events
        :return: a generator over the serialized text fragments
        """
        have_doctype = False
        in_cdata = False

        stream = chain(self.preamble, stream)
        for filter_ in self.filters:
            stream = filter_(stream)

        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    buf += [' ', attr, '="', escape(value), '"']
                # EMPTY events are rendered as self-closing tags.
                if kind is EMPTY:
                    buf.append('/>')
                else:
                    buf.append('>')
                yield Markup(u''.join(buf))

            elif kind is END:
                yield Markup('</%s>' % data)

            elif kind is TEXT:
                if in_cdata:
                    # Text inside a CDATA section is passed through
                    # untouched.
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE and not have_doctype:
                # Only the first DOCTYPE event is rendered; any further
                # ones match no branch and are dropped.
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                # Dropping the falsy parts of `data` keeps the argument
                # list aligned with the ``%s`` placeholders added above.
                # NOTE(review): presumably `Markup` interpolates the extra
                # positional arguments into the template -- confirm in
                # `genshi.core`.
                yield Markup(u''.join(buf), *filter(None, data))
                have_doctype = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                in_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                in_cdata = False

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
334a338847af Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
139
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
140
96
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
141 class XHTMLSerializer(XMLSerializer):
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
142 """Produces XHTML text from an event stream.
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
143
230
24757b771651 Renamed Markup to Genshi in repository.
cmlenz
parents: 219
diff changeset
144 >>> from genshi.builder import tag
20
e3d3c1d8c98a Fix tests broken in [20].
cmlenz
parents: 19
diff changeset
145 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
123
93bbdcf9428b Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
146 >>> print ''.join(XHTMLSerializer()(elem.generate()))
96
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
147 <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
148 """
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
149
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
150 _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
151 'hr', 'img', 'input', 'isindex', 'link', 'meta',
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
152 'param'])
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
153 _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
154 'defer', 'disabled', 'ismap', 'multiple',
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
155 'nohref', 'noresize', 'noshade', 'nowrap'])
346
2304e080ec07 Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents: 345
diff changeset
156 _PRESERVE_SPACE = frozenset([
2304e080ec07 Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents: 345
diff changeset
157 QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'),
2304e080ec07 Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents: 345
diff changeset
158 QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea')
2304e080ec07 Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents: 345
diff changeset
159 ])
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
160
410
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
161 def __init__(self, doctype=None, strip_whitespace=True,
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
162 namespace_prefixes=None):
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
163 super(XHTMLSerializer, self).__init__(doctype, False)
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
164 self.filters = [EmptyTagFilter()]
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
165 if strip_whitespace:
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
166 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
167 namespace_prefixes = namespace_prefixes or {}
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
168 namespace_prefixes['http://www.w3.org/1999/xhtml'] = ''
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
169 self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes))
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
170
123
93bbdcf9428b Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
171 def __call__(self, stream):
136
636e0100fcaf Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
172 boolean_attrs = self._BOOLEAN_ATTRS
636e0100fcaf Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
173 empty_elems = self._EMPTY_ELEMS
85
db8f2958c670 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
174 have_doctype = False
143
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
175 in_cdata = False
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
176
123
93bbdcf9428b Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
177 stream = chain(self.preamble, stream)
93bbdcf9428b Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
178 for filter_ in self.filters:
93bbdcf9428b Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
179 stream = filter_(stream)
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
180 for kind, data, pos in stream:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
181
212
e8c43127d9a9 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents: 201
diff changeset
182 if kind is START or kind is EMPTY:
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
183 tag, attrib = data
410
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
184 buf = ['<', tag]
397
d6e9170c5ccc * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
185 for attr, value in attrib:
410
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
186 if attr in boolean_attrs:
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
187 value = attr
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
188 buf += [' ', attr, '="', escape(value), '"']
212
e8c43127d9a9 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents: 201
diff changeset
189 if kind is EMPTY:
410
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
190 if tag in empty_elems:
397
d6e9170c5ccc * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
191 buf.append(' />')
96
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
192 else:
410
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
193 buf.append('></%s>' % tag)
141
b3ceaa35fb6b * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
194 else:
397
d6e9170c5ccc * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
195 buf.append('>')
d6e9170c5ccc * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
196 yield Markup(u''.join(buf))
96
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
197
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
198 elif kind is END:
410
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
199 yield Markup('</%s>' % data)
96
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
200
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
201 elif kind is TEXT:
143
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
202 if in_cdata:
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
203 yield data
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
204 else:
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
205 yield escape(data, quotes=False)
96
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
206
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
207 elif kind is COMMENT:
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
208 yield Markup('<!--%s-->' % data)
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
209
136
636e0100fcaf Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
210 elif kind is DOCTYPE and not have_doctype:
636e0100fcaf Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
211 name, pubid, sysid = data
636e0100fcaf Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
212 buf = ['<!DOCTYPE %s']
636e0100fcaf Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
213 if pubid:
397
d6e9170c5ccc * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
214 buf.append(' PUBLIC "%s"')
136
636e0100fcaf Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
215 elif sysid:
397
d6e9170c5ccc * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
216 buf.append(' SYSTEM')
136
636e0100fcaf Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
217 if sysid:
397
d6e9170c5ccc * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
218 buf.append(' "%s"')
d6e9170c5ccc * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
219 buf.append('>\n')
d6e9170c5ccc * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
220 yield Markup(u''.join(buf), *filter(None, data))
136
636e0100fcaf Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
221 have_doctype = True
109
2de3f9d84a1c Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
222
143
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
223 elif kind is START_CDATA:
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
224 yield Markup('<![CDATA[')
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
225 in_cdata = True
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
226
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
227 elif kind is END_CDATA:
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
228 yield Markup(']]>')
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
229 in_cdata = False
ef761afcedff CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
230
105
334a338847af Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
231 elif kind is PI:
334a338847af Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
232 yield Markup('<?%s %s?>' % data)
334a338847af Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
233
96
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
234
35d681a94763 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
class HTMLSerializer(XHTMLSerializer):
    """Produces HTML text from an event stream.

    Elements without content are written as a bare start tag when they are
    listed in ``_EMPTY_ELEMS`` and as an explicit start/end tag pair otherwise.
    Attributes listed in ``_BOOLEAN_ATTRS`` are minimized to just their name
    (and left out entirely when their value is false):

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(HTMLSerializer()(elem.generate()))
    <div><a href="foo"></a><br><hr noshade></div>
    """

    # Elements whose text content is written to the output without escaping.
    # NOTE(review): the namespaced names are spelled without a leading "{";
    # presumably that is a form `QName` accepts -- confirm against `QName`.
    _NOESCAPE_ELEMS = frozenset([
        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
    ])

    def __init__(self, doctype=None, strip_whitespace=True):
        """Initialize the HTML serializer.

        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        """
        # The base class is always told not to strip whitespace; the filter
        # chain is rebuilt below with the HTML-specific set of filters.
        super(HTMLSerializer, self).__init__(doctype, False)
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
                                                 self._NOESCAPE_ELEMS))
        self.filters.append(NamespaceStripper('http://www.w3.org/1999/xhtml'))

    def __call__(self, stream):
        """Serialize the given event stream as HTML.

        The stream is prefixed with ``self.preamble`` and passed through
        every filter in ``self.filters`` before being rendered.

        :param stream: an iterable of ``(kind, data, pos)`` events
        :return: a generator yielding the output one fragment at a time
        """
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        noescape_elems = self._NOESCAPE_ELEMS
        have_doctype = False
        noescape = False

        stream = chain(self.preamble, stream)
        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    if attr in boolean_attrs:
                        # Minimized form: only the name, and only if true
                        if value:
                            buf += [' ', attr]
                    else:
                        buf += [' ', attr, '="', escape(value), '"']
                buf.append('>')
                if kind is EMPTY:
                    if tag not in empty_elems:
                        buf.append('</%s>' % tag)
                yield Markup(u''.join(buf))
                # Only a START event opens an element that may contain raw
                # text. An EMPTY event has no content and is not followed by
                # an END event that would reset the flag, so switching to
                # no-escape mode for it would leave the text *after* e.g.
                # `<script src="..."></script>` unescaped.
                if kind is START and tag in noescape_elems:
                    noescape = True

            elif kind is END:
                yield Markup('</%s>' % data)
                noescape = False

            elif kind is TEXT:
                if noescape:
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE and not have_doctype:
                # Only the first DOCTYPE event is written to the output
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                # Empty components have no placeholder in the template, so
                # they are dropped from the substitution arguments as well
                yield Markup(u''.join(buf), *filter(None, data))
                have_doctype = True

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
334a338847af Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
322
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
323
200
50eab0469148 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
class TextSerializer(object):
    """Produces plain text from an event stream.

    Only text events are included in the output. Unlike the other
    serializers, special XML characters are not escaped:

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br)
    >>> print elem
    <div><a href="foo">&lt;Hello!&gt;</a><br/></div>
    >>> print ''.join(TextSerializer()(elem.generate()))
    <Hello!>

    If text events contain literal markup (instances of the `Markup` class),
    tags or entities are stripped from the output:

    >>> elem = tag.div(Markup('<a href="foo">Hello!</a><br/>'))
    >>> print elem
    <div><a href="foo">Hello!</a><br/></div>
    >>> print ''.join(TextSerializer()(elem.generate()))
    Hello!
    """

    def __call__(self, stream):
        """Yield the content of every text event in the stream as `unicode`.

        :param stream: an iterable of events; the first item of each event
                       is its kind and the second item its data
        """
        for event in stream:
            if event[0] is not TEXT:
                # Anything that is not text contributes nothing
                continue
            text = event[1]
            if type(text) is Markup:
                # Literal markup: keep only what is left once tags and
                # entities have been stripped
                text = text.striptags().stripentities()
            yield unicode(text)
200
50eab0469148 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
354
50eab0469148 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
355
212
e8c43127d9a9 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents: 201
diff changeset
class EmptyTagFilter(object):
    """Combines `START` and `END` events into `EMPTY` events for elements that
    have no contents.
    """

    EMPTY = StreamEventKind('EMPTY')

    def __call__(self, stream):
        """Filter the stream, merging each `START` event that is directly
        followed by an `END` event into a single `EMPTY` event.

        The `EMPTY` event carries the data and position of the `START` event.
        All other events are passed through unchanged.

        :param stream: an iterable of ``(kind, data, pos)`` events
        """
        prev = (None, None, None)
        for ev in stream:
            if prev[0] is START:
                if ev[0] is END:
                    # START directly followed by END: emit one EMPTY event
                    # in place of the pair
                    prev = EMPTY, prev[1], prev[2]
                    yield prev
                    continue
                else:
                    # The element has content after all, so the START event
                    # that was held back can be released now
                    yield prev
            if ev[0] is not START:
                yield ev
            # A START event is held back until the next event is known
            prev = ev
        if prev[0] is START:
            # The stream ended on a START event; release it instead of
            # silently dropping it
            yield prev


# Module-level alias for the `EMPTY` event kind
EMPTY = EmptyTagFilter.EMPTY
e8c43127d9a9 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents: 201
diff changeset
379
e8c43127d9a9 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents: 201
diff changeset
380
410
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
class NamespaceFlattener(object):
    r"""Output stream filter that removes namespace information from the stream,
    instead adding namespace attributes and prefixes as needed.

    Qualified tag and attribute names are replaced by plain strings of the
    form ``prefix:localname`` (or just ``localname`` when the prefix is
    empty). ``START_NS`` and ``END_NS`` events are consumed by the filter;
    the declarations they carry are emitted as ``xmlns``/``xmlns:prefix``
    attributes on the next ``START`` or ``EMPTY`` event instead.

    :param prefixes: optional mapping of namespace URIs to prefixes

    >>> from genshi.input import XML
    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
    ...   <two:item/>
    ... </doc>''')
    >>> for kind, data, pos in NamespaceFlattener()(xml):
    ...     print kind, repr(data)
    START (u'doc', Attrs([(u'xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
    TEXT u'\n  '
    START (u'two:item', Attrs())
    END u'two:item'
    TEXT u'\n'
    END u'doc'
    """

    def __init__(self, prefixes=None):
        """Initialize the filter.

        :param prefixes: optional mapping of namespace URIs to prefixes; it
                         is merged over the built-in binding of the XML
                         namespace URI to the ``xml`` prefix
        """
        self.prefixes = {XML_NAMESPACE.uri: 'xml'}
        if prefixes is not None:
            self.prefixes.update(prefixes)

    def __call__(self, stream):
        """Apply the filter to `stream` and yield the flattened events.

        ``START``/``EMPTY`` events are yielded with a string tag name and an
        ``Attrs`` list of ``(string name, value)`` pairs, ``END`` events with
        a string tag name. ``START_NS``/``END_NS`` events are not yielded.
        All other events are passed through unchanged.
        """
        # Per-call lookup tables; the lists are used as stacks with the most
        # recent binding last:
        #   prefixes:   prefix -> [URI, ...]    (inverted from self.prefixes)
        #   namespaces: URI    -> [prefix, ...]
        prefixes = dict([(v, [k]) for k, v in self.prefixes.items()])
        namespaces = {XML_NAMESPACE.uri: ['xml']}
        def _push_ns(prefix, uri):
            # Record a new binding on both stacks.
            namespaces.setdefault(uri, []).append(prefix)
            prefixes.setdefault(prefix, []).append(uri)

        # Namespace declaration attributes waiting to be attached to the next
        # START/EMPTY event.
        ns_attrs = []
        _push_ns_attr = ns_attrs.append
        def _make_ns_attr(prefix, uri):
            # Build the ``(name, value)`` pair for a declaration: ``xmlns``
            # for an empty prefix, ``xmlns:<prefix>`` otherwise.
            return u'xmlns%s' % (prefix and ':%s' % prefix or ''), uri

        def _gen_prefix():
            # Endless supply of generated prefixes: ns1, ns2, ...
            val = 0
            while 1:
                val += 1
                yield 'ns%d' % val
        # Rebind the name to the generator's ``next`` method, so that calling
        # ``_gen_prefix()`` below returns the next generated prefix.
        _gen_prefix = _gen_prefix().next

        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrs = data

                tagname = tag.localname
                tagns = tag.namespace
                if tagns:
                    if tagns in namespaces:
                        # Known namespace: use its most recent prefix.
                        prefix = namespaces[tagns][-1]
                        if prefix:
                            tagname = u'%s:%s' % (prefix, tagname)
                    else:
                        # Namespace was never declared in the stream: declare
                        # it as the default namespace on this element.
                        _push_ns_attr((u'xmlns', tagns))
                        _push_ns('', tagns)

                new_attrs = []
                for attr, value in attrs:
                    attrname = attr.localname
                    attrns = attr.namespace
                    if attrns:
                        if attrns not in namespaces:
                            # Undeclared attribute namespace: invent a prefix
                            # and declare it on this element.
                            prefix = _gen_prefix()
                            _push_ns(prefix, attrns)
                            _push_ns_attr(('xmlns:%s' % prefix, attrns))
                        else:
                            prefix = namespaces[attrns][-1]
                        if prefix:
                            attrname = u'%s:%s' % (prefix, attrname)
                    new_attrs.append((attrname, value))

                # Pending declarations come first, then the regular attributes.
                yield kind, (tagname, Attrs(ns_attrs + new_attrs)), pos
                # Declarations have been emitted; clear the list in place so
                # that the bound ``_push_ns_attr`` keeps working.
                del ns_attrs[:]

            elif kind is END:
                tagname = data.localname
                tagns = data.namespace
                if tagns:
                    prefix = namespaces[tagns][-1]
                    if prefix:
                        tagname = u'%s:%s' % (prefix, tagname)
                yield kind, tagname, pos

            elif kind is START_NS:
                prefix, uri = data
                if uri not in namespaces:
                    # NOTE(review): ``prefixes`` is keyed by prefix (see its
                    # construction above) but is queried with a URI here, so
                    # the ``[prefix]`` default is what normally applies.
                    # Confirm whether a URI-keyed lookup was intended.
                    prefix = prefixes.get(uri, [prefix])[-1]
                    # Only URIs not currently bound produce a declaration.
                    _push_ns_attr(_make_ns_attr(prefix, uri))
                _push_ns(prefix, uri)

            elif kind is END_NS:
                # ``data`` is the prefix whose binding goes out of scope.
                if data in prefixes:
                    uris = prefixes.get(data)
                    uri = uris.pop()
                    if not uris:
                        del prefixes[data]
                    # Unless the remaining innermost binding of this prefix is
                    # the same URI, also drop one prefix from the URI's stack.
                    if uri not in uris or uri != uris[-1]:
                        uri_prefixes = namespaces[uri]
                        uri_prefixes.pop()
                        if not uri_prefixes:
                            del namespaces[uri]
                    # If the declaration is still pending (no START/EMPTY was
                    # emitted since its START_NS), discard it so that it does
                    # not end up on a later, unrelated element.
                    if ns_attrs:
                        attr = _make_ns_attr(data, uri)
                        if attr in ns_attrs:
                            ns_attrs.remove(attr)

            else:
                yield kind, data, pos
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
493
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
494
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
class NamespaceStripper(object):
    r"""Stream filter that drops all namespace information from a stream and,
    optionally, every tag that does not belong to one given namespace.

    Tags and attributes without a namespace are always kept. Namespace
    declaration events (``START_NS``/``END_NS``) are never passed on.

    :param namespace: the URI of the namespace that should not be stripped. If
                      not set, only elements with no namespace are included in
                      the output.

    >>> from genshi.input import XML
    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
    ...   <two:item/>
    ... </doc>''')
    >>> for kind, data, pos in NamespaceStripper(Namespace('NS1'))(xml):
    ...     print kind, repr(data)
    START (u'doc', Attrs())
    TEXT u'\n  '
    TEXT u'\n'
    END u'doc'
    """

    def __init__(self, namespace=None):
        """Remember which namespace (if any) is to be kept.

        :param namespace: the namespace (or its URI) whose tags and attributes
                          survive the filter, or `None` to keep none
        """
        if namespace is None:
            # An empty container makes every ``name in self.namespace`` test
            # in ``__call__`` come out false.
            self.namespace = {}
        else:
            self.namespace = Namespace(namespace)

    def __call__(self, stream):
        """Apply the filter to `stream` and yield the surviving events.

        Start, empty and end tags are yielded with their local name only;
        events of any other kind (apart from namespace declarations) pass
        through untouched.
        """
        kept = self.namespace

        for kind, data, pos in stream:

            if kind is START_NS or kind is END_NS:
                # Namespace declarations have no place in the output.
                continue

            if kind is START or kind is EMPTY:
                qname, attrs = data
                if qname.namespace and qname not in kept:
                    continue
                # Keep only attributes that are unqualified or in the kept
                # namespace.
                surviving = [(attr, value) for attr, value in attrs
                             if not attr.namespace or attr in kept]
                yield kind, (qname.localname, Attrs(surviving)), pos

            elif kind is END:
                if data.namespace and data not in kept:
                    continue
                yield kind, data.localname, pos

            else:
                yield kind, data, pos
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
547
3460b04daeac Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
548
123
93bbdcf9428b Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
class WhitespaceFilter(object):
    """Stream filter that strips extraneous, ignorable white space from the
    text in a stream.

    Adjacent ``TEXT`` events are merged into a single event. Unless white
    space is being preserved, blanks and tabs directly before a newline are
    removed and runs of two or more newlines are collapsed into one.
    """

    def __init__(self, preserve=None, noescape=None):
        """Set up the filter.

        :param preserve: a set or sequence of tag names for which white-space
                         should be preserved
        :param noescape: a set or sequence of tag names for which text content
                         should not be escaped

        Elements listed in `noescape` are expected not to contain any child
        elements (think ``<style>`` or ``<script>`` in HTML documents): the
        no-escape mode is switched off again by the next end tag of any
        element.
        """
        if preserve is None:
            self.preserve = frozenset()
        else:
            self.preserve = frozenset(preserve)
        if noescape is None:
            self.noescape = frozenset()
        else:
            self.noescape = frozenset(noescape)

    def __call__(self, stream, ctxt=None, space=XML_NAMESPACE['space'],
                 trim_trailing_space=re.compile('[ \t]+(?=\n)').sub,
                 collapse_lines=re.compile('\n{2,}').sub):
        """Apply the filter to `stream` and yield the resulting events.

        The keyword arguments after `ctxt` exist only to bind the attribute
        name and the regular expression helpers once, at definition time.
        """
        join_markup = Markup('').join
        keep_ws_tags = self.preserve
        raw_text_tags = self.noescape
        # Greater than zero while inside an element that preserves white space.
        preserve_depth = 0
        # True while text must be passed on without escaping.
        raw = False

        pending = []
        # The trailing sentinel event makes sure text buffered at the very end
        # of the stream still gets flushed.
        sentinel = [(None, None, None)]
        for kind, data, pos in chain(stream, sentinel):

            if kind is TEXT:
                if raw:
                    data = Markup(data)
                pending.append(data)
                continue

            # Any non-text event first flushes the buffered text.
            if pending:
                if len(pending) == 1:
                    text = escape(pending.pop(), quotes=False)
                else:
                    text = join_markup(pending, escape_quotes=False)
                    del pending[:]
                if not preserve_depth:
                    text = trim_trailing_space('', text)
                    text = collapse_lines('\n', text)
                yield TEXT, Markup(text), pos

            if kind is START:
                tag, attrs = data
                # Once inside a preserving element, every nested start tag
                # deepens the count so that end tags can unwind it.
                if (preserve_depth or tag in keep_ws_tags
                        or attrs.get(space) == 'preserve'):
                    preserve_depth += 1
                if not raw and tag in raw_text_tags:
                    raw = True

            elif kind is END:
                raw = False
                if preserve_depth:
                    preserve_depth -= 1

            elif kind is START_CDATA:
                raw = True

            elif kind is END_CDATA:
                raw = False

            # The sentinel has a false kind and is not passed on.
            if kind:
                yield kind, data, pos
Copyright (C) 2012-2017 Edgewall Software