annotate genshi/output.py @ 412:bd51adc20a67 trunk

Actually write xmlns declarations for generated attribute namespace prefixes.
author cmlenz
date Mon, 26 Feb 2007 18:39:57 +0000
parents d14d89995c29
children 073640758a42
rev   line source
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
1 # -*- coding: utf-8 -*-
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
2 #
408
4675d5cf6c67 Update copyright year for files modified this year.
cmlenz
parents: 402
diff changeset
3 # Copyright (C) 2006-2007 Edgewall Software
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
4 # All rights reserved.
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
5 #
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
6 # This software is licensed as described in the file COPYING, which
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
7 # you should have received as part of this distribution. The terms
230
84168828b074 Renamed Markup to Genshi in repository.
cmlenz
parents: 219
diff changeset
8 # are also available at http://genshi.edgewall.org/wiki/License.
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
9 #
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
10 # This software consists of voluntary contributions made by many
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
11 # individuals. For the exact contribution history, see the revision
230
84168828b074 Renamed Markup to Genshi in repository.
cmlenz
parents: 219
diff changeset
12 # history and logs, available at http://genshi.edgewall.org/log/.
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
13
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
14 """This module provides different kinds of serialization methods for XML event
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
15 streams.
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
16 """
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
17
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
18 from itertools import chain
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
19 try:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
20 frozenset
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
21 except NameError:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
22 from sets import ImmutableSet as frozenset
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
23 import re
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
24
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
25 from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind
402
c199e9b95884 Fix output of namespace declarations for namespace URLs appearing more than once in a stream. Thanks to Jeff Cutsinger for reporting the problem.
cmlenz
parents: 397
diff changeset
26 from genshi.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \
c199e9b95884 Fix output of namespace declarations for namespace URLs appearing more than once in a stream. Thanks to Jeff Cutsinger for reporting the problem.
cmlenz
parents: 397
diff changeset
27 START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
28
200
5861f4446c26 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
29 __all__ = ['DocType', 'XMLSerializer', 'XHTMLSerializer', 'HTMLSerializer',
5861f4446c26 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
30 'TextSerializer']
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
31
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
32
85
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
33 class DocType(object):
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
34 """Defines a number of commonly used DOCTYPE declarations as constants."""
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
35
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
36 HTML_STRICT = (
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
37 'html', '-//W3C//DTD HTML 4.01//EN',
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
38 'http://www.w3.org/TR/html4/strict.dtd'
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
39 )
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
40 HTML_TRANSITIONAL = (
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
41 'html', '-//W3C//DTD HTML 4.01 Transitional//EN',
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
42 'http://www.w3.org/TR/html4/loose.dtd'
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
43 )
85
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
44 HTML = HTML_STRICT
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
45
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
46 XHTML_STRICT = (
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
47 'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
48 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
49 )
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
50 XHTML_TRANSITIONAL = (
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
51 'html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
52 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
53 )
85
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
54 XHTML = XHTML_STRICT
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
55
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
56
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
57 class XMLSerializer(object):
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
58 """Produces XML text from an event stream.
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
59
230
84168828b074 Renamed Markup to Genshi in repository.
cmlenz
parents: 219
diff changeset
60 >>> from genshi.builder import tag
20
cc92d74ce9e5 Fix tests broken in [20].
cmlenz
parents: 19
diff changeset
61 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
62 >>> print ''.join(XMLSerializer()(elem.generate()))
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
63 <div><a href="foo"/><br/><hr noshade="True"/></div>
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
64 """
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
65
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
66 _PRESERVE_SPACE = frozenset()
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
67
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
68 def __init__(self, doctype=None, strip_whitespace=True,
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
69 namespace_prefixes=None):
85
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
70 """Initialize the XML serializer.
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
71
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
72 @param doctype: a `(name, pubid, sysid)` tuple that represents the
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
73 DOCTYPE declaration that should be included at the top of the
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
74 generated output
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
75 @param strip_whitespace: whether extraneous whitespace should be
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
76 stripped from the output
85
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
77 """
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
78 self.preamble = []
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
79 if doctype:
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
80 self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
212
0141f45c18e1 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents: 201
diff changeset
81 self.filters = [EmptyTagFilter()]
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
82 if strip_whitespace:
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
83 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
84 self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes))
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
85
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
86 def __call__(self, stream):
143
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
87 have_doctype = False
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
88 in_cdata = False
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
89
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
90 stream = chain(self.preamble, stream)
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
91 for filter_ in self.filters:
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
92 stream = filter_(stream)
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
93 for kind, data, pos in stream:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
94
212
0141f45c18e1 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents: 201
diff changeset
95 if kind is START or kind is EMPTY:
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
96 tag, attrib = data
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
97 buf = ['<', tag]
397
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
98 for attr, value in attrib:
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
99 buf += [' ', attr, '="', escape(value), '"']
397
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
100 buf.append(kind is EMPTY and '/>' or '>')
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
101 yield Markup(u''.join(buf))
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
102
69
c40a5dcd2b55 A couple of minor performance improvements.
cmlenz
parents: 66
diff changeset
103 elif kind is END:
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
104 yield Markup('</%s>' % data)
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
105
69
c40a5dcd2b55 A couple of minor performance improvements.
cmlenz
parents: 66
diff changeset
106 elif kind is TEXT:
143
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
107 if in_cdata:
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
108 yield data
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
109 else:
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
110 yield escape(data, quotes=False)
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
111
89
80386d62814f Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents: 85
diff changeset
112 elif kind is COMMENT:
80386d62814f Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents: 85
diff changeset
113 yield Markup('<!--%s-->' % data)
80386d62814f Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents: 85
diff changeset
114
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
115 elif kind is DOCTYPE and not have_doctype:
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
116 name, pubid, sysid = data
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
117 buf = ['<!DOCTYPE %s']
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
118 if pubid:
397
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
119 buf.append(' PUBLIC "%s"')
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
120 elif sysid:
397
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
121 buf.append(' SYSTEM')
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
122 if sysid:
397
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
123 buf.append(' "%s"')
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
124 buf.append('>\n')
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
125 yield Markup(u''.join(buf), *filter(None, data))
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
126 have_doctype = True
109
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
127
143
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
128 elif kind is START_CDATA:
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
129 yield Markup('<![CDATA[')
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
130 in_cdata = True
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
131
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
132 elif kind is END_CDATA:
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
133 yield Markup(']]>')
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
134 in_cdata = False
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
135
105
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
136 elif kind is PI:
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
137 yield Markup('<?%s %s?>' % data)
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
138
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
139
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
140 class XHTMLSerializer(XMLSerializer):
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
141 """Produces XHTML text from an event stream.
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
142
230
84168828b074 Renamed Markup to Genshi in repository.
cmlenz
parents: 219
diff changeset
143 >>> from genshi.builder import tag
20
cc92d74ce9e5 Fix tests broken in [20].
cmlenz
parents: 19
diff changeset
144 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
145 >>> print ''.join(XHTMLSerializer()(elem.generate()))
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
146 <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
147 """
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
148
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
149 _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
150 'hr', 'img', 'input', 'isindex', 'link', 'meta',
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
151 'param'])
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
152 _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
153 'defer', 'disabled', 'ismap', 'multiple',
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
154 'nohref', 'noresize', 'noshade', 'nowrap'])
346
96882a191686 Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents: 345
diff changeset
155 _PRESERVE_SPACE = frozenset([
96882a191686 Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents: 345
diff changeset
156 QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'),
96882a191686 Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents: 345
diff changeset
157 QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea')
96882a191686 Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents: 345
diff changeset
158 ])
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
159
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
160 def __init__(self, doctype=None, strip_whitespace=True,
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
161 namespace_prefixes=None):
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
162 super(XHTMLSerializer, self).__init__(doctype, False)
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
163 self.filters = [EmptyTagFilter()]
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
164 if strip_whitespace:
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
165 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
166 namespace_prefixes = namespace_prefixes or {}
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
167 namespace_prefixes['http://www.w3.org/1999/xhtml'] = ''
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
168 self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes))
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
169
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
170 def __call__(self, stream):
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
171 boolean_attrs = self._BOOLEAN_ATTRS
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
172 empty_elems = self._EMPTY_ELEMS
85
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
173 have_doctype = False
143
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
174 in_cdata = False
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
175
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
176 stream = chain(self.preamble, stream)
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
177 for filter_ in self.filters:
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
178 stream = filter_(stream)
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
179 for kind, data, pos in stream:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
180
212
0141f45c18e1 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents: 201
diff changeset
181 if kind is START or kind is EMPTY:
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
182 tag, attrib = data
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
183 buf = ['<', tag]
397
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
184 for attr, value in attrib:
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
185 if attr in boolean_attrs:
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
186 value = attr
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
187 buf += [' ', attr, '="', escape(value), '"']
212
0141f45c18e1 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents: 201
diff changeset
188 if kind is EMPTY:
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
189 if tag in empty_elems:
397
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
190 buf.append(' />')
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
191 else:
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
192 buf.append('></%s>' % tag)
141
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
193 else:
397
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
194 buf.append('>')
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
195 yield Markup(u''.join(buf))
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
196
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
197 elif kind is END:
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
198 yield Markup('</%s>' % data)
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
199
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
200 elif kind is TEXT:
143
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
201 if in_cdata:
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
202 yield data
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
203 else:
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
204 yield escape(data, quotes=False)
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
205
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
206 elif kind is COMMENT:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
207 yield Markup('<!--%s-->' % data)
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
208
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
209 elif kind is DOCTYPE and not have_doctype:
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
210 name, pubid, sysid = data
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
211 buf = ['<!DOCTYPE %s']
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
212 if pubid:
397
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
213 buf.append(' PUBLIC "%s"')
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
214 elif sysid:
397
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
215 buf.append(' SYSTEM')
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
216 if sysid:
397
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
217 buf.append(' "%s"')
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
218 buf.append('>\n')
31742fe6d47e * Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents: 346
diff changeset
219 yield Markup(u''.join(buf), *filter(None, data))
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
220 have_doctype = True
109
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
221
143
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
222 elif kind is START_CDATA:
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
223 yield Markup('<![CDATA[')
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
224 in_cdata = True
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
225
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
226 elif kind is END_CDATA:
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
227 yield Markup(']]>')
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
228 in_cdata = False
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
229
105
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
230 elif kind is PI:
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
231 yield Markup('<?%s %s?>' % data)
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
232
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
233
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
class HTMLSerializer(XHTMLSerializer):
    """Produces HTML text from an event stream.

    Elements without content are written without a closing tag when they are
    listed as empty elements, and boolean attributes are minimized:

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(HTMLSerializer()(elem.generate()))
    <div><a href="foo"></a><br><hr noshade></div>
    """

    # Elements whose text content is written to the output without escaping,
    # both as plain names and qualified with the XHTML namespace.
    _NOESCAPE_ELEMS = frozenset([
        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
    ])

    def __init__(self, doctype=None, strip_whitespace=True):
        """Initialize the HTML serializer.

        @param doctype: a `(name, pubid, sysid)` tuple that represents the
            DOCTYPE declaration that should be included at the top of the
            generated output
        @param strip_whitespace: whether extraneous whitespace should be
            stripped from the output
        """
        # The base class is told not to strip whitespace; the filter chain is
        # rebuilt below so that the whitespace filter also knows about the
        # elements whose content must not be escaped.
        super(HTMLSerializer, self).__init__(doctype, False)
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
                                                 self._NOESCAPE_ELEMS))
        # Applied regardless of `strip_whitespace`: tag names are written to
        # the output verbatim in `__call__`.
        self.filters.append(NamespaceStripper('http://www.w3.org/1999/xhtml'))

    def __call__(self, stream):
        """Serialize the given event stream as HTML.

        @param stream: an iterable of `(kind, data, pos)` event tuples
        @return: a generator yielding the output one fragment per event
        """
        # Bind the lookup tables to locals, they are consulted per event.
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        noescape_elems = self._NOESCAPE_ELEMS
        have_doctype = False
        # True while inside an element listed in `_NOESCAPE_ELEMS`.
        noescape = False

        stream = chain(self.preamble, stream)
        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    if attr in boolean_attrs:
                        # Minimized form: only the attribute name is written,
                        # and only if the value is truthy.
                        if value:
                            buf += [' ', attr]
                    else:
                        buf += [' ', attr, '="', escape(value), '"']
                buf.append('>')
                if kind is EMPTY:
                    # Elements in `empty_elems` get no end tag at all; any
                    # other empty element is closed explicitly.
                    if tag not in empty_elems:
                        buf.append('</%s>' % tag)
                yield Markup(u''.join(buf))
                # Only a START event actually opens the element. An EMPTY
                # event has no matching END event that would reset the flag,
                # so enabling it here would leave all text following e.g.
                # `<script src="..."></script>` unescaped until some
                # unrelated element is closed.
                if kind is START and tag in noescape_elems:
                    noescape = True

            elif kind is END:
                yield Markup('</%s>' % data)
                noescape = False

            elif kind is TEXT:
                if noescape:
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE and not have_doctype:
                # Only the first DOCTYPE event is written; later ones match
                # no branch and are dropped.
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                # The non-empty fields of `data` are handed to `Markup`
                # together with the format string (presumably interpolated
                # into the `%s` placeholders -- confirm against `Markup`).
                yield Markup(u''.join(buf), *filter(None, data))
                have_doctype = True

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
321
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
322
200
5861f4446c26 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
class TextSerializer(object):
    """Produces plain text from an event stream.

    Every event that is not a text event is discarded. In contrast to the
    markup serializers, special XML characters are left unescaped:

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br)
    >>> print elem
    <div><a href="foo">&lt;Hello!&gt;</a><br/></div>
    >>> print ''.join(TextSerializer()(elem.generate()))
    <Hello!>

    Text events carrying literal markup (instances of the `Markup` class)
    have their tags and entities stripped before being written:

    >>> elem = tag.div(Markup('<a href="foo">Hello!</a><br/>'))
    >>> print elem
    <div><a href="foo">Hello!</a><br/></div>
    >>> print ''.join(TextSerializer()(elem.generate()))
    Hello!
    """

    def __call__(self, stream):
        """Yield the text of every `TEXT` event in the stream as `unicode`.

        @param stream: an iterable of event tuples; only the kind (first
            item) and the data (second item) are looked at
        """
        for item in stream:
            if item[0] is not TEXT:
                continue
            text = item[1]
            # Exact type check: only genuine `Markup` instances are stripped.
            if type(text) is Markup:
                text = text.striptags().stripentities()
            yield unicode(text)
200
5861f4446c26 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
353
5861f4446c26 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
354
212
0141f45c18e1 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents: 201
diff changeset
class EmptyTagFilter(object):
    """Combines `START` and `END` events into `EMPTY` events for elements that
    have no contents.

    A `START` event is held back until the following event is known: if that
    event is an `END`, both are replaced by a single `EMPTY` event carrying
    the data and position of the `START` event; otherwise the `START` event
    is passed on unchanged. All other events pass through as they are.
    """

    EMPTY = StreamEventKind('EMPTY')

    def __call__(self, stream):
        """Filter the given stream.

        @param stream: an iterable of `(kind, data, pos)` event tuples
        @return: a generator over the filtered events
        """
        # The most recently seen event; a START event kept here has not been
        # yielded yet.
        prev = (None, None, None)
        for ev in stream:
            if prev[0] is START:
                if ev[0] is END:
                    # START directly followed by END: collapse the pair.
                    prev = EMPTY, prev[1], prev[2]
                    yield prev
                    continue
                else:
                    # The element has content, release the pending START.
                    yield prev
            if ev[0] is not START:
                yield ev
            prev = ev
        # A START that was the very last event of the stream is still
        # pending; emit it rather than dropping it silently.
        if prev[0] is START:
            yield prev


# Module-level alias, referenced by `EmptyTagFilter.__call__` and by the
# serializers when testing the kind of an event.
EMPTY = EmptyTagFilter.EMPTY
0141f45c18e1 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents: 201
diff changeset
378
0141f45c18e1 Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents: 201
diff changeset
379
410
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
class NamespaceFlattener(object):
    r"""Output stream filter that removes namespace information from the stream,
    instead adding namespace attributes and prefixes as needed.

    Qualified tag and attribute names are replaced by plain ``prefix:name``
    strings, and the required ``xmlns`` / ``xmlns:prefix`` attributes are
    prepended to the attributes of the next element that is emitted.
    `START_NS` and `END_NS` events are consumed and never passed on.

    @param prefixes: optional mapping of namespace URIs to prefixes

    >>> from genshi.input import XML
    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
    ...   <two:item/>
    ... </doc>''')
    >>> for kind, data, pos in NamespaceFlattener()(xml):
    ...     print kind, repr(data)
    START (u'doc', Attrs([(u'xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
    TEXT u'\n  '
    START (u'two:item', Attrs())
    END u'two:item'
    TEXT u'\n'
    END u'doc'
    """

    def __init__(self, prefixes=None):
        # The `xml` prefix is always bound and never needs to be declared.
        self.prefixes = {XML_NAMESPACE.uri: 'xml'}
        if prefixes is not None:
            self.prefixes.update(prefixes)

    def __call__(self, stream):
        """Filter the given event stream.

        @param stream: iterable of `(kind, data, pos)` event tuples
        @return: a generator over the flattened events
        """
        # Two mirrored tables of stacks: prefix -> URIs and URI -> prefixes.
        # The innermost (most recent) binding is the last item of each stack.
        prefixes = dict([(v, [k]) for k, v in self.prefixes.items()])
        namespaces = {XML_NAMESPACE.uri: ['xml']}

        def _push_ns(prefix, uri):
            namespaces.setdefault(uri, []).append(prefix)
            prefixes.setdefault(prefix, []).append(uri)

        def _pop_ns(prefix):
            # Undo the innermost binding of `prefix` and return its URI.
            uris = prefixes.get(prefix)
            uri = uris.pop()
            if not uris:
                del prefixes[prefix]
            if uri not in uris or uri != uris[-1]:
                uri_prefixes = namespaces[uri]
                uri_prefixes.pop()
                if not uri_prefixes:
                    del namespaces[uri]
            return uri

        # Declarations waiting to be attached to the next emitted element.
        ns_attrs = []
        _push_ns_attr = ns_attrs.append

        def _gen_prefix():
            val = 0
            while 1:
                val += 1
                yield 'ns%d' % val
        _gen_prefix = _gen_prefix().next

        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrs = data

                tagname = tag.localname
                tagns = tag.namespace
                if tagns:
                    if tagns in namespaces:
                        prefix = namespaces[tagns][-1]
                        if prefix:
                            tagname = u'%s:%s' % (prefix, tagname)
                    else:
                        # Namespace was never declared in the stream: make
                        # it the default namespace from this element on.
                        _push_ns_attr((u'xmlns', tagns))
                        _push_ns('', tagns)

                new_attrs = []
                for attr, value in attrs:
                    attrname = attr.localname
                    attrns = attr.namespace
                    if attrns:
                        if attrns not in namespaces:
                            # Undeclared attribute namespace: invent a
                            # prefix and declare it on this element.
                            prefix = _gen_prefix()
                            _push_ns(prefix, attrns)
                            _push_ns_attr(('xmlns:%s' % prefix, attrns))
                        else:
                            prefix = namespaces[attrns][-1]
                        if prefix:
                            attrname = u'%s:%s' % (prefix, attrname)
                    new_attrs.append((attrname, value))

                yield kind, (tagname, Attrs(ns_attrs + new_attrs)), pos
                del ns_attrs[:]

            elif kind is END:
                tagname = data.localname
                tagns = data.namespace
                if tagns:
                    prefix = namespaces[tagns][-1]
                    if prefix:
                        tagname = u'%s:%s' % (prefix, tagname)
                yield kind, tagname, pos

            elif kind is START_NS:
                prefix, uri = data
                if uri not in namespaces:
                    # NOTE(review): `prefixes` is keyed by prefix but is
                    # probed with a URI here, so this lookup normally falls
                    # through to the prefix given in the stream. Confirm
                    # whether the configured URI-to-prefix preferences were
                    # meant to apply at this point.
                    prefix = prefixes.get(uri, [prefix])[-1]
                    if not prefix:
                        _push_ns_attr((u'xmlns', uri))
                    else:
                        _push_ns_attr((u'xmlns:%s' % prefix, uri))
                _push_ns(prefix, uri)

            elif kind is END_NS:
                if data in prefixes:
                    uri = _pop_ns(data)
                    # If the scope closes before any element was emitted
                    # inside it (e.g. the element was stripped upstream),
                    # its declaration is still queued. Drop it so that it
                    # does not end up on the next, unrelated element.
                    if ns_attrs and uri not in namespaces:
                        if data:
                            stale = (u'xmlns:%s' % data, uri)
                        else:
                            stale = (u'xmlns', uri)
                        if stale in ns_attrs:
                            ns_attrs.remove(stale)

            else:
                yield kind, data, pos
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
489
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
490
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
class NamespaceStripper(object):
    r"""Stream filter that removes all namespace information from a stream, and
    optionally strips out all tags not in a given namespace.

    Start, empty and end tags in a foreign namespace are dropped, as are
    attributes in a foreign namespace and all `START_NS` / `END_NS` events.
    Everything that is kept is reported by its local name only.

    @param namespace: the URI of the namespace that should not be stripped. If
        not set, only elements with no namespace are included in the output.

    >>> from genshi.input import XML
    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
    ...   <two:item/>
    ... </doc>''')
    >>> for kind, data, pos in NamespaceStripper(Namespace('NS1'))(xml):
    ...     print kind, repr(data)
    START (u'doc', Attrs())
    TEXT u'\n  '
    TEXT u'\n'
    END u'doc'
    """

    def __init__(self, namespace=None):
        if namespace is not None:
            self.namespace = Namespace(namespace)
        else:
            # An empty container: no qualified name is ever "in" it, so
            # every namespaced tag and attribute gets stripped.
            self.namespace = {}

    def __call__(self, stream):
        """Filter the given event stream.

        @param stream: iterable of `(kind, data, pos)` event tuples
        @return: a generator over the events that were kept
        """
        namespace = self.namespace

        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrs = data
                if tag.namespace and tag not in namespace:
                    continue

                # Report attributes by local name, exactly like the tag
                # itself; passing the qualified name object through would
                # carry the namespace along into the output.
                new_attrs = [(attr.localname, value) for attr, value in attrs
                             if not attr.namespace or attr in namespace]

                data = tag.localname, Attrs(new_attrs)

            elif kind is END:
                if data.namespace and data not in namespace:
                    continue
                data = data.localname

            elif kind is START_NS or kind is END_NS:
                continue

            yield kind, data, pos
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
542
d14d89995c29 Improve the handling of namespaces in serialization.
cmlenz
parents: 408
diff changeset
543
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
class WhitespaceFilter(object):
    """A filter that removes extraneous ignorable white space from the
    stream.

    Consecutive text events are merged into a single escaped `TEXT` event
    along the way.
    """

    def __init__(self, preserve=None, noescape=None):
        """Initialize the filter.

        @param preserve: a set or sequence of tag names for which white-space
            should be preserved
        @param noescape: a set or sequence of tag names for which text content
            should not be escaped

        The `noescape` set is expected to refer to elements that cannot contain
        further child elements (such as <style> or <script> in HTML documents).
        """
        if preserve is None:
            preserve = ()
        if noescape is None:
            noescape = ()
        self.preserve = frozenset(preserve)
        self.noescape = frozenset(noescape)

    def __call__(self, stream, ctxt=None, space=XML_NAMESPACE['space'],
                 trim_trailing_space=re.compile('[ \t]+(?=\n)').sub,
                 collapse_lines=re.compile('\n{2,}').sub):
        """Filter the given event stream.

        @param stream: iterable of `(kind, data, pos)` event tuples
        @param ctxt: not used by this filter
        @param space: qualified name of the `xml:space` attribute
        @param trim_trailing_space: substitution function that removes blanks
            and tabs directly in front of a line break
        @param collapse_lines: substitution function that folds runs of line
            breaks into one
        @return: a generator over the filtered events
        """
        join_markup = Markup('').join
        keep_ws_tags = self.preserve
        raw_text_tags = self.noescape

        keep_ws_depth = 0   # > 0 while inside a white-space preserving element
        raw_text = False    # true while text must be passed through unescaped
        pending = []        # text chunks collected since the last other event

        # The trailing dummy event flushes text left over at the very end.
        for kind, data, pos in chain(stream, [(None, None, None)]):

            if kind is TEXT:
                if raw_text:
                    data = Markup(data)
                pending.append(data)
                continue

            # Any other event first flushes the collected text as one event.
            if pending:
                if len(pending) == 1:
                    text = escape(pending.pop(), quotes=False)
                else:
                    text = join_markup(pending, escape_quotes=False)
                    del pending[:]
                if not keep_ws_depth:
                    text = trim_trailing_space('', text)
                    text = collapse_lines('\n', text)
                yield TEXT, Markup(text), pos

            if kind is START:
                tag, attrs = data
                # Once inside a preserving element every nested element is
                # counted too, so the matching END events balance out.
                if (keep_ws_depth or tag in keep_ws_tags
                        or attrs.get(space) == 'preserve'):
                    keep_ws_depth += 1
                raw_text = raw_text or tag in raw_text_tags
            elif kind is END:
                raw_text = False
                if keep_ws_depth:
                    keep_ws_depth -= 1
            elif kind is START_CDATA:
                raw_text = True
            elif kind is END_CDATA:
                raw_text = False

            if kind:
                yield kind, data, pos
Copyright (C) 2012-2017 Edgewall Software