Mercurial > genshi > genshi-test
annotate markup/output.py @ 221:c448cf114c30
Fix Python 2.3 incompatibility introduced in [276].
author | cmlenz |
---|---|
date | Tue, 05 Sep 2006 16:35:54 +0000 |
parents | 0f897d319002 |
children |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
3 # Copyright (C) 2006 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
8 # are also available at http://markup.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
12 # history and logs, available at http://markup.edgewall.org/log/. |
1 | 13 |
14 """This module provides different kinds of serialization methods for XML event | |
15 streams. | |
16 """ | |
17 | |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
18 from itertools import chain |
1 | 19 try: |
20 frozenset | |
21 except NameError: | |
22 from sets import ImmutableSet as frozenset | |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
23 import re |
1 | 24 |
212
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
25 from markup.core import escape, Markup, Namespace, QName, StreamEventKind |
145
56d534eb53f9
* Fix error in expression evaluation when the expression evaluates to an iterable that does not produce event tuples.
cmlenz
parents:
143
diff
changeset
|
26 from markup.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \ |
56d534eb53f9
* Fix error in expression evaluation when the expression evaluates to an iterable that does not produce event tuples.
cmlenz
parents:
143
diff
changeset
|
27 END_CDATA, PI, COMMENT, XML_NAMESPACE |
1 | 28 |
200
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
29 __all__ = ['DocType', 'XMLSerializer', 'XHTMLSerializer', 'HTMLSerializer', |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
30 'TextSerializer'] |
1 | 31 |
32 | |
class DocType(object):
    """Defines a number of commonly used DOCTYPE declarations as constants.

    Every constant is a `(name, pubid, sysid)` tuple, which is the shape the
    serializers in this module accept for their `doctype` parameter.
    """

    # HTML 4.01 document types.
    HTML_STRICT = ('html', '-//W3C//DTD HTML 4.01//EN',
                   'http://www.w3.org/TR/html4/strict.dtd')
    HTML_TRANSITIONAL = ('html', '-//W3C//DTD HTML 4.01 Transitional//EN',
                         'http://www.w3.org/TR/html4/loose.dtd')
    # Alias: the strict variant is the default HTML doctype.
    HTML = HTML_STRICT

    # XHTML 1.0 document types.
    XHTML_STRICT = ('html', '-//W3C//DTD XHTML 1.0 Strict//EN',
                    'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd')
    XHTML_TRANSITIONAL = ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
                          'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')
    # Alias: the strict variant is the default XHTML doctype.
    XHTML = XHTML_STRICT
47 | |
48 | |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
class XMLSerializer(object):
    """Produces XML text from an event stream.
    
    >>> from markup.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XMLSerializer()(elem.generate()))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    # Names handed to the whitespace filter; empty for generic XML.
    _PRESERVE_SPACE = frozenset()

    def __init__(self, doctype=None, strip_whitespace=True):
        """Initialize the XML serializer.
        
        @param doctype: a `(name, pubid, sysid)` tuple that represents the
            DOCTYPE declaration that should be included at the top of the
            generated output
        @param strip_whitespace: whether extraneous whitespace should be
            stripped from the output
        """
        # Events that are emitted ahead of the caller's stream.
        self.preamble = []
        if doctype:
            self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
        # Filters are applied to the stream in list order by `__call__`.
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))

    def __call__(self, stream):
        """Serialize an event stream, yielding the output piece by piece.

        The preamble is chained in front of `stream` and the result is passed
        through every filter in `self.filters` before being serialized.

        @param stream: an iterable of `(kind, data, pos)` event tuples
        @return: a generator producing one output fragment per serialized
            event
        """
        # Namespace declarations waiting to be written on the next start tag.
        ns_attrib = []
        # Maps namespace URIs to the prefix they were declared with.
        ns_mapping = {XML_NAMESPACE.uri: 'xml'}
        have_doctype = False
        in_cdata = False

        stream = chain(self.preamble, stream)
        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrib = data

                tagname = tag.localname
                namespace = tag.namespace
                if namespace:
                    if namespace in ns_mapping:
                        prefix = ns_mapping[namespace]
                        if prefix:
                            tagname = '%s:%s' % (prefix, tagname)
                    else:
                        # Unknown namespace: declare it as the default
                        # namespace on this very element.
                        ns_attrib.append((QName('xmlns'), namespace))
                buf = ['<', tagname]

                for attr, value in attrib + ns_attrib:
                    attrname = attr.localname
                    if attr.namespace:
                        prefix = ns_mapping.get(attr.namespace)
                        if prefix:
                            attrname = '%s:%s' % (prefix, attrname)
                    buf += [' ', attrname, '="', escape(value), '"']
                # The pending declarations have been written; start afresh.
                ns_attrib = []

                if kind is EMPTY:
                    buf += ['/>']
                else:
                    buf += ['>']

                yield Markup(''.join(buf))

            elif kind is END:
                tag = data
                tagname = tag.localname
                if tag.namespace:
                    prefix = ns_mapping.get(tag.namespace)
                    if prefix:
                        tagname = '%s:%s' % (prefix, tag.localname)
                yield Markup('</%s>' % tagname)

            elif kind is TEXT:
                if in_cdata:
                    # Text inside a CDATA section is passed through unescaped.
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE and not have_doctype:
                # Only the first DOCTYPE event is written; later ones match
                # no branch and are dropped.
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf += [' PUBLIC "%s"']
                elif sysid:
                    buf += [' SYSTEM']
                if sysid:
                    buf += [' "%s"']
                buf += ['>\n']
                # filter() drops the empty parts so that the remaining values
                # line up with the placeholders added above.
                yield Markup(''.join(buf), *filter(None, data))
                have_doctype = True

            elif kind is START_NS:
                prefix, uri = data
                if uri not in ns_mapping:
                    ns_mapping[uri] = prefix
                    if not prefix:
                        ns_attrib.append((QName('xmlns'), uri))
                    else:
                        ns_attrib.append((QName('xmlns:%s' % prefix), uri))

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                in_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                in_cdata = False

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
334a338847af
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
167 |
1 | 168 |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
169 class XHTMLSerializer(XMLSerializer): |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
170 """Produces XHTML text from an event stream. |
1 | 171 |
172 >>> from markup.builder import tag | |
20 | 173 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
174 >>> print ''.join(XHTMLSerializer()(elem.generate())) |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
175 <div><a href="foo"></a><br /><hr noshade="noshade" /></div> |
1 | 176 """ |
177 | |
18
4cbebb15a834
Actually make use of the `markup.core.Namespace` class, and add a couple of doctests.
cmlenz
parents:
1
diff
changeset
|
178 NAMESPACE = Namespace('http://www.w3.org/1999/xhtml') |
1 | 179 |
180 _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame', | |
181 'hr', 'img', 'input', 'isindex', 'link', 'meta', | |
182 'param']) | |
183 _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare', | |
184 'defer', 'disabled', 'ismap', 'multiple', | |
185 'nohref', 'noresize', 'noshade', 'nowrap']) | |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
186 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')]) |
1 | 187 |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
188 def __call__(self, stream): |
136 | 189 namespace = self.NAMESPACE |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
190 ns_attrib = [] |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
191 ns_mapping = {XML_NAMESPACE.uri: 'xml'} |
136 | 192 boolean_attrs = self._BOOLEAN_ATTRS |
193 empty_elems = self._EMPTY_ELEMS | |
85 | 194 have_doctype = False |
143
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
195 in_cdata = False |
1 | 196 |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
197 stream = chain(self.preamble, stream) |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
198 for filter_ in self.filters: |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
199 stream = filter_(stream) |
1 | 200 for kind, data, pos in stream: |
201 | |
212
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
202 if kind is START or kind is EMPTY: |
1 | 203 tag, attrib = data |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
204 |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
205 tagname = tag.localname |
177
dbae9efe5704
* Minor fix for the XHTML serializer (the local namespace var got clobbered)
cmlenz
parents:
158
diff
changeset
|
206 tagns = tag.namespace |
dbae9efe5704
* Minor fix for the XHTML serializer (the local namespace var got clobbered)
cmlenz
parents:
158
diff
changeset
|
207 if tagns: |
dbae9efe5704
* Minor fix for the XHTML serializer (the local namespace var got clobbered)
cmlenz
parents:
158
diff
changeset
|
208 if tagns in ns_mapping: |
dbae9efe5704
* Minor fix for the XHTML serializer (the local namespace var got clobbered)
cmlenz
parents:
158
diff
changeset
|
209 prefix = ns_mapping[tagns] |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
210 if prefix: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
211 tagname = '%s:%s' % (prefix, tagname) |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
212 else: |
177
dbae9efe5704
* Minor fix for the XHTML serializer (the local namespace var got clobbered)
cmlenz
parents:
158
diff
changeset
|
213 ns_attrib.append((QName('xmlns'), tagns)) |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
214 buf = ['<', tagname] |
136 | 215 |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
216 for attr, value in attrib + ns_attrib: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
217 attrname = attr.localname |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
218 if attr.namespace: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
219 prefix = ns_mapping.get(attr.namespace) |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
220 if prefix: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
221 attrname = '%s:%s' % (prefix, attrname) |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
222 if attrname in boolean_attrs: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
223 if value: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
224 buf += [' ', attrname, '="', attrname, '"'] |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
225 else: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
226 buf += [' ', attrname, '="', escape(value), '"'] |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
227 ns_attrib = [] |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
228 |
212
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
229 if kind is EMPTY: |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
230 if (tagns and tagns != namespace.uri) \ |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
231 or tag.localname in empty_elems: |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
232 buf += [' />'] |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
233 else: |
212
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
234 buf += ['></%s>' % tagname] |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
235 else: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
236 buf += ['>'] |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
237 |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
238 yield Markup(''.join(buf)) |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
239 |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
240 elif kind is END: |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
241 tag = data |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
242 tagname = tag.localname |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
243 if tag.namespace: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
244 prefix = ns_mapping.get(tag.namespace) |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
245 if prefix: |
177
dbae9efe5704
* Minor fix for the XHTML serializer (the local namespace var got clobbered)
cmlenz
parents:
158
diff
changeset
|
246 tagname = '%s:%s' % (prefix, tagname) |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
247 yield Markup('</%s>' % tagname) |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
248 |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
249 elif kind is TEXT: |
143
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
250 if in_cdata: |
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
251 yield data |
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
252 else: |
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
253 yield escape(data, quotes=False) |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
254 |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
255 elif kind is COMMENT: |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
256 yield Markup('<!--%s-->' % data) |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
257 |
136 | 258 elif kind is DOCTYPE and not have_doctype: |
259 name, pubid, sysid = data | |
260 buf = ['<!DOCTYPE %s'] | |
261 if pubid: | |
262 buf += [' PUBLIC "%s"'] | |
263 elif sysid: | |
264 buf += [' SYSTEM'] | |
265 if sysid: | |
266 buf += [' "%s"'] | |
267 buf += ['>\n'] | |
268 yield Markup(''.join(buf), *filter(None, data)) | |
269 have_doctype = True | |
109
2de3f9d84a1c
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
270 |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
271 elif kind is START_NS: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
272 prefix, uri = data |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
273 if uri not in ns_mapping: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
274 ns_mapping[uri] = prefix |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
275 if not prefix: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
276 ns_attrib.append((QName('xmlns'), uri)) |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
277 else: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
278 ns_attrib.append((QName('xmlns:%s' % prefix), uri)) |
109
2de3f9d84a1c
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
279 |
143
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
280 elif kind is START_CDATA: |
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
281 yield Markup('<![CDATA[') |
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
282 in_cdata = True |
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
283 |
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
284 elif kind is END_CDATA: |
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
285 yield Markup(']]>') |
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
286 in_cdata = False |
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
287 |
105
334a338847af
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
288 elif kind is PI: |
334a338847af
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
289 yield Markup('<?%s %s?>' % data) |
334a338847af
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
290 |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
291 |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
292 class HTMLSerializer(XHTMLSerializer): |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
293 """Produces HTML text from an event stream. |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
294 |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
295 >>> from markup.builder import tag |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
296 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
297 >>> print ''.join(HTMLSerializer()(elem.generate())) |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
298 <div><a href="foo"></a><br><hr noshade></div> |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
299 """ |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
300 |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
301 _NOESCAPE_ELEMS = frozenset([QName('script'), QName('style')]) |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
302 |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
303 def __init__(self, doctype=None, strip_whitespace=True): |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
304 """Initialize the HTML serializer. |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
305 |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
306 @param doctype: a `(name, pubid, sysid)` tuple that represents the |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
307 DOCTYPE declaration that should be included at the top of the |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
308 generated output |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
309 @param strip_whitespace: whether extraneous whitespace should be |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
310 stripped from the output |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
311 """ |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
312 super(HTMLSerializer, self).__init__(doctype, False) |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
313 if strip_whitespace: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
314 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, |
143
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
315 self._NOESCAPE_ELEMS, True)) |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
316 |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
317 def __call__(self, stream): |
136 | 318 namespace = self.NAMESPACE |
319 ns_mapping = {} | |
320 boolean_attrs = self._BOOLEAN_ATTRS | |
321 empty_elems = self._EMPTY_ELEMS | |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
322 noescape_elems = self._NOESCAPE_ELEMS |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
323 have_doctype = False |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
324 noescape = False |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
325 |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
326 stream = chain(self.preamble, stream) |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
327 for filter_ in self.filters: |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
328 stream = filter_(stream) |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
329 for kind, data, pos in stream: |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
330 |
212
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
331 if kind is START or kind is EMPTY: |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
332 tag, attrib = data |
136 | 333 if not tag.namespace or tag in namespace: |
334 tagname = tag.localname | |
335 buf = ['<', tagname] | |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
336 |
136 | 337 for attr, value in attrib: |
338 attrname = attr.localname | |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
339 if not attr.namespace or attr in namespace: |
136 | 340 if attrname in boolean_attrs: |
341 if value: | |
342 buf += [' ', attrname] | |
343 else: | |
344 buf += [' ', attrname, '="', escape(value), '"'] | |
1 | 345 |
212
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
346 buf += ['>'] |
1 | 347 |
212
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
348 if kind is EMPTY: |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
349 if tagname not in empty_elems: |
213 | 350 buf += ['</%s>' % tagname] |
212
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
351 |
140
a2edde90ad24
Fix bug in HTML serializer, plus some other minor tweaks.
cmlenz
parents:
136
diff
changeset
|
352 yield Markup(''.join(buf)) |
1 | 353 |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
354 if tagname in noescape_elems: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
355 noescape = True |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
356 |
69 | 357 elif kind is END: |
1 | 358 tag = data |
136 | 359 if not tag.namespace or tag in namespace: |
360 yield Markup('</%s>' % tag.localname) | |
1 | 361 |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
362 noescape = False |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
363 |
69 | 364 elif kind is TEXT: |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
365 if noescape: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
366 yield data |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
367 else: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
368 yield escape(data, quotes=False) |
1 | 369 |
89
d4c7617900e3
Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents:
85
diff
changeset
|
370 elif kind is COMMENT: |
d4c7617900e3
Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents:
85
diff
changeset
|
371 yield Markup('<!--%s-->' % data) |
d4c7617900e3
Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents:
85
diff
changeset
|
372 |
136 | 373 elif kind is DOCTYPE and not have_doctype: |
374 name, pubid, sysid = data | |
375 buf = ['<!DOCTYPE %s'] | |
376 if pubid: | |
377 buf += [' PUBLIC "%s"'] | |
378 elif sysid: | |
379 buf += [' SYSTEM'] | |
380 if sysid: | |
381 buf += [' "%s"'] | |
382 buf += ['>\n'] | |
383 yield Markup(''.join(buf), *filter(None, data)) | |
384 have_doctype = True | |
109
2de3f9d84a1c
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
385 |
136 | 386 elif kind is START_NS and data[1] not in ns_mapping: |
387 ns_mapping[data[1]] = data[0] | |
109
2de3f9d84a1c
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
388 |
105
334a338847af
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
389 elif kind is PI: |
334a338847af
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
390 yield Markup('<?%s %s?>' % data) |
334a338847af
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
391 |
1 | 392 |
200
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
393 class TextSerializer(object): |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
394 """Produces plain text from an event stream. |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
395 |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
396 Only text events are included in the output. Unlike the other serializers, |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
397 special XML characters are not escaped: |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
398 |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
399 >>> from markup.builder import tag |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
400 >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br) |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
401 >>> print elem |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
402 <div><a href="foo">&lt;Hello!&gt;</a><br/></div> |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
403 >>> print ''.join(TextSerializer()(elem.generate())) |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
404 <Hello!> |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
405 |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
406 If text events contain literal markup (instances of the `Markup` class), |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
407 tags or entities are stripped from the output: |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
408 |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
409 >>> elem = tag.div(Markup('<a href="foo">Hello!</a><br/>')) |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
410 >>> print elem |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
411 <div><a href="foo">Hello!</a><br/></div> |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
412 >>> print ''.join(TextSerializer()(elem.generate())) |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
413 Hello! |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
414 """ |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
415 |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
416 def __call__(self, stream): |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
417 for kind, data, pos in stream: |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
418 if kind is TEXT: |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
419 if type(data) is Markup: |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
420 data = data.striptags().stripentities() |
201
0f16c907077e
The `TextSerializer` should produce `unicode` objects, not `Markup` objects.
cmlenz
parents:
200
diff
changeset
|
421 yield unicode(data) |
200
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
422 |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
423 |
212
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
424 class EmptyTagFilter(object): |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
425 """Combines `START` and `END` events into `EMPTY` events for elements that |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
426 have no contents. |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
427 """ |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
428 |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
429 EMPTY = StreamEventKind('EMPTY') |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
430 |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
431 def __call__(self, stream): |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
432 prev = (None, None, None) |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
433 for kind, data, pos in stream: |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
434 if prev[0] is START: |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
435 if kind is END: |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
436 prev = EMPTY, prev[1], prev[2] |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
437 yield prev |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
438 continue |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
439 else: |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
440 yield prev |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
441 if kind is not START: |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
442 yield kind, data, pos |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
443 prev = kind, data, pos |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
444 |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
445 |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
446 EMPTY = EmptyTagFilter.EMPTY |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
447 |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
448 |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
449 class WhitespaceFilter(object): |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
450 """A filter that removes extraneous ignorable white space from the |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
451 stream.""" |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
452 |
143
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
453 def __init__(self, preserve=None, noescape=None, escape_cdata=False): |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
454 """Initialize the filter. |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
455 |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
456 @param preserve: a set or sequence of tag names for which white-space |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
457 should be preserved. |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
458 @param noescape: a set or sequence of tag names for which text content |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
459 should not be escaped |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
460 |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
461 Both the `preserve` and `noescape` sets are expected to refer to |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
462 elements that cannot contain further child elements. |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
463 """ |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
464 if preserve is None: |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
465 preserve = [] |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
466 self.preserve = frozenset(preserve) |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
467 if noescape is None: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
468 noescape = [] |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
469 self.noescape = frozenset(noescape) |
143
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
470 self.escape_cdata = escape_cdata |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
471 |
def __call__(self, stream, ctxt=None, space=XML_NAMESPACE['space'],
             trim_trailing_space=re.compile('[ \t]+(?=\n)').sub,
             collapse_lines=re.compile('\n{2,}').sub):
    """Apply the filter to `stream`, yielding the filtered events.

    Runs of adjacent `TEXT` events are merged into a single `TEXT` event
    whose data is a `Markup` instance. Unless the text is located inside
    a white-space preserving element, spaces and tabs directly in front
    of a newline are removed and runs of two or more newlines are
    collapsed into a single newline. All other events are passed through
    unchanged.

    White-space preservation starts at an element whose tag name is in
    `self.preserve` or that has the attribute `space` set to
    `'preserve'`, and lasts until the matching `END` event of that
    element, including any text inside nested child elements.

    @param stream: the event stream to filter, an iterable of
        `(kind, data, pos)` tuples
    @param ctxt: not used by this filter
    @param space: the qualified name of the attribute that switches on
        white-space preservation for an element
    @param trim_trailing_space: callable taking `(replacement, text)`
        that removes spaces and tabs preceding a newline; bound as a
        default argument so that the pattern is compiled only once
    @param collapse_lines: callable taking `(replacement, text)` that
        collapses runs of newlines; bound as a default argument so that
        the pattern is compiled only once
    @return: a generator of `(kind, data, pos)` tuples
    """
    mjoin = Markup('').join
    preserve_elems = self.preserve
    noescape_elems = self.noescape
    escape_cdata = self.escape_cdata

    # Nesting depth inside the outermost white-space preserving element.
    # A plain boolean that is reset on every END event would already be
    # cleared by the first nested child element that gets closed, so that
    # the remaining content of e.g. a <pre> element would be collapsed.
    preserve_depth = 0
    noescape = False

    textbuf = []
    push_text = textbuf.append
    pop_text = textbuf.pop

    # The trailing sentinel event (kind is None) makes sure that text at
    # the very end of the stream is flushed; the sentinel itself is never
    # yielded.
    for kind, data, pos in chain(stream, [(None, None, None)]):
        if kind is TEXT:
            if noescape:
                # Wrap in Markup so that the text is not escaped again
                # when the buffer is flushed below.
                data = Markup(data)
            push_text(data)
            continue

        if textbuf:
            # Flush the buffered text before handling the current event,
            # so that it is still processed using the state of the
            # element it belongs to. Note that the merged event is given
            # the position of the current (non-text) event.
            if len(textbuf) > 1:
                text = mjoin(textbuf, escape_quotes=False)
                del textbuf[:]
            else:
                text = escape(pop_text(), quotes=False)
            if not preserve_depth:
                text = collapse_lines('\n', trim_trailing_space('', text))
            yield TEXT, Markup(text), pos

        if kind is START:
            tag, attrib = data
            # Once inside a preserving element every nested START has to
            # be counted too, so that only the END event matching the
            # outermost preserving element switches preservation off.
            if preserve_depth or tag in preserve_elems or \
                    attrib.get(space) == 'preserve':
                preserve_depth += 1
            if not noescape and tag in noescape_elems:
                noescape = True

        elif kind is END:
            noescape = False
            if preserve_depth:
                preserve_depth -= 1

        elif kind is START_CDATA and not escape_cdata:
            noescape = True

        elif kind is END_CDATA and not escape_cdata:
            noescape = False

        if kind:
            yield kind, data, pos