Mercurial > genshi > genshi-test
annotate genshi/output.py @ 902:09cc3627654c experimental-inline
Sync `experimental/inline` branch with [source:trunk@1126].
author | cmlenz |
---|---|
date | Fri, 23 Apr 2010 21:08:26 +0000 |
parents | de82830f8816 |
children |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
902
09cc3627654c
Sync `experimental/inline` branch with [source:trunk@1126].
cmlenz
parents:
830
diff
changeset
|
3 # Copyright (C) 2006-2009 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
230 | 8 # are also available at http://genshi.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
230 | 12 # history and logs, available at http://genshi.edgewall.org/log/. |
1 | 13 |
14 """This module provides different kinds of serialization methods for XML event | |
15 streams. | |
16 """ | |
17 | |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
18 from itertools import chain |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
19 import re |
1 | 20 |
500 | 21 from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind |
22 from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \ | |
23 START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE | |
1 | 24 |
500 | 25 __all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer', |
26 'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer'] | |
27 __docformat__ = 'restructuredtext en' | |
28 | |
902
09cc3627654c
Sync `experimental/inline` branch with [source:trunk@1126].
cmlenz
parents:
830
diff
changeset
|
29 |
820
1837f39efd6f
Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents:
500
diff
changeset
|
def encode(iterator, method='xml', encoding='utf-8', out=None):
    """Turn the chunks produced by a serializer into encoded output.

    :param iterator: the iterator returned from serializing a stream (any
                     iterable yielding unicode chunks)
    :param method: the serialization method; it selects how characters that
                   cannot be represented in `encoding` are handled: the
                   ``'text'`` method (or a `TextSerializer` instance) uses
                   ``'replace'``, everything else ``'xmlcharrefreplace'``
    :param encoding: the target encoding; if `None`, chunks are passed
                     through without being encoded
    :param out: a file-like object the output is written to chunk by chunk
                instead of being returned as one big string; if this is a
                file or socket (or similar), `encoding` must not be `None`
    :return: the joined (and possibly encoded) output, or `None` if `out`
             was provided

    :since: version 0.4.1
    :note: Changed in 0.5: added the `out` parameter
    """
    if encoding is None:
        # Nothing to encode: hand every chunk back untouched.
        def _encode(string):
            return string
    else:
        if method == 'text' or isinstance(method, TextSerializer):
            errors = 'replace'
        else:
            errors = 'xmlcharrefreplace'

        def _encode(string):
            return string.encode(encoding, errors)

    if out is not None:
        # Streaming mode: encode and write each chunk as it arrives.
        for chunk in iterator:
            out.write(_encode(chunk))
        return None

    # Drain the iterator into a list first, then join and encode in one go.
    chunks = list(iterator)
    return _encode(''.join(chunks))
500 | 60 |
902
09cc3627654c
Sync `experimental/inline` branch with [source:trunk@1126].
cmlenz
parents:
830
diff
changeset
|
61 |
def get_serializer(method='xml', **kwargs):
    """Instantiate the serializer that corresponds to `method`.

    :param method: the serialization method; either one of the names "xml",
                   "xhtml", "html" or "text" (matched case-insensitively),
                   or a custom serializer class

    All remaining keyword arguments are handed to the serializer's
    constructor unchanged, so which ones are accepted depends on `method`.
    An unknown method name raises `KeyError`.

    :see: `XMLSerializer`, `XHTMLSerializer`, `HTMLSerializer`, `TextSerializer`
    :since: version 0.4.1
    """
    serializer_cls = method
    if isinstance(serializer_cls, basestring):
        # Names are mapped onto the built-in serializer classes.
        builtin_serializers = {
            'xml': XMLSerializer,
            'xhtml': XHTMLSerializer,
            'html': HTMLSerializer,
            'text': TextSerializer,
        }
        serializer_cls = builtin_serializers[method.lower()]
    return serializer_cls(**kwargs)
1 | 80 |
81 | |
class DocType(object):
    """Defines a number of commonly used DOCTYPE declarations as constants.

    Every constant is a ``(name, pubid, sysid)`` tuple.
    """

    # HTML 4.01
    HTML_STRICT = (
        'html', '-//W3C//DTD HTML 4.01//EN',
        'http://www.w3.org/TR/html4/strict.dtd'
    )
    HTML_TRANSITIONAL = (
        'html', '-//W3C//DTD HTML 4.01 Transitional//EN',
        'http://www.w3.org/TR/html4/loose.dtd'
    )
    HTML_FRAMESET = (
        'html', '-//W3C//DTD HTML 4.01 Frameset//EN',
        'http://www.w3.org/TR/html4/frameset.dtd'
    )
    HTML = HTML_STRICT

    # HTML5 has neither a public nor a system identifier.
    HTML5 = ('html', None, None)

    # XHTML 1.0
    XHTML_STRICT = (
        'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
    )
    XHTML_TRANSITIONAL = (
        'html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
    )
    XHTML_FRAMESET = (
        'html', '-//W3C//DTD XHTML 1.0 Frameset//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd'
    )
    XHTML = XHTML_STRICT

    # XHTML 1.1
    XHTML11 = (
        'html', '-//W3C//DTD XHTML 1.1//EN',
        'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
    )

    # SVG 1.1
    SVG_FULL = (
        'svg', '-//W3C//DTD SVG 1.1//EN',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd'
    )
    SVG_BASIC = (
        'svg', '-//W3C//DTD SVG Basic 1.1//EN',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd'
    )
    SVG_TINY = (
        'svg', '-//W3C//DTD SVG Tiny 1.1//EN',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd'
    )
    SVG = SVG_FULL

    @classmethod
    def get(cls, name):
        """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE``
        declaration for the specified name.

        The following names are recognized in this version:
         * "html" or "html-strict" for the HTML 4.01 strict DTD
         * "html-transitional" for the HTML 4.01 transitional DTD
         * "html-frameset" for the HTML 4.01 frameset DTD
         * "html5" for the ``DOCTYPE`` proposed for HTML5
         * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD
         * "xhtml-transitional" for the XHTML 1.0 transitional DTD
         * "xhtml-frameset" for the XHTML 1.0 frameset DTD
         * "xhtml11" for the XHTML 1.1 DTD
         * "svg" or "svg-full" for the SVG 1.1 DTD
         * "svg-basic" for the SVG Basic 1.1 DTD
         * "svg-tiny" for the SVG Tiny 1.1 DTD

        The lookup is case-insensitive. All constants are resolved through
        `cls`, so a subclass that overrides one of them is honoured for
        every name.

        :param name: the name of the ``DOCTYPE`` (a string)
        :return: the ``(name, pubid, sysid)`` tuple for the requested
                 ``DOCTYPE``, or ``None`` if the name is not recognized
        :since: version 0.4.1
        """
        return {
            'html': cls.HTML, 'html-strict': cls.HTML_STRICT,
            # Previously these two went through the hard-coded ``DocType``
            # name and therefore ignored subclass overrides.
            'html-transitional': cls.HTML_TRANSITIONAL,
            'html-frameset': cls.HTML_FRAMESET,
            'html5': cls.HTML5,
            'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT,
            'xhtml-transitional': cls.XHTML_TRANSITIONAL,
            'xhtml-frameset': cls.XHTML_FRAMESET,
            'xhtml11': cls.XHTML11,
            'svg': cls.SVG, 'svg-full': cls.SVG_FULL,
            'svg-basic': cls.SVG_BASIC,
            'svg-tiny': cls.SVG_TINY
        }.get(name.lower())
170 | |
85 | 171 |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
class XMLSerializer(object):
    """Produces XML text from an event stream.

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print(''.join(XMLSerializer()(elem.generate())))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    # Elements inside which the whitespace filter must leave text alone;
    # plain XML has none.
    _PRESERVE_SPACE = frozenset()

    def __init__(self, doctype=None, strip_whitespace=True,
                 namespace_prefixes=None, cache=True):
        """Initialize the XML serializer.

        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output, or the name of a DOCTYPE as
                        defined in `DocType.get`
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param namespace_prefixes: handed to the `NamespaceFlattener` filter
                                   as its ``prefixes`` argument
        :param cache: whether to cache the text output per event, which
                      improves performance for repetitive markup
        :note: Changed in 0.4.2: The `doctype` parameter can now be a string.
        :note: Changed in 0.6: The `cache` parameter was added
        """
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
        self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes,
                                               cache=cache))
        if doctype:
            self.filters.append(DocTypeInserter(doctype))
        self.cache = cache

    def __call__(self, stream):
        """Serialize `stream` and yield the resulting output chunks.

        The stream is first passed through every filter in `self.filters`.
        Only the first XML declaration and the first DOCTYPE event are
        emitted; later ones are dropped.

        :param stream: an iterable of ``(kind, data, pos)`` events
        :return: a generator yielding one output chunk per emitted event
        """
        have_decl = have_doctype = False
        in_cdata = False

        # Output cache, local to this call, keyed on ``(kind, data)``. It
        # only ever gets filled when `self.cache` is enabled.
        cache = {}
        cache_get = cache.get
        if self.cache:
            def _emit(kind, input, output):
                cache[kind, input] = output
                return output
        else:
            def _emit(kind, input, output):
                return output

        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:
            if kind is TEXT and in_cdata:
                # Text inside a CDATA section is written out verbatim. It
                # must bypass the cache in both directions: the cache key
                # does not record whether we are inside CDATA, so sharing
                # entries would emit escaped text inside a CDATA section, or
                # (worse) raw unescaped text outside of one.
                yield data
                continue

            cached = cache_get((kind, data))
            if cached is not None:
                yield cached

            elif kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    buf += [' ', attr, '="', escape(value), '"']
                buf.append(kind is EMPTY and '/>' or '>')
                yield _emit(kind, data, Markup(''.join(buf)))

            elif kind is END:
                yield _emit(kind, data, Markup('</%s>' % data))

            elif kind is TEXT:
                yield _emit(kind, data, escape(data, quotes=False))

            elif kind is COMMENT:
                yield _emit(kind, data, Markup('<!--%s-->' % data))

            elif kind is XML_DECL and not have_decl:
                version, encoding, standalone = data
                buf = ['<?xml version="%s"' % version]
                if encoding:
                    buf.append(' encoding="%s"' % encoding)
                # -1 means the declaration carries no standalone attribute.
                if standalone != -1:
                    standalone = standalone and 'yes' or 'no'
                    buf.append(' standalone="%s"' % standalone)
                buf.append('?>\n')
                yield Markup(''.join(buf))
                have_decl = True

            elif kind is DOCTYPE and not have_doctype:
                name, pubid, sysid = data
                # Build a format string with one ``%s`` per non-empty part,
                # then fill it with exactly those parts.
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                yield Markup(''.join(buf)) % tuple([p for p in data if p])
                have_doctype = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                in_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                in_cdata = False

            elif kind is PI:
                yield _emit(kind, data, Markup('<?%s %s?>' % data))
500 | 283 |
284 | |
class XHTMLSerializer(XMLSerializer):
    """Produces XHTML text from an event stream.

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print(''.join(XHTMLSerializer()(elem.generate())))
    <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
    """

    # Elements that are written as ``<tag />`` when empty; any other empty
    # element gets an explicit end tag.
    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
                              'param'])
    # Attributes whose value is replaced by the attribute name itself.
    _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
                                'defer', 'disabled', 'ismap', 'multiple',
                                'nohref', 'noresize', 'noshade', 'nowrap'])
    # Elements handed to the whitespace filter as space-preserving.
    _PRESERVE_SPACE = frozenset([
        QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'),
        QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea')
    ])

    def __init__(self, doctype=None, strip_whitespace=True,
                 namespace_prefixes=None, drop_xml_decl=True, cache=True):
        """Initialize the XHTML serializer.

        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param namespace_prefixes: mapping handed to the `NamespaceFlattener`
                                   filter as its ``prefixes`` argument; the
                                   XHTML namespace is always mapped to the
                                   empty prefix. The mapping passed in is
                                   copied, never modified.
        :param drop_xml_decl: whether XML declaration events should be
                              omitted from the output
        :param cache: whether to cache the text output per event, which
                      improves performance for repetitive markup
        """
        super(XHTMLSerializer, self).__init__(doctype, False)
        # The filter chain set up by the base class is replaced wholesale.
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
        # Work on a copy so the caller's dictionary is not mutated.
        namespace_prefixes = dict(namespace_prefixes or {})
        namespace_prefixes['http://www.w3.org/1999/xhtml'] = ''
        self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes,
                                               cache=cache))
        if doctype:
            self.filters.append(DocTypeInserter(doctype))
        self.drop_xml_decl = drop_xml_decl
        self.cache = cache

    def __call__(self, stream):
        """Serialize `stream` as XHTML and yield the resulting output chunks.

        The stream is first passed through every filter in `self.filters`.
        Only the first DOCTYPE event is emitted; the first XML declaration
        is emitted only when `drop_xml_decl` is false.

        :param stream: an iterable of ``(kind, data, pos)`` events
        :return: a generator yielding one output chunk per emitted event
        """
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        drop_xml_decl = self.drop_xml_decl
        have_decl = have_doctype = False
        in_cdata = False

        # Output cache, local to this call, keyed on ``(kind, data)``. It
        # only ever gets filled when `self.cache` is enabled.
        cache = {}
        cache_get = cache.get
        if self.cache:
            def _emit(kind, input, output):
                cache[kind, input] = output
                return output
        else:
            def _emit(kind, input, output):
                return output

        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:
            if kind is TEXT and in_cdata:
                # Text inside a CDATA section is written out verbatim. It
                # must bypass the cache in both directions: the cache key
                # does not record whether we are inside CDATA, so sharing
                # entries would emit escaped text inside a CDATA section, or
                # (worse) raw unescaped text outside of one.
                yield data
                continue

            cached = cache_get((kind, data))
            if cached is not None:
                yield cached

            elif kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    if attr in boolean_attrs:
                        value = attr
                    elif attr == 'xml:lang' and 'lang' not in attrib:
                        # Add a plain ``lang`` attribute as well; the
                        # ``xml:lang`` attribute itself is still written
                        # by the statement after this chain.
                        buf += [' lang="', escape(value), '"']
                    elif attr == 'xml:space':
                        continue
                    buf += [' ', attr, '="', escape(value), '"']
                if kind is EMPTY:
                    if tag in empty_elems:
                        buf.append(' />')
                    else:
                        buf.append('></%s>' % tag)
                else:
                    buf.append('>')
                yield _emit(kind, data, Markup(''.join(buf)))

            elif kind is END:
                yield _emit(kind, data, Markup('</%s>' % data))

            elif kind is TEXT:
                yield _emit(kind, data, escape(data, quotes=False))

            elif kind is COMMENT:
                yield _emit(kind, data, Markup('<!--%s-->' % data))

            elif kind is DOCTYPE and not have_doctype:
                name, pubid, sysid = data
                # Build a format string with one ``%s`` per non-empty part,
                # then fill it with exactly those parts.
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                yield Markup(''.join(buf)) % tuple([p for p in data if p])
                have_doctype = True

            elif kind is XML_DECL and not have_decl and not drop_xml_decl:
                version, encoding, standalone = data
                buf = ['<?xml version="%s"' % version]
                if encoding:
                    buf.append(' encoding="%s"' % encoding)
                # -1 means the declaration carries no standalone attribute.
                if standalone != -1:
                    standalone = standalone and 'yes' or 'no'
                    buf.append(' standalone="%s"' % standalone)
                buf.append('?>\n')
                yield Markup(''.join(buf))
                have_decl = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                in_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                in_cdata = False

            elif kind is PI:
                yield _emit(kind, data, Markup('<?%s %s?>' % data))
105
334a338847af
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
411 |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
412 |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
class HTMLSerializer(XHTMLSerializer):
    """Produces HTML text from an event stream.

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print(''.join(HTMLSerializer()(elem.generate())))
    <div><a href="foo"></a><br><hr noshade></div>
    """

    # Elements whose text content is written out verbatim, without escaping.
    _NOESCAPE_ELEMS = frozenset([
        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
    ])

    def __init__(self, doctype=None, strip_whitespace=True, cache=True):
        """Initialize the HTML serializer.

        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param cache: whether to cache the text output per event, which
                      improves performance for repetitive markup
        :note: Changed in 0.6: The `cache` parameter was added
        """
        super(HTMLSerializer, self).__init__(doctype, False)
        # Replace whatever filter chain the base class set up with the
        # HTML-specific one.
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
                                                 self._NOESCAPE_ELEMS))
        self.filters.append(NamespaceFlattener(prefixes={
            'http://www.w3.org/1999/xhtml': ''
        }, cache=cache))
        if doctype:
            self.filters.append(DocTypeInserter(doctype))
        # Honour the caller's choice; this used to be hard-wired to True,
        # which silently ignored ``cache=False``.
        self.cache = cache

    def __call__(self, stream):
        """Serialize the given event stream, yielding chunks of HTML text.

        The stream is first passed through ``self.filters``; each resulting
        event is then turned into a piece of output.

        :param stream: an iterable of ``(kind, data, pos)`` events
        """
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        noescape_elems = self._NOESCAPE_ELEMS
        have_doctype = False
        noescape = False

        cache = {}
        cache_get = cache.get
        if self.cache:
            def _emit(kind, event_data, output):
                cache[kind, event_data] = output
                return output
        else:
            def _emit(kind, event_data, output):
                return output

        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, _ in stream:
            if kind is TEXT and (noescape or isinstance(data, Markup)):
                # Never store or look up this text in the cache: the same
                # string is written raw inside <script>/<style> but escaped
                # elsewhere, and a Markup instance presumably shares its
                # dictionary key with the equal plain string, so a cached
                # entry could be served in the wrong (unescaped) form.
                if noescape:
                    yield data
                else:
                    yield escape(data, quotes=False)
                continue

            output = cache_get((kind, data))
            if output is not None:
                yield output
                # Keep the escaping state in sync even on a cache hit. Only
                # a START opens a no-escape region; an EMPTY element has no
                # content and no matching END to close the region again.
                if kind is START and data[0] in noescape_elems:
                    noescape = True
                elif kind is END:
                    noescape = False

            elif kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    if attr in boolean_attrs:
                        # Boolean attributes are minimized (name only) and
                        # left out entirely when their value is false.
                        if value:
                            buf += [' ', attr]
                    elif ':' in attr:
                        # Prefixed attributes are dropped, except that
                        # xml:lang is mapped to a plain lang attribute when
                        # no explicit lang attribute is present.
                        if attr == 'xml:lang' and 'lang' not in attrib:
                            buf += [' lang="', escape(value), '"']
                    elif attr != 'xmlns':
                        buf += [' ', attr, '="', escape(value), '"']
                buf.append('>')
                if kind is EMPTY:
                    # Elements outside the empty-element set still need an
                    # explicit end tag.
                    if tag not in empty_elems:
                        buf.append('</%s>' % tag)
                yield _emit(kind, data, Markup(''.join(buf)))
                if kind is START and tag in noescape_elems:
                    noescape = True

            elif kind is END:
                yield _emit(kind, data, Markup('</%s>' % data))
                noescape = False

            elif kind is TEXT:
                # Only plain text outside no-escape elements gets here.
                yield _emit(kind, data, escape(data, quotes=False))

            elif kind is COMMENT:
                yield _emit(kind, data, Markup('<!--%s-->' % data))

            elif kind is DOCTYPE and not have_doctype:
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                # Only the non-empty parts have a placeholder in the template.
                yield Markup(''.join(buf)) % tuple([p for p in data if p])
                have_doctype = True

            elif kind is PI:
                yield _emit(kind, data, Markup('<?%s %s?>' % data))
105
334a338847af
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
528 |
1 | 529 |
200
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
class TextSerializer(object):
    """Produces plain text from an event stream.

    Only text events are included in the output. Unlike the other
    serializers, special XML characters are not escaped:

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br)
    >>> print(elem)
    <div><a href="foo">&lt;Hello!&gt;</a><br/></div>
    >>> print(''.join(TextSerializer()(elem.generate())))
    <Hello!>

    If text events contain literal markup (instances of the `Markup` class),
    that markup is by default passed through unchanged:

    >>> elem = tag.div(Markup('<a href="foo">Hello &amp; Bye!</a><br/>'))
    >>> print(elem.generate().render(TextSerializer, encoding=None))
    <a href="foo">Hello &amp; Bye!</a><br/>

    You can use the ``strip_markup`` option to change this behavior, so that
    tags and entities are stripped from the output (or in the case of
    entities, replaced with the equivalent character):

    >>> print(elem.generate().render(TextSerializer, strip_markup=True,
    ...                              encoding=None))
    Hello & Bye!
    """

    def __init__(self, strip_markup=False):
        """Create the serializer.

        :param strip_markup: whether markup (tags and encoded characters) found
                             in the text should be removed
        """
        self.strip_markup = strip_markup

    def __call__(self, stream):
        """Yield the text of every `TEXT` event in the stream as `unicode`.

        Events of any other kind are skipped.

        :param stream: an iterable of events whose first two items are the
                       event kind and the event data
        """
        strip_markup = self.strip_markup
        for event in stream:
            if event[0] is not TEXT:
                continue
            text = event[1]
            # Only exact `Markup` instances are treated as literal markup.
            if strip_markup and type(text) is Markup:
                text = text.striptags().stripentities()
            yield unicode(text)
200
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
575 |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
576 |
212
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
class EmptyTagFilter(object):
    """Combines `START` and `END` events into `EMPTY` events for elements that
    have no contents.
    """

    # Event kind used for an element whose START is directly followed by END.
    EMPTY = StreamEventKind('EMPTY')

    def __call__(self, stream):
        """Filter the stream, merging adjacent `START`/`END` pairs.

        Every `START` event is held back until the next event is seen: when
        that event is an `END`, a single `EMPTY` event carrying the data and
        position of the `START` is yielded in place of the pair; otherwise
        the held `START` is yielded unchanged ahead of the following event.
        A `START` that is the very last event of the stream is never yielded.

        :param stream: an iterable of ``(kind, data, pos)`` events
        """
        held = (None, None, None)
        for event in stream:
            kind = event[0]
            if held[0] is START:
                if kind is END:
                    held = EMPTY, held[1], held[2]
                    yield held
                    continue
                yield held
            if kind is not START:
                yield event
            held = event


EMPTY = EmptyTagFilter.EMPTY
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
600 |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
601 |
class NamespaceFlattener(object):
    r"""Output stream filter that removes namespace information from the stream,
    instead adding namespace attributes and prefixes as needed.

    :param prefixes: optional mapping of namespace URIs to prefixes
    :param cache: whether to cache the flattened data of start and end tag
                  events, keyed on the original event

    >>> from genshi.input import XML
    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
    ...   <two:item/>
    ... </doc>''')
    >>> for kind, data, pos in NamespaceFlattener()(xml):
    ...     print('%s %r' % (kind, data))
    START (u'doc', Attrs([('xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
    TEXT u'\n  '
    START (u'two:item', Attrs())
    END u'two:item'
    TEXT u'\n'
    END u'doc'
    """

    def __init__(self, prefixes=None, cache=True):
        self.prefixes = {XML_NAMESPACE.uri: 'xml'}
        if prefixes is not None:
            self.prefixes.update(prefixes)
        self.cache = cache

    def __call__(self, stream):
        """Flatten the namespace information of the given stream.

        `START_NS` and `END_NS` events are consumed. `START`, `EMPTY` and
        `END` events are re-emitted with prefixed string names, and
        ``xmlns`` attributes are added to start tags where a namespace comes
        into scope. All other events pass through unchanged.

        :param stream: an iterable of ``(kind, data, pos)`` events
        """
        cache = {}
        cache_get = cache.get
        # Both variants must return a complete event triple: the result is
        # yielded directly into the output stream. The non-caching variant
        # used to return only ``output``, which produced malformed events
        # whenever ``cache=False`` was requested.
        if self.cache:
            def _emit(kind, event_data, output, pos):
                cache[kind, event_data] = output
                return kind, output, pos
        else:
            def _emit(kind, event_data, output, pos):
                return kind, output, pos

        prefixes = dict([(v, [k]) for k, v in self.prefixes.items()])
        namespaces = {XML_NAMESPACE.uri: ['xml']}
        def _push_ns(prefix, uri):
            namespaces.setdefault(uri, []).append(prefix)
            prefixes.setdefault(prefix, []).append(uri)
            # Cached names depend on the bindings currently in scope.
            cache.clear()
        def _pop_ns(prefix):
            uris = prefixes.get(prefix)
            uri = uris.pop()
            if not uris:
                del prefixes[prefix]
            if uri not in uris or uri != uris[-1]:
                uri_prefixes = namespaces[uri]
                uri_prefixes.pop()
                if not uri_prefixes:
                    del namespaces[uri]
            cache.clear()
            return uri

        # Namespace declarations waiting to be attached to the next start tag.
        ns_attrs = []
        _push_ns_attr = ns_attrs.append
        def _make_ns_attr(prefix, uri):
            return 'xmlns%s' % (prefix and ':%s' % prefix or ''), uri

        def _gen_prefix():
            val = 0
            while 1:
                val += 1
                yield 'ns%d' % val
        _gen_prefix = _gen_prefix().next

        for kind, data, pos in stream:
            output = cache_get((kind, data))
            if output is not None:
                yield kind, output, pos

            elif kind is START or kind is EMPTY:
                tag, attrs = data

                tagname = tag.localname
                tagns = tag.namespace
                if tagns:
                    if tagns in namespaces:
                        prefix = namespaces[tagns][-1]
                        if prefix:
                            tagname = '%s:%s' % (prefix, tagname)
                    else:
                        # Namespace not in scope: declare it as the default.
                        _push_ns_attr(('xmlns', tagns))
                        _push_ns('', tagns)

                new_attrs = []
                for attr, value in attrs:
                    attrname = attr.localname
                    attrns = attr.namespace
                    if attrns:
                        if attrns not in namespaces:
                            # Attribute namespace not in scope: declare it
                            # under a generated ``nsN`` prefix.
                            prefix = _gen_prefix()
                            _push_ns(prefix, attrns)
                            _push_ns_attr(('xmlns:%s' % prefix, attrns))
                        else:
                            prefix = namespaces[attrns][-1]
                        if prefix:
                            attrname = '%s:%s' % (prefix, attrname)
                    new_attrs.append((attrname, value))

                yield _emit(kind, data,
                            (tagname, Attrs(ns_attrs + new_attrs)), pos)
                del ns_attrs[:]

            elif kind is END:
                tagname = data.localname
                tagns = data.namespace
                if tagns:
                    prefix = namespaces[tagns][-1]
                    if prefix:
                        tagname = '%s:%s' % (prefix, tagname)
                yield _emit(kind, data, tagname, pos)

            elif kind is START_NS:
                prefix, uri = data
                if uri not in namespaces:
                    prefix = prefixes.get(uri, [prefix])[-1]
                    _push_ns_attr(_make_ns_attr(prefix, uri))
                _push_ns(prefix, uri)

            elif kind is END_NS:
                if data in prefixes:
                    uri = _pop_ns(data)
                    if ns_attrs:
                        # Drop a declaration that was queued but never
                        # attached to a start tag.
                        attr = _make_ns_attr(data, uri)
                        if attr in ns_attrs:
                            ns_attrs.remove(attr)

            else:
                yield kind, data, pos
733 | |
734 | |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
735 class WhitespaceFilter(object): |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
736 """A filter that removes extraneous ignorable white space from the |
500 | 737 stream. |
738 """ | |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
739 |
305 | 740 def __init__(self, preserve=None, noescape=None): |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
741 """Initialize the filter. |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
742 |
500 | 743 :param preserve: a set or sequence of tag names for which white-space |
744 should be preserved | |
745 :param noescape: a set or sequence of tag names for which text content | |
746 should not be escaped | |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
747 |
347 | 748 The `noescape` set is expected to refer to elements that cannot contain |
500 | 749 further child elements (such as ``<style>`` or ``<script>`` in HTML |
750 documents). | |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
751 """ |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
752 if preserve is None: |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
753 preserve = [] |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
754 self.preserve = frozenset(preserve) |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
755 if noescape is None: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
756 noescape = [] |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
757 self.noescape = frozenset(noescape) |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
758 |
219 | 759 def __call__(self, stream, ctxt=None, space=XML_NAMESPACE['space'], |
760 trim_trailing_space=re.compile('[ \t]+(?=\n)').sub, | |
761 collapse_lines=re.compile('\n{2,}').sub): | |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
762 mjoin = Markup('').join |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
763 preserve_elems = self.preserve |
347 | 764 preserve = 0 |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
765 noescape_elems = self.noescape |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
766 noescape = False |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
767 |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
768 textbuf = [] |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
769 push_text = textbuf.append |
136 | 770 pop_text = textbuf.pop |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
771 for kind, data, pos in chain(stream, [(None, None, None)]): |
500 | 772 |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
773 if kind is TEXT: |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
774 if noescape: |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
775 data = Markup(data) |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
776 push_text(data) |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
777 else: |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
778 if textbuf: |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
779 if len(textbuf) > 1: |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
780 text = mjoin(textbuf, escape_quotes=False) |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
781 del textbuf[:] |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
782 else: |
136 | 783 text = escape(pop_text(), quotes=False) |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
784 if not preserve: |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
785 text = collapse_lines('\n', trim_trailing_space('', text)) |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
786 yield TEXT, Markup(text), pos |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
787 |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
788 if kind is START: |
347 | 789 tag, attrs = data |
790 if preserve or (tag in preserve_elems or | |
791 attrs.get(space) == 'preserve'): | |
792 preserve += 1 | |
219 | 793 if not noescape and tag in noescape_elems: |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
794 noescape = True |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
795 |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
796 elif kind is END: |
347 | 797 noescape = False |
798 if preserve: | |
799 preserve -= 1 | |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
800 |
305 | 801 elif kind is START_CDATA: |
143
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
802 noescape = True |
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
803 |
305 | 804 elif kind is END_CDATA: |
143
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
805 noescape = False |
ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
806 |
136 | 807 if kind: |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
808 yield kind, data, pos |
820
1837f39efd6f
Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents:
500
diff
changeset
|
809 |
1837f39efd6f
Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents:
500
diff
changeset
|
810 |
1837f39efd6f
Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents:
500
diff
changeset
|
class DocTypeInserter(object):
    """Stream filter that adds a DOCTYPE event to an event stream.

    If the stream starts with an XML declaration, the DOCTYPE is emitted
    directly after it; otherwise the DOCTYPE becomes the very first event.
    An empty stream produces just the DOCTYPE event.
    """

    def __init__(self, doctype):
        """Create the filter.

        :param doctype: the DOCTYPE to insert, given either as a string
                        (which is resolved through `DocType.get`) or as a
                        DocType object.
        """
        if isinstance(doctype, basestring):
            doctype = DocType.get(doctype)
        # Synthetic event: there is no source location for the inserted
        # DOCTYPE, hence the placeholder position.
        self.doctype_event = (DOCTYPE, doctype, (None, -1, -1))

    def __call__(self, stream):
        """Yield the events of `stream` with the DOCTYPE event inserted.

        :param stream: an iterable of ``(kind, data, pos)`` event tuples.
        """
        events = iter(stream)

        # Only the first event decides where the DOCTYPE goes, so it is
        # handled on its own and the loop is left immediately afterwards.
        for kind, data, pos in events:
            if kind is XML_DECL:
                yield (kind, data, pos)
                yield self.doctype_event
            else:
                yield self.doctype_event
                yield (kind, data, pos)
            break
        else:
            # The stream was empty: the DOCTYPE is the only output.
            yield self.doctype_event
            return

        # Everything after the first event is passed through unchanged.
        for kind, data, pos in events:
            yield (kind, data, pos)