Mercurial > genshi > mirror
comparison markup/output.py @ 141:520a5b7dd6d2 trunk
* Text content of `<script>` and `<style>` elements is no longer escaped in HTML output (see #24)
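* Fix handling of the `xml:space` attribute: it is now looked up by its namespace-qualified name instead of the literal string `xml:space`.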
author | cmlenz |
---|---|
date | Thu, 10 Aug 2006 15:21:55 +0000 |
parents | c1f4390d50f8 |
children | 3d4c214c979a |
comparison
equal
deleted
inserted
replaced
140:c1f4390d50f8 | 141:520a5b7dd6d2 |
---|---|
20 frozenset | 20 frozenset |
21 except NameError: | 21 except NameError: |
22 from sets import ImmutableSet as frozenset | 22 from sets import ImmutableSet as frozenset |
23 import re | 23 import re |
24 | 24 |
25 from markup.core import escape, Markup, Namespace, QName | 25 from markup.core import escape, Markup, Namespace, QName, XML_NAMESPACE |
26 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, COMMENT, PI | 26 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, COMMENT, PI |
27 | 27 |
28 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer'] | 28 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer'] |
29 | 29 |
30 | 30 |
72 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) | 72 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) |
73 | 73 |
74 def __call__(self, stream): | 74 def __call__(self, stream): |
75 have_doctype = False | 75 have_doctype = False |
76 ns_attrib = [] | 76 ns_attrib = [] |
77 ns_mapping = {} | 77 ns_mapping = {XML_NAMESPACE.uri: 'xml'} |
78 | 78 |
79 stream = chain(self.preamble, stream) | 79 stream = chain(self.preamble, stream) |
80 for filter_ in self.filters: | 80 for filter_ in self.filters: |
81 stream = filter_(stream) | 81 stream = filter_(stream) |
82 stream = _PushbackIterator(stream) | 82 stream = _PushbackIterator(stream) |
175 'nohref', 'noresize', 'noshade', 'nowrap']) | 175 'nohref', 'noresize', 'noshade', 'nowrap']) |
176 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')]) | 176 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')]) |
177 | 177 |
178 def __call__(self, stream): | 178 def __call__(self, stream): |
179 namespace = self.NAMESPACE | 179 namespace = self.NAMESPACE |
180 ns_mapping = {} | 180 ns_attrib = [] |
181 ns_mapping = {XML_NAMESPACE.uri: 'xml'} | |
181 boolean_attrs = self._BOOLEAN_ATTRS | 182 boolean_attrs = self._BOOLEAN_ATTRS |
182 empty_elems = self._EMPTY_ELEMS | 183 empty_elems = self._EMPTY_ELEMS |
183 have_doctype = False | 184 have_doctype = False |
184 | 185 |
185 stream = chain(self.preamble, stream) | 186 stream = chain(self.preamble, stream) |
189 pushback = stream.pushback | 190 pushback = stream.pushback |
190 for kind, data, pos in stream: | 191 for kind, data, pos in stream: |
191 | 192 |
192 if kind is START: | 193 if kind is START: |
193 tag, attrib = data | 194 tag, attrib = data |
194 if not tag.namespace or tag in namespace: | 195 |
195 tagname = tag.localname | 196 tagname = tag.localname |
196 buf = ['<', tagname] | 197 namespace = tag.namespace |
197 | 198 if namespace: |
198 for attr, value in attrib: | 199 if namespace in ns_mapping: |
199 if not attr.namespace or attr in namespace: | 200 prefix = ns_mapping[namespace] |
200 attrname = attr.localname | 201 if prefix: |
201 if attrname in boolean_attrs: | 202 tagname = '%s:%s' % (prefix, tagname) |
202 if value: | 203 else: |
203 buf += [' ', attrname, '="', attrname, '"'] | 204 ns_attrib.append((QName('xmlns'), namespace)) |
204 else: | 205 buf = ['<', tagname] |
205 buf += [' ', attrname, '="', escape(value), '"'] | 206 |
206 | 207 for attr, value in attrib + ns_attrib: |
207 if tagname in empty_elems: | 208 attrname = attr.localname |
208 kind, data, pos = stream.next() | 209 if attr.namespace: |
209 if kind is END: | 210 prefix = ns_mapping.get(attr.namespace) |
210 buf += [' />'] | 211 if prefix: |
211 else: | 212 attrname = '%s:%s' % (prefix, attrname) |
212 buf += ['>'] | 213 if attrname in boolean_attrs: |
213 pushback((kind, data, pos)) | 214 if value: |
215 buf += [' ', attrname, '="', attrname, '"'] | |
216 else: | |
217 buf += [' ', attrname, '="', escape(value), '"'] | |
218 ns_attrib = [] | |
219 | |
220 if (not tag.namespace or tag in namespace) and \ | |
221 tagname in empty_elems: | |
222 kind, data, pos = stream.next() | |
223 if kind is END: | |
224 buf += [' />'] | |
214 else: | 225 else: |
215 buf += ['>'] | 226 buf += ['>'] |
216 | 227 pushback((kind, data, pos)) |
217 yield Markup(''.join(buf)) | 228 else: |
229 buf += ['>'] | |
230 | |
231 yield Markup(''.join(buf)) | |
218 | 232 |
219 elif kind is END: | 233 elif kind is END: |
220 tag = data | 234 tag = data |
221 if not tag.namespace or tag in namespace: | 235 tagname = tag.localname |
222 yield Markup('</%s>' % tag.localname) | 236 if tag.namespace: |
237 prefix = ns_mapping.get(tag.namespace) | |
238 if prefix: | |
239 tagname = '%s:%s' % (prefix, tag.localname) | |
240 yield Markup('</%s>' % tagname) | |
223 | 241 |
224 elif kind is TEXT: | 242 elif kind is TEXT: |
225 yield escape(data, quotes=False) | 243 yield escape(data, quotes=False) |
226 | 244 |
227 elif kind is COMMENT: | 245 elif kind is COMMENT: |
238 buf += [' "%s"'] | 256 buf += [' "%s"'] |
239 buf += ['>\n'] | 257 buf += ['>\n'] |
240 yield Markup(''.join(buf), *filter(None, data)) | 258 yield Markup(''.join(buf), *filter(None, data)) |
241 have_doctype = True | 259 have_doctype = True |
242 | 260 |
243 elif kind is START_NS and data[1] not in ns_mapping: | 261 elif kind is START_NS: |
244 ns_mapping[data[1]] = data[0] | 262 prefix, uri = data |
263 if uri not in ns_mapping: | |
264 ns_mapping[uri] = prefix | |
265 if not prefix: | |
266 ns_attrib.append((QName('xmlns'), uri)) | |
267 else: | |
268 ns_attrib.append((QName('xmlns:%s' % prefix), uri)) | |
245 | 269 |
246 elif kind is PI: | 270 elif kind is PI: |
247 yield Markup('<?%s %s?>' % data) | 271 yield Markup('<?%s %s?>' % data) |
248 | 272 |
249 | 273 |
254 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) | 278 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) |
255 >>> print ''.join(HTMLSerializer()(elem.generate())) | 279 >>> print ''.join(HTMLSerializer()(elem.generate())) |
256 <div><a href="foo"></a><br><hr noshade></div> | 280 <div><a href="foo"></a><br><hr noshade></div> |
257 """ | 281 """ |
258 | 282 |
283 _NOESCAPE_ELEMS = frozenset([QName('script'), QName('style')]) | |
284 | |
285 def __init__(self, doctype=None, strip_whitespace=True): | |
286 """Initialize the HTML serializer. | |
287 | |
288 @param doctype: a `(name, pubid, sysid)` tuple that represents the | |
289 DOCTYPE declaration that should be included at the top of the | |
290 generated output | |
291 @param strip_whitespace: whether extraneous whitespace should be | |
292 stripped from the output | |
293 """ | |
294 super(HTMLSerializer, self).__init__(doctype, False) | |
295 if strip_whitespace: | |
296 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, | |
297 self._NOESCAPE_ELEMS)) | |
298 | |
259 def __call__(self, stream): | 299 def __call__(self, stream): |
260 namespace = self.NAMESPACE | 300 namespace = self.NAMESPACE |
261 ns_mapping = {} | 301 ns_mapping = {} |
262 boolean_attrs = self._BOOLEAN_ATTRS | 302 boolean_attrs = self._BOOLEAN_ATTRS |
263 empty_elems = self._EMPTY_ELEMS | 303 empty_elems = self._EMPTY_ELEMS |
304 noescape_elems = self._NOESCAPE_ELEMS | |
264 have_doctype = False | 305 have_doctype = False |
306 noescape = False | |
265 | 307 |
266 stream = chain(self.preamble, stream) | 308 stream = chain(self.preamble, stream) |
267 for filter_ in self.filters: | 309 for filter_ in self.filters: |
268 stream = filter_(stream) | 310 stream = filter_(stream) |
269 stream = _PushbackIterator(stream) | 311 stream = _PushbackIterator(stream) |
312 pushback = stream.pushback | |
270 for kind, data, pos in stream: | 313 for kind, data, pos in stream: |
271 | 314 |
272 if kind is START: | 315 if kind is START: |
273 tag, attrib = data | 316 tag, attrib = data |
274 if not tag.namespace or tag in namespace: | 317 if not tag.namespace or tag in namespace: |
275 tagname = tag.localname | 318 tagname = tag.localname |
276 buf = ['<', tagname] | 319 buf = ['<', tagname] |
277 | 320 |
278 for attr, value in attrib: | 321 for attr, value in attrib: |
279 attrname = attr.localname | 322 attrname = attr.localname |
280 if not attr.namespace and not \ | 323 if not attr.namespace or attr in namespace: |
281 attrname.startswith('xml:') or \ | |
282 attr in namespace: | |
283 if attrname in boolean_attrs: | 324 if attrname in boolean_attrs: |
284 if value: | 325 if value: |
285 buf += [' ', attrname] | 326 buf += [' ', attrname] |
286 else: | 327 else: |
287 buf += [' ', attrname, '="', escape(value), '"'] | 328 buf += [' ', attrname, '="', escape(value), '"'] |
288 | 329 |
289 if tagname in empty_elems: | 330 if tagname in empty_elems: |
290 kind, data, pos = stream.next() | 331 kind, data, pos = stream.next() |
291 if kind is not END: | 332 if kind is not END: |
292 stream.pushback((kind, data, pos)) | 333 pushback((kind, data, pos)) |
293 | 334 |
294 buf += ['>'] | 335 buf += ['>'] |
295 yield Markup(''.join(buf)) | 336 yield Markup(''.join(buf)) |
337 | |
338 if tagname in noescape_elems: | |
339 noescape = True | |
296 | 340 |
297 elif kind is END: | 341 elif kind is END: |
298 tag = data | 342 tag = data |
299 if not tag.namespace or tag in namespace: | 343 if not tag.namespace or tag in namespace: |
300 yield Markup('</%s>' % tag.localname) | 344 yield Markup('</%s>' % tag.localname) |
301 | 345 |
346 noescape = False | |
347 | |
302 elif kind is TEXT: | 348 elif kind is TEXT: |
303 yield escape(data, quotes=False) | 349 if noescape: |
350 yield data | |
351 else: | |
352 yield escape(data, quotes=False) | |
304 | 353 |
305 elif kind is COMMENT: | 354 elif kind is COMMENT: |
306 yield Markup('<!--%s-->' % data) | 355 yield Markup('<!--%s-->' % data) |
307 | 356 |
308 elif kind is DOCTYPE and not have_doctype: | 357 elif kind is DOCTYPE and not have_doctype: |
329 """A filter that removes extraneous ignorable white space from the | 378 """A filter that removes extraneous ignorable white space from the |
330 stream.""" | 379 stream.""" |
331 | 380 |
332 _TRAILING_SPACE = re.compile('[ \t]+(?=\n)') | 381 _TRAILING_SPACE = re.compile('[ \t]+(?=\n)') |
333 _LINE_COLLAPSE = re.compile('\n{2,}') | 382 _LINE_COLLAPSE = re.compile('\n{2,}') |
334 | 383 _XML_SPACE = XML_NAMESPACE['space'] |
335 def __init__(self, preserve=None): | 384 |
385 def __init__(self, preserve=None, noescape=None): | |
336 """Initialize the filter. | 386 """Initialize the filter. |
337 | 387 |
338 @param preserve: a sequence of tag names for which white-space should | 388 @param preserve: a set or sequence of tag names for which white-space |
339 be ignored. | 389 should be ignored. |
390 @param noescape: a set or sequence of tag names for which text content | |
391 should not be escaped | |
392 | |
393 Both the `preserve` and `noescape` sets are expected to refer to | |
394 elements that cannot contain further child elements. | |
340 """ | 395 """ |
341 if preserve is None: | 396 if preserve is None: |
342 preserve = [] | 397 preserve = [] |
343 self.preserve = frozenset(preserve) | 398 self.preserve = frozenset(preserve) |
399 if noescape is None: | |
400 noescape = [] | |
401 self.noescape = frozenset(noescape) | |
344 | 402 |
345 def __call__(self, stream, ctxt=None): | 403 def __call__(self, stream, ctxt=None): |
346 trim_trailing_space = self._TRAILING_SPACE.sub | 404 trim_trailing_space = self._TRAILING_SPACE.sub |
347 collapse_lines = self._LINE_COLLAPSE.sub | 405 collapse_lines = self._LINE_COLLAPSE.sub |
406 xml_space = self._XML_SPACE | |
348 mjoin = Markup('').join | 407 mjoin = Markup('').join |
349 preserve = [False] | 408 preserve_elems = self.preserve |
350 append_preserve = preserve.append | 409 preserve = False |
351 pop_preserve = preserve.pop | 410 noescape_elems = self.noescape |
411 noescape = False | |
352 | 412 |
353 textbuf = [] | 413 textbuf = [] |
354 append_text = textbuf.append | 414 push_text = textbuf.append |
355 pop_text = textbuf.pop | 415 pop_text = textbuf.pop |
356 for kind, data, pos in chain(stream, [(None, None, None)]): | 416 for kind, data, pos in chain(stream, [(None, None, None)]): |
357 if kind is TEXT: | 417 if kind is TEXT: |
358 append_text(data) | 418 if noescape: |
419 data = Markup(data) | |
420 push_text(data) | |
359 else: | 421 else: |
360 if kind is START: | |
361 append_preserve(data[0] in self.preserve or | |
362 data[1].get('xml:space') == 'preserve') | |
363 if textbuf: | 422 if textbuf: |
364 if len(textbuf) > 1: | 423 if len(textbuf) > 1: |
365 text = mjoin(textbuf, escape_quotes=False) | 424 text = mjoin(textbuf, escape_quotes=False) |
366 del textbuf[:] | 425 del textbuf[:] |
367 else: | 426 else: |
368 text = escape(pop_text(), quotes=False) | 427 text = escape(pop_text(), quotes=False) |
369 if not preserve[-1]: | 428 if not preserve: |
370 text = collapse_lines('\n', trim_trailing_space('', text)) | 429 text = collapse_lines('\n', trim_trailing_space('', text)) |
371 yield TEXT, Markup(text), pos | 430 yield TEXT, Markup(text), pos |
372 if kind is END: | 431 |
373 pop_preserve() | 432 if kind is START: |
433 tag, attrib = data | |
434 if tag.localname in preserve_elems or \ | |
435 data[1].get(xml_space) == 'preserve': | |
436 preserve = True | |
437 | |
438 if tag.localname in noescape_elems: | |
439 noescape = True | |
440 | |
441 elif kind is END: | |
442 preserve = noescape = False | |
443 | |
374 if kind: | 444 if kind: |
375 yield kind, data, pos | 445 yield kind, data, pos |
376 | 446 |
377 | 447 |
378 class _PushbackIterator(object): | 448 class _PushbackIterator(object): |