comparison markup/output.py @ 141:520a5b7dd6d2 trunk

* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
* Fix parsing of `xml:space` attribute.
author cmlenz
date Thu, 10 Aug 2006 15:21:55 +0000
parents c1f4390d50f8
children 3d4c214c979a
comparison
equal deleted inserted replaced
140:c1f4390d50f8 141:520a5b7dd6d2
20 frozenset 20 frozenset
21 except NameError: 21 except NameError:
22 from sets import ImmutableSet as frozenset 22 from sets import ImmutableSet as frozenset
23 import re 23 import re
24 24
25 from markup.core import escape, Markup, Namespace, QName 25 from markup.core import escape, Markup, Namespace, QName, XML_NAMESPACE
26 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, COMMENT, PI 26 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, COMMENT, PI
27 27
28 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer'] 28 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer']
29 29
30 30
72 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) 72 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
73 73
74 def __call__(self, stream): 74 def __call__(self, stream):
75 have_doctype = False 75 have_doctype = False
76 ns_attrib = [] 76 ns_attrib = []
77 ns_mapping = {} 77 ns_mapping = {XML_NAMESPACE.uri: 'xml'}
78 78
79 stream = chain(self.preamble, stream) 79 stream = chain(self.preamble, stream)
80 for filter_ in self.filters: 80 for filter_ in self.filters:
81 stream = filter_(stream) 81 stream = filter_(stream)
82 stream = _PushbackIterator(stream) 82 stream = _PushbackIterator(stream)
175 'nohref', 'noresize', 'noshade', 'nowrap']) 175 'nohref', 'noresize', 'noshade', 'nowrap'])
176 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')]) 176 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')])
177 177
178 def __call__(self, stream): 178 def __call__(self, stream):
179 namespace = self.NAMESPACE 179 namespace = self.NAMESPACE
180 ns_mapping = {} 180 ns_attrib = []
181 ns_mapping = {XML_NAMESPACE.uri: 'xml'}
181 boolean_attrs = self._BOOLEAN_ATTRS 182 boolean_attrs = self._BOOLEAN_ATTRS
182 empty_elems = self._EMPTY_ELEMS 183 empty_elems = self._EMPTY_ELEMS
183 have_doctype = False 184 have_doctype = False
184 185
185 stream = chain(self.preamble, stream) 186 stream = chain(self.preamble, stream)
189 pushback = stream.pushback 190 pushback = stream.pushback
190 for kind, data, pos in stream: 191 for kind, data, pos in stream:
191 192
192 if kind is START: 193 if kind is START:
193 tag, attrib = data 194 tag, attrib = data
194 if not tag.namespace or tag in namespace: 195
195 tagname = tag.localname 196 tagname = tag.localname
196 buf = ['<', tagname] 197 namespace = tag.namespace
197 198 if namespace:
198 for attr, value in attrib: 199 if namespace in ns_mapping:
199 if not attr.namespace or attr in namespace: 200 prefix = ns_mapping[namespace]
200 attrname = attr.localname 201 if prefix:
201 if attrname in boolean_attrs: 202 tagname = '%s:%s' % (prefix, tagname)
202 if value: 203 else:
203 buf += [' ', attrname, '="', attrname, '"'] 204 ns_attrib.append((QName('xmlns'), namespace))
204 else: 205 buf = ['<', tagname]
205 buf += [' ', attrname, '="', escape(value), '"'] 206
206 207 for attr, value in attrib + ns_attrib:
207 if tagname in empty_elems: 208 attrname = attr.localname
208 kind, data, pos = stream.next() 209 if attr.namespace:
209 if kind is END: 210 prefix = ns_mapping.get(attr.namespace)
210 buf += [' />'] 211 if prefix:
211 else: 212 attrname = '%s:%s' % (prefix, attrname)
212 buf += ['>'] 213 if attrname in boolean_attrs:
213 pushback((kind, data, pos)) 214 if value:
215 buf += [' ', attrname, '="', attrname, '"']
216 else:
217 buf += [' ', attrname, '="', escape(value), '"']
218 ns_attrib = []
219
220 if (not tag.namespace or tag in namespace) and \
221 tagname in empty_elems:
222 kind, data, pos = stream.next()
223 if kind is END:
224 buf += [' />']
214 else: 225 else:
215 buf += ['>'] 226 buf += ['>']
216 227 pushback((kind, data, pos))
217 yield Markup(''.join(buf)) 228 else:
229 buf += ['>']
230
231 yield Markup(''.join(buf))
218 232
219 elif kind is END: 233 elif kind is END:
220 tag = data 234 tag = data
221 if not tag.namespace or tag in namespace: 235 tagname = tag.localname
222 yield Markup('</%s>' % tag.localname) 236 if tag.namespace:
237 prefix = ns_mapping.get(tag.namespace)
238 if prefix:
239 tagname = '%s:%s' % (prefix, tag.localname)
240 yield Markup('</%s>' % tagname)
223 241
224 elif kind is TEXT: 242 elif kind is TEXT:
225 yield escape(data, quotes=False) 243 yield escape(data, quotes=False)
226 244
227 elif kind is COMMENT: 245 elif kind is COMMENT:
238 buf += [' "%s"'] 256 buf += [' "%s"']
239 buf += ['>\n'] 257 buf += ['>\n']
240 yield Markup(''.join(buf), *filter(None, data)) 258 yield Markup(''.join(buf), *filter(None, data))
241 have_doctype = True 259 have_doctype = True
242 260
243 elif kind is START_NS and data[1] not in ns_mapping: 261 elif kind is START_NS:
244 ns_mapping[data[1]] = data[0] 262 prefix, uri = data
263 if uri not in ns_mapping:
264 ns_mapping[uri] = prefix
265 if not prefix:
266 ns_attrib.append((QName('xmlns'), uri))
267 else:
268 ns_attrib.append((QName('xmlns:%s' % prefix), uri))
245 269
246 elif kind is PI: 270 elif kind is PI:
247 yield Markup('<?%s %s?>' % data) 271 yield Markup('<?%s %s?>' % data)
248 272
249 273
254 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) 278 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
255 >>> print ''.join(HTMLSerializer()(elem.generate())) 279 >>> print ''.join(HTMLSerializer()(elem.generate()))
256 <div><a href="foo"></a><br><hr noshade></div> 280 <div><a href="foo"></a><br><hr noshade></div>
257 """ 281 """
258 282
283 _NOESCAPE_ELEMS = frozenset([QName('script'), QName('style')])
284
285 def __init__(self, doctype=None, strip_whitespace=True):
286 """Initialize the HTML serializer.
287
288 @param doctype: a `(name, pubid, sysid)` tuple that represents the
289 DOCTYPE declaration that should be included at the top of the
290 generated output
291 @param strip_whitespace: whether extraneous whitespace should be
292 stripped from the output
293 """
294 super(HTMLSerializer, self).__init__(doctype, False)
295 if strip_whitespace:
296 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
297 self._NOESCAPE_ELEMS))
298
259 def __call__(self, stream): 299 def __call__(self, stream):
260 namespace = self.NAMESPACE 300 namespace = self.NAMESPACE
261 ns_mapping = {} 301 ns_mapping = {}
262 boolean_attrs = self._BOOLEAN_ATTRS 302 boolean_attrs = self._BOOLEAN_ATTRS
263 empty_elems = self._EMPTY_ELEMS 303 empty_elems = self._EMPTY_ELEMS
304 noescape_elems = self._NOESCAPE_ELEMS
264 have_doctype = False 305 have_doctype = False
306 noescape = False
265 307
266 stream = chain(self.preamble, stream) 308 stream = chain(self.preamble, stream)
267 for filter_ in self.filters: 309 for filter_ in self.filters:
268 stream = filter_(stream) 310 stream = filter_(stream)
269 stream = _PushbackIterator(stream) 311 stream = _PushbackIterator(stream)
312 pushback = stream.pushback
270 for kind, data, pos in stream: 313 for kind, data, pos in stream:
271 314
272 if kind is START: 315 if kind is START:
273 tag, attrib = data 316 tag, attrib = data
274 if not tag.namespace or tag in namespace: 317 if not tag.namespace or tag in namespace:
275 tagname = tag.localname 318 tagname = tag.localname
276 buf = ['<', tagname] 319 buf = ['<', tagname]
277 320
278 for attr, value in attrib: 321 for attr, value in attrib:
279 attrname = attr.localname 322 attrname = attr.localname
280 if not attr.namespace and not \ 323 if not attr.namespace or attr in namespace:
281 attrname.startswith('xml:') or \
282 attr in namespace:
283 if attrname in boolean_attrs: 324 if attrname in boolean_attrs:
284 if value: 325 if value:
285 buf += [' ', attrname] 326 buf += [' ', attrname]
286 else: 327 else:
287 buf += [' ', attrname, '="', escape(value), '"'] 328 buf += [' ', attrname, '="', escape(value), '"']
288 329
289 if tagname in empty_elems: 330 if tagname in empty_elems:
290 kind, data, pos = stream.next() 331 kind, data, pos = stream.next()
291 if kind is not END: 332 if kind is not END:
292 stream.pushback((kind, data, pos)) 333 pushback((kind, data, pos))
293 334
294 buf += ['>'] 335 buf += ['>']
295 yield Markup(''.join(buf)) 336 yield Markup(''.join(buf))
337
338 if tagname in noescape_elems:
339 noescape = True
296 340
297 elif kind is END: 341 elif kind is END:
298 tag = data 342 tag = data
299 if not tag.namespace or tag in namespace: 343 if not tag.namespace or tag in namespace:
300 yield Markup('</%s>' % tag.localname) 344 yield Markup('</%s>' % tag.localname)
301 345
346 noescape = False
347
302 elif kind is TEXT: 348 elif kind is TEXT:
303 yield escape(data, quotes=False) 349 if noescape:
350 yield data
351 else:
352 yield escape(data, quotes=False)
304 353
305 elif kind is COMMENT: 354 elif kind is COMMENT:
306 yield Markup('<!--%s-->' % data) 355 yield Markup('<!--%s-->' % data)
307 356
308 elif kind is DOCTYPE and not have_doctype: 357 elif kind is DOCTYPE and not have_doctype:
329 """A filter that removes extraneous ignorable white space from the 378 """A filter that removes extraneous ignorable white space from the
330 stream.""" 379 stream."""
331 380
332 _TRAILING_SPACE = re.compile('[ \t]+(?=\n)') 381 _TRAILING_SPACE = re.compile('[ \t]+(?=\n)')
333 _LINE_COLLAPSE = re.compile('\n{2,}') 382 _LINE_COLLAPSE = re.compile('\n{2,}')
334 383 _XML_SPACE = XML_NAMESPACE['space']
335 def __init__(self, preserve=None): 384
385 def __init__(self, preserve=None, noescape=None):
336 """Initialize the filter. 386 """Initialize the filter.
337 387
338 @param preserve: a sequence of tag names for which white-space should 388 @param preserve: a set or sequence of tag names for which white-space
339 be ignored. 389 should be ignored.
390 @param noescape: a set or sequence of tag names for which text content
391 should not be escaped
392
393 Both the `preserve` and `noescape` sets are expected to refer to
394 elements that cannot contain further child elements.
340 """ 395 """
341 if preserve is None: 396 if preserve is None:
342 preserve = [] 397 preserve = []
343 self.preserve = frozenset(preserve) 398 self.preserve = frozenset(preserve)
399 if noescape is None:
400 noescape = []
401 self.noescape = frozenset(noescape)
344 402
345 def __call__(self, stream, ctxt=None): 403 def __call__(self, stream, ctxt=None):
346 trim_trailing_space = self._TRAILING_SPACE.sub 404 trim_trailing_space = self._TRAILING_SPACE.sub
347 collapse_lines = self._LINE_COLLAPSE.sub 405 collapse_lines = self._LINE_COLLAPSE.sub
406 xml_space = self._XML_SPACE
348 mjoin = Markup('').join 407 mjoin = Markup('').join
349 preserve = [False] 408 preserve_elems = self.preserve
350 append_preserve = preserve.append 409 preserve = False
351 pop_preserve = preserve.pop 410 noescape_elems = self.noescape
411 noescape = False
352 412
353 textbuf = [] 413 textbuf = []
354 append_text = textbuf.append 414 push_text = textbuf.append
355 pop_text = textbuf.pop 415 pop_text = textbuf.pop
356 for kind, data, pos in chain(stream, [(None, None, None)]): 416 for kind, data, pos in chain(stream, [(None, None, None)]):
357 if kind is TEXT: 417 if kind is TEXT:
358 append_text(data) 418 if noescape:
419 data = Markup(data)
420 push_text(data)
359 else: 421 else:
360 if kind is START:
361 append_preserve(data[0] in self.preserve or
362 data[1].get('xml:space') == 'preserve')
363 if textbuf: 422 if textbuf:
364 if len(textbuf) > 1: 423 if len(textbuf) > 1:
365 text = mjoin(textbuf, escape_quotes=False) 424 text = mjoin(textbuf, escape_quotes=False)
366 del textbuf[:] 425 del textbuf[:]
367 else: 426 else:
368 text = escape(pop_text(), quotes=False) 427 text = escape(pop_text(), quotes=False)
369 if not preserve[-1]: 428 if not preserve:
370 text = collapse_lines('\n', trim_trailing_space('', text)) 429 text = collapse_lines('\n', trim_trailing_space('', text))
371 yield TEXT, Markup(text), pos 430 yield TEXT, Markup(text), pos
372 if kind is END: 431
373 pop_preserve() 432 if kind is START:
433 tag, attrib = data
434 if tag.localname in preserve_elems or \
435 data[1].get(xml_space) == 'preserve':
436 preserve = True
437
438 if tag.localname in noescape_elems:
439 noescape = True
440
441 elif kind is END:
442 preserve = noescape = False
443
374 if kind: 444 if kind:
375 yield kind, data, pos 445 yield kind, data, pos
376 446
377 447
378 class _PushbackIterator(object): 448 class _PushbackIterator(object):
Copyright (C) 2012-2017 Edgewall Software