genshi/mirror: markup/output.py comparison

comparison markup/output.py @ 141:520a5b7dd6d2 trunk

* No escaping of text inside `<script>` or `<style>` elements in HTML output (see #24).
* Fix parsing of the `xml:space` attribute.

author	cmlenz
date	Thu, 10 Aug 2006 15:21:55 +0000
parents	c1f4390d50f8
children	3d4c214c979a

comparison

equal deleted inserted replaced

-:c1f4390d50f8
+:520a5b7dd6d2
 frozenset
 except NameError:
 from sets import ImmutableSet as frozenset
 import re
-from markup.core import escape, Markup, Namespace, QName
+from markup.core import escape, Markup, Namespace, QName, XML_NAMESPACE
 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, COMMENT, PI
 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer']
 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
 def __call__(self, stream):
 have_doctype = False
 ns_attrib = []
-ns_mapping = {}
+ns_mapping = {XML_NAMESPACE.uri: 'xml'}
 stream = chain(self.preamble, stream)
 for filter_ in self.filters:
 stream = filter_(stream)
 stream = _PushbackIterator(stream)
 'nohref', 'noresize', 'noshade', 'nowrap'])
 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')])
 def __call__(self, stream):
 namespace = self.NAMESPACE
-ns_mapping = {}
+ns_attrib = []
+ns_mapping = {XML_NAMESPACE.uri: 'xml'}
 boolean_attrs = self._BOOLEAN_ATTRS
 empty_elems = self._EMPTY_ELEMS
 have_doctype = False
 stream = chain(self.preamble, stream)
 pushback = stream.pushback
 for kind, data, pos in stream:
 if kind is START:
 tag, attrib = data
-if not tag.namespace or tag in namespace:
 tagname = tag.localname
-buf = ['<', tagname]
+namespace = tag.namespace
+if namespace:
-for attr, value in attrib:
+if namespace in ns_mapping:
-if not attr.namespace or attr in namespace:
+prefix = ns_mapping[namespace]
-attrname = attr.localname
+if prefix:
-if attrname in boolean_attrs:
+tagname = '%s:%s' % (prefix, tagname)
-if value:
+else:
-buf += [' ', attrname, '="', attrname, '"']
+ns_attrib.append((QName('xmlns'), namespace))
-else:
+buf = ['<', tagname]
-buf += [' ', attrname, '="', escape(value), '"']
+for attr, value in attrib + ns_attrib:
-if tagname in empty_elems:
+attrname = attr.localname
-kind, data, pos = stream.next()
+if attr.namespace:
-if kind is END:
+prefix = ns_mapping.get(attr.namespace)
-buf += [' />']
+if prefix:
-else:
+attrname = '%s:%s' % (prefix, attrname)
-buf += ['>']
+if attrname in boolean_attrs:
-pushback((kind, data, pos))
+if value:
+buf += [' ', attrname, '="', attrname, '"']
+else:
+buf += [' ', attrname, '="', escape(value), '"']
+ns_attrib = []
+if (not tag.namespace or tag in namespace) and \
+tagname in empty_elems:
+kind, data, pos = stream.next()
+if kind is END:
+buf += [' />']
 else:
 buf += ['>']
+pushback((kind, data, pos))
-yield Markup(''.join(buf))
+else:
+buf += ['>']
+yield Markup(''.join(buf))
 elif kind is END:
 tag = data
-if not tag.namespace or tag in namespace:
+tagname = tag.localname
-yield Markup('</%s>' % tag.localname)
+if tag.namespace:
+prefix = ns_mapping.get(tag.namespace)
+if prefix:
+tagname = '%s:%s' % (prefix, tag.localname)
+yield Markup('</%s>' % tagname)
 elif kind is TEXT:
 yield escape(data, quotes=False)
 elif kind is COMMENT:
 buf += [' "%s"']
 buf += ['>\n']
 yield Markup(''.join(buf), *filter(None, data))
 have_doctype = True
-elif kind is START_NS and data[1] not in ns_mapping:
+elif kind is START_NS:
-ns_mapping[data[1]] = data[0]
+prefix, uri = data
+if uri not in ns_mapping:
+ns_mapping[uri] = prefix
+if not prefix:
+ns_attrib.append((QName('xmlns'), uri))
+else:
+ns_attrib.append((QName('xmlns:%s' % prefix), uri))
 elif kind is PI:
 yield Markup('<?%s %s?>' % data)
 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
 >>> print ''.join(HTMLSerializer()(elem.generate()))
 <div><a href="foo"></a><br><hr noshade></div>
 """
+_NOESCAPE_ELEMS = frozenset([QName('script'), QName('style')])
+def __init__(self, doctype=None, strip_whitespace=True):
+"""Initialize the HTML serializer.
+@param doctype: a `(name, pubid, sysid)` tuple that represents the
+DOCTYPE declaration that should be included at the top of the
+generated output
+@param strip_whitespace: whether extraneous whitespace should be
+stripped from the output
+"""
+super(HTMLSerializer, self).__init__(doctype, False)
+if strip_whitespace:
+self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
+self._NOESCAPE_ELEMS))
 def __call__(self, stream):
 namespace = self.NAMESPACE
 ns_mapping = {}
 boolean_attrs = self._BOOLEAN_ATTRS
 empty_elems = self._EMPTY_ELEMS
+noescape_elems = self._NOESCAPE_ELEMS
 have_doctype = False
+noescape = False
 stream = chain(self.preamble, stream)
 for filter_ in self.filters:
 stream = filter_(stream)
 stream = _PushbackIterator(stream)
+pushback = stream.pushback
 for kind, data, pos in stream:
 if kind is START:
 tag, attrib = data
 if not tag.namespace or tag in namespace:
 tagname = tag.localname
 buf = ['<', tagname]
 for attr, value in attrib:
 attrname = attr.localname
-if not attr.namespace and not \
+if not attr.namespace or attr in namespace:
-attrname.startswith('xml:') or \
-attr in namespace:
 if attrname in boolean_attrs:
 if value:
 buf += [' ', attrname]
 else:
 buf += [' ', attrname, '="', escape(value), '"']
 if tagname in empty_elems:
 kind, data, pos = stream.next()
 if kind is not END:
-stream.pushback((kind, data, pos))
+pushback((kind, data, pos))
 buf += ['>']
 yield Markup(''.join(buf))
+if tagname in noescape_elems:
+noescape = True
 elif kind is END:
 tag = data
 if not tag.namespace or tag in namespace:
 yield Markup('</%s>' % tag.localname)
+noescape = False
 elif kind is TEXT:
-yield escape(data, quotes=False)
+if noescape:
+yield data
+else:
+yield escape(data, quotes=False)
 elif kind is COMMENT:
 yield Markup('<!--%s-->' % data)
 elif kind is DOCTYPE and not have_doctype:
 """A filter that removes extraneous ignorable white space from the
 stream."""
 _TRAILING_SPACE = re.compile('[ \t]+(?=\n)')
 _LINE_COLLAPSE = re.compile('\n{2,}')
+_XML_SPACE = XML_NAMESPACE['space']
-def __init__(self, preserve=None):
+def __init__(self, preserve=None, noescape=None):
 """Initialize the filter.
-@param preserve: a sequence of tag names for which white-space should
+@param preserve: a set or sequence of tag names for which white-space
-be ignored.
+should be ignored.
+@param noescape: a set or sequence of tag names for which text content
+should not be escaped
+Both the `preserve` and `noescape` sets are expected to refer to
+elements that cannot contain further child elements.
 """
 if preserve is None:
 preserve = []
 self.preserve = frozenset(preserve)
+if noescape is None:
+noescape = []
+self.noescape = frozenset(noescape)
 def __call__(self, stream, ctxt=None):
 trim_trailing_space = self._TRAILING_SPACE.sub
 collapse_lines = self._LINE_COLLAPSE.sub
+xml_space = self._XML_SPACE
 mjoin = Markup('').join
-preserve = [False]
+preserve_elems = self.preserve
-append_preserve = preserve.append
+preserve = False
-pop_preserve = preserve.pop
+noescape_elems = self.noescape
+noescape = False
 textbuf = []
-append_text = textbuf.append
+push_text = textbuf.append
 pop_text = textbuf.pop
 for kind, data, pos in chain(stream, [(None, None, None)]):
 if kind is TEXT:
-append_text(data)
+if noescape:
+data = Markup(data)
+push_text(data)
 else:
-if kind is START:
-append_preserve(data[0] in self.preserve or
-data[1].get('xml:space') == 'preserve')
 if textbuf:
 if len(textbuf) > 1:
 text = mjoin(textbuf, escape_quotes=False)
 del textbuf[:]
 else:
 text = escape(pop_text(), quotes=False)
-if not preserve[-1]:
+if not preserve:
 text = collapse_lines('\n', trim_trailing_space('', text))
 yield TEXT, Markup(text), pos
-if kind is END:
-pop_preserve()
+if kind is START:
+tag, attrib = data
+if tag.localname in preserve_elems or \
+data[1].get(xml_space) == 'preserve':
+preserve = True
+if tag.localname in noescape_elems:
+noescape = True
+elif kind is END:
+preserve = noescape = False
 if kind:
 yield kind, data, pos
 class _PushbackIterator(object):

Mercurial > genshi > mirror

comparison markup/output.py @ 141:520a5b7dd6d2 trunk