# HG changeset patch # User cmlenz # Date 1172514419 0 # Node ID 3460b04daeac562f30994e119da89d3195720f53 # Parent afbf3b1d3e0ef7d72748f61a8781f87d7e5651eb Improve the handling of namespaces in serialization. diff --git a/genshi/builder.py b/genshi/builder.py --- a/genshi/builder.py +++ b/genshi/builder.py @@ -163,7 +163,7 @@ >>> from genshi.core import Namespace >>> xhtml = Namespace('http://www.w3.org/1999/xhtml') >>> print Element(xhtml.html, lang='en') -
+ @@ -247,9 +262,9 @@ self.assertEqual(text, output) def test_xhtml_namespace_prefix(self): - text = """""" __slots__ = ['tag', 'attrib'] @@ -213,14 +213,14 @@ >>> factory = ElementFactory('http://www.w3.org/1999/xhtml') >>> print factory.html(lang="en") -
+
The namespace for a specific element can be altered on an existing factory
by specifying the new namespace using item access:
>>> factory = ElementFactory()
>>> print factory.html(factory['http://www.w3.org/2000/svg'].g(id=3))
-
"""
- NAMESPACE = Namespace('http://www.w3.org/1999/xhtml')
-
_EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
'hr', 'img', 'input', 'isindex', 'link', 'meta',
'param'])
@@ -195,10 +157,17 @@
QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea')
])
+ def __init__(self, doctype=None, strip_whitespace=True,
+ namespace_prefixes=None):
+ super(XHTMLSerializer, self).__init__(doctype, False)
+ self.filters = [EmptyTagFilter()]
+ if strip_whitespace:
+ self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
+ namespace_prefixes = namespace_prefixes or {}
+ namespace_prefixes['http://www.w3.org/1999/xhtml'] = ''
+ self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes))
+
def __call__(self, stream):
- namespace = self.NAMESPACE
- ns_attrib = []
- ns_mapping = {XML_NAMESPACE.uri: ['xml']}
boolean_attrs = self._BOOLEAN_ATTRS
empty_elems = self._EMPTY_ELEMS
have_doctype = False
@@ -211,53 +180,22 @@
if kind is START or kind is EMPTY:
tag, attrib = data
-
- tagname = tag.localname
- tagns = tag.namespace
- if tagns:
- if tagns in ns_mapping:
- prefix = ns_mapping.get(tagns)
- if prefix and prefix[-1]:
- tagname = '%s:%s' % (prefix[-1], tagname)
- else:
- ns_attrib.append((QName('xmlns'), tagns))
- buf = ['<', tagname]
-
- if ns_attrib:
- attrib += tuple(ns_attrib)
+ buf = ['<', tag]
for attr, value in attrib:
- attrname = attr.localname
- attrns = attr.namespace
- if attrns:
- prefix = ns_mapping.get(attrns)
- if prefix and prefix[-1]:
- attrname = '%s:%s' % (prefix[-1], attrname)
- if attrname in boolean_attrs:
- if value:
- buf += [' ', attrname, '="', attrname, '"']
- else:
- buf += [' ', attrname, '="', escape(value), '"']
- ns_attrib = []
-
+ if attr in boolean_attrs:
+ value = attr
+ buf += [' ', attr, '="', escape(value), '"']
if kind is EMPTY:
- if (tagns and tagns != namespace.uri) \
- or tagname in empty_elems:
+ if tag in empty_elems:
buf.append(' />')
else:
- buf.append('></%s>' % tagname)
+ buf.append('></%s>' % tag)
else:
buf.append('>')
-
yield Markup(u''.join(buf))
elif kind is END:
- tag = data
- tagname = tag.localname
- if tag.namespace:
- prefix = ns_mapping.get(tag.namespace)
- if prefix and prefix[-1]:
- tagname = '%s:%s' % (prefix[-1], tagname)
- yield Markup('</%s>' % tagname)
+ yield Markup('</%s>' % data)
elif kind is TEXT:
if in_cdata:
@@ -281,22 +219,6 @@
yield Markup(u''.join(buf), *filter(None, data))
have_doctype = True
- elif kind is START_NS:
- prefix, uri = data
- if uri not in ns_mapping:
- if not prefix:
- ns_attrib.append((QName('xmlns'), uri))
- else:
- ns_attrib.append((QName('xmlns:%s' % prefix), uri))
- ns_mapping.setdefault(uri, []).append(prefix)
-
- elif kind is END_NS:
- for uri, prefix in ns_mapping.items():
- if prefix[-1] == data:
- prefix.pop()
- if not prefix:
- del ns_mapping[uri]
-
elif kind is START_CDATA:
yield Markup('
"""
- _NOESCAPE_ELEMS = frozenset([QName('script'),
- QName('http://www.w3.org/1999/xhtml}script'),
- QName('style'),
- QName('http://www.w3.org/1999/xhtml}style')])
+ _NOESCAPE_ELEMS = frozenset([
+ QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
+ QName('style'), QName('http://www.w3.org/1999/xhtml}style')
+ ])
def __init__(self, doctype=None, strip_whitespace=True):
"""Initialize the HTML serializer.
@@ -333,13 +255,13 @@
stripped from the output
"""
super(HTMLSerializer, self).__init__(doctype, False)
+ self.filters = [EmptyTagFilter()]
if strip_whitespace:
self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
self._NOESCAPE_ELEMS))
+ self.filters.append(NamespaceStripper('http://www.w3.org/1999/xhtml'))
def __call__(self, stream):
- namespace = self.NAMESPACE
- ns_mapping = {}
boolean_attrs = self._BOOLEAN_ATTRS
empty_elems = self._EMPTY_ELEMS
noescape_elems = self._NOESCAPE_ELEMS
@@ -353,35 +275,23 @@
if kind is START or kind is EMPTY:
tag, attrib = data
- if not tag.namespace or tag in namespace:
- tagname = tag.localname
- buf = ['<', tagname]
-
- for attr, value in attrib:
- attrname = attr.localname
- if not attr.namespace or attr in namespace:
- if attrname in boolean_attrs:
- if value:
- buf += [' ', attrname]
- else:
- buf += [' ', attrname, '="', escape(value), '"']
-
- buf.append('>')
-
- if kind is EMPTY:
- if tagname not in empty_elems:
- buf.append('</%s>' % tagname)
-
- yield Markup(u''.join(buf))
-
- if tagname in noescape_elems:
- noescape = True
+ buf = ['<', tag]
+ for attr, value in attrib:
+ if attr in boolean_attrs:
+ if value:
+ buf += [' ', attr]
+ else:
+ buf += [' ', attr, '="', escape(value), '"']
+ buf.append('>')
+ if kind is EMPTY:
+ if tag not in empty_elems:
+ buf.append('</%s>' % tag)
+ yield Markup(u''.join(buf))
+ if tag in noescape_elems:
+ noescape = True
elif kind is END:
- tag = data
- if not tag.namespace or tag in namespace:
- yield Markup('</%s>' % tag.localname)
-
+ yield Markup('</%s>' % data)
noescape = False
elif kind is TEXT:
@@ -406,9 +316,6 @@
yield Markup(u''.join(buf), *filter(None, data))
have_doctype = True
- elif kind is START_NS and data[1] not in ns_mapping:
- ns_mapping[data[1]] = data[0]
-
elif kind is PI:
yield Markup('<?%s %s?>' % data)
@@ -437,8 +344,9 @@
"""
def __call__(self, stream):
- for kind, data, pos in stream:
- if kind is TEXT:
+ for event in stream:
+ if event[0] is TEXT:
+ data = event[1]
if type(data) is Markup:
data = data.striptags().stripentities()
yield unicode(data)
@@ -453,25 +361,189 @@
def __call__(self, stream):
prev = (None, None, None)
- for kind, data, pos in stream:
+ for ev in stream:
if prev[0] is START:
- if kind is END:
+ if ev[0] is END:
prev = EMPTY, prev[1], prev[2]
yield prev
continue
else:
yield prev
- if kind is not START:
- yield kind, data, pos
- prev = kind, data, pos
+ if ev[0] is not START:
+ yield ev
+ prev = ev
EMPTY = EmptyTagFilter.EMPTY
+class NamespaceFlattener(object):
+ r"""Output stream filter that removes namespace information from the stream,
+ instead adding namespace attributes and prefixes as needed.
+
+ @param prefixes: optional mapping of namespace URIs to prefixes
+
+ >>> from genshi.input import XML
+ >>> xml = XML('''
diff --git a/genshi/tests/output.py b/genshi/tests/output.py
--- a/genshi/tests/output.py
+++ b/genshi/tests/output.py
@@ -176,6 +176,21 @@