# HG changeset patch # User cmlenz # Date 1182165744 0 # Node ID 49848aaa78394625b6a2dabdedb111dcd6f95ae2 # Parent c1738dec04d9937a7502ed72871590258aed4e21 Add special handling for `xml:lang` to HTML/XHTML serialization. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,9 @@ * Added new markup transformation filter contributed by Alec Thomas. This provides gorgeous jQuery-inspired stream transformation capabilities based on XPath expressions. + * When using HTML or XHTML serialization, the `xml:lang` attribute is + automatically translated to the `lang` attribute which HTML user agents + understand. Version 0.4.2 diff --git a/genshi/output.py b/genshi/output.py --- a/genshi/output.py +++ b/genshi/output.py @@ -279,6 +279,8 @@ for attr, value in attrib: if attr in boolean_attrs: value = attr + elif attr == u'xml:lang' and u'lang' not in attrib: + buf += [' lang="', escape(value), '"'] buf += [' ', attr, '="', escape(value), '"'] if kind is EMPTY: if tag in empty_elems: @@ -354,7 +356,9 @@ if strip_whitespace: self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, self._NOESCAPE_ELEMS)) - self.filters.append(NamespaceStripper('http://www.w3.org/1999/xhtml')) + self.filters.append(NamespaceFlattener(prefixes={ + 'http://www.w3.org/1999/xhtml': '' + })) def __call__(self, stream): boolean_attrs = self._BOOLEAN_ATTRS @@ -375,7 +379,10 @@ if attr in boolean_attrs: if value: buf += [' ', attr] - else: + elif ':' in attr: + if attr == 'xml:lang' and u'lang' not in attrib: + buf += [' lang="', escape(value), '"'] + elif attr != 'xmlns': buf += [' ', attr, '="', escape(value), '"'] buf.append('>') if kind is EMPTY: @@ -586,60 +593,6 @@ yield kind, data, pos -class NamespaceStripper(object): - r"""Stream filter that removes all namespace information from a stream, and - optionally strips out all tags not in a given namespace. - - :param namespace: the URI of the namespace that should not be stripped. If - not set, only elements with no namespace are included in - the output. - - >>> from genshi.input import XML - >>> xml = XML(''' - ... - ... ''') - >>> for kind, data, pos in NamespaceStripper(Namespace('NS1'))(xml): - ... print kind, repr(data) - START (u'doc', Attrs()) - TEXT u'\n ' - TEXT u'\n' - END u'doc' - """ - - def __init__(self, namespace=None): - if namespace is not None: - self.namespace = Namespace(namespace) - else: - self.namespace = {} - - def __call__(self, stream): - namespace = self.namespace - - for kind, data, pos in stream: - - if kind is START or kind is EMPTY: - tag, attrs = data - if tag.namespace and tag not in namespace: - continue - - new_attrs = [] - for attr, value in attrs: - if not attr.namespace or attr in namespace: - new_attrs.append((attr, value)) - - data = tag.localname, Attrs(new_attrs) - - elif kind is END: - if data.namespace and data not in namespace: - continue - data = data.localname - - elif kind is START_NS or kind is END_NS: - continue - - yield kind, data, pos - - class WhitespaceFilter(object): """A filter that removes extraneous ignorable white space from the stream. diff --git a/genshi/tests/output.py b/genshi/tests/output.py --- a/genshi/tests/output.py +++ b/genshi/tests/output.py @@ -194,6 +194,16 @@ class XHTMLSerializerTestCase(unittest.TestCase): + def test_xml_lang(self): + text = '

English text

' + output = XML(text).render(XHTMLSerializer) + self.assertEqual('

English text

', output) + + def test_xml_lang_nodup(self): + text = '

English text

' + output = XML(text).render(XHTMLSerializer) + self.assertEqual('

English text

', output) + def test_textarea_whitespace(self): content = '\nHey there. \n\n I am indented.\n' stream = XML('' % content) @@ -324,6 +334,16 @@ class HTMLSerializerTestCase(unittest.TestCase): + def test_xml_lang(self): + text = '

English text

' + output = XML(text).render(HTMLSerializer) + self.assertEqual('

English text

', output) + + def test_xml_lang_nodup(self): + text = '

English text

' + output = XML(text).render(HTMLSerializer) + self.assertEqual('

English text

', output) + def test_textarea_whitespace(self): content = '\nHey there. \n\n I am indented.\n' stream = XML('' % content)