# HG changeset patch
# User cmlenz
# Date 1182165744 0
# Node ID 7553760b58af756e6f385e31093819c5cc344872
# Parent  082535e5087c58f349c2971694831b0edc782673
Add special handling for `xml:lang` to HTML/XHTML serialization.

diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,6 +6,9 @@
  * Added new markup transformation filter contributed by Alec Thomas. This
    provides gorgeous jQuery-inspired stream transformation capabilities based
    on XPath expressions.
+ * When using HTML or XHTML serialization, a `lang` attribute (which HTML
+   user agents understand) is automatically generated from the `xml:lang`
+   attribute, unless the element already has a `lang` attribute.
 
 
 Version 0.4.2
diff --git a/genshi/output.py b/genshi/output.py
--- a/genshi/output.py
+++ b/genshi/output.py
@@ -279,6 +279,8 @@
                 for attr, value in attrib:
                     if attr in boolean_attrs:
                         value = attr
+                    elif attr == u'xml:lang' and u'lang' not in attrib:
+                        buf += [' lang="', escape(value), '"']
                     buf += [' ', attr, '="', escape(value), '"']
                 if kind is EMPTY:
                     if tag in empty_elems:
@@ -354,7 +356,9 @@
         if strip_whitespace:
             self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
                                                  self._NOESCAPE_ELEMS))
-        self.filters.append(NamespaceStripper('http://www.w3.org/1999/xhtml'))
+        self.filters.append(NamespaceFlattener(prefixes={
+            'http://www.w3.org/1999/xhtml': ''
+        }))
 
     def __call__(self, stream):
         boolean_attrs = self._BOOLEAN_ATTRS
@@ -375,7 +379,10 @@
                     if attr in boolean_attrs:
                         if value:
                             buf += [' ', attr]
-                    else:
+                    elif ':' in attr:
+                        if attr == 'xml:lang' and u'lang' not in attrib:
+                            buf += [' lang="', escape(value), '"']
+                    elif attr != 'xmlns':
                         buf += [' ', attr, '="', escape(value), '"']
                 buf.append('>')
                 if kind is EMPTY:
@@ -586,60 +593,6 @@
                 yield kind, data, pos
 
 
-class NamespaceStripper(object):
-    r"""Stream filter that removes all namespace information from a stream, and
-    optionally strips out all tags not in a given namespace.
-    
-    :param namespace: the URI of the namespace that should not be stripped. If
-                      not set, only elements with no namespace are included in
-                      the output.
-    
-    >>> from genshi.input import XML
-    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
-    ...   <two:item/>
-    ... </doc>''')
-    >>> for kind, data, pos in NamespaceStripper(Namespace('NS1'))(xml):
-    ...     print kind, repr(data)
-    START (u'doc', Attrs())
-    TEXT u'\n  '
-    TEXT u'\n'
-    END u'doc'
-    """
-
-    def __init__(self, namespace=None):
-        if namespace is not None:
-            self.namespace = Namespace(namespace)
-        else:
-            self.namespace = {}
-
-    def __call__(self, stream):
-        namespace = self.namespace
-
-        for kind, data, pos in stream:
-
-            if kind is START or kind is EMPTY:
-                tag, attrs = data
-                if tag.namespace and tag not in namespace:
-                    continue
-
-                new_attrs = []
-                for attr, value in attrs:
-                    if not attr.namespace or attr in namespace:
-                        new_attrs.append((attr, value))
-
-                data = tag.localname, Attrs(new_attrs)
-
-            elif kind is END:
-                if data.namespace and data not in namespace:
-                    continue
-                data = data.localname
-
-            elif kind is START_NS or kind is END_NS:
-                continue
-
-            yield kind, data, pos
-
-
 class WhitespaceFilter(object):
     """A filter that removes extraneous ignorable white space from the
     stream.
diff --git a/genshi/tests/output.py b/genshi/tests/output.py
--- a/genshi/tests/output.py
+++ b/genshi/tests/output.py
@@ -194,6 +194,16 @@
 
 class XHTMLSerializerTestCase(unittest.TestCase):
 
+    def test_xml_lang(self):
+        text = '<p xml:lang="en">English text</p>'
+        output = XML(text).render(XHTMLSerializer)
+        self.assertEqual('<p lang="en" xml:lang="en">English text</p>', output)
+
+    def test_xml_lang_nodup(self):
+        text = '<p xml:lang="en" lang="en">English text</p>'
+        output = XML(text).render(XHTMLSerializer)
+        self.assertEqual('<p xml:lang="en" lang="en">English text</p>', output)
+
     def test_textarea_whitespace(self):
         content = '\nHey there.  \n\n    I am indented.\n'
         stream = XML('<textarea name="foo">%s</textarea>' % content)
@@ -324,6 +334,16 @@
 
 class HTMLSerializerTestCase(unittest.TestCase):
 
+    def test_xml_lang(self):
+        text = '<p xml:lang="en">English text</p>'
+        output = XML(text).render(HTMLSerializer)
+        self.assertEqual('<p lang="en">English text</p>', output)
+
+    def test_xml_lang_nodup(self):
+        text = '<p lang="en" xml:lang="en">English text</p>'
+        output = XML(text).render(HTMLSerializer)
+        self.assertEqual('<p lang="en">English text</p>', output)
+
     def test_textarea_whitespace(self):
         content = '\nHey there.  \n\n    I am indented.\n'
         stream = XML('<textarea name="foo">%s</textarea>' % content)