Mercurial > genshi > mirror

--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,6 @@
 Version 0.4.2
 http://svn.edgewall.org/repos/genshi/tags/0.4.2/
-(?, from branches/stable/0.4.x)
+(Jun 20, from branches/stable/0.4.x)

  * The `doctype` parameter of the markup serializers now also accepts the "name"
    of the doctype as string, in addition to the `(name, pubid, sysid)` tuple.
@@ -12,6 +12,7 @@
  * The I18n filter now skips the content of elements that have an `xml:lang`
    attribute with a fixed string value. Basically, `xml:lang` can now be used
    as a flag to mark specific sections as not needing localization.
+ * Added plugin for message extraction via Babel (http://babel.edgewall.org/).


 Version 0.4.1
new file mode 100644
--- /dev/null
+++ b/doc/conf/docutils.ini
@@ -0,0 +1,9 @@
+[general]
+input_encoding = utf-8
+strip_comments = yes
+toc_backlinks = none
+
+[html4css1 writer]
+embed_stylesheet = no
+stylesheet = style/edgewall.css
+xml_declaration = no
new file mode 100644
--- /dev/null
+++ b/doc/conf/epydoc.ini
@@ -0,0 +1,24 @@
+[epydoc]
+
+name: Documentation Index
+url: ../index.html
+modules: genshi
+verbosity: 1
+
+# Extraction
+docformat: restructuredtext
+parse: yes
+introspect: yes
+exclude: .*\.tests.*
+inheritance: listed
+private: no
+imports: no
+include-log: no
+
+# HTML output
+output: html
+target: doc/api/
+css: doc/style/epydoc.css
+top: genshi
+frames: no
+sourcecode: no
deleted file mode 100644
--- a/doc/docutils.conf
+++ /dev/null
@@ -1,9 +0,0 @@
-[general]
-input_encoding = utf-8
-strip_comments = yes
-toc_backlinks = none
-
-[html4css1 writer]
-embed_stylesheet = no
-stylesheet = style/edgewall.css
-xml_declaration = no
deleted file mode 100644
--- a/doc/epydoc.conf
+++ /dev/null
@@ -1,24 +0,0 @@
-[epydoc]
-
-name: Documentation Index
-url: ../index.html
-modules: genshi
-verbosity: 1
-
-# Extraction
-docformat: restructuredtext
-parse: yes
-introspect: yes
-exclude: .*\.tests.*
-inheritance: listed
-private: no
-imports: no
-include-log: no
-
-# HTML output
-output: html
-target: doc/api/
-css: doc/style/epydoc.css
-top: genshi
-frames: no
-sourcecode: no
new file mode 100644
--- /dev/null
+++ b/doc/i18n.txt
@@ -0,0 +1,237 @@
+.. -*- mode: rst; encoding: utf-8 -*-
+
+=====================================
+Internationalization and Localization
+=====================================
+
+Genshi provides basic supporting infrastructure for internationalizing
+and localizing templates. That includes functionality for extracting localizable
+strings from templates, as well as a template filter that can apply translations
+to templates as they get rendered.
+
+This support is based on `gettext`_ message catalogs and the `gettext Python
+module`_. The extraction process can be used from the API level, or through the
+front-ends implemented by the `Babel`_ project, for which Genshi provides a
+plugin.
+
+.. _`gettext`: http://www.gnu.org/software/gettext/
+.. _`gettext python module`: http://docs.python.org/lib/module-gettext.html
+.. _`babel`: http://babel.edgewall.org/
+
+
+.. contents:: Contents
+   :depth: 2
+.. sectnum::
+
+
+Basics
+======
+
+The simplest way to internationalize and translate templates would be to wrap
+all localizable strings in a ``gettext()`` function call (which is often aliased
+to ``_()`` for brevity). In that case, no extra template filter is required.
+
+.. code-block:: genshi
+
+  <p>${_("Hello, world!")}</p>
+
+However, this approach results in significant “character noise” in templates,
+making them harder to read and preview.
+
+The ``genshi.filters.Translator`` filter allows you to get rid of the
+explicit `gettext`_ function calls, so you can continue to just write:
+
+.. code-block:: genshi
+
+  <p>Hello, world!</p>
+
+This text will still be extracted and translated as if you had wrapped it in a
+``_()`` call.
+
+.. note:: For parameterized or pluralizable messages, you need to continue using
+          the appropriate ``gettext`` functions.
+
+You can control which tags should be ignored by this process; for example, it
+doesn't really make sense to translate the content of the HTML
+``<script></script>`` element. Both ``<script>`` and ``<style>`` are excluded
+by default.
+
+Attribute values can also be automatically translated. The default is to
+consider the attributes ``abbr``, ``alt``, ``label``, ``prompt``, ``standby``,
+``summary``, and ``title``, which is a list that makes sense for HTML documents.
+Of course, you can tell the translator to use a different set of attribute
+names, or none at all.
+
+In addition, you can control automatic translation in your templates using the
+``xml:lang`` attribute. If the value of that attribute is a literal string, the
+contents and attributes of the element will be ignored:
+
+.. code-block:: genshi
+
+  <p xml:lang="en">Hello, world!</p>
+
+On the other hand, if the value of the ``xml:lang`` attribute contains a Python
+expression, the element contents and attributes are still considered for
+automatic translation:
+
+.. code-block:: genshi
+
+  <html xml:lang="$locale">
+    ...
+  </html>
+
+
+Extraction
+==========
+
+The ``Translator`` class provides a class method called ``extract``, which is
+a generator yielding all localizable strings found in a template or markup
+stream. This includes both literal strings in text nodes and attribute values,
+as well as strings in ``gettext()`` calls in embedded Python code. See the API
+documentation for details on how to use this method directly.
+
+This functionality is integrated into the message extraction framework provided
+by the `Babel`_ project. Babel provides a command-line interface as well as
+commands that can be used from ``setup.py`` scripts using `Setuptools`_ or
+`Distutils`_.
+
+.. _`setuptools`: http://peak.telecommunity.com/DevCenter/setuptools
+.. _`distutils`: http://docs.python.org/dist/dist.html
+
+The first thing you need to do to make Babel extract messages from Genshi
+templates is to let Babel know which files are Genshi templates. This is done
+using a “mapping configuration”, which can be stored in a configuration file,
+or specified directly in your ``setup.py``.
+
+In a configuration file, the mapping may look like this:
+
+.. code-block:: ini
+
+  # Python source
+  [python:**.py]
+
+  # Genshi templates
+  [genshi:**/templates/**.html]
+  include_attrs = title
+
+  [genshi:**/templates/**.txt]
+  template_class = genshi.template:TextTemplate
+  encoding = latin-1
+
+Please consult the Babel documentation for details on configuration.
+
+If all goes well, running the extraction with Babel should create a POT file
+containing the strings from your Genshi templates and your Python source files.
+
+.. note:: Genshi currently does not support “translator comments”, i.e. text in
+          template comments that would get added to the POT file. This support
+          may or may not be added in future versions.
+
+
+---------------------
+Configuration Options
+---------------------
+
+The Genshi extraction plugin for Babel supports the following options:
+
+``template_class``
+------------------
+The concrete ``Template`` class that the file should be loaded with. Specify
+the package/module name and the class name, separated by a colon.
+
+The default is to use ``genshi.template:MarkupTemplate``, and you'll want to
+set it to ``genshi.template:TextTemplate`` for `text templates`_.
+
+.. _`text templates`: text-templates.html
+
+``encoding``
+------------------
+The encoding of the template file. This is only used for text templates. The
+default is to assume “utf-8”.
+
+``include_attrs``
+------------------
+Whitespace-separated list of attribute names that should be considered to
+have localizable values. Only used for markup templates.
+
+``ignore_tags``
+------------------
+Whitespace-separated list of tag names that should be ignored. Only used for
+markup templates.
+
+
+Translation
+===========
+
+If you have prepared MO files for use with Genshi using the appropriate tools,
+you can access the message catalogs with the `gettext Python module`_. You'll
+probably want to create a ``gettext.GNUTranslations`` instance, and make the
+translation functions it provides available to your templates by putting them
+in the template context.
+
+The ``Translator`` filter needs to be added to the filters of the template
+(applying it as a stream filter will likely not have the desired effect).
+Furthermore it needs to be the first filter in the list, including the internal
+filters that Genshi adds itself:
+
+.. code-block:: python
+
+  from genshi.filters import Translator
+  from genshi.template import MarkupTemplate
+
+  template = MarkupTemplate("...")
+  template.filters.insert(0, Translator(translations.ugettext))
+
+If you're using ``TemplateLoader``, you should specify a callback function in
+which you add the filter:
+
+.. code-block:: python
+
+  from genshi.filters import Translator
+  from genshi.template import TemplateLoader
+
+  def template_loaded(template):
+      template.filters.insert(0, Translator(translations.ugettext))
+
+  loader = TemplateLoader('templates', callback=template_loaded)
+  template = loader.load("...")
+
+This approach ensures that the filter is not added every time the template is
+loaded, which would cause it to be applied multiple times.
+
+
+Related Considerations
+======================
+
+If you intend to produce an application that is fully prepared for an
+international audience, there are a couple of other things to keep in mind:
+
+-------
+Unicode
+-------
+
+Use ``unicode`` internally, not encoded bytestrings. Only encode/decode where
+data enters or exits the system. This means that your code works with characters
+and not just with bytes, which is an important distinction for example when
+calculating the length of a piece of text. When you need to decode/encode, it's
+probably a good idea to use UTF-8.
+
+-------------
+Date and Time
+-------------
+
+If your application uses datetime information that should be displayed to users
+in different timezones, you should try to work with UTC (universal time)
+internally. Do the conversion from and to "local time" when the data enters or
+exits the system. Make use of the Python `datetime`_ module and the third-party
+`pytz`_ package.
+
+--------------------------
+Formatting and Locale Data
+--------------------------
+
+Make sure you check out the functionality provided by the `Babel`_ project for
+things like number and date formatting, locale display strings, etc.
+
+.. _`datetime`: http://docs.python.org/lib/module-datetime.html
+.. _`pytz`: http://pytz.sourceforge.net/
--- a/doc/index.txt
+++ b/doc/index.txt
@@ -26,5 +26,6 @@
 * `Text Template Language <text-templates.html>`_
 * `Using Stream Filters <filters.html>`_
 * `Using XPath <xpath.html>`_
+* `Internationalization and Localization <i18n.html>`_
 * `Using the Templating Plugin <plugin.html>`_
 * `Generated API Documentation <api/index.html>`_
--- a/doc/style/edgewall.css
+++ b/doc/style/edgewall.css
@@ -66,3 +66,4 @@
 }
 p.admonition-title { margin-bottom: 0; text-transform: uppercase; }
 tt.docutils { background-color: transparent; }
+span.pre { white-space: normal }
--- a/genshi/filters/i18n.py
+++ b/genshi/filters/i18n.py
@@ -1,3 +1,16 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
 """Utilities for internationalization and localization of templates."""

 try:
@@ -11,7 +24,10 @@
 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \
                         XML_NAMESPACE, _ensure
 from genshi.template.base import Template, EXPR, SUB
-from genshi.template.markup import EXEC
+from genshi.template.markup import MarkupTemplate, EXEC
+
+__all__ = ['Translator', 'extract']
+__docformat__ = 'restructuredtext en'

 _LOAD_NAME = chr(opmap['LOAD_NAME'])
 _LOAD_CONST = chr(opmap['LOAD_CONST'])
@@ -286,3 +302,38 @@
                 for lineno, funcname, text in self.extract(substream,
                                                            gettext_functions):
                     yield lineno, funcname, text
+
+
+def extract(fileobj, keywords, comment_tags, options):
+    """Babel extraction method for Genshi templates.
+
+    :param fileobj: the file-like object the messages should be extracted from
+    :param keywords: a list of keywords (i.e. function names) that should be
+                     recognized as translation functions
+    :param comment_tags: a list of translator tags to search for and include
+                         in the results
+    :param options: a dictionary of additional options (optional)
+    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
+    :rtype: ``iterator``
+    """
+    template_class = options.get('template_class', MarkupTemplate)
+    if isinstance(template_class, basestring):
+        module, clsname = template_class.split(':', 1)
+        template_class = getattr(__import__(module, {}, {}, [clsname]), clsname)
+    encoding = options.get('encoding', None)
+
+    ignore_tags = options.get('ignore_tags', Translator.IGNORE_TAGS)
+    if isinstance(ignore_tags, basestring):
+        ignore_tags = ignore_tags.split()
+    ignore_tags = [QName(tag) for tag in ignore_tags]
+    include_attrs = options.get('include_attrs', Translator.INCLUDE_ATTRS)
+    if isinstance(include_attrs, basestring):
+        include_attrs = include_attrs.split()
+    include_attrs = [QName(attr) for attr in include_attrs]
+
+    tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None),
+                          encoding=encoding)
+    translator = Translator(None, ignore_tags, include_attrs)
+    for lineno, func, message in translator.extract(tmpl.stream,
+                                                    gettext_functions=keywords):
+        yield lineno, func, message, []
--- a/genshi/filters/tests/i18n.py
+++ b/genshi/filters/tests/i18n.py
@@ -16,7 +16,7 @@
 import unittest

 from genshi.template import MarkupTemplate
-from genshi.filters.i18n import Translator
+from genshi.filters.i18n import Translator, extract


 class TranslatorTestCase(unittest.TestCase):
@@ -84,10 +84,52 @@
         self.assertEqual((2, None, u'(c) 2007 Edgewall Software'), messages[0])


+class ExtractTestCase(unittest.TestCase):
+
+    def test_markup_template_extraction(self):
+        buf = StringIO("""<html xmlns:py="http://genshi.edgewall.org/">
+          <head>
+            <title>Example</title>
+          </head>
+          <body>
+            <h1>Example</h1>
+            <p>${_("Hello, %(name)s") % dict(name=username)}</p>
+            <p>${ngettext("You have %d item", "You have %d items", num)}</p>
+          </body>
+        </html>""")
+        results = list(extract(buf, ['_', 'ngettext'], [], {}))
+        self.assertEqual([
+            (3, None, u'Example', []),
+            (6, None, u'Example', []),
+            (7, '_', u'Hello, %(name)s', []),
+            (8, 'ngettext', (u'You have %d item', u'You have %d items'), []),
+        ], results)
+
+    def test_text_template_extraction(self):
+        buf = StringIO("""${_("Dear %(name)s") % {'name': name}},
+
+        ${ngettext("Your item:", "Your items", len(items))}
+        #for item in items
+         * $item
+        #end
+
+        All the best,
+        Foobar""")
+        results = list(extract(buf, ['_', 'ngettext'], [], {
+            'template_class': 'genshi.template:TextTemplate'
+        }))
+        self.assertEqual([
+            (1, '_', u'Dear %(name)s', []),
+            (3, 'ngettext', (u'Your item:', u'Your items'), []),
+            (7, None, u'All the best,\n        Foobar', [])
+        ], results)
+
+
 def suite():
     suite = unittest.TestSuite()
+    suite.addTests(doctest.DocTestSuite(Translator.__module__))
     suite.addTest(unittest.makeSuite(TranslatorTestCase, 'test'))
-    suite.addTests(doctest.DocTestSuite(Translator.__module__))
+    suite.addTest(unittest.makeSuite(ExtractTestCase, 'test'))
     return suite

 if __name__ == '__main__':
--- a/setup.py
+++ b/setup.py
@@ -25,10 +25,17 @@

 class build_doc(Command):
     description = 'Builds the documentation'
-    user_options = []
+    user_options = [
+        ('force', None,
+         "force regeneration even if no reStructuredText files have changed"),
+        ('without-apidocs', None,
+         "whether to skip the generation of API documentation"),
+    ]
+    boolean_options = ['force', 'without-apidocs']

     def initialize_options(self):
-        pass
+        self.force = False
+        self.without_apidocs = False

     def finalize_options(self):
         pass
@@ -38,8 +45,8 @@
         from docutils.nodes import raw
         from docutils.parsers import rst

-        docutils_conf = os.path.join('doc', 'docutils.conf')
-        epydoc_conf = os.path.join('doc', 'epydoc.conf')
+        docutils_conf = os.path.join('doc', 'conf', 'docutils.ini')
+        epydoc_conf = os.path.join('doc', 'conf', 'epydoc.ini')

         try:
             from pygments import highlight
@@ -60,26 +67,28 @@

         for source in glob('doc/*.txt'):
             dest = os.path.splitext(source)[0] + '.html'
-            if not os.path.exists(dest) or \
-                   os.path.getmtime(dest) < os.path.getmtime(source):
+            if self.force or not os.path.exists(dest) or \
+                    os.path.getmtime(dest) < os.path.getmtime(source):
                 print 'building documentation file %s' % dest
                 publish_cmdline(writer_name='html',
                                 argv=['--config=%s' % docutils_conf, source,
                                       dest])

-        try:
-            from epydoc import cli
-            old_argv = sys.argv[1:]
-            sys.argv[1:] = [
-                '--config=%s' % epydoc_conf,
-                '--no-private', # epydoc bug, not read from config
-                '--simple-term',
-                '--verbose'
-            ]
-            cli.cli()
-            sys.argv[1:] = old_argv
-        except ImportError:
-            print 'epydoc not installed, skipping API documentation.'
+        if not self.without_apidocs:
+            try:
+                from epydoc import cli
+                old_argv = sys.argv[1:]
+                sys.argv[1:] = [
+                    '--config=%s' % epydoc_conf,
+                    '--no-private', # epydoc bug, not read from config
+                    '--simple-term',
+                    '--verbose'
+                ]
+                cli.cli()
+                sys.argv[1:] = old_argv
+
+            except ImportError:
+                print 'epydoc not installed, skipping API documentation.'


 class test_doc(Command):
@@ -130,8 +139,14 @@
     packages = ['genshi', 'genshi.filters', 'genshi.template'],
     test_suite = 'genshi.tests.suite',

-    extras_require = {'plugin': ['setuptools>=0.6a2']},
+    extras_require = {
+        'i18n': ['Babel>=0.8'],
+        'plugin': ['setuptools>=0.6a2']
+    },
     entry_points = """
+    [babel.extractors]
+    genshi = genshi.filters.i18n:extract[i18n]
+
     [python.templating.engines]
     genshi = genshi.template.plugin:MarkupTemplateEnginePlugin[plugin]
     genshi-markup = genshi.template.plugin:MarkupTemplateEnginePlugin[plugin]