Mercurial > genshi > genshi-test
changeset 532:f9ad40cae2f7 stable-0.4.x 0.4.2
Ported [634:637] to 0.4.x branch.
author | cmlenz |
---|---|
date | Wed, 20 Jun 2007 10:50:18 +0000 |
parents | 7405da82b5c1 |
children | a40ad8334f0f |
files | ChangeLog doc/conf/docutils.ini doc/conf/epydoc.ini doc/docutils.conf doc/epydoc.conf doc/i18n.txt doc/index.txt doc/style/edgewall.css genshi/filters/i18n.py genshi/filters/tests/i18n.py setup.py |
diffstat | 11 files changed, 405 insertions(+), 57 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,6 @@ Version 0.4.2 http://svn.edgewall.org/repos/genshi/tags/0.4.2/ -(?, from branches/stable/0.4.x) +(Jun 20, from branches/stable/0.4.x) * The `doctype` parameter of the markup serializers now also accepts the "name" of the doctype as string, in addition to the `(name, pubid, sysid)` tuple. @@ -12,6 +12,7 @@ * The I18n filter now skips the content of elements that have an `xml:lang` attribute with a fixed string value. Basically, `xml:lang` can now be used as a flag to mark specific sections as not needing localization. + * Added plugin for message extraction via Babel (http://babel.edgewall.org/). Version 0.4.1
new file mode 100644 --- /dev/null +++ b/doc/conf/docutils.ini @@ -0,0 +1,9 @@ +[general] +input_encoding = utf-8 +strip_comments = yes +toc_backlinks = none + +[html4css1 writer] +embed_stylesheet = no +stylesheet = style/edgewall.css +xml_declaration = no
new file mode 100644 --- /dev/null +++ b/doc/conf/epydoc.ini @@ -0,0 +1,24 @@ +[epydoc] + +name: Documentation Index +url: ../index.html +modules: genshi +verbosity: 1 + +# Extraction +docformat: restructuredtext +parse: yes +introspect: yes +exclude: .*\.tests.* +inheritance: listed +private: no +imports: no +include-log: no + +# HTML output +output: html +target: doc/api/ +css: doc/style/epydoc.css +top: genshi +frames: no +sourcecode: no
deleted file mode 100644 --- a/doc/docutils.conf +++ /dev/null @@ -1,9 +0,0 @@ -[general] -input_encoding = utf-8 -strip_comments = yes -toc_backlinks = none - -[html4css1 writer] -embed_stylesheet = no -stylesheet = style/edgewall.css -xml_declaration = no
deleted file mode 100644 --- a/doc/epydoc.conf +++ /dev/null @@ -1,24 +0,0 @@ -[epydoc] - -name: Documentation Index -url: ../index.html -modules: genshi -verbosity: 1 - -# Extraction -docformat: restructuredtext -parse: yes -introspect: yes -exclude: .*\.tests.* -inheritance: listed -private: no -imports: no -include-log: no - -# HTML output -output: html -target: doc/api/ -css: doc/style/epydoc.css -top: genshi -frames: no -sourcecode: no
new file mode 100644 --- /dev/null +++ b/doc/i18n.txt @@ -0,0 +1,237 @@ +.. -*- mode: rst; encoding: utf-8 -*- + +===================================== +Internationalization and Localization +===================================== + +Genshi provides basic supporting infrastructure for internationalizing +and localizing templates. That includes functionality for extracting localizable +strings from templates, as well as a template filter that can apply translations +to templates as they get rendered. + +This support is based on `gettext`_ message catalogs and the `gettext Python +module`_. The extraction process can be used from the API level, or through the +front-ends implemented by the `Babel`_ project, for which Genshi provides a +plugin. + +.. _`gettext`: http://www.gnu.org/software/gettext/ +.. _`gettext python module`: http://docs.python.org/lib/module-gettext.html +.. _`babel`: http://babel.edgewall.org/ + + +.. contents:: Contents + :depth: 2 +.. sectnum:: + + +Basics +====== + +The simplest way to internationalize and translate templates would be to wrap +all localizable strings in a ``gettext()`` function call (which is often aliased +to ``_()`` for brevity). In that case, no extra template filter is required. + +.. code-block:: genshi + + <p>${_("Hello, world!")}</p> + +However, this approach results in significant “character noise” in templates, +making them harder to read and preview. + +The ``genshi.filters.Translator`` filter allows you to get rid of the +explicit `gettext`_ function calls, so you can continue to just write: + +.. code-block:: genshi + + <p>Hello, world!</p> + +This text will still be extracted and translated as if you had wrapped it in a +``_()`` call. + +.. note:: For parameterized or pluralizable messages, you need to continue using + the appropriate ``gettext`` functions. 
+ +You can control which tags should be ignored by this process; for example, it +doesn't really make sense to translate the content of the HTML +``<script></script>`` element. Both ``<script>`` and ``<style>`` are excluded +by default. + +Attribute values can also be automatically translated. The default is to +consider the attributes ``abbr``, ``alt``, ``label``, ``prompt``, ``standby``, +``summary``, and ``title``, which is a list that makes sense for HTML documents. +Of course, you can tell the translator to use a different set of attribute +names, or none at all. + +In addition, you can control automatic translation in your templates using the +``xml:lang`` attribute. If the value of that attribute is a literal string, the +contents and attributes of the element will be ignored: + +.. code-block:: genshi + + <p xml:lang="en">Hello, world!</p> + +On the other hand, if the value of the ``xml:lang`` attribute contains a Python +expression, the element contents and attributes are still considered for +automatic translation: + +.. code-block:: genshi + + <html xml:lang="$locale"> + ... + </html> + + +Extraction +========== + +The ``Translator`` class provides a class method called ``extract``, which is +a generator yielding all localizable strings found in a template or markup +stream. This includes both literal strings in text nodes and attribute values, +as well as strings in ``gettext()`` calls in embedded Python code. See the API +documentation for details on how to use this method directly. + +This functionality is integrated into the message extraction framework provided +by the `Babel`_ project. Babel provides a command-line interface as well as +commands that can be used from ``setup.py`` scripts using `Setuptools`_ or +`Distutils`_. + +.. _`setuptools`: http://peak.telecommunity.com/DevCenter/setuptools +.. 
_`distutils`: http://docs.python.org/dist/dist.html + +The first thing you need to do to make Babel extract messages from Genshi +templates is to let Babel know which files are Genshi templates. This is done +using a “mapping configuration”, which can be stored in a configuration file, +or specified directly in your ``setup.py``. + +In a configuration file, the mapping may look like this: + +.. code-block:: ini + + # Python source + [python:**.py] + + # Genshi templates + [genshi:**/templates/**.html] + include_attrs = title + + [genshi:**/templates/**.txt] + template_class = genshi.template:TextTemplate + encoding = latin-1 + +Please consult the Babel documentation for details on configuration. + +If all goes well, running the extraction with Babel should create a POT file +containing the strings from your Genshi templates and your Python source files. + +.. note:: Genshi currently does not support “translator comments”, i.e. text in + template comments that would get added to the POT file. This support + may or may not be added in future versions. + + +--------------------- +Configuration Options +--------------------- + +The Genshi extraction plugin for Babel supports the following options: + +``template_class`` +------------------ +The concrete ``Template`` class that the file should be loaded with. Specify +the package/module name and the class name, separated by a colon. + +The default is to use ``genshi.template:MarkupTemplate``, and you'll want to +set it to ``genshi.template:TextTemplate`` for `text templates`_. + +.. _`text templates`: text-templates.html + +``encoding`` +------------------ +The encoding of the template file. This is only used for text templates. The +default is to assume “utf-8”. + +``include_attrs`` +------------------ +Whitespace-separated list of attribute names that should be considered to have +localizable values. Only used for markup templates. 
+ +``ignore_tags`` +------------------ +Whitespace-separated list of tag names that should be ignored. Only used for markup +templates. + + +Translation +=========== + +If you have prepared MO files for use with Genshi using the appropriate tools, +you can access the message catalogs with the `gettext Python module`_. You'll +probably want to create a ``gettext.GNUTranslations`` instance, and make the +translation functions it provides available to your templates by putting them +in the template context. + +The ``Translator`` filter needs to be added to the filters of the template +(applying it as a stream filter will likely not have the desired effect). +Furthermore it needs to be the first filter in the list, including the internal +filters that Genshi adds itself: + +.. code-block:: python + + from genshi.filters import Translator + from genshi.template import MarkupTemplate + + template = MarkupTemplate("...") + template.filters.insert(0, Translator(translations.ugettext)) + +If you're using ``TemplateLoader``, you should specify a callback function in +which you add the filter: + +.. code-block:: python + + from genshi.filters import Translator + from genshi.template import TemplateLoader + + def template_loaded(template): + template.filters.insert(0, Translator(translations.ugettext)) + + loader = TemplateLoader('templates', callback=template_loaded) + template = loader.load("...") + +This approach ensures that the filter is not added every time the template is +loaded, and thus is not applied multiple times. + + +Related Considerations +====================== + +If you intend to produce an application that is fully prepared for an +international audience, there are a couple of other things to keep in mind: + +------- +Unicode +------- + +Use ``unicode`` internally, not encoded bytestrings. Only encode/decode where +data enters or exits the system. 
This means that your code works with characters +and not just with bytes, which is an important distinction for example when +calculating the length of a piece of text. When you need to decode/encode, it's +probably a good idea to use UTF-8. + +------------- +Date and Time +------------- + +If your application uses datetime information that should be displayed to users +in different timezones, you should try to work with UTC (universal time) +internally. Do the conversion from and to "local time" when the data enters or +exits the system. Make use of the Python `datetime`_ module and the third-party +`pytz`_ package. + +-------------------------- +Formatting and Locale Data +-------------------------- + +Make sure you check out the functionality provided by the `Babel`_ project for +things like number and date formatting, locale display strings, etc. + +.. _`datetime`: http://docs.python.org/lib/module-datetime.html +.. _`pytz`: http://pytz.sourceforge.net/
--- a/doc/index.txt +++ b/doc/index.txt @@ -26,5 +26,6 @@ * `Text Template Language <text-templates.html>`_ * `Using Stream Filters <filters.html>`_ * `Using XPath <xpath.html>`_ +* `Internationalization and Localization <i18n.html>`_ * `Using the Templating Plugin <plugin.html>`_ * `Generated API Documentation <api/index.html>`_
--- a/doc/style/edgewall.css +++ b/doc/style/edgewall.css @@ -66,3 +66,4 @@ } p.admonition-title { margin-bottom: 0; text-transform: uppercase; } tt.docutils { background-color: transparent; } +span.pre { white-space: normal }
--- a/genshi/filters/i18n.py +++ b/genshi/filters/i18n.py @@ -1,3 +1,16 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + """Utilities for internationalization and localization of templates.""" try: @@ -11,7 +24,10 @@ from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \ XML_NAMESPACE, _ensure from genshi.template.base import Template, EXPR, SUB -from genshi.template.markup import EXEC +from genshi.template.markup import MarkupTemplate, EXEC + +__all__ = ['Translator', 'extract'] +__docformat__ = 'restructuredtext en' _LOAD_NAME = chr(opmap['LOAD_NAME']) _LOAD_CONST = chr(opmap['LOAD_CONST']) @@ -286,3 +302,38 @@ for lineno, funcname, text in self.extract(substream, gettext_functions): yield lineno, funcname, text + + +def extract(fileobj, keywords, comment_tags, options): + """Babel extraction method for Genshi templates. + + :param fileobj: the file-like object the messages should be extracted from + :param keywords: a list of keywords (i.e. 
function names) that should be + recognized as translation functions + :param comment_tags: a list of translator tags to search for and include + in the results + :param options: a dictionary of additional options (optional) + :return: an iterator over ``(lineno, funcname, message, comments)`` tuples + :rtype: ``iterator`` + """ + template_class = options.get('template_class', MarkupTemplate) + if isinstance(template_class, basestring): + module, clsname = template_class.split(':', 1) + template_class = getattr(__import__(module, {}, {}, [clsname]), clsname) + encoding = options.get('encoding', None) + + ignore_tags = options.get('ignore_tags', Translator.IGNORE_TAGS) + if isinstance(ignore_tags, basestring): + ignore_tags = ignore_tags.split() + ignore_tags = [QName(tag) for tag in ignore_tags] + include_attrs = options.get('include_attrs', Translator.INCLUDE_ATTRS) + if isinstance(include_attrs, basestring): + include_attrs = include_attrs.split() + include_attrs = [QName(attr) for attr in include_attrs] + + tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None), + encoding=encoding) + translator = Translator(None, ignore_tags, include_attrs) + for lineno, func, message in translator.extract(tmpl.stream, + gettext_functions=keywords): + yield lineno, func, message, []
--- a/genshi/filters/tests/i18n.py +++ b/genshi/filters/tests/i18n.py @@ -16,7 +16,7 @@ import unittest from genshi.template import MarkupTemplate -from genshi.filters.i18n import Translator +from genshi.filters.i18n import Translator, extract class TranslatorTestCase(unittest.TestCase): @@ -84,10 +84,52 @@ self.assertEqual((2, None, u'(c) 2007 Edgewall Software'), messages[0]) +class ExtractTestCase(unittest.TestCase): + + def test_markup_template_extraction(self): + buf = StringIO("""<html xmlns:py="http://genshi.edgewall.org/"> + <head> + <title>Example</title> + </head> + <body> + <h1>Example</h1> + <p>${_("Hello, %(name)s") % dict(name=username)}</p> + <p>${ngettext("You have %d item", "You have %d items", num)}</p> + </body> + </html>""") + results = list(extract(buf, ['_', 'ngettext'], [], {})) + self.assertEqual([ + (3, None, u'Example', []), + (6, None, u'Example', []), + (7, '_', u'Hello, %(name)s', []), + (8, 'ngettext', (u'You have %d item', u'You have %d items'), []), + ], results) + + def test_text_template_extraction(self): + buf = StringIO("""${_("Dear %(name)s") % {'name': name}}, + + ${ngettext("Your item:", "Your items", len(items))} + #for item in items + * $item + #end + + All the best, + Foobar""") + results = list(extract(buf, ['_', 'ngettext'], [], { + 'template_class': 'genshi.template:TextTemplate' + })) + self.assertEqual([ + (1, '_', u'Dear %(name)s', []), + (3, 'ngettext', (u'Your item:', u'Your items'), []), + (7, None, u'All the best,\n Foobar', []) + ], results) + + def suite(): suite = unittest.TestSuite() + suite.addTests(doctest.DocTestSuite(Translator.__module__)) suite.addTest(unittest.makeSuite(TranslatorTestCase, 'test')) - suite.addTests(doctest.DocTestSuite(Translator.__module__)) + suite.addTest(unittest.makeSuite(ExtractTestCase, 'test')) return suite if __name__ == '__main__':
--- a/setup.py +++ b/setup.py @@ -25,10 +25,17 @@ class build_doc(Command): description = 'Builds the documentation' - user_options = [] + user_options = [ + ('force', None, + "force regeneration even if no reStructuredText files have changed"), + ('without-apidocs', None, + "whether to skip the generation of API documentaton"), + ] + boolean_options = ['force', 'without-apidocs'] def initialize_options(self): - pass + self.force = False + self.without_apidocs = False def finalize_options(self): pass @@ -38,8 +45,8 @@ from docutils.nodes import raw from docutils.parsers import rst - docutils_conf = os.path.join('doc', 'docutils.conf') - epydoc_conf = os.path.join('doc', 'epydoc.conf') + docutils_conf = os.path.join('doc', 'conf', 'docutils.ini') + epydoc_conf = os.path.join('doc', 'conf', 'epydoc.ini') try: from pygments import highlight @@ -60,26 +67,28 @@ for source in glob('doc/*.txt'): dest = os.path.splitext(source)[0] + '.html' - if not os.path.exists(dest) or \ - os.path.getmtime(dest) < os.path.getmtime(source): + if self.force or not os.path.exists(dest) or \ + os.path.getmtime(dest) < os.path.getmtime(source): print 'building documentation file %s' % dest publish_cmdline(writer_name='html', argv=['--config=%s' % docutils_conf, source, dest]) - try: - from epydoc import cli - old_argv = sys.argv[1:] - sys.argv[1:] = [ - '--config=%s' % epydoc_conf, - '--no-private', # epydoc bug, not read from config - '--simple-term', - '--verbose' - ] - cli.cli() - sys.argv[1:] = old_argv - except ImportError: - print 'epydoc not installed, skipping API documentation.' + if not self.without_apidocs: + try: + from epydoc import cli + old_argv = sys.argv[1:] + sys.argv[1:] = [ + '--config=%s' % epydoc_conf, + '--no-private', # epydoc bug, not read from config + '--simple-term', + '--verbose' + ] + cli.cli() + sys.argv[1:] = old_argv + + except ImportError: + print 'epydoc not installed, skipping API documentation.' 
class test_doc(Command): @@ -130,8 +139,14 @@ packages = ['genshi', 'genshi.filters', 'genshi.template'], test_suite = 'genshi.tests.suite', - extras_require = {'plugin': ['setuptools>=0.6a2']}, + extras_require = { + 'i18n': ['Babel>=0.8'], + 'plugin': ['setuptools>=0.6a2'] + }, entry_points = """ + [babel.extractors] + genshi = genshi.filters.i18n:extract[i18n] + [python.templating.engines] genshi = genshi.template.plugin:MarkupTemplateEnginePlugin[plugin] genshi-markup = genshi.template.plugin:MarkupTemplateEnginePlugin[plugin]