changeset 250:6c06570af1b9 trunk

Soften dependency on setuptools. Extraction methods can now be referenced using a special section in the mapping configuration, mapping short names to fully-qualified function references.
author cmlenz
date Mon, 13 Aug 2007 22:29:03 +0000
parents 51a1f6101fa6
children 242c9ba1d41d
files babel/messages/catalog.py babel/messages/extract.py babel/messages/frontend.py doc/messages.txt
diffstat 4 files changed, 180 insertions(+), 101 deletions(-) [+]
line wrap: on
line diff
--- a/babel/messages/catalog.py
+++ b/babel/messages/catalog.py
@@ -539,22 +539,28 @@
         ``(message, errors)`` tuple, where ``message`` is the `Message` object
         and ``errors`` is a sequence of `TranslationError` objects.
 
+        :note: this feature requires ``setuptools``/``pkg_resources`` to be
+               installed; if it is not, this method will simply return an empty
+               iterator
         :rtype: ``iterator``
         """
         checkers = []
-        from pkg_resources import working_set
-        for entry_point in working_set.iter_entry_points('babel.checkers'):
-            checkers.append(entry_point.load())
-
-        for message in self._messages.values():
-            errors = []
-            for checker in checkers:
-                try:
-                    checker(self, message)
-                except TranslationError, e:
-                    errors.append(e)
-            if errors:
-                yield message, errors
+        try:
+            from pkg_resources import working_set
+        except ImportError:
+            return
+        else:
+            for entry_point in working_set.iter_entry_points('babel.checkers'):
+                checkers.append(entry_point.load())
+            for message in self._messages.values():
+                errors = []
+                for checker in checkers:
+                    try:
+                        checker(self, message)
+                    except TranslationError, e:
+                        errors.append(e)
+                if errors:
+                    yield message, errors
 
     def update(self, template, no_fuzzy_matching=False):
         """Update the catalog based on the given template catalog.
--- a/babel/messages/extract.py
+++ b/babel/messages/extract.py
@@ -202,7 +202,11 @@
     ...     print message
     (3, u'Hello, world!', [])
 
-    :param method: a string specifying the extraction method (.e.g. "python")
+    :param method: a string specifying the extraction method (.e.g. "python");
+                   if this is a simple name, the extraction function will be
+                   looked up by entry point; if it is an explicit reference
+                   to a function (of the form ``package.module:funcname``), the
+                   corresponding function will be imported and used
     :param fileobj: the file-like object the messages should be extracted from
     :param keywords: a dictionary mapping keywords (i.e. names of functions
                      that should be recognized as translation functions) to
@@ -215,47 +219,59 @@
     :rtype: `list`
     :raise ValueError: if the extraction method is not registered
     """
-    from pkg_resources import working_set
-
-    for entry_point in working_set.iter_entry_points(GROUP_NAME, method):
-        func = entry_point.load(require=True)
-        results = func(fileobj, keywords.keys(), comment_tags,
-                       options=options or {})
-        for lineno, funcname, messages, comments in results:
-            if funcname:
-                spec = keywords[funcname] or (1,)
-            else:
-                spec = (1,)
-            if not isinstance(messages, (list, tuple)):
-                messages = [messages]
+    if ':' in method:
+        module, clsname = method.split(':', 1)
+        func = getattr(__import__(module, {}, {}, [clsname]), clsname)
+    else:
+        try:
+            from pkg_resources import working_set
+        except ImportError:
+            # pkg_resources is not available, so we resort to looking up the
+            # builtin extractors directly
+            builtin = {'ignore': extract_nothing, 'python': extract_python}
+            func = builtin.get(method)
+        else:
+            for entry_point in working_set.iter_entry_points(GROUP_NAME,
+                                                             method):
+                func = entry_point.load(require=True)
+                break
+    if func is None:
+        raise ValueError('Unknown extraction method %r' % method)
 
-            msgs = []
-            # Validate the messages against the keyword's specification
-            invalid = False
-            for index in spec:
-                message = messages[index - 1]
-                if message is None:
-                    invalid = True
-                    break
-                msgs.append(message)
-            if invalid:
-                continue
+    results = func(fileobj, keywords.keys(), comment_tags,
+                   options=options or {})
+    for lineno, funcname, messages, comments in results:
+        if funcname:
+            spec = keywords[funcname] or (1,)
+        else:
+            spec = (1,)
+        if not isinstance(messages, (list, tuple)):
+            messages = [messages]
 
-            first_msg_index = spec[0] - 1
-            if not messages[first_msg_index]:
-                # An empty string msgid isn't valid, emit a warning
-                where = '%s:%i' % (hasattr(fileobj, 'name') and \
-                                       fileobj.name or '(unknown)', lineno)
-                print >> sys.stderr, empty_msgid_warning % where
-                continue
+        msgs = []
+        # Validate the messages against the keyword's specification
+        invalid = False
+        for index in spec:
+            message = messages[index - 1]
+            if message is None:
+                invalid = True
+                break
+            msgs.append(message)
+        if invalid:
+            continue
 
-            messages = tuple(msgs)
-            if len(messages) == 1:
-                messages = messages[0]
-            yield lineno, messages, comments
-        return
+        first_msg_index = spec[0] - 1
+        if not messages[first_msg_index]:
+            # An empty string msgid isn't valid, emit a warning
+            where = '%s:%i' % (hasattr(fileobj, 'name') and \
+                                   fileobj.name or '(unknown)', lineno)
+            print >> sys.stderr, empty_msgid_warning % where
+            continue
 
-    raise ValueError('Unknown extraction method %r' % method)
+        messages = tuple(msgs)
+        if len(messages) == 1:
+            messages = messages[0]
+        yield lineno, messages, comments
 
 def extract_nothing(fileobj, keywords, comment_tags, options):
     """Pseudo extractor that does not actually extract anything, but simply
--- a/babel/messages/frontend.py
+++ b/babel/messages/frontend.py
@@ -1073,6 +1073,9 @@
     """Parse an extraction method mapping from a file-like object.
 
     >>> buf = StringIO('''
+    ... [extractors]
+    ... custom = mypackage.module:myfunc
+    ... 
     ... # Python source files
     ... [python: **.py]
     ...
@@ -1082,9 +1085,14 @@
     ... [genshi: **/templates/**.txt]
     ... template_class = genshi.template:TextTemplate
     ... encoding = latin-1
+    ... 
+    ... # Some custom extractor
+    ... [custom: **/custom/*.*]
     ... ''')
 
     >>> method_map, options_map = parse_mapping(buf)
+    >>> len(method_map)
+    4
 
     >>> method_map[0]
     ('**.py', 'python')
@@ -1101,12 +1109,18 @@
     >>> options_map['**/templates/**.txt']['encoding']
     'latin-1'
 
+    >>> method_map[3]
+    ('**/custom/*.*', 'mypackage.module:myfunc')
+    >>> options_map['**/custom/*.*']
+    {}
+
     :param fileobj: a readable file-like object containing the configuration
                     text to parse
     :return: a `(method_map, options_map)` tuple
     :rtype: `tuple`
     :see: `extract_from_directory`
     """
+    extractors = {}
     method_map = []
     options_map = {}
 
@@ -1114,9 +1128,18 @@
     parser._sections = odict(parser._sections) # We need ordered sections
     parser.readfp(fileobj, filename)
     for section in parser.sections():
-        method, pattern = [part.strip() for part in section.split(':', 1)]
-        method_map.append((pattern, method))
-        options_map[pattern] = dict(parser.items(section))
+        if section == 'extractors':
+            extractors = dict(parser.items(section))
+        else:
+            method, pattern = [part.strip() for part in section.split(':', 1)]
+            method_map.append((pattern, method))
+            options_map[pattern] = dict(parser.items(section))
+
+    if extractors:
+        for idx, (pattern, method) in enumerate(method_map):
+            if method in extractors:
+                method = extractors[method]
+            method_map[idx] = (pattern, method)
 
     return (method_map, options_map)
 
--- a/doc/messages.txt
+++ b/doc/messages.txt
@@ -5,7 +5,7 @@
 =============================
 
 .. contents:: Contents
-   :depth: 2
+   :depth: 3
 .. sectnum::
 
 
@@ -91,6 +91,23 @@
 extraction methods, which is described below.
 
 
+.. _`frontends`:
+
+----------
+Front-Ends
+----------
+
+Babel provides two different front-ends to access its functionality for working
+with message catalogs:
+
+ * A `Command-line interface <cmdline.html>`_, and
+ * `Integration with distutils/setuptools <setup.html>`_
+
+Which one you choose depends on the nature of your project. For most modern
+Python projects, the distutils/setuptools integration is probably more
+convenient.
+
+
 .. _`mapping`:
 
 -------------------------------------------
@@ -109,21 +126,21 @@
 
     # Extraction from Python source files
     
-    [python: foobar/**.py]
+    [python: **.py]
     
     # Extraction from Genshi HTML and text templates
     
-    [genshi: foobar/**/templates/**.html]
+    [genshi: **/templates/**.html]
     ignore_tags = script,style
     include_attrs = alt title summary
     
-    [genshi: foobar/**/templates/**.txt]
+    [genshi: **/templates/**.txt]
     template_class = genshi.template:TextTemplate
     encoding = ISO-8859-15
 
 The configuration file syntax is based on the format commonly found in ``.INI``
 files on Windows systems, and as supported by the ``ConfigParser`` module in
-the Python standard libraries. Section names (the strings enclosed in square
+the Python standard library. Section names (the strings enclosed in square
 brackets) specify both the name of the extraction method, and the extended glob
 pattern to specify the files that this extraction method should be used for,
 separated by a colon. The options in the sections are passed to the extraction
@@ -138,27 +155,59 @@
 extension ``.txt`` in any directory.
 
 Lines that start with a ``#`` or ``;`` character are ignored and can be used
-for comments. Empty lines are also ignored, too.
+for comments. Empty lines are ignored, too.
 
 .. note:: if you're performing message extraction using the command Babel
-          provides for integration into ``setup.py`` scripts (see below), you
-          can also provide this configuration in a different way, namely as a
-          keyword argument to the ``setup()`` function.
+          provides for integration into ``setup.py`` scripts, you can also 
+          provide this configuration in a different way, namely as a keyword 
+          argument to the ``setup()`` function. See `Distutils/Setuptools 
+          Integration`_ for more information.
+
+.. _`distutils/setuptools integration`: setup.html
 
 
-----------
-Front-Ends
-----------
+Default Extraction Methods
+--------------------------
 
-Babel provides two different front-ends to access its functionality for working
-with message catalogs:
+Babel comes with only two builtin extractors: ``python`` (which extracts 
+messages from Python source files) and ``ignore`` (which extracts nothing).
 
- * A `Command-line interface <cmdline.html>`_, and
- * `Integration with distutils/setuptools <setup.html>`_
+The ``python`` extractor is by default mapped to the glob pattern ``**.py``,
+meaning it'll be applied to all files with the ``.py`` extension in any 
+directory. If you specify your own mapping configuration, this default mapping
+is discarded, so you need to explicitly add it to your mapping (as shown in
+the example above).
 
-Which one you choose depends on the nature of your project. For most modern
-Python projects, the distutils/setuptools integration is probably more
-convenient.
+
+.. _`referencing extraction methods`:
+
+Referencing Extraction Methods
+------------------------------
+
+To be able to use short extraction method names such as “genshi”, you need to 
+have `pkg_resources`_ installed, and the package implementing that extraction
+method needs to have been installed with its metadata (the `egg-info`_).
+
+If this is not possible for some reason, you need to map the short names to 
+fully qualified function names in an ``[extractors]`` section in the mapping
+configuration. For example:
+
+.. code-block:: ini
+
+    # Some custom extraction method
+    
+    [extractors]
+    custom = mypackage.module:extract_custom
+    
+    [custom: **.ctm]
+    some_option = foo
+
+Note that the builtin extraction methods ``python`` and ``ignore`` are available
+by default, even if `pkg_resources`_ is not installed. You should never need to
+explicitly define them in the ``[extractors]`` section.
+
+.. _`egg-info`: http://peak.telecommunity.com/DevCenter/PythonEggs
+.. _`pkg_resources`: http://peak.telecommunity.com/DevCenter/PkgResources
 
 
 --------------------------
@@ -211,10 +260,17 @@
 the name of the function (separated by a colon) implementing the actual
 extraction.
 
+.. note:: As shown in `Referencing Extraction Methods`_, declaring an entry
+          point is not strictly required, as users can still reference the
+          extraction function directly. But whenever possible, the entry point
+          should be declared to make configuration more convenient.
+
 .. _`setuptools`: http://peak.telecommunity.com/DevCenter/setuptools
 
-Comments Tags And Translator Comments Explanation
-.................................................
+
+-------------------
+Translator Comments
+-------------------
 
 First of all, what are comment tags? Comment tags are excerpts of text to
 search for in comments, only comments, right before the `python gettext`_
@@ -239,7 +295,7 @@
 
 Now, you might ask, why would I need that?
 
-Consider this simple case; you have a menu item called “Manual”. You know what
+Consider this simple case; you have a menu item called “manual”. You know what
 it means, but when the translator sees this, they will wonder whether you meant:
 
 1. a document or help manual, or
@@ -249,28 +305,6 @@
 “The installation manual” helps to clarify the situation and makes a translator
 more productive.
 
-**More examples of the need for translation comments**
-
-Real world examples are best. This is a discussion over the use of the word
-“Forward” in Northern Sotho:
-
-“When you go forward. You go ‘Pele’, but when you forward the document,
-you ‘Fetišetša pele’. So if you just say forward, we don’t know what you are
-talking about.
-It is better if it's in a sentence. But in this case i think we will use ‘pele’
-because on the string no. 86 and 88 there is “show previous page in history”
-and “show next page in history”.
-
-Were the translators guess correct? I think so, but it makes it so much easier
-if they don’t need to be super `sleuths`_ as well as translators.
-
- .. _`sleuths`: http://www.thefreedictionary.com/sleuth
-
-
-*Explanation Borrowed From:* `Wordforge`_
-
- .. _`Wordforge`: http://www.wordforge.org/static/translation_comments.html
-
-**Note**: Translator comments are currently only supported in python source
-code.
-
+.. note:: Whether translator comments can be extracted depends on the extraction
+          method in use. The Python extractor provided by Babel does implement
+          this feature, but others may not.
Copyright (C) 2012-2017 Edgewall Software