Mercurial > babel > old > mirror
diff 0.8.x/babel/messages/extract.py @ 142:4a7af44e6695 stable
Create branch for 0.8.x releases.
author | cmlenz |
---|---|
date | Wed, 20 Jun 2007 10:09:07 +0000 |
parents | |
children | 32a242175da5 |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/0.8.x/babel/messages/extract.py @@ -0,0 +1,310 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""Basic infrastructure for extracting localizable messages from source files. + +This module defines an extensible system for collecting localizable message +strings from a variety of sources. A native extractor for Python source files +is builtin, extractors for other sources can be added using very simple plugins. + +The main entry points into the extraction functionality are the functions +`extract_from_dir` and `extract_from_file`. +""" + +import os +try: + set +except NameError: + from sets import Set as set +import sys +from tokenize import generate_tokens, NAME, OP, STRING, COMMENT + +from babel.util import pathmatch, relpath + +__all__ = ['extract', 'extract_from_dir', 'extract_from_file'] +__docformat__ = 'restructuredtext en' + +GROUP_NAME = 'babel.extractors' + +DEFAULT_KEYWORDS = { + '_': None, + 'gettext': None, + 'ngettext': (1, 2), + 'ugettext': None, + 'ungettext': (1, 2), + 'dgettext': (2,), + 'dngettext': (2, 3), +} + +DEFAULT_MAPPING = [('**.py', 'python')] + +def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING, + options_map=None, keywords=DEFAULT_KEYWORDS, + comment_tags=(), callback=None): + """Extract messages from any source files found in the given directory. + + This function generates tuples of the form: + + ``(filename, lineno, message, comments)`` + + Which extraction method is used per file is determined by the `method_map` + parameter, which maps extended glob patterns to extraction method names. + For example, the following is the default mapping: + + >>> method_map = [ + ... ('**.py', 'python') + ... ] + + This basically says that files with the filename extension ".py" at any + level inside the directory should be processed by the "python" extraction + method. Files that don't match any of the mapping patterns are ignored. See + the documentation of the `pathmatch` function for details on the pattern + syntax. + + The following extended mapping would also use the "genshi" extraction + method on any file in "templates" subdirectory: + + >>> method_map = [ + ... ('**/templates/**.*', 'genshi'), + ... ('**.py', 'python') + ... ] + + The dictionary provided by the optional `options_map` parameter augments + these mappings. It uses extended glob patterns as keys, and the values are + dictionaries mapping options names to option values (both strings). + + The glob patterns of the `options_map` do not necessarily need to be the + same as those used in the method mapping. For example, while all files in + the ``templates`` folders in an application may be Genshi applications, the + options for those files may differ based on extension: + + >>> options_map = { + ... '**/templates/**.txt': { + ... 'template_class': 'genshi.template.text.TextTemplate', + ... 'encoding': 'latin-1' + ... }, + ... '**/templates/**.html': { + ... 'include_attrs': '' + ... } + ... } + + :param dirname: the path to the directory to extract messages from + :param method_map: a list of ``(pattern, method)`` tuples that maps of + extraction method names to extended glob patterns + :param options_map: a dictionary of additional options (optional) + :param keywords: a dictionary mapping keywords (i.e. names of functions + that should be recognized as translation functions) to + tuples that specify which of their arguments contain + localizable strings + :param comment_tags: a list of tags of translator comments to search for + and include in the results + :param callback: a function that is called for every file that message are + extracted from, just before the extraction itself is + performed; the function is passed the filename, the name + of the extraction method and and the options dictionary as + positional arguments, in that order + :return: an iterator over ``(filename, lineno, funcname, message)`` tuples + :rtype: ``iterator`` + :see: `pathmatch` + """ + if options_map is None: + options_map = {} + + absname = os.path.abspath(dirname) + for root, dirnames, filenames in os.walk(absname): + for subdir in dirnames: + if subdir.startswith('.') or subdir.startswith('_'): + dirnames.remove(subdir) + for filename in filenames: + filename = relpath( + os.path.join(root, filename).replace(os.sep, '/'), + dirname + ) + for pattern, method in method_map: + if pathmatch(pattern, filename): + filepath = os.path.join(absname, filename) + options = {} + for opattern, odict in options_map.items(): + if pathmatch(opattern, filename): + options = odict + if callback: + callback(filename, method, options) + for lineno, message, comments in \ + extract_from_file(method, filepath, + keywords=keywords, + comment_tags=comment_tags, + options=options): + yield filename, lineno, message, comments + break + +def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, + comment_tags=(), options=None): + """Extract messages from a specific file. + + This function returns a list of tuples of the form: + + ``(lineno, funcname, message)`` + + :param filename: the path to the file to extract messages from + :param method: a string specifying the extraction method (.e.g. "python") + :param keywords: a dictionary mapping keywords (i.e. names of functions + that should be recognized as translation functions) to + tuples that specify which of their arguments contain + localizable strings + :param comment_tags: a list of translator tags to search for and include + in the results + :param options: a dictionary of additional options (optional) + :return: the list of extracted messages + :rtype: `list` + """ + fileobj = open(filename, 'U') + try: + return list(extract(method, fileobj, keywords, comment_tags, options)) + finally: + fileobj.close() + +def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), + options=None): + """Extract messages from the given file-like object using the specified + extraction method. + + This function returns a list of tuples of the form: + + ``(lineno, message, comments)`` + + The implementation dispatches the actual extraction to plugins, based on the + value of the ``method`` parameter. + + >>> source = '''# foo module + ... def run(argv): + ... print _('Hello, world!') + ... ''' + + >>> from StringIO import StringIO + >>> for message in extract('python', StringIO(source)): + ... print message + (3, 'Hello, world!', []) + + :param method: a string specifying the extraction method (.e.g. "python") + :param fileobj: the file-like object the messages should be extracted from + :param keywords: a dictionary mapping keywords (i.e. names of functions + that should be recognized as translation functions) to + tuples that specify which of their arguments contain + localizable strings + :param comment_tags: a list of translator tags to search for and include + in the results + :param options: a dictionary of additional options (optional) + :return: the list of extracted messages + :rtype: `list` + :raise ValueError: if the extraction method is not registered + """ + from pkg_resources import working_set + + for entry_point in working_set.iter_entry_points(GROUP_NAME, method): + func = entry_point.load(require=True) + results = func(fileobj, keywords.keys(), comment_tags, + options=options or {}) + for lineno, funcname, messages, comments in results: + if isinstance(messages, (list, tuple)): + msgs = [] + for index in keywords[funcname]: + msgs.append(messages[index - 1]) + messages = tuple(msgs) + if len(messages) == 1: + messages = messages[0] + yield lineno, messages, comments + return + + raise ValueError('Unknown extraction method %r' % method) + +def extract_nothing(fileobj, keywords, comment_tags, options): + """Pseudo extractor that does not actually extract anything, but simply + returns an empty list. + """ + return [] + +def extract_python(fileobj, keywords, comment_tags, options): + """Extract messages from Python source code. + + :param fileobj: the file-like object the messages should be extracted from + :param keywords: a list of keywords (i.e. function names) that should be + recognized as translation functions + :param comment_tags: a list of translator tags to search for and include + in the results + :param options: a dictionary of additional options (optional) + :return: an iterator over ``(lineno, funcname, message, comments)`` tuples + :rtype: ``iterator`` + """ + funcname = None + lineno = None + buf = [] + messages = [] + translator_comments = [] + in_args = False + in_translator_comments = False + + tokens = generate_tokens(fileobj.readline) + for tok, value, (lineno, _), _, _ in tokens: + if funcname and tok == OP and value == '(': + in_args = True + elif tok == COMMENT: + # Strip the comment token from the line + value = value[1:].strip() + if in_translator_comments is True and \ + translator_comments[-1][0] == lineno - 1: + # We're already inside a translator comment, continue appending + # XXX: Should we check if the programmer keeps adding the + # comment_tag for every comment line??? probably not! + translator_comments.append((lineno, value)) + continue + # If execution reaches this point, let's see if comment line + # starts with one of the comment tags + for comment_tag in comment_tags: + if value.startswith(comment_tag): + if in_translator_comments is not True: + in_translator_comments = True + comment = value[len(comment_tag):].strip() + translator_comments.append((lineno, comment)) + break + elif funcname and in_args: + if tok == OP and value == ')': + in_args = in_translator_comments = False + if buf: + messages.append(''.join(buf)) + del buf[:] + if filter(None, messages): + if len(messages) > 1: + messages = tuple(messages) + else: + messages = messages[0] + # Comments don't apply unless they immediately preceed the + # message + if translator_comments and \ + translator_comments[-1][0] < lineno - 1: + translator_comments = [] + + yield (lineno, funcname, messages, + [comment[1] for comment in translator_comments]) + funcname = lineno = None + messages = [] + translator_comments = [] + elif tok == STRING: + # Unwrap quotes in a safe manner + buf.append(eval(value, {'__builtins__':{}}, {})) + elif tok == OP and value == ',': + messages.append(''.join(buf)) + del buf[:] + elif funcname: + funcname = None + elif tok == NAME and value in keywords: + funcname = value