# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.

"""Basic infrastructure for extracting localizable messages from source files.

This module defines an extensible system for collecting localizable message
strings from a variety of sources. A native extractor for Python source files
is built in; extractors for other sources can be added using very simple
plugins.

The main entry points into the extraction functionality are the functions
`extract_from_dir` and `extract_from_file`.
"""

import os
try:
    set
except NameError:
    from sets import Set as set
import sys
from tokenize import generate_tokens, NAME, OP, STRING, COMMENT

from babel.util import pathmatch, relpath

__all__ = ['extract', 'extract_from_dir', 'extract_from_file']
__docformat__ = 'restructuredtext en'

# Name of the entry point group that extraction plugins register under.
GROUP_NAME = 'babel.extractors'

# Maps the name of each translation function to the (1-based) positions of
# the arguments that hold localizable strings. ``None`` is treated as "the
# first argument" by `extract`.
DEFAULT_KEYWORDS = {
    '_': None,
    'gettext': None,
    'ngettext': (1, 2),
    'ugettext': None,
    'ungettext': (1, 2),
    'dgettext': (2,),
    'dngettext': (2, 3),
}

DEFAULT_MAPPING = [('**.py', 'python')]


def extract_from_dir(dirname=None, method_map=DEFAULT_MAPPING,
                     options_map=None, keywords=DEFAULT_KEYWORDS,
                     comment_tags=(), callback=None):
    """Extract messages from any source files found in the given directory.

    This function generates tuples of the form:

        ``(filename, lineno, message, comments)``

    Which extraction method is used per file is determined by the `method_map`
    parameter, which maps extended glob patterns to extraction method names.
    For example, the following is the default mapping:

    >>> method_map = [
    ...     ('**.py', 'python')
    ... ]

    This basically says that files with the filename extension ".py" at any
    level inside the directory should be processed by the "python" extraction
    method. Files that don't match any of the mapping patterns are ignored. See
    the documentation of the `pathmatch` function for details on the pattern
    syntax.

    The following extended mapping would also use the "genshi" extraction
    method on any file in a "templates" subdirectory:

    >>> method_map = [
    ...     ('**/templates/**.*', 'genshi'),
    ...     ('**.py', 'python')
    ... ]

    The patterns are tried in order and the first one that matches a file
    decides the extraction method for that file.

    The dictionary provided by the optional `options_map` parameter augments
    these mappings. It uses extended glob patterns as keys, and the values are
    dictionaries mapping options names to option values (both strings).

    The glob patterns of the `options_map` do not necessarily need to be the
    same as those used in the method mapping. For example, while all files in
    the ``templates`` folders in an application may be Genshi applications, the
    options for those files may differ based on extension:

    >>> options_map = {
    ...     '**/templates/**.txt': {
    ...         'template_class': 'genshi.template:TextTemplate',
    ...         'encoding': 'latin-1'
    ...     },
    ...     '**/templates/**.html': {
    ...         'include_attrs': ''
    ...     }
    ... }

    Subdirectories whose name starts with a dot or an underscore are not
    descended into.

    :param dirname: the path to the directory to extract messages from; if
                    omitted (or `None`), the current working directory at the
                    time the extraction runs is used
    :param method_map: a list of ``(pattern, method)`` tuples that maps
                       extended glob patterns to extraction method names
    :param options_map: a dictionary of additional options (optional)
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of tags of translator comments to search for
                         and include in the results
    :param callback: a function that is called for every file that messages
                     are extracted from, just before the extraction itself is
                     performed; the function is passed the filename, the name
                     of the extraction method and the options dictionary as
                     positional arguments, in that order
    :return: an iterator over ``(filename, lineno, message, comments)`` tuples
    :rtype: ``iterator``
    :see: `pathmatch`
    """
    # Resolve the default lazily: a ``dirname=os.getcwd()`` default would be
    # frozen at import time and ignore any later ``os.chdir()``.
    if dirname is None:
        dirname = os.getcwd()
    if options_map is None:
        options_map = {}

    absname = os.path.abspath(dirname)
    for root, dirnames, filenames in os.walk(absname):
        # Prune hidden/private directories. The list must be modified in
        # place for os.walk() to honour it, and must not be mutated while it
        # is being iterated (that skips the entry following a removed one).
        dirnames[:] = [
            subdir for subdir in dirnames
            if not (subdir.startswith('.') or subdir.startswith('_'))
        ]
        for filename in filenames:
            filename = relpath(
                os.path.join(root, filename).replace(os.sep, '/'),
                dirname
            )
            for pattern, method in method_map:
                if pathmatch(pattern, filename):
                    filepath = os.path.join(absname, filename)
                    options = {}
                    for opattern, odict in options_map.items():
                        if pathmatch(opattern, filename):
                            options = odict
                    if callback:
                        callback(filename, method, options)
                    for lineno, message, comments in \
                            extract_from_file(method, filepath,
                                              keywords=keywords,
                                              comment_tags=comment_tags,
                                              options=options):
                        yield filename, lineno, message, comments
                    # Only the first matching pattern applies to a file
                    break


def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
                      comment_tags=(), options=None):
    """Extract messages from a specific file.

    This function returns a list of tuples of the form:

        ``(lineno, message, comments)``

    :param filename: the path to the file to extract messages from
    :param method: a string specifying the extraction method (e.g. "python")
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :return: the list of extracted messages
    :rtype: `list`
    """
    fileobj = open(filename, 'U')
    try:
        # Materialize the generator before the file is closed
        return list(extract(method, fileobj, keywords, comment_tags, options))
    finally:
        fileobj.close()


def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
            options=None):
    """Extract messages from the given file-like object using the specified
    extraction method.

    This function generates tuples of the form:

        ``(lineno, message, comments)``

    The implementation dispatches the actual extraction to plugins, based on the
    value of the ``method`` parameter.

    >>> source = '''# foo module
    ... def run(argv):
    ...    print _('Hello, world!')
    ... '''

    >>> from StringIO import StringIO
    >>> for message in extract('python', StringIO(source)):
    ...     print message
    (3, 'Hello, world!', [])

    When the extractor reports more than one argument for a call, only the
    arguments selected by the keyword's specification are kept; a keyword
    mapped to `None` selects the first argument. Calls that have fewer
    arguments than the specification requires are skipped.

    :param method: a string specifying the extraction method (e.g. "python")
    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, message, comments)`` tuples
    :rtype: ``iterator``
    :raise ValueError: if the extraction method is not registered; as this
                       function is a generator, the error surfaces when the
                       result is first iterated
    """
    from pkg_resources import working_set

    for entry_point in working_set.iter_entry_points(GROUP_NAME, method):
        func = entry_point.load(require=True)
        results = func(fileobj, keywords.keys(), comment_tags,
                       options=options or {})
        for lineno, funcname, messages, comments in results:
            if isinstance(messages, (list, tuple)):
                # A spec of ``None`` means "the first argument"; iterating
                # over None would raise a TypeError.
                spec = keywords[funcname] or (1,)
                try:
                    msgs = [messages[index - 1] for index in spec]
                except IndexError:
                    # The call has fewer arguments than the keyword needs;
                    # one malformed call must not abort the whole extraction.
                    continue
                messages = tuple(msgs)
                if len(messages) == 1:
                    messages = messages[0]
            yield lineno, messages, comments
        # Only the first registered entry point for the method is used
        return

    raise ValueError('Unknown extraction method %r' % method)


def extract_nothing(fileobj, keywords, comment_tags, options):
    """Pseudo extractor that does not actually extract anything, but simply
    returns an empty list.

    :param fileobj: ignored
    :param keywords: ignored
    :param comment_tags: ignored
    :param options: ignored
    :return: an empty list
    :rtype: `list`
    """
    return []


def extract_python(fileobj, keywords, comment_tags, options):
    """Extract messages from Python source code.

    The reported line number is the line on which the argument list of the
    translation function call opens, even if the call spans several lines.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    funcname = None
    # Line on which the current call's argument list opened. The loop
    # variable ``lineno`` is overwritten by every token and therefore cannot
    # be used to remember where a multi-line call started.
    message_lineno = None
    buf = []
    messages = []
    translator_comments = []
    in_args = False
    in_translator_comments = False

    tokens = generate_tokens(fileobj.readline)
    for tok, value, (lineno, _), _, _ in tokens:
        if funcname and tok == OP and value == '(':
            if not in_args:
                # Do not let a nested parenthesis move the reported line
                message_lineno = lineno
            in_args = True
        elif tok == COMMENT:
            # Strip the comment token from the line
            value = value[1:].strip()
            if in_translator_comments and \
                    translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue
                # appending (continuation lines need not repeat the tag)
                translator_comments.append((lineno, value))
                continue
            # If execution reaches this point, let's see if the comment line
            # starts with one of the comment tags
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    comment = value[len(comment_tag):].strip()
                    translator_comments.append((lineno, comment))
                    break
        elif funcname and in_args:
            if tok == OP and value == ')':
                in_args = in_translator_comments = False
                if buf:
                    messages.append(''.join(buf))
                    del buf[:]
                # Only report calls with at least one non-empty string
                # argument. (A list is used rather than filter(), whose
                # result is always truthy on Python 3.)
                if [message for message in messages if message]:
                    if len(messages) > 1:
                        messages = tuple(messages)
                    else:
                        messages = messages[0]
                    # Comments don't apply unless they immediately precede
                    # the message
                    if translator_comments and \
                            translator_comments[-1][0] < message_lineno - 1:
                        translator_comments = []

                    yield (message_lineno, funcname, messages,
                           [comment[1] for comment in translator_comments])
                funcname = message_lineno = None
                messages = []
                translator_comments = []
            elif tok == STRING:
                # Unwrap quotes in a safe manner
                # NOTE(review): eval() with emptied builtins still evaluates
                # text taken from the scanned file; acceptable for string
                # literal tokens, but do not widen it to other token types.
                buf.append(eval(value, {'__builtins__':{}}, {}))
            elif tok == OP and value == ',':
                messages.append(''.join(buf))
                del buf[:]
        elif funcname:
            # A keyword name that is not directly followed by "(" is not a
            # call
            funcname = None
        elif tok == NAME and value in keywords:
            funcname = value