# HG changeset patch # User cmlenz # Date 1181151635 0 # Node ID a524b547ea7e73106a0159b852e7b58aada62516 # Parent 39df4248662e91e531450024ef3fd84a8ee37292 Some work towards #4. diff --git a/babel/catalog/extract.py b/babel/catalog/extract.py --- a/babel/catalog/extract.py +++ b/babel/catalog/extract.py @@ -22,10 +22,14 @@ """ import os +try: + set +except NameError: + from sets import Set as set import sys from tokenize import generate_tokens, NAME, OP, STRING -from babel.util import extended_glob +from babel.util import pathmatch, relpath __all__ = ['extract', 'extract_from_dir', 'extract_from_file'] __docformat__ = 'restructuredtext en' @@ -43,62 +47,95 @@ } DEFAULT_MAPPING = { - 'genshi': ['*.html', '**/*.html'], - 'python': ['*.py', '**/*.py'] + '**.html': 'genshi', + '**.py': 'python' } -def extract_from_dir(dirname, mapping=DEFAULT_MAPPING, - keywords=DEFAULT_KEYWORDS, options=None): +def extract_from_dir(dirname, method_map=DEFAULT_MAPPING, + options_map=None, keywords=DEFAULT_KEYWORDS): """Extract messages from any source files found in the given directory. This function generates tuples of the form: ``(filename, lineno, funcname, message)`` - Which extraction method used is per file is determined by the `mapping` - parameter, which maps extraction method names to lists of extended glob - patterns. For example, the following is the default mapping: + Which extraction method is used per file is determined by the `method_map` + parameter, which maps extended glob patterns to extraction method names. + For example, the following is the default mapping: - >>> mapping = { - ... 'python': ['*.py', '**/*.py'] + >>> method_map = { + ... '**.py': 'python' ... } This basically says that files with the filename extension ".py" at any level inside the directory should be processed by the "python" extraction - method. Files that don't match any of the patterns are ignored. + method. Files that don't match any of the mapping patterns are ignored. See + the documentation of the `pathmatch` function for details on the pattern + syntax. The following extended mapping would also use the "genshi" extraction method on any file in "templates" subdirectory: - >>> mapping = { - ... 'genshi': ['**/templates/*.*', '**/templates/**/*.*'], - ... 'python': ['*.py', '**/*.py'] + >>> method_map = { + ... '**/templates/**.*': 'genshi', + ... '**.py': 'python' + ... } + + The dictionary provided by the optional `options_map` parameter augments + the mapping data. It too uses extended glob patterns as keys, but the values + are dictionaries mapping options names to option values (both strings). + + The glob patterns of the `options_map` do not necessarily need to be the + same as those used in the pattern. For example, while all files in the + ``templates`` folders in an application may be Genshi applications, the + options for those files may differ based on extension: + + >>> options_map = { + ... '**/templates/**.txt': { + ... 'template_class': 'genshi.template.text.TextTemplate', + ... 'encoding': 'latin-1' + ... }, + ... '**/templates/**.html': { + ... 'include_attrs': '' + ... } ... } :param dirname: the path to the directory to extract messages from - :param mapping: a mapping of extraction method names to extended glob - patterns + :param method_map: a mapping of extraction method names to extended glob + patterns + :param options_map: a dictionary of additional options (optional) :param keywords: a dictionary mapping keywords (i.e. names of functions that should be recognized as translation functions) to tuples that specify which of their arguments contain localizable strings - :param options: a dictionary of additional options (optional) :return: an iterator over ``(filename, lineno, funcname, message)`` tuples :rtype: ``iterator`` + :see: `pathmatch` """ - extracted_files = {} - for method, patterns in mapping.items(): - for pattern in patterns: - for filename in extended_glob(pattern, dirname): - if filename in extracted_files: - continue - filepath = os.path.join(dirname, filename) - for line, func, key in extract_from_file(method, filepath, - keywords=keywords, - options=options): - yield filename, line, func, key - extracted_files[filename] = True + if options_map is None: + options_map = {} + absname = os.path.abspath(dirname) + for root, dirnames, filenames in os.walk(absname): + for subdir in dirnames: + if subdir.startswith('.') or subdir.startswith('_'): + dirnames.remove(subdir) + for filename in filenames: + filename = relpath( + os.path.join(root, filename).replace(os.sep, '/'), + dirname + ) + for pattern, method in method_map.items(): + if pathmatch(pattern, filename): + filepath = os.path.join(absname, filename) + options = {} + for opattern, odict in options_map.items(): + if pathmatch(opattern, filename): + options = odict + for line, func, key in extract_from_file(method, filepath, + keywords=keywords, + options=options): + yield filepath, line, func, key def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, options=None): diff --git a/babel/util.py b/babel/util.py --- a/babel/util.py +++ b/babel/util.py @@ -17,29 +17,45 @@ import os import re -__all__ = ['extended_glob', 'relpath', 'LazyProxy', 'UTC'] +__all__ = ['pathmatch', 'relpath', 'LazyProxy', 'UTC'] __docformat__ = 'restructuredtext en' -def extended_glob(pattern, dirname=''): - """Extended pathname pattern expansion. +def pathmatch(pattern, filename): + """Extended pathname pattern matching. - This function is similar to what is provided by the ``glob`` module in the - Python standard library, but also supports a convenience pattern ("**") to - match files at any directory level. + This function is similar to what is provided by the ``fnmatch`` module in + the Python standard library, but: + + * can match complete (relative or absolute) path names, and not just file + names, and + * also supports a convenience pattern ("**") to match files at any + directory level. + + Examples: + + >>> pathmatch('**.py', 'bar.py') + True + >>> pathmatch('**.py', 'foo/bar/baz.py') + True + >>> pathmatch('**.py', 'templates/index.html') + False + >>> pathmatch('**/templates/*.html', 'templates/index.html') + True + >>> pathmatch('**/templates/*.html', 'templates/foo/bar.html') + False :param pattern: the glob pattern - :param dirname: the path to the directory in which to search for files - matching the given pattern - :return: an iterator over the absolute filenames of any matching files - :rtype: ``iterator`` + :param filename: the path name of the file to match against + :return: `True` if the path name matches the pattern, `False` otherwise + :rtype: `bool` """ symbols = { '?': '[^/]', '?/': '[^/]/', '*': '[^/]+', '*/': '[^/]+/', - '**': '(?:.+/)*?', '**/': '(?:.+/)*?', + '**': '(?:.+/)*?[^/]+', } buf = [] for idx, part in enumerate(re.split('([?*]+/?)', pattern)): @@ -47,20 +63,7 @@ buf.append(symbols[part]) elif part: buf.append(re.escape(part)) - regex = re.compile(''.join(buf) + '$') - - absname = os.path.abspath(dirname) - for root, dirnames, filenames in os.walk(absname): - for subdir in dirnames: - if subdir.startswith('.') or subdir.startswith('_'): - dirnames.remove(subdir) - for filename in filenames: - filepath = relpath( - os.path.join(root, filename).replace(os.sep, '/'), - dirname - ) - if regex.match(filepath): - yield filepath + return re.match(''.join(buf) + '$', filename) is not None class LazyProxy(object): @@ -199,6 +202,13 @@ def relpath(path, start='.'): """Compute the relative path to one path from another. + >>> relpath('foo/bar.txt', '') + 'foo/bar.txt' + >>> relpath('foo/bar.txt', 'foo') + 'bar.txt' + >>> relpath('foo/bar.txt', 'baz') + '../foo/bar.txt' + :return: the relative path :rtype: `basestring` """