# HG changeset patch
# User cmlenz
# Date 1181151635 0
# Node ID a524b547ea7e73106a0159b852e7b58aada62516
# Parent  39df4248662e91e531450024ef3fd84a8ee37292
Some work towards #4.

diff --git a/babel/catalog/extract.py b/babel/catalog/extract.py
--- a/babel/catalog/extract.py
+++ b/babel/catalog/extract.py
@@ -22,10 +22,14 @@
 """
 
 import os
+try:
+    set
+except NameError:
+    from sets import Set as set
 import sys
 from tokenize import generate_tokens, NAME, OP, STRING
 
-from babel.util import extended_glob
+from babel.util import pathmatch, relpath
 
 __all__ = ['extract', 'extract_from_dir', 'extract_from_file']
 __docformat__ = 'restructuredtext en'
@@ -43,62 +47,95 @@
 }
 
 DEFAULT_MAPPING = {
-    'genshi': ['*.html', '**/*.html'],
-    'python': ['*.py', '**/*.py']
+    '**.html': 'genshi',
+    '**.py': 'python'
 }
 
 
-def extract_from_dir(dirname, mapping=DEFAULT_MAPPING,
-                     keywords=DEFAULT_KEYWORDS, options=None):
+def extract_from_dir(dirname, method_map=DEFAULT_MAPPING,
+                     options_map=None, keywords=DEFAULT_KEYWORDS):
     """Extract messages from any source files found in the given directory.
     
     This function generates tuples of the form:
     
         ``(filename, lineno, funcname, message)``
     
-    Which extraction method used is per file is determined by the `mapping`
-    parameter, which maps extraction method names to lists of extended glob
-    patterns. For example, the following is the default mapping:
+    Which extraction method is used per file is determined by the `method_map`
+    parameter, which maps extended glob patterns to extraction method names.
+    For example, the following is the default mapping:
     
-    >>> mapping = {
-    ...     'python': ['*.py', '**/*.py']
+    >>> method_map = {
+    ...     '**.py': 'python'
     ... }
     
     This basically says that files with the filename extension ".py" at any
     level inside the directory should be processed by the "python" extraction
-    method. Files that don't match any of the patterns are ignored.
+    method. Files that don't match any of the mapping patterns are ignored. See
+    the documentation of the `pathmatch` function for details on the pattern
+    syntax.
     
     The following extended mapping would also use the "genshi" extraction method
     on any file in "templates" subdirectory:
     
-    >>> mapping = {
-    ...     'genshi': ['**/templates/*.*', '**/templates/**/*.*'],
-    ...     'python': ['*.py', '**/*.py']
+    >>> method_map = {
+    ...     '**/templates/**.*': 'genshi',
+    ...     '**.py': 'python'
+    ... }
+    
+    The dictionary provided by the optional `options_map` parameter augments
+    the mapping data. It too uses extended glob patterns as keys, but the values
+    are dictionaries mapping option names to option values (both strings).
+    
+    The glob patterns of the `options_map` do not necessarily need to be the
+    same as those used in the `method_map`. For example, while all files in
+    the ``templates`` folders in an application may be Genshi templates, the
+    options for those files may differ based on extension:
+    
+    >>> options_map = {
+    ...     '**/templates/**.txt': {
+    ...         'template_class': 'genshi.template.text.TextTemplate',
+    ...         'encoding': 'latin-1'
+    ...     },
+    ...     '**/templates/**.html': {
+    ...         'include_attrs': ''
+    ...     }
     ... }
     
     :param dirname: the path to the directory to extract messages from
-    :param mapping: a mapping of extraction method names to extended glob
-                    patterns
+    :param method_map: a mapping of extended glob patterns to extraction
+                       method names
+    :param options_map: a dictionary of additional options (optional)
     :param keywords: a dictionary mapping keywords (i.e. names of functions
                      that should be recognized as translation functions) to
                      tuples that specify which of their arguments contain
                      localizable strings
-    :param options: a dictionary of additional options (optional)
     :return: an iterator over ``(filename, lineno, funcname, message)`` tuples
     :rtype: ``iterator``
+    :see: `pathmatch`
     """
-    extracted_files = {}
-    for method, patterns in mapping.items():
-        for pattern in patterns:
-            for filename in extended_glob(pattern, dirname):
-                if filename in extracted_files:
-                    continue
-                filepath = os.path.join(dirname, filename)
-                for line, func, key in extract_from_file(method, filepath,
-                                                         keywords=keywords,
-                                                         options=options):
-                    yield filename, line, func, key
-                extracted_files[filename] = True
+    if options_map is None:
+        options_map = {}
+    absname = os.path.abspath(dirname)
+    for root, dirnames, filenames in os.walk(absname):
+        for subdir in dirnames:
+            if subdir.startswith('.') or subdir.startswith('_'):
+                dirnames.remove(subdir)
+        for filename in filenames:
+            filename = relpath(
+                os.path.join(root, filename).replace(os.sep, '/'),
+                dirname
+            )
+            for pattern, method in method_map.items():
+                if pathmatch(pattern, filename):
+                    filepath = os.path.join(absname, filename)
+                    options = {}
+                    for opattern, odict in options_map.items():
+                        if pathmatch(opattern, filename):
+                            options = odict
+                    for line, func, key in extract_from_file(method, filepath,
+                                                             keywords=keywords,
+                                                             options=options):
+                        yield filepath, line, func, key
 
 def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
                       options=None):
diff --git a/babel/util.py b/babel/util.py
--- a/babel/util.py
+++ b/babel/util.py
@@ -17,29 +17,45 @@
 import os
 import re
 
-__all__ = ['extended_glob', 'relpath', 'LazyProxy', 'UTC']
+__all__ = ['pathmatch', 'relpath', 'LazyProxy', 'UTC']
 __docformat__ = 'restructuredtext en'
 
-def extended_glob(pattern, dirname=''):
-    """Extended pathname pattern expansion.
+def pathmatch(pattern, filename):
+    """Extended pathname pattern matching.
     
-    This function is similar to what is provided by the ``glob`` module in the
-    Python standard library, but also supports a convenience pattern ("**") to
-    match files at any directory level.
+    This function is similar to what is provided by the ``fnmatch`` module in
+    the Python standard library, but:
+    
+     * can match complete (relative or absolute) path names, and not just file
+       names, and
+     * also supports a convenience pattern ("**") to match files at any
+       directory level.
+    
+    Examples:
+    
+    >>> pathmatch('**.py', 'bar.py')
+    True
+    >>> pathmatch('**.py', 'foo/bar/baz.py')
+    True
+    >>> pathmatch('**.py', 'templates/index.html')
+    False
+    >>> pathmatch('**/templates/*.html', 'templates/index.html')
+    True
+    >>> pathmatch('**/templates/*.html', 'templates/foo/bar.html')
+    False
     
     :param pattern: the glob pattern
-    :param dirname: the path to the directory in which to search for files
-                     matching the given pattern
-    :return: an iterator over the absolute filenames of any matching files
-    :rtype: ``iterator``
+    :param filename: the path name of the file to match against
+    :return: `True` if the path name matches the pattern, `False` otherwise
+    :rtype: `bool`
     """
     symbols = {
         '?':   '[^/]',
         '?/':  '[^/]/',
         '*':   '[^/]+',
         '*/':  '[^/]+/',
-        '**':  '(?:.+/)*?',
         '**/': '(?:.+/)*?',
+        '**':  '(?:.+/)*?[^/]+',
     }
     buf = []
     for idx, part in enumerate(re.split('([?*]+/?)', pattern)):
@@ -47,20 +63,7 @@
             buf.append(symbols[part])
         elif part:
             buf.append(re.escape(part))
-    regex = re.compile(''.join(buf) + '$')
-
-    absname = os.path.abspath(dirname)
-    for root, dirnames, filenames in os.walk(absname):
-        for subdir in dirnames:
-            if subdir.startswith('.') or subdir.startswith('_'):
-                dirnames.remove(subdir)
-        for filename in filenames:
-            filepath = relpath(
-                os.path.join(root, filename).replace(os.sep, '/'),
-                dirname
-            )
-            if regex.match(filepath):
-                yield filepath
+    return re.match(''.join(buf) + '$', filename) is not None
 
 
 class LazyProxy(object):
@@ -199,6 +202,13 @@
     def relpath(path, start='.'):
         """Compute the relative path to one path from another.
         
+        >>> relpath('foo/bar.txt', '')
+        'foo/bar.txt'
+        >>> relpath('foo/bar.txt', 'foo')
+        'bar.txt'
+        >>> relpath('foo/bar.txt', 'baz')
+        '../foo/bar.txt'
+        
         :return: the relative path
         :rtype: `basestring`
         """