changeset 375:369300a7ebd3

Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6 and substantially improve the `dump_data` script.
author cmlenz
date Mon, 07 Jul 2008 14:49:16 +0000
parents edc3428a34de
children a7980c2e77a6
files ChangeLog babel/core.py babel/dates.py babel/localedata.py babel/numbers.py babel/tests/dates.py babel/tests/localedata.py scripts/dump_data.py scripts/import_cldr.py
diffstat 9 files changed, 344 insertions(+), 91 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -11,11 +11,14 @@
    forms where the translations were empty (ticket #97).
  * The stripping of the comment tags in comments is optional now and
    is done for each line in a comment.
- * A JavaScript message extractor was added.
- * Updated to CLDR 1.5.1.
+ * Added a JavaScript message extractor.
+ * Updated to CLDR 1.6.
  * Fixed timezone calculations when formatting datetime and time values.
  * Added a `get_plural` function into the plurals module that returns the
    correct plural forms for a locale as tuple.
+ * Added support for alias definitions in the CLDR data files, meaning that
+   the chance of items being missing in certain locales should be greatly
+   reduced (ticket #68).
 
 
 Version 0.9.2
--- a/babel/core.py
+++ b/babel/core.py
@@ -223,7 +223,7 @@
 
     def _data(self):
         if self.__data is None:
-            self.__data = localedata.load(str(self))
+            self.__data = localedata.LocaleDataDict(localedata.load(str(self)))
         return self.__data
     _data = property(_data)
 
@@ -326,7 +326,7 @@
         Mapping of script codes to translated script names.
         
         >>> Locale('de', 'DE').variants['1901']
-        u'alte deutsche Rechtschreibung'
+        u'Alte deutsche Rechtschreibung'
         
         :type: `dict`
         """)
@@ -481,7 +481,7 @@
         >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight']
         u'British Summer Time'
         >>> Locale('en', 'US').time_zones['America/St_Johns']['city']
-        u'St. John\u2019s'
+        u"St. John's"
         
         :type: `dict`
         """)
--- a/babel/dates.py
+++ b/babel/dates.py
@@ -185,11 +185,11 @@
     >>> get_timezone_gmt(dt, 'short', locale='en')
     u'-0800'
     
-    The long format depends on the locale, for example in France a different
-    string is used for GMT:
+    The long format depends on the locale, for example in France the acronym
+    UTC is used instead of GMT:
     
     >>> get_timezone_gmt(dt, 'long', locale='fr_FR')
-    u'HMG-08:00'
+    u'UTC-08:00'
     
     :param datetime: the ``datetime`` object; if `None`, the current date and
                      time in UTC is used
--- a/babel/localedata.py
+++ b/babel/localedata.py
@@ -23,14 +23,16 @@
     import threading
 except ImportError:
     import dummy_threading as threading
+from UserDict import DictMixin
 
-__all__ = ['exists', 'load']
+__all__ = ['exists', 'list', 'load']
 __docformat__ = 'restructuredtext en'
 
 _cache = {}
 _cache_lock = threading.RLock()
 _dirname = os.path.join(os.path.dirname(__file__), 'localedata')
 
+
 def exists(name):
     """Check whether locale data is available for the given locale.
     
@@ -42,6 +44,7 @@
         return True
     return os.path.exists(os.path.join(_dirname, '%s.dat' % name))
 
+
 def list():
     """Return a list of all locale identifiers for which locale data is
     available.
@@ -54,7 +57,8 @@
         os.path.splitext(filename) for filename in os.listdir(_dirname)
     ] if extension == '.dat' and stem != 'root']
 
-def load(name):
+
+def load(name, merge_inherited=True):
     """Load the locale data for the given locale.
     
     The locale data is a dictionary that contains much of the data defined by
@@ -74,6 +78,8 @@
     True
     
     :param name: the locale identifier string (or "root")
+    :param merge_inherited: whether the inherited data should be merged into
+                            the data of the requested locale
     :return: the locale data
     :rtype: `dict`
     :raise `IOError`: if no locale data file is found for the given locale
@@ -84,7 +90,7 @@
         data = _cache.get(name)
         if not data:
             # Load inherited data
-            if name == 'root':
+            if name == 'root' or not merge_inherited:
                 data = {}
             else:
                 parts = name.split('_')
@@ -96,7 +102,7 @@
             filename = os.path.join(_dirname, '%s.dat' % name)
             fileobj = open(filename, 'rb')
             try:
-                if name != 'root':
+                if name != 'root' and merge_inherited:
                     merge(data, pickle.load(fileobj))
                 else:
                     data = pickle.load(fileobj)
@@ -107,17 +113,92 @@
     finally:
         _cache_lock.release()
 
+
 def merge(dict1, dict2):
     """Merge the data from `dict2` into the `dict1` dictionary, making copies
     of nested dictionaries.
     
+    >>> d = {1: 'foo', 3: 'baz'}
+    >>> merge(d, {1: 'Foo', 2: 'Bar'})
+    >>> d
+    {1: 'Foo', 2: 'Bar', 3: 'baz'}
+    
     :param dict1: the dictionary to merge into
     :param dict2: the dictionary containing the data that should be merged
     """
-    for key, value in dict2.items():
-        if value is not None:
-            if type(value) is dict:
-                dict1[key] = dict1.get(key, {}).copy()
-                merge(dict1[key], value)
+    for key, val2 in dict2.items():
+        if val2 is not None:  # a None in dict2 never overrides dict1
+            val1 = dict1.get(key)
+            if isinstance(val2, dict):
+                if val1 is None:
+                    val1 = {}
+                if isinstance(val1, Alias):  # keep the alias, pair it with val2
+                    val1 = (val1, val2)
+                elif isinstance(val1, tuple):  # existing (alias, overrides) pair
+                    alias, others = val1
+                    others = others.copy()  # do not mutate the stored overrides
+                    merge(others, val2)
+                    val1 = (alias, others)
+                else:
+                    val1 = val1.copy()  # do not mutate the dict held by dict1
+                    merge(val1, val2)
             else:
-                dict1[key] = value
+                val1 = val2  # non-dict values simply replace the old value
+            dict1[key] = val1
+
+
+class Alias(object):
+    """Pointer to another location inside the locale data.
+
+    The location is stored as `keys`, the sequence of dictionary keys that
+    has to be followed through the data to reach the target.
+    """
+
+    def __init__(self, keys):
+        self.keys = tuple(keys)
+
+    def __repr__(self):
+        return '<%s %r>' % (self.__class__.__name__, self.keys)
+
+    def resolve(self, data):
+        """Look up the value this alias points to in `data`.
+
+        A target that is itself an alias is resolved again against the same
+        `data`, so chains of aliases are followed to the final value.
+
+        :param data: the locale data
+        :type data: `dict`
+        """
+        target = data
+        for key in self.keys:
+            target = target[key]
+        if not isinstance(target, Alias):
+            return target
+        return target.resolve(data)
+
+
+class LocaleDataDict(DictMixin, dict):
+    """A `dict` whose item lookups transparently resolve `Alias` values to
+    the data they refer to.
+    """
+
+    def __init__(self, data, base=None):
+        dict.__init__(self, data)
+        self.base = base
+        if self.base is None:  # a top-level dict resolves against itself
+            self.base = self
+
+    def __getitem__(self, key):
+        item = dict.__getitem__(self, key)
+        if isinstance(item, Alias):  # plain alias: follow it
+            item = item.resolve(self.base)
+        if isinstance(item, tuple):  # alias plus local overrides: combine
+            alias, overrides = item
+            item = alias.resolve(self.base).copy()
+            merge(item, overrides)
+        if not isinstance(item, dict):
+            return item
+        return LocaleDataDict(item, base=self.base)  # nested dicts resolve too
+
+    def copy(self):
+        return LocaleDataDict(dict.copy(self), base=self.base)
--- a/babel/numbers.py
+++ b/babel/numbers.py
@@ -165,9 +165,9 @@
     >>> format_currency(1099.98, 'USD', locale='en_US')
     u'$1,099.98'
     >>> format_currency(1099.98, 'USD', locale='es_CO')
-    u'US$ 1.099,98'
+    u'US$\\xa01.099,98'
     >>> format_currency(1099.98, 'EUR', locale='de_DE')
-    u'1.099,98 \\u20ac'
+    u'1.099,98\\xa0\\u20ac'
     
     The pattern can also be specified explicitly:
     
--- a/babel/tests/dates.py
+++ b/babel/tests/dates.py
@@ -29,6 +29,11 @@
         fmt = dates.DateTimeFormat(d, locale='cs_CZ')
         self.assertEqual('1.', fmt['LLL'])
 
+    def test_abbreviated_month_alias(self):
+        d = date(2006, 3, 8)  # a date in March
+        fmt = dates.DateTimeFormat(d, locale='de_DE')
+        self.assertEqual(u'Mär', fmt['LLL'])  # NOTE(review): presumably alias-backed data; confirm
+
     def test_week_of_year_first(self):
         d = date(2006, 1, 8)
         fmt = dates.DateTimeFormat(d, locale='de_DE')
@@ -187,7 +192,7 @@
         tz = timezone('Europe/Paris')
         t = time(15, 30, tzinfo=tz)
         fmt = dates.DateTimeFormat(t, locale='fr_FR')
-        self.assertEqual(u'Heure de l’Europe centrale', fmt['vvvv'])
+        self.assertEqual(u'heure d’Europe centrale', fmt['vvvv'])
 
     def test_hour_formatting(self):
         l = 'en_US'
--- a/babel/tests/localedata.py
+++ b/babel/tests/localedata.py
@@ -16,9 +16,57 @@
 
 from babel import localedata
 
+
+class MergeResolveTestCase(unittest.TestCase):  # merge() and alias resolution
+
+    def test_merge_items(self):
+        d = {1: 'foo', 3: 'baz'}
+        localedata.merge(d, {1: 'Foo', 2: 'Bar'})
+        self.assertEqual({1: 'Foo', 2: 'Bar', 3: 'baz'}, d)  # second dict wins
+
+    def test_merge_nested_dict(self):
+        d1 = {'x': {'a': 1, 'b': 2, 'c': 3}}
+        d2 = {'x': {'a': 1, 'b': 12, 'd': 14}}
+        localedata.merge(d1, d2)
+        self.assertEqual({
+            'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14}  # keys of both, d2 wins
+        }, d1)
+
+    def test_merge_nested_dict_no_overlap(self):
+        d1 = {'x': {'a': 1, 'b': 2}}
+        d2 = {'y': {'a': 11, 'b': 12}}
+        localedata.merge(d1, d2)
+        self.assertEqual({
+            'x': {'a': 1, 'b': 2},
+            'y': {'a': 11, 'b': 12}
+        }, d1)
+
+    def test_merge_with_alias_and_resolve(self):
+        alias = localedata.Alias('x')  # tuple('x') == ('x',): points at key 'x'
+        d1 = {
+            'x': {'a': 1, 'b': 2, 'c': 3},
+            'y': alias
+        }
+        d2 = {
+            'x': {'a': 1, 'b': 12, 'd': 14},
+            'y': {'b': 22, 'e': 25}
+        }
+        localedata.merge(d1, d2)
+        self.assertEqual({
+            'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14},
+            'y': (alias, {'b': 22, 'e': 25})  # alias kept, overrides beside it
+        }, d1)
+        d = localedata.LocaleDataDict(d1)  # lookups now resolve the alias
+        self.assertEqual({
+            'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14},
+            'y': {'a': 1, 'b': 22, 'c': 3, 'd': 14, 'e': 25}  # 'x' plus overrides
+        }, dict(d.items()))
+
+
 def suite():
     suite = unittest.TestSuite()
     suite.addTest(doctest.DocTestSuite(localedata))
+    suite.addTest(unittest.makeSuite(MergeResolveTestCase))
     return suite
 
 if __name__ == '__main__':
--- a/scripts/dump_data.py
+++ b/scripts/dump_data.py
@@ -12,12 +12,32 @@
 # individuals. For the exact contribution history, see the revision
 # history and logs, available at http://babel.edgewall.org/log/.
 
+from optparse import OptionParser
 from pprint import pprint
 import sys
 
-from babel.localedata import load
+from babel.localedata import load, LocaleDataDict
 
-if len(sys.argv) > 2:
-    pprint(load(sys.argv[1]).get(sys.argv[2]))
-else:
-    pprint(load(sys.argv[1]))
+
+def main():  # dump a locale's data, or only the item at a dotted `path`
+    parser = OptionParser(usage='%prog [options] locale [path]')
+    parser.add_option('--noinherit', action='store_false', dest='inherit',
+                      help='do not merge inherited data into locale data')
+    parser.add_option('--resolve', action='store_true', dest='resolve',
+                      help='resolve aliases in locale data')
+    parser.set_defaults(inherit=True, resolve=False)
+    options, args = parser.parse_args()
+    if len(args) not in (1, 2):
+        parser.error('incorrect number of arguments')
+    data = load(args[0], merge_inherited=options.inherit)
+    if options.resolve:
+        data = LocaleDataDict(data)
+    for key in filter(None, '.'.join(args[1:]).split('.')):  # optional path
+        data = data[key]
+    if isinstance(data, dict):  # a leaf (e.g. a string) has no items()
+        data = dict(data.items())
+    pprint(data)
+
+
+if __name__ == '__main__':
+    main()
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -16,6 +16,7 @@
 from optparse import OptionParser
 import os
 import pickle
+import re
 import sys
 try:
     from xml.etree.ElementTree import parse
@@ -26,6 +27,7 @@
 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..'))
 
 from babel import dates, numbers
+from babel.localedata import Alias
 
 weekdays = {'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5,
             'sun': 6}
@@ -36,6 +38,7 @@
     def any(iterable):
         return filter(None, list(iterable))
 
+
 def _text(elem):
     buf = [elem.text or '']
     for child in elem:
@@ -43,6 +46,35 @@
     buf.append(elem.tail or '')
     return u''.join(filter(None, buf)).strip()
 
+
+NAME_RE = re.compile(r"^\w+$")  # a bare element name
+TYPE_ATTR_RE = re.compile(r"^\w+\[@type='(.*?)'\]$")  # name[@type='...']
+
+NAME_MAP = {  # element names stored under a different key in the data
+    'dateFormats': 'date_formats',
+    'dateTimeFormats': 'datetime_formats',
+    'eraAbbr': 'abbreviated',
+    'eraNames': 'wide',
+    'eraNarrow': 'narrow',
+    'timeFormats': 'time_formats'
+}
+
+def _translate_alias(ctxt, path):
+    """Map an alias `path`, relative to the keys in `ctxt`, to a key list."""
+    keys = ctxt[:]
+    for step in path.split('/'):
+        if step == '..':
+            keys.pop()
+            continue
+        typed = TYPE_ATTR_RE.match(step)
+        if typed:
+            keys.append(typed.group(1))
+            continue
+        assert NAME_RE.match(step)
+        keys.append(NAME_MAP.get(step, step))
+    return keys
+
+
 def main():
     parser = OptionParser(usage='%prog path/to/cldr')
     options, args = parser.parse_args()
@@ -109,6 +141,8 @@
         stem, ext = os.path.splitext(filename)
         if ext != '.xml':
             continue
+        #if stem != 'root':
+        #    break
 
         tree = parse(os.path.join(srcdir, 'main', filename))
         data = {}
@@ -133,25 +167,29 @@
 
         territories = data.setdefault('territories', {})
         for elem in tree.findall('//territories/territory'):
-            if 'draft' in elem.attrib and elem.attrib['type'] in territories:
+            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                    and elem.attrib['type'] in territories:
                 continue
             territories[elem.attrib['type']] = _text(elem)
 
         languages = data.setdefault('languages', {})
         for elem in tree.findall('//languages/language'):
-            if 'draft' in elem.attrib and elem.attrib['type'] in languages:
+            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                    and elem.attrib['type'] in languages:
                 continue
             languages[elem.attrib['type']] = _text(elem)
 
         variants = data.setdefault('variants', {})
         for elem in tree.findall('//variants/variant'):
-            if 'draft' in elem.attrib and elem.attrib['type'] in variants:
+            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                    and elem.attrib['type'] in variants:
                 continue
             variants[elem.attrib['type']] = _text(elem)
 
         scripts = data.setdefault('scripts', {})
         for elem in tree.findall('//scripts/script'):
-            if 'draft' in elem.attrib and elem.attrib['type'] in scripts:
+            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                    and elem.attrib['type'] in scripts:
                 continue
             scripts[elem.attrib['type']] = _text(elem)
 
@@ -182,15 +220,15 @@
 
         zone_formats = data.setdefault('zone_formats', {})
         for elem in tree.findall('//timeZoneNames/gmtFormat'):
-            if 'draft' not in elem.attrib:
+            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
                 break
         for elem in tree.findall('//timeZoneNames/regionFormat'):
-            if 'draft' not in elem.attrib:
+            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
                 break
         for elem in tree.findall('//timeZoneNames/fallbackFormat'):
-            if 'draft' not in elem.attrib:
+            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                 zone_formats['fallback'] = unicode(elem.text) \
                     .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                 break
@@ -227,88 +265,141 @@
 
             months = data.setdefault('months', {})
             for ctxt in calendar.findall('months/monthContext'):
-                ctxts = months.setdefault(ctxt.attrib['type'], {})
+                ctxt_type = ctxt.attrib['type']
+                ctxts = months.setdefault(ctxt_type, {})
                 for width in ctxt.findall('monthWidth'):
-                    widths = ctxts.setdefault(width.attrib['type'], {})
-                    for elem in width.findall('month'):
-                        if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
-                            continue
-                        widths[int(elem.attrib.get('type'))] = unicode(elem.text)
+                    width_type = width.attrib['type']
+                    widths = ctxts.setdefault(width_type, {})
+                    for elem in width.getiterator():
+                        if elem.tag == 'month':
+                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                                    and int(elem.attrib['type']) in widths:
+                                continue
+                            widths[int(elem.attrib.get('type'))] = unicode(elem.text)
+                        elif elem.tag == 'alias':
+                            ctxts[width_type] = Alias(
+                                _translate_alias(['months', ctxt_type, width_type],
+                                                 elem.attrib['path'])
+                            )
 
             days = data.setdefault('days', {})
             for ctxt in calendar.findall('days/dayContext'):
-                ctxts = days.setdefault(ctxt.attrib['type'], {})
+                ctxt_type = ctxt.attrib['type']
+                ctxts = days.setdefault(ctxt_type, {})
                 for width in ctxt.findall('dayWidth'):
-                    widths = ctxts.setdefault(width.attrib['type'], {})
-                    for elem in width.findall('day'):
-                        dtype = weekdays[elem.attrib['type']]
-                        if 'draft' in elem.attrib and dtype in widths:
-                            continue
-                        widths[dtype] = unicode(elem.text)
+                    width_type = width.attrib['type']
+                    widths = ctxts.setdefault(width_type, {})
+                    for elem in width.getiterator():
+                        if elem.tag == 'day':
+                            dtype = weekdays[elem.attrib['type']]
+                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                                    and dtype in widths:
+                                continue  # draft/alt never replaces a value
+                            widths[dtype] = unicode(elem.text)
+                        elif elem.tag == 'alias':
+                            ctxts[width_type] = Alias(
+                                _translate_alias(['days', ctxt_type, width_type],
+                                                 elem.attrib['path'])
+                            )
 
             quarters = data.setdefault('quarters', {})
             for ctxt in calendar.findall('quarters/quarterContext'):
+                ctxt_type = ctxt.attrib['type']
                 ctxts = quarters.setdefault(ctxt.attrib['type'], {})
                 for width in ctxt.findall('quarterWidth'):
-                    widths = ctxts.setdefault(width.attrib['type'], {})
-                    for elem in width.findall('quarter'):
-                        if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
-                            continue
-                        widths[int(elem.attrib.get('type'))] = unicode(elem.text)
+                    width_type = width.attrib['type']
+                    widths = ctxts.setdefault(width_type, {})
+                    for elem in width.getiterator():
+                        if elem.tag == 'quarter':
+                            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                                    and int(elem.attrib['type']) in widths:
+                                continue
+                            widths[int(elem.attrib['type'])] = unicode(elem.text)
+                        elif elem.tag == 'alias':
+                            ctxts[width_type] = Alias(
+                                _translate_alias(['quarters', ctxt_type, width_type],
+                                                 elem.attrib['path'])
+                            )
 
             eras = data.setdefault('eras', {})
             for width in calendar.findall('eras/*'):
-                ewidth = {
-                    'eraAbbr': 'abbreviated',
-                    'eraNames': 'wide',
-                    'eraNarrow': 'narrow',
-                }[width.tag]
-                widths = eras.setdefault(ewidth, {})
-                for elem in width.findall('era'):
-                    if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
-                        continue
-                    widths[int(elem.attrib.get('type'))] = unicode(elem.text)
+                width_type = NAME_MAP[width.tag]
+                widths = eras.setdefault(width_type, {})
+                for elem in width.getiterator():
+                    if elem.tag == 'era':
+                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                                and int(elem.attrib['type']) in widths:
+                            continue
+                        widths[int(elem.attrib.get('type'))] = unicode(elem.text)
+                    elif elem.tag == 'alias':
+                        eras[width_type] = Alias(
+                            _translate_alias(['eras', width_type],
+                                             elem.attrib['path'])
+                        )
 
             # AM/PM
             periods = data.setdefault('periods', {})
             for elem in calendar.findall('am'):
-                if 'draft' in elem.attrib and elem.tag in periods:
+                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                        and elem.tag in periods:
                     continue
                 periods[elem.tag] = unicode(elem.text)
             for elem in calendar.findall('pm'):
-                if 'draft' in elem.attrib and elem.tag in periods:
+                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                        and elem.tag in periods:
                     continue
                 periods[elem.tag] = unicode(elem.text)
 
             date_formats = data.setdefault('date_formats', {})
-            for elem in calendar.findall('dateFormats/dateFormatLength'):
-                if 'draft' in elem.attrib and elem.attrib.get('type') in date_formats:
-                    continue
-                try:
-                    date_formats[elem.attrib.get('type')] = \
-                        dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
-                except ValueError, e:
-                    print>>sys.stderr, 'ERROR: %s' % e
+            for format in calendar.findall('dateFormats'):
+                for elem in format.getiterator():
+                    if elem.tag == 'dateFormatLength':
+                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                                and elem.attrib.get('type') in date_formats:
+                            continue  # draft/alt never replaces a value
+                        try:
+                            date_formats[elem.attrib.get('type')] = \
+                                dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
+                        except ValueError, e:
+                            print>>sys.stderr, 'ERROR: %s' % e
+                    elif elem.tag == 'alias':
+                        data['date_formats'] = Alias(_translate_alias(
+                            ['date_formats'], elem.attrib['path'])
+                        )  # stored in `data`, not merely bound to a local
 
             time_formats = data.setdefault('time_formats', {})
-            for elem in calendar.findall('timeFormats/timeFormatLength'):
-                if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats:
-                    continue
-                try:
-                    time_formats[elem.attrib.get('type')] = \
-                        dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
-                except ValueError, e:
-                    print>>sys.stderr, 'ERROR: %s' % e
+            for format in calendar.findall('timeFormats'):
+                for elem in format.getiterator():
+                    if elem.tag == 'timeFormatLength':
+                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                                and elem.attrib.get('type') in time_formats:
+                            continue  # draft/alt never replaces a value
+                        try:
+                            time_formats[elem.attrib.get('type')] = \
+                                dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
+                        except ValueError, e:
+                            print>>sys.stderr, 'ERROR: %s' % e
+                    elif elem.tag == 'alias':
+                        data['time_formats'] = Alias(_translate_alias(
+                            ['time_formats'], elem.attrib['path'])
+                        )  # stored in `data`, not merely bound to a local
 
             datetime_formats = data.setdefault('datetime_formats', {})
-            for elem in calendar.findall('dateTimeFormats/dateTimeFormatLength'):
-                if 'draft' in elem.attrib and elem.attrib.get('type') in datetime_formats:
-                    continue
-                try:
-                    datetime_formats[elem.attrib.get('type')] = \
-                        unicode(elem.findtext('dateTimeFormat/pattern'))
-                except ValueError, e:
-                    print>>sys.stderr, 'ERROR: %s' % e
+            for format in calendar.findall('dateTimeFormats'):
+                for elem in format.getiterator():
+                    if elem.tag == 'dateTimeFormatLength':
+                        if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                                and elem.attrib.get('type') in datetime_formats:
+                            continue  # draft/alt never replaces a value
+                        try:
+                            datetime_formats[elem.attrib.get('type')] = \
+                                unicode(elem.findtext('dateTimeFormat/pattern'))
+                        except ValueError, e:
+                            print>>sys.stderr, 'ERROR: %s' % e
+                    elif elem.tag == 'alias':
+                        data['datetime_formats'] = Alias(_translate_alias(
+                            ['datetime_formats'], elem.attrib['path'])
+                        )  # stored in `data`, not merely bound to a local
 
         # <numbers>
 
@@ -318,28 +409,32 @@
 
         decimal_formats = data.setdefault('decimal_formats', {})
         for elem in tree.findall('//decimalFormats/decimalFormatLength'):
-            if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
+            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                    and elem.attrib.get('type') in decimal_formats:
                 continue
             pattern = unicode(elem.findtext('decimalFormat/pattern'))
             decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
 
         scientific_formats = data.setdefault('scientific_formats', {})
         for elem in tree.findall('//scientificFormats/scientificFormatLength'):
-            if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
+            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                    and elem.attrib.get('type') in scientific_formats:
                 continue
             pattern = unicode(elem.findtext('scientificFormat/pattern'))
             scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
 
         currency_formats = data.setdefault('currency_formats', {})
         for elem in tree.findall('//currencyFormats/currencyFormatLength'):
-            if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
+            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                    and elem.attrib.get('type') in currency_formats:
                 continue
             pattern = unicode(elem.findtext('currencyFormat/pattern'))
             currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
 
         percent_formats = data.setdefault('percent_formats', {})
         for elem in tree.findall('//percentFormats/percentFormatLength'):
-            if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
+            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+                    and elem.attrib.get('type') in percent_formats:
                 continue
             pattern = unicode(elem.findtext('percentFormat/pattern'))
             percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
@@ -360,5 +455,6 @@
         finally:
             outfile.close()
 
+
 if __name__ == '__main__':
     main()
Copyright (C) 2012-2017 Edgewall Software