# HG changeset patch # User cmlenz # Date 1215442156 0 # Node ID 4eca63af0a12eafd72c5485930a9c4822dfb7c62 # Parent c79e7d5bb3a6b2c4fbc5f67db490491c73a56ee8 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -11,11 +11,14 @@ forms where the translations were empty (ticket #97). * The stripping of the comment tags in comments is optional now and is done for each line in a comment. - * A JavaScript message extractor was added. - * Updated to CLDR 1.5.1. + * Added a JavaScript message extractor. + * Updated to CLDR 1.6. * Fixed timezone calculations when formatting datetime and time values. * Added a `get_plural` function into the plurals module that returns the correct plural forms for a locale as tuple. + * Added support for alias definitions in the CLDR data files, meaning that + the chance for items missing in certain locales should be greatly reduced + (ticket #68). Version 0.9.2 diff --git a/babel/core.py b/babel/core.py --- a/babel/core.py +++ b/babel/core.py @@ -223,7 +223,7 @@ def _data(self): if self.__data is None: - self.__data = localedata.load(str(self)) + self.__data = localedata.LocaleDataDict(localedata.load(str(self))) return self.__data _data = property(_data) @@ -326,7 +326,7 @@ Mapping of script codes to translated script names. >>> Locale('de', 'DE').variants['1901'] - u'alte deutsche Rechtschreibung' + u'Alte deutsche Rechtschreibung' :type: `dict` """) @@ -481,7 +481,7 @@ >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight'] u'British Summer Time' >>> Locale('en', 'US').time_zones['America/St_Johns']['city'] - u'St. John\u2019s' + u"St. 
John's" :type: `dict` """) diff --git a/babel/dates.py b/babel/dates.py --- a/babel/dates.py +++ b/babel/dates.py @@ -185,11 +185,11 @@ >>> get_timezone_gmt(dt, 'short', locale='en') u'-0800' - The long format depends on the locale, for example in France a different - string is used for GMT: + The long format depends on the locale, for example in France the acronym + UTC string is used instead of GMT: >>> get_timezone_gmt(dt, 'long', locale='fr_FR') - u'HMG-08:00' + u'UTC-08:00' :param datetime: the ``datetime`` object; if `None`, the current date and time in UTC is used diff --git a/babel/localedata.py b/babel/localedata.py --- a/babel/localedata.py +++ b/babel/localedata.py @@ -23,14 +23,16 @@ import threading except ImportError: import dummy_threading as threading +from UserDict import DictMixin -__all__ = ['exists', 'load'] +__all__ = ['exists', 'list', 'load'] __docformat__ = 'restructuredtext en' _cache = {} _cache_lock = threading.RLock() _dirname = os.path.join(os.path.dirname(__file__), 'localedata') + def exists(name): """Check whether locale data is available for the given locale. @@ -42,6 +44,7 @@ return True return os.path.exists(os.path.join(_dirname, '%s.dat' % name)) + def list(): """Return a list of all locale identifiers for which locale data is available. @@ -54,7 +57,8 @@ os.path.splitext(filename) for filename in os.listdir(_dirname) ] if extension == '.dat' and stem != 'root'] -def load(name): + +def load(name, merge_inherited=True): """Load the locale data for the given locale. 
The locale data is a dictionary that contains much of the data defined by @@ -74,6 +78,8 @@ True :param name: the locale identifier string (or "root") + :param merge_inherited: whether the inherited data should be merged into + the data of the requested locale :return: the locale data :rtype: `dict` :raise `IOError`: if no locale data file is found for the given locale @@ -84,7 +90,7 @@ data = _cache.get(name) if not data: # Load inherited data - if name == 'root': + if name == 'root' or not merge_inherited: data = {} else: parts = name.split('_') @@ -96,7 +102,7 @@ filename = os.path.join(_dirname, '%s.dat' % name) fileobj = open(filename, 'rb') try: - if name != 'root': + if name != 'root' and merge_inherited: merge(data, pickle.load(fileobj)) else: data = pickle.load(fileobj) @@ -107,17 +113,92 @@ finally: _cache_lock.release() + def merge(dict1, dict2): """Merge the data from `dict2` into the `dict1` dictionary, making copies of nested dictionaries. + >>> d = {1: 'foo', 3: 'baz'} + >>> merge(d, {1: 'Foo', 2: 'Bar'}) + >>> d + {1: 'Foo', 2: 'Bar', 3: 'baz'} + :param dict1: the dictionary to merge into :param dict2: the dictionary containing the data that should be merged """ - for key, value in dict2.items(): - if value is not None: - if type(value) is dict: - dict1[key] = dict1.get(key, {}).copy() - merge(dict1[key], value) + for key, val2 in dict2.items(): + if val2 is not None: + val1 = dict1.get(key) + if isinstance(val2, dict): + if val1 is None: + val1 = {} + if isinstance(val1, Alias): + val1 = (val1, val2) + elif isinstance(val1, tuple): + alias, others = val1 + others = others.copy() + merge(others, val2) + val1 = (alias, others) + else: + val1 = val1.copy() + merge(val1, val2) else: - dict1[key] = value + val1 = val2 + dict1[key] = val1 + + +class Alias(object): + """Representation of an alias in the locale data. + + An alias is a value that refers to some other part of the locale data, + as specified by the `keys`. 
+ """ + + def __init__(self, keys): + self.keys = tuple(keys) + + def __repr__(self): + return '<%s %r>' % (type(self).__name__, self.keys) + + def resolve(self, data): + """Resolve the alias based on the given data. + + This is done recursively, so if one alias resolves to a second alias, + that second alias will also be resolved. + + :param data: the locale data + :type data: `dict` + """ + base = data + for key in self.keys: + data = data[key] + if isinstance(data, Alias): + data = data.resolve(base) + return data + + +class LocaleDataDict(DictMixin, dict): + """Dictionary wrapper that automatically resolves aliases to the actual + values. + """ + + def __init__(self, data, base=None): + dict.__init__(self, data) + if base is None: + base = self + self.base = base + + def __getitem__(self, key): + val = dict.__getitem__(self, key) + if isinstance(val, Alias): # resolve an alias + val = val.resolve(self.base) + if isinstance(val, tuple): # Merge a partial dict with an alias + alias, others = val + val = alias.resolve(self.base).copy() + merge(val, others) + if isinstance(val, dict): # Return a nested alias-resolving dict + val = LocaleDataDict(val, base=self.base) + return val + + def copy(self): + return LocaleDataDict(dict.copy(self), base=self.base) diff --git a/babel/numbers.py b/babel/numbers.py --- a/babel/numbers.py +++ b/babel/numbers.py @@ -165,9 +165,9 @@ >>> format_currency(1099.98, 'USD', locale='en_US') u'$1,099.98' >>> format_currency(1099.98, 'USD', locale='es_CO') - u'US$ 1.099,98' + u'US$\\xa01.099,98' >>> format_currency(1099.98, 'EUR', locale='de_DE') - u'1.099,98 \\u20ac' + u'1.099,98\\xa0\\u20ac' The pattern can also be specified explicitly: diff --git a/babel/tests/dates.py b/babel/tests/dates.py --- a/babel/tests/dates.py +++ b/babel/tests/dates.py @@ -29,6 +29,11 @@ fmt = dates.DateTimeFormat(d, locale='cs_CZ') self.assertEqual('1.', fmt['LLL']) + def test_abbreviated_month_alias(self): + d = date(2006, 3, 8) + fmt = 
dates.DateTimeFormat(d, locale='de_DE') + self.assertEqual(u'Mär', fmt['LLL']) + def test_week_of_year_first(self): d = date(2006, 1, 8) fmt = dates.DateTimeFormat(d, locale='de_DE') @@ -187,7 +192,7 @@ tz = timezone('Europe/Paris') t = time(15, 30, tzinfo=tz) fmt = dates.DateTimeFormat(t, locale='fr_FR') - self.assertEqual(u'Heure de l’Europe centrale', fmt['vvvv']) + self.assertEqual(u'heure d’Europe centrale', fmt['vvvv']) def test_hour_formatting(self): l = 'en_US' diff --git a/babel/tests/localedata.py b/babel/tests/localedata.py --- a/babel/tests/localedata.py +++ b/babel/tests/localedata.py @@ -16,9 +16,57 @@ from babel import localedata + +class MergeResolveTestCase(unittest.TestCase): + + def test_merge_items(self): + d = {1: 'foo', 3: 'baz'} + localedata.merge(d, {1: 'Foo', 2: 'Bar'}) + self.assertEqual({1: 'Foo', 2: 'Bar', 3: 'baz'}, d) + + def test_merge_nested_dict(self): + d1 = {'x': {'a': 1, 'b': 2, 'c': 3}} + d2 = {'x': {'a': 1, 'b': 12, 'd': 14}} + localedata.merge(d1, d2) + self.assertEqual({ + 'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14} + }, d1) + + def test_merge_nested_dict_no_overlap(self): + d1 = {'x': {'a': 1, 'b': 2}} + d2 = {'y': {'a': 11, 'b': 12}} + localedata.merge(d1, d2) + self.assertEqual({ + 'x': {'a': 1, 'b': 2}, + 'y': {'a': 11, 'b': 12} + }, d1) + + def test_merge_with_alias_and_resolve(self): + alias = localedata.Alias('x') + d1 = { + 'x': {'a': 1, 'b': 2, 'c': 3}, + 'y': alias + } + d2 = { + 'x': {'a': 1, 'b': 12, 'd': 14}, + 'y': {'b': 22, 'e': 25} + } + localedata.merge(d1, d2) + self.assertEqual({ + 'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14}, + 'y': (alias, {'b': 22, 'e': 25}) + }, d1) + d = localedata.LocaleDataDict(d1) + self.assertEqual({ + 'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14}, + 'y': {'a': 1, 'b': 22, 'c': 3, 'd': 14, 'e': 25} + }, dict(d.items())) + + def suite(): suite = unittest.TestSuite() suite.addTest(doctest.DocTestSuite(localedata)) + suite.addTest(unittest.makeSuite(MergeResolveTestCase)) return suite if __name__ == 
'__main__': diff --git a/scripts/dump_data.py b/scripts/dump_data.py --- a/scripts/dump_data.py +++ b/scripts/dump_data.py @@ -12,12 +12,32 @@ # individuals. For the exact contribution history, see the revision # history and logs, available at http://babel.edgewall.org/log/. +from optparse import OptionParser from pprint import pprint import sys -from babel.localedata import load +from babel.localedata import load, LocaleDataDict -if len(sys.argv) > 2: - pprint(load(sys.argv[1]).get(sys.argv[2])) -else: - pprint(load(sys.argv[1])) + +def main(): + parser = OptionParser(usage='%prog [options] locale [path]') + parser.add_option('--noinherit', action='store_false', dest='inherit', + help='do not merge inherited data into locale data') + parser.add_option('--resolve', action='store_true', dest='resolve', + help='resolve aliases in locale data') + parser.set_defaults(inherit=True, resolve=False) + options, args = parser.parse_args() + if len(args) not in (1, 2): + parser.error('incorrect number of arguments') + + data = load(args[0], merge_inherited=options.inherit) + if options.resolve: + data = LocaleDataDict(data) + if len(args) > 1: + for key in args[1].split('.'): + data = data[key] + pprint(dict(data.items())) + + +if __name__ == '__main__': + main() diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -16,6 +16,7 @@ from optparse import OptionParser import os import pickle +import re import sys try: from xml.etree.ElementTree import parse @@ -26,6 +27,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..')) from babel import dates, numbers +from babel.localedata import Alias weekdays = {'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5, 'sun': 6} @@ -36,6 +38,7 @@ def any(iterable): return filter(None, list(iterable)) + def _text(elem): buf = [elem.text or ''] for child in elem: @@ -43,6 +46,35 @@ buf.append(elem.tail or '') return u''.join(filter(None, buf)).strip() + 
+NAME_RE = re.compile(r"^\w+$") +TYPE_ATTR_RE = re.compile(r"^\w+\[@type='(.*?)'\]$") + +NAME_MAP = { + 'dateFormats': 'date_formats', + 'dateTimeFormats': 'datetime_formats', + 'eraAbbr': 'abbreviated', + 'eraNames': 'wide', + 'eraNarrow': 'narrow', + 'timeFormats': 'time_formats' +} + +def _translate_alias(ctxt, path): + parts = path.split('/') + keys = ctxt[:] + for part in parts: + if part == '..': + keys.pop() + else: + match = TYPE_ATTR_RE.match(part) + if match: + keys.append(match.group(1)) + else: + assert NAME_RE.match(part) + keys.append(NAME_MAP.get(part, part)) + return keys + + def main(): parser = OptionParser(usage='%prog path/to/cldr') options, args = parser.parse_args() @@ -109,6 +141,8 @@ stem, ext = os.path.splitext(filename) if ext != '.xml': continue + #if stem != 'root': + # break tree = parse(os.path.join(srcdir, 'main', filename)) data = {} @@ -133,25 +167,29 @@ territories = data.setdefault('territories', {}) for elem in tree.findall('//territories/territory'): - if 'draft' in elem.attrib and elem.attrib['type'] in territories: + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.attrib['type'] in territories: continue territories[elem.attrib['type']] = _text(elem) languages = data.setdefault('languages', {}) for elem in tree.findall('//languages/language'): - if 'draft' in elem.attrib and elem.attrib['type'] in languages: + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.attrib['type'] in languages: continue languages[elem.attrib['type']] = _text(elem) variants = data.setdefault('variants', {}) for elem in tree.findall('//variants/variant'): - if 'draft' in elem.attrib and elem.attrib['type'] in variants: + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.attrib['type'] in variants: continue variants[elem.attrib['type']] = _text(elem) scripts = data.setdefault('scripts', {}) for elem in tree.findall('//scripts/script'): - if 'draft' in elem.attrib and elem.attrib['type'] in scripts: + if 
('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.attrib['type'] in scripts: continue scripts[elem.attrib['type']] = _text(elem) @@ -182,15 +220,15 @@ zone_formats = data.setdefault('zone_formats', {}) for elem in tree.findall('//timeZoneNames/gmtFormat'): - if 'draft' not in elem.attrib: + if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s') break for elem in tree.findall('//timeZoneNames/regionFormat'): - if 'draft' not in elem.attrib: + if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['region'] = unicode(elem.text).replace('{0}', '%s') break for elem in tree.findall('//timeZoneNames/fallbackFormat'): - if 'draft' not in elem.attrib: + if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['fallback'] = unicode(elem.text) \ .replace('{0}', '%(0)s').replace('{1}', '%(1)s') break @@ -227,88 +265,141 @@ months = data.setdefault('months', {}) for ctxt in calendar.findall('months/monthContext'): - ctxts = months.setdefault(ctxt.attrib['type'], {}) + ctxt_type = ctxt.attrib['type'] + ctxts = months.setdefault(ctxt_type, {}) for width in ctxt.findall('monthWidth'): - widths = ctxts.setdefault(width.attrib['type'], {}) - for elem in width.findall('month'): - if 'draft' in elem.attrib and int(elem.attrib['type']) in widths: - continue - widths[int(elem.attrib.get('type'))] = unicode(elem.text) + width_type = width.attrib['type'] + widths = ctxts.setdefault(width_type, {}) + for elem in width.getiterator(): + if elem.tag == 'month': + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and int(elem.attrib['type']) in widths: + continue + widths[int(elem.attrib.get('type'))] = unicode(elem.text) + elif elem.tag == 'alias': + ctxts[width_type] = Alias( + _translate_alias(['months', ctxt_type, width_type], + elem.attrib['path']) + ) days = data.setdefault('days', {}) for ctxt in calendar.findall('days/dayContext'): - ctxts = 
days.setdefault(ctxt.attrib['type'], {}) + ctxt_type = ctxt.attrib['type'] + ctxts = days.setdefault(ctxt_type, {}) for width in ctxt.findall('dayWidth'): - widths = ctxts.setdefault(width.attrib['type'], {}) - for elem in width.findall('day'): - dtype = weekdays[elem.attrib['type']] - if 'draft' in elem.attrib and dtype in widths: - continue - widths[dtype] = unicode(elem.text) + width_type = width.attrib['type'] + widths = ctxts.setdefault(width_type, {}) + for elem in width.getiterator(): + if elem.tag == 'day': + dtype = weekdays[elem.attrib['type']] + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and dtype in widths: + continue + widths[dtype] = unicode(elem.text) + elif elem.tag == 'alias': + ctxts[width_type] = Alias( + _translate_alias(['days', ctxt_type, width_type], + elem.attrib['path']) + ) quarters = data.setdefault('quarters', {}) for ctxt in calendar.findall('quarters/quarterContext'): + ctxt_type = ctxt.attrib['type'] ctxts = quarters.setdefault(ctxt.attrib['type'], {}) for width in ctxt.findall('quarterWidth'): - widths = ctxts.setdefault(width.attrib['type'], {}) - for elem in width.findall('quarter'): - if 'draft' in elem.attrib and int(elem.attrib['type']) in widths: - continue - widths[int(elem.attrib.get('type'))] = unicode(elem.text) + width_type = width.attrib['type'] + widths = ctxts.setdefault(width_type, {}) + for elem in width.getiterator(): + if elem.tag == 'quarter': + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and int(elem.attrib['type']) in widths: + continue + widths[int(elem.attrib['type'])] = unicode(elem.text) + elif elem.tag == 'alias': + ctxts[width_type] = Alias( + _translate_alias(['quarters', ctxt_type, width_type], + elem.attrib['path']) + ) eras = data.setdefault('eras', {}) for width in calendar.findall('eras/*'): - ewidth = { - 'eraAbbr': 'abbreviated', - 'eraNames': 'wide', - 'eraNarrow': 'narrow', - }[width.tag] - widths = eras.setdefault(ewidth, {}) - for elem in width.findall('era'): - if
'draft' in elem.attrib and int(elem.attrib['type']) in widths: - continue - widths[int(elem.attrib.get('type'))] = unicode(elem.text) + width_type = NAME_MAP[width.tag] + widths = eras.setdefault(width_type, {}) + for elem in width.getiterator(): + if elem.tag == 'era': + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and int(elem.attrib['type']) in widths: + continue + widths[int(elem.attrib.get('type'))] = unicode(elem.text) + elif elem.tag == 'alias': + eras[width_type] = Alias( + _translate_alias(['eras', width_type], + elem.attrib['path']) + ) # AM/PM periods = data.setdefault('periods', {}) for elem in calendar.findall('am'): - if 'draft' in elem.attrib and elem.tag in periods: + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.tag in periods: continue periods[elem.tag] = unicode(elem.text) for elem in calendar.findall('pm'): - if 'draft' in elem.attrib and elem.tag in periods: + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.tag in periods: continue periods[elem.tag] = unicode(elem.text) date_formats = data.setdefault('date_formats', {}) - for elem in calendar.findall('dateFormats/dateFormatLength'): - if 'draft' in elem.attrib and elem.attrib.get('type') in date_formats: - continue - try: - date_formats[elem.attrib.get('type')] = \ - dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern'))) - except ValueError, e: - print>>sys.stderr, 'ERROR: %s' % e + for format in calendar.findall('dateFormats'): + for elem in format.getiterator(): + if elem.tag == 'dateFormatLength': + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.attrib.get('type') in date_formats: + continue + try: + date_formats[elem.attrib.get('type')] = \ + dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern'))) + except ValueError, e: + print>>sys.stderr, 'ERROR: %s' % e + elif elem.tag == 'alias': + data['date_formats'] = Alias(_translate_alias( + ['date_formats'], elem.attrib['path']) + ) time_formats = data.setdefault('time_formats', {}) - for elem
in calendar.findall('timeFormats/timeFormatLength'): - if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats: - continue - try: - time_formats[elem.attrib.get('type')] = \ - dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern'))) - except ValueError, e: - print>>sys.stderr, 'ERROR: %s' % e + for format in calendar.findall('timeFormats'): + for elem in format.getiterator(): + if elem.tag == 'timeFormatLength': + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.attrib.get('type') in time_formats: + continue + try: + time_formats[elem.attrib.get('type')] = \ + dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern'))) + except ValueError, e: + print>>sys.stderr, 'ERROR: %s' % e + elif elem.tag == 'alias': + data['time_formats'] = Alias(_translate_alias( + ['time_formats'], elem.attrib['path']) + ) datetime_formats = data.setdefault('datetime_formats', {}) - for elem in calendar.findall('dateTimeFormats/dateTimeFormatLength'): - if 'draft' in elem.attrib and elem.attrib.get('type') in datetime_formats: - continue - try: - datetime_formats[elem.attrib.get('type')] = \ - unicode(elem.findtext('dateTimeFormat/pattern')) - except ValueError, e: - print>>sys.stderr, 'ERROR: %s' % e + for format in calendar.findall('dateTimeFormats'): + for elem in format.getiterator(): + if elem.tag == 'dateTimeFormatLength': + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.attrib.get('type') in datetime_formats: + continue + try: + datetime_formats[elem.attrib.get('type')] = \ + unicode(elem.findtext('dateTimeFormat/pattern')) + except ValueError, e: + print>>sys.stderr, 'ERROR: %s' % e + elif elem.tag == 'alias': + data['datetime_formats'] = Alias(_translate_alias( + ['datetime_formats'], elem.attrib['path']) + ) # @@ -318,28 +409,32 @@ decimal_formats = data.setdefault('decimal_formats', {}) for elem in tree.findall('//decimalFormats/decimalFormatLength'): - if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats: + if
('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.attrib.get('type') in decimal_formats: continue pattern = unicode(elem.findtext('decimalFormat/pattern')) decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) scientific_formats = data.setdefault('scientific_formats', {}) for elem in tree.findall('//scientificFormats/scientificFormatLength'): - if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats: + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.attrib.get('type') in scientific_formats: continue pattern = unicode(elem.findtext('scientificFormat/pattern')) scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) currency_formats = data.setdefault('currency_formats', {}) for elem in tree.findall('//currencyFormats/currencyFormatLength'): - if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats: + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.attrib.get('type') in currency_formats: continue pattern = unicode(elem.findtext('currencyFormat/pattern')) currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) percent_formats = data.setdefault('percent_formats', {}) for elem in tree.findall('//percentFormats/percentFormatLength'): - if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats: + if ('draft' in elem.attrib or 'alt' in elem.attrib) \ + and elem.attrib.get('type') in percent_formats: continue pattern = unicode(elem.findtext('percentFormat/pattern')) percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) @@ -360,5 +455,6 @@ finally: outfile.close() + if __name__ == '__main__': main()