# HG changeset patch # User cmlenz # Date 1180884447 0 # Node ID 6041782ea6775de66a1afff964239eded35aee3c # Parent 1b9956f20649f8068c807f472d534dc514164940 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime. * Move locale data loading from `babel.core` into a separate `babel.localedata` module. * Add currency names and symbols to locale data. diff --git a/babel/catalog/pofile.py b/babel/catalog/pofile.py --- a/babel/catalog/pofile.py +++ b/babel/catalog/pofile.py @@ -199,7 +199,6 @@ :param string: the string to normalize :param width: the maximum line width; use `None`, 0, or a negative number to completely disable line wrapping - :param charset: the encoding to use for `unicode` strings :return: the normalized string :rtype: `unicode` """ diff --git a/babel/core.py b/babel/core.py --- a/babel/core.py +++ b/babel/core.py @@ -13,12 +13,7 @@ """Core locale representation and locale data access gateway.""" -import os -import pickle -try: - import threading -except ImportError: - import dummy_threading as threading +from babel import localedata __all__ = ['Locale', 'negotiate', 'parse'] __docformat__ = 'restructuredtext en' @@ -47,35 +42,6 @@ :see: `IETF RFC 3066 `_ """ - _cache = {} - _cache_lock = threading.Lock() - - def __new__(cls, language, territory=None, variant=None): - """Create new locale object, or load it from the cache if it had already - been instantiated. 
- - >>> l1 = Locale('en') - >>> l2 = Locale('en') - >>> l1 is l2 - True - - :param language: the language code - :param territory: the territory (country or region) code - :param variant: the variant code - :return: new or existing `Locale` instance - :rtype: `Locale` - """ - key = (language, territory, variant) - cls._cache_lock.acquire() - try: - self = cls._cache.get(key) - if self is None: - self = super(Locale, cls).__new__(cls, language, territory, - variant) - cls._cache[key] = self - return self - finally: - self._cache_lock.release() def __init__(self, language, territory=None, variant=None): """Initialize the locale object from the given identifier components. @@ -93,7 +59,7 @@ self.language = language self.territory = territory self.variant = variant - self.__data = None + self._data = localedata.load(str(self)) def parse(cls, identifier, sep='_'): """Create a `Locale` instance for the given locale identifier. @@ -127,18 +93,6 @@ return '_'.join(filter(None, [self.language, self.territory, self.variant])) - def _data(self): - if self.__data is None: - filename = os.path.join(os.path.dirname(__file__), - 'localedata/%s.dat' % self) - fileobj = open(filename, 'rb') - try: - self.__data = pickle.load(fileobj) - finally: - fileobj.close() - return self.__data - _data = property(_data) - def display_name(self): retval = self.languages.get(self.language) if self.territory: @@ -209,6 +163,32 @@ #{ Number Formatting + def currencies(self): + return self._data['currency_names'] + currencies = property(currencies, doc="""\ + Mapping of currency codes to translated currency names. + + >>> Locale('en').currencies['COP'] + u'Colombian Peso' + >>> Locale('de', 'DE').currencies['COP'] + u'Kolumbianischer Peso' + + :type: `dict` + """) + + def currency_symbols(self): + return self._data['currency_symbols'] + currency_symbols = property(currency_symbols, doc="""\ + Mapping of currency codes to symbols. 
+ + >>> Locale('en').currency_symbols['USD'] + u'US$' + >>> Locale('en', 'US').currency_symbols['USD'] + u'$' + + :type: `dict` + """) + def number_symbols(self): return self._data['number_symbols'] number_symbols = property(number_symbols, doc="""\ diff --git a/babel/localedata.py b/babel/localedata.py new file mode 100644 --- /dev/null +++ b/babel/localedata.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""Low-level locale data access. + +:note: The `Locale` class, which uses this module under the hood, provides a + more convenient interface for accessing the locale data. +""" + +import copy +import os +import pickle +try: + import threading +except ImportError: + import dummy_threading as threading + +__all__ = ['load'] +__docformat__ = 'restructuredtext en' + +_cache = {} +_cache_lock = threading.RLock() + +def load(name): + """Load the locale data for the given locale. + + The locale data is a dictionary that contains much of the data defined by + the Common Locale Data Repository (CLDR). This data is stored as a + collection of pickle files inside the ``babel`` package. 
+ + >>> d = load('en_US') + >>> d['languages']['sv'] + u'Swedish' + + Note that the results are cached, and subsequent requests for the same + locale return the same dictionary: + + >>> d1 = load('en_US') + >>> d2 = load('en_US') + >>> d1 is d2 + True + + :param name: the locale identifier string (or "root") + :return: the locale data + :rtype: `dict` + :raise `IOError`: if no locale data file is found for the given locale + identifier, or one of the locales it inherits from + """ + _cache_lock.acquire() + try: + data = _cache.get(name) + if not data: + # Load inherited data + if name == 'root': + data = {} + else: + parts = name.split('_') + if len(parts) == 1: + parent = 'root' + else: + parent = '_'.join(parts[:-1]) + data = load(parent).copy() + filename = os.path.join(os.path.dirname(__file__), + 'localedata/%s.dat' % name) + fileobj = open(filename, 'rb') + try: + if name != 'root': + merge(data, pickle.load(fileobj)) + else: + data = pickle.load(fileobj) + _cache[name] = data + finally: + fileobj.close() + return data + finally: + _cache_lock.release() + +def merge(dict1, dict2): + """Merge the data from `dict2` into the `dict1` dictionary, making copies + of nested dictionaries. + + :param dict1: the dictionary to merge into + :param dict2: the dictionary containing the data that should be merged + """ + for key, value in dict2.items(): + if value: + if type(value) is dict: + dict1[key] = dict1.get(key, {}).copy() + merge(dict1[key], value) + else: + dict1[key] = value diff --git a/babel/numbers.py b/babel/numbers.py --- a/babel/numbers.py +++ b/babel/numbers.py @@ -106,20 +106,20 @@ pattern = parse_pattern(format) return pattern.apply(number, locale) -def format_currency(value, locale=LC_NUMERIC): +def format_currency(number, locale=LC_NUMERIC): """Returns formatted currency value. 
>>> format_currency(1099.98, locale='en_US') u'1,099.98' - :param value: the number to format + :param number: the number to format :param locale: the `Locale` object or locale identifier :return: the formatted currency value :rtype: `unicode` """ - return format_decimal(value, locale=locale) + return format_decimal(number, locale=locale) -def format_percent(value, format=None, locale=LC_NUMERIC): +def format_percent(number, format=None, locale=LC_NUMERIC): """Returns formatted percent value for a specific locale. >>> format_percent(0.34, locale='en_US') @@ -139,9 +139,9 @@ pattern = locale.percent_formats.get(format) if not pattern: pattern = parse_pattern(format) - return pattern.apply(value, locale) + return pattern.apply(number, locale) -def format_scientific(value, locale=LC_NUMERIC): +def format_scientific(number, locale=LC_NUMERIC): raise NotImplementedError def parse_number(string, locale=LC_NUMERIC): diff --git a/babel/tests/__init__.py b/babel/tests/__init__.py --- a/babel/tests/__init__.py +++ b/babel/tests/__init__.py @@ -14,11 +14,12 @@ import unittest def suite(): - from babel.tests import core, dates, numbers, util + from babel.tests import core, dates, localedata, numbers, util from babel.catalog import tests as catalog suite = unittest.TestSuite() suite.addTest(core.suite()) suite.addTest(dates.suite()) + suite.addTest(localedata.suite()) suite.addTest(numbers.suite()) suite.addTest(util.suite()) suite.addTest(catalog.suite()) diff --git a/babel/tests/localedata.py b/babel/tests/localedata.py new file mode 100644 --- /dev/null +++ b/babel/tests/localedata.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. 
+# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +import doctest +import unittest + +from babel import localedata + +def suite(): + suite = unittest.TestSuite() + suite.addTest(doctest.DocTestSuite(localedata)) + return suite + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -33,13 +33,6 @@ def any(iterable): return filter(None, list(iterable)) -def _parent(locale): - parts = locale.split('_') - if len(parts) == 1: - return 'root' - else: - return '_'.join(parts[:-1]) - def _text(elem): buf = [elem.text or ''] for child in elem: @@ -63,7 +56,6 @@ regions = {} for elem in sup.findall('//territoryContainment/group'): regions[elem.attrib['type']] = elem.attrib['contains'].split() - from pprint import pprint # Resolve territory containment territory_containment = {} @@ -89,10 +81,8 @@ if ext != '.xml': continue + tree = parse(os.path.join(srcdir, 'main', filename)) data = {} - if stem != 'root': - data.update(copy.deepcopy(dicts[_parent(stem)])) - tree = parse(os.path.join(srcdir, 'main', filename)) language = None elem = tree.find('//identity/language') @@ -229,7 +219,7 @@ date_formats[elem.attrib.get('type')] = \ dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern'))) except ValueError, e: - print e + print>>sys.stderr, 'ERROR: %s' % e time_formats = data.setdefault('time_formats', {}) for elem in calendar.findall('timeFormats/timeFormatLength'): @@ -239,7 +229,7 @@ time_formats[elem.attrib.get('type')] = \ dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern'))) except ValueError, e: - print e + print>>sys.stderr, 'ERROR: %s' % e # @@ -251,32 +241,39 @@ for elem in tree.findall('//decimalFormats/decimalFormatLength'): if 'draft' in elem.attrib and 
elem.attrib.get('type') in decimal_formats: continue - decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('decimalFormat/pattern'))) + pattern = unicode(elem.findtext('decimalFormat/pattern')) + decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) scientific_formats = data.setdefault('scientific_formats', {}) for elem in tree.findall('//scientificFormats/scientificFormatLength'): if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats: continue + # FIXME: should use numbers.parse_pattern scientific_formats[elem.attrib.get('type')] = unicode(elem.findtext('scientificFormat/pattern')) currency_formats = data.setdefault('currency_formats', {}) for elem in tree.findall('//currencyFormats/currencyFormatLength'): if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats: continue + # FIXME: should use numbers.parse_pattern currency_formats[elem.attrib.get('type')] = unicode(elem.findtext('currencyFormat/pattern')) percent_formats = data.setdefault('percent_formats', {}) for elem in tree.findall('//percentFormats/percentFormatLength'): if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats: continue - percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('percentFormat/pattern'))) + pattern = unicode(elem.findtext('percentFormat/pattern')) + percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) - currencies = data.setdefault('currencies', {}) + currency_names = data.setdefault('currency_names', {}) + currency_symbols = data.setdefault('currency_symbols', {}) for elem in tree.findall('//currencies/currency'): - currencies[elem.attrib['type']] = { - 'display_name': unicode(elem.findtext('displayName')), - 'symbol': unicode(elem.findtext('symbol')) - } + name = elem.findtext('displayName') + if name: + currency_names[elem.attrib['type']] = unicode(name) + symbol = elem.findtext('symbol') + if symbol: + 
currency_symbols[elem.attrib['type']] = unicode(symbol) dicts[stem] = data outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')