changeset 28:695884591af6

* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime. * Move locale data loading from `babel.core` into a separate `babel.localedata` module. * Add currency names and symbols to locale data.
author cmlenz
date Sun, 03 Jun 2007 15:27:27 +0000
parents 8d4cd0856f69
children cbda87af9aa0
files babel/catalog/pofile.py babel/core.py babel/localedata.py babel/numbers.py babel/tests/__init__.py babel/tests/localedata.py scripts/import_cldr.py
diffstat 7 files changed, 179 insertions(+), 76 deletions(-) [+]
line wrap: on
line diff
--- a/babel/catalog/pofile.py
+++ b/babel/catalog/pofile.py
@@ -199,7 +199,6 @@
     :param string: the string to normalize
     :param width: the maximum line width; use `None`, 0, or a negative number
                   to completely disable line wrapping
-    :param charset: the encoding to use for `unicode` strings
     :return: the normalized string
     :rtype: `unicode`
     """
--- a/babel/core.py
+++ b/babel/core.py
@@ -13,12 +13,7 @@
 
 """Core locale representation and locale data access gateway."""
 
-import os
-import pickle
-try:
-    import threading
-except ImportError:
-    import dummy_threading as threading
+from babel import localedata
 
 __all__ = ['Locale', 'negotiate', 'parse']
 __docformat__ = 'restructuredtext en'
@@ -47,35 +42,6 @@
     
     :see: `IETF RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>`_
     """
-    _cache = {}
-    _cache_lock = threading.Lock()
-
-    def __new__(cls, language, territory=None, variant=None):
-        """Create new locale object, or load it from the cache if it had already
-        been instantiated.
-        
-        >>> l1 = Locale('en')
-        >>> l2 = Locale('en')
-        >>> l1 is l2
-        True
-        
-        :param language: the language code
-        :param territory: the territory (country or region) code
-        :param variant: the variant code
-        :return: new or existing `Locale` instance
-        :rtype: `Locale`
-        """
-        key = (language, territory, variant)
-        cls._cache_lock.acquire()
-        try:
-            self = cls._cache.get(key)
-            if self is None:
-                self = super(Locale, cls).__new__(cls, language, territory,
-                                                  variant)
-                cls._cache[key] = self
-            return self
-        finally:
-            self._cache_lock.release()
 
     def __init__(self, language, territory=None, variant=None):
         """Initialize the locale object from the given identifier components.
@@ -93,7 +59,7 @@
         self.language = language
         self.territory = territory
         self.variant = variant
-        self.__data = None
+        self._data = localedata.load(str(self))
 
     def parse(cls, identifier, sep='_'):
         """Create a `Locale` instance for the given locale identifier.
@@ -127,18 +93,6 @@
         return '_'.join(filter(None, [self.language, self.territory,
                                       self.variant]))
 
-    def _data(self):
-        if self.__data is None:
-            filename = os.path.join(os.path.dirname(__file__),
-                                    'localedata/%s.dat' % self)
-            fileobj = open(filename, 'rb')
-            try:
-                self.__data = pickle.load(fileobj)
-            finally:
-                fileobj.close()
-        return self.__data
-    _data = property(_data)
-
     def display_name(self):
         retval = self.languages.get(self.language)
         if self.territory:
@@ -209,6 +163,32 @@
 
     #{ Number Formatting
 
+    def currencies(self):
+        return self._data['currency_names']
+    currencies = property(currencies, doc="""\
+        Mapping of currency codes to translated currency names.
+        
+        >>> Locale('en').currencies['COP']
+        u'Colombian Peso'
+        >>> Locale('de', 'DE').currencies['COP']
+        u'Kolumbianischer Peso'
+        
+        :type: `dict`
+        """)
+
+    def currency_symbols(self):
+        return self._data['currency_symbols']
+    currency_symbols = property(currency_symbols, doc="""\
+        Mapping of currency codes to symbols.
+        
+        >>> Locale('en').currency_symbols['USD']
+        u'US$'
+        >>> Locale('en', 'US').currency_symbols['USD']
+        u'$'
+        
+        :type: `dict`
+        """)
+
     def number_symbols(self):
         return self._data['number_symbols']
     number_symbols = property(number_symbols, doc="""\
new file mode 100644
--- /dev/null
+++ b/babel/localedata.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Low-level locale data access.
+
+:note: The `Locale` class, which uses this module under the hood, provides a
+       more convenient interface for accessing the locale data.
+"""
+
+import copy
+import os
+import pickle
+try:
+    import threading
+except ImportError:
+    import dummy_threading as threading
+
+__all__ = ['load']
+__docformat__ = 'restructuredtext en'
+
+_cache = {}
+_cache_lock = threading.RLock()
+
+def load(name):
+    """Load the locale data for the given locale.
+    
+    The locale data is a dictionary that contains much of the data defined by
+    the Common Locale Data Repository (CLDR). This data is stored as a
+    collection of pickle files inside the ``babel`` package.
+    
+    >>> d = load('en_US')
+    >>> d['languages']['sv']
+    u'Swedish'
+    
+    Note that the results are cached, and subsequent requests for the same
+    locale return the same dictionary:
+    
+    >>> d1 = load('en_US')
+    >>> d2 = load('en_US')
+    >>> d1 is d2
+    True
+    
+    :param name: the locale identifier string (or "root")
+    :return: the locale data
+    :rtype: `dict`
+    :raise `IOError`: if no locale data file is found for the given locale
+                      identifier, or one of the locales it inherits from
+    """
+    _cache_lock.acquire()
+    try:
+        data = _cache.get(name)
+        if not data:
+            # Load inherited data
+            if name == 'root':
+                data = {}
+            else:
+                parts = name.split('_')
+                if len(parts) == 1:
+                    parent = 'root'
+                else:
+                    parent = '_'.join(parts[:-1])
+                data = load(parent).copy()
+            filename = os.path.join(os.path.dirname(__file__),
+                                    'localedata/%s.dat' % name)
+            fileobj = open(filename, 'rb')
+            try:
+                if name != 'root':
+                    merge(data, pickle.load(fileobj))
+                else:
+                    data = pickle.load(fileobj)
+                _cache[name] = data
+            finally:
+                fileobj.close()
+        return data
+    finally:
+        _cache_lock.release()
+
+def merge(dict1, dict2):
+    """Merge the data from `dict2` into the `dict1` dictionary, making copies
+    of nested dictionaries.
+    
+    :param dict1: the dictionary to merge into
+    :param dict2: the dictionary containing the data that should be merged
+    """
+    for key, value in dict2.items():
+        if value:
+            if type(value) is dict:
+                dict1[key] = dict1.get(key, {}).copy()
+                merge(dict1[key], value)
+            else:
+                dict1[key] = value
--- a/babel/numbers.py
+++ b/babel/numbers.py
@@ -106,20 +106,20 @@
         pattern = parse_pattern(format)
     return pattern.apply(number, locale)
 
-def format_currency(value, locale=LC_NUMERIC):
+def format_currency(number, locale=LC_NUMERIC):
     """Returns formatted currency value.
     
     >>> format_currency(1099.98, locale='en_US')
     u'1,099.98'
     
-    :param value: the number to format
+    :param number: the number to format
     :param locale: the `Locale` object or locale identifier
     :return: the formatted currency value
     :rtype: `unicode`
     """
-    return format_decimal(value, locale=locale)
+    return format_decimal(number, locale=locale)
 
-def format_percent(value, format=None, locale=LC_NUMERIC):
+def format_percent(number, format=None, locale=LC_NUMERIC):
     """Returns formatted percent value for a specific locale.
     
     >>> format_percent(0.34, locale='en_US')
@@ -139,9 +139,9 @@
     pattern = locale.percent_formats.get(format)
     if not pattern:
         pattern = parse_pattern(format)
-    return pattern.apply(value, locale)
+    return pattern.apply(number, locale)
 
-def format_scientific(value, locale=LC_NUMERIC):
+def format_scientific(number, locale=LC_NUMERIC):
     raise NotImplementedError
 
 def parse_number(string, locale=LC_NUMERIC):
--- a/babel/tests/__init__.py
+++ b/babel/tests/__init__.py
@@ -14,11 +14,12 @@
 import unittest
 
 def suite():
-    from babel.tests import core, dates, numbers, util
+    from babel.tests import core, dates, localedata, numbers, util
     from babel.catalog import tests as catalog
     suite = unittest.TestSuite()
     suite.addTest(core.suite())
     suite.addTest(dates.suite())
+    suite.addTest(localedata.suite())
     suite.addTest(numbers.suite())
     suite.addTest(util.suite())
     suite.addTest(catalog.suite())
new file mode 100644
--- /dev/null
+++ b/babel/tests/localedata.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+import doctest
+import unittest
+
+from babel import localedata
+
+def suite():
+    suite = unittest.TestSuite()
+    suite.addTest(doctest.DocTestSuite(localedata))
+    return suite
+
+if __name__ == '__main__':
+    unittest.main(defaultTest='suite')
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -33,13 +33,6 @@
     def any(iterable):
         return filter(None, list(iterable))
 
-def _parent(locale):
-    parts = locale.split('_')
-    if len(parts) == 1:
-        return 'root'
-    else:
-        return '_'.join(parts[:-1])
-
 def _text(elem):
     buf = [elem.text or '']
     for child in elem:
@@ -63,7 +56,6 @@
     regions = {}
     for elem in sup.findall('//territoryContainment/group'):
         regions[elem.attrib['type']] = elem.attrib['contains'].split()
-    from pprint import pprint
 
     # Resolve territory containment
     territory_containment = {}
@@ -89,10 +81,8 @@
         if ext != '.xml':
             continue
 
+        tree = parse(os.path.join(srcdir, 'main', filename))
         data = {}
-        if stem != 'root':
-            data.update(copy.deepcopy(dicts[_parent(stem)]))
-        tree = parse(os.path.join(srcdir, 'main', filename))
 
         language = None
         elem = tree.find('//identity/language')
@@ -229,7 +219,7 @@
                     date_formats[elem.attrib.get('type')] = \
                         dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
                 except ValueError, e:
-                    print e
+                    print>>sys.stderr, 'ERROR: %s' % e
 
             time_formats = data.setdefault('time_formats', {})
             for elem in calendar.findall('timeFormats/timeFormatLength'):
@@ -239,7 +229,7 @@
                     time_formats[elem.attrib.get('type')] = \
                         dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
                 except ValueError, e:
-                    print e
+                    print>>sys.stderr, 'ERROR: %s' % e
 
         # <numbers>
 
@@ -251,32 +241,39 @@
         for elem in tree.findall('//decimalFormats/decimalFormatLength'):
             if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
                 continue
-            decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('decimalFormat/pattern')))
+            pattern = unicode(elem.findtext('decimalFormat/pattern'))
+            decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
 
         scientific_formats = data.setdefault('scientific_formats', {})
         for elem in tree.findall('//scientificFormats/scientificFormatLength'):
             if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
                 continue
+            # FIXME: should use numbers.parse_pattern
             scientific_formats[elem.attrib.get('type')] = unicode(elem.findtext('scientificFormat/pattern'))
 
         currency_formats = data.setdefault('currency_formats', {})
         for elem in tree.findall('//currencyFormats/currencyFormatLength'):
             if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
                 continue
+            # FIXME: should use numbers.parse_pattern
             currency_formats[elem.attrib.get('type')] = unicode(elem.findtext('currencyFormat/pattern'))
 
         percent_formats = data.setdefault('percent_formats', {})
         for elem in tree.findall('//percentFormats/percentFormatLength'):
             if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
                 continue
-            percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('percentFormat/pattern')))
+            pattern = unicode(elem.findtext('percentFormat/pattern'))
+            percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
 
-        currencies = data.setdefault('currencies', {})
+        currency_names = data.setdefault('currency_names', {})
+        currency_symbols = data.setdefault('currency_symbols', {})
         for elem in tree.findall('//currencies/currency'):
-            currencies[elem.attrib['type']] = {
-                'display_name': unicode(elem.findtext('displayName')),
-                'symbol': unicode(elem.findtext('symbol'))
-            }
+            name = elem.findtext('displayName')
+            if name:
+                currency_names[elem.attrib['type']] = unicode(name)
+            symbol = elem.findtext('symbol')
+            if symbol:
+                currency_symbols[elem.attrib['type']] = unicode(symbol)
 
         dicts[stem] = data
         outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
Copyright (C) 2012-2017 Edgewall Software