changeset 235:d0cd235ede46

Upgraded to CLDR 1.5 and improved timezone formatting.
author cmlenz
date Wed, 01 Aug 2007 12:32:20 +0000
parents 541b6d630575
children bf9579c4b0ee
files ChangeLog MANIFEST.in babel/core.py babel/dates.py babel/localedata.py babel/numbers.py babel/tests/dates.py doc/dates.txt scripts/dump_data.py scripts/dump_global.py scripts/import_cldr.py setup.py
diffstat 12 files changed, 454 insertions(+), 80 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -21,6 +21,8 @@
    for multi-line function calls, and other small fixes (tickets #38 and #39).
  * Improved support for detecting Python string formatting fields in message
    strings (ticket #57).
+ * CLDR upgraded to the 1.5 release.
+ * Improved timezone formatting.
 
 
 Version 0.8.1
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,4 @@
+include babel/global.dat
 include babel/localedata/*.dat
 include doc/api/*.*
 include doc/*.html
--- a/babel/core.py
+++ b/babel/core.py
@@ -14,6 +14,7 @@
 """Core locale representation and locale data access."""
 
 import os
+import pickle
 
 from babel import localedata
 
@@ -21,6 +22,33 @@
            'parse_locale']
 __docformat__ = 'restructuredtext en'
 
+_global_data = None
+
+def get_global(key):
+    """
+    Return the dictionary for the given key in the global data.
+    
+    The global data is stored in the ``babel/global.dat`` file and contains
+    information independent of individual locales.
+    
+    >>> get_global('zone_aliases')['UTC']
+    'Etc/GMT'
+    >>> get_global('zone_territories')['Europe/Berlin']
+    'DE'
+    
+    :since: version 0.9
+    """
+    global _global_data
+    if _global_data is None:
+        dirname = os.path.dirname(__file__)
+        filename = os.path.join(dirname, 'global.dat')
+        fileobj = open(filename, 'rb')
+        try:
+            _global_data = pickle.load(fileobj)
+        finally:
+            fileobj.close()
+    return _global_data.get(key, {})
+
 
 class UnknownLocaleError(Exception):
     """Exception thrown when a locale is requested for which no locale data
@@ -305,10 +333,10 @@
     currency_symbols = property(currency_symbols, doc="""\
         Mapping of currency codes to symbols.
         
-        >>> Locale('en').currency_symbols['USD']
-        u'US$'
         >>> Locale('en', 'US').currency_symbols['USD']
         u'$'
+        >>> Locale('es', 'CO').currency_symbols['USD']
+        u'US$'
         
         :type: `dict`
         """)
@@ -432,25 +460,41 @@
     time_zones = property(time_zones, doc="""\
         Locale display names for time zones.
         
-        >>> Locale('en', 'US').time_zones['America/Los_Angeles']['long']['standard']
-        u'Pacific Standard Time'
-        >>> Locale('en', 'US').time_zones['Europe/Dublin']['city']
-        u'Dublin'
+        >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight']
+        u'British Summer Time'
+        >>> Locale('en', 'US').time_zones['America/St_Johns']['city']
+        u'St. John\u2019s'
         
         :type: `dict`
         """)
 
-    def zone_aliases(self):
-        return self._data['zone_aliases']
-    zone_aliases = property(zone_aliases, doc="""\
-        Mapping of time zone aliases to their respective canonical identifer.
+    def meta_zones(self):
+        return self._data['meta_zones']
+    meta_zones = property(meta_zones, doc="""\
+        Locale display names for meta time zones.
         
-        >>> Locale('en').zone_aliases['UTC']
-        'Etc/GMT'
+        Meta time zones are basically groups of different Olson time zones that
+        have the same GMT offset and daylight savings time.
+        
+        >>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight']
+        u'Central European Summer Time'
         
         :type: `dict`
-        :note: this doesn't really belong here, as it does not change between
-               locales
+        :since: version 0.9
+        """)
+
+    def zone_formats(self):
+        return self._data['zone_formats']
+    zone_formats = property(zone_formats, doc=r"""\
+        Patterns related to the formatting of time zones.
+        
+        >>> Locale('en', 'US').zone_formats['fallback']
+        u'%(1)s (%(0)s)'
+        >>> Locale('pt', 'BR').zone_formats['region']
+        u'Hor\xe1rio %s'
+        
+        :type: `dict`
+        :since: version 0.9
         """)
 
     def first_week_day(self):
--- a/babel/dates.py
+++ b/babel/dates.py
@@ -24,11 +24,11 @@
 from datetime import date, datetime, time, timedelta, tzinfo
 import re
 
-from babel.core import default_locale, Locale
+from babel.core import default_locale, get_global, Locale
 from babel.util import UTC
 
-__all__ = ['format_date', 'format_datetime', 'format_time', 'parse_date',
-           'parse_datetime', 'parse_time']
+__all__ = ['format_date', 'format_datetime', 'format_time',
+           'get_timezone_name', 'parse_date', 'parse_datetime', 'parse_time']
 __docformat__ = 'restructuredtext en'
 
 LC_TIME = default_locale('LC_TIME')
@@ -110,7 +110,7 @@
     >>> get_era_names('abbreviated', locale='de_DE')[1]
     u'n. Chr.'
     
-    :param width: the width to use, either "wide" or "abbreviated"
+    :param width: the width to use, either "wide", "abbreviated", or "narrow"
     :param locale: the `Locale` object, or a locale string
     :return: the dictionary of era names
     :rtype: `dict`
@@ -159,7 +159,7 @@
     >>> get_time_format(locale='en_US')
     <DateTimePattern u'h:mm:ss a'>
     >>> get_time_format('full', locale='de_DE')
-    <DateTimePattern u"H:mm' Uhr 'z">
+    <DateTimePattern u'HH:mm:ss v'>
     
     :param format: the format to use, one of "full", "long", "medium", or
                    "short"
@@ -169,6 +169,249 @@
     """
     return Locale.parse(locale).time_formats[format]
 
+def get_timezone_gmt(datetime=None, width='long', locale=LC_TIME):
+    """Return the timezone associated with the given `datetime` object formatted
+    as string indicating the offset from GMT.
+    
+    >>> dt = datetime(2007, 4, 1, 15, 30)
+    >>> get_timezone_gmt(dt)
+    u'GMT+00:00'
+    
+    >>> from pytz import timezone
+    >>> tz = timezone('America/Los_Angeles')
+    >>> dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz)
+    >>> get_timezone_gmt(dt)
+    u'GMT-08:00'
+    >>> get_timezone_gmt(dt, 'short')
+    u'-0800'
+    
+    The long format depends on the locale, for example in France a different
+    string is used for GMT:
+    
+    >>> get_timezone_gmt(dt, 'long', locale='fr_FR')
+    u'HMG-08:00'
+    
+    :param datetime: the ``datetime`` object; if `None`, the current date and
+                     time are used
+    :param width: either "long" or "short"
+    :param locale: the `Locale` object, or a locale string
+    :return: the GMT offset representation of the timezone
+    :rtype: `unicode`
+    :since: version 0.9
+    """
+    if datetime is None:
+        datetime = datetime_.now()
+    elif isinstance(datetime, (int, long)):
+        datetime = datetime_.fromtimestamp(datetime).time()
+    if datetime.tzinfo is None:
+        datetime = datetime.replace(tzinfo=UTC)
+    locale = Locale.parse(locale)
+    # Split the absolute offset: divmod() floors, so -03:30 would read -04:30
+    offset = datetime.utcoffset()
+    seconds = offset.days * 24 * 60 * 60 + offset.seconds
+    hours, seconds = divmod(abs(seconds), 3600)
+    if width == 'short':
+        pattern = u'%s%02d%02d'
+    else:
+        pattern = locale.zone_formats['gmt'] % '%s%02d:%02d'
+    return pattern % (offset.days < 0 and '-' or '+', hours, seconds // 60)
+
+def get_timezone_location(dt_or_tzinfo=None, locale=LC_TIME):
+    """Return a representation of the given timezone using "location format".
+    
+    The result depends on both the local display name of the country and the
+    city associated with the time zone:
+    
+    >>> from pytz import timezone
+    >>> tz = timezone('America/St_Johns')
+    >>> get_timezone_location(tz, locale='de_DE')
+    u"Kanada (St. John's)"
+    >>> tz = timezone('America/Mexico_City')
+    >>> get_timezone_location(tz, locale='de_DE')
+    u'Mexiko (Mexiko-Stadt)'
+    
+    If the timezone is associated with a country that uses only a single
+    timezone, just the localized country name is returned:
+    
+    >>> tz = timezone('Europe/Berlin')
+    >>> get_timezone_location(tz, locale='de_DE')
+    u'Deutschland'
+    
+    :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
+                         the timezone; if `None`, the current date and time in
+                         UTC is assumed
+    :param locale: the `Locale` object, or a locale string
+    :return: the localized timezone name using location format
+    :rtype: `unicode`
+    :since: version 0.9
+    """
+    if dt_or_tzinfo is None or isinstance(dt_or_tzinfo, (int, long)):
+        dt = None
+        tzinfo = UTC
+    elif isinstance(dt_or_tzinfo, (datetime, time)):
+        dt = dt_or_tzinfo
+        if dt.tzinfo is not None:
+            tzinfo = dt.tzinfo
+        else:
+            tzinfo = UTC
+    else:
+        dt = None
+        tzinfo = dt_or_tzinfo
+    locale = Locale.parse(locale)
+
+    if hasattr(tzinfo, 'zone'):
+        zone = tzinfo.zone
+    else:
+        zone = tzinfo.tzname(dt or datetime.utcnow())
+
+    # Get the canonical time-zone code
+    zone = get_global('zone_aliases').get(zone, zone)
+
+    metainfo = {}
+    info = locale.time_zones.get(zone, {})
+    if 'use_metazone' in info:
+        metainfo = locale.meta_zones.get(info['use_metazone'], {})
+
+    # If there is only one timezone for the country, return the localized
+    # country name; zones unknown to the global data have no territory at all
+    region_format = locale.zone_formats['region']
+    territory = get_global('zone_territories').get(zone)
+    territory_name = locale.territories.get(territory, territory)
+    if territory and len(get_global('territory_zones')[territory]) == 1:
+        return region_format % (territory_name)
+
+    # Otherwise, include the city in the output
+    fallback_format = locale.zone_formats['fallback']
+    if 'city' in info:
+        city_name = info['city']
+    elif 'city' in metainfo:
+        city_name = metainfo['city']
+    elif '/' in zone:
+        city_name = zone.split('/', 1)[1].replace('_', ' ')
+    else:
+        city_name = zone.replace('_', ' ')
+
+    if territory_name is None:  # no country known: show the city alone
+        return region_format % city_name
+    names = {'0': city_name, '1': territory_name}
+    return region_format % (fallback_format % names)
+
+def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False,
+                      locale=LC_TIME):
+    r"""Return the localized display name for the given timezone. The timezone
+    may be specified using a ``datetime`` or `tzinfo` object.
+    
+    >>> from pytz import timezone
+    >>> dt = time(15, 30, tzinfo=timezone('America/Los_Angeles'))
+    >>> get_timezone_name(dt, locale='en_US')
+    u'Pacific Standard Time'
+    >>> get_timezone_name(dt, width='short', locale='en_US')
+    u'PST'
+    
+    If this function gets passed only a `tzinfo` object and no concrete
+    `datetime`, the returned display name is independent of daylight savings
+    time. This can be used for example for selecting timezones, or to set the
+    time of events that recur across DST changes:
+    
+    >>> tz = timezone('America/Los_Angeles')
+    >>> get_timezone_name(tz, locale='en_US')
+    u'Pacific Time'
+    >>> get_timezone_name(tz, 'short', locale='en_US')
+    u'PT'
+    
+    If no localized display name for the timezone is available, and the timezone
+    is associated with a country that uses only a single timezone, the name of
+    that country is returned, formatted according to the locale:
+    
+    >>> tz = timezone('Europe/Berlin')
+    >>> get_timezone_name(tz, locale='de_DE')
+    u'Deutschland'
+    >>> get_timezone_name(tz, locale='pt_BR')
+    u'Hor\xe1rio Alemanha'
+    
+    On the other hand, if the country uses multiple timezones, the city is also
+    included in the representation:
+    
+    >>> tz = timezone('America/St_Johns')
+    >>> get_timezone_name(tz, locale='de_DE')
+    u"Kanada (St. John's)"
+    
+    The `uncommon` parameter can be set to `True` to enable the use of timezone
+    representations that are not commonly used by the requested locale. For
+    example, while in French the Central European timezone is usually
+    abbreviated as "HEC", in Canadian French, this abbreviation is not in common
+    use, so a generic name would be chosen by default:
+    
+    >>> tz = timezone('Europe/Paris')
+    >>> get_timezone_name(tz, 'short', locale='fr_CA')
+    u'France'
+    >>> get_timezone_name(tz, 'short', uncommon=True, locale='fr_CA')
+    u'HEC'
+    
+    :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
+                         the timezone; if a ``tzinfo`` object is used, the
+                         resulting display name will be generic, i.e.
+                         independent of daylight savings time; if `None`, the
+                         current date in UTC is assumed
+    :param width: either "long" or "short"
+    :param uncommon: whether even uncommon timezone abbreviations should be used
+    :param locale: the `Locale` object, or a locale string
+    :return: the timezone display name
+    :rtype: `unicode`
+    :since: version 0.9
+    :see:  `LDML Appendix J: Time Zone Display Names
+            <http://www.unicode.org/reports/tr35/#Time_Zone_Fallback>`_
+    """
+    if dt_or_tzinfo is None or isinstance(dt_or_tzinfo, (int, long)):
+        dt = None
+        tzinfo = UTC
+    elif isinstance(dt_or_tzinfo, (datetime, time)):
+        dt = dt_or_tzinfo
+        if dt.tzinfo is not None:
+            tzinfo = dt.tzinfo
+        else:
+            tzinfo = UTC
+    else:
+        dt = None
+        tzinfo = dt_or_tzinfo
+    locale = Locale.parse(locale)
+
+    if hasattr(tzinfo, 'zone'):
+        zone = tzinfo.zone
+    else:
+        zone = tzinfo.tzname(dt or datetime.utcnow())
+
+    # Get the canonical time-zone code
+    zone = get_global('zone_aliases').get(zone, zone)
+
+    metainfo = {}
+    info = locale.time_zones.get(zone, {})
+    # Try explicitly translated zone names first
+    if width in info:
+        if dt is None:
+            field = 'generic'
+        else:
+            field = tzinfo.dst(dt) and 'daylight' or 'standard'
+        if field in info[width]:
+            return info[width][field]
+
+    if 'use_metazone' in info:
+        metainfo = locale.meta_zones.get(info['use_metazone'], {})
+        if width in metainfo and (uncommon or metainfo.get('common')):
+            if dt is None:
+                field = 'generic'
+            else:
+                field = tzinfo.dst(dt) and 'daylight' or 'standard'
+            if field in metainfo[width]:
+                return metainfo[width][field]
+
+    # If we have a concrete datetime, we assume that the result can't be
+    # independent of daylight savings time, so we return the GMT offset
+    if dt is not None:
+        return get_timezone_gmt(dt, width=width, locale=locale)
+
+    return get_timezone_location(dt_or_tzinfo, locale=locale)
+
 def format_date(date=None, format='medium', locale=LC_TIME):
     """Return a date formatted according to the given pattern.
     
@@ -219,9 +462,9 @@
     ``pytz`` package is needed to explicitly specify the time-zone:
     
     >>> from pytz import timezone
-    >>> format_datetime(dt, 'full', tzinfo=timezone('Europe/Berlin'),
-    ...                 locale='de_DE')
-    u'Sonntag, 1. April 2007 17:30 Uhr MESZ'
+    >>> format_datetime(dt, 'full', tzinfo=timezone('Europe/Paris'),
+    ...                 locale='fr_FR')
+    u'dimanche 1 avril 2007 17:30:00 HEC'
     >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz",
     ...                 tzinfo=timezone('US/Eastern'), locale='en')
     u'2007.04.01 AD at 11:30:00 EDT'
@@ -276,9 +519,9 @@
     
     >>> from pytz import timezone
     >>> t = time(15, 30)
-    >>> format_time(t, format='full', tzinfo=timezone('Europe/Berlin'),
-    ...             locale='de_DE')
-    u'17:30 Uhr MESZ'
+    >>> format_time(t, format='full', tzinfo=timezone('Europe/Paris'),
+    ...             locale='fr_FR')
+    u'17:30:00 HEC'
     >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=timezone('US/Eastern'),
     ...             locale='en')
     u"11 o'clock AM, Eastern Daylight Time"
@@ -474,7 +717,7 @@
             return self.format_frac_seconds(num)
         elif char == 'A':
             return self.format_milliseconds_in_day(num)
-        elif char in ('z', 'Z', 'v'):
+        elif char in ('z', 'Z', 'v', 'V'):
             return self.format_timezone(char, num)
         else:
             raise KeyError('Unsupported date/time field %r' % char)
@@ -538,37 +781,19 @@
         return self.format(msecs, num)
 
     def format_timezone(self, char, num):
-        if char in ('z', 'v'):
-            if hasattr(self.value.tzinfo, 'zone'):
-                zone = self.value.tzinfo.zone
-            else:
-                zone = self.value.tzinfo.tzname(self.value)
-
-            # Get the canonical time-zone code
-            zone = self.locale.zone_aliases.get(zone, zone)
-
-            # Try explicitly translated zone names first
-            display = self.locale.time_zones.get(zone)
-            if display:
-                if 'long' in display:
-                    width = {3: 'short', 4: 'long'}[max(3, num)]
-                    if char == 'v':
-                        dst = 'generic'
-                    else:
-                        dst = self.value.dst() and 'daylight' or 'standard'
-                    return display[width][dst]
-                elif 'city' in display:
-                    return display['city']
-
-            else:
-                return zone.split('/', 1)[1]
-
+        width = {3: 'short', 4: 'long'}[max(3, num)]
+        if char == 'z':
+            return get_timezone_name(self.value, width, locale=self.locale)
         elif char == 'Z':
-            offset = self.value.utcoffset()
-            seconds = offset.days * 24 * 60 * 60 + offset.seconds
-            hours, seconds = divmod(seconds, 3600)
-            pattern = {3: '%+03d%02d', 4: 'GMT %+03d:%02d'}[max(3, num)]
-            return pattern % (hours, seconds // 60)
+            return get_timezone_gmt(self.value, width, locale=self.locale)
+        elif char == 'v':
+            return get_timezone_name(self.value.tzinfo, width,
+                                     locale=self.locale)
+        elif char == 'V':
+            if num == 1:
+                return get_timezone_name(self.value.tzinfo, width,
+                                         uncommon=True, locale=self.locale)
+            return get_timezone_location(self.value.tzinfo, locale=self.locale)
 
     def format(self, value, length):
         return ('%%0%dd' % length) % value
@@ -586,7 +811,7 @@
     'h': [1, 2], 'H': [1, 2], 'K': [1, 2], 'k': [1, 2],             # hour
     'm': [1, 2],                                                    # minute
     's': [1, 2], 'S': None, 'A': None,                              # second
-    'z': [1, 2, 3, 4], 'Z': [1, 2, 3, 4], 'v': [1, 4]               # zone
+    'z': [1, 2, 3, 4], 'Z': [1, 2, 3, 4], 'v': [1, 4], 'V': [1, 4]  # zone
 }
 
 def parse_pattern(pattern):
--- a/babel/localedata.py
+++ b/babel/localedata.py
@@ -115,7 +115,7 @@
     :param dict2: the dictionary containing the data that should be merged
     """
     for key, value in dict2.items():
-        if value:
+        if value is not None:
             if type(value) is dict:
                 dict1[key] = dict1.get(key, {}).copy()
                 merge(dict1[key], value)
--- a/babel/numbers.py
+++ b/babel/numbers.py
@@ -127,7 +127,7 @@
     >>> format_currency(1099.98, 'USD', locale='en_US')
     u'$1,099.98'
     >>> format_currency(1099.98, 'USD', locale='es_CO')
-    u'US$1.099,98'
+    u'US$ 1.099,98'
     >>> format_currency(1099.98, 'EUR', locale='de_DE')
     u'1.099,98 \\u20ac'
     
@@ -156,7 +156,7 @@
     >>> format_percent(25.1234, locale='en_US')
     u'2,512%'
     >>> format_percent(25.1234, locale='sv_SE')
-    u'2\\xa0512 %'
+    u'2\\xa0512\\xa0%'
 
     The format pattern can also be specified explicitly:
     
--- a/babel/tests/dates.py
+++ b/babel/tests/dates.py
@@ -111,19 +111,37 @@
         tz = timezone('Europe/Berlin')
         t = time(15, 30, tzinfo=tz)
         fmt = dates.DateTimeFormat(t, locale='de_DE')
-        self.assertEqual('GMT +01:00', fmt['ZZZZ'])
+        self.assertEqual('GMT+01:00', fmt['ZZZZ'])
+
+    def test_timezone_no_uncommon(self):
+        tz = timezone('Europe/Paris')
+        dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz)
+        fmt = dates.DateTimeFormat(dt, locale='fr_CA')
+        self.assertEqual('France', fmt['v'])
+
+    def test_timezone_with_uncommon(self):
+        tz = timezone('Europe/Paris')
+        dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz)
+        fmt = dates.DateTimeFormat(dt, locale='fr_CA')
+        self.assertEqual('HEC', fmt['V'])
+
+    def test_timezone_location_format(self):
+        tz = timezone('Europe/Paris')
+        dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz)
+        fmt = dates.DateTimeFormat(dt, locale='fr_FR')
+        self.assertEqual('France', fmt['VVVV'])
 
     def test_timezone_walltime_short(self):
         tz = timezone('Europe/Paris')
         t = time(15, 30, tzinfo=tz)
-        fmt = dates.DateTimeFormat(t, locale='en_US')
-        self.assertEqual('CET', fmt['v'])
+        fmt = dates.DateTimeFormat(t, locale='fr_FR')
+        self.assertEqual('HEC', fmt['v'])
 
     def test_timezone_walltime_long(self):
         tz = timezone('Europe/Paris')
         t = time(15, 30, tzinfo=tz)
-        fmt = dates.DateTimeFormat(t, locale='en_US')
-        self.assertEqual('Central European Time', fmt['vvvv'])
+        fmt = dates.DateTimeFormat(t, locale='fr_FR')
+        self.assertEqual(u'Heure de l’Europe centrale', fmt['vvvv'])
 
 
 class FormatDateTestCase(unittest.TestCase):
--- a/doc/dates.txt
+++ b/doc/dates.txt
@@ -190,6 +190,10 @@
   |          +--------+--------------------------------------------------------+
   |          | ``v``  | Use one letter for short wall (generic) time, four for |
   |          |        | long wall time.                                        |
+  |          +--------+--------------------------------------------------------+
+  |          | ``V``  | Same as ``z``, except that timezone abbreviations      |
+  |          |        | should be used regardless of whether they are in       |
+  |          |        | common use by the locale.                              |
   +----------+--------+--------------------------------------------------------+
 
 
--- a/scripts/dump_data.py
+++ b/scripts/dump_data.py
@@ -17,4 +17,7 @@
 
 from babel.localedata import load
 
-pprint(load(sys.argv[1]))
+if len(sys.argv) > 2:
+    pprint(load(sys.argv[1]).get(sys.argv[2]))
+else:
+    pprint(load(sys.argv[1]))
new file mode 100755
--- /dev/null
+++ b/scripts/dump_global.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+import os
+import pickle
+from pprint import pprint
+import sys
+
+import babel
+
+dirname = os.path.join(os.path.dirname(babel.__file__))
+filename = os.path.join(dirname, 'global.dat')
+fileobj = open(filename, 'rb')
+try:
+    data = pickle.load(fileobj)
+finally:
+    fileobj.close()
+
+if len(sys.argv) > 1:
+    pprint(data.get(sys.argv[1]))
+else:
+    pprint(data)
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -51,10 +51,30 @@
 
     srcdir = args[0]
     destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
-                           '..', 'babel', 'localedata')
+                           '..', 'babel')
 
     sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
 
+    # import global data from the supplemental files
+    global_data = {}
+
+    territory_zones = global_data.setdefault('territory_zones', {})
+    zone_aliases = global_data.setdefault('zone_aliases', {})
+    zone_territories = global_data.setdefault('zone_territories', {})
+    for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
+        tzid = elem.attrib['type']
+        territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
+        zone_territories[tzid] = elem.attrib['territory']
+        if 'aliases' in elem.attrib:
+            for alias in elem.attrib['aliases'].split():
+                zone_aliases[alias] = tzid
+
+    outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
+    try:
+        pickle.dump(global_data, outfile, 2)
+    finally:
+        outfile.close()
+
     # build a territory containment mapping for inheritance
     regions = {}
     for elem in sup.findall('//territoryContainment/group'):
@@ -76,8 +96,6 @@
     filenames.sort(lambda a,b: len(a)-len(b))
     filenames.insert(0, 'root.xml')
 
-    dicts = {}
-
     for filename in filenames:
         print>>sys.stderr, 'Processing input file %r' % filename
         stem, ext = os.path.splitext(filename)
@@ -154,6 +172,21 @@
             if territory in territories or any([r in territories for r in regions]):
                 week_data['weekend_end'] = weekdays[elem.attrib['day']]
 
+        zone_formats = data.setdefault('zone_formats', {})
+        for elem in tree.findall('//timeZoneNames/gmtFormat'):
+            if 'draft' not in elem.attrib:
+                zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
+                break
+        for elem in tree.findall('//timeZoneNames/regionFormat'):
+            if 'draft' not in elem.attrib:
+                zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
+                break
+        for elem in tree.findall('//timeZoneNames/fallbackFormat'):
+            if 'draft' not in elem.attrib:
+                zone_formats['fallback'] = unicode(elem.text) \
+                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
+                break
+
         time_zones = data.setdefault('time_zones', {})
         for elem in tree.findall('//timeZoneNames/zone'):
             info = {}
@@ -164,15 +197,23 @@
                 info.setdefault('long', {})[child.tag] = unicode(child.text)
             for child in elem.findall('short/*'):
                 info.setdefault('short', {})[child.tag] = unicode(child.text)
+            for child in elem.findall('usesMetazone'):
+                if 'to' not in child.attrib: # FIXME: support old mappings
+                    info['use_metazone'] = child.attrib['mzone']
             time_zones[elem.attrib['type']] = info
 
-        zone_aliases = data.setdefault('zone_aliases', {})
-        if stem == 'root':
-            for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
-                if 'aliases' in elem.attrib:
-                    canonical_id = elem.attrib['type']
-                    for alias in elem.attrib['aliases'].split():
-                        zone_aliases[alias] = canonical_id
+        meta_zones = data.setdefault('meta_zones', {})
+        for elem in tree.findall('//timeZoneNames/metazone'):
+            info = {}
+            city = elem.findtext('exemplarCity')
+            if city:
+                info['city'] = unicode(city)
+            for child in elem.findall('long/*'):
+                info.setdefault('long', {})[child.tag] = unicode(child.text)
+            for child in elem.findall('short/*'):
+                info.setdefault('short', {})[child.tag] = unicode(child.text)
+            info['common'] = elem.findtext('commonlyUsed') == 'true'
+            meta_zones[elem.attrib['type']] = info
 
         for calendar in tree.findall('//calendars/calendar'):
             if calendar.attrib['type'] != 'gregorian':
@@ -212,7 +253,11 @@
 
             eras = data.setdefault('eras', {})
             for width in calendar.findall('eras/*'):
-                ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag]
+                ewidth = {
+                    'eraAbbr': 'abbreviated',
+                    'eraNames': 'wide',
+                    'eraNarrow': 'narrow',
+                }[width.tag]
                 widths = eras.setdefault(ewidth, {})
                 for elem in width.findall('era'):
                     if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
@@ -304,8 +349,7 @@
             if symbol:
                 currency_symbols[elem.attrib['type']] = unicode(symbol)
 
-        dicts[stem] = data
-        outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
+        outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb')
         try:
             pickle.dump(data, outfile, 2)
         finally:
--- a/setup.py
+++ b/setup.py
@@ -130,7 +130,7 @@
         'Topic :: Software Development :: Libraries :: Python Modules',
     ],
     packages = ['babel', 'babel.messages'],
-    package_data = {'babel': ['localedata/*.dat']},
+    package_data = {'babel': ['global.dat', 'localedata/*.dat']},
     test_suite = 'babel.tests.suite',
 
     entry_points = """
Copyright (C) 2012-2017 Edgewall Software