# HG changeset patch # User cmlenz # Date 1185971540 0 # Node ID bc22f5aef216bead7f6bef5d146f954acc44dad6 # Parent 0e5c3377683307b2b6795df49ab10d469e863f78 Upgraded to CLDR 1.5 and improved timezone formatting. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -21,6 +21,8 @@ for multi-line function calls, and other small fixes (tickets #38 and #39). * Improved support for detecting Python string formatting fields in message strings (ticket #57). + * CLDR upgraded to the 1.5 release. + * Improved timezone formatting. Version 0.8.1 diff --git a/MANIFEST.in b/MANIFEST.in --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ +include babel/global.dat include babel/localedata/*.dat include doc/api/*.* include doc/*.html diff --git a/babel/core.py b/babel/core.py --- a/babel/core.py +++ b/babel/core.py @@ -14,6 +14,7 @@ """Core locale representation and locale data access.""" import os +import pickle from babel import localedata @@ -21,6 +22,33 @@ 'parse_locale'] __docformat__ = 'restructuredtext en' +_global_data = None + +def get_global(key): + """ + Return the dictionary for the given key in the global data. + + The global data is stored in the ``babel/global.dat`` file and contains + information independent of individual locales. + + >>> get_global('zone_aliases')['UTC'] + 'Etc/GMT' + >>> get_global('zone_territories')['Europe/Berlin'] + 'DE' + + :since: version 0.9 + """ + global _global_data + if _global_data is None: + dirname = os.path.join(os.path.dirname(__file__)) + filename = os.path.join(dirname, 'global.dat') + fileobj = open(filename, 'rb') + try: + _global_data = pickle.load(fileobj) + finally: + fileobj.close() + return _global_data.get(key, {}) + class UnknownLocaleError(Exception): """Exception thrown when a locale is requested for which no locale data @@ -305,10 +333,10 @@ currency_symbols = property(currency_symbols, doc="""\ Mapping of currency codes to symbols. 
- >>> Locale('en').currency_symbols['USD'] - u'US$' >>> Locale('en', 'US').currency_symbols['USD'] u'$' + >>> Locale('es', 'CO').currency_symbols['USD'] + u'US$' :type: `dict` """) @@ -432,25 +460,41 @@ time_zones = property(time_zones, doc="""\ Locale display names for time zones. - >>> Locale('en', 'US').time_zones['America/Los_Angeles']['long']['standard'] - u'Pacific Standard Time' - >>> Locale('en', 'US').time_zones['Europe/Dublin']['city'] - u'Dublin' + >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight'] + u'British Summer Time' + >>> Locale('en', 'US').time_zones['America/St_Johns']['city'] + u'St. John\u2019s' :type: `dict` """) - def zone_aliases(self): - return self._data['zone_aliases'] - zone_aliases = property(zone_aliases, doc="""\ - Mapping of time zone aliases to their respective canonical identifer. + def meta_zones(self): + return self._data['meta_zones'] + meta_zones = property(meta_zones, doc="""\ + Locale display names for meta time zones. - >>> Locale('en').zone_aliases['UTC'] - 'Etc/GMT' + Meta time zones are basically groups of different Olson time zones that + have the same GMT offset and daylight savings time. + + >>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight'] + u'Central European Summer Time' :type: `dict` - :note: this doesn't really belong here, as it does not change between - locales + :since: version 0.9 + """) + + def zone_formats(self): + return self._data['zone_formats'] + zone_formats = property(zone_formats, doc=r"""\ + Patterns related to the formatting of time zones. 
+ + >>> Locale('en', 'US').zone_formats['fallback'] + u'%(1)s (%(0)s)' + >>> Locale('pt', 'BR').zone_formats['region'] + u'Hor\xe1rio %s' + + :type: `dict` + :since: version 0.9 """) def first_week_day(self): diff --git a/babel/dates.py b/babel/dates.py --- a/babel/dates.py +++ b/babel/dates.py @@ -24,11 +24,11 @@ from datetime import date, datetime, time, timedelta, tzinfo import re -from babel.core import default_locale, Locale +from babel.core import default_locale, get_global, Locale from babel.util import UTC -__all__ = ['format_date', 'format_datetime', 'format_time', 'parse_date', - 'parse_datetime', 'parse_time'] +__all__ = ['format_date', 'format_datetime', 'format_time', + 'get_timezone_name', 'parse_date', 'parse_datetime', 'parse_time'] __docformat__ = 'restructuredtext en' LC_TIME = default_locale('LC_TIME') @@ -110,7 +110,7 @@ >>> get_era_names('abbreviated', locale='de_DE')[1] u'n. Chr.' - :param width: the width to use, either "wide" or "abbreviated" + :param width: the width to use, either "wide", "abbreviated", or "narrow" :param locale: the `Locale` object, or a locale string :return: the dictionary of era names :rtype: `dict` @@ -159,7 +159,7 @@ >>> get_time_format(locale='en_US') >>> get_time_format('full', locale='de_DE') - + :param format: the format to use, one of "full", "long", "medium", or "short" @@ -169,6 +169,249 @@ """ return Locale.parse(locale).time_formats[format] +def get_timezone_gmt(datetime=None, width='long', locale=LC_TIME): + """Return the timezone associated with the given `datetime` object formatted + as string indicating the offset from GMT. 
+ + >>> dt = datetime(2007, 4, 1, 15, 30) + >>> get_timezone_gmt(dt) + u'GMT+00:00' + + >>> from pytz import timezone + >>> tz = timezone('America/Los_Angeles') + >>> dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz) + >>> get_timezone_gmt(dt) + u'GMT-08:00' + >>> get_timezone_gmt(dt, 'short') + u'-0800' + + The long format depends on the locale, for example in France a different + string is used for GMT: + + >>> get_timezone_gmt(dt, 'long', locale='fr_FR') + u'HMG-08:00' + + :param datetime: the ``datetime`` object; if `None`, the current date and time are + used + :param width: either "long" or "short" + :param locale: the `Locale` object, or a locale string + :return: the GMT offset representation of the timezone + :rtype: `unicode` + :since: version 0.9 + """ + if datetime is None: + datetime = datetime_.now() + elif isinstance(datetime, (int, long)): + datetime = datetime_.fromtimestamp(datetime).time() + if datetime.tzinfo is None: + datetime = datetime.replace(tzinfo=UTC) + locale = Locale.parse(locale) + + offset = datetime.utcoffset() + seconds = offset.days * 24 * 60 * 60 + offset.seconds + hours, seconds = divmod(seconds, 3600) + if width == 'short': + pattern = u'%+03d%02d' + else: + pattern = locale.zone_formats['gmt'] % '%+03d:%02d' + return pattern % (hours, seconds // 60) + +def get_timezone_location(dt_or_tzinfo=None, locale=LC_TIME): + """Return a representation of the given timezone using "location format". + + The result depends on both the local display name of the country and the + city associated with the time zone: + + >>> from pytz import timezone + >>> tz = timezone('America/St_Johns') + >>> get_timezone_location(tz, locale='de_DE') + u"Kanada (St. 
John's)" + >>> tz = timezone('America/Mexico_City') + >>> get_timezone_location(tz, locale='de_DE') + u'Mexiko (Mexiko-Stadt)' + + If the timezone is associated with a country that uses only a single + timezone, just the localized country name is returned: + + >>> tz = timezone('Europe/Berlin') + >>> get_timezone_name(tz, locale='de_DE') + u'Deutschland' + + :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines + the timezone; if `None`, the current date and time in + UTC is assumed + :param locale: the `Locale` object, or a locale string + :return: the localized timezone name using location format + :rtype: `unicode` + :since: version 0.9 + """ + if dt_or_tzinfo is None or isinstance(dt_or_tzinfo, (int, long)): + dt = None + tzinfo = UTC + elif isinstance(dt_or_tzinfo, (datetime, time)): + dt = dt_or_tzinfo + if dt.tzinfo is not None: + tzinfo = dt.tzinfo + else: + tzinfo = UTC + else: + dt = None + tzinfo = dt_or_tzinfo + locale = Locale.parse(locale) + + if hasattr(tzinfo, 'zone'): + zone = tzinfo.zone + else: + zone = tzinfo.tzname(dt or datetime.utcnow()) + + # Get the canonical time-zone code + zone = get_global('zone_aliases').get(zone, zone) + + metainfo = {} + info = locale.time_zones.get(zone, {}) + if 'use_metazone' in info: + metainfo = locale.meta_zones.get(info['use_metazone'], {}) + + # Otherwise, if there is only one timezone for the country, return the + # localized country name + region_format = locale.zone_formats['region'] + territory = get_global('zone_territories').get(zone) + territory_name = locale.territories[territory] + if territory and len(get_global('territory_zones')[territory]) == 1: + return region_format % (territory_name) + + # Otherwise, include the city in the output + fallback_format = locale.zone_formats['fallback'] + if 'city' in info: + city_name = info['city'] + elif 'city' in metainfo: + city_name = metainfo['city'] + elif '/' in zone: + city_name = zone.split('/', 1)[1].replace('_', ' ') + else: + 
city_name = zone.replace('_', ' ') + + return region_format % (fallback_format % { + '0': city_name, + '1': territory_name + }) + +def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False, + locale=LC_TIME): + r"""Return the localized display name for the given timezone. The timezone + may be specified using a ``datetime`` or `tzinfo` object. + + >>> from pytz import timezone + >>> dt = time(15, 30, tzinfo=timezone('America/Los_Angeles')) + >>> get_timezone_name(dt, locale='en_US') + u'Pacific Standard Time' + >>> get_timezone_name(dt, width='short', locale='en_US') + u'PST' + + If this function gets passed only a `tzinfo` object and no concrete + `datetime`, the returned display name is independent of daylight savings + time. This can be used for example for selecting timezones, or to set the + time of events that recur across DST changes: + + >>> tz = timezone('America/Los_Angeles') + >>> get_timezone_name(tz, locale='en_US') + u'Pacific Time' + >>> get_timezone_name(tz, 'short', locale='en_US') + u'PT' + + If no localized display name for the timezone is available, and the timezone + is associated with a country that uses only a single timezone, the name of + that country is returned, formatted according to the locale: + + >>> tz = timezone('Europe/Berlin') + >>> get_timezone_name(tz, locale='de_DE') + u'Deutschland' + >>> get_timezone_name(tz, locale='pt_BR') + u'Hor\xe1rio Alemanha' + + On the other hand, if the country uses multiple timezones, the city is also + included in the representation: + + >>> tz = timezone('America/St_Johns') + >>> get_timezone_name(tz, locale='de_DE') + u"Kanada (St. John's)" + + The `uncommon` parameter can be set to `True` to enable the use of timezone + representations that are not commonly used by the requested locale. 
For + example, while in French the central European timezone is usually + abbreviated as "HEC", in Canadian French, this abbreviation is not in common + use, so a generic name would be chosen by default: + + >>> tz = timezone('Europe/Paris') + >>> get_timezone_name(tz, 'short', locale='fr_CA') + u'France' + >>> get_timezone_name(tz, 'short', uncommon=True, locale='fr_CA') + u'HEC' + + :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines + the timezone; if a ``tzinfo`` object is used, the + resulting display name will be generic, i.e. + independent of daylight savings time; if `None`, the + current date in UTC is assumed + :param width: either "long" or "short" + :param uncommon: whether even uncommon timezone abbreviations should be used + :param locale: the `Locale` object, or a locale string + :return: the timezone display name + :rtype: `unicode` + :since: version 0.9 + :see: `LDML Appendix J: Time Zone Display Names + `_ + """ + if dt_or_tzinfo is None or isinstance(dt_or_tzinfo, (int, long)): + dt = None + tzinfo = UTC + elif isinstance(dt_or_tzinfo, (datetime, time)): + dt = dt_or_tzinfo + if dt.tzinfo is not None: + tzinfo = dt.tzinfo + else: + tzinfo = UTC + else: + dt = None + tzinfo = dt_or_tzinfo + locale = Locale.parse(locale) + + if hasattr(tzinfo, 'zone'): + zone = tzinfo.zone + else: + zone = tzinfo.tzname(dt or datetime.utcnow()) + + # Get the canonical time-zone code + zone = get_global('zone_aliases').get(zone, zone) + + metainfo = {} + info = locale.time_zones.get(zone, {}) + # Try explicitly translated zone names first + if width in info: + if dt is None: + field = 'generic' + else: + field = tzinfo.dst(dt) and 'daylight' or 'standard' + if field in info[width]: + return info[width][field] + + if 'use_metazone' in info: + metainfo = locale.meta_zones.get(info['use_metazone'], {}) + if width in metainfo and (uncommon or metainfo.get('common')): + if dt is None: + field = 'generic' + else: + field = tzinfo.dst(dt) and 
'daylight' or 'standard' + if field in metainfo[width]: + return metainfo[width][field] + + # If we have a concrete datetime, we assume that the result can't be + # independent of daylight savings time, so we return the GMT offset + if dt is not None: + return get_timezone_gmt(time, width=width, locale=locale) + + return get_timezone_location(dt_or_tzinfo, locale=locale) + def format_date(date=None, format='medium', locale=LC_TIME): """Return a date formatted according to the given pattern. @@ -219,9 +462,9 @@ ``pytz`` package is needed to explicitly specify the time-zone: >>> from pytz import timezone - >>> format_datetime(dt, 'full', tzinfo=timezone('Europe/Berlin'), - ... locale='de_DE') - u'Sonntag, 1. April 2007 17:30 Uhr MESZ' + >>> format_datetime(dt, 'full', tzinfo=timezone('Europe/Paris'), + ... locale='fr_FR') + u'dimanche 1 avril 2007 17:30:00 HEC' >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz", ... tzinfo=timezone('US/Eastern'), locale='en') u'2007.04.01 AD at 11:30:00 EDT' @@ -276,9 +519,9 @@ >>> from pytz import timezone >>> t = time(15, 30) - >>> format_time(t, format='full', tzinfo=timezone('Europe/Berlin'), - ... locale='de_DE') - u'17:30 Uhr MESZ' + >>> format_time(t, format='full', tzinfo=timezone('Europe/Paris'), + ... locale='fr_FR') + u'17:30:00 HEC' >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=timezone('US/Eastern'), ... 
locale='en') u"11 o'clock AM, Eastern Daylight Time" @@ -474,7 +717,7 @@ return self.format_frac_seconds(num) elif char == 'A': return self.format_milliseconds_in_day(num) - elif char in ('z', 'Z', 'v'): + elif char in ('z', 'Z', 'v', 'V'): return self.format_timezone(char, num) else: raise KeyError('Unsupported date/time field %r' % char) @@ -538,37 +781,19 @@ return self.format(msecs, num) def format_timezone(self, char, num): - if char in ('z', 'v'): - if hasattr(self.value.tzinfo, 'zone'): - zone = self.value.tzinfo.zone - else: - zone = self.value.tzinfo.tzname(self.value) - - # Get the canonical time-zone code - zone = self.locale.zone_aliases.get(zone, zone) - - # Try explicitly translated zone names first - display = self.locale.time_zones.get(zone) - if display: - if 'long' in display: - width = {3: 'short', 4: 'long'}[max(3, num)] - if char == 'v': - dst = 'generic' - else: - dst = self.value.dst() and 'daylight' or 'standard' - return display[width][dst] - elif 'city' in display: - return display['city'] - - else: - return zone.split('/', 1)[1] - + width = {3: 'short', 4: 'long'}[max(3, num)] + if char == 'z': + return get_timezone_name(self.value, width, locale=self.locale) elif char == 'Z': - offset = self.value.utcoffset() - seconds = offset.days * 24 * 60 * 60 + offset.seconds - hours, seconds = divmod(seconds, 3600) - pattern = {3: '%+03d%02d', 4: 'GMT %+03d:%02d'}[max(3, num)] - return pattern % (hours, seconds // 60) + return get_timezone_gmt(self.value, width) + elif char == 'v': + return get_timezone_name(self.value.tzinfo, width, + locale=self.locale) + elif char == 'V': + if num == 1: + return get_timezone_name(self.value.tzinfo, width, + uncommon=True, locale=self.locale) + return get_timezone_location(self.value.tzinfo, locale=self.locale) def format(self, value, length): return ('%%0%dd' % length) % value @@ -586,7 +811,7 @@ 'h': [1, 2], 'H': [1, 2], 'K': [1, 2], 'k': [1, 2], # hour 'm': [1, 2], # minute 's': [1, 2], 'S': None, 'A': None, # 
second - 'z': [1, 2, 3, 4], 'Z': [1, 2, 3, 4], 'v': [1, 4] # zone + 'z': [1, 2, 3, 4], 'Z': [1, 2, 3, 4], 'v': [1, 4], 'V': [1, 4] # zone } def parse_pattern(pattern): diff --git a/babel/localedata.py b/babel/localedata.py --- a/babel/localedata.py +++ b/babel/localedata.py @@ -115,7 +115,7 @@ :param dict2: the dictionary containing the data that should be merged """ for key, value in dict2.items(): - if value: + if value is not None: if type(value) is dict: dict1[key] = dict1.get(key, {}).copy() merge(dict1[key], value) diff --git a/babel/numbers.py b/babel/numbers.py --- a/babel/numbers.py +++ b/babel/numbers.py @@ -127,7 +127,7 @@ >>> format_currency(1099.98, 'USD', locale='en_US') u'$1,099.98' >>> format_currency(1099.98, 'USD', locale='es_CO') - u'US$1.099,98' + u'US$ 1.099,98' >>> format_currency(1099.98, 'EUR', locale='de_DE') u'1.099,98 \\u20ac' @@ -156,7 +156,7 @@ >>> format_percent(25.1234, locale='en_US') u'2,512%' >>> format_percent(25.1234, locale='sv_SE') - u'2\\xa0512 %' + u'2\\xa0512\\xa0%' The format pattern can also be specified explicitly: diff --git a/babel/tests/dates.py b/babel/tests/dates.py --- a/babel/tests/dates.py +++ b/babel/tests/dates.py @@ -111,19 +111,37 @@ tz = timezone('Europe/Berlin') t = time(15, 30, tzinfo=tz) fmt = dates.DateTimeFormat(t, locale='de_DE') - self.assertEqual('GMT +01:00', fmt['ZZZZ']) + self.assertEqual('GMT+01:00', fmt['ZZZZ']) + + def test_timezone_no_uncommon(self): + tz = timezone('Europe/Paris') + dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz) + fmt = dates.DateTimeFormat(dt, locale='fr_CA') + self.assertEqual('France', fmt['v']) + + def test_timezone_with_uncommon(self): + tz = timezone('Europe/Paris') + dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz) + fmt = dates.DateTimeFormat(dt, locale='fr_CA') + self.assertEqual('HEC', fmt['V']) + + def test_timezone_location_format(self): + tz = timezone('Europe/Paris') + dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz) + fmt = dates.DateTimeFormat(dt, locale='fr_FR') + 
self.assertEqual('France', fmt['VVVV']) def test_timezone_walltime_short(self): tz = timezone('Europe/Paris') t = time(15, 30, tzinfo=tz) - fmt = dates.DateTimeFormat(t, locale='en_US') - self.assertEqual('CET', fmt['v']) + fmt = dates.DateTimeFormat(t, locale='fr_FR') + self.assertEqual('HEC', fmt['v']) def test_timezone_walltime_long(self): tz = timezone('Europe/Paris') t = time(15, 30, tzinfo=tz) - fmt = dates.DateTimeFormat(t, locale='en_US') - self.assertEqual('Central European Time', fmt['vvvv']) + fmt = dates.DateTimeFormat(t, locale='fr_FR') + self.assertEqual(u'Heure de l’Europe centrale', fmt['vvvv']) class FormatDateTestCase(unittest.TestCase): diff --git a/doc/dates.txt b/doc/dates.txt --- a/doc/dates.txt +++ b/doc/dates.txt @@ -190,6 +190,10 @@ | +--------+--------------------------------------------------------+ | | ``v`` | Use one letter for short wall (generic) time, four for | | | | long wall time. | + | +--------+--------------------------------------------------------+ + | | ``V`` | Same as ``z``, except that timezone abbreviations | + | | | should be used regardless of whether they are in | + | | | common use by the locale. | +----------+--------+--------------------------------------------------------+ diff --git a/scripts/dump_data.py b/scripts/dump_data.py --- a/scripts/dump_data.py +++ b/scripts/dump_data.py @@ -17,4 +17,7 @@ from babel.localedata import load -pprint(load(sys.argv[1])) +if len(sys.argv) > 2: + pprint(load(sys.argv[1]).get(sys.argv[2])) +else: + pprint(load(sys.argv[1])) diff --git a/scripts/dump_global.py b/scripts/dump_global.py new file mode 100755 --- /dev/null +++ b/scripts/dump_global.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. 
+# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +import os +import pickle +from pprint import pprint +import sys + +import babel + +dirname = os.path.join(os.path.dirname(babel.__file__)) +filename = os.path.join(dirname, 'global.dat') +fileobj = open(filename, 'rb') +try: + data = pickle.load(fileobj) +finally: + fileobj.close() + +if len(sys.argv) > 1: + pprint(data.get(sys.argv[1])) +else: + pprint(data) diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -51,10 +51,30 @@ srcdir = args[0] destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), - '..', 'babel', 'localedata') + '..', 'babel') sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml')) + # import global data from the supplemental files + global_data = {} + + territory_zones = global_data.setdefault('territory_zones', {}) + zone_aliases = global_data.setdefault('zone_aliases', {}) + zone_territories = global_data.setdefault('zone_territories', {}) + for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'): + tzid = elem.attrib['type'] + territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) + zone_territories[tzid] = elem.attrib['territory'] + if 'aliases' in elem.attrib: + for alias in elem.attrib['aliases'].split(): + zone_aliases[alias] = tzid + + outfile = open(os.path.join(destdir, 'global.dat'), 'wb') + try: + pickle.dump(global_data, outfile, 2) + finally: + outfile.close() + # build a territory containment mapping for inheritance regions = {} for elem in sup.findall('//territoryContainment/group'): @@ -76,8 +96,6 @@ filenames.sort(lambda a,b: len(a)-len(b)) filenames.insert(0, 'root.xml') - dicts = {} - for filename in filenames: print>>sys.stderr, 'Processing input file %r' % filename stem, ext = 
os.path.splitext(filename) @@ -154,6 +172,21 @@ if territory in territories or any([r in territories for r in regions]): week_data['weekend_end'] = weekdays[elem.attrib['day']] + zone_formats = data.setdefault('zone_formats', {}) + for elem in tree.findall('//timeZoneNames/gmtFormat'): + if 'draft' not in elem.attrib: + zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s') + break + for elem in tree.findall('//timeZoneNames/regionFormat'): + if 'draft' not in elem.attrib: + zone_formats['region'] = unicode(elem.text).replace('{0}', '%s') + break + for elem in tree.findall('//timeZoneNames/fallbackFormat'): + if 'draft' not in elem.attrib: + zone_formats['fallback'] = unicode(elem.text) \ + .replace('{0}', '%(0)s').replace('{1}', '%(1)s') + break + time_zones = data.setdefault('time_zones', {}) for elem in tree.findall('//timeZoneNames/zone'): info = {} @@ -164,15 +197,23 @@ info.setdefault('long', {})[child.tag] = unicode(child.text) for child in elem.findall('short/*'): info.setdefault('short', {})[child.tag] = unicode(child.text) + for child in elem.findall('usesMetazone'): + if 'to' not in child.attrib: # FIXME: support old mappings + info['use_metazone'] = child.attrib['mzone'] time_zones[elem.attrib['type']] = info - zone_aliases = data.setdefault('zone_aliases', {}) - if stem == 'root': - for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'): - if 'aliases' in elem.attrib: - canonical_id = elem.attrib['type'] - for alias in elem.attrib['aliases'].split(): - zone_aliases[alias] = canonical_id + meta_zones = data.setdefault('meta_zones', {}) + for elem in tree.findall('//timeZoneNames/metazone'): + info = {} + city = elem.findtext('exemplarCity') + if city: + info['city'] = unicode(city) + for child in elem.findall('long/*'): + info.setdefault('long', {})[child.tag] = unicode(child.text) + for child in elem.findall('short/*'): + info.setdefault('short', {})[child.tag] = unicode(child.text) + info['common'] = elem.findtext('commonlyUsed') == 
'true' + meta_zones[elem.attrib['type']] = info for calendar in tree.findall('//calendars/calendar'): if calendar.attrib['type'] != 'gregorian': @@ -212,7 +253,11 @@ eras = data.setdefault('eras', {}) for width in calendar.findall('eras/*'): - ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag] + ewidth = { + 'eraAbbr': 'abbreviated', + 'eraNames': 'wide', + 'eraNarrow': 'narrow', + }[width.tag] widths = eras.setdefault(ewidth, {}) for elem in width.findall('era'): if 'draft' in elem.attrib and int(elem.attrib['type']) in widths: @@ -304,8 +349,7 @@ if symbol: currency_symbols[elem.attrib['type']] = unicode(symbol) - dicts[stem] = data - outfile = open(os.path.join(destdir, stem + '.dat'), 'wb') + outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb') try: pickle.dump(data, outfile, 2) finally: diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -130,7 +130,7 @@ 'Topic :: Software Development :: Libraries :: Python Modules', ], packages = ['babel', 'babel.messages'], - package_data = {'babel': ['localedata/*.dat']}, + package_data = {'babel': ['global.dat', 'localedata/*.dat']}, test_suite = 'babel.tests.suite', entry_points = """