Mercurial > babel > old > mirror
diff scripts/import_cldr.py @ 235:d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
author | cmlenz |
---|---|
date | Wed, 01 Aug 2007 12:32:20 +0000 |
parents | a72de8971819 |
children | c22f292731be |
line wrap: on
line diff
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -51,10 +51,30 @@
 
     srcdir = args[0]
     destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
-                           '..', 'babel', 'localedata')
+                           '..', 'babel')
 
     sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
 
+    # import global data from the supplemental files
+    global_data = {}
+
+    territory_zones = global_data.setdefault('territory_zones', {})
+    zone_aliases = global_data.setdefault('zone_aliases', {})
+    zone_territories = global_data.setdefault('zone_territories', {})
+    for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
+        tzid = elem.attrib['type']
+        territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
+        zone_territories[tzid] = elem.attrib['territory']
+        if 'aliases' in elem.attrib:
+            for alias in elem.attrib['aliases'].split():
+                zone_aliases[alias] = tzid
+
+    outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
+    try:
+        pickle.dump(global_data, outfile, 2)
+    finally:
+        outfile.close()
+
     # build a territory containment mapping for inheritance
     regions = {}
     for elem in sup.findall('//territoryContainment/group'):
@@ -76,8 +96,6 @@
     filenames.sort(lambda a,b: len(a)-len(b))
     filenames.insert(0, 'root.xml')
 
-    dicts = {}
-
     for filename in filenames:
         print>>sys.stderr, 'Processing input file %r' % filename
         stem, ext = os.path.splitext(filename)
@@ -154,6 +172,21 @@
             if territory in territories or any([r in territories for r in regions]):
                 week_data['weekend_end'] = weekdays[elem.attrib['day']]
 
+        zone_formats = data.setdefault('zone_formats', {})
+        for elem in tree.findall('//timeZoneNames/gmtFormat'):
+            if 'draft' not in elem.attrib:
+                zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
+                break
+        for elem in tree.findall('//timeZoneNames/regionFormat'):
+            if 'draft' not in elem.attrib:
+                zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
+                break
+        for elem in tree.findall('//timeZoneNames/fallbackFormat'):
+            if 'draft' not in elem.attrib:
+                zone_formats['fallback'] = unicode(elem.text) \
+                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
+                break
+
         time_zones = data.setdefault('time_zones', {})
         for elem in tree.findall('//timeZoneNames/zone'):
             info = {}
@@ -164,15 +197,23 @@
                 info.setdefault('long', {})[child.tag] = unicode(child.text)
             for child in elem.findall('short/*'):
                 info.setdefault('short', {})[child.tag] = unicode(child.text)
+            for child in elem.findall('usesMetazone'):
+                if 'to' not in child.attrib: # FIXME: support old mappings
+                    info['use_metazone'] = child.attrib['mzone']
             time_zones[elem.attrib['type']] = info
 
-        zone_aliases = data.setdefault('zone_aliases', {})
-        if stem == 'root':
-            for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
-                if 'aliases' in elem.attrib:
-                    canonical_id = elem.attrib['type']
-                    for alias in elem.attrib['aliases'].split():
-                        zone_aliases[alias] = canonical_id
+        meta_zones = data.setdefault('meta_zones', {})
+        for elem in tree.findall('//timeZoneNames/metazone'):
+            info = {}
+            city = elem.findtext('exemplarCity')
+            if city:
+                info['city'] = unicode(city)
+            for child in elem.findall('long/*'):
+                info.setdefault('long', {})[child.tag] = unicode(child.text)
+            for child in elem.findall('short/*'):
+                info.setdefault('short', {})[child.tag] = unicode(child.text)
+            info['common'] = elem.findtext('commonlyUsed') == 'true'
+            meta_zones[elem.attrib['type']] = info
 
         for calendar in tree.findall('//calendars/calendar'):
             if calendar.attrib['type'] != 'gregorian':
@@ -212,7 +253,11 @@
 
             eras = data.setdefault('eras', {})
             for width in calendar.findall('eras/*'):
-                ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag]
+                ewidth = {
+                    'eraAbbr': 'abbreviated',
+                    'eraNames': 'wide',
+                    'eraNarrow': 'narrow',
+                }[width.tag]
                 widths = eras.setdefault(ewidth, {})
                 for elem in width.findall('era'):
                     if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
@@ -304,8 +349,7 @@
             if symbol:
                 currency_symbols[elem.attrib['type']] = unicode(symbol)
 
-        dicts[stem] = data
-        outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
+        outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb')
         try:
             pickle.dump(data, outfile, 2)
         finally: