babel/old/mirror: scripts/import_cldr.py

comparison scripts/import_cldr.py @ 235:d0cd235ede46

Upgraded to CLDR 1.5 and improved timezone formatting.

author	cmlenz
date	Wed, 01 Aug 2007 12:32:20 +0000
parents	a72de8971819
children	c22f292731be

comparison

Legend: equal, deleted, inserted, replaced

-:541b6d630575
+:d0cd235ede46
 if len(args) != 1:
 parser.error('incorrect number of arguments')
 srcdir = args[0]
 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
-'..', 'babel', 'localedata')
+'..', 'babel')
 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
+# import global data from the supplemental files
+global_data = {}
+territory_zones = global_data.setdefault('territory_zones', {})
+zone_aliases = global_data.setdefault('zone_aliases', {})
+zone_territories = global_data.setdefault('zone_territories', {})
+for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
+tzid = elem.attrib['type']
+territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
+zone_territories[tzid] = elem.attrib['territory']
+if 'aliases' in elem.attrib:
+for alias in elem.attrib['aliases'].split():
+zone_aliases[alias] = tzid
+outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
+try:
+pickle.dump(global_data, outfile, 2)
+finally:
+outfile.close()
 # build a territory containment mapping for inheritance
 regions = {}
 for elem in sup.findall('//territoryContainment/group'):
 regions[elem.attrib['type']] = elem.attrib['contains'].split()
 filenames = os.listdir(os.path.join(srcdir, 'main'))
 filenames.remove('root.xml')
 filenames.sort(lambda a,b: len(a)-len(b))
 filenames.insert(0, 'root.xml')
-dicts = {}
 for filename in filenames:
 print>>sys.stderr, 'Processing input file %r' % filename
 stem, ext = os.path.splitext(filename)
 if ext != '.xml':
 continue
 for elem in supelem.findall('weekendEnd'):
 territories = elem.attrib['territories'].split()
 if territory in territories or any([r in territories for r in regions]):
 week_data['weekend_end'] = weekdays[elem.attrib['day']]
+zone_formats = data.setdefault('zone_formats', {})
+for elem in tree.findall('//timeZoneNames/gmtFormat'):
+if 'draft' not in elem.attrib:
+zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
+break
+for elem in tree.findall('//timeZoneNames/regionFormat'):
+if 'draft' not in elem.attrib:
+zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
+break
+for elem in tree.findall('//timeZoneNames/fallbackFormat'):
+if 'draft' not in elem.attrib:
+zone_formats['fallback'] = unicode(elem.text) \
+.replace('{0}', '%(0)s').replace('{1}', '%(1)s')
+break
 time_zones = data.setdefault('time_zones', {})
 for elem in tree.findall('//timeZoneNames/zone'):
 info = {}
 city = elem.findtext('exemplarCity')
 info['city'] = unicode(city)
 for child in elem.findall('long/*'):
 info.setdefault('long', {})[child.tag] = unicode(child.text)
 for child in elem.findall('short/*'):
 info.setdefault('short', {})[child.tag] = unicode(child.text)
+for child in elem.findall('usesMetazone'):
+if 'to' not in child.attrib: # FIXME: support old mappings
+info['use_metazone'] = child.attrib['mzone']
 time_zones[elem.attrib['type']] = info
-zone_aliases = data.setdefault('zone_aliases', {})
+meta_zones = data.setdefault('meta_zones', {})
-if stem == 'root':
+for elem in tree.findall('//timeZoneNames/metazone'):
-for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
+info = {}
-if 'aliases' in elem.attrib:
+city = elem.findtext('exemplarCity')
-canonical_id = elem.attrib['type']
+if city:
-for alias in elem.attrib['aliases'].split():
+info['city'] = unicode(city)
-zone_aliases[alias] = canonical_id
+for child in elem.findall('long/*'):
+info.setdefault('long', {})[child.tag] = unicode(child.text)
+for child in elem.findall('short/*'):
+info.setdefault('short', {})[child.tag] = unicode(child.text)
+info['common'] = elem.findtext('commonlyUsed') == 'true'
+meta_zones[elem.attrib['type']] = info
 for calendar in tree.findall('//calendars/calendar'):
 if calendar.attrib['type'] != 'gregorian':
 # TODO: support other calendar types
 continue
 continue
 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
 eras = data.setdefault('eras', {})
 for width in calendar.findall('eras/*'):
-ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag]
+ewidth = {
+'eraAbbr': 'abbreviated',
+'eraNames': 'wide',
+'eraNarrow': 'narrow',
+}[width.tag]
 widths = eras.setdefault(ewidth, {})
 for elem in width.findall('era'):
 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
 continue
 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
 currency_names[elem.attrib['type']] = unicode(name)
 symbol = elem.findtext('symbol')
 if symbol:
 currency_symbols[elem.attrib['type']] = unicode(symbol)
-dicts[stem] = data
+outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb')
-outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
 try:
 pickle.dump(data, outfile, 2)
 finally:
 outfile.close()

Mercurial > babel > old > mirror

comparison scripts/import_cldr.py @ 235:d0cd235ede46