comparison scripts/import_cldr.py @ 235:d0cd235ede46

Upgraded to CLDR 1.5 and improved timezone formatting.
author: cmlenz
date: Wed, 01 Aug 2007 12:32:20 +0000
parents: a72de8971819
children: c22f292731be
comparison
legend: equal | deleted | inserted | replaced
left column: 234:541b6d630575 | right column: 235:d0cd235ede46
49 if len(args) != 1: 49 if len(args) != 1:
50 parser.error('incorrect number of arguments') 50 parser.error('incorrect number of arguments')
51 51
52 srcdir = args[0] 52 srcdir = args[0]
53 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), 53 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
54 '..', 'babel', 'localedata') 54 '..', 'babel')
55 55
56 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml')) 56 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
57
58 # import global data from the supplemental files
59 global_data = {}
60
61 territory_zones = global_data.setdefault('territory_zones', {})
62 zone_aliases = global_data.setdefault('zone_aliases', {})
63 zone_territories = global_data.setdefault('zone_territories', {})
64 for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
65 tzid = elem.attrib['type']
66 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
67 zone_territories[tzid] = elem.attrib['territory']
68 if 'aliases' in elem.attrib:
69 for alias in elem.attrib['aliases'].split():
70 zone_aliases[alias] = tzid
71
72 outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
73 try:
74 pickle.dump(global_data, outfile, 2)
75 finally:
76 outfile.close()
57 77
58 # build a territory containment mapping for inheritance 78 # build a territory containment mapping for inheritance
59 regions = {} 79 regions = {}
60 for elem in sup.findall('//territoryContainment/group'): 80 for elem in sup.findall('//territoryContainment/group'):
61 regions[elem.attrib['type']] = elem.attrib['contains'].split() 81 regions[elem.attrib['type']] = elem.attrib['contains'].split()
74 filenames = os.listdir(os.path.join(srcdir, 'main')) 94 filenames = os.listdir(os.path.join(srcdir, 'main'))
75 filenames.remove('root.xml') 95 filenames.remove('root.xml')
76 filenames.sort(lambda a,b: len(a)-len(b)) 96 filenames.sort(lambda a,b: len(a)-len(b))
77 filenames.insert(0, 'root.xml') 97 filenames.insert(0, 'root.xml')
78 98
79 dicts = {}
80
81 for filename in filenames: 99 for filename in filenames:
82 print>>sys.stderr, 'Processing input file %r' % filename 100 print>>sys.stderr, 'Processing input file %r' % filename
83 stem, ext = os.path.splitext(filename) 101 stem, ext = os.path.splitext(filename)
84 if ext != '.xml': 102 if ext != '.xml':
85 continue 103 continue
151 169
152 for elem in supelem.findall('weekendEnd'): 170 for elem in supelem.findall('weekendEnd'):
153 territories = elem.attrib['territories'].split() 171 territories = elem.attrib['territories'].split()
154 if territory in territories or any([r in territories for r in regions]): 172 if territory in territories or any([r in territories for r in regions]):
155 week_data['weekend_end'] = weekdays[elem.attrib['day']] 173 week_data['weekend_end'] = weekdays[elem.attrib['day']]
174
175 zone_formats = data.setdefault('zone_formats', {})
176 for elem in tree.findall('//timeZoneNames/gmtFormat'):
177 if 'draft' not in elem.attrib:
178 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
179 break
180 for elem in tree.findall('//timeZoneNames/regionFormat'):
181 if 'draft' not in elem.attrib:
182 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
183 break
184 for elem in tree.findall('//timeZoneNames/fallbackFormat'):
185 if 'draft' not in elem.attrib:
186 zone_formats['fallback'] = unicode(elem.text) \
187 .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
188 break
156 189
157 time_zones = data.setdefault('time_zones', {}) 190 time_zones = data.setdefault('time_zones', {})
158 for elem in tree.findall('//timeZoneNames/zone'): 191 for elem in tree.findall('//timeZoneNames/zone'):
159 info = {} 192 info = {}
160 city = elem.findtext('exemplarCity') 193 city = elem.findtext('exemplarCity')
162 info['city'] = unicode(city) 195 info['city'] = unicode(city)
163 for child in elem.findall('long/*'): 196 for child in elem.findall('long/*'):
164 info.setdefault('long', {})[child.tag] = unicode(child.text) 197 info.setdefault('long', {})[child.tag] = unicode(child.text)
165 for child in elem.findall('short/*'): 198 for child in elem.findall('short/*'):
166 info.setdefault('short', {})[child.tag] = unicode(child.text) 199 info.setdefault('short', {})[child.tag] = unicode(child.text)
200 for child in elem.findall('usesMetazone'):
201 if 'to' not in child.attrib: # FIXME: support old mappings
202 info['use_metazone'] = child.attrib['mzone']
167 time_zones[elem.attrib['type']] = info 203 time_zones[elem.attrib['type']] = info
168 204
169 zone_aliases = data.setdefault('zone_aliases', {}) 205 meta_zones = data.setdefault('meta_zones', {})
170 if stem == 'root': 206 for elem in tree.findall('//timeZoneNames/metazone'):
171 for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'): 207 info = {}
172 if 'aliases' in elem.attrib: 208 city = elem.findtext('exemplarCity')
173 canonical_id = elem.attrib['type'] 209 if city:
174 for alias in elem.attrib['aliases'].split(): 210 info['city'] = unicode(city)
175 zone_aliases[alias] = canonical_id 211 for child in elem.findall('long/*'):
212 info.setdefault('long', {})[child.tag] = unicode(child.text)
213 for child in elem.findall('short/*'):
214 info.setdefault('short', {})[child.tag] = unicode(child.text)
215 info['common'] = elem.findtext('commonlyUsed') == 'true'
216 meta_zones[elem.attrib['type']] = info
176 217
177 for calendar in tree.findall('//calendars/calendar'): 218 for calendar in tree.findall('//calendars/calendar'):
178 if calendar.attrib['type'] != 'gregorian': 219 if calendar.attrib['type'] != 'gregorian':
179 # TODO: support other calendar types 220 # TODO: support other calendar types
180 continue 221 continue
210 continue 251 continue
211 widths[int(elem.attrib.get('type'))] = unicode(elem.text) 252 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
212 253
213 eras = data.setdefault('eras', {}) 254 eras = data.setdefault('eras', {})
214 for width in calendar.findall('eras/*'): 255 for width in calendar.findall('eras/*'):
215 ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag] 256 ewidth = {
257 'eraAbbr': 'abbreviated',
258 'eraNames': 'wide',
259 'eraNarrow': 'narrow',
260 }[width.tag]
216 widths = eras.setdefault(ewidth, {}) 261 widths = eras.setdefault(ewidth, {})
217 for elem in width.findall('era'): 262 for elem in width.findall('era'):
218 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths: 263 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
219 continue 264 continue
220 widths[int(elem.attrib.get('type'))] = unicode(elem.text) 265 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
302 currency_names[elem.attrib['type']] = unicode(name) 347 currency_names[elem.attrib['type']] = unicode(name)
303 symbol = elem.findtext('symbol') 348 symbol = elem.findtext('symbol')
304 if symbol: 349 if symbol:
305 currency_symbols[elem.attrib['type']] = unicode(symbol) 350 currency_symbols[elem.attrib['type']] = unicode(symbol)
306 351
307 dicts[stem] = data 352 outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb')
308 outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
309 try: 353 try:
310 pickle.dump(data, outfile, 2) 354 pickle.dump(data, outfile, 2)
311 finally: 355 finally:
312 outfile.close() 356 outfile.close()
313 357
Copyright (C) 2012-2017 Edgewall Software