comparison scripts/import_cldr.py @ 467:0228ac359f84

Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
author jruigrok
date Thu, 08 Apr 2010 09:16:53 +0000
parents fd01923aaf1e
children 97e99dbc63f8
comparison
Legend: equal | deleted | inserted | replaced
Left column: 466:330e91b7feb2 | Right column: 467:0228ac359f84
93 global_data = {} 93 global_data = {}
94 94
95 territory_zones = global_data.setdefault('territory_zones', {}) 95 territory_zones = global_data.setdefault('territory_zones', {})
96 zone_aliases = global_data.setdefault('zone_aliases', {}) 96 zone_aliases = global_data.setdefault('zone_aliases', {})
97 zone_territories = global_data.setdefault('zone_territories', {}) 97 zone_territories = global_data.setdefault('zone_territories', {})
98 for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'): 98 for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
99 tzid = elem.attrib['type'] 99 tzid = elem.attrib['type']
100 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) 100 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
101 zone_territories[tzid] = elem.attrib['territory'] 101 zone_territories[tzid] = elem.attrib['territory']
102 if 'aliases' in elem.attrib: 102 if 'aliases' in elem.attrib:
103 for alias in elem.attrib['aliases'].split(): 103 for alias in elem.attrib['aliases'].split():
104 zone_aliases[alias] = tzid 104 zone_aliases[alias] = tzid
105 105
106 # Import Metazone mapping 106 # Import Metazone mapping
107 meta_zones = global_data.setdefault('meta_zones', {}) 107 meta_zones = global_data.setdefault('meta_zones', {})
108 tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml')) 108 tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml'))
109 for elem in tzsup.findall('//timezone'): 109 for elem in tzsup.findall('.//timezone'):
110 for child in elem.findall('usesMetazone'): 110 for child in elem.findall('usesMetazone'):
111 if 'to' not in child.attrib: # FIXME: support old mappings 111 if 'to' not in child.attrib: # FIXME: support old mappings
112 meta_zones[elem.attrib['type']] = child.attrib['mzone'] 112 meta_zones[elem.attrib['type']] = child.attrib['mzone']
113 113
114 outfile = open(os.path.join(destdir, 'global.dat'), 'wb') 114 outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
117 finally: 117 finally:
118 outfile.close() 118 outfile.close()
119 119
120 # build a territory containment mapping for inheritance 120 # build a territory containment mapping for inheritance
121 regions = {} 121 regions = {}
122 for elem in sup.findall('//territoryContainment/group'): 122 for elem in sup.findall('.//territoryContainment/group'):
123 regions[elem.attrib['type']] = elem.attrib['contains'].split() 123 regions[elem.attrib['type']] = elem.attrib['contains'].split()
124 124
125 # Resolve territory containment 125 # Resolve territory containment
126 territory_containment = {} 126 territory_containment = {}
127 region_items = regions.items() 127 region_items = regions.items()
134 containers.add(group) 134 containers.add(group)
135 135
136 # prepare the per-locale plural rules definitions 136 # prepare the per-locale plural rules definitions
137 plural_rules = {} 137 plural_rules = {}
138 prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml')) 138 prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml'))
139 for elem in prsup.findall('//plurals/pluralRules'): 139 for elem in prsup.findall('.//plurals/pluralRules'):
140 rules = [] 140 rules = []
141 for rule in elem.findall('pluralRule'): 141 for rule in elem.findall('pluralRule'):
142 rules.append((rule.attrib['count'], unicode(rule.text))) 142 rules.append((rule.attrib['count'], unicode(rule.text)))
143 pr = PluralRule(rules) 143 pr = PluralRule(rules)
144 for locale in elem.attrib['locales'].split(): 144 for locale in elem.attrib['locales'].split():
157 print>>sys.stderr, 'Processing input file %r' % filename 157 print>>sys.stderr, 'Processing input file %r' % filename
158 tree = parse(os.path.join(srcdir, 'main', filename)) 158 tree = parse(os.path.join(srcdir, 'main', filename))
159 data = {} 159 data = {}
160 160
161 language = None 161 language = None
162 elem = tree.find('//identity/language') 162 elem = tree.find('.//identity/language')
163 if elem is not None: 163 if elem is not None:
164 language = elem.attrib['type'] 164 language = elem.attrib['type']
165 print>>sys.stderr, ' Language: %r' % language 165 print>>sys.stderr, ' Language: %r' % language
166 166
167 territory = None 167 territory = None
168 elem = tree.find('//identity/territory') 168 elem = tree.find('.//identity/territory')
169 if elem is not None: 169 if elem is not None:
170 territory = elem.attrib['type'] 170 territory = elem.attrib['type']
171 else: 171 else:
172 territory = '001' # world 172 territory = '001' # world
173 print>>sys.stderr, ' Territory: %r' % territory 173 print>>sys.stderr, ' Territory: %r' % territory
183 data['plural_form'] = plural_rules[locale_id] 183 data['plural_form'] = plural_rules[locale_id]
184 184
185 # <localeDisplayNames> 185 # <localeDisplayNames>
186 186
187 territories = data.setdefault('territories', {}) 187 territories = data.setdefault('territories', {})
188 for elem in tree.findall('//territories/territory'): 188 for elem in tree.findall('.//territories/territory'):
189 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 189 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
190 and elem.attrib['type'] in territories: 190 and elem.attrib['type'] in territories:
191 continue 191 continue
192 territories[elem.attrib['type']] = _text(elem) 192 territories[elem.attrib['type']] = _text(elem)
193 193
194 languages = data.setdefault('languages', {}) 194 languages = data.setdefault('languages', {})
195 for elem in tree.findall('//languages/language'): 195 for elem in tree.findall('.//languages/language'):
196 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 196 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
197 and elem.attrib['type'] in languages: 197 and elem.attrib['type'] in languages:
198 continue 198 continue
199 languages[elem.attrib['type']] = _text(elem) 199 languages[elem.attrib['type']] = _text(elem)
200 200
201 variants = data.setdefault('variants', {}) 201 variants = data.setdefault('variants', {})
202 for elem in tree.findall('//variants/variant'): 202 for elem in tree.findall('.//variants/variant'):
203 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 203 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
204 and elem.attrib['type'] in variants: 204 and elem.attrib['type'] in variants:
205 continue 205 continue
206 variants[elem.attrib['type']] = _text(elem) 206 variants[elem.attrib['type']] = _text(elem)
207 207
208 scripts = data.setdefault('scripts', {}) 208 scripts = data.setdefault('scripts', {})
209 for elem in tree.findall('//scripts/script'): 209 for elem in tree.findall('.//scripts/script'):
210 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 210 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
211 and elem.attrib['type'] in scripts: 211 and elem.attrib['type'] in scripts:
212 continue 212 continue
213 scripts[elem.attrib['type']] = _text(elem) 213 scripts[elem.attrib['type']] = _text(elem)
214 214
215 # <dates> 215 # <dates>
216 216
217 week_data = data.setdefault('week_data', {}) 217 week_data = data.setdefault('week_data', {})
218 supelem = sup.find('//weekData') 218 supelem = sup.find('.//weekData')
219 219
220 for elem in supelem.findall('minDays'): 220 for elem in supelem.findall('minDays'):
221 territories = elem.attrib['territories'].split() 221 territories = elem.attrib['territories'].split()
222 if territory in territories or any([r in territories for r in regions]): 222 if territory in territories or any([r in territories for r in regions]):
223 week_data['min_days'] = int(elem.attrib['count']) 223 week_data['min_days'] = int(elem.attrib['count'])
236 territories = elem.attrib['territories'].split() 236 territories = elem.attrib['territories'].split()
237 if territory in territories or any([r in territories for r in regions]): 237 if territory in territories or any([r in territories for r in regions]):
238 week_data['weekend_end'] = weekdays[elem.attrib['day']] 238 week_data['weekend_end'] = weekdays[elem.attrib['day']]
239 239
240 zone_formats = data.setdefault('zone_formats', {}) 240 zone_formats = data.setdefault('zone_formats', {})
241 for elem in tree.findall('//timeZoneNames/gmtFormat'): 241 for elem in tree.findall('.//timeZoneNames/gmtFormat'):
242 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: 242 if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
243 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s') 243 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
244 break 244 break
245 for elem in tree.findall('//timeZoneNames/regionFormat'): 245 for elem in tree.findall('.//timeZoneNames/regionFormat'):
246 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: 246 if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
247 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s') 247 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
248 break 248 break
249 for elem in tree.findall('//timeZoneNames/fallbackFormat'): 249 for elem in tree.findall('.//timeZoneNames/fallbackFormat'):
250 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: 250 if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
251 zone_formats['fallback'] = unicode(elem.text) \ 251 zone_formats['fallback'] = unicode(elem.text) \
252 .replace('{0}', '%(0)s').replace('{1}', '%(1)s') 252 .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
253 break 253 break
254 254
255 time_zones = data.setdefault('time_zones', {}) 255 time_zones = data.setdefault('time_zones', {})
256 for elem in tree.findall('//timeZoneNames/zone'): 256 for elem in tree.findall('.//timeZoneNames/zone'):
257 info = {} 257 info = {}
258 city = elem.findtext('exemplarCity') 258 city = elem.findtext('exemplarCity')
259 if city: 259 if city:
260 info['city'] = unicode(city) 260 info['city'] = unicode(city)
261 for child in elem.findall('long/*'): 261 for child in elem.findall('long/*'):
263 for child in elem.findall('short/*'): 263 for child in elem.findall('short/*'):
264 info.setdefault('short', {})[child.tag] = unicode(child.text) 264 info.setdefault('short', {})[child.tag] = unicode(child.text)
265 time_zones[elem.attrib['type']] = info 265 time_zones[elem.attrib['type']] = info
266 266
267 meta_zones = data.setdefault('meta_zones', {}) 267 meta_zones = data.setdefault('meta_zones', {})
268 for elem in tree.findall('//timeZoneNames/metazone'): 268 for elem in tree.findall('.//timeZoneNames/metazone'):
269 info = {} 269 info = {}
270 city = elem.findtext('exemplarCity') 270 city = elem.findtext('exemplarCity')
271 if city: 271 if city:
272 info['city'] = unicode(city) 272 info['city'] = unicode(city)
273 for child in elem.findall('long/*'): 273 for child in elem.findall('long/*'):
275 for child in elem.findall('short/*'): 275 for child in elem.findall('short/*'):
276 info.setdefault('short', {})[child.tag] = unicode(child.text) 276 info.setdefault('short', {})[child.tag] = unicode(child.text)
277 info['common'] = elem.findtext('commonlyUsed') == 'true' 277 info['common'] = elem.findtext('commonlyUsed') == 'true'
278 meta_zones[elem.attrib['type']] = info 278 meta_zones[elem.attrib['type']] = info
279 279
280 for calendar in tree.findall('//calendars/calendar'): 280 for calendar in tree.findall('.//calendars/calendar'):
281 if calendar.attrib['type'] != 'gregorian': 281 if calendar.attrib['type'] != 'gregorian':
282 # TODO: support other calendar types 282 # TODO: support other calendar types
283 continue 283 continue
284 284
285 months = data.setdefault('months', {}) 285 months = data.setdefault('months', {})
421 ) 421 )
422 422
423 # <numbers> 423 # <numbers>
424 424
425 number_symbols = data.setdefault('number_symbols', {}) 425 number_symbols = data.setdefault('number_symbols', {})
426 for elem in tree.findall('//numbers/symbols/*'): 426 for elem in tree.findall('.//numbers/symbols/*'):
427 if ('draft' in elem.attrib or 'alt' in elem.attrib): 427 if ('draft' in elem.attrib or 'alt' in elem.attrib):
428 continue 428 continue
429 number_symbols[elem.tag] = unicode(elem.text) 429 number_symbols[elem.tag] = unicode(elem.text)
430 430
431 decimal_formats = data.setdefault('decimal_formats', {}) 431 decimal_formats = data.setdefault('decimal_formats', {})
432 for elem in tree.findall('//decimalFormats/decimalFormatLength'): 432 for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
433 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 433 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
434 and elem.attrib.get('type') in decimal_formats: 434 and elem.attrib.get('type') in decimal_formats:
435 continue 435 continue
436 pattern = unicode(elem.findtext('decimalFormat/pattern')) 436 pattern = unicode(elem.findtext('decimalFormat/pattern'))
437 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) 437 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
438 438
439 scientific_formats = data.setdefault('scientific_formats', {}) 439 scientific_formats = data.setdefault('scientific_formats', {})
440 for elem in tree.findall('//scientificFormats/scientificFormatLength'): 440 for elem in tree.findall('.//scientificFormats/scientificFormatLength'):
441 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 441 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
442 and elem.attrib.get('type') in scientific_formats: 442 and elem.attrib.get('type') in scientific_formats:
443 continue 443 continue
444 pattern = unicode(elem.findtext('scientificFormat/pattern')) 444 pattern = unicode(elem.findtext('scientificFormat/pattern'))
445 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) 445 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
446 446
447 currency_formats = data.setdefault('currency_formats', {}) 447 currency_formats = data.setdefault('currency_formats', {})
448 for elem in tree.findall('//currencyFormats/currencyFormatLength'): 448 for elem in tree.findall('.//currencyFormats/currencyFormatLength'):
449 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 449 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
450 and elem.attrib.get('type') in currency_formats: 450 and elem.attrib.get('type') in currency_formats:
451 continue 451 continue
452 pattern = unicode(elem.findtext('currencyFormat/pattern')) 452 pattern = unicode(elem.findtext('currencyFormat/pattern'))
453 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) 453 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
454 454
455 percent_formats = data.setdefault('percent_formats', {}) 455 percent_formats = data.setdefault('percent_formats', {})
456 for elem in tree.findall('//percentFormats/percentFormatLength'): 456 for elem in tree.findall('.//percentFormats/percentFormatLength'):
457 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 457 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
458 and elem.attrib.get('type') in percent_formats: 458 and elem.attrib.get('type') in percent_formats:
459 continue 459 continue
460 pattern = unicode(elem.findtext('percentFormat/pattern')) 460 pattern = unicode(elem.findtext('percentFormat/pattern'))
461 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) 461 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
462 462
463 currency_names = data.setdefault('currency_names', {}) 463 currency_names = data.setdefault('currency_names', {})
464 currency_symbols = data.setdefault('currency_symbols', {}) 464 currency_symbols = data.setdefault('currency_symbols', {})
465 for elem in tree.findall('//currencies/currency'): 465 for elem in tree.findall('.//currencies/currency'):
466 code = elem.attrib['type'] 466 code = elem.attrib['type']
467 # TODO: support plural rules for currency name selection 467 # TODO: support plural rules for currency name selection
468 for name in elem.findall('displayName'): 468 for name in elem.findall('displayName'):
469 if ('draft' in name.attrib or 'count' in name.attrib) \ 469 if ('draft' in name.attrib or 'count' in name.attrib) \
470 and code in currency_names: 470 and code in currency_names:
477 currency_symbols[code] = unicode(symbol.text) 477 currency_symbols[code] = unicode(symbol.text)
478 478
479 # <units> 479 # <units>
480 480
481 unit_patterns = data.setdefault('unit_patterns', {}) 481 unit_patterns = data.setdefault('unit_patterns', {})
482 for elem in tree.findall('//units/unit'): 482 for elem in tree.findall('.//units/unit'):
483 unit_type = elem.attrib['type'] 483 unit_type = elem.attrib['type']
484 unit_pattern = unit_patterns.setdefault(unit_type, {}) 484 unit_pattern = unit_patterns.setdefault(unit_type, {})
485 for pattern in elem.findall('unitPattern'): 485 for pattern in elem.findall('unitPattern'):
486 unit_patterns[unit_type][pattern.attrib['count']] = \ 486 unit_patterns[unit_type][pattern.attrib['count']] = \
487 unicode(pattern.text) 487 unicode(pattern.text)
Copyright (C) 2012-2017 Edgewall Software