comparison scripts/import_cldr.py @ 469:74de1a99a312 stable-0.9.x

Merged revisions 518-519 via svnmerge from http://svn.edgewall.org/repos/babel/trunk
........
r518 | jruigrok | 2010-04-08 11:16:53 +0200 (do, 08 apr 2010) | 2 lines
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
........
r519 | jruigrok | 2010-04-08 11:17:34 +0200 (do, 08 apr 2010) | 2 lines
Document the Python 2.7 ElementTree fix.
........
author jruigrok
date Sun, 11 Apr 2010 08:10:45 +0000
parents a11564c5c1f1
children a05fd751cbac
comparison
equal deleted inserted replaced
465:fc154ff3dce4 469:74de1a99a312
91 global_data = {} 91 global_data = {}
92 92
93 territory_zones = global_data.setdefault('territory_zones', {}) 93 territory_zones = global_data.setdefault('territory_zones', {})
94 zone_aliases = global_data.setdefault('zone_aliases', {}) 94 zone_aliases = global_data.setdefault('zone_aliases', {})
95 zone_territories = global_data.setdefault('zone_territories', {}) 95 zone_territories = global_data.setdefault('zone_territories', {})
96 for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'): 96 for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
97 tzid = elem.attrib['type'] 97 tzid = elem.attrib['type']
98 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) 98 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
99 zone_territories[tzid] = elem.attrib['territory'] 99 zone_territories[tzid] = elem.attrib['territory']
100 if 'aliases' in elem.attrib: 100 if 'aliases' in elem.attrib:
101 for alias in elem.attrib['aliases'].split(): 101 for alias in elem.attrib['aliases'].split():
102 zone_aliases[alias] = tzid 102 zone_aliases[alias] = tzid
103 103
104 # Import Metazone mapping 104 # Import Metazone mapping
105 meta_zones = global_data.setdefault('meta_zones', {}) 105 meta_zones = global_data.setdefault('meta_zones', {})
106 tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml')) 106 tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml'))
107 for elem in tzsup.findall('//timezone'): 107 for elem in tzsup.findall('.//timezone'):
108 for child in elem.findall('usesMetazone'): 108 for child in elem.findall('usesMetazone'):
109 if 'to' not in child.attrib: # FIXME: support old mappings 109 if 'to' not in child.attrib: # FIXME: support old mappings
110 meta_zones[elem.attrib['type']] = child.attrib['mzone'] 110 meta_zones[elem.attrib['type']] = child.attrib['mzone']
111 111
112 outfile = open(os.path.join(destdir, 'global.dat'), 'wb') 112 outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
115 finally: 115 finally:
116 outfile.close() 116 outfile.close()
117 117
118 # build a territory containment mapping for inheritance 118 # build a territory containment mapping for inheritance
119 regions = {} 119 regions = {}
120 for elem in sup.findall('//territoryContainment/group'): 120 for elem in sup.findall('.//territoryContainment/group'):
121 regions[elem.attrib['type']] = elem.attrib['contains'].split() 121 regions[elem.attrib['type']] = elem.attrib['contains'].split()
122 122
123 # Resolve territory containment 123 # Resolve territory containment
124 territory_containment = {} 124 territory_containment = {}
125 region_items = regions.items() 125 region_items = regions.items()
144 print>>sys.stderr, 'Processing input file %r' % filename 144 print>>sys.stderr, 'Processing input file %r' % filename
145 tree = parse(os.path.join(srcdir, 'main', filename)) 145 tree = parse(os.path.join(srcdir, 'main', filename))
146 data = {} 146 data = {}
147 147
148 language = None 148 language = None
149 elem = tree.find('//identity/language') 149 elem = tree.find('.//identity/language')
150 if elem is not None: 150 if elem is not None:
151 language = elem.attrib['type'] 151 language = elem.attrib['type']
152 print>>sys.stderr, ' Language: %r' % language 152 print>>sys.stderr, ' Language: %r' % language
153 153
154 territory = None 154 territory = None
155 elem = tree.find('//identity/territory') 155 elem = tree.find('.//identity/territory')
156 if elem is not None: 156 if elem is not None:
157 territory = elem.attrib['type'] 157 territory = elem.attrib['type']
158 else: 158 else:
159 territory = '001' # world 159 territory = '001' # world
160 print>>sys.stderr, ' Territory: %r' % territory 160 print>>sys.stderr, ' Territory: %r' % territory
162 print>>sys.stderr, ' Regions: %r' % regions 162 print>>sys.stderr, ' Regions: %r' % regions
163 163
164 # <localeDisplayNames> 164 # <localeDisplayNames>
165 165
166 territories = data.setdefault('territories', {}) 166 territories = data.setdefault('territories', {})
167 for elem in tree.findall('//territories/territory'): 167 for elem in tree.findall('.//territories/territory'):
168 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 168 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
169 and elem.attrib['type'] in territories: 169 and elem.attrib['type'] in territories:
170 continue 170 continue
171 territories[elem.attrib['type']] = _text(elem) 171 territories[elem.attrib['type']] = _text(elem)
172 172
173 languages = data.setdefault('languages', {}) 173 languages = data.setdefault('languages', {})
174 for elem in tree.findall('//languages/language'): 174 for elem in tree.findall('.//languages/language'):
175 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 175 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
176 and elem.attrib['type'] in languages: 176 and elem.attrib['type'] in languages:
177 continue 177 continue
178 languages[elem.attrib['type']] = _text(elem) 178 languages[elem.attrib['type']] = _text(elem)
179 179
180 variants = data.setdefault('variants', {}) 180 variants = data.setdefault('variants', {})
181 for elem in tree.findall('//variants/variant'): 181 for elem in tree.findall('.//variants/variant'):
182 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 182 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
183 and elem.attrib['type'] in variants: 183 and elem.attrib['type'] in variants:
184 continue 184 continue
185 variants[elem.attrib['type']] = _text(elem) 185 variants[elem.attrib['type']] = _text(elem)
186 186
187 scripts = data.setdefault('scripts', {}) 187 scripts = data.setdefault('scripts', {})
188 for elem in tree.findall('//scripts/script'): 188 for elem in tree.findall('.//scripts/script'):
189 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 189 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
190 and elem.attrib['type'] in scripts: 190 and elem.attrib['type'] in scripts:
191 continue 191 continue
192 scripts[elem.attrib['type']] = _text(elem) 192 scripts[elem.attrib['type']] = _text(elem)
193 193
194 # <dates> 194 # <dates>
195 195
196 week_data = data.setdefault('week_data', {}) 196 week_data = data.setdefault('week_data', {})
197 supelem = sup.find('//weekData') 197 supelem = sup.find('.//weekData')
198 198
199 for elem in supelem.findall('minDays'): 199 for elem in supelem.findall('minDays'):
200 territories = elem.attrib['territories'].split() 200 territories = elem.attrib['territories'].split()
201 if territory in territories or any([r in territories for r in regions]): 201 if territory in territories or any([r in territories for r in regions]):
202 week_data['min_days'] = int(elem.attrib['count']) 202 week_data['min_days'] = int(elem.attrib['count'])
215 territories = elem.attrib['territories'].split() 215 territories = elem.attrib['territories'].split()
216 if territory in territories or any([r in territories for r in regions]): 216 if territory in territories or any([r in territories for r in regions]):
217 week_data['weekend_end'] = weekdays[elem.attrib['day']] 217 week_data['weekend_end'] = weekdays[elem.attrib['day']]
218 218
219 zone_formats = data.setdefault('zone_formats', {}) 219 zone_formats = data.setdefault('zone_formats', {})
220 for elem in tree.findall('//timeZoneNames/gmtFormat'): 220 for elem in tree.findall('.//timeZoneNames/gmtFormat'):
221 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: 221 if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
222 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s') 222 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
223 break 223 break
224 for elem in tree.findall('//timeZoneNames/regionFormat'): 224 for elem in tree.findall('.//timeZoneNames/regionFormat'):
225 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: 225 if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
226 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s') 226 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
227 break 227 break
228 for elem in tree.findall('//timeZoneNames/fallbackFormat'): 228 for elem in tree.findall('.//timeZoneNames/fallbackFormat'):
229 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: 229 if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
230 zone_formats['fallback'] = unicode(elem.text) \ 230 zone_formats['fallback'] = unicode(elem.text) \
231 .replace('{0}', '%(0)s').replace('{1}', '%(1)s') 231 .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
232 break 232 break
233 233
234 time_zones = data.setdefault('time_zones', {}) 234 time_zones = data.setdefault('time_zones', {})
235 for elem in tree.findall('//timeZoneNames/zone'): 235 for elem in tree.findall('.//timeZoneNames/zone'):
236 info = {} 236 info = {}
237 city = elem.findtext('exemplarCity') 237 city = elem.findtext('exemplarCity')
238 if city: 238 if city:
239 info['city'] = unicode(city) 239 info['city'] = unicode(city)
240 for child in elem.findall('long/*'): 240 for child in elem.findall('long/*'):
242 for child in elem.findall('short/*'): 242 for child in elem.findall('short/*'):
243 info.setdefault('short', {})[child.tag] = unicode(child.text) 243 info.setdefault('short', {})[child.tag] = unicode(child.text)
244 time_zones[elem.attrib['type']] = info 244 time_zones[elem.attrib['type']] = info
245 245
246 meta_zones = data.setdefault('meta_zones', {}) 246 meta_zones = data.setdefault('meta_zones', {})
247 for elem in tree.findall('//timeZoneNames/metazone'): 247 for elem in tree.findall('.//timeZoneNames/metazone'):
248 info = {} 248 info = {}
249 city = elem.findtext('exemplarCity') 249 city = elem.findtext('exemplarCity')
250 if city: 250 if city:
251 info['city'] = unicode(city) 251 info['city'] = unicode(city)
252 for child in elem.findall('long/*'): 252 for child in elem.findall('long/*'):
254 for child in elem.findall('short/*'): 254 for child in elem.findall('short/*'):
255 info.setdefault('short', {})[child.tag] = unicode(child.text) 255 info.setdefault('short', {})[child.tag] = unicode(child.text)
256 info['common'] = elem.findtext('commonlyUsed') == 'true' 256 info['common'] = elem.findtext('commonlyUsed') == 'true'
257 meta_zones[elem.attrib['type']] = info 257 meta_zones[elem.attrib['type']] = info
258 258
259 for calendar in tree.findall('//calendars/calendar'): 259 for calendar in tree.findall('.//calendars/calendar'):
260 if calendar.attrib['type'] != 'gregorian': 260 if calendar.attrib['type'] != 'gregorian':
261 # TODO: support other calendar types 261 # TODO: support other calendar types
262 continue 262 continue
263 263
264 months = data.setdefault('months', {}) 264 months = data.setdefault('months', {})
400 ) 400 )
401 401
402 # <numbers> 402 # <numbers>
403 403
404 number_symbols = data.setdefault('number_symbols', {}) 404 number_symbols = data.setdefault('number_symbols', {})
405 for elem in tree.findall('//numbers/symbols/*'): 405 for elem in tree.findall('.//numbers/symbols/*'):
406 number_symbols[elem.tag] = unicode(elem.text) 406 number_symbols[elem.tag] = unicode(elem.text)
407 407
408 decimal_formats = data.setdefault('decimal_formats', {}) 408 decimal_formats = data.setdefault('decimal_formats', {})
409 for elem in tree.findall('//decimalFormats/decimalFormatLength'): 409 for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
410 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 410 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
411 and elem.attrib.get('type') in decimal_formats: 411 and elem.attrib.get('type') in decimal_formats:
412 continue 412 continue
413 pattern = unicode(elem.findtext('decimalFormat/pattern')) 413 pattern = unicode(elem.findtext('decimalFormat/pattern'))
414 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) 414 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
415 415
416 scientific_formats = data.setdefault('scientific_formats', {}) 416 scientific_formats = data.setdefault('scientific_formats', {})
417 for elem in tree.findall('//scientificFormats/scientificFormatLength'): 417 for elem in tree.findall('.//scientificFormats/scientificFormatLength'):
418 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 418 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
419 and elem.attrib.get('type') in scientific_formats: 419 and elem.attrib.get('type') in scientific_formats:
420 continue 420 continue
421 pattern = unicode(elem.findtext('scientificFormat/pattern')) 421 pattern = unicode(elem.findtext('scientificFormat/pattern'))
422 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) 422 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
423 423
424 currency_formats = data.setdefault('currency_formats', {}) 424 currency_formats = data.setdefault('currency_formats', {})
425 for elem in tree.findall('//currencyFormats/currencyFormatLength'): 425 for elem in tree.findall('.//currencyFormats/currencyFormatLength'):
426 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 426 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
427 and elem.attrib.get('type') in currency_formats: 427 and elem.attrib.get('type') in currency_formats:
428 continue 428 continue
429 pattern = unicode(elem.findtext('currencyFormat/pattern')) 429 pattern = unicode(elem.findtext('currencyFormat/pattern'))
430 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) 430 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
431 431
432 percent_formats = data.setdefault('percent_formats', {}) 432 percent_formats = data.setdefault('percent_formats', {})
433 for elem in tree.findall('//percentFormats/percentFormatLength'): 433 for elem in tree.findall('.//percentFormats/percentFormatLength'):
434 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ 434 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
435 and elem.attrib.get('type') in percent_formats: 435 and elem.attrib.get('type') in percent_formats:
436 continue 436 continue
437 pattern = unicode(elem.findtext('percentFormat/pattern')) 437 pattern = unicode(elem.findtext('percentFormat/pattern'))
438 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) 438 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
439 439
440 currency_names = data.setdefault('currency_names', {}) 440 currency_names = data.setdefault('currency_names', {})
441 currency_symbols = data.setdefault('currency_symbols', {}) 441 currency_symbols = data.setdefault('currency_symbols', {})
442 for elem in tree.findall('//currencies/currency'): 442 for elem in tree.findall('.//currencies/currency'):
443 code = elem.attrib['type'] 443 code = elem.attrib['type']
444 # TODO: support plural rules for currency name selection 444 # TODO: support plural rules for currency name selection
445 for name in elem.findall('displayName'): 445 for name in elem.findall('displayName'):
446 if ('draft' in name.attrib or 'count' in name.attrib) \ 446 if ('draft' in name.attrib or 'count' in name.attrib) \
447 and code in currency_names: 447 and code in currency_names:
Copyright (C) 2012-2017 Edgewall Software