Mercurial > babel > mirror
comparison scripts/import_cldr.py @ 469:74de1a99a312 stable-0.9.x
Merged revisions 518-519 via svnmerge from
http://svn.edgewall.org/repos/babel/trunk
........
r518 | jruigrok | 2010-04-08 11:16:53 +0200 (do, 08 apr 2010) | 2 lines
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
........
r519 | jruigrok | 2010-04-08 11:17:34 +0200 (do, 08 apr 2010) | 2 lines
Document the Python 2.7 ElementTree fix.
........
author | jruigrok |
---|---|
date | Sun, 11 Apr 2010 08:10:45 +0000 |
parents | a11564c5c1f1 |
children | a05fd751cbac |
comparison legend: equal | deleted | inserted | replaced
465:fc154ff3dce4 | 469:74de1a99a312 |
---|---|
91 global_data = {} | 91 global_data = {} |
92 | 92 |
93 territory_zones = global_data.setdefault('territory_zones', {}) | 93 territory_zones = global_data.setdefault('territory_zones', {}) |
94 zone_aliases = global_data.setdefault('zone_aliases', {}) | 94 zone_aliases = global_data.setdefault('zone_aliases', {}) |
95 zone_territories = global_data.setdefault('zone_territories', {}) | 95 zone_territories = global_data.setdefault('zone_territories', {}) |
96 for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'): | 96 for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'): |
97 tzid = elem.attrib['type'] | 97 tzid = elem.attrib['type'] |
98 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) | 98 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) |
99 zone_territories[tzid] = elem.attrib['territory'] | 99 zone_territories[tzid] = elem.attrib['territory'] |
100 if 'aliases' in elem.attrib: | 100 if 'aliases' in elem.attrib: |
101 for alias in elem.attrib['aliases'].split(): | 101 for alias in elem.attrib['aliases'].split(): |
102 zone_aliases[alias] = tzid | 102 zone_aliases[alias] = tzid |
103 | 103 |
104 # Import Metazone mapping | 104 # Import Metazone mapping |
105 meta_zones = global_data.setdefault('meta_zones', {}) | 105 meta_zones = global_data.setdefault('meta_zones', {}) |
106 tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml')) | 106 tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml')) |
107 for elem in tzsup.findall('//timezone'): | 107 for elem in tzsup.findall('.//timezone'): |
108 for child in elem.findall('usesMetazone'): | 108 for child in elem.findall('usesMetazone'): |
109 if 'to' not in child.attrib: # FIXME: support old mappings | 109 if 'to' not in child.attrib: # FIXME: support old mappings |
110 meta_zones[elem.attrib['type']] = child.attrib['mzone'] | 110 meta_zones[elem.attrib['type']] = child.attrib['mzone'] |
111 | 111 |
112 outfile = open(os.path.join(destdir, 'global.dat'), 'wb') | 112 outfile = open(os.path.join(destdir, 'global.dat'), 'wb') |
115 finally: | 115 finally: |
116 outfile.close() | 116 outfile.close() |
117 | 117 |
118 # build a territory containment mapping for inheritance | 118 # build a territory containment mapping for inheritance |
119 regions = {} | 119 regions = {} |
120 for elem in sup.findall('//territoryContainment/group'): | 120 for elem in sup.findall('.//territoryContainment/group'): |
121 regions[elem.attrib['type']] = elem.attrib['contains'].split() | 121 regions[elem.attrib['type']] = elem.attrib['contains'].split() |
122 | 122 |
123 # Resolve territory containment | 123 # Resolve territory containment |
124 territory_containment = {} | 124 territory_containment = {} |
125 region_items = regions.items() | 125 region_items = regions.items() |
144 print>>sys.stderr, 'Processing input file %r' % filename | 144 print>>sys.stderr, 'Processing input file %r' % filename |
145 tree = parse(os.path.join(srcdir, 'main', filename)) | 145 tree = parse(os.path.join(srcdir, 'main', filename)) |
146 data = {} | 146 data = {} |
147 | 147 |
148 language = None | 148 language = None |
149 elem = tree.find('//identity/language') | 149 elem = tree.find('.//identity/language') |
150 if elem is not None: | 150 if elem is not None: |
151 language = elem.attrib['type'] | 151 language = elem.attrib['type'] |
152 print>>sys.stderr, ' Language: %r' % language | 152 print>>sys.stderr, ' Language: %r' % language |
153 | 153 |
154 territory = None | 154 territory = None |
155 elem = tree.find('//identity/territory') | 155 elem = tree.find('.//identity/territory') |
156 if elem is not None: | 156 if elem is not None: |
157 territory = elem.attrib['type'] | 157 territory = elem.attrib['type'] |
158 else: | 158 else: |
159 territory = '001' # world | 159 territory = '001' # world |
160 print>>sys.stderr, ' Territory: %r' % territory | 160 print>>sys.stderr, ' Territory: %r' % territory |
162 print>>sys.stderr, ' Regions: %r' % regions | 162 print>>sys.stderr, ' Regions: %r' % regions |
163 | 163 |
164 # <localeDisplayNames> | 164 # <localeDisplayNames> |
165 | 165 |
166 territories = data.setdefault('territories', {}) | 166 territories = data.setdefault('territories', {}) |
167 for elem in tree.findall('//territories/territory'): | 167 for elem in tree.findall('.//territories/territory'): |
168 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | 168 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
169 and elem.attrib['type'] in territories: | 169 and elem.attrib['type'] in territories: |
170 continue | 170 continue |
171 territories[elem.attrib['type']] = _text(elem) | 171 territories[elem.attrib['type']] = _text(elem) |
172 | 172 |
173 languages = data.setdefault('languages', {}) | 173 languages = data.setdefault('languages', {}) |
174 for elem in tree.findall('//languages/language'): | 174 for elem in tree.findall('.//languages/language'): |
175 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | 175 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
176 and elem.attrib['type'] in languages: | 176 and elem.attrib['type'] in languages: |
177 continue | 177 continue |
178 languages[elem.attrib['type']] = _text(elem) | 178 languages[elem.attrib['type']] = _text(elem) |
179 | 179 |
180 variants = data.setdefault('variants', {}) | 180 variants = data.setdefault('variants', {}) |
181 for elem in tree.findall('//variants/variant'): | 181 for elem in tree.findall('.//variants/variant'): |
182 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | 182 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
183 and elem.attrib['type'] in variants: | 183 and elem.attrib['type'] in variants: |
184 continue | 184 continue |
185 variants[elem.attrib['type']] = _text(elem) | 185 variants[elem.attrib['type']] = _text(elem) |
186 | 186 |
187 scripts = data.setdefault('scripts', {}) | 187 scripts = data.setdefault('scripts', {}) |
188 for elem in tree.findall('//scripts/script'): | 188 for elem in tree.findall('.//scripts/script'): |
189 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | 189 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
190 and elem.attrib['type'] in scripts: | 190 and elem.attrib['type'] in scripts: |
191 continue | 191 continue |
192 scripts[elem.attrib['type']] = _text(elem) | 192 scripts[elem.attrib['type']] = _text(elem) |
193 | 193 |
194 # <dates> | 194 # <dates> |
195 | 195 |
196 week_data = data.setdefault('week_data', {}) | 196 week_data = data.setdefault('week_data', {}) |
197 supelem = sup.find('//weekData') | 197 supelem = sup.find('.//weekData') |
198 | 198 |
199 for elem in supelem.findall('minDays'): | 199 for elem in supelem.findall('minDays'): |
200 territories = elem.attrib['territories'].split() | 200 territories = elem.attrib['territories'].split() |
201 if territory in territories or any([r in territories for r in regions]): | 201 if territory in territories or any([r in territories for r in regions]): |
202 week_data['min_days'] = int(elem.attrib['count']) | 202 week_data['min_days'] = int(elem.attrib['count']) |
215 territories = elem.attrib['territories'].split() | 215 territories = elem.attrib['territories'].split() |
216 if territory in territories or any([r in territories for r in regions]): | 216 if territory in territories or any([r in territories for r in regions]): |
217 week_data['weekend_end'] = weekdays[elem.attrib['day']] | 217 week_data['weekend_end'] = weekdays[elem.attrib['day']] |
218 | 218 |
219 zone_formats = data.setdefault('zone_formats', {}) | 219 zone_formats = data.setdefault('zone_formats', {}) |
220 for elem in tree.findall('//timeZoneNames/gmtFormat'): | 220 for elem in tree.findall('.//timeZoneNames/gmtFormat'): |
221 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: | 221 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
222 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s') | 222 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s') |
223 break | 223 break |
224 for elem in tree.findall('//timeZoneNames/regionFormat'): | 224 for elem in tree.findall('.//timeZoneNames/regionFormat'): |
225 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: | 225 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
226 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s') | 226 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s') |
227 break | 227 break |
228 for elem in tree.findall('//timeZoneNames/fallbackFormat'): | 228 for elem in tree.findall('.//timeZoneNames/fallbackFormat'): |
229 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: | 229 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
230 zone_formats['fallback'] = unicode(elem.text) \ | 230 zone_formats['fallback'] = unicode(elem.text) \ |
231 .replace('{0}', '%(0)s').replace('{1}', '%(1)s') | 231 .replace('{0}', '%(0)s').replace('{1}', '%(1)s') |
232 break | 232 break |
233 | 233 |
234 time_zones = data.setdefault('time_zones', {}) | 234 time_zones = data.setdefault('time_zones', {}) |
235 for elem in tree.findall('//timeZoneNames/zone'): | 235 for elem in tree.findall('.//timeZoneNames/zone'): |
236 info = {} | 236 info = {} |
237 city = elem.findtext('exemplarCity') | 237 city = elem.findtext('exemplarCity') |
238 if city: | 238 if city: |
239 info['city'] = unicode(city) | 239 info['city'] = unicode(city) |
240 for child in elem.findall('long/*'): | 240 for child in elem.findall('long/*'): |
242 for child in elem.findall('short/*'): | 242 for child in elem.findall('short/*'): |
243 info.setdefault('short', {})[child.tag] = unicode(child.text) | 243 info.setdefault('short', {})[child.tag] = unicode(child.text) |
244 time_zones[elem.attrib['type']] = info | 244 time_zones[elem.attrib['type']] = info |
245 | 245 |
246 meta_zones = data.setdefault('meta_zones', {}) | 246 meta_zones = data.setdefault('meta_zones', {}) |
247 for elem in tree.findall('//timeZoneNames/metazone'): | 247 for elem in tree.findall('.//timeZoneNames/metazone'): |
248 info = {} | 248 info = {} |
249 city = elem.findtext('exemplarCity') | 249 city = elem.findtext('exemplarCity') |
250 if city: | 250 if city: |
251 info['city'] = unicode(city) | 251 info['city'] = unicode(city) |
252 for child in elem.findall('long/*'): | 252 for child in elem.findall('long/*'): |
254 for child in elem.findall('short/*'): | 254 for child in elem.findall('short/*'): |
255 info.setdefault('short', {})[child.tag] = unicode(child.text) | 255 info.setdefault('short', {})[child.tag] = unicode(child.text) |
256 info['common'] = elem.findtext('commonlyUsed') == 'true' | 256 info['common'] = elem.findtext('commonlyUsed') == 'true' |
257 meta_zones[elem.attrib['type']] = info | 257 meta_zones[elem.attrib['type']] = info |
258 | 258 |
259 for calendar in tree.findall('//calendars/calendar'): | 259 for calendar in tree.findall('.//calendars/calendar'): |
260 if calendar.attrib['type'] != 'gregorian': | 260 if calendar.attrib['type'] != 'gregorian': |
261 # TODO: support other calendar types | 261 # TODO: support other calendar types |
262 continue | 262 continue |
263 | 263 |
264 months = data.setdefault('months', {}) | 264 months = data.setdefault('months', {}) |
400 ) | 400 ) |
401 | 401 |
402 # <numbers> | 402 # <numbers> |
403 | 403 |
404 number_symbols = data.setdefault('number_symbols', {}) | 404 number_symbols = data.setdefault('number_symbols', {}) |
405 for elem in tree.findall('//numbers/symbols/*'): | 405 for elem in tree.findall('.//numbers/symbols/*'): |
406 number_symbols[elem.tag] = unicode(elem.text) | 406 number_symbols[elem.tag] = unicode(elem.text) |
407 | 407 |
408 decimal_formats = data.setdefault('decimal_formats', {}) | 408 decimal_formats = data.setdefault('decimal_formats', {}) |
409 for elem in tree.findall('//decimalFormats/decimalFormatLength'): | 409 for elem in tree.findall('.//decimalFormats/decimalFormatLength'): |
410 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | 410 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
411 and elem.attrib.get('type') in decimal_formats: | 411 and elem.attrib.get('type') in decimal_formats: |
412 continue | 412 continue |
413 pattern = unicode(elem.findtext('decimalFormat/pattern')) | 413 pattern = unicode(elem.findtext('decimalFormat/pattern')) |
414 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | 414 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
415 | 415 |
416 scientific_formats = data.setdefault('scientific_formats', {}) | 416 scientific_formats = data.setdefault('scientific_formats', {}) |
417 for elem in tree.findall('//scientificFormats/scientificFormatLength'): | 417 for elem in tree.findall('.//scientificFormats/scientificFormatLength'): |
418 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | 418 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
419 and elem.attrib.get('type') in scientific_formats: | 419 and elem.attrib.get('type') in scientific_formats: |
420 continue | 420 continue |
421 pattern = unicode(elem.findtext('scientificFormat/pattern')) | 421 pattern = unicode(elem.findtext('scientificFormat/pattern')) |
422 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | 422 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
423 | 423 |
424 currency_formats = data.setdefault('currency_formats', {}) | 424 currency_formats = data.setdefault('currency_formats', {}) |
425 for elem in tree.findall('//currencyFormats/currencyFormatLength'): | 425 for elem in tree.findall('.//currencyFormats/currencyFormatLength'): |
426 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | 426 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
427 and elem.attrib.get('type') in currency_formats: | 427 and elem.attrib.get('type') in currency_formats: |
428 continue | 428 continue |
429 pattern = unicode(elem.findtext('currencyFormat/pattern')) | 429 pattern = unicode(elem.findtext('currencyFormat/pattern')) |
430 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | 430 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
431 | 431 |
432 percent_formats = data.setdefault('percent_formats', {}) | 432 percent_formats = data.setdefault('percent_formats', {}) |
433 for elem in tree.findall('//percentFormats/percentFormatLength'): | 433 for elem in tree.findall('.//percentFormats/percentFormatLength'): |
434 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | 434 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
435 and elem.attrib.get('type') in percent_formats: | 435 and elem.attrib.get('type') in percent_formats: |
436 continue | 436 continue |
437 pattern = unicode(elem.findtext('percentFormat/pattern')) | 437 pattern = unicode(elem.findtext('percentFormat/pattern')) |
438 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | 438 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
439 | 439 |
440 currency_names = data.setdefault('currency_names', {}) | 440 currency_names = data.setdefault('currency_names', {}) |
441 currency_symbols = data.setdefault('currency_symbols', {}) | 441 currency_symbols = data.setdefault('currency_symbols', {}) |
442 for elem in tree.findall('//currencies/currency'): | 442 for elem in tree.findall('.//currencies/currency'): |
443 code = elem.attrib['type'] | 443 code = elem.attrib['type'] |
444 # TODO: support plural rules for currency name selection | 444 # TODO: support plural rules for currency name selection |
445 for name in elem.findall('displayName'): | 445 for name in elem.findall('displayName'): |
446 if ('draft' in name.attrib or 'count' in name.attrib) \ | 446 if ('draft' in name.attrib or 'count' in name.attrib) \ |
447 and code in currency_names: | 447 and code in currency_names: |