comparison scripts/import_cldr.py @ 28:695884591af6

* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime. * Move locale data loading from `babel.core` into a separate `babel.localedata` module. * Add currency names and symbols to locale data.
author cmlenz
date Sun, 03 Jun 2007 15:27:27 +0000
parents 6c2c9fc7d787
children 9a00ac84004c
comparison
equal deleted inserted replaced
27:8d4cd0856f69 28:695884591af6
31 any 31 any
32 except NameError: 32 except NameError:
33 def any(iterable): 33 def any(iterable):
34 return filter(None, list(iterable)) 34 return filter(None, list(iterable))
35 35
36 def _parent(locale):
37 parts = locale.split('_')
38 if len(parts) == 1:
39 return 'root'
40 else:
41 return '_'.join(parts[:-1])
42
43 def _text(elem): 36 def _text(elem):
44 buf = [elem.text or ''] 37 buf = [elem.text or '']
45 for child in elem: 38 for child in elem:
46 buf.append(_text(child)) 39 buf.append(_text(child))
47 buf.append(elem.tail or '') 40 buf.append(elem.tail or '')
61 54
62 # build a territory containment mapping for inheritance 55 # build a territory containment mapping for inheritance
63 regions = {} 56 regions = {}
64 for elem in sup.findall('//territoryContainment/group'): 57 for elem in sup.findall('//territoryContainment/group'):
65 regions[elem.attrib['type']] = elem.attrib['contains'].split() 58 regions[elem.attrib['type']] = elem.attrib['contains'].split()
66 from pprint import pprint
67 59
68 # Resolve territory containment 60 # Resolve territory containment
69 territory_containment = {} 61 territory_containment = {}
70 region_items = regions.items() 62 region_items = regions.items()
71 region_items.sort() 63 region_items.sort()
87 print>>sys.stderr, 'Processing input file %r' % filename 79 print>>sys.stderr, 'Processing input file %r' % filename
88 stem, ext = os.path.splitext(filename) 80 stem, ext = os.path.splitext(filename)
89 if ext != '.xml': 81 if ext != '.xml':
90 continue 82 continue
91 83
84 tree = parse(os.path.join(srcdir, 'main', filename))
92 data = {} 85 data = {}
93 if stem != 'root':
94 data.update(copy.deepcopy(dicts[_parent(stem)]))
95 tree = parse(os.path.join(srcdir, 'main', filename))
96 86
97 language = None 87 language = None
98 elem = tree.find('//identity/language') 88 elem = tree.find('//identity/language')
99 if elem is not None: 89 if elem is not None:
100 language = elem.attrib['type'] 90 language = elem.attrib['type']
227 continue 217 continue
228 try: 218 try:
229 date_formats[elem.attrib.get('type')] = \ 219 date_formats[elem.attrib.get('type')] = \
230 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern'))) 220 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
231 except ValueError, e: 221 except ValueError, e:
232 print e 222 print>>sys.stderr, 'ERROR: %s' % e
233 223
234 time_formats = data.setdefault('time_formats', {}) 224 time_formats = data.setdefault('time_formats', {})
235 for elem in calendar.findall('timeFormats/timeFormatLength'): 225 for elem in calendar.findall('timeFormats/timeFormatLength'):
236 if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats: 226 if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats:
237 continue 227 continue
238 try: 228 try:
239 time_formats[elem.attrib.get('type')] = \ 229 time_formats[elem.attrib.get('type')] = \
240 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern'))) 230 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
241 except ValueError, e: 231 except ValueError, e:
242 print e 232 print>>sys.stderr, 'ERROR: %s' % e
243 233
244 # <numbers> 234 # <numbers>
245 235
246 number_symbols = data.setdefault('number_symbols', {}) 236 number_symbols = data.setdefault('number_symbols', {})
247 for elem in tree.findall('//numbers/symbols/*'): 237 for elem in tree.findall('//numbers/symbols/*'):
249 239
250 decimal_formats = data.setdefault('decimal_formats', {}) 240 decimal_formats = data.setdefault('decimal_formats', {})
251 for elem in tree.findall('//decimalFormats/decimalFormatLength'): 241 for elem in tree.findall('//decimalFormats/decimalFormatLength'):
252 if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats: 242 if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
253 continue 243 continue
254 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('decimalFormat/pattern'))) 244 pattern = unicode(elem.findtext('decimalFormat/pattern'))
245 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
255 246
256 scientific_formats = data.setdefault('scientific_formats', {}) 247 scientific_formats = data.setdefault('scientific_formats', {})
257 for elem in tree.findall('//scientificFormats/scientificFormatLength'): 248 for elem in tree.findall('//scientificFormats/scientificFormatLength'):
258 if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats: 249 if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
259 continue 250 continue
251 # FIXME: should use numbers.parse_pattern
260 scientific_formats[elem.attrib.get('type')] = unicode(elem.findtext('scientificFormat/pattern')) 252 scientific_formats[elem.attrib.get('type')] = unicode(elem.findtext('scientificFormat/pattern'))
261 253
262 currency_formats = data.setdefault('currency_formats', {}) 254 currency_formats = data.setdefault('currency_formats', {})
263 for elem in tree.findall('//currencyFormats/currencyFormatLength'): 255 for elem in tree.findall('//currencyFormats/currencyFormatLength'):
264 if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats: 256 if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
265 continue 257 continue
258 # FIXME: should use numbers.parse_pattern
266 currency_formats[elem.attrib.get('type')] = unicode(elem.findtext('currencyFormat/pattern')) 259 currency_formats[elem.attrib.get('type')] = unicode(elem.findtext('currencyFormat/pattern'))
267 260
268 percent_formats = data.setdefault('percent_formats', {}) 261 percent_formats = data.setdefault('percent_formats', {})
269 for elem in tree.findall('//percentFormats/percentFormatLength'): 262 for elem in tree.findall('//percentFormats/percentFormatLength'):
270 if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats: 263 if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
271 continue 264 continue
272 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('percentFormat/pattern'))) 265 pattern = unicode(elem.findtext('percentFormat/pattern'))
273 266 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
274 currencies = data.setdefault('currencies', {}) 267
268 currency_names = data.setdefault('currency_names', {})
269 currency_symbols = data.setdefault('currency_symbols', {})
275 for elem in tree.findall('//currencies/currency'): 270 for elem in tree.findall('//currencies/currency'):
276 currencies[elem.attrib['type']] = { 271 name = elem.findtext('displayName')
277 'display_name': unicode(elem.findtext('displayName')), 272 if name:
278 'symbol': unicode(elem.findtext('symbol')) 273 currency_names[elem.attrib['type']] = unicode(name)
279 } 274 symbol = elem.findtext('symbol')
275 if symbol:
276 currency_symbols[elem.attrib['type']] = unicode(symbol)
280 277
281 dicts[stem] = data 278 dicts[stem] = data
282 outfile = open(os.path.join(destdir, stem + '.dat'), 'wb') 279 outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
283 try: 280 try:
284 pickle.dump(data, outfile, 2) 281 pickle.dump(data, outfile, 2)
Copyright (C) 2012-2017 Edgewall Software