Mercurial > babel > old > mirror
comparison scripts/import_cldr.py @ 28:695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
* Move locale data loading from `babel.core` into a separate `babel.localedata` module.
* Add currency names and symbols to locale data.
author | cmlenz |
---|---|
date | Sun, 03 Jun 2007 15:27:27 +0000 |
parents | 6c2c9fc7d787 |
children | 9a00ac84004c |
comparison
equal
deleted
inserted
replaced
27:8d4cd0856f69 | 28:695884591af6 |
---|---|
31 any | 31 any |
32 except NameError: | 32 except NameError: |
33 def any(iterable): | 33 def any(iterable): |
34 return filter(None, list(iterable)) | 34 return filter(None, list(iterable)) |
35 | 35 |
36 def _parent(locale): | |
37 parts = locale.split('_') | |
38 if len(parts) == 1: | |
39 return 'root' | |
40 else: | |
41 return '_'.join(parts[:-1]) | |
42 | |
43 def _text(elem): | 36 def _text(elem): |
44 buf = [elem.text or ''] | 37 buf = [elem.text or ''] |
45 for child in elem: | 38 for child in elem: |
46 buf.append(_text(child)) | 39 buf.append(_text(child)) |
47 buf.append(elem.tail or '') | 40 buf.append(elem.tail or '') |
61 | 54 |
62 # build a territory containment mapping for inheritance | 55 # build a territory containment mapping for inheritance |
63 regions = {} | 56 regions = {} |
64 for elem in sup.findall('//territoryContainment/group'): | 57 for elem in sup.findall('//territoryContainment/group'): |
65 regions[elem.attrib['type']] = elem.attrib['contains'].split() | 58 regions[elem.attrib['type']] = elem.attrib['contains'].split() |
66 from pprint import pprint | |
67 | 59 |
68 # Resolve territory containment | 60 # Resolve territory containment |
69 territory_containment = {} | 61 territory_containment = {} |
70 region_items = regions.items() | 62 region_items = regions.items() |
71 region_items.sort() | 63 region_items.sort() |
87 print>>sys.stderr, 'Processing input file %r' % filename | 79 print>>sys.stderr, 'Processing input file %r' % filename |
88 stem, ext = os.path.splitext(filename) | 80 stem, ext = os.path.splitext(filename) |
89 if ext != '.xml': | 81 if ext != '.xml': |
90 continue | 82 continue |
91 | 83 |
84 tree = parse(os.path.join(srcdir, 'main', filename)) | |
92 data = {} | 85 data = {} |
93 if stem != 'root': | |
94 data.update(copy.deepcopy(dicts[_parent(stem)])) | |
95 tree = parse(os.path.join(srcdir, 'main', filename)) | |
96 | 86 |
97 language = None | 87 language = None |
98 elem = tree.find('//identity/language') | 88 elem = tree.find('//identity/language') |
99 if elem is not None: | 89 if elem is not None: |
100 language = elem.attrib['type'] | 90 language = elem.attrib['type'] |
227 continue | 217 continue |
228 try: | 218 try: |
229 date_formats[elem.attrib.get('type')] = \ | 219 date_formats[elem.attrib.get('type')] = \ |
230 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern'))) | 220 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern'))) |
231 except ValueError, e: | 221 except ValueError, e: |
232 print e | 222 print>>sys.stderr, 'ERROR: %s' % e |
233 | 223 |
234 time_formats = data.setdefault('time_formats', {}) | 224 time_formats = data.setdefault('time_formats', {}) |
235 for elem in calendar.findall('timeFormats/timeFormatLength'): | 225 for elem in calendar.findall('timeFormats/timeFormatLength'): |
236 if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats: | 226 if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats: |
237 continue | 227 continue |
238 try: | 228 try: |
239 time_formats[elem.attrib.get('type')] = \ | 229 time_formats[elem.attrib.get('type')] = \ |
240 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern'))) | 230 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern'))) |
241 except ValueError, e: | 231 except ValueError, e: |
242 print e | 232 print>>sys.stderr, 'ERROR: %s' % e |
243 | 233 |
244 # <numbers> | 234 # <numbers> |
245 | 235 |
246 number_symbols = data.setdefault('number_symbols', {}) | 236 number_symbols = data.setdefault('number_symbols', {}) |
247 for elem in tree.findall('//numbers/symbols/*'): | 237 for elem in tree.findall('//numbers/symbols/*'): |
249 | 239 |
250 decimal_formats = data.setdefault('decimal_formats', {}) | 240 decimal_formats = data.setdefault('decimal_formats', {}) |
251 for elem in tree.findall('//decimalFormats/decimalFormatLength'): | 241 for elem in tree.findall('//decimalFormats/decimalFormatLength'): |
252 if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats: | 242 if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats: |
253 continue | 243 continue |
254 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('decimalFormat/pattern'))) | 244 pattern = unicode(elem.findtext('decimalFormat/pattern')) |
245 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | |
255 | 246 |
256 scientific_formats = data.setdefault('scientific_formats', {}) | 247 scientific_formats = data.setdefault('scientific_formats', {}) |
257 for elem in tree.findall('//scientificFormats/scientificFormatLength'): | 248 for elem in tree.findall('//scientificFormats/scientificFormatLength'): |
258 if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats: | 249 if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats: |
259 continue | 250 continue |
251 # FIXME: should use numbers.parse_pattern | |
260 scientific_formats[elem.attrib.get('type')] = unicode(elem.findtext('scientificFormat/pattern')) | 252 scientific_formats[elem.attrib.get('type')] = unicode(elem.findtext('scientificFormat/pattern')) |
261 | 253 |
262 currency_formats = data.setdefault('currency_formats', {}) | 254 currency_formats = data.setdefault('currency_formats', {}) |
263 for elem in tree.findall('//currencyFormats/currencyFormatLength'): | 255 for elem in tree.findall('//currencyFormats/currencyFormatLength'): |
264 if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats: | 256 if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats: |
265 continue | 257 continue |
258 # FIXME: should use numbers.parse_pattern | |
266 currency_formats[elem.attrib.get('type')] = unicode(elem.findtext('currencyFormat/pattern')) | 259 currency_formats[elem.attrib.get('type')] = unicode(elem.findtext('currencyFormat/pattern')) |
267 | 260 |
268 percent_formats = data.setdefault('percent_formats', {}) | 261 percent_formats = data.setdefault('percent_formats', {}) |
269 for elem in tree.findall('//percentFormats/percentFormatLength'): | 262 for elem in tree.findall('//percentFormats/percentFormatLength'): |
270 if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats: | 263 if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats: |
271 continue | 264 continue |
272 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('percentFormat/pattern'))) | 265 pattern = unicode(elem.findtext('percentFormat/pattern')) |
273 | 266 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
274 currencies = data.setdefault('currencies', {}) | 267 |
268 currency_names = data.setdefault('currency_names', {}) | |
269 currency_symbols = data.setdefault('currency_symbols', {}) | |
275 for elem in tree.findall('//currencies/currency'): | 270 for elem in tree.findall('//currencies/currency'): |
276 currencies[elem.attrib['type']] = { | 271 name = elem.findtext('displayName') |
277 'display_name': unicode(elem.findtext('displayName')), | 272 if name: |
278 'symbol': unicode(elem.findtext('symbol')) | 273 currency_names[elem.attrib['type']] = unicode(name) |
279 } | 274 symbol = elem.findtext('symbol') |
275 if symbol: | |
276 currency_symbols[elem.attrib['type']] = unicode(symbol) | |
280 | 277 |
281 dicts[stem] = data | 278 dicts[stem] = data |
282 outfile = open(os.path.join(destdir, stem + '.dat'), 'wb') | 279 outfile = open(os.path.join(destdir, stem + '.dat'), 'wb') |
283 try: | 280 try: |
284 pickle.dump(data, outfile, 2) | 281 pickle.dump(data, outfile, 2) |