babel/old/babel-test: scripts/import

comparison scripts/import_cldr.py @ 379:1c0915da48c6 stable-0.9.x

Ported [407:415/trunk] back to 0.9.x branch.

author	cmlenz
date	Tue, 08 Jul 2008 21:01:28 +0000
parents	faf0ead3a132
children	cd8761c6f1a6

comparison

equal deleted inserted replaced

-:c2ae38340540
+:1c0915da48c6
 import copy
 from optparse import OptionParser
 import os
 import pickle
+import re
 import sys
 try:
 from xml.etree.ElementTree import parse
 except ImportError:
 from elementtree.ElementTree import parse
 # Make sure we're using Babel source, and not some previously installed version
 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..'))
 from babel import dates, numbers
+from babel.localedata import Alias
 # Map CLDR weekday abbreviations to integer indices, with Monday as 0.
 weekdays = {'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5,
             'sun': 6}

 try:
     any
 except NameError:
     # Fallback for interpreters that do not provide the ``any`` builtin.
     def any(iterable):
         """Return True if at least one item of ``iterable`` is truthy.

         Stops at the first truthy item and always returns a real boolean.
         """
         for item in iterable:
             if item:
                 return True
         return False
 def _text(elem):
 buf = [elem.text or '']
 for child in elem:
 buf.append(_text(child))
 buf.append(elem.tail or '')
 return u''.join(filter(None, buf)).strip()
+NAME_RE = re.compile(r"^\w+$")
+TYPE_ATTR_RE = re.compile(r"^\w+\[@type='(.*?)'\]$")
+NAME_MAP = {
+'dateFormats': 'date_formats',
+'dateTimeFormats': 'datetime_formats',
+'eraAbbr': 'abbreviated',
+'eraNames': 'wide',
+'eraNarrow': 'narrow',
+'timeFormats': 'time_formats'
+}
+def _translate_alias(ctxt, path):
+parts = path.split('/')
+keys = ctxt[:]
+for part in parts:
+if part == '..':
+keys.pop()
+else:
+match = TYPE_ATTR_RE.match(part)
+if match:
+keys.append(match.group(1))
+else:
+assert NAME_RE.match(part)
+keys.append(NAME_MAP.get(part, part))
+return keys
 def main():
 parser = OptionParser(usage='%prog path/to/cldr')
 options, args = parser.parse_args()
 if len(args) != 1:
 for filename in filenames:
 print>>sys.stderr, 'Processing input file %r' % filename
 stem, ext = os.path.splitext(filename)
 if ext != '.xml':
 continue
+#if stem != 'root':
+#    break
 tree = parse(os.path.join(srcdir, 'main', filename))
 data = {}
 language = None
 # <localeDisplayNames>
 territories = data.setdefault('territories', {})
 for elem in tree.findall('//territories/territory'):
-if 'draft' in elem.attrib and elem.attrib['type'] in territories:
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and elem.attrib['type'] in territories:
 continue
 territories[elem.attrib['type']] = _text(elem)
 languages = data.setdefault('languages', {})
 for elem in tree.findall('//languages/language'):
-if 'draft' in elem.attrib and elem.attrib['type'] in languages:
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and elem.attrib['type'] in languages:
 continue
 languages[elem.attrib['type']] = _text(elem)
 variants = data.setdefault('variants', {})
 for elem in tree.findall('//variants/variant'):
-if 'draft' in elem.attrib and elem.attrib['type'] in variants:
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and elem.attrib['type'] in variants:
 continue
 variants[elem.attrib['type']] = _text(elem)
 scripts = data.setdefault('scripts', {})
 for elem in tree.findall('//scripts/script'):
-if 'draft' in elem.attrib and elem.attrib['type'] in scripts:
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and elem.attrib['type'] in scripts:
 continue
 scripts[elem.attrib['type']] = _text(elem)
 # <dates>
 if territory in territories or any([r in territories for r in regions]):
 week_data['weekend_end'] = weekdays[elem.attrib['day']]
 zone_formats = data.setdefault('zone_formats', {})
 for elem in tree.findall('//timeZoneNames/gmtFormat'):
-if 'draft' not in elem.attrib:
+if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
 break
 for elem in tree.findall('//timeZoneNames/regionFormat'):
-if 'draft' not in elem.attrib:
+if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
 break
 for elem in tree.findall('//timeZoneNames/fallbackFormat'):
-if 'draft' not in elem.attrib:
+if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
 zone_formats['fallback'] = unicode(elem.text) \
 .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
 break
 time_zones = data.setdefault('time_zones', {})
 # TODO: support other calendar types
 continue
 months = data.setdefault('months', {})
 for ctxt in calendar.findall('months/monthContext'):
-ctxts = months.setdefault(ctxt.attrib['type'], {})
+ctxt_type = ctxt.attrib['type']
+ctxts = months.setdefault(ctxt_type, {})
 for width in ctxt.findall('monthWidth'):
-widths = ctxts.setdefault(width.attrib['type'], {})
+width_type = width.attrib['type']
-for elem in width.findall('month'):
+widths = ctxts.setdefault(width_type, {})
-if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
+for elem in width.getiterator():
+if elem.tag == 'month':
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and int(elem.attrib['type']) in widths:
+continue
+widths[int(elem.attrib.get('type'))] = unicode(elem.text)
+elif elem.tag == 'alias':
+ctxts[width_type] = Alias(
+_translate_alias(['months', ctxt_type, width_type],
+elem.attrib['path'])
+)
# Collect weekday names per context and width, keyed by weekday index.
days = data.setdefault('days', {})
for ctxt in calendar.findall('days/dayContext'):
    ctxt_type = ctxt.attrib['type']
    ctxts = days.setdefault(ctxt_type, {})
    for width in ctxt.findall('dayWidth'):
        width_type = width.attrib['type']
        widths = ctxts.setdefault(width_type, {})
        for elem in width.getiterator():
            if elem.tag == 'day':
                dtype = weekdays[elem.attrib['type']]
                # A draft or alternate ('alt') value must not replace a
                # value already recorded for this weekday. This is the
                # same rule the month, quarter and era loops apply.
                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                        and dtype in widths:
                    continue
                widths[dtype] = unicode(elem.text)
            elif elem.tag == 'alias':
                # Replace this width's entry with an Alias built from the
                # translated key path.
                ctxts[width_type] = Alias(
                    _translate_alias(['days', ctxt_type, width_type],
                                     elem.attrib['path'])
                )
+quarters = data.setdefault('quarters', {})
+for ctxt in calendar.findall('quarters/quarterContext'):
+ctxt_type = ctxt.attrib['type']
+ctxts = quarters.setdefault(ctxt.attrib['type'], {})
+for width in ctxt.findall('quarterWidth'):
+width_type = width.attrib['type']
+widths = ctxts.setdefault(width_type, {})
+for elem in width.getiterator():
+if elem.tag == 'quarter':
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and int(elem.attrib['type']) in widths:
+continue
+widths[int(elem.attrib['type'])] = unicode(elem.text)
+elif elem.tag == 'alias':
+ctxts[width_type] = Alias(
+_translate_alias(['quarters', ctxt_type, width_type],
+elem.attrib['path'])
+)
+eras = data.setdefault('eras', {})
+for width in calendar.findall('eras/*'):
+width_type = NAME_MAP[width.tag]
+widths = eras.setdefault(width_type, {})
+for elem in width.getiterator():
+if elem.tag == 'era':
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and int(elem.attrib['type']) in widths:
 continue
 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
+elif elem.tag == 'alias':
-days = data.setdefault('days', {})
+eras[width_type] = Alias(
-for ctxt in calendar.findall('days/dayContext'):
+_translate_alias(['eras', width_type],
-ctxts = days.setdefault(ctxt.attrib['type'], {})
+elem.attrib['path'])
-for width in ctxt.findall('dayWidth'):
+)
-widths = ctxts.setdefault(width.attrib['type'], {})
-for elem in width.findall('day'):
-dtype = weekdays[elem.attrib['type']]
-if 'draft' in elem.attrib and dtype in widths:
-continue
-widths[dtype] = unicode(elem.text)
-quarters = data.setdefault('quarters', {})
-for ctxt in calendar.findall('quarters/quarterContext'):
-ctxts = quarters.setdefault(ctxt.attrib['type'], {})
-for width in ctxt.findall('quarterWidth'):
-widths = ctxts.setdefault(width.attrib['type'], {})
-for elem in width.findall('quarter'):
-if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
-continue
-widths[int(elem.attrib.get('type'))] = unicode(elem.text)
-eras = data.setdefault('eras', {})
-for width in calendar.findall('eras/*'):
-ewidth = {
-'eraAbbr': 'abbreviated',
-'eraNames': 'wide',
-'eraNarrow': 'narrow',
-}[width.tag]
-widths = eras.setdefault(ewidth, {})
-for elem in width.findall('era'):
-if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
-continue
-widths[int(elem.attrib.get('type'))] = unicode(elem.text)
 # AM/PM
 periods = data.setdefault('periods', {})
 for elem in calendar.findall('am'):
-if 'draft' in elem.attrib and elem.tag in periods:
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and elem.tag in periods:
 continue
 periods[elem.tag] = unicode(elem.text)
 for elem in calendar.findall('pm'):
-if 'draft' in elem.attrib and elem.tag in periods:
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and elem.tag in periods:
 continue
 periods[elem.tag] = unicode(elem.text)
 date_formats = data.setdefault('date_formats', {})
-for elem in calendar.findall('dateFormats/dateFormatLength'):
+for format in calendar.findall('dateFormats'):
-if 'draft' in elem.attrib and elem.attrib.get('type') in date_formats:
+for elem in format.getiterator():
-continue
+if elem.tag == 'dateFormatLength':
-try:
+if 'draft' in elem.attrib and \
-date_formats[elem.attrib.get('type')] = \
+elem.attrib.get('type') in date_formats:
-dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
+continue
-except ValueError, e:
+try:
-print>>sys.stderr, 'ERROR: %s' % e
+date_formats[elem.attrib.get('type')] = \
+dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
+except ValueError, e:
+print>>sys.stderr, 'ERROR: %s' % e
+elif elem.tag == 'alias':
+date_formats = Alias(_translate_alias(
+['date_formats'], elem.attrib['path'])
+)
 time_formats = data.setdefault('time_formats', {})
-for elem in calendar.findall('timeFormats/timeFormatLength'):
+for format in calendar.findall('timeFormats'):
-if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats:
+for elem in format.getiterator():
-continue
+if elem.tag == 'timeFormatLength':
-try:
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
-time_formats[elem.attrib.get('type')] = \
+and elem.attrib.get('type') in time_formats:
-dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
+continue
-except ValueError, e:
+try:
-print>>sys.stderr, 'ERROR: %s' % e
+time_formats[elem.attrib.get('type')] = \
+dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
+except ValueError, e:
+print>>sys.stderr, 'ERROR: %s' % e
+elif elem.tag == 'alias':
+time_formats = Alias(_translate_alias(
+['time_formats'], elem.attrib['path'])
+)
 datetime_formats = data.setdefault('datetime_formats', {})
-for elem in calendar.findall('dateTimeFormats/dateTimeFormatLength'):
+for format in calendar.findall('dateTimeFormats'):
-if 'draft' in elem.attrib and elem.attrib.get('type') in datetime_formats:
+for elem in format.getiterator():
-continue
+if elem.tag == 'dateTimeFormatLength':
-try:
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
-datetime_formats[elem.attrib.get('type')] = \
+and elem.attrib.get('type') in datetime_formats:
-unicode(elem.findtext('dateTimeFormat/pattern'))
+continue
-except ValueError, e:
+try:
-print>>sys.stderr, 'ERROR: %s' % e
+datetime_formats[elem.attrib.get('type')] = \
+unicode(elem.findtext('dateTimeFormat/pattern'))
+except ValueError, e:
+print>>sys.stderr, 'ERROR: %s' % e
+elif elem.tag == 'alias':
+datetime_formats = Alias(_translate_alias(
+['datetime_formats'], elem.attrib['path'])
+)
 # <numbers>
 number_symbols = data.setdefault('number_symbols', {})
 for elem in tree.findall('//numbers/symbols/*'):
 number_symbols[elem.tag] = unicode(elem.text)
 decimal_formats = data.setdefault('decimal_formats', {})
 for elem in tree.findall('//decimalFormats/decimalFormatLength'):
-if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and elem.attrib.get('type') in decimal_formats:
 continue
 pattern = unicode(elem.findtext('decimalFormat/pattern'))
 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
 scientific_formats = data.setdefault('scientific_formats', {})
 for elem in tree.findall('//scientificFormats/scientificFormatLength'):
-if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and elem.attrib.get('type') in scientific_formats:
 continue
 pattern = unicode(elem.findtext('scientificFormat/pattern'))
 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
 currency_formats = data.setdefault('currency_formats', {})
 for elem in tree.findall('//currencyFormats/currencyFormatLength'):
-if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and elem.attrib.get('type') in currency_formats:
 continue
 pattern = unicode(elem.findtext('currencyFormat/pattern'))
 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
 percent_formats = data.setdefault('percent_formats', {})
 for elem in tree.findall('//percentFormats/percentFormatLength'):
-if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
+if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+and elem.attrib.get('type') in percent_formats:
 continue
 pattern = unicode(elem.findtext('percentFormat/pattern'))
 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
 currency_names = data.setdefault('currency_names', {})
 try:
 pickle.dump(data, outfile, 2)
 finally:
 outfile.close()
 if __name__ == '__main__':
 main()

Mercurial > babel > old > babel-test

comparison scripts/import_cldr.py @ 379:1c0915da48c6 stable-0.9.x