Mercurial > babel > old > mirror
view scripts/import_cldr.py @ 8:ff5481545bfd
Add basic PO file parsing, and change the PO writing procedure to also take flags (such as "python-format" or "fuzzy").
author | cmlenz |
---|---|
date | Wed, 30 May 2007 11:52:46 +0000 |
parents | e9eaddab598e |
children | 0ca5dd65594f |
line wrap: on
line source
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.

"""Convert CLDR locale XML files into pickled locale data dictionaries.

Usage: ``import_cldr.py path/to/cldr``

Every ``*.xml`` file in the ``main`` subdirectory of the given CLDR
directory is parsed, merged on top of a copy of its parent locale's data
and written as ``LOCALE.dat`` into ``../babel/localedata`` (relative to
the directory containing this script).
"""

import copy
from optparse import OptionParser
import os
import pickle
import sys
try:
    from xml.etree.ElementTree import parse
except ImportError:
    from elementtree.ElementTree import parse

from babel.dates import parse_pattern

# Text type used to normalise values read from the XML: ``unicode`` on
# Python 2, ``str`` on Python 3 (where the name ``unicode`` is gone).
try:
    _text_type = unicode
except NameError:
    _text_type = str

# Day identifiers used in the XML mapped to the integer keys stored in the
# ``days`` data.
_WEEKDAY_NUMBERS = {
    'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4, 'fri': 5, 'sat': 6, 'sun': 7,
}

# Child tags of the ``eras`` element mapped to the width names stored in the
# ``eras`` data.
_ERA_WIDTHS = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}


def _parent(locale):
    """Return the identifier of the locale that `locale` inherits from.

    The last ``_``-separated part is dropped (``de_DE`` -> ``de``); an
    identifier without any ``_`` inherits from ``root``.
    """
    parts = locale.split('_')
    if len(parts) == 1:
        return 'root'
    return '_'.join(parts[:-1])


def _text(elem):
    """Return the stripped text of `elem`, including text of its children.

    The element's own text, the (recursively collected and stripped) text of
    every child and the element's tail are concatenated in that order.
    """
    buf = [elem.text or '']
    for child in elem:
        buf.append(_text(child))
    buf.append(elem.tail or '')
    return u''.join(buf).strip()


def _type_attr(elem):
    """Return the mandatory ``type`` attribute of `elem`."""
    return elem.attrib['type']


def _optional_type_attr(elem):
    """Return the ``type`` attribute of `elem`, or None if it has none."""
    return elem.attrib.get('type')


def _int_type_attr(elem):
    """Return the ``type`` attribute of `elem` converted to an integer."""
    return int(elem.attrib['type'])


def _weekday_number(elem):
    """Return the weekday number for the ``type`` attribute of `elem`."""
    return _WEEKDAY_NUMBERS[elem.attrib['type']]


def _tag(elem):
    """Return the tag name of `elem`."""
    return elem.tag


def _elem_text(elem):
    """Return the direct text of `elem` converted to the text type."""
    # NOTE(review): an element without text is stored as the text 'None',
    # exactly as before; confirm whether consumers rely on that.
    return _text_type(elem.text)


def _merge(mapping, elems, key_func, value_func):
    """Store ``value_func(elem)`` under ``key_func(elem)`` for each element.

    An element carrying a ``draft`` attribute is skipped when `mapping`
    already holds a value for its key, so draft data never replaces an
    entry that is already present (for example one copied from the parent
    locale).
    """
    for elem in elems:
        key = key_func(elem)
        if 'draft' in elem.attrib and key in mapping:
            continue
        mapping[key] = value_func(elem)


def _import_display_names(tree, data):
    """Collect territory, language, variant and script display names."""
    for name, path in (('territories', './/territories/territory'),
                       ('languages', './/languages/language'),
                       ('variants', './/variants/variant'),
                       ('scripts', './/scripts/script')):
        _merge(data.setdefault(name, {}), tree.findall(path),
               _type_attr, _text)


def _import_widths(calendar, data, plural, singular, key_func):
    """Collect a context/width/item hierarchy such as months or days.

    For ``plural='months'`` and ``singular='month'`` this reads
    ``months/monthContext/monthWidth/month`` below `calendar` and fills
    ``data['months'][context][width][key_func(item)]``.
    """
    contexts = data.setdefault(plural, {})
    for ctxt in calendar.findall('%s/%sContext' % (plural, singular)):
        widths_by_type = contexts.setdefault(ctxt.attrib['type'], {})
        for width in ctxt.findall('%sWidth' % singular):
            widths = widths_by_type.setdefault(width.attrib['type'], {})
            _merge(widths, width.findall(singular), key_func, _elem_text)


def _import_datetime_patterns(calendar, data, name, length_path,
                              pattern_path, locale):
    """Parse the date or time patterns of `calendar` into ``data[name]``.

    Patterns rejected by ``parse_pattern`` with a ``ValueError`` are
    reported on stderr and left out; `locale` is only used in that message.
    """
    formats = data.setdefault(name, {})
    for elem in calendar.findall(length_path):
        key = elem.attrib.get('type')
        if 'draft' in elem.attrib and key in formats:
            continue
        raw = elem.findtext(pattern_path)
        try:
            formats[key] = parse_pattern(_text_type(raw))
        except ValueError:
            # sys.exc_info() rather than "except ... as" keeps this module
            # compilable on old Python 2 releases as well as on Python 3.
            sys.stderr.write('Skipping %s pattern %r of locale %r: %s\n'
                             % (name, raw, locale, sys.exc_info()[1]))


def _import_dates(tree, data, locale):
    """Collect time zone names and gregorian calendar data from `tree`."""
    time_zones = data.setdefault('time_zones', {})
    for elem in tree.findall('.//timeZoneNames/zone'):
        # Key by the zone's type attribute; keying by the tag name (always
        # 'zone') made every zone overwrite the same single entry.
        time_zones[elem.attrib['type']] = \
            _text_type(elem.findtext('displayName'))

    for calendar in tree.findall('.//calendars/calendar'):
        if calendar.attrib['type'] != 'gregorian':
            # TODO: support other calendar types
            continue

        _import_widths(calendar, data, 'months', 'month', _int_type_attr)
        _import_widths(calendar, data, 'days', 'day', _weekday_number)
        _import_widths(calendar, data, 'quarters', 'quarter',
                       _int_type_attr)

        eras = data.setdefault('eras', {})
        for width in calendar.findall('eras/*'):
            # Any child tag missing from _ERA_WIDTHS raises KeyError.
            widths = eras.setdefault(_ERA_WIDTHS[width.tag], {})
            _merge(widths, width.findall('era'), _int_type_attr, _elem_text)

        # AM/PM
        periods = data.setdefault('periods', {})
        for tag in ('am', 'pm'):
            _merge(periods, calendar.findall(tag), _tag, _elem_text)

        _import_datetime_patterns(calendar, data, 'date_formats',
                                  'dateFormats/dateFormatLength',
                                  'dateFormat/pattern', locale)
        _import_datetime_patterns(calendar, data, 'time_formats',
                                  'timeFormats/timeFormatLength',
                                  'timeFormat/pattern', locale)


def _import_number_patterns(tree, data, name, length_path, pattern_path):
    """Collect the unparsed number patterns found below `length_path`."""
    def pattern_text(elem):
        return _text_type(elem.findtext(pattern_path))

    # The type attribute may be absent here, in which case the key is None.
    _merge(data.setdefault(name, {}), tree.findall(length_path),
           _optional_type_attr, pattern_text)


def _import_numbers(tree, data):
    """Collect number symbols, number format patterns and currencies."""
    number_symbols = data.setdefault('number_symbols', {})
    for elem in tree.findall('.//numbers/symbols/*'):
        number_symbols[elem.tag] = _text_type(elem.text)

    for kind in ('decimal', 'scientific', 'currency', 'percent'):
        _import_number_patterns(
            tree, data, '%s_formats' % kind,
            './/%sFormats/%sFormatLength' % (kind, kind),
            '%sFormat/pattern' % kind)

    currencies = data.setdefault('currencies', {})
    for elem in tree.findall('.//currencies/currency'):
        currencies[elem.attrib['type']] = {
            'display_name': _text_type(elem.findtext('displayName')),
            'symbol': _text_type(elem.findtext('symbol')),
        }


def main():
    """Command-line entry point: convert ``CLDR/main/*.xml`` to pickles.

    Expects exactly one positional argument, the path of the CLDR
    directory. ``root.xml`` is processed first and the remaining files in
    order of increasing file name length, so that the data of a parent
    locale is available before a locale inheriting from it is processed.
    """
    parser = OptionParser(usage='%prog path/to/cldr')
    _options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                           '..', 'babel', 'localedata')

    filenames = os.listdir(os.path.join(srcdir, 'main'))
    if 'root.xml' not in filenames:
        parser.error('no root.xml found in %r'
                     % os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    # Stable sort by length: a parent's file name is always shorter than
    # the names of the locales derived from it.
    filenames.sort(key=len)
    filenames.insert(0, 'root.xml')

    dicts = {}

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue
        sys.stderr.write('Processing input file %r\n' % filename)

        # Start from a private copy of the parent's data so that values
        # added here do not leak back into the parent dictionary.
        data = {}
        if stem != 'root':
            data.update(copy.deepcopy(dicts[_parent(stem)]))
        tree = parse(os.path.join(srcdir, 'main', filename))

        _import_display_names(tree, data)
        _import_dates(tree, data, stem)
        _import_numbers(tree, data)

        dicts[stem] = data
        outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
        try:
            pickle.dump(data, outfile, 2)
        finally:
            outfile.close()


if __name__ == '__main__':
    main()