Mercurial > babel > mirror
comparison scripts/import_cldr.py @ 1:7870274479f5 trunk
Import of initial code base.
author | cmlenz |
---|---|
date | Tue, 29 May 2007 20:33:55 +0000 |
parents | |
children | 29f6f9a90f14 |
comparison
equal
deleted
inserted
replaced
0:fef11c9819c7 | 1:7870274479f5 |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 # | |
4 # Copyright (C) 2007 Edgewall Software | |
5 # All rights reserved. | |
6 # | |
7 # This software is licensed as described in the file COPYING, which | |
8 # you should have received as part of this distribution. The terms | |
9 # are also available at http://babel.edgewall.org/wiki/License. | |
10 # | |
11 # This software consists of voluntary contributions made by many | |
12 # individuals. For the exact contribution history, see the revision | |
13 # history and logs, available at http://babel.edgewall.org/log/. | |
14 | |
15 import copy | |
16 from optparse import OptionParser | |
17 import os | |
18 import pickle | |
19 import sys | |
20 try: | |
21 from xml.etree.ElementTree import parse | |
22 except ImportError: | |
23 from elementtree.ElementTree import parse | |
24 | |
25 from babel.dates import parse_pattern | |
26 | |
27 def _parent(locale): | |
28 parts = locale.split('_') | |
29 if len(parts) == 1: | |
30 return 'root' | |
31 else: | |
32 return '_'.join(parts[:-1]) | |
33 | |
34 def _text(elem): | |
35 buf = [elem.text or ''] | |
36 for child in elem: | |
37 buf.append(_text(child)) | |
38 buf.append(elem.tail or '') | |
39 return u''.join(filter(None, buf)).strip() | |
40 | |
# Weekday identifiers used in the ``type`` attribute of <day> elements,
# mapped to the numbers the output uses as keys.
_WEEKDAY_NUMBERS = {'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4,
                    'fri': 5, 'sat': 6, 'sun': 7}

# Tags of the era containers, mapped to the width names used in the output.
_ERA_WIDTHS = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}


def _import_names(tree, data, key, path):
    """Store the text of the elements matching `path` in ``data[key]``.

    Entries are keyed by the element's ``type`` attribute.  An element
    carrying a ``draft`` attribute never replaces an entry that is already
    present (for example one copied from the parent locale).
    """
    names = data.setdefault(key, {})
    for elem in tree.findall(path):
        code = elem.attrib['type']
        if 'draft' in elem.attrib and code in names:
            continue
        names[code] = _text(elem)


def _store_indexed(elems, target, index_of):
    """Store the text of each element in `target` under a computed index.

    `index_of` converts the element's ``type`` attribute into the key.
    Elements with a ``draft`` attribute do not replace existing entries.
    """
    for elem in elems:
        index = index_of(elem.attrib['type'])
        if 'draft' in elem.attrib and index in target:
            continue
        # NOTE(review): an element without text is stored as u'None'.
        target[index] = unicode(elem.text)


def _import_contexts(calendar, data, key, context_path, width_tag, item_tag,
                     index_of):
    """Store names that vary by context and width in ``data[key]``.

    The result is nested as ``{context type: {width type: {index: text}}}``.
    """
    contexts = data.setdefault(key, {})
    for ctxt in calendar.findall(context_path):
        widths = contexts.setdefault(ctxt.attrib['type'], {})
        for width in ctxt.findall(width_tag):
            items = widths.setdefault(width.attrib['type'], {})
            _store_indexed(width.findall(item_tag), items, index_of)


def _import_patterns(node, data, key, path, pattern_path, convert=None):
    """Store the patterns found below `node` in ``data[key]``.

    Each element matching `path` contributes the text found at
    `pattern_path`, keyed by the element's ``type`` attribute (``None`` if
    it has none).  Elements with a ``draft`` attribute do not replace
    existing entries.  If `convert` is given, the pattern is passed through
    it first; a pattern for which it raises `ValueError` is reported on
    standard error and skipped.
    """
    formats = data.setdefault(key, {})
    for elem in node.findall(path):
        length = elem.attrib.get('type')
        if 'draft' in elem.attrib and length in formats:
            continue
        # NOTE(review): a missing pattern element is stored as u'None'.
        pattern = unicode(elem.findtext(pattern_path))
        if convert is None:
            formats[length] = pattern
            continue
        try:
            formats[length] = convert(pattern)
        except ValueError:
            # Report on stderr, next to the progress messages, and go on
            # with the remaining patterns.
            sys.stderr.write('%s\n' % (sys.exc_info()[1],))


def _import_calendar(calendar, data):
    """Merge the names and formats of one <calendar> element into `data`."""
    _import_contexts(calendar, data, 'months', 'months/monthContext',
                     'monthWidth', 'month', int)
    _import_contexts(calendar, data, 'days', 'days/dayContext',
                     'dayWidth', 'day', _WEEKDAY_NUMBERS.__getitem__)
    _import_contexts(calendar, data, 'quarters', 'quarters/quarterContext',
                     'quarterWidth', 'quarter', int)

    eras = data.setdefault('eras', {})
    for width in calendar.findall('eras/*'):
        names = eras.setdefault(_ERA_WIDTHS[width.tag], {})
        _store_indexed(width.findall('era'), names, int)

    # AM/PM
    periods = data.setdefault('periods', {})
    for tag in ('am', 'pm'):
        for elem in calendar.findall(tag):
            if 'draft' in elem.attrib and elem.tag in periods:
                continue
            periods[elem.tag] = unicode(elem.text)

    _import_patterns(calendar, data, 'date_formats',
                     'dateFormats/dateFormatLength', 'dateFormat/pattern',
                     parse_pattern)
    _import_patterns(calendar, data, 'time_formats',
                     'timeFormats/timeFormatLength', 'timeFormat/pattern',
                     parse_pattern)


def _extract_locale_data(tree, data):
    """Merge the locale data found in the parsed XML `tree` into `data`.

    `data` may already hold the values inherited from the parent locale;
    values found in `tree` replace them, except that draft items never
    replace an existing entry in the sections that check for drafts.
    """
    # <localeDisplayNames>
    _import_names(tree, data, 'territories', '//territories/territory')
    _import_names(tree, data, 'languages', '//languages/language')
    _import_names(tree, data, 'variants', '//variants/variant')
    _import_names(tree, data, 'scripts', '//scripts/script')

    # <dates>
    time_zones = data.setdefault('time_zones', {})
    for elem in tree.findall('//timeZoneNames/zone'):
        # Key by the zone's ``type`` attribute; the tag is the same for
        # every matched element, so using it would leave only one entry.
        # NOTE(review): a zone without a <displayName> child is stored as
        # the string u'None' -- confirm which child should be read.
        time_zones[elem.attrib['type']] = \
            unicode(elem.findtext('displayName'))

    for calendar in tree.findall('//calendars/calendar'):
        if calendar.attrib['type'] != 'gregorian':
            # TODO: support other calendar types
            continue
        _import_calendar(calendar, data)

    # <numbers>
    number_symbols = data.setdefault('number_symbols', {})
    for elem in tree.findall('//numbers/symbols/*'):
        number_symbols[elem.tag] = unicode(elem.text)

    _import_patterns(tree, data, 'decimal_formats',
                     '//decimalFormats/decimalFormatLength',
                     'decimalFormat/pattern')
    _import_patterns(tree, data, 'scientific_formats',
                     '//scientificFormats/scientificFormatLength',
                     'scientificFormat/pattern')
    _import_patterns(tree, data, 'currency_formats',
                     '//currencyFormats/currencyFormatLength',
                     'currencyFormat/pattern')
    _import_patterns(tree, data, 'percent_formats',
                     '//percentFormats/percentFormatLength',
                     'percentFormat/pattern')

    currencies = data.setdefault('currencies', {})
    for elem in tree.findall('//currencies/currency'):
        # NOTE(review): a missing <displayName> or <symbol> is stored as
        # the string u'None'.
        currencies[elem.attrib['type']] = {
            'display_name': unicode(elem.findtext('displayName')),
            'symbol': unicode(elem.findtext('symbol'))
        }


def main():
    """Convert the XML locale files of a CLDR checkout into pickle files.

    Takes exactly one command-line argument, the path of the CLDR
    directory; the ``.xml`` files in its ``main`` subdirectory are read.
    For every locale a ``<locale>.dat`` pickle (protocol 2) is written to
    ``../babel/localedata`` relative to the directory of this script.

    ``root.xml`` is processed first and the other files in order of
    ascending file name length, so that the data of a parent locale is
    available to be copied when one of its children is processed.
    """
    parser = OptionParser(usage='%prog path/to/cldr')
    args = parser.parse_args()[1]
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                           '..', 'babel', 'localedata')

    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(key=len)
    filenames.insert(0, 'root.xml')

    # Data of every locale processed so far, keyed by locale identifier.
    dicts = {}

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue
        sys.stderr.write('Processing input file %r\n' % filename)

        data = {}
        if stem != 'root':
            # Start from a private copy of the parent's data so that the
            # changes below do not leak back into the parent.
            data.update(copy.deepcopy(dicts[_parent(stem)]))
        tree = parse(os.path.join(srcdir, 'main', filename))
        _extract_locale_data(tree, data)

        dicts[stem] = data
        outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
        try:
            pickle.dump(data, outfile, 2)
        finally:
            outfile.close()
221 | |
# Run the import only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()