Mercurial > babel > mirror
annotate scripts/import_cldr.py @ 22:d1e6944f2ff0 trunk
Implemented babel.numbers.format_percent
author | jonas |
---|---|
date | Thu, 31 May 2007 19:52:57 +0000 |
parents | 244a74232f5e |
children | 6041782ea677 |
rev | line source |
---|---|
1 | 1 #!/usr/bin/env python |
2 # -*- coding: utf-8 -*- | |
3 # | |
4 # Copyright (C) 2007 Edgewall Software | |
5 # All rights reserved. | |
6 # | |
7 # This software is licensed as described in the file COPYING, which | |
8 # you should have received as part of this distribution. The terms | |
9 # are also available at http://babel.edgewall.org/wiki/License. | |
10 # | |
11 # This software consists of voluntary contributions made by many | |
12 # individuals. For the exact contribution history, see the revision | |
13 # history and logs, available at http://babel.edgewall.org/log/. | |
14 | |
15 import copy | |
16 from optparse import OptionParser | |
17 import os | |
18 import pickle | |
19 import sys | |
20 try: | |
21 from xml.etree.ElementTree import parse | |
22 except ImportError: | |
23 from elementtree.ElementTree import parse | |
24 | |
9 | 25 from babel import dates, numbers |
1 | 26 |
# Maps the CLDR three-letter weekday identifiers to integer indexes,
# with Monday as 0 and Sunday as 6.
weekdays = {'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5,
            'sun': 6}

try:
    any
except NameError:
    # Fallback for interpreters that do not provide the ``any`` builtin.
    def any(iterable):
        """Return True if at least one item of ``iterable`` is true.

        Stops at the first true item and always returns a real boolean,
        matching the contract of the builtin ``any``.
        """
        for item in iterable:
            if item:
                return True
        return False
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
35 |
1 | 36 def _parent(locale): |
37 parts = locale.split('_') | |
38 if len(parts) == 1: | |
39 return 'root' | |
40 else: | |
41 return '_'.join(parts[:-1]) | |
42 | |
43 def _text(elem): | |
44 buf = [elem.text or ''] | |
45 for child in elem: | |
46 buf.append(_text(child)) | |
47 buf.append(elem.tail or '') | |
48 return u''.join(filter(None, buf)).strip() | |
49 | |
def _applies_to(elem, territory, regions):
    """Tell whether a supplemental ``elem`` covers ``territory``.

    ``elem`` must carry a whitespace-separated ``territories`` attribute.
    The element applies when it lists the territory itself or any of the
    ``regions`` that contain it.
    """
    listed = elem.attrib['territories'].split()
    return territory in listed or any([r in listed for r in regions])


def main():
    """Convert the CLDR XML files into pickled locale data files.

    Takes exactly one command-line argument, the path to the CLDR
    directory.  Reads ``supplemental/supplementalData.xml`` and every
    ``.xml`` file in ``main/`` below that path, and writes one
    ``<locale>.dat`` pickle per input file into ``../babel/localedata``
    relative to the directory containing this script.

    Each locale starts from a deep copy of its parent's data (see
    ``_parent``), so files are processed shortest name first, with
    ``root.xml`` before all others.
    """
    parser = OptionParser(usage='%prog path/to/cldr')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                           '..', 'babel', 'localedata')

    sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))

    # build a territory containment mapping for inheritance
    groups = {}
    for elem in sup.findall('//territoryContainment/group'):
        groups[elem.attrib['type']] = elem.attrib['contains'].split()

    # Resolve territory containment: map every territory to the set of all
    # groups that contain it.  Groups are visited in sorted order, and a
    # group only contributes its own containers if it was seen as a member
    # of an earlier group.
    territory_containment = {}
    for group, territory_list in sorted(groups.items()):
        for territory in territory_list:
            containers = territory_containment.setdefault(territory, set())
            if group in territory_containment:
                containers |= territory_containment[group]
            containers.add(group)

    # The week data lives in the supplemental file and is the same for
    # every locale, so look it up only once.
    week_elem = sup.find('//weekData')

    # Shortest names first so that a parent locale is always processed
    # before the locales that inherit from it; root comes before all.
    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(key=len)
    filenames.insert(0, 'root.xml')

    dicts = {}

    for filename in filenames:
        sys.stderr.write('Processing input file %r\n' % filename)
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue

        data = {}
        if stem != 'root':
            data.update(copy.deepcopy(dicts[_parent(stem)]))
        tree = parse(os.path.join(srcdir, 'main', filename))

        language = None
        elem = tree.find('//identity/language')
        if elem is not None:
            language = elem.attrib['type']
        sys.stderr.write(' Language: %r\n' % language)

        elem = tree.find('//identity/territory')
        if elem is not None:
            territory = elem.attrib['type']
        else:
            territory = '001' # world
        sys.stderr.write(' Territory: %r\n' % territory)
        containing_regions = territory_containment.get(territory, [])
        sys.stderr.write(' Regions: %r\n' % containing_regions)

        # <localeDisplayNames>

        # A draft entry never replaces a name that is already present
        # (for instance one inherited from the parent locale).
        for key, path in (('territories', '//territories/territory'),
                          ('languages', '//languages/language'),
                          ('variants', '//variants/variant'),
                          ('scripts', '//scripts/script')):
            names = data.setdefault(key, {})
            for elem in tree.findall(path):
                if 'draft' in elem.attrib and elem.attrib['type'] in names:
                    continue
                names[elem.attrib['type']] = _text(elem)

        # <dates>

        week_data = data.setdefault('week_data', {})

        for elem in week_elem.findall('minDays'):
            if _applies_to(elem, territory, containing_regions):
                week_data['min_days'] = int(elem.attrib['count'])

        for tag, key in (('firstDay', 'first_day'),
                         ('weekendStart', 'weekend_start'),
                         ('weekendEnd', 'weekend_end')):
            for elem in week_elem.findall(tag):
                if _applies_to(elem, territory, containing_regions):
                    week_data[key] = weekdays[elem.attrib['day']]

        # Key each entry by the zone's ``type`` attribute; the tag name is
        # the same for every <zone> element and would make all zones
        # overwrite one another.
        # NOTE(review): findtext() returns None when a zone has no
        # displayName child, which unicode() turns into u'None' -- confirm
        # whether such zones should be skipped instead.
        time_zones = data.setdefault('time_zones', {})
        for elem in tree.findall('//timeZoneNames/zone'):
            time_zones[elem.attrib['type']] = \
                unicode(elem.findtext('displayName'))

        for calendar in tree.findall('//calendars/calendar'):
            if calendar.attrib['type'] != 'gregorian':
                # TODO: support other calendar types
                continue

            months = data.setdefault('months', {})
            for ctxt in calendar.findall('months/monthContext'):
                ctxts = months.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('monthWidth'):
                    widths = ctxts.setdefault(width.attrib['type'], {})
                    for elem in width.findall('month'):
                        number = int(elem.attrib['type'])
                        if 'draft' in elem.attrib and number in widths:
                            continue
                        widths[number] = unicode(elem.text)

            days = data.setdefault('days', {})
            for ctxt in calendar.findall('days/dayContext'):
                ctxts = days.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('dayWidth'):
                    widths = ctxts.setdefault(width.attrib['type'], {})
                    for elem in width.findall('day'):
                        dtype = weekdays[elem.attrib['type']]
                        if 'draft' in elem.attrib and dtype in widths:
                            continue
                        widths[dtype] = unicode(elem.text)

            quarters = data.setdefault('quarters', {})
            for ctxt in calendar.findall('quarters/quarterContext'):
                ctxts = quarters.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('quarterWidth'):
                    widths = ctxts.setdefault(width.attrib['type'], {})
                    for elem in width.findall('quarter'):
                        number = int(elem.attrib['type'])
                        if 'draft' in elem.attrib and number in widths:
                            continue
                        widths[number] = unicode(elem.text)

            eras = data.setdefault('eras', {})
            era_widths = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}
            for width in calendar.findall('eras/*'):
                # Any other child tag of <eras> raises KeyError here.
                widths = eras.setdefault(era_widths[width.tag], {})
                for elem in width.findall('era'):
                    number = int(elem.attrib['type'])
                    if 'draft' in elem.attrib and number in widths:
                        continue
                    widths[number] = unicode(elem.text)

            # AM/PM
            periods = data.setdefault('periods', {})
            for tag in ('am', 'pm'):
                for elem in calendar.findall(tag):
                    if 'draft' in elem.attrib and elem.tag in periods:
                        continue
                    periods[elem.tag] = unicode(elem.text)

            # Date and time patterns are stored in parsed form; a pattern
            # that fails to parse is reported and left out.
            for key, length_path, pattern_path in (
                    ('date_formats', 'dateFormats/dateFormatLength',
                     'dateFormat/pattern'),
                    ('time_formats', 'timeFormats/timeFormatLength',
                     'timeFormat/pattern')):
                formats = data.setdefault(key, {})
                for elem in calendar.findall(length_path):
                    ftype = elem.attrib.get('type')
                    if 'draft' in elem.attrib and ftype in formats:
                        continue
                    try:
                        formats[ftype] = dates.parse_pattern(
                            unicode(elem.findtext(pattern_path)))
                    except ValueError:
                        # sys.exc_info() keeps this valid on every Python
                        # version, unlike either ``except`` binding syntax.
                        sys.stderr.write('%s\n' % (sys.exc_info()[1],))

        # <numbers>

        number_symbols = data.setdefault('number_symbols', {})
        for elem in tree.findall('//numbers/symbols/*'):
            number_symbols[elem.tag] = unicode(elem.text)

        # Decimal and percent patterns are stored parsed; scientific and
        # currency patterns are stored as plain pattern strings.
        for key, length_path, pattern_path, parsed in (
                ('decimal_formats', '//decimalFormats/decimalFormatLength',
                 'decimalFormat/pattern', True),
                ('scientific_formats',
                 '//scientificFormats/scientificFormatLength',
                 'scientificFormat/pattern', False),
                ('currency_formats',
                 '//currencyFormats/currencyFormatLength',
                 'currencyFormat/pattern', False),
                ('percent_formats', '//percentFormats/percentFormatLength',
                 'percentFormat/pattern', True)):
            formats = data.setdefault(key, {})
            for elem in tree.findall(length_path):
                ftype = elem.attrib.get('type')
                if 'draft' in elem.attrib and ftype in formats:
                    continue
                pattern = unicode(elem.findtext(pattern_path))
                if parsed:
                    pattern = numbers.parse_pattern(pattern)
                formats[ftype] = pattern

        currencies = data.setdefault('currencies', {})
        for elem in tree.findall('//currencies/currency'):
            currencies[elem.attrib['type']] = {
                'display_name': unicode(elem.findtext('displayName')),
                'symbol': unicode(elem.findtext('symbol'))
            }

        # Remember the data so that child locales can inherit from it.
        dicts[stem] = data
        outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
        try:
            pickle.dump(data, outfile, 2)
        finally:
            outfile.close()

if __name__ == '__main__':
    main()