263
|
1 #!/usr/bin/env python
|
|
2 # -*- coding: utf-8 -*-
|
|
3 #
|
|
4 # Copyright (C) 2007 Edgewall Software
|
|
5 # All rights reserved.
|
|
6 #
|
|
7 # This software is licensed as described in the file COPYING, which
|
|
8 # you should have received as part of this distribution. The terms
|
|
9 # are also available at http://babel.edgewall.org/wiki/License.
|
|
10 #
|
|
11 # This software consists of voluntary contributions made by many
|
|
12 # individuals. For the exact contribution history, see the revision
|
|
13 # history and logs, available at http://babel.edgewall.org/log/.
|
|
14
|
|
15 import copy
|
|
16 from optparse import OptionParser
|
|
17 import os
|
|
18 import pickle
|
|
19 import sys
|
|
20 try:
|
|
21 from xml.etree.ElementTree import parse
|
|
22 except ImportError:
|
|
23 from elementtree.ElementTree import parse
|
|
24
|
|
# Make sure we're using Babel source, and not some previously installed
# version: the parent of the directory holding this script (taken from
# sys.argv[0]) is placed first on sys.path, ahead of any other entry.
sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..'))
|
|
27
|
|
28 from babel import dates, numbers
|
|
29
|
|
# Three-letter weekday keys mapped to zero-based indices, Monday first.
weekdays = {
    'mon': 0,
    'tue': 1,
    'wed': 2,
    'thu': 3,
    'fri': 4,
    'sat': 5,
    'sun': 6,
}
|
|
32
|
|
try:
    any
except NameError:
    # Interpreters without a builtin any() get a drop-in replacement.
    def any(iterable):
        """Return True if at least one element of *iterable* is true.

        Stops at the first true element and always returns a real boolean,
        so an empty iterable yields False.
        """
        for element in iterable:
            if element:
                return True
        return False
|
|
38
|
|
def _text(elem):
    """Return the concatenated text of *elem* and all of its descendants.

    The element's own text comes first, followed by the flattened text of
    each child in document order, and finally the element's tail.  The
    joined result is stripped of surrounding whitespace at every level of
    the recursion.
    """
    pieces = [elem.text or '']
    pieces.extend(_text(child) for child in elem)
    pieces.append(elem.tail or '')
    return u''.join(piece for piece in pieces if piece).strip()
|
|
45
|
|
# Element names found below <eras>, mapped to the width names used as keys
# of the 'eras' table.
_ERA_WIDTHS = {
    'eraAbbr': 'abbreviated',
    'eraNames': 'wide',
    'eraNarrow': 'narrow',
}

# (element name below <weekData>, key in the 'week_data' table) pairs.
_WEEK_FIELDS = (
    ('minDays', 'min_days'),
    ('firstDay', 'first_day'),
    ('weekendStart', 'weekend_start'),
    ('weekendEnd', 'weekend_end'),
)


def _log(message):
    """Write *message* as one line to standard error."""
    sys.stderr.write(message + '\n')


def _dump(obj, path):
    """Pickle *obj* with protocol 2 into the file at *path*."""
    outfile = open(path, 'wb')
    try:
        pickle.dump(obj, outfile, 2)
    finally:
        outfile.close()


def _import_global_data(sup):
    """Collect the locale-independent time zone tables from *sup*."""
    territory_zones = {}
    zone_aliases = {}
    zone_territories = {}
    for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
        tzid = elem.attrib['type']
        territory = elem.attrib['territory']
        territory_zones.setdefault(territory, []).append(tzid)
        zone_territories[tzid] = territory
        # 'aliases' is optional and holds a whitespace separated list.
        for alias in elem.attrib.get('aliases', '').split():
            zone_aliases[alias] = tzid
    return {
        'territory_zones': territory_zones,
        'zone_aliases': zone_aliases,
        'zone_territories': zone_territories,
    }


def _resolve_containment(sup):
    """Map every territory to the set of groups recorded as containing it."""
    groups = {}
    for elem in sup.findall('//territoryContainment/group'):
        groups[elem.attrib['type']] = elem.attrib['contains'].split()

    containment = {}
    # Groups are visited in sorted order; a member inherits whatever
    # containers have been recorded for its group at that point.
    for group, members in sorted(groups.items()):
        for territory in members:
            containers = containment.setdefault(territory, set())
            if group in containment:
                containers |= containment[group]
            containers.add(group)
    return containment


def _import_display_names(elems, target):
    """Copy the flattened text of *elems* into *target*, keyed by ``type``.

    A draft element never replaces an entry that is already present.
    """
    for elem in elems:
        key = elem.attrib['type']
        if 'draft' in elem.attrib and key in target:
            continue
        target[key] = _text(elem)


def _import_week_data(week_elem, territory, regions, week_data):
    """Fill *week_data* from the entries applying to *territory*/*regions*.

    Every matching element overwrites the value stored by an earlier one,
    so the last match in document order wins.
    """
    for tag, key in _WEEK_FIELDS:
        for elem in week_elem.findall(tag):
            listed = elem.attrib['territories'].split()
            if territory in listed or any(r in listed for r in regions):
                if tag == 'minDays':
                    week_data[key] = int(elem.attrib['count'])
                else:
                    week_data[key] = weekdays[elem.attrib['day']]


def _first_approved_text(tree, path):
    """Return the text of the first non-draft element at *path*, or None."""
    for elem in tree.findall(path):
        if 'draft' not in elem.attrib:
            return elem.text
    return None


def _zone_names(elem):
    """Extract the exemplar city and long/short names of a zone element."""
    info = {}
    city = elem.findtext('exemplarCity')
    if city:
        info['city'] = unicode(city)
    for width in ('long', 'short'):
        for child in elem.findall(width + '/*'):
            # An element without text would otherwise be stored as u'None'.
            if child.text is not None:
                info.setdefault(width, {})[child.tag] = unicode(child.text)
    return info


def _import_names(elems, target, make_key):
    """Copy the text of *elems* into *target* under ``make_key(type)``.

    A draft element never replaces an entry that is already present, and
    elements without text are skipped rather than stored as u'None'.
    """
    for elem in elems:
        key = make_key(elem.attrib['type'])
        if 'draft' in elem.attrib and key in target:
            continue
        if elem.text is not None:
            target[key] = unicode(elem.text)


def _import_contexts(contexts, width_tag, item_tag, target, make_key):
    """Import a context -> width -> name hierarchy into *target*."""
    for ctxt in contexts:
        widths = target.setdefault(ctxt.attrib['type'], {})
        for width in ctxt.findall(width_tag):
            names = widths.setdefault(width.attrib['type'], {})
            _import_names(width.findall(item_tag), names, make_key)


def _import_patterns(elems, pattern_path, target, convert=None,
                     tolerant=False):
    """Store the pattern found at *pattern_path* below each of *elems*.

    Entries are keyed by the element's ``type`` attribute (which may be
    absent, giving a ``None`` key).  The pattern is converted to unicode
    and, when *convert* is given, passed through it.  A draft element never
    replaces an existing entry, and an element without a pattern child is
    skipped.  With *tolerant* set, a ValueError is reported on standard
    error and the entry is skipped; otherwise it propagates.
    """
    for elem in elems:
        key = elem.attrib.get('type')
        if 'draft' in elem.attrib and key in target:
            continue
        pattern = elem.findtext(pattern_path)
        if pattern is None:
            # unicode(None) would be handled as the literal pattern u'None'.
            continue
        try:
            value = unicode(pattern)
            if convert is not None:
                value = convert(value)
            target[key] = value
        except ValueError:
            if not tolerant:
                raise
            # sys.exc_info() keeps the handler parseable by every
            # interpreter version.
            _log('ERROR: %s' % sys.exc_info()[1])


def _import_calendar(calendar, data):
    """Import names and format patterns of one <calendar> into *data*."""
    _import_contexts(calendar.findall('months/monthContext'),
                     'monthWidth', 'month',
                     data.setdefault('months', {}), int)
    _import_contexts(calendar.findall('days/dayContext'),
                     'dayWidth', 'day',
                     data.setdefault('days', {}), weekdays.__getitem__)
    _import_contexts(calendar.findall('quarters/quarterContext'),
                     'quarterWidth', 'quarter',
                     data.setdefault('quarters', {}), int)

    eras = data.setdefault('eras', {})
    for width in calendar.findall('eras/*'):
        names = eras.setdefault(_ERA_WIDTHS[width.tag], {})
        _import_names(width.findall('era'), names, int)

    # AM/PM
    periods = data.setdefault('periods', {})
    for tag in ('am', 'pm'):
        for elem in calendar.findall(tag):
            if 'draft' in elem.attrib and elem.tag in periods:
                continue
            if elem.text is not None:
                periods[elem.tag] = unicode(elem.text)

    _import_patterns(calendar.findall('dateFormats/dateFormatLength'),
                     'dateFormat/pattern',
                     data.setdefault('date_formats', {}),
                     dates.parse_pattern, tolerant=True)
    _import_patterns(calendar.findall('timeFormats/timeFormatLength'),
                     'timeFormat/pattern',
                     data.setdefault('time_formats', {}),
                     dates.parse_pattern, tolerant=True)
    # Date/time combination patterns are stored as plain unicode strings.
    _import_patterns(calendar.findall('dateTimeFormats/dateTimeFormatLength'),
                     'dateTimeFormat/pattern',
                     data.setdefault('datetime_formats', {}),
                     tolerant=True)


def _import_numbers(tree, data):
    """Import number symbols, number patterns and currencies into *data*."""
    number_symbols = data.setdefault('number_symbols', {})
    for elem in tree.findall('//numbers/symbols/*'):
        if elem.text is not None:
            number_symbols[elem.tag] = unicode(elem.text)

    _import_patterns(tree.findall('//decimalFormats/decimalFormatLength'),
                     'decimalFormat/pattern',
                     data.setdefault('decimal_formats', {}),
                     numbers.parse_pattern)
    _import_patterns(tree.findall('//scientificFormats/scientificFormatLength'),
                     'scientificFormat/pattern',
                     data.setdefault('scientific_formats', {}),
                     numbers.parse_pattern)
    _import_patterns(tree.findall('//currencyFormats/currencyFormatLength'),
                     'currencyFormat/pattern',
                     data.setdefault('currency_formats', {}),
                     numbers.parse_pattern)
    _import_patterns(tree.findall('//percentFormats/percentFormatLength'),
                     'percentFormat/pattern',
                     data.setdefault('percent_formats', {}),
                     numbers.parse_pattern)

    currency_names = data.setdefault('currency_names', {})
    currency_symbols = data.setdefault('currency_symbols', {})
    for elem in tree.findall('//currencies/currency'):
        name = elem.findtext('displayName')
        if name:
            currency_names[elem.attrib['type']] = unicode(name)
        symbol = elem.findtext('symbol')
        if symbol:
            currency_symbols[elem.attrib['type']] = unicode(symbol)


def _import_locale(tree, week_elem, territory_containment):
    """Build and return the data dictionary for one parsed locale file."""
    data = {}

    language = None
    elem = tree.find('//identity/language')
    if elem is not None:
        language = elem.attrib['type']
    _log(' Language: %r' % language)

    elem = tree.find('//identity/territory')
    if elem is not None:
        territory = elem.attrib['type']
    else:
        territory = '001' # world
    _log(' Territory: %r' % territory)
    regions = territory_containment.get(territory, [])
    _log(' Regions: %r' % regions)

    # <localeDisplayNames>

    _import_display_names(tree.findall('//territories/territory'),
                          data.setdefault('territories', {}))
    _import_display_names(tree.findall('//languages/language'),
                          data.setdefault('languages', {}))
    _import_display_names(tree.findall('//variants/variant'),
                          data.setdefault('variants', {}))
    _import_display_names(tree.findall('//scripts/script'),
                          data.setdefault('scripts', {}))

    # <dates>

    _import_week_data(week_elem, territory, regions,
                      data.setdefault('week_data', {}))

    zone_formats = data.setdefault('zone_formats', {})
    text = _first_approved_text(tree, '//timeZoneNames/gmtFormat')
    if text is not None:
        zone_formats['gmt'] = unicode(text).replace('{0}', '%s')
    text = _first_approved_text(tree, '//timeZoneNames/regionFormat')
    if text is not None:
        zone_formats['region'] = unicode(text).replace('{0}', '%s')
    text = _first_approved_text(tree, '//timeZoneNames/fallbackFormat')
    if text is not None:
        zone_formats['fallback'] = unicode(text) \
            .replace('{0}', '%(0)s').replace('{1}', '%(1)s')

    time_zones = data.setdefault('time_zones', {})
    for elem in tree.findall('//timeZoneNames/zone'):
        info = _zone_names(elem)
        for child in elem.findall('usesMetazone'):
            if 'to' not in child.attrib: # FIXME: support old mappings
                info['use_metazone'] = child.attrib['mzone']
        time_zones[elem.attrib['type']] = info

    meta_zones = data.setdefault('meta_zones', {})
    for elem in tree.findall('//timeZoneNames/metazone'):
        info = _zone_names(elem)
        info['common'] = elem.findtext('commonlyUsed') == 'true'
        meta_zones[elem.attrib['type']] = info

    for calendar in tree.findall('//calendars/calendar'):
        if calendar.attrib['type'] != 'gregorian':
            # TODO: support other calendar types
            continue
        _import_calendar(calendar, data)

    # <numbers>

    _import_numbers(tree, data)

    return data


def main():
    """Convert the CLDR XML files into pickled data files.

    Expects exactly one command-line argument, the path to a CLDR
    directory.  Reads ``supplemental/supplementalData.xml`` and every
    ``.xml`` file in ``main`` below that path, and writes ``global.dat``
    plus one ``localedata/<stem>.dat`` per locale file into the ``babel``
    directory next to this script's parent directory.  Progress and
    pattern errors are reported on standard error.
    """
    parser = OptionParser(usage='%prog path/to/cldr')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                           '..', 'babel')

    sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))

    # import global data from the supplemental files
    _dump(_import_global_data(sup), os.path.join(destdir, 'global.dat'))

    # build a territory containment mapping for inheritance
    territory_containment = _resolve_containment(sup)
    week_elem = sup.find('//weekData')

    # root.xml is processed first, the remaining files shortest name first.
    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(key=len)
    filenames.insert(0, 'root.xml')

    for filename in filenames:
        _log('Processing input file %r' % filename)
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue

        tree = parse(os.path.join(srcdir, 'main', filename))
        data = _import_locale(tree, week_elem, territory_containment)
        _dump(data, os.path.join(destdir, 'localedata', stem + '.dat'))
|
|
357
|
|
# Run the import only when this file is executed as a script.
if __name__ == '__main__':
    main()
|