1
|
1 #!/usr/bin/env python
|
|
2 # -*- coding: utf-8 -*-
|
|
3 #
|
|
4 # Copyright (C) 2007 Edgewall Software
|
|
5 # All rights reserved.
|
|
6 #
|
|
7 # This software is licensed as described in the file COPYING, which
|
|
8 # you should have received as part of this distribution. The terms
|
|
9 # are also available at http://babel.edgewall.org/wiki/License.
|
|
10 #
|
|
11 # This software consists of voluntary contributions made by many
|
|
12 # individuals. For the exact contribution history, see the revision
|
|
13 # history and logs, available at http://babel.edgewall.org/log/.
|
|
14
|
|
15 import copy
|
|
16 from optparse import OptionParser
|
|
17 import os
|
|
18 import pickle
|
|
19 import sys
|
|
20 try:
|
|
21 from xml.etree.ElementTree import parse
|
|
22 except ImportError:
|
|
23 from elementtree.ElementTree import parse
|
|
24
|
|
25 from babel.dates import parse_pattern
|
|
26
|
|
27 def _parent(locale):
|
|
28 parts = locale.split('_')
|
|
29 if len(parts) == 1:
|
|
30 return 'root'
|
|
31 else:
|
|
32 return '_'.join(parts[:-1])
|
|
33
|
|
34 def _text(elem):
|
|
35 buf = [elem.text or '']
|
|
36 for child in elem:
|
|
37 buf.append(_text(child))
|
|
38 buf.append(elem.tail or '')
|
|
39 return u''.join(filter(None, buf)).strip()
|
|
40
|
|
def _is_superseded_draft(elem, mapping, key):
    """Return True if *elem* is draft data for a *key* that already has a value.

    Draft entries may fill a gap, but they never replace a value that is
    already in *mapping* (copied from the parent locale or read earlier).
    """
    return 'draft' in elem.attrib and key in mapping


def _import_display_names(tree, data):
    """Read the territory, language, variant and script name tables."""
    for key, path in (('territories', '//territories/territory'),
                      ('languages', '//languages/language'),
                      ('variants', '//variants/variant'),
                      ('scripts', '//scripts/script')):
        names = data.setdefault(key, {})
        for elem in tree.findall(path):
            code = elem.attrib['type']
            if _is_superseded_draft(elem, names, code):
                continue
            names[code] = _text(elem)


def _import_time_zones(tree, data):
    """Read the display names of time zones, keyed by the zone's type."""
    time_zones = data.setdefault('time_zones', {})
    for elem in tree.findall('//timeZoneNames/zone'):
        name = elem.findtext('displayName')
        # findtext() returns None for a zone without <displayName>; skip it
        # instead of storing the literal text u'None'.
        if name is None:
            continue
        # Key by the zone identifier.  elem.tag is always 'zone', so using it
        # as the key made every zone overwrite the previous one.
        time_zones[elem.attrib['type']] = unicode(name)


def _import_widths(calendar, data, key, context_path, width_path, item_tag,
                   to_key):
    """Read a nested context -> width -> item table (months, days, quarters).

    *to_key* converts the item's ``type`` attribute into the dictionary key.
    """
    contexts = data.setdefault(key, {})
    for ctxt in calendar.findall(context_path):
        by_width = contexts.setdefault(ctxt.attrib['type'], {})
        for width in ctxt.findall(width_path):
            items = by_width.setdefault(width.attrib['type'], {})
            for elem in width.findall(item_tag):
                item = to_key(elem.attrib['type'])
                if _is_superseded_draft(elem, items, item):
                    continue
                items[item] = unicode(elem.text)


def _import_datetime_patterns(calendar, data, key, length_path, pattern_path):
    """Read and parse the date or time format patterns of *calendar*."""
    formats = data.setdefault(key, {})
    for elem in calendar.findall(length_path):
        length = elem.attrib.get('type')
        if _is_superseded_draft(elem, formats, length):
            continue
        try:
            formats[length] = parse_pattern(
                unicode(elem.findtext(pattern_path)))
        except ValueError:
            # sys.exc_info() keeps the syntax valid on every Python version.
            # Report on stderr, like the progress messages, and say which
            # entry was rejected; the entry is skipped, not fatal.
            error = sys.exc_info()[1]
            sys.stderr.write('Skipping %s pattern %r: %s\n'
                             % (key, length, error))


def _import_gregorian_calendar(calendar, data):
    """Read names and patterns from one gregorian <calendar> element."""
    weekdays = {'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4,
                'fri': 5, 'sat': 6, 'sun': 7}
    _import_widths(calendar, data, 'months', 'months/monthContext',
                   'monthWidth', 'month', int)
    _import_widths(calendar, data, 'days', 'days/dayContext',
                   'dayWidth', 'day', weekdays.__getitem__)
    _import_widths(calendar, data, 'quarters', 'quarters/quarterContext',
                   'quarterWidth', 'quarter', int)

    eras = data.setdefault('eras', {})
    for width in calendar.findall('eras/*'):
        ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag]
        names = eras.setdefault(ewidth, {})
        for elem in width.findall('era'):
            era = int(elem.attrib['type'])
            if _is_superseded_draft(elem, names, era):
                continue
            names[era] = unicode(elem.text)

    # AM/PM
    periods = data.setdefault('periods', {})
    for tag in ('am', 'pm'):
        for elem in calendar.findall(tag):
            if _is_superseded_draft(elem, periods, elem.tag):
                continue
            periods[elem.tag] = unicode(elem.text)

    _import_datetime_patterns(calendar, data, 'date_formats',
                              'dateFormats/dateFormatLength',
                              'dateFormat/pattern')
    _import_datetime_patterns(calendar, data, 'time_formats',
                              'timeFormats/timeFormatLength',
                              'timeFormat/pattern')


def _import_numbers(tree, data):
    """Read number symbols, number format patterns and currencies."""
    number_symbols = data.setdefault('number_symbols', {})
    for elem in tree.findall('//numbers/symbols/*'):
        number_symbols[elem.tag] = unicode(elem.text)

    for key, length_path, pattern_path in (
            ('decimal_formats', '//decimalFormats/decimalFormatLength',
             'decimalFormat/pattern'),
            ('scientific_formats',
             '//scientificFormats/scientificFormatLength',
             'scientificFormat/pattern'),
            ('currency_formats', '//currencyFormats/currencyFormatLength',
             'currencyFormat/pattern'),
            ('percent_formats', '//percentFormats/percentFormatLength',
             'percentFormat/pattern')):
        formats = data.setdefault(key, {})
        for elem in tree.findall(length_path):
            length = elem.attrib.get('type')
            if _is_superseded_draft(elem, formats, length):
                continue
            formats[length] = unicode(elem.findtext(pattern_path))

    currencies = data.setdefault('currencies', {})
    for elem in tree.findall('//currencies/currency'):
        code = elem.attrib['type']
        inherited = currencies.get(code, {})
        entry = {}
        for field, tag in (('display_name', 'displayName'),
                           ('symbol', 'symbol')):
            text = elem.findtext(tag)
            if text is None:
                # The child element is absent: keep the value copied from
                # the parent locale (None if there is none) rather than
                # storing the literal text u'None'.
                entry[field] = inherited.get(field)
            else:
                entry[field] = unicode(text)
        currencies[code] = entry


def main():
    """Convert the CLDR XML locale files into pickled ``.dat`` files.

    Takes exactly one command-line argument, the path to the CLDR
    directory.  Every ``*.xml`` file in its ``main`` sub-directory is
    parsed and the collected data is pickled (protocol 2) to
    ``<stem>.dat`` in ``../babel/localedata`` relative to this script.

    ``root.xml`` is processed first and the remaining files in order of
    increasing file name length, so the parent of a locale is handled
    before the locale itself.  Each locale starts from a deep copy of its
    parent's data and then overrides it with its own values.

    Raises ``ValueError`` if ``main`` contains no ``root.xml`` and
    ``KeyError`` if the parent of a locale has not been processed.
    """
    parser = OptionParser(usage='%prog path/to/cldr')
    args = parser.parse_args()[1]
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                           '..', 'babel', 'localedata')

    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    # Shorter names first: a parent's name is always shorter than its
    # children's.  The sort is stable, like the comparison function it
    # replaces, so the resulting order is unchanged.
    filenames.sort(key=len)
    filenames.insert(0, 'root.xml')

    dicts = {}

    for filename in filenames:
        sys.stderr.write('Processing input file %r\n' % filename)
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue

        data = {}
        if stem != 'root':
            # Deep copy so that overriding nested tables in this locale
            # cannot leak back into the parent's data.
            data.update(copy.deepcopy(dicts[_parent(stem)]))
        tree = parse(os.path.join(srcdir, 'main', filename))

        # <localeDisplayNames>
        _import_display_names(tree, data)

        # <dates>
        _import_time_zones(tree, data)
        for calendar in tree.findall('//calendars/calendar'):
            if calendar.attrib['type'] != 'gregorian':
                # TODO: support other calendar types
                continue
            _import_gregorian_calendar(calendar, data)

        # <numbers>
        _import_numbers(tree, data)

        dicts[stem] = data
        outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
        try:
            pickle.dump(data, outfile, 2)
        finally:
            outfile.close()
|
|
221
|
|
# Run the import only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()
|