comparison scripts/import_cldr.py @ 1:f71ca60f2a4a

Import of initial code base.
author cmlenz
date Tue, 29 May 2007 20:33:55 +0000
parents
children 9132c9218745
comparison
equal deleted inserted replaced
0:6b2982b3e66a 1:f71ca60f2a4a
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright (C) 2007 Edgewall Software
5 # All rights reserved.
6 #
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at http://babel.edgewall.org/wiki/License.
10 #
11 # This software consists of voluntary contributions made by many
12 # individuals. For the exact contribution history, see the revision
13 # history and logs, available at http://babel.edgewall.org/log/.
14
15 import copy
16 from optparse import OptionParser
17 import os
18 import pickle
19 import sys
20 try:
21 from xml.etree.ElementTree import parse
22 except ImportError:
23 from elementtree.ElementTree import parse
24
25 from babel.dates import parse_pattern
26
def _parent(locale):
    """Return the identifier of the locale that ``locale`` inherits from.

    The parent is the identifier with its last ``_``-separated component
    removed (``'zh_Hans_CN'`` -> ``'zh_Hans'``).  An identifier that has no
    ``_`` at all inherits from ``'root'``.
    """
    parts = locale.rsplit('_', 1)
    if len(parts) == 1:
        # No separator: a bare language code hangs directly off the root.
        return 'root'
    return parts[0]
33
def _text(elem):
    """Return the text content of ``elem`` with surrounding whitespace removed.

    The result is the element's own text followed, in document order, by the
    text content of every descendant and the tail text that follows each
    descendant.  The tail of ``elem`` itself is not included: it sits after
    the element's closing tag and therefore belongs to the parent's content.

    Whitespace is stripped only once, on the assembled string, so that
    spacing between nested elements is preserved.
    """
    def collect(node, buf):
        if node.text:
            buf.append(node.text)
        for child in node:
            collect(child, buf)
            # A child's tail is text located inside ``node``, after the child.
            if child.tail:
                buf.append(child.tail)

    parts = []
    collect(elem, parts)
    return u''.join(parts).strip()
40
def _unicode(value):
    """Coerce ``value`` to a text string on both Python 2 and Python 3."""
    try:
        return unicode(value)
    except NameError:
        # Python 3 has no ``unicode`` builtin; ``str`` is the text type there.
        return str(value)


def _is_known_draft(elem, key, target):
    """Tell whether ``elem`` is a draft whose ``key`` is already in ``target``.

    Such elements are skipped so that a draft value never replaces a value
    that is already present (inherited or read earlier from the same file).
    """
    return 'draft' in elem.attrib and key in target


def _import_display_names(tree, data):
    """Read territory, language, variant and script names into ``data``."""
    for key, path in (('territories', './/territories/territory'),
                      ('languages', './/languages/language'),
                      ('variants', './/variants/variant'),
                      ('scripts', './/scripts/script')):
        names = data.setdefault(key, {})
        for elem in tree.findall(path):
            code = elem.attrib['type']
            if _is_known_draft(elem, code, names):
                continue
            names[code] = _text(elem)


def _import_time_zones(tree, data):
    """Read the display name of every ``<zone>`` into ``data['time_zones']``."""
    time_zones = data.setdefault('time_zones', {})
    for elem in tree.findall('.//timeZoneNames/zone'):
        # Key by the zone's ``type`` attribute.  Keying by ``elem.tag`` would
        # use the constant 'zone' and let each entry overwrite the previous.
        time_zones[elem.attrib['type']] = \
            _unicode(elem.findtext('displayName'))


def _import_widths(calendar, data, key, context_path, width_tag, item_tag,
                   to_key):
    """Read a ``context -> width -> item`` name table into ``data[key]``.

    ``to_key`` converts the ``type`` attribute of each item element into the
    key under which its text is stored.
    """
    contexts = data.setdefault(key, {})
    for ctxt in calendar.findall(context_path):
        widths = contexts.setdefault(ctxt.attrib['type'], {})
        for width in ctxt.findall(width_tag):
            items = widths.setdefault(width.attrib['type'], {})
            for elem in width.findall(item_tag):
                item = to_key(elem.attrib['type'])
                if _is_known_draft(elem, item, items):
                    continue
                items[item] = _unicode(elem.text)


def _import_calendar(calendar, data):
    """Read names and date/time patterns of one ``<calendar>`` into ``data``."""
    weekdays = {'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4,
                'fri': 5, 'sat': 6, 'sun': 7}
    _import_widths(calendar, data, 'months', 'months/monthContext',
                   'monthWidth', 'month', int)
    _import_widths(calendar, data, 'days', 'days/dayContext',
                   'dayWidth', 'day', weekdays.__getitem__)
    _import_widths(calendar, data, 'quarters', 'quarters/quarterContext',
                   'quarterWidth', 'quarter', int)

    era_widths = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}
    eras = data.setdefault('eras', {})
    for width in calendar.findall('eras/*'):
        names = eras.setdefault(era_widths[width.tag], {})
        for elem in width.findall('era'):
            era = int(elem.attrib['type'])
            if _is_known_draft(elem, era, names):
                continue
            names[era] = _unicode(elem.text)

    # AM/PM
    periods = data.setdefault('periods', {})
    for tag in ('am', 'pm'):
        for elem in calendar.findall(tag):
            if _is_known_draft(elem, elem.tag, periods):
                continue
            periods[elem.tag] = _unicode(elem.text)

    for key, length_path, pattern_path in (
            ('date_formats', 'dateFormats/dateFormatLength',
             'dateFormat/pattern'),
            ('time_formats', 'timeFormats/timeFormatLength',
             'timeFormat/pattern')):
        formats = data.setdefault(key, {})
        for elem in calendar.findall(length_path):
            length = elem.attrib.get('type')
            if _is_known_draft(elem, length, formats):
                continue
            try:
                formats[length] = \
                    parse_pattern(_unicode(elem.findtext(pattern_path)))
            except ValueError:
                # A pattern that cannot be parsed is reported and skipped.
                # ``sys.exc_info()`` is used instead of binding the exception
                # in the ``except`` clause because that syntax differs
                # between Python 2 and Python 3.
                print(sys.exc_info()[1])


def _import_numbers(tree, data):
    """Read number symbols, number patterns and currencies into ``data``."""
    number_symbols = data.setdefault('number_symbols', {})
    for elem in tree.findall('.//numbers/symbols/*'):
        number_symbols[elem.tag] = _unicode(elem.text)

    for key, length_path, pattern_path in (
            ('decimal_formats', './/decimalFormats/decimalFormatLength',
             'decimalFormat/pattern'),
            ('scientific_formats',
             './/scientificFormats/scientificFormatLength',
             'scientificFormat/pattern'),
            ('currency_formats', './/currencyFormats/currencyFormatLength',
             'currencyFormat/pattern'),
            ('percent_formats', './/percentFormats/percentFormatLength',
             'percentFormat/pattern')):
        formats = data.setdefault(key, {})
        for elem in tree.findall(length_path):
            length = elem.attrib.get('type')
            if _is_known_draft(elem, length, formats):
                continue
            formats[length] = _unicode(elem.findtext(pattern_path))

    currencies = data.setdefault('currencies', {})
    for elem in tree.findall('.//currencies/currency'):
        currencies[elem.attrib['type']] = {
            'display_name': _unicode(elem.findtext('displayName')),
            'symbol': _unicode(elem.findtext('symbol')),
        }


def main():
    """Convert the XML files of a CLDR directory into pickled locale data.

    Takes exactly one command-line argument, the path to the CLDR data.
    Every ``.xml`` file in its ``main`` sub-directory is read and the
    extracted data is pickled to ``<stem>.dat`` in ``../babel/localedata``,
    relative to the directory containing this script.

    Each locale starts from a deep copy of its parent's data (see
    ``_parent``), so values missing from a file are inherited.  Draft
    elements never replace a value that is already present.
    """
    parser = OptionParser(usage='%prog path/to/cldr')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                           '..', 'babel', 'localedata')

    # Process the root first, then shorter names before longer ones: a parent
    # identifier is always shorter than its children, so its data is already
    # in ``dicts`` by the time a child needs to inherit from it.
    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(key=len)
    filenames.insert(0, 'root.xml')

    dicts = {}

    for filename in filenames:
        sys.stderr.write('Processing input file %r\n' % filename)
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue

        data = {}
        if stem != 'root':
            data.update(copy.deepcopy(dicts[_parent(stem)]))
        tree = parse(os.path.join(srcdir, 'main', filename))

        # <localeDisplayNames>
        _import_display_names(tree, data)

        # <dates>
        _import_time_zones(tree, data)
        for calendar in tree.findall('.//calendars/calendar'):
            if calendar.attrib['type'] != 'gregorian':
                # TODO: support other calendar types
                continue
            _import_calendar(calendar, data)

        # <numbers>
        _import_numbers(tree, data)

        dicts[stem] = data
        outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
        try:
            pickle.dump(data, outfile, 2)
        finally:
            outfile.close()
221
# Run the import only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Copyright (C) 2012-2017 Edgewall Software