Mercurial > babel > old > babel-test
annotate scripts/import_cldr.py @ 239:4afe02d81083
Again, properly implement week-in-year (#46).
author | cmlenz |
---|---|
date | Tue, 07 Aug 2007 17:17:28 +0000 |
parents | bc22f5aef216 |
children | 64d340f76701 |
rev | line source |
---|---|
1 | 1 #!/usr/bin/env python |
2 # -*- coding: utf-8 -*- | |
3 # | |
4 # Copyright (C) 2007 Edgewall Software | |
5 # All rights reserved. | |
6 # | |
7 # This software is licensed as described in the file COPYING, which | |
8 # you should have received as part of this distribution. The terms | |
9 # are also available at http://babel.edgewall.org/wiki/License. | |
10 # | |
11 # This software consists of voluntary contributions made by many | |
12 # individuals. For the exact contribution history, see the revision | |
13 # history and logs, available at http://babel.edgewall.org/log/. | |
14 | |
15 import copy | |
16 from optparse import OptionParser | |
17 import os | |
18 import pickle | |
19 import sys | |
20 try: | |
21 from xml.etree.ElementTree import parse | |
22 except ImportError: | |
23 from elementtree.ElementTree import parse | |
24 | |
65
7478d663561f
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
34
diff
changeset
|
25 # Make sure we're using Babel source, and not some previously installed version |
7478d663561f
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
34
diff
changeset
|
26 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..')) |
7478d663561f
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
34
diff
changeset
|
27 |
9 | 28 from babel import dates, numbers |
1 | 29 |
# Three-letter day abbreviations mapped to integer indexes, Monday first
# (mon -> 0 ... sun -> 6).
weekdays = dict(zip(
    ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'),
    range(7),
))
8
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
32 |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
# Interpreters that lack the ``any`` builtin get an equivalent fallback.
try:
    any
except NameError:
    def any(iterable):
        """Return True if at least one item of `iterable` is true.

        Mirrors the builtin: the result is always a bool, and iteration
        stops at the first true item instead of consuming the whole
        iterable.
        """
        for item in iterable:
            if item:
                return True
        return False
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
38 |
def _text(elem):
    """Return the text content of `elem` and all of its descendants.

    The element's own text, the text of every nested element and the tail
    text that follows each nested element are concatenated in document
    order. Leading and trailing whitespace is removed from the final result
    only, so whitespace between inline children is preserved. The tail of
    `elem` itself is not included, because that text sits outside the
    element.

    :param elem: an ElementTree element
    :return: the concatenated, stripped text (an empty string if there is
             none)
    """
    def _collect(node, buf):
        if node.text:
            buf.append(node.text)
        for child in node:
            _collect(child, buf)
            # A child's tail is located inside `node`, so it is part of the
            # content of `node`.
            if child.tail:
                buf.append(child.tail)

    buf = []
    _collect(elem, buf)
    return u''.join(buf).strip()
45 | |
# Element names found below <eras>, mapped to the width names used as keys
# of the 'eras' dictionary.
_ERA_WIDTHS = {
    'eraAbbr': 'abbreviated',
    'eraNames': 'wide',
    'eraNarrow': 'narrow',
}

# (element name below <weekData>, key in the 'week_data' dictionary)
_WEEK_FIELDS = (
    ('minDays', 'min_days'),
    ('firstDay', 'first_day'),
    ('weekendStart', 'weekend_start'),
    ('weekendEnd', 'weekend_end'),
)


def _log(message):
    """Write one diagnostic line to stderr."""
    sys.stderr.write(message + '\n')


def _write_pickle(path, obj):
    """Pickle `obj` (protocol 2) into the file at `path`."""
    outfile = open(path, 'wb')
    try:
        pickle.dump(obj, outfile, 2)
    finally:
        outfile.close()


def _applies_to(elem, territory, containers):
    """Tell whether the `territories` attribute of `elem` lists `territory`
    or any of the groups in `containers`."""
    covered = elem.attrib['territories'].split()
    if territory in covered:
        return True
    for container in containers:
        if container in covered:
            return True
    return False


def _import_week_data(week_elem, territory, containers, week_data):
    """Copy the <weekData> settings that apply to `territory` (directly or
    through one of its `containers`) into `week_data`.

    When several elements of the same kind apply, the last one in document
    order wins.
    """
    for tag, key in _WEEK_FIELDS:
        for elem in week_elem.findall(tag):
            if not _applies_to(elem, territory, containers):
                continue
            if tag == 'minDays':
                week_data[key] = int(elem.attrib['count'])
            else:
                week_data[key] = weekdays[elem.attrib['day']]


def _import_display_names(tree, path, names):
    """Store the text of every element matching `path` in `names`, keyed by
    its `type` attribute; a draft element never replaces an existing entry.
    """
    for elem in tree.findall(path):
        key = elem.attrib['type']
        if 'draft' in elem.attrib and key in names:
            continue
        names[key] = _text(elem)


def _first_approved_text(tree, path):
    """Return the text of the first element matching `path` that carries no
    `draft` attribute, or None if there is no such element."""
    for elem in tree.findall(path):
        if 'draft' not in elem.attrib:
            return unicode(elem.text)
    return None


def _zone_names(elem):
    """Collect the exemplar city and the long/short names of a <zone> or
    <metazone> element into a new dictionary."""
    info = {}
    city = elem.findtext('exemplarCity')
    if city:
        info['city'] = unicode(city)
    for width in ('long', 'short'):
        for child in elem.findall(width + '/*'):
            info.setdefault(width, {})[child.tag] = unicode(child.text)
    return info


def _import_indexed_names(elems, names):
    """Store the text of each element in `names`, keyed by its integer
    `type` attribute; a draft element never replaces an existing entry."""
    for elem in elems:
        index = int(elem.attrib['type'])
        if 'draft' in elem.attrib and index in names:
            continue
        names[index] = unicode(elem.text)


def _iter_patterns(root, path, pattern_path, formats):
    """Yield ``(type, pattern)`` for every element below `root` matching
    `path` that should be imported into `formats`.

    Draft elements whose type is already present in `formats` are skipped.
    Elements without a pattern at `pattern_path` are skipped as well, so
    that a missing pattern is never stored as the literal text ``None``.
    """
    for elem in root.findall(path):
        key = elem.attrib.get('type')
        if 'draft' in elem.attrib and key in formats:
            continue
        pattern = elem.findtext(pattern_path)
        if pattern is None:
            continue
        yield key, unicode(pattern)


def main():
    """Import locale data from a CLDR checkout into pickle files.

    Expects exactly one command-line argument, the path to the CLDR
    directory. Reads ``supplemental/supplementalData.xml`` and every
    ``*.xml`` file in ``main/`` below that path, then writes ``global.dat``
    and one ``localedata/<locale>.dat`` per input file into the ``babel``
    directory next to the directory containing this script.

    This script targets Python 2 (it relies on the ``unicode`` builtin).
    """
    parser = OptionParser(usage='%prog path/to/cldr')
    args = parser.parse_args()[1]
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                           '..', 'babel')

    sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))

    # import global data from the supplemental files
    territory_zones = {}
    zone_aliases = {}
    zone_territories = {}
    for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
        tzid = elem.attrib['type']
        zone_territory = elem.attrib['territory']
        territory_zones.setdefault(zone_territory, []).append(tzid)
        zone_territories[tzid] = zone_territory
        if 'aliases' in elem.attrib:
            for alias in elem.attrib['aliases'].split():
                zone_aliases[alias] = tzid

    _write_pickle(os.path.join(destdir, 'global.dat'), {
        'territory_zones': territory_zones,
        'zone_aliases': zone_aliases,
        'zone_territories': zone_territories,
    })

    # build a territory containment mapping for inheritance
    groups = {}
    for elem in sup.findall('//territoryContainment/group'):
        groups[elem.attrib['type']] = elem.attrib['contains'].split()

    # Resolve territory containment: map every territory to the set of
    # groups that contain it.
    # NOTE(review): this is a single pass over the groups in sorted order,
    # so containers recorded for a group only after its members have been
    # visited are not propagated to those members.
    territory_containment = {}
    for group, members in sorted(groups.items()):
        for member in members:
            containers = territory_containment.setdefault(member, set())
            if group in territory_containment:
                containers |= territory_containment[group]
            containers.add(group)

    # root.xml goes first, the remaining files follow ordered by the length
    # of their name.
    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(key=len)
    filenames.insert(0, 'root.xml')

    # The supplemental week data is the same for every locale file.
    week_elem = sup.find('//weekData')

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            # Not a locale file; skip it without reporting it as processed.
            continue
        _log('Processing input file %r' % filename)

        tree = parse(os.path.join(srcdir, 'main', filename))
        data = {}

        language = None
        elem = tree.find('//identity/language')
        if elem is not None:
            language = elem.attrib['type']
        _log(' Language: %r' % (language,))

        elem = tree.find('//identity/territory')
        if elem is not None:
            territory = elem.attrib['type']
        else:
            territory = '001' # world
        _log(' Territory: %r' % (territory,))
        containers = territory_containment.get(territory, [])
        _log(' Regions: %r' % (containers,))

        # <localeDisplayNames>

        for key, path in (('territories', '//territories/territory'),
                          ('languages', '//languages/language'),
                          ('variants', '//variants/variant'),
                          ('scripts', '//scripts/script')):
            _import_display_names(tree, path, data.setdefault(key, {}))

        # <dates>

        _import_week_data(week_elem, territory, containers,
                          data.setdefault('week_data', {}))

        zone_formats = data.setdefault('zone_formats', {})
        gmt_format = _first_approved_text(tree, '//timeZoneNames/gmtFormat')
        if gmt_format is not None:
            zone_formats['gmt'] = gmt_format.replace('{0}', '%s')
        region_format = _first_approved_text(tree,
                                             '//timeZoneNames/regionFormat')
        if region_format is not None:
            zone_formats['region'] = region_format.replace('{0}', '%s')
        fallback_format = _first_approved_text(
            tree, '//timeZoneNames/fallbackFormat')
        if fallback_format is not None:
            zone_formats['fallback'] = fallback_format \
                .replace('{0}', '%(0)s').replace('{1}', '%(1)s')

        time_zones = data.setdefault('time_zones', {})
        for elem in tree.findall('//timeZoneNames/zone'):
            info = _zone_names(elem)
            for child in elem.findall('usesMetazone'):
                if 'to' not in child.attrib: # FIXME: support old mappings
                    info['use_metazone'] = child.attrib['mzone']
            time_zones[elem.attrib['type']] = info

        meta_zones = data.setdefault('meta_zones', {})
        for elem in tree.findall('//timeZoneNames/metazone'):
            info = _zone_names(elem)
            info['common'] = elem.findtext('commonlyUsed') == 'true'
            meta_zones[elem.attrib['type']] = info

        for calendar in tree.findall('//calendars/calendar'):
            if calendar.attrib['type'] != 'gregorian':
                # TODO: support other calendar types
                continue

            months = data.setdefault('months', {})
            for ctxt in calendar.findall('months/monthContext'):
                ctxts = months.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('monthWidth'):
                    _import_indexed_names(
                        width.findall('month'),
                        ctxts.setdefault(width.attrib['type'], {}))

            days = data.setdefault('days', {})
            for ctxt in calendar.findall('days/dayContext'):
                ctxts = days.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('dayWidth'):
                    widths = ctxts.setdefault(width.attrib['type'], {})
                    for elem in width.findall('day'):
                        dtype = weekdays[elem.attrib['type']]
                        if 'draft' in elem.attrib and dtype in widths:
                            continue
                        widths[dtype] = unicode(elem.text)

            quarters = data.setdefault('quarters', {})
            for ctxt in calendar.findall('quarters/quarterContext'):
                ctxts = quarters.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('quarterWidth'):
                    _import_indexed_names(
                        width.findall('quarter'),
                        ctxts.setdefault(width.attrib['type'], {}))

            eras = data.setdefault('eras', {})
            for width in calendar.findall('eras/*'):
                # An unknown child element raises KeyError here.
                ewidth = _ERA_WIDTHS[width.tag]
                _import_indexed_names(width.findall('era'),
                                      eras.setdefault(ewidth, {}))

            # AM/PM
            periods = data.setdefault('periods', {})
            for tag in ('am', 'pm'):
                for elem in calendar.findall(tag):
                    if 'draft' in elem.attrib and elem.tag in periods:
                        continue
                    periods[elem.tag] = unicode(elem.text)

            date_formats = data.setdefault('date_formats', {})
            for key, pattern in _iter_patterns(
                    calendar, 'dateFormats/dateFormatLength',
                    'dateFormat/pattern', date_formats):
                try:
                    date_formats[key] = dates.parse_pattern(pattern)
                except ValueError:
                    _log('ERROR: %s' % (sys.exc_info()[1],))

            time_formats = data.setdefault('time_formats', {})
            for key, pattern in _iter_patterns(
                    calendar, 'timeFormats/timeFormatLength',
                    'timeFormat/pattern', time_formats):
                try:
                    time_formats[key] = dates.parse_pattern(pattern)
                except ValueError:
                    _log('ERROR: %s' % (sys.exc_info()[1],))

            # Date-time patterns are stored as plain text, not parsed.
            datetime_formats = data.setdefault('datetime_formats', {})
            for key, pattern in _iter_patterns(
                    calendar, 'dateTimeFormats/dateTimeFormatLength',
                    'dateTimeFormat/pattern', datetime_formats):
                datetime_formats[key] = pattern

        # <numbers>

        number_symbols = data.setdefault('number_symbols', {})
        for elem in tree.findall('//numbers/symbols/*'):
            number_symbols[elem.tag] = unicode(elem.text)

        for key, path, pattern_path in (
                ('decimal_formats', '//decimalFormats/decimalFormatLength',
                 'decimalFormat/pattern'),
                ('scientific_formats',
                 '//scientificFormats/scientificFormatLength',
                 'scientificFormat/pattern'),
                ('currency_formats', '//currencyFormats/currencyFormatLength',
                 'currencyFormat/pattern'),
                ('percent_formats', '//percentFormats/percentFormatLength',
                 'percentFormat/pattern')):
            formats = data.setdefault(key, {})
            for ftype, pattern in _iter_patterns(tree, path, pattern_path,
                                                 formats):
                formats[ftype] = numbers.parse_pattern(pattern)

        currency_names = data.setdefault('currency_names', {})
        currency_symbols = data.setdefault('currency_symbols', {})
        for elem in tree.findall('//currencies/currency'):
            name = elem.findtext('displayName')
            if name:
                currency_names[elem.attrib['type']] = unicode(name)
            symbol = elem.findtext('symbol')
            if symbol:
                currency_symbols[elem.attrib['type']] = unicode(symbol)

        _write_pickle(os.path.join(destdir, 'localedata', stem + '.dat'),
                      data)


if __name__ == '__main__':
    main()