Mercurial > babel > old > mirror
annotate scripts/import_cldr.py @ 300:04d06b162f62
When using sys.stdout with a pipe or redirection the sys.stdout.encoding value
will be set to None instead of what the environment first specified through
any locale setting. As such, fall back to locale.getpreferredencoding() before
the ultimate fallback to ASCII.
OK: cmlenz
closes: #76
author | jruigrok |
---|---|
date | Thu, 13 Dec 2007 11:51:36 +0000 |
parents | d0cd235ede46 |
children | c22f292731be |
rev | line source |
---|---|
3 | 1 #!/usr/bin/env python |
2 # -*- coding: utf-8 -*- | |
3 # | |
4 # Copyright (C) 2007 Edgewall Software | |
5 # All rights reserved. | |
6 # | |
7 # This software is licensed as described in the file COPYING, which | |
8 # you should have received as part of this distribution. The terms | |
9 # are also available at http://babel.edgewall.org/wiki/License. | |
10 # | |
11 # This software consists of voluntary contributions made by many | |
12 # individuals. For the exact contribution history, see the revision | |
13 # history and logs, available at http://babel.edgewall.org/log/. | |
14 | |
15 import copy | |
16 from optparse import OptionParser | |
17 import os | |
18 import pickle | |
19 import sys | |
20 try: | |
21 from xml.etree.ElementTree import parse | |
22 except ImportError: | |
23 from elementtree.ElementTree import parse | |
24 | |
67
ad48b95af0d9
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
36
diff
changeset
|
25 # Make sure we're using Babel source, and not some previously installed version |
ad48b95af0d9
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
36
diff
changeset
|
26 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..')) |
ad48b95af0d9
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
36
diff
changeset
|
27 |
11 | 28 from babel import dates, numbers |
3 | 29 |
17 | 30 weekdays = {'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5, |
31 'sun': 6} | |
10
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
32 |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
# The builtin any() only exists from Python 2.5 on; define a compatible
# fallback when it is missing.
try:
    any
except NameError:
    def any(iterable):
        """Return True if at least one item of `iterable` is true.

        Unlike a filter()-based emulation, this returns a real boolean and
        stops consuming `iterable` at the first true item.
        """
        for item in iterable:
            if item:
                return True
        return False
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
38 |
3 | 39 def _text(elem): |
40 buf = [elem.text or ''] | |
41 for child in elem: | |
42 buf.append(_text(child)) | |
43 buf.append(elem.tail or '') | |
44 return u''.join(filter(None, buf)).strip() | |
45 | |
def _applies_to(elem, territory, regions):
    """Tell whether a supplemental week-data element covers a locale.

    True when the element's whitespace-separated `territories` attribute
    names `territory` itself or any of the `regions` containing it.
    """
    listed = elem.attrib['territories'].split()
    return territory in listed or any([r in listed for r in regions])


def _zone_info(elem):
    """Collect the city and long/short names shared by zones and metazones.

    Returns a dict that has a 'city' entry when `elem` has a non-empty
    <exemplarCity>, and 'long'/'short' dicts (child tag -> text) when the
    corresponding children exist.
    """
    info = {}
    city = elem.findtext('exemplarCity')
    if city:
        info['city'] = unicode(city)
    for form in ('long', 'short'):
        for child in elem.findall(form + '/*'):
            info.setdefault(form, {})[child.tag] = unicode(child.text)
    return info


def main():
    """Convert the CLDR XML files into the pickles used by Babel.

    Expects exactly one command-line argument: the path of the CLDR
    directory, which must contain `supplemental/supplementalData.xml` and a
    `main` directory with `root.xml` and the per-locale XML files.

    Writes `global.dat` (timezone/territory mappings) and one
    `localedata/<stem>.dat` per locale file below the `babel` directory that
    sits next to this script's parent directory. All pickles use protocol 2.
    Progress and pattern errors are reported on stderr.
    """
    parser = OptionParser(usage='%prog path/to/cldr')
    _options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                           '..', 'babel')

    sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))

    # import global data from the supplemental files
    global_data = {}

    territory_zones = global_data.setdefault('territory_zones', {})
    zone_aliases = global_data.setdefault('zone_aliases', {})
    zone_territories = global_data.setdefault('zone_territories', {})
    for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
        tzid = elem.attrib['type']
        territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
        zone_territories[tzid] = elem.attrib['territory']
        if 'aliases' in elem.attrib:
            for alias in elem.attrib['aliases'].split():
                zone_aliases[alias] = tzid

    outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
    try:
        pickle.dump(global_data, outfile, 2)
    finally:
        outfile.close()

    # build a territory containment mapping for inheritance
    group_members = {}
    for elem in sup.findall('//territoryContainment/group'):
        group_members[elem.attrib['type']] = elem.attrib['contains'].split()

    # Resolve territory containment: map every territory to the set of
    # groups that contain it. Groups are visited in sorted order, and a
    # group's own containers are only inherited if it was already recorded
    # as a member of an earlier group.
    territory_containment = {}
    for group, territory_list in sorted(group_members.items()):
        for territory in territory_list:
            containers = territory_containment.setdefault(territory, set([]))
            if group in territory_containment:
                containers |= territory_containment[group]
            containers.add(group)

    # The week data lives in the supplemental file, so look it up once
    # instead of once per locale file.
    week_elem = sup.find('//weekData')

    # Process root.xml first, then the remaining files from the shortest
    # name to the longest.
    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(key=len)
    filenames.insert(0, 'root.xml')

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            # Skip silently: only files that are actually imported are logged.
            continue
        print>>sys.stderr, 'Processing input file %r' % filename

        tree = parse(os.path.join(srcdir, 'main', filename))
        data = {}

        language = None
        elem = tree.find('//identity/language')
        if elem is not None:
            language = elem.attrib['type']
        print>>sys.stderr, ' Language: %r' % language

        elem = tree.find('//identity/territory')
        if elem is not None:
            territory = elem.attrib['type']
        else:
            territory = '001' # world
        print>>sys.stderr, ' Territory: %r' % territory
        containing_regions = territory_containment.get(territory, [])
        print>>sys.stderr, ' Regions: %r' % containing_regions

        # <localeDisplayNames>

        for key, path in (('territories', '//territories/territory'),
                          ('languages', '//languages/language'),
                          ('variants', '//variants/variant'),
                          ('scripts', '//scripts/script')):
            names = data.setdefault(key, {})
            for elem in tree.findall(path):
                # A draft entry never replaces one that is already present.
                if 'draft' in elem.attrib and elem.attrib['type'] in names:
                    continue
                names[elem.attrib['type']] = _text(elem)

        # <dates>

        week_data = data.setdefault('week_data', {})

        for elem in week_elem.findall('minDays'):
            if _applies_to(elem, territory, containing_regions):
                week_data['min_days'] = int(elem.attrib['count'])

        for tag, key in (('firstDay', 'first_day'),
                         ('weekendStart', 'weekend_start'),
                         ('weekendEnd', 'weekend_end')):
            for elem in week_elem.findall(tag):
                if _applies_to(elem, territory, containing_regions):
                    week_data[key] = weekdays[elem.attrib['day']]

        # Only the first non-draft element of each kind is used. The CLDR
        # '{n}' placeholders are turned into Python %-format placeholders.
        zone_formats = data.setdefault('zone_formats', {})
        for elem in tree.findall('//timeZoneNames/gmtFormat'):
            if 'draft' not in elem.attrib:
                zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
                break
        for elem in tree.findall('//timeZoneNames/regionFormat'):
            if 'draft' not in elem.attrib:
                zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
                break
        for elem in tree.findall('//timeZoneNames/fallbackFormat'):
            if 'draft' not in elem.attrib:
                zone_formats['fallback'] = unicode(elem.text) \
                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                break

        time_zones = data.setdefault('time_zones', {})
        for elem in tree.findall('//timeZoneNames/zone'):
            info = _zone_info(elem)
            for child in elem.findall('usesMetazone'):
                if 'to' not in child.attrib: # FIXME: support old mappings
                    info['use_metazone'] = child.attrib['mzone']
            time_zones[elem.attrib['type']] = info

        meta_zones = data.setdefault('meta_zones', {})
        for elem in tree.findall('//timeZoneNames/metazone'):
            info = _zone_info(elem)
            info['common'] = elem.findtext('commonlyUsed') == 'true'
            meta_zones[elem.attrib['type']] = info

        for calendar in tree.findall('//calendars/calendar'):
            if calendar.attrib['type'] != 'gregorian':
                # TODO: support other calendar types
                continue

            # NOTE(review): unicode(None) is u'None', so an element without
            # text would be stored as the string 'None' below -- confirm
            # that this cannot occur in the CLDR data.
            months = data.setdefault('months', {})
            for ctxt in calendar.findall('months/monthContext'):
                ctxts = months.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('monthWidth'):
                    widths = ctxts.setdefault(width.attrib['type'], {})
                    for elem in width.findall('month'):
                        if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
                            continue
                        widths[int(elem.attrib.get('type'))] = unicode(elem.text)

            days = data.setdefault('days', {})
            for ctxt in calendar.findall('days/dayContext'):
                ctxts = days.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('dayWidth'):
                    widths = ctxts.setdefault(width.attrib['type'], {})
                    for elem in width.findall('day'):
                        # Days are keyed by weekday number (mon=0 .. sun=6).
                        dtype = weekdays[elem.attrib['type']]
                        if 'draft' in elem.attrib and dtype in widths:
                            continue
                        widths[dtype] = unicode(elem.text)

            quarters = data.setdefault('quarters', {})
            for ctxt in calendar.findall('quarters/quarterContext'):
                ctxts = quarters.setdefault(ctxt.attrib['type'], {})
                for width in ctxt.findall('quarterWidth'):
                    widths = ctxts.setdefault(width.attrib['type'], {})
                    for elem in width.findall('quarter'):
                        if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
                            continue
                        widths[int(elem.attrib.get('type'))] = unicode(elem.text)

            eras = data.setdefault('eras', {})
            for width in calendar.findall('eras/*'):
                # Any other child tag of <eras> raises KeyError here.
                ewidth = {
                    'eraAbbr': 'abbreviated',
                    'eraNames': 'wide',
                    'eraNarrow': 'narrow',
                }[width.tag]
                widths = eras.setdefault(ewidth, {})
                for elem in width.findall('era'):
                    if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
                        continue
                    widths[int(elem.attrib.get('type'))] = unicode(elem.text)

            # AM/PM
            periods = data.setdefault('periods', {})
            for tag in ('am', 'pm'):
                for elem in calendar.findall(tag):
                    if 'draft' in elem.attrib and elem.tag in periods:
                        continue
                    periods[elem.tag] = unicode(elem.text)

            date_formats = data.setdefault('date_formats', {})
            for elem in calendar.findall('dateFormats/dateFormatLength'):
                if 'draft' in elem.attrib and elem.attrib.get('type') in date_formats:
                    continue
                try:
                    date_formats[elem.attrib.get('type')] = \
                        dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
                except ValueError:
                    # sys.exc_info() instead of "except ValueError, e" so the
                    # statement parses on every Python version.
                    e = sys.exc_info()[1]
                    print>>sys.stderr, 'ERROR: %s' % e

            time_formats = data.setdefault('time_formats', {})
            for elem in calendar.findall('timeFormats/timeFormatLength'):
                if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats:
                    continue
                try:
                    time_formats[elem.attrib.get('type')] = \
                        dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
                except ValueError:
                    e = sys.exc_info()[1]
                    print>>sys.stderr, 'ERROR: %s' % e

            # Date/time combination patterns are stored as plain strings.
            datetime_formats = data.setdefault('datetime_formats', {})
            for elem in calendar.findall('dateTimeFormats/dateTimeFormatLength'):
                if 'draft' in elem.attrib and elem.attrib.get('type') in datetime_formats:
                    continue
                try:
                    datetime_formats[elem.attrib.get('type')] = \
                        unicode(elem.findtext('dateTimeFormat/pattern'))
                except ValueError:
                    e = sys.exc_info()[1]
                    print>>sys.stderr, 'ERROR: %s' % e

        # <numbers>

        number_symbols = data.setdefault('number_symbols', {})
        for elem in tree.findall('//numbers/symbols/*'):
            number_symbols[elem.tag] = unicode(elem.text)

        # The four number format families share one layout:
        # <xFormats>/<xFormatLength type=...>/<xFormat>/<pattern>.
        for kind in ('decimal', 'scientific', 'currency', 'percent'):
            formats = data.setdefault(kind + '_formats', {})
            length_path = '//%sFormats/%sFormatLength' % (kind, kind)
            pattern_path = '%sFormat/pattern' % kind
            for elem in tree.findall(length_path):
                if 'draft' in elem.attrib and elem.attrib.get('type') in formats:
                    continue
                pattern = unicode(elem.findtext(pattern_path))
                formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)

        currency_names = data.setdefault('currency_names', {})
        currency_symbols = data.setdefault('currency_symbols', {})
        for elem in tree.findall('//currencies/currency'):
            name = elem.findtext('displayName')
            if name:
                currency_names[elem.attrib['type']] = unicode(name)
            symbol = elem.findtext('symbol')
            if symbol:
                currency_symbols[elem.attrib['type']] = unicode(symbol)

        outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb')
        try:
            pickle.dump(data, outfile, 2)
        finally:
            outfile.close()
357 | |
# Run the import only when this file is executed as a script.
if __name__ == '__main__':
    main()