Mercurial > babel > mirror
annotate scripts/import_cldr.py @ 383:7e3d7532f3d4 stable-0.9.x
Bump up version number on 0.9.x branch.
author | cmlenz |
---|---|
date | Thu, 10 Jul 2008 09:44:38 +0000 |
parents | 0a0bc1639ea7 |
children | 38db48990998 |
rev | line source |
---|---|
1 | 1 #!/usr/bin/env python |
2 # -*- coding: utf-8 -*- | |
3 # | |
4 # Copyright (C) 2007 Edgewall Software | |
5 # All rights reserved. | |
6 # | |
7 # This software is licensed as described in the file COPYING, which | |
8 # you should have received as part of this distribution. The terms | |
9 # are also available at http://babel.edgewall.org/wiki/License. | |
10 # | |
11 # This software consists of voluntary contributions made by many | |
12 # individuals. For the exact contribution history, see the revision | |
13 # history and logs, available at http://babel.edgewall.org/log/. | |
14 | |
15 import copy | |
16 from optparse import OptionParser | |
17 import os | |
18 import pickle | |
379 | 19 import re |
1 | 20 import sys |
21 try: | |
22 from xml.etree.ElementTree import parse | |
23 except ImportError: | |
24 from elementtree.ElementTree import parse | |
25 | |
65
75fe8369ed3b
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
34
diff
changeset
|
26 # Make sure we're using Babel source, and not some previously installed version |
75fe8369ed3b
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
34
diff
changeset
|
27 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..')) |
75fe8369ed3b
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
34
diff
changeset
|
28 |
9 | 29 from babel import dates, numbers |
379 | 30 from babel.localedata import Alias |
1 | 31 |
# Maps the CLDR weekday abbreviations to integer indices, with Monday as 0
# and Sunday as 6.
weekdays = dict(zip(
    ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'),
    range(7),
))
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
34 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
# Compatibility shim: only define ``any`` when the interpreter does not
# already provide it as a builtin.
try:
    any
except NameError:
    def any(iterable):
        """Return True if at least one item of *iterable* is truthy.

        Stops at the first truthy item and always returns a real bool,
        so it behaves like the builtin it stands in for.
        """
        for item in iterable:
            if item:
                return True
        return False
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
40 |
379 | 41 |
1 | 42 def _text(elem): |
43 buf = [elem.text or ''] | |
44 for child in elem: | |
45 buf.append(_text(child)) | |
46 buf.append(elem.tail or '') | |
47 return u''.join(filter(None, buf)).strip() | |
48 | |
379 | 49 |
50 NAME_RE = re.compile(r"^\w+$") | |
51 TYPE_ATTR_RE = re.compile(r"^\w+\[@type='(.*?)'\]$") | |
52 | |
53 NAME_MAP = { | |
54 'dateFormats': 'date_formats', | |
55 'dateTimeFormats': 'datetime_formats', | |
56 'eraAbbr': 'abbreviated', | |
57 'eraNames': 'wide', | |
58 'eraNarrow': 'narrow', | |
59 'timeFormats': 'time_formats' | |
60 } | |
61 | |
62 def _translate_alias(ctxt, path): | |
63 parts = path.split('/') | |
64 keys = ctxt[:] | |
65 for part in parts: | |
66 if part == '..': | |
67 keys.pop() | |
68 else: | |
69 match = TYPE_ATTR_RE.match(part) | |
70 if match: | |
71 keys.append(match.group(1)) | |
72 else: | |
73 assert NAME_RE.match(part) | |
74 keys.append(NAME_MAP.get(part, part)) | |
75 return keys | |
76 | |
77 | |
1 | 78 def main(): |
79 parser = OptionParser(usage='%prog path/to/cldr') | |
80 options, args = parser.parse_args() | |
81 if len(args) != 1: | |
82 parser.error('incorrect number of arguments') | |
83 | |
84 srcdir = args[0] | |
85 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), | |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
86 '..', 'babel') |
1 | 87 |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
88 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml')) |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
89 |
346
5e58ea360a5c
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
90 # Import global data from the supplemental files |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
91 global_data = {} |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
92 |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
93 territory_zones = global_data.setdefault('territory_zones', {}) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
94 zone_aliases = global_data.setdefault('zone_aliases', {}) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
95 zone_territories = global_data.setdefault('zone_territories', {}) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
96 for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'): |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
97 tzid = elem.attrib['type'] |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
98 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
99 zone_territories[tzid] = elem.attrib['territory'] |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
100 if 'aliases' in elem.attrib: |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
101 for alias in elem.attrib['aliases'].split(): |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
102 zone_aliases[alias] = tzid |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
103 |
346
5e58ea360a5c
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
104 # Import Metazone mapping |
5e58ea360a5c
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
105 meta_zones = global_data.setdefault('meta_zones', {}) |
5e58ea360a5c
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
106 tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml')) |
5e58ea360a5c
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
107 for elem in tzsup.findall('//timezone'): |
5e58ea360a5c
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
108 for child in elem.findall('usesMetazone'): |
5e58ea360a5c
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
109 if 'to' not in child.attrib: # FIXME: support old mappings |
5e58ea360a5c
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
110 meta_zones[elem.attrib['type']] = child.attrib['mzone'] |
5e58ea360a5c
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
111 |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
112 outfile = open(os.path.join(destdir, 'global.dat'), 'wb') |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
113 try: |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
114 pickle.dump(global_data, outfile, 2) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
115 finally: |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
116 outfile.close() |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
117 |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
118 # build a territory containment mapping for inheritance |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
119 regions = {} |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
120 for elem in sup.findall('//territoryContainment/group'): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
121 regions[elem.attrib['type']] = elem.attrib['contains'].split() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
122 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
123 # Resolve territory containment |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
124 territory_containment = {} |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
125 region_items = regions.items() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
126 region_items.sort() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
127 for group, territory_list in region_items: |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
128 for territory in territory_list: |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
129 containers = territory_containment.setdefault(territory, set([])) |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
130 if group in territory_containment: |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
131 containers |= territory_containment[group] |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
132 containers.add(group) |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
133 |
1 | 134 filenames = os.listdir(os.path.join(srcdir, 'main')) |
135 filenames.remove('root.xml') | |
136 filenames.sort(lambda a,b: len(a)-len(b)) | |
137 filenames.insert(0, 'root.xml') | |
138 | |
139 for filename in filenames: | |
140 print>>sys.stderr, 'Processing input file %r' % filename | |
141 stem, ext = os.path.splitext(filename) | |
142 if ext != '.xml': | |
143 continue | |
379 | 144 #if stem != 'root': |
145 # break | |
1 | 146 |
26
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
147 tree = parse(os.path.join(srcdir, 'main', filename)) |
1 | 148 data = {} |
149 | |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
150 language = None |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
151 elem = tree.find('//identity/language') |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
152 if elem is not None: |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
153 language = elem.attrib['type'] |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
154 print>>sys.stderr, ' Language: %r' % language |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
155 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
156 territory = None |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
157 elem = tree.find('//identity/territory') |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
158 if elem is not None: |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
159 territory = elem.attrib['type'] |
13 | 160 else: |
161 territory = '001' # world | |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
162 print>>sys.stderr, ' Territory: %r' % territory |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
163 regions = territory_containment.get(territory, []) |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
164 print>>sys.stderr, ' Regions: %r' % regions |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
165 |
1 | 166 # <localeDisplayNames> |
167 | |
168 territories = data.setdefault('territories', {}) | |
169 for elem in tree.findall('//territories/territory'): | |
379 | 170 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
171 and elem.attrib['type'] in territories: | |
1 | 172 continue |
173 territories[elem.attrib['type']] = _text(elem) | |
174 | |
175 languages = data.setdefault('languages', {}) | |
176 for elem in tree.findall('//languages/language'): | |
379 | 177 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
178 and elem.attrib['type'] in languages: | |
1 | 179 continue |
180 languages[elem.attrib['type']] = _text(elem) | |
181 | |
182 variants = data.setdefault('variants', {}) | |
183 for elem in tree.findall('//variants/variant'): | |
379 | 184 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
185 and elem.attrib['type'] in variants: | |
1 | 186 continue |
187 variants[elem.attrib['type']] = _text(elem) | |
188 | |
189 scripts = data.setdefault('scripts', {}) | |
190 for elem in tree.findall('//scripts/script'): | |
379 | 191 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
192 and elem.attrib['type'] in scripts: | |
1 | 193 continue |
194 scripts[elem.attrib['type']] = _text(elem) | |
195 | |
196 # <dates> | |
197 | |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
198 week_data = data.setdefault('week_data', {}) |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
199 supelem = sup.find('//weekData') |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
200 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
201 for elem in supelem.findall('minDays'): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
202 territories = elem.attrib['territories'].split() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
203 if territory in territories or any([r in territories for r in regions]): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
204 week_data['min_days'] = int(elem.attrib['count']) |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
205 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
206 for elem in supelem.findall('firstDay'): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
207 territories = elem.attrib['territories'].split() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
208 if territory in territories or any([r in territories for r in regions]): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
209 week_data['first_day'] = weekdays[elem.attrib['day']] |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
210 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
211 for elem in supelem.findall('weekendStart'): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
212 territories = elem.attrib['territories'].split() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
213 if territory in territories or any([r in territories for r in regions]): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
214 week_data['weekend_start'] = weekdays[elem.attrib['day']] |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
215 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
216 for elem in supelem.findall('weekendEnd'): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
217 territories = elem.attrib['territories'].split() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
218 if territory in territories or any([r in territories for r in regions]): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
219 week_data['weekend_end'] = weekdays[elem.attrib['day']] |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
220 |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
221 zone_formats = data.setdefault('zone_formats', {}) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
222 for elem in tree.findall('//timeZoneNames/gmtFormat'): |
379 | 223 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
224 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s') |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
225 break |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
226 for elem in tree.findall('//timeZoneNames/regionFormat'): |
379 | 227 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
228 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s') |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
229 break |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
230 for elem in tree.findall('//timeZoneNames/fallbackFormat'): |
379 | 231 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
232 zone_formats['fallback'] = unicode(elem.text) \ |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
233 .replace('{0}', '%(0)s').replace('{1}', '%(1)s') |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
234 break |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
235 |
1 | 236 time_zones = data.setdefault('time_zones', {}) |
237 for elem in tree.findall('//timeZoneNames/zone'): | |
28
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
238 info = {} |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
239 city = elem.findtext('exemplarCity') |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
240 if city: |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
241 info['city'] = unicode(city) |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
242 for child in elem.findall('long/*'): |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
243 info.setdefault('long', {})[child.tag] = unicode(child.text) |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
244 for child in elem.findall('short/*'): |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
245 info.setdefault('short', {})[child.tag] = unicode(child.text) |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
246 time_zones[elem.attrib['type']] = info |
1 | 247 |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
248 meta_zones = data.setdefault('meta_zones', {}) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
249 for elem in tree.findall('//timeZoneNames/metazone'): |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
250 info = {} |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
251 city = elem.findtext('exemplarCity') |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
252 if city: |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
253 info['city'] = unicode(city) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
254 for child in elem.findall('long/*'): |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
255 info.setdefault('long', {})[child.tag] = unicode(child.text) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
256 for child in elem.findall('short/*'): |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
257 info.setdefault('short', {})[child.tag] = unicode(child.text) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
258 info['common'] = elem.findtext('commonlyUsed') == 'true' |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
259 meta_zones[elem.attrib['type']] = info |
34 | 260 |
1 | 261 for calendar in tree.findall('//calendars/calendar'): |
262 if calendar.attrib['type'] != 'gregorian': | |
263 # TODO: support other calendar types | |
264 continue | |
265 | |
266 months = data.setdefault('months', {}) | |
267 for ctxt in calendar.findall('months/monthContext'): | |
379 | 268 ctxt_type = ctxt.attrib['type'] |
269 ctxts = months.setdefault(ctxt_type, {}) | |
1 | 270 for width in ctxt.findall('monthWidth'): |
379 | 271 width_type = width.attrib['type'] |
272 widths = ctxts.setdefault(width_type, {}) | |
273 for elem in width.getiterator(): | |
274 if elem.tag == 'month': | |
275 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | |
276 and int(elem.attrib['type']) in widths: | |
277 continue | |
278 widths[int(elem.attrib.get('type'))] = unicode(elem.text) | |
279 elif elem.tag == 'alias': | |
280 ctxts[width_type] = Alias( | |
281 _translate_alias(['months', ctxt_type, width_type], | |
282 elem.attrib['path']) | |
283 ) | |
1 | 284 |
285 days = data.setdefault('days', {}) | |
286 for ctxt in calendar.findall('days/dayContext'): | |
379 | 287 ctxt_type = ctxt.attrib['type'] |
288 ctxts = days.setdefault(ctxt_type, {}) | |
1 | 289 for width in ctxt.findall('dayWidth'): |
379 | 290 width_type = width.attrib['type'] |
291 widths = ctxts.setdefault(width_type, {}) | |
292 for elem in width.getiterator(): | |
293 if elem.tag == 'day': | |
294 dtype = weekdays[elem.attrib['type']] | |
295 if ('draft' in elem.attrib or 'alt' not in elem.attrib) \ | |
296 and dtype in widths: | |
297 continue | |
298 widths[dtype] = unicode(elem.text) | |
299 elif elem.tag == 'alias': | |
300 ctxts[width_type] = Alias( | |
301 _translate_alias(['days', ctxt_type, width_type], | |
302 elem.attrib['path']) | |
303 ) | |
1 | 304 |
305 quarters = data.setdefault('quarters', {}) | |
306 for ctxt in calendar.findall('quarters/quarterContext'): | |
379 | 307 ctxt_type = ctxt.attrib['type'] |
1 | 308 ctxts = quarters.setdefault(ctxt.attrib['type'], {}) |
309 for width in ctxt.findall('quarterWidth'): | |
379 | 310 width_type = width.attrib['type'] |
311 widths = ctxts.setdefault(width_type, {}) | |
312 for elem in width.getiterator(): | |
313 if elem.tag == 'quarter': | |
314 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | |
315 and int(elem.attrib['type']) in widths: | |
316 continue | |
317 widths[int(elem.attrib['type'])] = unicode(elem.text) | |
318 elif elem.tag == 'alias': | |
319 ctxts[width_type] = Alias( | |
320 _translate_alias(['quarters', ctxt_type, width_type], | |
321 elem.attrib['path']) | |
322 ) | |
1 | 323 |
324 eras = data.setdefault('eras', {}) | |
325 for width in calendar.findall('eras/*'): | |
379 | 326 width_type = NAME_MAP[width.tag] |
327 widths = eras.setdefault(width_type, {}) | |
328 for elem in width.getiterator(): | |
329 if elem.tag == 'era': | |
330 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | |
331 and int(elem.attrib['type']) in widths: | |
332 continue | |
333 widths[int(elem.attrib.get('type'))] = unicode(elem.text) | |
334 elif elem.tag == 'alias': | |
335 eras[width_type] = Alias( | |
336 _translate_alias(['eras', width_type], | |
337 elem.attrib['path']) | |
338 ) | |
1 | 339 |
340 # AM/PM | |
341 periods = data.setdefault('periods', {}) | |
342 for elem in calendar.findall('am'): | |
379 | 343 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
344 and elem.tag in periods: | |
1 | 345 continue |
346 periods[elem.tag] = unicode(elem.text) | |
347 for elem in calendar.findall('pm'): | |
379 | 348 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
349 and elem.tag in periods: | |
1 | 350 continue |
351 periods[elem.tag] = unicode(elem.text) | |
352 | |
353 date_formats = data.setdefault('date_formats', {}) | |
379 | 354 for format in calendar.findall('dateFormats'): |
355 for elem in format.getiterator(): | |
356 if elem.tag == 'dateFormatLength': | |
357 if 'draft' in elem.attrib and \ | |
358 elem.attrib.get('type') in date_formats: | |
359 continue | |
360 try: | |
361 date_formats[elem.attrib.get('type')] = \ | |
362 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern'))) | |
363 except ValueError, e: | |
364 print>>sys.stderr, 'ERROR: %s' % e | |
365 elif elem.tag == 'alias': | |
366 date_formats = Alias(_translate_alias( | |
367 ['date_formats'], elem.attrib['path']) | |
368 ) | |
1 | 369 |
370 time_formats = data.setdefault('time_formats', {}) | |
379 | 371 for format in calendar.findall('timeFormats'): |
372 for elem in format.getiterator(): | |
373 if elem.tag == 'timeFormatLength': | |
374 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | |
375 and elem.attrib.get('type') in time_formats: | |
376 continue | |
377 try: | |
378 time_formats[elem.attrib.get('type')] = \ | |
379 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern'))) | |
380 except ValueError, e: | |
381 print>>sys.stderr, 'ERROR: %s' % e | |
382 elif elem.tag == 'alias': | |
383 time_formats = Alias(_translate_alias( | |
384 ['time_formats'], elem.attrib['path']) | |
385 ) | |
1 | 386 |
33 | 387 datetime_formats = data.setdefault('datetime_formats', {}) |
379 | 388 for format in calendar.findall('dateTimeFormats'): |
389 for elem in format.getiterator(): | |
390 if elem.tag == 'dateTimeFormatLength': | |
391 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | |
392 and elem.attrib.get('type') in datetime_formats: | |
393 continue | |
394 try: | |
395 datetime_formats[elem.attrib.get('type')] = \ | |
396 unicode(elem.findtext('dateTimeFormat/pattern')) | |
397 except ValueError, e: | |
398 print>>sys.stderr, 'ERROR: %s' % e | |
399 elif elem.tag == 'alias': | |
400 datetime_formats = Alias(_translate_alias( | |
401 ['datetime_formats'], elem.attrib['path']) | |
402 ) | |
33 | 403 |
1 | 404 # <numbers> |
405 | |
406 number_symbols = data.setdefault('number_symbols', {}) | |
407 for elem in tree.findall('//numbers/symbols/*'): | |
408 number_symbols[elem.tag] = unicode(elem.text) | |
409 | |
410 decimal_formats = data.setdefault('decimal_formats', {}) | |
411 for elem in tree.findall('//decimalFormats/decimalFormatLength'): | |
379 | 412 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
413 and elem.attrib.get('type') in decimal_formats: | |
1 | 414 continue |
26
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
415 pattern = unicode(elem.findtext('decimalFormat/pattern')) |
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
416 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
1 | 417 |
418 scientific_formats = data.setdefault('scientific_formats', {}) | |
419 for elem in tree.findall('//scientificFormats/scientificFormatLength'): | |
379 | 420 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
421 and elem.attrib.get('type') in scientific_formats: | |
1 | 422 continue |
125 | 423 pattern = unicode(elem.findtext('scientificFormat/pattern')) |
424 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | |
1 | 425 |
426 currency_formats = data.setdefault('currency_formats', {}) | |
427 for elem in tree.findall('//currencyFormats/currencyFormatLength'): | |
379 | 428 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
429 and elem.attrib.get('type') in currency_formats: | |
1 | 430 continue |
125 | 431 pattern = unicode(elem.findtext('currencyFormat/pattern')) |
432 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | |
1 | 433 |
434 percent_formats = data.setdefault('percent_formats', {}) | |
435 for elem in tree.findall('//percentFormats/percentFormatLength'): | |
379 | 436 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
437 and elem.attrib.get('type') in percent_formats: | |
1 | 438 continue |
26
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
439 pattern = unicode(elem.findtext('percentFormat/pattern')) |
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
440 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
1 | 441 |
26
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
442 currency_names = data.setdefault('currency_names', {}) |
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
443 currency_symbols = data.setdefault('currency_symbols', {}) |
1 | 444 for elem in tree.findall('//currencies/currency'): |
26
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
445 name = elem.findtext('displayName') |
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
446 if name: |
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
447 currency_names[elem.attrib['type']] = unicode(name) |
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
448 symbol = elem.findtext('symbol') |
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
449 if symbol: |
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
450 currency_symbols[elem.attrib['type']] = unicode(symbol) |
1 | 451 |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
452 outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb') |
1 | 453 try: |
454 pickle.dump(data, outfile, 2) | |
455 finally: | |
456 outfile.close() | |
457 | |
379 | 458 |
# Script entry point: call main() only when this file is run directly, not when it is imported.
1 | 459 if __name__ == '__main__': |
460 main() |