Mercurial > babel > old > babel-test
annotate scripts/import_cldr.py @ 543:dff1de0f9a36 stable-0.9.x 0.9.6
add release date for 0.9.6 in changelog
author | fschwarz |
---|---|
date | Thu, 17 Mar 2011 14:00:27 +0000 |
parents | d877836a8455 |
children |
rev | line source |
---|---|
1 | 1 #!/usr/bin/env python |
2 # -*- coding: utf-8 -*- | |
3 # | |
4 # Copyright (C) 2007 Edgewall Software | |
5 # All rights reserved. | |
6 # | |
7 # This software is licensed as described in the file COPYING, which | |
8 # you should have received as part of this distribution. The terms | |
9 # are also available at http://babel.edgewall.org/wiki/License. | |
10 # | |
11 # This software consists of voluntary contributions made by many | |
12 # individuals. For the exact contribution history, see the revision | |
13 # history and logs, available at http://babel.edgewall.org/log/. | |
14 | |
15 import copy | |
16 from optparse import OptionParser | |
17 import os | |
18 import pickle | |
379 | 19 import re |
1 | 20 import sys |
21 try: | |
22 from xml.etree.ElementTree import parse | |
23 except ImportError: | |
24 from elementtree.ElementTree import parse | |
25 | |
65
7478d663561f
Add Babel soruce path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
34
diff
changeset
|
26 # Make sure we're using Babel source, and not some previously installed version |
7478d663561f
Add Babel soruce path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
34
diff
changeset
|
27 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..')) |
7478d663561f
Add Babel soruce path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
34
diff
changeset
|
28 |
9 | 29 from babel import dates, numbers |
379 | 30 from babel.localedata import Alias |
509 | 31 from babel.util import set |
1 | 32 |
# Weekday indices keyed by the three-letter day abbreviations found in the
# CLDR ``day``/``type`` attributes; Monday is 0 and Sunday is 6.
weekdays = dict(zip(['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'],
                    range(7)))
8
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
35 |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
try:
    any
except NameError:
    def any(iterable):
        """Fallback for interpreters that lack the ``any`` builtin.

        Return ``True`` as soon as an element of `iterable` is true, and
        ``False`` if none is (including when `iterable` is empty), which is
        the contract of the builtin this stands in for.
        """
        for item in iterable:
            if item:
                return True
        return False
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
41 |
379 | 42 |
1 | 43 def _text(elem): |
44 buf = [elem.text or ''] | |
45 for child in elem: | |
46 buf.append(_text(child)) | |
47 buf.append(elem.tail or '') | |
48 return u''.join(filter(None, buf)).strip() | |
49 | |
379 | 50 |
# The two forms of path component recognised in alias paths: a bare element
# name, or an element name qualified by a ``type`` attribute whose value is
# captured as group 1.
NAME_RE = re.compile(r"^\w+$")
TYPE_ATTR_RE = re.compile(r"^\w+\[@type='(.*?)'\]$")

# CLDR element names and the keys they are stored under in the locale data.
NAME_MAP = dict([
    ('dateFormats', 'date_formats'),
    ('dateTimeFormats', 'datetime_formats'),
    ('eraAbbr', 'abbreviated'),
    ('eraNames', 'wide'),
    ('eraNarrow', 'narrow'),
    ('timeFormats', 'time_formats'),
])
62 | |
def _translate_alias(ctxt, path):
    """Resolve a relative alias path against a list of data keys.

    :param ctxt: list of keys locating the aliased item; it is copied, not
                 modified
    :param path: ``/``-separated alias path. A ``..`` component drops the
                 last key, ``name[@type='x']`` appends ``x``, and a bare
                 ``name`` appends its `NAME_MAP` translation (or the name
                 itself when it has none)
    :return: the resulting list of keys
    :raise ValueError: if a path component has any other form
    """
    keys = ctxt[:]
    for part in path.split('/'):
        if part == '..':
            keys.pop()
            continue
        match = TYPE_ATTR_RE.match(part)
        if match:
            keys.append(match.group(1))
        elif NAME_RE.match(part):
            keys.append(NAME_MAP.get(part, part))
        else:
            # Raised explicitly instead of asserted, so the check is still
            # performed when the script runs under ``python -O``.
            raise ValueError('unsupported alias path component %r in %r'
                             % (part, path))
    return keys
77 | |
78 | |
1 | 79 def main(): |
80 parser = OptionParser(usage='%prog path/to/cldr') | |
81 options, args = parser.parse_args() | |
82 if len(args) != 1: | |
83 parser.error('incorrect number of arguments') | |
84 | |
85 srcdir = args[0] | |
86 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), | |
233
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
87 '..', 'babel') |
1 | 88 |
8
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
89 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml')) |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
90 |
346
faf0ead3a132
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
91 # Import global data from the supplemental files |
233
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
92 global_data = {} |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
93 |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
94 territory_zones = global_data.setdefault('territory_zones', {}) |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
95 zone_aliases = global_data.setdefault('zone_aliases', {}) |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
96 zone_territories = global_data.setdefault('zone_territories', {}) |
469 | 97 for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'): |
233
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
98 tzid = elem.attrib['type'] |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
99 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
100 zone_territories[tzid] = elem.attrib['territory'] |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
101 if 'aliases' in elem.attrib: |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
102 for alias in elem.attrib['aliases'].split(): |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
103 zone_aliases[alias] = tzid |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
104 |
346
faf0ead3a132
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
105 # Import Metazone mapping |
faf0ead3a132
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
106 meta_zones = global_data.setdefault('meta_zones', {}) |
faf0ead3a132
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
107 tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml')) |
469 | 108 for elem in tzsup.findall('.//timezone'): |
346
faf0ead3a132
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
109 for child in elem.findall('usesMetazone'): |
faf0ead3a132
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
110 if 'to' not in child.attrib: # FIXME: support old mappings |
faf0ead3a132
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
111 meta_zones[elem.attrib['type']] = child.attrib['mzone'] |
faf0ead3a132
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
cmlenz
parents:
233
diff
changeset
|
112 |
233
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
113 outfile = open(os.path.join(destdir, 'global.dat'), 'wb') |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
114 try: |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
115 pickle.dump(global_data, outfile, 2) |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
116 finally: |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
117 outfile.close() |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
118 |
8
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
119 # build a territory containment mapping for inheritance |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
120 regions = {} |
469 | 121 for elem in sup.findall('.//territoryContainment/group'): |
8
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
122 regions[elem.attrib['type']] = elem.attrib['contains'].split() |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
123 |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
124 # Resolve territory containment |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
125 territory_containment = {} |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
126 region_items = regions.items() |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
127 region_items.sort() |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
128 for group, territory_list in region_items: |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
129 for territory in territory_list: |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
130 containers = territory_containment.setdefault(territory, set([])) |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
131 if group in territory_containment: |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
132 containers |= territory_containment[group] |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
133 containers.add(group) |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
134 |
1 | 135 filenames = os.listdir(os.path.join(srcdir, 'main')) |
136 filenames.remove('root.xml') | |
137 filenames.sort(lambda a,b: len(a)-len(b)) | |
138 filenames.insert(0, 'root.xml') | |
139 | |
140 for filename in filenames: | |
141 stem, ext = os.path.splitext(filename) | |
142 if ext != '.xml': | |
143 continue | |
144 | |
389
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
145 print>>sys.stderr, 'Processing input file %r' % filename |
26
710090104678
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
146 tree = parse(os.path.join(srcdir, 'main', filename)) |
1 | 147 data = {} |
148 | |
8
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
149 language = None |
469 | 150 elem = tree.find('.//identity/language') |
8
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
151 if elem is not None: |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
152 language = elem.attrib['type'] |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
153 print>>sys.stderr, ' Language: %r' % language |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
154 |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
155 territory = None |
469 | 156 elem = tree.find('.//identity/territory') |
8
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
157 if elem is not None: |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
158 territory = elem.attrib['type'] |
13 | 159 else: |
160 territory = '001' # world | |
8
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
161 print>>sys.stderr, ' Territory: %r' % territory |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
162 regions = territory_containment.get(territory, []) |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
163 print>>sys.stderr, ' Regions: %r' % regions |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
164 |
1 | 165 # <localeDisplayNames> |
166 | |
167 territories = data.setdefault('territories', {}) | |
469 | 168 for elem in tree.findall('.//territories/territory'): |
379 | 169 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
170 and elem.attrib['type'] in territories: | |
1 | 171 continue |
172 territories[elem.attrib['type']] = _text(elem) | |
173 | |
174 languages = data.setdefault('languages', {}) | |
469 | 175 for elem in tree.findall('.//languages/language'): |
379 | 176 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
177 and elem.attrib['type'] in languages: | |
1 | 178 continue |
179 languages[elem.attrib['type']] = _text(elem) | |
180 | |
181 variants = data.setdefault('variants', {}) | |
469 | 182 for elem in tree.findall('.//variants/variant'): |
379 | 183 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
184 and elem.attrib['type'] in variants: | |
1 | 185 continue |
186 variants[elem.attrib['type']] = _text(elem) | |
187 | |
188 scripts = data.setdefault('scripts', {}) | |
469 | 189 for elem in tree.findall('.//scripts/script'): |
379 | 190 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
191 and elem.attrib['type'] in scripts: | |
1 | 192 continue |
193 scripts[elem.attrib['type']] = _text(elem) | |
194 | |
195 # <dates> | |
196 | |
8
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
197 week_data = data.setdefault('week_data', {}) |
469 | 198 supelem = sup.find('.//weekData') |
8
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
199 |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
200 for elem in supelem.findall('minDays'): |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
201 territories = elem.attrib['territories'].split() |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
202 if territory in territories or any([r in territories for r in regions]): |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
203 week_data['min_days'] = int(elem.attrib['count']) |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
204 |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
205 for elem in supelem.findall('firstDay'): |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
206 territories = elem.attrib['territories'].split() |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
207 if territory in territories or any([r in territories for r in regions]): |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
208 week_data['first_day'] = weekdays[elem.attrib['day']] |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
209 |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
210 for elem in supelem.findall('weekendStart'): |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
211 territories = elem.attrib['territories'].split() |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
212 if territory in territories or any([r in territories for r in regions]): |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
213 week_data['weekend_start'] = weekdays[elem.attrib['day']] |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
214 |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
215 for elem in supelem.findall('weekendEnd'): |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
216 territories = elem.attrib['territories'].split() |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
217 if territory in territories or any([r in territories for r in regions]): |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
218 week_data['weekend_end'] = weekdays[elem.attrib['day']] |
9132c9218745
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
219 |
233
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
220 zone_formats = data.setdefault('zone_formats', {}) |
469 | 221 for elem in tree.findall('.//timeZoneNames/gmtFormat'): |
379 | 222 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
233
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
223 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s') |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
224 break |
469 | 225 for elem in tree.findall('.//timeZoneNames/regionFormat'): |
379 | 226 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
233
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
227 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s') |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
228 break |
469 | 229 for elem in tree.findall('.//timeZoneNames/fallbackFormat'): |
379 | 230 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
233
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
231 zone_formats['fallback'] = unicode(elem.text) \ |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
232 .replace('{0}', '%(0)s').replace('{1}', '%(1)s') |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
233 break |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
234 |
1 | 235 time_zones = data.setdefault('time_zones', {}) |
469 | 236 for elem in tree.findall('.//timeZoneNames/zone'): |
28
11278622ede9
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
237 info = {} |
11278622ede9
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
238 city = elem.findtext('exemplarCity') |
11278622ede9
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
239 if city: |
11278622ede9
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
240 info['city'] = unicode(city) |
11278622ede9
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
241 for child in elem.findall('long/*'): |
11278622ede9
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
242 info.setdefault('long', {})[child.tag] = unicode(child.text) |
11278622ede9
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
243 for child in elem.findall('short/*'): |
11278622ede9
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
244 info.setdefault('short', {})[child.tag] = unicode(child.text) |
11278622ede9
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
245 time_zones[elem.attrib['type']] = info |
1 | 246 |
233
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
247 meta_zones = data.setdefault('meta_zones', {}) |
469 | 248 for elem in tree.findall('.//timeZoneNames/metazone'): |
233
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
249 info = {} |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
250 city = elem.findtext('exemplarCity') |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
251 if city: |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
252 info['city'] = unicode(city) |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
253 for child in elem.findall('long/*'): |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
254 info.setdefault('long', {})[child.tag] = unicode(child.text) |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
255 for child in elem.findall('short/*'): |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
256 info.setdefault('short', {})[child.tag] = unicode(child.text) |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
257 info['common'] = elem.findtext('commonlyUsed') == 'true' |
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
258 meta_zones[elem.attrib['type']] = info |
34 | 259 |
469 | 260 for calendar in tree.findall('.//calendars/calendar'): |
1 | 261 if calendar.attrib['type'] != 'gregorian': |
262 # TODO: support other calendar types | |
263 continue | |
264 | |
265 months = data.setdefault('months', {}) | |
266 for ctxt in calendar.findall('months/monthContext'): | |
379 | 267 ctxt_type = ctxt.attrib['type'] |
268 ctxts = months.setdefault(ctxt_type, {}) | |
1 | 269 for width in ctxt.findall('monthWidth'): |
379 | 270 width_type = width.attrib['type'] |
271 widths = ctxts.setdefault(width_type, {}) | |
272 for elem in width.getiterator(): | |
273 if elem.tag == 'month': | |
274 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | |
275 and int(elem.attrib['type']) in widths: | |
276 continue | |
277 widths[int(elem.attrib.get('type'))] = unicode(elem.text) | |
278 elif elem.tag == 'alias': | |
279 ctxts[width_type] = Alias( | |
280 _translate_alias(['months', ctxt_type, width_type], | |
281 elem.attrib['path']) | |
282 ) | |
1 | 283 |
284 days = data.setdefault('days', {}) | |
285 for ctxt in calendar.findall('days/dayContext'): | |
379 | 286 ctxt_type = ctxt.attrib['type'] |
287 ctxts = days.setdefault(ctxt_type, {}) | |
1 | 288 for width in ctxt.findall('dayWidth'): |
379 | 289 width_type = width.attrib['type'] |
290 widths = ctxts.setdefault(width_type, {}) | |
291 for elem in width.getiterator(): | |
292 if elem.tag == 'day': | |
293 dtype = weekdays[elem.attrib['type']] | |
294 if ('draft' in elem.attrib or 'alt' not in elem.attrib) \ | |
295 and dtype in widths: | |
296 continue | |
297 widths[dtype] = unicode(elem.text) | |
298 elif elem.tag == 'alias': | |
299 ctxts[width_type] = Alias( | |
300 _translate_alias(['days', ctxt_type, width_type], | |
301 elem.attrib['path']) | |
302 ) | |
1 | 303 |
304 quarters = data.setdefault('quarters', {}) | |
305 for ctxt in calendar.findall('quarters/quarterContext'): | |
379 | 306 ctxt_type = ctxt.attrib['type'] |
1 | 307 ctxts = quarters.setdefault(ctxt.attrib['type'], {}) |
308 for width in ctxt.findall('quarterWidth'): | |
379 | 309 width_type = width.attrib['type'] |
310 widths = ctxts.setdefault(width_type, {}) | |
311 for elem in width.getiterator(): | |
312 if elem.tag == 'quarter': | |
313 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | |
314 and int(elem.attrib['type']) in widths: | |
315 continue | |
316 widths[int(elem.attrib['type'])] = unicode(elem.text) | |
317 elif elem.tag == 'alias': | |
318 ctxts[width_type] = Alias( | |
319 _translate_alias(['quarters', ctxt_type, width_type], | |
320 elem.attrib['path']) | |
321 ) | |
1 | 322 |
323 eras = data.setdefault('eras', {}) | |
324 for width in calendar.findall('eras/*'): | |
379 | 325 width_type = NAME_MAP[width.tag] |
326 widths = eras.setdefault(width_type, {}) | |
327 for elem in width.getiterator(): | |
328 if elem.tag == 'era': | |
329 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | |
330 and int(elem.attrib['type']) in widths: | |
331 continue | |
332 widths[int(elem.attrib.get('type'))] = unicode(elem.text) | |
333 elif elem.tag == 'alias': | |
334 eras[width_type] = Alias( | |
335 _translate_alias(['eras', width_type], | |
336 elem.attrib['path']) | |
337 ) | |
1 | 338 |
339 # AM/PM | |
340 periods = data.setdefault('periods', {}) | |
341 for elem in calendar.findall('am'): | |
379 | 342 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
343 and elem.tag in periods: | |
1 | 344 continue |
345 periods[elem.tag] = unicode(elem.text) | |
346 for elem in calendar.findall('pm'): | |
379 | 347 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
348 and elem.tag in periods: | |
1 | 349 continue |
350 periods[elem.tag] = unicode(elem.text) | |
351 | |
352 date_formats = data.setdefault('date_formats', {}) | |
379 | 353 for format in calendar.findall('dateFormats'): |
354 for elem in format.getiterator(): | |
355 if elem.tag == 'dateFormatLength': | |
356 if 'draft' in elem.attrib and \ | |
357 elem.attrib.get('type') in date_formats: | |
358 continue | |
359 try: | |
360 date_formats[elem.attrib.get('type')] = \ | |
361 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern'))) | |
362 except ValueError, e: | |
363 print>>sys.stderr, 'ERROR: %s' % e | |
364 elif elem.tag == 'alias': | |
365 date_formats = Alias(_translate_alias( | |
366 ['date_formats'], elem.attrib['path']) | |
367 ) | |
1 | 368 |
369 time_formats = data.setdefault('time_formats', {}) | |
379 | 370 for format in calendar.findall('timeFormats'): |
371 for elem in format.getiterator(): | |
372 if elem.tag == 'timeFormatLength': | |
373 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | |
374 and elem.attrib.get('type') in time_formats: | |
375 continue | |
376 try: | |
377 time_formats[elem.attrib.get('type')] = \ | |
378 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern'))) | |
379 except ValueError, e: | |
380 print>>sys.stderr, 'ERROR: %s' % e | |
381 elif elem.tag == 'alias': | |
382 time_formats = Alias(_translate_alias( | |
383 ['time_formats'], elem.attrib['path']) | |
384 ) | |
1 | 385 |
33 | 386 datetime_formats = data.setdefault('datetime_formats', {}) |
379 | 387 for format in calendar.findall('dateTimeFormats'): |
388 for elem in format.getiterator(): | |
389 if elem.tag == 'dateTimeFormatLength': | |
390 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ | |
391 and elem.attrib.get('type') in datetime_formats: | |
392 continue | |
393 try: | |
394 datetime_formats[elem.attrib.get('type')] = \ | |
395 unicode(elem.findtext('dateTimeFormat/pattern')) | |
396 except ValueError, e: | |
397 print>>sys.stderr, 'ERROR: %s' % e | |
398 elif elem.tag == 'alias': | |
399 datetime_formats = Alias(_translate_alias( | |
400 ['datetime_formats'], elem.attrib['path']) | |
401 ) | |
33 | 402 |
1 | 403 # <numbers> |
404 | |
405 number_symbols = data.setdefault('number_symbols', {}) | |
469 | 406 for elem in tree.findall('.//numbers/symbols/*'): |
510
d877836a8455
merge r478 from trunk: Fix the import script to skip alt or draft items in the numbers/symbols subtree of a locale (ticket #217)
fschwarz
parents:
509
diff
changeset
|
407 if ('draft' in elem.attrib or 'alt' in elem.attrib): |
d877836a8455
merge r478 from trunk: Fix the import script to skip alt or draft items in the numbers/symbols subtree of a locale (ticket #217)
fschwarz
parents:
509
diff
changeset
|
408 continue |
1 | 409 number_symbols[elem.tag] = unicode(elem.text) |
410 | |
411 decimal_formats = data.setdefault('decimal_formats', {}) | |
469 | 412 for elem in tree.findall('.//decimalFormats/decimalFormatLength'): |
379 | 413 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
414 and elem.attrib.get('type') in decimal_formats: | |
1 | 415 continue |
26
710090104678
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
416 pattern = unicode(elem.findtext('decimalFormat/pattern')) |
710090104678
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
417 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
1 | 418 |
419 scientific_formats = data.setdefault('scientific_formats', {}) | |
469 | 420 for elem in tree.findall('.//scientificFormats/scientificFormatLength'): |
379 | 421 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
422 and elem.attrib.get('type') in scientific_formats: | |
1 | 423 continue |
125 | 424 pattern = unicode(elem.findtext('scientificFormat/pattern')) |
425 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | |
1 | 426 |
427 currency_formats = data.setdefault('currency_formats', {}) | |
469 | 428 for elem in tree.findall('.//currencyFormats/currencyFormatLength'): |
379 | 429 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
430 and elem.attrib.get('type') in currency_formats: | |
1 | 431 continue |
125 | 432 pattern = unicode(elem.findtext('currencyFormat/pattern')) |
433 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | |
1 | 434 |
435 percent_formats = data.setdefault('percent_formats', {}) | |
469 | 436 for elem in tree.findall('.//percentFormats/percentFormatLength'): |
379 | 437 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
438 and elem.attrib.get('type') in percent_formats: | |
1 | 439 continue |
26
710090104678
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
440 pattern = unicode(elem.findtext('percentFormat/pattern')) |
710090104678
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
441 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
1 | 442 |
26
710090104678
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
443 currency_names = data.setdefault('currency_names', {}) |
710090104678
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
444 currency_symbols = data.setdefault('currency_symbols', {}) |
469 | 445 for elem in tree.findall('.//currencies/currency'): |
389
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
446 code = elem.attrib['type'] |
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
447 # TODO: support plural rules for currency name selection |
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
448 for name in elem.findall('displayName'): |
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
449 if ('draft' in name.attrib or 'count' in name.attrib) \ |
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
450 and code in currency_names: |
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
451 continue |
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
452 currency_names[code] = unicode(name.text) |
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
453 # TODO: support choice patterns for currency symbol selection |
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
454 symbol = elem.find('symbol') |
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
455 if symbol is not None and 'draft' not in symbol.attrib \ |
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
456 and 'choice' not in symbol.attrib: |
dddfd2551f94
Ported [424], [425], and [428] back to 0.9.x branch.
cmlenz
parents:
379
diff
changeset
|
457 currency_symbols[code] = unicode(symbol.text) |
1 | 458 |
233
bc22f5aef216
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
459 outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb') |
1 | 460 try: |
461 pickle.dump(data, outfile, 2) | |
462 finally: | |
463 outfile.close() | |
464 | |
379 | 465 |
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
1 | 466 if __name__ == '__main__': |
467 main() |