Mercurial > babel > old > mirror
comparison scripts/import_cldr.py @ 235:d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
author | cmlenz |
---|---|
date | Wed, 01 Aug 2007 12:32:20 +0000 |
parents | a72de8971819 |
children | c22f292731be |
comparison legend: equal | deleted | inserted | replaced
234:541b6d630575 | 235:d0cd235ede46 |
---|---|
49 if len(args) != 1: | 49 if len(args) != 1: |
50 parser.error('incorrect number of arguments') | 50 parser.error('incorrect number of arguments') |
51 | 51 |
52 srcdir = args[0] | 52 srcdir = args[0] |
53 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), | 53 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), |
54 '..', 'babel', 'localedata') | 54 '..', 'babel') |
55 | 55 |
56 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml')) | 56 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml')) |
57 | |
58 # import global data from the supplemental files | |
59 global_data = {} | |
60 | |
61 territory_zones = global_data.setdefault('territory_zones', {}) | |
62 zone_aliases = global_data.setdefault('zone_aliases', {}) | |
63 zone_territories = global_data.setdefault('zone_territories', {}) | |
64 for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'): | |
65 tzid = elem.attrib['type'] | |
66 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) | |
67 zone_territories[tzid] = elem.attrib['territory'] | |
68 if 'aliases' in elem.attrib: | |
69 for alias in elem.attrib['aliases'].split(): | |
70 zone_aliases[alias] = tzid | |
71 | |
72 outfile = open(os.path.join(destdir, 'global.dat'), 'wb') | |
73 try: | |
74 pickle.dump(global_data, outfile, 2) | |
75 finally: | |
76 outfile.close() | |
57 | 77 |
58 # build a territory containment mapping for inheritance | 78 # build a territory containment mapping for inheritance |
59 regions = {} | 79 regions = {} |
60 for elem in sup.findall('//territoryContainment/group'): | 80 for elem in sup.findall('//territoryContainment/group'): |
61 regions[elem.attrib['type']] = elem.attrib['contains'].split() | 81 regions[elem.attrib['type']] = elem.attrib['contains'].split() |
74 filenames = os.listdir(os.path.join(srcdir, 'main')) | 94 filenames = os.listdir(os.path.join(srcdir, 'main')) |
75 filenames.remove('root.xml') | 95 filenames.remove('root.xml') |
76 filenames.sort(lambda a,b: len(a)-len(b)) | 96 filenames.sort(lambda a,b: len(a)-len(b)) |
77 filenames.insert(0, 'root.xml') | 97 filenames.insert(0, 'root.xml') |
78 | 98 |
79 dicts = {} | |
80 | |
81 for filename in filenames: | 99 for filename in filenames: |
82 print>>sys.stderr, 'Processing input file %r' % filename | 100 print>>sys.stderr, 'Processing input file %r' % filename |
83 stem, ext = os.path.splitext(filename) | 101 stem, ext = os.path.splitext(filename) |
84 if ext != '.xml': | 102 if ext != '.xml': |
85 continue | 103 continue |
151 | 169 |
152 for elem in supelem.findall('weekendEnd'): | 170 for elem in supelem.findall('weekendEnd'): |
153 territories = elem.attrib['territories'].split() | 171 territories = elem.attrib['territories'].split() |
154 if territory in territories or any([r in territories for r in regions]): | 172 if territory in territories or any([r in territories for r in regions]): |
155 week_data['weekend_end'] = weekdays[elem.attrib['day']] | 173 week_data['weekend_end'] = weekdays[elem.attrib['day']] |
174 | |
175 zone_formats = data.setdefault('zone_formats', {}) | |
176 for elem in tree.findall('//timeZoneNames/gmtFormat'): | |
177 if 'draft' not in elem.attrib: | |
178 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s') | |
179 break | |
180 for elem in tree.findall('//timeZoneNames/regionFormat'): | |
181 if 'draft' not in elem.attrib: | |
182 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s') | |
183 break | |
184 for elem in tree.findall('//timeZoneNames/fallbackFormat'): | |
185 if 'draft' not in elem.attrib: | |
186 zone_formats['fallback'] = unicode(elem.text) \ | |
187 .replace('{0}', '%(0)s').replace('{1}', '%(1)s') | |
188 break | |
156 | 189 |
157 time_zones = data.setdefault('time_zones', {}) | 190 time_zones = data.setdefault('time_zones', {}) |
158 for elem in tree.findall('//timeZoneNames/zone'): | 191 for elem in tree.findall('//timeZoneNames/zone'): |
159 info = {} | 192 info = {} |
160 city = elem.findtext('exemplarCity') | 193 city = elem.findtext('exemplarCity') |
162 info['city'] = unicode(city) | 195 info['city'] = unicode(city) |
163 for child in elem.findall('long/*'): | 196 for child in elem.findall('long/*'): |
164 info.setdefault('long', {})[child.tag] = unicode(child.text) | 197 info.setdefault('long', {})[child.tag] = unicode(child.text) |
165 for child in elem.findall('short/*'): | 198 for child in elem.findall('short/*'): |
166 info.setdefault('short', {})[child.tag] = unicode(child.text) | 199 info.setdefault('short', {})[child.tag] = unicode(child.text) |
200 for child in elem.findall('usesMetazone'): | |
201 if 'to' not in child.attrib: # FIXME: support old mappings | |
202 info['use_metazone'] = child.attrib['mzone'] | |
167 time_zones[elem.attrib['type']] = info | 203 time_zones[elem.attrib['type']] = info |
168 | 204 |
169 zone_aliases = data.setdefault('zone_aliases', {}) | 205 meta_zones = data.setdefault('meta_zones', {}) |
170 if stem == 'root': | 206 for elem in tree.findall('//timeZoneNames/metazone'): |
171 for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'): | 207 info = {} |
172 if 'aliases' in elem.attrib: | 208 city = elem.findtext('exemplarCity') |
173 canonical_id = elem.attrib['type'] | 209 if city: |
174 for alias in elem.attrib['aliases'].split(): | 210 info['city'] = unicode(city) |
175 zone_aliases[alias] = canonical_id | 211 for child in elem.findall('long/*'): |
212 info.setdefault('long', {})[child.tag] = unicode(child.text) | |
213 for child in elem.findall('short/*'): | |
214 info.setdefault('short', {})[child.tag] = unicode(child.text) | |
215 info['common'] = elem.findtext('commonlyUsed') == 'true' | |
216 meta_zones[elem.attrib['type']] = info | |
176 | 217 |
177 for calendar in tree.findall('//calendars/calendar'): | 218 for calendar in tree.findall('//calendars/calendar'): |
178 if calendar.attrib['type'] != 'gregorian': | 219 if calendar.attrib['type'] != 'gregorian': |
179 # TODO: support other calendar types | 220 # TODO: support other calendar types |
180 continue | 221 continue |
210 continue | 251 continue |
211 widths[int(elem.attrib.get('type'))] = unicode(elem.text) | 252 widths[int(elem.attrib.get('type'))] = unicode(elem.text) |
212 | 253 |
213 eras = data.setdefault('eras', {}) | 254 eras = data.setdefault('eras', {}) |
214 for width in calendar.findall('eras/*'): | 255 for width in calendar.findall('eras/*'): |
215 ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag] | 256 ewidth = { |
257 'eraAbbr': 'abbreviated', | |
258 'eraNames': 'wide', | |
259 'eraNarrow': 'narrow', | |
260 }[width.tag] | |
216 widths = eras.setdefault(ewidth, {}) | 261 widths = eras.setdefault(ewidth, {}) |
217 for elem in width.findall('era'): | 262 for elem in width.findall('era'): |
218 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths: | 263 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths: |
219 continue | 264 continue |
220 widths[int(elem.attrib.get('type'))] = unicode(elem.text) | 265 widths[int(elem.attrib.get('type'))] = unicode(elem.text) |
302 currency_names[elem.attrib['type']] = unicode(name) | 347 currency_names[elem.attrib['type']] = unicode(name) |
303 symbol = elem.findtext('symbol') | 348 symbol = elem.findtext('symbol') |
304 if symbol: | 349 if symbol: |
305 currency_symbols[elem.attrib['type']] = unicode(symbol) | 350 currency_symbols[elem.attrib['type']] = unicode(symbol) |
306 | 351 |
307 dicts[stem] = data | 352 outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb') |
308 outfile = open(os.path.join(destdir, stem + '.dat'), 'wb') | |
309 try: | 353 try: |
310 pickle.dump(data, outfile, 2) | 354 pickle.dump(data, outfile, 2) |
311 finally: | 355 finally: |
312 outfile.close() | 356 outfile.close() |
313 | 357 |