annotate scripts/import_cldr.py @ 156:0a41bd313985

The default ordering of messages in generated POT files, which is based on the order those messages are found when walking the source tree, is no longer subject to differences between platforms; directory and file names are now always sorted alphabetically.
author cmlenz
date Wed, 20 Jun 2007 21:41:00 +0000
parents a72de8971819
children d0cd235ede46
rev   line source
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
1 #!/usr/bin/env python
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
2 # -*- coding: utf-8 -*-
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
3 #
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
4 # Copyright (C) 2007 Edgewall Software
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
5 # All rights reserved.
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
6 #
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
7 # This software is licensed as described in the file COPYING, which
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
8 # you should have received as part of this distribution. The terms
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
9 # are also available at http://babel.edgewall.org/wiki/License.
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
10 #
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
11 # This software consists of voluntary contributions made by many
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
12 # individuals. For the exact contribution history, see the revision
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
13 # history and logs, available at http://babel.edgewall.org/log/.
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
14
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
15 import copy
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
16 from optparse import OptionParser
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
17 import os
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
18 import pickle
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
19 import sys
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
20 try:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
21 from xml.etree.ElementTree import parse
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
22 except ImportError:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
23 from elementtree.ElementTree import parse
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
24
67
ad48b95af0d9 Add Babel soruce path to CLDR import script automatically for asmodai ;-).
cmlenz
parents: 36
diff changeset
# Put the parent of this script's directory at the front of the module search
# path so that the Babel source tree is imported in preference to any copy of
# Babel that was previously installed.
sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), os.pardir))
ad48b95af0d9 Add Babel soruce path to CLDR import script automatically for asmodai ;-).
cmlenz
parents: 36
diff changeset
27
11
11f64b232b04 Add basic support for number format patterns.
jonas
parents: 10
diff changeset
28 from babel import dates, numbers
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
29
17
aa33ad077d24 Minor date formatting improvements.
cmlenz
parents: 15
diff changeset
# Numeric index for each three-letter weekday code used as an attribute value
# in the CLDR XML, counting from Monday (0) through Sunday (6).
weekdays = dict(zip(('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'),
                    range(7)))
10
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
32
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
# Older interpreters have no built-in ``any``; probe for it and supply a
# fallback with the same contract when the name is missing.
try:
    any
except NameError:
    def any(iterable):
        """Return True if at least one item of ``iterable`` is truthy.

        Stand-in for the built-in of the same name: it yields a real
        boolean (``False`` for an empty iterable) and stops consuming
        ``iterable`` as soon as a truthy item is found.

        :param iterable: any iterable of values to test for truthiness
        :return: ``True`` if some item is truthy, ``False`` otherwise
        """
        for item in iterable:
            if item:
                return True
        return False
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
38
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
39 def _text(elem):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
40 buf = [elem.text or '']
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
41 for child in elem:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
42 buf.append(_text(child))
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
43 buf.append(elem.tail or '')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
44 return u''.join(filter(None, buf)).strip()
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
45
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
46 def main():
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
47 parser = OptionParser(usage='%prog path/to/cldr')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
48 options, args = parser.parse_args()
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
49 if len(args) != 1:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
50 parser.error('incorrect number of arguments')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
51
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
52 srcdir = args[0]
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
53 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
54 '..', 'babel', 'localedata')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
55
10
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
56 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
57
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
58 # build a territory containment mapping for inheritance
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
59 regions = {}
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
60 for elem in sup.findall('//territoryContainment/group'):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
61 regions[elem.attrib['type']] = elem.attrib['contains'].split()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
62
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
63 # Resolve territory containment
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
64 territory_containment = {}
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
65 region_items = regions.items()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
66 region_items.sort()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
67 for group, territory_list in region_items:
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
68 for territory in territory_list:
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
69 containers = territory_containment.setdefault(territory, set([]))
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
70 if group in territory_containment:
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
71 containers |= territory_containment[group]
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
72 containers.add(group)
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
73
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
74 filenames = os.listdir(os.path.join(srcdir, 'main'))
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
75 filenames.remove('root.xml')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
76 filenames.sort(lambda a,b: len(a)-len(b))
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
77 filenames.insert(0, 'root.xml')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
78
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
79 dicts = {}
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
80
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
81 for filename in filenames:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
82 print>>sys.stderr, 'Processing input file %r' % filename
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
83 stem, ext = os.path.splitext(filename)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
84 if ext != '.xml':
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
85 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
86
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
87 tree = parse(os.path.join(srcdir, 'main', filename))
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
88 data = {}
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
89
10
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
90 language = None
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
91 elem = tree.find('//identity/language')
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
92 if elem is not None:
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
93 language = elem.attrib['type']
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
94 print>>sys.stderr, ' Language: %r' % language
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
95
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
96 territory = None
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
97 elem = tree.find('//identity/territory')
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
98 if elem is not None:
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
99 territory = elem.attrib['type']
15
b47c34d42eda Extended and documented `LazyProxy`.
cmlenz
parents: 11
diff changeset
100 else:
b47c34d42eda Extended and documented `LazyProxy`.
cmlenz
parents: 11
diff changeset
101 territory = '001' # world
10
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
102 print>>sys.stderr, ' Territory: %r' % territory
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
103 regions = territory_containment.get(territory, [])
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
104 print>>sys.stderr, ' Regions: %r' % regions
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
105
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
106 # <localeDisplayNames>
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
107
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
108 territories = data.setdefault('territories', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
109 for elem in tree.findall('//territories/territory'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
110 if 'draft' in elem.attrib and elem.attrib['type'] in territories:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
111 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
112 territories[elem.attrib['type']] = _text(elem)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
113
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
114 languages = data.setdefault('languages', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
115 for elem in tree.findall('//languages/language'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
116 if 'draft' in elem.attrib and elem.attrib['type'] in languages:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
117 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
118 languages[elem.attrib['type']] = _text(elem)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
119
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
120 variants = data.setdefault('variants', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
121 for elem in tree.findall('//variants/variant'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
122 if 'draft' in elem.attrib and elem.attrib['type'] in variants:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
123 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
124 variants[elem.attrib['type']] = _text(elem)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
125
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
126 scripts = data.setdefault('scripts', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
127 for elem in tree.findall('//scripts/script'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
128 if 'draft' in elem.attrib and elem.attrib['type'] in scripts:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
129 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
130 scripts[elem.attrib['type']] = _text(elem)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
131
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
132 # <dates>
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
133
10
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
134 week_data = data.setdefault('week_data', {})
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
135 supelem = sup.find('//weekData')
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
136
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
137 for elem in supelem.findall('minDays'):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
138 territories = elem.attrib['territories'].split()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
139 if territory in territories or any([r in territories for r in regions]):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
140 week_data['min_days'] = int(elem.attrib['count'])
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
141
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
142 for elem in supelem.findall('firstDay'):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
143 territories = elem.attrib['territories'].split()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
144 if territory in territories or any([r in territories for r in regions]):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
145 week_data['first_day'] = weekdays[elem.attrib['day']]
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
146
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
147 for elem in supelem.findall('weekendStart'):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
148 territories = elem.attrib['territories'].split()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
149 if territory in territories or any([r in territories for r in regions]):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
150 week_data['weekend_start'] = weekdays[elem.attrib['day']]
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
151
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
152 for elem in supelem.findall('weekendEnd'):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
153 territories = elem.attrib['territories'].split()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
154 if territory in territories or any([r in territories for r in regions]):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
155 week_data['weekend_end'] = weekdays[elem.attrib['day']]
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
156
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
157 time_zones = data.setdefault('time_zones', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
158 for elem in tree.findall('//timeZoneNames/zone'):
30
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
159 info = {}
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
160 city = elem.findtext('exemplarCity')
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
161 if city:
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
162 info['city'] = unicode(city)
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
163 for child in elem.findall('long/*'):
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
164 info.setdefault('long', {})[child.tag] = unicode(child.text)
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
165 for child in elem.findall('short/*'):
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
166 info.setdefault('short', {})[child.tag] = unicode(child.text)
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
167 time_zones[elem.attrib['type']] = info
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
168
36
2e143f1a0003 Extended time-zone support.
cmlenz
parents: 35
diff changeset
169 zone_aliases = data.setdefault('zone_aliases', {})
2e143f1a0003 Extended time-zone support.
cmlenz
parents: 35
diff changeset
170 if stem == 'root':
2e143f1a0003 Extended time-zone support.
cmlenz
parents: 35
diff changeset
171 for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
2e143f1a0003 Extended time-zone support.
cmlenz
parents: 35
diff changeset
172 if 'aliases' in elem.attrib:
2e143f1a0003 Extended time-zone support.
cmlenz
parents: 35
diff changeset
173 canonical_id = elem.attrib['type']
2e143f1a0003 Extended time-zone support.
cmlenz
parents: 35
diff changeset
174 for alias in elem.attrib['aliases'].split():
2e143f1a0003 Extended time-zone support.
cmlenz
parents: 35
diff changeset
175 zone_aliases[alias] = canonical_id
2e143f1a0003 Extended time-zone support.
cmlenz
parents: 35
diff changeset
176
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
177 for calendar in tree.findall('//calendars/calendar'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
178 if calendar.attrib['type'] != 'gregorian':
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
179 # TODO: support other calendar types
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
180 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
181
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
182 months = data.setdefault('months', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
183 for ctxt in calendar.findall('months/monthContext'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
184 ctxts = months.setdefault(ctxt.attrib['type'], {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
185 for width in ctxt.findall('monthWidth'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
186 widths = ctxts.setdefault(width.attrib['type'], {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
187 for elem in width.findall('month'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
188 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
189 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
190 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
191
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
192 days = data.setdefault('days', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
193 for ctxt in calendar.findall('days/dayContext'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
194 ctxts = days.setdefault(ctxt.attrib['type'], {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
195 for width in ctxt.findall('dayWidth'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
196 widths = ctxts.setdefault(width.attrib['type'], {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
197 for elem in width.findall('day'):
10
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
198 dtype = weekdays[elem.attrib['type']]
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
199 if 'draft' in elem.attrib and dtype in widths:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
200 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
201 widths[dtype] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
202
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
203 quarters = data.setdefault('quarters', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
204 for ctxt in calendar.findall('quarters/quarterContext'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
205 ctxts = quarters.setdefault(ctxt.attrib['type'], {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
206 for width in ctxt.findall('quarterWidth'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
207 widths = ctxts.setdefault(width.attrib['type'], {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
208 for elem in width.findall('quarter'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
209 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
210 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
211 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
212
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
213 eras = data.setdefault('eras', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
214 for width in calendar.findall('eras/*'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
215 ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag]
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
216 widths = eras.setdefault(ewidth, {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
217 for elem in width.findall('era'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
218 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
219 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
220 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
221
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
222 # AM/PM
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
223 periods = data.setdefault('periods', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
224 for elem in calendar.findall('am'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
225 if 'draft' in elem.attrib and elem.tag in periods:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
226 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
227 periods[elem.tag] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
228 for elem in calendar.findall('pm'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
229 if 'draft' in elem.attrib and elem.tag in periods:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
230 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
231 periods[elem.tag] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
232
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
233 date_formats = data.setdefault('date_formats', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
234 for elem in calendar.findall('dateFormats/dateFormatLength'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
235 if 'draft' in elem.attrib and elem.attrib.get('type') in date_formats:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
236 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
237 try:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
238 date_formats[elem.attrib.get('type')] = \
11
11f64b232b04 Add basic support for number format patterns.
jonas
parents: 10
diff changeset
239 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
240 except ValueError, e:
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
241 print>>sys.stderr, 'ERROR: %s' % e
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
242
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
243 time_formats = data.setdefault('time_formats', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
244 for elem in calendar.findall('timeFormats/timeFormatLength'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
245 if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
246 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
247 try:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
248 time_formats[elem.attrib.get('type')] = \
11
11f64b232b04 Add basic support for number format patterns.
jonas
parents: 10
diff changeset
249 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
250 except ValueError, e:
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
251 print>>sys.stderr, 'ERROR: %s' % e
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
252
35
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
253 datetime_formats = data.setdefault('datetime_formats', {})
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
254 for elem in calendar.findall('dateTimeFormats/dateTimeFormatLength'):
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
255 if 'draft' in elem.attrib and elem.attrib.get('type') in datetime_formats:
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
256 continue
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
257 try:
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
258 datetime_formats[elem.attrib.get('type')] = \
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
259 unicode(elem.findtext('dateTimeFormat/pattern'))
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
260 except ValueError, e:
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
261 print>>sys.stderr, 'ERROR: %s' % e
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
262
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
263 # <numbers>
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
264
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
265 number_symbols = data.setdefault('number_symbols', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
266 for elem in tree.findall('//numbers/symbols/*'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
267 number_symbols[elem.tag] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
268
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
269 decimal_formats = data.setdefault('decimal_formats', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
270 for elem in tree.findall('//decimalFormats/decimalFormatLength'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
271 if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
272 continue
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
273 pattern = unicode(elem.findtext('decimalFormat/pattern'))
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
274 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
275
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
276 scientific_formats = data.setdefault('scientific_formats', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
277 for elem in tree.findall('//scientificFormats/scientificFormatLength'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
278 if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
279 continue
127
a72de8971819 Add currency formatting.
cmlenz
parents: 67
diff changeset
280 pattern = unicode(elem.findtext('scientificFormat/pattern'))
a72de8971819 Add currency formatting.
cmlenz
parents: 67
diff changeset
281 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
282
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
283 currency_formats = data.setdefault('currency_formats', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
284 for elem in tree.findall('//currencyFormats/currencyFormatLength'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
285 if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
286 continue
127
a72de8971819 Add currency formatting.
cmlenz
parents: 67
diff changeset
287 pattern = unicode(elem.findtext('currencyFormat/pattern'))
a72de8971819 Add currency formatting.
cmlenz
parents: 67
diff changeset
288 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
289
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
290 percent_formats = data.setdefault('percent_formats', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
291 for elem in tree.findall('//percentFormats/percentFormatLength'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
292 if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
293 continue
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
294 pattern = unicode(elem.findtext('percentFormat/pattern'))
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
295 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
296
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
297 currency_names = data.setdefault('currency_names', {})
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
298 currency_symbols = data.setdefault('currency_symbols', {})
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
299 for elem in tree.findall('//currencies/currency'):
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
300 name = elem.findtext('displayName')
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
301 if name:
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
302 currency_names[elem.attrib['type']] = unicode(name)
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
303 symbol = elem.findtext('symbol')
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
304 if symbol:
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
305 currency_symbols[elem.attrib['type']] = unicode(symbol)
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
306
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
307 dicts[stem] = data
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
308 outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
309 try:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
310 pickle.dump(data, outfile, 2)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
311 finally:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
312 outfile.close()
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
313
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
314 if __name__ == '__main__':
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
315 main()
Copyright (C) 2012-2017 Edgewall Software