annotate scripts/import_cldr.py @ 154:4d2117dfd7f5

The default ordering of messages in generated POT files, which is based on the order those messages are found when walking the source tree, is no longer subject to differences between platforms; directory and file names are now always sorted alphabetically.
author cmlenz
date Wed, 20 Jun 2007 21:41:00 +0000
parents b75ae5def3b1
children bc22f5aef216
rev   line source
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
1 #!/usr/bin/env python
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
2 # -*- coding: utf-8 -*-
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
3 #
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
4 # Copyright (C) 2007 Edgewall Software
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
5 # All rights reserved.
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
6 #
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
7 # This software is licensed as described in the file COPYING, which
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
8 # you should have received as part of this distribution. The terms
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
9 # are also available at http://babel.edgewall.org/wiki/License.
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
10 #
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
11 # This software consists of voluntary contributions made by many
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
12 # individuals. For the exact contribution history, see the revision
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
13 # history and logs, available at http://babel.edgewall.org/log/.
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
14
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
15 import copy
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
16 from optparse import OptionParser
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
17 import os
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
18 import pickle
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
19 import sys
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
20 try:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
21 from xml.etree.ElementTree import parse
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
22 except ImportError:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
23 from elementtree.ElementTree import parse
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
24
65
7478d663561f Add Babel soruce path to CLDR import script automatically for asmodai ;-).
cmlenz
parents: 34
diff changeset
# Put the parent of this script's directory at the very front of the import
# path, so that the Babel sources found there win over any Babel that is
# already installed.
sys.path[:0] = [os.path.join(os.path.dirname(sys.argv[0]), '..')]
7478d663561f Add Babel soruce path to CLDR import script automatically for asmodai ;-).
cmlenz
parents: 34
diff changeset
27
9
3be73c6f01f1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
28 from babel import dates, numbers
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
29
15
76985c08a339 Minor date formatting improvements.
cmlenz
parents: 13
diff changeset
# Maps the three-letter day identifiers used in the CLDR XML files to integer
# day indexes, counting from Monday (0) through Sunday (6).
weekdays = dict(zip(('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'),
                    range(7)))
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
32
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
try:
    any
except NameError:
    # The ``any`` builtin is not available on this interpreter, so provide a
    # stand-in that honours the same contract.
    def any(iterable):
        """Return True if at least one item of `iterable` is true.

        Iteration stops at the first true item, and the result is always a
        bool: False for an empty iterable or one holding only false items.
        """
        for item in iterable:
            if item:
                return True
        return False
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
38
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
def _text(elem):
    """Return the text content of `elem`, including that of all descendants.

    The element's own text, the text of every nested element and the tail
    text following each nested element are concatenated in document order.
    Leading and trailing whitespace is then stripped from the combined
    string. An element without any text yields an empty string.

    The tail of `elem` itself is deliberately left out: it is the text that
    follows the element's closing tag and therefore belongs to the parent
    element's content, not to `elem`.

    :param elem: the ElementTree element to extract the text from
    :return: the stripped, concatenated text as a unicode string
    """
    def _collect(node, buf):
        if node.text:
            buf.append(node.text)
        for child in node:
            _collect(child, buf)
            # A child's tail sits between that child and its next sibling,
            # so it is part of the content of `node`.
            if child.tail:
                buf.append(child.tail)

    buf = []
    _collect(elem, buf)
    # Strip only once, at the very end, so that whitespace separating nested
    # elements is preserved.
    return u''.join(buf).strip()
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
45
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
46 def main():
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
47 parser = OptionParser(usage='%prog path/to/cldr')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
48 options, args = parser.parse_args()
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
49 if len(args) != 1:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
50 parser.error('incorrect number of arguments')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
51
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
52 srcdir = args[0]
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
53 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
54 '..', 'babel', 'localedata')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
55
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
56 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
57
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
58 # build a territory containment mapping for inheritance
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
59 regions = {}
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
60 for elem in sup.findall('//territoryContainment/group'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
61 regions[elem.attrib['type']] = elem.attrib['contains'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
62
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
63 # Resolve territory containment
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
64 territory_containment = {}
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
65 region_items = regions.items()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
66 region_items.sort()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
67 for group, territory_list in region_items:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
68 for territory in territory_list:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
69 containers = territory_containment.setdefault(territory, set([]))
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
70 if group in territory_containment:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
71 containers |= territory_containment[group]
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
72 containers.add(group)
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
73
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
74 filenames = os.listdir(os.path.join(srcdir, 'main'))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
75 filenames.remove('root.xml')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
76 filenames.sort(lambda a,b: len(a)-len(b))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
77 filenames.insert(0, 'root.xml')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
78
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
79 dicts = {}
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
80
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
81 for filename in filenames:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
82 print>>sys.stderr, 'Processing input file %r' % filename
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
83 stem, ext = os.path.splitext(filename)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
84 if ext != '.xml':
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
85 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
86
26
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
87 tree = parse(os.path.join(srcdir, 'main', filename))
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
88 data = {}
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
89
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
90 language = None
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
91 elem = tree.find('//identity/language')
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
92 if elem is not None:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
93 language = elem.attrib['type']
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
94 print>>sys.stderr, ' Language: %r' % language
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
95
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
96 territory = None
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
97 elem = tree.find('//identity/territory')
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
98 if elem is not None:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
99 territory = elem.attrib['type']
13
b6c0de43fa40 Extended and documented `LazyProxy`.
cmlenz
parents: 9
diff changeset
100 else:
b6c0de43fa40 Extended and documented `LazyProxy`.
cmlenz
parents: 9
diff changeset
101 territory = '001' # world
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
102 print>>sys.stderr, ' Territory: %r' % territory
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
103 regions = territory_containment.get(territory, [])
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
104 print>>sys.stderr, ' Regions: %r' % regions
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
105
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
106 # <localeDisplayNames>
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
107
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
108 territories = data.setdefault('territories', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
109 for elem in tree.findall('//territories/territory'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
110 if 'draft' in elem.attrib and elem.attrib['type'] in territories:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
111 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
112 territories[elem.attrib['type']] = _text(elem)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
113
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
114 languages = data.setdefault('languages', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
115 for elem in tree.findall('//languages/language'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
116 if 'draft' in elem.attrib and elem.attrib['type'] in languages:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
117 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
118 languages[elem.attrib['type']] = _text(elem)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
119
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
120 variants = data.setdefault('variants', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
121 for elem in tree.findall('//variants/variant'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
122 if 'draft' in elem.attrib and elem.attrib['type'] in variants:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
123 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
124 variants[elem.attrib['type']] = _text(elem)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
125
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
126 scripts = data.setdefault('scripts', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
127 for elem in tree.findall('//scripts/script'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
128 if 'draft' in elem.attrib and elem.attrib['type'] in scripts:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
129 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
130 scripts[elem.attrib['type']] = _text(elem)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
131
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
132 # <dates>
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
133
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
134 week_data = data.setdefault('week_data', {})
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
135 supelem = sup.find('//weekData')
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
136
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
137 for elem in supelem.findall('minDays'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
138 territories = elem.attrib['territories'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
139 if territory in territories or any([r in territories for r in regions]):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
140 week_data['min_days'] = int(elem.attrib['count'])
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
141
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
142 for elem in supelem.findall('firstDay'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
143 territories = elem.attrib['territories'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
144 if territory in territories or any([r in territories for r in regions]):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
145 week_data['first_day'] = weekdays[elem.attrib['day']]
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
146
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
147 for elem in supelem.findall('weekendStart'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
148 territories = elem.attrib['territories'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
149 if territory in territories or any([r in territories for r in regions]):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
150 week_data['weekend_start'] = weekdays[elem.attrib['day']]
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
151
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
152 for elem in supelem.findall('weekendEnd'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
153 territories = elem.attrib['territories'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
154 if territory in territories or any([r in territories for r in regions]):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
155 week_data['weekend_end'] = weekdays[elem.attrib['day']]
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
156
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
157 time_zones = data.setdefault('time_zones', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
158 for elem in tree.findall('//timeZoneNames/zone'):
28
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
159 info = {}
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
160 city = elem.findtext('exemplarCity')
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
161 if city:
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
162 info['city'] = unicode(city)
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
163 for child in elem.findall('long/*'):
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
164 info.setdefault('long', {})[child.tag] = unicode(child.text)
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
165 for child in elem.findall('short/*'):
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
166 info.setdefault('short', {})[child.tag] = unicode(child.text)
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
167 time_zones[elem.attrib['type']] = info
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
168
34
3666f3d3df15 Extended time-zone support.
cmlenz
parents: 33
diff changeset
169 zone_aliases = data.setdefault('zone_aliases', {})
3666f3d3df15 Extended time-zone support.
cmlenz
parents: 33
diff changeset
170 if stem == 'root':
3666f3d3df15 Extended time-zone support.
cmlenz
parents: 33
diff changeset
171 for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
3666f3d3df15 Extended time-zone support.
cmlenz
parents: 33
diff changeset
172 if 'aliases' in elem.attrib:
3666f3d3df15 Extended time-zone support.
cmlenz
parents: 33
diff changeset
173 canonical_id = elem.attrib['type']
3666f3d3df15 Extended time-zone support.
cmlenz
parents: 33
diff changeset
174 for alias in elem.attrib['aliases'].split():
3666f3d3df15 Extended time-zone support.
cmlenz
parents: 33
diff changeset
175 zone_aliases[alias] = canonical_id
3666f3d3df15 Extended time-zone support.
cmlenz
parents: 33
diff changeset
176
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
177 for calendar in tree.findall('//calendars/calendar'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
178 if calendar.attrib['type'] != 'gregorian':
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
179 # TODO: support other calendar types
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
180 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
181
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
182 months = data.setdefault('months', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
183 for ctxt in calendar.findall('months/monthContext'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
184 ctxts = months.setdefault(ctxt.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
185 for width in ctxt.findall('monthWidth'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
186 widths = ctxts.setdefault(width.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
187 for elem in width.findall('month'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
188 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
189 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
190 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
191
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
192 days = data.setdefault('days', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
193 for ctxt in calendar.findall('days/dayContext'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
194 ctxts = days.setdefault(ctxt.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
195 for width in ctxt.findall('dayWidth'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
196 widths = ctxts.setdefault(width.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
197 for elem in width.findall('day'):
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
198 dtype = weekdays[elem.attrib['type']]
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
199 if 'draft' in elem.attrib and dtype in widths:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
200 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
201 widths[dtype] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
202
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
203 quarters = data.setdefault('quarters', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
204 for ctxt in calendar.findall('quarters/quarterContext'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
205 ctxts = quarters.setdefault(ctxt.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
206 for width in ctxt.findall('quarterWidth'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
207 widths = ctxts.setdefault(width.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
208 for elem in width.findall('quarter'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
209 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
210 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
211 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
212
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
213 eras = data.setdefault('eras', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
214 for width in calendar.findall('eras/*'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
215 ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag]
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
216 widths = eras.setdefault(ewidth, {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
217 for elem in width.findall('era'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
218 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
219 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
220 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
221
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
222 # AM/PM
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
223 periods = data.setdefault('periods', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
224 for elem in calendar.findall('am'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
225 if 'draft' in elem.attrib and elem.tag in periods:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
226 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
227 periods[elem.tag] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
228 for elem in calendar.findall('pm'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
229 if 'draft' in elem.attrib and elem.tag in periods:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
230 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
231 periods[elem.tag] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
232
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
233 date_formats = data.setdefault('date_formats', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
234 for elem in calendar.findall('dateFormats/dateFormatLength'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
235 if 'draft' in elem.attrib and elem.attrib.get('type') in date_formats:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
236 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
237 try:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
238 date_formats[elem.attrib.get('type')] = \
9
3be73c6f01f1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
239 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
240 except ValueError, e:
26
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
241 print>>sys.stderr, 'ERROR: %s' % e
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
242
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
243 time_formats = data.setdefault('time_formats', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
244 for elem in calendar.findall('timeFormats/timeFormatLength'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
245 if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
246 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
247 try:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
248 time_formats[elem.attrib.get('type')] = \
9
3be73c6f01f1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
249 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
250 except ValueError, e:
26
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
251 print>>sys.stderr, 'ERROR: %s' % e
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
252
33
0740b6d31799 * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
253 datetime_formats = data.setdefault('datetime_formats', {})
0740b6d31799 * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
254 for elem in calendar.findall('dateTimeFormats/dateTimeFormatLength'):
0740b6d31799 * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
255 if 'draft' in elem.attrib and elem.attrib.get('type') in datetime_formats:
0740b6d31799 * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
256 continue
0740b6d31799 * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
257 try:
0740b6d31799 * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
258 datetime_formats[elem.attrib.get('type')] = \
0740b6d31799 * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
259 unicode(elem.findtext('dateTimeFormat/pattern'))
0740b6d31799 * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
260 except ValueError, e:
0740b6d31799 * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
261 print>>sys.stderr, 'ERROR: %s' % e
0740b6d31799 * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
262
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
263 # <numbers>
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
264
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
265 number_symbols = data.setdefault('number_symbols', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
266 for elem in tree.findall('//numbers/symbols/*'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
267 number_symbols[elem.tag] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
268
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
269 decimal_formats = data.setdefault('decimal_formats', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
270 for elem in tree.findall('//decimalFormats/decimalFormatLength'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
271 if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
272 continue
26
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
273 pattern = unicode(elem.findtext('decimalFormat/pattern'))
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
274 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
275
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
276 scientific_formats = data.setdefault('scientific_formats', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
277 for elem in tree.findall('//scientificFormats/scientificFormatLength'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
278 if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
279 continue
125
b75ae5def3b1 Add currency formatting.
cmlenz
parents: 65
diff changeset
280 pattern = unicode(elem.findtext('scientificFormat/pattern'))
b75ae5def3b1 Add currency formatting.
cmlenz
parents: 65
diff changeset
281 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
282
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
283 currency_formats = data.setdefault('currency_formats', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
284 for elem in tree.findall('//currencyFormats/currencyFormatLength'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
285 if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
286 continue
125
b75ae5def3b1 Add currency formatting.
cmlenz
parents: 65
diff changeset
287 pattern = unicode(elem.findtext('currencyFormat/pattern'))
b75ae5def3b1 Add currency formatting.
cmlenz
parents: 65
diff changeset
288 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
289
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
290 percent_formats = data.setdefault('percent_formats', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
291 for elem in tree.findall('//percentFormats/percentFormatLength'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
292 if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
293 continue
26
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
294 pattern = unicode(elem.findtext('percentFormat/pattern'))
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
295 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
296
26
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
297 currency_names = data.setdefault('currency_names', {})
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
298 currency_symbols = data.setdefault('currency_symbols', {})
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
299 for elem in tree.findall('//currencies/currency'):
26
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
300 name = elem.findtext('displayName')
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
301 if name:
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
302 currency_names[elem.attrib['type']] = unicode(name)
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
303 symbol = elem.findtext('symbol')
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
304 if symbol:
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
305 currency_symbols[elem.attrib['type']] = unicode(symbol)
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
306
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
307 dicts[stem] = data
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
308 outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
309 try:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
310 pickle.dump(data, outfile, 2)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
311 finally:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
312 outfile.close()
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
313
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
314 if __name__ == '__main__':
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
315 main()
Copyright (C) 2012-2017 Edgewall Software