annotate scripts/import_cldr.py @ 35:0505d666fa1f

* Import datetime patterns from CLDR. * Make the date/time arguments to the formatting functions optional, defaulting to the current date/time.
author cmlenz
date Mon, 04 Jun 2007 14:28:54 +0000
parents 9a00ac84004c
children 2e143f1a0003
rev   line source
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
1 #!/usr/bin/env python
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
2 # -*- coding: utf-8 -*-
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
3 #
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
4 # Copyright (C) 2007 Edgewall Software
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
5 # All rights reserved.
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
6 #
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
7 # This software is licensed as described in the file COPYING, which
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
8 # you should have received as part of this distribution. The terms
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
9 # are also available at http://babel.edgewall.org/wiki/License.
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
10 #
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
11 # This software consists of voluntary contributions made by many
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
12 # individuals. For the exact contribution history, see the revision
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
13 # history and logs, available at http://babel.edgewall.org/log/.
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
14
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
15 import copy
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
16 from optparse import OptionParser
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
17 import os
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
18 import pickle
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
19 import sys
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
20 try:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
21 from xml.etree.ElementTree import parse
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
22 except ImportError:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
23 from elementtree.ElementTree import parse
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
24
11
11f64b232b04 Add basic support for number format patterns.
jonas
parents: 10
diff changeset
25 from babel import dates, numbers
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
26
17
aa33ad077d24 Minor date formatting improvements.
cmlenz
parents: 15
diff changeset
# Map the weekday abbreviations used as CLDR attribute values to integer
# indexes, with Monday as 0 and Sunday as 6.
27 weekdays = {'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5,
aa33ad077d24 Minor date formatting improvements.
cmlenz
parents: 15
diff changeset
28 'sun': 6}
10
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
29
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
try:
    any
except NameError:
    # The interpreter has no built-in ``any``; provide an equivalent fallback.
    def any(iterable):
        """Return True if at least one item of `iterable` is true.

        Mirrors the built-in: stops at the first true item and always
        returns a bool (False for an empty iterable).
        """
        for item in iterable:
            if item:
                return True
        return False
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
35
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
def _text(elem):
    """Return the concatenated text of `elem` and all of its descendants.

    The text and tail of every nested element are joined in document
    order, followed by the tail of `elem` itself; leading and trailing
    whitespace is then stripped from the final result.

    Whitespace is only stripped once, on the complete string, so that
    whitespace separating sibling elements is preserved instead of the
    neighbouring words being run together.

    :param elem: an ElementTree element
    :return: the stripped text content as a unicode string
    """
    def _collect(node, parts):
        # Document order: own text, then each child subtree, then the tail.
        parts.append(node.text or '')
        for child in node:
            _collect(child, parts)
        parts.append(node.tail or '')

    parts = []
    _collect(elem, parts)
    return u''.join(parts).strip()
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
42
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
43 def main():
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
44 parser = OptionParser(usage='%prog path/to/cldr')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
45 options, args = parser.parse_args()
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
46 if len(args) != 1:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
47 parser.error('incorrect number of arguments')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
48
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
49 srcdir = args[0]
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
50 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
51 '..', 'babel', 'localedata')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
52
10
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
53 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
54
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
55 # build a territory containment mapping for inheritance
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
56 regions = {}
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
57 for elem in sup.findall('//territoryContainment/group'):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
58 regions[elem.attrib['type']] = elem.attrib['contains'].split()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
59
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
60 # Resolve territory containment
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
61 territory_containment = {}
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
62 region_items = regions.items()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
63 region_items.sort()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
64 for group, territory_list in region_items:
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
65 for territory in territory_list:
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
66 containers = territory_containment.setdefault(territory, set([]))
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
67 if group in territory_containment:
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
68 containers |= territory_containment[group]
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
69 containers.add(group)
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
70
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
71 filenames = os.listdir(os.path.join(srcdir, 'main'))
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
72 filenames.remove('root.xml')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
73 filenames.sort(lambda a,b: len(a)-len(b))
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
74 filenames.insert(0, 'root.xml')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
75
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
76 dicts = {}
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
77
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
78 for filename in filenames:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
79 print>>sys.stderr, 'Processing input file %r' % filename
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
80 stem, ext = os.path.splitext(filename)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
81 if ext != '.xml':
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
82 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
83
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
84 tree = parse(os.path.join(srcdir, 'main', filename))
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
85 data = {}
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
86
10
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
87 language = None
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
88 elem = tree.find('//identity/language')
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
89 if elem is not None:
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
90 language = elem.attrib['type']
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
91 print>>sys.stderr, ' Language: %r' % language
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
92
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
93 territory = None
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
94 elem = tree.find('//identity/territory')
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
95 if elem is not None:
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
96 territory = elem.attrib['type']
15
b47c34d42eda Extended and documented `LazyProxy`.
cmlenz
parents: 11
diff changeset
97 else:
b47c34d42eda Extended and documented `LazyProxy`.
cmlenz
parents: 11
diff changeset
98 territory = '001' # world
10
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
99 print>>sys.stderr, ' Territory: %r' % territory
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
100 regions = territory_containment.get(territory, [])
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
101 print>>sys.stderr, ' Regions: %r' % regions
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
102
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
103 # <localeDisplayNames>
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
104
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
105 territories = data.setdefault('territories', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
106 for elem in tree.findall('//territories/territory'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
107 if 'draft' in elem.attrib and elem.attrib['type'] in territories:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
108 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
109 territories[elem.attrib['type']] = _text(elem)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
110
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
111 languages = data.setdefault('languages', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
112 for elem in tree.findall('//languages/language'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
113 if 'draft' in elem.attrib and elem.attrib['type'] in languages:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
114 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
115 languages[elem.attrib['type']] = _text(elem)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
116
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
117 variants = data.setdefault('variants', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
118 for elem in tree.findall('//variants/variant'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
119 if 'draft' in elem.attrib and elem.attrib['type'] in variants:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
120 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
121 variants[elem.attrib['type']] = _text(elem)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
122
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
123 scripts = data.setdefault('scripts', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
124 for elem in tree.findall('//scripts/script'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
125 if 'draft' in elem.attrib and elem.attrib['type'] in scripts:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
126 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
127 scripts[elem.attrib['type']] = _text(elem)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
128
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
129 # <dates>
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
130
10
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
131 week_data = data.setdefault('week_data', {})
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
132 supelem = sup.find('//weekData')
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
133
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
134 for elem in supelem.findall('minDays'):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
135 territories = elem.attrib['territories'].split()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
136 if territory in territories or any([r in territories for r in regions]):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
137 week_data['min_days'] = int(elem.attrib['count'])
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
138
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
139 for elem in supelem.findall('firstDay'):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
140 territories = elem.attrib['territories'].split()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
141 if territory in territories or any([r in territories for r in regions]):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
142 week_data['first_day'] = weekdays[elem.attrib['day']]
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
143
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
144 for elem in supelem.findall('weekendStart'):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
145 territories = elem.attrib['territories'].split()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
146 if territory in territories or any([r in territories for r in regions]):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
147 week_data['weekend_start'] = weekdays[elem.attrib['day']]
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
148
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
149 for elem in supelem.findall('weekendEnd'):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
150 territories = elem.attrib['territories'].split()
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
151 if territory in territories or any([r in territories for r in regions]):
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
152 week_data['weekend_end'] = weekdays[elem.attrib['day']]
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
153
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
154 time_zones = data.setdefault('time_zones', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
155 for elem in tree.findall('//timeZoneNames/zone'):
30
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
156 info = {}
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
157 city = elem.findtext('exemplarCity')
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
158 if city:
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
159 info['city'] = unicode(city)
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
160 for child in elem.findall('long/*'):
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
161 info.setdefault('long', {})[child.tag] = unicode(child.text)
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
162 for child in elem.findall('short/*'):
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
163 info.setdefault('short', {})[child.tag] = unicode(child.text)
9a00ac84004c Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 28
diff changeset
164 time_zones[elem.attrib['type']] = info
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
165
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
166 for calendar in tree.findall('//calendars/calendar'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
167 if calendar.attrib['type'] != 'gregorian':
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
168 # TODO: support other calendar types
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
169 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
170
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
171 months = data.setdefault('months', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
172 for ctxt in calendar.findall('months/monthContext'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
173 ctxts = months.setdefault(ctxt.attrib['type'], {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
174 for width in ctxt.findall('monthWidth'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
175 widths = ctxts.setdefault(width.attrib['type'], {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
176 for elem in width.findall('month'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
177 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
178 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
179 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
180
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
181 days = data.setdefault('days', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
182 for ctxt in calendar.findall('days/dayContext'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
183 ctxts = days.setdefault(ctxt.attrib['type'], {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
184 for width in ctxt.findall('dayWidth'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
185 widths = ctxts.setdefault(width.attrib['type'], {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
186 for elem in width.findall('day'):
10
0ca5dd65594f Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 3
diff changeset
187 dtype = weekdays[elem.attrib['type']]
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
188 if 'draft' in elem.attrib and dtype in widths:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
189 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
190 widths[dtype] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
191
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
192 quarters = data.setdefault('quarters', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
193 for ctxt in calendar.findall('quarters/quarterContext'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
194 ctxts = quarters.setdefault(ctxt.attrib['type'], {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
195 for width in ctxt.findall('quarterWidth'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
196 widths = ctxts.setdefault(width.attrib['type'], {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
197 for elem in width.findall('quarter'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
198 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
199 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
200 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
201
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
202 eras = data.setdefault('eras', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
203 for width in calendar.findall('eras/*'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
204 ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag]
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
205 widths = eras.setdefault(ewidth, {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
206 for elem in width.findall('era'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
207 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
208 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
209 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
210
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
211 # AM/PM
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
212 periods = data.setdefault('periods', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
213 for elem in calendar.findall('am'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
214 if 'draft' in elem.attrib and elem.tag in periods:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
215 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
216 periods[elem.tag] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
217 for elem in calendar.findall('pm'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
218 if 'draft' in elem.attrib and elem.tag in periods:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
219 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
220 periods[elem.tag] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
221
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
222 date_formats = data.setdefault('date_formats', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
223 for elem in calendar.findall('dateFormats/dateFormatLength'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
224 if 'draft' in elem.attrib and elem.attrib.get('type') in date_formats:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
225 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
226 try:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
227 date_formats[elem.attrib.get('type')] = \
11
11f64b232b04 Add basic support for number format patterns.
jonas
parents: 10
diff changeset
228 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
229 except ValueError, e:
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
230 print>>sys.stderr, 'ERROR: %s' % e
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
231
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
232 time_formats = data.setdefault('time_formats', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
233 for elem in calendar.findall('timeFormats/timeFormatLength'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
234 if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
235 continue
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
236 try:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
237 time_formats[elem.attrib.get('type')] = \
11
11f64b232b04 Add basic support for number format patterns.
jonas
parents: 10
diff changeset
238 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
239 except ValueError, e:
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
240 print>>sys.stderr, 'ERROR: %s' % e
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
241
35
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
242 datetime_formats = data.setdefault('datetime_formats', {})
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
243 for elem in calendar.findall('dateTimeFormats/dateTimeFormatLength'):
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
244 if 'draft' in elem.attrib and elem.attrib.get('type') in datetime_formats:
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
245 continue
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
246 try:
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
247 datetime_formats[elem.attrib.get('type')] = \
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
248 unicode(elem.findtext('dateTimeFormat/pattern'))
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
249 except ValueError, e:
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
250 print>>sys.stderr, 'ERROR: %s' % e
0505d666fa1f * Import datetime patterns from CLDR.
cmlenz
parents: 30
diff changeset
251
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
252 # <numbers>
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
253
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
254 number_symbols = data.setdefault('number_symbols', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
255 for elem in tree.findall('//numbers/symbols/*'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
256 number_symbols[elem.tag] = unicode(elem.text)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
257
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
258 decimal_formats = data.setdefault('decimal_formats', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
259 for elem in tree.findall('//decimalFormats/decimalFormatLength'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
260 if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
261 continue
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
262 pattern = unicode(elem.findtext('decimalFormat/pattern'))
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
263 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
264
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
265 scientific_formats = data.setdefault('scientific_formats', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
266 for elem in tree.findall('//scientificFormats/scientificFormatLength'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
267 if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
268 continue
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
269 # FIXME: should use numbers.parse_pattern
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
270 scientific_formats[elem.attrib.get('type')] = unicode(elem.findtext('scientificFormat/pattern'))
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
271
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
272 currency_formats = data.setdefault('currency_formats', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
273 for elem in tree.findall('//currencyFormats/currencyFormatLength'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
274 if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
275 continue
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
276 # FIXME: should use numbers.parse_pattern
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
277 currency_formats[elem.attrib.get('type')] = unicode(elem.findtext('currencyFormat/pattern'))
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
278
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
279 percent_formats = data.setdefault('percent_formats', {})
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
280 for elem in tree.findall('//percentFormats/percentFormatLength'):
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
281 if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
282 continue
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
283 pattern = unicode(elem.findtext('percentFormat/pattern'))
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
284 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
285
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
286 currency_names = data.setdefault('currency_names', {})
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
287 currency_symbols = data.setdefault('currency_symbols', {})
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
288 for elem in tree.findall('//currencies/currency'):
28
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
289 name = elem.findtext('displayName')
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
290 if name:
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
291 currency_names[elem.attrib['type']] = unicode(name)
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
292 symbol = elem.findtext('symbol')
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
293 if symbol:
695884591af6 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 24
diff changeset
294 currency_symbols[elem.attrib['type']] = unicode(symbol)
3
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
295
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
296 dicts[stem] = data
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
297 outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
298 try:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
299 pickle.dump(data, outfile, 2)
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
300 finally:
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
301 outfile.close()
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
302
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
# Script entry point: main() is invoked only when this file is executed directly, not when it is imported.
303 if __name__ == '__main__':
e9eaddab598e Import of initial code base.
cmlenz
parents:
diff changeset
304 main()
Copyright (C) 2012-2017 Edgewall Software