annotate scripts/import_cldr.py @ 215:7fcdcdf8972d trunk

Dummy/stub implementation for week-in-year and week-in-month date format fields. Also, treat extended year the same as the regular year field, not even ICU seems to handle it specially.
author cmlenz
date Fri, 13 Jul 2007 22:22:03 +0000
parents 061ea0e0ac8c
children da97a3138239
rev   line source
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
1 #!/usr/bin/env python
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
2 # -*- coding: utf-8 -*-
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
3 #
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
4 # Copyright (C) 2007 Edgewall Software
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
5 # All rights reserved.
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
6 #
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
7 # This software is licensed as described in the file COPYING, which
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
8 # you should have received as part of this distribution. The terms
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
9 # are also available at http://babel.edgewall.org/wiki/License.
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
10 #
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
11 # This software consists of voluntary contributions made by many
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
12 # individuals. For the exact contribution history, see the revision
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
13 # history and logs, available at http://babel.edgewall.org/log/.
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
14
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
15 import copy
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
16 from optparse import OptionParser
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
17 import os
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
18 import pickle
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
19 import sys
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
20 try:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
21 from xml.etree.ElementTree import parse
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
22 except ImportError:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
23 from elementtree.ElementTree import parse
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
24
65
75fe8369ed3b Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents: 34
diff changeset
25 # Make sure we're using Babel source, and not some previously installed version
75fe8369ed3b Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents: 34
diff changeset
26 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..'))
75fe8369ed3b Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents: 34
diff changeset
27
9
9ed6cf5975a1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
28 from babel import dates, numbers
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
29
15
244a74232f5e Minor date formatting improvements.
cmlenz
parents: 13
diff changeset
# Maps the lowercase three-letter weekday keys used in the CLDR data
# ('mon' ... 'sun') to integer indices, Monday = 0 through Sunday = 6.
weekdays = {
    'mon': 0,
    'tue': 1,
    'wed': 2,
    'thu': 3,
    'fri': 4,
    'sat': 5,
    'sun': 6,
}
8
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
32
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
try:
    any
except NameError:
    # The interpreter has no built-in ``any``; provide a compatible fallback.
    def any(iterable):
        """Return True if at least one item of `iterable` is true.

        Stops at the first true item and always returns a real boolean
        (False for an empty iterable).

        :param iterable: any iterable of values to test for truth
        :return: `True` or `False`
        """
        for item in iterable:
            if item:
                return True
        return False
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
38
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
39 def _text(elem):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
40 buf = [elem.text or '']
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
41 for child in elem:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
42 buf.append(_text(child))
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
43 buf.append(elem.tail or '')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
44 return u''.join(filter(None, buf)).strip()
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
45
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
46 def main():
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
47 parser = OptionParser(usage='%prog path/to/cldr')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
48 options, args = parser.parse_args()
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
49 if len(args) != 1:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
50 parser.error('incorrect number of arguments')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
51
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
52 srcdir = args[0]
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
53 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
54 '..', 'babel', 'localedata')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
55
8
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
56 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
57
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
58 # build a territory containment mapping for inheritance
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
59 regions = {}
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
60 for elem in sup.findall('//territoryContainment/group'):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
61 regions[elem.attrib['type']] = elem.attrib['contains'].split()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
62
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
63 # Resolve territory containment
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
64 territory_containment = {}
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
65 region_items = regions.items()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
66 region_items.sort()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
67 for group, territory_list in region_items:
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
68 for territory in territory_list:
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
69 containers = territory_containment.setdefault(territory, set([]))
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
70 if group in territory_containment:
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
71 containers |= territory_containment[group]
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
72 containers.add(group)
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
73
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
74 filenames = os.listdir(os.path.join(srcdir, 'main'))
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
75 filenames.remove('root.xml')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
76 filenames.sort(lambda a,b: len(a)-len(b))
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
77 filenames.insert(0, 'root.xml')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
78
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
79 dicts = {}
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
80
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
81 for filename in filenames:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
82 print>>sys.stderr, 'Processing input file %r' % filename
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
83 stem, ext = os.path.splitext(filename)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
84 if ext != '.xml':
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
85 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
86
26
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
87 tree = parse(os.path.join(srcdir, 'main', filename))
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
88 data = {}
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
89
8
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
90 language = None
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
91 elem = tree.find('//identity/language')
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
92 if elem is not None:
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
93 language = elem.attrib['type']
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
94 print>>sys.stderr, ' Language: %r' % language
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
95
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
96 territory = None
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
97 elem = tree.find('//identity/territory')
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
98 if elem is not None:
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
99 territory = elem.attrib['type']
13
368650dc3423 Extended and documented `LazyProxy`.
cmlenz
parents: 9
diff changeset
100 else:
368650dc3423 Extended and documented `LazyProxy`.
cmlenz
parents: 9
diff changeset
101 territory = '001' # world
8
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
102 print>>sys.stderr, ' Territory: %r' % territory
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
103 regions = territory_containment.get(territory, [])
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
104 print>>sys.stderr, ' Regions: %r' % regions
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
105
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
106 # <localeDisplayNames>
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
107
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
108 territories = data.setdefault('territories', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
109 for elem in tree.findall('//territories/territory'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
110 if 'draft' in elem.attrib and elem.attrib['type'] in territories:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
111 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
112 territories[elem.attrib['type']] = _text(elem)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
113
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
114 languages = data.setdefault('languages', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
115 for elem in tree.findall('//languages/language'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
116 if 'draft' in elem.attrib and elem.attrib['type'] in languages:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
117 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
118 languages[elem.attrib['type']] = _text(elem)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
119
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
120 variants = data.setdefault('variants', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
121 for elem in tree.findall('//variants/variant'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
122 if 'draft' in elem.attrib and elem.attrib['type'] in variants:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
123 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
124 variants[elem.attrib['type']] = _text(elem)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
125
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
126 scripts = data.setdefault('scripts', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
127 for elem in tree.findall('//scripts/script'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
128 if 'draft' in elem.attrib and elem.attrib['type'] in scripts:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
129 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
130 scripts[elem.attrib['type']] = _text(elem)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
131
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
132 # <dates>
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
133
8
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
134 week_data = data.setdefault('week_data', {})
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
135 supelem = sup.find('//weekData')
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
136
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
137 for elem in supelem.findall('minDays'):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
138 territories = elem.attrib['territories'].split()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
139 if territory in territories or any([r in territories for r in regions]):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
140 week_data['min_days'] = int(elem.attrib['count'])
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
141
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
142 for elem in supelem.findall('firstDay'):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
143 territories = elem.attrib['territories'].split()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
144 if territory in territories or any([r in territories for r in regions]):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
145 week_data['first_day'] = weekdays[elem.attrib['day']]
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
146
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
147 for elem in supelem.findall('weekendStart'):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
148 territories = elem.attrib['territories'].split()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
149 if territory in territories or any([r in territories for r in regions]):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
150 week_data['weekend_start'] = weekdays[elem.attrib['day']]
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
151
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
152 for elem in supelem.findall('weekendEnd'):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
153 territories = elem.attrib['territories'].split()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
154 if territory in territories or any([r in territories for r in regions]):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
155 week_data['weekend_end'] = weekdays[elem.attrib['day']]
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
156
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
157 time_zones = data.setdefault('time_zones', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
158 for elem in tree.findall('//timeZoneNames/zone'):
28
b00b06e5ace8 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
159 info = {}
b00b06e5ace8 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
160 city = elem.findtext('exemplarCity')
b00b06e5ace8 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
161 if city:
b00b06e5ace8 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
162 info['city'] = unicode(city)
b00b06e5ace8 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
163 for child in elem.findall('long/*'):
b00b06e5ace8 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
164 info.setdefault('long', {})[child.tag] = unicode(child.text)
b00b06e5ace8 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
165 for child in elem.findall('short/*'):
b00b06e5ace8 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
166 info.setdefault('short', {})[child.tag] = unicode(child.text)
b00b06e5ace8 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
167 time_zones[elem.attrib['type']] = info
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
168
34
464fbcefedde Extended time-zone support.
cmlenz
parents: 33
diff changeset
169 zone_aliases = data.setdefault('zone_aliases', {})
464fbcefedde Extended time-zone support.
cmlenz
parents: 33
diff changeset
170 if stem == 'root':
464fbcefedde Extended time-zone support.
cmlenz
parents: 33
diff changeset
171 for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
464fbcefedde Extended time-zone support.
cmlenz
parents: 33
diff changeset
172 if 'aliases' in elem.attrib:
464fbcefedde Extended time-zone support.
cmlenz
parents: 33
diff changeset
173 canonical_id = elem.attrib['type']
464fbcefedde Extended time-zone support.
cmlenz
parents: 33
diff changeset
174 for alias in elem.attrib['aliases'].split():
464fbcefedde Extended time-zone support.
cmlenz
parents: 33
diff changeset
175 zone_aliases[alias] = canonical_id
464fbcefedde Extended time-zone support.
cmlenz
parents: 33
diff changeset
176
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
177 for calendar in tree.findall('//calendars/calendar'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
178 if calendar.attrib['type'] != 'gregorian':
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
179 # TODO: support other calendar types
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
180 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
181
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
182 months = data.setdefault('months', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
183 for ctxt in calendar.findall('months/monthContext'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
184 ctxts = months.setdefault(ctxt.attrib['type'], {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
185 for width in ctxt.findall('monthWidth'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
186 widths = ctxts.setdefault(width.attrib['type'], {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
187 for elem in width.findall('month'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
188 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
189 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
190 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
191
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
192 days = data.setdefault('days', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
193 for ctxt in calendar.findall('days/dayContext'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
194 ctxts = days.setdefault(ctxt.attrib['type'], {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
195 for width in ctxt.findall('dayWidth'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
196 widths = ctxts.setdefault(width.attrib['type'], {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
197 for elem in width.findall('day'):
8
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
198 dtype = weekdays[elem.attrib['type']]
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
199 if 'draft' in elem.attrib and dtype in widths:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
200 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
201 widths[dtype] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
202
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
203 quarters = data.setdefault('quarters', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
204 for ctxt in calendar.findall('quarters/quarterContext'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
205 ctxts = quarters.setdefault(ctxt.attrib['type'], {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
206 for width in ctxt.findall('quarterWidth'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
207 widths = ctxts.setdefault(width.attrib['type'], {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
208 for elem in width.findall('quarter'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
209 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
210 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
211 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
212
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
213 eras = data.setdefault('eras', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
214 for width in calendar.findall('eras/*'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
215 ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag]
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
216 widths = eras.setdefault(ewidth, {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
217 for elem in width.findall('era'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
218 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
219 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
220 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
221
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
222 # AM/PM
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
223 periods = data.setdefault('periods', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
224 for elem in calendar.findall('am'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
225 if 'draft' in elem.attrib and elem.tag in periods:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
226 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
227 periods[elem.tag] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
228 for elem in calendar.findall('pm'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
229 if 'draft' in elem.attrib and elem.tag in periods:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
230 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
231 periods[elem.tag] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
232
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
233 date_formats = data.setdefault('date_formats', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
234 for elem in calendar.findall('dateFormats/dateFormatLength'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
235 if 'draft' in elem.attrib and elem.attrib.get('type') in date_formats:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
236 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
237 try:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
238 date_formats[elem.attrib.get('type')] = \
9
9ed6cf5975a1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
239 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
240 except ValueError, e:
26
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
241 print>>sys.stderr, 'ERROR: %s' % e
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
242
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
243 time_formats = data.setdefault('time_formats', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
244 for elem in calendar.findall('timeFormats/timeFormatLength'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
245 if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
246 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
247 try:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
248 time_formats[elem.attrib.get('type')] = \
9
9ed6cf5975a1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
249 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
250 except ValueError, e:
26
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
251 print>>sys.stderr, 'ERROR: %s' % e
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
252
33
75a64f5a176e * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
253 datetime_formats = data.setdefault('datetime_formats', {})
75a64f5a176e * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
254 for elem in calendar.findall('dateTimeFormats/dateTimeFormatLength'):
75a64f5a176e * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
255 if 'draft' in elem.attrib and elem.attrib.get('type') in datetime_formats:
75a64f5a176e * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
256 continue
75a64f5a176e * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
257 try:
75a64f5a176e * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
258 datetime_formats[elem.attrib.get('type')] = \
75a64f5a176e * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
259 unicode(elem.findtext('dateTimeFormat/pattern'))
75a64f5a176e * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
260 except ValueError, e:
75a64f5a176e * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
261 print>>sys.stderr, 'ERROR: %s' % e
75a64f5a176e * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
262
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
263 # <numbers>
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
264
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
265 number_symbols = data.setdefault('number_symbols', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
266 for elem in tree.findall('//numbers/symbols/*'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
267 number_symbols[elem.tag] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
268
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
269 decimal_formats = data.setdefault('decimal_formats', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
270 for elem in tree.findall('//decimalFormats/decimalFormatLength'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
271 if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
272 continue
26
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
273 pattern = unicode(elem.findtext('decimalFormat/pattern'))
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
274 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
275
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
276 scientific_formats = data.setdefault('scientific_formats', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
277 for elem in tree.findall('//scientificFormats/scientificFormatLength'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
278 if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
279 continue
125
061ea0e0ac8c Add currency formatting.
cmlenz
parents: 65
diff changeset
280 pattern = unicode(elem.findtext('scientificFormat/pattern'))
061ea0e0ac8c Add currency formatting.
cmlenz
parents: 65
diff changeset
281 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
282
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
283 currency_formats = data.setdefault('currency_formats', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
284 for elem in tree.findall('//currencyFormats/currencyFormatLength'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
285 if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
286 continue
125
061ea0e0ac8c Add currency formatting.
cmlenz
parents: 65
diff changeset
287 pattern = unicode(elem.findtext('currencyFormat/pattern'))
061ea0e0ac8c Add currency formatting.
cmlenz
parents: 65
diff changeset
288 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
289
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
290 percent_formats = data.setdefault('percent_formats', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
291 for elem in tree.findall('//percentFormats/percentFormatLength'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
292 if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
293 continue
26
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
294 pattern = unicode(elem.findtext('percentFormat/pattern'))
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
295 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
296
26
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
297 currency_names = data.setdefault('currency_names', {})
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
298 currency_symbols = data.setdefault('currency_symbols', {})
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
299 for elem in tree.findall('//currencies/currency'):
26
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
300 name = elem.findtext('displayName')
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
301 if name:
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
302 currency_names[elem.attrib['type']] = unicode(name)
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
303 symbol = elem.findtext('symbol')
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
304 if symbol:
6041782ea677 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
305 currency_symbols[elem.attrib['type']] = unicode(symbol)
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
306
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
307 dicts[stem] = data
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
308 outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
309 try:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
310 pickle.dump(data, outfile, 2)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
311 finally:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
312 outfile.close()
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
313
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
314 if __name__ == '__main__':
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
315 main()
Copyright (C) 2012-2017 Edgewall Software