annotate scripts/import_cldr.py @ 10:4130d9c6cb34 trunk

Both Babel's [source:trunk/babel/catalog/frontend.py frontend] and [source:trunk/babel/catalog/extract.py extract] now handle keyword indices. Also added an extra boolean flag so that the default keywords defined by Babel are not included in the keywords to search for when extracting strings.
author palgarvio
date Wed, 30 May 2007 22:48:11 +0000
parents 9ed6cf5975a1
children 368650dc3423
rev   line source
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
1 #!/usr/bin/env python
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
2 # -*- coding: utf-8 -*-
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
3 #
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
4 # Copyright (C) 2007 Edgewall Software
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
5 # All rights reserved.
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
6 #
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
7 # This software is licensed as described in the file COPYING, which
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
8 # you should have received as part of this distribution. The terms
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
9 # are also available at http://babel.edgewall.org/wiki/License.
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
10 #
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
11 # This software consists of voluntary contributions made by many
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
12 # individuals. For the exact contribution history, see the revision
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
13 # history and logs, available at http://babel.edgewall.org/log/.
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
14
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
15 import copy
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
16 from optparse import OptionParser
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
17 import os
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
18 import pickle
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
19 import sys
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
20 try:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
21 from xml.etree.ElementTree import parse
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
22 except ImportError:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
23 from elementtree.ElementTree import parse
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
24
9
9ed6cf5975a1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
25 from babel import dates, numbers
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
26
8
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
# Maps the three-letter day codes used in this script's lookups to weekday
# numbers, counting from Monday (1) through Sunday (7).
weekdays = {
    'mon': 1,
    'tue': 2,
    'wed': 3,
    'thu': 4,
    'fri': 5,
    'sat': 6,
    'sun': 7,
}
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
29
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
try:
    any
except NameError:
    # Fallback for interpreters that do not provide the ``any`` builtin.
    def any(iterable):
        """Return True if at least one item of `iterable` is truthy.

        Returns a real boolean and stops at the first truthy item, instead
        of building a list of every truthy item. An empty iterable yields
        False.
        """
        for item in iterable:
            if item:
                return True
        return False
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
35
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
def _parent(locale):
    """Return the identifier that `locale` falls back to.

    Everything from the last underscore onwards is dropped, so
    ``'zh_Hans_CN'`` gives ``'zh_Hans'``. An identifier without any
    underscore falls back to ``'root'``.
    """
    cut = locale.rfind('_')
    if cut < 0:
        # No underscore at all: a bare identifier's parent is the root.
        return 'root'
    return locale[:cut]
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
42
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
def _text(elem):
    """Return the text content of `elem`, including all nested elements.

    The result is the element's own text, followed by the text of each
    descendant and the tail text that follows each descendant inside
    `elem`, joined in document order. Leading and trailing whitespace is
    stripped once, from the final string only, so whitespace between nested
    pieces of text is kept. The tail of `elem` itself is not included,
    because it sits after the element's end tag and belongs to the parent.

    :param elem: an ElementTree element
    :return: the concatenated text as a string (empty if there is none)
    """
    def _collect(node, buf):
        # Depth-first walk: own text, then each child's content and tail.
        if node.text:
            buf.append(node.text)
        for child in node:
            _collect(child, buf)
            if child.tail:
                buf.append(child.tail)

    parts = []
    _collect(elem, parts)
    return u''.join(parts).strip()
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
49
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
50 def main():
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
51 parser = OptionParser(usage='%prog path/to/cldr')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
52 options, args = parser.parse_args()
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
53 if len(args) != 1:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
54 parser.error('incorrect number of arguments')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
55
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
56 srcdir = args[0]
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
57 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
58 '..', 'babel', 'localedata')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
59
8
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
60 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
61
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
62 # build a territory containment mapping for inheritance
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
63 regions = {}
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
64 for elem in sup.findall('//territoryContainment/group'):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
65 regions[elem.attrib['type']] = elem.attrib['contains'].split()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
66 from pprint import pprint
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
67
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
68 # Resolve territory containment
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
69 territory_containment = {}
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
70 region_items = regions.items()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
71 region_items.sort()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
72 for group, territory_list in region_items:
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
73 for territory in territory_list:
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
74 containers = territory_containment.setdefault(territory, set([]))
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
75 if group in territory_containment:
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
76 containers |= territory_containment[group]
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
77 containers.add(group)
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
78
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
79 filenames = os.listdir(os.path.join(srcdir, 'main'))
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
80 filenames.remove('root.xml')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
81 filenames.sort(lambda a,b: len(a)-len(b))
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
82 filenames.insert(0, 'root.xml')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
83
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
84 dicts = {}
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
85
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
86 for filename in filenames:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
87 print>>sys.stderr, 'Processing input file %r' % filename
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
88 stem, ext = os.path.splitext(filename)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
89 if ext != '.xml':
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
90 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
91
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
92 data = {}
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
93 if stem != 'root':
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
94 data.update(copy.deepcopy(dicts[_parent(stem)]))
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
95 tree = parse(os.path.join(srcdir, 'main', filename))
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
96
8
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
97 language = None
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
98 elem = tree.find('//identity/language')
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
99 if elem is not None:
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
100 language = elem.attrib['type']
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
101 print>>sys.stderr, ' Language: %r' % language
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
102
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
103 territory = None
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
104 elem = tree.find('//identity/territory')
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
105 if elem is not None:
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
106 territory = elem.attrib['type']
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
107 print>>sys.stderr, ' Territory: %r' % territory
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
108 regions = territory_containment.get(territory, [])
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
109 print>>sys.stderr, ' Regions: %r' % regions
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
110
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
111 # <localeDisplayNames>
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
112
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
113 territories = data.setdefault('territories', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
114 for elem in tree.findall('//territories/territory'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
115 if 'draft' in elem.attrib and elem.attrib['type'] in territories:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
116 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
117 territories[elem.attrib['type']] = _text(elem)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
118
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
119 languages = data.setdefault('languages', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
120 for elem in tree.findall('//languages/language'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
121 if 'draft' in elem.attrib and elem.attrib['type'] in languages:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
122 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
123 languages[elem.attrib['type']] = _text(elem)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
124
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
125 variants = data.setdefault('variants', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
126 for elem in tree.findall('//variants/variant'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
127 if 'draft' in elem.attrib and elem.attrib['type'] in variants:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
128 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
129 variants[elem.attrib['type']] = _text(elem)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
130
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
131 scripts = data.setdefault('scripts', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
132 for elem in tree.findall('//scripts/script'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
133 if 'draft' in elem.attrib and elem.attrib['type'] in scripts:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
134 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
135 scripts[elem.attrib['type']] = _text(elem)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
136
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
137 # <dates>
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
138
8
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
139 week_data = data.setdefault('week_data', {})
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
140 supelem = sup.find('//weekData')
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
141
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
142 for elem in supelem.findall('minDays'):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
143 territories = elem.attrib['territories'].split()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
144 if territory in territories or any([r in territories for r in regions]):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
145 week_data['min_days'] = int(elem.attrib['count'])
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
146
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
147 for elem in supelem.findall('firstDay'):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
148 territories = elem.attrib['territories'].split()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
149 if territory in territories or any([r in territories for r in regions]):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
150 week_data['first_day'] = weekdays[elem.attrib['day']]
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
151
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
152 for elem in supelem.findall('weekendStart'):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
153 territories = elem.attrib['territories'].split()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
154 if territory in territories or any([r in territories for r in regions]):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
155 week_data['weekend_start'] = weekdays[elem.attrib['day']]
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
156
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
157 for elem in supelem.findall('weekendEnd'):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
158 territories = elem.attrib['territories'].split()
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
159 if territory in territories or any([r in territories for r in regions]):
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
160 week_data['weekend_end'] = weekdays[elem.attrib['day']]
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
161
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
162 time_zones = data.setdefault('time_zones', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
163 for elem in tree.findall('//timeZoneNames/zone'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
164 time_zones[elem.tag] = unicode(elem.findtext('displayName'))
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
165
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
166 for calendar in tree.findall('//calendars/calendar'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
167 if calendar.attrib['type'] != 'gregorian':
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
168 # TODO: support other calendar types
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
169 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
170
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
171 months = data.setdefault('months', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
172 for ctxt in calendar.findall('months/monthContext'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
173 ctxts = months.setdefault(ctxt.attrib['type'], {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
174 for width in ctxt.findall('monthWidth'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
175 widths = ctxts.setdefault(width.attrib['type'], {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
176 for elem in width.findall('month'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
177 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
178 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
179 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
180
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
181 days = data.setdefault('days', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
182 for ctxt in calendar.findall('days/dayContext'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
183 ctxts = days.setdefault(ctxt.attrib['type'], {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
184 for width in ctxt.findall('dayWidth'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
185 widths = ctxts.setdefault(width.attrib['type'], {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
186 for elem in width.findall('day'):
8
29f6f9a90f14 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
187 dtype = weekdays[elem.attrib['type']]
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
188 if 'draft' in elem.attrib and dtype in widths:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
189 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
190 widths[dtype] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
191
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
192 quarters = data.setdefault('quarters', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
193 for ctxt in calendar.findall('quarters/quarterContext'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
194 ctxts = quarters.setdefault(ctxt.attrib['type'], {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
195 for width in ctxt.findall('quarterWidth'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
196 widths = ctxts.setdefault(width.attrib['type'], {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
197 for elem in width.findall('quarter'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
198 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
199 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
200 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
201
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
202 eras = data.setdefault('eras', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
203 for width in calendar.findall('eras/*'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
204 ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag]
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
205 widths = eras.setdefault(ewidth, {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
206 for elem in width.findall('era'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
207 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
208 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
209 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
210
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
211 # AM/PM
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
212 periods = data.setdefault('periods', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
213 for elem in calendar.findall('am'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
214 if 'draft' in elem.attrib and elem.tag in periods:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
215 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
216 periods[elem.tag] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
217 for elem in calendar.findall('pm'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
218 if 'draft' in elem.attrib and elem.tag in periods:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
219 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
220 periods[elem.tag] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
221
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
222 date_formats = data.setdefault('date_formats', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
223 for elem in calendar.findall('dateFormats/dateFormatLength'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
224 if 'draft' in elem.attrib and elem.attrib.get('type') in date_formats:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
225 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
226 try:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
227 date_formats[elem.attrib.get('type')] = \
9
9ed6cf5975a1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
228 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
229 except ValueError, e:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
230 print e
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
231
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
232 time_formats = data.setdefault('time_formats', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
233 for elem in calendar.findall('timeFormats/timeFormatLength'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
234 if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
235 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
236 try:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
237 time_formats[elem.attrib.get('type')] = \
9
9ed6cf5975a1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
238 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
239 except ValueError, e:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
240 print e
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
241
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
242 # <numbers>
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
243
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
244 number_symbols = data.setdefault('number_symbols', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
245 for elem in tree.findall('//numbers/symbols/*'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
246 number_symbols[elem.tag] = unicode(elem.text)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
247
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
248 decimal_formats = data.setdefault('decimal_formats', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
249 for elem in tree.findall('//decimalFormats/decimalFormatLength'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
250 if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
251 continue
9
9ed6cf5975a1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
252 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('decimalFormat/pattern')))
1
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
253
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
254 scientific_formats = data.setdefault('scientific_formats', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
255 for elem in tree.findall('//scientificFormats/scientificFormatLength'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
256 if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
257 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
258 scientific_formats[elem.attrib.get('type')] = unicode(elem.findtext('scientificFormat/pattern'))
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
259
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
260 currency_formats = data.setdefault('currency_formats', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
261 for elem in tree.findall('//currencyFormats/currencyFormatLength'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
262 if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
263 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
264 currency_formats[elem.attrib.get('type')] = unicode(elem.findtext('currencyFormat/pattern'))
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
265
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
266 percent_formats = data.setdefault('percent_formats', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
267 for elem in tree.findall('//percentFormats/percentFormatLength'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
268 if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
269 continue
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
270 percent_formats[elem.attrib.get('type')] = unicode(elem.findtext('percentFormat/pattern'))
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
271
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
272 currencies = data.setdefault('currencies', {})
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
273 for elem in tree.findall('//currencies/currency'):
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
274 currencies[elem.attrib['type']] = {
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
275 'display_name': unicode(elem.findtext('displayName')),
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
276 'symbol': unicode(elem.findtext('symbol'))
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
277 }
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
278
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
279 dicts[stem] = data
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
280 outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
281 try:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
282 pickle.dump(data, outfile, 2)
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
283 finally:
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
284 outfile.close()
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
285
7870274479f5 Import of initial code base.
cmlenz
parents:
diff changeset
# Script entry point: run the import only when this file is executed
# directly, not when it is imported as a module.
if '__main__' == __name__:
    main()
Copyright (C) 2012-2017 Edgewall Software