annotate scripts/import_cldr.py @ 21:ddfac856c34f

Changed the pot header's first line to "Translations Template for %%(project)s." instead of "SOME DESCRIPTIVE TITLE.". '''`project`''' and '''`version`''' now default to '''PROJECT''' and '''VERSION''' respectively. Fixed a bug regarding '''Content-Transfer-Encoding''': it shouldn't be the charset, so we're defaulting to `8bit` until someone complains.
author palgarvio
date Thu, 31 May 2007 18:04:41 +0000
parents 76985c08a339
children 7d37639a7411
rev   line source
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
1 #!/usr/bin/env python
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
2 # -*- coding: utf-8 -*-
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
3 #
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
4 # Copyright (C) 2007 Edgewall Software
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
5 # All rights reserved.
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
6 #
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
7 # This software is licensed as described in the file COPYING, which
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
8 # you should have received as part of this distribution. The terms
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
9 # are also available at http://babel.edgewall.org/wiki/License.
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
10 #
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
11 # This software consists of voluntary contributions made by many
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
12 # individuals. For the exact contribution history, see the revision
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
13 # history and logs, available at http://babel.edgewall.org/log/.
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
14
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
15 import copy
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
16 from optparse import OptionParser
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
17 import os
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
18 import pickle
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
19 import sys
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
20 try:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
21 from xml.etree.ElementTree import parse
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
22 except ImportError:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
23 from elementtree.ElementTree import parse
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
24
9
3be73c6f01f1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
25 from babel import dates, numbers
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
26
15
76985c08a339 Minor date formatting improvements.
cmlenz
parents: 13
diff changeset
# Maps the lowercase three-letter weekday codes used as keys in this script
# to zero-based day numbers, with Monday as 0 and Sunday as 6.
weekdays = {'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5,
            'sun': 6}
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
29
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
try:
    any
except NameError:
    # Fallback for interpreters that lack the ``any`` builtin.
    def any(iterable):
        """Return True if at least one item of `iterable` is true.

        Stops at the first true item and always returns a real boolean,
        so it behaves like the builtin it stands in for. An empty
        iterable yields False.
        """
        for item in iterable:
            if item:
                return True
        return False
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
35
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
36 def _parent(locale):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
37 parts = locale.split('_')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
38 if len(parts) == 1:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
39 return 'root'
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
40 else:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
41 return '_'.join(parts[:-1])
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
42
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
43 def _text(elem):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
44 buf = [elem.text or '']
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
45 for child in elem:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
46 buf.append(_text(child))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
47 buf.append(elem.tail or '')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
48 return u''.join(filter(None, buf)).strip()
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
49
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
50 def main():
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
51 parser = OptionParser(usage='%prog path/to/cldr')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
52 options, args = parser.parse_args()
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
53 if len(args) != 1:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
54 parser.error('incorrect number of arguments')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
55
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
56 srcdir = args[0]
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
57 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
58 '..', 'babel', 'localedata')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
59
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
60 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
61
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
62 # build a territory containment mapping for inheritance
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
63 regions = {}
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
64 for elem in sup.findall('//territoryContainment/group'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
65 regions[elem.attrib['type']] = elem.attrib['contains'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
66 from pprint import pprint
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
67
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
68 # Resolve territory containment
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
69 territory_containment = {}
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
70 region_items = regions.items()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
71 region_items.sort()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
72 for group, territory_list in region_items:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
73 for territory in territory_list:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
74 containers = territory_containment.setdefault(territory, set([]))
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
75 if group in territory_containment:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
76 containers |= territory_containment[group]
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
77 containers.add(group)
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
78
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
79 filenames = os.listdir(os.path.join(srcdir, 'main'))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
80 filenames.remove('root.xml')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
81 filenames.sort(lambda a,b: len(a)-len(b))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
82 filenames.insert(0, 'root.xml')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
83
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
84 dicts = {}
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
85
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
86 for filename in filenames:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
87 print>>sys.stderr, 'Processing input file %r' % filename
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
88 stem, ext = os.path.splitext(filename)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
89 if ext != '.xml':
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
90 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
91
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
92 data = {}
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
93 if stem != 'root':
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
94 data.update(copy.deepcopy(dicts[_parent(stem)]))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
95 tree = parse(os.path.join(srcdir, 'main', filename))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
96
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
97 language = None
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
98 elem = tree.find('//identity/language')
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
99 if elem is not None:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
100 language = elem.attrib['type']
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
101 print>>sys.stderr, ' Language: %r' % language
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
102
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
103 territory = None
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
104 elem = tree.find('//identity/territory')
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
105 if elem is not None:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
106 territory = elem.attrib['type']
13
b6c0de43fa40 Extended and documented `LazyProxy`.
cmlenz
parents: 9
diff changeset
107 else:
b6c0de43fa40 Extended and documented `LazyProxy`.
cmlenz
parents: 9
diff changeset
108 territory = '001' # world
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
109 print>>sys.stderr, ' Territory: %r' % territory
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
110 regions = territory_containment.get(territory, [])
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
111 print>>sys.stderr, ' Regions: %r' % regions
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
112
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
113 # <localeDisplayNames>
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
114
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
115 territories = data.setdefault('territories', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
116 for elem in tree.findall('//territories/territory'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
117 if 'draft' in elem.attrib and elem.attrib['type'] in territories:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
118 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
119 territories[elem.attrib['type']] = _text(elem)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
120
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
121 languages = data.setdefault('languages', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
122 for elem in tree.findall('//languages/language'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
123 if 'draft' in elem.attrib and elem.attrib['type'] in languages:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
124 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
125 languages[elem.attrib['type']] = _text(elem)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
126
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
127 variants = data.setdefault('variants', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
128 for elem in tree.findall('//variants/variant'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
129 if 'draft' in elem.attrib and elem.attrib['type'] in variants:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
130 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
131 variants[elem.attrib['type']] = _text(elem)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
132
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
133 scripts = data.setdefault('scripts', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
134 for elem in tree.findall('//scripts/script'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
135 if 'draft' in elem.attrib and elem.attrib['type'] in scripts:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
136 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
137 scripts[elem.attrib['type']] = _text(elem)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
138
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
139 # <dates>
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
140
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
141 week_data = data.setdefault('week_data', {})
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
142 supelem = sup.find('//weekData')
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
143
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
144 for elem in supelem.findall('minDays'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
145 territories = elem.attrib['territories'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
146 if territory in territories or any([r in territories for r in regions]):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
147 week_data['min_days'] = int(elem.attrib['count'])
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
148
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
149 for elem in supelem.findall('firstDay'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
150 territories = elem.attrib['territories'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
151 if territory in territories or any([r in territories for r in regions]):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
152 week_data['first_day'] = weekdays[elem.attrib['day']]
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
153
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
154 for elem in supelem.findall('weekendStart'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
155 territories = elem.attrib['territories'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
156 if territory in territories or any([r in territories for r in regions]):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
157 week_data['weekend_start'] = weekdays[elem.attrib['day']]
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
158
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
159 for elem in supelem.findall('weekendEnd'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
160 territories = elem.attrib['territories'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
161 if territory in territories or any([r in territories for r in regions]):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
162 week_data['weekend_end'] = weekdays[elem.attrib['day']]
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
163
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
164 time_zones = data.setdefault('time_zones', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
165 for elem in tree.findall('//timeZoneNames/zone'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
166 time_zones[elem.tag] = unicode(elem.findtext('displayName'))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
167
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
168 for calendar in tree.findall('//calendars/calendar'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
169 if calendar.attrib['type'] != 'gregorian':
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
170 # TODO: support other calendar types
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
171 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
172
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
173 months = data.setdefault('months', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
174 for ctxt in calendar.findall('months/monthContext'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
175 ctxts = months.setdefault(ctxt.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
176 for width in ctxt.findall('monthWidth'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
177 widths = ctxts.setdefault(width.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
178 for elem in width.findall('month'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
179 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
180 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
181 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
182
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
183 days = data.setdefault('days', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
184 for ctxt in calendar.findall('days/dayContext'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
185 ctxts = days.setdefault(ctxt.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
186 for width in ctxt.findall('dayWidth'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
187 widths = ctxts.setdefault(width.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
188 for elem in width.findall('day'):
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
189 dtype = weekdays[elem.attrib['type']]
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
190 if 'draft' in elem.attrib and dtype in widths:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
191 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
192 widths[dtype] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
193
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
194 quarters = data.setdefault('quarters', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
195 for ctxt in calendar.findall('quarters/quarterContext'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
196 ctxts = quarters.setdefault(ctxt.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
197 for width in ctxt.findall('quarterWidth'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
198 widths = ctxts.setdefault(width.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
199 for elem in width.findall('quarter'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
200 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
201 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
202 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
203
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
204 eras = data.setdefault('eras', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
205 for width in calendar.findall('eras/*'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
206 ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag]
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
207 widths = eras.setdefault(ewidth, {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
208 for elem in width.findall('era'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
209 if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
210 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
211 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
212
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
213 # AM/PM
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
214 periods = data.setdefault('periods', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
215 for elem in calendar.findall('am'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
216 if 'draft' in elem.attrib and elem.tag in periods:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
217 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
218 periods[elem.tag] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
219 for elem in calendar.findall('pm'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
220 if 'draft' in elem.attrib and elem.tag in periods:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
221 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
222 periods[elem.tag] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
223
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
224 date_formats = data.setdefault('date_formats', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
225 for elem in calendar.findall('dateFormats/dateFormatLength'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
226 if 'draft' in elem.attrib and elem.attrib.get('type') in date_formats:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
227 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
228 try:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
229 date_formats[elem.attrib.get('type')] = \
9
3be73c6f01f1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
230 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
231 except ValueError, e:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
232 print e
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
233
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
234 time_formats = data.setdefault('time_formats', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
235 for elem in calendar.findall('timeFormats/timeFormatLength'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
236 if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
237 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
238 try:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
239 time_formats[elem.attrib.get('type')] = \
9
3be73c6f01f1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
240 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
241 except ValueError, e:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
242 print e
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
243
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
244 # <numbers>
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
245
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
246 number_symbols = data.setdefault('number_symbols', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
247 for elem in tree.findall('//numbers/symbols/*'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
248 number_symbols[elem.tag] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
249
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
250 decimal_formats = data.setdefault('decimal_formats', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
251 for elem in tree.findall('//decimalFormats/decimalFormatLength'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
252 if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
253 continue
9
3be73c6f01f1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
254 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('decimalFormat/pattern')))
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
255
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
256 scientific_formats = data.setdefault('scientific_formats', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
257 for elem in tree.findall('//scientificFormats/scientificFormatLength'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
258 if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
259 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
260 scientific_formats[elem.attrib.get('type')] = unicode(elem.findtext('scientificFormat/pattern'))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
261
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
262 currency_formats = data.setdefault('currency_formats', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
263 for elem in tree.findall('//currencyFormats/currencyFormatLength'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
264 if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
265 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
266 currency_formats[elem.attrib.get('type')] = unicode(elem.findtext('currencyFormat/pattern'))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
267
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
268 percent_formats = data.setdefault('percent_formats', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
269 for elem in tree.findall('//percentFormats/percentFormatLength'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
270 if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
271 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
272 percent_formats[elem.attrib.get('type')] = unicode(elem.findtext('percentFormat/pattern'))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
273
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
274 currencies = data.setdefault('currencies', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
275 for elem in tree.findall('//currencies/currency'):
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
276 currencies[elem.attrib['type']] = {
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
277 'display_name': unicode(elem.findtext('displayName')),
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
278 'symbol': unicode(elem.findtext('symbol'))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
279 }
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
280
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
281 dicts[stem] = data
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
282 outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
283 try:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
284 pickle.dump(data, outfile, 2)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
285 finally:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
286 outfile.close()
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
287
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
288 if __name__ == '__main__':
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
289 main()
Copyright (C) 2012-2017 Edgewall Software