annotate scripts/import_cldr.py @ 467:0228ac359f84

Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
author jruigrok
date Thu, 08 Apr 2010 09:16:53 +0000
parents fd01923aaf1e
children 97e99dbc63f8
rev   line source
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
1 #!/usr/bin/env python
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
2 # -*- coding: utf-8 -*-
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
3 #
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
4 # Copyright (C) 2007 Edgewall Software
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
5 # All rights reserved.
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
6 #
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
7 # This software is licensed as described in the file COPYING, which
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
8 # you should have received as part of this distribution. The terms
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
9 # are also available at http://babel.edgewall.org/wiki/License.
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
10 #
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
11 # This software consists of voluntary contributions made by many
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
12 # individuals. For the exact contribution history, see the revision
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
13 # history and logs, available at http://babel.edgewall.org/log/.
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
14
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
15 import copy
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
16 from optparse import OptionParser
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
17 import os
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
18 import pickle
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
19 import re
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
20 import sys
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
21 try:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
22 from xml.etree.ElementTree import parse
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
23 except ImportError:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
24 from elementtree.ElementTree import parse
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
25
65
7478d663561f Add Babel soruce path to CLDR import script automatically for asmodai ;-).
cmlenz
parents: 34
diff changeset
26 # Make sure we're using Babel source, and not some previously installed version
7478d663561f Add Babel soruce path to CLDR import script automatically for asmodai ;-).
cmlenz
parents: 34
diff changeset
27 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..'))
7478d663561f Add Babel soruce path to CLDR import script automatically for asmodai ;-).
cmlenz
parents: 34
diff changeset
28
9
3be73c6f01f1 Add basic support for number format patterns.
jonas
parents: 8
diff changeset
29 from babel import dates, numbers
390
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
30 from babel.plural import PluralRule
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
31 from babel.localedata import Alias
415
7431c47b91bb 2.3 compat: fix another usage of set
pjenvey
parents: 390
diff changeset
32 from babel.util import set
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
33
15
76985c08a339 Minor date formatting improvements.
cmlenz
parents: 13
diff changeset
# Three-letter lowercase weekday abbreviations mapped to zero-based indexes,
# counting from Monday (0) through Sunday (6).
weekdays = dict(zip(
    ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'),
    range(7)
))
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
36
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
try:
    any
except NameError:
    # Fallback for interpreters that predate the ``any`` builtin.
    def any(iterable):
        """Return True if at least one item of ``iterable`` is truthy.

        Mirrors the builtin: the result is a real boolean (not a filtered
        list), evaluation stops at the first truthy item, and an empty
        iterable yields False.
        """
        for item in iterable:
            if item:
                return True
        return False
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
42
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
43
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
def _text(elem):
    """Return the text content of ``elem`` as a single stripped string.

    The result is built from the element's own text, the (already stripped)
    text of each child element collected recursively, and the element's
    tail text, concatenated in that order with surrounding whitespace
    removed. Missing or empty pieces contribute nothing.
    """
    pieces = []
    if elem.text:
        pieces.append(elem.text)
    for child in elem:
        inner = _text(child)
        if inner:
            pieces.append(inner)
    if elem.tail:
        pieces.append(elem.tail)
    return u''.join(pieces).strip()
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
50
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
51
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
# A path segment that is a bare element name (word characters only).
NAME_RE = re.compile(r"^\w+$")

# A path segment of the form name[@type='value']; group 1 captures the value.
TYPE_ATTR_RE = re.compile(r"^\w+\[@type='(.*?)'\]$")

# Element names that are stored under a different key than their own name;
# names not listed here are used unchanged by _translate_alias().
NAME_MAP = {
    'dateFormats':     'date_formats',
    'dateTimeFormats': 'datetime_formats',
    'eraAbbr':         'abbreviated',
    'eraNames':        'wide',
    'eraNarrow':       'narrow',
    'timeFormats':     'time_formats',
}
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
63
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
def _translate_alias(ctxt, path):
    """Resolve a relative alias ``path`` against the key list ``ctxt``.

    ``path`` is split on ``/`` and applied segment by segment to a copy of
    ``ctxt`` (the argument itself is not modified):

    * ``..`` removes the last key;
    * ``name[@type='value']`` appends ``value``;
    * a plain ``name`` appends its ``NAME_MAP`` translation, or the name
      itself when it has no entry.

    Returns the resulting list of keys. A segment matching none of these
    forms trips the assertion below.
    """
    resolved = ctxt[:]
    for segment in path.split('/'):
        if segment == '..':
            resolved.pop()
            continue
        typed = TYPE_ATTR_RE.match(segment)
        if typed:
            resolved.append(typed.group(1))
            continue
        assert NAME_RE.match(segment)
        resolved.append(NAME_MAP.get(segment, segment))
    return resolved
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
78
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
79
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
80 def main():
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
81 parser = OptionParser(usage='%prog path/to/cldr')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
82 options, args = parser.parse_args()
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
83 if len(args) != 1:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
84 parser.error('incorrect number of arguments')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
85
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
86 srcdir = args[0]
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
87 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
233
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
88 '..', 'babel')
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
89
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
90 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
91
345
64d340f76701 Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents: 233
diff changeset
92 # Import global data from the supplemental files
233
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
93 global_data = {}
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
94
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
95 territory_zones = global_data.setdefault('territory_zones', {})
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
96 zone_aliases = global_data.setdefault('zone_aliases', {})
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
97 zone_territories = global_data.setdefault('zone_territories', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
98 for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
233
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
99 tzid = elem.attrib['type']
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
100 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
101 zone_territories[tzid] = elem.attrib['territory']
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
102 if 'aliases' in elem.attrib:
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
103 for alias in elem.attrib['aliases'].split():
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
104 zone_aliases[alias] = tzid
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
105
345
64d340f76701 Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents: 233
diff changeset
106 # Import Metazone mapping
64d340f76701 Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents: 233
diff changeset
107 meta_zones = global_data.setdefault('meta_zones', {})
64d340f76701 Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents: 233
diff changeset
108 tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml'))
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
109 for elem in tzsup.findall('.//timezone'):
345
64d340f76701 Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents: 233
diff changeset
110 for child in elem.findall('usesMetazone'):
64d340f76701 Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents: 233
diff changeset
111 if 'to' not in child.attrib: # FIXME: support old mappings
64d340f76701 Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents: 233
diff changeset
112 meta_zones[elem.attrib['type']] = child.attrib['mzone']
64d340f76701 Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents: 233
diff changeset
113
233
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
114 outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
115 try:
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
116 pickle.dump(global_data, outfile, 2)
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
117 finally:
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
118 outfile.close()
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
119
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
120 # build a territory containment mapping for inheritance
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
121 regions = {}
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
122 for elem in sup.findall('.//territoryContainment/group'):
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
123 regions[elem.attrib['type']] = elem.attrib['contains'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
124
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
125 # Resolve territory containment
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
126 territory_containment = {}
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
127 region_items = regions.items()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
128 region_items.sort()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
129 for group, territory_list in region_items:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
130 for territory in territory_list:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
131 containers = territory_containment.setdefault(territory, set([]))
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
132 if group in territory_containment:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
133 containers |= territory_containment[group]
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
134 containers.add(group)
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
135
390
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
136 # prepare the per-locale plural rules definitions
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
137 plural_rules = {}
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
138 prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml'))
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
139 for elem in prsup.findall('.//plurals/pluralRules'):
390
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
140 rules = []
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
141 for rule in elem.findall('pluralRule'):
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
142 rules.append((rule.attrib['count'], unicode(rule.text)))
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
143 pr = PluralRule(rules)
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
144 for locale in elem.attrib['locales'].split():
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
145 plural_rules[locale] = pr
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
146
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
147 filenames = os.listdir(os.path.join(srcdir, 'main'))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
148 filenames.remove('root.xml')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
149 filenames.sort(lambda a,b: len(a)-len(b))
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
150 filenames.insert(0, 'root.xml')
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
151
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
152 for filename in filenames:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
153 stem, ext = os.path.splitext(filename)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
154 if ext != '.xml':
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
155 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
156
385
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
157 print>>sys.stderr, 'Processing input file %r' % filename
26
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
158 tree = parse(os.path.join(srcdir, 'main', filename))
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
159 data = {}
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
160
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
161 language = None
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
162 elem = tree.find('.//identity/language')
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
163 if elem is not None:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
164 language = elem.attrib['type']
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
165 print>>sys.stderr, ' Language: %r' % language
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
166
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
167 territory = None
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
168 elem = tree.find('.//identity/territory')
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
169 if elem is not None:
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
170 territory = elem.attrib['type']
13
b6c0de43fa40 Extended and documented `LazyProxy`.
cmlenz
parents: 9
diff changeset
171 else:
b6c0de43fa40 Extended and documented `LazyProxy`.
cmlenz
parents: 9
diff changeset
172 territory = '001' # world
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
173 print>>sys.stderr, ' Territory: %r' % territory
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
174 regions = territory_containment.get(territory, [])
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
175 print>>sys.stderr, ' Regions: %r' % regions
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
176
390
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
177 # plural rules
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
178 locale_id = '_'.join(filter(None, [
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
179 language,
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
180 territory != '001' and territory or None
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
181 ]))
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
182 if locale_id in plural_rules:
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
183 data['plural_form'] = plural_rules[locale_id]
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
184
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
185 # <localeDisplayNames>
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
186
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
187 territories = data.setdefault('territories', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
188 for elem in tree.findall('.//territories/territory'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
189 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
190 and elem.attrib['type'] in territories:
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
191 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
192 territories[elem.attrib['type']] = _text(elem)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
193
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
194 languages = data.setdefault('languages', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
195 for elem in tree.findall('.//languages/language'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
196 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
197 and elem.attrib['type'] in languages:
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
198 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
199 languages[elem.attrib['type']] = _text(elem)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
200
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
201 variants = data.setdefault('variants', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
202 for elem in tree.findall('.//variants/variant'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
203 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
204 and elem.attrib['type'] in variants:
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
205 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
206 variants[elem.attrib['type']] = _text(elem)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
207
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
208 scripts = data.setdefault('scripts', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
209 for elem in tree.findall('.//scripts/script'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
210 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
211 and elem.attrib['type'] in scripts:
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
212 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
213 scripts[elem.attrib['type']] = _text(elem)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
214
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
215 # <dates>
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
216
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
217 week_data = data.setdefault('week_data', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
218 supelem = sup.find('.//weekData')
8
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
219
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
220 for elem in supelem.findall('minDays'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
221 territories = elem.attrib['territories'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
222 if territory in territories or any([r in territories for r in regions]):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
223 week_data['min_days'] = int(elem.attrib['count'])
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
224
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
225 for elem in supelem.findall('firstDay'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
226 territories = elem.attrib['territories'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
227 if territory in territories or any([r in territories for r in regions]):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
228 week_data['first_day'] = weekdays[elem.attrib['day']]
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
229
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
230 for elem in supelem.findall('weekendStart'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
231 territories = elem.attrib['territories'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
232 if territory in territories or any([r in territories for r in regions]):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
233 week_data['weekend_start'] = weekdays[elem.attrib['day']]
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
234
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
235 for elem in supelem.findall('weekendEnd'):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
236 territories = elem.attrib['territories'].split()
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
237 if territory in territories or any([r in territories for r in regions]):
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
238 week_data['weekend_end'] = weekdays[elem.attrib['day']]
9132c9218745 Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents: 1
diff changeset
239
233
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
240 zone_formats = data.setdefault('zone_formats', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
241 for elem in tree.findall('.//timeZoneNames/gmtFormat'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
242 if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
233
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
243 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
244 break
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
245 for elem in tree.findall('.//timeZoneNames/regionFormat'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
246 if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
233
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
247 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
248 break
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
249 for elem in tree.findall('.//timeZoneNames/fallbackFormat'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
250 if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
233
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
251 zone_formats['fallback'] = unicode(elem.text) \
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
252 .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
253 break
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
254
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
255 time_zones = data.setdefault('time_zones', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
256 for elem in tree.findall('.//timeZoneNames/zone'):
28
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
257 info = {}
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
258 city = elem.findtext('exemplarCity')
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
259 if city:
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
260 info['city'] = unicode(city)
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
261 for child in elem.findall('long/*'):
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
262 info.setdefault('long', {})[child.tag] = unicode(child.text)
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
263 for child in elem.findall('short/*'):
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
264 info.setdefault('short', {})[child.tag] = unicode(child.text)
11278622ede9 Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents: 26
diff changeset
265 time_zones[elem.attrib['type']] = info
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
266
233
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
267 meta_zones = data.setdefault('meta_zones', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
268 for elem in tree.findall('.//timeZoneNames/metazone'):
233
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
269 info = {}
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
270 city = elem.findtext('exemplarCity')
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
271 if city:
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
272 info['city'] = unicode(city)
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
273 for child in elem.findall('long/*'):
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
274 info.setdefault('long', {})[child.tag] = unicode(child.text)
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
275 for child in elem.findall('short/*'):
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
276 info.setdefault('short', {})[child.tag] = unicode(child.text)
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
277 info['common'] = elem.findtext('commonlyUsed') == 'true'
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
278 meta_zones[elem.attrib['type']] = info
34
3666f3d3df15 Extended time-zone support.
cmlenz
parents: 33
diff changeset
279
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
280 for calendar in tree.findall('.//calendars/calendar'):
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
281 if calendar.attrib['type'] != 'gregorian':
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
282 # TODO: support other calendar types
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
283 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
284
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
285 months = data.setdefault('months', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
286 for ctxt in calendar.findall('months/monthContext'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
287 ctxt_type = ctxt.attrib['type']
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
288 ctxts = months.setdefault(ctxt_type, {})
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
289 for width in ctxt.findall('monthWidth'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
290 width_type = width.attrib['type']
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
291 widths = ctxts.setdefault(width_type, {})
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
292 for elem in width.getiterator():
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
293 if elem.tag == 'month':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
294 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
295 and int(elem.attrib['type']) in widths:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
296 continue
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
297 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
298 elif elem.tag == 'alias':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
299 ctxts[width_type] = Alias(
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
300 _translate_alias(['months', ctxt_type, width_type],
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
301 elem.attrib['path'])
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
302 )
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
303
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
304 days = data.setdefault('days', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
305 for ctxt in calendar.findall('days/dayContext'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
306 ctxt_type = ctxt.attrib['type']
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
307 ctxts = days.setdefault(ctxt_type, {})
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
308 for width in ctxt.findall('dayWidth'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
309 width_type = width.attrib['type']
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
310 widths = ctxts.setdefault(width_type, {})
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
311 for elem in width.getiterator():
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
312 if elem.tag == 'day':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
313 dtype = weekdays[elem.attrib['type']]
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
314 if ('draft' in elem.attrib or 'alt' not in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
315 and dtype in widths:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
316 continue
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
317 widths[dtype] = unicode(elem.text)
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
318 elif elem.tag == 'alias':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
319 ctxts[width_type] = Alias(
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
320 _translate_alias(['days', ctxt_type, width_type],
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
321 elem.attrib['path'])
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
322 )
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
323
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
324 quarters = data.setdefault('quarters', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
325 for ctxt in calendar.findall('quarters/quarterContext'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
326 ctxt_type = ctxt.attrib['type']
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
327 ctxts = quarters.setdefault(ctxt.attrib['type'], {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
328 for width in ctxt.findall('quarterWidth'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
329 width_type = width.attrib['type']
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
330 widths = ctxts.setdefault(width_type, {})
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
331 for elem in width.getiterator():
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
332 if elem.tag == 'quarter':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
333 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
334 and int(elem.attrib['type']) in widths:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
335 continue
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
336 widths[int(elem.attrib['type'])] = unicode(elem.text)
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
337 elif elem.tag == 'alias':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
338 ctxts[width_type] = Alias(
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
339 _translate_alias(['quarters', ctxt_type, width_type],
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
340 elem.attrib['path'])
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
341 )
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
342
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
343 eras = data.setdefault('eras', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
344 for width in calendar.findall('eras/*'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
345 width_type = NAME_MAP[width.tag]
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
346 widths = eras.setdefault(width_type, {})
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
347 for elem in width.getiterator():
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
348 if elem.tag == 'era':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
349 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
350 and int(elem.attrib['type']) in widths:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
351 continue
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
352 widths[int(elem.attrib.get('type'))] = unicode(elem.text)
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
353 elif elem.tag == 'alias':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
354 eras[width_type] = Alias(
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
355 _translate_alias(['eras', width_type],
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
356 elem.attrib['path'])
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
357 )
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
358
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
359 # AM/PM
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
360 periods = data.setdefault('periods', {})
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
361 for elem in calendar.findall('am'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
362 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
363 and elem.tag in periods:
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
364 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
365 periods[elem.tag] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
366 for elem in calendar.findall('pm'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
367 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
368 and elem.tag in periods:
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
369 continue
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
370 periods[elem.tag] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
371
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
372 date_formats = data.setdefault('date_formats', {})
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
373 for format in calendar.findall('dateFormats'):
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
374 for elem in format.getiterator():
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
375 if elem.tag == 'dateFormatLength':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
376 if 'draft' in elem.attrib and \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
377 elem.attrib.get('type') in date_formats:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
378 continue
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
379 try:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
380 date_formats[elem.attrib.get('type')] = \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
381 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
382 except ValueError, e:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
383 print>>sys.stderr, 'ERROR: %s' % e
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
384 elif elem.tag == 'alias':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
385 date_formats = Alias(_translate_alias(
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
386 ['date_formats'], elem.attrib['path'])
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
387 )
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
388
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
389 time_formats = data.setdefault('time_formats', {})
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
390 for format in calendar.findall('timeFormats'):
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
391 for elem in format.getiterator():
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
392 if elem.tag == 'timeFormatLength':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
393 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
394 and elem.attrib.get('type') in time_formats:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
395 continue
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
396 try:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
397 time_formats[elem.attrib.get('type')] = \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
398 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
399 except ValueError, e:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
400 print>>sys.stderr, 'ERROR: %s' % e
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
401 elif elem.tag == 'alias':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
402 time_formats = Alias(_translate_alias(
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
403 ['time_formats'], elem.attrib['path'])
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
404 )
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
405
33
0740b6d31799 * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
406 datetime_formats = data.setdefault('datetime_formats', {})
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
407 for format in calendar.findall('dateTimeFormats'):
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
408 for elem in format.getiterator():
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
409 if elem.tag == 'dateTimeFormatLength':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
410 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
411 and elem.attrib.get('type') in datetime_formats:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
412 continue
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
413 try:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
414 datetime_formats[elem.attrib.get('type')] = \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
415 unicode(elem.findtext('dateTimeFormat/pattern'))
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
416 except ValueError, e:
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
417 print>>sys.stderr, 'ERROR: %s' % e
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
418 elif elem.tag == 'alias':
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
419 datetime_formats = Alias(_translate_alias(
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
420 ['datetime_formats'], elem.attrib['path'])
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
421 )
33
0740b6d31799 * Import datetime patterns from CLDR.
cmlenz
parents: 28
diff changeset
422
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
423 # <numbers>
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
424
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
425 number_symbols = data.setdefault('number_symbols', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
426 for elem in tree.findall('.//numbers/symbols/*'):
436
fd01923aaf1e Fix the import script to skip alt or draft items in the numbers/symbols
jruigrok
parents: 415
diff changeset
427 if ('draft' in elem.attrib or 'alt' in elem.attrib):
fd01923aaf1e Fix the import script to skip alt or draft items in the numbers/symbols
jruigrok
parents: 415
diff changeset
428 continue
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
429 number_symbols[elem.tag] = unicode(elem.text)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
430
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
431 decimal_formats = data.setdefault('decimal_formats', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
432 for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
433 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
434 and elem.attrib.get('type') in decimal_formats:
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
435 continue
26
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
436 pattern = unicode(elem.findtext('decimalFormat/pattern'))
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
437 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
438
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
439 scientific_formats = data.setdefault('scientific_formats', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
440 for elem in tree.findall('.//scientificFormats/scientificFormatLength'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
441 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
442 and elem.attrib.get('type') in scientific_formats:
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
443 continue
125
b75ae5def3b1 Add currency formatting.
cmlenz
parents: 65
diff changeset
444 pattern = unicode(elem.findtext('scientificFormat/pattern'))
b75ae5def3b1 Add currency formatting.
cmlenz
parents: 65
diff changeset
445 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
446
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
447 currency_formats = data.setdefault('currency_formats', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
448 for elem in tree.findall('.//currencyFormats/currencyFormatLength'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
449 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
450 and elem.attrib.get('type') in currency_formats:
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
451 continue
125
b75ae5def3b1 Add currency formatting.
cmlenz
parents: 65
diff changeset
452 pattern = unicode(elem.findtext('currencyFormat/pattern'))
b75ae5def3b1 Add currency formatting.
cmlenz
parents: 65
diff changeset
453 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
454
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
455 percent_formats = data.setdefault('percent_formats', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
456 for elem in tree.findall('.//percentFormats/percentFormatLength'):
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
457 if ('draft' in elem.attrib or 'alt' in elem.attrib) \
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
458 and elem.attrib.get('type') in percent_formats:
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
459 continue
26
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
460 pattern = unicode(elem.findtext('percentFormat/pattern'))
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
461 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
462
26
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
463 currency_names = data.setdefault('currency_names', {})
710090104678 * Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents: 22
diff changeset
464 currency_symbols = data.setdefault('currency_symbols', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
465 for elem in tree.findall('.//currencies/currency'):
385
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
466 code = elem.attrib['type']
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
467 # TODO: support plural rules for currency name selection
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
468 for name in elem.findall('displayName'):
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
469 if ('draft' in name.attrib or 'count' in name.attrib) \
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
470 and code in currency_names:
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
471 continue
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
472 currency_names[code] = unicode(name.text)
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
473 # TODO: support choice patterns for currency symbol selection
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
474 symbol = elem.find('symbol')
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
475 if symbol is not None and 'draft' not in symbol.attrib \
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
476 and 'choice' not in symbol.attrib:
cd8761c6f1a6 Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents: 375
diff changeset
477 currency_symbols[code] = unicode(symbol.text)
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
478
390
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
479 # <units>
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
480
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
481 unit_patterns = data.setdefault('unit_patterns', {})
467
0228ac359f84 Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents: 436
diff changeset
482 for elem in tree.findall('.//units/unit'):
390
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
483 unit_type = elem.attrib['type']
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
484 unit_pattern = unit_patterns.setdefault(unit_type, {})
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
485 for pattern in elem.findall('unitPattern'):
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
486 unit_patterns[unit_type][pattern.attrib['count']] = \
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
487 unicode(pattern.text)
ecf110e7f604 Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents: 385
diff changeset
488
233
bc22f5aef216 Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents: 125
diff changeset
489 outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb')
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
490 try:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
491 pickle.dump(data, outfile, 2)
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
492 finally:
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
493 outfile.close()
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
494
375
369300a7ebd3 Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents: 345
diff changeset
495
1
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
496 if __name__ == '__main__':
f71ca60f2a4a Import of initial code base.
cmlenz
parents:
diff changeset
497 main()
Copyright (C) 2012-2017 Edgewall Software