Mercurial > babel > old > mirror
annotate scripts/import_cldr.py @ 531:bb65afe3fa88
Bump the copyright.
author | jruigrok |
---|---|
date | Sat, 05 Mar 2011 15:12:39 +0000 |
parents | 6bd3502672b8 |
children | 632b2338e6dd |
rev | line source |
---|---|
3 | 1 #!/usr/bin/env python |
2 # -*- coding: utf-8 -*- | |
3 # | |
531 | 4 # Copyright (C) 2007-2011 Edgewall Software |
3 | 5 # All rights reserved. |
6 # | |
7 # This software is licensed as described in the file COPYING, which | |
8 # you should have received as part of this distribution. The terms | |
9 # are also available at http://babel.edgewall.org/wiki/License. | |
10 # | |
11 # This software consists of voluntary contributions made by many | |
12 # individuals. For the exact contribution history, see the revision | |
13 # history and logs, available at http://babel.edgewall.org/log/. | |
14 | |
15 from optparse import OptionParser | |
16 import os | |
17 import pickle | |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
18 import re |
3 | 19 import sys |
20 try: | |
21 from xml.etree.ElementTree import parse | |
22 except ImportError: | |
23 from elementtree.ElementTree import parse | |
24 | |
67
ad48b95af0d9
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
36
diff
changeset
|
25 # Make sure we're using Babel source, and not some previously installed version |
ad48b95af0d9
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
36
diff
changeset
|
26 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..')) |
ad48b95af0d9
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
36
diff
changeset
|
27 |
11 | 28 from babel import dates, numbers |
392
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
29 from babel.plural import PluralRule |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
30 from babel.localedata import Alias |
3 | 31 |
17 | 32 weekdays = {'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5, |
33 'sun': 6} | |
10
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
34 |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
# Compatibility shim: define ``any`` only on interpreters whose builtins
# lack it.
try:
    any
except NameError:
    def any(iterable):
        """Return True if at least one item of *iterable* is truthy.

        Stops at the first truthy item and always returns a real boolean,
        matching the contract of the builtin this stands in for.
        """
        for item in iterable:
            if item:
                return True
        return False
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
40 |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
41 |
def _text(elem):
    """Return the text contained in *elem*, including nested elements.

    The text of the element and of all its descendants is concatenated in
    document order, and leading/trailing whitespace is removed from the
    final result only, so whitespace between nested elements survives.

    :param elem: an ElementTree element
    :return: the concatenated text, or an empty string if there is none
    """
    def _collect(node, buf):
        if node.text:
            buf.append(node.text)
        for child in node:
            _collect(child, buf)
            # A child's tail is the text that follows it *inside* ``node``,
            # so it is part of ``node``'s content. The tail of the top-level
            # element is deliberately never read: it belongs to its parent.
            if child.tail:
                buf.append(child.tail)
        return buf

    return u''.join(_collect(elem, [])).strip()
48 | |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
49 |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
# The two forms a non-``..`` segment of an alias path may take: a bare
# name, or a name followed by a ``[@type='...']`` predicate.
NAME_RE = re.compile(r"^\w+$")
TYPE_ATTR_RE = re.compile(r"^\w+\[@type='(.*?)'\]$")

# Segment names that are replaced by a different key when translated.
NAME_MAP = {
    'dateFormats': 'date_formats',
    'dateTimeFormats': 'datetime_formats',
    'eraAbbr': 'abbreviated',
    'eraNames': 'wide',
    'eraNarrow': 'narrow',
    'timeFormats': 'time_formats'
}


def _translate_alias(ctxt, path):
    """Translate a relative alias path into a list of keys.

    Starting from a copy of *ctxt*, each ``/``-separated segment of *path*
    is applied in turn:

    * ``..`` removes the last key,
    * ``name[@type='value']`` appends ``value``,
    * a plain ``name`` appends its ``NAME_MAP`` translation, or the name
      itself if it has none.

    :param ctxt: list of keys to start from; it is copied, not modified
    :param path: the alias path, e.g. ``"../eraAbbr"``
    :return: a new list of keys
    :raise ValueError: if a segment matches none of the accepted forms
    :raise IndexError: if ``..`` is applied when no keys are left
    """
    keys = ctxt[:]
    for part in path.split('/'):
        if part == '..':
            keys.pop()
            continue
        match = TYPE_ATTR_RE.match(part)
        if match:
            keys.append(match.group(1))
        elif NAME_RE.match(part):
            keys.append(NAME_MAP.get(part, part))
        else:
            # Raised explicitly rather than asserted so the check is still
            # performed when Python runs with -O.
            raise ValueError('unsupported segment %r in alias path %r'
                             % (part, path))
    return keys
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
76 |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
77 |
3 | 78 def main(): |
79 parser = OptionParser(usage='%prog path/to/cldr') | |
80 options, args = parser.parse_args() | |
81 if len(args) != 1: | |
82 parser.error('incorrect number of arguments') | |
83 | |
84 srcdir = args[0] | |
85 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), | |
235
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
86 '..', 'babel') |
3 | 87 |
10
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
88 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml')) |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
89 |
347
c22f292731be
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
235
diff
changeset
|
90 # Import global data from the supplemental files |
235
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
91 global_data = {} |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
92 |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
93 territory_zones = global_data.setdefault('territory_zones', {}) |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
94 zone_aliases = global_data.setdefault('zone_aliases', {}) |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
95 zone_territories = global_data.setdefault('zone_territories', {}) |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
96 for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'): |
235
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
97 tzid = elem.attrib['type'] |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
98 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
99 zone_territories[tzid] = elem.attrib['territory'] |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
100 if 'aliases' in elem.attrib: |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
101 for alias in elem.attrib['aliases'].split(): |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
102 zone_aliases[alias] = tzid |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
103 |
347
c22f292731be
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
235
diff
changeset
|
104 # Import Metazone mapping |
c22f292731be
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
235
diff
changeset
|
105 meta_zones = global_data.setdefault('meta_zones', {}) |
c22f292731be
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
235
diff
changeset
|
106 tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml')) |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
107 for elem in tzsup.findall('.//timezone'): |
347
c22f292731be
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
235
diff
changeset
|
108 for child in elem.findall('usesMetazone'): |
c22f292731be
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
235
diff
changeset
|
109 if 'to' not in child.attrib: # FIXME: support old mappings |
c22f292731be
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
235
diff
changeset
|
110 meta_zones[elem.attrib['type']] = child.attrib['mzone'] |
c22f292731be
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
235
diff
changeset
|
111 |
235
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
112 outfile = open(os.path.join(destdir, 'global.dat'), 'wb') |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
113 try: |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
114 pickle.dump(global_data, outfile, 2) |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
115 finally: |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
116 outfile.close() |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
117 |
10
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
118 # build a territory containment mapping for inheritance |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
119 regions = {} |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
120 for elem in sup.findall('.//territoryContainment/group'): |
10
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
121 regions[elem.attrib['type']] = elem.attrib['contains'].split() |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
122 |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
123 # Resolve territory containment |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
124 territory_containment = {} |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
125 region_items = regions.items() |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
126 region_items.sort() |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
127 for group, territory_list in region_items: |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
128 for territory in territory_list: |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
129 containers = territory_containment.setdefault(territory, set([])) |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
130 if group in territory_containment: |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
131 containers |= territory_containment[group] |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
132 containers.add(group) |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
133 |
392
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
134 # prepare the per-locale plural rules definitions |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
135 plural_rules = {} |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
136 prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml')) |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
137 for elem in prsup.findall('.//plurals/pluralRules'): |
392
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
138 rules = [] |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
139 for rule in elem.findall('pluralRule'): |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
140 rules.append((rule.attrib['count'], unicode(rule.text))) |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
141 pr = PluralRule(rules) |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
142 for locale in elem.attrib['locales'].split(): |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
143 plural_rules[locale] = pr |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
144 |
3 | 145 filenames = os.listdir(os.path.join(srcdir, 'main')) |
146 filenames.remove('root.xml') | |
147 filenames.sort(lambda a,b: len(a)-len(b)) | |
148 filenames.insert(0, 'root.xml') | |
149 | |
150 for filename in filenames: | |
151 stem, ext = os.path.splitext(filename) | |
152 if ext != '.xml': | |
153 continue | |
154 | |
387
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
155 print>>sys.stderr, 'Processing input file %r' % filename |
28
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
156 tree = parse(os.path.join(srcdir, 'main', filename)) |
3 | 157 data = {} |
158 | |
10
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
159 language = None |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
160 elem = tree.find('.//identity/language') |
10
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
161 if elem is not None: |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
162 language = elem.attrib['type'] |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
163 print>>sys.stderr, ' Language: %r' % language |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
164 |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
165 territory = None |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
166 elem = tree.find('.//identity/territory') |
10
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
167 if elem is not None: |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
168 territory = elem.attrib['type'] |
15 | 169 else: |
170 territory = '001' # world | |
10
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
171 print>>sys.stderr, ' Territory: %r' % territory |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
172 regions = territory_containment.get(territory, []) |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
173 print>>sys.stderr, ' Regions: %r' % regions |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
174 |
392
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
175 # plural rules |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
176 locale_id = '_'.join(filter(None, [ |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
177 language, |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
178 territory != '001' and territory or None |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
179 ])) |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
180 if locale_id in plural_rules: |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
181 data['plural_form'] = plural_rules[locale_id] |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
182 |
3 | 183 # <localeDisplayNames> |
184 | |
185 territories = data.setdefault('territories', {}) | |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
186 for elem in tree.findall('.//territories/territory'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
187 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
188 and elem.attrib['type'] in territories: |
3 | 189 continue |
190 territories[elem.attrib['type']] = _text(elem) | |
191 | |
192 languages = data.setdefault('languages', {}) | |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
193 for elem in tree.findall('.//languages/language'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
194 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
195 and elem.attrib['type'] in languages: |
3 | 196 continue |
197 languages[elem.attrib['type']] = _text(elem) | |
198 | |
199 variants = data.setdefault('variants', {}) | |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
200 for elem in tree.findall('.//variants/variant'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
201 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
202 and elem.attrib['type'] in variants: |
3 | 203 continue |
204 variants[elem.attrib['type']] = _text(elem) | |
205 | |
206 scripts = data.setdefault('scripts', {}) | |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
207 for elem in tree.findall('.//scripts/script'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
208 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
209 and elem.attrib['type'] in scripts: |
3 | 210 continue |
211 scripts[elem.attrib['type']] = _text(elem) | |
212 | |
213 # <dates> | |
214 | |
10
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
215 week_data = data.setdefault('week_data', {}) |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
216 supelem = sup.find('.//weekData') |
10
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
217 |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
218 for elem in supelem.findall('minDays'): |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
219 territories = elem.attrib['territories'].split() |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
220 if territory in territories or any([r in territories for r in regions]): |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
221 week_data['min_days'] = int(elem.attrib['count']) |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
222 |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
223 for elem in supelem.findall('firstDay'): |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
224 territories = elem.attrib['territories'].split() |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
225 if territory in territories or any([r in territories for r in regions]): |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
226 week_data['first_day'] = weekdays[elem.attrib['day']] |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
227 |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
228 for elem in supelem.findall('weekendStart'): |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
229 territories = elem.attrib['territories'].split() |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
230 if territory in territories or any([r in territories for r in regions]): |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
231 week_data['weekend_start'] = weekdays[elem.attrib['day']] |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
232 |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
233 for elem in supelem.findall('weekendEnd'): |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
234 territories = elem.attrib['territories'].split() |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
235 if territory in territories or any([r in territories for r in regions]): |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
236 week_data['weekend_end'] = weekdays[elem.attrib['day']] |
0ca5dd65594f
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
3
diff
changeset
|
237 |
235
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
238 zone_formats = data.setdefault('zone_formats', {}) |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
239 for elem in tree.findall('.//timeZoneNames/gmtFormat'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
240 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
235
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
241 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s') |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
242 break |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
243 for elem in tree.findall('.//timeZoneNames/regionFormat'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
244 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
235
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
245 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s') |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
246 break |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
247 for elem in tree.findall('.//timeZoneNames/fallbackFormat'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
248 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
235
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
249 zone_formats['fallback'] = unicode(elem.text) \ |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
250 .replace('{0}', '%(0)s').replace('{1}', '%(1)s') |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
251 break |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
252 |
3 | 253 time_zones = data.setdefault('time_zones', {}) |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
254 for elem in tree.findall('.//timeZoneNames/zone'): |
30
9a00ac84004c
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
28
diff
changeset
|
255 info = {} |
9a00ac84004c
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
28
diff
changeset
|
256 city = elem.findtext('exemplarCity') |
9a00ac84004c
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
28
diff
changeset
|
257 if city: |
9a00ac84004c
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
28
diff
changeset
|
258 info['city'] = unicode(city) |
9a00ac84004c
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
28
diff
changeset
|
259 for child in elem.findall('long/*'): |
9a00ac84004c
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
28
diff
changeset
|
260 info.setdefault('long', {})[child.tag] = unicode(child.text) |
9a00ac84004c
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
28
diff
changeset
|
261 for child in elem.findall('short/*'): |
9a00ac84004c
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
28
diff
changeset
|
262 info.setdefault('short', {})[child.tag] = unicode(child.text) |
9a00ac84004c
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
28
diff
changeset
|
263 time_zones[elem.attrib['type']] = info |
3 | 264 |
235
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
265 meta_zones = data.setdefault('meta_zones', {}) |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
266 for elem in tree.findall('.//timeZoneNames/metazone'): |
235
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
267 info = {} |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
268 city = elem.findtext('exemplarCity') |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
269 if city: |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
270 info['city'] = unicode(city) |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
271 for child in elem.findall('long/*'): |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
272 info.setdefault('long', {})[child.tag] = unicode(child.text) |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
273 for child in elem.findall('short/*'): |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
274 info.setdefault('short', {})[child.tag] = unicode(child.text) |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
275 info['common'] = elem.findtext('commonlyUsed') == 'true' |
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
276 meta_zones[elem.attrib['type']] = info |
36 | 277 |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
278 for calendar in tree.findall('.//calendars/calendar'): |
3 | 279 if calendar.attrib['type'] != 'gregorian': |
280 # TODO: support other calendar types | |
281 continue | |
282 | |
283 months = data.setdefault('months', {}) | |
284 for ctxt in calendar.findall('months/monthContext'): | |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
285 ctxt_type = ctxt.attrib['type'] |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
286 ctxts = months.setdefault(ctxt_type, {}) |
3 | 287 for width in ctxt.findall('monthWidth'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
288 width_type = width.attrib['type'] |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
289 widths = ctxts.setdefault(width_type, {}) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
290 for elem in width.getiterator(): |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
291 if elem.tag == 'month': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
292 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
293 and int(elem.attrib['type']) in widths: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
294 continue |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
295 widths[int(elem.attrib.get('type'))] = unicode(elem.text) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
296 elif elem.tag == 'alias': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
297 ctxts[width_type] = Alias( |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
298 _translate_alias(['months', ctxt_type, width_type], |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
299 elem.attrib['path']) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
300 ) |
3 | 301 |
302 days = data.setdefault('days', {}) | |
303 for ctxt in calendar.findall('days/dayContext'): | |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
304 ctxt_type = ctxt.attrib['type'] |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
305 ctxts = days.setdefault(ctxt_type, {}) |
3 | 306 for width in ctxt.findall('dayWidth'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
307 width_type = width.attrib['type'] |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
308 widths = ctxts.setdefault(width_type, {}) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
309 for elem in width.getiterator(): |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
310 if elem.tag == 'day': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
311 dtype = weekdays[elem.attrib['type']] |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
312 if ('draft' in elem.attrib or 'alt' not in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
313 and dtype in widths: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
314 continue |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
315 widths[dtype] = unicode(elem.text) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
316 elif elem.tag == 'alias': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
317 ctxts[width_type] = Alias( |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
318 _translate_alias(['days', ctxt_type, width_type], |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
319 elem.attrib['path']) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
320 ) |
3 | 321 |
322 quarters = data.setdefault('quarters', {}) | |
323 for ctxt in calendar.findall('quarters/quarterContext'): | |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
324 ctxt_type = ctxt.attrib['type'] |
3 | 325 ctxts = quarters.setdefault(ctxt.attrib['type'], {}) |
326 for width in ctxt.findall('quarterWidth'): | |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
327 width_type = width.attrib['type'] |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
328 widths = ctxts.setdefault(width_type, {}) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
329 for elem in width.getiterator(): |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
330 if elem.tag == 'quarter': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
331 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
332 and int(elem.attrib['type']) in widths: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
333 continue |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
334 widths[int(elem.attrib['type'])] = unicode(elem.text) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
335 elif elem.tag == 'alias': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
336 ctxts[width_type] = Alias( |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
337 _translate_alias(['quarters', ctxt_type, width_type], |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
338 elem.attrib['path']) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
339 ) |
3 | 340 |
341 eras = data.setdefault('eras', {}) | |
342 for width in calendar.findall('eras/*'): | |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
343 width_type = NAME_MAP[width.tag] |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
344 widths = eras.setdefault(width_type, {}) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
345 for elem in width.getiterator(): |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
346 if elem.tag == 'era': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
347 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
348 and int(elem.attrib['type']) in widths: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
349 continue |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
350 widths[int(elem.attrib.get('type'))] = unicode(elem.text) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
351 elif elem.tag == 'alias': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
352 eras[width_type] = Alias( |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
353 _translate_alias(['eras', width_type], |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
354 elem.attrib['path']) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
355 ) |
3 | 356 |
357 # AM/PM | |
358 periods = data.setdefault('periods', {}) | |
359 for elem in calendar.findall('am'): | |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
360 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
361 and elem.tag in periods: |
3 | 362 continue |
363 periods[elem.tag] = unicode(elem.text) | |
364 for elem in calendar.findall('pm'): | |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
365 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
366 and elem.tag in periods: |
3 | 367 continue |
368 periods[elem.tag] = unicode(elem.text) | |
369 | |
370 date_formats = data.setdefault('date_formats', {}) | |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
371 for format in calendar.findall('dateFormats'): |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
372 for elem in format.getiterator(): |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
373 if elem.tag == 'dateFormatLength': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
374 if 'draft' in elem.attrib and \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
375 elem.attrib.get('type') in date_formats: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
376 continue |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
377 try: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
378 date_formats[elem.attrib.get('type')] = \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
379 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern'))) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
380 except ValueError, e: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
381 print>>sys.stderr, 'ERROR: %s' % e |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
382 elif elem.tag == 'alias': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
383 date_formats = Alias(_translate_alias( |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
384 ['date_formats'], elem.attrib['path']) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
385 ) |
3 | 386 |
387 time_formats = data.setdefault('time_formats', {}) | |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
388 for format in calendar.findall('timeFormats'): |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
389 for elem in format.getiterator(): |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
390 if elem.tag == 'timeFormatLength': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
391 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
392 and elem.attrib.get('type') in time_formats: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
393 continue |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
394 try: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
395 time_formats[elem.attrib.get('type')] = \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
396 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern'))) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
397 except ValueError, e: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
398 print>>sys.stderr, 'ERROR: %s' % e |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
399 elif elem.tag == 'alias': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
400 time_formats = Alias(_translate_alias( |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
401 ['time_formats'], elem.attrib['path']) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
402 ) |
3 | 403 |
35 | 404 datetime_formats = data.setdefault('datetime_formats', {}) |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
405 for format in calendar.findall('dateTimeFormats'): |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
406 for elem in format.getiterator(): |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
407 if elem.tag == 'dateTimeFormatLength': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
408 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
409 and elem.attrib.get('type') in datetime_formats: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
410 continue |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
411 try: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
412 datetime_formats[elem.attrib.get('type')] = \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
413 unicode(elem.findtext('dateTimeFormat/pattern')) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
414 except ValueError, e: |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
415 print>>sys.stderr, 'ERROR: %s' % e |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
416 elif elem.tag == 'alias': |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
417 datetime_formats = Alias(_translate_alias( |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
418 ['datetime_formats'], elem.attrib['path']) |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
419 ) |
35 | 420 |
3 | 421 # <numbers> |
422 | |
423 number_symbols = data.setdefault('number_symbols', {}) | |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
424 for elem in tree.findall('.//numbers/symbols/*'): |
438
783f0a1970c2
Fix the import script to skip alt or draft items in the numbers/symbols
jruigrok
parents:
417
diff
changeset
|
425 if ('draft' in elem.attrib or 'alt' in elem.attrib): |
783f0a1970c2
Fix the import script to skip alt or draft items in the numbers/symbols
jruigrok
parents:
417
diff
changeset
|
426 continue |
3 | 427 number_symbols[elem.tag] = unicode(elem.text) |
428 | |
429 decimal_formats = data.setdefault('decimal_formats', {}) | |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
430 for elem in tree.findall('.//decimalFormats/decimalFormatLength'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
431 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
432 and elem.attrib.get('type') in decimal_formats: |
3 | 433 continue |
28
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
434 pattern = unicode(elem.findtext('decimalFormat/pattern')) |
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
435 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
3 | 436 |
437 scientific_formats = data.setdefault('scientific_formats', {}) | |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
438 for elem in tree.findall('.//scientificFormats/scientificFormatLength'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
439 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
440 and elem.attrib.get('type') in scientific_formats: |
3 | 441 continue |
127 | 442 pattern = unicode(elem.findtext('scientificFormat/pattern')) |
443 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | |
3 | 444 |
445 currency_formats = data.setdefault('currency_formats', {}) | |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
446 for elem in tree.findall('.//currencyFormats/currencyFormatLength'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
447 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
448 and elem.attrib.get('type') in currency_formats: |
3 | 449 continue |
127 | 450 pattern = unicode(elem.findtext('currencyFormat/pattern')) |
451 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | |
3 | 452 |
453 percent_formats = data.setdefault('percent_formats', {}) | |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
454 for elem in tree.findall('.//percentFormats/percentFormatLength'): |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
455 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
456 and elem.attrib.get('type') in percent_formats: |
3 | 457 continue |
28
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
458 pattern = unicode(elem.findtext('percentFormat/pattern')) |
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
459 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
3 | 460 |
28
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
461 currency_names = data.setdefault('currency_names', {}) |
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
462 currency_symbols = data.setdefault('currency_symbols', {}) |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
463 for elem in tree.findall('.//currencies/currency'): |
387
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
464 code = elem.attrib['type'] |
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
465 # TODO: support plural rules for currency name selection |
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
466 for name in elem.findall('displayName'): |
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
467 if ('draft' in name.attrib or 'count' in name.attrib) \ |
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
468 and code in currency_names: |
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
469 continue |
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
470 currency_names[code] = unicode(name.text) |
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
471 # TODO: support choice patterns for currency symbol selection |
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
472 symbol = elem.find('symbol') |
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
473 if symbol is not None and 'draft' not in symbol.attrib \ |
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
474 and 'choice' not in symbol.attrib: |
88e3589ca8df
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
377
diff
changeset
|
475 currency_symbols[code] = unicode(symbol.text) |
3 | 476 |
392
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
477 # <units> |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
478 |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
479 unit_patterns = data.setdefault('unit_patterns', {}) |
469
b48184a21da6
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
438
diff
changeset
|
480 for elem in tree.findall('.//units/unit'): |
392
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
481 unit_type = elem.attrib['type'] |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
482 unit_pattern = unit_patterns.setdefault(unit_type, {}) |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
483 for pattern in elem.findall('unitPattern'): |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
484 unit_patterns[unit_type][pattern.attrib['count']] = \ |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
485 unicode(pattern.text) |
34c0a25b1ed7
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
387
diff
changeset
|
486 |
235
d0cd235ede46
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
127
diff
changeset
|
487 outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb') |
3 | 488 try: |
489 pickle.dump(data, outfile, 2) | |
490 finally: | |
491 outfile.close() | |
492 | |
377
841858d5b567
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
347
diff
changeset
|
493 |
3 | 494 if __name__ == '__main__': |
495 main() |