Mercurial > babel > mirror
annotate scripts/import_cldr.py @ 569:1b801a0cb2cb trunk
Support for context-aware methods during message extraction (fixes #229, patch by David Rios)
author | fschwarz |
---|---|
date | Mon, 26 Sep 2011 20:01:01 +0000 |
parents | c81a11cb1476 |
children | 8ce41e60f90d |
rev | line source |
---|---|
1 | 1 #!/usr/bin/env python |
2 # -*- coding: utf-8 -*- | |
3 # | |
529 | 4 # Copyright (C) 2007-2011 Edgewall Software |
1 | 5 # All rights reserved. |
6 # | |
7 # This software is licensed as described in the file COPYING, which | |
8 # you should have received as part of this distribution. The terms | |
9 # are also available at http://babel.edgewall.org/wiki/License. | |
10 # | |
11 # This software consists of voluntary contributions made by many | |
12 # individuals. For the exact contribution history, see the revision | |
13 # history and logs, available at http://babel.edgewall.org/log/. | |
14 | |
549
1de26da5aa25
use cPickle instead of pickle for better performance (fixes #225)
fschwarz
parents:
529
diff
changeset
|
15 import cPickle as pickle |
1 | 16 from optparse import OptionParser |
17 import os | |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
18 import re |
1 | 19 import sys |
20 | |
65
75fe8369ed3b
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
34
diff
changeset
|
21 # Make sure we're using Babel source, and not some previously installed version |
75fe8369ed3b
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
34
diff
changeset
|
22 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..')) |
75fe8369ed3b
Add Babel source path to CLDR import script automatically for asmodai ;-).
cmlenz
parents:
34
diff
changeset
|
23 |
9 | 24 from babel import dates, numbers |
567
c81a11cb1476
add a compat module to shield the code from changes in different versions of Python
fschwarz
parents:
549
diff
changeset
|
25 from babel.compat import any, ElementTree |
390
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
26 from babel.plural import PluralRule |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
27 from babel.localedata import Alias |
1 | 28 |
567
c81a11cb1476
add a compat module to shield the code from changes in different versions of Python
fschwarz
parents:
549
diff
changeset
|
29 parse = ElementTree.parse |
15 | 30 weekdays = {'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5, |
31 'sun': 6} | |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
32 |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
33 |
def _text(elem):
    """Return the text contained in ``elem``, including all descendants.

    The result is the concatenation, in document order, of the element's own
    text, the text of every nested element, and the text that follows each
    nested element inside ``elem``. Leading and trailing whitespace is
    removed once, from the final result only, so whitespace between nested
    elements and the surrounding text is preserved.

    The ``tail`` of ``elem`` itself is deliberately not included: in
    ElementTree it holds the text that comes *after* the element's end tag,
    which is not part of the element's content.

    :param elem: an ElementTree element
    :return: the stripped text content as a unicode string (empty if the
             element contains no text)
    """
    def _collect(node, buf):
        # ``text`` is the text before the first child (or all of the text
        # when there are no children); it is None when absent.
        if node.text:
            buf.append(node.text)
        for child in node:
            _collect(child, buf)
            # A child's tail sits inside ``node``, so it belongs to the
            # content of ``node`` and is added here, by the parent.
            if child.tail:
                buf.append(child.tail)

    buf = []
    _collect(elem, buf)
    return u''.join(buf).strip()
40 | |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
41 |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
# An alias path segment that is a bare element name, e.g. ``dateFormats``.
NAME_RE = re.compile(r"^\w+$")
# An alias path segment that selects by ``type`` attribute, e.g.
# ``monthWidth[@type='wide']``; group 1 captures the attribute value.
TYPE_ATTR_RE = re.compile(r"^\w+\[@type='(.*?)'\]$")

# Element names that are stored under a different key; names that are not
# listed here are used as keys unchanged.
NAME_MAP = {
    'dateFormats': 'date_formats',
    'dateTimeFormats': 'datetime_formats',
    'eraAbbr': 'abbreviated',
    'eraNames': 'wide',
    'eraNarrow': 'narrow',
    'timeFormats': 'time_formats',
}


def _translate_alias(ctxt, path):
    """Resolve a relative alias path against a list of context keys.

    ``path`` is split on ``/`` and applied segment by segment to a copy of
    ``ctxt``:

    * ``..`` removes the last key,
    * ``name[@type='value']`` appends ``value``,
    * a bare ``name`` appends ``NAME_MAP[name]`` if present, else ``name``.

    :param ctxt: list of keys describing the current position; it is copied
                 and never modified
    :param path: the alias path, with segments separated by ``/``
    :return: a new list of keys
    :raise ValueError: if a segment is none of the three supported forms
    :raise IndexError: if ``path`` contains more ``..`` segments than there
                       are keys left to remove
    """
    keys = list(ctxt)
    for part in path.split('/'):
        if part == '..':
            keys.pop()
            continue
        match = TYPE_ATTR_RE.match(part)
        if match:
            keys.append(match.group(1))
            continue
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would let malformed segments through.
        if not NAME_RE.match(part):
            raise ValueError('unsupported alias path segment %r in %r'
                             % (part, path))
        keys.append(NAME_MAP.get(part, part))
    return keys
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
68 |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
69 |
1 | 70 def main(): |
71 parser = OptionParser(usage='%prog path/to/cldr') | |
72 options, args = parser.parse_args() | |
73 if len(args) != 1: | |
74 parser.error('incorrect number of arguments') | |
75 | |
76 srcdir = args[0] | |
77 destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), | |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
78 '..', 'babel') |
1 | 79 |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
80 sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml')) |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
81 |
345
e818b03b370f
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
233
diff
changeset
|
82 # Import global data from the supplemental files |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
83 global_data = {} |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
84 |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
85 territory_zones = global_data.setdefault('territory_zones', {}) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
86 zone_aliases = global_data.setdefault('zone_aliases', {}) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
87 zone_territories = global_data.setdefault('zone_territories', {}) |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
88 for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'): |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
89 tzid = elem.attrib['type'] |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
90 territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
91 zone_territories[tzid] = elem.attrib['territory'] |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
92 if 'aliases' in elem.attrib: |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
93 for alias in elem.attrib['aliases'].split(): |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
94 zone_aliases[alias] = tzid |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
95 |
345
e818b03b370f
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
233
diff
changeset
|
96 # Import Metazone mapping |
e818b03b370f
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
233
diff
changeset
|
97 meta_zones = global_data.setdefault('meta_zones', {}) |
e818b03b370f
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
233
diff
changeset
|
98 tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml')) |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
99 for elem in tzsup.findall('.//timezone'): |
345
e818b03b370f
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
233
diff
changeset
|
100 for child in elem.findall('usesMetazone'): |
e818b03b370f
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
233
diff
changeset
|
101 if 'to' not in child.attrib: # FIXME: support old mappings |
e818b03b370f
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
233
diff
changeset
|
102 meta_zones[elem.attrib['type']] = child.attrib['mzone'] |
e818b03b370f
Update to CLDR 1.5.1, which split out the metazone mappings into a separate supplemental file.
cmlenz
parents:
233
diff
changeset
|
103 |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
104 outfile = open(os.path.join(destdir, 'global.dat'), 'wb') |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
105 try: |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
106 pickle.dump(global_data, outfile, 2) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
107 finally: |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
108 outfile.close() |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
109 |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
110 # build a territory containment mapping for inheritance |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
111 regions = {} |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
112 for elem in sup.findall('.//territoryContainment/group'): |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
113 regions[elem.attrib['type']] = elem.attrib['contains'].split() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
114 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
115 # Resolve territory containment |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
116 territory_containment = {} |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
117 region_items = regions.items() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
118 region_items.sort() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
119 for group, territory_list in region_items: |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
120 for territory in territory_list: |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
121 containers = territory_containment.setdefault(territory, set([])) |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
122 if group in territory_containment: |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
123 containers |= territory_containment[group] |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
124 containers.add(group) |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
125 |
390
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
126 # prepare the per-locale plural rules definitions |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
127 plural_rules = {} |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
128 prsup = parse(os.path.join(srcdir, 'supplemental', 'plurals.xml')) |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
129 for elem in prsup.findall('.//plurals/pluralRules'): |
390
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
130 rules = [] |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
131 for rule in elem.findall('pluralRule'): |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
132 rules.append((rule.attrib['count'], unicode(rule.text))) |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
133 pr = PluralRule(rules) |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
134 for locale in elem.attrib['locales'].split(): |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
135 plural_rules[locale] = pr |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
136 |
1 | 137 filenames = os.listdir(os.path.join(srcdir, 'main')) |
138 filenames.remove('root.xml') | |
139 filenames.sort(lambda a,b: len(a)-len(b)) | |
140 filenames.insert(0, 'root.xml') | |
141 | |
142 for filename in filenames: | |
143 stem, ext = os.path.splitext(filename) | |
144 if ext != '.xml': | |
145 continue | |
146 | |
385
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
147 print>>sys.stderr, 'Processing input file %r' % filename |
26
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
148 tree = parse(os.path.join(srcdir, 'main', filename)) |
1 | 149 data = {} |
150 | |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
151 language = None |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
152 elem = tree.find('.//identity/language') |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
153 if elem is not None: |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
154 language = elem.attrib['type'] |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
155 print>>sys.stderr, ' Language: %r' % language |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
156 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
157 territory = None |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
158 elem = tree.find('.//identity/territory') |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
159 if elem is not None: |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
160 territory = elem.attrib['type'] |
13 | 161 else: |
162 territory = '001' # world | |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
163 print>>sys.stderr, ' Territory: %r' % territory |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
164 regions = territory_containment.get(territory, []) |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
165 print>>sys.stderr, ' Regions: %r' % regions |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
166 |
390
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
167 # plural rules |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
168 locale_id = '_'.join(filter(None, [ |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
169 language, |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
170 territory != '001' and territory or None |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
171 ])) |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
172 if locale_id in plural_rules: |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
173 data['plural_form'] = plural_rules[locale_id] |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
174 |
1 | 175 # <localeDisplayNames> |
176 | |
177 territories = data.setdefault('territories', {}) | |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
178 for elem in tree.findall('.//territories/territory'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
179 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
180 and elem.attrib['type'] in territories: |
1 | 181 continue |
182 territories[elem.attrib['type']] = _text(elem) | |
183 | |
184 languages = data.setdefault('languages', {}) | |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
185 for elem in tree.findall('.//languages/language'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
186 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
187 and elem.attrib['type'] in languages: |
1 | 188 continue |
189 languages[elem.attrib['type']] = _text(elem) | |
190 | |
191 variants = data.setdefault('variants', {}) | |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
192 for elem in tree.findall('.//variants/variant'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
193 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
194 and elem.attrib['type'] in variants: |
1 | 195 continue |
196 variants[elem.attrib['type']] = _text(elem) | |
197 | |
198 scripts = data.setdefault('scripts', {}) | |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
199 for elem in tree.findall('.//scripts/script'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
200 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
201 and elem.attrib['type'] in scripts: |
1 | 202 continue |
203 scripts[elem.attrib['type']] = _text(elem) | |
204 | |
205 # <dates> | |
206 | |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
207 week_data = data.setdefault('week_data', {}) |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
208 supelem = sup.find('.//weekData') |
8
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
209 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
210 for elem in supelem.findall('minDays'): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
211 territories = elem.attrib['territories'].split() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
212 if territory in territories or any([r in territories for r in regions]): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
213 week_data['min_days'] = int(elem.attrib['count']) |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
214 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
215 for elem in supelem.findall('firstDay'): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
216 territories = elem.attrib['territories'].split() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
217 if territory in territories or any([r in territories for r in regions]): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
218 week_data['first_day'] = weekdays[elem.attrib['day']] |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
219 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
220 for elem in supelem.findall('weekendStart'): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
221 territories = elem.attrib['territories'].split() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
222 if territory in territories or any([r in territories for r in regions]): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
223 week_data['weekend_start'] = weekdays[elem.attrib['day']] |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
224 |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
225 for elem in supelem.findall('weekendEnd'): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
226 territories = elem.attrib['territories'].split() |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
227 if territory in territories or any([r in territories for r in regions]): |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
228 week_data['weekend_end'] = weekdays[elem.attrib['day']] |
29f6f9a90f14
Pull in some supplemental data from the CLDR, for things like the first day of the week.
cmlenz
parents:
1
diff
changeset
|
229 |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
230 zone_formats = data.setdefault('zone_formats', {}) |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
231 for elem in tree.findall('.//timeZoneNames/gmtFormat'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
232 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
233 zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s') |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
234 break |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
235 for elem in tree.findall('.//timeZoneNames/regionFormat'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
236 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
237 zone_formats['region'] = unicode(elem.text).replace('{0}', '%s') |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
238 break |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
239 for elem in tree.findall('.//timeZoneNames/fallbackFormat'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
240 if 'draft' not in elem.attrib and 'alt' not in elem.attrib: |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
241 zone_formats['fallback'] = unicode(elem.text) \ |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
242 .replace('{0}', '%(0)s').replace('{1}', '%(1)s') |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
243 break |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
244 |
1 | 245 time_zones = data.setdefault('time_zones', {}) |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
246 for elem in tree.findall('.//timeZoneNames/zone'): |
28
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
247 info = {} |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
248 city = elem.findtext('exemplarCity') |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
249 if city: |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
250 info['city'] = unicode(city) |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
251 for child in elem.findall('long/*'): |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
252 info.setdefault('long', {})[child.tag] = unicode(child.text) |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
253 for child in elem.findall('short/*'): |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
254 info.setdefault('short', {})[child.tag] = unicode(child.text) |
b00b06e5ace8
Import basic timezone info from CLDR (see #3). Still missing a couple other pieces in the puzzle.
cmlenz
parents:
26
diff
changeset
|
255 time_zones[elem.attrib['type']] = info |
1 | 256 |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
257 meta_zones = data.setdefault('meta_zones', {}) |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
258 for elem in tree.findall('.//timeZoneNames/metazone'): |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
259 info = {} |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
260 city = elem.findtext('exemplarCity') |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
261 if city: |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
262 info['city'] = unicode(city) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
263 for child in elem.findall('long/*'): |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
264 info.setdefault('long', {})[child.tag] = unicode(child.text) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
265 for child in elem.findall('short/*'): |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
266 info.setdefault('short', {})[child.tag] = unicode(child.text) |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
267 info['common'] = elem.findtext('commonlyUsed') == 'true' |
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
268 meta_zones[elem.attrib['type']] = info |
34 | 269 |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
270 for calendar in tree.findall('.//calendars/calendar'): |
1 | 271 if calendar.attrib['type'] != 'gregorian': |
272 # TODO: support other calendar types | |
273 continue | |
274 | |
275 months = data.setdefault('months', {}) | |
276 for ctxt in calendar.findall('months/monthContext'): | |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
277 ctxt_type = ctxt.attrib['type'] |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
278 ctxts = months.setdefault(ctxt_type, {}) |
1 | 279 for width in ctxt.findall('monthWidth'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
280 width_type = width.attrib['type'] |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
281 widths = ctxts.setdefault(width_type, {}) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
282 for elem in width.getiterator(): |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
283 if elem.tag == 'month': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
284 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
285 and int(elem.attrib['type']) in widths: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
286 continue |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
287 widths[int(elem.attrib.get('type'))] = unicode(elem.text) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
288 elif elem.tag == 'alias': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
289 ctxts[width_type] = Alias( |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
290 _translate_alias(['months', ctxt_type, width_type], |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
291 elem.attrib['path']) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
292 ) |
1 | 293 |
294 days = data.setdefault('days', {}) | |
295 for ctxt in calendar.findall('days/dayContext'): | |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
296 ctxt_type = ctxt.attrib['type'] |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
297 ctxts = days.setdefault(ctxt_type, {}) |
1 | 298 for width in ctxt.findall('dayWidth'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
299 width_type = width.attrib['type'] |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
300 widths = ctxts.setdefault(width_type, {}) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
301 for elem in width.getiterator(): |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
302 if elem.tag == 'day': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
303 dtype = weekdays[elem.attrib['type']] |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
304 if ('draft' in elem.attrib or 'alt' not in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
305 and dtype in widths: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
306 continue |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
307 widths[dtype] = unicode(elem.text) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
308 elif elem.tag == 'alias': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
309 ctxts[width_type] = Alias( |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
310 _translate_alias(['days', ctxt_type, width_type], |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
311 elem.attrib['path']) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
312 ) |
1 | 313 |
314 quarters = data.setdefault('quarters', {}) | |
315 for ctxt in calendar.findall('quarters/quarterContext'): | |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
316 ctxt_type = ctxt.attrib['type'] |
1 | 317 ctxts = quarters.setdefault(ctxt.attrib['type'], {}) |
318 for width in ctxt.findall('quarterWidth'): | |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
319 width_type = width.attrib['type'] |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
320 widths = ctxts.setdefault(width_type, {}) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
321 for elem in width.getiterator(): |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
322 if elem.tag == 'quarter': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
323 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
324 and int(elem.attrib['type']) in widths: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
325 continue |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
326 widths[int(elem.attrib['type'])] = unicode(elem.text) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
327 elif elem.tag == 'alias': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
328 ctxts[width_type] = Alias( |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
329 _translate_alias(['quarters', ctxt_type, width_type], |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
330 elem.attrib['path']) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
331 ) |
1 | 332 |
333 eras = data.setdefault('eras', {}) | |
334 for width in calendar.findall('eras/*'): | |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
335 width_type = NAME_MAP[width.tag] |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
336 widths = eras.setdefault(width_type, {}) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
337 for elem in width.getiterator(): |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
338 if elem.tag == 'era': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
339 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
340 and int(elem.attrib['type']) in widths: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
341 continue |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
342 widths[int(elem.attrib.get('type'))] = unicode(elem.text) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
343 elif elem.tag == 'alias': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
344 eras[width_type] = Alias( |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
345 _translate_alias(['eras', width_type], |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
346 elem.attrib['path']) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
347 ) |
1 | 348 |
349 # AM/PM | |
350 periods = data.setdefault('periods', {}) | |
351 for elem in calendar.findall('am'): | |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
352 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
353 and elem.tag in periods: |
1 | 354 continue |
355 periods[elem.tag] = unicode(elem.text) | |
356 for elem in calendar.findall('pm'): | |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
357 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
358 and elem.tag in periods: |
1 | 359 continue |
360 periods[elem.tag] = unicode(elem.text) | |
361 | |
362 date_formats = data.setdefault('date_formats', {}) | |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
363 for format in calendar.findall('dateFormats'): |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
364 for elem in format.getiterator(): |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
365 if elem.tag == 'dateFormatLength': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
366 if 'draft' in elem.attrib and \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
367 elem.attrib.get('type') in date_formats: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
368 continue |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
369 try: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
370 date_formats[elem.attrib.get('type')] = \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
371 dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern'))) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
372 except ValueError, e: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
373 print>>sys.stderr, 'ERROR: %s' % e |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
374 elif elem.tag == 'alias': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
375 date_formats = Alias(_translate_alias( |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
376 ['date_formats'], elem.attrib['path']) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
377 ) |
1 | 378 |
379 time_formats = data.setdefault('time_formats', {}) | |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
380 for format in calendar.findall('timeFormats'): |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
381 for elem in format.getiterator(): |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
382 if elem.tag == 'timeFormatLength': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
383 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
384 and elem.attrib.get('type') in time_formats: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
385 continue |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
386 try: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
387 time_formats[elem.attrib.get('type')] = \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
388 dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern'))) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
389 except ValueError, e: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
390 print>>sys.stderr, 'ERROR: %s' % e |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
391 elif elem.tag == 'alias': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
392 time_formats = Alias(_translate_alias( |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
393 ['time_formats'], elem.attrib['path']) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
394 ) |
1 | 395 |
33 | 396 datetime_formats = data.setdefault('datetime_formats', {}) |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
397 for format in calendar.findall('dateTimeFormats'): |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
398 for elem in format.getiterator(): |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
399 if elem.tag == 'dateTimeFormatLength': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
400 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
401 and elem.attrib.get('type') in datetime_formats: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
402 continue |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
403 try: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
404 datetime_formats[elem.attrib.get('type')] = \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
405 unicode(elem.findtext('dateTimeFormat/pattern')) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
406 except ValueError, e: |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
407 print>>sys.stderr, 'ERROR: %s' % e |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
408 elif elem.tag == 'alias': |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
409 datetime_formats = Alias(_translate_alias( |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
410 ['datetime_formats'], elem.attrib['path']) |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
411 ) |
33 | 412 |
1 | 413 # <numbers> |
414 | |
415 number_symbols = data.setdefault('number_symbols', {}) | |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
416 for elem in tree.findall('.//numbers/symbols/*'): |
436
53d32919ed65
Fix the import script to skip alt or draft items in the numbers/symbols
jruigrok
parents:
415
diff
changeset
|
417 if ('draft' in elem.attrib or 'alt' in elem.attrib): |
53d32919ed65
Fix the import script to skip alt or draft items in the numbers/symbols
jruigrok
parents:
415
diff
changeset
|
418 continue |
1 | 419 number_symbols[elem.tag] = unicode(elem.text) |
420 | |
421 decimal_formats = data.setdefault('decimal_formats', {}) | |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
422 for elem in tree.findall('.//decimalFormats/decimalFormatLength'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
423 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
424 and elem.attrib.get('type') in decimal_formats: |
1 | 425 continue |
26
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
426 pattern = unicode(elem.findtext('decimalFormat/pattern')) |
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
427 decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
1 | 428 |
429 scientific_formats = data.setdefault('scientific_formats', {}) | |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
430 for elem in tree.findall('.//scientificFormats/scientificFormatLength'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
431 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
432 and elem.attrib.get('type') in scientific_formats: |
1 | 433 continue |
125 | 434 pattern = unicode(elem.findtext('scientificFormat/pattern')) |
435 scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | |
1 | 436 |
437 currency_formats = data.setdefault('currency_formats', {}) | |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
438 for elem in tree.findall('.//currencyFormats/currencyFormatLength'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
439 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
440 and elem.attrib.get('type') in currency_formats: |
1 | 441 continue |
125 | 442 pattern = unicode(elem.findtext('currencyFormat/pattern')) |
443 currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) | |
1 | 444 |
445 percent_formats = data.setdefault('percent_formats', {}) | |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
446 for elem in tree.findall('.//percentFormats/percentFormatLength'): |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
447 if ('draft' in elem.attrib or 'alt' in elem.attrib) \ |
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
448 and elem.attrib.get('type') in percent_formats: |
1 | 449 continue |
26
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
450 pattern = unicode(elem.findtext('percentFormat/pattern')) |
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
451 percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern) |
1 | 452 |
26
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
453 currency_names = data.setdefault('currency_names', {}) |
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
454 currency_symbols = data.setdefault('currency_symbols', {}) |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
455 for elem in tree.findall('.//currencies/currency'): |
385
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
456 code = elem.attrib['type'] |
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
457 # TODO: support plural rules for currency name selection |
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
458 for name in elem.findall('displayName'): |
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
459 if ('draft' in name.attrib or 'count' in name.attrib) \ |
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
460 and code in currency_names: |
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
461 continue |
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
462 currency_names[code] = unicode(name.text) |
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
463 # TODO: support choice patterns for currency symbol selection |
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
464 symbol = elem.find('symbol') |
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
465 if symbol is not None and 'draft' not in symbol.attrib \ |
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
466 and 'choice' not in symbol.attrib: |
38db48990998
Improve CLDR import of currency-related data to ignore unsupported features such as symbol choice patterns and pluralized display names. See #93.
cmlenz
parents:
375
diff
changeset
|
467 currency_symbols[code] = unicode(symbol.text) |
1 | 468 |
390
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
469 # <units> |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
470 |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
471 unit_patterns = data.setdefault('unit_patterns', {}) |
467
7e0387a936f5
Fix the ElementTree find()/findall() syntax to be compatible with Python 2.7.
jruigrok
parents:
436
diff
changeset
|
472 for elem in tree.findall('.//units/unit'): |
390
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
473 unit_type = elem.attrib['type'] |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
474 unit_pattern = unit_patterns.setdefault(unit_type, {}) |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
475 for pattern in elem.findall('unitPattern'): |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
476 unit_patterns[unit_type][pattern.attrib['count']] = \ |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
477 unicode(pattern.text) |
c5bc0f6822a9
Preliminary support for timedelta formatting (see #126), and import/expose the locale plural rules from the CLDR.
cmlenz
parents:
385
diff
changeset
|
478 |
233
da97a3138239
Upgraded to CLDR 1.5 and improved timezone formatting.
cmlenz
parents:
125
diff
changeset
|
479 outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb') |
1 | 480 try: |
481 pickle.dump(data, outfile, 2) | |
482 finally: | |
483 outfile.close() | |
484 | |
375
4eca63af0a12
Implement support for aliases in the CLDR data. Closes #68. Also, update to CLDR 1.6, and a much improved `dump_data` script.
cmlenz
parents:
345
diff
changeset
|
485 |
# Script entry point: call main() only when this file is executed directly, not when it is imported.
1 | 486 if __name__ == '__main__': |
487 main() |