diff scripts/import_cldr.py @ 10:0ca5dd65594f

Pull in some supplemental data from the CLDR, for things like the first day of the week.
author cmlenz
date Wed, 30 May 2007 17:39:43 +0000
parents e9eaddab598e
children 11f64b232b04
line wrap: on
line diff
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -24,6 +24,15 @@
 
 from babel.dates import parse_pattern
 
+weekdays = {'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4, 'fri': 5, 'sat': 6,
+            'sun': 7}
+
+try:
+    any
+except NameError:
+    def any(iterable):
+        return filter(None, list(iterable))
+
 def _parent(locale):
     parts = locale.split('_')
     if len(parts) == 1:
@@ -48,6 +57,25 @@
     destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                            '..', 'babel', 'localedata')
 
+    sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
+
+    # build a territory containment mapping for inheritance
+    regions = {}
+    for elem in sup.findall('//territoryContainment/group'):
+        regions[elem.attrib['type']] = elem.attrib['contains'].split()
+    from pprint import pprint
+
+    # Resolve territory containment
+    territory_containment = {}
+    region_items = regions.items()
+    region_items.sort()
+    for group, territory_list in region_items:
+        for territory in territory_list:
+            containers = territory_containment.setdefault(territory, set([]))
+            if group in territory_containment:
+                containers |= territory_containment[group]
+            containers.add(group)
+
     filenames = os.listdir(os.path.join(srcdir, 'main'))
     filenames.remove('root.xml')
     filenames.sort(lambda a,b: len(a)-len(b))
@@ -66,6 +94,20 @@
             data.update(copy.deepcopy(dicts[_parent(stem)]))
         tree = parse(os.path.join(srcdir, 'main', filename))
 
+        language = None
+        elem = tree.find('//identity/language')
+        if elem is not None:
+            language = elem.attrib['type']
+        print>>sys.stderr, '  Language:  %r' % language
+
+        territory = None
+        elem = tree.find('//identity/territory')
+        if elem is not None:
+            territory = elem.attrib['type']
+        print>>sys.stderr, '  Territory: %r' % territory
+        regions = territory_containment.get(territory, [])
+        print>>sys.stderr, '  Regions:    %r' % regions
+
         # <localeDisplayNames>
 
         territories = data.setdefault('territories', {})
@@ -94,6 +136,29 @@
 
         # <dates>
 
+        week_data = data.setdefault('week_data', {})
+        supelem = sup.find('//weekData')
+
+        for elem in supelem.findall('minDays'):
+            territories = elem.attrib['territories'].split()
+            if territory in territories or any([r in territories for r in regions]):
+                week_data['min_days'] = int(elem.attrib['count'])
+
+        for elem in supelem.findall('firstDay'):
+            territories = elem.attrib['territories'].split()
+            if territory in territories or any([r in territories for r in regions]):
+                week_data['first_day'] = weekdays[elem.attrib['day']]
+
+        for elem in supelem.findall('weekendStart'):
+            territories = elem.attrib['territories'].split()
+            if territory in territories or any([r in territories for r in regions]):
+                week_data['weekend_start'] = weekdays[elem.attrib['day']]
+
+        for elem in supelem.findall('weekendEnd'):
+            territories = elem.attrib['territories'].split()
+            if territory in territories or any([r in territories for r in regions]):
+                week_data['weekend_end'] = weekdays[elem.attrib['day']]
+
         time_zones = data.setdefault('time_zones', {})
         for elem in tree.findall('//timeZoneNames/zone'):
             time_zones[elem.tag] = unicode(elem.findtext('displayName'))
@@ -119,8 +184,7 @@
                 for width in ctxt.findall('dayWidth'):
                     widths = ctxts.setdefault(width.attrib['type'], {})
                     for elem in width.findall('day'):
-                        dtype = {'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4,
-                                 'fri': 5, 'sat': 6, 'sun': 7}[elem.attrib['type']]
+                        dtype = weekdays[elem.attrib['type']]
                         if 'draft' in elem.attrib and dtype in widths:
                             continue
                         widths[dtype] = unicode(elem.text)
Copyright (C) 2012-2017 Edgewall Software