changeset 304:5979bcb0892e

The build master now attempts to build new snapshot archives based on the closest existing archive, under the assumption that usually very few files are changed between revisions, and that it is cheaper to copy the unmodified content over from an existing ZIP archive.
author cmlenz
date Tue, 08 Nov 2005 11:00:46 +0000
parents 3d58d9dd11c8
children 13b290f5f1ee
files bitten/snapshot.py bitten/tests/snapshot.py
diffstat 2 files changed, 140 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/bitten/snapshot.py
+++ b/bitten/snapshot.py
@@ -21,9 +21,7 @@
 For larger code bases, these snapshots may be relatively expensive to create.
 Most of the time is spent in walking the repository directory and reading the
 files it contains. To avoid blocking the build master while snapshots are
-created, this is done in a worker thread. The main thread polls the snapshots
-directory to find the snapshots that have been completely created (including
-the corresponding checksum file).
+created, this is done in a worker thread.
 
 As snapshot archives are often very similar to each other for subsequent
 revisions, an attempt is made to avoid the creation of new archives from
@@ -103,7 +101,8 @@
             try:
                 md5sum.validate(filepath)
             except md5sum.IntegrityError, e:
-                log.warning('Integrity error checking %s (e)', filepath, e)
+                log.warning('Integrity error checking %s (%s)', filepath, e)
+                os.remove(filepath)
                 continue
             mtime = os.path.getmtime(filepath)
 
@@ -140,83 +139,119 @@
         self._lock.acquire()
         try:
             repos = self.env.get_repository()
-            root = repos.get_node(self.config.path or '/', rev)
-            assert root.isdir, '"%s" is not a directory' % self.config.path
+            new_root = repos.get_node(self.config.path or '/', rev)
+            assert new_root.isdir, '"%s" is not a directory' % self.config.path
 
-            if root.rev in self._workers:
-                return self._workers[root.rev]
+            if new_root.rev in self._workers:
+                return self._workers[new_root.rev]
 
-            prefix = self.prefix + '_r' + str(rev)
-            filename = prefix + '.zip'
-            filepath = os.path.join(self.directory, filename)
-            if os.path.exists(filepath):
-                raise IOError, 'Snapshot file already exists at %s' % filepath
+            new_prefix = self.prefix + '_r' + str(rev)
+            filename = new_prefix + '.zip'
+            new_filepath = os.path.join(self.directory, filename)
+            if os.path.exists(new_filepath):
+                raise IOError, 'Snapshot file already exists at %s' \
+                               % new_filepath
 
             self._cleanup(self.limit - 1)
 
+            existing = self._get_closest_match(repos, new_root)
+            if existing:
+                base_rev, base_filepath = existing
+                base_root = repos.get_node(self.config.path or '/', base_rev)
+                base_prefix = self.prefix + '_r' + str(base_rev)
+            else:
+                base_root = base_filepath = base_prefix = None
+
             worker = threading.Thread(target=self._create,
-                                      args=(prefix, root, filepath),
+                                      args=(repos, new_root, new_filepath,
+                                            new_prefix, base_root,
+                                            base_filepath, base_prefix),
                                       name='Create snapshot %s' % filename)
             worker.start()
-            self._workers[root.rev] = worker
+            self._workers[new_root.rev] = worker
             return worker
         finally:
             self._lock.release()
 
-    def _create(self, prefix, root, filepath):
+    def _create(self, repos, new_root, new_filepath, new_prefix, base_root=None,
+                base_filepath=None, base_prefix=None):
         """Actually create a snapshot archive.
         
         This is used internally from the `create()` function and executed in a
         worker thread.
         """
-        log.debug('Preparing snapshot archive for %s@%s', root.path, root.rev)
+        log.debug('Preparing snapshot archive for %s@%s', new_root.path,
+                  new_root.rev)
+        if base_root:
+            base_rev = repos.next_rev(base_root.rev)
+            base_zip = zipfile.ZipFile(base_filepath, 'r')
+        new_zip = zipfile.ZipFile(new_filepath, 'w', zipfile.ZIP_DEFLATED)
 
-        zip_file = zipfile.ZipFile(filepath, 'w', zipfile.ZIP_DEFLATED)
         def _add_entry(node):
             name = node.path[len(self.config.path):]
             if name.startswith('/'):
                 name = name[1:]
             if node.isdir:
-                path = os.path.join(prefix, name).rstrip('/\\') + '/'
+                path = os.path.join(new_prefix, name).rstrip('/\\') + '/'
                 info = zipfile.ZipInfo(path)
                 info.create_system = 3
                 info.external_attr = 040755 << 16L | 0x10
-                zip_file.writestr(info, '')
-                log.debug('Adding directory %s to archive', name)
+                new_zip.writestr(info, '')
+                log.debug('Adding directory %s to archive', name + '/')
                 for entry in node.get_entries():
                     _add_entry(entry)
                 time.sleep(.1) # be nice
             else:
-                path = os.path.join(prefix, name)
-                info = zipfile.ZipInfo(path)
-                info.create_system = 3
-                info.compress_type = zipfile.ZIP_DEFLATED
-                info.date_time = time.gmtime(node.last_modified)[:6]
-                info.file_size = node.content_length
+                new_path = os.path.join(new_prefix, name)
 
-                # FIXME: Subversion specific! This should really be an
-                #        executable flag provided by Trac's versioncontrol API
-                if 'svn:executable' in node.get_properties():
-                    info.external_attr = 0100755 << 16L
+                copy_base = False
+                if base_root and repos.has_node(node.path, base_root.rev):
+                    base_node = repos.get_node(node.path, base_root.rev)
+                    copy_base = base_node.rev == node.rev
+
+                if copy_base:
+                    # Copy entry from base ZIP file
+                    base_path = os.path.join(base_prefix, name)
+                    base_info = base_zip.getinfo(base_path)
+                    base_info.filename = new_path
+                    new_zip.writestr(base_info, base_zip.read(base_path))
+
                 else:
-                    info.external_attr = 0100644 << 16L
+                    # Create entry from repository
+                    new_info = zipfile.ZipInfo(new_path)
+                    new_info.create_system = 3
+                    new_info.compress_type = zipfile.ZIP_DEFLATED
+                    new_info.date_time = time.gmtime(node.last_modified)[:6]
+                    new_info.file_size = node.content_length
 
-                zip_file.writestr(info, node.get_content().read())
+                    # FIXME: Subversion specific! This should really be an
+                    #        executable flag provided by Trac's versioncontrol
+                    #        API
+                    if 'svn:executable' in node.get_properties():
+                        new_info.external_attr = 0100755 << 16L
+                    else:
+                        new_info.external_attr = 0100644 << 16L
+
+                    new_zip.writestr(new_info, node.get_content().read())
+
         try:
-            _add_entry(root)
+            _add_entry(new_root)
         finally:
-            zip_file.close()
+            new_zip.close()
+            if base_root:
+                base_zip.close()
 
         # Create MD5 checksum file
-        md5sum.write(filepath)
+        md5sum.write(new_filepath)
 
         self._lock.acquire()
         try:
-            self._index.append((os.path.getmtime(filepath), root.rev, filepath))
-            del self._workers[root.rev]
+            self._index.append((os.path.getmtime(new_filepath), new_root.rev,
+                                new_filepath))
+            del self._workers[new_root.rev]
         finally:
             self._lock.release()
-        log.info('Prepared snapshot archive at %s', filepath)
+        log.info('Prepared snapshot archive at %s', new_filepath)
 
     def get(self, rev):
         """Returns the path to an already existing snapshot archive for the
@@ -232,3 +267,33 @@
             return None
         finally:
             self._lock.release()
+
+    def _get_closest_match(self, repos, root):
+        """Determine which existing snapshot archive is closest to the
+        requested repository revision."""
+        self._lock.acquire()
+        try:
+            distances = [] # (distance, rev, path) tuples
+
+            for mtime, srev, path in self._index:
+                distance = 0
+                srev = repos.normalize_rev(srev)
+                get_next = repos.next_rev
+                if repos.rev_older_than(root.rev, srev):
+                    get_next = repos.previous_rev
+                nrev = srev
+                while nrev != root.rev:
+                    distance += 1
+                    nrev = get_next(nrev)
+                    if nrev is None:
+                        distance = 0
+                        break
+                if distance:
+                    distances.append((distance, srev, path))
+
+            if not distances:
+                return None
+            distances.sort()
+            return distances[0][1:]
+        finally:
+            self._lock.release()
--- a/bitten/tests/snapshot.py
+++ b/bitten/tests/snapshot.py
@@ -158,6 +158,42 @@
         self.assertEqual('foo_r123/', entries[0].filename)
         self.assertEqual('foo_r123/empty/', entries[1].filename)
 
+    def test_get_closest_match_backward(self):
+        path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip'))
+        path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip'))
+
+        empty_dir = Mock(isdir=True, get_entries=lambda: [], path='trunk/empty')
+        root_dir = Mock(isdir=True, get_entries=lambda: [empty_dir],
+                        path='trunk', rev=125)
+        repos = Mock(get_node=lambda path, rev: root_dir,
+                     normalize_rev=lambda rev: int(rev),
+                     rev_older_than=lambda rev1, rev2: rev1 < rev2,
+                     next_rev=lambda rev: int(rev) + 1,
+                     previous_rev=lambda rev: int(rev) - 1)
+        self.env.get_repository = lambda authname=None: repos
+
+        snapshots = SnapshotManager(self.config)
+        match = snapshots._get_closest_match(repos, root_dir)
+        self.assertEqual((124, path2), match)
+
+    def test_get_closest_match_forward(self):
+        path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip'))
+        path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip'))
+
+        empty_dir = Mock(isdir=True, get_entries=lambda: [], path='trunk/empty')
+        root_dir = Mock(isdir=True, get_entries=lambda: [empty_dir],
+                        path='trunk', rev=122)
+        repos = Mock(get_node=lambda path, rev: root_dir,
+                     normalize_rev=lambda rev: int(rev),
+                     rev_older_than=lambda rev1, rev2: rev1 < rev2,
+                     next_rev=lambda rev: int(rev) + 1,
+                     previous_rev=lambda rev: int(rev) - 1)
+        self.env.get_repository = lambda authname=None: repos
+
+        snapshots = SnapshotManager(self.config)
+        match = snapshots._get_closest_match(repos, root_dir)
+        self.assertEqual((123, path1), match)
+
 
 def suite():
     suite = unittest.TestSuite()
Copyright (C) 2012-2017 Edgewall Software