changeset 239:bc7b77236011

Add MD5-based integrity checks for the snapshot archives maintained by the build master. If an archive is corrupted (for example, because archive creation was interrupted), the build master will detect this because the MD5 checksum file is either missing or does not match the archive. Closes #56. Thanks to Chandler Carruth for the suggestion!
author cmlenz
date Sun, 02 Oct 2005 13:02:03 +0000
parents 832e64330c31
children 24e91cbae6e0
files bitten/queue.py bitten/tests/queue.py bitten/util/archive.py bitten/util/tests/archive.py
diffstat 4 files changed, 107 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/bitten/queue.py
+++ b/bitten/queue.py
@@ -81,11 +81,28 @@
 
         # Paths to generated snapshot archives, key is (config name, revision)
         self.snapshots = {}
+
+        # Populate the snapshots index with existing archive files
         for config in BuildConfig.select(self.env):
             snapshots = archive.index(self.env, prefix=config.name)
             for rev, format, path in snapshots:
                 self.snapshots[(config.name, rev, format)] = path
 
+        # Clear any files in the snapshots directory that aren't in the archive
+        # index. Those may be archives without corresponding checksum files,
+        # i.e. snapshots whose creation was interrupted.
+        snapshots_dir = os.path.join(self.env.path, 'snapshots')
+        for filename in os.listdir(snapshots_dir):
+            filepath = os.path.join(snapshots_dir, filename)
+            if filepath.endswith('.md5'):
+                if filepath[:-4] not in self.snapshots.values():
+                    os.remove(filepath)
+            else:
+                if filepath not in self.snapshots.values():
+                    log.info('Removing file %s (not a valid snapshot archive)',
+                             filename)
+                    os.remove(filepath)
+
         self.reset_orphaned_builds()
         self.remove_unused_snapshots()
 
@@ -183,8 +200,11 @@
         snapshot = self.snapshots.get((build.config, build.rev, format))
         if create and snapshot is None:
             config = BuildConfig.fetch(self.env, build.config)
+            log.debug('Preparing snapshot archive for %s@%s' % (config.path,
+                      build.rev))
             snapshot = archive.pack(self.env, path=config.path, rev=build.rev,
-                                    prefix=config.name, format=format)
+                                    prefix=config.name, format=format,
+                                    overwrite=True)
             log.info('Prepared snapshot archive at %s' % snapshot)
             self.snapshots[(build.config, build.rev, format)] = snapshot
         return snapshot
@@ -207,6 +227,8 @@
             if not keep:
                 log.info('Removing unused snapshot %s', path)
                 os.remove(path)
+                if os.path.isfile(path + '.md5'):
+                    os.remove(path + '.md5')
                 del self.snapshots[(config, rev, format)]
 
     # Slave registry
--- a/bitten/tests/queue.py
+++ b/bitten/tests/queue.py
@@ -15,6 +15,7 @@
 from trac.test import EnvironmentStub
 from bitten.model import BuildConfig, TargetPlatform, Build, BuildStep, schema
 from bitten.queue import BuildQueue
+from bitten.util import archive
 
 
 class BuildQueueTestCase(unittest.TestCase):
@@ -178,6 +179,11 @@
         snapshot = os.path.join(self.env.path, 'snapshots', 'test_r123.zip')
         snapshot_file = file(snapshot, 'w')
         snapshot_file.close()
+        md5sum_file = file(snapshot + '.md5', 'w')
+        try:
+            md5sum_file.write(archive._make_md5sum(snapshot))
+        finally:
+            md5sum_file.close()
 
         queue = BuildQueue(self.env)
         self.assertEqual(snapshot, queue.get_snapshot(build, 'zip'))
--- a/bitten/util/archive.py
+++ b/bitten/util/archive.py
@@ -7,17 +7,20 @@
 # you should have received as part of this distribution. The terms
 # are also available at http://bitten.cmlenz.net/wiki/License.
 
+import md5
 import os
 import tarfile
 import time
 import zipfile
 
-_formats = {'gzip': ('.tar.gz', 'gz'), 'bzip2': ('.tar.bz2', 'bz2'),
-            'zip': ('.zip', None)}
 
 class Error(Exception):
     """Error raised when packing or unpacking a snapshot archive fails."""
 
+
+_formats = {'gzip': ('.tar.gz', 'gz'), 'bzip2': ('.tar.bz2', 'bz2'),
+            'zip': ('.zip', None)}
+
 def index(env, prefix):
     """Generator that yields `(rev, format, path)` tuples for every archive in
     the environment snapshots directory that match the specified prefix.
@@ -39,8 +42,34 @@
             continue
         rev = rest[2:]
 
+        expected_md5sum = _make_md5sum(os.path.join(filedir, filename))
+        md5sum_path = os.path.join(filedir, filename + '.md5')
+        if not os.path.isfile(md5sum_path):
+            continue
+        md5sum_file = file(md5sum_path)
+        try:
+            existing_md5sum = md5sum_file.read()
+            if existing_md5sum != expected_md5sum:
+                continue
+        finally:
+            md5sum_file.close()
+
         yield rev, format, os.path.join(filedir, filename)
 
+def _make_md5sum(filename):
+    """Generate an MD5 checksum for the specified file."""
+    md5sum = md5.new()
+    fileobj = file(filename, 'rb')
+    try:
+        while True:
+            chunk = fileobj.read(4096)
+            if not chunk:
+                break
+            md5sum.update(chunk)
+    finally:
+        fileobj.close()
+    return md5sum.hexdigest() + '  ' + filename
+
 def pack(env, repos=None, path=None, rev=None, prefix=None, format='gzip',
          overwrite=False):
     """Create a snapshot archive in the specified format."""
@@ -103,6 +132,14 @@
     finally:
         archive.close()
 
+    # Create MD5 checksum
+    md5sum = _make_md5sum(filename)
+    md5sum_file = file(filename + '.md5', 'w')
+    try:
+        md5sum_file.write(md5sum)
+    finally:
+        md5sum_file.close()
+
     return filename
 
 def unpack(filename, dest_path, format=None):
--- a/bitten/util/tests/archive.py
+++ b/bitten/util/tests/archive.py
@@ -7,6 +7,7 @@
 # you should have received as part of this distribution. The terms
 # are also available at http://bitten.cmlenz.net/wiki/License.
 
+import md5
 import os
 import shutil
 import tarfile
@@ -28,16 +29,23 @@
     def tearDown(self):
         shutil.rmtree(self.env.path)
 
-    def _create_file(self, *path):
-        filename = os.path.join(self.env.path, *path)
-        fd = file(filename, 'w')
-        fd.close()
+    def _create_file(self, path, create_md5sum=True):
+        filename = os.path.join(self.env.path, path)
+        fileobj = file(filename, 'w')
+        fileobj.close()
+        if create_md5sum:
+            md5sum = archive._make_md5sum(filename)
+            md5sum_file = file(filename + '.md5', 'w')
+            try:
+                md5sum_file.write(md5sum)
+            finally:
+                md5sum_file.close()
         return filename
 
     def test_index_formats(self):
-        targz_path = self._create_file('snapshots', 'foo_r123.tar.gz')
-        tarbz2_path = self._create_file('snapshots', 'foo_r123.tar.bz2')
-        zip_path = self._create_file('snapshots', 'foo_r123.zip')
+        targz_path = self._create_file('snapshots/foo_r123.tar.gz')
+        tarbz2_path = self._create_file('snapshots/foo_r123.tar.bz2')
+        zip_path = self._create_file('snapshots/foo_r123.zip')
         index = list(archive.index(self.env, 'foo'))
         self.assertEqual(3, len(index))
         assert ('123', 'gzip', targz_path) in index
@@ -45,8 +53,8 @@
         assert ('123', 'zip', zip_path) in index
 
     def test_index_revs(self):
-        rev123_path = self._create_file('snapshots', 'foo_r123.tar.gz')
-        rev124_path = self._create_file('snapshots', 'foo_r124.tar.gz')
+        rev123_path = self._create_file('snapshots/foo_r123.tar.gz')
+        rev124_path = self._create_file('snapshots/foo_r124.tar.gz')
         index = list(archive.index(self.env, 'foo'))
         self.assertEqual(2, len(index))
         assert ('123', 'gzip', rev123_path) in index
@@ -57,19 +65,37 @@
         self.assertEqual(0, len(index))
 
     def test_index_prefix(self):
-        path = self._create_file('snapshots', 'foo_r123.tar.gz')
-        self._create_file('snapshots', 'bar_r123.tar.gz')
+        path = self._create_file('snapshots/foo_r123.tar.gz')
+        self._create_file('snapshots/bar_r123.tar.gz')
         index = list(archive.index(self.env, 'foo'))
         self.assertEqual(1, len(index))
         assert ('123', 'gzip', path) in index
 
     def test_index_no_rev(self):
-        path = self._create_file('snapshots', 'foo_r123.tar.gz')
-        self._create_file('snapshots', 'foo_map.tar.gz')
+        path = self._create_file('snapshots/foo_r123.tar.gz')
+        self._create_file('snapshots/foo_map.tar.gz')
         index = list(archive.index(self.env, 'foo'))
         self.assertEqual(1, len(index))
         assert ('123', 'gzip', path) in index
 
+    def test_index_missing_md5sum(self):
+        self._create_file('snapshots/foo_r123.tar.gz', create_md5sum=False)
+        index = list(archive.index(self.env, 'foo'))
+        self.assertEqual(0, len(index))
+
+    def test_index_nonmatching_md5sum(self):
+        path = self._create_file('snapshots/foo_r123.tar.gz',
+                                 create_md5sum=False)
+        md5sum = md5.new('Foo bar')
+        md5sum_file = file(path + '.md5', 'w')
+        try:
+            md5sum_file.write(md5sum.hexdigest() + '  ' + path)
+        finally:
+            md5sum_file.close()
+
+        index = list(archive.index(self.env, 'foo'))
+        self.assertEqual(0, len(index))
+
 
 class PackTestCase(unittest.TestCase):
 
Copyright (C) 2012-2017 Edgewall Software