# HG changeset patch
# User cmlenz
# Date 1128258123 0
# Node ID bc7b772360110d9f12b71a5c1ff64290ffd3f07d
# Parent 832e64330c312d3389986599928b3c992d1917da
Add MD5-based integrity checks for the snapshot archives maintained by the build master. If an archive is corrupted (for example by interruption of the archive creation), the build master will detect this because the MD5 checksum file is either missing, or does not match. Closes #56. Thanks to Chandler Carruth for the suggestion!

diff --git a/bitten/queue.py b/bitten/queue.py
--- a/bitten/queue.py
+++ b/bitten/queue.py
@@ -81,11 +81,28 @@
 
         # Paths to generated snapshot archives, key is (config name, revision)
         self.snapshots = {}
+
+        # Populate the snapshots index with existing archive files
         for config in BuildConfig.select(self.env):
             snapshots = archive.index(self.env, prefix=config.name)
             for rev, format, path in snapshots:
                 self.snapshots[(config.name, rev, format)] = path
 
+        # Clear any files in the snapshots directory that aren't in the archive
+        # index. Those may be archives without corresponding checksum files,
+        # i.e. here the creation of the snapshot was interrupted
+        snapshots_dir = os.path.join(self.env.path, 'snapshots')
+        for filename in os.listdir(snapshots_dir):
+            filepath = os.path.join(snapshots_dir, filename)
+            if filepath.endswith('.md5'):
+                if filepath[:-4] not in self.snapshots.values():
+                    os.remove(filepath)
+            else:
+                if filepath not in self.snapshots.values():
+                    log.info('Removing file %s (not a valid snapshot archive)',
+                             filename)
+                    os.remove(filepath)
+
         self.reset_orphaned_builds()
         self.remove_unused_snapshots()
 
@@ -183,8 +200,11 @@
         snapshot = self.snapshots.get((build.config, build.rev, format))
         if create and snapshot is None:
             config = BuildConfig.fetch(self.env, build.config)
+            log.debug('Preparing snapshot archive for %s@%s' % (config.path,
+                      build.rev))
             snapshot = archive.pack(self.env, path=config.path, rev=build.rev,
-                                    prefix=config.name, format=format)
+                                    prefix=config.name, format=format,
+                                    overwrite=True)
             log.info('Prepared snapshot archive at %s' % snapshot)
             self.snapshots[(build.config, build.rev, format)] = snapshot
         return snapshot
@@ -207,6 +227,8 @@
             if not keep:
                 log.info('Removing unused snapshot %s', path)
                 os.remove(path)
+                if os.path.isfile(path + '.md5'):
+                    os.remove(path + '.md5')
                 del self.snapshots[(config, rev, format)]
 
     # Slave registry
diff --git a/bitten/tests/queue.py b/bitten/tests/queue.py
--- a/bitten/tests/queue.py
+++ b/bitten/tests/queue.py
@@ -15,6 +15,7 @@
 from trac.test import EnvironmentStub
 from bitten.model import BuildConfig, TargetPlatform, Build, BuildStep, schema
 from bitten.queue import BuildQueue
+from bitten.util import archive
 
 
 class BuildQueueTestCase(unittest.TestCase):
@@ -178,6 +179,11 @@
         snapshot = os.path.join(self.env.path, 'snapshots', 'test_r123.zip')
         snapshot_file = file(snapshot, 'w')
         snapshot_file.close()
+        md5sum_file = file(snapshot + '.md5', 'w')
+        try:
+            md5sum_file.write(archive._make_md5sum(snapshot))
+        finally:
+            md5sum_file.close()
         queue = BuildQueue(self.env)
         self.assertEqual(snapshot, queue.get_snapshot(build, 'zip'))
 
diff --git a/bitten/util/archive.py b/bitten/util/archive.py
--- a/bitten/util/archive.py
+++ b/bitten/util/archive.py
@@ -7,17 +7,20 @@
 # you should have received as part of this distribution. The terms
 # are also available at http://bitten.cmlenz.net/wiki/License.
 
+import md5
 import os
 import tarfile
 import time
 import zipfile
 
-_formats = {'gzip': ('.tar.gz', 'gz'), 'bzip2': ('.tar.bz2', 'bz2'),
-            'zip': ('.zip', None)}
 
 class Error(Exception):
     """Error raised when packing or unpacking a snapshot archive fails."""
 
+
+_formats = {'gzip': ('.tar.gz', 'gz'), 'bzip2': ('.tar.bz2', 'bz2'),
+            'zip': ('.zip', None)}
+
 def index(env, prefix):
     """Generator that yields `(rev, format, path)` tuples for every archive in
     the environment snapshots directory that match the specified prefix.
@@ -39,8 +42,34 @@
             continue
         rev = rest[2:]
 
+        expected_md5sum = _make_md5sum(os.path.join(filedir, filename))
+        md5sum_path = os.path.join(filedir, filename + '.md5')
+        if not os.path.isfile(md5sum_path):
+            continue
+        md5sum_file = file(md5sum_path)
+        try:
+            existing_md5sum = md5sum_file.read()
+            if existing_md5sum != expected_md5sum:
+                continue
+        finally:
+            md5sum_file.close()
+
         yield rev, format, os.path.join(filedir, filename)
 
+def _make_md5sum(filename):
+    """Generate an MD5 checksum for the specified file."""
+    md5sum = md5.new()
+    fileobj = file(filename, 'rb')
+    try:
+        while True:
+            chunk = fileobj.read(4096)
+            if not chunk:
+                break
+            md5sum.update(chunk)
+    finally:
+        fileobj.close()
+    return md5sum.hexdigest() + ' ' + filename
+
 def pack(env, repos=None, path=None, rev=None, prefix=None, format='gzip',
          overwrite=False):
     """Create a snapshot archive in the specified format."""
@@ -103,6 +132,14 @@
     finally:
         archive.close()
 
+    # Create MD5 checksum
+    md5sum = _make_md5sum(filename)
+    md5sum_file = file(filename + '.md5', 'w')
+    try:
+        md5sum_file.write(md5sum)
+    finally:
+        md5sum_file.close()
+
     return filename
 
 def unpack(filename, dest_path, format=None):
diff --git a/bitten/util/tests/archive.py b/bitten/util/tests/archive.py
--- a/bitten/util/tests/archive.py
+++ b/bitten/util/tests/archive.py
@@ -7,6 +7,7 @@
 # you should have received as part of this distribution. The terms
 # are also available at http://bitten.cmlenz.net/wiki/License.
 
+import md5
 import os
 import shutil
 import tarfile
@@ -28,16 +29,23 @@
     def tearDown(self):
         shutil.rmtree(self.env.path)
 
-    def _create_file(self, *path):
-        filename = os.path.join(self.env.path, *path)
-        fd = file(filename, 'w')
-        fd.close()
+    def _create_file(self, path, create_md5sum=True):
+        filename = os.path.join(self.env.path, path)
+        fileobj = file(filename, 'w')
+        fileobj.close()
+        if create_md5sum:
+            md5sum = archive._make_md5sum(filename)
+            md5sum_file = file(filename + '.md5', 'w')
+            try:
+                md5sum_file.write(md5sum)
+            finally:
+                md5sum_file.close()
         return filename
 
     def test_index_formats(self):
-        targz_path = self._create_file('snapshots', 'foo_r123.tar.gz')
-        tarbz2_path = self._create_file('snapshots', 'foo_r123.tar.bz2')
-        zip_path = self._create_file('snapshots', 'foo_r123.zip')
+        targz_path = self._create_file('snapshots/foo_r123.tar.gz')
+        tarbz2_path = self._create_file('snapshots/foo_r123.tar.bz2')
+        zip_path = self._create_file('snapshots/foo_r123.zip')
         index = list(archive.index(self.env, 'foo'))
         self.assertEqual(3, len(index))
         assert ('123', 'gzip', targz_path) in index
@@ -45,8 +53,8 @@
         assert ('123', 'zip', zip_path) in index
 
     def test_index_revs(self):
-        rev123_path = self._create_file('snapshots', 'foo_r123.tar.gz')
-        rev124_path = self._create_file('snapshots', 'foo_r124.tar.gz')
+        rev123_path = self._create_file('snapshots/foo_r123.tar.gz')
+        rev124_path = self._create_file('snapshots/foo_r124.tar.gz')
         index = list(archive.index(self.env, 'foo'))
         self.assertEqual(2, len(index))
         assert ('123', 'gzip', rev123_path) in index
@@ -57,19 +65,37 @@
         self.assertEqual(0, len(index))
 
     def test_index_prefix(self):
-        path = self._create_file('snapshots', 'foo_r123.tar.gz')
-        self._create_file('snapshots', 'bar_r123.tar.gz')
+        path = self._create_file('snapshots/foo_r123.tar.gz')
+        self._create_file('snapshots/bar_r123.tar.gz')
         index = list(archive.index(self.env, 'foo'))
         self.assertEqual(1, len(index))
         assert ('123', 'gzip', path) in index
 
     def test_index_no_rev(self):
-        path = self._create_file('snapshots', 'foo_r123.tar.gz')
-        self._create_file('snapshots', 'foo_map.tar.gz')
+        path = self._create_file('snapshots/foo_r123.tar.gz')
+        self._create_file('snapshots/foo_map.tar.gz')
         index = list(archive.index(self.env, 'foo'))
         self.assertEqual(1, len(index))
         assert ('123', 'gzip', path) in index
 
+    def test_index_missing_md5sum(self):
+        self._create_file('snapshots/foo_r123.tar.gz', create_md5sum=False)
+        index = list(archive.index(self.env, 'foo'))
+        self.assertEqual(0, len(index))
+
+    def test_index_nonmatching_md5sum(self):
+        path = self._create_file('snapshots/foo_r123.tar.gz',
+                                 create_md5sum=False)
+        md5sum = md5.new('Foo bar')
+        md5sum_file = file(path + '.md5', 'w')
+        try:
+            md5sum_file.write(md5sum.hexdigest() + ' ' + path)
+        finally:
+            md5sum_file.close()
+
+        index = list(archive.index(self.env, 'foo'))
+        self.assertEqual(0, len(index))
+
 
 class PackTestCase(unittest.TestCase):
 