Mercurial > bitten > bitten-test
changeset 277:1141027071b3
Changes to snapshot archive creation/transmission:
* No format negotiation, always use ZIP archives.
* Archives are created by the build master in a background thread.
* The master no longer keeps all archives for revisions that lack builds; it just keeps the 10 most recently used archives.
author | cmlenz |
---|---|
date | Fri, 14 Oct 2005 12:50:21 +0000 |
parents | 80645a15336b |
children | a4aed338b3c3 |
files | bitten/master.py bitten/queue.py bitten/slave.py bitten/snapshot.py bitten/tests/__init__.py bitten/tests/queue.py bitten/tests/slave.py bitten/tests/snapshot.py bitten/util/archive.py bitten/util/tests/__init__.py bitten/util/tests/archive.py |
diffstat | 11 files changed, 585 insertions(+), 612 deletions(-) [+] |
line wrap: on
line diff
--- a/bitten/master.py +++ b/bitten/master.py @@ -21,7 +21,7 @@ from bitten.model import BuildConfig, Build, BuildStep, BuildLog, Report from bitten.queue import BuildQueue from bitten.trac_ext.main import BuildSystem -from bitten.util import archive, beep, xmlio +from bitten.util import beep, xmlio log = logging.getLogger('bitten.master') @@ -50,23 +50,17 @@ queue.reset_orphaned_builds() beep.Listener.close(self) - def _cleanup(self, when): - for queue in self.queues: - queue.remove_unused_snapshots() - - def _enqueue_builds(self, when): + def _enqueue_builds(self): self.schedule(self.check_interval, self._enqueue_builds) for queue in self.queues: queue.populate() self.schedule(self.check_interval * 0.2, self._initiate_builds) - self.schedule(self.check_interval * 1.8, self._cleanup) - def _initiate_builds(self, when): + def _initiate_builds(self): available_slaves = set([name for name in self.handlers if not self.handlers[name].building]) - for idx, queue in enumerate(self.queues[:]): build, slave = queue.get_next_pending_build(available_slaves) if build: @@ -144,11 +138,8 @@ self.info[child.attr['name'] + '.' 
+ name] = value if not self.master.register(self): - xml = xmlio.Element('error', code=550)[ - 'Nothing for you to build here, please move along' - ] - self.channel.send_err(msgno, beep.Payload(xml)) - return + raise beep.ProtocolError(550, 'Nothing for you to build here, ' + 'please move along') xml = xmlio.Element('ok') self.channel.send_rpy(msgno, beep.Payload(xml)) @@ -157,6 +148,7 @@ log.info('Initiating build of "%s" on slave %s', build.config, self.name) self.building = True + config = BuildConfig.fetch(queue.env, build.config) def handle_reply(cmd, msgno, ansno, payload): if cmd == 'ERR': @@ -170,40 +162,46 @@ return elem = xmlio.parse(payload.body) - assert elem.name == 'proceed' - type = encoding = None - for child in elem.children('accept'): - type, encoding = child.attr['type'], child.attr.get('encoding') - if (type, encoding) in (('application/tar', 'gzip'), - ('application/tar', 'bzip2'), - ('application/tar', None), - ('application/zip', None)): - break - type = None - if not type: - xml = xmlio.Element('error', code=550)[ - 'None of the accepted archive formats supported' - ] - self.channel.send_err(beep.Payload(xml)) - self.building = False - return - self.send_snapshot(queue, build, type, encoding) + if elem.name != 'proceed': + raise beep.ProtocolError(500) - config = BuildConfig.fetch(queue.env, build.config) + snapshots = queue.snapshots[config.name] + snapshot = snapshots.get(build.rev) + if not snapshot: + # Request a snapshot for this build, and schedule a poll + # function that kicks off the snapshot transmission once the + # archive has been completely built + worker = snapshots.create(build.rev) + def _check_snapshot(): + worker.join(.5) + if worker.isAlive(): + self.master.schedule(2, _check_snapshot) + else: + snapshot = snapshots.get(build.rev) + if snapshot is None: + log.error('Failed to create snapshot archive for ' + '%s@%s', config.path, build.rev) + return + self.send_snapshot(queue, build, snapshot) + _check_snapshot() + else: + 
self.send_snapshot(queue, build, snapshot) + self.channel.send_msg(beep.Payload(config.recipe), handle_reply=handle_reply) - def send_snapshot(self, queue, build, type, encoding): + def send_snapshot(self, queue, build, snapshot): timestamp_delta = 0 if self.master.adjust_timestamps: d = datetime.now() - timedelta(seconds=self.master.check_interval) \ - datetime.fromtimestamp(build.rev_time) - log.info('Warping timestamps by %s' % d) + log.info('Warping timestamps by %s', d) timestamp_delta = d.days * 86400 + d.seconds def handle_reply(cmd, msgno, ansno, payload): if cmd == 'ERR': - assert payload.content_type == beep.BEEP_XML + if payload.content_type != beep.BEEP_XML: + raise beep.ProtocolError(500) elem = xmlio.parse(payload.body) if elem.name == 'error': log.warning('Slave %s refused to start build: %s (%d)', @@ -212,7 +210,8 @@ self.building = False elif cmd == 'ANS': - assert payload.content_type == beep.BEEP_XML + if payload.content_type != beep.BEEP_XML: + raise beep.ProtocolError(500) elem = xmlio.parse(payload.body) if elem.name == 'started': self._build_started(queue, build, elem, timestamp_delta) @@ -229,17 +228,10 @@ elif cmd == 'NUL': self.building = False - snapshot_format = { - ('application/tar', 'bzip2'): 'bzip2', - ('application/tar', 'gzip'): 'gzip', - ('application/tar', None): 'tar', - ('application/zip', None): 'zip', - }[(type, encoding)] - snapshot_path = queue.get_snapshot(build, snapshot_format, create=True) - snapshot_name = os.path.basename(snapshot_path) - message = beep.Payload(file(snapshot_path, 'rb'), content_type=type, - content_disposition=snapshot_name, - content_encoding=encoding) + snapshot_name = os.path.basename(snapshot) + message = beep.Payload(file(snapshot, 'rb'), + content_type='application/zip', + content_disposition=snapshot_name) self.channel.send_msg(message, handle_reply=handle_reply) def _build_started(self, queue, build, elem, timestamp_delta=None): @@ -274,7 +266,7 @@ step.status = BuildStep.FAILURE else: 
step.status = BuildStep.SUCCESS - step.errors += [err.gettext() for err in elem.children('error')] + step.errors += [error.gettext() for error in elem.children('error')] step.insert(db=db) for idx, log_elem in enumerate(elem.children('log')):
--- a/bitten/queue.py +++ b/bitten/queue.py @@ -13,7 +13,7 @@ import re from bitten.model import BuildConfig, TargetPlatform, Build, BuildStep -from bitten.util import archive +from bitten.snapshot import SnapshotManager log = logging.getLogger('bitten.queue') @@ -82,32 +82,12 @@ self.env = env self.slaves = {} # Sets of slave names keyed by target platform ID - # Paths to generated snapshot archives, key is (config name, revision) + # Snapshot managers, keyed by build config name self.snapshots = {} - - # Populate the snapshots index with existing archive files - for config in BuildConfig.select(self.env): - snapshots = archive.index(self.env, prefix=config.name) - for rev, format, path in snapshots: - self.snapshots[(config.name, rev, format)] = path - - # Clear any files in the snapshots directory that aren't in the archive - # index. Those may be archives without corresponding checksum files, - # i.e. here the creation of the snapshot was interrupted - snapshots_dir = os.path.join(self.env.path, 'snapshots') - for filename in os.listdir(snapshots_dir): - filepath = os.path.join(snapshots_dir, filename) - if filepath.endswith('.md5'): - if filepath[:-4] not in self.snapshots.values(): - os.remove(filepath) - else: - if filepath not in self.snapshots.values(): - log.info('Removing file %s (not a valid snapshot archive)', - filename) - os.remove(filepath) + for config in BuildConfig.select(self.env, include_inactive=True): + self.snapshots[config.name] = SnapshotManager(config) self.reset_orphaned_builds() - self.remove_unused_snapshots() # Build scheduling @@ -193,53 +173,6 @@ build.update(db=db) db.commit() - # Snapshot management - - def get_snapshot(self, build, format, create=False): - """Return the absolute path to a snapshot archive for the given build. - The archive can be created if it doesn't exist yet. 
- - @param build: The `Build` object - @param format: The archive format (one of `gzip`, `bzip2` or `zip`) - @param create: Whether the archive should be created if it doesn't exist - yet - @return: The absolute path to the create archive file, or None if the - snapshot doesn't exist and wasn't created - """ - snapshot = self.snapshots.get((build.config, build.rev, format)) - if create and snapshot is None: - config = BuildConfig.fetch(self.env, build.config) - log.debug('Preparing snapshot archive for %s@%s' % (config.path, - build.rev)) - snapshot = archive.pack(self.env, path=config.path, rev=build.rev, - prefix=config.name, format=format, - overwrite=True) - log.info('Prepared snapshot archive at %s' % snapshot) - self.snapshots[(build.config, build.rev, format)] = snapshot - return snapshot - - def remove_unused_snapshots(self): - """Find any previously created snapshot archives that are no longer - needed because all corresponding builds have already been completed. - - This method should be called in regular intervals to keep the total - disk space occupied by the snapshot archives to a minimum. - """ - log.debug('Checking for unused snapshot archives...') - - for (config, rev, format), path in self.snapshots.items(): - keep = False - for build in Build.select(self.env, config=config, rev=rev): - if build.status not in (Build.SUCCESS, Build.FAILURE): - keep = True - break - if not keep: - log.info('Removing unused snapshot %s', path) - os.remove(path) - if os.path.isfile(path + '.md5'): - os.remove(path + '.md5') - del self.snapshots[(config, rev, format)] - # Slave registry def register_slave(self, name, properties):
--- a/bitten/slave.py +++ b/bitten/slave.py @@ -18,11 +18,12 @@ from sets import Set as set import shutil import tempfile +import zipfile from bitten.build import BuildError from bitten.build.config import Configuration from bitten.recipe import Recipe, InvalidRecipeError -from bitten.util import archive, beep, xmlio +from bitten.util import beep, xmlio log = logging.getLogger('bitten.slave') @@ -99,28 +100,14 @@ if elem.name == 'build': self.recipe_xml = elem # Received a build request - xml = xmlio.Element('proceed')[ - xmlio.Element('accept', type='application/tar', - encoding='bzip2'), - xmlio.Element('accept', type='application/tar', - encoding='gzip'), - xmlio.Element('accept', type='application/zip') - ] + xml = xmlio.Element('proceed') self.channel.send_rpy(msgno, beep.Payload(xml)) - elif payload.content_type in ('application/tar', 'application/zip'): + elif payload.content_type == 'application/zip': # Received snapshot archive for build archive_name = payload.content_disposition if not archive_name: - if payload.content_type == 'application/tar': - if payload.content_encoding == 'gzip': - archive_name = 'snapshot.tar.gz' - elif payload.content_encoding == 'bzip2': - archive_name = 'snapshot.tar.bz2' - elif not payload.content_encoding: - archive_name = 'snapshot.tar' - else: - archive_name = 'snapshot.zip' + archive_name = 'snapshot.zip' archive_path = os.path.join(self.session.work_dir, archive_name) archive_file = file(archive_path, 'wb') @@ -129,32 +116,54 @@ finally: archive_file.close() os.chmod(archive_path, 0400) - - log.debug('Received snapshot archive: %s', archive_path) - - # Unpack the archive - try: - prefix = archive.unpack(archive_path, self.session.work_dir) - path = os.path.join(self.session.work_dir, prefix) - os.chmod(path, 0700) - log.debug('Unpacked snapshot to %s' % path) - except archive.Error, e: - xml = xmlio.Element('error', code=550)[ - 'Could not unpack archive (%s)' % e - ] - self.channel.send_err(msgno, beep.Payload(xml)) - 
log.error('Could not unpack archive %s: %s', archive_path, e, - exc_info=True) - return + basedir = self.unpack_snapshot(msgno, archive_path) try: - recipe = Recipe(self.recipe_xml, path, self.config) + recipe = Recipe(self.recipe_xml, basedir, self.config) self.execute_build(msgno, recipe) finally: if not self.session.keep_files: - shutil.rmtree(path) + shutil.rmtree(basedir) os.remove(archive_path) + def unpack_snapshot(self, msgno, path): + """Unpack a snapshot archive.""" + log.debug('Received snapshot archive: %s', path) + try: + zip = zipfile.ZipFile(path, 'r') + badfile = zip.testzip() + if badfile: + raise ProtocolError(550, 'Corrupt ZIP archive: invalid CRC ' + 'for %s' % badfile) + try: + names = [] + for name in zip.namelist(): + names.append(name) + path = os.path.join(self.session.work_dir, name) + if name.endswith('/'): + os.makedirs(path) + else: + dirname = os.path.dirname(path) + if not os.path.isdir(dirname): + os.makedirs(dirname) + fileobj = file(path, 'wb') + try: + fileobj.write(zip.read(name)) + finally: + fileobj.close() + finally: + zip.close() + + path = os.path.join(self.session.work_dir, + os.path.commonprefix(names)) + os.chmod(path, 0700) + log.debug('Unpacked snapshot to %s' % path) + return path + + except (IOError, zipfile.error), e: + log.error('Could not unpack archive %s: %s', path, e, exc_info=True) + raise beep.ProtocolError(550, 'Could not unpack archive (%s)' % e) + def execute_build(self, msgno, recipe): log.info('Building in directory %s', recipe.ctxt.basedir) try:
new file mode 100644 --- /dev/null +++ b/bitten/snapshot.py @@ -0,0 +1,231 @@ +# -*- coding: iso8859-1 -*- +# +# Copyright (C) 2005 Christopher Lenz <cmlenz@gmx.de> +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://bitten.cmlenz.net/wiki/License. + +"""Snapshot archive management. + +Snapshots of the code base are stored in the Trac environment as ZIP files. + +These files use the naming convention `[config_name]_r[revision].zip` so they +can be located programmatically after creation, and associated with the build +config and revision they apply to. + +These snapshot files are accompanied by a checksum file (using MD5). Any archive +file with no accompanying checksum is considered incomplete or invalid. + +For larger code bases, these snapshots may be relatively expensive to create. +Most of the time is spent in walking the repository directory and reading the +files it contains. To avoid blocking the build master while snapshots are +created, this is done in a worker thread. The main thread polls the snapshots +directory to find the snapshots that have been completely created (including +the corresponding checksum file). + +As snapshot archives are often very similar to each other for subsequent +revisions, an attempt is made to avoid the creation of new archives from +scratch. Instead, the build master keeps the most recently used archives (MRU +cache) and will build new archives based on the deltas provided by the version +control system. Using the nearest existing snapshot as the base, deleted files +and directories are removed from the snapshot, added files/directories are +added, and modified files are updated. 
+""" + +import logging +import md5 +import os +try: + import threading +except ImportError: + import dummy_threading as threading +import time +import zipfile + +log = logging.getLogger('bitten.snapshot') + +MAX_SNAPSHOTS = 10 +SNAPSHOTS_DIR = 'snapshots' + +def _make_md5sum(filename): + """Generate an MD5 checksum for the specified file.""" + md5sum = md5.new() + fileobj = file(filename, 'rb') + try: + while True: + chunk = fileobj.read(4096) + if not chunk: + break + md5sum.update(chunk) + finally: + fileobj.close() + return md5sum.hexdigest() + ' ' + filename + + +class SnapshotManager(object): + """Manages snapshot archives for a specific build configuration.""" + + def __init__(self, config): + """Create the snapshot manager. + + @param config: The `BuildConfig` instance + """ + assert config and config.exists, 'Build configuration does not exist' + self.env = config.env + self.config = config + + self.prefix = config.name + self.directory = self.env.config.get('bitten', 'snapshots_dir', + os.path.join(self.env.path, + SNAPSHOTS_DIR)) + self.limit = int(self.env.config.get('bitten', 'max_snapshots', + MAX_SNAPSHOTS)) + + # Create the snapshots directory if it doesn't already exist + if not os.path.exists(self.directory): + os.mkdir(self.directory) + + # Make sure we have permissions to write to the directory + if not os.access(self.directory, os.R_OK + os.W_OK): + raise IOError, 'Insufficient permissions to create snapshots in ' \ + + self.directory + + # Collect a list of all existing snapshot archives + self._index = [] + for snapshot in self._scan(): + self._index.append(snapshot) + self._lock = threading.RLock() + self._cleanup() + + def _scan(self): + """Find all existing snapshots in the directory.""" + for filename in [f for f in os.listdir(self.directory) + if f.startswith(self.prefix)]: + if not filename.endswith('.zip'): + continue + rest = filename[len(self.prefix):-4] + if not rest.startswith('_r'): + continue + rev = rest[2:] + + filepath = 
os.path.join(self.directory, filename) + expected_md5sum = _make_md5sum(filepath) + md5sum_path = os.path.join(self.directory, + filename[:-4] + '.md5') + if not os.path.isfile(md5sum_path): + continue + md5sum_file = file(md5sum_path) + try: + existing_md5sum = md5sum_file.read() + if existing_md5sum != expected_md5sum: + continue + finally: + md5sum_file.close() + + mtime = os.path.getmtime(filepath) + + yield mtime, rev, filepath + + def _cleanup(self, limit=None): + """Remove obsolete snapshots to preserve disk space.""" + self._lock.acquire() + try: + self._index.sort(lambda a, b: -cmp(a[0], b[0])) + limit = limit or self.limit + if len(self._index) > limit: + for mtime, rev, path in self._index[limit:]: + log.debug('Removing snapshot %s', path) + os.remove(path) + os.remove(path[:-4] + '.md5') + self._index = self._index[:limit] + finally: + self._lock.release() + + def create(self, rev): + """Create a new snapshot archive for the specified revision. + + The archive is created in a worker thread. The return value of this + function is the thread object. The caller may use this object to check + for completion of the operation. + """ + prefix = self.prefix + '_r' + str(rev) + filename = prefix + '.zip' + filepath = os.path.join(self.directory, filename) + if os.path.exists(filepath): + raise IOError, 'Snapshot file already exists at %s' % filepath + + repos = self.env.get_repository() + root = repos.get_node(self.config.path or '/', rev) + assert root.isdir, '"%s" is not a directory' % self.config.path + + self._cleanup(self.limit - 1) + + worker = threading.Thread(target=self._create, + args=(prefix, root, filepath), + name='Create snapshot %s' % filename) + worker.start() + return worker + + def _create(self, prefix, root, filepath): + """Actually create a snapshot archive. + + This is used internally from the `create()` function and executed in a + worker thread. 
+ """ + log.debug('Preparing snapshot archive for %s@%s', root.path, root.rev) + + zip = zipfile.ZipFile(filepath, 'w', zipfile.ZIP_DEFLATED) + def _add_entry(node): + name = node.path[len(self.config.path):] + if name.startswith('/'): + name = name[1:] + if node.isdir: + path = os.path.join(prefix, name).rstrip('/\\') + '/' + info = zipfile.ZipInfo(path) + zip.writestr(info, '') + for entry in node.get_entries(): + _add_entry(entry) + time.sleep(.5) # be nice + else: + path = os.path.join(prefix, name) + info = zipfile.ZipInfo(path) + info.compress_type = zipfile.ZIP_DEFLATED + info.date_time = time.gmtime(node.last_modified)[:6] + info.file_size = node.content_length + zip.writestr(info, node.get_content().read()) + try: + _add_entry(root) + finally: + zip.close() + + # Create MD5 checksum + md5sum = _make_md5sum(filepath) + md5sum_file = file(filepath[:-4] + '.md5', 'w') + try: + md5sum_file.write(md5sum) + finally: + md5sum_file.close() + + self._lock.acquire() + try: + self._index.append((os.path.getmtime(filepath), root.rev, filepath)) + finally: + self._lock.release() + log.info('Prepared snapshot archive at %s', filepath) + + def get(self, rev): + """Returns the path to an already existing snapshot archive for the + specified revision. + + If no snapshot exists for the revision, this function returns `None`. + """ + self._lock.acquire() + try: + for mtime, srev, path in self._index: + if str(rev) == str(srev): + return path + return None + finally: + self._lock.release()
--- a/bitten/tests/__init__.py +++ b/bitten/tests/__init__.py @@ -9,7 +9,7 @@ import unittest -from bitten.tests import model, recipe, queue +from bitten.tests import model, recipe, queue, slave, snapshot from bitten.build import tests as build from bitten.util import tests as util from bitten.trac_ext import tests as trac_ext @@ -19,6 +19,8 @@ suite.addTest(model.suite()) suite.addTest(recipe.suite()) suite.addTest(queue.suite()) + suite.addTest(slave.suite()) + suite.addTest(snapshot.suite()) suite.addTest(build.suite()) suite.addTest(trac_ext.suite()) suite.addTest(util.suite())
--- a/bitten/tests/queue.py +++ b/bitten/tests/queue.py @@ -15,7 +15,6 @@ from trac.test import EnvironmentStub, Mock from bitten.model import BuildConfig, TargetPlatform, Build, BuildStep, schema from bitten.queue import BuildQueue, collect_changes -from bitten.util import archive class CollectChangesTestCase(unittest.TestCase): @@ -280,23 +279,6 @@ self.assertEqual({}, build.slave_info) self.assertEqual(0, build.started) - def test_get_existing_snapshot(self): - BuildConfig(self.env, 'test', active=True).insert() - build = Build(self.env, config='test', platform=1, rev=123, rev_time=42, - status=Build.PENDING) - build.insert() - snapshot = os.path.join(self.env.path, 'snapshots', 'test_r123.zip') - snapshot_file = file(snapshot, 'w') - snapshot_file.close() - md5sum_file = file(snapshot + '.md5', 'w') - try: - md5sum_file.write(archive._make_md5sum(snapshot)) - finally: - md5sum_file.close() - - queue = BuildQueue(self.env) - self.assertEqual(snapshot, queue.get_snapshot(build, 'zip')) - def suite(): suite = unittest.TestSuite()
new file mode 100644 --- /dev/null +++ b/bitten/tests/slave.py @@ -0,0 +1,77 @@ +# -*- coding: iso8859-1 -*- +# +# Copyright (C) 2005 Christopher Lenz <cmlenz@gmx.de> +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://bitten.cmlenz.net/wiki/License. + +import os +import shutil +import tempfile +import unittest + + +import md5 +import os +import shutil +import tempfile +import unittest +import zipfile + +from trac.test import Mock +from bitten.slave import Slave, OrchestrationProfileHandler +from bitten.util.beep import ProtocolError + + +class OrchestrationProfileHandlerTestCase(unittest.TestCase): + + def setUp(self): + self.work_dir = tempfile.mkdtemp(prefix='bitten_test') + self.slave = Slave(None, None) + self.handler = OrchestrationProfileHandler(Mock(session=self.slave)) + + def tearDown(self): + shutil.rmtree(self.work_dir) + + def _create_file(self, *path): + filename = os.path.join(self.work_dir, *path) + fd = file(filename, 'w') + fd.close() + return filename + + def test_unpack_invalid_zip_1(self): + """ + Verify handling of `IOError` exceptions when trying to unpack an + invalid ZIP file. + + The `zipfile` module will actually raise an `IOError` instead of a + `zipfile.error` here because it'll try to seek past the beginning of + the file. + """ + path = self._create_file('invalid.zip') + zip = file(path, 'w') + zip.write('INVALID') + zip.close() + self.assertRaises(ProtocolError, self.handler.unpack_snapshot, 0, path) + + def test_unpack_invalid_zip_2(self): + """ + Verify handling of `zip.error` exceptions when trying to unpack an + invalid ZIP file. 
+ """ + path = self._create_file('invalid.zip') + zip = file(path, 'w') + zip.write('INVALIDINVALIDINVALIDINVALIDINVALIDINVALID') + zip.close() + self.assertRaises(ProtocolError, self.handler.unpack_snapshot, 0, path) + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(OrchestrationProfileHandlerTestCase, + 'test')) + return suite + +if __name__ == '__main__': + unittest.main(defaultTest='suite')
new file mode 100644 --- /dev/null +++ b/bitten/tests/snapshot.py @@ -0,0 +1,182 @@ +# -*- coding: iso8859-1 -*- +# +# Copyright (C) 2005 Christopher Lenz <cmlenz@gmx.de> +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://bitten.cmlenz.net/wiki/License. + +import os +import shutil +import tempfile +import unittest + + +import md5 +import os +import shutil +import tempfile +import unittest +import zipfile + +from trac.test import EnvironmentStub, Mock +from bitten.model import BuildConfig +from bitten.snapshot import SnapshotManager, _make_md5sum + + +class SnapshotManagerTestCase(unittest.TestCase): + + def setUp(self): + self.env = EnvironmentStub() + self.env.path = tempfile.mkdtemp(prefix='bitten_test') + os.mkdir(os.path.join(self.env.path, 'snapshots')) + db = self.env.get_db_cnx() + cursor = db.cursor() + for table in BuildConfig._schema: + for stmt in db.to_sql(table): + cursor.execute(stmt) + db.commit() + self.config = BuildConfig(self.env, name='foo', path='trunk') + self.config.insert() + + def tearDown(self): + shutil.rmtree(self.env.path) + + def _create_file(self, path, create_md5sum=True): + filename = os.path.join(self.env.path, path) + fileobj = file(filename, 'w') + fileobj.close() + if create_md5sum: + md5sum = _make_md5sum(filename) + md5sum_file = file(filename[:-4] + '.md5', 'w') + try: + md5sum_file.write(md5sum) + finally: + md5sum_file.close() + return filename + + def test_empty(self): + snapshots = SnapshotManager(self.config) + self.assertEqual([], snapshots._index) + self.assertEqual(None, snapshots.get(123)) + + def test_get(self): + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip')) + snapshots = SnapshotManager(self.config) + self.assertEqual(path1, snapshots.get(123)) + self.assertEqual(path2, 
snapshots.get(124)) + + def test_get_prefix_match(self): + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + self._create_file(os.path.join('snapshots', 'bar_r124.zip')) + snapshots = SnapshotManager(self.config) + self.assertEqual(path1, snapshots.get(123)) + self.assertEqual(None, snapshots.get(124)) + + def test_get_wrong_extension(self): + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + self._create_file(os.path.join('snapshots', 'foo_r124.doc')) + snapshots = SnapshotManager(self.config) + self.assertEqual(path1, snapshots.get(123)) + self.assertEqual(None, snapshots.get(124)) + + def test_get_missing_rev(self): + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + self._create_file(os.path.join('snapshots', 'foo124.doc')) + snapshots = SnapshotManager(self.config) + self.assertEqual(path1, snapshots.get(123)) + self.assertEqual(None, snapshots.get(124)) + + def test_get_missing_md5sum(self): + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + self._create_file(os.path.join('snapshots', 'foo_r124.zip'), + create_md5sum=False) + snapshots = SnapshotManager(self.config) + self.assertEqual(path1, snapshots.get(123)) + self.assertEqual(None, snapshots.get(124)) + + def test_get_wrong_md5sum(self): + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip'), + create_md5sum=False) + md5sum = md5.new('Foo bar') + md5sum_file = file(path2[:-4] + '.md5', 'w') + try: + md5sum_file.write(md5sum.hexdigest() + ' ' + path2) + finally: + md5sum_file.close() + snapshots = SnapshotManager(self.config) + self.assertEqual(path1, snapshots.get(123)) + self.assertEqual(None, snapshots.get(124)) + + def test_cleanup_on_init(self): + self.env.config.set('bitten', 'max_snapshots', '3') + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + path2 = self._create_file(os.path.join('snapshots', 
'foo_r124.zip')) + path3 = self._create_file(os.path.join('snapshots', 'foo_r125.zip')) + self._create_file(os.path.join('snapshots', 'foo_r126.zip')) + snapshots = SnapshotManager(self.config) + self.assertEqual(path1, snapshots.get(123)) + self.assertEqual(path2, snapshots.get(124)) + self.assertEqual(path3, snapshots.get(125)) + self.assertEqual(None, snapshots.get(126)) + + def test_cleanup_explicit(self): + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip')) + path3 = self._create_file(os.path.join('snapshots', 'foo_r125.zip')) + snapshots = SnapshotManager(self.config) + path4 = self._create_file(os.path.join('snapshots', 'foo_r126.zip')) + snapshots._index.append((os.path.getmtime(path4), 126, path4)) + snapshots._cleanup(3) + self.assertEqual(path1, snapshots.get(123)) + self.assertEqual(path2, snapshots.get(124)) + self.assertEqual(path3, snapshots.get(125)) + self.assertEqual(None, snapshots.get(126)) + + def test_create_not_a_directory(self): + repos = Mock(get_node=lambda path, rev: Mock(isdir=False)) + self.env.get_repository = lambda authname=None: repos + snapshots = SnapshotManager(self.config) + self.assertRaises(AssertionError, snapshots.create, 123) + + def test_create_empty(self): + root_dir = Mock(isdir=True, get_entries=lambda: [], path='trunk', + rev=123) + repos = Mock(get_node=lambda path, rev: root_dir) + self.env.get_repository = lambda authname=None: repos + snapshots = SnapshotManager(self.config) + snapshots.create(123).join() + path = snapshots.get(123) + assert path is not None + assert path.endswith('foo_r123.zip') + entries = zipfile.ZipFile(path, 'r').infolist() + self.assertEqual(1, len(entries)) + self.assertEqual('foo_r123/', entries[0].filename) + + def test_create_empty_dir(self): + empty_dir = Mock(isdir=True, get_entries=lambda: [], path='trunk/empty') + root_dir = Mock(isdir=True, get_entries=lambda: [empty_dir], + path='trunk', 
rev=123) + repos = Mock(get_node=lambda path, rev: root_dir) + self.env.get_repository = lambda authname=None: repos + snapshots = SnapshotManager(self.config) + snapshots.create(123).join() + path = snapshots.get(123) + assert path is not None + assert path.endswith('foo_r123.zip') + entries = zipfile.ZipFile(path, 'r').infolist() + self.assertEqual(2, len(entries)) + self.assertEqual('foo_r123/', entries[0].filename) + self.assertEqual('foo_r123/empty/', entries[1].filename) + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(SnapshotManagerTestCase, 'test')) + return suite + +if __name__ == '__main__': + unittest.main(defaultTest='suite')
deleted file mode 100644 --- a/bitten/util/archive.py +++ /dev/null @@ -1,194 +0,0 @@ -# -*- coding: iso8859-1 -*- -# -# Copyright (C) 2005 Christopher Lenz <cmlenz@gmx.de> -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://bitten.cmlenz.net/wiki/License. - -import md5 -import os -import tarfile -import time -import zipfile - - -class Error(Exception): - """Error raised when packing or unpacking a snapshot archive fails.""" - - -_formats = {'gzip': ('.tar.gz', 'gz'), 'bzip2': ('.tar.bz2', 'bz2'), - 'zip': ('.zip', None)} - -def index(env, prefix): - """Generator that yields `(rev, format, path)` tuples for every archive in - the environment snapshots directory that match the specified prefix. - """ - filedir = os.path.join(env.path, 'snapshots') - for filename in [f for f in os.listdir(filedir) if f.startswith(prefix)]: - rest = filename[len(prefix):] - - # Determine format based of file extension - format = None - for name, (extension, _) in _formats.items(): - if rest.endswith(extension): - rest = rest[:-len(extension)] - format = name - if not format: - continue - - if not rest.startswith('_r'): - continue - rev = rest[2:] - - expected_md5sum = _make_md5sum(os.path.join(filedir, filename)) - md5sum_path = os.path.join(filedir, filename + '.md5') - if not os.path.isfile(md5sum_path): - continue - md5sum_file = file(md5sum_path) - try: - existing_md5sum = md5sum_file.read() - if existing_md5sum != expected_md5sum: - continue - finally: - md5sum_file.close() - - yield rev, format, os.path.join(filedir, filename) - -def _make_md5sum(filename): - """Generate an MD5 checksum for the specified file.""" - md5sum = md5.new() - fileobj = file(filename, 'rb') - try: - while True: - chunk = fileobj.read(4096) - if not chunk: - break - md5sum.update(chunk) - finally: - fileobj.close() - return md5sum.hexdigest() + ' ' + filename - 
-def pack(env, repos=None, path=None, rev=None, prefix=None, format='gzip', - overwrite=False): - """Create a snapshot archive in the specified format.""" - if format not in _formats: - raise Error, 'Unknown archive format: %s' % format - - if repos is None: - repos = env.get_repository() - root = repos.get_node(path or '/', rev) - if not root.isdir: - raise Error, '"%s" is not a directory' % path - - filedir = os.path.join(env.path, 'snapshots') - if not os.access(filedir, os.R_OK + os.W_OK): - raise Error, 'Insufficient permissions to create tarball' - if not prefix: - prefix = root.path.replace('/', '-') - prefix += '_r%s' % root.rev - filename = os.path.join(filedir, prefix + _formats[format][0]) - - if not overwrite and os.path.isfile(filename): - return filename - - if format in ('bzip2', 'gzip'): - archive = tarfile.open(filename, 'w:' + _formats[format][1]) - else: - archive = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) - - def _add_entry(node): - name = node.path[len(root.path):] - if name.startswith('/'): - name = name[1:] - if node.isdir: - if format == 'zip': - dirpath = os.path.join(prefix, name).rstrip('/\\') + '/' - info = zipfile.ZipInfo(dirpath) - archive.writestr(info, '') - for entry in node.get_entries(): - _add_entry(entry) - elif format in ('bzip2', 'gzip'): - try: - info = tarfile.TarInfo(os.path.join(prefix, name)) - info.type = tarfile.REGTYPE - info.mtime = node.last_modified - info.size = node.content_length - archive.addfile(info, node.get_content()) - except tarfile.TarError, e: - raise Error, e - else: # ZIP format - try: - info = zipfile.ZipInfo(os.path.join(prefix, name)) - info.compress_type = zipfile.ZIP_DEFLATED - info.date_time = time.gmtime(node.last_modified)[:6] - info.file_size = node.content_length - archive.writestr(info, node.get_content().read()) - except zipfile.error, e: - raise Error, e - try: - _add_entry(root) - finally: - archive.close() - - # Create MD5 checksum - md5sum = _make_md5sum(filename) - 
md5sum_file = file(filename + '.md5', 'w') - try: - md5sum_file.write(md5sum) - finally: - md5sum_file.close() - - return filename - -def unpack(filename, dest_path, format=None): - """Extract the contents of a snapshot archive.""" - if not format: - for name, (extension, _) in _formats.items(): - if filename.endswith(extension): - format = name - break - if not format: - raise Error, 'Unkown archive extension: %s' \ - % os.path.splitext(filename)[1] - - names = [] - if format in ('bzip2', 'gzip'): - try: - tar_file = tarfile.open(filename) - try: - tar_file.chown = lambda *args: None # Don't chown extracted members - for tarinfo in tar_file: - if tarinfo.isfile() or tarinfo.isdir(): - if tarinfo.name.startswith('/') or '..' in tarinfo.name: - continue - names.append(tarinfo.name) - tar_file.extract(tarinfo, dest_path) - finally: - tar_file.close() - except tarfile.TarError, e: - raise Error, e - elif format == 'zip': - try: - zip_file = zipfile.ZipFile(filename, 'r') - try: - for name in zip_file.namelist(): - names.append(name) - path = os.path.join(dest_path, name) - if name.endswith('/'): - os.makedirs(path) - else: - dirname = os.path.dirname(path) - if not os.path.isdir(dirname): - os.makedirs(dirname) - dest_file = file(path, 'wb') - try: - dest_file.write(zip_file.read(name)) - finally: - dest_file.close() - finally: - zip_file.close() - except (IOError, zipfile.error), e: - raise Error, e - return os.path.commonprefix(names)
--- a/bitten/util/tests/__init__.py +++ b/bitten/util/tests/__init__.py @@ -11,11 +11,10 @@ import unittest from bitten.util import xmlio -from bitten.util.tests import archive, beep +from bitten.util.tests import beep def suite(): suite = unittest.TestSuite() - suite.addTest(archive.suite()) suite.addTest(beep.suite()) suite.addTest(doctest.DocTestSuite(xmlio)) return suite
deleted file mode 100644 --- a/bitten/util/tests/archive.py +++ /dev/null @@ -1,240 +0,0 @@ -# -*- coding: iso8859-1 -*- -# -# Copyright (C) 2005 Christopher Lenz <cmlenz@gmx.de> -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://bitten.cmlenz.net/wiki/License. - -import md5 -import os -import shutil -import tarfile -import tempfile -import unittest -import zipfile - -from trac.test import EnvironmentStub, Mock -from bitten.util import archive - - -class IndexTestCase(unittest.TestCase): - - def setUp(self): - self.env = EnvironmentStub() - self.env.path = tempfile.mkdtemp(prefix='bitten_test') - os.mkdir(os.path.join(self.env.path, 'snapshots')) - - def tearDown(self): - shutil.rmtree(self.env.path) - - def _create_file(self, path, create_md5sum=True): - filename = os.path.join(self.env.path, path) - fileobj = file(filename, 'w') - fileobj.close() - if create_md5sum: - md5sum = archive._make_md5sum(filename) - md5sum_file = file(filename + '.md5', 'w') - try: - md5sum_file.write(md5sum) - finally: - md5sum_file.close() - return filename - - def test_index_formats(self): - targz_path = self._create_file(os.path.join('snapshots', - 'foo_r123.tar.gz')) - tarbz2_path = self._create_file(os.path.join('snapshots', - 'foo_r123.tar.bz2')) - zip_path = self._create_file(os.path.join('snapshots', - 'foo_r123.zip')) - index = list(archive.index(self.env, 'foo')) - self.assertEqual(3, len(index)) - assert ('123', 'gzip', targz_path) in index - assert ('123', 'bzip2', tarbz2_path) in index - assert ('123', 'zip', zip_path) in index - - def test_index_revs(self): - rev123_path = self._create_file(os.path.join('snapshots', - 'foo_r123.tar.gz')) - rev124_path = self._create_file(os.path.join('snapshots', - 'foo_r124.tar.gz')) - index = list(archive.index(self.env, 'foo')) - self.assertEqual(2, len(index)) - assert ('123', 'gzip', 
rev123_path) in index - assert ('124', 'gzip', rev124_path) in index - - def test_index_empty(self): - index = list(archive.index(self.env, 'foo')) - self.assertEqual(0, len(index)) - - def test_index_prefix(self): - path = self._create_file(os.path.join('snapshots', 'foo_r123.tar.gz')) - self._create_file(os.path.join('snapshots', 'bar_r123.tar.gz')) - index = list(archive.index(self.env, 'foo')) - self.assertEqual(1, len(index)) - assert ('123', 'gzip', path) in index - - def test_index_no_rev(self): - path = self._create_file(os.path.join('snapshots', 'foo_r123.tar.gz')) - self._create_file(os.path.join('snapshots', 'foo_map.tar.gz')) - index = list(archive.index(self.env, 'foo')) - self.assertEqual(1, len(index)) - assert ('123', 'gzip', path) in index - - def test_index_missing_md5sum(self): - self._create_file(os.path.join('snapshots', 'foo_r123.tar.gz'), - create_md5sum=False) - index = list(archive.index(self.env, 'foo')) - self.assertEqual(0, len(index)) - - def test_index_nonmatching_md5sum(self): - path = self._create_file(os.path.join('snapshots', 'foo_r123.tar.gz'), - create_md5sum=False) - md5sum = md5.new('Foo bar') - md5sum_file = file(path + '.md5', 'w') - try: - md5sum_file.write(md5sum.hexdigest() + ' ' + path) - finally: - md5sum_file.close() - - index = list(archive.index(self.env, 'foo')) - self.assertEqual(0, len(index)) - - -class PackTestCase(unittest.TestCase): - - def setUp(self): - self.env = EnvironmentStub() - self.env.path = tempfile.mkdtemp(prefix='bitten_test') - os.mkdir(os.path.join(self.env.path, 'snapshots')) - - def tearDown(self): - shutil.rmtree(self.env.path) - - def _create_file(self, *path): - filename = os.path.join(self.env.path, *path) - fd = file(filename, 'w') - fd.close() - return filename - - def test_pack_unknown_format(self): - self.assertRaises(archive.Error, archive.pack, self.env, format='foo') - - def test_pack_not_a_directory(self): - repos = Mock(get_node=lambda path, rev: Mock(isdir=False)) - 
self.assertRaises(archive.Error, archive.pack, self.env, repos) - - if not hasattr(os, 'geteuid') or os.geteuid() != 0: - def test_pack_insufficient_perms(self): - try: - os.chmod(os.path.join(self.env.path, 'snapshots'), 0500) - repos = Mock(get_node=lambda path, rev: Mock(isdir=True)) - self.assertRaises(archive.Error, archive.pack, self.env, repos) - finally: - # Revert permissions, otherwise the environment directory can't - # be deleted on windows - os.chmod(os.path.join(self.env.path, 'snapshots'), 0700) - - def test_pack_tarbz2_empty(self): - root_dir = Mock(isdir=True, get_entries=lambda: [], path='', rev=123) - repos = Mock(get_node=lambda path, rev: root_dir) - path = archive.pack(self.env, repos, format='bzip2') - assert path.endswith('_r123.tar.bz2') - - def test_pack_targz_empty(self): - root_dir = Mock(isdir=True, get_entries=lambda: [], path='', rev=123) - repos = Mock(get_node=lambda path, rev: root_dir) - path = archive.pack(self.env, repos, format='gzip') - assert path.endswith('_r123.tar.gz') - - def test_pack_zip_empty(self): - root_dir = Mock(isdir=True, get_entries=lambda: [], path='', rev=123) - repos = Mock(get_node=lambda path, rev: root_dir) - path = archive.pack(self.env, repos, format='zip') - assert path.endswith('_r123.zip') - entries = zipfile.ZipFile(path, 'r').infolist() - self.assertEqual(1, len(entries)) - self.assertEqual('_r123/', entries[0].filename) - - def test_pack_zip_empty_dir(self): - empty_dir = Mock(isdir=True, get_entries=lambda: [], path='empty') - root_dir = Mock(isdir=True, get_entries=lambda: [empty_dir], - path='', rev=123) - repos = Mock(get_node=lambda path, rev: root_dir) - path = archive.pack(self.env, repos, format='zip') - entries = zipfile.ZipFile(path, 'r').infolist() - self.assertEqual(2, len(entries)) - self.assertEqual('_r123/', entries[0].filename) - self.assertEqual('_r123/empty/', entries[1].filename) - - -class UnpackTestCase(unittest.TestCase): - - def setUp(self): - self.workdir = 
tempfile.mkdtemp(prefix='bitten_test') - - def tearDown(self): - shutil.rmtree(self.workdir) - - def _create_file(self, *path): - filename = os.path.join(self.workdir, *path) - fd = file(filename, 'w') - fd.close() - return filename - - def test_unpack_unknown_format(self): - self.assertRaises(archive.Error, archive.unpack, 'test.foo', - self.workdir) - - def test_unpack_invalid_tar_gz(self): - path = self._create_file('invalid.tar.gz') - targz = file(path, 'w') - targz.write('INVALID') - targz.close() - self.assertRaises(archive.Error, archive.unpack, path, self.workdir) - - def test_unpack_invalid_tar_bz2(self): - path = self._create_file('invalid.tar.bz2') - tarbz2 = file(path, 'w') - tarbz2.write('INVALID') - tarbz2.close() - self.assertRaises(archive.Error, archive.unpack, path, self.workdir) - - def test_unpack_invalid_zip_1(self): - """ - Verify handling of `IOError` exceptions when trying to unpack an - invalid ZIP file. - - The `zipfile` module will actually raise an `IOError` instead of a - `zipfile.error` here because it'll try to seek past the beginning of - the file. - """ - path = self._create_file('invalid.zip') - zip = file(path, 'w') - zip.write('INVALID') - zip.close() - self.assertRaises(archive.Error, archive.unpack, path, self.workdir) - - def test_unpack_invalid_zip_2(self): - """ - Verify handling of `zip.error` exceptions when trying to unpack an - invalid ZIP file. - """ - path = self._create_file('invalid.zip') - zip = file(path, 'w') - zip.write('INVALIDINVALIDINVALIDINVALIDINVALIDINVALID') - zip.close() - self.assertRaises(archive.Error, archive.unpack, path, self.workdir) - - -def suite(): - suite = unittest.TestSuite() - suite.addTest(unittest.makeSuite(IndexTestCase, 'test')) - suite.addTest(unittest.makeSuite(PackTestCase, 'test')) - suite.addTest(unittest.makeSuite(UnpackTestCase, 'test')) - return suite - -if __name__ == '__main__': - unittest.main(defaultTest='suite')