# HG changeset patch # User cmlenz # Date 1132744408 0 # Node ID 0ffdab3a70324f5c0920d548f042e51e6420f735 # Parent 29f7b0d7dc4d0130125b6bc6bed48f41dd94b2b6 Transmit source archives as `tar.bz2` instead of as zip. See #76. diff --git a/bitten/master.py b/bitten/master.py --- a/bitten/master.py +++ b/bitten/master.py @@ -243,7 +243,8 @@ snapshot_name = os.path.basename(snapshot) message = beep.Payload(file(snapshot, 'rb'), - content_type='application/zip', + content_type='application/tar', + content_encoding='bzip2', content_disposition=snapshot_name) self.channel.send_msg(message, handle_reply=handle_reply) diff --git a/bitten/slave.py b/bitten/slave.py --- a/bitten/slave.py +++ b/bitten/slave.py @@ -17,7 +17,7 @@ from sets import Set as set import shutil import tempfile -import zipfile +import tarfile from bitten.build import BuildError from bitten.build.config import Configuration @@ -102,7 +102,8 @@ xml = xmlio.Element('proceed') self.channel.send_rpy(msgno, beep.Payload(xml)) - elif payload.content_type == 'application/zip': + elif payload.content_type == 'application/tar' and \ + payload.content_encoding == 'bzip2': # Received snapshot archive for build project_name = self.build_xml.attr.get('project', 'default') project_dir = os.path.join(self.session.work_dir, project_name) @@ -111,7 +112,7 @@ archive_name = payload.content_disposition if not archive_name: - archive_name = 'snapshot.zip' + archive_name = 'snapshot.tar.bz2' archive_path = os.path.join(project_dir, archive_name) archive_file = file(archive_path, 'wb') @@ -134,42 +135,25 @@ path = os.path.join(project_dir, archive_name) log.debug('Received snapshot archive: %s', path) try: - zip_file = zipfile.ZipFile(path, 'r') + tar_file = tarfile.open(path, 'r:bz2') + tar_file.chown = lambda *args: None # Don't chown extracted members try: names = [] - for name in zip_file.namelist(): - if name.startswith('/') or '..' in name: - continue - names.append(os.path.normpath(name)) - info = zip_file.getinfo(name) - fullpath = os.path.join(project_dir, name) - if name.endswith('/') or info.external_attr & 0x10: - os.makedirs(fullpath) - else: - dirname = os.path.dirname(fullpath) - if not os.path.isdir(dirname): - os.makedirs(dirname) - fileobj = file(fullpath, 'wb') - try: - try: - fileobj.write(zip_file.read(name)) - except zipfile.BadZipFile: - log.error('Bad CRC for file %s', name, path) - raise beep.ProtocolError(550, 'Corrupt ' - 'snapshot archive') - finally: - fileobj.close() - mode = (info.external_attr >> 16) & 0777 - if mode: - os.chmod(fullpath, mode) + for tarinfo in tar_file: + if tarinfo.isfile() or tarinfo.isdir(): + log.debug('Extracting %s to %s', tarinfo.name, project_dir) + if tarinfo.name.startswith('/') or '..' in tarinfo.name: + continue + names.append(tarinfo.name) + tar_file.extract(tarinfo, project_dir) finally: - zip_file.close() + tar_file.close() basedir = os.path.join(project_dir, os.path.commonprefix(names)) log.debug('Unpacked snapshot to %s' % basedir) return basedir - except (IOError, zipfile.error), e: + except tarfile.TarError, e: log.error('Could not unpack archive %s: %s', path, e, exc_info=True) raise beep.ProtocolError(550, 'Could not unpack archive (%s)' % e) diff --git a/bitten/snapshot.py b/bitten/snapshot.py --- a/bitten/snapshot.py +++ b/bitten/snapshot.py @@ -9,11 +9,12 @@ """Snapshot archive management. -Snapshots of the code base are stored in the Trac environment as ZIP files. +Snapshots of the code base are stored in the Trac environment as tar archives, +compressed using bzip2. -These files use the naming convention `[config_name]_r[revision].zip` so they -can be located programmatically after creation, and associated with the build -config and revision they apply to. +These files use the naming convention `[config_name]_r[revision].tar.bz2` so +they can be located programmatically after creation, and associated with the +build config and revision they apply to. These snapshot files are accompanied by a checksum file (using MD5). Any archive file with no accompanying checksum is considered incomplete or invalid. @@ -34,12 +35,13 @@ import logging import os +import posixpath try: import threading except ImportError: import dummy_threading as threading import time -import zipfile +import tarfile from bitten.util import md5sum @@ -90,9 +92,9 @@ """Find all existing snapshots in the directory.""" for filename in [f for f in os.listdir(self.directory) if f.startswith(self.prefix)]: - if not filename.endswith('.zip'): + if not filename.endswith('.tar.bz2'): continue - rest = filename[len(self.prefix):-4] + rest = filename[len(self.prefix):-8] if not rest.startswith('_r'): continue rev = rest[2:] @@ -146,7 +148,7 @@ return self._workers[new_root.rev] new_prefix = self.prefix + '_r' + str(rev) - filename = new_prefix + '.zip' + filename = new_prefix + '.tar.bz2' new_filepath = os.path.join(self.directory, filename) if os.path.exists(new_filepath): raise IOError, 'Snapshot file already exists at %s' \ @@ -184,62 +186,63 @@ new_root.rev) if base_root: base_rev = repos.next_rev(base_root.rev) - base_zip = zipfile.ZipFile(base_filepath, 'r') - new_zip = zipfile.ZipFile(new_filepath, 'w', zipfile.ZIP_DEFLATED) + base_tar = tarfile.open(base_filepath, 'r:bz2') + base_tar.posix = False + new_tar = tarfile.open(new_filepath, 'w:bz2') + new_tar.posix = False def _add_entry(node): name = node.path[len(self.config.path):] if name.startswith('/'): name = name[1:] + new_path = posixpath.join(new_prefix, name) if node.isdir: - path = os.path.join(new_prefix, name).rstrip('/\\') + '/' - info = zipfile.ZipInfo(path) - info.create_system = 3 - info.external_attr = 040755 << 16L | 0x10 - new_zip.writestr(info, '') - log.debug('Adding directory %s to archive', name + '/') + log.debug('Adding directory %s/ to archive', name) + new_info = tarfile.TarInfo(new_path) + new_info.type = tarfile.DIRTYPE + new_info.mode = 0755 + new_tar.addfile(new_info) + for entry in node.get_entries(): _add_entry(entry) time.sleep(.1) # be nice else: - new_path = os.path.join(new_prefix, name) - copy_base = False if base_root and repos.has_node(node.path, base_root.rev): base_node = repos.get_node(node.path, base_root.rev) copy_base = base_node.rev == node.rev if copy_base: - # Copy entry from base ZIP file - base_path = os.path.join(base_prefix, name) - base_info = base_zip.getinfo(base_path) - base_info.filename = new_path - new_zip.writestr(base_info, base_zip.read(base_path)) + # Copy entry from base archive + base_path = posixpath.join(base_prefix, name) + base_info = base_tar.getmember(base_path) + base_info.name = new_path + fileobj = base_tar.extractfile(base_info) + new_tar.addfile(base_info, fileobj) else: # Create entry from repository - new_info = zipfile.ZipInfo(new_path) - new_info.create_system = 3 - new_info.compress_type = zipfile.ZIP_DEFLATED - new_info.date_time = time.gmtime(node.last_modified)[:6] - new_info.file_size = node.content_length + new_info = tarfile.TarInfo(new_path) + new_info.type = tarfile.REGTYPE + new_info.mtime = node.last_modified + new_info.size = node.content_length # FIXME: Subversion specific! This should really be an # executable flag provided by Trac's versioncontrol # API if 'svn:executable' in node.get_properties(): - new_info.external_attr = 0100755 << 16L + new_info.mode = 0755 else: - new_info.external_attr = 0100644 << 16L + new_info.mode = 0644 - new_zip.writestr(new_info, node.get_content().read()) + new_tar.addfile(new_info, node.get_content()) try: _add_entry(new_root) finally: - new_zip.close() + new_tar.close() if base_root: - base_zip.close() + base_tar.close() # Create MD5 checksum file md5sum.write(new_filepath) diff --git a/bitten/tests/snapshot.py b/bitten/tests/snapshot.py --- a/bitten/tests/snapshot.py +++ b/bitten/tests/snapshot.py @@ -10,9 +10,9 @@ import md5 import os import shutil +import tarfile import tempfile import unittest -import zipfile from trac.test import EnvironmentStub, Mock from bitten.model import BuildConfig @@ -52,35 +52,35 @@ self.assertEqual(None, snapshots.get(123)) def test_get(self): - path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) - path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip')) + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.tar.bz2')) + path2 = self._create_file(os.path.join('snapshots', 'foo_r124.tar.bz2')) snapshots = SnapshotManager(self.config) self.assertEqual(path1, snapshots.get(123)) self.assertEqual(path2, snapshots.get(124)) def test_get_prefix_match(self): - path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) - self._create_file(os.path.join('snapshots', 'bar_r124.zip')) + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.tar.bz2')) + self._create_file(os.path.join('snapshots', 'bar_r124.tar.bz2')) snapshots = SnapshotManager(self.config) self.assertEqual(path1, snapshots.get(123)) self.assertEqual(None, snapshots.get(124)) def test_get_wrong_extension(self): - path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.tar.bz2')) self._create_file(os.path.join('snapshots', 'foo_r124.doc')) snapshots = SnapshotManager(self.config) self.assertEqual(path1, snapshots.get(123)) self.assertEqual(None, snapshots.get(124)) def test_get_missing_rev(self): - path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.tar.bz2')) self._create_file(os.path.join('snapshots', 'foo124.doc')) snapshots = SnapshotManager(self.config) self.assertEqual(path1, snapshots.get(123)) self.assertEqual(None, snapshots.get(124)) def test_get_missing_md5sum(self): - path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.tar.bz2')) self._create_file(os.path.join('snapshots', 'foo_r124.zip'), create_md5sum=False) snapshots = SnapshotManager(self.config) @@ -88,8 +88,8 @@ self.assertEqual(None, snapshots.get(124)) def test_get_wrong_md5sum(self): - path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) - path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip'), + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.tar.bz2')) + path2 = self._create_file(os.path.join('snapshots', 'foo_r124.tar.bz2'), create_md5sum=False) md5sum.write(path1, path2 + '.md5') @@ -99,10 +99,10 @@ def test_cleanup_on_init(self): self.env.config.set('bitten', 'max_snapshots', '3') - path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) - path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip')) - path3 = self._create_file(os.path.join('snapshots', 'foo_r125.zip')) - self._create_file(os.path.join('snapshots', 'foo_r126.zip')) + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.tar.bz2')) + path2 = self._create_file(os.path.join('snapshots', 'foo_r124.tar.bz2')) + path3 = self._create_file(os.path.join('snapshots', 'foo_r125.tar.bz2')) + self._create_file(os.path.join('snapshots', 'foo_r126.tar.bz2')) snapshots = SnapshotManager(self.config) self.assertEqual(path1, snapshots.get(123)) self.assertEqual(path2, snapshots.get(124)) @@ -110,11 +110,11 @@ self.assertEqual(None, snapshots.get(126)) def test_cleanup_explicit(self): - path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) - path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip')) - path3 = self._create_file(os.path.join('snapshots', 'foo_r125.zip')) + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.tar.bz2')) + path2 = self._create_file(os.path.join('snapshots', 'foo_r124.tar.bz2')) + path3 = self._create_file(os.path.join('snapshots', 'foo_r125.tar.bz2')) snapshots = SnapshotManager(self.config) - path4 = self._create_file(os.path.join('snapshots', 'foo_r126.zip')) + path4 = self._create_file(os.path.join('snapshots', 'foo_r126.tar.bz2')) snapshots._index.append((os.path.getmtime(path4), 126, path4)) snapshots._cleanup(3) self.assertEqual(path1, snapshots.get(123)) @@ -137,10 +137,10 @@ snapshots.create(123).join() path = snapshots.get(123) assert path is not None - assert path.endswith('foo_r123.zip') - entries = zipfile.ZipFile(path, 'r').infolist() + assert path.endswith('foo_r123.tar.bz2') + entries = tarfile.open(path, 'r:bz2').getmembers() self.assertEqual(1, len(entries)) - self.assertEqual('foo_r123/', entries[0].filename) + self.assertEqual('foo_r123/', entries[0].name) def test_create_empty_dir(self): empty_dir = Mock(isdir=True, get_entries=lambda: [], path='trunk/empty') @@ -152,15 +152,15 @@ snapshots.create(123).join() path = snapshots.get(123) assert path is not None - assert path.endswith('foo_r123.zip') - entries = zipfile.ZipFile(path, 'r').infolist() + assert path.endswith('foo_r123.tar.bz2') + entries = tarfile.open(path, 'r:bz2').getmembers() self.assertEqual(2, len(entries)) - self.assertEqual('foo_r123/', entries[0].filename) - self.assertEqual('foo_r123/empty/', entries[1].filename) + self.assertEqual('foo_r123/', entries[0].name) + self.assertEqual('foo_r123/empty/', entries[1].name) def test_get_closest_match_backward(self): - path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) - path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip')) + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.tar.bz2')) + path2 = self._create_file(os.path.join('snapshots', 'foo_r124.tar.bz2')) empty_dir = Mock(isdir=True, get_entries=lambda: [], path='trunk/empty') root_dir = Mock(isdir=True, get_entries=lambda: [empty_dir], @@ -177,8 +177,8 @@ self.assertEqual((124, path2), match) def test_get_closest_match_forward(self): - path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) - path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip')) + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.tar.bz2')) + path2 = self._create_file(os.path.join('snapshots', 'foo_r124.tar.bz2')) empty_dir = Mock(isdir=True, get_entries=lambda: [], path='trunk/empty') root_dir = Mock(isdir=True, get_entries=lambda: [empty_dir],