# HG changeset patch # User cmlenz # Date 1131447646 0 # Node ID 5979bcb0892e2f7707b54333e1c5a6ff22839eaa # Parent 3d58d9dd11c866b0759e57a631e024a1964a1a0e The build master now attempts to build new snapshot archives based on the closest existing archive, under the assumption that usually very few files are changed between revisions, and that it is cheaper to copy the unmodified content over from an existing ZIP archive. diff --git a/bitten/snapshot.py b/bitten/snapshot.py --- a/bitten/snapshot.py +++ b/bitten/snapshot.py @@ -21,9 +21,7 @@ For larger code bases, these snapshots may be relatively expensive to create. Most of the time is spent in walking the repository directory and reading the files it contains. To avoid blocking the build master while snapshots are -created, this is done in a worker thread. The main thread polls the snapshots -directory to find the snapshots that have been completely created (including -the corresponding checksum file). +created, this is done in a worker thread. 
As snapshot archives are often very similar to each other for subsequent revisions, an attempt is made to avoid the creation of new archives from @@ -103,7 +101,8 @@ try: md5sum.validate(filepath) except md5sum.IntegrityError, e: - log.warning('Integrity error checking %s (e)', filepath, e) + log.warning('Integrity error checking %s (%s)', filepath, e) + os.remove(filepath) continue mtime = os.path.getmtime(filepath) @@ -140,83 +139,119 @@ self._lock.acquire() try: repos = self.env.get_repository() - root = repos.get_node(self.config.path or '/', rev) - assert root.isdir, '"%s" is not a directory' % self.config.path + new_root = repos.get_node(self.config.path or '/', rev) + assert new_root.isdir, '"%s" is not a directory' % self.config.path - if root.rev in self._workers: - return self._workers[root.rev] + if new_root.rev in self._workers: + return self._workers[new_root.rev] - prefix = self.prefix + '_r' + str(rev) - filename = prefix + '.zip' - filepath = os.path.join(self.directory, filename) - if os.path.exists(filepath): - raise IOError, 'Snapshot file already exists at %s' % filepath + new_prefix = self.prefix + '_r' + str(rev) + filename = new_prefix + '.zip' + new_filepath = os.path.join(self.directory, filename) + if os.path.exists(new_filepath): + raise IOError, 'Snapshot file already exists at %s' \ + % new_filepath self._cleanup(self.limit - 1) + existing = self._get_closest_match(repos, new_root) + if existing: + base_rev, base_filepath = existing + base_root = repos.get_node(self.config.path or '/', base_rev) + base_prefix = self.prefix + '_r' + str(base_rev) + else: + base_root = base_filepath = base_prefix = None + worker = threading.Thread(target=self._create, - args=(prefix, root, filepath), + args=(repos, new_root, new_filepath, + new_prefix, base_root, + base_filepath, base_prefix), name='Create snapshot %s' % filename) worker.start() - self._workers[root.rev] = worker + self._workers[new_root.rev] = worker return worker finally: 
self._lock.release() - def _create(self, prefix, root, filepath): + def _create(self, repos, new_root, new_filepath, new_prefix, base_root=None, + base_filepath=None, base_prefix=None): """Actually create a snapshot archive. This is used internally from the `create()` function and executed in a worker thread. """ - log.debug('Preparing snapshot archive for %s@%s', root.path, root.rev) + log.debug('Preparing snapshot archive for %s@%s', new_root.path, + new_root.rev) + if base_root: + base_rev = repos.next_rev(base_root.rev) + base_zip = zipfile.ZipFile(base_filepath, 'r') + new_zip = zipfile.ZipFile(new_filepath, 'w', zipfile.ZIP_DEFLATED) - zip_file = zipfile.ZipFile(filepath, 'w', zipfile.ZIP_DEFLATED) def _add_entry(node): name = node.path[len(self.config.path):] if name.startswith('/'): name = name[1:] if node.isdir: - path = os.path.join(prefix, name).rstrip('/\\') + '/' + path = os.path.join(new_prefix, name).rstrip('/\\') + '/' info = zipfile.ZipInfo(path) info.create_system = 3 info.external_attr = 040755 << 16L | 0x10 - zip_file.writestr(info, '') - log.debug('Adding directory %s to archive', name) + new_zip.writestr(info, '') + log.debug('Adding directory %s to archive', name + '/') for entry in node.get_entries(): _add_entry(entry) time.sleep(.1) # be nice else: - path = os.path.join(prefix, name) - info = zipfile.ZipInfo(path) - info.create_system = 3 - info.compress_type = zipfile.ZIP_DEFLATED - info.date_time = time.gmtime(node.last_modified)[:6] - info.file_size = node.content_length + new_path = os.path.join(new_prefix, name) - # FIXME: Subversion specific! 
This should really be an - # executable flag provided by Trac's versioncontrol API - if 'svn:executable' in node.get_properties(): - info.external_attr = 0100755 << 16L + copy_base = False + if base_root and repos.has_node(node.path, base_root.rev): + base_node = repos.get_node(node.path, base_root.rev) + copy_base = base_node.rev == node.rev + + if copy_base: + # Copy entry from base ZIP file + base_path = os.path.join(base_prefix, name) + base_info = base_zip.getinfo(base_path) + base_info.filename = new_path + new_zip.writestr(base_info, base_zip.read(base_path)) + else: - info.external_attr = 0100644 << 16L + # Create entry from repository + new_info = zipfile.ZipInfo(new_path) + new_info.create_system = 3 + new_info.compress_type = zipfile.ZIP_DEFLATED + new_info.date_time = time.gmtime(node.last_modified)[:6] + new_info.file_size = node.content_length - zip_file.writestr(info, node.get_content().read()) + # FIXME: Subversion specific! This should really be an + # executable flag provided by Trac's versioncontrol + # API + if 'svn:executable' in node.get_properties(): + new_info.external_attr = 0100755 << 16L + else: + new_info.external_attr = 0100644 << 16L + + new_zip.writestr(new_info, node.get_content().read()) + try: - _add_entry(root) + _add_entry(new_root) finally: - zip_file.close() + new_zip.close() + if base_root: + base_zip.close() # Create MD5 checksum file - md5sum.write(filepath) + md5sum.write(new_filepath) self._lock.acquire() try: - self._index.append((os.path.getmtime(filepath), root.rev, filepath)) - del self._workers[root.rev] + self._index.append((os.path.getmtime(new_filepath), new_root.rev, + new_filepath)) + del self._workers[new_root.rev] finally: self._lock.release() - log.info('Prepared snapshot archive at %s', filepath) + log.info('Prepared snapshot archive at %s', new_filepath) def get(self, rev): """Returns the path to an already existing snapshot archive for the @@ -232,3 +267,33 @@ return None finally: self._lock.release() + + 
def _get_closest_match(self, repos, root): + """Determine which existing snapshot archive is closest to the + requested repository revision.""" + self._lock.acquire() + try: + distances = [] # (distance, rev, path) tuples + + for mtime, srev, path in self._index: + distance = 0 + srev = repos.normalize_rev(srev) + get_next = repos.next_rev + if repos.rev_older_than(root.rev, srev): + get_next = repos.previous_rev + nrev = srev + while nrev != root.rev: + distance += 1 + nrev = get_next(nrev) + if nrev is None: + distance = 0 + break + if distance: + distances.append((distance, srev, path)) + + if not distances: + return None + distances.sort() + return distances[0][1:] + finally: + self._lock.release() diff --git a/bitten/tests/snapshot.py b/bitten/tests/snapshot.py --- a/bitten/tests/snapshot.py +++ b/bitten/tests/snapshot.py @@ -158,6 +158,42 @@ self.assertEqual('foo_r123/', entries[0].filename) self.assertEqual('foo_r123/empty/', entries[1].filename) + def test_get_closest_match_backward(self): + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip')) + + empty_dir = Mock(isdir=True, get_entries=lambda: [], path='trunk/empty') + root_dir = Mock(isdir=True, get_entries=lambda: [empty_dir], + path='trunk', rev=125) + repos = Mock(get_node=lambda path, rev: root_dir, + normalize_rev=lambda rev: int(rev), + rev_older_than=lambda rev1, rev2: rev1 < rev2, + next_rev=lambda rev: int(rev) + 1, + previous_rev=lambda rev: int(rev) - 1) + self.env.get_repository = lambda authname=None: repos + + snapshots = SnapshotManager(self.config) + match = snapshots._get_closest_match(repos, root_dir) + self.assertEqual((124, path2), match) + + def test_get_closest_match_forward(self): + path1 = self._create_file(os.path.join('snapshots', 'foo_r123.zip')) + path2 = self._create_file(os.path.join('snapshots', 'foo_r124.zip')) + + empty_dir = Mock(isdir=True, get_entries=lambda: [], path='trunk/empty') + 
root_dir = Mock(isdir=True, get_entries=lambda: [empty_dir], + path='trunk', rev=122) + repos = Mock(get_node=lambda path, rev: root_dir, + normalize_rev=lambda rev: int(rev), + rev_older_than=lambda rev1, rev2: rev1 < rev2, + next_rev=lambda rev: int(rev) + 1, + previous_rev=lambda rev: int(rev) - 1) + self.env.get_repository = lambda authname=None: repos + + snapshots = SnapshotManager(self.config) + match = snapshots._get_closest_match(repos, root_dir) + self.assertEqual((123, path1), match) + def suite(): suite = unittest.TestSuite()