# HG changeset patch # User cmlenz # Date 1186581313 0 # Node ID b72802dc06328acb769add93591d481cd3d4e261 # Parent 0cfc877405d10ea587093aae12af8eb7e15a446c Fix resetting of builds when multiple slaves are building simultaneously, and implement the `slave_timeout` trac.ini option. diff --git a/bitten/master.py b/bitten/master.py --- a/bitten/master.py +++ b/bitten/master.py @@ -95,7 +95,8 @@ raise HTTPNotFound('No such collection') def _process_build_creation(self, req): - queue = BuildQueue(self.env, build_all=self.build_all) + queue = BuildQueue(self.env, build_all=self.build_all, + timeout=self.slave_timeout) queue.populate() try: diff --git a/bitten/queue.py b/bitten/queue.py --- a/bitten/queue.py +++ b/bitten/queue.py @@ -22,6 +22,7 @@ from itertools import ifilter import logging import re +import time from trac.versioncontrol import NoSuchNode from bitten.model import BuildConfig, TargetPlatform, Build, BuildStep @@ -95,15 +96,19 @@ repository revisions that need to be built. """ - def __init__(self, env, build_all=False): + def __init__(self, env, build_all=False, timeout=0): """Create the build queue. :param env: the Trac environment :param build_all: whether older revisions should be built + :param timeout: the time in seconds after which an in-progress build + should be considered orphaned, and reset to pending + state """ self.env = env self.log = env.log self.build_all = build_all + self.timeout = timeout # Build scheduling @@ -225,14 +230,27 @@ db.commit() def reset_orphaned_builds(self): - """Reset all in-progress builds to ``PENDING`` state. + """Reset all in-progress builds to ``PENDING`` state if they've been + running so long that the configured timeout has been reached. - This is used to cleanup after a crash of the build master process, - which would leave in-progress builds in the database that aren't - actually being built because the slaves have disconnected. + This is used to cleanup after slaves that have unexpectedly cancelled + a build without notifying the master, or are for some other reason not + reporting back status updates. """ + if not self.timeout: + # If no timeout is set, none of the in-progress builds can be + # considered orphaned + return + db = self.env.get_db_cnx() + now = int(time.time()) for build in Build.select(self.env, status=Build.IN_PROGRESS, db=db): + if now - build.started < self.timeout: + # This build has not reached the timeout yet, assume it's still + # being executed + # FIXME: ideally, we'd base this check on the last activity on + # the build, not the start time + continue build.status = Build.PENDING build.slave = None build.slave_info = {} diff --git a/bitten/tests/queue.py b/bitten/tests/queue.py --- a/bitten/tests/queue.py +++ b/bitten/tests/queue.py @@ -11,6 +11,7 @@ import os import shutil import tempfile +import time import unittest from trac.db import DatabaseManager @@ -189,6 +190,26 @@ build = queue.get_build_for_slave('foobar', {}) self.assertEqual(None, build) + def test_reset_orphaned_builds(self): + BuildConfig(self.env, 'test').insert() + platform = TargetPlatform(self.env, config='test', name='Foo') + platform.insert() + build1 = Build(self.env, config='test', platform=platform.id, rev=123, + rev_time=42, status=Build.IN_PROGRESS, slave='heinz', + started=time.time() - 600) # Started ten minutes ago + build1.insert() + + build2 = Build(self.env, config='test', platform=platform.id, rev=124, + rev_time=42, status=Build.IN_PROGRESS, slave='heinz', + started=time.time() - 60) # Started a minute ago + build2.insert() + + queue = BuildQueue(self.env, timeout=300) # 5 minutes timeout + build = queue.reset_orphaned_builds() + self.assertEqual(Build.PENDING, Build.fetch(self.env, build1.id).status) + self.assertEqual(Build.IN_PROGRESS, + Build.fetch(self.env, build2.id).status) + def test_match_slave_match(self): BuildConfig(self.env, 'test', active=True).insert() platform = TargetPlatform(self.env, config='test', name="Unix")