changeset 419:b72802dc0632

Fix resetting of builds when multiple slaves are building simultaneously, and implement the `slave_timeout` trac.ini option.
author cmlenz
date Wed, 08 Aug 2007 13:55:13 +0000
parents 0cfc877405d1
children 23de253435b8
files bitten/master.py bitten/queue.py bitten/tests/queue.py
diffstat 3 files changed, 46 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/bitten/master.py
+++ b/bitten/master.py
@@ -95,7 +95,8 @@
             raise HTTPNotFound('No such collection')
 
     def _process_build_creation(self, req):
-        queue = BuildQueue(self.env, build_all=self.build_all)
+        queue = BuildQueue(self.env, build_all=self.build_all,
+                           timeout=self.slave_timeout)
         queue.populate()
 
         try:
--- a/bitten/queue.py
+++ b/bitten/queue.py
@@ -22,6 +22,7 @@
 from itertools import ifilter
 import logging
 import re
+import time
 
 from trac.versioncontrol import NoSuchNode
 from bitten.model import BuildConfig, TargetPlatform, Build, BuildStep
@@ -95,15 +96,19 @@
     repository revisions that need to be built.
     """
 
-    def __init__(self, env, build_all=False):
+    def __init__(self, env, build_all=False, timeout=0):
         """Create the build queue.
         
         :param env: the Trac environment
         :param build_all: whether older revisions should be built
+        :param timeout: the time in seconds after which an in-progress build
+                        is considered orphaned and reset to pending state;
+                        ``0`` (the default) disables the reset
         """
         self.env = env
         self.log = env.log
         self.build_all = build_all
+        self.timeout = timeout
 
     # Build scheduling
 
@@ -225,14 +230,27 @@
         db.commit()
 
     def reset_orphaned_builds(self):
-        """Reset all in-progress builds to ``PENDING`` state.
+        """Reset all in-progress builds to ``PENDING`` state if they've been
+        running so long that the configured timeout has been reached.
         
-        This is used to cleanup after a crash of the build master process,
-        which would leave in-progress builds in the database that aren't
-        actually being built because the slaves have disconnected.
+        This is used to clean up after slaves that have unexpectedly cancelled
+        a build without notifying the master, or are for some other reason not
+        reporting back status updates.
         """
+        if not self.timeout:
+            # If no timeout is set, none of the in-progress builds can be
+            # considered orphaned
+            return
+
         db = self.env.get_db_cnx()
+        now = int(time.time())
         for build in Build.select(self.env, status=Build.IN_PROGRESS, db=db):
+            if now - build.started < self.timeout:
+                # This build has not reached the timeout yet, assume it's still
+                # being executed
+                # FIXME: ideally, we'd base this check on the last activity on
+                #        the build, not the start time
+                continue
             build.status = Build.PENDING
             build.slave = None
             build.slave_info = {}
--- a/bitten/tests/queue.py
+++ b/bitten/tests/queue.py
@@ -11,6 +11,7 @@
 import os
 import shutil
 import tempfile
+import time
 import unittest
 
 from trac.db import DatabaseManager
@@ -189,6 +190,26 @@
         build = queue.get_build_for_slave('foobar', {})
         self.assertEqual(None, build)
 
+    def test_reset_orphaned_builds(self):
+        """Only in-progress builds older than the timeout are reset."""
+        BuildConfig(self.env, 'test').insert()
+        platform = TargetPlatform(self.env, config='test', name='Foo')
+        platform.insert()
+        build1 = Build(self.env, config='test', platform=platform.id, rev=123,
+                       rev_time=42, status=Build.IN_PROGRESS, slave='heinz',
+                       started=time.time() - 600) # Started ten minutes ago
+        build1.insert()
+        build2 = Build(self.env, config='test', platform=platform.id, rev=124,
+                       rev_time=42, status=Build.IN_PROGRESS, slave='heinz',
+                       started=time.time() - 60) # Started a minute ago
+        build2.insert()
+
+        queue = BuildQueue(self.env, timeout=300) # 5 minutes timeout
+        queue.reset_orphaned_builds()
+        self.assertEqual(Build.PENDING, Build.fetch(self.env, build1.id).status)
+        self.assertEqual(Build.IN_PROGRESS,
+                         Build.fetch(self.env, build2.id).status)
+
     def test_match_slave_match(self):
         BuildConfig(self.env, 'test', active=True).insert()
         platform = TargetPlatform(self.env, config='test', name="Unix")
Copyright (C) 2012-2017 Edgewall Software