view bitten/master.py @ 785:8e76b8f6310a

Adding keepalives to the bitten client/server protocol. Keepalives now keep builds running as long as the slave is up, rather than having them time out independently of if the slave is working on them. This requires an update of both the master and slave to support keepalives-- older slaves (protocol version 3) will continue to work, but will not submit keepalives (and thus will retain previous behavior.) I've updated the PROTOCOL_VERSION field as older masters would kill builds for newer slaves that attempted to keepalive. We probably want to update test_create_build_protocol_wrong_version to test the matrix of allowed versions, but I expect the exception of allowing of older slaves to disappear when we next edit the protocol. Closes #411.
author wbell
date Thu, 29 Apr 2010 18:17:05 +0000
parents 5f0cfee44540
children 4c73a3cea9f5
line wrap: on
line source
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# Copyright (C) 2005-2007 Christopher Lenz <cmlenz@gmx.de>
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://bitten.edgewall.org/wiki/License.

"""Build master implementation."""

import calendar
import re
import time
from StringIO import StringIO

from trac.attachment import Attachment
from trac.config import BoolOption, IntOption, Option
from trac.core import *
from trac.resource import ResourceNotFound
from trac.web import IRequestHandler, RequestDone

from bitten import PROTOCOL_VERSION
from bitten.model import BuildConfig, Build, BuildStep, BuildLog, Report, \
                     TargetPlatform

from bitten.main import BuildSystem
from bitten.queue import BuildQueue
from bitten.recipe import Recipe
from bitten.util import xmlio

__all__ = ['BuildMaster']
__docformat__ = 'restructuredtext en'


HTTP_BAD_REQUEST = 400
HTTP_FORBIDDEN = 403
HTTP_NOT_FOUND = 404
HTTP_METHOD_NOT_ALLOWED = 405
HTTP_CONFLICT = 409


class BuildMaster(Component):
    """Trac request handler implementation for the build master."""

    implements(IRequestHandler)

    # Configuration options

    adjust_timestamps = BoolOption('bitten', 'adjust_timestamps', False, doc=
        """Whether the timestamps of builds should be adjusted to be close
        to the timestamps of the corresponding changesets.""")

    build_all = BoolOption('bitten', 'build_all', False, doc=
        """Whether to request builds of older revisions even if a younger
        revision has already been built.""")

    stabilize_wait = IntOption('bitten', 'stabilize_wait', 0, doc=
        """The time in seconds to wait for the repository to stabilize before
        queuing up a new build.  This allows time for developers to check in
        a group of related changes back to back without spawning multiple
        builds.""")

    slave_timeout = IntOption('bitten', 'slave_timeout', 3600, doc=
        """The time in seconds after which a build is cancelled if the slave
        does not report progress.""")

    logs_dir = Option('bitten', 'logs_dir', "log/bitten", doc=
         """The directory on the server in which client log files will be stored.""")

    quick_status = BoolOption('bitten', 'quick_status', False, doc=
         """Whether to show the current build status withing the Trac main
            navigation bar""")

    def __init__(self):
        self.env.systeminfo.append(('Bitten',
                __import__('bitten', ['__version__']).__version__))

    # IRequestHandler methods

    def match_request(self, req):
        match = re.match(r'/builds(?:/(\d+)(?:/(\w+)/([^/]+)?)?)?$',
                         req.path_info)
        if match:
            if match.group(1):
                req.args['id'] = match.group(1)
                req.args['collection'] = match.group(2)
                req.args['member'] = match.group(3)
            return True

    def process_request(self, req):
        req.perm.assert_permission('BUILD_EXEC')

        if 'trac_auth' in req.incookie:
            slave_token = req.incookie['trac_auth'].value
        else:
            slave_token = req.session.sid

        if 'id' not in req.args:
            if req.method != 'POST':
                self._send_response(req,
                                body='Only POST allowed for build creation.')
            return self._process_build_creation(req, slave_token)

        build = Build.fetch(self.env, req.args['id'])
        if not build:
            self._send_error(req, HTTP_NOT_FOUND,
                                  'No such build (%s)' % req.args['id'])

        build_token = build.slave_info.get('token', '')
        if build_token != slave_token:
            self._send_error(req, HTTP_CONFLICT,
                          'Token mismatch (wrong slave): slave=%s, build=%s' \
                               % (slave_token, build_token))

        config = BuildConfig.fetch(self.env, build.config)

        if not req.args['collection']:
            if req.method == 'DELETE':
                return self._process_build_cancellation(req, config, build)
            else:
                return self._process_build_initiation(req, config, build)

        if req.method != 'POST':
            self._send_error(req, HTTP_METHOD_NOT_ALLOWED,
                                  'Method %s not allowed' % req.method)

        if req.args['collection'] == 'steps':
            return self._process_build_step(req, config, build)
        elif req.args['collection'] == 'keepalive':
            return self._process_keepalive(req, config, build)
        else:
            self._send_error(req, HTTP_NOT_FOUND,
                    "No such collection '%s'" % req.args['collection'])

    # Internal methods

    def _send_response(self, req, code=200, body='', headers=None):
        """ Formats and sends the response, raising ``RequestDone``. """
        req.send_response(code)
        headers = headers or {}
        headers.setdefault('Content-Length', len(body))
        for header in headers:
            req.send_header(header, headers[header])
        req.write(body)
        raise RequestDone

    def _send_error(self, req, code=500, message=''):
        """ Formats and sends the error, raising ``RequestDone``. """
        headers = {'Content-Type': 'text/plain',
                   'Content-Length': str(len(message))}
        self._send_response(req, code, body=message, headers=headers)

    def _process_build_creation(self, req, slave_token):
        queue = BuildQueue(self.env, build_all=self.build_all,
                           stabilize_wait=self.stabilize_wait,
                           timeout=self.slave_timeout)
        queue.populate()

        try:
            elem = xmlio.parse(req.read())
        except xmlio.ParseError, e:
            self.log.error('Error parsing build initialization request: %s', e,
                           exc_info=True)
            self._send_error(req, HTTP_BAD_REQUEST, 'XML parser error')

        slave_version = int(elem.attr.get('version', 1))

        # FIXME: Remove version compatibility code.
        # The initial difference between protocol version 3 and 4 is that
        # the master allows keepalive requests-- the master must be
        # at least 4 before slaves supporting version 4 are allowed. When
        # the first force master/slave upgrade requirement comes in
        # (or we bump the) version number again, remove this code.
        if slave_version == 3 and PROTOCOL_VERSION == 4:
            self.log.info('Allowing slave version %d to process build for '
                          'compatibility. Upgrade slave to support build '
                          'keepalives.', slave_version)
        elif slave_version != PROTOCOL_VERSION:
            self._send_error(req, HTTP_BAD_REQUEST,
                    "Master-Slave version mismatch: master=%d, slave=%d" % \
                                (PROTOCOL_VERSION, slave_version))

        slavename = elem.attr['name']
        properties = {'name': slavename, Build.IP_ADDRESS: req.remote_addr,
                    Build.TOKEN: slave_token}
        self.log.info('Build slave %r connected from %s with token %s',
                    slavename, req.remote_addr, slave_token)

        for child in elem.children():
            if child.name == 'platform':
                properties[Build.MACHINE] = child.gettext()
                properties[Build.PROCESSOR] = child.attr.get('processor')
            elif child.name == 'os':
                properties[Build.OS_NAME] = child.gettext()
                properties[Build.OS_FAMILY] = child.attr.get('family')
                properties[Build.OS_VERSION] = child.attr.get('version')
            elif child.name == 'package':
                for name, value in child.attr.items():
                    if name == 'name':
                        continue
                    properties[child.attr['name'] + '.' + name] = value

        self.log.debug('Build slave configuration: %r', properties)

        build = queue.get_build_for_slave(slavename, properties)
        if not build:
            self._send_response(req, 204, '', {})

        self._send_response(req, 201, 'Build pending', headers={
                            'Content-Type': 'text/plain',
                            'Location': req.abs_href.builds(build.id)})

    def _process_build_cancellation(self, req, config, build):
        self.log.info('Build slave %r cancelled build %d', build.slave,
                      build.id)
        build.status = Build.PENDING
        build.slave = None
        build.slave_info = {}
        build.started = 0
        db = self.env.get_db_cnx()
        for step in list(BuildStep.select(self.env, build=build.id, db=db)):
            step.delete(db=db)
        build.update(db=db)

        Attachment.delete_all(self.env, 'build', build.resource.id, db)

        db.commit()

        for listener in BuildSystem(self.env).listeners:
            listener.build_aborted(build)

        self._send_response(req, 204, '', {})

    def _process_build_initiation(self, req, config, build):
        self.log.info('Build slave %r initiated build %d', build.slave,
                      build.id)
        build.started = int(time.time())
        build.last_activity = build.started
        build.update()

        for listener in BuildSystem(self.env).listeners:
            listener.build_started(build)

        xml = xmlio.parse(config.recipe)
        xml.attr['path'] = config.path
        xml.attr['revision'] = build.rev
        xml.attr['config'] = config.name
        xml.attr['build'] = str(build.id)
        target_platform = TargetPlatform.fetch(self.env, build.platform)
        xml.attr['platform'] = target_platform.name
        xml.attr['name'] = build.slave
        body = str(xml)

        self.log.info('Build slave %r initiated build %d', build.slave,
                      build.id)

        # create the first step, mark it as in-progress.

        recipe = Recipe(xmlio.parse(config.recipe))
        stepname = recipe.__iter__().next().id

        step = self._start_new_step(build, stepname)
        step.insert()

        self._send_response(req, 200, body, headers={
                    'Content-Type': 'application/x-bitten+xml',
                    'Content-Length': str(len(body)),
                    'Content-Disposition':
                        'attachment; filename=recipe_%s_r%s.xml' %
                        (config.name, build.rev)})

    def _process_build_step(self, req, config, build):
        try:
            elem = xmlio.parse(req.read())
        except xmlio.ParseError, e:
            self.log.error('Error parsing build step result: %s', e,
                           exc_info=True)
            self._send_error(req, HTTP_BAD_REQUEST, 'XML parser error')
        stepname = elem.attr['step']

        # we should have created this step previously; if it hasn't,
        # the master and slave are processing steps out of order.
        step = BuildStep.fetch(self.env, build=build.id, name=stepname)
        if not step:
            self._send_error(req, HTTP_CONFLICT, 'Build step has not been created.')

        recipe = Recipe(xmlio.parse(config.recipe))
        index = None
        current_step = None
        for num, recipe_step in enumerate(recipe):
            if recipe_step.id == stepname:
                index = num
                current_step = recipe_step
        if index is None:
            self._send_error(req, HTTP_FORBIDDEN,
                                'No such build step' % stepname)
        last_step = index == num

        self.log.debug('Slave %s (build %d) completed step %d (%s) with '
                       'status %s', build.slave, build.id, index, stepname,
                       elem.attr['status'])

        db = self.env.get_db_cnx()

        step.stopped = int(time.time())

        if elem.attr['status'] == 'failure':
            self.log.warning('Build %s step %s failed', build.id, stepname)
            step.status = BuildStep.FAILURE
            if current_step.onerror == 'fail':
                last_step = True
        else:
            step.status = BuildStep.SUCCESS
        step.errors += [error.gettext() for error in elem.children('error')]

        # TODO: step.update(db=db)
        step.delete(db=db)
        step.insert(db=db)

        # Collect log messages from the request body
        for idx, log_elem in enumerate(elem.children('log')):
            build_log = BuildLog(self.env, build=build.id, step=stepname,
                                 generator=log_elem.attr.get('generator'),
                                 orderno=idx)
            for message_elem in log_elem.children('message'):
                build_log.messages.append((message_elem.attr['level'],
                                           message_elem.gettext()))
            build_log.insert(db=db)

        # Collect report data from the request body
        for report_elem in elem.children('report'):
            report = Report(self.env, build=build.id, step=stepname,
                            category=report_elem.attr.get('category'),
                            generator=report_elem.attr.get('generator'))
            for item_elem in report_elem.children():
                item = {'type': item_elem.name}
                item.update(item_elem.attr)
                for child_elem in item_elem.children():
                    item[child_elem.name] = child_elem.gettext()
                report.items.append(item)
            report.insert(db=db)

        # Collect attachments from the request body
        for attach_elem in elem.children(Recipe.ATTACH):
            attach_elem = list(attach_elem.children('file'))[0] # One file only
            filename = attach_elem.attr.get('filename')
            resource_id = attach_elem.attr.get('resource') == 'config' \
                                    and build.config or build.resource.id

            try: # Delete attachment if it already exists
                old_attach = Attachment(self.env, 'build',
                                    parent_id=resource_id, filename=filename)
                old_attach.delete()
            except ResourceNotFound:
                pass
            attachment = Attachment(self.env, 'build', parent_id=resource_id)
            attachment.description = attach_elem.attr.get('description')
            attachment.author = req.authname
            fileobj = StringIO(attach_elem.gettext().decode('base64'))
            attachment.insert(filename, fileobj, fileobj.len, db=db)

        # If this was the last step in the recipe we mark the build as
        # completed otherwise just update last_activity
        if last_step:
            self.log.info('Slave %s completed build %d ("%s" as of [%s])',
                          build.slave, build.id, build.config, build.rev)
            build.stopped = step.stopped
            build.last_activity = build.stopped

            # Determine overall outcome of the build by checking the outcome
            # of the individual steps against the "onerror" specification of
            # each step in the recipe
            for num, recipe_step in enumerate(recipe):
                step = BuildStep.fetch(self.env, build.id, recipe_step.id)
                if step.status == BuildStep.FAILURE:
                    if recipe_step.onerror == 'fail' or \
                            recipe_step.onerror == 'continue':
                        build.status = Build.FAILURE
                        break
            else:
                build.status = Build.SUCCESS

            build.update(db=db)
        else:
            build.last_activity = step.stopped
            build.update(db=db)

            # start the next step.
            for num, recipe_step in enumerate(recipe):
                if num == index + 1:
                    next_step = recipe_step
            if next_step is None:
                self._send_error(req, HTTP_FORBIDDEN,
                                 'Unable to find step after ' % stepname)

            step = self._start_new_step(build, next_step.id)
            step.insert(db=db)

        db.commit()

        if last_step:
            for listener in BuildSystem(self.env).listeners:
                listener.build_completed(build)

        body = 'Build step processed'
        self._send_response(req, 201, body, {
                            'Content-Type': 'text/plain',
                            'Content-Length': str(len(body)),
                            'Location': req.abs_href.builds(
                                    build.id, 'steps', stepname)})

    def _process_keepalive(self, req, config, build):
        build.last_activity = int(time.time())
        build.update()

        self.log.info('Slave %s build %d keepalive ("%s" as of [%s])',
                      build.slave, build.id, build.config, build.rev)

        body = 'Keepalive processed'
        self._send_response(req, 200, body, {
                            'Content-Type': 'text/plain',
                            'Content-Length': str(len(body)),
                            'Location': req.abs_href.builds(
                                    build.id, 'keepalive')})

    def _start_new_step(self, build, stepname):
        """Creates the in-memory representation for a newly started
        step, ready to be persisted to the database.
        """
        step = BuildStep(self.env, build=build.id, name=stepname)
        step.status = BuildStep.IN_PROGRESS
        step.started = int(time.time())
        step.stopped = 0

        return step
Copyright (C) 2012-2017 Edgewall Software