Merge lp:~julian-edwards/launchpad/double-build-bug-705342 into lp:launchpad

Proposed by Julian Edwards
Status: Merged
Approved by: Julian Edwards
Approved revision: no longer in the source branch.
Merged at revision: 12322
Proposed branch: lp:~julian-edwards/launchpad/double-build-bug-705342
Merge into: lp:launchpad
Prerequisite: lp:~jml/launchpad/failing-tests-bug-711209
Diff against target: 96 lines (+43/-5)
4 files modified
lib/canonical/widgets/__init__.py (+12/-0)
lib/lp/buildmaster/model/builder.py (+10/-0)
lib/lp/buildmaster/tests/mock_slaves.py (+2/-2)
lib/lp/buildmaster/tests/test_builder.py (+19/-3)
To merge this branch: bzr merge lp:~julian-edwards/launchpad/double-build-bug-705342
Reviewer Review Type Date Requested Status
Leonard Richardson (community) Approve
Review via email: mp+48219@code.launchpad.net

Commit message

[r=leonardr][ui=none] [r=leonardr][bug=705342] Fail nonvirtual builders in the ABORTED state rather than cleaning them, since we can't guarantee that there's no active build.

Description of the change

The attached bug describes how some build logs end up with two apparently separate builds in them. This happens when a buildd-admin kills the builder's processes off but doesn't kill everything quite properly for whatever reason. Normally the buildd-manager will handle this fine for virtual builders, as when it sees an ABORTED state it resets the builder.

However for nonvirtual builders we just try and clean the slave which is clamitous due to a bug in sbuild which won't kill the actual build process off, thus leaving the builder open to receiving a second build.

This change just fails the (nonvirtual) builder if it gets into this state, since there's nothing we can do on the manager side.

To post a comment you must log in.
Revision history for this message
Leonard Richardson (leonardr) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== added directory 'lib/canonical/widgets'
=== added file 'lib/canonical/widgets/__init__.py'
--- lib/canonical/widgets/__init__.py 1970-01-01 00:00:00 +0000
+++ lib/canonical/widgets/__init__.py 2011-02-02 21:39:04 +0000
@@ -0,0 +1,12 @@
1# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).
3
4# This is a stub to keep canonical.shipit operational. this module
5# can delete when shipit is independent.
6
7from lp.app.widgets.itemswidgets import (
8 CheckBoxMatrixWidget,
9 LabeledMultiCheckBoxWidget,
10 )
11
12
013
=== modified file 'lib/lp/buildmaster/model/builder.py'
--- lib/lp/buildmaster/model/builder.py 2010-12-06 04:41:45 +0000
+++ lib/lp/buildmaster/model/builder.py 2011-02-03 16:31:19 +0000
@@ -338,6 +338,16 @@
338 # communications with the slave and the build manager had to reset338 # communications with the slave and the build manager had to reset
339 # the job.339 # the job.
340 if status == 'BuilderStatus.ABORTED' and builder.currentjob is None:340 if status == 'BuilderStatus.ABORTED' and builder.currentjob is None:
341 if not builder.virtualized:
342 # We can't reset non-virtual builders reliably as the
343 # abort() function doesn't kill the actual build job,
344 # only the sbuild process! All we can do here is fail
345 # the builder with a message indicating the problem and
346 # wait for an admin to reboot it.
347 builder.failBuilder(
348 "Non-virtual builder in ABORTED state, requires admin to "
349 "restart")
350 return "dummy status"
341 if logger is not None:351 if logger is not None:
342 logger.info(352 logger.info(
343 "Builder '%s' being cleaned up from ABORTED" %353 "Builder '%s' being cleaned up from ABORTED" %
344354
=== modified file 'lib/lp/buildmaster/tests/mock_slaves.py'
--- lib/lp/buildmaster/tests/mock_slaves.py 2010-11-29 14:51:07 +0000
+++ lib/lp/buildmaster/tests/mock_slaves.py 2011-02-03 16:31:19 +0000
@@ -239,11 +239,11 @@
239 """A mock slave that looks like it's aborted."""239 """A mock slave that looks like it's aborted."""
240240
241 def clean(self):241 def clean(self):
242 self.call_log.append('status')242 self.call_log.append('clean')
243 return defer.succeed(None)243 return defer.succeed(None)
244244
245 def status(self):245 def status(self):
246 self.call_log.append('clean')246 self.call_log.append('status')
247 return defer.succeed(('BuilderStatus.ABORTED', '1-1'))247 return defer.succeed(('BuilderStatus.ABORTED', '1-1'))
248248
249249
250250
=== modified file 'lib/lp/buildmaster/tests/test_builder.py'
--- lib/lp/buildmaster/tests/test_builder.py 2010-12-20 03:21:03 +0000
+++ lib/lp/buildmaster/tests/test_builder.py 2011-02-03 16:31:19 +0000
@@ -279,9 +279,10 @@
279 builder = removeSecurityProxy(self.factory.makeBuilder())279 builder = removeSecurityProxy(self.factory.makeBuilder())
280 self.assertEqual(builder.url, builder.slave.url)280 self.assertEqual(builder.url, builder.slave.url)
281281
282 def test_recovery_of_aborted_slave(self):282 def test_recovery_of_aborted_virtual_slave(self):
283 # If a slave is in the ABORTED state, rescueBuilderIfLost should283 # If a virtual_slave is in the ABORTED state,
284 # clean it if we don't think it's currently building anything.284 # rescueBuilderIfLost should clean it if we don't think it's
285 # currently building anything.
285 # See bug 463046.286 # See bug 463046.
286 aborted_slave = AbortedSlave()287 aborted_slave = AbortedSlave()
287 builder = MockBuilder("mock_builder", aborted_slave)288 builder = MockBuilder("mock_builder", aborted_slave)
@@ -291,6 +292,21 @@
291 self.assertIn('clean', aborted_slave.call_log)292 self.assertIn('clean', aborted_slave.call_log)
292 return d.addCallback(check_slave_calls)293 return d.addCallback(check_slave_calls)
293294
295 def test_recovery_of_aborted_nonvirtual_slave(self):
296 # Nonvirtual slaves in the ABORTED state cannot be reliably
297 # cleaned since the sbuild process doesn't properly kill the
298 # build job. We test that the builder is marked failed.
299 aborted_slave = AbortedSlave()
300 builder = MockBuilder("mock_builder", aborted_slave)
301 builder.currentjob = None
302 builder.virtualized = False
303 builder.builderok = True
304 d = builder.rescueIfLost()
305 def check_failed(ignored):
306 self.assertFalse(builder.builderok)
307 self.assertNotIn('clean', aborted_slave.call_log)
308 return d.addCallback(check_failed)
309
294 def test_recover_ok_slave(self):310 def test_recover_ok_slave(self):
295 # An idle slave is not rescued.311 # An idle slave is not rescued.
296 slave = OkSlave()312 slave = OkSlave()