Merge lp:~jelmer/launchpad/bug499115 into lp:launchpad/db-devel

Proposed by Jelmer Vernooij
Status: Superseded
Proposed branch: lp:~jelmer/launchpad/bug499115
Merge into: lp:launchpad/db-devel
Diff against target: 385 lines (+376/-0) (has conflicts)
1 file modified
lib/lp/buildmaster/buildergroup.py (+376/-0)
Text conflict in lib/lp/buildmaster/buildergroup.py
To merge this branch: bzr merge lp:~jelmer/launchpad/bug499115
Reviewer Review Type Date Requested Status
Canonical Launchpad Engineering Pending
Review via email: mp+19118@code.launchpad.net

This proposal has been superseded by a proposal from 2010-02-15.

To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) wrote :

This trivial change prevents the buildd master from falling over if a container directory was created but no log file uploaded. (bug 499115)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/buildmaster/buildergroup.py'
2--- lib/lp/buildmaster/buildergroup.py 2010-01-21 05:03:16 +0000
3+++ lib/lp/buildmaster/buildergroup.py 2010-02-11 16:58:25 +0000
4@@ -122,5 +122,381 @@
5
6 Perform the required actions for each state.
7 """
8+<<<<<<< TREE
9 queueItem.builder.updateBuild(queueItem)
10 self.commit()
11+=======
12+ try:
13+ slave_status = queueItem.builder.slaveStatus()
14+
15+ except (xmlrpclib.Fault, socket.error), info:
16+ # XXX cprov 2005-06-29:
17+ # Hmm, a problem with the xmlrpc interface,
18+ # disable the builder ?? or simple notice the failure
19+ # with a timestamp.
20+ info = ("Could not contact the builder %s, caught a (%s)"
21+ % (queueItem.builder.url, info))
22+ self.logger.debug(info, exc_info=True)
23+ # keep the job for scan
24+ return
25+
26+ builder_status_handlers = {
27+ 'BuilderStatus.IDLE': queueItem.updateBuild_IDLE,
28+ 'BuilderStatus.BUILDING': queueItem.updateBuild_BUILDING,
29+ 'BuilderStatus.ABORTING': queueItem.updateBuild_ABORTING,
30+ 'BuilderStatus.ABORTED': queueItem.updateBuild_ABORTED,
31+ 'BuilderStatus.WAITING': self.updateBuild_WAITING,
32+ }
33+
34+ builder_status = slave_status['builder_status']
35+ if builder_status not in builder_status_handlers:
36+ self.logger.critical(
37+ "Builder on %s returned unknown status %s, failing it"
38+ % (queueItem.builder.url, builder_status))
39+ self.failBuilder(
40+ queueItem.builder,
41+ "Unknown status code (%s) returned from status() probe."
42+ % builder_status)
43+ queueItem.builder = None
44+ queueItem.setDateStarted(None)
45+ self.commit()
46+ return
47+
48+ # Since logtail is a xmlrpclib.Binary container and it is returned
49+ # from the IBuilder content class, it arrives protected by a Zope
50+ # Security Proxy, which is not declared, thus empty. Before passing
51+ # it to the status handlers we will simply remove the proxy.
52+ logtail = removeSecurityProxy(slave_status.get('logtail'))
53+ build_id = slave_status.get('build_id')
54+ build_status = slave_status.get('build_status')
55+ filemap = slave_status.get('filemap')
56+ dependencies = slave_status.get('dependencies')
57+
58+ method = builder_status_handlers[builder_status]
59+ try:
60+ # XXX cprov 2007-05-25: We need this code for WAITING status
61+ # handler only until we are able to also move it to
62+ # BuildQueue content class and avoid to pass 'queueItem'.
63+ if builder_status == 'BuilderStatus.WAITING':
64+ method(queueItem, build_id, build_status, logtail,
65+ filemap, dependencies, self.logger)
66+ else:
67+ method(build_id, build_status, logtail,
68+ filemap, dependencies, self.logger)
69+ except TypeError, e:
70+ self.logger.critical("Received wrong number of args in response.")
71+ self.logger.exception(e)
72+
73+ self.commit()
74+
75+ def updateBuild_WAITING(self, queueItem, buildid, build_status,
76+ logtail, filemap, dependencies, logger):
77+ """Perform the actions needed for a slave in a WAITING state
78+
79+ Buildslave can be WAITING in five situations:
80+
81+ * Build has failed, no filemap is received (PACKAGEFAIL, DEPFAIL,
82+ CHROOTFAIL, BUILDERFAIL)
83+
84+ * Build has been built successfully (BuildStatus.OK), in this case
85+ we have a 'filemap', so we can retrieve those files and store in
86+ Librarian with getFileFromSlave() and then pass the binaries to
87+ the uploader for processing.
88+ """
89+ librarian = getUtility(ILibrarianClient)
90+
91+ # XXX: dsilvers 2005-03-02: Confirm the builder has the right build?
92+ assert build_status.startswith('BuildStatus.'), (
93+ 'Malformed status string: %s' % build_status)
94+
95+ buildstatus = build_status[len('BuildStatus.'):]
96+ method = getattr(self, 'buildStatus_' + buildstatus, None)
97+
98+ if method is None:
99+ logger.critical("Unknown BuildStatus '%s' for builder '%s'"
100+ % (buildstatus, queueItem.builder.url))
101+ return
102+
103+ method(queueItem, librarian, buildid, filemap, dependencies)
104+
105+ def storeBuildInfo(self, queueItem, librarian, buildid, dependencies):
106+ """Store available information for build jobs.
107+
108+ Store Buildlog, datebuilt, duration, dependencies.
109+ """
110+ build = getUtility(IBuildSet).getByQueueEntry(queueItem)
111+ build.buildlog = self.getLogFromSlave(queueItem)
112+ build.builder = queueItem.builder
113+ build.dependencies = dependencies
114+ # XXX cprov 20060615 bug=120584: Currently buildduration includes
115+ # the scanner latency, it should really be asking the slave for
116+ # the duration spent building locally.
117+ build.datebuilt = UTC_NOW
118+ # We need dynamic datetime.now() instance to be able to perform
119+ # the time operations for duration.
120+ RIGHT_NOW = datetime.datetime.now(pytz.timezone('UTC'))
121+ build.buildduration = RIGHT_NOW - queueItem.date_started
122+
123+
124+ def buildStatus_OK(self, queueItem, librarian, buildid,
125+ filemap=None, dependencies=None):
126+ """Handle a package that built successfully.
127+
128+ Once built successfully, we pull the files, store them in a
129+ directory, store build information and push them through the
130+ uploader.
131+ """
132+ # XXX cprov 2007-07-11 bug=129487: untested code path.
133+
134+ self.logger.debug("Processing successful build %s" % buildid)
135+ # Explode before collect a binary that is denied in this
136+ # distroseries/pocket
137+ build = getUtility(IBuildSet).getByQueueEntry(queueItem)
138+ if not build.archive.allowUpdatesToReleasePocket():
139+ assert build.distroseries.canUploadToPocket(build.pocket), (
140+ "%s (%s) can not be built for pocket %s: illegal status"
141+ % (build.title, build.id,
142+ build.pocket.name))
143+
144+ # ensure we have the correct build root as:
145+ # <BUILDMASTER_ROOT>/incoming/<UPLOAD_LEAF>/<TARGET_PATH>/[FILES]
146+ root = os.path.abspath(config.builddmaster.root)
147+ incoming = os.path.join(root, 'incoming')
148+
149+ # create a single directory to store build result files
150+ # UPLOAD_LEAF: <TIMESTAMP>-<BUILD_ID>-<BUILDQUEUE_ID>
151+ upload_leaf = "%s-%s" % (time.strftime("%Y%m%d-%H%M%S"), buildid)
152+ upload_dir = os.path.join(incoming, upload_leaf)
153+ self.logger.debug("Storing build result at '%s'" % upload_dir)
154+
155+ # Build the right UPLOAD_PATH so the distribution and archive
156+ # can be correctly found during the upload:
157+ # <archive_id>/distribution_name
158+ # for all destination archive types.
159+ build = getUtility(IBuildSet).getByQueueEntry(queueItem)
160+ archive = build.archive
161+ distribution_name = build.distribution.name
162+ target_path = '%s/%s' % (archive.id, distribution_name)
163+ upload_path = os.path.join(upload_dir, target_path)
164+ os.makedirs(upload_path)
165+
166+ slave = removeSecurityProxy(queueItem.builder.slave)
167+ for filename in filemap:
168+ slave_file = slave.getFile(filemap[filename])
169+ out_file_name = os.path.join(upload_path, filename)
170+ out_file = open(out_file_name, "wb")
171+ copy_and_close(slave_file, out_file)
172+
173+ uploader_argv = list(config.builddmaster.uploader.split())
174+ uploader_logfilename = os.path.join(upload_dir, 'uploader.log')
175+ self.logger.debug("Saving uploader log at '%s'"
176+ % uploader_logfilename)
177+
178+ # add extra arguments for processing a binary upload
179+ extra_args = [
180+ "--log-file", "%s" % uploader_logfilename,
181+ "-d", "%s" % build.distribution.name,
182+ "-s", "%s" % (build.distroseries.name +
183+ pocketsuffix[build.pocket]),
184+ "-b", "%s" % build.id,
185+ "-J", "%s" % upload_leaf,
186+ "%s" % root,
187+ ]
188+
189+ uploader_argv.extend(extra_args)
190+
191+ self.logger.debug("Invoking uploader on %s" % root)
192+ self.logger.debug("%s" % uploader_argv)
193+
194+ uploader_process = subprocess.Popen(
195+ uploader_argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
196+
197+ # Nothing should be written to the stdout/stderr.
198+ upload_stdout, upload_stderr = uploader_process.communicate()
199+
200+ # XXX cprov 2007-04-17: we do not check uploader_result_code
201+ # anywhere. We need to find out what will be best strategy
202+ # when it failed HARD (there is a huge effort in process-upload
203+ # to not return error, it only happen when the code is broken).
204+ uploader_result_code = uploader_process.returncode
205+ self.logger.debug("Uploader returned %d" % uploader_result_code)
206+
207+ # Quick and dirty hack to carry on on process-upload failures
208+ if os.path.exists(upload_dir):
209+ self.logger.debug("The upload directory did not get moved.")
210+ failed_dir = os.path.join(root, "failed-to-move")
211+ if not os.path.exists(failed_dir):
212+ os.mkdir(failed_dir)
213+ os.rename(upload_dir, os.path.join(failed_dir, upload_leaf))
214+
215+ # The famous 'flush_updates + clear_cache' will make visible
216+ # the DB changes done in process-upload, considering that the
217+ # transaction was set with ISOLATION_LEVEL_READ_COMMITED
218+ # isolation level.
219+ cur = cursor()
220+ cur.execute('SHOW transaction_isolation')
221+ isolation_str = cur.fetchone()[0]
222+ assert isolation_str == 'read committed', (
223+ 'BuildMaster/BuilderGroup transaction isolation should be '
224+ 'ISOLATION_LEVEL_READ_COMMITTED (not "%s")' % isolation_str)
225+
226+ original_slave = queueItem.builder.slave
227+
228+ # XXX Robert Collins, Celso Providelo 2007-05-26:
229+ # 'Refreshing' objects procedure is forced on us by using a
230+ # different process to do the upload, but as that process runs
231+ # in the same unix account, it is simply double handling and we
232+ # would be better off to do it within this process.
233+ flush_database_updates()
234+ clear_current_connection_cache()
235+
236+ # XXX cprov 2007-06-15: Re-issuing removeSecurityProxy is forced on
237+ # us by sqlobject refreshing the builder object during the
238+ # transaction cache clearing. Once we sort the previous problem
239+ # this step should probably not be required anymore.
240+ queueItem.builder.setSlaveForTesting(
241+ removeSecurityProxy(original_slave))
242+
243+ # Store build information, build record was already updated during
244+ # the binary upload.
245+ self.storeBuildInfo(queueItem, librarian, buildid, dependencies)
246+
247+ # Retrive the up-to-date build record and perform consistency
248+ # checks. The build record should be updated during the binary
249+ # upload processing, if it wasn't something is broken and needs
250+ # admins attention. Even when we have a FULLYBUILT build record,
251+ # if it is not related with at least one binary, there is also
252+ # a problem.
253+ # For both situations we will mark the builder as FAILEDTOUPLOAD
254+ # and the and update the build details (datebuilt, duration,
255+ # buildlog, builder) in LP. A build-failure-notification will be
256+ # sent to the lp-build-admin celebrity and to the sourcepackagerelease
257+ # uploader about this occurrence. The failure notification will
258+ # also contain the information required to manually reprocess the
259+ # binary upload when it was the case.
260+ build = getUtility(IBuildSet).getByQueueEntry(queueItem)
261+ if (build.buildstate != BuildStatus.FULLYBUILT or
262+ build.binarypackages.count() == 0):
263+ self.logger.debug("Build %s upload failed." % build.id)
264+ build.buildstate = BuildStatus.FAILEDTOUPLOAD
265+ # Retrieve log file content.
266+ possible_locations = (
267+ 'failed', 'failed-to-move', 'rejected', 'accepted')
268+ for location_dir in possible_locations:
269+ log_filepath = os.path.join(
270+ root, location_dir, upload_leaf,
271+ 'uploader.log')
272+ if os.path.exists(log_filepath):
273+ uploader_log_file = open(log_filepath)
274+ try:
275+ uploader_log_content = uploader_log_file.read()
276+ finally:
277+ uploader_log_file.close()
278+ break
279+ else:
280+ uploader_log_content = 'Could not find upload log file'
281+ # Store the upload_log_contents in librarian so it can be
282+ # accessed by anyone with permission to see the build.
283+ build.storeUploadLog(uploader_log_content)
284+ # Notify the build failure.
285+ build.notify(extra_info=uploader_log_content)
286+ else:
287+ self.logger.debug(
288+ "Gathered build %s completely" %
289+ build.sourcepackagerelease.name)
290+
291+ # Release the builder for another job.
292+ queueItem.builder.cleanSlave()
293+ # Remove BuildQueue record.
294+ queueItem.destroySelf()
295+ # Commit the transaction so that the uploader can see the updated
296+ # build record.
297+ self.commit()
298+
299+ def buildStatus_PACKAGEFAIL(self, queueItem, librarian, buildid,
300+ filemap=None, dependencies=None):
301+ """Handle a package that had failed to build.
302+
303+ Build has failed when trying the work with the target package,
304+ set the job status as FAILEDTOBUILD, store available info and
305+ remove Buildqueue entry.
306+ """
307+ build = getUtility(IBuildSet).getByQueueEntry(queueItem)
308+ build.buildstate = BuildStatus.FAILEDTOBUILD
309+ self.storeBuildInfo(queueItem, librarian, buildid, dependencies)
310+ queueItem.builder.cleanSlave()
311+ build.notify()
312+ queueItem.destroySelf()
313+
314+ def buildStatus_DEPFAIL(self, queueItem, librarian, buildid,
315+ filemap=None, dependencies=None):
316+ """Handle a package that had missing dependencies.
317+
318+ Build has failed by missing dependencies, set the job status as
319+ MANUALDEPWAIT, store available information, remove BuildQueue
320+ entry and release builder slave for another job.
321+ """
322+ build = getUtility(IBuildSet).getByQueueEntry(queueItem)
323+ build.buildstate = BuildStatus.MANUALDEPWAIT
324+ self.storeBuildInfo(queueItem, librarian, buildid, dependencies)
325+ self.logger.critical("***** %s is MANUALDEPWAIT *****"
326+ % queueItem.builder.name)
327+ queueItem.builder.cleanSlave()
328+ queueItem.destroySelf()
329+
330+ def buildStatus_CHROOTFAIL(self, queueItem, librarian, buildid,
331+ filemap=None, dependencies=None):
332+ """Handle a package that had failed when unpacking the CHROOT.
333+
334+ Build has failed when installing the current CHROOT, mark the
335+ job as CHROOTFAIL, store available information, remove BuildQueue
336+ and release the builder.
337+ """
338+ build = getUtility(IBuildSet).getByQueueEntry(queueItem)
339+ build.buildstate = BuildStatus.CHROOTWAIT
340+ self.storeBuildInfo(queueItem, librarian, buildid, dependencies)
341+ self.logger.critical("***** %s is CHROOTWAIT *****" %
342+ queueItem.builder.name)
343+ queueItem.builder.cleanSlave()
344+ build.notify()
345+ queueItem.destroySelf()
346+
347+ def buildStatus_BUILDERFAIL(self, queueItem, librarian, buildid,
348+ filemap=None, dependencies=None):
349+ """Handle builder failures.
350+
351+ Build has been failed when trying to build the target package,
352+ The environment is working well, so mark the job as NEEDSBUILD again
353+ and 'clean' the builder to do another jobs.
354+ """
355+ self.logger.warning("***** %s has failed *****"
356+ % queueItem.builder.name)
357+ self.failBuilder(queueItem.builder,
358+ ("Builder returned BUILDERFAIL when asked "
359+ "for its status"))
360+ # simply reset job
361+ build = getUtility(IBuildSet).getByQueueEntry(queueItem)
362+ self.storeBuildInfo(queueItem, librarian, buildid, dependencies)
363+ queueItem.reset()
364+
365+ def buildStatus_GIVENBACK(self, queueItem, librarian, buildid,
366+ filemap=None, dependencies=None):
367+ """Handle automatic retry requested by builder.
368+
369+ GIVENBACK pseudo-state represents a request for automatic retry
370+ later, the build records is delayed by reducing the lastscore to
371+ ZERO.
372+ """
373+ self.logger.warning("***** %s is GIVENBACK by %s *****"
374+ % (buildid, queueItem.builder.name))
375+ build = getUtility(IBuildSet).getByQueueEntry(queueItem)
376+ self.storeBuildInfo(queueItem, librarian, buildid, dependencies)
377+ # XXX cprov 2006-05-30: Currently this information is not
378+ # properly presented in the Web UI. We will discuss it in
379+ # the next Paris Summit, infinity has some ideas about how
380+ # to use this content. For now we just ensure it's stored.
381+ queueItem.builder.cleanSlave()
382+ queueItem.reset()
383+
384+
385+>>>>>>> MERGE-SOURCE

Subscribers

People subscribed via source and target branches

to status/vote changes: