Merge lp:~jelmer/launchpad/bug499115 into lp:launchpad/db-devel
- bug499115
- Merge into db-devel
Proposed by
Jelmer Vernooij
Status: | Superseded |
---|---|
Proposed branch: | lp:~jelmer/launchpad/bug499115 |
Merge into: | lp:launchpad/db-devel |
Diff against target: |
385 lines (+376/-0) (has conflicts) 1 file modified
lib/lp/buildmaster/buildergroup.py (+376/-0) Text conflict in lib/lp/buildmaster/buildergroup.py |
To merge this branch: | bzr merge lp:~jelmer/launchpad/bug499115 |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Canonical Launchpad Engineering | Pending | ||
Review via email: mp+19118@code.launchpad.net |
This proposal has been superseded by a proposal from 2010-02-15.
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) wrote : | # |
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'lib/lp/buildmaster/buildergroup.py' |
2 | --- lib/lp/buildmaster/buildergroup.py 2010-01-21 05:03:16 +0000 |
3 | +++ lib/lp/buildmaster/buildergroup.py 2010-02-11 16:58:25 +0000 |
4 | @@ -122,5 +122,381 @@ |
5 | |
6 | Perform the required actions for each state. |
7 | """ |
8 | +<<<<<<< TREE |
9 | queueItem.builder.updateBuild(queueItem) |
10 | self.commit() |
11 | +======= |
12 | + try: |
13 | + slave_status = queueItem.builder.slaveStatus() |
14 | + |
15 | + except (xmlrpclib.Fault, socket.error), info: |
16 | + # XXX cprov 2005-06-29: |
17 | + # Hmm, a problem with the xmlrpc interface, |
18 | + # disable the builder ?? or simple notice the failure |
19 | + # with a timestamp. |
20 | + info = ("Could not contact the builder %s, caught a (%s)" |
21 | + % (queueItem.builder.url, info)) |
22 | + self.logger.debug(info, exc_info=True) |
23 | + # keep the job for scan |
24 | + return |
25 | + |
26 | + builder_status_handlers = { |
27 | + 'BuilderStatus.IDLE': queueItem.updateBuild_IDLE, |
28 | + 'BuilderStatus.BUILDING': queueItem.updateBuild_BUILDING, |
29 | + 'BuilderStatus.ABORTING': queueItem.updateBuild_ABORTING, |
30 | + 'BuilderStatus.ABORTED': queueItem.updateBuild_ABORTED, |
31 | + 'BuilderStatus.WAITING': self.updateBuild_WAITING, |
32 | + } |
33 | + |
34 | + builder_status = slave_status['builder_status'] |
35 | + if builder_status not in builder_status_handlers: |
36 | + self.logger.critical( |
37 | + "Builder on %s returned unknown status %s, failing it" |
38 | + % (queueItem.builder.url, builder_status)) |
39 | + self.failBuilder( |
40 | + queueItem.builder, |
41 | + "Unknown status code (%s) returned from status() probe." |
42 | + % builder_status) |
43 | + queueItem.builder = None |
44 | + queueItem.setDateStarted(None) |
45 | + self.commit() |
46 | + return |
47 | + |
48 | + # Since logtail is a xmlrpclib.Binary container and it is returned |
49 | + # from the IBuilder content class, it arrives protected by a Zope |
50 | + # Security Proxy, which is not declared, thus empty. Before passing |
51 | + # it to the status handlers we will simply remove the proxy. |
52 | + logtail = removeSecurityProxy(slave_status.get('logtail')) |
53 | + build_id = slave_status.get('build_id') |
54 | + build_status = slave_status.get('build_status') |
55 | + filemap = slave_status.get('filemap') |
56 | + dependencies = slave_status.get('dependencies') |
57 | + |
58 | + method = builder_status_handlers[builder_status] |
59 | + try: |
60 | + # XXX cprov 2007-05-25: We need this code for WAITING status |
61 | + # handler only until we are able to also move it to |
62 | + # BuildQueue content class and avoid to pass 'queueItem'. |
63 | + if builder_status == 'BuilderStatus.WAITING': |
64 | + method(queueItem, build_id, build_status, logtail, |
65 | + filemap, dependencies, self.logger) |
66 | + else: |
67 | + method(build_id, build_status, logtail, |
68 | + filemap, dependencies, self.logger) |
69 | + except TypeError, e: |
70 | + self.logger.critical("Received wrong number of args in response.") |
71 | + self.logger.exception(e) |
72 | + |
73 | + self.commit() |
74 | + |
75 | + def updateBuild_WAITING(self, queueItem, buildid, build_status, |
76 | + logtail, filemap, dependencies, logger): |
77 | + """Perform the actions needed for a slave in a WAITING state |
78 | + |
79 | + Buildslave can be WAITING in five situations: |
80 | + |
81 | + * Build has failed, no filemap is received (PACKAGEFAIL, DEPFAIL, |
82 | + CHROOTFAIL, BUILDERFAIL) |
83 | + |
84 | + * Build has been built successfully (BuildStatus.OK), in this case |
85 | + we have a 'filemap', so we can retrieve those files and store in |
86 | + Librarian with getFileFromSlave() and then pass the binaries to |
87 | + the uploader for processing. |
88 | + """ |
89 | + librarian = getUtility(ILibrarianClient) |
90 | + |
91 | + # XXX: dsilvers 2005-03-02: Confirm the builder has the right build? |
92 | + assert build_status.startswith('BuildStatus.'), ( |
93 | + 'Malformed status string: %s' % build_status) |
94 | + |
95 | + buildstatus = build_status[len('BuildStatus.'):] |
96 | + method = getattr(self, 'buildStatus_' + buildstatus, None) |
97 | + |
98 | + if method is None: |
99 | + logger.critical("Unknown BuildStatus '%s' for builder '%s'" |
100 | + % (buildstatus, queueItem.builder.url)) |
101 | + return |
102 | + |
103 | + method(queueItem, librarian, buildid, filemap, dependencies) |
104 | + |
105 | + def storeBuildInfo(self, queueItem, librarian, buildid, dependencies): |
106 | + """Store available information for build jobs. |
107 | + |
108 | + Store Buildlog, datebuilt, duration, dependencies. |
109 | + """ |
110 | + build = getUtility(IBuildSet).getByQueueEntry(queueItem) |
111 | + build.buildlog = self.getLogFromSlave(queueItem) |
112 | + build.builder = queueItem.builder |
113 | + build.dependencies = dependencies |
114 | + # XXX cprov 20060615 bug=120584: Currently buildduration includes |
115 | + # the scanner latency, it should really be asking the slave for |
116 | + # the duration spent building locally. |
117 | + build.datebuilt = UTC_NOW |
118 | + # We need dynamic datetime.now() instance to be able to perform |
119 | + # the time operations for duration. |
120 | + RIGHT_NOW = datetime.datetime.now(pytz.timezone('UTC')) |
121 | + build.buildduration = RIGHT_NOW - queueItem.date_started |
122 | + |
123 | + |
124 | + def buildStatus_OK(self, queueItem, librarian, buildid, |
125 | + filemap=None, dependencies=None): |
126 | + """Handle a package that built successfully. |
127 | + |
128 | + Once built successfully, we pull the files, store them in a |
129 | + directory, store build information and push them through the |
130 | + uploader. |
131 | + """ |
132 | + # XXX cprov 2007-07-11 bug=129487: untested code path. |
133 | + |
134 | + self.logger.debug("Processing successful build %s" % buildid) |
135 | + # Explode before collect a binary that is denied in this |
136 | + # distroseries/pocket |
137 | + build = getUtility(IBuildSet).getByQueueEntry(queueItem) |
138 | + if not build.archive.allowUpdatesToReleasePocket(): |
139 | + assert build.distroseries.canUploadToPocket(build.pocket), ( |
140 | + "%s (%s) can not be built for pocket %s: illegal status" |
141 | + % (build.title, build.id, |
142 | + build.pocket.name)) |
143 | + |
144 | + # ensure we have the correct build root as: |
145 | + # <BUILDMASTER_ROOT>/incoming/<UPLOAD_LEAF>/<TARGET_PATH>/[FILES] |
146 | + root = os.path.abspath(config.builddmaster.root) |
147 | + incoming = os.path.join(root, 'incoming') |
148 | + |
149 | + # create a single directory to store build result files |
150 | + # UPLOAD_LEAF: <TIMESTAMP>-<BUILD_ID>-<BUILDQUEUE_ID> |
151 | + upload_leaf = "%s-%s" % (time.strftime("%Y%m%d-%H%M%S"), buildid) |
152 | + upload_dir = os.path.join(incoming, upload_leaf) |
153 | + self.logger.debug("Storing build result at '%s'" % upload_dir) |
154 | + |
155 | + # Build the right UPLOAD_PATH so the distribution and archive |
156 | + # can be correctly found during the upload: |
157 | + # <archive_id>/distribution_name |
158 | + # for all destination archive types. |
159 | + build = getUtility(IBuildSet).getByQueueEntry(queueItem) |
160 | + archive = build.archive |
161 | + distribution_name = build.distribution.name |
162 | + target_path = '%s/%s' % (archive.id, distribution_name) |
163 | + upload_path = os.path.join(upload_dir, target_path) |
164 | + os.makedirs(upload_path) |
165 | + |
166 | + slave = removeSecurityProxy(queueItem.builder.slave) |
167 | + for filename in filemap: |
168 | + slave_file = slave.getFile(filemap[filename]) |
169 | + out_file_name = os.path.join(upload_path, filename) |
170 | + out_file = open(out_file_name, "wb") |
171 | + copy_and_close(slave_file, out_file) |
172 | + |
173 | + uploader_argv = list(config.builddmaster.uploader.split()) |
174 | + uploader_logfilename = os.path.join(upload_dir, 'uploader.log') |
175 | + self.logger.debug("Saving uploader log at '%s'" |
176 | + % uploader_logfilename) |
177 | + |
178 | + # add extra arguments for processing a binary upload |
179 | + extra_args = [ |
180 | + "--log-file", "%s" % uploader_logfilename, |
181 | + "-d", "%s" % build.distribution.name, |
182 | + "-s", "%s" % (build.distroseries.name + |
183 | + pocketsuffix[build.pocket]), |
184 | + "-b", "%s" % build.id, |
185 | + "-J", "%s" % upload_leaf, |
186 | + "%s" % root, |
187 | + ] |
188 | + |
189 | + uploader_argv.extend(extra_args) |
190 | + |
191 | + self.logger.debug("Invoking uploader on %s" % root) |
192 | + self.logger.debug("%s" % uploader_argv) |
193 | + |
194 | + uploader_process = subprocess.Popen( |
195 | + uploader_argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
196 | + |
197 | + # Nothing should be written to the stdout/stderr. |
198 | + upload_stdout, upload_stderr = uploader_process.communicate() |
199 | + |
200 | + # XXX cprov 2007-04-17: we do not check uploader_result_code |
201 | + # anywhere. We need to find out what will be best strategy |
202 | + # when it failed HARD (there is a huge effort in process-upload |
203 | + # to not return error, it only happen when the code is broken). |
204 | + uploader_result_code = uploader_process.returncode |
205 | + self.logger.debug("Uploader returned %d" % uploader_result_code) |
206 | + |
207 | + # Quick and dirty hack to carry on on process-upload failures |
208 | + if os.path.exists(upload_dir): |
209 | + self.logger.debug("The upload directory did not get moved.") |
210 | + failed_dir = os.path.join(root, "failed-to-move") |
211 | + if not os.path.exists(failed_dir): |
212 | + os.mkdir(failed_dir) |
213 | + os.rename(upload_dir, os.path.join(failed_dir, upload_leaf)) |
214 | + |
215 | + # The famous 'flush_updates + clear_cache' will make visible |
216 | + # the DB changes done in process-upload, considering that the |
217 | + # transaction was set with ISOLATION_LEVEL_READ_COMMITED |
218 | + # isolation level. |
219 | + cur = cursor() |
220 | + cur.execute('SHOW transaction_isolation') |
221 | + isolation_str = cur.fetchone()[0] |
222 | + assert isolation_str == 'read committed', ( |
223 | + 'BuildMaster/BuilderGroup transaction isolation should be ' |
224 | + 'ISOLATION_LEVEL_READ_COMMITTED (not "%s")' % isolation_str) |
225 | + |
226 | + original_slave = queueItem.builder.slave |
227 | + |
228 | + # XXX Robert Collins, Celso Providelo 2007-05-26: |
229 | + # 'Refreshing' objects procedure is forced on us by using a |
230 | + # different process to do the upload, but as that process runs |
231 | + # in the same unix account, it is simply double handling and we |
232 | + # would be better off to do it within this process. |
233 | + flush_database_updates() |
234 | + clear_current_connection_cache() |
235 | + |
236 | + # XXX cprov 2007-06-15: Re-issuing removeSecurityProxy is forced on |
237 | + # us by sqlobject refreshing the builder object during the |
238 | + # transaction cache clearing. Once we sort the previous problem |
239 | + # this step should probably not be required anymore. |
240 | + queueItem.builder.setSlaveForTesting( |
241 | + removeSecurityProxy(original_slave)) |
242 | + |
243 | + # Store build information, build record was already updated during |
244 | + # the binary upload. |
245 | + self.storeBuildInfo(queueItem, librarian, buildid, dependencies) |
246 | + |
247 | + # Retrive the up-to-date build record and perform consistency |
248 | + # checks. The build record should be updated during the binary |
249 | + # upload processing, if it wasn't something is broken and needs |
250 | + # admins attention. Even when we have a FULLYBUILT build record, |
251 | + # if it is not related with at least one binary, there is also |
252 | + # a problem. |
253 | + # For both situations we will mark the builder as FAILEDTOUPLOAD |
254 | + # and the and update the build details (datebuilt, duration, |
255 | + # buildlog, builder) in LP. A build-failure-notification will be |
256 | + # sent to the lp-build-admin celebrity and to the sourcepackagerelease |
257 | + # uploader about this occurrence. The failure notification will |
258 | + # also contain the information required to manually reprocess the |
259 | + # binary upload when it was the case. |
260 | + build = getUtility(IBuildSet).getByQueueEntry(queueItem) |
261 | + if (build.buildstate != BuildStatus.FULLYBUILT or |
262 | + build.binarypackages.count() == 0): |
263 | + self.logger.debug("Build %s upload failed." % build.id) |
264 | + build.buildstate = BuildStatus.FAILEDTOUPLOAD |
265 | + # Retrieve log file content. |
266 | + possible_locations = ( |
267 | + 'failed', 'failed-to-move', 'rejected', 'accepted') |
268 | + for location_dir in possible_locations: |
269 | + log_filepath = os.path.join( |
270 | + root, location_dir, upload_leaf, |
271 | + 'uploader.log') |
272 | + if os.path.exists(log_filepath): |
273 | + uploader_log_file = open(log_filepath) |
274 | + try: |
275 | + uploader_log_content = uploader_log_file.read() |
276 | + finally: |
277 | + uploader_log_file.close() |
278 | + break |
279 | + else: |
280 | + uploader_log_content = 'Could not find upload log file' |
281 | + # Store the upload_log_contents in librarian so it can be |
282 | + # accessed by anyone with permission to see the build. |
283 | + build.storeUploadLog(uploader_log_content) |
284 | + # Notify the build failure. |
285 | + build.notify(extra_info=uploader_log_content) |
286 | + else: |
287 | + self.logger.debug( |
288 | + "Gathered build %s completely" % |
289 | + build.sourcepackagerelease.name) |
290 | + |
291 | + # Release the builder for another job. |
292 | + queueItem.builder.cleanSlave() |
293 | + # Remove BuildQueue record. |
294 | + queueItem.destroySelf() |
295 | + # Commit the transaction so that the uploader can see the updated |
296 | + # build record. |
297 | + self.commit() |
298 | + |
299 | + def buildStatus_PACKAGEFAIL(self, queueItem, librarian, buildid, |
300 | + filemap=None, dependencies=None): |
301 | + """Handle a package that had failed to build. |
302 | + |
303 | + Build has failed when trying the work with the target package, |
304 | + set the job status as FAILEDTOBUILD, store available info and |
305 | + remove Buildqueue entry. |
306 | + """ |
307 | + build = getUtility(IBuildSet).getByQueueEntry(queueItem) |
308 | + build.buildstate = BuildStatus.FAILEDTOBUILD |
309 | + self.storeBuildInfo(queueItem, librarian, buildid, dependencies) |
310 | + queueItem.builder.cleanSlave() |
311 | + build.notify() |
312 | + queueItem.destroySelf() |
313 | + |
314 | + def buildStatus_DEPFAIL(self, queueItem, librarian, buildid, |
315 | + filemap=None, dependencies=None): |
316 | + """Handle a package that had missing dependencies. |
317 | + |
318 | + Build has failed by missing dependencies, set the job status as |
319 | + MANUALDEPWAIT, store available information, remove BuildQueue |
320 | + entry and release builder slave for another job. |
321 | + """ |
322 | + build = getUtility(IBuildSet).getByQueueEntry(queueItem) |
323 | + build.buildstate = BuildStatus.MANUALDEPWAIT |
324 | + self.storeBuildInfo(queueItem, librarian, buildid, dependencies) |
325 | + self.logger.critical("***** %s is MANUALDEPWAIT *****" |
326 | + % queueItem.builder.name) |
327 | + queueItem.builder.cleanSlave() |
328 | + queueItem.destroySelf() |
329 | + |
330 | + def buildStatus_CHROOTFAIL(self, queueItem, librarian, buildid, |
331 | + filemap=None, dependencies=None): |
332 | + """Handle a package that had failed when unpacking the CHROOT. |
333 | + |
334 | + Build has failed when installing the current CHROOT, mark the |
335 | + job as CHROOTFAIL, store available information, remove BuildQueue |
336 | + and release the builder. |
337 | + """ |
338 | + build = getUtility(IBuildSet).getByQueueEntry(queueItem) |
339 | + build.buildstate = BuildStatus.CHROOTWAIT |
340 | + self.storeBuildInfo(queueItem, librarian, buildid, dependencies) |
341 | + self.logger.critical("***** %s is CHROOTWAIT *****" % |
342 | + queueItem.builder.name) |
343 | + queueItem.builder.cleanSlave() |
344 | + build.notify() |
345 | + queueItem.destroySelf() |
346 | + |
347 | + def buildStatus_BUILDERFAIL(self, queueItem, librarian, buildid, |
348 | + filemap=None, dependencies=None): |
349 | + """Handle builder failures. |
350 | + |
351 | + Build has been failed when trying to build the target package, |
352 | + The environment is working well, so mark the job as NEEDSBUILD again |
353 | + and 'clean' the builder to do another jobs. |
354 | + """ |
355 | + self.logger.warning("***** %s has failed *****" |
356 | + % queueItem.builder.name) |
357 | + self.failBuilder(queueItem.builder, |
358 | + ("Builder returned BUILDERFAIL when asked " |
359 | + "for its status")) |
360 | + # simply reset job |
361 | + build = getUtility(IBuildSet).getByQueueEntry(queueItem) |
362 | + self.storeBuildInfo(queueItem, librarian, buildid, dependencies) |
363 | + queueItem.reset() |
364 | + |
365 | + def buildStatus_GIVENBACK(self, queueItem, librarian, buildid, |
366 | + filemap=None, dependencies=None): |
367 | + """Handle automatic retry requested by builder. |
368 | + |
369 | + GIVENBACK pseudo-state represents a request for automatic retry |
370 | + later, the build records is delayed by reducing the lastscore to |
371 | + ZERO. |
372 | + """ |
373 | + self.logger.warning("***** %s is GIVENBACK by %s *****" |
374 | + % (buildid, queueItem.builder.name)) |
375 | + build = getUtility(IBuildSet).getByQueueEntry(queueItem) |
376 | + self.storeBuildInfo(queueItem, librarian, buildid, dependencies) |
377 | + # XXX cprov 2006-05-30: Currently this information is not |
378 | + # properly presented in the Web UI. We will discuss it in |
379 | + # the next Paris Summit, infinity has some ideas about how |
380 | + # to use this content. For now we just ensure it's stored. |
381 | + queueItem.builder.cleanSlave() |
382 | + queueItem.reset() |
383 | + |
384 | + |
385 | +>>>>>>> MERGE-SOURCE |
This trivial change prevents the buildd master from falling over if a container directory was created but no log file uploaded. (bug 499115)