Merge lp:~spiv/launchpad/codebrowse-oops-84838 into lp:launchpad

Proposed by Andrew Bennetts
Status: Superseded
Proposed branch: lp:~spiv/launchpad/codebrowse-oops-84838
Merge into: lp:launchpad
Prerequisite: lp:~spiv/launchpad/loggerhead-logging
Diff against target: 210 lines (+148/-1)
4 files modified
configs/development/launchpad-lazr.conf (+3/-0)
lib/canonical/config/schema-lazr.conf (+9/-0)
lib/launchpad_loggerhead/app.py (+134/-0)
scripts/start-loggerhead.py (+2/-1)
To merge this branch: bzr merge lp:~spiv/launchpad/codebrowse-oops-84838
Reviewer Review Type Date Requested Status
Andrew Bennetts (community) Needs Resubmitting
Michael Hudson-Doyle Needs Fixing
Review via email: mp+31007@code.launchpad.net

This proposal has been superseded by a proposal from 2010-07-27.

Description of the change

This fixes a 5-digit bug number. :)

This fixes codebrowse to log tracebacks from failed requests as OOPSes, rather than dumping them in debug.log. This is a fairly basic implementation: it shows very elementary HTML to the user, and it doesn't capture any information in the OOPS beyond the traceback and the URL... but even those flaws are improvements over the status quo, and are clearly marked with XXXs.

The only outstanding issue I know of is finding out what the production config values for this should be, particularly the error_dir. And of course arranging for the production OOPSes to get synced and reported on like OOPS reports from other systems.

This is based on an already-approved (but as yet unmerged) branch that makes a more modest tweak to codebrowse's logging.

To post a comment you must log in.
Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

1) Yay!
2) I think it would be better to have some of the state around 'really-called' etc in an object
3) Unless I'm misreading/misremembering, the wsgi protocol is that the application can return an _iterable_, not necessarily an _iterator_, so I think you need to say "app_iter = iter(app(...))".
4) The oops page should still be a "500 Internal Server Error" at the HTTP level.
5) Maybe the oops template rendering can be factored out?
6) Yay!, again

review: Needs Fixing
Revision history for this message
Andrew Bennetts (spiv) wrote :

Ok, I think I've addressed all your comments, plus a couple of nits you didn't notice :)

It still appears to work after the changes, so that's a plus ;)

Please re-review.

review: Needs Resubmitting

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'configs/development/launchpad-lazr.conf'
--- configs/development/launchpad-lazr.conf 2010-07-25 05:16:58 +0000
+++ configs/development/launchpad-lazr.conf 2010-07-27 07:29:59 +0000
@@ -52,6 +52,9 @@
52log_folder: /var/tmp/codebrowse.launchpad.dev/logs52log_folder: /var/tmp/codebrowse.launchpad.dev/logs
53launchpad_root: https://code.launchpad.dev/53launchpad_root: https://code.launchpad.dev/
54secret_path: configs/development/codebrowse-secret54secret_path: configs/development/codebrowse-secret
55error_dir: /var/tmp/codebrowse.launchpad.dev/errors
56oops_prefix: CB
57copy_to_zlog: false
5558
56[codehosting]59[codehosting]
57launch: True60launch: True
5861
=== modified file 'lib/canonical/config/schema-lazr.conf'
--- lib/canonical/config/schema-lazr.conf 2010-07-26 09:23:26 +0000
+++ lib/canonical/config/schema-lazr.conf 2010-07-27 07:29:59 +0000
@@ -283,6 +283,15 @@
283# datatype: string283# datatype: string
284secret_path:284secret_path:
285285
286# See [error_reports].
287copy_to_zlog: False
288
289# See [error_reports].
290error_dir: none
291
292# See [error_reports].
293oops_prefix: CB
294
286295
287[codehosting]296[codehosting]
288# datatype: string297# datatype: string
289298
=== modified file 'lib/launchpad_loggerhead/app.py'
--- lib/launchpad_loggerhead/app.py 2010-05-15 17:43:59 +0000
+++ lib/launchpad_loggerhead/app.py 2010-07-27 07:29:59 +0000
@@ -3,6 +3,7 @@
33
4import logging4import logging
5import os5import os
6import sys
6import threading7import threading
7import urllib8import urllib
8import urlparse9import urlparse
@@ -26,6 +27,8 @@
26from canonical.config import config27from canonical.config import config
27from canonical.launchpad.xmlrpc import faults28from canonical.launchpad.xmlrpc import faults
28from canonical.launchpad.webapp.vhosts import allvhosts29from canonical.launchpad.webapp.vhosts import allvhosts
30from canonical.launchpad.webapp.errorlog import (
31 ErrorReportingUtility, ScriptRequest)
29from lp.code.interfaces.codehosting import (32from lp.code.interfaces.codehosting import (
30 BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS)33 BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS)
31from lp.codehosting.vfs import get_lp_server34from lp.codehosting.vfs import get_lp_server
@@ -227,3 +230,134 @@
227 bzr_branch.unlock()230 bzr_branch.unlock()
228 finally:231 finally:
229 lp_server.stop_server()232 lp_server.stop_server()
233
234
235def make_oops_logging_exception_hook(error_utility, request):
236 """Make a hook for logging OOPSes."""
237 def log_oops():
238 error_utility.raising(sys.exc_info(), request)
239 return log_oops
240
241
242def make_error_utility():
243 """Make an error utility for logging errors from codebrowse."""
244 error_utility = ErrorReportingUtility()
245 error_utility.configure('codebrowse')
246 return error_utility
247
248
249# XXX: This HTML template should be replaced with the same one that lpnet uses
250# for reporting OOPSes to users, or at least something that looks similar. But
251# even this is better than the "Internal Server Error" you'd get otherwise.
252# - Andrew Bennetts, 2010-07-27.
253_oops_html_template = '''\
254<html>
255<head><title>Oops! %(oopsid)s</title></head>
256<body>
257<h1>Oops!</h1>
258<p>Something broke while generating the page.
259Please try again in a few minutes, and if the problem persists file a bug at
260<a href="https://bugs.launchpad.net/launchpad-code"
261>https://bugs.launchpad.net/launchpad-code</a>
262and quote OOPS-ID <strong>%(oopsid)s</strong>
263</p></body></html>'''
264
265
266_error_status = '500 Internal Server Error'
267_error_headers = [('Content-Type', 'text/html')]
268
269
270class WrappedStartResponse(object):
271 """Wraps start_response (from a WSGI request) to keep track of whether
272 start_response was called (and whether the callable it returns has been
273 called).
274
275 Used by oops_middleware.
276 """
277
278 def __init__(self, start_response):
279 self._real_start_response = start_response
280 self.response_start = None
281 self._write_callable = None
282
283 @property
284 def body_started(self):
285 return self._write_callable is not None
286
287 def start_response(self, status, headers, exc_info=None):
288 # Just keep a note of the values we were called with for now. We don't
289 # need to invoke the real start_response until the response body
290 # starts.
291 self.response_start = (status, headers)
292 if exc_info is not None:
293 self.response_start += (exc_info,)
294 return self.write_wrapper
295
296 def really_start(self):
297 self._write_callable = self._real_start_response(*self.response_start)
298
299 def write_wrapper(self, data):
300 if self._write_callable is None:
301 self.really_start()
302 self._write_callable(data)
303
304 def generate_oops(self, environ, error_utility):
305 """Generate an OOPS.
306
307 :returns: True if the error page was sent to the user, and False if it
308 couldn't (i.e. if the response body was already started).
309 """
310 oopsid = report_oops(environ, error_utility)
311 if self.body_started:
312 return False
313 write = self._real_start_response(_error_status, _error_headers)
314 write(_oops_html_template % {'oopsid': oopsid})
315 return True
316
317
318def report_oops(environ, error_utility):
319 # XXX: We should capture more per-request information to include in
320 # the OOPS here, e.g. duration, user, etc. But even an OOPS with
321 # just a traceback and URL is better than nothing.
322 # - Andrew Bennetts, 2010-07-27.
323 request = ScriptRequest(
324 [], URL=construct_url(environ))
325 error_utility.raising(sys.exc_info(), request)
326 return request.oopsid
327
328
329def oops_middleware(app):
330 """Middleware to log an OOPS if the request fails.
331
332 If the request fails before the response body has started then this returns
333 a basic HTML error page with the OOPS ID to the user (and status code 500).
334 """
335 error_utility = make_error_utility()
336 def wrapped_app(environ, start_response):
337 wrapped = WrappedStartResponse(start_response)
338 # Start processing this request
339 try:
340 app_iter = iter(app(environ, wrapped.start_response))
341 except:
342 error_page_sent = wrapped.generate_oops(environ, error_utility)
343 if error_page_sent:
344 return
345 # Could not send error page to user, so... just give up.
346 raise
347 # Start yielding the response
348 while True:
349 try:
350 data = app_iter.next()
351 except StopIteration:
352 return
353 except:
354 error_page_sent = wrapped.generate_oops(environ, error_utility)
355 if error_page_sent:
356 return
357 # Could not send error page to user, so... just give up.
358 raise
359 else:
360 if not wrapped.body_started:
361 wrapped.really_start()
362 yield data
363 return wrapped_app
230364
=== modified file 'scripts/start-loggerhead.py'
--- scripts/start-loggerhead.py 2010-06-08 15:43:13 +0000
+++ scripts/start-loggerhead.py 2010-07-27 07:29:59 +0000
@@ -119,7 +119,7 @@
119119
120from launchpad_loggerhead.debug import (120from launchpad_loggerhead.debug import (
121 change_kill_thread_criteria, threadpool_debug)121 change_kill_thread_criteria, threadpool_debug)
122from launchpad_loggerhead.app import RootApp122from launchpad_loggerhead.app import RootApp, oops_middleware
123from launchpad_loggerhead.session import SessionHandler123from launchpad_loggerhead.session import SessionHandler
124124
125SESSION_VAR = 'lh.session'125SESSION_VAR = 'lh.session'
@@ -153,6 +153,7 @@
153 return wrapped153 return wrapped
154app = set_scheme(app)154app = set_scheme(app)
155app = change_kill_thread_criteria(app)155app = change_kill_thread_criteria(app)
156app = oops_middleware(app)
156157
157try:158try:
158 httpserver.serve(159 httpserver.serve(