Merge lp:~mwhudson/launchpad/no-hosted-area-include-launchpad-loggerhead into lp:launchpad
- no-hosted-area-include-launchpad-loggerhead
- Merge into devel
Proposed by
Michael Hudson-Doyle
Status: | Merged |
---|---|
Approved by: | Tim Penhey |
Approved revision: | no longer in the source branch. |
Merged at revision: | 10828 |
Proposed branch: | lp:~mwhudson/launchpad/no-hosted-area-include-launchpad-loggerhead |
Merge into: | lp:launchpad |
Prerequisite: | lp:~mwhudson/launchpad/no-hosted-area-server-catchup |
Diff against target: |
709 lines (+641/-4) 9 files modified
Makefile (+3/-3) lib/launchpad_loggerhead/__init__.py (+1/-0) lib/launchpad_loggerhead/app.py (+232/-0) lib/launchpad_loggerhead/debug.py (+120/-0) lib/launchpad_loggerhead/session.py (+73/-0) lib/launchpad_loggerhead/static/robots.txt (+2/-0) scripts/start-loggerhead.py (+177/-0) scripts/stop-loggerhead.py (+33/-0) utilities/sourcedeps.conf (+0/-1) |
To merge this branch: | bzr merge lp:~mwhudson/launchpad/no-hosted-area-include-launchpad-loggerhead |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Tim Penhey (community) | Approve | ||
Review via email: mp+24193@code.launchpad.net |
Commit message
Description of the change
Hi Tim,
This branch replaces https:/
Cheers,
mwh
To post a comment you must log in.
Revision history for this message
Tim Penhey (thumper) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'Makefile' |
2 | --- Makefile 2010-04-27 03:45:51 +0000 |
3 | +++ Makefile 2010-04-27 03:46:17 +0000 |
4 | @@ -229,13 +229,13 @@ |
5 | -i $(LPCONFIG) |
6 | |
7 | run_codebrowse: build |
8 | - BZR_PLUGIN_PATH=bzrplugins $(PY) sourcecode/launchpad-loggerhead/start-loggerhead.py -f |
9 | + BZR_PLUGIN_PATH=bzrplugins $(PY) scripts/start-loggerhead.py -f |
10 | |
11 | start_codebrowse: build |
12 | - BZR_PLUGIN_PATH=$(shell pwd)/bzrplugins $(PY) sourcecode/launchpad-loggerhead/start-loggerhead.py |
13 | + BZR_PLUGIN_PATH=$(shell pwd)/bzrplugins $(PY) scripts/start-loggerhead.py |
14 | |
15 | stop_codebrowse: |
16 | - $(PY) sourcecode/launchpad-loggerhead/stop-loggerhead.py |
17 | + $(PY) scripts/stop-loggerhead.py |
18 | |
19 | run_codehosting: check_schema inplace stop hosted_branches |
20 | $(RM) thread*.request |
21 | |
22 | === added directory 'lib/launchpad_loggerhead' |
23 | === removed symlink 'lib/launchpad_loggerhead' |
24 | === target was u'../sourcecode/launchpad-loggerhead/launchpad_loggerhead/' |
25 | === added file 'lib/launchpad_loggerhead/__init__.py' |
26 | --- lib/launchpad_loggerhead/__init__.py 1970-01-01 00:00:00 +0000 |
27 | +++ lib/launchpad_loggerhead/__init__.py 2010-04-27 03:46:17 +0000 |
28 | @@ -0,0 +1,1 @@ |
29 | + |
30 | |
31 | === added file 'lib/launchpad_loggerhead/app.py' |
32 | --- lib/launchpad_loggerhead/app.py 1970-01-01 00:00:00 +0000 |
33 | +++ lib/launchpad_loggerhead/app.py 2010-04-27 03:46:17 +0000 |
34 | @@ -0,0 +1,232 @@ |
35 | +# Copyright 2009 Canonical Ltd. This software is licensed under the |
36 | +# GNU Affero General Public License version 3 (see the file LICENSE). |
37 | + |
38 | +import logging |
39 | +import re |
40 | +import os |
41 | +import threading |
42 | +import urllib |
43 | +import urlparse |
44 | +import xmlrpclib |
45 | + |
46 | +from bzrlib import branch, errors, lru_cache, urlutils |
47 | + |
48 | +from loggerhead.apps import favicon_app, static_app |
49 | +from loggerhead.apps.branch import BranchWSGIApp |
50 | + |
51 | +from openid.extensions.sreg import SRegRequest, SRegResponse |
52 | +from openid.consumer.consumer import CANCEL, Consumer, FAILURE, SUCCESS |
53 | +from openid.store.memstore import MemoryStore |
54 | + |
55 | +from paste.fileapp import DataApp |
56 | +from paste.request import construct_url, parse_querystring, path_info_pop |
57 | +from paste.httpexceptions import ( |
58 | + HTTPMovedPermanently, HTTPNotFound, HTTPUnauthorized) |
59 | + |
60 | +from canonical.config import config |
61 | +from canonical.launchpad.xmlrpc import faults |
62 | +from lp.code.interfaces.codehosting import ( |
63 | + BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS, LAUNCHPAD_SERVICES) |
64 | +from lp.codehosting.vfs import branch_id_to_path |
65 | + |
66 | +robots_txt = '''\ |
67 | +User-agent: * |
68 | +Disallow: / |
69 | +''' |
70 | + |
71 | +robots_app = DataApp(robots_txt, content_type='text/plain') |
72 | + |
73 | + |
74 | +thread_transports = threading.local() |
75 | + |
76 | +def valid_launchpad_name(s): |
77 | + return re.match('^[a-z0-9][a-z0-9\+\.\-]*$', s) is not None |
78 | + |
79 | + |
80 | +def valid_launchpad_user_name(s): |
81 | + return re.match('^~[a-z0-9][a-z0-9\+\.\-]*$', s) is not None |
82 | + |
83 | + |
84 | +def valid_launchpad_branch_name(s): |
85 | + return re.match(r'^(?i)[a-z0-9][a-z0-9+\.\-@_]*\Z', s) is not None |
86 | + |
87 | + |
88 | +class RootApp: |
89 | + |
90 | + def __init__(self, session_var): |
91 | + self.graph_cache = lru_cache.LRUCache(10) |
92 | + self.branchfs = xmlrpclib.ServerProxy( |
93 | + config.codehosting.branchfs_endpoint) |
94 | + self.session_var = session_var |
95 | + self.store = MemoryStore() |
96 | + self.log = logging.getLogger('lp-loggerhead') |
97 | + branch.Branch.hooks.install_named_hook( |
98 | + 'transform_fallback_location', |
99 | + self._transform_fallback_location_hook, |
100 | + 'RootApp._transform_fallback_location_hook') |
101 | + |
102 | + def _transform_fallback_location_hook(self, branch, url): |
103 | + """Transform a human-readable fallback URL into an id-based one. |
104 | + |
105 | + Branches on Launchpad record their stacked-on URLs in the form |
106 | + '/~user/product/branch', but we need to access branches based on |
107 | + database ID to gain access to private branches. So we use this hook |
108 | + into Bazaar's branch-opening process to translate the former to the |
109 | + latter. |
110 | + """ |
111 | + # It might seem that using the LAUNCHPAD_SERVICES 'user', which allows |
112 | + # access to all branches, here would be a security risk. But in fact |
113 | + # it isn't, because a user will only have launchpad.View on the |
114 | + # stacked branch if they have it for all the stacked-on branches. |
115 | + # (It would be nice to use the user from the request, but that's far |
116 | + # from simple because branch hooks are global per-process and we |
117 | + # handle different requests in different threads). |
118 | + transport_type, info, trail = self.branchfs.translatePath( |
119 | + LAUNCHPAD_SERVICES, url) |
120 | + return urlparse.urljoin( |
121 | + config.codehosting.internal_branch_by_id_root, |
122 | + branch_id_to_path(info['id'])) |
123 | + |
124 | + def get_transports(self): |
125 | + t = getattr(thread_transports, 'transports', None) |
126 | + if t is None: |
127 | + thread_transports.transports = [] |
128 | + return thread_transports.transports |
129 | + |
130 | + def _make_consumer(self, environ): |
131 | + """Build an OpenID `Consumer` object with standard arguments.""" |
132 | + return Consumer(environ[self.session_var], self.store) |
133 | + |
134 | + def _begin_login(self, environ, start_response): |
135 | + """Start the process of authenticating with OpenID. |
136 | + |
137 | + We redirect the user to Launchpad to identify themselves, asking to be |
138 | + sent their nickname. Launchpad will then redirect them to our +login |
139 | + page with enough information that we can then redirect them again to |
140 | + the page they were looking at, with a cookie that gives us the |
141 | + username. |
142 | + """ |
143 | + openid_request = self._make_consumer(environ).begin( |
144 | + 'https://' + config.vhost.openid.hostname) |
145 | + openid_request.addExtension( |
146 | + SRegRequest(required=['nickname'])) |
147 | + back_to = construct_url(environ) |
148 | + raise HTTPMovedPermanently(openid_request.redirectURL( |
149 | + config.codehosting.secure_codebrowse_root, |
150 | + config.codehosting.secure_codebrowse_root + '+login/?' |
151 | + + urllib.urlencode({'back_to':back_to}))) |
152 | + |
153 | + def _complete_login(self, environ, start_response): |
154 | + """Complete the OpenID authentication process. |
155 | + |
156 | + Here we handle the result of the OpenID process. If the process |
157 | + succeeded, we record the username in the session and redirect the user |
158 | + to the page they were trying to view that triggered the login attempt. |
159 | + In the various failure cases we return a 401 Unauthorized response |
160 | + with a brief explanation of what went wrong. |
161 | + """ |
162 | + query = dict(parse_querystring(environ)) |
163 | + # Passing query['openid.return_to'] here is massive cheating, but |
164 | + # given we control the endpoint who cares. |
165 | + response = self._make_consumer(environ).complete( |
166 | + query, query['openid.return_to']) |
167 | + if response.status == SUCCESS: |
168 | + self.log.error('open id response: SUCCESS') |
169 | + sreg_info = SRegResponse.fromSuccessResponse(response) |
170 | + environ[self.session_var]['user'] = sreg_info['nickname'] |
171 | + raise HTTPMovedPermanently(query['back_to']) |
172 | + elif response.status == FAILURE: |
173 | + self.log.error('open id response: FAILURE: %s', response.message) |
174 | + exc = HTTPUnauthorized() |
175 | + exc.explanation = response.message |
176 | + raise exc |
177 | + elif response.status == CANCEL: |
178 | + self.log.error('open id response: CANCEL') |
179 | + exc = HTTPUnauthorized() |
180 | + exc.explanation = "Authentication cancelled." |
181 | + raise exc |
182 | + else: |
183 | + self.log.error('open id response: UNKNOWN') |
184 | + exc = HTTPUnauthorized() |
185 | + exc.explanation = "Unknown OpenID response." |
186 | + raise exc |
187 | + |
188 | + def __call__(self, environ, start_response): |
189 | + environ['loggerhead.static.url'] = environ['SCRIPT_NAME'] |
190 | + if environ['PATH_INFO'].startswith('/static/'): |
191 | + path_info_pop(environ) |
192 | + return static_app(environ, start_response) |
193 | + elif environ['PATH_INFO'] == '/favicon.ico': |
194 | + return favicon_app(environ, start_response) |
195 | + elif environ['PATH_INFO'] == '/robots.txt': |
196 | + return robots_app(environ, start_response) |
197 | + elif environ['PATH_INFO'].startswith('/+login'): |
198 | + return self._complete_login(environ, start_response) |
199 | + path = environ['PATH_INFO'] |
200 | + trailingSlashCount = len(path) - len(path.rstrip('/')) |
201 | + user = environ[self.session_var].get('user', LAUNCHPAD_ANONYMOUS) |
202 | + try: |
203 | + transport_type, info, trail = self.branchfs.translatePath( |
204 | + user, urlutils.escape(path)) |
205 | + except xmlrpclib.Fault, f: |
206 | + if faults.check_fault(f, faults.PathTranslationError): |
207 | + raise HTTPNotFound() |
208 | + elif faults.check_fault(f, faults.PermissionDenied): |
209 | + # If we're not allowed to see the branch... |
210 | + if environ['wsgi.url_scheme'] != 'https': |
211 | + # ... the request shouldn't have come in over http, as |
212 | + # requests for private branches over http should be |
213 | + # redirected to https by the dynamic rewrite script we use |
214 | + # (which runs before this code is reached), but just in |
215 | + # case... |
216 | + env_copy = environ.copy() |
217 | + env_copy['wsgi.url_scheme'] = 'https' |
218 | + raise HTTPMovedPermanently(construct_url(env_copy)) |
219 | + elif user != LAUNCHPAD_ANONYMOUS: |
220 | + # ... if the user is already logged in and still can't see |
221 | + # the branch, they lose. |
222 | + exc = HTTPUnauthorized() |
223 | + exc.explanation = "You are logged in as %s." % user |
224 | + raise exc |
225 | + else: |
226 | + # ... otherwise, lets give them a chance to log in with |
227 | + # OpenID. |
228 | + return self._begin_login(environ, start_response) |
229 | + else: |
230 | + raise |
231 | + if transport_type != BRANCH_TRANSPORT: |
232 | + raise HTTPNotFound() |
233 | + trail = urlutils.unescape(trail).encode('utf-8') |
234 | + trail += trailingSlashCount * '/' |
235 | + amount_consumed = len(path) - len(trail) |
236 | + consumed = path[:amount_consumed] |
237 | + branch_name = consumed.strip('/') |
238 | + self.log.info('Using branch: %s', branch_name) |
239 | + if trail and not trail.startswith('/'): |
240 | + trail = '/' + trail |
241 | + environ['PATH_INFO'] = trail |
242 | + environ['SCRIPT_NAME'] += consumed.rstrip('/') |
243 | + branch_url = urlparse.urljoin( |
244 | + config.codehosting.internal_branch_by_id_root, |
245 | + branch_id_to_path(info['id'])) |
246 | + branch_link = urlparse.urljoin( |
247 | + config.codebrowse.launchpad_root, branch_name) |
248 | + cachepath = os.path.join( |
249 | + config.codebrowse.cachepath, branch_name[1:]) |
250 | + if not os.path.isdir(cachepath): |
251 | + os.makedirs(cachepath) |
252 | + self.log.info('branch_url: %s', branch_url) |
253 | + try: |
254 | + bzr_branch = branch.Branch.open( |
255 | + branch_url, possible_transports=self.get_transports()) |
256 | + except errors.NotBranchError, err: |
257 | + self.log.warning('Not a branch: %s', err) |
258 | + raise HTTPNotFound() |
259 | + bzr_branch.lock_read() |
260 | + try: |
261 | + view = BranchWSGIApp( |
262 | + bzr_branch, branch_name, {'cachepath': cachepath}, |
263 | + self.graph_cache, branch_link=branch_link, served_url=None) |
264 | + return view.app(environ, start_response) |
265 | + finally: |
266 | + bzr_branch.unlock() |
267 | |
268 | === added file 'lib/launchpad_loggerhead/debug.py' |
269 | --- lib/launchpad_loggerhead/debug.py 1970-01-01 00:00:00 +0000 |
270 | +++ lib/launchpad_loggerhead/debug.py 2010-04-27 03:46:17 +0000 |
271 | @@ -0,0 +1,120 @@ |
272 | +# Copyright 2009 Canonical Ltd. This software is licensed under the |
273 | +# GNU Affero General Public License version 3 (see the file LICENSE). |
274 | + |
275 | +import thread |
276 | +import time |
277 | + |
278 | +from paste.request import construct_url |
279 | + |
280 | + |
281 | +def tabulate(cells): |
282 | + """Format a list of lists of strings in a table. |
283 | + |
284 | + The 'cells' are centered. |
285 | + |
286 | + >>> print ''.join(tabulate( |
287 | + ... [['title 1', 'title 2'], |
288 | + ... ['short', 'rather longer']])) |
289 | + title 1 title 2 |
290 | + short rather longer |
291 | + """ |
292 | + widths = {} |
293 | + for row in cells: |
294 | + for col_index, cell in enumerate(row): |
295 | + widths[col_index] = max(len(cell), widths.get(col_index, 0)) |
296 | + result = [] |
297 | + for row in cells: |
298 | + result_row = '' |
299 | + for col_index, cell in enumerate(row): |
300 | + result_row += cell.center(widths[col_index] + 2) |
301 | + result.append(result_row.rstrip() + '\n') |
302 | + return result |
303 | + |
304 | + |
305 | +def threadpool_debug(app): |
306 | + """Wrap `app` to provide debugging information about the threadpool state. |
307 | + |
308 | + The returned application will serve debugging information about the state |
309 | + of the threadpool at '/thread-debug' -- but only when accessed directly, |
310 | + not when accessed through Apache. |
311 | + """ |
312 | + def wrapped(environ, start_response): |
313 | + if ('HTTP_X_FORWARDED_SERVER' in environ |
314 | + or environ['PATH_INFO'] != '/thread-debug'): |
315 | + environ['lp.timestarted'] = time.time() |
316 | + return app(environ, start_response) |
317 | + threadpool = environ['paste.httpserver.thread_pool'] |
318 | + start_response("200 Ok", []) |
319 | + output = [("url", "time running", "time since last activity")] |
320 | + now = time.time() |
321 | + # Because we're accessing mutable structures without locks here, |
322 | + # we're a bit cautious about things looking like we expect -- if a |
323 | + # worker doesn't seem fully set up, we just ignore it. |
324 | + for worker in threadpool.workers: |
325 | + if not hasattr(worker, 'thread_id'): |
326 | + continue |
327 | + time_started, info = threadpool.worker_tracker.get( |
328 | + worker.thread_id, (None, None)) |
329 | + if time_started is not None and info is not None: |
330 | + real_time_started = info.get( |
331 | + 'lp.timestarted', time_started) |
332 | + output.append( |
333 | + map(str, |
334 | + (construct_url(info), |
335 | + now - real_time_started, |
336 | + now - time_started,))) |
337 | + return tabulate(output) |
338 | + return wrapped |
339 | + |
340 | + |
341 | +def change_kill_thread_criteria(application): |
342 | + """Interfere with threadpool so that threads are killed for inactivity. |
343 | + |
344 | + The usual rules with paste's threadpool is that a thread that takes longer |
345 | + than 'hung_thread_limit' seconds to process a request is considered hung |
346 | + and more than 'kill_thread_limit' seconds is killed. |
347 | + |
348 | + Because loggerhead streams its output, how long the entire request takes |
349 | + to process depends on things like how fast the user's internet connection |
350 | + is. What we'd like to do is kill threads that don't _start_ to produce |
351 | + output for 'kill_thread_limit' seconds. |
352 | + |
353 | + What this class actually does is arrange things so that threads that |
354 | + produce no output for 'kill_thread_limit' are killed, because that's the |
355 | + rule Apache uses when interpreting ProxyTimeout. |
356 | + """ |
357 | + def wrapped_application(environ, start_response): |
358 | + threadpool = environ['paste.httpserver.thread_pool'] |
359 | + def reset_timer(): |
360 | + """Make this thread safe for another 'kill_thread_limit' seconds. |
361 | + |
362 | + We do this by hacking the threadpool's record of when this thread |
363 | + started to pretend that it started right now. Hacky, but it's |
364 | + enough to fool paste.httpserver.ThreadPool.kill_hung_threads and |
365 | + that's what matters. |
366 | + """ |
367 | + threadpool.worker_tracker[thread.get_ident()][0] = time.time() |
368 | + def response_hook(status, response_headers, exc_info=None): |
369 | + # We reset the timer when the HTTP headers are sent... |
370 | + reset_timer() |
371 | + writer = start_response(status, response_headers, exc_info) |
372 | + def wrapped_writer(arg): |
373 | + # ... and whenever more output has been generated. |
374 | + reset_timer() |
375 | + return writer(arg) |
376 | + return wrapped_writer |
377 | + result = application(environ, response_hook) |
378 | + # WSGI allows the application to return an iterable, which could be a |
379 | + # generator that does significant processing between successive items, |
380 | + # so we should reset the timer between each item. |
381 | + # |
382 | + # This isn't really necessary as loggerhead doesn't return any |
383 | + # non-trivial iterables to the WSGI server. But it's probably better |
384 | + to cope with this case to avoid nasty surprises if loggerhead |
385 | + # changes. |
386 | + def reset_timer_between_items(iterable): |
387 | + for item in iterable: |
388 | + reset_timer() |
389 | + yield item |
390 | + return reset_timer_between_items(result) |
391 | + return wrapped_application |
392 | |
393 | === added file 'lib/launchpad_loggerhead/session.py' |
394 | --- lib/launchpad_loggerhead/session.py 1970-01-01 00:00:00 +0000 |
395 | +++ lib/launchpad_loggerhead/session.py 2010-04-27 03:46:17 +0000 |
396 | @@ -0,0 +1,73 @@ |
397 | +# Copyright 2009 Canonical Ltd. This software is licensed under the |
398 | +# GNU Affero General Public License version 3 (see the file LICENSE). |
399 | + |
400 | +"""Simple paste-y session manager tuned for the needs of launchpad-loggerhead. |
401 | +""" |
402 | + |
403 | +import pickle |
404 | + |
405 | +from paste.auth.cookie import AuthCookieHandler, AuthCookieSigner |
406 | + |
407 | + |
408 | +class MyAuthCookieSigner(AuthCookieSigner): |
409 | + """Fix a bug in AuthCookieSigner.""" |
410 | + |
411 | + def sign(self, content): |
412 | + # XXX 2008-01-13 Michael Hudson: paste.auth.cookie generates bogus |
413 | + # cookies when the value is long: |
414 | + # http://trac.pythonpaste.org/pythonpaste/ticket/257. This is fixed |
415 | + # now, so when a new version is released and packaged we can remove |
416 | + # this class. |
417 | + r = AuthCookieSigner.sign(self, content) |
418 | + return r.replace('\n', '') |
419 | + |
420 | + |
421 | +class SessionHandler(object): |
422 | + """Middleware that provides a cookie-based session. |
423 | + |
424 | + The session dict is stored, pickled (and HMACed), in a cookie, so don't |
425 | + store very much in the session! |
426 | + """ |
427 | + |
428 | + def __init__(self, application, session_var, secret=None): |
429 | + """Initialize a SessionHandler instance. |
430 | + |
431 | + :param application: This is the wrapped application which will have |
432 | + access to the ``environ[session_var]`` dictionary managed by this |
433 | + middleware. |
434 | + :param session_var: The key under which to store the session |
435 | + dictionary in the environment. |
436 | + :param secret: A secret value used for signing the cookie. If not |
437 | + supplied, a new secret will be used for each instantiation of the |
438 | + SessionHandler. |
439 | + """ |
440 | + self.application = application |
441 | + self.cookie_handler = AuthCookieHandler( |
442 | + self._process, scanlist=[session_var], |
443 | + signer=MyAuthCookieSigner(secret)) |
444 | + self.session_var = session_var |
445 | + |
446 | + def __call__(self, environ, start_response): |
447 | + # We need to put the request through the cookie handler first, so we |
448 | + # can access the validated string in the environ in `_process` below. |
449 | + return self.cookie_handler(environ, start_response) |
450 | + |
451 | + def _process(self, environ, start_response): |
452 | + """Process a request. |
453 | + |
454 | + AuthCookieHandler takes care of getting the text value of the session |
455 | + in and out of the cookie (and validating the text using HMAC) so we |
456 | + just need to convert that string to and from a real dictionary using |
457 | + pickle. |
458 | + """ |
459 | + if self.session_var in environ: |
460 | + session = pickle.loads(environ[self.session_var]) |
461 | + else: |
462 | + session = {} |
463 | + environ[self.session_var] = session |
464 | + def response_hook(status, response_headers, exc_info=None): |
465 | + session = environ.pop(self.session_var) |
466 | + if session: |
467 | + environ[self.session_var] = pickle.dumps(session) |
468 | + return start_response(status, response_headers, exc_info) |
469 | + return self.application(environ, response_hook) |
470 | |
471 | === added directory 'lib/launchpad_loggerhead/static' |
472 | === added file 'lib/launchpad_loggerhead/static/robots.txt' |
473 | --- lib/launchpad_loggerhead/static/robots.txt 1970-01-01 00:00:00 +0000 |
474 | +++ lib/launchpad_loggerhead/static/robots.txt 2010-04-27 03:46:17 +0000 |
475 | @@ -0,0 +1,2 @@ |
476 | +User-agent: * |
477 | +Disallow: / |
478 | |
479 | === added file 'scripts/start-loggerhead.py' |
480 | --- scripts/start-loggerhead.py 1970-01-01 00:00:00 +0000 |
481 | +++ scripts/start-loggerhead.py 2010-04-27 03:46:17 +0000 |
482 | @@ -0,0 +1,177 @@ |
483 | +#!/usr/bin/python2.5 -S |
484 | +# |
485 | +# Copyright 2009, 2010 Canonical Ltd. This software is licensed under the |
486 | +# GNU Affero General Public License version 3 (see the file LICENSE). |
487 | + |
488 | +import _pythonpath |
489 | + |
490 | +import logging |
491 | +import os |
492 | +import sys |
493 | + |
494 | +from paste import httpserver |
495 | +from paste.deploy.config import PrefixMiddleware |
496 | +from paste.httpexceptions import HTTPExceptionHandler |
497 | +from paste.request import construct_url |
498 | +from paste.translogger import TransLogger |
499 | + |
500 | +from canonical.config import config |
501 | +import lp.codehosting |
502 | + |
503 | +LISTEN_HOST = '0.0.0.0' |
504 | +LISTEN_PORT = 8080 |
505 | +THREADPOOL_WORKERS = 10 |
506 | + |
507 | + |
508 | +class NoLockingFileHandler(logging.FileHandler): |
509 | + """A version of logging.FileHandler that doesn't do its own locking. |
510 | + |
511 | + We experienced occasional hangs in production where gdb-ery on the server |
512 | + revealed that we sometimes end up with many threads blocking on the RLock |
513 | + held by the logging file handler, and log reading finds that an exception |
514 | + managed to kill a thread in an unsafe window for RLock's. |
515 | + |
516 | + Luckily, there's no real reason for us to take a lock during logging as |
517 | + each log message translates to one call to .write on a file object, which |
518 | + translates to one fwrite call, and it seems that this does enough locking |
519 | + itself for our purposes. |
520 | + |
521 | + So this handler just doesn't lock in log message handling. |
522 | + """ |
523 | + |
524 | + def acquire(self): |
525 | + pass |
526 | + |
527 | + def release(self): |
528 | + pass |
529 | + |
530 | + |
531 | +def setup_logging(home, foreground): |
532 | + # i hate that stupid logging config format, so just set up logging here. |
533 | + |
534 | + log_folder = config.codebrowse.log_folder |
535 | + if not log_folder: |
536 | + log_folder = os.path.join(home, 'logs') |
537 | + if not os.path.exists(log_folder): |
538 | + os.mkdir(log_folder) |
539 | + |
540 | + f = logging.Formatter( |
541 | + '%(levelname)-.3s [%(asctime)s.%(msecs)03d] [%(thread)d] %(name)s: %(message)s', |
542 | + '%Y%m%d-%H:%M:%S') |
543 | + debug_log = NoLockingFileHandler(os.path.join(log_folder, 'debug.log')) |
544 | + debug_log.setLevel(logging.DEBUG) |
545 | + debug_log.setFormatter(f) |
546 | + if foreground: |
547 | + stdout_log = logging.StreamHandler(sys.stdout) |
548 | + stdout_log.setLevel(logging.DEBUG) |
549 | + stdout_log.setFormatter(f) |
550 | + f = logging.Formatter('[%(asctime)s.%(msecs)03d] %(message)s', |
551 | + '%Y%m%d-%H:%M:%S') |
552 | + access_log = NoLockingFileHandler(os.path.join(log_folder, 'access.log')) |
553 | + access_log.setLevel(logging.INFO) |
554 | + access_log.setFormatter(f) |
555 | + |
556 | + logging.getLogger('').setLevel(logging.DEBUG) |
557 | + logging.getLogger('').addHandler(debug_log) |
558 | + logging.getLogger('wsgi').addHandler(access_log) |
559 | + |
560 | + if foreground: |
561 | + logging.getLogger('').addHandler(stdout_log) |
562 | + else: |
563 | + class S(object): |
564 | + def write(self, str): |
565 | + logging.getLogger().error(str.rstrip('\n')) |
566 | + def flush(self): |
567 | + pass |
568 | + sys.stderr = S() |
569 | + |
570 | + |
571 | + |
572 | +foreground = False |
573 | +if len(sys.argv) > 1: |
574 | + if sys.argv[1] == '-f': |
575 | + foreground = True |
576 | + |
577 | +home = os.path.realpath(os.path.dirname(__file__)) |
578 | +pidfile = os.path.join(home, 'loggerhead.pid') |
579 | + |
580 | +if not foreground: |
581 | + sys.stderr.write('\n') |
582 | + sys.stderr.write('Launching loggerhead into the background.\n') |
583 | + sys.stderr.write('PID file: %s\n' % (pidfile,)) |
584 | + sys.stderr.write('\n') |
585 | + |
586 | + from loggerhead.daemon import daemonize |
587 | + daemonize(pidfile, home) |
588 | + |
589 | +setup_logging(home, foreground=foreground) |
590 | + |
591 | +log = logging.getLogger('loggerhead') |
592 | +log.info('Starting up...') |
593 | + |
594 | +log.info('Loading the bzr plugins...') |
595 | +from bzrlib.plugin import load_plugins |
596 | +load_plugins() |
597 | + |
598 | +import bzrlib.plugins |
599 | +if getattr(bzrlib.plugins, 'loom', None) is None: |
600 | + log.error('Loom plugin loading failed.') |
601 | + |
602 | +from launchpad_loggerhead.debug import ( |
603 | + change_kill_thread_criteria, threadpool_debug) |
604 | +from launchpad_loggerhead.app import RootApp |
605 | +from launchpad_loggerhead.session import SessionHandler |
606 | + |
607 | +SESSION_VAR = 'lh.session' |
608 | + |
609 | +secret = open(os.path.join(config.root, config.codebrowse.secret_path)).read() |
610 | + |
611 | +app = RootApp(SESSION_VAR) |
612 | +app = HTTPExceptionHandler(app) |
613 | +app = SessionHandler(app, SESSION_VAR, secret) |
614 | +def log_on_request_start(app): |
615 | + def wrapped(environ, start_response): |
616 | + log = logging.getLogger('loggerhead') |
617 | + log.info("Starting to process %s", construct_url(environ)) |
618 | + return app(environ, start_response) |
619 | + return wrapped |
620 | +app = log_on_request_start(app) |
621 | +app = PrefixMiddleware(app) |
622 | +app = TransLogger(app) |
623 | +app = threadpool_debug(app) |
624 | + |
625 | +def set_scheme(app): |
626 | + """Set wsgi.url_scheme in the environment correctly. |
627 | + |
628 | + We serve requests that originated from both http and https, and |
629 | + distinguish between them by adding a header in the https Apache config. |
630 | + """ |
631 | + def wrapped(environ, start_response): |
632 | + environ['wsgi.url_scheme'] = environ.pop( |
633 | + 'HTTP_X_FORWARDED_SCHEME', 'http') |
634 | + return app(environ, start_response) |
635 | + return wrapped |
636 | +app = set_scheme(app) |
637 | +app = change_kill_thread_criteria(app) |
638 | + |
639 | +try: |
640 | + httpserver.serve( |
641 | + app, host=LISTEN_HOST, port=LISTEN_PORT, |
642 | + threadpool_workers=THREADPOOL_WORKERS, |
643 | + threadpool_options={ |
644 | + # Kill threads after 300 seconds. This is insanely high, but |
645 | + # low enough than the default (1800 seconds!) that evidence |
646 | + # suggests it will be hit occasionally, and there's very little |
647 | + # chance of it having negative consequences. |
648 | + 'kill_thread_limit': 300, |
649 | + # Check for threads that should be killed every 10 requests. The |
650 | + # default is every 100, which is easily long enough for things to |
651 | + # gum up completely in between checks. |
652 | + 'hung_check_period': 10, |
653 | + }) |
654 | +finally: |
655 | + log.info('Shutdown.') |
656 | + try: |
657 | + os.remove(pidfile) |
658 | + except OSError: |
659 | + pass |
660 | |
661 | === added file 'scripts/stop-loggerhead.py' |
662 | --- scripts/stop-loggerhead.py 1970-01-01 00:00:00 +0000 |
663 | +++ scripts/stop-loggerhead.py 2010-04-27 03:46:17 +0000 |
664 | @@ -0,0 +1,33 @@ |
665 | +#!/usr/bin/python2.5 -S |
666 | +# |
667 | +# Copyright 2009, 2010 Canonical Ltd. This software is licensed under the |
668 | +# GNU Affero General Public License version 3 (see the file LICENSE). |
669 | + |
670 | +import _pythonpath |
671 | + |
672 | +import os |
673 | +import sys |
674 | + |
675 | +home = os.path.realpath(os.path.dirname(__file__)) |
676 | +pidfile = os.path.join(home, 'loggerhead.pid') |
677 | + |
678 | +try: |
679 | + f = open(pidfile, 'r') |
680 | +except IOError, e: |
681 | + print 'No pid file found.' |
682 | + sys.exit(1) |
683 | + |
684 | +pid = int(f.readline()) |
685 | + |
686 | +try: |
687 | + os.kill(pid, 0) |
688 | +except OSError, e: |
689 | + print 'Stale pid file; server is not running.' |
690 | + sys.exit(1) |
691 | + |
692 | |
693 | +print 'Shutting down previous server @ pid %d.' % (pid,) |
694 | |
695 | + |
696 | +import signal |
697 | +os.kill(pid, signal.SIGTERM) |
698 | |
699 | === modified file 'utilities/sourcedeps.conf' |
700 | --- utilities/sourcedeps.conf 2010-04-21 12:30:48 +0000 |
701 | +++ utilities/sourcedeps.conf 2010-04-27 03:46:17 +0000 |
702 | @@ -5,7 +5,6 @@ |
703 | bzr-svn lp:~launchpad-pqm/bzr-svn/devel;revno=2708 |
704 | cscvs lp:~launchpad-pqm/launchpad-cscvs/devel;revno=432 |
705 | dulwich lp:~launchpad-pqm/dulwich/devel;revno=418 |
706 | -launchpad-loggerhead lp:~launchpad-pqm/launchpad-loggerhead/devel;revno=54 |
707 | loggerhead lp:~launchpad-pqm/loggerhead/devel;revno=174 |
708 | lpreview lp:~launchpad-pqm/bzr-lpreview/devel;revno=23 |
709 | mailman lp:~launchpad-pqm/mailman/2.1;revno=976 |