Merge lp:~stub/launchpad/page-performance-report into lp:launchpad
- page-performance-report
- Merge into devel
Proposed by
Stuart Bishop
Status: | Rejected |
---|---|
Rejected by: | Stuart Bishop |
Proposed branch: | lp:~stub/launchpad/page-performance-report |
Merge into: | lp:launchpad |
Diff against target: |
424 lines (+383/-3) 5 files modified
lib/canonical/launchpad/webapp/publication.py (+10/-3) lib/lp/scripts/helpers.py (+54/-0) lib/lp/scripts/utilities/pageperformancereport.py (+288/-0) utilities/page-performance-report.ini (+13/-0) utilities/page-performance-report.py (+18/-0) |
To merge this branch: | bzr merge lp:~stub/launchpad/page-performance-report |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Jeroen T. Vermeulen | Pending | ||
Review via email: mp+18198@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Stuart Bishop (stub) wrote : | # |
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'lib/canonical/launchpad/webapp/publication.py' |
2 | --- lib/canonical/launchpad/webapp/publication.py 2010-01-19 04:22:17 +0000 |
3 | +++ lib/canonical/launchpad/webapp/publication.py 2010-01-28 13:49:14 +0000 |
4 | @@ -28,8 +28,8 @@ |
5 | from storm.exceptions import DisconnectionError, IntegrityError |
6 | from storm.zope.interfaces import IZStorm |
7 | |
8 | +from zc.zservertracelog.interfaces import ITraceLog |
9 | import zope.app.publication.browser |
10 | - |
11 | from zope.app import zapi # used to get at the adapters service |
12 | from zope.app.publication.interfaces import BeforeTraverseEvent |
13 | from zope.app.security.interfaces import IUnauthenticatedPrincipal |
14 | @@ -459,8 +459,15 @@ |
15 | notify(BeforeTraverseEvent(ob, request)) |
16 | |
17 | def afterTraversal(self, request, ob): |
18 | - """ We don't want to call _maybePlacefullyAuthenticate as does |
19 | - zopepublication.""" |
20 | + """See zope.publisher.interfaces.IPublication. |
21 | + |
22 | + This hook does not invoke our parent's afterTraversal hook |
23 | + in zopepublication.py because we don't want to call |
24 | + _maybePlacefullyAuthenticate. |
25 | + """ |
26 | + # Log the URL including vhost information to the ZServer tracelog. |
27 | + ITraceLog(request).log(request.getURL()) |
28 | + |
29 | assert hasattr(request, '_traversalticks_start'), ( |
30 | 'request._traversalticks_start, which should have been set by ' |
31 | 'beforeTraversal(), was not found.') |
32 | |
33 | === added file 'lib/lp/scripts/helpers.py' |
34 | --- lib/lp/scripts/helpers.py 1970-01-01 00:00:00 +0000 |
35 | +++ lib/lp/scripts/helpers.py 2010-01-28 13:49:14 +0000 |
36 | @@ -0,0 +1,54 @@ |
37 | +# Copyright 2010 Canonical Ltd. This software is licensed under the |
38 | +# GNU Affero General Public License version 3 (see the file LICENSE). |
39 | + |
40 | +"""Helpers for command line tools.""" |
41 | + |
42 | +__metaclass__ = type |
43 | +__all__ = ["LPOptionParser"] |
44 | + |
45 | +from copy import copy |
46 | +from datetime import datetime |
47 | +from optparse import Option, OptionParser, OptionValueError |
48 | + |
49 | +from canonical.launchpad.scripts.logger import logger_options |
50 | + |
51 | + |
52 | +def _check_datetime(option, opt, value): |
53 | + "Type checker for optparse datetime option type." |
54 | + # We support 5 valid ISO8601 formats. |
55 | + formats = [ |
56 | + '%Y-%m-%dT%H:%M:%S', |
57 | + '%Y-%m-%dT%H:%M', |
58 | + '%Y-%m-%d', |
59 | + '%Y-%m-%d %H:%M:%S', |
60 | + '%Y-%m-%d %H:%M' |
61 | + ] |
62 | + for format in formats: |
63 | + try: |
64 | + return datetime.strptime(value, format) |
65 | + except ValueError: |
66 | + pass |
67 | + raise OptionValueError( |
68 | + "option %s: invalid datetime value: %r" % (opt, value)) |
69 | + |
70 | + |
71 | +class LPOption(Option): |
72 | + """Extended optparse Option class. |
73 | + |
74 | + Adds a 'datetime' option type. |
75 | + """ |
76 | + TYPES = Option.TYPES + ("datetime",) |
77 | + TYPE_CHECKER = copy(Option.TYPE_CHECKER) |
78 | + TYPE_CHECKER["datetime"] = _check_datetime |
79 | + |
80 | + |
81 | +class LPOptionParser(OptionParser): |
82 | + """Extended optparse OptionParser. |
83 | + |
84 | + Adds a 'datetime' option type. |
85 | + """ |
86 | + def __init__(self, *args, **kw): |
87 | + kw.setdefault('option_class', LPOption) |
88 | + OptionParser.__init__(self, *args, **kw) |
89 | + logger_options(self) |
90 | + |
91 | |
92 | === added file 'lib/lp/scripts/utilities/pageperformancereport.py' |
93 | --- lib/lp/scripts/utilities/pageperformancereport.py 1970-01-01 00:00:00 +0000 |
94 | +++ lib/lp/scripts/utilities/pageperformancereport.py 2010-01-28 13:49:14 +0000 |
95 | @@ -0,0 +1,288 @@ |
96 | +# Copyright 2010 Canonical Ltd. This software is licensed under the |
97 | +# GNU Affero General Public License version 3 (see the file LICENSE). |
98 | + |
99 | +"""Page performance report generated from zserver trace logs.""" |
100 | + |
101 | +__metaclass__ = type |
102 | +__all__ = [] |
103 | + |
104 | +from cgi import escape as html_quote |
105 | +from ConfigParser import RawConfigParser |
106 | +import re |
107 | +import os.path |
108 | +from textwrap import dedent |
109 | +import time |
110 | + |
111 | +import numpy |
112 | +import simplejson as json |
113 | +from zc.zservertracelog.tracereport import Request, parsedt |
114 | + |
115 | +from canonical.config import config |
116 | +from lp.scripts.helpers import LPOptionParser |
117 | + |
118 | + |
119 | +class Category: |
120 | + def __init__(self, title, regexp, timeout): |
121 | + self.title = title |
122 | + self.regexp = regexp |
123 | + self._compiled_regexp = re.compile(regexp) |
124 | + self.times = Times(timeout) |
125 | + |
126 | + def add(self, request): |
127 | + if self._compiled_regexp.search(request.url) is not None: |
128 | + self.times.add(request) |
129 | + |
130 | + |
131 | +class Times: |
132 | + def __init__(self, timeout): |
133 | + self.requests = [] |
134 | + self.timeout = timeout |
135 | + |
136 | + def add(self, request): |
137 | + self.requests.append(request) |
138 | + |
139 | + def stats(self): |
140 | + num_requests = len(self.requests) |
141 | + if num_requests == 0: |
142 | + return 0, 0, 0, None |
143 | + array = numpy.fromiter( |
144 | + (min(request.app_seconds, self.timeout) |
145 | + for request in self.requests), |
146 | + numpy.float, num_requests) |
147 | + mean = numpy.mean(array) |
148 | + median = numpy.median(array) |
149 | + standard_deviation = numpy.std(array) |
150 | + histogram = numpy.histogram( |
151 | + array, normed=True, |
152 | + range=(0, self.timeout), bins=self.timeout) |
153 | + histogram = zip(histogram[1], histogram[0]) |
154 | + return mean, median, standard_deviation, histogram |
155 | + |
156 | + def __str__(self): |
157 | + results = self.stats() |
158 | + mean, median, standard_deviation, histogram = results |
159 | + hstr = " ".join("%2d" % v for v in histogram) |
160 | + return "%2.2f %2.2f %2.2f %s" % ( |
161 | + mean, median, standard_deviation, hstr) |
162 | + |
163 | + |
164 | +def main(): |
165 | + parser = LPOptionParser("%prog [args] tracelog [...]") |
166 | + parser.add_option( |
167 | + "-c", "--config", dest="config", |
168 | + default=os.path.join( |
169 | + config.root, "utilities", "page-performance-report.ini"), |
170 | + metavar="FILE", help="Load configuration from FILE") |
171 | + parser.add_option( |
172 | + "--timeout", dest="timeout", type="int", |
173 | + default=20, metavar="SECS", |
174 | + help="Requests taking more than SECS seconds are timeouts") |
175 | + parser.add_option( |
176 | + "--from", dest="from_ts", type="datetime", |
177 | + default=None, metavar="TIMESTAMP", |
178 | + help="Ignore log entries before TIMESTAMP") |
179 | + parser.add_option( |
180 | + "--until", dest="until_ts", type="datetime", |
181 | + default=None, metavar="TIMESTAMP", |
182 | + help="Ignore log entries after TIMESTAMP") |
183 | + options, args = parser.parse_args() |
184 | + if len(args) == 0: |
185 | + parser.error("At least one zserver tracelog file must be provided") |
186 | + |
187 | + if options.from_ts is not None and options.until_ts is not None: |
188 | + if options.from_ts > options.until_ts: |
189 | + parser.error( |
190 | + "--from timestamp %s is before --until timestamp %s" |
191 | + % (options.from_ts, options.until_ts)) |
192 | + |
193 | + for filename in args: |
194 | + if not os.path.exists(filename): |
195 | + parser.error("Tracelog file %s not found." % filename) |
196 | + |
197 | + if not os.path.exists(options.config): |
198 | + parser.error("Config file %s not found." % options.config) |
199 | + |
200 | + # XXX: Need a better config as ConfigParser doesn't preserve order. |
201 | + script_config = RawConfigParser() |
202 | + script_config.optionxform = str # Make keys case sensitive. |
203 | + script_config.readfp(open(options.config)) |
204 | + |
205 | + categories = [] # A list of Category, in report order. |
206 | + for option in script_config.options('categories'): |
207 | + regexp = script_config.get('categories', option) |
208 | + categories.append(Category(option, regexp, options.timeout)) |
209 | + |
210 | + if len(categories) == 0: |
211 | + parser.error("No data in [categories] section of configuration.") |
212 | + |
213 | + parse(args, categories, options) |
214 | + |
215 | + print_html_report(categories) |
216 | + |
217 | + return 0 |
218 | + |
219 | + |
220 | +def parse(tracefiles, categories, options): |
221 | + requests = {} |
222 | + for tracefile in tracefiles: |
223 | + for line in open(tracefile): |
224 | + record = line.split() |
225 | + record_type, request_id, date, time_ = record[:4] |
226 | + |
227 | + if record_type == 'S': |
228 | + continue # Short circuit - we don't care about these entries. |
229 | + |
230 | + dt = parsedt('%s %s' % (date, time_)) |
231 | + if options.from_ts is not None and dt < options.from_ts: |
232 | + continue # Skip to next line. |
233 | + if options.until_ts is not None and dt > options.until_ts: |
234 | + break # Skip to next log file. |
235 | + |
236 | + args = record[4:] |
237 | + |
238 | + if record_type == 'B': # Request begins. |
239 | + requests[request_id] = Request(dt, args[0], args[1]) |
240 | + continue |
241 | + |
242 | + request = requests.get(request_id, None) |
243 | + if request is None: # Just ignore partial records. |
244 | + continue |
245 | + |
246 | + if record_type == '-': # Extension record from Launchpad. |
247 | + # Launchpad outputs the full URL to the tracelog, |
248 | + # including protocol & hostname. Use this in favor of |
249 | + # the ZServer logged path. |
250 | + request.url = args[0] |
251 | + |
252 | + elif record_type == 'I': # Got request input. |
253 | + request.I(dt, args[0]) |
254 | + |
255 | + elif record_type == 'C': # Entered application thread. |
256 | + request.C(dt) |
257 | + |
258 | + elif record_type == 'A': # Application done. |
259 | + request.A(dt, *args) |
260 | + |
261 | + elif record_type == 'E': # Request done. |
262 | + del requests[request_id] |
263 | + request.E(dt) |
264 | + for category in categories: |
265 | + category.add(request) |
266 | + |
267 | + else: |
268 | + pass # Ignore malformed records. |
269 | + |
270 | + |
271 | +def print_html_report(categories): |
272 | + |
273 | + print dedent('''\ |
274 | + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" |
275 | + "http://www.w3.org/TR/html4/loose.dtd"> |
276 | + <html> |
277 | + <head> |
278 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> |
279 | + <title>Launchpad Page Performance Report %(date)s</title> |
280 | + <script language="javascript" type="text/javascript" |
281 | + src="http://people.canonical.com/~stub/flot/jquery.min.js" |
282 | + ></script> |
283 | + <script language="javascript" type="text/javascript" |
284 | + src="http://people.canonical.com/~stub/flot/jquery.flot.min.js" |
285 | + ></script> |
286 | + <style type="text/css"> |
287 | + h2 { font-weight: normal; font-size: 100%%; } |
288 | + thead th { padding-left: 1em; padding-right: 1em; } |
289 | + .category-title { text-align: right; padding-right: 2em; } |
290 | + .regexp { font-size: x-small; font-weight: normal; } |
291 | + .mean { text-align: right; padding-right: 1em; } |
292 | + .median { text-align: right; padding-right: 1em; } |
293 | + .standard-deviation { text-align: right; padding-right: 1em; } |
294 | + .histogram { padding: 0.5em 1em; width:400px; height:200px; } |
295 | + .odd-row { background-color: #eeeeff; } |
296 | + .even-row { background-color: #ffffee; } |
297 | + </style> |
298 | + </head> |
299 | + <body> |
300 | + <h1>Launchpad Page Performance Report</h1> |
301 | + <h2>%(date)s</h2> |
302 | + |
303 | + <table class="launchpad-performance-report"> |
304 | + <thead> |
305 | + <tr> |
306 | + <td></td> |
307 | + <th>Mean</th> |
308 | + <th>Median</th> |
309 | + <th>Standard<br/>Deviation</th> |
310 | + <th>Distribution</th> |
311 | + </tr> |
312 | + </thead> |
313 | + <tbody> |
314 | + ''' % {'date': time.ctime()}) |
315 | + |
316 | + histograms = [] |
317 | + for i in range(0, len(categories)): |
318 | + category = categories[i] |
319 | + row_class = "even-row" if i % 2 else "odd-row" |
320 | + mean, median, standard_deviation, histogram = category.times.stats() |
321 | + histograms.append(histogram) |
322 | + print dedent("""\ |
323 | + <tr class="%s"> |
324 | + <th class="category-title">%s <div class="regexp">%s</span></th> |
325 | + <td class="mean">%.2f s</td> |
326 | + <td class="median">%.2f s</td> |
327 | + <td class="standard-deviation">%.2f s</td> |
328 | + <td> |
329 | + <div class="histogram" id="histogram%d"></div> |
330 | + </td> |
331 | + </tr> |
332 | + """ % ( |
333 | + row_class, |
334 | + html_quote(category.title), html_quote(category.regexp), |
335 | + mean, median, standard_deviation, i)) |
336 | + |
337 | + print dedent("""\ |
338 | + <script language="javascript" type="text/javascript"> |
339 | + $(function () { |
340 | + var options = { |
341 | + series: { |
342 | + bars: {show: true} |
343 | + }, |
344 | + xaxis: { |
345 | + tickDecimals: 0, |
346 | + tickFormatter: function (val, axis) { |
347 | + return val.toFixed(axis.tickDecimals) + "s"; |
348 | + } |
349 | + }, |
350 | + yaxis: { |
351 | + min: 0, |
352 | + max: 1, |
353 | + tickDecimals: 0, |
354 | + tickFormatter: function (val, axis) { |
355 | + return (val * 100).toFixed(axis.tickDecimals) + "%"; |
356 | + } |
357 | + }, |
358 | + grid: { |
359 | + aboveData: true, |
360 | + labelMargin: 15 |
361 | + } |
362 | + }; |
363 | + """) |
364 | + |
365 | + for i in range(0, len(histograms)): |
366 | + histogram = histograms[i] |
367 | + if histogram is None: |
368 | + continue |
369 | + print dedent("""\ |
370 | + var d = %s; |
371 | + |
372 | + $.plot( |
373 | + $("#histogram%d"), |
374 | + [{data: d}], options); |
375 | + """ % (json.dumps(histogram), i)) |
376 | + |
377 | + print dedent("""\ |
378 | + }); |
379 | + </script> |
380 | + </body> |
381 | + </html> |
382 | + """) |
383 | + |
384 | |
385 | === added file 'utilities/page-performance-report.ini' |
386 | --- utilities/page-performance-report.ini 1970-01-01 00:00:00 +0000 |
387 | +++ utilities/page-performance-report.ini 2010-01-28 13:49:14 +0000 |
388 | @@ -0,0 +1,13 @@ |
389 | +[categories] |
390 | +All Launchpad=. |
391 | +All Registry=^https?://launchpad\. |
392 | +All Answers=^https?://answers\.launchpad\. |
393 | +All Blueprints=^https?://blueprints\.launchpad\. |
394 | +All Bugs=^https?://bugs\.launchpad\. |
395 | +All Code=^https?://code\.launchpad\. |
396 | +All Translations=^https?://translations\.launchpad\. |
397 | +All API=^https?://api\.launchpad\. |
398 | +All Public XML-RPC=^https?://xmlrpc\.launchpad\. |
399 | +All Private XML-RPC=^https?://xmlrpc-private\.launchpad\. |
400 | +All Shipit=^https?://shipit\. |
401 | + |
402 | |
403 | === added file 'utilities/page-performance-report.py' |
404 | --- utilities/page-performance-report.py 1970-01-01 00:00:00 +0000 |
405 | +++ utilities/page-performance-report.py 2010-01-28 13:49:14 +0000 |
406 | @@ -0,0 +1,18 @@ |
407 | +#!/usr/bin/python2.5 |
408 | +# |
409 | +# Copyright 2010 Canonical Ltd. This software is licensed under the |
410 | +# GNU Affero General Public License version 3 (see the file LICENSE). |
411 | + |
412 | +"""Page performance report generated from zserver tracelogs.""" |
413 | + |
414 | +__metaclass__ = type |
415 | + |
416 | +import _pythonpath |
417 | + |
418 | +import sys |
419 | + |
420 | +from lp.scripts.utilities.pageperformancereport import main |
421 | + |
422 | + |
423 | +if __name__ == '__main__': |
424 | + sys.exit(main()) |
We want to make slow pages faster. At the moment we do this by optimizing test pages, but we really have no idea if we have made other pages slower. When we optimize, we need to ensure the system improves overall rather than playing whack a mole, speeding up some pages while causing others to timeout.
This hasn't been a genuine problem so far, but we want to evaluate using memcache. memcache is unusual in that it will slow down the initial page renderings due to overhead in stuffing data into the caches, and speed up repeat renderings. This branch is to provide us with tools to confirm if our experiments succeeded or failed.
The reporting tool is basic, but meets our initial needs in the current state. I'll probably tweak it some more, but the reason to land this branch now is for the extra information the publication machinery is emitting to the Zope trace log. Once this is landed, we can turn on the logging on one of the edge servers and start to get some real data to analyze.