Merge lp:~stub/launchpad/page-performance-report into lp:launchpad

Proposed by Stuart Bishop
Status: Rejected
Rejected by: Stuart Bishop
Proposed branch: lp:~stub/launchpad/page-performance-report
Merge into: lp:launchpad
Diff against target: 424 lines (+383/-3)
5 files modified
lib/canonical/launchpad/webapp/publication.py (+10/-3)
lib/lp/scripts/helpers.py (+54/-0)
lib/lp/scripts/utilities/pageperformancereport.py (+288/-0)
utilities/page-performance-report.ini (+13/-0)
utilities/page-performance-report.py (+18/-0)
To merge this branch: bzr merge lp:~stub/launchpad/page-performance-report
Reviewer Review Type Date Requested Status
Jeroen T. Vermeulen Pending
Review via email: mp+18198@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Stuart Bishop (stub) wrote :

We want to make slow pages faster. At the moment we do this by optimizing test pages, but we really have no idea if we have made other pages slower. When we optimize, we need to ensure the system improves overall rather than playing whack a mole, speeding up some pages while causing others to timeout.

This hasn't been a genuine problem so far, but we want to evaluate using memcache. memcache is unusual in that it will slow down the initial page renderings due to overhead in stuffing data into the caches, and speed up repeat renderings. This branch is to provide us with tools to confirm if our experiments succeeded or failed.

The reporting tool is basic, but meets our initial needs in the current state. I'll probably tweak it some more, but the reason to land this branch now is for the extra information the publication machinery is emitting to the Zope trace log. Once this is landed, we can turn on the logging on one of the edge servers and start to get some real data to analyze.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/canonical/launchpad/webapp/publication.py'
2--- lib/canonical/launchpad/webapp/publication.py 2010-01-19 04:22:17 +0000
3+++ lib/canonical/launchpad/webapp/publication.py 2010-01-28 13:49:14 +0000
4@@ -28,8 +28,8 @@
5 from storm.exceptions import DisconnectionError, IntegrityError
6 from storm.zope.interfaces import IZStorm
7
8+from zc.zservertracelog.interfaces import ITraceLog
9 import zope.app.publication.browser
10-
11 from zope.app import zapi # used to get at the adapters service
12 from zope.app.publication.interfaces import BeforeTraverseEvent
13 from zope.app.security.interfaces import IUnauthenticatedPrincipal
14@@ -459,8 +459,15 @@
15 notify(BeforeTraverseEvent(ob, request))
16
17 def afterTraversal(self, request, ob):
18- """ We don't want to call _maybePlacefullyAuthenticate as does
19- zopepublication."""
20+ """See zope.publisher.interfaces.IPublication.
21+
22+ This hook does not invoke our parent's afterTraversal hook
23+ in zopepublication.py because we don't want to call
24+ _maybePlacefullyAuthenticate.
25+ """
26+ # Log the URL including vhost information to the ZServer tracelog.
27+ ITraceLog(request).log(request.getURL())
28+
29 assert hasattr(request, '_traversalticks_start'), (
30 'request._traversalticks_start, which should have been set by '
31 'beforeTraversal(), was not found.')
32
33=== added file 'lib/lp/scripts/helpers.py'
34--- lib/lp/scripts/helpers.py 1970-01-01 00:00:00 +0000
35+++ lib/lp/scripts/helpers.py 2010-01-28 13:49:14 +0000
36@@ -0,0 +1,54 @@
37+# Copyright 2010 Canonical Ltd. This software is licensed under the
38+# GNU Affero General Public License version 3 (see the file LICENSE).
39+
40+"""Helpers for command line tools."""
41+
42+__metaclass__ = type
43+__all__ = ["LPOptionParser"]
44+
45+from copy import copy
46+from datetime import datetime
47+from optparse import Option, OptionParser, OptionValueError
48+
49+from canonical.launchpad.scripts.logger import logger_options
50+
51+
52+def _check_datetime(option, opt, value):
53+ "Type checker for optparse datetime option type."
54+ # We support 5 valid ISO8601 formats.
55+ formats = [
56+ '%Y-%m-%dT%H:%M:%S',
57+ '%Y-%m-%dT%H:%M',
58+ '%Y-%m-%d',
59+ '%Y-%m-%d %H:%M:%S',
60+ '%Y-%m-%d %H:%M'
61+ ]
62+ for format in formats:
63+ try:
64+ return datetime.strptime(value, format)
65+ except ValueError:
66+ pass
67+ raise OptionValueError(
68+ "option %s: invalid datetime value: %r" % (opt, value))
69+
70+
71+class LPOption(Option):
72+ """Extended optparse Option class.
73+
74+ Adds a 'datetime' option type.
75+ """
76+ TYPES = Option.TYPES + ("datetime",)
77+ TYPE_CHECKER = copy(Option.TYPE_CHECKER)
78+ TYPE_CHECKER["datetime"] = _check_datetime
79+
80+
81+class LPOptionParser(OptionParser):
82+ """Extended optparse OptionParser.
83+
84+ Adds a 'datetime' option type.
85+ """
86+ def __init__(self, *args, **kw):
87+ kw.setdefault('option_class', LPOption)
88+ OptionParser.__init__(self, *args, **kw)
89+ logger_options(self)
90+
91
92=== added file 'lib/lp/scripts/utilities/pageperformancereport.py'
93--- lib/lp/scripts/utilities/pageperformancereport.py 1970-01-01 00:00:00 +0000
94+++ lib/lp/scripts/utilities/pageperformancereport.py 2010-01-28 13:49:14 +0000
95@@ -0,0 +1,288 @@
96+# Copyright 2010 Canonical Ltd. This software is licensed under the
97+# GNU Affero General Public License version 3 (see the file LICENSE).
98+
99+"""Page performance report generated from zserver trace logs."""
100+
101+__metaclass__ = type
102+__all__ = []
103+
104+from cgi import escape as html_quote
105+from ConfigParser import RawConfigParser
106+import re
107+import os.path
108+from textwrap import dedent
109+import time
110+
111+import numpy
112+import simplejson as json
113+from zc.zservertracelog.tracereport import Request, parsedt
114+
115+from canonical.config import config
116+from lp.scripts.helpers import LPOptionParser
117+
118+
119+class Category:
120+ def __init__(self, title, regexp, timeout):
121+ self.title = title
122+ self.regexp = regexp
123+ self._compiled_regexp = re.compile(regexp)
124+ self.times = Times(timeout)
125+
126+ def add(self, request):
127+ if self._compiled_regexp.search(request.url) is not None:
128+ self.times.add(request)
129+
130+
131+class Times:
132+ def __init__(self, timeout):
133+ self.requests = []
134+ self.timeout = timeout
135+
136+ def add(self, request):
137+ self.requests.append(request)
138+
139+ def stats(self):
140+ num_requests = len(self.requests)
141+ if num_requests == 0:
142+ return 0, 0, 0, None
143+ array = numpy.fromiter(
144+ (min(request.app_seconds, self.timeout)
145+ for request in self.requests),
146+ numpy.float, num_requests)
147+ mean = numpy.mean(array)
148+ median = numpy.median(array)
149+ standard_deviation = numpy.std(array)
150+ histogram = numpy.histogram(
151+ array, normed=True,
152+ range=(0, self.timeout), bins=self.timeout)
153+ histogram = zip(histogram[1], histogram[0])
154+ return mean, median, standard_deviation, histogram
155+
156+ def __str__(self):
157+ results = self.stats()
158+ mean, median, standard_deviation, histogram = results
159+ hstr = " ".join("%2d" % v for v in histogram)
160+ return "%2.2f %2.2f %2.2f %s" % (
161+ mean, median, standard_deviation, hstr)
162+
163+
164+def main():
165+ parser = LPOptionParser("%prog [args] tracelog [...]")
166+ parser.add_option(
167+ "-c", "--config", dest="config",
168+ default=os.path.join(
169+ config.root, "utilities", "page-performance-report.ini"),
170+ metavar="FILE", help="Load configuration from FILE")
171+ parser.add_option(
172+ "--timeout", dest="timeout", type="int",
173+ default=20, metavar="SECS",
174+ help="Requests taking more than SECS seconds are timeouts")
175+ parser.add_option(
176+ "--from", dest="from_ts", type="datetime",
177+ default=None, metavar="TIMESTAMP",
178+ help="Ignore log entries before TIMESTAMP")
179+ parser.add_option(
180+ "--until", dest="until_ts", type="datetime",
181+ default=None, metavar="TIMESTAMP",
182+ help="Ignore log entries after TIMESTAMP")
183+ options, args = parser.parse_args()
184+ if len(args) == 0:
185+ parser.error("At least one zserver tracelog file must be provided")
186+
187+ if options.from_ts is not None and options.until_ts is not None:
188+ if options.from_ts > options.until_ts:
189+ parser.error(
190+ "--from timestamp %s is before --until timestamp %s"
191+ % (options.from_ts, options.until_ts))
192+
193+ for filename in args:
194+ if not os.path.exists(filename):
195+ parser.error("Tracelog file %s not found." % filename)
196+
197+ if not os.path.exists(options.config):
198+ parser.error("Config file %s not found." % options.config)
199+
200+ # XXX: Need a better config as ConfigParser doesn't preserve order.
201+ script_config = RawConfigParser()
202+ script_config.optionxform = str # Make keys case sensitive.
203+ script_config.readfp(open(options.config))
204+
205+ categories = [] # A list of Category, in report order.
206+ for option in script_config.options('categories'):
207+ regexp = script_config.get('categories', option)
208+ categories.append(Category(option, regexp, options.timeout))
209+
210+ if len(categories) == 0:
211+ parser.error("No data in [categories] section of configuration.")
212+
213+ parse(args, categories, options)
214+
215+ print_html_report(categories)
216+
217+ return 0
218+
219+
220+def parse(tracefiles, categories, options):
221+ requests = {}
222+ for tracefile in tracefiles:
223+ for line in open(tracefile):
224+ record = line.split()
225+ record_type, request_id, date, time_ = record[:4]
226+
227+ if record_type == 'S':
228+ continue # Short circuit - we don't care about these entries.
229+
230+ dt = parsedt('%s %s' % (date, time_))
231+ if options.from_ts is not None and dt < options.from_ts:
232+ continue # Skip to next line.
233+ if options.until_ts is not None and dt > options.until_ts:
234+ break # Skip to next log file.
235+
236+ args = record[4:]
237+
238+ if record_type == 'B': # Request begins.
239+ requests[request_id] = Request(dt, args[0], args[1])
240+ continue
241+
242+ request = requests.get(request_id, None)
243+ if request is None: # Just ignore partial records.
244+ continue
245+
246+ if record_type == '-': # Extension record from Launchpad.
247+ # Launchpad outputs the full URL to the tracelog,
248+ # including protocol & hostname. Use this in favor of
249+ # the ZServer logged path.
250+ request.url = args[0]
251+
252+ elif record_type == 'I': # Got request input.
253+ request.I(dt, args[0])
254+
255+ elif record_type == 'C': # Entered application thread.
256+ request.C(dt)
257+
258+ elif record_type == 'A': # Application done.
259+ request.A(dt, *args)
260+
261+ elif record_type == 'E': # Request done.
262+ del requests[request_id]
263+ request.E(dt)
264+ for category in categories:
265+ category.add(request)
266+
267+ else:
268+ pass # Ignore malformed records.
269+
270+
271+def print_html_report(categories):
272+
273+ print dedent('''\
274+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
275+ "http://www.w3.org/TR/html4/loose.dtd">
276+ <html>
277+ <head>
278+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
279+ <title>Launchpad Page Performance Report %(date)s</title>
280+ <script language="javascript" type="text/javascript"
281+ src="http://people.canonical.com/~stub/flot/jquery.min.js"
282+ ></script>
283+ <script language="javascript" type="text/javascript"
284+ src="http://people.canonical.com/~stub/flot/jquery.flot.min.js"
285+ ></script>
286+ <style type="text/css">
287+ h2 { font-weight: normal; font-size: 100%%; }
288+ thead th { padding-left: 1em; padding-right: 1em; }
289+ .category-title { text-align: right; padding-right: 2em; }
290+ .regexp { font-size: x-small; font-weight: normal; }
291+ .mean { text-align: right; padding-right: 1em; }
292+ .median { text-align: right; padding-right: 1em; }
293+ .standard-deviation { text-align: right; padding-right: 1em; }
294+ .histogram { padding: 0.5em 1em; width:400px; height:200px; }
295+ .odd-row { background-color: #eeeeff; }
296+ .even-row { background-color: #ffffee; }
297+ </style>
298+ </head>
299+ <body>
300+ <h1>Launchpad Page Performance Report</h1>
301+ <h2>%(date)s</h2>
302+
303+ <table class="launchpad-performance-report">
304+ <thead>
305+ <tr>
306+ <td></td>
307+ <th>Mean</th>
308+ <th>Median</th>
309+ <th>Standard<br/>Deviation</th>
310+ <th>Distribution</th>
311+ </tr>
312+ </thead>
313+ <tbody>
314+ ''' % {'date': time.ctime()})
315+
316+ histograms = []
317+ for i in range(0, len(categories)):
318+ category = categories[i]
319+ row_class = "even-row" if i % 2 else "odd-row"
320+ mean, median, standard_deviation, histogram = category.times.stats()
321+ histograms.append(histogram)
322+ print dedent("""\
323+ <tr class="%s">
324+ <th class="category-title">%s <div class="regexp">%s</span></th>
325+ <td class="mean">%.2f s</td>
326+ <td class="median">%.2f s</td>
327+ <td class="standard-deviation">%.2f s</td>
328+ <td>
329+ <div class="histogram" id="histogram%d"></div>
330+ </td>
331+ </tr>
332+ """ % (
333+ row_class,
334+ html_quote(category.title), html_quote(category.regexp),
335+ mean, median, standard_deviation, i))
336+
337+ print dedent("""\
338+ <script language="javascript" type="text/javascript">
339+ $(function () {
340+ var options = {
341+ series: {
342+ bars: {show: true}
343+ },
344+ xaxis: {
345+ tickDecimals: 0,
346+ tickFormatter: function (val, axis) {
347+ return val.toFixed(axis.tickDecimals) + "s";
348+ }
349+ },
350+ yaxis: {
351+ min: 0,
352+ max: 1,
353+ tickDecimals: 0,
354+ tickFormatter: function (val, axis) {
355+ return (val * 100).toFixed(axis.tickDecimals) + "%";
356+ }
357+ },
358+ grid: {
359+ aboveData: true,
360+ labelMargin: 15
361+ }
362+ };
363+ """)
364+
365+ for i in range(0, len(histograms)):
366+ histogram = histograms[i]
367+ if histogram is None:
368+ continue
369+ print dedent("""\
370+ var d = %s;
371+
372+ $.plot(
373+ $("#histogram%d"),
374+ [{data: d}], options);
375+ """ % (json.dumps(histogram), i))
376+
377+ print dedent("""\
378+ });
379+ </script>
380+ </body>
381+ </html>
382+ """)
383+
384
385=== added file 'utilities/page-performance-report.ini'
386--- utilities/page-performance-report.ini 1970-01-01 00:00:00 +0000
387+++ utilities/page-performance-report.ini 2010-01-28 13:49:14 +0000
388@@ -0,0 +1,13 @@
389+[categories]
390+All Launchpad=.
391+All Registry=^https?://launchpad\.
392+All Answers=^https?://answers\.launchpad\.
393+All Blueprints=^https?://blueprints\.launchpad\.
394+All Bugs=^https?://bugs\.launchpad\.
395+All Code=^https?://code\.launchpad\.
396+All Translations=^https?://translations\.launchpad\.
397+All API=^https?://api\.launchpad\.
398+All Public XML-RPC=^https?://xmlrpc\.launchpad\.
399+All Private XML-RPC=^https?://xmlrpc-private\.launchpad\.
400+All Shipit=^https?://shipit\.
401+
402
403=== added file 'utilities/page-performance-report.py'
404--- utilities/page-performance-report.py 1970-01-01 00:00:00 +0000
405+++ utilities/page-performance-report.py 2010-01-28 13:49:14 +0000
406@@ -0,0 +1,18 @@
407+#!/usr/bin/python2.5
408+#
409+# Copyright 2010 Canonical Ltd. This software is licensed under the
410+# GNU Affero General Public License version 3 (see the file LICENSE).
411+
412+"""Page performance report generated from zserver tracelogs."""
413+
414+__metaclass__ = type
415+
416+import _pythonpath
417+
418+import sys
419+
420+from lp.scripts.utilities.pageperformancereport import main
421+
422+
423+if __name__ == '__main__':
424+ sys.exit(main())