Merge lp:~stub/launchpad/page-performance-report into lp:launchpad

Proposed by Stuart Bishop
Status: Rejected
Rejected by: Stuart Bishop
Proposed branch: lp:~stub/launchpad/page-performance-report
Merge into: lp:launchpad
Diff against target: 424 lines (+383/-3)
5 files modified
lib/canonical/launchpad/webapp/publication.py (+10/-3)
lib/lp/scripts/helpers.py (+54/-0)
lib/lp/scripts/utilities/pageperformancereport.py (+288/-0)
utilities/page-performance-report.ini (+13/-0)
utilities/page-performance-report.py (+18/-0)
To merge this branch: bzr merge lp:~stub/launchpad/page-performance-report
Reviewer Review Type Date Requested Status
Jeroen T. Vermeulen Pending
Review via email: mp+18198@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Stuart Bishop (stub) wrote :

We want to make slow pages faster. At the moment we do this by optimizing test pages, but we really have no idea if we have made other pages slower. When we optimize, we need to ensure the system improves overall rather than playing whack a mole, speeding up some pages while causing others to timeout.

This hasn't been a genuine problem so far, but we want to evaluate using memcache. memcache is unusual in that it will slow down the initial page renderings due to overhead in stuffing data into the caches, and speed up repeat renderings. This branch is to provide us with tools to confirm if our experiments succeeded or failed.

The reporting tool is basic, but meets our initial needs in the current state. I'll probably tweak it some more, but the reason to land this branch now is for the extra information the publication machinery is emitting to the Zope trace log. Once this is landed, we can turn on the logging on one of the edge servers and start to get some real data to analyze.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/canonical/launchpad/webapp/publication.py'
--- lib/canonical/launchpad/webapp/publication.py 2010-01-19 04:22:17 +0000
+++ lib/canonical/launchpad/webapp/publication.py 2010-01-28 13:49:14 +0000
@@ -28,8 +28,8 @@
28from storm.exceptions import DisconnectionError, IntegrityError28from storm.exceptions import DisconnectionError, IntegrityError
29from storm.zope.interfaces import IZStorm29from storm.zope.interfaces import IZStorm
3030
31from zc.zservertracelog.interfaces import ITraceLog
31import zope.app.publication.browser32import zope.app.publication.browser
32
33from zope.app import zapi # used to get at the adapters service33from zope.app import zapi # used to get at the adapters service
34from zope.app.publication.interfaces import BeforeTraverseEvent34from zope.app.publication.interfaces import BeforeTraverseEvent
35from zope.app.security.interfaces import IUnauthenticatedPrincipal35from zope.app.security.interfaces import IUnauthenticatedPrincipal
@@ -459,8 +459,15 @@
459 notify(BeforeTraverseEvent(ob, request))459 notify(BeforeTraverseEvent(ob, request))
460460
461 def afterTraversal(self, request, ob):461 def afterTraversal(self, request, ob):
462 """ We don't want to call _maybePlacefullyAuthenticate as does462 """See zope.publisher.interfaces.IPublication.
463 zopepublication."""463
464 This hook does not invoke our parent's afterTraversal hook
465 in zopepublication.py because we don't want to call
466 _maybePlacefullyAuthenticate.
467 """
468 # Log the URL including vhost information to the ZServer tracelog.
469 ITraceLog(request).log(request.getURL())
470
464 assert hasattr(request, '_traversalticks_start'), (471 assert hasattr(request, '_traversalticks_start'), (
465 'request._traversalticks_start, which should have been set by '472 'request._traversalticks_start, which should have been set by '
466 'beforeTraversal(), was not found.')473 'beforeTraversal(), was not found.')
467474
=== added file 'lib/lp/scripts/helpers.py'
--- lib/lp/scripts/helpers.py 1970-01-01 00:00:00 +0000
+++ lib/lp/scripts/helpers.py 2010-01-28 13:49:14 +0000
@@ -0,0 +1,54 @@
1# Copyright 2010 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).
3
4"""Helpers for command line tools."""
5
6__metaclass__ = type
7__all__ = ["LPOptionParser"]
8
9from copy import copy
10from datetime import datetime
11from optparse import Option, OptionParser, OptionValueError
12
13from canonical.launchpad.scripts.logger import logger_options
14
15
16def _check_datetime(option, opt, value):
17 "Type checker for optparse datetime option type."
18 # We support 5 valid ISO8601 formats.
19 formats = [
20 '%Y-%m-%dT%H:%M:%S',
21 '%Y-%m-%dT%H:%M',
22 '%Y-%m-%d',
23 '%Y-%m-%d %H:%M:%S',
24 '%Y-%m-%d %H:%M'
25 ]
26 for format in formats:
27 try:
28 return datetime.strptime(value, format)
29 except ValueError:
30 pass
31 raise OptionValueError(
32 "option %s: invalid datetime value: %r" % (opt, value))
33
34
35class LPOption(Option):
36 """Extended optparse Option class.
37
38 Adds a 'datetime' option type.
39 """
40 TYPES = Option.TYPES + ("datetime",)
41 TYPE_CHECKER = copy(Option.TYPE_CHECKER)
42 TYPE_CHECKER["datetime"] = _check_datetime
43
44
45class LPOptionParser(OptionParser):
46 """Extended optparse OptionParser.
47
48 Adds a 'datetime' option type.
49 """
50 def __init__(self, *args, **kw):
51 kw.setdefault('option_class', LPOption)
52 OptionParser.__init__(self, *args, **kw)
53 logger_options(self)
54
055
=== added file 'lib/lp/scripts/utilities/pageperformancereport.py'
--- lib/lp/scripts/utilities/pageperformancereport.py 1970-01-01 00:00:00 +0000
+++ lib/lp/scripts/utilities/pageperformancereport.py 2010-01-28 13:49:14 +0000
@@ -0,0 +1,288 @@
1# Copyright 2010 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).
3
4"""Page performance report generated from zserver trace logs."""
5
6__metaclass__ = type
7__all__ = []
8
9from cgi import escape as html_quote
10from ConfigParser import RawConfigParser
11import re
12import os.path
13from textwrap import dedent
14import time
15
16import numpy
17import simplejson as json
18from zc.zservertracelog.tracereport import Request, parsedt
19
20from canonical.config import config
21from lp.scripts.helpers import LPOptionParser
22
23
24class Category:
25 def __init__(self, title, regexp, timeout):
26 self.title = title
27 self.regexp = regexp
28 self._compiled_regexp = re.compile(regexp)
29 self.times = Times(timeout)
30
31 def add(self, request):
32 if self._compiled_regexp.search(request.url) is not None:
33 self.times.add(request)
34
35
36class Times:
37 def __init__(self, timeout):
38 self.requests = []
39 self.timeout = timeout
40
41 def add(self, request):
42 self.requests.append(request)
43
44 def stats(self):
45 num_requests = len(self.requests)
46 if num_requests == 0:
47 return 0, 0, 0, None
48 array = numpy.fromiter(
49 (min(request.app_seconds, self.timeout)
50 for request in self.requests),
51 numpy.float, num_requests)
52 mean = numpy.mean(array)
53 median = numpy.median(array)
54 standard_deviation = numpy.std(array)
55 histogram = numpy.histogram(
56 array, normed=True,
57 range=(0, self.timeout), bins=self.timeout)
58 histogram = zip(histogram[1], histogram[0])
59 return mean, median, standard_deviation, histogram
60
61 def __str__(self):
62 results = self.stats()
63 mean, median, standard_deviation, histogram = results
64 hstr = " ".join("%2d" % v for v in histogram)
65 return "%2.2f %2.2f %2.2f %s" % (
66 mean, median, standard_deviation, hstr)
67
68
69def main():
70 parser = LPOptionParser("%prog [args] tracelog [...]")
71 parser.add_option(
72 "-c", "--config", dest="config",
73 default=os.path.join(
74 config.root, "utilities", "page-performance-report.ini"),
75 metavar="FILE", help="Load configuration from FILE")
76 parser.add_option(
77 "--timeout", dest="timeout", type="int",
78 default=20, metavar="SECS",
79 help="Requests taking more than SECS seconds are timeouts")
80 parser.add_option(
81 "--from", dest="from_ts", type="datetime",
82 default=None, metavar="TIMESTAMP",
83 help="Ignore log entries before TIMESTAMP")
84 parser.add_option(
85 "--until", dest="until_ts", type="datetime",
86 default=None, metavar="TIMESTAMP",
87 help="Ignore log entries after TIMESTAMP")
88 options, args = parser.parse_args()
89 if len(args) == 0:
90 parser.error("At least one zserver tracelog file must be provided")
91
92 if options.from_ts is not None and options.until_ts is not None:
93 if options.from_ts > options.until_ts:
94 parser.error(
95 "--from timestamp %s is before --until timestamp %s"
96 % (options.from_ts, options.until_ts))
97
98 for filename in args:
99 if not os.path.exists(filename):
100 parser.error("Tracelog file %s not found." % filename)
101
102 if not os.path.exists(options.config):
103 parser.error("Config file %s not found." % options.config)
104
105 # XXX: Need a better config as ConfigParser doesn't preserve order.
106 script_config = RawConfigParser()
107 script_config.optionxform = str # Make keys case sensitive.
108 script_config.readfp(open(options.config))
109
110 categories = [] # A list of Category, in report order.
111 for option in script_config.options('categories'):
112 regexp = script_config.get('categories', option)
113 categories.append(Category(option, regexp, options.timeout))
114
115 if len(categories) == 0:
116 parser.error("No data in [categories] section of configuration.")
117
118 parse(args, categories, options)
119
120 print_html_report(categories)
121
122 return 0
123
124
125def parse(tracefiles, categories, options):
126 requests = {}
127 for tracefile in tracefiles:
128 for line in open(tracefile):
129 record = line.split()
130 record_type, request_id, date, time_ = record[:4]
131
132 if record_type == 'S':
133 continue # Short circuit - we don't care about these entries.
134
135 dt = parsedt('%s %s' % (date, time_))
136 if options.from_ts is not None and dt < options.from_ts:
137 continue # Skip to next line.
138 if options.until_ts is not None and dt > options.until_ts:
139 break # Skip to next log file.
140
141 args = record[4:]
142
143 if record_type == 'B': # Request begins.
144 requests[request_id] = Request(dt, args[0], args[1])
145 continue
146
147 request = requests.get(request_id, None)
148 if request is None: # Just ignore partial records.
149 continue
150
151 if record_type == '-': # Extension record from Launchpad.
152 # Launchpad outputs the full URL to the tracelog,
153 # including protocol & hostname. Use this in favor of
154 # the ZServer logged path.
155 request.url = args[0]
156
157 elif record_type == 'I': # Got request input.
158 request.I(dt, args[0])
159
160 elif record_type == 'C': # Entered application thread.
161 request.C(dt)
162
163 elif record_type == 'A': # Application done.
164 request.A(dt, *args)
165
166 elif record_type == 'E': # Request done.
167 del requests[request_id]
168 request.E(dt)
169 for category in categories:
170 category.add(request)
171
172 else:
173 pass # Ignore malformed records.
174
175
176def print_html_report(categories):
177
178 print dedent('''\
179 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
180 "http://www.w3.org/TR/html4/loose.dtd">
181 <html>
182 <head>
183 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
184 <title>Launchpad Page Performance Report %(date)s</title>
185 <script language="javascript" type="text/javascript"
186 src="http://people.canonical.com/~stub/flot/jquery.min.js"
187 ></script>
188 <script language="javascript" type="text/javascript"
189 src="http://people.canonical.com/~stub/flot/jquery.flot.min.js"
190 ></script>
191 <style type="text/css">
192 h2 { font-weight: normal; font-size: 100%%; }
193 thead th { padding-left: 1em; padding-right: 1em; }
194 .category-title { text-align: right; padding-right: 2em; }
195 .regexp { font-size: x-small; font-weight: normal; }
196 .mean { text-align: right; padding-right: 1em; }
197 .median { text-align: right; padding-right: 1em; }
198 .standard-deviation { text-align: right; padding-right: 1em; }
199 .histogram { padding: 0.5em 1em; width:400px; height:200px; }
200 .odd-row { background-color: #eeeeff; }
201 .even-row { background-color: #ffffee; }
202 </style>
203 </head>
204 <body>
205 <h1>Launchpad Page Performance Report</h1>
206 <h2>%(date)s</h2>
207
208 <table class="launchpad-performance-report">
209 <thead>
210 <tr>
211 <td></td>
212 <th>Mean</th>
213 <th>Median</th>
214 <th>Standard<br/>Deviation</th>
215 <th>Distribution</th>
216 </tr>
217 </thead>
218 <tbody>
219 ''' % {'date': time.ctime()})
220
221 histograms = []
222 for i in range(0, len(categories)):
223 category = categories[i]
224 row_class = "even-row" if i % 2 else "odd-row"
225 mean, median, standard_deviation, histogram = category.times.stats()
226 histograms.append(histogram)
227 print dedent("""\
228 <tr class="%s">
229 <th class="category-title">%s <div class="regexp">%s</span></th>
230 <td class="mean">%.2f s</td>
231 <td class="median">%.2f s</td>
232 <td class="standard-deviation">%.2f s</td>
233 <td>
234 <div class="histogram" id="histogram%d"></div>
235 </td>
236 </tr>
237 """ % (
238 row_class,
239 html_quote(category.title), html_quote(category.regexp),
240 mean, median, standard_deviation, i))
241
242 print dedent("""\
243 <script language="javascript" type="text/javascript">
244 $(function () {
245 var options = {
246 series: {
247 bars: {show: true}
248 },
249 xaxis: {
250 tickDecimals: 0,
251 tickFormatter: function (val, axis) {
252 return val.toFixed(axis.tickDecimals) + "s";
253 }
254 },
255 yaxis: {
256 min: 0,
257 max: 1,
258 tickDecimals: 0,
259 tickFormatter: function (val, axis) {
260 return (val * 100).toFixed(axis.tickDecimals) + "%";
261 }
262 },
263 grid: {
264 aboveData: true,
265 labelMargin: 15
266 }
267 };
268 """)
269
270 for i in range(0, len(histograms)):
271 histogram = histograms[i]
272 if histogram is None:
273 continue
274 print dedent("""\
275 var d = %s;
276
277 $.plot(
278 $("#histogram%d"),
279 [{data: d}], options);
280 """ % (json.dumps(histogram), i))
281
282 print dedent("""\
283 });
284 </script>
285 </body>
286 </html>
287 """)
288
0289
=== added file 'utilities/page-performance-report.ini'
--- utilities/page-performance-report.ini 1970-01-01 00:00:00 +0000
+++ utilities/page-performance-report.ini 2010-01-28 13:49:14 +0000
@@ -0,0 +1,13 @@
1[categories]
2All Launchpad=.
3All Registry=^https?://launchpad\.
4All Answers=^https?://answers\.launchpad\.
5All Blueprints=^https?://blueprints\.launchpad\.
6All Bugs=^https?://bugs\.launchpad\.
7All Code=^https?://code\.launchpad\.
8All Translations=^https?://translations\.launchpad\.
9All API=^https?://api\.launchpad\.
10All Public XML-RPC=^https?://xmlrpc\.launchpad\.
11All Private XML-RPC=^https?://xmlrpc-private\.launchpad\.
12All Shipit=^https?://shipit\.
13
014
=== added file 'utilities/page-performance-report.py'
--- utilities/page-performance-report.py 1970-01-01 00:00:00 +0000
+++ utilities/page-performance-report.py 2010-01-28 13:49:14 +0000
@@ -0,0 +1,18 @@
1#!/usr/bin/python2.5
2#
3# Copyright 2010 Canonical Ltd. This software is licensed under the
4# GNU Affero General Public License version 3 (see the file LICENSE).
5
6"""Page performance report generated from zserver tracelogs."""
7
8__metaclass__ = type
9
10import _pythonpath
11
12import sys
13
14from lp.scripts.utilities.pageperformancereport import main
15
16
17if __name__ == '__main__':
18 sys.exit(main())