Merge lp:~stub/launchpad/page-performance-report into lp:launchpad

Proposed by Stuart Bishop
Status: Merged
Approved by: Stuart Bishop
Approved revision: no longer in the source branch.
Merged at revision: 11099
Proposed branch: lp:~stub/launchpad/page-performance-report
Merge into: lp:launchpad
Diff against target: 559 lines (+281/-75)
5 files modified
lib/lp/scripts/utilities/pageperformancereport.py (+177/-49)
setup.py (+1/-0)
utilities/page-performance-report-daily.sh (+64/-0)
utilities/page-performance-report.ini (+38/-26)
versions.cfg (+1/-0)
To merge this branch: bzr merge lp:~stub/launchpad/page-performance-report
Reviewer Review Type Date Requested Status
Jelmer Vernooij (community) code Approve
Review via email: mp+28894@code.launchpad.net

Commit message

Further improvements to the page performance report.

Description of the change

Further improvements to the page performance report. Example report from live data is at https://people.canonical.com/~stub/ppr_test.html.

Adds statistics for sql times and sql statement counts.

Histograms for these have not been added due to reports of the report crashing browsers... we need some java script lovin' to render the histograms on request, but that is for a future landing.

To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) wrote :

The https version of that page doesn't seem to work here, but http://people.canonical.com/~stub/ppr_test.html works fine.

The page is indeed already quite slow as is, thanks for not adding those extra histograms. It's also becoming quite wide making it hard to read.

review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/scripts/utilities/pageperformancereport.py'
2--- lib/lp/scripts/utilities/pageperformancereport.py 2010-06-18 04:10:22 +0000
3+++ lib/lp/scripts/utilities/pageperformancereport.py 2010-07-08 13:40:54 +0000
4@@ -13,6 +13,7 @@
5 import gzip
6 import re
7 import sre_constants
8+from tempfile import TemporaryFile
9 import os.path
10 from textwrap import dedent
11 import time
12@@ -29,6 +30,9 @@
13 class Request(zc.zservertracelog.tracereport.Request):
14 url = None
15 pageid = None
16+ ticks = None
17+ sql_statements = None
18+ sql_seconds = None
19
20 # Override the broken version in our superclass that always
21 # returns an integer.
22@@ -73,20 +77,36 @@
23
24 All times are in seconds.
25 """
26+ total_hits = 0 # Total hits.
27+
28 total_time = 0 # Total time spent rendering.
29- total_hits = 0 # Total hits.
30 mean = 0 # Mean time per hit.
31 median = 0 # Median time per hit.
32- standard_deviation = 0 # Standard deviation per hit.
33+ std = 0 # Standard deviation per hit.
34+ var = 0 # Variance per hit.
35 histogram = None # # Request times histogram.
36
37-empty_stats = Stats() # Singleton.
38+ total_sqltime = 0 # Total time spent waiting for SQL to process.
39+ mean_sqltime = 0 # Mean time spend waiting for SQL to process.
40+ median_sqltime = 0 # Median time spend waiting for SQL to process.
41+ std_sqltime = 0 # Standard deviation of SQL time.
42+ var_sqltime = 0 # Variance of SQL time
43+
44+ total_sqlstatements = 0 # Total number of SQL statements issued.
45+ mean_sqlstatements = 0
46+ median_sqlstatements = 0
47+ std_sqlstatements = 0
48+ var_sqlstatements = 0
49
50
51 class Times:
52 """Collection of request times."""
53 def __init__(self, timeout):
54+ self.spool = TemporaryFile()
55 self.request_times = []
56+ self.sql_statements = []
57+ self.sql_times = []
58+ self.ticks = []
59 self.timeout = timeout
60
61 def add(self, request):
62@@ -94,7 +114,13 @@
63
64 The application time is capped to our timeout.
65 """
66- self.request_times.append(min(request.app_seconds, self.timeout))
67+ print >> self.spool, "%s,%s,%s,%s" % (
68+ min(request.app_seconds, self.timeout),
69+ request.sql_statements or '',
70+ request.sql_seconds or '',
71+ request.ticks or '')
72+
73+ _stats = None
74
75 def stats(self):
76 """Generate statistics about our request times.
77@@ -107,31 +133,71 @@
78 1 and 2 seconds etc. histogram is None if there are no requests in
79 this Category.
80 """
81- if not self.request_times:
82- return empty_stats
83+ if self._stats is not None:
84+ return self._stats
85+
86+ def iter_spool(index, cast):
87+ """Generator returning one column from our spool file.
88+
89+ Skips None values.
90+ """
91+ self.spool.flush()
92+ self.spool.seek(0)
93+ for line in self.spool:
94+ value = line.split(',')[index]
95+ if value != '':
96+ yield cast(value)
97+
98 stats = Stats()
99- array = numpy.asarray(self.request_times, numpy.float32)
100+
101+ # Time stats
102+ array = numpy.fromiter(iter_spool(0, numpy.float32), numpy.float32)
103 stats.total_time = numpy.sum(array)
104 stats.total_hits = len(array)
105 stats.mean = numpy.mean(array)
106 stats.median = numpy.median(array)
107- stats.standard_deviation = numpy.std(array)
108+ stats.std = numpy.std(array)
109+ stats.var = numpy.var(array)
110 histogram = numpy.histogram(
111 array, normed=True,
112 range=(0, self.timeout), bins=self.timeout)
113 stats.histogram = zip(histogram[1], histogram[0])
114+
115+ # SQL query count.
116+ array = numpy.fromiter(iter_spool(1, numpy.int), numpy.int)
117+ stats.total_sqlstatements = numpy.sum(array)
118+ stats.mean_sqlstatements = numpy.mean(array)
119+ stats.median_sqlstatements = numpy.median(array)
120+ stats.std_sqlstatements = numpy.std(array)
121+ stats.var_sqlstatements = numpy.var(array)
122+
123+ # SQL time stats.
124+ array = numpy.fromiter(iter_spool(2, numpy.float32), numpy.float32)
125+ stats.total_sqltime = numpy.sum(array)
126+ stats.mean_sqltime = numpy.mean(array)
127+ stats.median_sqltime = numpy.median(array)
128+ stats.std_sqltime = numpy.std(array)
129+ stats.var_sqltime = numpy.var(array)
130+
131+ # Cache for next invocation.
132+ self._stats = stats
133+
134+ # Clean up the spool file
135+ self.spool = None
136+
137 return stats
138
139 def __str__(self):
140 results = self.stats()
141- total, mean, median, standard_deviation, histogram = results
142+ total, mean, median, std, histogram = results
143 hstr = " ".join("%2d" % v for v in histogram)
144 return "%2.2f %2.2f %2.2f %s" % (
145- total, mean, median, standard_deviation, hstr)
146+ total, mean, median, std, hstr)
147
148
149 def main():
150 parser = LPOptionParser("%prog [args] tracelog [...]")
151+
152 parser.add_option(
153 "-c", "--config", dest="config",
154 default=os.path.join(
155@@ -157,7 +223,16 @@
156 "--no-pageids", dest="pageids",
157 action="store_false", default=True,
158 help="Do not produce pageids report")
159+ parser.add_option(
160+ "--directory", dest="directory",
161+ default=os.getcwd(), metavar="DIR",
162+ help="Output reports in DIR directory")
163+
164 options, args = parser.parse_args()
165+
166+ if not os.path.isdir(options.directory):
167+ parser.error("Directory %s does not exist" % options.directory)
168+
169 if len(args) == 0:
170 parser.error("At least one zserver tracelog file must be provided")
171
172@@ -196,7 +271,22 @@
173
174 parse(args, categories, pageid_times, options)
175
176- print_html_report(options, categories, pageid_times)
177+ # Category only report.
178+ if options.categories:
179+ report_filename = os.path.join(options.directory,'categories.html')
180+ log.info("Generating %s", report_filename)
181+ html_report(open(report_filename, 'w'), categories, None)
182+
183+ # Pageid only report.
184+ if options.pageids:
185+ report_filename = os.path.join(options.directory,'pageids.html')
186+ log.info("Generating %s", report_filename)
187+ html_report(open(report_filename, 'w'), None, pageid_times)
188+
189+ # Combined report.
190+ if options.categories and options.pageids:
191+ report_filename = os.path.join(options.directory,'combined.html')
192+ html_report(open(report_filename, 'w'), categories, pageid_times)
193
194 return 0
195
196@@ -330,30 +420,31 @@
197 raise MalformedLine('Unknown record type %s', record_type)
198 except MalformedLine, x:
199 log.error(
200- "Malformed line %s %s (%s)" % (repr(line), repr(args), x))
201+ "Malformed line %s (%s)" % (repr(line), x))
202
203
204 def parse_extension_record(request, args):
205 """Decode a ZServer extension records and annotate request."""
206 prefix = args[0]
207
208- if len(args) > 1:
209- args = ' '.join(args[1:])
210- else:
211- args = None
212-
213 if prefix == 'u':
214- request.url = args
215+ request.url = ' '.join(args[1:]) or None
216 elif prefix == 'p':
217- request.pageid = args
218+ request.pageid = ' '.join(args[1:]) or None
219+ elif prefix == 't':
220+ if len(args) != 4:
221+ raise MalformedLine("Wrong number of arguments %s" % (args,))
222+ request.ticks = int(args[1])
223+ request.sql_statements = int(args[2])
224+ request.sql_seconds = float(args[3]) / 1000
225 else:
226 raise MalformedLine(
227 "Unknown extension prefix %s" % prefix)
228
229
230-def print_html_report(options, categories, pageid_times):
231+def html_report(outf, categories, pageid_times):
232
233- print dedent('''\
234+ print >> outf, dedent('''\
235 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
236 "http://www.w3.org/TR/html4/loose.dtd">
237 <html>
238@@ -391,6 +482,8 @@
239 padding: 1em;
240 }
241 .clickable { cursor: hand; }
242+ .total_hits, .histogram, .median_sqltime,
243+ .median_sqlstatements { border-right: 1px dashed #000000; }
244 </style>
245 </head>
246 <body>
247@@ -404,12 +497,29 @@
248 <thead>
249 <tr>
250 <th class="clickable">Name</th>
251+
252+ <th class="clickable">Total Hits</th>
253+
254 <th class="clickable">Total Time (secs)</th>
255- <th class="clickable">Total Hits</th>
256+
257 <th class="clickable">Mean Time (secs)</th>
258+ <th class="clickable">Time Standard Deviation</th>
259+ <th class="clickable">Time Variance</th>
260 <th class="clickable">Median Time (secs)</th>
261- <th class="clickable">Time Standard<br/>Deviation</th>
262- <th class="sorttable_nosort">Distribution</th>
263+ <th class="sorttable_nosort">Time Distribution</th>
264+
265+ <th class="clickable">Total SQL Time (secs)</th>
266+ <th class="clickable">Mean SQL Time (secs)</th>
267+ <th class="clickable">SQL Time Standard Deviation</th>
268+ <th class="clickable">SQL Time Variance</th>
269+ <th class="clickable">Median SQL Time (secs)</th>
270+
271+ <th class="clickable">Total SQL Statements</th>
272+ <th class="clickable">Mean SQL Statements</th>
273+ <th class="clickable">SQL Statement Standard Deviation</th>
274+ <th class="clickable">SQL Statement Variance</th>
275+ <th class="clickable">Median SQL Statements</th>
276+
277 </tr>
278 </thead>
279 <tbody>
280@@ -422,51 +532,69 @@
281 def handle_times(html_title, times):
282 stats = times.stats()
283 histograms.append(stats.histogram)
284- print dedent("""\
285+ print >> outf, dedent("""\
286 <tr>
287 <th class="category-title">%s</th>
288+ <td class="numeric total_hits">%d</td>
289 <td class="numeric total_time">%.2f</td>
290- <td class="numeric total_hits">%d</td>
291- <td class="numeric mean">%.2f</td>
292- <td class="numeric median">%.2f</td>
293- <td class="numeric standard-deviation">%.2f</td>
294+ <td class="numeric mean_time">%.2f</td>
295+ <td class="numeric std_time">%.2f</td>
296+ <td class="numeric var_time">%.2f</td>
297+ <td class="numeric median_time">%.2f</td>
298 <td>
299 <div class="histogram" id="histogram%d"></div>
300 </td>
301+ <td class="numeric total_sqltime">%.2f</td>
302+ <td class="numeric mean_sqltime">%.2f</td>
303+ <td class="numeric std_sqltime">%.2f</td>
304+ <td class="numeric var_sqltime">%.2f</td>
305+ <td class="numeric median_sqltime">%.2f</td>
306+
307+ <td class="numeric total_sqlstatements">%d</td>
308+ <td class="numeric mean_sqlstatements">%.2f</td>
309+ <td class="numeric std_sqlstatements">%.2f</td>
310+ <td class="numeric var_sqlstatements">%.2f</td>
311+ <td class="numeric median_sqlstatements">%.2f</td>
312 </tr>
313 """ % (
314 html_title,
315- stats.total_time, stats.total_hits,
316- stats.mean, stats.median, stats.standard_deviation,
317- len(histograms)-1))
318+ stats.total_hits, stats.total_time,
319+ stats.mean, stats.std, stats.var, stats.median,
320+ len(histograms) - 1,
321+ stats.total_sqltime, stats.mean_sqltime,
322+ stats.std_sqltime, stats.var_sqltime, stats.median_sqltime,
323+ stats.total_sqlstatements, stats.mean_sqlstatements,
324+ stats.std_sqlstatements, stats.var_sqlstatements,
325+ stats.median_sqlstatements))
326
327 # Table of contents
328- print '<ol>'
329- if options.categories:
330- print '<li><a href="#catrep">Category Report</a></li>'
331- if options.pageids:
332- print '<li><a href="#pageidrep">Pageid Report</a></li>'
333- print '</ol>'
334+ if categories and pageid_times:
335+ print >> outf, dedent('''\
336+ <ol>
337+ <li><a href="#catrep">Category Report</a></li>
338+ <li><a href="#pageidrep">Pageid Report</a></li>
339+ </ol>
340+ ''')
341
342- if options.categories:
343- print '<h2 id="catrep">Category Report</h2>'
344- print table_header
345+ if categories:
346+ print >> outf, '<h2 id="catrep">Category Report</h2>'
347+ print >> outf, table_header
348 for category in categories:
349 html_title = '%s<br/><span class="regexp">%s</span>' % (
350 html_quote(category.title), html_quote(category.regexp))
351 handle_times(html_title, category.times)
352- print table_footer
353+ print >> outf, table_footer
354
355- if options.pageids:
356- print '<h2 id="pageidrep">Pageid Report</h2>'
357- print table_header
358+ if pageid_times:
359+ print >> outf, '<h2 id="pageidrep">Pageid Report</h2>'
360+ print >> outf, table_header
361 for pageid, times in sorted(pageid_times.items()):
362 handle_times(html_quote(pageid), times)
363- print table_footer
364+ print >> outf, table_footer
365
366 # Ourput the javascript to render our histograms nicely, replacing
367 # the placeholder <div> tags output earlier.
368- print dedent("""\
369+ print >> outf, dedent("""\
370 <script language="javascript" type="text/javascript">
371 $(function () {
372 var options = {
373@@ -504,7 +632,7 @@
374 for i, histogram in enumerate(histograms):
375 if histogram is None:
376 continue
377- print dedent("""\
378+ print >> outf, dedent("""\
379 var d = %s;
380
381 $.plot(
382@@ -513,7 +641,7 @@
383
384 """ % (json.dumps(histogram), i))
385
386- print dedent("""\
387+ print >> outf, dedent("""\
388 });
389 </script>
390 </body>
391
392=== modified file 'setup.py'
393--- setup.py 2010-06-09 14:12:03 +0000
394+++ setup.py 2010-07-08 13:40:54 +0000
395@@ -46,6 +46,7 @@
396 'meliae',
397 'mercurial',
398 'mocker',
399+ 'numpy',
400 'oauth',
401 'paramiko',
402 'python-memcached',
403
404=== added file 'utilities/page-performance-report-daily.sh'
405--- utilities/page-performance-report-daily.sh 1970-01-01 00:00:00 +0000
406+++ utilities/page-performance-report-daily.sh 2010-07-08 13:40:54 +0000
407@@ -0,0 +1,64 @@
408+#!/bin/sh
409+
410+#TZ=UTC # trace logs are still BST - blech
411+
412+category_report() {
413+ max_log_age=$1
414+ type=$2
415+ from=$3
416+ until=$4
417+ category=$5
418+ log_root=$6
419+
420+ local logs
421+ logs=`find ${log_root} \
422+ -maxdepth 2 -type f -mtime -${max_log_age} -name launchpad-trace\* \
423+ | sort | xargs -x`
424+
425+ local root
426+ root=${HOME}/public_html/ppr/${category}
427+
428+ local dir
429+ dir=${root}/${type}_${from}_${until}
430+
431+ mkdir -p ${dir}
432+
433+ echo Generating report from $from until $until into $dir `date`
434+
435+ ./page-performance-report.py -v --from=$from --until=$until \
436+ --directory=${dir} $logs
437+
438+ ln -sf ${dir}/categories.html ${root}/latest-${type}-categories.html
439+ ln -sf ${dir}/pageids.html ${root}/latest-${type}-pageids.html
440+ ln -sf ${dir}/combined.html ${root}/latest-${type}-combined.html
441+
442+ return 0
443+ }
444+
445+report() {
446+ category_report $* edge /srv/launchpad.net-logs/edge
447+ category_report $* lpnet /srv/launchpad.net-logs/production
448+ return 0
449+}
450+
451+fmt='+%Y-%m-%d'
452+
453+now=`date $fmt`
454+
455+report 3 daily `date -d yesterday $fmt` $now
456+
457+if [ `date +%a` = 'Sat' ]; then
458+ report 9 weekly `date -d 'last week' $fmt` $now
459+fi
460+
461+# We don't seem to have a months worth of tracelogs, but we will
462+# generate what we can.
463+if [ `date +%d` = '01' ]; then
464+ report 32 monthly `date -d 'last month' $fmt` $now
465+fi
466+
467+# One off reports to populate history.
468+## report 40 monthly `date -d '1 june 2010' $fmt` `date -d '1 july 2010' $fmt`
469+## report 23 weekly `date -d '19 june 2010' $fmt` `date -d '26 june 2010' $fmt`
470+## report 16 weekly `date -d '26 june 2010' $fmt` `date -d '3 july 2010' $fmt`
471+
472
473=== modified file 'utilities/page-performance-report.ini'
474--- utilities/page-performance-report.ini 2010-03-11 06:10:08 +0000
475+++ utilities/page-performance-report.ini 2010-07-08 13:40:54 +0000
476@@ -3,33 +3,45 @@
477 # Remeber to quote ?, ., + & ? characters to match litterally.
478 # 'kodos' is useful for interactively testing regular expressions.
479 All Launchpad=.
480-
481-Registry=^https?://(edge\.)?launchpad\.
482-Registry - Person Index=/(~|%7E)[^/]+$
483-Registry - Pillar Index=^https?://(edge\.)?launchpad.[^/]+/\w[^/]*$
484-
485-Answers=^https?://answers\.(edge\.)?launchpad\.
486-
487-Blueprints=^https?://blueprints\.(edge\.)?launchpad\.
488-
489-Bugs=^https?://bugs\.(edge\.)?launchpad\.
490-Bugs - Bug Page=\+bug/\d+$
491-Bugs - Pillar Index=^https?://bugs\.(edge\.)?launchpad\.[^/]+/\w[^/]*$
492-Bugs - Search=/\+bugs\?
493-
494-Code=^https?://code\.(edge\.)?launchpad\.
495-Code - Front page=^https?://code.[^/]+/?(?:\?.*)?$
496-Code - PPA Index=/\+archive/[^/]+$
497-Code - Pillar Branches=^https?://code\.(edge\.)?launchpad\.[^/]+/\w[^/]*$
498-Code - Branch Page=^https?://code\.(edge\.)?launchpad\.[^/]+/(~|%7E)[^/]+/[^/]+/[^/]+$
499-Code - Merge Proposal=/\+merge/\d+$
500-
501-Translations=^https?://translations\.(edge\.)?launchpad\.
502+All launchpad except opstats=(?<!\+opstats)$
503+
504+Launchpad Frontpage=^https?://(edge\.)?launchpad\.[^/]+/?$
505+
506+# Note that the bug text dump is served on the main launchpad domain
507+# and we need to exlude it from the registry stats.
508+Registry=^https?://(edge\.)?launchpad\.(?<!/\+text)$
509+Registry - Person Index=^https?://(edge\.)?launchpad\.[^/]+/(~|%7E)[^/]+$
510+Registry - Pillar Index=^https?://(edge\.)?launchpad\.[^/]+/\w[^/]*$
511+
512+Answers=^https?://answers\.
513+Answers - Front page=^https?://answers\.[^/]+/?$
514+
515+Blueprints=^https?://blueprints\.
516+Blueprints - Front page=^https?://blueprints\.[^/]+/?$
517+
518+# Note that the bug text dump is not served on the bugs domain,
519+# probably for hysterical reasons. This is why the bugs regexp is
520+# confusing.
521+Bugs=^https?://(bugs\.|.+/bugs/\d+/\+text$)
522+Bugs - Front page=^https?://bugs\.[^/]+/?$
523+Bugs - Bug Page=^https?://bugs\.[^/]+/.+/\+bug/\d+$
524+Bugs - Pillar Index=^https?://bugs\.[^/]+/\w[^/]*$
525+Bugs - Search=^https?://bugs\.[^/]+/.+/\+bugs\?.*field.searchtext=
526+Bugs - Text Dump=^https?://(edge\.)?launchpad\..+/\+text$
527+
528+Code=^https?://code\.
529+Code - Front page=^https?://code\.[^/]+/?$
530+Code - PPA Index=^https?://code\.[^/]+/.+/\+archive/[^/]+$
531+Code - Pillar Branches=^https?://code\.[^/]+/\w[^/]*$
532+Code - Branch Page=^https?://code\.[^/]+/(~|%7E)[^/]+/[^/]+/[^/]+$
533+Code - Merge Proposal=^https?://code\.[^/]+/.+/\+merge/\d+$
534+
535+Translations=^https?://translations\.
536+Translations - Front page=^https?://translations\.[^/]+/?$
537 Translations - Overview=^https?://translations\..*/\+lang/\w+$
538
539-API=^https?://api\.(edge\.)?launchpad\.
540-Public XML-RPC=^https?://xmlrpc\.(edge\.)launchpad\.
541-Private XML-RPC=^https?://xmlrpc-private\.(edge\.)launchpad\.
542+API=^https?://api\.
543+Public XML-RPC=^https?://xmlrpc\.
544+Private XML-RPC=^https?://xmlrpc-private\.
545 Shipit=^https?://shipit\.
546-Launchpad Homepage=^https?://(edge\.)?launchpad\.
547
548
549=== modified file 'versions.cfg'
550--- versions.cfg 2010-06-14 18:32:58 +0000
551+++ versions.cfg 2010-07-08 13:40:54 +0000
552@@ -40,6 +40,7 @@
553 mercurial = 1.3.1
554 mocker = 0.10.1
555 mozrunner = 1.3.4
556+numpy = 1.3.0
557 oauth = 1.0
558 paramiko = 1.7.4
559 Paste = 1.7.2