Merge lp:~lifeless/launchpad/foundations into lp:launchpad

Proposed by Robert Collins
Status: Merged
Approved by: Stuart Bishop
Approved revision: no longer in the source branch.
Merged at revision: 11396
Proposed branch: lp:~lifeless/launchpad/foundations
Merge into: lp:launchpad
Diff against target: 440 lines (+158/-73)
2 files modified
lib/lp/scripts/utilities/pageperformancereport.py (+154/-68)
utilities/page-performance-report-daily.sh (+4/-5)
To merge this branch: bzr merge lp:~lifeless/launchpad/foundations
Reviewer: Stuart Bishop (community)
Review status: Approve
Review via email: mp+32299@code.launchpad.net

Commit message

Page performance report improvements: top N URLs and 99th percentile

Description of the change

Generate a page performance report page that will (hopefully) be a useful hit-list for developers to focus on.

Stuart Bishop (stub) wrote:

I've fixed some bugs in this - it doesn't look like it had been tested.

I've merged it in with changes I had neglected to push in the days before the Epic but have been running live; that work is in lp:~stub/launchpad/page-performance-report. It seems to be working just fine locally with a single log downloaded from devpad. I've pushed this to the tree on devpad so we can see the result of the scheduled run.

Robert Collins (lifeless) wrote:

Thanks Stuart; the width of 10 for the candidates-timeout graphs is a
little weird. I think they should use the same upper bound as the other
graphs, or an even higher one: the max time in the dataset.

-Rob
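
(For illustration only: a minimal sketch of the capping being discussed,
using hypothetical names; the follow-up patch settled on a shared
histogram_width of int(1.5*timeout) rather than the dataset maximum.)

import numpy

def capped_histogram(request_times, upper_bound):
    """Bucket request times into one-second bins up to upper_bound.

    upper_bound could be the shared graph width, or max(request_times)
    as suggested above; either keeps the candidates graphs on the same
    scale as the main report.
    """
    # Cap outliers so they land in the final bucket instead of
    # stretching the x axis.
    capped = numpy.fromiter(
        (min(t, upper_bound) for t in request_times),
        numpy.float32, len(request_times))
    counts, edges = numpy.histogram(
        capped, bins=int(upper_bound), range=(0, upper_bound))
    return list(zip(edges, counts))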

Robert Collins (lifeless) wrote:

Oh, and I was a muppet: I set the timeout *default* to 10, when I should
have set it to 12 (the staging timeout), so that the timeout - 2 cutoff
would land appropriately at 10. Mea culpa.

Robert Collins (lifeless) wrote:

Stuart, I've updated this with a patch to raise the default timeout to
12 (staging's current timeout) and to separate the graph capping (50%
above the timeout: we regularly go up to this or higher due to the way
the timeouts work) from the standard deviation calculations: our numeric
stats were being distorted by the capping done to make the histograms
nice.

Thanks,
Rob
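
(As an aside, a minimal sketch of the separation described above;
summarise is a hypothetical helper, not part of the patch. Statistics
come from the raw, uncapped times, and the patch's mean + 3*std
approximation is shown next to an exact percentile for comparison.)

import numpy

def summarise(request_times, timeout):
    # Stats are computed on the raw times, so the cap applied for the
    # histograms cannot distort them.
    array = numpy.asarray(request_times, numpy.float32)
    mean = numpy.mean(array)
    std = numpy.std(array)
    # The patch approximates the 99th percentile as mean + 3*std; for a
    # normal distribution that is nearer the 99.9th percentile, so it
    # is a conservative (high) estimate.
    approx_99th = mean + std * 3
    # Counting gives the exact value.
    exact_99th = numpy.percentile(array, 99)
    # Graphs are capped at 50% above the timeout, matching
    # histogram_width = int(1.5*timeout) in the patch.
    graph_cap = int(1.5 * timeout)
    return mean, std, approx_99th, exact_99th, graph_cap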

Stuart Bishop (stub) wrote:

Looks fine and is running happily on devpad

review: Approve

Preview Diff

=== modified file 'lib/lp/scripts/utilities/pageperformancereport.py'
--- lib/lp/scripts/utilities/pageperformancereport.py 2010-07-08 11:44:02 +0000
+++ lib/lp/scripts/utilities/pageperformancereport.py 2010-08-12 05:59:44 +0000
@@ -6,15 +6,13 @@
 __metaclass__ = type
 __all__ = ['main']
 
-import bz2
 from cgi import escape as html_quote
 from ConfigParser import RawConfigParser
 from datetime import datetime
-import gzip
 import re
 import sre_constants
-from tempfile import TemporaryFile
 import os.path
+import subprocess
 from textwrap import dedent
 import time
 
@@ -84,6 +82,7 @@
     median = 0 # Median time per hit.
     std = 0 # Standard deviation per hit.
     var = 0 # Variance per hit.
+    ninetyninth_percentile_time = 0
     histogram = None # Request times histogram.
 
     total_sqltime = 0 # Total time spent waiting for SQL to process.
@@ -98,27 +97,31 @@
     std_sqlstatements = 0
     var_sqlstatements = 0
 
+empty_stats = Stats() # Singleton.
+
 
 class Times:
     """Collection of request times."""
     def __init__(self, timeout):
-        self.spool = TemporaryFile()
+        self.total_hits = 0
+        self.total_time = 0
         self.request_times = []
         self.sql_statements = []
         self.sql_times = []
         self.ticks = []
-        self.timeout = timeout
+        self.histogram_width = int(1.5*timeout)
 
     def add(self, request):
-        """Add the application time from the request to the collection.
-
-        The application time is capped to our timeout.
-        """
-        print >> self.spool, "%s,%s,%s,%s" % (
-            min(request.app_seconds, self.timeout),
-            request.sql_statements or '',
-            request.sql_seconds or '',
-            request.ticks or '')
+        """Add the application time from the request to the collection."""
+        self.total_hits += 1
+        self.total_time += request.app_seconds
+        self.request_times.append(request.app_seconds)
+        if request.sql_statements is not None:
+            self.sql_statements.append(request.sql_statements)
+        if request.sql_seconds is not None:
+            self.sql_times.append(request.sql_seconds)
+        if request.ticks is not None:
+            self.ticks.append(request.ticks)
 
     _stats = None
 
@@ -133,58 +136,56 @@
         1 and 2 seconds etc. histogram is None if there are no requests in
         this Category.
         """
+        if not self.total_hits:
+            return empty_stats
+
         if self._stats is not None:
             return self._stats
 
-        def iter_spool(index, cast):
-            """Generator returning one column from our spool file.
-
-            Skips None values.
-            """
-            self.spool.flush()
-            self.spool.seek(0)
-            for line in self.spool:
-                value = line.split(',')[index]
-                if value != '':
-                    yield cast(value)
-
         stats = Stats()
 
+        stats.total_hits = self.total_hits
+
         # Time stats
-        array = numpy.fromiter(iter_spool(0, numpy.float32), numpy.float32)
+        array = numpy.asarray(self.request_times, numpy.float32)
         stats.total_time = numpy.sum(array)
-        stats.total_hits = len(array)
         stats.mean = numpy.mean(array)
         stats.median = numpy.median(array)
         stats.std = numpy.std(array)
         stats.var = numpy.var(array)
+        # This is an approximation which may not be true: we don't know if we
+        # have a std distribution or not. We could just find the 99th
+        # percentile by counting. Shock. Horror; however this appears pretty
+        # good based on eyeballing things so far - once we're down in the 2-3
+        # second range for everything we may want to revisit.
+        stats.ninetyninth_percentile_time = stats.mean + stats.std*3
+        capped_times = (min(a_time, self.histogram_width) for a_time in
+            self.request_times)
+        array = numpy.fromiter(capped_times, numpy.float32,
+            len(self.request_times))
         histogram = numpy.histogram(
             array, normed=True,
-            range=(0, self.timeout), bins=self.timeout)
+            range=(0, self.histogram_width), bins=self.histogram_width)
         stats.histogram = zip(histogram[1], histogram[0])
 
-        # SQL query count.
-        array = numpy.fromiter(iter_spool(1, numpy.int), numpy.int)
-        stats.total_sqlstatements = numpy.sum(array)
-        stats.mean_sqlstatements = numpy.mean(array)
-        stats.median_sqlstatements = numpy.median(array)
-        stats.std_sqlstatements = numpy.std(array)
-        stats.var_sqlstatements = numpy.var(array)
-
         # SQL time stats.
-        array = numpy.fromiter(iter_spool(2, numpy.float32), numpy.float32)
+        array = numpy.asarray(self.sql_times, numpy.float32)
         stats.total_sqltime = numpy.sum(array)
         stats.mean_sqltime = numpy.mean(array)
         stats.median_sqltime = numpy.median(array)
         stats.std_sqltime = numpy.std(array)
         stats.var_sqltime = numpy.var(array)
 
+        # SQL query count.
+        array = numpy.asarray(self.sql_statements, numpy.int)
+        stats.total_sqlstatements = int(numpy.sum(array))
+        stats.mean_sqlstatements = numpy.mean(array)
+        stats.median_sqlstatements = numpy.median(array)
+        stats.std_sqlstatements = numpy.std(array)
+        stats.var_sqlstatements = numpy.var(array)
+
         # Cache for next invocation.
         self._stats = stats
-
-        # Clean up the spool file
-        self.spool = None
-
         return stats
 
     def __str__(self):
@@ -194,6 +195,9 @@
         return "%2.2f %2.2f %2.2f %s" % (
             total, mean, median, std, hstr)
 
+    def __cmp__(self, b):
+        return cmp(self.total_time, b.total_time)
+
 
 def main():
     parser = LPOptionParser("%prog [args] tracelog [...]")
@@ -204,10 +208,6 @@
             config.root, "utilities", "page-performance-report.ini"),
         metavar="FILE", help="Load configuration from FILE")
     parser.add_option(
-        "--timeout", dest="timeout", type="int",
-        default=20, metavar="SECS",
-        help="Requests taking more than SECS seconds are timeouts")
-    parser.add_option(
         "--from", dest="from_ts", type="datetime",
         default=None, metavar="TIMESTAMP",
         help="Ignore log entries before TIMESTAMP")
@@ -224,9 +224,17 @@
         action="store_false", default=True,
         help="Do not produce pageids report")
     parser.add_option(
+        "--top-urls", dest="top_urls", type=int, metavar="N",
+        default=50, help="Generate report for top N urls by hitcount.")
+    parser.add_option(
         "--directory", dest="directory",
         default=os.getcwd(), metavar="DIR",
         help="Output reports in DIR directory")
+    parser.add_option(
+        "--timeout", dest="timeout",
+        # Default to 12: the staging timeout.
+        default=12, type="int",
+        help="The configured timeout value : determines high risk page ids.")
 
     options, args = parser.parse_args()
 
@@ -268,25 +276,51 @@
         parser.error("No data in [categories] section of configuration.")
 
     pageid_times = {}
-
-    parse(args, categories, pageid_times, options)
+    url_times = {}
+
+    parse(args, categories, pageid_times, url_times, options)
+
+    # Truncate the URL times to the top N.
+    if options.top_urls:
+        sorted_urls = sorted(
+            ((times, url) for url, times in url_times.items()
+                if times.total_hits > 0), reverse=True)
+        url_times = [(url, times)
+            for times, url in sorted_urls[:options.top_urls]]
+
+    def _report_filename(filename):
+        return os.path.join(options.directory, filename)
 
     # Category only report.
     if options.categories:
-        report_filename = os.path.join(options.directory,'categories.html')
+        report_filename = _report_filename('categories.html')
         log.info("Generating %s", report_filename)
-        html_report(open(report_filename, 'w'), categories, None)
+        html_report(open(report_filename, 'w'), categories, None, None)
 
     # Pageid only report.
     if options.pageids:
-        report_filename = os.path.join(options.directory,'pageids.html')
+        report_filename = _report_filename('pageids.html')
         log.info("Generating %s", report_filename)
-        html_report(open(report_filename, 'w'), None, pageid_times)
+        html_report(open(report_filename, 'w'), None, pageid_times, None)
+
+    # Top URL only report.
+    if options.top_urls:
+        report_filename = _report_filename('top%d.html' % options.top_urls)
+        log.info("Generating %s", report_filename)
+        html_report(open(report_filename, 'w'), None, None, url_times)
 
     # Combined report.
     if options.categories and options.pageids:
-        report_filename = os.path.join(options.directory,'combined.html')
-        html_report(open(report_filename, 'w'), categories, pageid_times)
+        report_filename = _report_filename('combined.html')
+        html_report(
+            open(report_filename, 'w'), categories, pageid_times, url_times)
+
+    # Report of likely timeout candidates
+    report_filename = _report_filename('timeout-candidates.html')
+    log.info("Generating %s", report_filename)
+    html_report(
+        open(report_filename, 'w'), None, pageid_times, None,
+        options.timeout - 2)
 
     return 0
 
@@ -298,9 +332,17 @@
298 """332 """
299 ext = os.path.splitext(filename)[1]333 ext = os.path.splitext(filename)[1]
300 if ext == '.bz2':334 if ext == '.bz2':
301 return bz2.BZ2File(filename, mode)335 p = subprocess.Popen(
336 ['bunzip2', '-c', filename],
337 stdout=subprocess.PIPE, stdin=subprocess.PIPE)
338 p.stdin.close()
339 return p.stdout
302 elif ext == '.gz':340 elif ext == '.gz':
303 return gzip.open(filename, mode)341 p = subprocess.Popen(
342 ['gunzip', '-c', filename],
343 stdout=subprocess.PIPE, stdin=subprocess.PIPE)
344 p.stdin.close()
345 return p.stdout
304 else:346 else:
305 return open(filename, mode)347 return open(filename, mode)
306348
@@ -321,7 +363,7 @@
         *(int(elem) for elem in match.groups() if elem is not None))
 
 
-def parse(tracefiles, categories, pageid_times, options):
+def parse(tracefiles, categories, pageid_times, url_times, options):
     requests = {}
     total_requests = 0
     for tracefile in tracefiles:
@@ -402,12 +444,12 @@
                         log.debug("Parsed %d requests", total_requests)
 
                     # Add the request to any matching categories.
-                    if categories is not None:
+                    if options.categories:
                         for category in categories:
                             category.add(request)
 
                     # Add the request to the times for that pageid.
-                    if pageid_times is not None and request.pageid is not None:
+                    if options.pageids:
                         pageid = request.pageid
                         try:
                             times = pageid_times[pageid]
@@ -416,6 +458,21 @@
                             pageid_times[pageid] = times
                         times.add(request)
 
+                    # Add the request to the times for that URL.
+                    if options.top_urls:
+                        url = request.url
+                        # Hack to remove opstats from top N report. This
+                        # should go into a config file if we end up with
+                        # more pages that need to be ignored because
+                        # they are just noise.
+                        if not (url is None or url.endswith('+opstats')):
+                            try:
+                                times = url_times[url]
+                            except KeyError:
+                                times = Times(options.timeout)
+                                url_times[url] = times
+                            times.add(request)
+
                 else:
                     raise MalformedLine('Unknown record type %s', record_type)
             except MalformedLine, x:
@@ -442,7 +499,19 @@
442 "Unknown extension prefix %s" % prefix)499 "Unknown extension prefix %s" % prefix)
443500
444501
445def html_report(outf, categories, pageid_times):502def html_report(
503 outf, categories, pageid_times, url_times,
504 ninetyninth_percentile_threshold=None):
505 """Write an html report to outf.
506
507 :param outf: A file object to write the report to.
508 :param categories: Categories to report.
509 :param pageid_times: The time statistics for pageids.
510 :param url_times: The time statistics for the top XXX urls.
511 :param ninetyninth_percentile_threshold: Lower threshold for inclusion of
512 pages in the pageid section; pages where 99 percent of the requests are
513 served under this threshold will not be included.
514 """
446515
447 print >> outf, dedent('''\516 print >> outf, dedent('''\
448 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"517 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
@@ -502,6 +571,8 @@
 
         <th class="clickable">Total Time (secs)</th>
 
+        <th class="clickable">99% Under Time (secs)</th>
+
         <th class="clickable">Mean Time (secs)</th>
         <th class="clickable">Time Standard Deviation</th>
         <th class="clickable">Time Variance</th>
@@ -537,6 +608,7 @@
             <th class="category-title">%s</th>
             <td class="numeric total_hits">%d</td>
             <td class="numeric total_time">%.2f</td>
+            <td class="numeric 99pc_under">%.2f</td>
             <td class="numeric mean_time">%.2f</td>
             <td class="numeric std_time">%.2f</td>
             <td class="numeric var_time">%.2f</td>
@@ -559,6 +631,7 @@
559 """ % (631 """ % (
560 html_title,632 html_title,
561 stats.total_hits, stats.total_time,633 stats.total_hits, stats.total_time,
634 stats.ninetyninth_percentile_time,
562 stats.mean, stats.std, stats.var, stats.median,635 stats.mean, stats.std, stats.var, stats.median,
563 len(histograms) - 1,636 len(histograms) - 1,
564 stats.total_sqltime, stats.mean_sqltime,637 stats.total_sqltime, stats.mean_sqltime,
@@ -568,13 +641,14 @@
             stats.median_sqlstatements))
 
     # Table of contents
-    if categories and pageid_times:
-        print >> outf, dedent('''\
-            <ol>
-            <li><a href="#catrep">Category Report</a></li>
-            <li><a href="#pageidrep">Pageid Report</a></li>
-            </ol>
-            ''')
+    print >> outf, '<ol>'
+    if categories:
+        print >> outf, '<li><a href="#catrep">Category Report</a></li>'
+    if pageid_times:
+        print >> outf, '<li><a href="#pageidrep">Pageid Report</a></li>'
+    if url_times:
+        print >> outf, '<li><a href="#topurlrep">Top URL Report</a></li>'
+    print >> outf, '</ol>'
 
     if categories:
         print >> outf, '<h2 id="catrep">Category Report</h2>'
@@ -589,9 +663,21 @@
         print >> outf, '<h2 id="pageidrep">Pageid Report</h2>'
         print >> outf, table_header
         for pageid, times in sorted(pageid_times.items()):
+            pageid = pageid or 'None'
+            if (ninetyninth_percentile_threshold is not None and
+                (times.stats().ninetyninth_percentile_time <
+                ninetyninth_percentile_threshold)):
+                continue
             handle_times(html_quote(pageid), times)
         print >> outf, table_footer
 
+    if url_times:
+        print >> outf, '<h2 id="topurlrep">Top URL Report</h2>'
+        print >> outf, table_header
+        for url, times in url_times:
+            handle_times(html_quote(url), times)
+        print >> outf, table_footer
+
     # Ourput the javascript to render our histograms nicely, replacing
     # the placeholder <div> tags output earlier.
     print >> outf, dedent("""\
 
=== modified file 'utilities/page-performance-report-daily.sh'
--- utilities/page-performance-report-daily.sh 2010-07-08 09:11:54 +0000
+++ utilities/page-performance-report-daily.sh 2010-08-12 05:59:44 +0000
@@ -26,11 +26,14 @@
     echo Generating report from $from until $until into $dir `date`
 
     ./page-performance-report.py -v --from=$from --until=$until \
-        --directory=${dir} $logs
+        --top-urls=200 --directory=${dir} $logs
 
     ln -sf ${dir}/categories.html ${root}/latest-${type}-categories.html
     ln -sf ${dir}/pageids.html ${root}/latest-${type}-pageids.html
     ln -sf ${dir}/combined.html ${root}/latest-${type}-combined.html
+    ln -sf ${dir}/top200.html ${root}/latest-${type}-top200.html
+    ln -sf ${dir}/timeout-candidates.html \
+        ${root}/latest-${type}-timeout-candidates.html
 
     return 0
 }
@@ -57,8 +60,4 @@
     report 32 monthly `date -d 'last month' $fmt` $now
 fi
 
-# One off reports to populate history.
-## report 40 monthly `date -d '1 june 2010' $fmt` `date -d '1 july 2010' $fmt`
-## report 23 weekly `date -d '19 june 2010' $fmt` `date -d '26 june 2010' $fmt`
-## report 16 weekly `date -d '26 june 2010' $fmt` `date -d '3 july 2010' $fmt`
 