Merge lp:~lifeless/launchpad/foundations into lp:launchpad

Status: Merged
Approved by: Stuart Bishop
Approved revision: no longer in the source branch
Merged at revision: 11396
Proposed branch: lp:~lifeless/launchpad/foundations
Merge into: lp:launchpad
Diff against target: 440 lines (+158/-73), 2 files modified:
  lib/lp/scripts/utilities/pageperformancereport.py (+154/-68)
  utilities/page-performance-report-daily.sh (+4/-5)
To merge this branch: bzr merge lp:~lifeless/launchpad/foundations
Reviewer: Stuart Bishop (community), status: Approve
Review via email: mp+32299@code.launchpad.net
Commit message
Page performance report improvements - top N urls and 99th percentile
Description of the change
Generate a page performance report page that will (hopefully) be a useful hit-list for developers to focus on.
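As a usage sketch (the dates, log file, and output directory here are hypothetical), the report generator would be invoked along these lines:

    ./page-performance-report.py -v --from=2010-08-01 --until=2010-08-02 \
        --top-urls=200 --timeout=12 --directory=/tmp/reports trace.log

--top-urls and --timeout are the options this branch adds; the other flags already existed. The daily driver script in the diff below uses essentially this invocation.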
Stuart Bishop (stub) wrote:
Robert Collins (lifeless) wrote:
Thanks Stuart; the width of 10 for the candidates-timeout graphs is a
little weird. I think they should use the same upper bound as the other
graphs, or an even higher one: the max time in the dataset.
-Rob
Robert Collins (lifeless) wrote:
Oh, and I was a muppet: I set the timeout *default* (from which 2 is
subtracted for the candidates threshold) to 10, when I should have set it
to 12, the staging timeout, which would cut off appropriately at
12 - 2 = 10 seconds. Mea culpa.
Robert Collins (lifeless) wrote:
Stuart, I've updated this with a patch that raises the default timeout to
12 (staging's current timeout) and separates the graph capping from the
standard deviation calculations. Graphs are capped at 50% above the
timeout, since we regularly go up to that or higher due to the way the
timeouts work, but our numeric stats were being distorted by the capping
done to make the histograms look nice.
Thanks,
Rob
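To illustrate the distortion Rob describes, here is a minimal sketch with made-up numbers (it assumes numpy and is not code from this branch): capping request times to tidy the histogram shrinks the standard deviation, so any statistics computed from the capped data, including the mean + 3*std estimate of the 99th percentile used in this branch, come out too low.

    import numpy

    # Hypothetical request times in seconds: mostly fast, a few very slow.
    times = numpy.array([0.5, 0.7, 1.1, 2.0, 35.0, 60.0], numpy.float32)
    capped = numpy.minimum(times, 18.0)  # cap at 1.5 * a 12 second timeout

    print numpy.std(times)   # spread of the real data, tail included
    print numpy.std(capped)  # noticeably smaller once the tail is clipped

    # The branch estimates the 99th percentile as mean + 3*std, which
    # assumes roughly normal data; fed capped times it would understate.
    print numpy.mean(times) + 3 * numpy.std(times)

Hence the patch keeps the raw request times for the numeric statistics and only caps a temporary copy when building the histogram.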
Stuart Bishop (stub) wrote:
Looks fine and is running happily on devpad.
Preview Diff
=== modified file 'lib/lp/scripts/utilities/pageperformancereport.py'
--- lib/lp/scripts/utilities/pageperformancereport.py	2010-07-08 11:44:02 +0000
+++ lib/lp/scripts/utilities/pageperformancereport.py	2010-08-12 05:59:44 +0000
@@ -6,15 +6,13 @@
 __metaclass__ = type
 __all__ = ['main']
 
-import bz2
 from cgi import escape as html_quote
 from ConfigParser import RawConfigParser
 from datetime import datetime
-import gzip
 import re
 import sre_constants
-from tempfile import TemporaryFile
 import os.path
+import subprocess
 from textwrap import dedent
 import time
 
@@ -84,6 +82,7 @@
     median = 0 # Median time per hit.
     std = 0 # Standard deviation per hit.
     var = 0 # Variance per hit.
+    ninetyninth_percentile_time = 0
     histogram = None # # Request times histogram.
 
     total_sqltime = 0 # Total time spent waiting for SQL to process.
@@ -98,27 +97,31 @@
     std_sqlstatements = 0
     var_sqlstatements = 0
 
+empty_stats = Stats() # Singleton.
+
 
 class Times:
     """Collection of request times."""
     def __init__(self, timeout):
-        self.spool = TemporaryFile()
+        self.total_hits = 0
+        self.total_time = 0
         self.request_times = []
         self.sql_statements = []
         self.sql_times = []
         self.ticks = []
-        self.timeout = timeout
+        self.histogram_width = int(1.5*timeout)
 
     def add(self, request):
-        """Add the application time from the request to the collection.
-
-        The application time is capped to our timeout.
-        """
-        print >> self.spool, "%s,%s,%s,%s" % (
-            min(request.app_seconds, self.timeout),
-            request.sql_statements or '',
-            request.sql_seconds or '',
-            request.ticks or '')
+        """Add the application time from the request to the collection."""
+        self.total_hits += 1
+        self.total_time += request.app_seconds
+        self.request_times.append(request.app_seconds)
+        if request.sql_statements is not None:
+            self.sql_statements.append(request.sql_statements)
+        if request.sql_seconds is not None:
+            self.sql_times.append(request.sql_seconds)
+        if request.ticks is not None:
+            self.ticks.append(request.ticks)
 
     _stats = None
 
@@ -133,58 +136,56 @@
         1 and 2 seconds etc. histogram is None if there are no requests in
         this Category.
         """
+        if not self.total_hits:
+            return empty_stats
+
         if self._stats is not None:
             return self._stats
 
-        def iter_spool(index, cast):
-            """Generator returning one column from our spool file.
-
-            Skips None values.
-            """
-            self.spool.flush()
-            self.spool.seek(0)
-            for line in self.spool:
-                value = line.split(',')[index]
-                if value != '':
-                    yield cast(value)
-
         stats = Stats()
 
+        stats.total_hits = self.total_hits
+
         # Time stats
-        array = numpy.fromiter(iter_spool(0, numpy.float32), numpy.float32)
+        array = numpy.asarray(self.request_times, numpy.float32)
         stats.total_time = numpy.sum(array)
-        stats.total_hits = len(array)
         stats.mean = numpy.mean(array)
         stats.median = numpy.median(array)
         stats.std = numpy.std(array)
         stats.var = numpy.var(array)
+        # This is an approximation which may not be true: we don't know if we
+        # have a std distribution or not. We could just find the 99th
+        # percentile by counting. Shock. Horror; however this appears pretty
+        # good based on eyeballing things so far - once we're down in the 2-3
+        # second range for everything we may want to revisit.
+        stats.ninetyninth_percentile_time = stats.mean + stats.std*3
+        capped_times = (min(a_time, self.histogram_width) for a_time in
+            self.request_times)
+        array = numpy.fromiter(capped_times, numpy.float32,
+            len(self.request_times))
         histogram = numpy.histogram(
             array, normed=True,
-            range=(0, self.timeout), bins=self.timeout)
+            range=(0, self.histogram_width), bins=self.histogram_width)
         stats.histogram = zip(histogram[1], histogram[0])
 
-        # SQL query count.
-        array = numpy.fromiter(iter_spool(1, numpy.int), numpy.int)
-        stats.total_sqlstatements = numpy.sum(array)
-        stats.mean_sqlstatements = numpy.mean(array)
-        stats.median_sqlstatements = numpy.median(array)
-        stats.std_sqlstatements = numpy.std(array)
-        stats.var_sqlstatements = numpy.var(array)
-
         # SQL time stats.
-        array = numpy.fromiter(iter_spool(2, numpy.float32), numpy.float32)
+        array = numpy.asarray(self.sql_times, numpy.float32)
         stats.total_sqltime = numpy.sum(array)
         stats.mean_sqltime = numpy.mean(array)
         stats.median_sqltime = numpy.median(array)
         stats.std_sqltime = numpy.std(array)
         stats.var_sqltime = numpy.var(array)
 
+        # SQL query count.
+        array = numpy.asarray(self.sql_statements, numpy.int)
+        stats.total_sqlstatements = int(numpy.sum(array))
+        stats.mean_sqlstatements = numpy.mean(array)
+        stats.median_sqlstatements = numpy.median(array)
+        stats.std_sqlstatements = numpy.std(array)
+        stats.var_sqlstatements = numpy.var(array)
+
         # Cache for next invocation.
         self._stats = stats
-
-        # Clean up the spool file
-        self.spool = None
-
         return stats
 
     def __str__(self):
@@ -194,6 +195,9 @@
         return "%2.2f %2.2f %2.2f %s" % (
             total, mean, median, std, hstr)
 
+    def __cmp__(self, b):
+        return cmp(self.total_time, b.total_time)
+
 
 def main():
     parser = LPOptionParser("%prog [args] tracelog [...]")
@@ -204,10 +208,6 @@
             config.root, "utilities", "page-performance-report.ini"),
         metavar="FILE", help="Load configuration from FILE")
     parser.add_option(
-        "--timeout", dest="timeout", type="int",
-        default=20, metavar="SECS",
-        help="Requests taking more than SECS seconds are timeouts")
-    parser.add_option(
         "--from", dest="from_ts", type="datetime",
         default=None, metavar="TIMESTAMP",
         help="Ignore log entries before TIMESTAMP")
@@ -224,9 +224,17 @@
         action="store_false", default=True,
         help="Do not produce pageids report")
     parser.add_option(
+        "--top-urls", dest="top_urls", type=int, metavar="N",
+        default=50, help="Generate report for top N urls by hitcount.")
+    parser.add_option(
         "--directory", dest="directory",
         default=os.getcwd(), metavar="DIR",
         help="Output reports in DIR directory")
+    parser.add_option(
+        "--timeout", dest="timeout",
+        # Default to 12: the staging timeout.
+        default=12, type="int",
+        help="The configured timeout value : determines high risk page ids.")
 
     options, args = parser.parse_args()
 
@@ -268,25 +276,51 @@
         parser.error("No data in [categories] section of configuration.")
 
     pageid_times = {}
-
-    parse(args, categories, pageid_times, options)
+    url_times = {}
+
+    parse(args, categories, pageid_times, url_times, options)
+
+    # Truncate the URL times to the top N.
+    if options.top_urls:
+        sorted_urls = sorted(
+            ((times, url) for url, times in url_times.items()
+                if times.total_hits > 0), reverse=True)
+        url_times = [(url, times)
+            for times, url in sorted_urls[:options.top_urls]]
+
+    def _report_filename(filename):
+        return os.path.join(options.directory, filename)
 
     # Category only report.
     if options.categories:
-        report_filename = os.path.join(options.directory,'categories.html')
+        report_filename = _report_filename('categories.html')
         log.info("Generating %s", report_filename)
-        html_report(open(report_filename, 'w'), categories, None)
+        html_report(open(report_filename, 'w'), categories, None, None)
 
     # Pageid only report.
     if options.pageids:
-        report_filename = os.path.join(options.directory,'pageids.html')
+        report_filename = _report_filename('pageids.html')
         log.info("Generating %s", report_filename)
-        html_report(open(report_filename, 'w'), None, pageid_times)
+        html_report(open(report_filename, 'w'), None, pageid_times, None)
+
+    # Top URL only report.
+    if options.top_urls:
+        report_filename = _report_filename('top%d.html' % options.top_urls)
+        log.info("Generating %s", report_filename)
+        html_report(open(report_filename, 'w'), None, None, url_times)
 
     # Combined report.
     if options.categories and options.pageids:
-        report_filename = os.path.join(options.directory,'combined.html')
-        html_report(open(report_filename, 'w'), categories, pageid_times)
+        report_filename = _report_filename('combined.html')
+        html_report(
+            open(report_filename, 'w'), categories, pageid_times, url_times)
+
+    # Report of likely timeout candidates
+    report_filename = _report_filename('timeout-candidates.html')
+    log.info("Generating %s", report_filename)
+    html_report(
+        open(report_filename, 'w'), None, pageid_times, None,
+        options.timeout - 2)
 
     return 0
 
@@ -298,9 +332,17 @@
     """
     ext = os.path.splitext(filename)[1]
     if ext == '.bz2':
-        return bz2.BZ2File(filename, mode)
+        p = subprocess.Popen(
+            ['bunzip2', '-c', filename],
+            stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+        p.stdin.close()
+        return p.stdout
     elif ext == '.gz':
-        return gzip.open(filename, mode)
+        p = subprocess.Popen(
+            ['gunzip', '-c', filename],
+            stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+        p.stdin.close()
+        return p.stdout
     else:
         return open(filename, mode)
 
@@ -321,7 +363,7 @@
         *(int(elem) for elem in match.groups() if elem is not None))
 
 
-def parse(tracefiles, categories, pageid_times, options):
+def parse(tracefiles, categories, pageid_times, url_times, options):
     requests = {}
     total_requests = 0
     for tracefile in tracefiles:
@@ -402,12 +444,12 @@
                         log.debug("Parsed %d requests", total_requests)
 
                     # Add the request to any matching categories.
-                    if categories is not None:
+                    if options.categories:
                         for category in categories:
                             category.add(request)
 
                     # Add the request to the times for that pageid.
-                    if pageid_times is not None and request.pageid is not None:
+                    if options.pageids:
                         pageid = request.pageid
                         try:
                             times = pageid_times[pageid]
@@ -416,6 +458,21 @@
                             pageid_times[pageid] = times
                         times.add(request)
 
+                    # Add the request to the times for that URL.
+                    if options.top_urls:
+                        url = request.url
+                        # Hack to remove opstats from top N report. This
+                        # should go into a config file if we end up with
+                        # more pages that need to be ignored because
+                        # they are just noise.
+                        if not (url is None or url.endswith('+opstats')):
+                            try:
+                                times = url_times[url]
+                            except KeyError:
+                                times = Times(options.timeout)
+                                url_times[url] = times
+                            times.add(request)
+
                 else:
                     raise MalformedLine('Unknown record type %s', record_type)
         except MalformedLine, x:
@@ -442,7 +499,19 @@
             "Unknown extension prefix %s" % prefix)
 
 
-def html_report(outf, categories, pageid_times):
+def html_report(
+        outf, categories, pageid_times, url_times,
+        ninetyninth_percentile_threshold=None):
+    """Write an html report to outf.
+
+    :param outf: A file object to write the report to.
+    :param categories: Categories to report.
+    :param pageid_times: The time statistics for pageids.
+    :param url_times: The time statistics for the top XXX urls.
+    :param ninetyninth_percentile_threshold: Lower threshold for inclusion of
+        pages in the pageid section; pages where 99 percent of the requests
+        are served under this threshold will not be included.
+    """
 
     print >> outf, dedent('''\
         <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
@@ -502,6 +571,8 @@
 
         <th class="clickable">Total Time (secs)</th>
 
+        <th class="clickable">99% Under Time (secs)</th>
+
        <th class="clickable">Mean Time (secs)</th>
        <th class="clickable">Time Standard Deviation</th>
        <th class="clickable">Time Variance</th>
@@ -537,6 +608,7 @@
        <th class="category-title">%s</th>
        <td class="numeric total_hits">%d</td>
        <td class="numeric total_time">%.2f</td>
+        <td class="numeric 99pc_under">%.2f</td>
        <td class="numeric mean_time">%.2f</td>
        <td class="numeric std_time">%.2f</td>
        <td class="numeric var_time">%.2f</td>
@@ -559,6 +631,7 @@
         """ % (
         html_title,
         stats.total_hits, stats.total_time,
+        stats.ninetyninth_percentile_time,
         stats.mean, stats.std, stats.var, stats.median,
         len(histograms) - 1,
         stats.total_sqltime, stats.mean_sqltime,
@@ -568,13 +641,14 @@
         stats.median_sqlstatements))
 
     # Table of contents
-    if categories and pageid_times:
-        print >> outf, dedent('''\
-            <ol>
-            <li><a href="#catrep">Category Report</a></li>
-            <li><a href="#pageidrep">Pageid Report</a></li>
-            </ol>
-            ''')
+    print >> outf, '<ol>'
+    if categories:
+        print >> outf, '<li><a href="#catrep">Category Report</a></li>'
+    if pageid_times:
+        print >> outf, '<li><a href="#pageidrep">Pageid Report</a></li>'
+    if url_times:
+        print >> outf, '<li><a href="#topurlrep">Top URL Report</a></li>'
+    print >> outf, '</ol>'
 
     if categories:
         print >> outf, '<h2 id="catrep">Category Report</h2>'
@@ -589,9 +663,21 @@
         print >> outf, '<h2 id="pageidrep">Pageid Report</h2>'
         print >> outf, table_header
         for pageid, times in sorted(pageid_times.items()):
+            pageid = pageid or 'None'
+            if (ninetyninth_percentile_threshold is not None and
+                (times.stats().ninetyninth_percentile_time <
+                    ninetyninth_percentile_threshold)):
+                continue
             handle_times(html_quote(pageid), times)
         print >> outf, table_footer
 
+    if url_times:
+        print >> outf, '<h2 id="topurlrep">Top URL Report</h2>'
+        print >> outf, table_header
+        for url, times in url_times:
+            handle_times(html_quote(url), times)
+        print >> outf, table_footer
+
     # Ourput the javascript to render our histograms nicely, replacing
     # the placeholder <div> tags output earlier.
     print >> outf, dedent("""\

=== modified file 'utilities/page-performance-report-daily.sh'
--- utilities/page-performance-report-daily.sh	2010-07-08 09:11:54 +0000
+++ utilities/page-performance-report-daily.sh	2010-08-12 05:59:44 +0000
@@ -26,11 +26,14 @@
     echo Generating report from $from until $until into $dir `date`
 
     ./page-performance-report.py -v --from=$from --until=$until \
-        --directory=${dir} $logs
+        --top-urls=200 --directory=${dir} $logs
 
     ln -sf ${dir}/categories.html ${root}/latest-${type}-categories.html
     ln -sf ${dir}/pageids.html ${root}/latest-${type}-pageids.html
     ln -sf ${dir}/combined.html ${root}/latest-${type}-combined.html
+    ln -sf ${dir}/top200.html ${root}/latest-${type}-top200.html
+    ln -sf ${dir}/timeout-candidates.html \
+        ${root}/latest-${type}-timeout-candidates.html
 
     return 0
 }
@@ -57,8 +60,4 @@
     report 32 monthly `date -d 'last month' $fmt` $now
 fi
 
-# One off reports to populate history.
-## report 40 monthly `date -d '1 june 2010' $fmt` `date -d '1 july 2010' $fmt`
-## report 23 weekly `date -d '19 june 2010' $fmt` `date -d '26 june 2010' $fmt`
-## report 16 weekly `date -d '26 june 2010' $fmt` `date -d '3 july 2010' $fmt`
 
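The comment added in stats() concedes that mean + 3*std is only an approximation and that the 99th percentile "could just be found by counting". A sketch of that exact alternative, for illustration only (this function is hypothetical, not part of the branch):

    def ninetyninth_by_counting(request_times):
        # Exact empirical 99th percentile: sort and index into the
        # ordered data. No normality assumption, at the cost of a sort.
        ordered = sorted(request_times)
        if not ordered:
            return 0
        index = min(len(ordered) - 1, int(len(ordered) * 0.99))
        return ordered[index]

The approximation avoids the sort and, per the comment, looked close enough by eyeball at the time.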
I've fixed some bugs in this; it doesn't look like it had been tested.

I've merged it in with changes I had neglected to push in the days before the Epic but which have been running live; that work is in lp:~stub/launchpad/page-performance-report. It seems to be working just fine locally with a single log downloaded from devpad. I've pushed this to the tree on devpad so we can see the result with the scheduled run.