Merge lp:~jml/txpkgme/csv-summary into lp:txpkgme

Proposed by Jonathan Lange
Status: Merged
Approved by: James Westby
Approved revision: 65
Merged at revision: 45
Proposed branch: lp:~jml/txpkgme/csv-summary
Merge into: lp:txpkgme
Diff against target: 417 lines (+241/-34)
3 files modified
txpkgme/reports.py (+144/-25)
txpkgme/scoreboard.py (+1/-7)
txpkgme/tests/test_reports.py (+96/-2)
To merge this branch: bzr merge lp:~jml/txpkgme/csv-summary
Reviewer: James Westby (community)
Review status: Approve
Review via email: mp+126023@code.launchpad.net

Commit message

CSV output from subunit output

Description of the change

I started writing an email about automatic packaging success.

My train of thought went roughly like this:

-- You know what would make this better? A pretty picture.

-- How can I make a pretty picture? ggplot2 in R was pretty cool last time
   I used it.

-- Oh, but doing anything in R is pretty tough unless the data is already
   there in tabulated format like CSV.

-- I guess it wouldn't be too hard to get CSV output.

-- Huh, this subunit-to-json thing should just be a generic "turn into a
   different format" script.

-- And we can _almost_ but not quite guess the backend; let's do that.

-- Oh, huh, and median sucks. Might as well fix it while I'm here (there's a
   quick sketch of that fix right after this description).

-- I have something shelved? What gi... oh, I thought I landed this.
   Might as well chuck it in too.

Thanks,
jml
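
For reference, the median fix mentioned above replaces len(xs) * percentile
with (len(xs) - 1) * percentile and interpolates between neighbours when the
index lands between two elements. A minimal standalone sketch, mirroring
median() from txpkgme/reports.py in the preview diff below; the expected
values are the ones asserted by the new TestMedian tests:

    # Standalone sketch of the corrected percentile median, mirroring
    # median() in txpkgme/reports.py from the diff below.
    def median(xs, percentile=0.5):
        if not xs:
            raise ValueError("Cannot find median of empty sequence: %r" % (xs,))
        xs = sorted(xs)
        # The old midpoint, len(xs) * percentile, points one element too far;
        # (len(xs) - 1) * percentile stays inside the list and falls between
        # two elements exactly when interpolation is wanted.
        midpoint = (len(xs) - 1) * percentile
        i = int(midpoint)
        if midpoint == i:
            return xs[i]
        return (xs[i] + xs[i + 1]) / 2.0

    median([1, 2, 3, 4])        # 2.5, halfway between the two middle values
    median(range(10), 0.9)      # 8.5, the 90th percentile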

James Westby (james-w) wrote :

Hi,

Looks good.

It looks like if the error text has a comma in it, it would break the CSV,
since the cells aren't escaped?

I might have done a len(results) check before computing and printing the median
times, but this way is fine too.

Thanks,

James

review: Approve
lp:~jml/txpkgme/csv-summary updated
64. By Jonathan Lange

Handle comma in cells
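
This is the fix for the comma issue James raised above: any cell whose text
contains a comma is wrapped in double quotes before being written. The helper
below mirrors _encode_csv_cell from txpkgme/reports.py in the diff; the two
example calls are illustrative only:

    # Mirrors _encode_csv_cell from the diff below: cells containing a comma
    # are quoted so they keep their column in the CSV output.
    def _encode_csv_cell(cell):
        text = unicode(cell)
        if u',' in text:
            text = u'"%s"' % (text,)
        return text.encode('utf8')

    _encode_csv_cell("IOError: foo, bar")   # '"IOError: foo, bar"'
    _encode_csv_cell(12.5)                  # '12.5'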

65. By Jonathan Lange

If there are no results, just say so.
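
Both halves of this are visible in the diff below: the summary writer prints
"No results" and returns early when the result list is empty, and median()
raises a ValueError with a clear message instead of hitting an IndexError.
A small illustration of the latter, assuming txpkgme is importable:

    # median() now refuses empty input up front.
    from txpkgme.reports import median

    try:
        median([])
    except ValueError as e:
        print e    # Cannot find median of empty sequence: []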

Preview Diff

=== renamed file 'bin/subunit-to-json' => 'bin/parse-subunit-results'
=== modified file 'txpkgme/reports.py'
--- txpkgme/reports.py 2012-09-14 12:37:09 +0000
+++ txpkgme/reports.py 2012-09-24 15:34:21 +0000
@@ -14,7 +14,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

 import argparse
-from itertools import ifilter
+from itertools import ifilter, imap
 import json
 from StringIO import StringIO
 import sys
@@ -54,17 +54,21 @@
 SUCCESSFUL_STATUSES = ('success',)


+def was_successful(result):
+    return result['status'] in SUCCESSFUL_STATUSES
+
+
 def calculate_success_ratio(parsed_results):
     total = len(parsed_results)
-    successes = len(
-        [None for result in parsed_results
-         if result['status'] in SUCCESSFUL_STATUSES])
+    successes = len(filter(was_successful, parsed_results))
     return successes, total - successes


 def median(xs, percentile=0.5):
+    if not xs:
+        raise ValueError("Cannot find median of empty sequence: %r" % (xs,))
     xs = sorted(xs)
-    midpoint = len(xs) * percentile
+    midpoint = (len(xs) - 1) * percentile
     i = int(midpoint)
     if midpoint == i:
         return xs[i]
@@ -72,11 +76,14 @@
     return (xs[i] + xs[i + 1]) / 2.0


+def get_duration(result):
+    duration = result.get('duration', None)
+    if duration:
+        return duration.total_seconds()
+
+
 def iter_times(results):
-    for result in results:
-        duration = result.get('duration', None)
-        if duration:
-            yield duration.seconds + (duration.microseconds / 1e6)
+    return imap(get_duration, results)


 def format_success_failure(success, failure):
@@ -94,31 +101,57 @@
         ])


-MULTI_ARCH = u'MULTI-ARCH'
-NO_BACKEND = u'NO-BACKEND'
-TIMEOUT = u'TIMEOUT'
-
-
-def map_error(details):
+def get_error_text(details):
     if not details:
         return
     traceback = details.get('traceback', None)
     if traceback:
-        traceback = traceback.strip()
-        if 'TimeoutError' in traceback:
-            return TIMEOUT
-        if traceback.startswith('No eligible backends'):
-            return NO_BACKEND
-        multiarch_tag = 'Only a single architecture at a time is supported.'
-        if traceback.endswith(multiarch_tag):
-            return MULTI_ARCH
-        return traceback.splitlines()[-1].strip().lstrip('|').strip()
+        return traceback.strip().splitlines()[-1].strip().lstrip('|').strip()
     error = details.get('error', None)
     if error:
         return error
     return None


+MULTI_ARCH = u'MULTI-ARCH'
+NO_BACKEND = u'NO-BACKEND'
+TIMEOUT = u'TIMEOUT'
+MISSING_DEP = u'MISSING-DEP'
+NOT_IMPLEMENTED_YET = u'NOT-IMPLEMENTED-YET'
+UNPACKAGEABLE = u'UNPACKAGEABLE'
+OTHER = u'OTHER'
+
+
+def map_error(details):
+    error = get_error_text(details)
+    if not error:
+        return
+    category, extra = map_error_to_tuple(error)
+    if extra is None:
+        return category
+    return extra
+
+
+def map_error_to_tuple(error):
+    if 'TimeoutError' in error:
+        return TIMEOUT, None
+    if error.startswith('No eligible backends'):
+        return NO_BACKEND, None
+    multiarch_tag = 'Only a single architecture at a time is supported.'
+    if error.endswith(multiarch_tag):
+        return MULTI_ARCH, None
+    missing_dep_tag = "Can't find dependency for"
+    if error.startswith(missing_dep_tag):
+        return MISSING_DEP, error[len(missing_dep_tag):].strip().strip('"')
+    if 'not implemented yet' in error:
+        return (NOT_IMPLEMENTED_YET,
+                error.split('not implemented yet', 1)[0].strip())
+    if ' will never be implemented' in error:
+        return (UNPACKAGEABLE,
+                error.split(' will never be implemented', 1)[0].strip())
+    return OTHER, error
+
+
 def group_errors(parsed_results):
     error_to_results = {}
     for result in parsed_results:
@@ -168,6 +201,37 @@
     return output


+def _encode_csv_cell(cell):
+    text = unicode(cell)
+    if u',' in text:
+        text = u'"%s"' % (text,)
+    return text.encode('utf8')
+
+
+def _result_to_csv_row(result, columns):
+    return ','.join(map(_encode_csv_cell, (col(result) for col in columns)))
+
+
+def results_to_csv(results, output):
+    columns = [
+        ('app_id', get_app_id),
+        ('name', get_app_name),
+        ('queue', get_app_queue),
+        ('state', get_app_state),
+        ('successful', was_successful),
+        ('backend', get_backend),
+        ('error', get_error_category),
+        ('duration', get_duration),
+        ('url', get_app_url),
+        ]
+    titles, functions = zip(*columns)
+    output.write(','.join('"%s"' % title for title in titles))
+    output.write('\n')
+    for result in results:
+        output.write(_result_to_csv_row(result, functions))
+        output.write('\n')
+
+
 def get_app_state(test):
     for tag in test.get('tags', []):
         if tag.startswith('state='):
@@ -175,10 +239,50 @@
     return None


+def get_app_name(test):
+    return test['test_id'].rsplit(':', 1)[0]
+
+
+def get_app_id(test):
+    return test['test_id'].rsplit(':', 1)[1]
+
+
 def get_app_url(test):
     return test['details']['myapps']['package_url']


+def get_backend(test):
+    backend = guess_backend(
+        get_error_text(test['details']),
+        get_app_url(test),
+        was_successful(test))
+    if backend:
+        return backend
+    return 'UNKNOWN'
+
+
+def guess_backend(error_text, url=None, successful=False):
+    if not error_text:
+        # The only useful information we get back about the backend actually
+        # used comes from the error. Still, we can guess which backend was
+        # probably used based on a few simple heuristics.
+        if url and url.endswith('.pdf'):
+            return 'PDF'
+        if successful:
+            # The only other successful backend as of 2012-09-24 is the binary
+            # backend.
+            return 'binary'
+        return None
+    category, extra = map_error_to_tuple(error_text)
+    if category == NO_BACKEND:
+        return NO_BACKEND
+    if category in (UNPACKAGEABLE, NOT_IMPLEMENTED_YET):
+        return extra
+    if category in (MISSING_DEP, MULTI_ARCH):
+        return 'binary'
+    return None
+
+
 def get_app_queue(test):
     url = get_app_url(test)
     if '/internal_packages/' in url:
@@ -188,6 +292,13 @@
     return None


+def get_error_category(test):
+    error = get_error_text(test.get('details', None))
+    if not error:
+        return ''
+    return map_error_to_tuple(error)[0]
+
+
 def make_filter(include_state=None, include_queue=None):
     def philtre(result):
         return (
@@ -203,6 +314,9 @@
     """
     w = output_stream.write
     results = list(results)
+    if not results:
+        w("No results\n")
+        return
     successes, failures = calculate_success_ratio(results)
     w('Summary\n')
     w('-------\n')
@@ -234,7 +348,12 @@


 def subunit_to_json():
+    OUTPUTS = {
+        'json': lambda results, output: dump_json(list(results), output),
+        'csv': results_to_csv,
+        }
     parser = make_base_options()
+    parser.add_argument('--format', choices=OUTPUTS.keys(), default='json')
     args = parser.parse_args()
-    dump_json(list(iter_results(args)), sys.stdout)
+    OUTPUTS[args.format](iter_results(args), sys.stdout)
     return 0

=== modified file 'txpkgme/scoreboard.py'
--- txpkgme/scoreboard.py 2012-09-11 13:44:35 +0000
+++ txpkgme/scoreboard.py 2012-09-24 15:34:21 +0000
@@ -30,10 +30,9 @@
 from subunit import TestProtocolClient
 from testtools import PlaceHolder
 from testtools.content import (
-    Content,
+    json_content,
     text_content,
     )
-from testtools.content_type import ContentType
 from twisted.internet import defer
 from twisted.internet import reactor as mod_reactor

@@ -50,11 +49,6 @@
 from .utils import parse_json


-def json_content(data):
-    JSON_TYPE = ContentType('application', 'json', {'charset': 'utf8'})
-    return Content(JSON_TYPE, lambda: [json.dumps(data)])
-
-
 MYAPPS_SERVERS = {
     'vps': 'https://sca.razorgirl.info/dev/api/app-metadata/',
     'staging': 'https://developer.staging.ubuntu.com/dev/api/app-metadata/',

=== modified file 'txpkgme/tests/test_reports.py'
--- txpkgme/tests/test_reports.py 2012-09-14 12:16:15 +0000
+++ txpkgme/tests/test_reports.py 2012-09-24 15:34:21 +0000
@@ -13,18 +13,24 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

 from datetime import timedelta
+import random
 from StringIO import StringIO
 import traceback

 from testtools import TestCase
-from testtools.content import text_content
+from testtools.content import (
+    json_content,
+    text_content,
+    )
 from twisted.internet.error import TimeoutError

 from txpkgme.reports import (
     calculate_success_ratio,
     get_app_queue,
     get_app_state,
+    guess_backend,
     map_error,
+    median,
     format_success_failure,
     MULTI_ARCH,
     NO_BACKEND,
@@ -32,7 +38,6 @@
     parse_subunit,
     TIMEOUT,
     )
-from txpkgme.scoreboard import json_content


 def get_stack_trace(f, *a, **kw):
@@ -221,3 +226,92 @@
         url = 'https://myapps.developer.ubuntu.com/internal_packages/2011/06/whatever.tar.gz'
         queue = get_app_queue({'details': {'myapps': {'package_url': url}}})
         self.assertEquals('commercial', queue)
+
+
+class TestGuessBackend(TestCase):
+
+    def test_no_eligible(self):
+        backend = guess_backend('No eligible backends')
+        self.assertEqual(NO_BACKEND, backend)
+
+    def test_unimplemented(self):
+        backend = guess_backend('Foo Bar not implemented yet')
+        self.assertEqual('Foo Bar', backend)
+
+    def test_unimplementable(self):
+        backend = guess_backend('Foo Bar will never be implemented')
+        self.assertEqual('Foo Bar', backend)
+
+    def test_no_error(self):
+        backend = guess_backend(None)
+        self.assertIs(None, backend)
+
+    def test_unrecognized_string(self):
+        backend = guess_backend(self.getUniqueString())
+        self.assertIs(None, backend)
+
+    def test_pdf_url(self):
+        backend = guess_backend(None, 'http://example.com/foo.pdf')
+        self.assertEqual('PDF', backend)
+
+    def test_timeout(self):
+        backend = guess_backend("TimeoutError")
+        self.assertEqual(None, backend)
+
+    def test_missing_dep_implies_binary(self):
+        backend = guess_backend("Can't find dependency for 'libfoo.so'")
+        self.assertEqual('binary', backend)
+
+    def test_multi_arch_implies_binary(self):
+        backend = guess_backend(
+            "Only a single architecture at a time is supported.")
+        self.assertEqual('binary', backend)
+
+    def test_successful_non_pdf_is_binary(self):
+        backend = guess_backend(None, 'http://example.com/whatever', True)
+        self.assertEqual('binary', backend)
+
+
+def shuffle(sequence):
+    shuffled = list(sequence)
+    random.shuffle(shuffled)
+    return shuffled
+
+
+def num_less_than(sequence, limit):
+    return len([1 for x in sequence if x <= limit])
+
+
+def ratio_less_than(sequence, limit):
+    return num_less_than(sequence, limit) / float(len(sequence))
+
+
+class TestMedian(TestCase):
+
+    def test_odd_length(self):
+        self.assertEqual(2, median(shuffle([1, 2, 3])))
+        self.assertEqual(3, median(shuffle([1, 2, 3, 4, 5])))
+
+    def test_even_length(self):
+        self.assertEqual(2.5, median(shuffle([1, 2, 3, 4])))
+        self.assertEqual(3.5, median(shuffle([1, 2, 3, 4, 5, 6])))
+
+    def test_singleton(self):
+        self.assertEqual(1, median([1]))
+
+    def test_percentile(self):
+        self.assertEqual(8.5, median(shuffle(range(10)), 0.9))
+        self.assertEqual(17.5, median(shuffle(range(20)), 0.9))
+        self.assertEqual(89.5, median(shuffle(range(100)), 0.9))
+
+    def assertMedianConsistent(self, sequence, percentile):
+        point = median(sequence, percentile)
+        self.assertEqual(ratio_less_than(sequence, point), percentile)
+
+    def test_consistency(self):
+        self.assertMedianConsistent(shuffle(range(10)), 0.9)
+        self.assertMedianConsistent(shuffle(range(20)), 0.9)
+        self.assertMedianConsistent(shuffle(range(100)), 0.9)
+
+    def test_empty_sequence(self):
+        self.assertRaises(ValueError, median, [])
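
As a quick end-to-end illustration of the new CSV path: a hypothetical usage
sketch, not part of the branch. The result dict keys are taken from what the
column functions in reports.py above read; the values are made up.

    # Hypothetical usage of results_to_csv from the diff above.
    from datetime import timedelta
    from StringIO import StringIO

    from txpkgme.reports import results_to_csv

    result = {
        'test_id': 'some.app.name:1234',
        'status': 'success',
        'tags': ['state=Published'],
        'duration': timedelta(seconds=12, microseconds=500000),
        'details': {'myapps': {'package_url': 'http://example.com/app.tar.gz'}},
    }
    out = StringIO()
    results_to_csv([result], out)
    print out.getvalue()
    # First line is the quoted header row ("app_id","name","queue",...),
    # followed by one comma-separated row per result.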
