Merge lp:~jml/txpkgme/csv-summary into lp:txpkgme

Proposed by Jonathan Lange
Status: Merged
Approved by: James Westby
Approved revision: 65
Merged at revision: 45
Proposed branch: lp:~jml/txpkgme/csv-summary
Merge into: lp:txpkgme
Diff against target: 417 lines (+241/-34)
3 files modified
txpkgme/reports.py (+144/-25)
txpkgme/scoreboard.py (+1/-7)
txpkgme/tests/test_reports.py (+96/-2)
To merge this branch: bzr merge lp:~jml/txpkgme/csv-summary
Reviewer: James Westby (community)
Review status: Approve
Review via email: mp+126023@code.launchpad.net

Commit message

CSV output from subunit output

Description of the change

I started writing an email about automatic packaging success.

My train of thought went roughly like this:

-- You know what would make this better? A pretty picture.

-- How can I make a pretty picture? ggplot2 in R was pretty cool last time
   I used it.

-- Oh, but doing anything in R is pretty tough unless the data is already
   there in tabulated format like CSV.

-- I guess it wouldn't be too hard to get CSV output.

-- Huh, this subunit-to-json thing should just be a generic "turn into a
   different format" script.

-- And we can _almost_ but not quite guess the backend; let's do that.

-- Oh, huh, and median sucks. Might as well fix it while I'm here (there's a
   quick sketch of that fix right after this description).

-- I have something shelved? What gi... oh, I thought I landed this.
   Might as well chuck it in too.

Thanks,
jml
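
For reference, the median fix mentioned above replaces len(xs) * percentile
with (len(xs) - 1) * percentile and interpolates between neighbours when the
index lands between two elements. A minimal standalone sketch, mirroring
median() from txpkgme/reports.py in the preview diff below; the expected
values are the ones asserted by the new TestMedian tests:

    # Standalone sketch of the corrected percentile median, mirroring
    # median() in txpkgme/reports.py from the diff below.
    def median(xs, percentile=0.5):
        if not xs:
            raise ValueError("Cannot find median of empty sequence: %r" % (xs,))
        xs = sorted(xs)
        # The old midpoint, len(xs) * percentile, points one element too far;
        # (len(xs) - 1) * percentile stays inside the list and falls between
        # two elements exactly when interpolation is wanted.
        midpoint = (len(xs) - 1) * percentile
        i = int(midpoint)
        if midpoint == i:
            return xs[i]
        return (xs[i] + xs[i + 1]) / 2.0

    median([1, 2, 3, 4])        # 2.5, halfway between the two middle values
    median(range(10), 0.9)      # 8.5, the 90th percentile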

James Westby (james-w) wrote :

Hi,

Looks good.

It looks like if the error text has a comma in it, it would break the CSV,
since the cells aren't escaped?

I might have done a len(results) check before computing and printing the median
times, but this way is fine too.

Thanks,

James

review: Approve
lp:~jml/txpkgme/csv-summary updated
64. By Jonathan Lange

Handle comma in cells
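
This is the fix for the comma issue James raised above: any cell whose text
contains a comma is wrapped in double quotes before being written. The helper
below mirrors _encode_csv_cell from txpkgme/reports.py in the diff; the two
example calls are illustrative only:

    # Mirrors _encode_csv_cell from the diff below: cells containing a comma
    # are quoted so they keep their column in the CSV output.
    def _encode_csv_cell(cell):
        text = unicode(cell)
        if u',' in text:
            text = u'"%s"' % (text,)
        return text.encode('utf8')

    _encode_csv_cell("IOError: foo, bar")   # '"IOError: foo, bar"'
    _encode_csv_cell(12.5)                  # '12.5'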

65. By Jonathan Lange

If there are no results, just say so.
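
Both halves of this are visible in the diff below: the summary writer prints
"No results" and returns early when the result list is empty, and median()
raises a ValueError with a clear message instead of hitting an IndexError.
A small illustration of the latter, assuming txpkgme is importable:

    # median() now refuses empty input up front.
    from txpkgme.reports import median

    try:
        median([])
    except ValueError as e:
        print e    # Cannot find median of empty sequence: []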

Preview Diff

=== renamed file 'bin/subunit-to-json' => 'bin/parse-subunit-results'
=== modified file 'txpkgme/reports.py'
--- txpkgme/reports.py 2012-09-14 12:37:09 +0000
+++ txpkgme/reports.py 2012-09-24 15:34:21 +0000
@@ -14,7 +14,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

 import argparse
-from itertools import ifilter
+from itertools import ifilter, imap
 import json
 from StringIO import StringIO
 import sys
@@ -54,17 +54,21 @@
 SUCCESSFUL_STATUSES = ('success',)


+def was_successful(result):
+    return result['status'] in SUCCESSFUL_STATUSES
+
+
 def calculate_success_ratio(parsed_results):
     total = len(parsed_results)
-    successes = len(
-        [None for result in parsed_results
-         if result['status'] in SUCCESSFUL_STATUSES])
+    successes = len(filter(was_successful, parsed_results))
     return successes, total - successes


 def median(xs, percentile=0.5):
+    if not xs:
+        raise ValueError("Cannot find median of empty sequence: %r" % (xs,))
     xs = sorted(xs)
-    midpoint = len(xs) * percentile
+    midpoint = (len(xs) - 1) * percentile
     i = int(midpoint)
     if midpoint == i:
         return xs[i]
@@ -72,11 +76,14 @@
     return (xs[i] + xs[i + 1]) / 2.0


+def get_duration(result):
+    duration = result.get('duration', None)
+    if duration:
+        return duration.total_seconds()
+
+
 def iter_times(results):
-    for result in results:
-        duration = result.get('duration', None)
-        if duration:
-            yield duration.seconds + (duration.microseconds / 1e6)
+    return imap(get_duration, results)


 def format_success_failure(success, failure):
@@ -94,31 +101,57 @@
         ])


-MULTI_ARCH = u'MULTI-ARCH'
-NO_BACKEND = u'NO-BACKEND'
-TIMEOUT = u'TIMEOUT'
-
-
-def map_error(details):
+def get_error_text(details):
     if not details:
         return
     traceback = details.get('traceback', None)
     if traceback:
-        traceback = traceback.strip()
-        if 'TimeoutError' in traceback:
-            return TIMEOUT
-        if traceback.startswith('No eligible backends'):
-            return NO_BACKEND
-        multiarch_tag = 'Only a single architecture at a time is supported.'
-        if traceback.endswith(multiarch_tag):
-            return MULTI_ARCH
-        return traceback.splitlines()[-1].strip().lstrip('|').strip()
+        return traceback.strip().splitlines()[-1].strip().lstrip('|').strip()
     error = details.get('error', None)
     if error:
         return error
     return None


+MULTI_ARCH = u'MULTI-ARCH'
+NO_BACKEND = u'NO-BACKEND'
+TIMEOUT = u'TIMEOUT'
+MISSING_DEP = u'MISSING-DEP'
+NOT_IMPLEMENTED_YET = u'NOT-IMPLEMENTED-YET'
+UNPACKAGEABLE = u'UNPACKAGEABLE'
+OTHER = u'OTHER'
+
+
+def map_error(details):
+    error = get_error_text(details)
+    if not error:
+        return
+    category, extra = map_error_to_tuple(error)
+    if extra is None:
+        return category
+    return extra
+
+
+def map_error_to_tuple(error):
+    if 'TimeoutError' in error:
+        return TIMEOUT, None
+    if error.startswith('No eligible backends'):
+        return NO_BACKEND, None
+    multiarch_tag = 'Only a single architecture at a time is supported.'
+    if error.endswith(multiarch_tag):
+        return MULTI_ARCH, None
+    missing_dep_tag = "Can't find dependency for"
+    if error.startswith(missing_dep_tag):
+        return MISSING_DEP, error[len(missing_dep_tag):].strip().strip('"')
+    if 'not implemented yet' in error:
+        return (NOT_IMPLEMENTED_YET,
+                error.split('not implemented yet', 1)[0].strip())
+    if ' will never be implemented' in error:
+        return (UNPACKAGEABLE,
+                error.split(' will never be implemented', 1)[0].strip())
+    return OTHER, error
+
+
 def group_errors(parsed_results):
     error_to_results = {}
     for result in parsed_results:
@@ -168,6 +201,37 @@
     return output


+def _encode_csv_cell(cell):
+    text = unicode(cell)
+    if u',' in text:
+        text = u'"%s"' % (text,)
+    return text.encode('utf8')
+
+
+def _result_to_csv_row(result, columns):
+    return ','.join(map(_encode_csv_cell, (col(result) for col in columns)))
+
+
+def results_to_csv(results, output):
+    columns = [
+        ('app_id', get_app_id),
+        ('name', get_app_name),
+        ('queue', get_app_queue),
+        ('state', get_app_state),
+        ('successful', was_successful),
+        ('backend', get_backend),
+        ('error', get_error_category),
+        ('duration', get_duration),
+        ('url', get_app_url),
+        ]
+    titles, functions = zip(*columns)
+    output.write(','.join('"%s"' % title for title in titles))
+    output.write('\n')
+    for result in results:
+        output.write(_result_to_csv_row(result, functions))
+        output.write('\n')
+
+
 def get_app_state(test):
     for tag in test.get('tags', []):
         if tag.startswith('state='):
@@ -175,10 +239,50 @@
     return None


+def get_app_name(test):
+    return test['test_id'].rsplit(':', 1)[0]
+
+
+def get_app_id(test):
+    return test['test_id'].rsplit(':', 1)[1]
+
+
 def get_app_url(test):
     return test['details']['myapps']['package_url']


+def get_backend(test):
+    backend = guess_backend(
+        get_error_text(test['details']),
+        get_app_url(test),
+        was_successful(test))
+    if backend:
+        return backend
+    return 'UNKNOWN'
+
+
+def guess_backend(error_text, url=None, successful=False):
+    if not error_text:
+        # The only useful information we get back about the backend actually
+        # used comes from the error. Still, we can guess which backend was
+        # probably used based on a few simple heuristics.
+        if url and url.endswith('.pdf'):
+            return 'PDF'
+        if successful:
+            # The only other successful backend as of 2012-09-24 is the binary
+            # backend.
+            return 'binary'
+        return None
+    category, extra = map_error_to_tuple(error_text)
+    if category == NO_BACKEND:
+        return NO_BACKEND
+    if category in (UNPACKAGEABLE, NOT_IMPLEMENTED_YET):
+        return extra
+    if category in (MISSING_DEP, MULTI_ARCH):
+        return 'binary'
+    return None
+
+
 def get_app_queue(test):
     url = get_app_url(test)
     if '/internal_packages/' in url:
@@ -188,6 +292,13 @@
     return None


+def get_error_category(test):
+    error = get_error_text(test.get('details', None))
+    if not error:
+        return ''
+    return map_error_to_tuple(error)[0]
+
+
 def make_filter(include_state=None, include_queue=None):
     def philtre(result):
         return (
@@ -203,6 +314,9 @@
     """
     w = output_stream.write
     results = list(results)
+    if not results:
+        w("No results\n")
+        return
     successes, failures = calculate_success_ratio(results)
     w('Summary\n')
     w('-------\n')
@@ -234,7 +348,12 @@


 def subunit_to_json():
+    OUTPUTS = {
+        'json': lambda results, output: dump_json(list(results), output),
+        'csv': results_to_csv,
+        }
     parser = make_base_options()
+    parser.add_argument('--format', choices=OUTPUTS.keys(), default='json')
     args = parser.parse_args()
-    dump_json(list(iter_results(args)), sys.stdout)
+    OUTPUTS[args.format](iter_results(args), sys.stdout)
     return 0

=== modified file 'txpkgme/scoreboard.py'
--- txpkgme/scoreboard.py 2012-09-11 13:44:35 +0000
+++ txpkgme/scoreboard.py 2012-09-24 15:34:21 +0000
@@ -30,10 +30,9 @@
 from subunit import TestProtocolClient
 from testtools import PlaceHolder
 from testtools.content import (
-    Content,
+    json_content,
     text_content,
     )
-from testtools.content_type import ContentType
 from twisted.internet import defer
 from twisted.internet import reactor as mod_reactor

@@ -50,11 +49,6 @@
 from .utils import parse_json


-def json_content(data):
-    JSON_TYPE = ContentType('application', 'json', {'charset': 'utf8'})
-    return Content(JSON_TYPE, lambda: [json.dumps(data)])
-
-
 MYAPPS_SERVERS = {
     'vps': 'https://sca.razorgirl.info/dev/api/app-metadata/',
     'staging': 'https://developer.staging.ubuntu.com/dev/api/app-metadata/',

=== modified file 'txpkgme/tests/test_reports.py'
--- txpkgme/tests/test_reports.py 2012-09-14 12:16:15 +0000
+++ txpkgme/tests/test_reports.py 2012-09-24 15:34:21 +0000
@@ -13,18 +13,24 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

 from datetime import timedelta
+import random
 from StringIO import StringIO
 import traceback

 from testtools import TestCase
-from testtools.content import text_content
+from testtools.content import (
+    json_content,
+    text_content,
+    )
 from twisted.internet.error import TimeoutError

 from txpkgme.reports import (
     calculate_success_ratio,
     get_app_queue,
     get_app_state,
+    guess_backend,
     map_error,
+    median,
     format_success_failure,
     MULTI_ARCH,
     NO_BACKEND,
@@ -32,7 +38,6 @@
     parse_subunit,
     TIMEOUT,
     )
-from txpkgme.scoreboard import json_content


 def get_stack_trace(f, *a, **kw):
@@ -221,3 +226,92 @@
         url = 'https://myapps.developer.ubuntu.com/internal_packages/2011/06/whatever.tar.gz'
         queue = get_app_queue({'details': {'myapps': {'package_url': url}}})
         self.assertEquals('commercial', queue)
+
+
+class TestGuessBackend(TestCase):
+
+    def test_no_eligible(self):
+        backend = guess_backend('No eligible backends')
+        self.assertEqual(NO_BACKEND, backend)
+
+    def test_unimplemented(self):
+        backend = guess_backend('Foo Bar not implemented yet')
+        self.assertEqual('Foo Bar', backend)
+
+    def test_unimplementable(self):
+        backend = guess_backend('Foo Bar will never be implemented')
+        self.assertEqual('Foo Bar', backend)
+
+    def test_no_error(self):
+        backend = guess_backend(None)
+        self.assertIs(None, backend)
+
+    def test_unrecognized_string(self):
+        backend = guess_backend(self.getUniqueString())
+        self.assertIs(None, backend)
+
+    def test_pdf_url(self):
+        backend = guess_backend(None, 'http://example.com/foo.pdf')
+        self.assertEqual('PDF', backend)
+
+    def test_timeout(self):
+        backend = guess_backend("TimeoutError")
+        self.assertEqual(None, backend)
+
+    def test_missing_dep_implies_binary(self):
+        backend = guess_backend("Can't find dependency for 'libfoo.so'")
+        self.assertEqual('binary', backend)
+
+    def test_multi_arch_implies_binary(self):
+        backend = guess_backend(
+            "Only a single architecture at a time is supported.")
+        self.assertEqual('binary', backend)
+
+    def test_successful_non_pdf_is_binary(self):
+        backend = guess_backend(None, 'http://example.com/whatever', True)
+        self.assertEqual('binary', backend)
+
+
+def shuffle(sequence):
+    shuffled = list(sequence)
+    random.shuffle(shuffled)
+    return shuffled
+
+
+def num_less_than(sequence, limit):
+    return len([1 for x in sequence if x <= limit])
+
+
+def ratio_less_than(sequence, limit):
+    return num_less_than(sequence, limit) / float(len(sequence))
+
+
+class TestMedian(TestCase):
+
+    def test_odd_length(self):
+        self.assertEqual(2, median(shuffle([1, 2, 3])))
+        self.assertEqual(3, median(shuffle([1, 2, 3, 4, 5])))
+
+    def test_even_length(self):
+        self.assertEqual(2.5, median(shuffle([1, 2, 3, 4])))
+        self.assertEqual(3.5, median(shuffle([1, 2, 3, 4, 5, 6])))
+
+    def test_singleton(self):
+        self.assertEqual(1, median([1]))
+
+    def test_percentile(self):
+        self.assertEqual(8.5, median(shuffle(range(10)), 0.9))
+        self.assertEqual(17.5, median(shuffle(range(20)), 0.9))
+        self.assertEqual(89.5, median(shuffle(range(100)), 0.9))
+
+    def assertMedianConsistent(self, sequence, percentile):
+        point = median(sequence, percentile)
+        self.assertEqual(ratio_less_than(sequence, point), percentile)
+
+    def test_consistency(self):
+        self.assertMedianConsistent(shuffle(range(10)), 0.9)
+        self.assertMedianConsistent(shuffle(range(20)), 0.9)
+        self.assertMedianConsistent(shuffle(range(100)), 0.9)
+
+    def test_empty_sequence(self):
+        self.assertRaises(ValueError, median, [])
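
As a quick end-to-end illustration of the new CSV path: a hypothetical usage
sketch, not part of the branch. The result dict keys are taken from what the
column functions in reports.py above read; the values are made up.

    # Hypothetical usage of results_to_csv from the diff above.
    from datetime import timedelta
    from StringIO import StringIO

    from txpkgme.reports import results_to_csv

    result = {
        'test_id': 'some.app.name:1234',
        'status': 'success',
        'tags': ['state=Published'],
        'duration': timedelta(seconds=12, microseconds=500000),
        'details': {'myapps': {'package_url': 'http://example.com/app.tar.gz'}},
    }
    out = StringIO()
    results_to_csv([result], out)
    print out.getvalue()
    # First line is the quoted header row ("app_id","name","queue",...),
    # followed by one comma-separated row per result.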
