testresources

Merge lp:~lifeless/testresources/bug-522338 into lp:~testresources-developers/testresources/trunk

bug-522338
Merge into trunk

Proposed by Robert Collins on 2010-02-19

Status:

Merged

Merged at revision:

not available

Proposed branch:

lp:~lifeless/testresources/bug-522338

Merge into:

lp:~testresources-developers/testresources/trunk

Diff against target:

598 lines (+447/-34)

5 files modified

NEWS (+8/-0)
lib/testresources/__init__.py (+248/-28)
lib/testresources/tests/__init__.py (+2/-0)
lib/testresources/tests/test_optimising_test_suite.py (+48/-6)
lib/testresources/tests/test_resource_graph.py (+141/-0)

To merge this branch:

bzr merge lp:~lifeless/testresources/bug-522338

Related bugs:

Bug #522338: sortTests too slow

Medium

Fix Released

Link a bug report

Reviewer	Review Type	Date Requested	Status
testresources developers		2010-02-19	Pending
Review via email: mp+19684@code.launchpad.net

Revision history for this message

Robert Collins (lifeless) wrote on 2010-02-19:

Faster stuff good. Good stuff good. Stuff is good. Faster is good, so stuff is faster.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk

Subscribers

People subscribed via source and target branches

to all changes:

Robert Collins

testresources developers

testresources

Merge lp:~lifeless/testresources/bug-522338 into lp:~testresources-developers/testresources/trunk

Commit message

Description of the change

Preview Diff

Subscribers

 === modified file 'NEWS'
 --- NEWS	2010-02-12 02:01:38 +0000
 +++ NEWS	2010-02-19 05:54:16 +0000
@@ -13,6 +13,14 @@
  * Rename TestResource to TestResourceManager leaving TestResource as an alias.
    Fixes bug #520769.
++* Test sorting no longer performs N! work on tests that can never benefit from
++  order optimisations (when resources are not shared an arbitrary order is
++  sufficient). Partial fix for bug #522338.
++
++* Test sorting now uses a heuristic on each partition to get a sort that is
++  no worse than twice the optimal number of setup/teardown operations and is
++  fast to calculate. Fixes bug #522338
++
 .2.3
  -----
 === modified file 'lib/testresources/__init__.py'
 --- lib/testresources/__init__.py	2010-02-12 02:01:38 +0000
 +++ lib/testresources/__init__.py	2010-02-19 05:54:16 +0000
@@ -19,7 +19,9 @@
  """TestResources: declarative management of external resources for tests."""
++import heapq
  import inspect
++import operator
  import unittest
@@ -28,6 +30,105 @@
      return testresources.tests.test_suite()
++def _digraph_to_graph(digraph, prime_node_mapping):
++    """Convert digraph to a graph.
++
++    :param digraph: A directed graph in the form
++        {from:{to:value}}.
++    :param prime_node_mapping: A mapping from every
++        node in digraph to a new unique and not in digraph node.
++    :return: A symmetric graph in the form {from:to:value}} created by
++        creating edges in the result between every N to M-prime with the
++        original N-M value and from every N to N-prime with a cost of 0.
++        No other edges are created.
++    """
++    result = {}
++    for from_node, from_prime_node in prime_node_mapping.iteritems():
++        result[from_node] = {from_prime_node:0}
++        result[from_prime_node] = {from_node:0}
++    for from_node, to_nodes in digraph.iteritems():
++        from_prime = prime_node_mapping[from_node]
++        for to_node, value in to_nodes.iteritems():
++            to_prime = prime_node_mapping[to_node]
++            result[from_prime][to_node] = value
++            result[to_node][from_prime] = value
++    return result
++
++
++def _kruskals_graph_MST(graph):
++    """Find the minimal spanning tree in graph using Kruskals algorithm.
++
++    See http://en.wikipedia.org/wiki/Kruskal%27s_algorithm.
++    :param graph: A graph in {from:{to:value}} form. Every node present in
++        graph must be in the outer dict (because graph is not a directed graph.
++    :return: A graph with all nodes and those vertices that are part of the MST
++        for graph. If graph is not connected, then the result will also be a
++        forest.
++    """
++    # forest contains all the nodes -> graph that node is in.
++    forest = {}
++    # graphs is the count of graphs we have yet to combine.
++    for node in graph:
++        forest[node] = {node:{}}
++    graphs = len(forest)
++    # collect edges: every edge is present twice (due to the graph
++    # representation), so normalise.
++    edges = set()
++    for from_node, to_nodes in graph.iteritems():
++        for to_node, value in to_nodes.iteritems():
++            edge = (value,) + tuple(sorted([from_node, to_node]))
++            edges.add(edge)
++    edges = list(edges)
++    heapq.heapify(edges)
++    while edges and graphs > 1:
++        # more edges to go and we haven't gotten a spanning tree yet.
++        edge = heapq.heappop(edges)
++        g1 = forest[edge[1]]
++        g2 = forest[edge[2]]
++        if g1 is g2:
++            continue # already joined
++        # combine g1 and g2 into g1
++        graphs -= 1
++        for from_node, to_nodes in g2.iteritems():
++            #remember its symmetric, don't need to do 'to'.
++            forest[from_node] = g1
++            g1.setdefault(from_node, {}).update(to_nodes)
++        # add edge
++        g1[edge[1]][edge[2]] = edge[0]
++        g1[edge[2]][edge[1]] = edge[0]
++    # union the remaining graphs
++    _, result = forest.popitem()
++    for _, g2 in forest.iteritems():
++        if g2 is result: # common case
++            continue
++        for from_node, to_nodes in g2.iteritems():
++            result.setdefault(from_node, {}).update(to_nodes)
++    return result
++
++
++def _resource_graph(resource_sets):
++    """Convert an iterable of resource_sets into a graph.
++
++    Each resource_set in the iterable is treated as a node, and each resource
++    in that resource_set is used as an edge to other nodes.
++    """
++    nodes = {}
++    edges = {}
++    for resource_set in resource_sets:
++        # put node in nodes
++        node = frozenset(resource_set)
++        nodes[node] = set()
++        # put its contents in as edges
++        for resource in resource_set:
++            edges.setdefault(resource, []).append(node)
++    # populate the adjacent members of nodes
++    for node, connected in nodes.iteritems():
++        for resource in node:
++            connected.update(edges[resource])
++        connected.discard(node)
++    return nodes
++
++
  def split_by_resources(tests):
      """Split a list of tests by the resources that the tests use.
@@ -47,6 +148,30 @@
      return resource_set_tests
++def _strongly_connected_components(graph, no_resources):
++    """Find the strongly connected components in graph.
++
++    This is essentially a nonrecursive flatterning of Tarjan's method. It
++    may be worth profiling against an actual Tarjan's implementation at some
++    point, but sets are often faster than python calls.
++
++    graph gets consumed, but that could be changed easily enough.
++    """
++    partitions = []
++    while graph:
++        node, pending = graph.popitem()
++        current_partition = set([node])
++        while pending:
++            # add all the nodes connected to a connected node to pending.
++            node = pending.pop()
++            current_partition.add(node)
++            pending.update(graph.pop(node, []))
++            # don't try to process things we've allready processed.
++            pending.difference_update(current_partition)
++        partitions.append(current_partition)
++    return partitions
++
++
  class OptimisingTestSuite(unittest.TestSuite):
      """A resource creation optimising TestSuite."""
@@ -126,54 +251,149 @@
      def sortTests(self):
          """Attempt to topographically sort the contained tests.
++        This function biases to reusing a resource: it assumes that resetting
++        a resource is usually cheaper than a teardown + setup; and that most
++        resources are not dirtied by most tests.
++
          Feel free to override to improve the sort behaviour.
          """
          # We group the tests by the resource combinations they use,
          # since there will usually be fewer resource combinations than
--        # actual tests and there can never be more.
++        # actual tests and there can never be more: This gives us 'nodes' or
++        # 'resource_sets' that represent many tests using the same set of
++        # resources.
          resource_set_tests = split_by_resources(self._tests)
--
--        graph = self._getGraph(resource_set_tests.keys())
++        # Partition into separate sets of resources, there is no ordering
++        # preference between sets that do not share members. Rationale:
++        # If resource_set A and B have no common resources, AB and BA are
++        # equally good - the global setup/teardown sums are identical. Secondly
++        # if A shares one or more resources with C, then pairing AC|CA is better
++        # than having B between A and C, because the shared resources can be
++        # reset or reused. Having partitioned we can use connected graph logic
++        # on each partition.
++        resource_set_graph = _resource_graph(resource_set_tests)
          no_resources = frozenset()
--        # Recursive visit-all-nodes all-permutations.
--        def cost(from_set, resource_sets):
--            """Get the cost of resource traversal for resource sets.
--
--            :return: cost, order
--            """
--            if not resource_sets:
--                # tear down last resources
--                return graph[from_set][no_resources], []
--            costs = []
--            for to_set in resource_sets:
--                child_cost, child_order = cost(
--                    to_set, resource_sets - set([to_set]))
--                costs.append((graph[from_set][to_set] + child_cost,
--                              [to_set] + child_order))
--            return min(costs)
--        _, order = cost(no_resources,
--                        set(resource_set_tests) - set([no_resources]))
--        order.append(no_resources)
--        self._tests = sum(
--            (resource_set_tests[resource_set] for resource_set in order), [])
++        # A list of resource_set_tests, all fully internally connected.
++        partitions = _strongly_connected_components(resource_set_graph,
++            no_resources)
++        result = []
++        for partition in partitions:
++            # we process these at the end for no particularly good reason (it makes
++            # testing slightly easier).
++            if partition == [no_resources]:
++                continue
++            order = self._makeOrder(partition)
++            # Spit this partition out into result
++            for resource_set in order:
++                result.extend(resource_set_tests[resource_set])
++        result.extend(resource_set_tests[no_resources])
++        self._tests = result
      def _getGraph(self, resource_sets):
          """Build a graph of the resource-using nodes.
++        This special cases set(['root']) to be a node with no resources and
++        edges to everything.
++
          :return: A complete directed graph of the switching costs
--            between each resource combination.
++            between each resource combination. Note that links from N to N are
++            not included.
          """
++        no_resources = frozenset()
          graph = {}
++        root = set(['root'])
++        # bottom = set(['bottom'])
          for from_set in resource_sets:
              graph[from_set] = {}
++            if from_set == root:
++                from_resources = no_resources
++            #elif from_set == bottom:
++            #    continue # no links from bottom
++            else:
++                from_resources = from_set
              for to_set in resource_sets:
                  if from_set is to_set:
--                    graph[from_set][to_set] = 0
++                    continue # no self-edges
++                #if to_set == bottom:
++                #   if from_set == root:
++                #       continue # no short cuts!
++                #   to_resources = no_resources
++                #el
++                if to_set == root:
++                    continue # no links to root
                  else:
--                    graph[from_set][to_set] = self.cost_of_switching(
--                        from_set, to_set)
++                    to_resources = to_set
++                graph[from_set][to_set] = self.cost_of_switching(
++                        from_resources, to_resources)
          return graph
++    def _makeOrder(self, partition):
++        """Return a order for the resource sets in partition."""
++        # This problem is NP-C - find the lowest cost hamiltonian path. It
++        # also meets the triangle inequality, so we can use an approximation.
++        # TODO: implement Christofides.
++        # See http://en.wikipedia.org/wiki/Travelling_salesman_problem#Metric_TSP
++        # We need a root
++        root = frozenset(['root'])
++        partition.add(root)
++        # and an end
++        # partition.add(frozenset(['bottom']))
++        # get rid of 'noresources'
++        partition.discard(frozenset())
++        digraph = self._getGraph(partition)
++        # build a prime map
++        primes = {}
++        prime = frozenset(['prime'])
++        for node in digraph:
++            primes[node] = node.union(prime)
++        graph = _digraph_to_graph(digraph, primes)
++        mst = _kruskals_graph_MST(graph)
++        # Because the representation is a digraph, we can build an Eulerian
++        # cycle directly from the representation by just following the links:
++        # a node with only 1 'edge' has two directed edges; and we can only
++        # enter and leave it once, so the edge lookups will match precisely.
++        # As the mst is a spanning tree, the graph will become disconnected
++        # (we choose non-disconnecting edges first)
++        #  - for a stub node (1 outgoing link): when exiting it unless it is
++        #    the first node started at
++        # - for a non-stub node if choosing an outgoing link where some other
++        #   endpoints incoming link has not been traversed. [exit by a
++        #   different node than entering, until all exits taken].
++        # We don't need the mst after, so it gets modified in place.
++        node = root
++        cycle = [node]
++        steps = 2 * (len(mst) - 1)
++        for step in xrange(steps):
++            found = False
++            outgoing = None # For clearer debugging.
++            for outgoing in mst[node]:
++                if node in mst[outgoing]:
++                    # we have a return path: take it
++                    # print node, '->', outgoing, ' can return'
++                    del mst[node][outgoing]
++                    node = outgoing
++                    cycle.append(node)
++                    found = True
++                    break
++            if not found:
++                # none of the outgoing links have an incoming, so follow an
++                # arbitrary one (the last examined outgoing)
++                # print node, '->', outgoing
++                del mst[node][outgoing]
++                node = outgoing
++                cycle.append(node)
++        # Convert to a path:
++        visited = set()
++        order = []
++        for node in cycle:
++            if node in visited:
++                continue
++            if node in primes:
++                order.append(node)
++            visited.add(node)
++        assert order[0] == root
++        return order[1:]
++
  class TestLoader(unittest.TestLoader):
      """Custom TestLoader to set the right TestSuite class."""
 === modified file 'lib/testresources/tests/__init__.py'
 --- lib/testresources/tests/__init__.py	2009-07-13 08:22:13 +0000
 +++ lib/testresources/tests/__init__.py	2010-02-19 05:54:16 +0000
@@ -28,10 +28,12 @@
      import testresources.tests.test_resourced_test_case
      import testresources.tests.test_test_loader
      import testresources.tests.test_test_resource
++    import testresources.tests.test_resource_graph
      result = TestUtil.TestSuite()
      result.addTest(testresources.tests.test_test_loader.test_suite())
      result.addTest(testresources.tests.test_test_resource.test_suite())
      result.addTest(testresources.tests.test_resourced_test_case.test_suite())
++    result.addTest(testresources.tests.test_resource_graph.test_suite())
      result.addTest(
          testresources.tests.test_optimising_test_suite.test_suite())
      return result
 === modified file 'lib/testresources/tests/test_optimising_test_suite.py'
 --- lib/testresources/tests/test_optimising_test_suite.py	2010-01-05 05:51:14 +0000
 +++ lib/testresources/tests/test_optimising_test_suite.py	2010-02-19 05:54:16 +0000
@@ -403,7 +403,7 @@
          resource = self.makeResource()
          suite = testresources.OptimisingTestSuite()
          graph = suite._getGraph([frozenset()])
--        self.assertEqual({frozenset(): {frozenset(): 0}}, graph)
++        self.assertEqual({frozenset(): {}}, graph)
      def testTwoCasesInGraph(self):
          res1 = self.makeResource()
@@ -415,9 +415,9 @@
          suite = testresources.OptimisingTestSuite()
          graph = suite._getGraph([no_resources, set1, set2])
--        self.assertEqual({no_resources: {no_resources: 0, set1: 2, set2: 1},
--                          set1: {no_resources: 2, set1: 0, set2: 1},
--                          set2: {no_resources: 1, set1: 1, set2: 0}}, graph)
++        self.assertEqual({no_resources: {set1: 2, set2: 1},
++                          set1: {no_resources: 2, set2: 1},
++                          set2: {no_resources: 1, set1: 1 }}, graph)
  class TestGraphStuff(testtools.TestCase):
@@ -489,10 +489,15 @@
          return permutations
      def testBasicSortTests(self):
--        # Test every permutation of inputs, with equal cost resources and
--        # legacy tests.
++        # Test every permutation of inputs, with legacy tests.
++        # Cannot use equal costs because of the use of
++        # a 2*optimal heuristic for sorting: with equal
++        # costs the wrong sort order is < twice the optimal
++        # weight, and thus can be selected.
          resource_one = testresources.TestResource()
          resource_two = testresources.TestResource()
++        resource_two.setUpCost = 5
++        resource_two.tearDownCost = 5
          resource_three = testresources.TestResource()
          self.case1.resources = [
@@ -584,6 +589,43 @@
                  permutation.index(self.case3) < permutation.index(self.case4),
                  sorted.index(self.case3) < sorted.index(self.case4))
++    def testSortingTwelveIndependentIsFast(self):
++        # Given twelve independent resource sets, my patience is not exhausted.
++        managers = []
++        for pos in range(12):
++            managers.append(testresources.TestResourceManager())
++        # Add more sample tests
++        cases = [self.case1, self.case2, self.case3, self.case4]
++        for pos in range(5,13):
++            cases.append(
++                testtools.clone_test_with_new_id(cases[0], 'case%d' % pos))
++        # We care that this is fast in this test, so we don't need to have
++        # overlapping resource usage
++        for case, manager in zip(cases, managers):
++            case.resources = [('_resource', manager)]
++        # Any sort is ok, as long as its the right length :)
++        result = self.sortTests(cases)
++        self.assertEqual(12, len(result))
++
++    def testSortingTwelveOverlappingIsFast(self):
++        # Given twelve connected resource sets, my patience is not exhausted.
++        managers = []
++        for pos in range(12):
++            managers.append(testresources.TestResourceManager())
++        # Add more sample tests
++        cases = [self.case1, self.case2, self.case3, self.case4]
++        for pos in range(5,13):
++            cases.append(
++                testtools.clone_test_with_new_id(cases[0], 'case%d' % pos))
++        tempdir = testresources.TestResourceManager()
++        # give all tests a tempdir, enough to provoke a single partition in
++        # the current code.
++        for case, manager in zip(cases, managers):
++            case.resources = [('_resource', manager), ('tempdir', tempdir)]
++        # Any sort is ok, as long as its the right length :)
++        result = self.sortTests(cases)
++        self.assertEqual(12, len(result))
++
      def testSortConsidersDependencies(self):
          """Tests with different dependencies are sorted together."""
          # We test this by having two resources (one and two) that share a very
 === added file 'lib/testresources/tests/test_resource_graph.py'
 --- lib/testresources/tests/test_resource_graph.py	1970-01-01 00:00:00 +0000
 +++ lib/testresources/tests/test_resource_graph.py	2010-02-19 05:54:16 +0000
@@ -0,0 +1,141 @@
++#
++#  testresources: extensions to python unittest to allow declaritive use
++#  of resources by test cases.
++#  Copyright (C) 2010  Robert Collins <robertc@robertcollins.net>
++#
++#  This program is free software; you can redistribute it and/or modify
++#  it under the terms of the GNU General Public License as published by
++#  the Free Software Foundation; either version 2 of the License, or
++#  (at your option) any later version.
++#
++#  This program is distributed in the hope that it will be useful,
++#  but WITHOUT ANY WARRANTY; without even the implied warranty of
++#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++#  GNU General Public License for more details.
++#
++#  You should have received a copy of the GNU General Public License
++#  along with this program; if not, write to the Free Software
++#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++#
++
++"""Test _resource_graph(resource_sets)."""
++
++import testtools
++import testresources
++from testresources import split_by_resources, _resource_graph
++from testresources.tests import ResultWithResourceExtensions
++import unittest
++
++
++def test_suite():
++    from testresources.tests import TestUtil
++    loader = TestUtil.TestLoader()
++    result = loader.loadTestsFromName(__name__)
++    return result
++
++
++class TestResourceGraph(testtools.TestCase):
++
++    def test_empty(self):
++        no_resources = frozenset()
++        resource_sets = [no_resources]
++        self.assertEqual({no_resources:set([])}, _resource_graph(resource_sets))
++
++    def test_discrete(self):
++        resset1 = frozenset([testresources.TestResourceManager()])
++        resset2 = frozenset([testresources.TestResourceManager()])
++        resource_sets = [resset1, resset2]
++        result = _resource_graph(resource_sets)
++        self.assertEqual({resset1:set([]), resset2:set([])}, result)
++
++    def test_overlapping(self):
++        res1 = testresources.TestResourceManager()
++        res2 = testresources.TestResourceManager()
++        resset1 = frozenset([res1])
++        resset2 = frozenset([res2])
++        resset3 = frozenset([res1, res2])
++        resource_sets = [resset1, resset2, resset3]
++        result = _resource_graph(resource_sets)
++        self.assertEqual(
++            {resset1:set([resset3]),
++             resset2:set([resset3]),
++             resset3:set([resset1, resset2])},
++            result)
++
++
++class TestDigraphToGraph(testtools.TestCase):
++
++    def test_wikipedia_example(self):
++        """Converting a digraph mirrors it in the XZ axis (matrix view).
++
++        See http://en.wikipedia.org/wiki/Travelling_salesman_problem \
++        #Solving_by_conversion_to_Symmetric_TSP
++        """
++        #       A   B   C
++        #   A       1   2
++        #   B   6       3
++        #   C   5   4
++        A = "A"
++        Ap = "A'"
++        B = "B"
++        Bp = "B'"
++        C = "C"
++        Cp = "C'"
++        digraph = {A:{     B:1, C:2},
++                   B:{A:6,      C:3},
++                   C:{A:5, B:4     }}
++        # and the output
++        #       A   B   C   A'  B'  C'
++        #   A               0   6   5
++        #   B               1   0   4
++        #   C               2   3   0
++        #   A'  0   1   2
++        #   B'  6   0   3
++        #   C'  5   4   0
++        expected = {
++            A :{              Ap:0, Bp:6, Cp:5},
++            B :{              Ap:1, Bp:0, Cp:4},
++            C :{              Ap:2, Bp:3, Cp:0},
++            Ap:{A:0, B:1, C:2                 },
++            Bp:{A:6, B:0, C:3                 },
++            Cp:{A:5, B:4, C:0                 }}
++        self.assertEqual(expected,
++            testresources._digraph_to_graph(digraph, {A:Ap, B:Bp, C:Cp}))
++
++
++class TestKruskalsMST(testtools.TestCase):
++
++    def test_wikipedia_example(self):
++        """Performing KruskalsMST on a graph returns a spanning tree.
++
++        See http://en.wikipedia.org/wiki/Kruskal%27s_algorithm.
++        """
++        A = "A"
++        B = "B"
++        C = "C"
++        D = "D"
++        E = "E"
++        F = "F"
++        G = "G"
++        graph = {
++            A:{     B:7,      D:5},
++            B:{A:7,      C:8, D:9,  E:7},
++            C:{     B:8,            E:5},
++            D:{A:5, B:9,            E:15, F:6},
++            E:{     B:7, C:5, D:15,       F:8, G:9},
++            F:{               D:6,  E:8,       G:11},
++            G:{                     E:9, F:11}}
++        expected = {
++            A:{     B:7,      D:5},
++            B:{A:7,                 E:7},
++            C:{                     E:5},
++            D:{A:5,                      F:6},
++            E:{     B:7, C:5,                  G:9},
++            F:{               D:6},
++            G:{                     E:9}}
++        result = testresources._kruskals_graph_MST(graph)
++        e_weight = sum(sum(row.itervalues()) for row in expected.itervalues())
++        r_weight = sum(sum(row.itervalues()) for row in result.itervalues())
++        self.assertEqual(e_weight, r_weight)
++        self.assertEqual(expected,
++            testresources._kruskals_graph_MST(graph))