Merge lp:~lifeless/testresources/bug-522338 into lp:~testresources-developers/testresources/trunk

Proposed by Robert Collins
Status: Merged
Merged at revision: not available
Proposed branch: lp:~lifeless/testresources/bug-522338
Merge into: lp:~testresources-developers/testresources/trunk
Diff against target: 598 lines (+447/-34)
5 files modified
NEWS (+8/-0)
lib/testresources/__init__.py (+248/-28)
lib/testresources/tests/__init__.py (+2/-0)
lib/testresources/tests/test_optimising_test_suite.py (+48/-6)
lib/testresources/tests/test_resource_graph.py (+141/-0)
To merge this branch: bzr merge lp:~lifeless/testresources/bug-522338
Reviewer Review Type Date Requested Status
testresources developers Pending
Review via email: mp+19684@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Robert Collins (lifeless) wrote :

Faster stuff good. Good stuff good. Stuff is good. Faster is good, so stuff is faster.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'NEWS'
2--- NEWS 2010-02-12 02:01:38 +0000
3+++ NEWS 2010-02-19 05:54:16 +0000
4@@ -13,6 +13,14 @@
5 * Rename TestResource to TestResourceManager leaving TestResource as an alias.
6 Fixes bug #520769.
7
8+* Test sorting no longer performs N! work on tests that can never benefit from
9+ order optimisations (when resources are not shared an arbitrary order is
10+ sufficient). Partial fix for bug #522338.
11+
12+* Test sorting now uses a heuristic on each partition to get a sort that is
13+ no worse than twice the optimal number of setup/teardown operations and is
14+ fast to calculate. Fixes bug #522338
15+
16 0.2.3
17 -----
18
19
20=== modified file 'lib/testresources/__init__.py'
21--- lib/testresources/__init__.py 2010-02-12 02:01:38 +0000
22+++ lib/testresources/__init__.py 2010-02-19 05:54:16 +0000
23@@ -19,7 +19,9 @@
24
25 """TestResources: declarative management of external resources for tests."""
26
27+import heapq
28 import inspect
29+import operator
30 import unittest
31
32
33@@ -28,6 +30,105 @@
34 return testresources.tests.test_suite()
35
36
37+def _digraph_to_graph(digraph, prime_node_mapping):
38+ """Convert digraph to a graph.
39+
40+ :param digraph: A directed graph in the form
41+ {from:{to:value}}.
42+ :param prime_node_mapping: A mapping from every
43+ node in digraph to a new unique and not in digraph node.
44+ :return: A symmetric graph in the form {from:to:value}} created by
45+ creating edges in the result between every N to M-prime with the
46+ original N-M value and from every N to N-prime with a cost of 0.
47+ No other edges are created.
48+ """
49+ result = {}
50+ for from_node, from_prime_node in prime_node_mapping.iteritems():
51+ result[from_node] = {from_prime_node:0}
52+ result[from_prime_node] = {from_node:0}
53+ for from_node, to_nodes in digraph.iteritems():
54+ from_prime = prime_node_mapping[from_node]
55+ for to_node, value in to_nodes.iteritems():
56+ to_prime = prime_node_mapping[to_node]
57+ result[from_prime][to_node] = value
58+ result[to_node][from_prime] = value
59+ return result
60+
61+
62+def _kruskals_graph_MST(graph):
63+ """Find the minimal spanning tree in graph using Kruskals algorithm.
64+
65+ See http://en.wikipedia.org/wiki/Kruskal%27s_algorithm.
66+ :param graph: A graph in {from:{to:value}} form. Every node present in
67+ graph must be in the outer dict (because graph is not a directed graph.
68+ :return: A graph with all nodes and those vertices that are part of the MST
69+ for graph. If graph is not connected, then the result will also be a
70+ forest.
71+ """
72+ # forest contains all the nodes -> graph that node is in.
73+ forest = {}
74+ # graphs is the count of graphs we have yet to combine.
75+ for node in graph:
76+ forest[node] = {node:{}}
77+ graphs = len(forest)
78+ # collect edges: every edge is present twice (due to the graph
79+ # representation), so normalise.
80+ edges = set()
81+ for from_node, to_nodes in graph.iteritems():
82+ for to_node, value in to_nodes.iteritems():
83+ edge = (value,) + tuple(sorted([from_node, to_node]))
84+ edges.add(edge)
85+ edges = list(edges)
86+ heapq.heapify(edges)
87+ while edges and graphs > 1:
88+ # more edges to go and we haven't gotten a spanning tree yet.
89+ edge = heapq.heappop(edges)
90+ g1 = forest[edge[1]]
91+ g2 = forest[edge[2]]
92+ if g1 is g2:
93+ continue # already joined
94+ # combine g1 and g2 into g1
95+ graphs -= 1
96+ for from_node, to_nodes in g2.iteritems():
97+ #remember its symmetric, don't need to do 'to'.
98+ forest[from_node] = g1
99+ g1.setdefault(from_node, {}).update(to_nodes)
100+ # add edge
101+ g1[edge[1]][edge[2]] = edge[0]
102+ g1[edge[2]][edge[1]] = edge[0]
103+ # union the remaining graphs
104+ _, result = forest.popitem()
105+ for _, g2 in forest.iteritems():
106+ if g2 is result: # common case
107+ continue
108+ for from_node, to_nodes in g2.iteritems():
109+ result.setdefault(from_node, {}).update(to_nodes)
110+ return result
111+
112+
113+def _resource_graph(resource_sets):
114+ """Convert an iterable of resource_sets into a graph.
115+
116+ Each resource_set in the iterable is treated as a node, and each resource
117+ in that resource_set is used as an edge to other nodes.
118+ """
119+ nodes = {}
120+ edges = {}
121+ for resource_set in resource_sets:
122+ # put node in nodes
123+ node = frozenset(resource_set)
124+ nodes[node] = set()
125+ # put its contents in as edges
126+ for resource in resource_set:
127+ edges.setdefault(resource, []).append(node)
128+ # populate the adjacent members of nodes
129+ for node, connected in nodes.iteritems():
130+ for resource in node:
131+ connected.update(edges[resource])
132+ connected.discard(node)
133+ return nodes
134+
135+
136 def split_by_resources(tests):
137 """Split a list of tests by the resources that the tests use.
138
139@@ -47,6 +148,30 @@
140 return resource_set_tests
141
142
143+def _strongly_connected_components(graph, no_resources):
144+ """Find the strongly connected components in graph.
145+
146+ This is essentially a nonrecursive flatterning of Tarjan's method. It
147+ may be worth profiling against an actual Tarjan's implementation at some
148+ point, but sets are often faster than python calls.
149+
150+ graph gets consumed, but that could be changed easily enough.
151+ """
152+ partitions = []
153+ while graph:
154+ node, pending = graph.popitem()
155+ current_partition = set([node])
156+ while pending:
157+ # add all the nodes connected to a connected node to pending.
158+ node = pending.pop()
159+ current_partition.add(node)
160+ pending.update(graph.pop(node, []))
161+ # don't try to process things we've allready processed.
162+ pending.difference_update(current_partition)
163+ partitions.append(current_partition)
164+ return partitions
165+
166+
167 class OptimisingTestSuite(unittest.TestSuite):
168 """A resource creation optimising TestSuite."""
169
170@@ -126,54 +251,149 @@
171 def sortTests(self):
172 """Attempt to topographically sort the contained tests.
173
174+ This function biases to reusing a resource: it assumes that resetting
175+ a resource is usually cheaper than a teardown + setup; and that most
176+ resources are not dirtied by most tests.
177+
178 Feel free to override to improve the sort behaviour.
179 """
180 # We group the tests by the resource combinations they use,
181 # since there will usually be fewer resource combinations than
182- # actual tests and there can never be more.
183+ # actual tests and there can never be more: This gives us 'nodes' or
184+ # 'resource_sets' that represent many tests using the same set of
185+ # resources.
186 resource_set_tests = split_by_resources(self._tests)
187-
188- graph = self._getGraph(resource_set_tests.keys())
189+ # Partition into separate sets of resources, there is no ordering
190+ # preference between sets that do not share members. Rationale:
191+ # If resource_set A and B have no common resources, AB and BA are
192+ # equally good - the global setup/teardown sums are identical. Secondly
193+ # if A shares one or more resources with C, then pairing AC|CA is better
194+ # than having B between A and C, because the shared resources can be
195+ # reset or reused. Having partitioned we can use connected graph logic
196+ # on each partition.
197+ resource_set_graph = _resource_graph(resource_set_tests)
198 no_resources = frozenset()
199- # Recursive visit-all-nodes all-permutations.
200- def cost(from_set, resource_sets):
201- """Get the cost of resource traversal for resource sets.
202-
203- :return: cost, order
204- """
205- if not resource_sets:
206- # tear down last resources
207- return graph[from_set][no_resources], []
208- costs = []
209- for to_set in resource_sets:
210- child_cost, child_order = cost(
211- to_set, resource_sets - set([to_set]))
212- costs.append((graph[from_set][to_set] + child_cost,
213- [to_set] + child_order))
214- return min(costs)
215- _, order = cost(no_resources,
216- set(resource_set_tests) - set([no_resources]))
217- order.append(no_resources)
218- self._tests = sum(
219- (resource_set_tests[resource_set] for resource_set in order), [])
220+ # A list of resource_set_tests, all fully internally connected.
221+ partitions = _strongly_connected_components(resource_set_graph,
222+ no_resources)
223+ result = []
224+ for partition in partitions:
225+ # we process these at the end for no particularly good reason (it makes
226+ # testing slightly easier).
227+ if partition == [no_resources]:
228+ continue
229+ order = self._makeOrder(partition)
230+ # Spit this partition out into result
231+ for resource_set in order:
232+ result.extend(resource_set_tests[resource_set])
233+ result.extend(resource_set_tests[no_resources])
234+ self._tests = result
235
236 def _getGraph(self, resource_sets):
237 """Build a graph of the resource-using nodes.
238
239+ This special cases set(['root']) to be a node with no resources and
240+ edges to everything.
241+
242 :return: A complete directed graph of the switching costs
243- between each resource combination.
244+ between each resource combination. Note that links from N to N are
245+ not included.
246 """
247+ no_resources = frozenset()
248 graph = {}
249+ root = set(['root'])
250+ # bottom = set(['bottom'])
251 for from_set in resource_sets:
252 graph[from_set] = {}
253+ if from_set == root:
254+ from_resources = no_resources
255+ #elif from_set == bottom:
256+ # continue # no links from bottom
257+ else:
258+ from_resources = from_set
259 for to_set in resource_sets:
260 if from_set is to_set:
261- graph[from_set][to_set] = 0
262+ continue # no self-edges
263+ #if to_set == bottom:
264+ # if from_set == root:
265+ # continue # no short cuts!
266+ # to_resources = no_resources
267+ #el
268+ if to_set == root:
269+ continue # no links to root
270 else:
271- graph[from_set][to_set] = self.cost_of_switching(
272- from_set, to_set)
273+ to_resources = to_set
274+ graph[from_set][to_set] = self.cost_of_switching(
275+ from_resources, to_resources)
276 return graph
277
278+ def _makeOrder(self, partition):
279+ """Return a order for the resource sets in partition."""
280+ # This problem is NP-C - find the lowest cost hamiltonian path. It
281+ # also meets the triangle inequality, so we can use an approximation.
282+ # TODO: implement Christofides.
283+ # See http://en.wikipedia.org/wiki/Travelling_salesman_problem#Metric_TSP
284+ # We need a root
285+ root = frozenset(['root'])
286+ partition.add(root)
287+ # and an end
288+ # partition.add(frozenset(['bottom']))
289+ # get rid of 'noresources'
290+ partition.discard(frozenset())
291+ digraph = self._getGraph(partition)
292+ # build a prime map
293+ primes = {}
294+ prime = frozenset(['prime'])
295+ for node in digraph:
296+ primes[node] = node.union(prime)
297+ graph = _digraph_to_graph(digraph, primes)
298+ mst = _kruskals_graph_MST(graph)
299+ # Because the representation is a digraph, we can build an Eulerian
300+ # cycle directly from the representation by just following the links:
301+ # a node with only 1 'edge' has two directed edges; and we can only
302+ # enter and leave it once, so the edge lookups will match precisely.
303+ # As the mst is a spanning tree, the graph will become disconnected
304+ # (we choose non-disconnecting edges first)
305+ # - for a stub node (1 outgoing link): when exiting it unless it is
306+ # the first node started at
307+ # - for a non-stub node if choosing an outgoing link where some other
308+ # endpoints incoming link has not been traversed. [exit by a
309+ # different node than entering, until all exits taken].
310+ # We don't need the mst after, so it gets modified in place.
311+ node = root
312+ cycle = [node]
313+ steps = 2 * (len(mst) - 1)
314+ for step in xrange(steps):
315+ found = False
316+ outgoing = None # For clearer debugging.
317+ for outgoing in mst[node]:
318+ if node in mst[outgoing]:
319+ # we have a return path: take it
320+ # print node, '->', outgoing, ' can return'
321+ del mst[node][outgoing]
322+ node = outgoing
323+ cycle.append(node)
324+ found = True
325+ break
326+ if not found:
327+ # none of the outgoing links have an incoming, so follow an
328+ # arbitrary one (the last examined outgoing)
329+ # print node, '->', outgoing
330+ del mst[node][outgoing]
331+ node = outgoing
332+ cycle.append(node)
333+ # Convert to a path:
334+ visited = set()
335+ order = []
336+ for node in cycle:
337+ if node in visited:
338+ continue
339+ if node in primes:
340+ order.append(node)
341+ visited.add(node)
342+ assert order[0] == root
343+ return order[1:]
344+
345
346 class TestLoader(unittest.TestLoader):
347 """Custom TestLoader to set the right TestSuite class."""
348
349=== modified file 'lib/testresources/tests/__init__.py'
350--- lib/testresources/tests/__init__.py 2009-07-13 08:22:13 +0000
351+++ lib/testresources/tests/__init__.py 2010-02-19 05:54:16 +0000
352@@ -28,10 +28,12 @@
353 import testresources.tests.test_resourced_test_case
354 import testresources.tests.test_test_loader
355 import testresources.tests.test_test_resource
356+ import testresources.tests.test_resource_graph
357 result = TestUtil.TestSuite()
358 result.addTest(testresources.tests.test_test_loader.test_suite())
359 result.addTest(testresources.tests.test_test_resource.test_suite())
360 result.addTest(testresources.tests.test_resourced_test_case.test_suite())
361+ result.addTest(testresources.tests.test_resource_graph.test_suite())
362 result.addTest(
363 testresources.tests.test_optimising_test_suite.test_suite())
364 return result
365
366=== modified file 'lib/testresources/tests/test_optimising_test_suite.py'
367--- lib/testresources/tests/test_optimising_test_suite.py 2010-01-05 05:51:14 +0000
368+++ lib/testresources/tests/test_optimising_test_suite.py 2010-02-19 05:54:16 +0000
369@@ -403,7 +403,7 @@
370 resource = self.makeResource()
371 suite = testresources.OptimisingTestSuite()
372 graph = suite._getGraph([frozenset()])
373- self.assertEqual({frozenset(): {frozenset(): 0}}, graph)
374+ self.assertEqual({frozenset(): {}}, graph)
375
376 def testTwoCasesInGraph(self):
377 res1 = self.makeResource()
378@@ -415,9 +415,9 @@
379
380 suite = testresources.OptimisingTestSuite()
381 graph = suite._getGraph([no_resources, set1, set2])
382- self.assertEqual({no_resources: {no_resources: 0, set1: 2, set2: 1},
383- set1: {no_resources: 2, set1: 0, set2: 1},
384- set2: {no_resources: 1, set1: 1, set2: 0}}, graph)
385+ self.assertEqual({no_resources: {set1: 2, set2: 1},
386+ set1: {no_resources: 2, set2: 1},
387+ set2: {no_resources: 1, set1: 1 }}, graph)
388
389
390 class TestGraphStuff(testtools.TestCase):
391@@ -489,10 +489,15 @@
392 return permutations
393
394 def testBasicSortTests(self):
395- # Test every permutation of inputs, with equal cost resources and
396- # legacy tests.
397+ # Test every permutation of inputs, with legacy tests.
398+ # Cannot use equal costs because of the use of
399+ # a 2*optimal heuristic for sorting: with equal
400+ # costs the wrong sort order is < twice the optimal
401+ # weight, and thus can be selected.
402 resource_one = testresources.TestResource()
403 resource_two = testresources.TestResource()
404+ resource_two.setUpCost = 5
405+ resource_two.tearDownCost = 5
406 resource_three = testresources.TestResource()
407
408 self.case1.resources = [
409@@ -584,6 +589,43 @@
410 permutation.index(self.case3) < permutation.index(self.case4),
411 sorted.index(self.case3) < sorted.index(self.case4))
412
413+ def testSortingTwelveIndependentIsFast(self):
414+ # Given twelve independent resource sets, my patience is not exhausted.
415+ managers = []
416+ for pos in range(12):
417+ managers.append(testresources.TestResourceManager())
418+ # Add more sample tests
419+ cases = [self.case1, self.case2, self.case3, self.case4]
420+ for pos in range(5,13):
421+ cases.append(
422+ testtools.clone_test_with_new_id(cases[0], 'case%d' % pos))
423+ # We care that this is fast in this test, so we don't need to have
424+ # overlapping resource usage
425+ for case, manager in zip(cases, managers):
426+ case.resources = [('_resource', manager)]
427+ # Any sort is ok, as long as its the right length :)
428+ result = self.sortTests(cases)
429+ self.assertEqual(12, len(result))
430+
431+ def testSortingTwelveOverlappingIsFast(self):
432+ # Given twelve connected resource sets, my patience is not exhausted.
433+ managers = []
434+ for pos in range(12):
435+ managers.append(testresources.TestResourceManager())
436+ # Add more sample tests
437+ cases = [self.case1, self.case2, self.case3, self.case4]
438+ for pos in range(5,13):
439+ cases.append(
440+ testtools.clone_test_with_new_id(cases[0], 'case%d' % pos))
441+ tempdir = testresources.TestResourceManager()
442+ # give all tests a tempdir, enough to provoke a single partition in
443+ # the current code.
444+ for case, manager in zip(cases, managers):
445+ case.resources = [('_resource', manager), ('tempdir', tempdir)]
446+ # Any sort is ok, as long as its the right length :)
447+ result = self.sortTests(cases)
448+ self.assertEqual(12, len(result))
449+
450 def testSortConsidersDependencies(self):
451 """Tests with different dependencies are sorted together."""
452 # We test this by having two resources (one and two) that share a very
453
454=== added file 'lib/testresources/tests/test_resource_graph.py'
455--- lib/testresources/tests/test_resource_graph.py 1970-01-01 00:00:00 +0000
456+++ lib/testresources/tests/test_resource_graph.py 2010-02-19 05:54:16 +0000
457@@ -0,0 +1,141 @@
458+#
459+# testresources: extensions to python unittest to allow declaritive use
460+# of resources by test cases.
461+# Copyright (C) 2010 Robert Collins <robertc@robertcollins.net>
462+#
463+# This program is free software; you can redistribute it and/or modify
464+# it under the terms of the GNU General Public License as published by
465+# the Free Software Foundation; either version 2 of the License, or
466+# (at your option) any later version.
467+#
468+# This program is distributed in the hope that it will be useful,
469+# but WITHOUT ANY WARRANTY; without even the implied warranty of
470+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
471+# GNU General Public License for more details.
472+#
473+# You should have received a copy of the GNU General Public License
474+# along with this program; if not, write to the Free Software
475+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
476+#
477+
478+"""Test _resource_graph(resource_sets)."""
479+
480+import testtools
481+import testresources
482+from testresources import split_by_resources, _resource_graph
483+from testresources.tests import ResultWithResourceExtensions
484+import unittest
485+
486+
487+def test_suite():
488+ from testresources.tests import TestUtil
489+ loader = TestUtil.TestLoader()
490+ result = loader.loadTestsFromName(__name__)
491+ return result
492+
493+
494+class TestResourceGraph(testtools.TestCase):
495+
496+ def test_empty(self):
497+ no_resources = frozenset()
498+ resource_sets = [no_resources]
499+ self.assertEqual({no_resources:set([])}, _resource_graph(resource_sets))
500+
501+ def test_discrete(self):
502+ resset1 = frozenset([testresources.TestResourceManager()])
503+ resset2 = frozenset([testresources.TestResourceManager()])
504+ resource_sets = [resset1, resset2]
505+ result = _resource_graph(resource_sets)
506+ self.assertEqual({resset1:set([]), resset2:set([])}, result)
507+
508+ def test_overlapping(self):
509+ res1 = testresources.TestResourceManager()
510+ res2 = testresources.TestResourceManager()
511+ resset1 = frozenset([res1])
512+ resset2 = frozenset([res2])
513+ resset3 = frozenset([res1, res2])
514+ resource_sets = [resset1, resset2, resset3]
515+ result = _resource_graph(resource_sets)
516+ self.assertEqual(
517+ {resset1:set([resset3]),
518+ resset2:set([resset3]),
519+ resset3:set([resset1, resset2])},
520+ result)
521+
522+
523+class TestDigraphToGraph(testtools.TestCase):
524+
525+ def test_wikipedia_example(self):
526+ """Converting a digraph mirrors it in the XZ axis (matrix view).
527+
528+ See http://en.wikipedia.org/wiki/Travelling_salesman_problem \
529+ #Solving_by_conversion_to_Symmetric_TSP
530+ """
531+ # A B C
532+ # A 1 2
533+ # B 6 3
534+ # C 5 4
535+ A = "A"
536+ Ap = "A'"
537+ B = "B"
538+ Bp = "B'"
539+ C = "C"
540+ Cp = "C'"
541+ digraph = {A:{ B:1, C:2},
542+ B:{A:6, C:3},
543+ C:{A:5, B:4 }}
544+ # and the output
545+ # A B C A' B' C'
546+ # A 0 6 5
547+ # B 1 0 4
548+ # C 2 3 0
549+ # A' 0 1 2
550+ # B' 6 0 3
551+ # C' 5 4 0
552+ expected = {
553+ A :{ Ap:0, Bp:6, Cp:5},
554+ B :{ Ap:1, Bp:0, Cp:4},
555+ C :{ Ap:2, Bp:3, Cp:0},
556+ Ap:{A:0, B:1, C:2 },
557+ Bp:{A:6, B:0, C:3 },
558+ Cp:{A:5, B:4, C:0 }}
559+ self.assertEqual(expected,
560+ testresources._digraph_to_graph(digraph, {A:Ap, B:Bp, C:Cp}))
561+
562+
563+class TestKruskalsMST(testtools.TestCase):
564+
565+ def test_wikipedia_example(self):
566+ """Performing KruskalsMST on a graph returns a spanning tree.
567+
568+ See http://en.wikipedia.org/wiki/Kruskal%27s_algorithm.
569+ """
570+ A = "A"
571+ B = "B"
572+ C = "C"
573+ D = "D"
574+ E = "E"
575+ F = "F"
576+ G = "G"
577+ graph = {
578+ A:{ B:7, D:5},
579+ B:{A:7, C:8, D:9, E:7},
580+ C:{ B:8, E:5},
581+ D:{A:5, B:9, E:15, F:6},
582+ E:{ B:7, C:5, D:15, F:8, G:9},
583+ F:{ D:6, E:8, G:11},
584+ G:{ E:9, F:11}}
585+ expected = {
586+ A:{ B:7, D:5},
587+ B:{A:7, E:7},
588+ C:{ E:5},
589+ D:{A:5, F:6},
590+ E:{ B:7, C:5, G:9},
591+ F:{ D:6},
592+ G:{ E:9}}
593+ result = testresources._kruskals_graph_MST(graph)
594+ e_weight = sum(sum(row.itervalues()) for row in expected.itervalues())
595+ r_weight = sum(sum(row.itervalues()) for row in result.itervalues())
596+ self.assertEqual(e_weight, r_weight)
597+ self.assertEqual(expected,
598+ testresources._kruskals_graph_MST(graph))

Subscribers

People subscribed via source and target branches