Merge lp:~thumper/launchpad/scanner-awesomeness into lp:launchpad

Proposed by Tim Penhey
Status: Merged
Approved by: Michael Hudson-Doyle
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~thumper/launchpad/scanner-awesomeness
Merge into: lp:launchpad
Diff against target: 291 lines (+163/-9)
4 files modified
lib/lp/codehosting/scanner/mergedetection.py (+39/-5)
lib/lp/codehosting/scanner/tests/test_mergedetection.py (+44/-3)
lib/lp/services/tests/test_utils.py (+40/-1)
lib/lp/services/utils.py (+40/-0)
To merge this branch: bzr merge lp:~thumper/launchpad/scanner-awesomeness
Reviewer Review Type Date Requested Status
Michael Hudson-Doyle Approve
Review via email: mp+23211@code.launchpad.net

Commit message

Record the merged_revno in the scanner.

Description of the change

This branch records the merged revno for branches that have been merged. It only looks for the landing candidates as it uses the merge sorted graph, and we don't want to do that for the possible landing targets.

Since there may be many landing candidates for a series branch, and the merge sorted graph is a non-trivial thing to generate, I added a caching iterator. This allows the code to be written in a more natural way, but without multiple generations of the merge sorted graph.

tests:
  TestFindMergedRevno
  TestCachingIterator
  TestAutoMergeDetectionForMergeProposals

To post a comment you must log in.
Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

I pointed out a couple of typos on IRC.

I think a test of CachedIterator that iterated two iterators in parallel like so:

ci = CachedIterator([...])
i1 = iter(ci)
assert i1.next() == ...
i2 = iter(ci)
assert i2.next() == ...
assert i1.next() == ...

I think it'll pass fine, but it's something my devious mind wants to see tested.

CachingIterator needs some blank lines between its members.

Otherwise it all looks nice, and will be a nice change!

review: Approve
Revision history for this message
Andrew Bennetts (spiv) wrote :

Michael Hudson wrote:
> Review: Approve
> I pointed out a couple of typos on IRC.
>
> I think a test of CachedIterator that iterated two iterators in parallel like so:
>
> ci = CachedIterator([...])
> i1 = iter(ci)
> assert i1.next() == ...
> i2 = iter(ci)
> assert i2.next() == ...
> assert i1.next() == ...
>
> I think it'll pass fine, but it's something my devious mind wants to see tested.

You need to be more devious than that: the assumption CachedIterator is making
is that its __iter__ will never call itself. You'd think that's a reasonable
assumption, but I've seen code that accidentally does it. The more interacting
layers of code you have, with lazy DB iterators and so forth, the more likely it
is...

And boy is it confusing as hell when it does happen and you get “impossible”
results from apparently simple code.

So, try this:

class EvilIterator(object):
    def __init__(self):
        self.elem_source = iter('abcdef')
    def next(self):
        e = self.elem_source.next()
        if e == 'c':
            print 'evil:', list(ci)
        return e

ci = CachingIterator(EvilIterator())
i1 = iter(ci)
i2 = iter(ci)
for elem in i1:
    print '1:', elem
for elem in i2:
    print '2:', elem

-Andrew.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/codehosting/scanner/mergedetection.py'
2--- lib/lp/codehosting/scanner/mergedetection.py 2010-03-02 19:30:18 +0000
3+++ lib/lp/codehosting/scanner/mergedetection.py 2010-04-12 10:04:37 +0000
4@@ -17,6 +17,7 @@
5 from lp.code.interfaces.branchcollection import IAllBranches
6 from lp.code.interfaces.branchmergeproposal import (
7 BRANCH_MERGE_PROPOSAL_FINAL_STATES, notify_modified)
8+from lp.services.utils import CachingIterator
9
10
11 def is_series_branch(branch):
12@@ -47,7 +48,7 @@
13 branch.lifecycle_status = BranchLifecycleStatus.MERGED
14
15
16-def merge_detected(logger, source, target, proposal=None):
17+def merge_detected(logger, source, target, proposal=None, merge_revno=None):
18 """Handle the merge of source into target."""
19 # If the target branch is not the development focus, then don't update
20 # the status of the source branch.
21@@ -60,7 +61,7 @@
22 if is_development_focus(target):
23 mark_branch_merged(logger, source)
24 else:
25- notify_modified(proposal, proposal.markAsMerged)
26+ notify_modified(proposal, proposal.markAsMerged, merge_revno)
27 # If there is an explicit merge proposal, change the branch's
28 # status when it's been merged into a development focus or any
29 # other series branch.
30@@ -114,6 +115,27 @@
31 merge_detected(logger, branch, db_branch)
32
33
34+def find_merged_revno(merge_sorted, tip_rev_id):
35+ """Find the mainline revno that merged tip_rev_id.
36+
37+ This method traverses the merge sorted graph looking for the first
38+ """
39+ last_mainline = None
40+ iterator = iter(merge_sorted)
41+ while True:
42+ try:
43+ rev_id, depth, revno, ignored = iterator.next()
44+ except StopIteration:
45+ break
46+ if depth == 0:
47+ last_mainline = revno[0]
48+ if rev_id == tip_rev_id:
49+ return last_mainline
50+ # The only reason we get here is that the tip_rev_id isn't in the merge
51+ # sorted graph.
52+ return None
53+
54+
55 def auto_merge_proposals(scan_completed):
56 """Detect merged proposals."""
57 db_branch = scan_completed.db_branch
58@@ -127,12 +149,24 @@
59 # At this stage we are not going to worry about the revno
60 # which introduced the change, that will either be set through the web
61 # ui by a person, or by PQM once it is integrated.
62+
63+ if scan_completed.bzr_branch is None:
64+ # Only happens in tests.
65+ merge_sorted = []
66+ else:
67+ merge_sorted = CachingIterator(
68+ scan_completed.bzr_branch.iter_merge_sorted_revisions())
69 for proposal in db_branch.landing_candidates:
70- if proposal.source_branch.last_scanned_id in bzr_ancestry:
71+ tip_rev_id = proposal.source_branch.last_scanned_id
72+ if tip_rev_id in bzr_ancestry:
73+ merged_revno = find_merged_revno(merge_sorted, tip_rev_id)
74+ # Remember so we can find the merged revision number.
75 merge_detected(
76- logger, proposal.source_branch, db_branch, proposal)
77+ logger, proposal.source_branch, db_branch, proposal,
78+ merged_revno)
79
80- # Now check the landing targets.
81+ # Now check the landing targets. We should probably get rid of this,
82+ # especially if we are trying to get rid of the branch revision table.
83 final_states = BRANCH_MERGE_PROPOSAL_FINAL_STATES
84 tip_rev_id = db_branch.last_scanned_id
85 for proposal in db_branch.landing_targets:
86
87=== modified file 'lib/lp/codehosting/scanner/tests/test_mergedetection.py'
88--- lib/lp/codehosting/scanner/tests/test_mergedetection.py 2010-03-02 18:56:25 +0000
89+++ lib/lp/codehosting/scanner/tests/test_mergedetection.py 2010-04-12 10:04:37 +0000
90@@ -21,7 +21,7 @@
91 from canonical.config import config
92 from lp.code.enums import BranchLifecycleStatus, BranchMergeProposalStatus
93 from lp.code.interfaces.branchlookup import IBranchLookup
94-from lp.testing import TestCaseWithFactory
95+from lp.testing import TestCase, TestCaseWithFactory
96 from lp.testing.mail_helpers import pop_notifications
97 from canonical.testing import LaunchpadZopelessLayer
98
99@@ -35,7 +35,7 @@
100 @run_as_db_user(config.launchpad.dbuser)
101 def createProposal(self, source, target):
102 # The scanner doesn't have insert rights, so do it here.
103- proposal = source.addLandingTarget(source.owner, target)
104+ source.addLandingTarget(source.owner, target)
105 transaction.commit()
106
107 def _createBranchesAndProposal(self):
108@@ -69,6 +69,7 @@
109 self.assertEqual(
110 BranchMergeProposalStatus.MERGED,
111 proposal.queue_status)
112+ self.assertEqual(3, proposal.merged_revno)
113
114 def test_auto_merge_proposals_real_merge_target_scanned_first(self):
115 # If there is a merge proposal where the tip of the source is in the
116@@ -210,7 +211,7 @@
117 # Other branches for the product are checked, but if the tip revision
118 # of the branch is not yet been set no merge event is emitted for that
119 # branch.
120- source = self.factory.makeProductBranch(product=self.product)
121+ self.factory.makeProductBranch(product=self.product)
122 self.autoMergeBranches(self.db_branch, ['revid'])
123 self.assertEqual([], self.merges)
124
125@@ -333,5 +334,45 @@
126 BranchLifecycleStatus.MERGED, source.lifecycle_status)
127
128
129+class TestFindMergedRevno(TestCase):
130+ """Tests for find_merged_revno."""
131+
132+ def get_merge_graph(self):
133+ # Create a fake merge graph.
134+ return [
135+ ('rev-3', 0, (3,), False),
136+ ('rev-3a', 1, (15, 4, 8), False),
137+ ('rev-3b', 1, (15, 4, 7), False),
138+ ('rev-3c', 1, (15, 4, 6), False),
139+ ('rev-2', 0, (2,), False),
140+ ('rev-2a', 1, (4, 4, 8), False),
141+ ('rev-2b', 1, (4, 4, 7), False),
142+ ('rev-2-1a', 2, (7, 2, 47), False),
143+ ('rev-2-1b', 2, (7, 2, 45), False),
144+ ('rev-2-1c', 2, (7, 2, 42), False),
145+ ('rev-2c', 1, (4, 4, 6), False),
146+ ('rev-1', 0, (1,), False),
147+ ]
148+
149+ def assertFoundRevisionNumber(self, expected, rev_id):
150+ merge_sorted = self.get_merge_graph()
151+ revno = mergedetection.find_merged_revno(merge_sorted, rev_id)
152+ if expected is None:
153+ self.assertIs(None, revno)
154+ else:
155+ self.assertEqual(expected, revno)
156+
157+ def test_not_found(self):
158+ # If the rev_id passed into the function isn't in the merge sorted
159+ # graph, None is returned.
160+ self.assertFoundRevisionNumber(None, 'not-there')
161+
162+ def test_existing_revision(self):
163+ # If a revision is found, the last mainline revision is returned.
164+ self.assertFoundRevisionNumber(3, 'rev-3b')
165+ self.assertFoundRevisionNumber(2, 'rev-2-1c')
166+ self.assertFoundRevisionNumber(1, 'rev-1')
167+
168+
169 def test_suite():
170 return unittest.TestLoader().loadTestsFromName(__name__)
171
172=== modified file 'lib/lp/services/tests/test_utils.py'
173--- lib/lp/services/tests/test_utils.py 2009-10-19 05:40:52 +0000
174+++ lib/lp/services/tests/test_utils.py 2010-04-12 10:04:37 +0000
175@@ -5,9 +5,10 @@
176
177 __metaclass__ = type
178
179+import itertools
180 import unittest
181
182-from lp.services.utils import iter_split
183+from lp.services.utils import CachingIterator, iter_split
184 from lp.testing import TestCase
185
186
187@@ -28,5 +29,43 @@
188 list(iter_split('one/two/three', '/')))
189
190
191+class TestCachingIterator(TestCase):
192+ """Tests for CachingIterator."""
193+
194+ def test_reuse(self):
195+ # The same iterator can be used multiple times.
196+ iterator = CachingIterator(itertools.count())
197+ self.assertEqual(
198+ [0,1,2,3,4], list(itertools.islice(iterator, 0, 5)))
199+ self.assertEqual(
200+ [0,1,2,3,4], list(itertools.islice(iterator, 0, 5)))
201+
202+ def test_more_values(self):
203+ # If a subsequent call to iter causes more values to be fetched, they
204+ # are also cached.
205+ iterator = CachingIterator(itertools.count())
206+ self.assertEqual(
207+ [0,1,2], list(itertools.islice(iterator, 0, 3)))
208+ self.assertEqual(
209+ [0,1,2,3,4], list(itertools.islice(iterator, 0, 5)))
210+
211+ def test_limited_iterator(self):
212+ # Make sure that StopIteration is handled correctly.
213+ iterator = CachingIterator(iter([0,1,2,3,4]))
214+ self.assertEqual(
215+ [0,1,2], list(itertools.islice(iterator, 0, 3)))
216+ self.assertEqual([0,1,2,3,4], list(iterator))
217+
218+ def test_parallel_iteration(self):
219+ # There can be parallel iterators over the CachingIterator.
220+ ci = CachingIterator(iter([0,1,2,3,4]))
221+ i1 = iter(ci)
222+ i2 = iter(ci)
223+ self.assertEqual(0, i1.next())
224+ self.assertEqual(0, i2.next())
225+ self.assertEqual([1,2,3,4], list(i2))
226+ self.assertEqual([1,2,3,4], list(i1))
227+
228+
229 def test_suite():
230 return unittest.TestLoader().loadTestsFromName(__name__)
231
232=== modified file 'lib/lp/services/utils.py'
233--- lib/lp/services/utils.py 2010-04-09 12:45:48 +0000
234+++ lib/lp/services/utils.py 2010-04-12 10:04:37 +0000
235@@ -9,12 +9,14 @@
236
237 __metaclass__ = type
238 __all__ = [
239+ 'CachingIterator',
240 'iter_split',
241 'synchronize',
242 'text_delta',
243 'value_string',
244 ]
245
246+import itertools
247
248 from lazr.enum import BaseItem
249 from zope.security.proxy import isinstance as zope_isinstance
250@@ -101,3 +103,41 @@
251 output.append('')
252 output.append('%s changed to:\n\n%s' % (title, delta))
253 return '\n'.join(output)
254+
255+
256+class CachingIterator:
257+ """Remember the items extracted from the iterator for the next iteration.
258+
259+ Some generators and iterators are expensive to calculate, like calculating
260+ the merge sorted revision graph for a bazaar branch, so you don't want to
261+ call them too often. Rearranging the code so it doesn't call the
262+ expensive iterator can make the code awkward. This class provides a way
263+ to have the iterator called once, and the results stored. The results
264+ can then be iterated over again, and more values retrieved from the
265+ iterator if necessary.
266+ """
267+
268+ def __init__(self, iterator):
269+ self.iterator = iterator
270+ self.data = []
271+
272+ def __iter__(self):
273+ index = itertools.count()
274+ while True:
275+ pos = index.next()
276+ try:
277+ yield self.data[pos]
278+ except IndexError:
279+ # Defer to the iterator.
280+ pass
281+ else:
282+ continue
283+ if self.iterator is None:
284+ break
285+ try:
286+ item = self.iterator.next()
287+ except StopIteration:
288+ self.iterator = None
289+ break
290+ self.data.append(item)
291+ yield item