Merge lp:~spiv/bzr/ids-only-necessary-texts into lp:~bzr/bzr/trunk-old

Proposed by Andrew Bennetts
Status: Merged
Merged at revision: not available
Proposed branch: lp:~spiv/bzr/ids-only-necessary-texts
Merge into: lp:~bzr/bzr/trunk-old
Diff against target: 341 lines
To merge this branch: bzr merge lp:~spiv/bzr/ids-only-necessary-texts
Reviewer Review Type Date Requested Status
Robert Collins (community) Approve
Review via email: mp+10444@code.launchpad.net
Andrew Bennetts (spiv) wrote :

This fixes bug 399140. As explained in the bug discussion, the issue is that InterDifferingSerializer doesn't behave correctly with stacked sources. It should take care to copy any parent inventories that are present for the revisions it is copying, and to copy only the texts those revisions actually need. Because it did neither, it was generating an inventory delta from null for the first revision in a stacked repository during upgrade, and then expecting to be able to find every text named in that delta.
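
To make the rule concrete, here is a minimal standalone sketch of the correctness requirement, with plain sets and dicts standing in for real repository objects; the helper name is purely illustrative and is not bzrlib API:

# Illustrative sketch only -- not bzrlib API. For the revisions being copied,
# any parent that is absent as a revision but whose inventory is present at
# the stacking boundary must have that inventory copied too, so later
# inventory deltas and text lookups can be satisfied.
def parent_inventories_to_copy(parent_map, present_revisions, present_inventories):
    """Return ids of parents whose inventories must also be copied.

    parent_map: {rev_id: (parent_ids, ...)} for the revisions being fetched.
    present_revisions: set of revision ids present in the source repository.
    present_inventories: set of revision ids whose inventories are present.
    """
    parents = set()
    for parent_ids in parent_map.values():
        parents.update(parent_ids)
    absent = parents - present_revisions
    return absent & present_inventories

# 'merge' is being fetched; 'left' and 'right' live in the stacked-on
# repository, but their inventories are present at the boundary.
assert parent_inventories_to_copy(
    {'merge': ('left', 'right')},
    set(['merge']),
    set(['merge', 'left', 'right'])) == set(['left', 'right'])

The real change (_fetch_parent_invs_for_stacking in the diff below) does the equivalent walk against the source repository's inventories index.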

The test is largely a copy-and-paste variant of the other stacking tests in per_interrepository/test_fetch.py, so it's long and somewhat repetitive compared to the existing tests. We should refactor those tests at some point... Anyway, the test does fail with bzr.dev's copy of repository.py, and passes with the fix, of course :)

I expect this will have some performance impact on InterDifferingSerializer, but nothing large. Correctness beats performance, anyway.

Robert Collins (lifeless) wrote :

218 +        if not self.repository_format.supports_external_lookups:
219 +            raise TestNotApplicable("Need stacking support in the source.")
220 +        if not self.repository_format_to.supports_external_lookups:
221 +            raise TestNotApplicable("Need stacking support in the target.")

It would be good to ensure that there is a compatible pair being tested. Otherwise we'll get TNA but not run the test.
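
For illustration, one way to get that guarantee is a guard test over the format matrix that fails if every (source, target) pair would be skipped. This sketch uses plain unittest and fake format objects rather than the real per_interrepository parameterisation, so the names are not bzrlib API:

import unittest

class FakeFormat(object):
    """Stand-in for a repository format; only the attribute we care about."""
    def __init__(self, supports_external_lookups):
        self.supports_external_lookups = supports_external_lookups

# Hypothetical test matrix of (source format, target format) pairs.
FORMAT_PAIRS = [
    (FakeFormat(False), FakeFormat(False)),
    (FakeFormat(True), FakeFormat(True)),
]

class TestMatrixCoversStacking(unittest.TestCase):

    def test_some_pair_supports_stacking(self):
        # Fail loudly if the stacking fetch test would always raise
        # TestNotApplicable, i.e. never actually run anywhere.
        self.assertTrue(any(
            src.supports_external_lookups and tgt.supports_external_lookups
            for src, tgt in FORMAT_PAIRS))

if __name__ == '__main__':
    unittest.main()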

Also, the 'get parents of the edge of a graph subset' operation is a repeated idiom now. I think it's time to put that into graph.
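
As a sketch of that idiom in isolation (the function name and placement are hypothetical; nothing like this lands in bzrlib/graph.py as part of this branch): given a parent_map covering a subset of the revision graph, return the parents that fall outside the subset.

def parents_outside_subset(parent_map):
    """Return parent ids referenced by parent_map but not keys of it.

    parent_map: {rev_id: (parent_ids, ...)} covering a subset of the graph.
    """
    subset = set(parent_map)
    edge_parents = set()
    for parent_ids in parent_map.values():
        edge_parents.update(parent_ids)
    return edge_parents - subset

# 'merge' is in the subset; its parents 'left' and 'right' are the edge.
assert parents_outside_subset({'merge': ('left', 'right')}) == set(['left', 'right'])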

review: Approve

Preview Diff

=== modified file 'NEWS'
--- NEWS 2009-08-21 03:17:13 +0000
+++ NEWS 2009-08-24 03:35:14 +0000
@@ -37,6 +37,10 @@
 * Fix IndexError printing CannotBindAddress errors.
   (Martin Pool, #286871)
 
+* Fix "Revision ... not present" errors when upgrading stacked branches,
+  or when doing fetches from a stacked source to a stacked target.
+  (Andrew Bennetts, #399140)
+
 Improvements
 ************
 
 
=== modified file 'bzrlib/bzrdir.py'
--- bzrlib/bzrdir.py 2009-08-18 05:18:52 +0000
+++ bzrlib/bzrdir.py 2009-08-24 03:35:14 +0000
@@ -129,8 +129,16 @@
         return True
 
     def check_conversion_target(self, target_format):
+        """Check that a bzrdir as a whole can be converted to a new format."""
+        # The only current restriction is that the repository content can be
+        # fetched compatibly with the target.
         target_repo_format = target_format.repository_format
-        self.open_repository()._format.check_conversion_target(target_repo_format)
+        try:
+            self.open_repository()._format.check_conversion_target(
+                target_repo_format)
+        except errors.NoRepositoryPresent:
+            # No repo, no problem.
+            pass
 
     @staticmethod
     def _check_supported(format, allow_unsupported,
=== modified file 'bzrlib/repository.py'
--- bzrlib/repository.py 2009-08-19 18:04:49 +0000
+++ bzrlib/repository.py 2009-08-24 03:35:14 +0000
@@ -3732,25 +3732,80 @@
             return False
         return True
 
-    def _get_delta_for_revision(self, tree, parent_ids, basis_id, cache):
+    def _get_trees(self, revision_ids, cache):
+        possible_trees = []
+        for rev_id in revision_ids:
+            if rev_id in cache:
+                possible_trees.append((rev_id, cache[rev_id]))
+            else:
+                # Not cached, but inventory might be present anyway.
+                try:
+                    tree = self.source.revision_tree(rev_id)
+                except errors.NoSuchRevision:
+                    # Nope, parent is ghost.
+                    pass
+                else:
+                    cache[rev_id] = tree
+                    possible_trees.append((rev_id, tree))
+        return possible_trees
+
+    def _get_delta_for_revision(self, tree, parent_ids, possible_trees):
         """Get the best delta and base for this revision.
 
         :return: (basis_id, delta)
         """
-        possible_trees = [(parent_id, cache[parent_id])
-            for parent_id in parent_ids
-            if parent_id in cache]
-        if len(possible_trees) == 0:
-            # There either aren't any parents, or the parents aren't in the
-            # cache, so just use the last converted tree
-            possible_trees.append((basis_id, cache[basis_id]))
         deltas = []
+        # Generate deltas against each tree, to find the shortest.
+        texts_possibly_new_in_tree = set()
         for basis_id, basis_tree in possible_trees:
             delta = tree.inventory._make_delta(basis_tree.inventory)
+            for old_path, new_path, file_id, new_entry in delta:
+                if new_path is None:
+                    # This file_id isn't present in the new rev, so we don't
+                    # care about it.
+                    continue
+                if not new_path:
+                    # Rich roots are handled elsewhere...
+                    continue
+                kind = new_entry.kind
+                if kind != 'directory' and kind != 'file':
+                    # No text record associated with this inventory entry.
+                    continue
+                # This is a directory or file that has changed somehow.
+                texts_possibly_new_in_tree.add((file_id, new_entry.revision))
             deltas.append((len(delta), basis_id, delta))
         deltas.sort()
         return deltas[0][1:]
 
+    def _fetch_parent_invs_for_stacking(self, parent_map, cache):
+        """Find all parent revisions that are absent, but for which the
+        inventory is present, and copy those inventories.
+
+        This is necessary to preserve correctness when the source is stacked
+        without fallbacks configured. (Note that in cases like upgrade the
+        source may be not have _fallback_repositories even though it is
+        stacked.)
+        """
+        parent_revs = set()
+        for parents in parent_map.values():
+            parent_revs.update(parents)
+        present_parents = self.source.get_parent_map(parent_revs)
+        absent_parents = set(parent_revs).difference(present_parents)
+        parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(
+            (rev_id,) for rev_id in absent_parents)
+        parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]
+        for parent_tree in self.source.revision_trees(parent_inv_ids):
+            current_revision_id = parent_tree.get_revision_id()
+            parents_parents_keys = parent_invs_keys_for_stacking[
+                (current_revision_id,)]
+            parents_parents = [key[-1] for key in parents_parents_keys]
+            basis_id = _mod_revision.NULL_REVISION
+            basis_tree = self.source.revision_tree(basis_id)
+            delta = parent_tree.inventory._make_delta(basis_tree.inventory)
+            self.target.add_inventory_by_delta(
+                basis_id, delta, current_revision_id, parents_parents)
+            cache[current_revision_id] = parent_tree
+
     def _fetch_batch(self, revision_ids, basis_id, cache):
         """Fetch across a few revisions.
 
@@ -3769,29 +3824,54 @@
         pending_deltas = []
         pending_revisions = []
         parent_map = self.source.get_parent_map(revision_ids)
+        self._fetch_parent_invs_for_stacking(parent_map, cache)
         for tree in self.source.revision_trees(revision_ids):
+            # Find a inventory delta for this revision.
+            # Find text entries that need to be copied, too.
             current_revision_id = tree.get_revision_id()
             parent_ids = parent_map.get(current_revision_id, ())
+            parent_trees = self._get_trees(parent_ids, cache)
+            possible_trees = list(parent_trees)
+            if len(possible_trees) == 0:
+                # There either aren't any parents, or the parents are ghosts,
+                # so just use the last converted tree.
+                possible_trees.append((basis_id, cache[basis_id]))
             basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
-                basis_id, cache)
+                possible_trees)
             if self._converting_to_rich_root:
                 self._revision_id_to_root_id[current_revision_id] = \
                     tree.get_root_id()
-            # Find text entries that need to be copied
+            # Determine which texts are in present in this revision but not in
+            # any of the available parents.
+            texts_possibly_new_in_tree = set()
             for old_path, new_path, file_id, entry in delta:
-                if new_path is not None:
-                    if not new_path:
-                        # This is the root
-                        if not self.target.supports_rich_root():
-                            # The target doesn't support rich root, so we don't
-                            # copy
-                            continue
-                        if self._converting_to_rich_root:
-                            # This can't be copied normally, we have to insert
-                            # it specially
-                            root_keys_to_create.add((file_id, entry.revision))
-                            continue
-                    text_keys.add((file_id, entry.revision))
+                if new_path is None:
+                    # This file_id isn't present in the new rev
+                    continue
+                if not new_path:
+                    # This is the root
+                    if not self.target.supports_rich_root():
+                        # The target doesn't support rich root, so we don't
+                        # copy
+                        continue
+                    if self._converting_to_rich_root:
+                        # This can't be copied normally, we have to insert
+                        # it specially
+                        root_keys_to_create.add((file_id, entry.revision))
+                        continue
+                kind = entry.kind
+                texts_possibly_new_in_tree.add((file_id, entry.revision))
+            for basis_id, basis_tree in possible_trees:
+                basis_inv = basis_tree.inventory
+                for file_key in list(texts_possibly_new_in_tree):
+                    file_id, file_revision = file_key
+                    try:
+                        entry = basis_inv[file_id]
+                    except errors.NoSuchId:
+                        continue
+                    if entry.revision == file_revision:
+                        texts_possibly_new_in_tree.remove(file_key)
+            text_keys.update(texts_possibly_new_in_tree)
             revision = self.source.get_revision(current_revision_id)
             pending_deltas.append((basis_id, delta,
                 current_revision_id, revision.parent_ids))
@@ -3833,8 +3913,13 @@
         for parent_tree in self.source.revision_trees(parent_map):
             current_revision_id = parent_tree.get_revision_id()
             parents_parents = parent_map[current_revision_id]
+            possible_trees = self._get_trees(parents_parents, cache)
+            if len(possible_trees) == 0:
+                # There either aren't any parents, or the parents are
+                # ghosts, so just use the last converted tree.
+                possible_trees.append((basis_id, cache[basis_id]))
             basis_id, delta = self._get_delta_for_revision(parent_tree,
-                parents_parents, basis_id, cache)
+                parents_parents, possible_trees)
             self.target.add_inventory_by_delta(
                 basis_id, delta, current_revision_id, parents_parents)
             # insert signatures and revisions
 
=== modified file 'bzrlib/tests/blackbox/test_upgrade.py'
--- bzrlib/tests/blackbox/test_upgrade.py 2009-08-14 08:57:57 +0000
+++ bzrlib/tests/blackbox/test_upgrade.py 2009-08-24 03:35:14 +0000
@@ -184,7 +184,7 @@
         self.assertEqual('', err)
 
 
-class UpgradeRecommendedTests(TestCaseInTempDir):
+class UpgradeRecommendedTests(TestCaseWithTransport):
 
     def test_recommend_upgrade_wt4(self):
         # using a deprecated format gives a warning
@@ -200,3 +200,9 @@
         out, err = self.run_bzr('revno a')
         if err.find('upgrade') > -1:
             self.fail("message shouldn't suggest upgrade:\n%s" % err)
+
+    def test_upgrade_shared_repo(self):
+        repo = self.make_repository('repo', format='2a', shared=True)
+        branch = self.make_branch_and_tree('repo/branch', format="pack-0.92")
+        self.get_transport('repo/branch/.bzr/repository').delete_tree('.')
+        out, err = self.run_bzr(['upgrade'], working_dir='repo/branch')
=== modified file 'bzrlib/tests/per_interrepository/test_fetch.py'
--- bzrlib/tests/per_interrepository/test_fetch.py 2009-08-14 00:55:42 +0000
+++ bzrlib/tests/per_interrepository/test_fetch.py 2009-08-24 03:35:14 +0000
@@ -17,7 +17,6 @@
 
 import sys
 
-import bzrlib
 from bzrlib import (
     errors,
     inventory,
@@ -266,6 +265,91 @@
         self.assertEqual(expected_texts, unstacked_repo.texts.keys())
         self.assertCanStreamRevision(unstacked_repo, 'third')
 
+    def test_fetch_from_stacked_to_stacked_copies_parent_inventories(self):
+        """Fetch from a stacked branch copies inventories for parents of
+        revisions at the stacking boundary.
+
+        Specifically, fetch will copy the parent inventories from the
+        source for which the corresponding revisions are not present. This
+        will happen even when the source repository has no fallbacks configured
+        (as is the case during upgrade).
+        """
+        if not self.repository_format.supports_external_lookups:
+            raise TestNotApplicable("Need stacking support in the source.")
+        if not self.repository_format_to.supports_external_lookups:
+            raise TestNotApplicable("Need stacking support in the target.")
+        builder = self.make_branch_builder('branch')
+        builder.start_series()
+        builder.build_snapshot('base', None, [
+            ('add', ('', 'root-id', 'directory', '')),
+            ('add', ('file', 'file-id', 'file', 'content\n'))])
+        builder.build_snapshot('left', ['base'], [
+            ('modify', ('file-id', 'left content\n'))])
+        builder.build_snapshot('right', ['base'], [
+            ('modify', ('file-id', 'right content\n'))])
+        builder.build_snapshot('merge', ['left', 'right'], [
+            ('modify', ('file-id', 'left and right content\n'))])
+        builder.finish_series()
+        branch = builder.get_branch()
+        repo = self.make_repository('old-trunk')
+        # Make a pair of equivalent trunk repos in the from and to formats.
+        old_trunk = repo.bzrdir.create_branch()
+        old_trunk.repository.fetch(branch.repository, 'left')
+        old_trunk.repository.fetch(branch.repository, 'right')
+        repo = self.make_to_repository('new-trunk')
+        new_trunk = repo.bzrdir.create_branch()
+        new_trunk.repository.fetch(branch.repository, 'left')
+        new_trunk.repository.fetch(branch.repository, 'right')
+        # Make the source; a repo stacked on old_trunk contained just the data
+        # for 'merge'.
+        repo = self.make_repository('old-stacked')
+        old_stacked_branch = repo.bzrdir.create_branch()
+        old_stacked_branch.set_stacked_on_url(old_trunk.base)
+        old_stacked_branch.repository.fetch(branch.repository, 'merge')
+        # Make the target, a repo stacked on new_trunk.
+        repo = self.make_to_repository('new-stacked')
+        new_stacked_branch = repo.bzrdir.create_branch()
+        new_stacked_branch.set_stacked_on_url(new_trunk.base)
+        old_unstacked_repo = old_stacked_branch.bzrdir.open_repository()
+        new_unstacked_repo = new_stacked_branch.bzrdir.open_repository()
+        # Reopen the source and target repos without any fallbacks, and fetch
+        # 'merge'.
+        new_unstacked_repo.fetch(old_unstacked_repo, 'merge')
+        # Now check the results. new_unstacked_repo should contain all the
+        # data necessary to stream 'merge' (i.e. the parent inventories).
+        new_unstacked_repo.lock_read()
+        self.addCleanup(new_unstacked_repo.unlock)
+        self.assertFalse(new_unstacked_repo.has_revision('left'))
+        self.assertFalse(new_unstacked_repo.has_revision('right'))
+        self.assertEqual(
+            set([('left',), ('right',), ('merge',)]),
+            new_unstacked_repo.inventories.keys())
+        # And the basis inventories have been copied correctly
+        new_trunk.lock_read()
+        self.addCleanup(new_trunk.unlock)
+        left_tree, right_tree = new_trunk.repository.revision_trees(
+            ['left', 'right'])
+        new_stacked_branch.lock_read()
+        self.addCleanup(new_stacked_branch.unlock)
+        (stacked_left_tree,
+         stacked_right_tree) = new_stacked_branch.repository.revision_trees(
+            ['left', 'right'])
+        self.assertEqual(left_tree.inventory, stacked_left_tree.inventory)
+        self.assertEqual(right_tree.inventory, stacked_right_tree.inventory)
+        # Finally, it's not enough to see that the basis inventories are
+        # present. The texts introduced in merge (and only those) should be
+        # present, and also generating a stream should succeed without blowing
+        # up.
+        self.assertTrue(new_unstacked_repo.has_revision('merge'))
+        expected_texts = set([('file-id', 'merge')])
+        if new_stacked_branch.repository.texts.get_parent_map([('root-id',
+            'merge')]):
+            # If a (root-id,merge) text exists, it should be in the stacked
+            # repo.
+            expected_texts.add(('root-id', 'merge'))
+        self.assertEqual(expected_texts, new_unstacked_repo.texts.keys())
+        self.assertCanStreamRevision(new_unstacked_repo, 'merge')
+
     def test_fetch_missing_basis_text(self):
         """If fetching a delta, we should die if a basis is not present."""
         tree = self.make_branch_and_tree('tree')