Merge lp:~cjwatson/launchpad/archive-file-history into lp:launchpad

Proposed by Colin Watson
Status: Rejected
Rejected by: Colin Watson
Proposed branch: lp:~cjwatson/launchpad/archive-file-history
Merge into: lp:launchpad
Prerequisite: lp:~cjwatson/launchpad/get-transaction-timestamp-per-store
Diff against target: 771 lines (+248/-178)
5 files modified
lib/lp/archivepublisher/publishing.py (+43/-22)
lib/lp/archivepublisher/tests/test_publisher.py (+94/-61)
lib/lp/soyuz/interfaces/archivefile.py (+12/-9)
lib/lp/soyuz/model/archivefile.py (+37/-47)
lib/lp/soyuz/tests/test_archivefile.py (+62/-39)
To merge this branch: bzr merge lp:~cjwatson/launchpad/archive-file-history
Reviewer Review Type Date Requested Status
Launchpad code reviewers Pending
Review via email: mp+343752@code.launchpad.net

This proposal supersedes a proposal from 2018-04-21.

Commit message

Turn ArchiveFile into a history table, adding date_created and date_superseded columns. Adjust the publisher to match.

Description of the change

The main complexity here is in the changed publisher logic, especially for reprieving (that is, the situation where file contents that were scheduled for deletion become live again, particularly common for empty files). We previously did this by simply clearing scheduled_deletion_date on the old ArchiveFile row, but that strategy no longer works when we're trying to maintain history: instead, we need to create new rows in such cases. As a result of the logic changes here, we no longer need the only_condemned=True option in ArchiveFileSet.getByArchive.
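
For reference, the reprieving logic now boils down to inserting fresh rows rather than clearing the condemnation on old ones. A minimal sketch of the idea, mirroring the unscheduleDeletion change in the diff below (the helper name here is illustrative, not part of the branch):

    from lp.services.database.bulk import create
    from lp.services.database.constants import UTC_NOW

    def reprieve(archive_files):
        # Insert new ArchiveFile rows with the same identity and content,
        # a fresh date_created, and no condemnation, leaving the superseded
        # rows in place as history.
        create(
            (ArchiveFile.archive, ArchiveFile.container, ArchiveFile.path,
             ArchiveFile.library_file, ArchiveFile.date_created,
             ArchiveFile.date_superseded, ArchiveFile.scheduled_deletion_date),
            [(af.archive, af.container, af.path, af.library_file,
              UTC_NOW, None, None)
             for af in archive_files])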

I think the publisher tests are now somewhat clearer: they explicitly test creation dates, which makes the chain of events more obvious.

ArchiveFile.date_superseded is arguably redundant with ArchiveFile.scheduled_deletion_date, but I think keeping both is a good idea for clarity, and in case we ever change the stay of execution in the future.
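
Concretely, scheduleDeletion keeps the two columns in lockstep (see the model change in the diff below), so for condemned rows the relationship is just the stay of execution:

    # For a condemned row, after scheduleDeletion:
    #   scheduled_deletion_date == date_superseded + stay_of_execution
    rows.set(
        date_superseded=_now(),
        scheduled_deletion_date=_now() + stay_of_execution)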

We'll need to backfill the new columns. I'll probably make a separate branch with a garbo job for this: my plan is to set date_created to some arbitrary value (probably just the epoch, so that it's clear that it's arbitrary), and to set date_superseded to scheduled_deletion_date minus the stay of execution for rows that have a scheduled_deletion_date.
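
A rough sketch of what that backfill could amount to, assuming plain bulk UPDATEs issued from a garbo job (the epoch value and statement structure are placeholders rather than the final design):

    # Illustrative backfill only; the real work would be batched in garbo.
    store.execute("""
        UPDATE ArchiveFile SET date_created = '1970-01-01 00:00:00+00'
        WHERE date_created IS NULL
        """)
    store.execute("""
        UPDATE ArchiveFile
        SET date_superseded = scheduled_deletion_date - interval '%d days'
        WHERE date_superseded IS NULL AND scheduled_deletion_date IS NOT NULL
        """ % BY_HASH_STAY_OF_EXECUTION)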


Unmerged revisions

18624. By Colin Watson

Turn ArchiveFile into a history table, adding date_created and date_superseded columns. Adjust the publisher to match.

Preview Diff

=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py 2018-03-27 23:26:12 +0000
+++ lib/lp/archivepublisher/publishing.py 2018-04-21 11:24:08 +0000
@@ -1063,8 +1063,7 @@
             assert path.startswith("dists/")
             return path[len("dists/"):]
 
-        # Gather information on entries in the current Release file, and
-        # make sure nothing there is condemned.
+        # Gather information on entries in the current Release file.
         current_files = {}
         for current_entry in (
                 release_data["SHA256"] + extra_data.get("SHA256", [])):
@@ -1073,33 +1072,54 @@
             real_path = os.path.join(suite_dir, real_name)
             current_files[path] = (
                 int(current_entry["size"]), current_entry["sha256"], real_path)
+
+        # Gather information on entries currently in the database. Ensure
+        # that we know about all the relevant by-hash directory trees before
+        # doing any removals so that we can prune them properly later, and
+        # work out which condemned files should be reprieved due to the
+        # paths in question having their previous content again.
+        reprieved_files = defaultdict(list)
         uncondemned_files = set()
         for db_file in archive_file_set.getByArchive(
-                self.archive, container=container, only_condemned=True,
-                eager_load=True):
-            stripped_path = strip_dists(db_file.path)
-            if stripped_path in current_files:
-                current_sha256 = current_files[stripped_path][1]
-                if db_file.library_file.content.sha256 == current_sha256:
-                    uncondemned_files.add(db_file)
-        if uncondemned_files:
-            for container, path, sha256 in archive_file_set.unscheduleDeletion(
-                    uncondemned_files):
+                self.archive, container=container, eager_load=True):
+            by_hashes.registerChild(os.path.dirname(strip_dists(db_file.path)))
+            file_key = (db_file.path, db_file.library_file.content.sha256)
+            if db_file.scheduled_deletion_date is None:
+                uncondemned_files.add(file_key)
+            else:
+                stripped_path = strip_dists(db_file.path)
+                if stripped_path in current_files:
+                    current_sha256 = current_files[stripped_path][1]
+                    if db_file.library_file.content.sha256 == current_sha256:
+                        reprieved_files[file_key].append(db_file)
+
+        # We may already have uncondemned entries with the same path and
+        # content as condemned entries that we were about to reprieve; if
+        # so, there's no need to reprieve them.
+        for file_key in uncondemned_files:
+            reprieved_files.pop(file_key, None)
+
+        # Make sure nothing in the current Release file is condemned.
+        if reprieved_files:
+            reprieved_files_flat = set(
+                chain.from_iterable(reprieved_files.values()))
+            archive_file_set.unscheduleDeletion(reprieved_files_flat)
+            for db_file in reprieved_files_flat:
                 self.log.debug(
                     "by-hash: Unscheduled %s for %s in %s for deletion" % (
-                        sha256, path, container))
+                        db_file.library_file.content.sha256, db_file.path,
+                        db_file.container))
 
         # Remove any condemned files from the database whose stay of
         # execution has elapsed. We ensure that we know about all the
         # relevant by-hash directory trees before doing any removals so that
         # we can prune them properly later.
-        for db_file in archive_file_set.getByArchive(
-                self.archive, container=container):
-            by_hashes.registerChild(os.path.dirname(strip_dists(db_file.path)))
         for container, path, sha256 in archive_file_set.reap(
                 self.archive, container=container):
-            self.log.debug(
-                "by-hash: Deleted %s for %s in %s" % (sha256, path, container))
+            if (path, sha256) not in uncondemned_files:
+                self.log.debug(
+                    "by-hash: Deleted %s for %s in %s" %
                    (sha256, path, container))
 
         # Ensure that all files recorded in the database are in by-hash.
         db_files = archive_file_set.getByArchive(
@@ -1120,12 +1140,13 @@
                 if db_file.library_file.content.sha256 != current_sha256:
                     condemned_files.add(db_file)
         if condemned_files:
-            for container, path, sha256 in archive_file_set.scheduleDeletion(
-                    condemned_files,
-                    timedelta(days=BY_HASH_STAY_OF_EXECUTION)):
+            archive_file_set.scheduleDeletion(
+                condemned_files, timedelta(days=BY_HASH_STAY_OF_EXECUTION))
+            for db_file in condemned_files:
                 self.log.debug(
                     "by-hash: Scheduled %s for %s in %s for deletion" % (
-                        sha256, path, container))
+                        db_file.library_file.content.sha256, db_file.path,
+                        db_file.container))
 
         # Ensure that all the current index files are in by-hash and have
         # corresponding database entries.
=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py 2018-04-05 11:32:50 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py 2018-04-21 11:24:08 +0000
@@ -21,8 +21,6 @@
 from functools import partial
 import gzip
 import hashlib
-from itertools import product
-from operator import attrgetter
 import os
 import shutil
 import stat
@@ -51,7 +49,6 @@
     LessThan,
     Matcher,
     MatchesDict,
-    MatchesListwise,
     MatchesSetwise,
     MatchesStructure,
     Not,
@@ -2581,12 +2578,12 @@
         publisher.D_writeReleaseFiles(False)
 
     @classmethod
-    def _makeScheduledDeletionDateMatcher(cls, condemned_at):
-        if condemned_at is None:
+    def _makeScheduledDeletionDateMatcher(cls, superseded_at):
+        if superseded_at is None:
             return Is(None)
         else:
             return Equals(
-                condemned_at + timedelta(days=BY_HASH_STAY_OF_EXECUTION))
+                superseded_at + timedelta(days=BY_HASH_STAY_OF_EXECUTION))
 
     def assertHasSuiteFiles(self, patterns, *properties):
         def is_interesting(path):
@@ -2600,11 +2597,13 @@
                 self.ubuntutest.main_archive)
             if is_interesting(archive_file.path)]
         matchers = []
-        for path, condemned_at in properties:
+        for path, created_at, superseded_at in properties:
             matchers.append(MatchesStructure(
                 path=Equals('dists/breezy-autotest/%s' % path),
+                date_created=Equals(created_at),
+                date_superseded=Equals(superseded_at),
                 scheduled_deletion_date=self._makeScheduledDeletionDateMatcher(
-                    condemned_at)))
+                    superseded_at)))
         self.assertThat(files, MatchesSetwise(*matchers))
 
     def test_disabled(self):
@@ -2754,7 +2753,8 @@
         flush_database_caches()
         self.assertHasSuiteFiles(
             ('Contents-*', 'Release'),
-            ('Contents-i386', None), ('Release', None))
+            ('Contents-i386', self.times[0], None),
+            ('Release', self.times[0], None))
         releases = [get_release_contents()]
         self.assertThat(
             suite_path('by-hash'),
@@ -2768,8 +2768,10 @@
         flush_database_caches()
         self.assertHasSuiteFiles(
             ('Contents-*', 'Release'),
-            ('Contents-i386', None), ('Contents-hppa', None),
-            ('Release', self.times[1]), ('Release', None))
+            ('Contents-i386', self.times[0], None),
+            ('Contents-hppa', self.times[1], None),
+            ('Release', self.times[0], self.times[1]),
+            ('Release', self.times[1], None))
         releases.append(get_release_contents())
         self.assertThat(
             suite_path('by-hash'),
@@ -2782,9 +2784,11 @@
         flush_database_caches()
         self.assertHasSuiteFiles(
             ('Contents-*', 'Release'),
-            ('Contents-i386', self.times[2]), ('Contents-hppa', None),
-            ('Release', self.times[1]), ('Release', self.times[2]),
-            ('Release', None))
+            ('Contents-i386', self.times[0], self.times[2]),
+            ('Contents-hppa', self.times[1], None),
+            ('Release', self.times[0], self.times[1]),
+            ('Release', self.times[1], self.times[2]),
+            ('Release', self.times[2], None))
         releases.append(get_release_contents())
         self.assertThat(
             suite_path('by-hash'),
@@ -2796,9 +2800,12 @@
         flush_database_caches()
         self.assertHasSuiteFiles(
             ('Contents-*', 'Release'),
-            ('Contents-i386', self.times[2]), ('Contents-hppa', None),
-            ('Release', self.times[1]), ('Release', self.times[2]),
-            ('Release', self.times[3]), ('Release', None))
+            ('Contents-i386', self.times[0], self.times[2]),
+            ('Contents-hppa', self.times[1], None),
+            ('Release', self.times[0], self.times[1]),
+            ('Release', self.times[1], self.times[2]),
+            ('Release', self.times[2], self.times[3]),
+            ('Release', self.times[3], None))
         releases.append(get_release_contents())
         self.assertThat(
             suite_path('by-hash'),
@@ -2817,9 +2824,10 @@
         flush_database_caches()
         self.assertHasSuiteFiles(
             ('Contents-*', 'Release'),
-            ('Contents-hppa', self.times[4]),
-            ('Release', self.times[3]), ('Release', self.times[4]),
-            ('Release', None))
+            ('Contents-hppa', self.times[1], self.times[4]),
+            ('Release', self.times[2], self.times[3]),
+            ('Release', self.times[3], self.times[4]),
+            ('Release', self.times[4], None))
         releases.append(get_release_contents())
         self.assertThat(
             suite_path('by-hash'),
@@ -2836,7 +2844,8 @@
         flush_database_caches()
         self.assertHasSuiteFiles(
             ('Contents-*', 'Release'),
-            ('Release', self.times[5]), ('Release', None))
+            ('Release', self.times[4], self.times[5]),
+            ('Release', self.times[5], None))
         releases.append(get_release_contents())
         self.assertThat(suite_path('by-hash'), ByHashHasContents(releases[4:]))
 
@@ -2863,9 +2872,13 @@
         for name in ('Release', 'Sources'):
             with open(suite_path('main', 'source', name), 'rb') as f:
                 main_contents.add(f.read())
+        self.assertHasSuiteFiles(
+            ('main/source/Sources',),
+            ('main/source/Sources', self.times[0], None))
 
         # Add a source package so that Sources is non-empty.
         pub_source = self.getPubSource(filecontent='Source: foo\n')
+        self.advanceTime(delta=timedelta(hours=1))
         self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
         transaction.commit()
         with open(suite_path('main', 'source', 'Sources'), 'rb') as f:
@@ -2874,28 +2887,42 @@
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
             ByHashHasContents(main_contents))
-
-        # Make the empty Sources file ready to prune.
-        self.advanceTime(
-            delta=timedelta(days=BY_HASH_STAY_OF_EXECUTION, hours=1))
+        self.assertHasSuiteFiles(
+            ('main/source/Sources',),
+            ('main/source/Sources', self.times[0], self.times[1]),
+            ('main/source/Sources', self.times[1], None))
 
         # Delete the source package so that Sources is empty again. The
-        # empty file is reprieved and the non-empty one is condemned.
+        # empty file is reprieved (by creating a new ArchiveFile referring
+        # to it) and the non-empty one is condemned.
         pub_source.requestDeletion(self.ubuntutest.owner)
-        self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
-        transaction.commit()
-        self.assertThat(
-            suite_path('main', 'source', 'by-hash'),
-            ByHashHasContents(main_contents))
-        archive_files = getUtility(IArchiveFileSet).getByArchive(
-            self.ubuntutest.main_archive,
-            path='dists/breezy-autotest/main/source/Sources')
-        self.assertThat(
-            sorted(archive_files, key=attrgetter('id')),
-            MatchesListwise([
-                MatchesStructure(scheduled_deletion_date=Is(None)),
-                MatchesStructure(scheduled_deletion_date=Not(Is(None))),
-                ]))
+        self.advanceTime(delta=timedelta(hours=1))
+        self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+        transaction.commit()
+        self.assertThat(
+            suite_path('main', 'source', 'by-hash'),
+            ByHashHasContents(main_contents))
+        self.assertHasSuiteFiles(
+            ('main/source/Sources',),
+            ('main/source/Sources', self.times[0], self.times[1]),
+            ('main/source/Sources', self.times[1], self.times[2]),
+            ('main/source/Sources', self.times[2], None))
+
+        # Make the first empty Sources file ready to prune. This doesn't
+        # change the set of files on disk, because there's still a newer
+        # reference to the empty file.
+        self.advanceTime(
+            absolute=self.times[1] + timedelta(
+                days=BY_HASH_STAY_OF_EXECUTION, minutes=30))
+        self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+        transaction.commit()
+        self.assertThat(
+            suite_path('main', 'source', 'by-hash'),
+            ByHashHasContents(main_contents))
+        self.assertHasSuiteFiles(
+            ('main/source/Sources',),
+            ('main/source/Sources', self.times[1], self.times[2]),
+            ('main/source/Sources', self.times[2], None))
 
     def setUpPruneableSuite(self):
         self.setUpMockTime()
@@ -2924,14 +2951,18 @@
         # We have two condemned sets of index files and one uncondemned set.
         # main/source/Release contains a small enough amount of information
         # that it doesn't change.
-        expected_suite_files = (
-            list(product(
-                ('main/source/Sources.gz', 'main/source/Sources.bz2',
-                 'Release'),
-                (self.times[1], self.times[2], None))) +
-            [('main/source/Release', None)])
         self.assertHasSuiteFiles(
-            ('main/source/*', 'Release'), *expected_suite_files)
+            ('main/source/*', 'Release'),
+            ('main/source/Sources.gz', self.times[0], self.times[1]),
+            ('main/source/Sources.gz', self.times[1], self.times[2]),
+            ('main/source/Sources.gz', self.times[2], None),
+            ('main/source/Sources.bz2', self.times[0], self.times[1]),
+            ('main/source/Sources.bz2', self.times[1], self.times[2]),
+            ('main/source/Sources.bz2', self.times[2], None),
+            ('main/source/Release', self.times[0], None),
+            ('Release', self.times[0], self.times[1]),
+            ('Release', self.times[1], self.times[2]),
+            ('Release', self.times[2], None))
         self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
@@ -2964,14 +2995,15 @@
         self.assertEqual(set(), publisher.dirty_pockets)
         # The condemned index files are removed, and no new Release file is
         # generated.
-        expected_suite_files = (
-            list(product(
-                ('main/source/Sources.gz', 'main/source/Sources.bz2'),
-                (self.times[2], None))) +
-            [('main/source/Release', None),
-             ('Release', self.times[2]), ('Release', None)])
         self.assertHasSuiteFiles(
-            ('main/source/*', 'Release'), *expected_suite_files)
+            ('main/source/*', 'Release'),
+            ('main/source/Sources.gz', self.times[1], self.times[2]),
+            ('main/source/Sources.gz', self.times[2], None),
+            ('main/source/Sources.bz2', self.times[1], self.times[2]),
+            ('main/source/Sources.bz2', self.times[2], None),
+            ('main/source/Release', self.times[0], None),
+            ('Release', self.times[1], self.times[2]),
+            ('Release', self.times[2], None))
         self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
@@ -2998,14 +3030,15 @@
         self.assertEqual(release_mtime, os.stat(release_path).st_mtime)
         # The condemned index files are removed, and no new Release file is
        # generated.
-        expected_suite_files = (
-            list(product(
-                ('main/source/Sources.gz', 'main/source/Sources.bz2'),
-                (self.times[2], None))) +
-            [('main/source/Release', None),
-             ('Release', self.times[2]), ('Release', None)])
         self.assertHasSuiteFiles(
-            ('main/source/*', 'Release'), *expected_suite_files)
+            ('main/source/*', 'Release'),
+            ('main/source/Sources.gz', self.times[1], self.times[2]),
+            ('main/source/Sources.gz', self.times[2], None),
+            ('main/source/Sources.bz2', self.times[1], self.times[2]),
+            ('main/source/Sources.bz2', self.times[2], None),
+            ('main/source/Release', self.times[0], None),
+            ('Release', self.times[1], self.times[2]),
+            ('Release', self.times[2], None))
         self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
=== modified file 'lib/lp/soyuz/interfaces/archivefile.py'
--- lib/lp/soyuz/interfaces/archivefile.py 2016-04-04 10:06:33 +0000
+++ lib/lp/soyuz/interfaces/archivefile.py 2018-04-21 11:24:08 +0000
@@ -1,4 +1,4 @@
-# Copyright 2016 Canonical Ltd. This software is licensed under the
+# Copyright 2016-2018 Canonical Ltd. This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Interface for a file in an archive."""
@@ -49,6 +49,16 @@
         title=_("The index file in the librarian."),
         schema=ILibraryFileAlias, required=True, readonly=True)
 
+    date_created = Datetime(
+        title=_("The date when this file was created."),
+        # XXX cjwatson 2018-04-17: Should be required=True, but we need to
+        # backfill existing rows first.
+        required=False, readonly=False)
+
+    date_superseded = Datetime(
+        title=_("The date when this file was scheduled for future deletion."),
+        required=False, readonly=False)
+
     scheduled_deletion_date = Datetime(
         title=_("The date when this file should stop being published."),
         required=False, readonly=False)
@@ -79,15 +89,12 @@
         :param content_type: The MIME type of the file.
         """
 
-    def getByArchive(archive, container=None, path=None, only_condemned=False,
-                     eager_load=False):
+    def getByArchive(archive, container=None, path=None, eager_load=False):
        """Get files in an archive.
 
         :param archive: Return files in this `IArchive`.
         :param container: Return only files with this container.
         :param path: Return only files with this path.
-        :param only_condemned: If True, return only files with a
-            scheduled_deletion_date set.
         :param eager_load: If True, preload related `LibraryFileAlias` and
            `LibraryFileContent` rows.
         :return: An iterable of matched files.
@@ -99,8 +106,6 @@
         :param archive_files: The `IArchiveFile`s to schedule for deletion.
         :param stay_of_execution: A `timedelta`; schedule files for deletion
             this amount of time in the future.
-        :return: An iterable of (container, path, sha256) for files that
-            were scheduled for deletion.
         """
 
     def unscheduleDeletion(archive_files):
@@ -110,8 +115,6 @@
         identical to a version that was previously condemned.
 
         :param archive_files: The `IArchiveFile`s to unschedule for deletion.
-        :return: An iterable of (container, path, sha256) for files that
-            were unscheduled for deletion.
         """
 
     def getContainersToReap(archive, container_prefix=None):
=== modified file 'lib/lp/soyuz/model/archivefile.py'
--- lib/lp/soyuz/model/archivefile.py 2018-01-26 10:11:33 +0000
+++ lib/lp/soyuz/model/archivefile.py 2018-04-21 11:24:08 +0000
@@ -14,7 +14,6 @@
 import os.path
 
 import pytz
-from storm.databases.postgres import Returning
 from storm.locals import (
     And,
     DateTime,
@@ -26,7 +25,10 @@
 from zope.component import getUtility
 from zope.interface import implementer
 
-from lp.services.database.bulk import load_related
+from lp.services.database.bulk import (
+    create,
+    load_related,
+    )
 from lp.services.database.constants import UTC_NOW
 from lp.services.database.decoratedresultset import DecoratedResultSet
 from lp.services.database.interfaces import (
@@ -34,7 +36,6 @@
     IStore,
     )
 from lp.services.database.sqlbase import convert_storm_clause_to_string
-from lp.services.database.stormexpr import BulkUpdate
 from lp.services.librarian.interfaces import ILibraryFileAliasSet
 from lp.services.librarian.model import (
     LibraryFileAlias,
@@ -46,6 +47,15 @@
     )
 
 
+def _now():
+    """Get the current transaction timestamp.
+
+    Tests can override this with a Storm expression or a `datetime` to
+    simulate time changes.
+    """
+    return UTC_NOW
+
+
 @implementer(IArchiveFile)
 class ArchiveFile(Storm):
     """See `IArchiveFile`."""
@@ -64,6 +74,14 @@
     library_file_id = Int(name='library_file', allow_none=False)
     library_file = Reference(library_file_id, 'LibraryFileAlias.id')
 
+    date_created = DateTime(
+        # XXX cjwatson 2018-04-17: Should be allow_none=False, but we need
+        # to backfill existing rows first.
+        name='date_created', tzinfo=pytz.UTC, allow_none=True)
+
+    date_superseded = DateTime(
+        name='date_superseded', tzinfo=pytz.UTC, allow_none=True)
+
     scheduled_deletion_date = DateTime(
         name='scheduled_deletion_date', tzinfo=pytz.UTC, allow_none=True)
 
@@ -74,18 +92,11 @@
         self.container = container
         self.path = path
         self.library_file = library_file
+        self.date_created = _now()
+        self.date_superseded = None
         self.scheduled_deletion_date = None
 
 
-def _now():
-    """Get the current transaction timestamp.
-
-    Tests can override this with a Storm expression or a `datetime` to
-    simulate time changes.
-    """
-    return UTC_NOW
-
-
 @implementer(IArchiveFileSet)
 class ArchiveFileSet:
     """See `IArchiveFileSet`."""
@@ -106,8 +117,7 @@
         return cls.new(archive, container, path, library_file)
 
     @staticmethod
-    def getByArchive(archive, container=None, path=None, only_condemned=False,
-                     eager_load=False):
+    def getByArchive(archive, container=None, path=None, eager_load=False):
         """See `IArchiveFileSet`."""
         clauses = [ArchiveFile.archive == archive]
         # XXX cjwatson 2016-03-15: We'll need some more sophisticated way to
@@ -116,8 +126,6 @@
             clauses.append(ArchiveFile.container == container)
         if path is not None:
             clauses.append(ArchiveFile.path == path)
-        if only_condemned:
-            clauses.append(ArchiveFile.scheduled_deletion_date != None)
         archive_files = IStore(ArchiveFile).find(ArchiveFile, *clauses)
 
         def eager_load(rows):
@@ -132,41 +140,23 @@
     @staticmethod
     def scheduleDeletion(archive_files, stay_of_execution):
         """See `IArchiveFileSet`."""
-        clauses = [
-            ArchiveFile.id.is_in(
-                set(archive_file.id for archive_file in archive_files)),
-            ArchiveFile.library_file == LibraryFileAlias.id,
-            LibraryFileAlias.content == LibraryFileContent.id,
-            ]
-        new_date = _now() + stay_of_execution
-        return_columns = [
-            ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256]
-        return list(IMasterStore(ArchiveFile).execute(Returning(
-            BulkUpdate(
-                {ArchiveFile.scheduled_deletion_date: new_date},
-                table=ArchiveFile,
-                values=[LibraryFileAlias, LibraryFileContent],
-                where=And(*clauses)),
-            columns=return_columns)))
+        rows = IMasterStore(ArchiveFile).find(
+            ArchiveFile, ArchiveFile.id.is_in(
+                set(archive_file.id for archive_file in archive_files)))
+        rows.set(
+            date_superseded=_now(),
+            scheduled_deletion_date=_now() + stay_of_execution)
 
     @staticmethod
     def unscheduleDeletion(archive_files):
         """See `IArchiveFileSet`."""
-        clauses = [
-            ArchiveFile.id.is_in(
-                set(archive_file.id for archive_file in archive_files)),
-            ArchiveFile.library_file == LibraryFileAlias.id,
-            LibraryFileAlias.content == LibraryFileContent.id,
-            ]
-        return_columns = [
-            ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256]
-        return list(IMasterStore(ArchiveFile).execute(Returning(
-            BulkUpdate(
-                {ArchiveFile.scheduled_deletion_date: None},
-                table=ArchiveFile,
-                values=[LibraryFileAlias, LibraryFileContent],
-                where=And(*clauses)),
-            columns=return_columns)))
+        create(
+            (ArchiveFile.archive, ArchiveFile.container, ArchiveFile.path,
+             ArchiveFile.library_file, ArchiveFile.date_created,
+             ArchiveFile.date_superseded, ArchiveFile.scheduled_deletion_date),
+            [(archive_file.archive, archive_file.container, archive_file.path,
+              archive_file.library_file, _now(), None, None)
+             for archive_file in archive_files])
 
     @staticmethod
     def getContainersToReap(archive, container_prefix=None):
=== modified file 'lib/lp/soyuz/tests/test_archivefile.py'
--- lib/lp/soyuz/tests/test_archivefile.py 2018-04-21 11:24:07 +0000
+++ lib/lp/soyuz/tests/test_archivefile.py 2018-04-21 11:24:08 +0000
@@ -1,4 +1,4 @@
-# Copyright 2016 Canonical Ltd. This software is licensed under the
+# Copyright 2016-2018 Canonical Ltd. This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """ArchiveFile tests."""
@@ -11,6 +11,13 @@
 import os
 
 from storm.store import Store
+from testtools.matchers import (
+    AfterPreprocessing,
+    Equals,
+    Is,
+    MatchesSetwise,
+    MatchesStructure,
+    )
 import transaction
 from zope.component import getUtility
 from zope.security.proxy import removeSecurityProxy
@@ -25,6 +32,14 @@
 from lp.testing.layers import LaunchpadZopelessLayer
 
 
+def read_library_file(library_file):
+    library_file.open()
+    try:
+        return library_file.read()
+    finally:
+        library_file.close()
+
+
 class TestArchiveFile(TestCaseWithFactory):
 
     layer = LaunchpadZopelessLayer
@@ -34,11 +49,15 @@
         library_file = self.factory.makeLibraryFileAlias()
         archive_file = getUtility(IArchiveFileSet).new(
             archive, "foo", "dists/foo", library_file)
-        self.assertEqual(archive, archive_file.archive)
-        self.assertEqual("foo", archive_file.container)
-        self.assertEqual("dists/foo", archive_file.path)
-        self.assertEqual(library_file, archive_file.library_file)
-        self.assertIsNone(archive_file.scheduled_deletion_date)
+        self.assertThat(archive_file, MatchesStructure(
+            archive=Equals(archive),
+            container=Equals("foo"),
+            path=Equals("dists/foo"),
+            library_file=Equals(library_file),
+            date_created=Equals(
+                get_transaction_timestamp(Store.of(archive_file))),
+            date_superseded=Is(None),
+            scheduled_deletion_date=Is(None)))
 
     def test_newFromFile(self):
         root = self.makeTemporaryDirectory()
@@ -48,24 +67,24 @@
         with open(os.path.join(root, "dists/foo"), "rb") as f:
             archive_file = getUtility(IArchiveFileSet).newFromFile(
                 archive, "foo", "dists/foo", f, 4, "text/plain")
+        now = get_transaction_timestamp(Store.of(archive_file))
         transaction.commit()
-        self.assertEqual(archive, archive_file.archive)
-        self.assertEqual("foo", archive_file.container)
-        self.assertEqual("dists/foo", archive_file.path)
-        archive_file.library_file.open()
-        try:
-            self.assertEqual("abc\n", archive_file.library_file.read())
-        finally:
-            archive_file.library_file.close()
-        self.assertIsNone(archive_file.scheduled_deletion_date)
+        self.assertThat(archive_file, MatchesStructure(
+            archive=Equals(archive),
+            container=Equals("foo"),
+            path=Equals("dists/foo"),
+            library_file=AfterPreprocessing(
+                read_library_file, Equals("abc\n")),
+            date_created=Equals(now),
+            date_superseded=Is(None),
+            scheduled_deletion_date=Is(None)))
 
     def test_getByArchive(self):
         archives = [self.factory.makeArchive(), self.factory.makeArchive()]
         archive_files = []
-        now = get_transaction_timestamp(Store.of(archives[0]))
         for archive in archives:
             archive_files.append(self.factory.makeArchiveFile(
-                archive=archive, scheduled_deletion_date=now))
+                archive=archive))
             archive_files.append(self.factory.makeArchiveFile(
                 archive=archive, container="foo"))
         archive_file_set = getUtility(IArchiveFileSet)
@@ -83,9 +102,6 @@
         self.assertContentEqual(
             [], archive_file_set.getByArchive(archives[0], path="other"))
         self.assertContentEqual(
-            [archive_files[0]],
-            archive_file_set.getByArchive(archives[0], only_condemned=True))
-        self.assertContentEqual(
             archive_files[2:], archive_file_set.getByArchive(archives[1]))
         self.assertContentEqual(
             [archive_files[3]],
@@ -98,19 +114,11 @@
                 archives[1], path=archive_files[3].path))
         self.assertContentEqual(
             [], archive_file_set.getByArchive(archives[1], path="other"))
-        self.assertContentEqual(
-            [archive_files[2]],
-            archive_file_set.getByArchive(archives[1], only_condemned=True))
 
     def test_scheduleDeletion(self):
         archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
-        expected_rows = [
-            (archive_file.container, archive_file.path,
-             archive_file.library_file.content.sha256)
-            for archive_file in archive_files[:2]]
-        rows = getUtility(IArchiveFileSet).scheduleDeletion(
+        getUtility(IArchiveFileSet).scheduleDeletion(
             archive_files[:2], timedelta(days=1))
-        self.assertContentEqual(expected_rows, rows)
         flush_database_caches()
         tomorrow = (
             get_transaction_timestamp(Store.of(archive_files[0])) +
@@ -124,17 +132,32 @@
         now = get_transaction_timestamp(Store.of(archive_files[0]))
         for archive_file in archive_files:
             removeSecurityProxy(archive_file).scheduled_deletion_date = now
-        expected_rows = [
-            (archive_file.container, archive_file.path,
-             archive_file.library_file.content.sha256)
-            for archive_file in archive_files[:2]]
-        rows = getUtility(IArchiveFileSet).unscheduleDeletion(
-            archive_files[:2])
-        self.assertContentEqual(expected_rows, rows)
+        archive_file_set = getUtility(IArchiveFileSet)
+        archive_file_set.unscheduleDeletion(archive_files[:2])
         flush_database_caches()
-        self.assertIsNone(archive_files[0].scheduled_deletion_date)
-        self.assertIsNone(archive_files[1].scheduled_deletion_date)
-        self.assertEqual(now, archive_files[2].scheduled_deletion_date)
+        self.assertThat(
+            archive_file_set.getByArchive(
+                archive_files[0].archive,
+                container=archive_files[0].container,
+                path=archive_files[0].path),
+            MatchesSetwise(
+                MatchesStructure(scheduled_deletion_date=Equals(now)),
+                MatchesStructure(scheduled_deletion_date=Is(None))))
+        self.assertThat(
+            archive_file_set.getByArchive(
+                archive_files[1].archive,
+                container=archive_files[1].container,
+                path=archive_files[1].path),
+            MatchesSetwise(
+                MatchesStructure(scheduled_deletion_date=Equals(now)),
+                MatchesStructure(scheduled_deletion_date=Is(None))))
+        self.assertThat(
+            archive_file_set.getByArchive(
+                archive_files[2].archive,
+                container=archive_files[2].container,
+                path=archive_files[2].path),
+            MatchesSetwise(
+                MatchesStructure(scheduled_deletion_date=Equals(now))))
 
     def test_getContainersToReap(self):
         archive = self.factory.makeArchive()