Merge lp:~stub/launchpad/librarian-gc into lp:launchpad

Proposed by Stuart Bishop
Status: Merged
Approved by: Stuart Bishop
Approved revision: not available
Merged at revision: not available
Proposed branch: lp:~stub/launchpad/librarian-gc
Merge into: lp:launchpad
Diff against target: 208 lines (+126/-16)
3 files modified
cronscripts/librarian-gc.py (+8/-0)
lib/canonical/librarian/ftests/test_gc.py (+69/-16)
lib/canonical/librarian/librariangc.py (+49/-0)
To merge this branch: bzr merge lp:~stub/launchpad/librarian-gc
Reviewer Review Type Date Requested Status
Michael Hudson-Doyle Approve
Canonical Launchpad Engineering release-critical Pending
Review via email: mp+16486@code.launchpad.net

Commit message

The Librarian garbage collector now unlinks expired aliases from their content.

To post a comment you must log in.
Revision history for this message
Stuart Bishop (stub) wrote :

The Librarian garbage collector should unlink expired LibraryFileAliases from their content, allowing that content to be removed from disk once nothing else references it.

Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

As discussed on IRC, it would be great to add a comment in the tests explaining the timescale that "recent" refers to in this context.

It would also be nice not to keep SQL in string constants, and to key off a config value rather than a literal '1 week' -- but that probably does not belong in this branch.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'cronscripts/librarian-gc.py'
--- cronscripts/librarian-gc.py 2009-11-25 08:53:00 +0000
+++ cronscripts/librarian-gc.py 2009-12-23 04:06:15 +0000
@@ -51,6 +51,12 @@
51 help="Skip removing files on disk with no database references"51 help="Skip removing files on disk with no database references"
52 " or flagged for deletion."52 " or flagged for deletion."
53 )53 )
54 self.parser.add_option(
55 '', "--skip-expiry", action="store_true", default=False,
56 dest="skip_expiry",
57 help="Skip expiring aliases with an expiry date in the past."
58 )
59
5460
55 def main(self):61 def main(self):
56 librariangc.log = self.logger62 librariangc.log = self.logger
@@ -66,6 +72,8 @@
6672
67 # Note that each of these next steps will issue commit commands73 # Note that each of these next steps will issue commit commands
68 # as appropriate to make this script transaction friendly74 # as appropriate to make this script transaction friendly
75 if not self.options.skip_expiry:
76 librariangc.expire_aliases(conn)
69 if not self.options.skip_content:77 if not self.options.skip_content:
70 librariangc.delete_unreferenced_content(conn) # first sweep78 librariangc.delete_unreferenced_content(conn) # first sweep
71 if not self.options.skip_blobs:79 if not self.options.skip_blobs:
7280
=== modified file 'lib/canonical/librarian/ftests/test_gc.py'
--- lib/canonical/librarian/ftests/test_gc.py 2009-11-25 08:53:00 +0000
+++ lib/canonical/librarian/ftests/test_gc.py 2009-12-23 04:06:15 +0000
@@ -55,6 +55,22 @@
55 self.client = LibrarianClient()55 self.client = LibrarianClient()
56 librariangc.log = MockLogger()56 librariangc.log = MockLogger()
5757
58 # A value we use in a number of tests. This represents the
59 # stay of execution hard coded into the garbage collector.
60 # We don't destroy any data unless it has been waiting to be
61 # destroyed for longer than this period. We pick a value
62 # that is close enough to the stay of execution so that
63 # forgetting timezone information will break things, but
64 # far enough so that how long it takes the test to run
65 # is not an issue. 'stay_of_execution - 1 hour' fits these
66 # criteria.
67 self.recent_past = (
68 datetime.utcnow().replace(tzinfo=utc)
69 - timedelta(days=6, hours=23))
70 # A time beyond the stay of execution.
71 self.ancient_past = (
72 datetime.utcnow().replace(tzinfo=utc) - timedelta(days=30))
73
58 self.f1_id, self.f2_id = self._makeDupes()74 self.f1_id, self.f2_id = self._makeDupes()
5975
60 self.layer.switchDbUser(config.librarian_gc.dbuser)76 self.layer.switchDbUser(config.librarian_gc.dbuser)
@@ -66,12 +82,6 @@
66 path = librariangc.get_file_path(self.f1_id)82 path = librariangc.get_file_path(self.f1_id)
67 self.failUnless(os.path.exists(path), "Librarian uploads failed")83 self.failUnless(os.path.exists(path), "Librarian uploads failed")
6884
69 # A value we use in a number of tests
70 self.recent_past = (
71 datetime.utcnow().replace(tzinfo=utc)
72 - timedelta(days=6, hours=23)
73 )
74
75 # Make sure that every file the database knows about exists on disk.85 # Make sure that every file the database knows about exists on disk.
76 # We manually remove them for tests that need to cope with missing86 # We manually remove them for tests that need to cope with missing
77 # library items.87 # library items.
@@ -124,14 +134,12 @@
124134
125 # Set the last accessed time into the past so they will be garbage135 # Set the last accessed time into the past so they will be garbage
126 # collected136 # collected
127 past = datetime.utcnow() - timedelta(days=30)137 f1.last_accessed = self.ancient_past
128 past = past.replace(tzinfo=utc)138 f2.last_accessed = self.ancient_past
129 f1.last_accessed = past139 f1.date_created = self.ancient_past
130 f2.last_accessed = past140 f2.date_created = self.ancient_past
131 f1.date_created = past141 f1.content.datecreated = self.ancient_past
132 f2.date_created = past142 f2.content.datecreated = self.ancient_past
133 f1.content.datecreated = past
134 f2.content.datecreated = past
135143
136 del f1, f2144 del f1, f2
137145
@@ -219,8 +227,7 @@
219 # Flag one of our LibraryFileAliases with an expiry date in the past227 # Flag one of our LibraryFileAliases with an expiry date in the past
220 self.ztm.begin()228 self.ztm.begin()
221 f1 = LibraryFileAlias.get(self.f1_id)229 f1 = LibraryFileAlias.get(self.f1_id)
222 past = datetime.utcnow().replace(tzinfo=utc) - timedelta(days=30)230 f1.expires = self.ancient_past
223 f1.expires = past
224 del f1231 del f1
225 self.ztm.commit()232 self.ztm.commit()
226233
@@ -261,6 +268,52 @@
261 # Our recently expired LibraryFileAlias is still available.268 # Our recently expired LibraryFileAlias is still available.
262 LibraryFileAlias.get(self.f1_id)269 LibraryFileAlias.get(self.f1_id)
263270
271 def test_deleteWellExpiredAliases(self):
272 # LibraryFileAlias records that are expired are unlinked from their
273 # content.
274
275 # Flag one of our LibraryFileAliases with an expiry date in the past
276 self.ztm.begin()
277 f1 = LibraryFileAlias.get(self.f1_id)
278 f1.expires = self.ancient_past
279 del f1
280 self.ztm.commit()
281
282 # Unlink expired LibraryFileAliases.
283 librariangc.expire_aliases(self.con)
284
285 self.ztm.begin()
286 # Make sure the well expired f1 is still there, but has no content.
287 f1 = LibraryFileAlias.get(self.f1_id)
288 self.assert_(f1.content is None)
289 # f2 should still have content, as it isn't flagged for expiry.
290 f2 = LibraryFileAlias.get(self.f2_id)
291 self.assert_(f2.content is not None)
292
293 def test_ignoreRecentlyExpiredAliases(self):
294 # LibraryFileAlias records that have expired recently are not
295 # garbage collected.
296
297 # Flag one of our LibraryFileAliases with an expiry date in the
298 # recent past.
299 self.ztm.begin()
300 f1 = LibraryFileAlias.get(self.f1_id)
301 f1.expires = self.recent_past # Within stay of execution.
302 del f1
303 self.ztm.commit()
304
305 # Unlink expired LibraryFileAliases.
306 librariangc.expire_aliases(self.con)
307
308 self.ztm.begin()
309 # Make sure f1 is still there and has content. This ensures that
310 # our stay of execution is still working.
311 f1 = LibraryFileAlias.get(self.f1_id)
312 self.assert_(f1.content is not None)
313 # f2 should still have content, as it isn't flagged for expiry.
314 f2 = LibraryFileAlias.get(self.f2_id)
315 self.assert_(f2.content is not None)
316
264 def test_DeleteUnreferencedContent(self):317 def test_DeleteUnreferencedContent(self):
265 # Merge the duplicates. This creates an318 # Merge the duplicates. This creates an
266 # unreferenced LibraryFileContent319 # unreferenced LibraryFileContent
267320
=== modified file 'lib/canonical/librarian/librariangc.py'
--- lib/canonical/librarian/librariangc.py 2009-11-25 13:47:07 +0000
+++ lib/canonical/librarian/librariangc.py 2009-12-23 04:06:15 +0000
@@ -182,6 +182,55 @@
182 con.commit()182 con.commit()
183183
184184
185class ExpireAliases:
186 """Expire expired LibraryFileAlias records.
187
188 This simply involves setting the LibraryFileAlias.content to NULL.
189 Unreferenced LibraryFileContent records are cleaned up elsewhere.
190 """
191 implements(ITunableLoop)
192
193 def __init__(self, con):
194 self.con = con
195 self.total_expired = 0
196 self._done = False
197
198 def isDone(self):
199 if self._done:
200 log.info(
201 "Expired %d LibraryFileAlias records." % self.total_expired)
202 return True
203 else:
204 return False
205
206 def __call__(self, chunksize):
207 chunksize = int(chunksize)
208 cur = self.con.cursor()
209 cur.execute("""
210 UPDATE LibraryFileAlias
211 SET content=NULL
212 WHERE id IN (
213 SELECT id FROM LibraryFileAlias
214 WHERE
215 content IS NOT NULL
216 AND expires < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
217 - interval '1 week'
218 LIMIT %d)
219 """ % chunksize)
220 self.total_expired += cur.rowcount
221 if cur.rowcount == 0:
222 self._done = True
223 else:
224 log.debug("Expired %d LibraryFileAlias records." % cur.rowcount)
225 self.con.commit()
226
227
228def expire_aliases(con):
229 """Invoke ExpireAliases."""
230 loop_tuner = DBLoopTuner(ExpireAliases(con), 5, log=log)
231 loop_tuner.run()
232
233
185class UnreferencedLibraryFileAliasPruner:234class UnreferencedLibraryFileAliasPruner:
186 """Delete unreferenced LibraryFileAliases.235 """Delete unreferenced LibraryFileAliases.
187236