Merge lp:~stub/launchpad/librarian-gc into lp:launchpad

Proposed by Stuart Bishop
Status: Merged
Approved by: Stuart Bishop
Approved revision: not available
Merged at revision: not available
Proposed branch: lp:~stub/launchpad/librarian-gc
Merge into: lp:launchpad
Diff against target: 208 lines (+126/-16)
3 files modified
cronscripts/librarian-gc.py (+8/-0)
lib/canonical/librarian/ftests/test_gc.py (+69/-16)
lib/canonical/librarian/librariangc.py (+49/-0)
To merge this branch: bzr merge lp:~stub/launchpad/librarian-gc
Reviewer Review Type Date Requested Status
Michael Hudson-Doyle Approve
Canonical Launchpad Engineering release-critical Pending
Review via email: mp+16486@code.launchpad.net

Commit message

Librarian garbage collector expires expired aliases.

To post a comment you must log in.
Revision history for this message
Stuart Bishop (stub) wrote :

The Librarian garbage collector should unlink LibraryFileAliases that have expired from their content, allowing the content to be removed from disk when it is no longer referenced by anything else.

Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

As said on IRC, a comment about the timescale of "recent" in this context in the tests would be great.

It would also be nice not to have SQL in string constants, and to key off a config value rather than a literal '1 week' -- but probably not in this branch.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'cronscripts/librarian-gc.py'
2--- cronscripts/librarian-gc.py 2009-11-25 08:53:00 +0000
3+++ cronscripts/librarian-gc.py 2009-12-23 04:06:15 +0000
4@@ -51,6 +51,12 @@
5 help="Skip removing files on disk with no database references"
6 " or flagged for deletion."
7 )
8+ self.parser.add_option(
9+ '', "--skip-expiry", action="store_true", default=False,
10+ dest="skip_expiry",
11+ help="Skip expiring aliases with an expiry date in the past."
12+ )
13+
14
15 def main(self):
16 librariangc.log = self.logger
17@@ -66,6 +72,8 @@
18
19 # Note that each of these next steps will issue commit commands
20 # as appropriate to make this script transaction friendly
21+ if not self.options.skip_expiry:
22+ librariangc.expire_aliases(conn)
23 if not self.options.skip_content:
24 librariangc.delete_unreferenced_content(conn) # first sweep
25 if not self.options.skip_blobs:
26
27=== modified file 'lib/canonical/librarian/ftests/test_gc.py'
28--- lib/canonical/librarian/ftests/test_gc.py 2009-11-25 08:53:00 +0000
29+++ lib/canonical/librarian/ftests/test_gc.py 2009-12-23 04:06:15 +0000
30@@ -55,6 +55,22 @@
31 self.client = LibrarianClient()
32 librariangc.log = MockLogger()
33
34+ # A value we use in a number of tests. This represents the
35+ # stay of execution hard coded into the garbage collector.
36+ # We don't destroy any data unless it has been waiting to be
37+ # destroyed for longer than this period. We pick a value
38+ # that is close enough to the stay of execution so that
39+ # forgetting timezone information will break things, but
40+ # far enough so that how long it takes the test to run
41+ # is not an issue. 'stay_of_execution - 1 hour' fits these
42+ # criteria.
43+ self.recent_past = (
44+ datetime.utcnow().replace(tzinfo=utc)
45+ - timedelta(days=6, hours=23))
46+ # A time beyond the stay of execution.
47+ self.ancient_past = (
48+ datetime.utcnow().replace(tzinfo=utc) - timedelta(days=30))
49+
50 self.f1_id, self.f2_id = self._makeDupes()
51
52 self.layer.switchDbUser(config.librarian_gc.dbuser)
53@@ -66,12 +82,6 @@
54 path = librariangc.get_file_path(self.f1_id)
55 self.failUnless(os.path.exists(path), "Librarian uploads failed")
56
57- # A value we use in a number of tests
58- self.recent_past = (
59- datetime.utcnow().replace(tzinfo=utc)
60- - timedelta(days=6, hours=23)
61- )
62-
63 # Make sure that every file the database knows about exists on disk.
64 # We manually remove them for tests that need to cope with missing
65 # library items.
66@@ -124,14 +134,12 @@
67
68 # Set the last accessed time into the past so they will be garbage
69 # collected
70- past = datetime.utcnow() - timedelta(days=30)
71- past = past.replace(tzinfo=utc)
72- f1.last_accessed = past
73- f2.last_accessed = past
74- f1.date_created = past
75- f2.date_created = past
76- f1.content.datecreated = past
77- f2.content.datecreated = past
78+ f1.last_accessed = self.ancient_past
79+ f2.last_accessed = self.ancient_past
80+ f1.date_created = self.ancient_past
81+ f2.date_created = self.ancient_past
82+ f1.content.datecreated = self.ancient_past
83+ f2.content.datecreated = self.ancient_past
84
85 del f1, f2
86
87@@ -219,8 +227,7 @@
88 # Flag one of our LibraryFileAliases with an expiry date in the past
89 self.ztm.begin()
90 f1 = LibraryFileAlias.get(self.f1_id)
91- past = datetime.utcnow().replace(tzinfo=utc) - timedelta(days=30)
92- f1.expires = past
93+ f1.expires = self.ancient_past
94 del f1
95 self.ztm.commit()
96
97@@ -261,6 +268,52 @@
98 # Our recently expired LibraryFileAlias is still available.
99 LibraryFileAlias.get(self.f1_id)
100
101+ def test_deleteWellExpiredAliases(self):
102+ # LibraryFileAlias records that are expired are unlinked from their
103+ # content.
104+
105+ # Flag one of our LibraryFileAliases with an expiry date in the past
106+ self.ztm.begin()
107+ f1 = LibraryFileAlias.get(self.f1_id)
108+ f1.expires = self.ancient_past
109+ del f1
110+ self.ztm.commit()
111+
112+ # Unlink expired LibraryFileAliases.
113+ librariangc.expire_aliases(self.con)
114+
115+ self.ztm.begin()
116+ # Make sure the well expired f1 is still there, but has no content.
117+ f1 = LibraryFileAlias.get(self.f1_id)
118+ self.assert_(f1.content is None)
119+ # f2 should still have content, as it isn't flagged for expiry.
120+ f2 = LibraryFileAlias.get(self.f2_id)
121+ self.assert_(f2.content is not None)
122+
123+ def test_ignoreRecentlyExpiredAliases(self):
124+ # LibraryFileAlias records that have expired recently are not
125+ # garbage collected.
126+
127+ # Flag one of our LibraryFileAliases with an expiry date in the
128+ # recent past.
129+ self.ztm.begin()
130+ f1 = LibraryFileAlias.get(self.f1_id)
131+ f1.expires = self.recent_past # Within stay of execution.
132+ del f1
133+ self.ztm.commit()
134+
135+ # Unlink expired LibraryFileAliases.
136+ librariangc.expire_aliases(self.con)
137+
138+ self.ztm.begin()
139+ # Make sure f1 is still there and has content. This ensures that
140+ # our stay of execution is still working.
141+ f1 = LibraryFileAlias.get(self.f1_id)
142+ self.assert_(f1.content is not None)
143+ # f2 should still have content, as it isn't flagged for expiry.
144+ f2 = LibraryFileAlias.get(self.f2_id)
145+ self.assert_(f2.content is not None)
146+
147 def test_DeleteUnreferencedContent(self):
148 # Merge the duplicates. This creates an
149 # unreferenced LibraryFileContent
150
151=== modified file 'lib/canonical/librarian/librariangc.py'
152--- lib/canonical/librarian/librariangc.py 2009-11-25 13:47:07 +0000
153+++ lib/canonical/librarian/librariangc.py 2009-12-23 04:06:15 +0000
154@@ -182,6 +182,55 @@
155 con.commit()
156
157
158+class ExpireAliases:
159+ """Expire expired LibraryFileAlias records.
160+
161+ This simply involves setting the LibraryFileAlias.content to NULL.
162+ Unreferenced LibraryFileContent records are cleaned up elsewhere.
163+ """
164+ implements(ITunableLoop)
165+
166+ def __init__(self, con):
167+ self.con = con
168+ self.total_expired = 0
169+ self._done = False
170+
171+ def isDone(self):
172+ if self._done:
173+ log.info(
174+ "Expired %d LibraryFileAlias records." % self.total_expired)
175+ return True
176+ else:
177+ return False
178+
179+ def __call__(self, chunksize):
180+ chunksize = int(chunksize)
181+ cur = self.con.cursor()
182+ cur.execute("""
183+ UPDATE LibraryFileAlias
184+ SET content=NULL
185+ WHERE id IN (
186+ SELECT id FROM LibraryFileAlias
187+ WHERE
188+ content IS NOT NULL
189+ AND expires < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
190+ - interval '1 week'
191+ LIMIT %d)
192+ """ % chunksize)
193+ self.total_expired += cur.rowcount
194+ if cur.rowcount == 0:
195+ self._done = True
196+ else:
197+ log.debug("Expired %d LibraryFileAlias records." % cur.rowcount)
198+ self.con.commit()
199+
200+
201+def expire_aliases(con):
202+ """Invoke ExpireAliases via a DBLoopTuner."""
203+ loop_tuner = DBLoopTuner(ExpireAliases(con), 5, log=log)
204+ loop_tuner.run()
205+
206+
207 class UnreferencedLibraryFileAliasPruner:
208 """Delete unreferenced LibraryFileAliases.
209