Merge lp:~michael.nelson/launchpad/628711-generate-ppa-htacess-too-slow-try-2 into lp:launchpad

Proposed by Michael Nelson
Status: Merged
Approved by: Brad Crittenden
Approved revision: no longer in the source branch.
Merged at revision: 11503
Proposed branch: lp:~michael.nelson/launchpad/628711-generate-ppa-htacess-too-slow-try-2
Merge into: lp:launchpad
Diff against target: 328 lines (+148/-54)
2 files modified
lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py (+111/-50)
lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py (+37/-4)
To merge this branch: bzr merge lp:~michael.nelson/launchpad/628711-generate-ppa-htacess-too-slow-try-2
Reviewer Review Type Date Requested Status
Graham Binns (community) release-critical Approve
Brad Crittenden (community) code Approve
Review via email: mp+34546@code.launchpad.net

Commit message

Speed up the generate-ppa-htacess cron script.

Description of the change

= Summary =
This branch addresses bug 628711 - generate-ppa-htaccess is too slow.

== Proposed fix ==
Based on the discussion on the bug, this branch updates the script so that it:

1) Queries for *all* current subscriptions that should be marked as expired
and expires them,
2) Queries for all tokens that are now invalid (ie. based on subscriptions
that have been cancelled or expired) and deactivates them, noting the ppas
affected.
3) Finds all the ppas that have new tokens created since the last successful
run of the script, combines these with the ppas affected by token deactivation
and checks/recreates their htpasswd files.

== Pre-implementation notes ==
See bug 628711

== Implementation details ==

== Tests ==
bin/test -vvm test_generate_ppa_htaccess

== Demo and Q/A ==
I'll be starting the QA on dogfood now.

= Launchpad lint =

Checking for conflicts and issues in changed files.

Linting changed files:
  lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py
  lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py

To post a comment you must log in.
Revision history for this message
Brad Crittenden (bac) wrote :

This change looks good Michael and I'm glad to hear you've run it on dogfood.

review: Approve (code)
Revision history for this message
Graham Binns (gmb) :
review: Approve (release-critical)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py'
2--- lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py 2010-08-23 17:26:42 +0000
3+++ lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py 2010-09-03 16:19:41 +0000
4@@ -6,34 +6,34 @@
5 # pylint: disable-msg=C0103,W0403
6
7 import crypt
8-from datetime import datetime
9 import filecmp
10-from operator import attrgetter
11 import os
12+import pytz
13 import tempfile
14
15-import pytz
16+from collections import defaultdict
17+from datetime import datetime
18+from operator import attrgetter
19 from zope.component import getUtility
20
21 from canonical.config import config
22 from canonical.launchpad.helpers import get_email_template
23+from canonical.launchpad.interfaces.lpstorm import IStore
24 from canonical.launchpad.mail import (
25 format_address,
26 simple_sendmail,
27 )
28 from canonical.launchpad.webapp import canonical_url
29 from lp.archivepublisher.config import getPubConfig
30+from lp.registry.model.teammembership import TeamParticipation
31 from lp.services.mail.mailwrapper import MailWrapper
32 from lp.services.scripts.base import LaunchpadCronScript
33+from lp.services.scripts.interfaces.scriptactivity import IScriptActivitySet
34 from lp.soyuz.enums import ArchiveStatus
35-from lp.soyuz.interfaces.archive import (
36- IArchiveSet,
37- )
38-from lp.soyuz.interfaces.archiveauthtoken import IArchiveAuthTokenSet
39-from lp.soyuz.interfaces.archivesubscriber import (
40- IArchiveSubscriberSet,
41- )
42 from lp.soyuz.enums import ArchiveSubscriberStatus
43+from lp.soyuz.model.archiveauthtoken import ArchiveAuthToken
44+from lp.soyuz.model.archivesubscriber import ArchiveSubscriber
45+
46
47 # These PPAs should never have their htaccess/pwd files touched.
48 BLACKLISTED_PPAS = {
49@@ -96,7 +96,7 @@
50 # It's not there, so create it.
51 if not os.path.exists(pub_config.htaccessroot):
52 os.makedirs(pub_config.htaccessroot)
53- interpolations = {"path" : pub_config.htaccessroot}
54+ interpolations = {"path": pub_config.htaccessroot}
55 file = open(htaccess_filename, "w")
56 file.write(HTACCESS_TEMPLATE % interpolations)
57 file.close()
58@@ -169,9 +169,9 @@
59
60 to_address = [send_to_person.preferredemail.email]
61 replacements = {
62- 'recipient_name' : send_to_person.displayname,
63- 'ppa_name' : ppa_name,
64- 'ppa_owner_url' : ppa_owner_url,
65+ 'recipient_name': send_to_person.displayname,
66+ 'ppa_name': ppa_name,
67+ 'ppa_owner_url': ppa_owner_url,
68 }
69 body = MailWrapper(72).format(
70 template % replacements, force_wrap=True)
71@@ -181,64 +181,126 @@
72 config.canonical.noreply_from_address)
73
74 headers = {
75- 'Sender' : config.canonical.bounce_address,
76+ 'Sender': config.canonical.bounce_address,
77 }
78
79 simple_sendmail(from_address, to_address, subject, body, headers)
80
81- def deactivateTokens(self, ppa, send_email=False):
82+ def deactivateTokens(self, send_email=False):
83 """Deactivate tokens as necessary.
84
85 If a subscriber no longer has an active token for the PPA, we
86 deactivate it.
87
88- :param ppa: The PPA to check tokens for.
89 :param send_email: Whether to send a cancellation email to the owner
90 of the token. This defaults to False to speed up the test
91 suite.
92- :return: a list of valid tokens.
93+ :return: the set of ppa ids affected by token deactivations.
94 """
95- tokens = getUtility(IArchiveAuthTokenSet).getByArchive(ppa)
96- valid_tokens = []
97- for token in tokens:
98- result = getUtility(
99- IArchiveSubscriberSet).getBySubscriberWithActiveToken(
100- token.person, ppa)
101- if result.count() == 0:
102- # The subscriber's token is no longer active,
103- # deactivate it.
104- if send_email:
105- self.sendCancellationEmail(token)
106- token.deactivate()
107- else:
108- valid_tokens.append(token)
109- return valid_tokens
110-
111- def expireSubscriptions(self, ppa):
112+ # First we grab all the active tokens for which there is a
113+ # matching current archive subscription for a team of which the
114+ # token owner is a member.
115+ store = IStore(ArchiveSubscriber)
116+ valid_tokens = store.find(
117+ ArchiveAuthToken,
118+ ArchiveAuthToken.date_deactivated == None,
119+ ArchiveAuthToken.archive_id == ArchiveSubscriber.archive_id,
120+ ArchiveSubscriber.status == ArchiveSubscriberStatus.CURRENT,
121+ ArchiveSubscriber.subscriber_id == TeamParticipation.teamID,
122+ TeamParticipation.personID == ArchiveAuthToken.person_id)
123+
124+ # We can then evaluate the invalid tokens by the difference of
125+ # all active tokens and valid tokens.
126+ all_active_tokens = store.find(
127+ ArchiveAuthToken,
128+ ArchiveAuthToken.date_deactivated == None)
129+ invalid_tokens = all_active_tokens.difference(valid_tokens)
130+
131+ # We note the ppas affected by these token deactivations so that
132+ # we can later update their htpasswd files.
133+ affected_ppa_ids = set()
134+ for token in invalid_tokens:
135+ if send_email:
136+ self.sendCancellationEmail(token)
137+ token.deactivate()
138+ affected_ppa_ids.add(token.archive_id)
139+
140+ return affected_ppa_ids
141+
142+ def expireSubscriptions(self):
143 """Expire subscriptions as necessary.
144
145 If an `ArchiveSubscriber`'s date_expires has passed, then
146 set its status to EXPIRED.
147-
148- :param ppa: The PPA to expire subscriptons for.
149 """
150 now = datetime.now(pytz.UTC)
151- subscribers = getUtility(IArchiveSubscriberSet).getByArchive(ppa)
152- for subscriber in subscribers:
153- date_expires = subscriber.date_expires
154- if date_expires is not None and date_expires <= now:
155- self.logger.info(
156- "Expiring subscription: %s" % subscriber.displayname)
157- subscriber.status = ArchiveSubscriberStatus.EXPIRED
158+
159+ store = IStore(ArchiveSubscriber)
160+ newly_expired_subscriptions = store.find(
161+ ArchiveSubscriber,
162+ ArchiveSubscriber.status == ArchiveSubscriberStatus.CURRENT,
163+ ArchiveSubscriber.date_expires != None,
164+ ArchiveSubscriber.date_expires <= now)
165+
166+ subscription_names = [
167+ subs.displayname for subs in newly_expired_subscriptions]
168+ if subscription_names:
169+ newly_expired_subscriptions.set(
170+ status=ArchiveSubscriberStatus.EXPIRED)
171+ self.logger.info(
172+ "Expired subscriptions: %s" % ", ".join(subscription_names))
173+
174+ def getNewTokensSinceLastRun(self):
175+ """Return result set of new tokens created since the last run."""
176+ store = IStore(ArchiveAuthToken)
177+ # If we don't know when we last ran, we include all active
178+ # tokens by default.
179+ last_success = getUtility(IScriptActivitySet).getLastActivity(
180+ 'generate-ppa-htaccess')
181+ extra_expr = []
182+ if last_success:
183+ extra_expr = [
184+ ArchiveAuthToken.date_created >= last_success.date_completed]
185+
186+ new_ppa_tokens = store.find(
187+ ArchiveAuthToken,
188+ ArchiveAuthToken.date_deactivated == None,
189+ *extra_expr)
190+
191+ return new_ppa_tokens
192+
193+ def _getValidTokensForPPAIDs(self, ppa_ids):
194+ """Returns a dict keyed by archive with all the tokens for each."""
195+ affected_ppas_with_tokens = defaultdict(list)
196+ store = IStore(ArchiveAuthToken)
197+ affected_ppa_tokens = store.find(
198+ ArchiveAuthToken,
199+ ArchiveAuthToken.date_deactivated == None,
200+ ArchiveAuthToken.archive_id.is_in(ppa_ids))
201+
202+ for token in affected_ppa_tokens:
203+ affected_ppas_with_tokens[token.archive].append(token)
204+
205+ return affected_ppas_with_tokens
206
207 def main(self):
208 """Script entry point."""
209 self.logger.info('Starting the PPA .htaccess generation')
210- ppas = getUtility(IArchiveSet).getPrivatePPAs()
211- for ppa in ppas:
212- self.expireSubscriptions(ppa)
213- valid_tokens = self.deactivateTokens(ppa, send_email=True)
214-
215+ self.expireSubscriptions()
216+ affected_ppa_ids = self.deactivateTokens(send_email=True)
217+
218+ # In addition to the ppas that are affected by deactivated
219+ # tokens, we also want to include any ppas that have tokens
220+ # created since the last time we ran.
221+ for token in self.getNewTokensSinceLastRun():
222+ affected_ppa_ids.add(token.archive_id)
223+
224+ affected_ppas_with_tokens = {}
225+ if affected_ppa_ids:
226+ affected_ppas_with_tokens = self._getValidTokensForPPAIDs(
227+ affected_ppa_ids)
228+
229+ for ppa, valid_tokens in affected_ppas_with_tokens.iteritems():
230 # If this PPA is blacklisted, do not touch it's htaccess/pwd
231 # files.
232 blacklisted_ppa_names_for_owner = self.blacklist.get(
233@@ -271,4 +333,3 @@
234 self.txn.commit()
235
236 self.logger.info('Finished PPA .htaccess generation')
237-
238
239=== modified file 'lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py'
240--- lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py 2010-08-23 17:26:42 +0000
241+++ lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py 2010-09-03 16:19:41 +0000
242@@ -15,6 +15,7 @@
243
244 import pytz
245 from zope.component import getUtility
246+from zope.security.proxy import removeSecurityProxy
247
248 from canonical.config import config
249 from canonical.launchpad.interfaces import (
250@@ -29,6 +30,7 @@
251 HtaccessTokenGenerator,
252 )
253 from lp.services.mail import stub
254+from lp.services.scripts.interfaces.scriptactivity import IScriptActivitySet
255 from lp.soyuz.enums import (
256 ArchiveStatus,
257 ArchiveSubscriberStatus,
258@@ -298,7 +300,7 @@
259
260 # Initially, nothing is eligible for deactivation.
261 script = self.getScript()
262- script.deactivateTokens(self.ppa)
263+ script.deactivateTokens()
264 for person in tokens:
265 self.assertNotDeactivated(tokens[person])
266
267@@ -311,7 +313,7 @@
268 # Clear out emails generated when leaving a team.
269 pop_notifications()
270
271- script.deactivateTokens(self.ppa, send_email=True)
272+ script.deactivateTokens(send_email=True)
273 self.assertDeactivated(tokens[team1_person])
274 del tokens[team1_person]
275 for person in tokens:
276@@ -332,7 +334,7 @@
277 self.layer.switchDbUser(self.dbuser)
278 # Clear out emails generated when leaving a team.
279 pop_notifications()
280- script.deactivateTokens(self.ppa, send_email=True)
281+ script.deactivateTokens(send_email=True)
282 self.assertNotDeactivated(tokens[promiscuous_person])
283 for person in tokens:
284 self.assertNotDeactivated(tokens[person])
285@@ -353,7 +355,7 @@
286 self.assertFalse(team2.inTeam(parent_team))
287 self.layer.txn.commit()
288 self.layer.switchDbUser(self.dbuser)
289- script.deactivateTokens(self.ppa)
290+ script.deactivateTokens()
291 for person in persons2:
292 self.assertDeactivated(tokens[person])
293
294@@ -573,3 +575,34 @@
295 "feedback@launchpad.net\n\n"
296 "Regards,\n"
297 "The Launchpad team")
298+
299+ def test_getNewTokensSinceLastRun_no_previous_run(self):
300+ """All valid tokens returned if there is no record of previous run."""
301+ now = datetime.now(pytz.UTC)
302+ tokens = self.setupDummyTokens()[1]
303+
304+ # If there is no record of the script running previously, all
305+ # valid tokens are returned.
306+ script = self.getScript()
307+ self.assertContentEqual(tokens, script.getNewTokensSinceLastRun())
308+
309+ def test_getNewTokensSinceLastRun_only_those_since_last_run(self):
310+ """Only tokens created since the last run are returned."""
311+ now = datetime.now(pytz.UTC)
312+ tokens = self.setupDummyTokens()[1]
313+ getUtility(IScriptActivitySet).recordSuccess(
314+ 'generate-ppa-htaccess', now, now - timedelta(minutes=3))
315+ removeSecurityProxy(tokens[0]).date_created = (
316+ now - timedelta(minutes=4))
317+
318+ script = self.getScript()
319+ self.assertContentEqual(tokens[1:], script.getNewTokensSinceLastRun())
320+
321+ def test_getNewTokensSinceLastRun_only_active_tokens(self):
322+ """Only active tokens are returned."""
323+ now = datetime.now(pytz.UTC)
324+ tokens = self.setupDummyTokens()[1]
325+ tokens[0].deactivate()
326+
327+ script = self.getScript()
328+ self.assertContentEqual(tokens[1:], script.getNewTokensSinceLastRun())