Merge lp:~michael.nelson/launchpad/628711-generate-ppa-htacess-too-slow-try-2 into lp:launchpad

Proposed by Michael Nelson
Status: Merged
Approved by: Brad Crittenden
Approved revision: no longer in the source branch.
Merged at revision: 11503
Proposed branch: lp:~michael.nelson/launchpad/628711-generate-ppa-htacess-too-slow-try-2
Merge into: lp:launchpad
Diff against target: 328 lines (+148/-54)
2 files modified
lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py (+111/-50)
lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py (+37/-4)
To merge this branch: bzr merge lp:~michael.nelson/launchpad/628711-generate-ppa-htacess-too-slow-try-2
Reviewer Review Type Date Requested Status
Graham Binns (community) release-critical Approve
Brad Crittenden (community) code Approve
Review via email: mp+34546@code.launchpad.net

Commit message

Speed up the generate-ppa-htacess cron script.

Description of the change

= Summary =
This branch addresses bug 628711 - generate-ppa-htaccess is too slow.

== Proposed fix ==
Based on the discussion on the bug, this branch updates the script so that it:

1) Queries for *all* current subscriptions that should be marked as expired
and expires them,
2) Queries for all tokens that are now invalid (ie. based on subscriptions
that have been cancelled or expired) and deactivates them, noting the ppas
affected.
3) Finds all the ppas that have new tokens created since the last successful
run of the script, combines these with the ppas affected by token deactivation
and checks/recreates their htpasswd files.

== Pre-implementation notes ==
See bug 628711

== Implementation details ==

== Tests ==
bin/test -vvm test_generate_ppa_htaccess

== Demo and Q/A ==
I'll be starting the QA on dogfood now.

= Launchpad lint =

Checking for conflicts and issues in changed files.

Linting changed files:
  lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py
  lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py

To post a comment you must log in.
Revision history for this message
Brad Crittenden (bac) wrote :

This change looks good Michael and I'm glad to hear you've run it on dogfood.

review: Approve (code)
Revision history for this message
Graham Binns (gmb) :
review: Approve (release-critical)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py'
--- lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py 2010-08-23 17:26:42 +0000
+++ lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py 2010-09-03 16:19:41 +0000
@@ -6,34 +6,34 @@
6# pylint: disable-msg=C0103,W04036# pylint: disable-msg=C0103,W0403
77
8import crypt8import crypt
9from datetime import datetime
10import filecmp9import filecmp
11from operator import attrgetter
12import os10import os
11import pytz
13import tempfile12import tempfile
1413
15import pytz14from collections import defaultdict
15from datetime import datetime
16from operator import attrgetter
16from zope.component import getUtility17from zope.component import getUtility
1718
18from canonical.config import config19from canonical.config import config
19from canonical.launchpad.helpers import get_email_template20from canonical.launchpad.helpers import get_email_template
21from canonical.launchpad.interfaces.lpstorm import IStore
20from canonical.launchpad.mail import (22from canonical.launchpad.mail import (
21 format_address,23 format_address,
22 simple_sendmail,24 simple_sendmail,
23 )25 )
24from canonical.launchpad.webapp import canonical_url26from canonical.launchpad.webapp import canonical_url
25from lp.archivepublisher.config import getPubConfig27from lp.archivepublisher.config import getPubConfig
28from lp.registry.model.teammembership import TeamParticipation
26from lp.services.mail.mailwrapper import MailWrapper29from lp.services.mail.mailwrapper import MailWrapper
27from lp.services.scripts.base import LaunchpadCronScript30from lp.services.scripts.base import LaunchpadCronScript
31from lp.services.scripts.interfaces.scriptactivity import IScriptActivitySet
28from lp.soyuz.enums import ArchiveStatus32from lp.soyuz.enums import ArchiveStatus
29from lp.soyuz.interfaces.archive import (
30 IArchiveSet,
31 )
32from lp.soyuz.interfaces.archiveauthtoken import IArchiveAuthTokenSet
33from lp.soyuz.interfaces.archivesubscriber import (
34 IArchiveSubscriberSet,
35 )
36from lp.soyuz.enums import ArchiveSubscriberStatus33from lp.soyuz.enums import ArchiveSubscriberStatus
34from lp.soyuz.model.archiveauthtoken import ArchiveAuthToken
35from lp.soyuz.model.archivesubscriber import ArchiveSubscriber
36
3737
38# These PPAs should never have their htaccess/pwd files touched.38# These PPAs should never have their htaccess/pwd files touched.
39BLACKLISTED_PPAS = {39BLACKLISTED_PPAS = {
@@ -96,7 +96,7 @@
96 # It's not there, so create it.96 # It's not there, so create it.
97 if not os.path.exists(pub_config.htaccessroot):97 if not os.path.exists(pub_config.htaccessroot):
98 os.makedirs(pub_config.htaccessroot)98 os.makedirs(pub_config.htaccessroot)
99 interpolations = {"path" : pub_config.htaccessroot}99 interpolations = {"path": pub_config.htaccessroot}
100 file = open(htaccess_filename, "w")100 file = open(htaccess_filename, "w")
101 file.write(HTACCESS_TEMPLATE % interpolations)101 file.write(HTACCESS_TEMPLATE % interpolations)
102 file.close()102 file.close()
@@ -169,9 +169,9 @@
169169
170 to_address = [send_to_person.preferredemail.email]170 to_address = [send_to_person.preferredemail.email]
171 replacements = {171 replacements = {
172 'recipient_name' : send_to_person.displayname,172 'recipient_name': send_to_person.displayname,
173 'ppa_name' : ppa_name,173 'ppa_name': ppa_name,
174 'ppa_owner_url' : ppa_owner_url,174 'ppa_owner_url': ppa_owner_url,
175 }175 }
176 body = MailWrapper(72).format(176 body = MailWrapper(72).format(
177 template % replacements, force_wrap=True)177 template % replacements, force_wrap=True)
@@ -181,64 +181,126 @@
181 config.canonical.noreply_from_address)181 config.canonical.noreply_from_address)
182182
183 headers = {183 headers = {
184 'Sender' : config.canonical.bounce_address,184 'Sender': config.canonical.bounce_address,
185 }185 }
186186
187 simple_sendmail(from_address, to_address, subject, body, headers)187 simple_sendmail(from_address, to_address, subject, body, headers)
188188
189 def deactivateTokens(self, ppa, send_email=False):189 def deactivateTokens(self, send_email=False):
190 """Deactivate tokens as necessary.190 """Deactivate tokens as necessary.
191191
192 If a subscriber no longer has an active token for the PPA, we192 If a subscriber no longer has an active token for the PPA, we
193 deactivate it.193 deactivate it.
194194
195 :param ppa: The PPA to check tokens for.
196 :param send_email: Whether to send a cancellation email to the owner195 :param send_email: Whether to send a cancellation email to the owner
197 of the token. This defaults to False to speed up the test196 of the token. This defaults to False to speed up the test
198 suite.197 suite.
199 :return: a list of valid tokens.198 :return: the set of ppa ids affected by token deactivations.
200 """199 """
201 tokens = getUtility(IArchiveAuthTokenSet).getByArchive(ppa)200 # First we grab all the active tokens for which there is a
202 valid_tokens = []201 # matching current archive subscription for a team of which the
203 for token in tokens:202 # token owner is a member.
204 result = getUtility(203 store = IStore(ArchiveSubscriber)
205 IArchiveSubscriberSet).getBySubscriberWithActiveToken(204 valid_tokens = store.find(
206 token.person, ppa)205 ArchiveAuthToken,
207 if result.count() == 0:206 ArchiveAuthToken.date_deactivated == None,
208 # The subscriber's token is no longer active,207 ArchiveAuthToken.archive_id == ArchiveSubscriber.archive_id,
209 # deactivate it.208 ArchiveSubscriber.status == ArchiveSubscriberStatus.CURRENT,
210 if send_email:209 ArchiveSubscriber.subscriber_id == TeamParticipation.teamID,
211 self.sendCancellationEmail(token)210 TeamParticipation.personID == ArchiveAuthToken.person_id)
212 token.deactivate()211
213 else:212 # We can then evaluate the invalid tokens by the difference of
214 valid_tokens.append(token)213 # all active tokens and valid tokens.
215 return valid_tokens214 all_active_tokens = store.find(
216215 ArchiveAuthToken,
217 def expireSubscriptions(self, ppa):216 ArchiveAuthToken.date_deactivated == None)
217 invalid_tokens = all_active_tokens.difference(valid_tokens)
218
219 # We note the ppas affected by these token deactivations so that
220 # we can later update their htpasswd files.
221 affected_ppa_ids = set()
222 for token in invalid_tokens:
223 if send_email:
224 self.sendCancellationEmail(token)
225 token.deactivate()
226 affected_ppa_ids.add(token.archive_id)
227
228 return affected_ppa_ids
229
230 def expireSubscriptions(self):
218 """Expire subscriptions as necessary.231 """Expire subscriptions as necessary.
219232
220 If an `ArchiveSubscriber`'s date_expires has passed, then233 If an `ArchiveSubscriber`'s date_expires has passed, then
221 set its status to EXPIRED.234 set its status to EXPIRED.
222
223 :param ppa: The PPA to expire subscriptons for.
224 """235 """
225 now = datetime.now(pytz.UTC)236 now = datetime.now(pytz.UTC)
226 subscribers = getUtility(IArchiveSubscriberSet).getByArchive(ppa)237
227 for subscriber in subscribers:238 store = IStore(ArchiveSubscriber)
228 date_expires = subscriber.date_expires239 newly_expired_subscriptions = store.find(
229 if date_expires is not None and date_expires <= now:240 ArchiveSubscriber,
230 self.logger.info(241 ArchiveSubscriber.status == ArchiveSubscriberStatus.CURRENT,
231 "Expiring subscription: %s" % subscriber.displayname)242 ArchiveSubscriber.date_expires != None,
232 subscriber.status = ArchiveSubscriberStatus.EXPIRED243 ArchiveSubscriber.date_expires <= now)
244
245 subscription_names = [
246 subs.displayname for subs in newly_expired_subscriptions]
247 if subscription_names:
248 newly_expired_subscriptions.set(
249 status=ArchiveSubscriberStatus.EXPIRED)
250 self.logger.info(
251 "Expired subscriptions: %s" % ", ".join(subscription_names))
252
253 def getNewTokensSinceLastRun(self):
254 """Return result set of new tokens created since the last run."""
255 store = IStore(ArchiveAuthToken)
256 # If we don't know when we last ran, we include all active
257 # tokens by default.
258 last_success = getUtility(IScriptActivitySet).getLastActivity(
259 'generate-ppa-htaccess')
260 extra_expr = []
261 if last_success:
262 extra_expr = [
263 ArchiveAuthToken.date_created >= last_success.date_completed]
264
265 new_ppa_tokens = store.find(
266 ArchiveAuthToken,
267 ArchiveAuthToken.date_deactivated == None,
268 *extra_expr)
269
270 return new_ppa_tokens
271
272 def _getValidTokensForPPAIDs(self, ppa_ids):
273 """Returns a dict keyed by archive with all the tokens for each."""
274 affected_ppas_with_tokens = defaultdict(list)
275 store = IStore(ArchiveAuthToken)
276 affected_ppa_tokens = store.find(
277 ArchiveAuthToken,
278 ArchiveAuthToken.date_deactivated == None,
279 ArchiveAuthToken.archive_id.is_in(ppa_ids))
280
281 for token in affected_ppa_tokens:
282 affected_ppas_with_tokens[token.archive].append(token)
283
284 return affected_ppas_with_tokens
233285
234 def main(self):286 def main(self):
235 """Script entry point."""287 """Script entry point."""
236 self.logger.info('Starting the PPA .htaccess generation')288 self.logger.info('Starting the PPA .htaccess generation')
237 ppas = getUtility(IArchiveSet).getPrivatePPAs()289 self.expireSubscriptions()
238 for ppa in ppas:290 affected_ppa_ids = self.deactivateTokens(send_email=True)
239 self.expireSubscriptions(ppa)291
240 valid_tokens = self.deactivateTokens(ppa, send_email=True)292 # In addition to the ppas that are affected by deactivated
241293 # tokens, we also want to include any ppas that have tokens
294 # created since the last time we ran.
295 for token in self.getNewTokensSinceLastRun():
296 affected_ppa_ids.add(token.archive_id)
297
298 affected_ppas_with_tokens = {}
299 if affected_ppa_ids:
300 affected_ppas_with_tokens = self._getValidTokensForPPAIDs(
301 affected_ppa_ids)
302
303 for ppa, valid_tokens in affected_ppas_with_tokens.iteritems():
242 # If this PPA is blacklisted, do not touch it's htaccess/pwd304 # If this PPA is blacklisted, do not touch it's htaccess/pwd
243 # files.305 # files.
244 blacklisted_ppa_names_for_owner = self.blacklist.get(306 blacklisted_ppa_names_for_owner = self.blacklist.get(
@@ -271,4 +333,3 @@
271 self.txn.commit()333 self.txn.commit()
272334
273 self.logger.info('Finished PPA .htaccess generation')335 self.logger.info('Finished PPA .htaccess generation')
274
275336
=== modified file 'lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py'
--- lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py 2010-08-23 17:26:42 +0000
+++ lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py 2010-09-03 16:19:41 +0000
@@ -15,6 +15,7 @@
1515
16import pytz16import pytz
17from zope.component import getUtility17from zope.component import getUtility
18from zope.security.proxy import removeSecurityProxy
1819
19from canonical.config import config20from canonical.config import config
20from canonical.launchpad.interfaces import (21from canonical.launchpad.interfaces import (
@@ -29,6 +30,7 @@
29 HtaccessTokenGenerator,30 HtaccessTokenGenerator,
30 )31 )
31from lp.services.mail import stub32from lp.services.mail import stub
33from lp.services.scripts.interfaces.scriptactivity import IScriptActivitySet
32from lp.soyuz.enums import (34from lp.soyuz.enums import (
33 ArchiveStatus,35 ArchiveStatus,
34 ArchiveSubscriberStatus,36 ArchiveSubscriberStatus,
@@ -298,7 +300,7 @@
298300
299 # Initially, nothing is eligible for deactivation.301 # Initially, nothing is eligible for deactivation.
300 script = self.getScript()302 script = self.getScript()
301 script.deactivateTokens(self.ppa)303 script.deactivateTokens()
302 for person in tokens:304 for person in tokens:
303 self.assertNotDeactivated(tokens[person])305 self.assertNotDeactivated(tokens[person])
304306
@@ -311,7 +313,7 @@
311 # Clear out emails generated when leaving a team.313 # Clear out emails generated when leaving a team.
312 pop_notifications()314 pop_notifications()
313315
314 script.deactivateTokens(self.ppa, send_email=True)316 script.deactivateTokens(send_email=True)
315 self.assertDeactivated(tokens[team1_person])317 self.assertDeactivated(tokens[team1_person])
316 del tokens[team1_person]318 del tokens[team1_person]
317 for person in tokens:319 for person in tokens:
@@ -332,7 +334,7 @@
332 self.layer.switchDbUser(self.dbuser)334 self.layer.switchDbUser(self.dbuser)
333 # Clear out emails generated when leaving a team.335 # Clear out emails generated when leaving a team.
334 pop_notifications()336 pop_notifications()
335 script.deactivateTokens(self.ppa, send_email=True)337 script.deactivateTokens(send_email=True)
336 self.assertNotDeactivated(tokens[promiscuous_person])338 self.assertNotDeactivated(tokens[promiscuous_person])
337 for person in tokens:339 for person in tokens:
338 self.assertNotDeactivated(tokens[person])340 self.assertNotDeactivated(tokens[person])
@@ -353,7 +355,7 @@
353 self.assertFalse(team2.inTeam(parent_team))355 self.assertFalse(team2.inTeam(parent_team))
354 self.layer.txn.commit()356 self.layer.txn.commit()
355 self.layer.switchDbUser(self.dbuser)357 self.layer.switchDbUser(self.dbuser)
356 script.deactivateTokens(self.ppa)358 script.deactivateTokens()
357 for person in persons2:359 for person in persons2:
358 self.assertDeactivated(tokens[person])360 self.assertDeactivated(tokens[person])
359361
@@ -573,3 +575,34 @@
573 "feedback@launchpad.net\n\n"575 "feedback@launchpad.net\n\n"
574 "Regards,\n"576 "Regards,\n"
575 "The Launchpad team")577 "The Launchpad team")
578
579 def test_getNewTokensSinceLastRun_no_previous_run(self):
580 """All valid tokens returned if there is no record of previous run."""
581 now = datetime.now(pytz.UTC)
582 tokens = self.setupDummyTokens()[1]
583
584 # If there is no record of the script running previously, all
585 # valid tokens are returned.
586 script = self.getScript()
587 self.assertContentEqual(tokens, script.getNewTokensSinceLastRun())
588
589 def test_getNewTokensSinceLastRun_only_those_since_last_run(self):
590 """Only tokens created since the last run are returned."""
591 now = datetime.now(pytz.UTC)
592 tokens = self.setupDummyTokens()[1]
593 getUtility(IScriptActivitySet).recordSuccess(
594 'generate-ppa-htaccess', now, now - timedelta(minutes=3))
595 removeSecurityProxy(tokens[0]).date_created = (
596 now - timedelta(minutes=4))
597
598 script = self.getScript()
599 self.assertContentEqual(tokens[1:], script.getNewTokensSinceLastRun())
600
601 def test_getNewTokensSinceLastRun_only_active_tokens(self):
602 """Only active tokens are returned."""
603 now = datetime.now(pytz.UTC)
604 tokens = self.setupDummyTokens()[1]
605 tokens[0].deactivate()
606
607 script = self.getScript()
608 self.assertContentEqual(tokens[1:], script.getNewTokensSinceLastRun())