Merge lp:~wgrant/launchpad/update-pkgcache-weight-loss into lp:launchpad

Proposed by William Grant
Status: Merged
Merged at revision: 18047
Proposed branch: lp:~wgrant/launchpad/update-pkgcache-weight-loss
Merge into: lp:launchpad
Diff against target: 651 lines (+250/-216)
4 files modified
lib/lp/soyuz/doc/package-cache.txt (+10/-18)
lib/lp/soyuz/model/distributionsourcepackagecache.py (+132/-106)
lib/lp/soyuz/model/distroseriespackagecache.py (+103/-87)
lib/lp/soyuz/tests/test_distroseriesbinarypackage.py (+5/-5)
To merge this branch: bzr merge lp:~wgrant/launchpad/update-pkgcache-weight-loss
Reviewer Review Type Date Requested Status
Colin Watson (community) Approve
Review via email: mp+295019@code.launchpad.net

Commit message

Rewrite and optimise the guts of update-pkgcache's DistributionSourcePackageCache and DistroSeriesPackageCache updaters.

Description of the change

Rewrite and optimise the guts of the DistributionSourcePackageCache (DSPC) and DistroSeriesPackageCache updaters.

Name selection is replaced with a quick index scan, and detail collection is bulked up. These rows should never be updated by anything other than update-pkgcache, so I haven't thrown in a DBLoopTuner just yet, but a later adaptation would be simple.

The behaviour is subtly changed, but it's probably more correct now: packages are now only included in the caches if their status is Pending or Published, rather than whenever their dateremoved is unset. The old dateremoved filter was confusing, slow, and only made sense before ArchiveRemovalRedesign.

The no-op case is now a good bit faster, slowed mostly by the huge number of test rebuilds clogging up the SPR -> BPB -> BPR queries. Each cache row modification still requires a separate query, but the vast majority of rows remain untouched in the vast majority of runs, so bulkifying changes wasn't a priority.

To post a comment you must log in.
Revision history for this message
Colin Watson (cjwatson) :
review: Approve
Revision history for this message
William Grant (wgrant) :

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/soyuz/doc/package-cache.txt'
2--- lib/lp/soyuz/doc/package-cache.txt 2014-07-09 15:11:03 +0000
3+++ lib/lp/soyuz/doc/package-cache.txt 2016-05-18 23:30:28 +0000
4@@ -162,12 +162,10 @@
5
6 >>> updates = DistributionSourcePackageCache.updateAll(
7 ... ubuntu, archive=ubuntu.main_archive, ztm=transaction,
8- ... log=FakeLogger())
9- DEBUG ...
10- DEBUG Considering source 'cdrkit'
11- DEBUG Creating new source cache entry.
12+ ... log=FakeLogger(), commit_chunk=3)
13+ DEBUG Considering sources alsa-utils, cdrkit, cnews
14 ...
15- DEBUG Considering source 'mozilla-firefox'
16+ DEBUG Considering sources linux-source-2.6.15, mozilla-firefox, netapplet
17 ...
18
19 >>> print updates
20@@ -246,16 +244,11 @@
21
22 >>> updates = DistroSeriesPackageCache.updateAll(
23 ... warty, archive=ubuntu.main_archive, ztm=transaction,
24- ... log=FakeLogger())
25- DEBUG Considering binary 'at'
26- DEBUG Considering binary 'cdrkit'
27- DEBUG Creating new binary cache entry.
28- DEBUG Considering binary 'linux-2.6.12'
29- DEBUG Considering binary 'mozilla-firefox'
30- DEBUG Considering binary 'mozilla-firefox-data'
31- DEBUG Creating new binary cache entry.
32- DEBUG Considering binary 'pmount'
33-
34+ ... log=FakeLogger(), commit_chunk=3)
35+ DEBUG Considering binaries at, cdrkit, linux-2.6.12
36+ DEBUG Committing
37+ DEBUG Considering binaries mozilla-firefox, mozilla-firefox-data, pmount
38+ DEBUG Committing
39
40 >>> print updates
41 6
42@@ -341,14 +334,13 @@
43
44 >>> source_updates = DistributionSourcePackageCache.updateAll(
45 ... ubuntu, archive=cprov.archive, ztm=transaction, log=FakeLogger())
46- DEBUG ...
47- DEBUG Considering source 'pmount'
48+ DEBUG Considering sources cdrkit, iceweasel, pmount
49 ...
50
51 >>> binary_updates = DistroSeriesPackageCache.updateAll(
52 ... warty, archive=cprov.archive, ztm=transaction,
53 ... log=FakeLogger())
54- DEBUG Considering binary 'mozilla-firefox'
55+ DEBUG Considering binaries mozilla-firefox, pmount
56 ...
57
58 >>> cprov.archive.updateArchiveCache()
59
60=== modified file 'lib/lp/soyuz/model/distributionsourcepackagecache.py'
61--- lib/lp/soyuz/model/distributionsourcepackagecache.py 2016-05-17 13:35:03 +0000
62+++ lib/lp/soyuz/model/distributionsourcepackagecache.py 2016-05-18 23:30:28 +0000
63@@ -4,7 +4,11 @@
64 __metaclass__ = type
65 __all__ = ['DistributionSourcePackageCache', ]
66
67-from operator import itemgetter
68+from collections import defaultdict
69+from operator import (
70+ attrgetter,
71+ itemgetter,
72+ )
73
74 from sqlobject import (
75 ForeignKey,
76@@ -13,18 +17,18 @@
77 from zope.interface import implementer
78
79 from lp.registry.model.sourcepackagename import SourcePackageName
80+from lp.services.database import bulk
81 from lp.services.database.decoratedresultset import DecoratedResultSet
82 from lp.services.database.interfaces import IStore
83-from lp.services.database.sqlbase import (
84- SQLBase,
85- sqlvalues,
86- )
87+from lp.services.database.sqlbase import SQLBase
88+from lp.soyuz.enums import PackagePublishingStatus
89 from lp.soyuz.interfaces.distributionsourcepackagecache import (
90 IDistributionSourcePackageCache,
91 )
92 from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
93 from lp.soyuz.model.binarypackagename import BinaryPackageName
94 from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
95+from lp.soyuz.model.publishing import SourcePackagePublishingHistory
96 from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
97
98
99@@ -57,6 +61,17 @@
100 self.sourcepackagename)
101
102 @classmethod
103+ def findCurrentSourcePackageNames(cls, archive):
104+ spn_ids = IStore(SourcePackagePublishingHistory).find(
105+ SourcePackagePublishingHistory.sourcepackagenameID,
106+ SourcePackagePublishingHistory.archive == archive,
107+ SourcePackagePublishingHistory.status.is_in((
108+ PackagePublishingStatus.PENDING,
109+ PackagePublishingStatus.PUBLISHED))).config(
110+ distinct=True)
111+ return bulk.load(SourcePackageName, spn_ids)
112+
113+ @classmethod
114 def _find(cls, distro, archive=None):
115 """The set of all source package info caches for this distribution.
116
117@@ -89,22 +104,11 @@
118 """
119
120 # Get the set of source package names to deal with.
121- spns = set(SourcePackageName.select("""
122- SourcePackagePublishingHistory.distroseries =
123- DistroSeries.id AND
124- DistroSeries.distribution = %s AND
125- Archive.id = %s AND
126- SourcePackagePublishingHistory.archive = Archive.id AND
127- SourcePackagePublishingHistory.sourcepackagename =
128- SourcePackageName.id AND
129- SourcePackagePublishingHistory.dateremoved is NULL AND
130- Archive.enabled = TRUE
131- """ % sqlvalues(distro, archive),
132- distinct=True,
133- clauseTables=[
134- 'Archive',
135- 'DistroSeries',
136- 'SourcePackagePublishingHistory']))
137+ if not archive.enabled:
138+ spns = set()
139+ else:
140+ spns = set(
141+ cls.findCurrentSourcePackageNames(archive))
142
143 # Remove the cache entries for packages we no longer publish.
144 for cache in cls._find(distro, archive):
145@@ -115,78 +119,101 @@
146 cache.destroySelf()
147
148 @classmethod
149- def _update(cls, distro, sourcepackagename, archive, log):
150- """Update cached source package details.
151-
152- Update cache details for a given ISourcePackageName, including
153- generated binarypackage names, summary and description fti.
154+ def _update(cls, distro, sourcepackagenames, archive, log):
155+ """Update the package cache for a given set of `ISourcePackageName`s.
156+
157+ Cached details include generated binarypackage names, summary
158+ and description fti.
159+
160 'log' is required and only prints debug level information.
161 """
162
163 # Get the set of published sourcepackage releases.
164- sprs = list(SourcePackageRelease.select("""
165- SourcePackageRelease.id =
166- SourcePackagePublishingHistory.sourcepackagerelease AND
167- SourcePackagePublishingHistory.sourcepackagename = %s AND
168- SourcePackagePublishingHistory.distroseries =
169- DistroSeries.id AND
170- DistroSeries.distribution = %s AND
171- SourcePackagePublishingHistory.archive = %s AND
172- SourcePackagePublishingHistory.dateremoved is NULL
173- """ % sqlvalues(sourcepackagename, distro, archive),
174- orderBy='id',
175- clauseTables=['SourcePackagePublishingHistory', 'DistroSeries'],
176- distinct=True))
177-
178- if len(sprs) == 0:
179+ all_sprs = list(IStore(SourcePackageRelease).find(
180+ (SourcePackageRelease.sourcepackagenameID,
181+ SourcePackageRelease.id, SourcePackageRelease.version),
182+ SourcePackageRelease.id ==
183+ SourcePackagePublishingHistory.sourcepackagereleaseID,
184+ SourcePackagePublishingHistory.sourcepackagenameID.is_in(
185+ [spn.id for spn in sourcepackagenames]),
186+ SourcePackagePublishingHistory.archive == archive,
187+ SourcePackagePublishingHistory.status.is_in((
188+ PackagePublishingStatus.PENDING,
189+ PackagePublishingStatus.PUBLISHED))
190+ ).config(distinct=True).order_by(SourcePackageRelease.id))
191+ if len(all_sprs) == 0:
192 log.debug("No sources releases found.")
193 return
194
195- # Find or create the cache entry.
196- cache = DistributionSourcePackageCache.selectOne("""
197- distribution = %s AND
198- archive = %s AND
199- sourcepackagename = %s
200- """ % sqlvalues(distro, archive, sourcepackagename))
201- if cache is None:
202- log.debug("Creating new source cache entry.")
203- cache = DistributionSourcePackageCache(
204- archive=archive,
205- distribution=distro,
206- sourcepackagename=sourcepackagename)
207-
208- # Make sure the name is correct.
209- cache.name = sourcepackagename.name
210-
211- # Get the sets of binary package names, summaries, descriptions.
212-
213- # XXX Julian 2007-04-03:
214- # This bit of code needs fixing up, it is doing stuff that
215- # really needs to be done in SQL, such as sorting and uniqueness.
216- # This would also improve the performance.
217- binpkgnames = set()
218- binpkgsummaries = set()
219- binpkgdescriptions = set()
220- for spr in sprs:
221- log.debug("Considering source version %s" % spr.version)
222- binpkgs = IStore(BinaryPackageRelease).find(
223- (BinaryPackageName.name, BinaryPackageRelease.summary,
224- BinaryPackageRelease.description),
225- BinaryPackageRelease.buildID == BinaryPackageBuild.id,
226+ spr_map = defaultdict(list)
227+ for spn_id, spr_id, spr_version in all_sprs:
228+ spn = IStore(SourcePackageName).get(SourcePackageName, spn_id)
229+ spr_map[spn].append((spr_id, spr_version))
230+
231+ all_caches = IStore(cls).find(
232+ cls, cls.distribution == distro, cls.archive == archive,
233+ cls.sourcepackagenameID.is_in(
234+ [spn.id for spn in sourcepackagenames]))
235+ cache_map = {cache.sourcepackagename: cache for cache in all_caches}
236+
237+ for spn in set(sourcepackagenames) - set(cache_map.keys()):
238+ cache_map[spn] = cls(
239+ archive=archive, distribution=distro,
240+ sourcepackagename=spn)
241+
242+ # Query BinaryPackageBuilds and their BinaryPackageReleases
243+ # separately, since the big and inconsistent intermediates can
244+ # confuse postgres into a seq scan over BPR, which never ends
245+ # well for anybody.
246+ #
247+ # Beware: the sets expand much faster than you might expect for
248+ # the primary archive; COPY archive builds are caught too, of
249+ # which there are dozens for most SPRs, and there's no easy way
250+ # to exclude them!
251+ all_builds = list(IStore(BinaryPackageBuild).find(
252+ (BinaryPackageBuild.source_package_release_id,
253+ BinaryPackageBuild.id),
254 BinaryPackageBuild.source_package_release_id.is_in(
255- [spr.id for spr in sprs]),
256- BinaryPackageName.id == BinaryPackageRelease.binarypackagenameID)
257- for name, summary, description in binpkgs:
258- binpkgnames.add(name)
259- binpkgsummaries.add(summary)
260- binpkgdescriptions.add(description)
261-
262- # Update the caches.
263- cache.binpkgnames = ' '.join(sorted(binpkgnames))
264- cache.binpkgsummaries = ' '.join(sorted(binpkgsummaries))
265- cache.binpkgdescriptions = ' '.join(sorted(binpkgdescriptions))
266- # Column due for deletion.
267- cache.changelog = None
268+ [row[1] for row in all_sprs])))
269+ all_binaries = list(IStore(BinaryPackageRelease).find(
270+ (BinaryPackageRelease.buildID,
271+ BinaryPackageRelease.binarypackagenameID,
272+ BinaryPackageRelease.summary, BinaryPackageRelease.description),
273+ BinaryPackageRelease.buildID.is_in(
274+ [row[1] for row in all_builds])))
275+ sprs_by_build = {build_id: spr_id for spr_id, build_id in all_builds}
276+
277+ bulk.load(BinaryPackageName, [row[1] for row in all_binaries])
278+ binaries_by_spr = defaultdict(list)
279+ for bpb_id, bpn_id, summary, description in all_binaries:
280+ spr_id = sprs_by_build[bpb_id]
281+ binaries_by_spr[spr_id].append((
282+ IStore(BinaryPackageName).get(BinaryPackageName, bpn_id),
283+ summary, description))
284+
285+ for spn in sourcepackagenames:
286+ cache = cache_map[spn]
287+ cache.name = spn.name
288+
289+ sprs = spr_map.get(spn, [])
290+
291+ binpkgnames = set()
292+ binpkgsummaries = set()
293+ binpkgdescriptions = set()
294+ for spr_id, spr_version in sprs:
295+ log.debug("Considering source %s %s", spn.name, spr_version)
296+ binpkgs = binaries_by_spr.get(spr_id, [])
297+ for bpn, summary, description in binpkgs:
298+ binpkgnames.add(bpn.name)
299+ binpkgsummaries.add(summary)
300+ binpkgdescriptions.add(description)
301+
302+ # Update the caches.
303+ cache.binpkgnames = ' '.join(sorted(binpkgnames))
304+ cache.binpkgsummaries = ' '.join(sorted(binpkgsummaries))
305+ cache.binpkgdescriptions = ' '.join(sorted(binpkgdescriptions))
306+ # Column due for deletion.
307+ cache.changelog = None
308
309 @classmethod
310 def updateAll(cls, distro, archive, log, ztm, commit_chunk=500):
311@@ -208,29 +235,28 @@
312 return
313
314 # Get the set of source package names to deal with.
315- spns = list(SourcePackageName.select("""
316- SourcePackagePublishingHistory.distroseries =
317- DistroSeries.id AND
318- DistroSeries.distribution = %s AND
319- SourcePackagePublishingHistory.archive = %s AND
320- SourcePackagePublishingHistory.sourcepackagename =
321- SourcePackageName.id AND
322- SourcePackagePublishingHistory.dateremoved is NULL
323- """ % sqlvalues(distro, archive),
324- distinct=True,
325- orderBy="name",
326- clauseTables=['SourcePackagePublishingHistory', 'DistroSeries']))
327+ spns = list(sorted(
328+ cls.findCurrentSourcePackageNames(archive),
329+ key=attrgetter('name')))
330
331 number_of_updates = 0
332- chunk_size = 0
333+ chunks = []
334+ chunk = []
335 for spn in spns:
336- log.debug("Considering source '%s'" % spn.name)
337- cls._update(distro, spn, archive, log)
338- chunk_size += 1
339- number_of_updates += 1
340- if chunk_size == commit_chunk:
341- chunk_size = 0
342- log.debug("Committing")
343- ztm.commit()
344+ chunk.append(spn)
345+ if len(chunk) == commit_chunk:
346+ chunks.append(chunk)
347+ chunk = []
348+ if chunk:
349+ chunks.append(chunk)
350+ for chunk in chunks:
351+ bulk.load(SourcePackageName, [spn.id for spn in chunk])
352+ log.debug(
353+ "Considering sources %s",
354+ ', '.join([spn.name for spn in chunk]))
355+ cls._update(distro, chunk, archive, log)
356+ number_of_updates += len(chunk)
357+ log.debug("Committing")
358+ ztm.commit()
359
360 return number_of_updates
361
362=== modified file 'lib/lp/soyuz/model/distroseriespackagecache.py'
363--- lib/lp/soyuz/model/distroseriespackagecache.py 2015-07-08 16:05:11 +0000
364+++ lib/lp/soyuz/model/distroseriespackagecache.py 2016-05-18 23:30:28 +0000
365@@ -1,4 +1,4 @@
366-# Copyright 2009 Canonical Ltd. This software is licensed under the
367+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
368 # GNU Affero General Public License version 3 (see the file LICENSE).
369
370 __metaclass__ = type
371@@ -6,22 +6,25 @@
372 'DistroSeriesPackageCache',
373 ]
374
375+from collections import defaultdict
376+from operator import attrgetter
377+
378 from sqlobject import (
379 ForeignKey,
380 StringCol,
381 )
382-from storm.locals import (
383+from storm.expr import (
384 Desc,
385 Max,
386- RawStr,
387+ Select,
388 )
389+from storm.locals import RawStr
390 from zope.interface import implementer
391
392+from lp.services.database import bulk
393 from lp.services.database.interfaces import IStore
394-from lp.services.database.sqlbase import (
395- SQLBase,
396- sqlvalues,
397- )
398+from lp.services.database.sqlbase import SQLBase
399+from lp.soyuz.enums import PackagePublishingStatus
400 from lp.soyuz.interfaces.distroseriespackagecache import (
401 IDistroSeriesPackageCache,
402 )
403@@ -50,6 +53,21 @@
404 descriptions = StringCol(notNull=False, default=None)
405
406 @classmethod
407+ def findCurrentBinaryPackageNames(cls, archive, distroseries):
408+ bpn_ids = IStore(BinaryPackagePublishingHistory).find(
409+ BinaryPackagePublishingHistory.binarypackagenameID,
410+ BinaryPackagePublishingHistory.distroarchseriesID.is_in(
411+ Select(
412+ DistroArchSeries.id, tables=[DistroArchSeries],
413+ where=DistroArchSeries.distroseries == distroseries)),
414+ BinaryPackagePublishingHistory.archive == archive,
415+ BinaryPackagePublishingHistory.status.is_in((
416+ PackagePublishingStatus.PENDING,
417+ PackagePublishingStatus.PUBLISHED))).config(
418+ distinct=True)
419+ return bulk.load(BinaryPackageName, bpn_ids)
420+
421+ @classmethod
422 def _find(cls, distroseries, archive=None):
423 """All of the cached binary package records for this distroseries.
424
425@@ -79,25 +97,11 @@
426 messages.
427 """
428 # get the set of package names that should be there
429- bpns = set(BinaryPackageName.select("""
430- BinaryPackagePublishingHistory.distroarchseries =
431- DistroArchSeries.id AND
432- DistroArchSeries.distroseries = %s AND
433- Archive.id = %s AND
434- BinaryPackagePublishingHistory.archive = Archive.id AND
435- BinaryPackagePublishingHistory.binarypackagerelease =
436- BinaryPackageRelease.id AND
437- BinaryPackagePublishingHistory.binarypackagename =
438- BinaryPackageName.id AND
439- BinaryPackagePublishingHistory.dateremoved is NULL AND
440- Archive.enabled = TRUE
441- """ % sqlvalues(distroseries.id, archive.id),
442- distinct=True,
443- clauseTables=[
444- 'Archive',
445- 'DistroArchSeries',
446- 'BinaryPackagePublishingHistory',
447- 'BinaryPackageRelease']))
448+ if not archive.enabled:
449+ bpns = set()
450+ else:
451+ bpns = set(
452+ cls.findCurrentBinaryPackageNames(archive, distroseries))
453
454 # remove the cache entries for binary packages we no longer want
455 for cache in cls._find(distroseries, archive):
456@@ -108,8 +112,8 @@
457 cache.destroySelf()
458
459 @classmethod
460- def _update(cls, distroseries, binarypackagename, archive, log):
461- """Update the package cache for a given IBinaryPackageName
462+ def _update(cls, distroseries, binarypackagenames, archive, log):
463+ """Update the package cache for a given set of `IBinaryPackageName`s.
464
465 'log' is required, it should be a logger object able to print
466 DEBUG level messages.
467@@ -117,58 +121,69 @@
468 (in full batches of 100 elements)
469 """
470 # get the set of published binarypackagereleases
471- details = list(IStore(BinaryPackageRelease).find(
472- (BinaryPackageRelease.summary, BinaryPackageRelease.description,
473+ all_details = list(IStore(BinaryPackageRelease).find(
474+ (BinaryPackageRelease.binarypackagenameID,
475+ BinaryPackageRelease.summary, BinaryPackageRelease.description,
476 Max(BinaryPackageRelease.datecreated)),
477 BinaryPackageRelease.id ==
478 BinaryPackagePublishingHistory.binarypackagereleaseID,
479- BinaryPackagePublishingHistory.binarypackagename ==
480- binarypackagename,
481- BinaryPackagePublishingHistory.distroarchseriesID ==
482- DistroArchSeries.id,
483- DistroArchSeries.distroseries == distroseries,
484+ BinaryPackagePublishingHistory.binarypackagenameID.is_in(
485+ [bpn.id for bpn in binarypackagenames]),
486+ BinaryPackagePublishingHistory.distroarchseriesID.is_in(
487+ Select(
488+ DistroArchSeries.id, tables=[DistroArchSeries],
489+ where=DistroArchSeries.distroseries == distroseries)),
490 BinaryPackagePublishingHistory.archive == archive,
491- BinaryPackagePublishingHistory.dateremoved == None
492+ BinaryPackagePublishingHistory.status.is_in((
493+ PackagePublishingStatus.PENDING,
494+ PackagePublishingStatus.PUBLISHED))
495 ).group_by(
496+ BinaryPackageRelease.binarypackagenameID,
497 BinaryPackageRelease.summary,
498 BinaryPackageRelease.description
499 ).order_by(
500+ BinaryPackageRelease.binarypackagenameID,
501 Desc(Max(BinaryPackageRelease.datecreated))))
502-
503- if not details:
504+ if not all_details:
505 log.debug("No binary releases found.")
506 return
507
508- # find or create the cache entry
509- cache = cls.selectOne("""
510- distroseries = %s AND
511- archive = %s AND
512- binarypackagename = %s
513- """ % sqlvalues(distroseries, archive, binarypackagename))
514- if cache is None:
515- log.debug("Creating new binary cache entry.")
516- cache = cls(
517- archive=archive,
518- distroseries=distroseries,
519- binarypackagename=binarypackagename)
520-
521- # make sure the cached name, summary and description are correct
522- cache.name = binarypackagename.name
523- cache.summary = details[0][0]
524- cache.description = details[0][1]
525-
526- # get the sets of binary package summaries, descriptions. there is
527- # likely only one, but just in case...
528-
529- summaries = set()
530- descriptions = set()
531- for summary, description, datecreated in details:
532- summaries.add(summary)
533- descriptions.add(description)
534-
535- # and update the caches
536- cache.summaries = ' '.join(sorted(summaries))
537- cache.descriptions = ' '.join(sorted(descriptions))
538+ details_map = defaultdict(list)
539+ for (bpn_id, summary, description, datecreated) in all_details:
540+ bpn = IStore(BinaryPackageName).get(BinaryPackageName, bpn_id)
541+ details_map[bpn].append((summary, description))
542+
543+ all_caches = IStore(cls).find(
544+ cls, cls.distroseries == distroseries, cls.archive == archive,
545+ cls.binarypackagenameID.is_in(
546+ [bpn.id for bpn in binarypackagenames]))
547+ cache_map = {cache.binarypackagename: cache for cache in all_caches}
548+
549+ for bpn in set(binarypackagenames) - set(cache_map):
550+ cache_map[bpn] = cls(
551+ archive=archive, distroseries=distroseries,
552+ binarypackagename=bpn)
553+
554+ for bpn in binarypackagenames:
555+ cache = cache_map[bpn]
556+ details = details_map[bpn]
557+ # make sure the cached name, summary and description are correct
558+ cache.name = bpn.name
559+ cache.summary = details[0][0]
560+ cache.description = details[0][1]
561+
562+ # get the sets of binary package summaries, descriptions. there is
563+ # likely only one, but just in case...
564+
565+ summaries = set()
566+ descriptions = set()
567+ for summary, description in details:
568+ summaries.add(summary)
569+ descriptions.add(description)
570+
571+ # and update the caches
572+ cache.summaries = ' '.join(sorted(summaries))
573+ cache.descriptions = ' '.join(sorted(descriptions))
574
575 @classmethod
576 def updateAll(cls, distroseries, archive, log, ztm, commit_chunk=500):
577@@ -190,27 +205,28 @@
578 return
579
580 # Get the set of package names to deal with.
581- bpns = IStore(BinaryPackageName).find(
582- BinaryPackageName,
583- DistroArchSeries.distroseries == distroseries,
584- BinaryPackagePublishingHistory.distroarchseriesID ==
585- DistroArchSeries.id,
586- BinaryPackagePublishingHistory.archive == archive,
587- BinaryPackagePublishingHistory.binarypackagename ==
588- BinaryPackageName.id,
589- BinaryPackagePublishingHistory.dateremoved == None).config(
590- distinct=True).order_by(BinaryPackageName.name)
591+ bpns = list(sorted(
592+ cls.findCurrentBinaryPackageNames(archive, distroseries),
593+ key=attrgetter('name')))
594
595 number_of_updates = 0
596- chunk_size = 0
597+ chunks = []
598+ chunk = []
599 for bpn in bpns:
600- log.debug("Considering binary '%s'" % bpn.name)
601- cls._update(distroseries, bpn, archive, log)
602- number_of_updates += 1
603- chunk_size += 1
604- if chunk_size == commit_chunk:
605- chunk_size = 0
606- log.debug("Committing")
607- ztm.commit()
608+ chunk.append(bpn)
609+ if len(chunk) == commit_chunk:
610+ chunks.append(chunk)
611+ chunk = []
612+ if chunk:
613+ chunks.append(chunk)
614+ for chunk in chunks:
615+ bulk.load(BinaryPackageName, [bpn.id for bpn in chunk])
616+ log.debug(
617+ "Considering binaries %s",
618+ ', '.join([bpn.name for bpn in chunk]))
619+ cls._update(distroseries, chunk, archive, log)
620+ number_of_updates += len(chunk)
621+ log.debug("Committing")
622+ ztm.commit()
623
624 return number_of_updates
625
626=== modified file 'lib/lp/soyuz/tests/test_distroseriesbinarypackage.py'
627--- lib/lp/soyuz/tests/test_distroseriesbinarypackage.py 2012-01-20 15:42:44 +0000
628+++ lib/lp/soyuz/tests/test_distroseriesbinarypackage.py 2016-05-18 23:30:28 +0000
629@@ -1,4 +1,4 @@
630-# Copyright 2010 Canonical Ltd. This software is licensed under the
631+# Copyright 2010-2016 Canonical Ltd. This software is licensed under the
632 # GNU Affero General Public License version 3 (see the file LICENSE).
633
634 """Tests for `DistroSeriesBinaryPackage`."""
635@@ -61,12 +61,12 @@
636 logger = BufferLogger()
637 with dbuser(config.statistician.dbuser):
638 DistroSeriesPackageCache._update(
639- self.distroseries, self.binary_package_name, distro_archive_1,
640- logger)
641+ self.distroseries, [self.binary_package_name],
642+ distro_archive_1, logger)
643
644 DistroSeriesPackageCache._update(
645- self.distroseries, self.binary_package_name, distro_archive_2,
646- logger)
647+ self.distroseries, [self.binary_package_name],
648+ distro_archive_2, logger)
649
650 self.failUnlessEqual(
651 'Foo is the best', self.distroseries_binary_package.summary)