Merge lp:~wgrant/launchpad/update-pkgcache-weight-loss into lp:launchpad
- update-pkgcache-weight-loss
- Merge into devel
Status: | Merged |
---|---|
Merged at revision: | 18047 |
Proposed branch: | lp:~wgrant/launchpad/update-pkgcache-weight-loss |
Merge into: | lp:launchpad |
Diff against target: |
651 lines (+250/-216) 4 files modified
lib/lp/soyuz/doc/package-cache.txt (+10/-18) lib/lp/soyuz/model/distributionsourcepackagecache.py (+132/-106) lib/lp/soyuz/model/distroseriespackagecache.py (+103/-87) lib/lp/soyuz/tests/test_distroseriesbinarypackage.py (+5/-5) |
To merge this branch: | bzr merge lp:~wgrant/launchpad/update-pkgcache-weight-loss |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Colin Watson (community) | Approve | ||
Review via email: mp+295019@code.launchpad.net |
Commit message
Rewrite and optimise the guts of update-pkgcache's DistributionSourcePackageCache (DSPC) and DistroSeriesPackageCache updaters.
Description of the change
Rewrite and optimise the guts of the DistributionSourcePackageCache and DistroSeriesPackageCache updaters.
Name selection is replaced with a quick index scan, and detail collection is bulked up. These rows should never be updated by anything other than update-pkgcache, so I haven't thrown in a DBLoopTuner just yet, but a later adaptation would be simple.
The behaviour is subtly changed, but it's probably more correct now: packages are now only included in the caches if their status is Pending or Published, rather than whenever their dateremoved is unset. The old dateremoved filter was confusing, slow, and only made sense before ArchiveRemovalR… [sentence truncated in the original page].
The no-op case is now a good bit faster, slowed mostly by the huge number of test rebuilds clogging up the SPR -> BPB -> BPR queries. Each cache row modification still requires a separate query, but the vast majority of rows remain untouched in the vast majority of runs, so bulkifying changes wasn't a priority.
Colin Watson (cjwatson) : | # |
William Grant (wgrant) : | # |
Preview Diff
1 | === modified file 'lib/lp/soyuz/doc/package-cache.txt' |
2 | --- lib/lp/soyuz/doc/package-cache.txt 2014-07-09 15:11:03 +0000 |
3 | +++ lib/lp/soyuz/doc/package-cache.txt 2016-05-18 23:30:28 +0000 |
4 | @@ -162,12 +162,10 @@ |
5 | |
6 | >>> updates = DistributionSourcePackageCache.updateAll( |
7 | ... ubuntu, archive=ubuntu.main_archive, ztm=transaction, |
8 | - ... log=FakeLogger()) |
9 | - DEBUG ... |
10 | - DEBUG Considering source 'cdrkit' |
11 | - DEBUG Creating new source cache entry. |
12 | + ... log=FakeLogger(), commit_chunk=3) |
13 | + DEBUG Considering sources alsa-utils, cdrkit, cnews |
14 | ... |
15 | - DEBUG Considering source 'mozilla-firefox' |
16 | + DEBUG Considering sources linux-source-2.6.15, mozilla-firefox, netapplet |
17 | ... |
18 | |
19 | >>> print updates |
20 | @@ -246,16 +244,11 @@ |
21 | |
22 | >>> updates = DistroSeriesPackageCache.updateAll( |
23 | ... warty, archive=ubuntu.main_archive, ztm=transaction, |
24 | - ... log=FakeLogger()) |
25 | - DEBUG Considering binary 'at' |
26 | - DEBUG Considering binary 'cdrkit' |
27 | - DEBUG Creating new binary cache entry. |
28 | - DEBUG Considering binary 'linux-2.6.12' |
29 | - DEBUG Considering binary 'mozilla-firefox' |
30 | - DEBUG Considering binary 'mozilla-firefox-data' |
31 | - DEBUG Creating new binary cache entry. |
32 | - DEBUG Considering binary 'pmount' |
33 | - |
34 | + ... log=FakeLogger(), commit_chunk=3) |
35 | + DEBUG Considering binaries at, cdrkit, linux-2.6.12 |
36 | + DEBUG Committing |
37 | + DEBUG Considering binaries mozilla-firefox, mozilla-firefox-data, pmount |
38 | + DEBUG Committing |
39 | |
40 | >>> print updates |
41 | 6 |
42 | @@ -341,14 +334,13 @@ |
43 | |
44 | >>> source_updates = DistributionSourcePackageCache.updateAll( |
45 | ... ubuntu, archive=cprov.archive, ztm=transaction, log=FakeLogger()) |
46 | - DEBUG ... |
47 | - DEBUG Considering source 'pmount' |
48 | + DEBUG Considering sources cdrkit, iceweasel, pmount |
49 | ... |
50 | |
51 | >>> binary_updates = DistroSeriesPackageCache.updateAll( |
52 | ... warty, archive=cprov.archive, ztm=transaction, |
53 | ... log=FakeLogger()) |
54 | - DEBUG Considering binary 'mozilla-firefox' |
55 | + DEBUG Considering binaries mozilla-firefox, pmount |
56 | ... |
57 | |
58 | >>> cprov.archive.updateArchiveCache() |
59 | |
60 | === modified file 'lib/lp/soyuz/model/distributionsourcepackagecache.py' |
61 | --- lib/lp/soyuz/model/distributionsourcepackagecache.py 2016-05-17 13:35:03 +0000 |
62 | +++ lib/lp/soyuz/model/distributionsourcepackagecache.py 2016-05-18 23:30:28 +0000 |
63 | @@ -4,7 +4,11 @@ |
64 | __metaclass__ = type |
65 | __all__ = ['DistributionSourcePackageCache', ] |
66 | |
67 | -from operator import itemgetter |
68 | +from collections import defaultdict |
69 | +from operator import ( |
70 | + attrgetter, |
71 | + itemgetter, |
72 | + ) |
73 | |
74 | from sqlobject import ( |
75 | ForeignKey, |
76 | @@ -13,18 +17,18 @@ |
77 | from zope.interface import implementer |
78 | |
79 | from lp.registry.model.sourcepackagename import SourcePackageName |
80 | +from lp.services.database import bulk |
81 | from lp.services.database.decoratedresultset import DecoratedResultSet |
82 | from lp.services.database.interfaces import IStore |
83 | -from lp.services.database.sqlbase import ( |
84 | - SQLBase, |
85 | - sqlvalues, |
86 | - ) |
87 | +from lp.services.database.sqlbase import SQLBase |
88 | +from lp.soyuz.enums import PackagePublishingStatus |
89 | from lp.soyuz.interfaces.distributionsourcepackagecache import ( |
90 | IDistributionSourcePackageCache, |
91 | ) |
92 | from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild |
93 | from lp.soyuz.model.binarypackagename import BinaryPackageName |
94 | from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease |
95 | +from lp.soyuz.model.publishing import SourcePackagePublishingHistory |
96 | from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease |
97 | |
98 | |
99 | @@ -57,6 +61,17 @@ |
100 | self.sourcepackagename) |
101 | |
102 | @classmethod |
103 | + def findCurrentSourcePackageNames(cls, archive): |
104 | + spn_ids = IStore(SourcePackagePublishingHistory).find( |
105 | + SourcePackagePublishingHistory.sourcepackagenameID, |
106 | + SourcePackagePublishingHistory.archive == archive, |
107 | + SourcePackagePublishingHistory.status.is_in(( |
108 | + PackagePublishingStatus.PENDING, |
109 | + PackagePublishingStatus.PUBLISHED))).config( |
110 | + distinct=True) |
111 | + return bulk.load(SourcePackageName, spn_ids) |
112 | + |
113 | + @classmethod |
114 | def _find(cls, distro, archive=None): |
115 | """The set of all source package info caches for this distribution. |
116 | |
117 | @@ -89,22 +104,11 @@ |
118 | """ |
119 | |
120 | # Get the set of source package names to deal with. |
121 | - spns = set(SourcePackageName.select(""" |
122 | - SourcePackagePublishingHistory.distroseries = |
123 | - DistroSeries.id AND |
124 | - DistroSeries.distribution = %s AND |
125 | - Archive.id = %s AND |
126 | - SourcePackagePublishingHistory.archive = Archive.id AND |
127 | - SourcePackagePublishingHistory.sourcepackagename = |
128 | - SourcePackageName.id AND |
129 | - SourcePackagePublishingHistory.dateremoved is NULL AND |
130 | - Archive.enabled = TRUE |
131 | - """ % sqlvalues(distro, archive), |
132 | - distinct=True, |
133 | - clauseTables=[ |
134 | - 'Archive', |
135 | - 'DistroSeries', |
136 | - 'SourcePackagePublishingHistory'])) |
137 | + if not archive.enabled: |
138 | + spns = set() |
139 | + else: |
140 | + spns = set( |
141 | + cls.findCurrentSourcePackageNames(archive)) |
142 | |
143 | # Remove the cache entries for packages we no longer publish. |
144 | for cache in cls._find(distro, archive): |
145 | @@ -115,78 +119,101 @@ |
146 | cache.destroySelf() |
147 | |
148 | @classmethod |
149 | - def _update(cls, distro, sourcepackagename, archive, log): |
150 | - """Update cached source package details. |
151 | - |
152 | - Update cache details for a given ISourcePackageName, including |
153 | - generated binarypackage names, summary and description fti. |
154 | + def _update(cls, distro, sourcepackagenames, archive, log): |
155 | + """Update the package cache for a given set of `ISourcePackageName`s. |
156 | + |
157 | + Cached details include generated binarypackage names, summary |
158 | + and description fti. |
159 | + |
160 | 'log' is required and only prints debug level information. |
161 | """ |
162 | |
163 | # Get the set of published sourcepackage releases. |
164 | - sprs = list(SourcePackageRelease.select(""" |
165 | - SourcePackageRelease.id = |
166 | - SourcePackagePublishingHistory.sourcepackagerelease AND |
167 | - SourcePackagePublishingHistory.sourcepackagename = %s AND |
168 | - SourcePackagePublishingHistory.distroseries = |
169 | - DistroSeries.id AND |
170 | - DistroSeries.distribution = %s AND |
171 | - SourcePackagePublishingHistory.archive = %s AND |
172 | - SourcePackagePublishingHistory.dateremoved is NULL |
173 | - """ % sqlvalues(sourcepackagename, distro, archive), |
174 | - orderBy='id', |
175 | - clauseTables=['SourcePackagePublishingHistory', 'DistroSeries'], |
176 | - distinct=True)) |
177 | - |
178 | - if len(sprs) == 0: |
179 | + all_sprs = list(IStore(SourcePackageRelease).find( |
180 | + (SourcePackageRelease.sourcepackagenameID, |
181 | + SourcePackageRelease.id, SourcePackageRelease.version), |
182 | + SourcePackageRelease.id == |
183 | + SourcePackagePublishingHistory.sourcepackagereleaseID, |
184 | + SourcePackagePublishingHistory.sourcepackagenameID.is_in( |
185 | + [spn.id for spn in sourcepackagenames]), |
186 | + SourcePackagePublishingHistory.archive == archive, |
187 | + SourcePackagePublishingHistory.status.is_in(( |
188 | + PackagePublishingStatus.PENDING, |
189 | + PackagePublishingStatus.PUBLISHED)) |
190 | + ).config(distinct=True).order_by(SourcePackageRelease.id)) |
191 | + if len(all_sprs) == 0: |
192 | log.debug("No sources releases found.") |
193 | return |
194 | |
195 | - # Find or create the cache entry. |
196 | - cache = DistributionSourcePackageCache.selectOne(""" |
197 | - distribution = %s AND |
198 | - archive = %s AND |
199 | - sourcepackagename = %s |
200 | - """ % sqlvalues(distro, archive, sourcepackagename)) |
201 | - if cache is None: |
202 | - log.debug("Creating new source cache entry.") |
203 | - cache = DistributionSourcePackageCache( |
204 | - archive=archive, |
205 | - distribution=distro, |
206 | - sourcepackagename=sourcepackagename) |
207 | - |
208 | - # Make sure the name is correct. |
209 | - cache.name = sourcepackagename.name |
210 | - |
211 | - # Get the sets of binary package names, summaries, descriptions. |
212 | - |
213 | - # XXX Julian 2007-04-03: |
214 | - # This bit of code needs fixing up, it is doing stuff that |
215 | - # really needs to be done in SQL, such as sorting and uniqueness. |
216 | - # This would also improve the performance. |
217 | - binpkgnames = set() |
218 | - binpkgsummaries = set() |
219 | - binpkgdescriptions = set() |
220 | - for spr in sprs: |
221 | - log.debug("Considering source version %s" % spr.version) |
222 | - binpkgs = IStore(BinaryPackageRelease).find( |
223 | - (BinaryPackageName.name, BinaryPackageRelease.summary, |
224 | - BinaryPackageRelease.description), |
225 | - BinaryPackageRelease.buildID == BinaryPackageBuild.id, |
226 | + spr_map = defaultdict(list) |
227 | + for spn_id, spr_id, spr_version in all_sprs: |
228 | + spn = IStore(SourcePackageName).get(SourcePackageName, spn_id) |
229 | + spr_map[spn].append((spr_id, spr_version)) |
230 | + |
231 | + all_caches = IStore(cls).find( |
232 | + cls, cls.distribution == distro, cls.archive == archive, |
233 | + cls.sourcepackagenameID.is_in( |
234 | + [spn.id for spn in sourcepackagenames])) |
235 | + cache_map = {cache.sourcepackagename: cache for cache in all_caches} |
236 | + |
237 | + for spn in set(sourcepackagenames) - set(cache_map.keys()): |
238 | + cache_map[spn] = cls( |
239 | + archive=archive, distribution=distro, |
240 | + sourcepackagename=spn) |
241 | + |
242 | + # Query BinaryPackageBuilds and their BinaryPackageReleases |
243 | + # separately, since the big and inconsistent intermediates can |
244 | + # confuse postgres into a seq scan over BPR, which never ends |
245 | + # well for anybody. |
246 | + # |
247 | + # Beware: the sets expand much faster than you might expect for |
248 | + # the primary archive; COPY archive builds are caught too, of |
249 | + # which there are dozens for most SPRs, and there's no easy way |
250 | + # to exclude them! |
251 | + all_builds = list(IStore(BinaryPackageBuild).find( |
252 | + (BinaryPackageBuild.source_package_release_id, |
253 | + BinaryPackageBuild.id), |
254 | BinaryPackageBuild.source_package_release_id.is_in( |
255 | - [spr.id for spr in sprs]), |
256 | - BinaryPackageName.id == BinaryPackageRelease.binarypackagenameID) |
257 | - for name, summary, description in binpkgs: |
258 | - binpkgnames.add(name) |
259 | - binpkgsummaries.add(summary) |
260 | - binpkgdescriptions.add(description) |
261 | - |
262 | - # Update the caches. |
263 | - cache.binpkgnames = ' '.join(sorted(binpkgnames)) |
264 | - cache.binpkgsummaries = ' '.join(sorted(binpkgsummaries)) |
265 | - cache.binpkgdescriptions = ' '.join(sorted(binpkgdescriptions)) |
266 | - # Column due for deletion. |
267 | - cache.changelog = None |
268 | + [row[1] for row in all_sprs]))) |
269 | + all_binaries = list(IStore(BinaryPackageRelease).find( |
270 | + (BinaryPackageRelease.buildID, |
271 | + BinaryPackageRelease.binarypackagenameID, |
272 | + BinaryPackageRelease.summary, BinaryPackageRelease.description), |
273 | + BinaryPackageRelease.buildID.is_in( |
274 | + [row[1] for row in all_builds]))) |
275 | + sprs_by_build = {build_id: spr_id for spr_id, build_id in all_builds} |
276 | + |
277 | + bulk.load(BinaryPackageName, [row[1] for row in all_binaries]) |
278 | + binaries_by_spr = defaultdict(list) |
279 | + for bpb_id, bpn_id, summary, description in all_binaries: |
280 | + spr_id = sprs_by_build[bpb_id] |
281 | + binaries_by_spr[spr_id].append(( |
282 | + IStore(BinaryPackageName).get(BinaryPackageName, bpn_id), |
283 | + summary, description)) |
284 | + |
285 | + for spn in sourcepackagenames: |
286 | + cache = cache_map[spn] |
287 | + cache.name = spn.name |
288 | + |
289 | + sprs = spr_map.get(spn, []) |
290 | + |
291 | + binpkgnames = set() |
292 | + binpkgsummaries = set() |
293 | + binpkgdescriptions = set() |
294 | + for spr_id, spr_version in sprs: |
295 | + log.debug("Considering source %s %s", spn.name, spr_version) |
296 | + binpkgs = binaries_by_spr.get(spr_id, []) |
297 | + for bpn, summary, description in binpkgs: |
298 | + binpkgnames.add(bpn.name) |
299 | + binpkgsummaries.add(summary) |
300 | + binpkgdescriptions.add(description) |
301 | + |
302 | + # Update the caches. |
303 | + cache.binpkgnames = ' '.join(sorted(binpkgnames)) |
304 | + cache.binpkgsummaries = ' '.join(sorted(binpkgsummaries)) |
305 | + cache.binpkgdescriptions = ' '.join(sorted(binpkgdescriptions)) |
306 | + # Column due for deletion. |
307 | + cache.changelog = None |
308 | |
309 | @classmethod |
310 | def updateAll(cls, distro, archive, log, ztm, commit_chunk=500): |
311 | @@ -208,29 +235,28 @@ |
312 | return |
313 | |
314 | # Get the set of source package names to deal with. |
315 | - spns = list(SourcePackageName.select(""" |
316 | - SourcePackagePublishingHistory.distroseries = |
317 | - DistroSeries.id AND |
318 | - DistroSeries.distribution = %s AND |
319 | - SourcePackagePublishingHistory.archive = %s AND |
320 | - SourcePackagePublishingHistory.sourcepackagename = |
321 | - SourcePackageName.id AND |
322 | - SourcePackagePublishingHistory.dateremoved is NULL |
323 | - """ % sqlvalues(distro, archive), |
324 | - distinct=True, |
325 | - orderBy="name", |
326 | - clauseTables=['SourcePackagePublishingHistory', 'DistroSeries'])) |
327 | + spns = list(sorted( |
328 | + cls.findCurrentSourcePackageNames(archive), |
329 | + key=attrgetter('name'))) |
330 | |
331 | number_of_updates = 0 |
332 | - chunk_size = 0 |
333 | + chunks = [] |
334 | + chunk = [] |
335 | for spn in spns: |
336 | - log.debug("Considering source '%s'" % spn.name) |
337 | - cls._update(distro, spn, archive, log) |
338 | - chunk_size += 1 |
339 | - number_of_updates += 1 |
340 | - if chunk_size == commit_chunk: |
341 | - chunk_size = 0 |
342 | - log.debug("Committing") |
343 | - ztm.commit() |
344 | + chunk.append(spn) |
345 | + if len(chunk) == commit_chunk: |
346 | + chunks.append(chunk) |
347 | + chunk = [] |
348 | + if chunk: |
349 | + chunks.append(chunk) |
350 | + for chunk in chunks: |
351 | + bulk.load(SourcePackageName, [spn.id for spn in chunk]) |
352 | + log.debug( |
353 | + "Considering sources %s", |
354 | + ', '.join([spn.name for spn in chunk])) |
355 | + cls._update(distro, chunk, archive, log) |
356 | + number_of_updates += len(chunk) |
357 | + log.debug("Committing") |
358 | + ztm.commit() |
359 | |
360 | return number_of_updates |
361 | |
362 | === modified file 'lib/lp/soyuz/model/distroseriespackagecache.py' |
363 | --- lib/lp/soyuz/model/distroseriespackagecache.py 2015-07-08 16:05:11 +0000 |
364 | +++ lib/lp/soyuz/model/distroseriespackagecache.py 2016-05-18 23:30:28 +0000 |
365 | @@ -1,4 +1,4 @@ |
366 | -# Copyright 2009 Canonical Ltd. This software is licensed under the |
367 | +# Copyright 2009-2016 Canonical Ltd. This software is licensed under the |
368 | # GNU Affero General Public License version 3 (see the file LICENSE). |
369 | |
370 | __metaclass__ = type |
371 | @@ -6,22 +6,25 @@ |
372 | 'DistroSeriesPackageCache', |
373 | ] |
374 | |
375 | +from collections import defaultdict |
376 | +from operator import attrgetter |
377 | + |
378 | from sqlobject import ( |
379 | ForeignKey, |
380 | StringCol, |
381 | ) |
382 | -from storm.locals import ( |
383 | +from storm.expr import ( |
384 | Desc, |
385 | Max, |
386 | - RawStr, |
387 | + Select, |
388 | ) |
389 | +from storm.locals import RawStr |
390 | from zope.interface import implementer |
391 | |
392 | +from lp.services.database import bulk |
393 | from lp.services.database.interfaces import IStore |
394 | -from lp.services.database.sqlbase import ( |
395 | - SQLBase, |
396 | - sqlvalues, |
397 | - ) |
398 | +from lp.services.database.sqlbase import SQLBase |
399 | +from lp.soyuz.enums import PackagePublishingStatus |
400 | from lp.soyuz.interfaces.distroseriespackagecache import ( |
401 | IDistroSeriesPackageCache, |
402 | ) |
403 | @@ -50,6 +53,21 @@ |
404 | descriptions = StringCol(notNull=False, default=None) |
405 | |
406 | @classmethod |
407 | + def findCurrentBinaryPackageNames(cls, archive, distroseries): |
408 | + bpn_ids = IStore(BinaryPackagePublishingHistory).find( |
409 | + BinaryPackagePublishingHistory.binarypackagenameID, |
410 | + BinaryPackagePublishingHistory.distroarchseriesID.is_in( |
411 | + Select( |
412 | + DistroArchSeries.id, tables=[DistroArchSeries], |
413 | + where=DistroArchSeries.distroseries == distroseries)), |
414 | + BinaryPackagePublishingHistory.archive == archive, |
415 | + BinaryPackagePublishingHistory.status.is_in(( |
416 | + PackagePublishingStatus.PENDING, |
417 | + PackagePublishingStatus.PUBLISHED))).config( |
418 | + distinct=True) |
419 | + return bulk.load(BinaryPackageName, bpn_ids) |
420 | + |
421 | + @classmethod |
422 | def _find(cls, distroseries, archive=None): |
423 | """All of the cached binary package records for this distroseries. |
424 | |
425 | @@ -79,25 +97,11 @@ |
426 | messages. |
427 | """ |
428 | # get the set of package names that should be there |
429 | - bpns = set(BinaryPackageName.select(""" |
430 | - BinaryPackagePublishingHistory.distroarchseries = |
431 | - DistroArchSeries.id AND |
432 | - DistroArchSeries.distroseries = %s AND |
433 | - Archive.id = %s AND |
434 | - BinaryPackagePublishingHistory.archive = Archive.id AND |
435 | - BinaryPackagePublishingHistory.binarypackagerelease = |
436 | - BinaryPackageRelease.id AND |
437 | - BinaryPackagePublishingHistory.binarypackagename = |
438 | - BinaryPackageName.id AND |
439 | - BinaryPackagePublishingHistory.dateremoved is NULL AND |
440 | - Archive.enabled = TRUE |
441 | - """ % sqlvalues(distroseries.id, archive.id), |
442 | - distinct=True, |
443 | - clauseTables=[ |
444 | - 'Archive', |
445 | - 'DistroArchSeries', |
446 | - 'BinaryPackagePublishingHistory', |
447 | - 'BinaryPackageRelease'])) |
448 | + if not archive.enabled: |
449 | + bpns = set() |
450 | + else: |
451 | + bpns = set( |
452 | + cls.findCurrentBinaryPackageNames(archive, distroseries)) |
453 | |
454 | # remove the cache entries for binary packages we no longer want |
455 | for cache in cls._find(distroseries, archive): |
456 | @@ -108,8 +112,8 @@ |
457 | cache.destroySelf() |
458 | |
459 | @classmethod |
460 | - def _update(cls, distroseries, binarypackagename, archive, log): |
461 | - """Update the package cache for a given IBinaryPackageName |
462 | + def _update(cls, distroseries, binarypackagenames, archive, log): |
463 | + """Update the package cache for a given set of `IBinaryPackageName`s. |
464 | |
465 | 'log' is required, it should be a logger object able to print |
466 | DEBUG level messages. |
467 | @@ -117,58 +121,69 @@ |
468 | (in full batches of 100 elements) |
469 | """ |
470 | # get the set of published binarypackagereleases |
471 | - details = list(IStore(BinaryPackageRelease).find( |
472 | - (BinaryPackageRelease.summary, BinaryPackageRelease.description, |
473 | + all_details = list(IStore(BinaryPackageRelease).find( |
474 | + (BinaryPackageRelease.binarypackagenameID, |
475 | + BinaryPackageRelease.summary, BinaryPackageRelease.description, |
476 | Max(BinaryPackageRelease.datecreated)), |
477 | BinaryPackageRelease.id == |
478 | BinaryPackagePublishingHistory.binarypackagereleaseID, |
479 | - BinaryPackagePublishingHistory.binarypackagename == |
480 | - binarypackagename, |
481 | - BinaryPackagePublishingHistory.distroarchseriesID == |
482 | - DistroArchSeries.id, |
483 | - DistroArchSeries.distroseries == distroseries, |
484 | + BinaryPackagePublishingHistory.binarypackagenameID.is_in( |
485 | + [bpn.id for bpn in binarypackagenames]), |
486 | + BinaryPackagePublishingHistory.distroarchseriesID.is_in( |
487 | + Select( |
488 | + DistroArchSeries.id, tables=[DistroArchSeries], |
489 | + where=DistroArchSeries.distroseries == distroseries)), |
490 | BinaryPackagePublishingHistory.archive == archive, |
491 | - BinaryPackagePublishingHistory.dateremoved == None |
492 | + BinaryPackagePublishingHistory.status.is_in(( |
493 | + PackagePublishingStatus.PENDING, |
494 | + PackagePublishingStatus.PUBLISHED)) |
495 | ).group_by( |
496 | + BinaryPackageRelease.binarypackagenameID, |
497 | BinaryPackageRelease.summary, |
498 | BinaryPackageRelease.description |
499 | ).order_by( |
500 | + BinaryPackageRelease.binarypackagenameID, |
501 | Desc(Max(BinaryPackageRelease.datecreated)))) |
502 | - |
503 | - if not details: |
504 | + if not all_details: |
505 | log.debug("No binary releases found.") |
506 | return |
507 | |
508 | - # find or create the cache entry |
509 | - cache = cls.selectOne(""" |
510 | - distroseries = %s AND |
511 | - archive = %s AND |
512 | - binarypackagename = %s |
513 | - """ % sqlvalues(distroseries, archive, binarypackagename)) |
514 | - if cache is None: |
515 | - log.debug("Creating new binary cache entry.") |
516 | - cache = cls( |
517 | - archive=archive, |
518 | - distroseries=distroseries, |
519 | - binarypackagename=binarypackagename) |
520 | - |
521 | - # make sure the cached name, summary and description are correct |
522 | - cache.name = binarypackagename.name |
523 | - cache.summary = details[0][0] |
524 | - cache.description = details[0][1] |
525 | - |
526 | - # get the sets of binary package summaries, descriptions. there is |
527 | - # likely only one, but just in case... |
528 | - |
529 | - summaries = set() |
530 | - descriptions = set() |
531 | - for summary, description, datecreated in details: |
532 | - summaries.add(summary) |
533 | - descriptions.add(description) |
534 | - |
535 | - # and update the caches |
536 | - cache.summaries = ' '.join(sorted(summaries)) |
537 | - cache.descriptions = ' '.join(sorted(descriptions)) |
538 | + details_map = defaultdict(list) |
539 | + for (bpn_id, summary, description, datecreated) in all_details: |
540 | + bpn = IStore(BinaryPackageName).get(BinaryPackageName, bpn_id) |
541 | + details_map[bpn].append((summary, description)) |
542 | + |
543 | + all_caches = IStore(cls).find( |
544 | + cls, cls.distroseries == distroseries, cls.archive == archive, |
545 | + cls.binarypackagenameID.is_in( |
546 | + [bpn.id for bpn in binarypackagenames])) |
547 | + cache_map = {cache.binarypackagename: cache for cache in all_caches} |
548 | + |
549 | + for bpn in set(binarypackagenames) - set(cache_map): |
550 | + cache_map[bpn] = cls( |
551 | + archive=archive, distroseries=distroseries, |
552 | + binarypackagename=bpn) |
553 | + |
554 | + for bpn in binarypackagenames: |
555 | + cache = cache_map[bpn] |
556 | + details = details_map[bpn] |
557 | + # make sure the cached name, summary and description are correct |
558 | + cache.name = bpn.name |
559 | + cache.summary = details[0][0] |
560 | + cache.description = details[0][1] |
561 | + |
562 | + # get the sets of binary package summaries, descriptions. there is |
563 | + # likely only one, but just in case... |
564 | + |
565 | + summaries = set() |
566 | + descriptions = set() |
567 | + for summary, description in details: |
568 | + summaries.add(summary) |
569 | + descriptions.add(description) |
570 | + |
571 | + # and update the caches |
572 | + cache.summaries = ' '.join(sorted(summaries)) |
573 | + cache.descriptions = ' '.join(sorted(descriptions)) |
574 | |
575 | @classmethod |
576 | def updateAll(cls, distroseries, archive, log, ztm, commit_chunk=500): |
577 | @@ -190,27 +205,28 @@ |
578 | return |
579 | |
580 | # Get the set of package names to deal with. |
581 | - bpns = IStore(BinaryPackageName).find( |
582 | - BinaryPackageName, |
583 | - DistroArchSeries.distroseries == distroseries, |
584 | - BinaryPackagePublishingHistory.distroarchseriesID == |
585 | - DistroArchSeries.id, |
586 | - BinaryPackagePublishingHistory.archive == archive, |
587 | - BinaryPackagePublishingHistory.binarypackagename == |
588 | - BinaryPackageName.id, |
589 | - BinaryPackagePublishingHistory.dateremoved == None).config( |
590 | - distinct=True).order_by(BinaryPackageName.name) |
591 | + bpns = list(sorted( |
592 | + cls.findCurrentBinaryPackageNames(archive, distroseries), |
593 | + key=attrgetter('name'))) |
594 | |
595 | number_of_updates = 0 |
596 | - chunk_size = 0 |
597 | + chunks = [] |
598 | + chunk = [] |
599 | for bpn in bpns: |
600 | - log.debug("Considering binary '%s'" % bpn.name) |
601 | - cls._update(distroseries, bpn, archive, log) |
602 | - number_of_updates += 1 |
603 | - chunk_size += 1 |
604 | - if chunk_size == commit_chunk: |
605 | - chunk_size = 0 |
606 | - log.debug("Committing") |
607 | - ztm.commit() |
608 | + chunk.append(bpn) |
609 | + if len(chunk) == commit_chunk: |
610 | + chunks.append(chunk) |
611 | + chunk = [] |
612 | + if chunk: |
613 | + chunks.append(chunk) |
614 | + for chunk in chunks: |
615 | + bulk.load(BinaryPackageName, [bpn.id for bpn in chunk]) |
616 | + log.debug( |
617 | + "Considering binaries %s", |
618 | + ', '.join([bpn.name for bpn in chunk])) |
619 | + cls._update(distroseries, chunk, archive, log) |
620 | + number_of_updates += len(chunk) |
621 | + log.debug("Committing") |
622 | + ztm.commit() |
623 | |
624 | return number_of_updates |
625 | |
626 | === modified file 'lib/lp/soyuz/tests/test_distroseriesbinarypackage.py' |
627 | --- lib/lp/soyuz/tests/test_distroseriesbinarypackage.py 2012-01-20 15:42:44 +0000 |
628 | +++ lib/lp/soyuz/tests/test_distroseriesbinarypackage.py 2016-05-18 23:30:28 +0000 |
629 | @@ -1,4 +1,4 @@ |
630 | -# Copyright 2010 Canonical Ltd. This software is licensed under the |
631 | +# Copyright 2010-2016 Canonical Ltd. This software is licensed under the |
632 | # GNU Affero General Public License version 3 (see the file LICENSE). |
633 | |
634 | """Tests for `DistroSeriesBinaryPackage`.""" |
635 | @@ -61,12 +61,12 @@ |
636 | logger = BufferLogger() |
637 | with dbuser(config.statistician.dbuser): |
638 | DistroSeriesPackageCache._update( |
639 | - self.distroseries, self.binary_package_name, distro_archive_1, |
640 | - logger) |
641 | + self.distroseries, [self.binary_package_name], |
642 | + distro_archive_1, logger) |
643 | |
644 | DistroSeriesPackageCache._update( |
645 | - self.distroseries, self.binary_package_name, distro_archive_2, |
646 | - logger) |
647 | + self.distroseries, [self.binary_package_name], |
648 | + distro_archive_2, logger) |
649 | |
650 | self.failUnlessEqual( |
651 | 'Foo is the best', self.distroseries_binary_package.summary) |