Merge lp:~wgrant/launchpad/scannery-stabbery into lp:launchpad

Proposed by William Grant
Status: Merged
Merged at revision: 18059
Proposed branch: lp:~wgrant/launchpad/scannery-stabbery
Merge into: lp:launchpad
Diff against target: 119 lines (+19/-24)
2 files modified
lib/lp/code/model/branch.py (+13/-24)
lib/lp/codehosting/scanner/bzrsync.py (+6/-0)
To merge this branch: bzr merge lp:~wgrant/launchpad/scannery-stabbery
Reviewer Review Type Date Requested Status
Colin Watson (community) Approve
Review via email: mp+295542@code.launchpad.net

Commit message

Rewrite Branch.createBranchRevisionsFromIDs to use bulk.create.

Description of the change

Rewrite Branch.createBranchRevisionsFromIDs to use bulk.create. It previously used a temporary table and potentially overcomplicated queries, in ways that caused PostgreSQL to choose a bad query plan.

Also log a bit more verbosely in bzrsync, so we can see which bits remain slow.

To post a comment you must log in.
Revision history for this message
Colin Watson (cjwatson) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/code/model/branch.py'
2--- lib/lp/code/model/branch.py 2016-04-06 10:59:15 +0000
3+++ lib/lp/code/model/branch.py 2016-05-24 05:37:38 +0000
4@@ -27,7 +27,6 @@
5 Coalesce,
6 Count,
7 Desc,
8- Insert,
9 Join,
10 NamedFunc,
11 Not,
12@@ -154,7 +153,7 @@
13 )
14 from lp.registry.model.teammembership import TeamParticipation
15 from lp.services.config import config
16-from lp.services.database.bulk import load_related
17+from lp.services.database import bulk
18 from lp.services.database.constants import (
19 DEFAULT,
20 UTC_NOW,
21@@ -499,7 +498,7 @@
22 from lp.code.model.branchcollection import GenericBranchCollection
23
24 def eager_load(rows):
25- branches = load_related(
26+ branches = bulk.load_related(
27 Branch, rows, ['source_branchID', 'prerequisite_branchID'])
28 GenericBranchCollection.preloadVisibleStackedOnBranches(
29 branches, user)
30@@ -781,7 +780,8 @@
31
32 def eager_load(rows):
33 revisions = map(operator.itemgetter(1), rows)
34- load_related(RevisionAuthor, revisions, ['revision_author_id'])
35+ bulk.load_related(
36+ RevisionAuthor, revisions, ['revision_author_id'])
37 return DecoratedResultSet(result, pre_iter_hook=eager_load)
38
39 def getRevisionsSince(self, timestamp):
40@@ -1094,26 +1094,15 @@
41 if not revision_id_sequence_pairs:
42 return
43 store = Store.of(self)
44- store.execute(
45- """
46- CREATE TEMPORARY TABLE RevidSequence
47- (revision_id text, sequence integer)
48- """)
49- # Force to Unicode or we will end up with bad quoting under
50- # PostgreSQL 9.1.
51- unicode_revid_sequence_pairs = [
52- (a and unicode(a) or None, b and unicode(b) or None)
53- for a, b in revision_id_sequence_pairs]
54- store.execute(Insert(('revision_id', 'sequence'),
55- table=['RevidSequence'], values=unicode_revid_sequence_pairs))
56- store.execute(
57- """
58- INSERT INTO BranchRevision (branch, revision, sequence)
59- SELECT %s, Revision.id, RevidSequence.sequence
60- FROM RevidSequence, Revision
61- WHERE Revision.revision_id = RevidSequence.revision_id
62- """ % sqlvalues(self))
63- store.execute("DROP TABLE RevidSequence")
64+ rev_db_ids = dict(store.find(
65+ (Revision.revision_id, Revision.id),
66+ Revision.revision_id.is_in(
67+ (revid for revid, _ in revision_id_sequence_pairs))))
68+ bulk.create(
69+ (BranchRevision.branch, BranchRevision.revision_id,
70+ BranchRevision.sequence),
71+ [(self, rev_db_ids[revid], seq)
72+ for revid, seq in revision_id_sequence_pairs])
73
74 def getTipRevision(self):
75 """See `IBranch`."""
76
77=== modified file 'lib/lp/codehosting/scanner/bzrsync.py'
78--- lib/lp/codehosting/scanner/bzrsync.py 2015-09-24 13:44:02 +0000
79+++ lib/lp/codehosting/scanner/bzrsync.py 2016-05-24 05:37:38 +0000
80@@ -111,11 +111,13 @@
81 self.insertBranchRevisions(bzr_branch, revids_to_insert)
82 transaction.commit()
83 # Synchronize the RevisionCache for this branch.
84+ self.logger.info("Updating revision cache.")
85 getUtility(IRevisionSet).updateRevisionCacheForBranch(self.db_branch)
86 transaction.commit()
87
88 # Notify any listeners that the tip of the branch has changed, but
89 # before we've actually updated the database branch.
90+ self.logger.info("Firing tip change event.")
91 initial_scan = (len(db_history) == 0)
92 notify(events.TipChanged(self.db_branch, bzr_branch, initial_scan))
93
94@@ -126,7 +128,9 @@
95 # not been updated. Since this has no ill-effect, and can only err on
96 # the pessimistic side (tell the user the data has not yet been
97 # updated although it has), the race is acceptable.
98+ self.logger.info("Updating branch status.")
99 self.updateBranchStatus(bzr_history)
100+ self.logger.info("Firing scan completion event.")
101 notify(
102 events.ScanCompleted(
103 self.db_branch, bzr_branch, self.logger, new_ancestry))
104@@ -157,6 +161,7 @@
105 return bzr_branch.repository.get_graph(PPSource)
106
107 def getAncestryDelta(self, bzr_branch):
108+ self.logger.info("Calculating ancestry delta.")
109 bzr_last = bzr_branch.last_revision()
110 db_last = self.db_branch.last_scanned_id
111 if db_last is None:
112@@ -221,6 +226,7 @@
113 added_history, last_revno, added_ancestry))
114 # We must remove any stray BranchRevisions that happen to already be
115 # present.
116+ self.logger.info("Finding stray BranchRevisions.")
117 existing_branchrevisions = Store.of(self.db_branch).find(
118 Revision.revision_id, BranchRevision.branch == self.db_branch,
119 BranchRevision.revision_id == Revision.id,