Merge lp:~jtv/launchpad/bug-458049 into lp:launchpad

Proposed by Jeroen T. Vermeulen
Status: Merged
Approved by: Jeroen T. Vermeulen
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~jtv/launchpad/bug-458049
Merge into: lp:launchpad
Diff against target: 210 lines
4 files modified
lib/lp/translations/interfaces/potemplate.py (+7/-3)
lib/lp/translations/model/potemplate.py (+7/-4)
lib/lp/translations/scripts/message_sharing_migration.py (+2/-2)
lib/lp/translations/scripts/tests/test_message_sharing_migration.py (+86/-2)
To merge this branch: bzr merge lp:~jtv/launchpad/bug-458049
Reviewer Review Type Date Requested Status
Paul Hummer (community) Approve
Review via email: mp+13779@code.launchpad.net

Commit message

Make sure that message-sharing migration does not pull in msgids from the database.

To post a comment you must log in.
Revision history for this message
Jeroen T. Vermeulen (jtv) wrote :

= Bug 458049 =

Memory usage is the bane of the message-sharing migration script. In a previous fix I stopped it from retrieving POMsgID objects, since only their object ids were needed for comparison. But as it turns out, the msgids were still being loaded from the database because of an implicit prejoin in POTemplate.getPOTMsgSets.

So I'm making that prejoin optional. It's still the default behaviour, but the script disables it.

This may conceivably make the counting of potmsgsets in a template slightly faster as well, as it has no need for the additional left joins. The database backend may know that too, but why put it to the trouble?

A new test tries to ensure that migration really does not suck any POMsgID objects into memory:
{{{
./bin/test -vv -t SharingMigrationPerformance
}}}

For Q/A, try running this script:

  https://pastebin.canonical.com/23602/

*If* this change really helps, that test *may* not bring the staging server to its knees by consuming over 2.5 GB of address space.

Jeroen

Revision history for this message
Paul Hummer (rockstar) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/translations/interfaces/potemplate.py'
2--- lib/lp/translations/interfaces/potemplate.py 2009-09-15 15:30:59 +0000
3+++ lib/lp/translations/interfaces/potemplate.py 2009-10-22 11:30:28 +0000
4@@ -358,11 +358,15 @@
5 The sequence number must be > 0.
6 """
7
8- def getPOTMsgSets(current=True):
9+ def getPOTMsgSets(current=True, prefetch=True):
10 """Return an iterator over `IPOTMsgSet` objects in this template.
11
12- The 'current' argument is used to select only current POTMsgSets or
13- all of them.
14+ :param current: Whether to limit the search to current
15+ POTMsgSets.
16+ :param prefetch: Whether to prefetch the `POMsgID`s attached to
17+ the POTMsgSets. This is for optimization only.
18+ :return: All current POTMsgSets for the template if `current` is
19+ True, or all POTMsgSets for the template otherwise.
20 """
21
22 def getPOTMsgSetsCount(current=True):
23
24=== modified file 'lib/lp/translations/model/potemplate.py'
25--- lib/lp/translations/model/potemplate.py 2009-10-10 13:43:31 +0000
26+++ lib/lp/translations/model/potemplate.py 2009-10-22 11:30:28 +0000
27@@ -405,7 +405,7 @@
28 return POTMsgSet.selectOne(' AND '.join(clauses),
29 clauseTables=['TranslationTemplateItem'])
30
31- def getPOTMsgSets(self, current=True):
32+ def getPOTMsgSets(self, current=True, prefetch=True):
33 """See `IPOTemplate`."""
34 clauses = self._getPOTMsgSetSelectionClauses()
35
36@@ -416,11 +416,14 @@
37 query = POTMsgSet.select(" AND ".join(clauses),
38 clauseTables=['TranslationTemplateItem'],
39 orderBy=['TranslationTemplateItem.sequence'])
40- return query.prejoin(['msgid_singular', 'msgid_plural'])
41+ if prefetch:
42+ query = query.prejoin(['msgid_singular', 'msgid_plural'])
43+
44+ return query
45
46 def getPOTMsgSetsCount(self, current=True):
47 """See `IPOTemplate`."""
48- results = self.getPOTMsgSets(current)
49+ results = self.getPOTMsgSets(current, prefetch=False)
50 return results.count()
51
52 def getPOTMsgSetByID(self, id):
53@@ -644,7 +647,7 @@
54
55 def expireAllMessages(self):
56 """See `IPOTemplate`."""
57- for potmsgset in self.getPOTMsgSets():
58+ for potmsgset in self.getPOTMsgSets(prefetch=False):
59 potmsgset.setSequence(self, 0)
60
61 def _lookupLanguage(self, language_code):
62
63=== modified file 'lib/lp/translations/scripts/message_sharing_migration.py'
64--- lib/lp/translations/scripts/message_sharing_migration.py 2009-10-19 14:09:52 +0000
65+++ lib/lp/translations/scripts/message_sharing_migration.py 2009-10-22 11:30:28 +0000
66@@ -291,7 +291,7 @@
67
68 for template in potemplates:
69 order_check.check(template)
70- for potmsgset in template.getPOTMsgSets(False):
71+ for potmsgset in template.getPOTMsgSets(False, prefetch=False):
72 key = get_potmsgset_key(potmsgset)
73 if key not in representatives:
74 representatives[key] = potmsgset
75@@ -362,7 +362,7 @@
76 order_check = OrderingCheck(cmp=self.compare_template_precedence)
77 for template in potemplates:
78 order_check.check(template)
79- for potmsgset in template.getPOTMsgSets(False):
80+ for potmsgset in template.getPOTMsgSets(False, prefetch=False):
81 for message in potmsgset.getAllTranslationMessages():
82 removeSecurityProxy(message).shareIfPossible()
83
84
85=== modified file 'lib/lp/translations/scripts/tests/test_message_sharing_migration.py'
86--- lib/lp/translations/scripts/tests/test_message_sharing_migration.py 2009-08-13 19:03:36 +0000
87+++ lib/lp/translations/scripts/tests/test_message_sharing_migration.py 2009-10-22 11:30:28 +0000
88@@ -4,7 +4,9 @@
89 __metaclass__ = type
90
91 from datetime import timedelta
92+import gc
93 from logging import ERROR
94+import transaction
95 from unittest import TestLoader
96
97 from zope.component import getUtility
98@@ -14,6 +16,8 @@
99 from lp.testing import TestCaseWithFactory
100 from lp.translations.interfaces.pofiletranslator import (
101 IPOFileTranslatorSet)
102+from lp.translations.model.pomsgid import POMsgID
103+from lp.translations.model.potemplate import POTemplate
104 from lp.translations.scripts.message_sharing_migration import (
105 MessageSharingMerge)
106 from canonical.testing import LaunchpadZopelessLayer
107@@ -233,7 +237,7 @@
108 layer = LaunchpadZopelessLayer
109
110 def setUp(self):
111- """Set up test environment with.
112+ """Set up test environment.
113
114 The test setup includes:
115 * Two templates for the "trunk" and "stable" release series.
116@@ -427,7 +431,8 @@
117
118 def setUp(self):
119 self.layer.switchDbUser('postgres')
120- super(TestTranslationMessageMerging, self).setUp(user='mark@example.com')
121+ super(TestTranslationMessageMerging, self).setUp(
122+ user='mark@example.com')
123 super(TestTranslationMessageMerging, self).setUpProduct()
124
125 def test_messagesCanStayDiverged(self):
126@@ -736,5 +741,84 @@
127 self.assertEqual(twin, trunk_message)
128
129
130+class TestSharingMigrationPerformance(TestCaseWithFactory,
131+ TranslatedProductMixin):
132+ """Test performance-related aspects of migration.
133+
134+ Memory usage is a particular problem for this script, so this class
135+ particularly looks for regressions in that area.
136+ """
137+ layer = LaunchpadZopelessLayer
138+
139+ def setUp(self):
140+ self.layer.switchDbUser('postgres')
141+ super(TestSharingMigrationPerformance, self).setUp()
142+ super(TestSharingMigrationPerformance, self).setUpProduct()
143+
144+ def _flushDbObjects(self):
145+ """Flush ORM-backed objects from memory as much as possible.
146+
147+ This involves a commit.
148+ """
149+ transaction.commit()
150+ gc.collect()
151+
152+ def _listLoadedPOMsgIDs(self, ignore_list=None):
153+ """Return the set of all `POMsgID` objects that are in memory.
154+
155+ :param ignore_list: A previous return value. Any POMsgIDs that
156+ were already in that list are ignored here.
157+ """
158+ pomsgids = set([
159+ whatever
160+ for whatever in gc.get_objects()
161+ if isinstance(whatever, POMsgID)
162+ ])
163+ if ignore_list is not None:
164+ pomsgids -= ignore_list
165+ return pomsgids
166+
167+ def _resetReferences(self):
168+ """Reset translation-related references in the test object.
169+
170+ This stops the test itself from pinning things in memory as
171+ caches are cleared.
172+
173+ Transactions are committed and the templates list is discarded
174+ and rebuilt to get rid of pinned objects.
175+ """
176+ if self.templates is None:
177+ template_ids = []
178+ else:
179+ template_ids = [template.id for template in self.templates]
180+
181+ self.templates = None
182+ self.trunk_potmsgset = None
183+ self.stable_potmsgset = None
184+ self.msgid = None
185+ self.trunk_pofile = None
186+ self.stable_pofile = None
187+ self._flushDbObjects()
188+
189+ self.templates = [POTemplate.get(id) for id in template_ids]
190+
191+ def test_merging_loads_no_msgids(self):
192+ # Migration does not load actual msgids into memory.
193+ self._flushDbObjects()
194+ msgids_before = self._listLoadedPOMsgIDs()
195+
196+ self._makeTranslationMessages('x', 'y', trunk_diverged=True)
197+ self._makeTranslationMessages('1', '2', stable_diverged=True)
198+
199+ self._resetReferences()
200+ self.assertNotEqual([], self.templates)
201+ self.assertEqual(set(), self._listLoadedPOMsgIDs(msgids_before))
202+
203+ self.script._mergePOTMsgSets(self.templates)
204+ self.script._mergeTranslationMessages(self.templates)
205+
206+ self.assertEqual(set(), self._listLoadedPOMsgIDs(msgids_before))
207+
208+
209 def test_suite():
210 return TestLoader().loadTestsFromName(__name__)