Merge lp:~jtv/launchpad/bug-458049 into lp:launchpad

Proposed by Jeroen T. Vermeulen
Status: Merged
Approved by: Jeroen T. Vermeulen
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~jtv/launchpad/bug-458049
Merge into: lp:launchpad
Diff against target: 210 lines
4 files modified
lib/lp/translations/interfaces/potemplate.py (+7/-3)
lib/lp/translations/model/potemplate.py (+7/-4)
lib/lp/translations/scripts/message_sharing_migration.py (+2/-2)
lib/lp/translations/scripts/tests/test_message_sharing_migration.py (+86/-2)
To merge this branch: bzr merge lp:~jtv/launchpad/bug-458049
Reviewer Review Type Date Requested Status
Paul Hummer (community) Approve
Review via email: mp+13779@code.launchpad.net

Commit message

Make sure that message-sharing migration does not pull in msgids from the database.

To post a comment you must log in.
Revision history for this message
Jeroen T. Vermeulen (jtv) wrote :

= Bug 458049 =

Memory usage is the bane of the message-sharing migration script. In a previous fix I stopped it from retrieving POMsgID objects, since only their object ids were needed for comparison. But as it turns out, the msgids were still being loaded from the database because of an implicit prejoin in POTemplate.getPOTMsgSets.

So I'm making that prejoin optional. It's still the default behaviour, but the script disables it.

This may conceivably make the counting of potmsgsets in a template slightly faster as well, as it has no need for the additional left joins. The database backend may know that too, but why put it to the trouble?

A new test tries to ensure that migration really does not suck any POMsgID objects into memory:
{{{
./bin/test -vv -t SharingMigrationPerformance
}}}

For Q/A, try running this script:

  https://pastebin.canonical.com/23602/

*If* this change really helps, that test *may* not bring the staging server to its knees by consuming over 2.5 GB of address space.

Jeroen

Revision history for this message
Paul Hummer (rockstar) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/translations/interfaces/potemplate.py'
--- lib/lp/translations/interfaces/potemplate.py 2009-09-15 15:30:59 +0000
+++ lib/lp/translations/interfaces/potemplate.py 2009-10-22 11:30:28 +0000
@@ -358,11 +358,15 @@
358 The sequence number must be > 0.358 The sequence number must be > 0.
359 """359 """
360360
361 def getPOTMsgSets(current=True):361 def getPOTMsgSets(current=True, prefetch=True):
362 """Return an iterator over `IPOTMsgSet` objects in this template.362 """Return an iterator over `IPOTMsgSet` objects in this template.
363363
364 The 'current' argument is used to select only current POTMsgSets or364 :param current: Whether to limit the search to current
365 all of them.365 POTMsgSets.
366 :param prefetch: Whether to prefetch the `POMsgID`s attached to
367 the POTMsgSets. This is for optimization only.
368 :return: All current POTMsgSets for the template if `current` is
369 True, or all POTMsgSets for the template otherwise.
366 """370 """
367371
368 def getPOTMsgSetsCount(current=True):372 def getPOTMsgSetsCount(current=True):
369373
=== modified file 'lib/lp/translations/model/potemplate.py'
--- lib/lp/translations/model/potemplate.py 2009-10-10 13:43:31 +0000
+++ lib/lp/translations/model/potemplate.py 2009-10-22 11:30:28 +0000
@@ -405,7 +405,7 @@
405 return POTMsgSet.selectOne(' AND '.join(clauses),405 return POTMsgSet.selectOne(' AND '.join(clauses),
406 clauseTables=['TranslationTemplateItem'])406 clauseTables=['TranslationTemplateItem'])
407407
408 def getPOTMsgSets(self, current=True):408 def getPOTMsgSets(self, current=True, prefetch=True):
409 """See `IPOTemplate`."""409 """See `IPOTemplate`."""
410 clauses = self._getPOTMsgSetSelectionClauses()410 clauses = self._getPOTMsgSetSelectionClauses()
411411
@@ -416,11 +416,14 @@
416 query = POTMsgSet.select(" AND ".join(clauses),416 query = POTMsgSet.select(" AND ".join(clauses),
417 clauseTables=['TranslationTemplateItem'],417 clauseTables=['TranslationTemplateItem'],
418 orderBy=['TranslationTemplateItem.sequence'])418 orderBy=['TranslationTemplateItem.sequence'])
419 return query.prejoin(['msgid_singular', 'msgid_plural'])419 if prefetch:
420 query = query.prejoin(['msgid_singular', 'msgid_plural'])
421
422 return query
420423
421 def getPOTMsgSetsCount(self, current=True):424 def getPOTMsgSetsCount(self, current=True):
422 """See `IPOTemplate`."""425 """See `IPOTemplate`."""
423 results = self.getPOTMsgSets(current)426 results = self.getPOTMsgSets(current, prefetch=False)
424 return results.count()427 return results.count()
425428
426 def getPOTMsgSetByID(self, id):429 def getPOTMsgSetByID(self, id):
@@ -644,7 +647,7 @@
644647
645 def expireAllMessages(self):648 def expireAllMessages(self):
646 """See `IPOTemplate`."""649 """See `IPOTemplate`."""
647 for potmsgset in self.getPOTMsgSets():650 for potmsgset in self.getPOTMsgSets(prefetch=False):
648 potmsgset.setSequence(self, 0)651 potmsgset.setSequence(self, 0)
649652
650 def _lookupLanguage(self, language_code):653 def _lookupLanguage(self, language_code):
651654
=== modified file 'lib/lp/translations/scripts/message_sharing_migration.py'
--- lib/lp/translations/scripts/message_sharing_migration.py 2009-10-19 14:09:52 +0000
+++ lib/lp/translations/scripts/message_sharing_migration.py 2009-10-22 11:30:28 +0000
@@ -291,7 +291,7 @@
291291
292 for template in potemplates:292 for template in potemplates:
293 order_check.check(template)293 order_check.check(template)
294 for potmsgset in template.getPOTMsgSets(False):294 for potmsgset in template.getPOTMsgSets(False, prefetch=False):
295 key = get_potmsgset_key(potmsgset)295 key = get_potmsgset_key(potmsgset)
296 if key not in representatives:296 if key not in representatives:
297 representatives[key] = potmsgset297 representatives[key] = potmsgset
@@ -362,7 +362,7 @@
362 order_check = OrderingCheck(cmp=self.compare_template_precedence)362 order_check = OrderingCheck(cmp=self.compare_template_precedence)
363 for template in potemplates:363 for template in potemplates:
364 order_check.check(template)364 order_check.check(template)
365 for potmsgset in template.getPOTMsgSets(False):365 for potmsgset in template.getPOTMsgSets(False, prefetch=False):
366 for message in potmsgset.getAllTranslationMessages():366 for message in potmsgset.getAllTranslationMessages():
367 removeSecurityProxy(message).shareIfPossible()367 removeSecurityProxy(message).shareIfPossible()
368368
369369
=== modified file 'lib/lp/translations/scripts/tests/test_message_sharing_migration.py'
--- lib/lp/translations/scripts/tests/test_message_sharing_migration.py 2009-08-13 19:03:36 +0000
+++ lib/lp/translations/scripts/tests/test_message_sharing_migration.py 2009-10-22 11:30:28 +0000
@@ -4,7 +4,9 @@
4__metaclass__ = type4__metaclass__ = type
55
6from datetime import timedelta6from datetime import timedelta
7import gc
7from logging import ERROR8from logging import ERROR
9import transaction
8from unittest import TestLoader10from unittest import TestLoader
911
10from zope.component import getUtility12from zope.component import getUtility
@@ -14,6 +16,8 @@
14from lp.testing import TestCaseWithFactory16from lp.testing import TestCaseWithFactory
15from lp.translations.interfaces.pofiletranslator import (17from lp.translations.interfaces.pofiletranslator import (
16 IPOFileTranslatorSet)18 IPOFileTranslatorSet)
19from lp.translations.model.pomsgid import POMsgID
20from lp.translations.model.potemplate import POTemplate
17from lp.translations.scripts.message_sharing_migration import (21from lp.translations.scripts.message_sharing_migration import (
18 MessageSharingMerge)22 MessageSharingMerge)
19from canonical.testing import LaunchpadZopelessLayer23from canonical.testing import LaunchpadZopelessLayer
@@ -233,7 +237,7 @@
233 layer = LaunchpadZopelessLayer237 layer = LaunchpadZopelessLayer
234238
235 def setUp(self):239 def setUp(self):
236 """Set up test environment with.240 """Set up test environment.
237241
238 The test setup includes:242 The test setup includes:
239 * Two templates for the "trunk" and "stable" release series.243 * Two templates for the "trunk" and "stable" release series.
@@ -427,7 +431,8 @@
427431
428 def setUp(self):432 def setUp(self):
429 self.layer.switchDbUser('postgres')433 self.layer.switchDbUser('postgres')
430 super(TestTranslationMessageMerging, self).setUp(user='mark@example.com')434 super(TestTranslationMessageMerging, self).setUp(
435 user='mark@example.com')
431 super(TestTranslationMessageMerging, self).setUpProduct()436 super(TestTranslationMessageMerging, self).setUpProduct()
432437
433 def test_messagesCanStayDiverged(self):438 def test_messagesCanStayDiverged(self):
@@ -736,5 +741,84 @@
736 self.assertEqual(twin, trunk_message)741 self.assertEqual(twin, trunk_message)
737742
738743
744class TestSharingMigrationPerformance(TestCaseWithFactory,
745 TranslatedProductMixin):
746 """Test performance-related aspects of migration.
747
748 Memory usage is a particular problem for this script, so this class
749 particularly looks for regressions in that area.
750 """
751 layer = LaunchpadZopelessLayer
752
753 def setUp(self):
754 self.layer.switchDbUser('postgres')
755 super(TestSharingMigrationPerformance, self).setUp()
756 super(TestSharingMigrationPerformance, self).setUpProduct()
757
758 def _flushDbObjects(self):
759 """Flush ORM-backed objects from memory as much as possible.
760
761 This involves a commit.
762 """
763 transaction.commit()
764 gc.collect()
765
766 def _listLoadedPOMsgIDs(self, ignore_list=None):
767 """Return the set of all `POMsgID` objects that are in memory.
768
769 :param ignore_list: A previous return value. Any POMsgIDs that
770 were already in that list are ignored here.
771 """
772 pomsgids = set([
773 whatever
774 for whatever in gc.get_objects()
775 if isinstance(whatever, POMsgID)
776 ])
777 if ignore_list is not None:
778 pomsgids -= ignore_list
779 return pomsgids
780
781 def _resetReferences(self):
782 """Reset translation-related references in the test object.
783
784 This stops the test itself from pinning things in memory as
785 caches are cleared.
786
787 Transactions are committed and the templates list is discarded
788 and rebuilt to get rid of pinned objects.
789 """
790 if self.templates is None:
791 template_ids = []
792 else:
793 template_ids = [template.id for template in self.templates]
794
795 self.templates = None
796 self.trunk_potmsgset = None
797 self.stable_potmsgset = None
798 self.msgid = None
799 self.trunk_pofile = None
800 self.stable_pofile = None
801 self._flushDbObjects()
802
803 self.templates = [POTemplate.get(id) for id in template_ids]
804
805 def test_merging_loads_no_msgids(self):
806 # Migration does not load actual msgids into memory.
807 self._flushDbObjects()
808 msgids_before = self._listLoadedPOMsgIDs()
809
810 self._makeTranslationMessages('x', 'y', trunk_diverged=True)
811 self._makeTranslationMessages('1', '2', stable_diverged=True)
812
813 self._resetReferences()
814 self.assertNotEqual([], self.templates)
815 self.assertEqual(set(), self._listLoadedPOMsgIDs(msgids_before))
816
817 self.script._mergePOTMsgSets(self.templates)
818 self.script._mergeTranslationMessages(self.templates)
819
820 self.assertEqual(set(), self._listLoadedPOMsgIDs(msgids_before))
821
822
739def test_suite():823def test_suite():
740 return TestLoader().loadTestsFromName(__name__)824 return TestLoader().loadTestsFromName(__name__)