Code review comment for lp:~mwhudson/launchpad/much-faster-rewrite-map

Revision history for this message
Tim Penhey (thumper) wrote :

 review needs-info

> === modified file 'lib/canonical/launchpad/scripts/logger.py'
> --- lib/canonical/launchpad/scripts/logger.py 2009-06-25 05:30:52 +0000
> +++ lib/canonical/launchpad/scripts/logger.py 2009-07-22 02:16:56 +0000
> @@ -113,8 +113,8 @@
> def __init__(self):
> self.buffer = StringIO()
>
> - def message(self, prefix, *stuff, **kw):
> - self.buffer.write('%s: %s\n' % (prefix, ' '.join(stuff)))
> + def message(self, prefix, msg, *stuff, **kw):
> + self.buffer.write('%s: %s\n' % (prefix, msg % stuff))

What is the impact of other code using message?

This to me looks like it'll brake things.

> === modified file 'lib/lp/codehosting/rewrite.py'
> --- lib/lp/codehosting/rewrite.py 2009-07-20 21:32:15 +0000
> +++ lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000
> @@ -5,38 +5,71 @@
> """
>
> import time
> -import xmlrpclib
>
> from bzrlib import urlutils
>
> -from lp.codehosting.vfs import (
> - branch_id_to_path, BranchFileSystemClient)
> +from canonical.launchpad.webapp.interfaces import (
> + IStoreSelector, MAIN_STORE, SLAVE_FLAVOR)
> +from zope.component import getUtility
> +from lp.code.model.branch import Branch
> +from lp.codehosting.vfs import branch_id_to_path
> +
> from canonical.config import config
> -from lp.code.interfaces.codehosting import (
> - BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS)
> -from canonical.launchpad.xmlrpc import faults
> -from canonical.twistedsupport import extract_result
>
> __all__ = ['BranchRewriter']
>
>
> class BranchRewriter:
>
> - def __init__(self, logger, proxy):
> + def __init__(self, logger, _now=None):
> """
>
> :param logger: Logger than messages about what the rewriter is
doing
> will be sent to.
> :param proxy: A blocking proxy for a branchfilesystem endpoint.
> """
> + if _now is None:
> + self._now = time.time
> + else:
> + self._now = _now
> self.logger = logger
> - self.client = BranchFileSystemClient(proxy, LAUNCHPAD_ANONYMOUS,
10.0)
> + self.store = getUtility(IStoreSelector).get(MAIN_STORE,
SLAVE_FLAVOR)
> + self._cache = {}
>
> def _codebrowse_url(self, path):
> return urlutils.join(
> config.codehosting.internal_codebrowse_root,
> path)
>
> + def _getBranchIdAndTrailingPath(self, location):
> + """Return the branch id and trailing path for 'location'.
> +
> + In addition this method returns whether the answer can from the
cache
> + or from the database.
> + """
> + parts = location[1:].split('/')
> + prefixes = []
> + for i in range(1, len(parts) + 1):
> + prefix = '/'.join(parts[:i])
> + if prefix in self._cache:
> + branch_id, inserted_time = self._cache[prefix]
> + if (self._now() < inserted_time +
> + config.codehosting.branch_rewrite_cache_lifetime):
> + trailing = location[len(prefix) + 1:]
> + return branch_id, trailing, "HIT"
> + prefixes.append(prefix)
> + result = self.store.find(
> + Branch,
> + Branch.unique_name.is_in(prefixes), Branch.private == False)
> + try:
> + branch_id, unique_name = result.values(
> + Branch.id, Branch.unique_name).next()
> + except StopIteration:
> + return None, None, "MISS"

How about:

       result = self.store.find(
           (Branch.id, Branch.unique_name),
           Branch.unique_name.is_in(prefixes), Branch.private == False).one()

That way result should be None if not found, and a tuple of id and unique_name
if it was found.

That'll make this code a little cleaner.

> === modified file 'lib/lp/codehosting/tests/test_rewrite.py'
> --- lib/lp/codehosting/tests/test_rewrite.py 2009-07-17 00:26:05 +0000
> +++ lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 03:23:28 +0000
> class TestBranchRewriterScript(TestCaseWithFactory):
>
> - layer = ZopelessAppServerLayer
> + layer = DatabaseFunctionalLayer
>
> def test_script(self):

I think a comment here explaining why you have '\n'.join(expected) + '\n'
would be *really* helpful.

I think it has todo with how the rewriting works through apache, but a
comment would help.

> - branch = self.factory.makeAnyBranch()
> - input = "/%s/.bzr/README\n" % branch.unique_name
> - expected = (
> - "file:///var/tmp/bzrsync/%s/.bzr/README\n"
> - % branch_id_to_path(branch.id))
> - self.layer.txn.commit()
> + branches = [
> + self.factory.makeProductBranch(),
> + self.factory.makePersonalBranch(),
> + self.factory.makePackageBranch()]
> + input = [
> + "/%s/.bzr/README" % branch.unique_name
> + for branch in branches] + [
> + "/%s/changes" % branch.unique_name
> + for branch in branches]
> + expected = [
> + 'file:///var/tmp/bzrsync/%s/.bzr/README'
> + % branch_id_to_path(branch.id)
> + for branch in branches] + [
> + 'http://localhost:8080/%s/changes' % branch.unique_name
> + for branch in branches]
> + transaction.commit()
> script_file = os.path.join(
> config.root, 'scripts', 'branch-rewrite.py')
> proc = subprocess.Popen(
> [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
> stderr=subprocess.PIPE, bufsize=0)
> - proc.stdin.write(input)
> - output = proc.stdout.readline()
> + proc.stdin.write('\n'.join(input) + '\n')

A comment here too about why the extra carriage return.

> + output = []
> + for i in range(len(input)):
> + output.append(proc.stdout.readline())
> os.kill(proc.pid, signal.SIGINT)
> err = proc.stderr.read()
> # The script produces logging output, but not to stderr.
> self.assertEqual('', err)
> - self.assertEqual(expected, output)
> + self.assertEqual('\n'.join(expected) + '\n', ''.join(output))
>
>
> def test_suite():
>

I look forward to seeing this tested live :)

Thanks
Tim

review: Needs Information

« Back to merge proposal