Code review comment for lp:~mwhudson/launchpad/much-faster-rewrite-map

Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

Tim Penhey wrote:
> Review: Needs Information
> review needs-info
>
>> === modified file 'lib/canonical/launchpad/scripts/logger.py'
>> --- lib/canonical/launchpad/scripts/logger.py 2009-06-25 05:30:52 +0000
>> +++ lib/canonical/launchpad/scripts/logger.py 2009-07-22 02:16:56 +0000
>> @@ -113,8 +113,8 @@
>> def __init__(self):
>> self.buffer = StringIO()
>>
>> - def message(self, prefix, *stuff, **kw):
>> - self.buffer.write('%s: %s\n' % (prefix, ' '.join(stuff)))
>> + def message(self, prefix, msg, *stuff, **kw):
>> + self.buffer.write('%s: %s\n' % (prefix, msg % stuff))
>
> What is the impact of other code using message?

It means that logger.info('%f', 0.1) doesn't break.

> This to me looks like it'll brake things.

I don't think it will. ec2test will confirm :)

>> === modified file 'lib/lp/codehosting/rewrite.py'
>> --- lib/lp/codehosting/rewrite.py 2009-07-20 21:32:15 +0000
>> +++ lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000
>> @@ -5,38 +5,71 @@
>> """
>>
>> import time
>> -import xmlrpclib
>>
>> from bzrlib import urlutils
>>
>> -from lp.codehosting.vfs import (
>> - branch_id_to_path, BranchFileSystemClient)
>> +from canonical.launchpad.webapp.interfaces import (
>> + IStoreSelector, MAIN_STORE, SLAVE_FLAVOR)
>> +from zope.component import getUtility
>> +from lp.code.model.branch import Branch
>> +from lp.codehosting.vfs import branch_id_to_path
>> +
>> from canonical.config import config
>> -from lp.code.interfaces.codehosting import (
>> - BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS)
>> -from canonical.launchpad.xmlrpc import faults
>> -from canonical.twistedsupport import extract_result
>>
>> __all__ = ['BranchRewriter']
>>
>>
>> class BranchRewriter:
>>
>> - def __init__(self, logger, proxy):
>> + def __init__(self, logger, _now=None):
>> """
>>
>> :param logger: Logger than messages about what the rewriter is
> doing
>> will be sent to.
>> :param proxy: A blocking proxy for a branchfilesystem endpoint.
>> """
>> + if _now is None:
>> + self._now = time.time
>> + else:
>> + self._now = _now
>> self.logger = logger
>> - self.client = BranchFileSystemClient(proxy, LAUNCHPAD_ANONYMOUS,
> 10.0)
>> + self.store = getUtility(IStoreSelector).get(MAIN_STORE,
> SLAVE_FLAVOR)
>> + self._cache = {}
>>
>> def _codebrowse_url(self, path):
>> return urlutils.join(
>> config.codehosting.internal_codebrowse_root,
>> path)
>>
>> + def _getBranchIdAndTrailingPath(self, location):
>> + """Return the branch id and trailing path for 'location'.
>> +
>> + In addition this method returns whether the answer can from the
> cache
>> + or from the database.
>> + """
>> + parts = location[1:].split('/')
>> + prefixes = []
>> + for i in range(1, len(parts) + 1):
>> + prefix = '/'.join(parts[:i])
>> + if prefix in self._cache:
>> + branch_id, inserted_time = self._cache[prefix]
>> + if (self._now() < inserted_time +
>> + config.codehosting.branch_rewrite_cache_lifetime):
>> + trailing = location[len(prefix) + 1:]
>> + return branch_id, trailing, "HIT"
>> + prefixes.append(prefix)
>> + result = self.store.find(
>> + Branch,
>> + Branch.unique_name.is_in(prefixes), Branch.private == False)
>> + try:
>> + branch_id, unique_name = result.values(
>> + Branch.id, Branch.unique_name).next()
>> + except StopIteration:
>> + return None, None, "MISS"
>
> How about:
>
> result = self.store.find(
> (Branch.id, Branch.unique_name),
> Branch.unique_name.is_in(prefixes), Branch.private == False).one()
>
> That way result should be None if not found, and a tuple of id and unique_name
> if it was found.
>
> That'll make this code a little cleaner.

Ah, yes, thanks for the tip.

>> === modified file 'lib/lp/codehosting/tests/test_rewrite.py'
>> --- lib/lp/codehosting/tests/test_rewrite.py 2009-07-17 00:26:05 +0000
>> +++ lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 03:23:28 +0000
>> class TestBranchRewriterScript(TestCaseWithFactory):
>>
>> - layer = ZopelessAppServerLayer
>> + layer = DatabaseFunctionalLayer
>>
>> def test_script(self):
>
> I think a comment here explaining why you have '\n'.join(expected) + '\n'
> would be *really* helpful.
>
> I think it has todo with how the rewriting works through apache, but a
> comment would help.

I've tried to make this make more sense -- have a look at the attached diff.

>> - branch = self.factory.makeAnyBranch()
>> - input = "/%s/.bzr/README\n" % branch.unique_name
>> - expected = (
>> - "file:///var/tmp/bzrsync/%s/.bzr/README\n"
>> - % branch_id_to_path(branch.id))
>> - self.layer.txn.commit()
>> + branches = [
>> + self.factory.makeProductBranch(),
>> + self.factory.makePersonalBranch(),
>> + self.factory.makePackageBranch()]
>> + input = [
>> + "/%s/.bzr/README" % branch.unique_name
>> + for branch in branches] + [
>> + "/%s/changes" % branch.unique_name
>> + for branch in branches]
>> + expected = [
>> + 'file:///var/tmp/bzrsync/%s/.bzr/README'
>> + % branch_id_to_path(branch.id)
>> + for branch in branches] + [
>> + 'http://localhost:8080/%s/changes' % branch.unique_name
>> + for branch in branches]
>> + transaction.commit()
>> script_file = os.path.join(
>> config.root, 'scripts', 'branch-rewrite.py')
>> proc = subprocess.Popen(
>> [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
>> stderr=subprocess.PIPE, bufsize=0)
>> - proc.stdin.write(input)
>> - output = proc.stdout.readline()
>> + proc.stdin.write('\n'.join(input) + '\n')
>
> A comment here too about why the extra carriage return.

Subsumed by the above effort, I hope.

>> + output = []
>> + for i in range(len(input)):
>> + output.append(proc.stdout.readline())
>> os.kill(proc.pid, signal.SIGINT)
>> err = proc.stderr.read()
>> # The script produces logging output, but not to stderr.
>> self.assertEqual('', err)
>> - self.assertEqual(expected, output)
>> + self.assertEqual('\n'.join(expected) + '\n', ''.join(output))
>>
>>
>> def test_suite():
>>
>
>
> I look forward to seeing this tested live :)

So do I!

Cheers,
mwh

1=== modified file 'lib/lp/codehosting/rewrite.py'
2--- lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000
3+++ lib/lp/codehosting/rewrite.py 2009-07-22 04:41:03 +0000
4@@ -59,16 +59,15 @@
5 return branch_id, trailing, "HIT"
6 prefixes.append(prefix)
7 result = self.store.find(
8- Branch,
9- Branch.unique_name.is_in(prefixes), Branch.private == False)
10- try:
11- branch_id, unique_name = result.values(
12- Branch.id, Branch.unique_name).next()
13- except StopIteration:
14+ (Branch.id, Branch.unique_name),
15+ Branch.unique_name.is_in(prefixes), Branch.private == False).one()
16+ if result is None:
17 return None, None, "MISS"
18- self._cache[unique_name] = (branch_id, self._now())
19- trailing = location[len(unique_name) + 1:]
20- return branch_id, trailing, "MISS"
21+ else:
22+ branch_id, unique_name = result
23+ self._cache[unique_name] = (branch_id, self._now())
24+ trailing = location[len(unique_name) + 1:]
25+ return branch_id, trailing, "MISS"
26
27 def rewriteLine(self, resource_location):
28 """Rewrite 'resource_location' to a more concrete location.
29
30=== modified file 'lib/lp/codehosting/tests/test_rewrite.py'
31--- lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 03:23:28 +0000
32+++ lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 04:56:35 +0000
33@@ -161,12 +161,10 @@
34 self.factory.makeProductBranch(),
35 self.factory.makePersonalBranch(),
36 self.factory.makePackageBranch()]
37- input = [
38- "/%s/.bzr/README" % branch.unique_name
39- for branch in branches] + [
40- "/%s/changes" % branch.unique_name
41- for branch in branches]
42- expected = [
43+ input_lines = [
44+ "/%s/.bzr/README" % branch.unique_name for branch in branches] + [
45+ "/%s/changes" % branch.unique_name for branch in branches]
46+ expected_lines = [
47 'file:///var/tmp/bzrsync/%s/.bzr/README'
48 % branch_id_to_path(branch.id)
49 for branch in branches] + [
50@@ -178,15 +176,17 @@
51 proc = subprocess.Popen(
52 [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
53 stderr=subprocess.PIPE, bufsize=0)
54- proc.stdin.write('\n'.join(input) + '\n')
55- output = []
56- for i in range(len(input)):
57- output.append(proc.stdout.readline())
58+ output_lines = []
59+ # For each complete line of input, the script should, without
60+ # buffering, write a complete line of output.
61+ for input_line in input_lines:
62+ proc.stdin.write(input_line + '\n')
63+ output_lines.append(proc.stdout.readline().rstrip('\n'))
64 os.kill(proc.pid, signal.SIGINT)
65 err = proc.stderr.read()
66 # The script produces logging output, but not to stderr.
67 self.assertEqual('', err)
68- self.assertEqual('\n'.join(expected) + '\n', ''.join(output))
69+ self.assertEqual(expected_lines, output_lines)
70
71
72 def test_suite():

« Back to merge proposal