Code review comment for lp:~mwhudson/launchpad/much-faster-rewrite-map
- much-faster-rewrite-map
- Merge into db-devel
Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote : | # |
1 | === modified file 'lib/lp/codehosting/rewrite.py' |
2 | --- lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000 |
3 | +++ lib/lp/codehosting/rewrite.py 2009-07-22 04:41:03 +0000 |
4 | @@ -59,16 +59,15 @@ |
5 | return branch_id, trailing, "HIT" |
6 | prefixes.append(prefix) |
7 | result = self.store.find( |
8 | - Branch, |
9 | - Branch.unique_name.is_in(prefixes), Branch.private == False) |
10 | - try: |
11 | - branch_id, unique_name = result.values( |
12 | - Branch.id, Branch.unique_name).next() |
13 | - except StopIteration: |
14 | + (Branch.id, Branch.unique_name), |
15 | + Branch.unique_name.is_in(prefixes), Branch.private == False).one() |
16 | + if result is None: |
17 | return None, None, "MISS" |
18 | - self._cache[unique_name] = (branch_id, self._now()) |
19 | - trailing = location[len(unique_name) + 1:] |
20 | - return branch_id, trailing, "MISS" |
21 | + else: |
22 | + branch_id, unique_name = result |
23 | + self._cache[unique_name] = (branch_id, self._now()) |
24 | + trailing = location[len(unique_name) + 1:] |
25 | + return branch_id, trailing, "MISS" |
26 | |
27 | def rewriteLine(self, resource_location): |
28 | """Rewrite 'resource_location' to a more concrete location. |
29 | |
30 | === modified file 'lib/lp/codehosting/tests/test_rewrite.py' |
31 | --- lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 03:23:28 +0000 |
32 | +++ lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 04:56:35 +0000 |
33 | @@ -161,12 +161,10 @@ |
34 | self.factory.makeProductBranch(), |
35 | self.factory.makePersonalBranch(), |
36 | self.factory.makePackageBranch()] |
37 | - input = [ |
38 | - "/%s/.bzr/README" % branch.unique_name |
39 | - for branch in branches] + [ |
40 | - "/%s/changes" % branch.unique_name |
41 | - for branch in branches] |
42 | - expected = [ |
43 | + input_lines = [ |
44 | + "/%s/.bzr/README" % branch.unique_name for branch in branches] + [ |
45 | + "/%s/changes" % branch.unique_name for branch in branches] |
46 | + expected_lines = [ |
47 | 'file:///var/tmp/bzrsync/%s/.bzr/README' |
48 | % branch_id_to_path(branch.id) |
49 | for branch in branches] + [ |
50 | @@ -178,15 +176,17 @@ |
51 | proc = subprocess.Popen( |
52 | [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE, |
53 | stderr=subprocess.PIPE, bufsize=0) |
54 | - proc.stdin.write('\n'.join(input) + '\n') |
55 | - output = [] |
56 | - for i in range(len(input)): |
57 | - output.append(proc.stdout.readline()) |
58 | + output_lines = [] |
59 | + # For each complete line of input, the script should, without |
60 | + # buffering, write a complete line of output. |
61 | + for input_line in input_lines: |
62 | + proc.stdin.write(input_line + '\n') |
63 | + output_lines.append(proc.stdout.readline().rstrip('\n')) |
64 | os.kill(proc.pid, signal.SIGINT) |
65 | err = proc.stderr.read() |
66 | # The script produces logging output, but not to stderr. |
67 | self.assertEqual('', err) |
68 | - self.assertEqual('\n'.join(expected) + '\n', ''.join(output)) |
69 | + self.assertEqual(expected_lines, output_lines) |
70 | |
71 | |
72 | def test_suite(): |
Tim Penhey wrote: launchpad/ scripts/ logger. py' launchpad/ scripts/ logger. py 2009-06-25 05:30:52 +0000 launchpad/ scripts/ logger. py 2009-07-22 02:16:56 +0000 write(' %s: %s\n' % (prefix, ' '.join(stuff))) write(' %s: %s\n' % (prefix, msg % stuff))
> Review: Needs Information
> review needs-info
>
>> === modified file 'lib/canonical/
>> --- lib/canonical/
>> +++ lib/canonical/
>> @@ -113,8 +113,8 @@
>> def __init__(self):
>> self.buffer = StringIO()
>>
>> - def message(self, prefix, *stuff, **kw):
>> - self.buffer.
>> + def message(self, prefix, msg, *stuff, **kw):
>> + self.buffer.
>
> What is the impact of other code using message?
It means that logger.info('%f', 0.1) doesn't break.
> This to me looks like it'll brake things.
I don't think it will. ec2test will confirm :)
>> === modified file 'lib/lp/ codehosting/ rewrite. py' codehosting/ rewrite. py 2009-07-20 21:32:15 +0000 codehosting/ rewrite. py 2009-07-22 03:24:19 +0000 mClient) launchpad. webapp. interfaces import ( model.branch import Branch interfaces. codehosting import ( ANONYMOUS) launchpad. xmlrpc import faults twistedsupport import extract_result mClient( proxy, LAUNCHPAD_ ANONYMOUS, IStoreSelector) .get(MAIN_ STORE, url(self, path): codehosting. internal_ codebrowse_ root, TrailingPath( self, location): 1:].split( '/') codehosting. branch_ rewrite_ cache_lifetime) : len(prefix) + 1:] append( prefix) unique_ name.is_ in(prefixes) , Branch.private == False) unique_ name).next( ) unique_ name), unique_ name.is_ in(prefixes) , Branch.private == False).one()
>> --- lib/lp/
>> +++ lib/lp/
>> @@ -5,38 +5,71 @@
>> """
>>
>> import time
>> -import xmlrpclib
>>
>> from bzrlib import urlutils
>>
>> -from lp.codehosting.vfs import (
>> - branch_id_to_path, BranchFileSyste
>> +from canonical.
>> + IStoreSelector, MAIN_STORE, SLAVE_FLAVOR)
>> +from zope.component import getUtility
>> +from lp.code.
>> +from lp.codehosting.vfs import branch_id_to_path
>> +
>> from canonical.config import config
>> -from lp.code.
>> - BRANCH_TRANSPORT, LAUNCHPAD_
>> -from canonical.
>> -from canonical.
>>
>> __all__ = ['BranchRewriter']
>>
>>
>> class BranchRewriter:
>>
>> - def __init__(self, logger, proxy):
>> + def __init__(self, logger, _now=None):
>> """
>>
>> :param logger: Logger than messages about what the rewriter is
> doing
>> will be sent to.
>> :param proxy: A blocking proxy for a branchfilesystem endpoint.
>> """
>> + if _now is None:
>> + self._now = time.time
>> + else:
>> + self._now = _now
>> self.logger = logger
>> - self.client = BranchFileSyste
> 10.0)
>> + self.store = getUtility(
> SLAVE_FLAVOR)
>> + self._cache = {}
>>
>> def _codebrowse_
>> return urlutils.join(
>> config.
>> path)
>>
>> + def _getBranchIdAnd
>> + """Return the branch id and trailing path for 'location'.
>> +
>> + In addition this method returns whether the answer can from the
> cache
>> + or from the database.
>> + """
>> + parts = location[
>> + prefixes = []
>> + for i in range(1, len(parts) + 1):
>> + prefix = '/'.join(parts[:i])
>> + if prefix in self._cache:
>> + branch_id, inserted_time = self._cache[prefix]
>> + if (self._now() < inserted_time +
>> + config.
>> + trailing = location[
>> + return branch_id, trailing, "HIT"
>> + prefixes.
>> + result = self.store.find(
>> + Branch,
>> + Branch.
>> + try:
>> + branch_id, unique_name = result.values(
>> + Branch.id, Branch.
>> + except StopIteration:
>> + return None, None, "MISS"
>
> How about:
>
> result = self.store.find(
> (Branch.id, Branch.
> Branch.
>
> That way result should be None if not found, and a tuple of id and unique_name
> if it was found.
>
> That'll make this code a little cleaner.
Ah, yes, thanks for the tip.
>> === modified file 'lib/lp/ codehosting/ tests/test_ rewrite. py' codehosting/ tests/test_ rewrite. py 2009-07-17 00:26:05 +0000 codehosting/ tests/test_ rewrite. py 2009-07-22 03:23:28 +0000 terScript( TestCaseWithFac tory): erLayer nalLayer
>> --- lib/lp/
>> +++ lib/lp/
>> class TestBranchRewri
>>
>> - layer = ZopelessAppServ
>> + layer = DatabaseFunctio
>>
>> def test_script(self):
>
> I think a comment here explaining why you have '\n'.join(expected) + '\n'
> would be *really* helpful.
>
> I think it has todo with how the rewriting works through apache, but a
> comment would help.
I've tried to make this make more sense -- have a look at the attached diff.
>> - branch = self.factory. makeAnyBranch( ) //var/tmp/ bzrsync/ %s/.bzr/ README\ n" id_to_path( branch. id)) txn.commit( ) makeProductBran ch(), makePersonalBra nch(), makePackageBran ch()] //var/tmp/ bzrsync/ %s/.bzr/ README' id_to_path( branch. id) localhost: 8080/%s/ changes' % branch.unique_name commit( ) rewrite. py') s.PIPE, stdout= subprocess. PIPE, subprocess. PIPE, bufsize=0) write(input) readline( ) write(' \n'.join( input) + '\n')
>> - input = "/%s/.bzr/README\n" % branch.unique_name
>> - expected = (
>> - "file:/
>> - % branch_
>> - self.layer.
>> + branches = [
>> + self.factory.
>> + self.factory.
>> + self.factory.
>> + input = [
>> + "/%s/.bzr/README" % branch.unique_name
>> + for branch in branches] + [
>> + "/%s/changes" % branch.unique_name
>> + for branch in branches]
>> + expected = [
>> + 'file:/
>> + % branch_
>> + for branch in branches] + [
>> + 'http://
>> + for branch in branches]
>> + transaction.
>> script_file = os.path.join(
>> config.root, 'scripts', 'branch-
>> proc = subprocess.Popen(
>> [script_file], stdin=subproces
>> stderr=
>> - proc.stdin.
>> - output = proc.stdout.
>> + proc.stdin.
>
> A comment here too about why the extra carriage return.
Subsumed by the above effort, I hope.
>> + output = [] append( proc.stdout. readline( )) l('', err) l(expected, output) l('\n'. join(expected) + '\n', ''.join(output))
>> + for i in range(len(input)):
>> + output.
>> os.kill(proc.pid, signal.SIGINT)
>> err = proc.stderr.read()
>> # The script produces logging output, but not to stderr.
>> self.assertEqua
>> - self.assertEqua
>> + self.assertEqua
>>
>>
>> def test_suite():
>>
>
>
> I look forward to seeing this tested live :)
So do I!
Cheers,
mwh