Merge lp:~mwhudson/launchpad/much-faster-rewrite-map into lp:launchpad/db-devel
- much-faster-rewrite-map
- Merge into db-devel
Status: | Merged | ||||
---|---|---|---|---|---|
Approved by: | Tim Penhey | ||||
Approved revision: | no longer in the source branch. | ||||
Merged at revision: | not available | ||||
Proposed branch: | lp:~mwhudson/launchpad/much-faster-rewrite-map | ||||
Merge into: | lp:launchpad/db-devel | ||||
Diff against target: | None lines | ||||
To merge this branch: | bzr merge lp:~mwhudson/launchpad/much-faster-rewrite-map | ||||
Related bugs: |
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Tim Penhey (community) | Approve | ||
Review via email: mp+9121@code.launchpad.net |
Commit message
Description of the change
Michael Hudson-Doyle (mwhudson) wrote : | # |
Tim Penhey (thumper) wrote : | # |
review needs-info
> === modified file 'lib/canonical/
> --- lib/canonical/
> +++ lib/canonical/
> @@ -113,8 +113,8 @@
> def __init__(self):
> self.buffer = StringIO()
>
> - def message(self, prefix, *stuff, **kw):
> - self.buffer.
> + def message(self, prefix, msg, *stuff, **kw):
> + self.buffer.
What is the impact of other code using message?
This to me looks like it'll brake things.
> === modified file 'lib/lp/
> --- lib/lp/
> +++ lib/lp/
> @@ -5,38 +5,71 @@
> """
>
> import time
> -import xmlrpclib
>
> from bzrlib import urlutils
>
> -from lp.codehosting.vfs import (
> - branch_id_to_path, BranchFileSyste
> +from canonical.
> + IStoreSelector, MAIN_STORE, SLAVE_FLAVOR)
> +from zope.component import getUtility
> +from lp.code.
> +from lp.codehosting.vfs import branch_id_to_path
> +
> from canonical.config import config
> -from lp.code.
> - BRANCH_TRANSPORT, LAUNCHPAD_
> -from canonical.
> -from canonical.
>
> __all__ = ['BranchRewriter']
>
>
> class BranchRewriter:
>
> - def __init__(self, logger, proxy):
> + def __init__(self, logger, _now=None):
> """
>
> :param logger: Logger than messages about what the rewriter is
doing
> will be sent to.
> :param proxy: A blocking proxy for a branchfilesystem endpoint.
> """
> + if _now is None:
> + self._now = time.time
> + else:
> + self._now = _now
> self.logger = logger
> - self.client = BranchFileSyste
10.0)
> + self.store = getUtility(
SLAVE_FLAVOR)
> + self._cache = {}
>
> def _codebrowse_
> return urlutils.join(
> config.
> path)
>
> + def _getBranchIdAnd
> + """Return the branch id and trailing path for 'location'.
> +
> + In addition this method returns whether the answer can from the
cache
> + or from the database.
> + """
> + parts = location[
> + prefixes = []
> + for i in range(1, len(parts) + 1):
> + prefix = '/'.join(parts[:i])
> + if prefix in self._cache:
> + branch_id, inserted_time = self._cache[prefix]
> + if (self._now() < inserted_time +
> + config.
> + trailing = location[
> + return branch_id, trailing, "HIT"
> + prefixes.
> + ...
Michael Hudson-Doyle (mwhudson) wrote : | # |
Tim Penhey wrote:
> Review: Needs Information
> review needs-info
>
>> === modified file 'lib/canonical/
>> --- lib/canonical/
>> +++ lib/canonical/
>> @@ -113,8 +113,8 @@
>> def __init__(self):
>> self.buffer = StringIO()
>>
>> - def message(self, prefix, *stuff, **kw):
>> - self.buffer.
>> + def message(self, prefix, msg, *stuff, **kw):
>> + self.buffer.
>
> What is the impact of other code using message?
It means that logger.info('%f', 0.1) doesn't break.
> This to me looks like it'll brake things.
I don't think it will. ec2test will confirm :)
>> === modified file 'lib/lp/
>> --- lib/lp/
>> +++ lib/lp/
>> @@ -5,38 +5,71 @@
>> """
>>
>> import time
>> -import xmlrpclib
>>
>> from bzrlib import urlutils
>>
>> -from lp.codehosting.vfs import (
>> - branch_id_to_path, BranchFileSyste
>> +from canonical.
>> + IStoreSelector, MAIN_STORE, SLAVE_FLAVOR)
>> +from zope.component import getUtility
>> +from lp.code.
>> +from lp.codehosting.vfs import branch_id_to_path
>> +
>> from canonical.config import config
>> -from lp.code.
>> - BRANCH_TRANSPORT, LAUNCHPAD_
>> -from canonical.
>> -from canonical.
>>
>> __all__ = ['BranchRewriter']
>>
>>
>> class BranchRewriter:
>>
>> - def __init__(self, logger, proxy):
>> + def __init__(self, logger, _now=None):
>> """
>>
>> :param logger: Logger than messages about what the rewriter is
> doing
>> will be sent to.
>> :param proxy: A blocking proxy for a branchfilesystem endpoint.
>> """
>> + if _now is None:
>> + self._now = time.time
>> + else:
>> + self._now = _now
>> self.logger = logger
>> - self.client = BranchFileSyste
> 10.0)
>> + self.store = getUtility(
> SLAVE_FLAVOR)
>> + self._cache = {}
>>
>> def _codebrowse_
>> return urlutils.join(
>> config.
>> path)
>>
>> + def _getBranchIdAnd
>> + """Return the branch id and trailing path for 'location'.
>> +
>> + In addition this method returns whether the answer can from the
> cache
>> + or from the database.
>> + """
>> + parts = location[
>> + prefixes = []
>> + for i in range(1, len(parts) + 1):
>> + prefix = '/'.join(parts[:i])
>> + if prefix in self._cache:
>> + branch_id, inserted_time = self._cache[prefix]
>> + if (self._now() < inserted_tim...
1 | === modified file 'lib/lp/codehosting/rewrite.py' |
2 | --- lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000 |
3 | +++ lib/lp/codehosting/rewrite.py 2009-07-22 04:41:03 +0000 |
4 | @@ -59,16 +59,15 @@ |
5 | return branch_id, trailing, "HIT" |
6 | prefixes.append(prefix) |
7 | result = self.store.find( |
8 | - Branch, |
9 | - Branch.unique_name.is_in(prefixes), Branch.private == False) |
10 | - try: |
11 | - branch_id, unique_name = result.values( |
12 | - Branch.id, Branch.unique_name).next() |
13 | - except StopIteration: |
14 | + (Branch.id, Branch.unique_name), |
15 | + Branch.unique_name.is_in(prefixes), Branch.private == False).one() |
16 | + if result is None: |
17 | return None, None, "MISS" |
18 | - self._cache[unique_name] = (branch_id, self._now()) |
19 | - trailing = location[len(unique_name) + 1:] |
20 | - return branch_id, trailing, "MISS" |
21 | + else: |
22 | + branch_id, unique_name = result |
23 | + self._cache[unique_name] = (branch_id, self._now()) |
24 | + trailing = location[len(unique_name) + 1:] |
25 | + return branch_id, trailing, "MISS" |
26 | |
27 | def rewriteLine(self, resource_location): |
28 | """Rewrite 'resource_location' to a more concrete location. |
29 | |
30 | === modified file 'lib/lp/codehosting/tests/test_rewrite.py' |
31 | --- lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 03:23:28 +0000 |
32 | +++ lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 04:56:35 +0000 |
33 | @@ -161,12 +161,10 @@ |
34 | self.factory.makeProductBranch(), |
35 | self.factory.makePersonalBranch(), |
36 | self.factory.makePackageBranch()] |
37 | - input = [ |
38 | - "/%s/.bzr/README" % branch.unique_name |
39 | - for branch in branches] + [ |
40 | - "/%s/changes" % branch.unique_name |
41 | - for branch in branches] |
42 | - expected = [ |
43 | + input_lines = [ |
44 | + "/%s/.bzr/README" % branch.unique_name for branch in branches] + [ |
45 | + "/%s/changes" % branch.unique_name for branch in branches] |
46 | + expected_lines = [ |
47 | 'file:///var/tmp/bzrsync/%s/.bzr/README' |
48 | % branch_id_to_path(branch.id) |
49 | for branch in branches] + [ |
50 | @@ -178,15 +176,17 @@ |
51 | proc = subprocess.Popen( |
52 | [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE, |
53 | stderr=subprocess.PIPE, bufsize=0) |
54 | - proc.stdin.write('\n'.join(input) + '\n') |
55 | - output = [] |
56 | - for i in range(len(input)): |
57 | - output.append(proc.stdout.readline()) |
58 | + output_lines = [] |
59 | + # For each complete line of input, the script should, without |
60 | + # buffering, write a complete line of output. |
61 | + for input_line in input_lines: |
62 | + proc.stdin.write(input_line + '\n') |
63 | + output_lines.append(proc.stdout.readline().rstrip('\n')) |
64 | os.kill(proc.pid, signal.SIGINT) |
65 | err = proc.stderr.read() |
66 | # The script produces logging output, but not to stderr. |
67 | self.assertEqual('', err) |
68 | - self.assertEqual('\n'.join(expected) + '\n', ''.join(output)) |
69 | + self.assertEqual(expected_lines, output_lines) |
70 | |
71 | |
72 | def test_suite(): |
Stuart Bishop (stub) wrote : | # |
On Wed, Jul 22, 2009 at 11:27 AM, Tim Penhey<email address hidden> wrote:
>> + self.store = getUtility(
> SLAVE_FLAVOR)
You can spell this 'self.store = ISlaveStore(
nicer and more meaningful.
>> + if prefix in self._cache:
>> + branch_id, inserted_time = self._cache[prefix]
>> + if (self._now() < inserted_time +
>> + config.
>> + trailing = location[
>> + return branch_id, trailing, "HIT"
>> + prefixes.
>> + result = self.store.find(
>> + Branch,
>> + Branch.
>> + try:
>> + branch_id, unique_name = result.values(
>> + Branch.id, Branch.
>> + except StopIteration:
>> + return None, None, "MISS"
How do we know the Branch was retrieved from the database and not the
Storm cache?
I also don't see any commits. Both of these issues can be fixed in the
schema-lazr.conf - set the isolation level to autocommit and the
storm_cache_size to 0 for this component.
> How about:
>
> result = self.store.find(
> (Branch.id, Branch.
> Branch.
>
> That way result should be None if not found, and a tuple of id and unique_name
> if it was found.
>
> That'll make this code a little cleaner.
Shouldn't that be .any()? .one() will explode if multiple rows matched.
--
Stuart Bishop <email address hidden>
http://
Michael Hudson-Doyle (mwhudson) wrote : | # |
Hi Stuart, thanks for taking a look!
Stuart Bishop wrote:
> On Wed, Jul 22, 2009 at 11:27 AM, Tim Penhey<email address hidden> wrote:
>>> + self.store = getUtility(
>> SLAVE_FLAVOR)
>
> You can spell this 'self.store = ISlaveStore(
> nicer and more meaningful.
Ah, that is nicer.
>>> + if prefix in self._cache:
>>> + branch_id, inserted_time = self._cache[prefix]
>>> + if (self._now() < inserted_time +
>>> + config.
>>> + trailing = location[
>>> + return branch_id, trailing, "HIT"
>>> + prefixes.
>>> + result = self.store.find(
>>> + Branch,
>>> + Branch.
>>> + try:
>>> + branch_id, unique_name = result.values(
>>> + Branch.id, Branch.
>>> + except StopIteration:
>>> + return None, None, "MISS"
>
> How do we know the Branch was retrieved from the database and not the
> Storm cache?
We don't. Is there some way I could test for this?
> I also don't see any commits. Both of these issues can be fixed in the
> schema-lazr.conf - set the isolation level to autocommit and the
> storm_cache_size to 0 for this component.
Why is autocommit better than read committed, given that we don't write
to the database? Serializable would be very very wrong....
I also admit that I don't know what I have to change in schema-lazr.conf
to change the cache size -- what do you mean by 'component'?
>> How about:
>>
>> result = self.store.find(
>> (Branch.id, Branch.
>> Branch.
>>
>> That way result should be None if not found, and a tuple of id and unique_name
>> if it was found.
>>
>> That'll make this code a little cleaner.
>
> Shouldn't that be .any()? .one() will explode if multiple rows matched.
We want it to explode if multiple rows match.
Cheers,
mwh
Michael Hudson-Doyle (mwhudson) wrote : | # |
Michael Hudson wrote:
>>>> + if prefix in self._cache:
>>>> + branch_id, inserted_time = self._cache[prefix]
>>>> + if (self._now() < inserted_time +
>>>> + config.
>>>> + trailing = location[
>>>> + return branch_id, trailing, "HIT"
>>>> + prefixes.
>>>> + result = self.store.find(
>>>> + Branch,
>>>> + Branch.
>>>> + try:
>>>> + branch_id, unique_name = result.values(
>>>> + Branch.id, Branch.
>>>> + except StopIteration:
>>>> + return None, None, "MISS"
>> How do we know the Branch was retrieved from the database and not the
>> Storm cache?
>
> We don't. Is there some way I could test for this?
Actually we do: we never retrieve a whole branch object from the
database, so there's nothing for storm to cache.
Cheers,
mwh
Preview Diff
1 | === modified file 'database/schema/security.cfg' |
2 | --- database/schema/security.cfg 2009-07-19 04:41:14 +0000 |
3 | +++ database/schema/security.cfg 2009-07-22 03:37:42 +0000 |
4 | @@ -1699,3 +1699,7 @@ |
5 | public.person = SELECT |
6 | public.teammembership = SELECT |
7 | public.teamparticipation = SELECT |
8 | + |
9 | +[branch-rewrite] |
10 | +type=user |
11 | +public.branch = SELECT |
12 | |
13 | === modified file 'lib/canonical/config/schema-lazr.conf' |
14 | --- lib/canonical/config/schema-lazr.conf 2009-07-18 01:03:09 +0000 |
15 | +++ lib/canonical/config/schema-lazr.conf 2009-07-22 03:37:42 +0000 |
16 | @@ -365,6 +365,12 @@ |
17 | # save ourselves some load by directing them all to a HTTP url. |
18 | hot_products: |
19 | |
20 | +# Branch rewrite cache lifetime. |
21 | +# |
22 | +# How long, in seconds, to cache results of the branch path -> id |
23 | +# mapping done by branch-rewrite.py for. |
24 | +branch_rewrite_cache_lifetime: 10 |
25 | + |
26 | |
27 | [codeimport] |
28 | # Where the Bazaar imports are stored. |
29 | |
30 | === modified file 'lib/canonical/launchpad/scripts/logger.py' |
31 | --- lib/canonical/launchpad/scripts/logger.py 2009-06-25 05:30:52 +0000 |
32 | +++ lib/canonical/launchpad/scripts/logger.py 2009-07-22 02:16:56 +0000 |
33 | @@ -113,8 +113,8 @@ |
34 | def __init__(self): |
35 | self.buffer = StringIO() |
36 | |
37 | - def message(self, prefix, *stuff, **kw): |
38 | - self.buffer.write('%s: %s\n' % (prefix, ' '.join(stuff))) |
39 | + def message(self, prefix, msg, *stuff, **kw): |
40 | + self.buffer.write('%s: %s\n' % (prefix, msg % stuff)) |
41 | |
42 | if 'exc_info' in kw: |
43 | exception = traceback.format_exception(*sys.exc_info()) |
44 | |
45 | === modified file 'lib/lp/codehosting/rewrite.py' |
46 | --- lib/lp/codehosting/rewrite.py 2009-07-20 21:32:15 +0000 |
47 | +++ lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000 |
48 | @@ -5,38 +5,71 @@ |
49 | """ |
50 | |
51 | import time |
52 | -import xmlrpclib |
53 | |
54 | from bzrlib import urlutils |
55 | |
56 | -from lp.codehosting.vfs import ( |
57 | - branch_id_to_path, BranchFileSystemClient) |
58 | +from canonical.launchpad.webapp.interfaces import ( |
59 | + IStoreSelector, MAIN_STORE, SLAVE_FLAVOR) |
60 | +from zope.component import getUtility |
61 | +from lp.code.model.branch import Branch |
62 | +from lp.codehosting.vfs import branch_id_to_path |
63 | + |
64 | from canonical.config import config |
65 | -from lp.code.interfaces.codehosting import ( |
66 | - BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS) |
67 | -from canonical.launchpad.xmlrpc import faults |
68 | -from canonical.twistedsupport import extract_result |
69 | |
70 | __all__ = ['BranchRewriter'] |
71 | |
72 | |
73 | class BranchRewriter: |
74 | |
75 | - def __init__(self, logger, proxy): |
76 | + def __init__(self, logger, _now=None): |
77 | """ |
78 | |
79 | :param logger: Logger than messages about what the rewriter is doing |
80 | will be sent to. |
81 | :param proxy: A blocking proxy for a branchfilesystem endpoint. |
82 | """ |
83 | + if _now is None: |
84 | + self._now = time.time |
85 | + else: |
86 | + self._now = _now |
87 | self.logger = logger |
88 | - self.client = BranchFileSystemClient(proxy, LAUNCHPAD_ANONYMOUS, 10.0) |
89 | + self.store = getUtility(IStoreSelector).get(MAIN_STORE, SLAVE_FLAVOR) |
90 | + self._cache = {} |
91 | |
92 | def _codebrowse_url(self, path): |
93 | return urlutils.join( |
94 | config.codehosting.internal_codebrowse_root, |
95 | path) |
96 | |
97 | + def _getBranchIdAndTrailingPath(self, location): |
98 | + """Return the branch id and trailing path for 'location'. |
99 | + |
100 | + In addition this method returns whether the answer can from the cache |
101 | + or from the database. |
102 | + """ |
103 | + parts = location[1:].split('/') |
104 | + prefixes = [] |
105 | + for i in range(1, len(parts) + 1): |
106 | + prefix = '/'.join(parts[:i]) |
107 | + if prefix in self._cache: |
108 | + branch_id, inserted_time = self._cache[prefix] |
109 | + if (self._now() < inserted_time + |
110 | + config.codehosting.branch_rewrite_cache_lifetime): |
111 | + trailing = location[len(prefix) + 1:] |
112 | + return branch_id, trailing, "HIT" |
113 | + prefixes.append(prefix) |
114 | + result = self.store.find( |
115 | + Branch, |
116 | + Branch.unique_name.is_in(prefixes), Branch.private == False) |
117 | + try: |
118 | + branch_id, unique_name = result.values( |
119 | + Branch.id, Branch.unique_name).next() |
120 | + except StopIteration: |
121 | + return None, None, "MISS" |
122 | + self._cache[unique_name] = (branch_id, self._now()) |
123 | + trailing = location[len(unique_name) + 1:] |
124 | + return branch_id, trailing, "MISS" |
125 | + |
126 | def rewriteLine(self, resource_location): |
127 | """Rewrite 'resource_location' to a more concrete location. |
128 | |
129 | @@ -65,35 +98,26 @@ |
130 | Other errors are allowed to propagate, on the assumption that the |
131 | caller will catch and log them. |
132 | """ |
133 | - T = time.time() |
134 | # Codebrowse generates references to its images and stylesheets |
135 | # starting with "/static", so pass them on unthinkingly. |
136 | + T = time.time() |
137 | + cached = None |
138 | if resource_location.startswith('/static/'): |
139 | - return self._codebrowse_url(resource_location) |
140 | - trailingSlash = resource_location.endswith('/') |
141 | - deferred = self.client.translatePath(resource_location) |
142 | - try: |
143 | - transport_type, info, trailing = extract_result(deferred) |
144 | - except xmlrpclib.Fault, f: |
145 | - if (faults.check_fault(f, faults.PathTranslationError) |
146 | - or faults.check_fault(f, faults.PermissionDenied)): |
147 | - # In this situation, we'd *like* to generate a 404 |
148 | - # (PathTranslationError) or 301 (PermissionDenied) error. But |
149 | - # we can't, so we just forward on to codebrowse which can. |
150 | - return self._codebrowse_url(resource_location) |
151 | - else: |
152 | - raise |
153 | - if transport_type == BRANCH_TRANSPORT: |
154 | - if trailing.startswith('.bzr'): |
155 | - r = urlutils.join( |
156 | - config.codehosting.internal_branch_by_id_root, |
157 | - branch_id_to_path(info['id']), trailing) |
158 | - if trailingSlash: |
159 | - r += '/' |
160 | - else: |
161 | + r = self._codebrowse_url(resource_location) |
162 | + cached = 'N/A' |
163 | + else: |
164 | + branch_id, trailing, cached = self._getBranchIdAndTrailingPath( |
165 | + resource_location) |
166 | + if branch_id is None: |
167 | r = self._codebrowse_url(resource_location) |
168 | - self.logger.info( |
169 | - "%r -> %r (%fs)", resource_location, r, time.time() - T) |
170 | - return r |
171 | - else: |
172 | - return "NULL" |
173 | + else: |
174 | + if trailing.startswith('/.bzr'): |
175 | + r = urlutils.join( |
176 | + config.codehosting.internal_branch_by_id_root, |
177 | + branch_id_to_path(branch_id), trailing[1:]) |
178 | + else: |
179 | + r = self._codebrowse_url(resource_location) |
180 | + self.logger.info( |
181 | + "%r -> %r (%fs, cache: %s)", |
182 | + resource_location, r, time.time() - T, cached) |
183 | + return r |
184 | |
185 | === modified file 'lib/lp/codehosting/tests/test_rewrite.py' |
186 | --- lib/lp/codehosting/tests/test_rewrite.py 2009-07-17 00:26:05 +0000 |
187 | +++ lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 03:23:28 +0000 |
188 | @@ -5,68 +5,91 @@ |
189 | |
190 | __metaclass__ = type |
191 | |
192 | +import re |
193 | import os |
194 | import signal |
195 | import subprocess |
196 | import unittest |
197 | |
198 | +import transaction |
199 | + |
200 | +from zope.security.proxy import removeSecurityProxy |
201 | + |
202 | from lp.codehosting.vfs import branch_id_to_path |
203 | -from lp.codehosting.inmemory import InMemoryFrontend, XMLRPCWrapper |
204 | from lp.codehosting.rewrite import BranchRewriter |
205 | from canonical.config import config |
206 | -from lp.testing import TestCase, TestCaseWithFactory |
207 | -from canonical.launchpad.scripts import QuietFakeLogger |
208 | -from canonical.testing.layers import ZopelessAppServerLayer |
209 | - |
210 | - |
211 | -class TestBranchRewriter(TestCase): |
212 | +from lp.testing import FakeTime, TestCaseWithFactory |
213 | +from canonical.launchpad.scripts import BufferLogger |
214 | +from canonical.testing.layers import DatabaseFunctionalLayer |
215 | + |
216 | + |
217 | +class TestBranchRewriter(TestCaseWithFactory): |
218 | + |
219 | + layer = DatabaseFunctionalLayer |
220 | |
221 | def setUp(self): |
222 | - frontend = InMemoryFrontend() |
223 | - self._branchfs = frontend.getFilesystemEndpoint() |
224 | - self.factory = frontend.getLaunchpadObjectFactory() |
225 | + TestCaseWithFactory.setUp(self) |
226 | + self.fake_time = FakeTime(0) |
227 | |
228 | def makeRewriter(self): |
229 | - return BranchRewriter( |
230 | - QuietFakeLogger(), XMLRPCWrapper(self._branchfs)) |
231 | - |
232 | - def test_translateLine_found_dot_bzr(self): |
233 | + return BranchRewriter(BufferLogger(), self.fake_time.now) |
234 | + |
235 | + def getLoggerOutput(self, rewriter): |
236 | + return rewriter.logger.buffer.getvalue() |
237 | + |
238 | + def test_rewriteLine_found_dot_bzr(self): |
239 | # Requests for /$branch_name/.bzr/... are redirected to where the |
240 | # branches are served from by ID. |
241 | rewriter = self.makeRewriter() |
242 | - branch = self.factory.makeAnyBranch() |
243 | - line = rewriter.rewriteLine("/%s/.bzr/README" % branch.unique_name) |
244 | - self.assertEqual( |
245 | + branches = [ |
246 | + self.factory.makeProductBranch(), |
247 | + self.factory.makePersonalBranch(), |
248 | + self.factory.makePackageBranch()] |
249 | + transaction.commit() |
250 | + output = [ |
251 | + rewriter.rewriteLine("/%s/.bzr/README" % branch.unique_name) |
252 | + for branch in branches] |
253 | + expected = [ |
254 | 'file:///var/tmp/bzrsync/%s/.bzr/README' |
255 | - % branch_id_to_path(branch.id), |
256 | - line) |
257 | + % branch_id_to_path(branch.id) |
258 | + for branch in branches] |
259 | + self.assertEqual(expected, output) |
260 | |
261 | - def test_translateLine_found_not_dot_bzr(self): |
262 | + def test_rewriteLine_found_not_dot_bzr(self): |
263 | # Requests for /$branch_name/... that are not to .bzr directories are |
264 | # redirected to codebrowse. |
265 | rewriter = self.makeRewriter() |
266 | - branch = self.factory.makeAnyBranch() |
267 | - output = rewriter.rewriteLine("/%s/changes" % branch.unique_name) |
268 | - self.assertEqual( |
269 | - 'http://localhost:8080/%s/changes' % branch.unique_name, |
270 | - output) |
271 | + branches = [ |
272 | + self.factory.makeProductBranch(), |
273 | + self.factory.makePersonalBranch(), |
274 | + self.factory.makePackageBranch()] |
275 | + transaction.commit() |
276 | + output = [ |
277 | + rewriter.rewriteLine("/%s/changes" % branch.unique_name) |
278 | + for branch in branches] |
279 | + expected = [ |
280 | + 'http://localhost:8080/%s/changes' % branch.unique_name |
281 | + for branch in branches] |
282 | + self.assertEqual(expected, output) |
283 | |
284 | - def test_translateLine_private(self): |
285 | + def test_rewriteLine_private(self): |
286 | # All requests for /$branch_name/... for private branches are |
287 | # rewritten to codebrowse, which will then redirect them to https and |
288 | # handle them there. |
289 | rewriter = self.makeRewriter() |
290 | - branch = self.factory.makeBranch(private=True) |
291 | - output = rewriter.rewriteLine("/%s/changes" % branch.unique_name) |
292 | - self.assertEqual( |
293 | - 'http://localhost:8080/%s/changes' % branch.unique_name, |
294 | - output) |
295 | - output = rewriter.rewriteLine("/%s/.bzr" % branch.unique_name) |
296 | - self.assertEqual( |
297 | - 'http://localhost:8080/%s/.bzr' % branch.unique_name, |
298 | + branch = self.factory.makeAnyBranch(private=True) |
299 | + unique_name = removeSecurityProxy(branch).unique_name |
300 | + transaction.commit() |
301 | + output = [ |
302 | + rewriter.rewriteLine("/%s/changes" % unique_name), |
303 | + rewriter.rewriteLine("/%s/.bzr" % unique_name) |
304 | + ] |
305 | + self.assertEqual( |
306 | + ['http://localhost:8080/%s/changes' % unique_name, |
307 | + 'http://localhost:8080/%s/.bzr' % unique_name], |
308 | output) |
309 | |
310 | - def test_translateLine_static(self): |
311 | + def test_rewriteLine_static(self): |
312 | # Requests to /static are rewritten to codebrowse urls. |
313 | rewriter = self.makeRewriter() |
314 | output = rewriter.rewriteLine("/static/foo") |
315 | @@ -74,7 +97,7 @@ |
316 | 'http://localhost:8080/static/foo', |
317 | output) |
318 | |
319 | - def test_translateLine_not_found(self): |
320 | + def test_rewriteLine_not_found(self): |
321 | # If the request does not map to a branch, we redirect it to |
322 | # codebrowse as it can generate a 404. |
323 | rewriter = self.makeRewriter() |
324 | @@ -84,30 +107,86 @@ |
325 | 'http://localhost:8080%s' % not_found_path, |
326 | output) |
327 | |
328 | + def test_rewriteLine_logs_cache_miss(self): |
329 | + # The first request for a branch misses the cache and logs this fact. |
330 | + rewriter = self.makeRewriter() |
331 | + branch = self.factory.makeAnyBranch() |
332 | + transaction.commit() |
333 | + rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README') |
334 | + logging_output = self.getLoggerOutput(rewriter) |
335 | + self.assertIsNot( |
336 | + None, |
337 | + re.match("INFO: .* -> .* (.*s, cache: MISS)", logging_output), |
338 | + "No miss found in %r" % logging_output) |
339 | + |
340 | + def test_rewriteLine_logs_cache_hit(self): |
341 | + # The second request for a branch misses the cache and logs this fact. |
342 | + rewriter = self.makeRewriter() |
343 | + branch = self.factory.makeAnyBranch() |
344 | + transaction.commit() |
345 | + rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README') |
346 | + rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README') |
347 | + logging_output_lines = self.getLoggerOutput(rewriter).strip().split('\n') |
348 | + self.assertEqual(2, len(logging_output_lines)) |
349 | + self.assertIsNot( |
350 | + None, |
351 | + re.match("INFO: .* -> .* (.*s, cache: HIT)", |
352 | + logging_output_lines[-1]), |
353 | + "No hit found in %r" % logging_output_lines[-1]) |
354 | + |
355 | + def test_rewriteLine_cache_expires(self): |
356 | + # The second request for a branch misses the cache and logs this fact. |
357 | + rewriter = self.makeRewriter() |
358 | + branch = self.factory.makeAnyBranch() |
359 | + transaction.commit() |
360 | + rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README') |
361 | + self.fake_time.advance( |
362 | + config.codehosting.branch_rewrite_cache_lifetime + 1) |
363 | + rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README') |
364 | + logging_output_lines = self.getLoggerOutput(rewriter).strip().split('\n') |
365 | + self.assertEqual(2, len(logging_output_lines)) |
366 | + self.assertIsNot( |
367 | + None, |
368 | + re.match("INFO: .* -> .* (.*s, cache: MISS)", |
369 | + logging_output_lines[-1]), |
370 | + "No miss found in %r" % logging_output_lines[-1]) |
371 | + |
372 | |
373 | class TestBranchRewriterScript(TestCaseWithFactory): |
374 | |
375 | - layer = ZopelessAppServerLayer |
376 | + layer = DatabaseFunctionalLayer |
377 | |
378 | def test_script(self): |
379 | - branch = self.factory.makeAnyBranch() |
380 | - input = "/%s/.bzr/README\n" % branch.unique_name |
381 | - expected = ( |
382 | - "file:///var/tmp/bzrsync/%s/.bzr/README\n" |
383 | - % branch_id_to_path(branch.id)) |
384 | - self.layer.txn.commit() |
385 | + branches = [ |
386 | + self.factory.makeProductBranch(), |
387 | + self.factory.makePersonalBranch(), |
388 | + self.factory.makePackageBranch()] |
389 | + input = [ |
390 | + "/%s/.bzr/README" % branch.unique_name |
391 | + for branch in branches] + [ |
392 | + "/%s/changes" % branch.unique_name |
393 | + for branch in branches] |
394 | + expected = [ |
395 | + 'file:///var/tmp/bzrsync/%s/.bzr/README' |
396 | + % branch_id_to_path(branch.id) |
397 | + for branch in branches] + [ |
398 | + 'http://localhost:8080/%s/changes' % branch.unique_name |
399 | + for branch in branches] |
400 | + transaction.commit() |
401 | script_file = os.path.join( |
402 | config.root, 'scripts', 'branch-rewrite.py') |
403 | proc = subprocess.Popen( |
404 | [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE, |
405 | stderr=subprocess.PIPE, bufsize=0) |
406 | - proc.stdin.write(input) |
407 | - output = proc.stdout.readline() |
408 | + proc.stdin.write('\n'.join(input) + '\n') |
409 | + output = [] |
410 | + for i in range(len(input)): |
411 | + output.append(proc.stdout.readline()) |
412 | os.kill(proc.pid, signal.SIGINT) |
413 | err = proc.stderr.read() |
414 | # The script produces logging output, but not to stderr. |
415 | self.assertEqual('', err) |
416 | - self.assertEqual(expected, output) |
417 | + self.assertEqual('\n'.join(expected) + '\n', ''.join(output)) |
418 | |
419 | |
420 | def test_suite(): |
421 | |
422 | === modified file 'lib/lp/testing/__init__.py' |
423 | --- lib/lp/testing/__init__.py 2009-07-17 18:46:25 +0000 |
424 | +++ lib/lp/testing/__init__.py 2009-07-22 02:26:16 +0000 |
425 | @@ -224,10 +224,11 @@ |
426 | self.assertTrue(expected is observed, |
427 | "%r is not %r" % (expected, observed)) |
428 | |
429 | - def assertIsNot(self, expected, observed): |
430 | + def assertIsNot(self, expected, observed, msg=None): |
431 | """Assert that `expected` is not the same object as `observed`.""" |
432 | - self.assertTrue(expected is not observed, |
433 | - "%r is %r" % (expected, observed)) |
434 | + if msg is None: |
435 | + msg = "%r is %r" % (expected, observed) |
436 | + self.assertTrue(expected is not observed, msg) |
437 | |
438 | def assertIn(self, needle, haystack): |
439 | """Assert that 'needle' is in 'haystack'.""" |
440 | |
441 | === modified file 'scripts/branch-rewrite.py' |
442 | --- scripts/branch-rewrite.py 2009-06-30 16:56:07 +0000 |
443 | +++ scripts/branch-rewrite.py 2009-07-22 03:36:19 +0000 |
444 | @@ -14,10 +14,13 @@ |
445 | |
446 | import _pythonpath |
447 | |
448 | +import os |
449 | import sys |
450 | -import xmlrpclib |
451 | - |
452 | -from lp.codehosting.vfs import BlockingProxy |
453 | + |
454 | +# XXX, MichaelHudson, 2009-07-22, bug=402845: This pointless import avoids a |
455 | +# circular import killing us. |
456 | +from canonical.launchpad.database import account |
457 | + |
458 | from lp.codehosting.rewrite import BranchRewriter |
459 | from canonical.config import config |
460 | from lp.services.scripts.base import LaunchpadScript |
461 | @@ -25,11 +28,6 @@ |
462 | |
463 | class BranchRewriteScript(LaunchpadScript): |
464 | |
465 | - def __init__(self, name): |
466 | - LaunchpadScript.__init__(self, name) |
467 | - proxy = xmlrpclib.ServerProxy(config.codehosting.branchfs_endpoint) |
468 | - self.rewriter = BranchRewriter(self.logger, BlockingProxy(proxy)) |
469 | - |
470 | def add_my_options(self): |
471 | """Make the logging go to a file by default. |
472 | |
473 | @@ -40,26 +38,20 @@ |
474 | value from the config. |
475 | """ |
476 | log_file_location = config.codehosting.rewrite_script_log_file |
477 | + log_file_directory = os.path.dirname(log_file_location) |
478 | + if not os.path.isdir(log_file_directory): |
479 | + os.makedirs(log_file_directory) |
480 | self.parser.defaults['log_file'] = log_file_location |
481 | |
482 | - def run(self, use_web_security=False, implicit_begin=True, |
483 | - isolation=None): |
484 | - """See `LaunchpadScript.run`. |
485 | - |
486 | - As this script does not need the component architecture or a |
487 | - connection to the database, we override this method to avoid setting |
488 | - them up. |
489 | - """ |
490 | - self.main() |
491 | - |
492 | def main(self): |
493 | + rewriter = BranchRewriter(self.logger) |
494 | self.logger.debug("Starting up...") |
495 | while True: |
496 | try: |
497 | line = sys.stdin.readline() |
498 | # Mod-rewrite always gives us a newline terminated string. |
499 | if line: |
500 | - print self.rewriter.rewriteLine(line.strip()) |
501 | + print rewriter.rewriteLine(line.strip()) |
502 | else: |
503 | # Standard input has been closed, so die. |
504 | return |
505 | @@ -71,4 +63,4 @@ |
506 | |
507 | |
508 | if __name__ == '__main__': |
509 | - BranchRewriteScript("branch-rewrite").run() |
510 | + BranchRewriteScript("branch-rewrite", dbuser='branch-rewrite').run() |
As we've discovered, we need the rewriting done by the branch-rewrite.py script to be much quicker. So this branch implements it via a single database query. This should (a) be much much faster (b) take load off the XML-RPC server.
In addition, it records in the log whether the cache was hit or not.
There is now some duplicated code between lp.codehosting. rewrite and lp.codehosting. vfs.branchfscli ent, which could perhaps be reduced with some cleverness. OTOH, an extremely un-abstract, bare bones approach seems sane for something we've just found is so performance sensitive.