Merge lp:~mwhudson/launchpad/much-faster-rewrite-map into lp:launchpad/db-devel

Proposed by Michael Hudson-Doyle
Status: Merged
Approved by: Tim Penhey
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~mwhudson/launchpad/much-faster-rewrite-map
Merge into: lp:launchpad/db-devel
Diff against target: None lines
To merge this branch: bzr merge lp:~mwhudson/launchpad/much-faster-rewrite-map
Reviewer Review Type Date Requested Status
Tim Penhey (community) Approve
Review via email: mp+9121@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

As we've discovered, we need the rewriting done by the branch-rewrite.py script to be much quicker. So this branch implements it via a single database query. This should (a) be much much faster (b) take load off the XML-RPC server.

In addition, it records in the log whether the cache was hit or not.

There is now some duplicated code between lp.codehosting.rewrite and lp.codehosting.vfs.branchfsclient, which could perhaps be reduced with some cleverness. OTOH, an extremely un-abstract, bare bones approach seems sane for something we've just found is so performance sensitive.

Revision history for this message
Tim Penhey (thumper) wrote :
Download full text (6.2 KiB)

 review needs-info

> === modified file 'lib/canonical/launchpad/scripts/logger.py'
> --- lib/canonical/launchpad/scripts/logger.py 2009-06-25 05:30:52 +0000
> +++ lib/canonical/launchpad/scripts/logger.py 2009-07-22 02:16:56 +0000
> @@ -113,8 +113,8 @@
> def __init__(self):
> self.buffer = StringIO()
>
> - def message(self, prefix, *stuff, **kw):
> - self.buffer.write('%s: %s\n' % (prefix, ' '.join(stuff)))
> + def message(self, prefix, msg, *stuff, **kw):
> + self.buffer.write('%s: %s\n' % (prefix, msg % stuff))

What is the impact of other code using message?

This to me looks like it'll brake things.

> === modified file 'lib/lp/codehosting/rewrite.py'
> --- lib/lp/codehosting/rewrite.py 2009-07-20 21:32:15 +0000
> +++ lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000
> @@ -5,38 +5,71 @@
> """
>
> import time
> -import xmlrpclib
>
> from bzrlib import urlutils
>
> -from lp.codehosting.vfs import (
> - branch_id_to_path, BranchFileSystemClient)
> +from canonical.launchpad.webapp.interfaces import (
> + IStoreSelector, MAIN_STORE, SLAVE_FLAVOR)
> +from zope.component import getUtility
> +from lp.code.model.branch import Branch
> +from lp.codehosting.vfs import branch_id_to_path
> +
> from canonical.config import config
> -from lp.code.interfaces.codehosting import (
> - BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS)
> -from canonical.launchpad.xmlrpc import faults
> -from canonical.twistedsupport import extract_result
>
> __all__ = ['BranchRewriter']
>
>
> class BranchRewriter:
>
> - def __init__(self, logger, proxy):
> + def __init__(self, logger, _now=None):
> """
>
> :param logger: Logger than messages about what the rewriter is
doing
> will be sent to.
> :param proxy: A blocking proxy for a branchfilesystem endpoint.
> """
> + if _now is None:
> + self._now = time.time
> + else:
> + self._now = _now
> self.logger = logger
> - self.client = BranchFileSystemClient(proxy, LAUNCHPAD_ANONYMOUS,
10.0)
> + self.store = getUtility(IStoreSelector).get(MAIN_STORE,
SLAVE_FLAVOR)
> + self._cache = {}
>
> def _codebrowse_url(self, path):
> return urlutils.join(
> config.codehosting.internal_codebrowse_root,
> path)
>
> + def _getBranchIdAndTrailingPath(self, location):
> + """Return the branch id and trailing path for 'location'.
> +
> + In addition this method returns whether the answer can from the
cache
> + or from the database.
> + """
> + parts = location[1:].split('/')
> + prefixes = []
> + for i in range(1, len(parts) + 1):
> + prefix = '/'.join(parts[:i])
> + if prefix in self._cache:
> + branch_id, inserted_time = self._cache[prefix]
> + if (self._now() < inserted_time +
> + config.codehosting.branch_rewrite_cache_lifetime):
> + trailing = location[len(prefix) + 1:]
> + return branch_id, trailing, "HIT"
> + prefixes.append(prefix)
> + ...

Read more...

review: Needs Information
Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :
Download full text (6.7 KiB)

Tim Penhey wrote:
> Review: Needs Information
> review needs-info
>
>> === modified file 'lib/canonical/launchpad/scripts/logger.py'
>> --- lib/canonical/launchpad/scripts/logger.py 2009-06-25 05:30:52 +0000
>> +++ lib/canonical/launchpad/scripts/logger.py 2009-07-22 02:16:56 +0000
>> @@ -113,8 +113,8 @@
>> def __init__(self):
>> self.buffer = StringIO()
>>
>> - def message(self, prefix, *stuff, **kw):
>> - self.buffer.write('%s: %s\n' % (prefix, ' '.join(stuff)))
>> + def message(self, prefix, msg, *stuff, **kw):
>> + self.buffer.write('%s: %s\n' % (prefix, msg % stuff))
>
> What is the impact of other code using message?

It means that logger.info('%f', 0.1) doesn't break.

> This to me looks like it'll brake things.

I don't think it will. ec2test will confirm :)

>> === modified file 'lib/lp/codehosting/rewrite.py'
>> --- lib/lp/codehosting/rewrite.py 2009-07-20 21:32:15 +0000
>> +++ lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000
>> @@ -5,38 +5,71 @@
>> """
>>
>> import time
>> -import xmlrpclib
>>
>> from bzrlib import urlutils
>>
>> -from lp.codehosting.vfs import (
>> - branch_id_to_path, BranchFileSystemClient)
>> +from canonical.launchpad.webapp.interfaces import (
>> + IStoreSelector, MAIN_STORE, SLAVE_FLAVOR)
>> +from zope.component import getUtility
>> +from lp.code.model.branch import Branch
>> +from lp.codehosting.vfs import branch_id_to_path
>> +
>> from canonical.config import config
>> -from lp.code.interfaces.codehosting import (
>> - BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS)
>> -from canonical.launchpad.xmlrpc import faults
>> -from canonical.twistedsupport import extract_result
>>
>> __all__ = ['BranchRewriter']
>>
>>
>> class BranchRewriter:
>>
>> - def __init__(self, logger, proxy):
>> + def __init__(self, logger, _now=None):
>> """
>>
>> :param logger: Logger than messages about what the rewriter is
> doing
>> will be sent to.
>> :param proxy: A blocking proxy for a branchfilesystem endpoint.
>> """
>> + if _now is None:
>> + self._now = time.time
>> + else:
>> + self._now = _now
>> self.logger = logger
>> - self.client = BranchFileSystemClient(proxy, LAUNCHPAD_ANONYMOUS,
> 10.0)
>> + self.store = getUtility(IStoreSelector).get(MAIN_STORE,
> SLAVE_FLAVOR)
>> + self._cache = {}
>>
>> def _codebrowse_url(self, path):
>> return urlutils.join(
>> config.codehosting.internal_codebrowse_root,
>> path)
>>
>> + def _getBranchIdAndTrailingPath(self, location):
>> + """Return the branch id and trailing path for 'location'.
>> +
>> + In addition this method returns whether the answer can from the
> cache
>> + or from the database.
>> + """
>> + parts = location[1:].split('/')
>> + prefixes = []
>> + for i in range(1, len(parts) + 1):
>> + prefix = '/'.join(parts[:i])
>> + if prefix in self._cache:
>> + branch_id, inserted_time = self._cache[prefix]
>> + if (self._now() < inserted_tim...

Read more...

1=== modified file 'lib/lp/codehosting/rewrite.py'
2--- lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000
3+++ lib/lp/codehosting/rewrite.py 2009-07-22 04:41:03 +0000
4@@ -59,16 +59,15 @@
5 return branch_id, trailing, "HIT"
6 prefixes.append(prefix)
7 result = self.store.find(
8- Branch,
9- Branch.unique_name.is_in(prefixes), Branch.private == False)
10- try:
11- branch_id, unique_name = result.values(
12- Branch.id, Branch.unique_name).next()
13- except StopIteration:
14+ (Branch.id, Branch.unique_name),
15+ Branch.unique_name.is_in(prefixes), Branch.private == False).one()
16+ if result is None:
17 return None, None, "MISS"
18- self._cache[unique_name] = (branch_id, self._now())
19- trailing = location[len(unique_name) + 1:]
20- return branch_id, trailing, "MISS"
21+ else:
22+ branch_id, unique_name = result
23+ self._cache[unique_name] = (branch_id, self._now())
24+ trailing = location[len(unique_name) + 1:]
25+ return branch_id, trailing, "MISS"
26
27 def rewriteLine(self, resource_location):
28 """Rewrite 'resource_location' to a more concrete location.
29
30=== modified file 'lib/lp/codehosting/tests/test_rewrite.py'
31--- lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 03:23:28 +0000
32+++ lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 04:56:35 +0000
33@@ -161,12 +161,10 @@
34 self.factory.makeProductBranch(),
35 self.factory.makePersonalBranch(),
36 self.factory.makePackageBranch()]
37- input = [
38- "/%s/.bzr/README" % branch.unique_name
39- for branch in branches] + [
40- "/%s/changes" % branch.unique_name
41- for branch in branches]
42- expected = [
43+ input_lines = [
44+ "/%s/.bzr/README" % branch.unique_name for branch in branches] + [
45+ "/%s/changes" % branch.unique_name for branch in branches]
46+ expected_lines = [
47 'file:///var/tmp/bzrsync/%s/.bzr/README'
48 % branch_id_to_path(branch.id)
49 for branch in branches] + [
50@@ -178,15 +176,17 @@
51 proc = subprocess.Popen(
52 [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
53 stderr=subprocess.PIPE, bufsize=0)
54- proc.stdin.write('\n'.join(input) + '\n')
55- output = []
56- for i in range(len(input)):
57- output.append(proc.stdout.readline())
58+ output_lines = []
59+ # For each complete line of input, the script should, without
60+ # buffering, write a complete line of output.
61+ for input_line in input_lines:
62+ proc.stdin.write(input_line + '\n')
63+ output_lines.append(proc.stdout.readline().rstrip('\n'))
64 os.kill(proc.pid, signal.SIGINT)
65 err = proc.stderr.read()
66 # The script produces logging output, but not to stderr.
67 self.assertEqual('', err)
68- self.assertEqual('\n'.join(expected) + '\n', ''.join(output))
69+ self.assertEqual(expected_lines, output_lines)
70
71
72 def test_suite():
Revision history for this message
Tim Penhey (thumper) wrote :

 merge approved

review: Approve
Revision history for this message
Stuart Bishop (stub) wrote :

On Wed, Jul 22, 2009 at 11:27 AM, Tim Penhey<email address hidden> wrote:
>> +        self.store = getUtility(IStoreSelector).get(MAIN_STORE,
> SLAVE_FLAVOR)

You can spell this 'self.store = ISlaveStore(Branch)' which I think is
nicer and more meaningful.

>> +            if prefix in self._cache:
>> +                branch_id, inserted_time = self._cache[prefix]
>> +                if (self._now() < inserted_time +
>> +                    config.codehosting.branch_rewrite_cache_lifetime):
>> +                    trailing = location[len(prefix) + 1:]
>> +                    return branch_id, trailing, "HIT"
>> +            prefixes.append(prefix)
>> +        result = self.store.find(
>> +            Branch,
>> +            Branch.unique_name.is_in(prefixes), Branch.private == False)
>> +        try:
>> +            branch_id, unique_name = result.values(
>> +                Branch.id, Branch.unique_name).next()
>> +        except StopIteration:
>> +            return None, None, "MISS"

How do we know the Branch was retrieved from the database and not the
Storm cache?
I also don't see any commits. Both of these issues can be fixed in the
schema-lazr.conf - set the isolation level to autocommit and the
storm_cache_size to 0 for this component.

> How about:
>
>       result = self.store.find(
>           (Branch.id, Branch.unique_name),
>           Branch.unique_name.is_in(prefixes), Branch.private == False).one()
>
> That way result should be None if not found, and a tuple of id and unique_name
> if it was found.
>
> That'll make this code a little cleaner.

Shouldn't that be .any()? .one() will explode if multiple rows matched.

--
Stuart Bishop <email address hidden>
http://www.stuartbishop.net/

Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

Hi Stuart, thanks for taking a look!

Stuart Bishop wrote:
> On Wed, Jul 22, 2009 at 11:27 AM, Tim Penhey<email address hidden> wrote:
>>> + self.store = getUtility(IStoreSelector).get(MAIN_STORE,
>> SLAVE_FLAVOR)
>
> You can spell this 'self.store = ISlaveStore(Branch)' which I think is
> nicer and more meaningful.

Ah, that is nicer.

>>> + if prefix in self._cache:
>>> + branch_id, inserted_time = self._cache[prefix]
>>> + if (self._now() < inserted_time +
>>> + config.codehosting.branch_rewrite_cache_lifetime):
>>> + trailing = location[len(prefix) + 1:]
>>> + return branch_id, trailing, "HIT"
>>> + prefixes.append(prefix)
>>> + result = self.store.find(
>>> + Branch,
>>> + Branch.unique_name.is_in(prefixes), Branch.private == False)
>>> + try:
>>> + branch_id, unique_name = result.values(
>>> + Branch.id, Branch.unique_name).next()
>>> + except StopIteration:
>>> + return None, None, "MISS"
>
> How do we know the Branch was retrieved from the database and not the
> Storm cache?

We don't. Is there some way I could test for this?

> I also don't see any commits. Both of these issues can be fixed in the
> schema-lazr.conf - set the isolation level to autocommit and the
> storm_cache_size to 0 for this component.

Why is autocommit better than read committed, given that we don't write
to the database? Serializable would be very very wrong....

I also admit that I don't know what I have to change in schema-lazr.conf
to change the cache size -- what do you mean by 'component'?

>> How about:
>>
>> result = self.store.find(
>> (Branch.id, Branch.unique_name),
>> Branch.unique_name.is_in(prefixes), Branch.private == False).one()
>>
>> That way result should be None if not found, and a tuple of id and unique_name
>> if it was found.
>>
>> That'll make this code a little cleaner.
>
> Shouldn't that be .any()? .one() will explode if multiple rows matched.

We want it to explode if multiple rows match.

Cheers,
mwh

Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

Michael Hudson wrote:

>>>> + if prefix in self._cache:
>>>> + branch_id, inserted_time = self._cache[prefix]
>>>> + if (self._now() < inserted_time +
>>>> + config.codehosting.branch_rewrite_cache_lifetime):
>>>> + trailing = location[len(prefix) + 1:]
>>>> + return branch_id, trailing, "HIT"
>>>> + prefixes.append(prefix)
>>>> + result = self.store.find(
>>>> + Branch,
>>>> + Branch.unique_name.is_in(prefixes), Branch.private == False)
>>>> + try:
>>>> + branch_id, unique_name = result.values(
>>>> + Branch.id, Branch.unique_name).next()
>>>> + except StopIteration:
>>>> + return None, None, "MISS"
>> How do we know the Branch was retrieved from the database and not the
>> Storm cache?
>
> We don't. Is there some way I could test for this?

Actually we do: we never retrieve a whole branch object from the
database, so there's nothing for storm to cache.

Cheers,
mwh

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'database/schema/security.cfg'
2--- database/schema/security.cfg 2009-07-19 04:41:14 +0000
3+++ database/schema/security.cfg 2009-07-22 03:37:42 +0000
4@@ -1699,3 +1699,7 @@
5 public.person = SELECT
6 public.teammembership = SELECT
7 public.teamparticipation = SELECT
8+
9+[branch-rewrite]
10+type=user
11+public.branch = SELECT
12
13=== modified file 'lib/canonical/config/schema-lazr.conf'
14--- lib/canonical/config/schema-lazr.conf 2009-07-18 01:03:09 +0000
15+++ lib/canonical/config/schema-lazr.conf 2009-07-22 03:37:42 +0000
16@@ -365,6 +365,12 @@
17 # save ourselves some load by directing them all to a HTTP url.
18 hot_products:
19
20+# Branch rewrite cache lifetime.
21+#
22+# How long, in seconds, to cache results of the branch path -> id
23+# mapping done by branch-rewrite.py for.
24+branch_rewrite_cache_lifetime: 10
25+
26
27 [codeimport]
28 # Where the Bazaar imports are stored.
29
30=== modified file 'lib/canonical/launchpad/scripts/logger.py'
31--- lib/canonical/launchpad/scripts/logger.py 2009-06-25 05:30:52 +0000
32+++ lib/canonical/launchpad/scripts/logger.py 2009-07-22 02:16:56 +0000
33@@ -113,8 +113,8 @@
34 def __init__(self):
35 self.buffer = StringIO()
36
37- def message(self, prefix, *stuff, **kw):
38- self.buffer.write('%s: %s\n' % (prefix, ' '.join(stuff)))
39+ def message(self, prefix, msg, *stuff, **kw):
40+ self.buffer.write('%s: %s\n' % (prefix, msg % stuff))
41
42 if 'exc_info' in kw:
43 exception = traceback.format_exception(*sys.exc_info())
44
45=== modified file 'lib/lp/codehosting/rewrite.py'
46--- lib/lp/codehosting/rewrite.py 2009-07-20 21:32:15 +0000
47+++ lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000
48@@ -5,38 +5,71 @@
49 """
50
51 import time
52-import xmlrpclib
53
54 from bzrlib import urlutils
55
56-from lp.codehosting.vfs import (
57- branch_id_to_path, BranchFileSystemClient)
58+from canonical.launchpad.webapp.interfaces import (
59+ IStoreSelector, MAIN_STORE, SLAVE_FLAVOR)
60+from zope.component import getUtility
61+from lp.code.model.branch import Branch
62+from lp.codehosting.vfs import branch_id_to_path
63+
64 from canonical.config import config
65-from lp.code.interfaces.codehosting import (
66- BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS)
67-from canonical.launchpad.xmlrpc import faults
68-from canonical.twistedsupport import extract_result
69
70 __all__ = ['BranchRewriter']
71
72
73 class BranchRewriter:
74
75- def __init__(self, logger, proxy):
76+ def __init__(self, logger, _now=None):
77 """
78
79 :param logger: Logger than messages about what the rewriter is doing
80 will be sent to.
81 :param proxy: A blocking proxy for a branchfilesystem endpoint.
82 """
83+ if _now is None:
84+ self._now = time.time
85+ else:
86+ self._now = _now
87 self.logger = logger
88- self.client = BranchFileSystemClient(proxy, LAUNCHPAD_ANONYMOUS, 10.0)
89+ self.store = getUtility(IStoreSelector).get(MAIN_STORE, SLAVE_FLAVOR)
90+ self._cache = {}
91
92 def _codebrowse_url(self, path):
93 return urlutils.join(
94 config.codehosting.internal_codebrowse_root,
95 path)
96
97+ def _getBranchIdAndTrailingPath(self, location):
98+ """Return the branch id and trailing path for 'location'.
99+
100+ In addition this method returns whether the answer can from the cache
101+ or from the database.
102+ """
103+ parts = location[1:].split('/')
104+ prefixes = []
105+ for i in range(1, len(parts) + 1):
106+ prefix = '/'.join(parts[:i])
107+ if prefix in self._cache:
108+ branch_id, inserted_time = self._cache[prefix]
109+ if (self._now() < inserted_time +
110+ config.codehosting.branch_rewrite_cache_lifetime):
111+ trailing = location[len(prefix) + 1:]
112+ return branch_id, trailing, "HIT"
113+ prefixes.append(prefix)
114+ result = self.store.find(
115+ Branch,
116+ Branch.unique_name.is_in(prefixes), Branch.private == False)
117+ try:
118+ branch_id, unique_name = result.values(
119+ Branch.id, Branch.unique_name).next()
120+ except StopIteration:
121+ return None, None, "MISS"
122+ self._cache[unique_name] = (branch_id, self._now())
123+ trailing = location[len(unique_name) + 1:]
124+ return branch_id, trailing, "MISS"
125+
126 def rewriteLine(self, resource_location):
127 """Rewrite 'resource_location' to a more concrete location.
128
129@@ -65,35 +98,26 @@
130 Other errors are allowed to propagate, on the assumption that the
131 caller will catch and log them.
132 """
133- T = time.time()
134 # Codebrowse generates references to its images and stylesheets
135 # starting with "/static", so pass them on unthinkingly.
136+ T = time.time()
137+ cached = None
138 if resource_location.startswith('/static/'):
139- return self._codebrowse_url(resource_location)
140- trailingSlash = resource_location.endswith('/')
141- deferred = self.client.translatePath(resource_location)
142- try:
143- transport_type, info, trailing = extract_result(deferred)
144- except xmlrpclib.Fault, f:
145- if (faults.check_fault(f, faults.PathTranslationError)
146- or faults.check_fault(f, faults.PermissionDenied)):
147- # In this situation, we'd *like* to generate a 404
148- # (PathTranslationError) or 301 (PermissionDenied) error. But
149- # we can't, so we just forward on to codebrowse which can.
150- return self._codebrowse_url(resource_location)
151- else:
152- raise
153- if transport_type == BRANCH_TRANSPORT:
154- if trailing.startswith('.bzr'):
155- r = urlutils.join(
156- config.codehosting.internal_branch_by_id_root,
157- branch_id_to_path(info['id']), trailing)
158- if trailingSlash:
159- r += '/'
160- else:
161+ r = self._codebrowse_url(resource_location)
162+ cached = 'N/A'
163+ else:
164+ branch_id, trailing, cached = self._getBranchIdAndTrailingPath(
165+ resource_location)
166+ if branch_id is None:
167 r = self._codebrowse_url(resource_location)
168- self.logger.info(
169- "%r -> %r (%fs)", resource_location, r, time.time() - T)
170- return r
171- else:
172- return "NULL"
173+ else:
174+ if trailing.startswith('/.bzr'):
175+ r = urlutils.join(
176+ config.codehosting.internal_branch_by_id_root,
177+ branch_id_to_path(branch_id), trailing[1:])
178+ else:
179+ r = self._codebrowse_url(resource_location)
180+ self.logger.info(
181+ "%r -> %r (%fs, cache: %s)",
182+ resource_location, r, time.time() - T, cached)
183+ return r
184
185=== modified file 'lib/lp/codehosting/tests/test_rewrite.py'
186--- lib/lp/codehosting/tests/test_rewrite.py 2009-07-17 00:26:05 +0000
187+++ lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 03:23:28 +0000
188@@ -5,68 +5,91 @@
189
190 __metaclass__ = type
191
192+import re
193 import os
194 import signal
195 import subprocess
196 import unittest
197
198+import transaction
199+
200+from zope.security.proxy import removeSecurityProxy
201+
202 from lp.codehosting.vfs import branch_id_to_path
203-from lp.codehosting.inmemory import InMemoryFrontend, XMLRPCWrapper
204 from lp.codehosting.rewrite import BranchRewriter
205 from canonical.config import config
206-from lp.testing import TestCase, TestCaseWithFactory
207-from canonical.launchpad.scripts import QuietFakeLogger
208-from canonical.testing.layers import ZopelessAppServerLayer
209-
210-
211-class TestBranchRewriter(TestCase):
212+from lp.testing import FakeTime, TestCaseWithFactory
213+from canonical.launchpad.scripts import BufferLogger
214+from canonical.testing.layers import DatabaseFunctionalLayer
215+
216+
217+class TestBranchRewriter(TestCaseWithFactory):
218+
219+ layer = DatabaseFunctionalLayer
220
221 def setUp(self):
222- frontend = InMemoryFrontend()
223- self._branchfs = frontend.getFilesystemEndpoint()
224- self.factory = frontend.getLaunchpadObjectFactory()
225+ TestCaseWithFactory.setUp(self)
226+ self.fake_time = FakeTime(0)
227
228 def makeRewriter(self):
229- return BranchRewriter(
230- QuietFakeLogger(), XMLRPCWrapper(self._branchfs))
231-
232- def test_translateLine_found_dot_bzr(self):
233+ return BranchRewriter(BufferLogger(), self.fake_time.now)
234+
235+ def getLoggerOutput(self, rewriter):
236+ return rewriter.logger.buffer.getvalue()
237+
238+ def test_rewriteLine_found_dot_bzr(self):
239 # Requests for /$branch_name/.bzr/... are redirected to where the
240 # branches are served from by ID.
241 rewriter = self.makeRewriter()
242- branch = self.factory.makeAnyBranch()
243- line = rewriter.rewriteLine("/%s/.bzr/README" % branch.unique_name)
244- self.assertEqual(
245+ branches = [
246+ self.factory.makeProductBranch(),
247+ self.factory.makePersonalBranch(),
248+ self.factory.makePackageBranch()]
249+ transaction.commit()
250+ output = [
251+ rewriter.rewriteLine("/%s/.bzr/README" % branch.unique_name)
252+ for branch in branches]
253+ expected = [
254 'file:///var/tmp/bzrsync/%s/.bzr/README'
255- % branch_id_to_path(branch.id),
256- line)
257+ % branch_id_to_path(branch.id)
258+ for branch in branches]
259+ self.assertEqual(expected, output)
260
261- def test_translateLine_found_not_dot_bzr(self):
262+ def test_rewriteLine_found_not_dot_bzr(self):
263 # Requests for /$branch_name/... that are not to .bzr directories are
264 # redirected to codebrowse.
265 rewriter = self.makeRewriter()
266- branch = self.factory.makeAnyBranch()
267- output = rewriter.rewriteLine("/%s/changes" % branch.unique_name)
268- self.assertEqual(
269- 'http://localhost:8080/%s/changes' % branch.unique_name,
270- output)
271+ branches = [
272+ self.factory.makeProductBranch(),
273+ self.factory.makePersonalBranch(),
274+ self.factory.makePackageBranch()]
275+ transaction.commit()
276+ output = [
277+ rewriter.rewriteLine("/%s/changes" % branch.unique_name)
278+ for branch in branches]
279+ expected = [
280+ 'http://localhost:8080/%s/changes' % branch.unique_name
281+ for branch in branches]
282+ self.assertEqual(expected, output)
283
284- def test_translateLine_private(self):
285+ def test_rewriteLine_private(self):
286 # All requests for /$branch_name/... for private branches are
287 # rewritten to codebrowse, which will then redirect them to https and
288 # handle them there.
289 rewriter = self.makeRewriter()
290- branch = self.factory.makeBranch(private=True)
291- output = rewriter.rewriteLine("/%s/changes" % branch.unique_name)
292- self.assertEqual(
293- 'http://localhost:8080/%s/changes' % branch.unique_name,
294- output)
295- output = rewriter.rewriteLine("/%s/.bzr" % branch.unique_name)
296- self.assertEqual(
297- 'http://localhost:8080/%s/.bzr' % branch.unique_name,
298+ branch = self.factory.makeAnyBranch(private=True)
299+ unique_name = removeSecurityProxy(branch).unique_name
300+ transaction.commit()
301+ output = [
302+ rewriter.rewriteLine("/%s/changes" % unique_name),
303+ rewriter.rewriteLine("/%s/.bzr" % unique_name)
304+ ]
305+ self.assertEqual(
306+ ['http://localhost:8080/%s/changes' % unique_name,
307+ 'http://localhost:8080/%s/.bzr' % unique_name],
308 output)
309
310- def test_translateLine_static(self):
311+ def test_rewriteLine_static(self):
312 # Requests to /static are rewritten to codebrowse urls.
313 rewriter = self.makeRewriter()
314 output = rewriter.rewriteLine("/static/foo")
315@@ -74,7 +97,7 @@
316 'http://localhost:8080/static/foo',
317 output)
318
319- def test_translateLine_not_found(self):
320+ def test_rewriteLine_not_found(self):
321 # If the request does not map to a branch, we redirect it to
322 # codebrowse as it can generate a 404.
323 rewriter = self.makeRewriter()
324@@ -84,30 +107,86 @@
325 'http://localhost:8080%s' % not_found_path,
326 output)
327
328+ def test_rewriteLine_logs_cache_miss(self):
329+ # The first request for a branch misses the cache and logs this fact.
330+ rewriter = self.makeRewriter()
331+ branch = self.factory.makeAnyBranch()
332+ transaction.commit()
333+ rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
334+ logging_output = self.getLoggerOutput(rewriter)
335+ self.assertIsNot(
336+ None,
337+ re.match("INFO: .* -> .* (.*s, cache: MISS)", logging_output),
338+ "No miss found in %r" % logging_output)
339+
340+ def test_rewriteLine_logs_cache_hit(self):
341+ # The second request for a branch misses the cache and logs this fact.
342+ rewriter = self.makeRewriter()
343+ branch = self.factory.makeAnyBranch()
344+ transaction.commit()
345+ rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
346+ rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
347+ logging_output_lines = self.getLoggerOutput(rewriter).strip().split('\n')
348+ self.assertEqual(2, len(logging_output_lines))
349+ self.assertIsNot(
350+ None,
351+ re.match("INFO: .* -> .* (.*s, cache: HIT)",
352+ logging_output_lines[-1]),
353+ "No hit found in %r" % logging_output_lines[-1])
354+
355+ def test_rewriteLine_cache_expires(self):
356+ # The second request for a branch misses the cache and logs this fact.
357+ rewriter = self.makeRewriter()
358+ branch = self.factory.makeAnyBranch()
359+ transaction.commit()
360+ rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
361+ self.fake_time.advance(
362+ config.codehosting.branch_rewrite_cache_lifetime + 1)
363+ rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
364+ logging_output_lines = self.getLoggerOutput(rewriter).strip().split('\n')
365+ self.assertEqual(2, len(logging_output_lines))
366+ self.assertIsNot(
367+ None,
368+ re.match("INFO: .* -> .* (.*s, cache: MISS)",
369+ logging_output_lines[-1]),
370+ "No miss found in %r" % logging_output_lines[-1])
371+
372
373 class TestBranchRewriterScript(TestCaseWithFactory):
374
375- layer = ZopelessAppServerLayer
376+ layer = DatabaseFunctionalLayer
377
378 def test_script(self):
379- branch = self.factory.makeAnyBranch()
380- input = "/%s/.bzr/README\n" % branch.unique_name
381- expected = (
382- "file:///var/tmp/bzrsync/%s/.bzr/README\n"
383- % branch_id_to_path(branch.id))
384- self.layer.txn.commit()
385+ branches = [
386+ self.factory.makeProductBranch(),
387+ self.factory.makePersonalBranch(),
388+ self.factory.makePackageBranch()]
389+ input = [
390+ "/%s/.bzr/README" % branch.unique_name
391+ for branch in branches] + [
392+ "/%s/changes" % branch.unique_name
393+ for branch in branches]
394+ expected = [
395+ 'file:///var/tmp/bzrsync/%s/.bzr/README'
396+ % branch_id_to_path(branch.id)
397+ for branch in branches] + [
398+ 'http://localhost:8080/%s/changes' % branch.unique_name
399+ for branch in branches]
400+ transaction.commit()
401 script_file = os.path.join(
402 config.root, 'scripts', 'branch-rewrite.py')
403 proc = subprocess.Popen(
404 [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
405 stderr=subprocess.PIPE, bufsize=0)
406- proc.stdin.write(input)
407- output = proc.stdout.readline()
408+ proc.stdin.write('\n'.join(input) + '\n')
409+ output = []
410+ for i in range(len(input)):
411+ output.append(proc.stdout.readline())
412 os.kill(proc.pid, signal.SIGINT)
413 err = proc.stderr.read()
414 # The script produces logging output, but not to stderr.
415 self.assertEqual('', err)
416- self.assertEqual(expected, output)
417+ self.assertEqual('\n'.join(expected) + '\n', ''.join(output))
418
419
420 def test_suite():
421
422=== modified file 'lib/lp/testing/__init__.py'
423--- lib/lp/testing/__init__.py 2009-07-17 18:46:25 +0000
424+++ lib/lp/testing/__init__.py 2009-07-22 02:26:16 +0000
425@@ -224,10 +224,11 @@
426 self.assertTrue(expected is observed,
427 "%r is not %r" % (expected, observed))
428
429- def assertIsNot(self, expected, observed):
430+ def assertIsNot(self, expected, observed, msg=None):
431 """Assert that `expected` is not the same object as `observed`."""
432- self.assertTrue(expected is not observed,
433- "%r is %r" % (expected, observed))
434+ if msg is None:
435+ msg = "%r is %r" % (expected, observed)
436+ self.assertTrue(expected is not observed, msg)
437
438 def assertIn(self, needle, haystack):
439 """Assert that 'needle' is in 'haystack'."""
440
441=== modified file 'scripts/branch-rewrite.py'
442--- scripts/branch-rewrite.py 2009-06-30 16:56:07 +0000
443+++ scripts/branch-rewrite.py 2009-07-22 03:36:19 +0000
444@@ -14,10 +14,13 @@
445
446 import _pythonpath
447
448+import os
449 import sys
450-import xmlrpclib
451-
452-from lp.codehosting.vfs import BlockingProxy
453+
454+# XXX, MichaelHudson, 2009-07-22, bug=402845: This pointless import avoids a
455+# circular import killing us.
456+from canonical.launchpad.database import account
457+
458 from lp.codehosting.rewrite import BranchRewriter
459 from canonical.config import config
460 from lp.services.scripts.base import LaunchpadScript
461@@ -25,11 +28,6 @@
462
463 class BranchRewriteScript(LaunchpadScript):
464
465- def __init__(self, name):
466- LaunchpadScript.__init__(self, name)
467- proxy = xmlrpclib.ServerProxy(config.codehosting.branchfs_endpoint)
468- self.rewriter = BranchRewriter(self.logger, BlockingProxy(proxy))
469-
470 def add_my_options(self):
471 """Make the logging go to a file by default.
472
473@@ -40,26 +38,20 @@
474 value from the config.
475 """
476 log_file_location = config.codehosting.rewrite_script_log_file
477+ log_file_directory = os.path.dirname(log_file_location)
478+ if not os.path.isdir(log_file_directory):
479+ os.makedirs(log_file_directory)
480 self.parser.defaults['log_file'] = log_file_location
481
482- def run(self, use_web_security=False, implicit_begin=True,
483- isolation=None):
484- """See `LaunchpadScript.run`.
485-
486- As this script does not need the component architecture or a
487- connection to the database, we override this method to avoid setting
488- them up.
489- """
490- self.main()
491-
492 def main(self):
493+ rewriter = BranchRewriter(self.logger)
494 self.logger.debug("Starting up...")
495 while True:
496 try:
497 line = sys.stdin.readline()
498 # Mod-rewrite always gives us a newline terminated string.
499 if line:
500- print self.rewriter.rewriteLine(line.strip())
501+ print rewriter.rewriteLine(line.strip())
502 else:
503 # Standard input has been closed, so die.
504 return
505@@ -71,4 +63,4 @@
506
507
508 if __name__ == '__main__':
509- BranchRewriteScript("branch-rewrite").run()
510+ BranchRewriteScript("branch-rewrite", dbuser='branch-rewrite').run()

Subscribers

People subscribed via source and target branches

to status/vote changes: