Merge lp:~mwhudson/launchpad/much-faster-rewrite-map into lp:launchpad/db-devel

Proposed by Michael Hudson-Doyle
Status: Merged
Approved by: Tim Penhey
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~mwhudson/launchpad/much-faster-rewrite-map
Merge into: lp:launchpad/db-devel
Diff against target: None lines
To merge this branch: bzr merge lp:~mwhudson/launchpad/much-faster-rewrite-map
Reviewer Review Type Date Requested Status
Tim Penhey (community) Approve
Review via email: mp+9121@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

As we've discovered, we need the rewriting done by the branch-rewrite.py script to be much quicker. So this branch implements it via a single database query. This should (a) be much much faster (b) take load off the XML-RPC server.

In addition, it records in the log whether the cache was hit or not.

There is now some duplicated code between lp.codehosting.rewrite and lp.codehosting.vfs.branchfsclient, which could perhaps be reduced with some cleverness. OTOH, an extremely un-abstract, bare bones approach seems sane for something we've just found is so performance sensitive.

Revision history for this message
Tim Penhey (thumper) wrote :
Download full text (6.2 KiB)

 review needs-info

> === modified file 'lib/canonical/launchpad/scripts/logger.py'
> --- lib/canonical/launchpad/scripts/logger.py 2009-06-25 05:30:52 +0000
> +++ lib/canonical/launchpad/scripts/logger.py 2009-07-22 02:16:56 +0000
> @@ -113,8 +113,8 @@
> def __init__(self):
> self.buffer = StringIO()
>
> - def message(self, prefix, *stuff, **kw):
> - self.buffer.write('%s: %s\n' % (prefix, ' '.join(stuff)))
> + def message(self, prefix, msg, *stuff, **kw):
> + self.buffer.write('%s: %s\n' % (prefix, msg % stuff))

What is the impact of other code using message?

This to me looks like it'll brake things.

> === modified file 'lib/lp/codehosting/rewrite.py'
> --- lib/lp/codehosting/rewrite.py 2009-07-20 21:32:15 +0000
> +++ lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000
> @@ -5,38 +5,71 @@
> """
>
> import time
> -import xmlrpclib
>
> from bzrlib import urlutils
>
> -from lp.codehosting.vfs import (
> - branch_id_to_path, BranchFileSystemClient)
> +from canonical.launchpad.webapp.interfaces import (
> + IStoreSelector, MAIN_STORE, SLAVE_FLAVOR)
> +from zope.component import getUtility
> +from lp.code.model.branch import Branch
> +from lp.codehosting.vfs import branch_id_to_path
> +
> from canonical.config import config
> -from lp.code.interfaces.codehosting import (
> - BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS)
> -from canonical.launchpad.xmlrpc import faults
> -from canonical.twistedsupport import extract_result
>
> __all__ = ['BranchRewriter']
>
>
> class BranchRewriter:
>
> - def __init__(self, logger, proxy):
> + def __init__(self, logger, _now=None):
> """
>
> :param logger: Logger than messages about what the rewriter is
doing
> will be sent to.
> :param proxy: A blocking proxy for a branchfilesystem endpoint.
> """
> + if _now is None:
> + self._now = time.time
> + else:
> + self._now = _now
> self.logger = logger
> - self.client = BranchFileSystemClient(proxy, LAUNCHPAD_ANONYMOUS,
10.0)
> + self.store = getUtility(IStoreSelector).get(MAIN_STORE,
SLAVE_FLAVOR)
> + self._cache = {}
>
> def _codebrowse_url(self, path):
> return urlutils.join(
> config.codehosting.internal_codebrowse_root,
> path)
>
> + def _getBranchIdAndTrailingPath(self, location):
> + """Return the branch id and trailing path for 'location'.
> +
> + In addition this method returns whether the answer can from the
cache
> + or from the database.
> + """
> + parts = location[1:].split('/')
> + prefixes = []
> + for i in range(1, len(parts) + 1):
> + prefix = '/'.join(parts[:i])
> + if prefix in self._cache:
> + branch_id, inserted_time = self._cache[prefix]
> + if (self._now() < inserted_time +
> + config.codehosting.branch_rewrite_cache_lifetime):
> + trailing = location[len(prefix) + 1:]
> + return branch_id, trailing, "HIT"
> + prefixes.append(prefix)
> + ...

Read more...

review: Needs Information
Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :
Download full text (6.7 KiB)

Tim Penhey wrote:
> Review: Needs Information
> review needs-info
>
>> === modified file 'lib/canonical/launchpad/scripts/logger.py'
>> --- lib/canonical/launchpad/scripts/logger.py 2009-06-25 05:30:52 +0000
>> +++ lib/canonical/launchpad/scripts/logger.py 2009-07-22 02:16:56 +0000
>> @@ -113,8 +113,8 @@
>> def __init__(self):
>> self.buffer = StringIO()
>>
>> - def message(self, prefix, *stuff, **kw):
>> - self.buffer.write('%s: %s\n' % (prefix, ' '.join(stuff)))
>> + def message(self, prefix, msg, *stuff, **kw):
>> + self.buffer.write('%s: %s\n' % (prefix, msg % stuff))
>
> What is the impact of other code using message?

It means that logger.info('%f', 0.1) doesn't break.

> This to me looks like it'll brake things.

I don't think it will. ec2test will confirm :)

>> === modified file 'lib/lp/codehosting/rewrite.py'
>> --- lib/lp/codehosting/rewrite.py 2009-07-20 21:32:15 +0000
>> +++ lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000
>> @@ -5,38 +5,71 @@
>> """
>>
>> import time
>> -import xmlrpclib
>>
>> from bzrlib import urlutils
>>
>> -from lp.codehosting.vfs import (
>> - branch_id_to_path, BranchFileSystemClient)
>> +from canonical.launchpad.webapp.interfaces import (
>> + IStoreSelector, MAIN_STORE, SLAVE_FLAVOR)
>> +from zope.component import getUtility
>> +from lp.code.model.branch import Branch
>> +from lp.codehosting.vfs import branch_id_to_path
>> +
>> from canonical.config import config
>> -from lp.code.interfaces.codehosting import (
>> - BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS)
>> -from canonical.launchpad.xmlrpc import faults
>> -from canonical.twistedsupport import extract_result
>>
>> __all__ = ['BranchRewriter']
>>
>>
>> class BranchRewriter:
>>
>> - def __init__(self, logger, proxy):
>> + def __init__(self, logger, _now=None):
>> """
>>
>> :param logger: Logger than messages about what the rewriter is
> doing
>> will be sent to.
>> :param proxy: A blocking proxy for a branchfilesystem endpoint.
>> """
>> + if _now is None:
>> + self._now = time.time
>> + else:
>> + self._now = _now
>> self.logger = logger
>> - self.client = BranchFileSystemClient(proxy, LAUNCHPAD_ANONYMOUS,
> 10.0)
>> + self.store = getUtility(IStoreSelector).get(MAIN_STORE,
> SLAVE_FLAVOR)
>> + self._cache = {}
>>
>> def _codebrowse_url(self, path):
>> return urlutils.join(
>> config.codehosting.internal_codebrowse_root,
>> path)
>>
>> + def _getBranchIdAndTrailingPath(self, location):
>> + """Return the branch id and trailing path for 'location'.
>> +
>> + In addition this method returns whether the answer can from the
> cache
>> + or from the database.
>> + """
>> + parts = location[1:].split('/')
>> + prefixes = []
>> + for i in range(1, len(parts) + 1):
>> + prefix = '/'.join(parts[:i])
>> + if prefix in self._cache:
>> + branch_id, inserted_time = self._cache[prefix]
>> + if (self._now() < inserted_tim...

Read more...

=== modified file 'lib/lp/codehosting/rewrite.py'
--- lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000
+++ lib/lp/codehosting/rewrite.py 2009-07-22 04:41:03 +0000
@@ -59,16 +59,15 @@
59 return branch_id, trailing, "HIT"59 return branch_id, trailing, "HIT"
60 prefixes.append(prefix)60 prefixes.append(prefix)
61 result = self.store.find(61 result = self.store.find(
62 Branch,62 (Branch.id, Branch.unique_name),
63 Branch.unique_name.is_in(prefixes), Branch.private == False)63 Branch.unique_name.is_in(prefixes), Branch.private == False).one()
64 try:64 if result is None:
65 branch_id, unique_name = result.values(
66 Branch.id, Branch.unique_name).next()
67 except StopIteration:
68 return None, None, "MISS"65 return None, None, "MISS"
69 self._cache[unique_name] = (branch_id, self._now())66 else:
70 trailing = location[len(unique_name) + 1:]67 branch_id, unique_name = result
71 return branch_id, trailing, "MISS"68 self._cache[unique_name] = (branch_id, self._now())
69 trailing = location[len(unique_name) + 1:]
70 return branch_id, trailing, "MISS"
7271
73 def rewriteLine(self, resource_location):72 def rewriteLine(self, resource_location):
74 """Rewrite 'resource_location' to a more concrete location.73 """Rewrite 'resource_location' to a more concrete location.
7574
=== modified file 'lib/lp/codehosting/tests/test_rewrite.py'
--- lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 03:23:28 +0000
+++ lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 04:56:35 +0000
@@ -161,12 +161,10 @@
161 self.factory.makeProductBranch(),161 self.factory.makeProductBranch(),
162 self.factory.makePersonalBranch(),162 self.factory.makePersonalBranch(),
163 self.factory.makePackageBranch()]163 self.factory.makePackageBranch()]
164 input = [164 input_lines = [
165 "/%s/.bzr/README" % branch.unique_name165 "/%s/.bzr/README" % branch.unique_name for branch in branches] + [
166 for branch in branches] + [166 "/%s/changes" % branch.unique_name for branch in branches]
167 "/%s/changes" % branch.unique_name167 expected_lines = [
168 for branch in branches]
169 expected = [
170 'file:///var/tmp/bzrsync/%s/.bzr/README'168 'file:///var/tmp/bzrsync/%s/.bzr/README'
171 % branch_id_to_path(branch.id)169 % branch_id_to_path(branch.id)
172 for branch in branches] + [170 for branch in branches] + [
@@ -178,15 +176,17 @@
178 proc = subprocess.Popen(176 proc = subprocess.Popen(
179 [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE,177 [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
180 stderr=subprocess.PIPE, bufsize=0)178 stderr=subprocess.PIPE, bufsize=0)
181 proc.stdin.write('\n'.join(input) + '\n')179 output_lines = []
182 output = []180 # For each complete line of input, the script should, without
183 for i in range(len(input)):181 # buffering, write a complete line of output.
184 output.append(proc.stdout.readline())182 for input_line in input_lines:
183 proc.stdin.write(input_line + '\n')
184 output_lines.append(proc.stdout.readline().rstrip('\n'))
185 os.kill(proc.pid, signal.SIGINT)185 os.kill(proc.pid, signal.SIGINT)
186 err = proc.stderr.read()186 err = proc.stderr.read()
187 # The script produces logging output, but not to stderr.187 # The script produces logging output, but not to stderr.
188 self.assertEqual('', err)188 self.assertEqual('', err)
189 self.assertEqual('\n'.join(expected) + '\n', ''.join(output))189 self.assertEqual(expected_lines, output_lines)
190190
191191
192def test_suite():192def test_suite():
Revision history for this message
Tim Penhey (thumper) wrote :

 merge approved

review: Approve
Revision history for this message
Stuart Bishop (stub) wrote :

On Wed, Jul 22, 2009 at 11:27 AM, Tim Penhey<email address hidden> wrote:
>> +        self.store = getUtility(IStoreSelector).get(MAIN_STORE,
> SLAVE_FLAVOR)

You can spell this 'self.store = ISlaveStore(Branch)' which I think is
nicer and more meaningful.

>> +            if prefix in self._cache:
>> +                branch_id, inserted_time = self._cache[prefix]
>> +                if (self._now() < inserted_time +
>> +                    config.codehosting.branch_rewrite_cache_lifetime):
>> +                    trailing = location[len(prefix) + 1:]
>> +                    return branch_id, trailing, "HIT"
>> +            prefixes.append(prefix)
>> +        result = self.store.find(
>> +            Branch,
>> +            Branch.unique_name.is_in(prefixes), Branch.private == False)
>> +        try:
>> +            branch_id, unique_name = result.values(
>> +                Branch.id, Branch.unique_name).next()
>> +        except StopIteration:
>> +            return None, None, "MISS"

How do we know the Branch was retrieved from the database and not the
Storm cache?
I also don't see any commits. Both of these issues can be fixed in the
schema-lazr.conf - set the isolation level to autocommit and the
storm_cache_size to 0 for this component.

> How about:
>
>       result = self.store.find(
>           (Branch.id, Branch.unique_name),
>           Branch.unique_name.is_in(prefixes), Branch.private == False).one()
>
> That way result should be None if not found, and a tuple of id and unique_name
> if it was found.
>
> That'll make this code a little cleaner.

Shouldn't that be .any()? .one() will explode if multiple rows matched.

--
Stuart Bishop <email address hidden>
http://www.stuartbishop.net/

Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

Hi Stuart, thanks for taking a look!

Stuart Bishop wrote:
> On Wed, Jul 22, 2009 at 11:27 AM, Tim Penhey<email address hidden> wrote:
>>> + self.store = getUtility(IStoreSelector).get(MAIN_STORE,
>> SLAVE_FLAVOR)
>
> You can spell this 'self.store = ISlaveStore(Branch)' which I think is
> nicer and more meaningful.

Ah, that is nicer.

>>> + if prefix in self._cache:
>>> + branch_id, inserted_time = self._cache[prefix]
>>> + if (self._now() < inserted_time +
>>> + config.codehosting.branch_rewrite_cache_lifetime):
>>> + trailing = location[len(prefix) + 1:]
>>> + return branch_id, trailing, "HIT"
>>> + prefixes.append(prefix)
>>> + result = self.store.find(
>>> + Branch,
>>> + Branch.unique_name.is_in(prefixes), Branch.private == False)
>>> + try:
>>> + branch_id, unique_name = result.values(
>>> + Branch.id, Branch.unique_name).next()
>>> + except StopIteration:
>>> + return None, None, "MISS"
>
> How do we know the Branch was retrieved from the database and not the
> Storm cache?

We don't. Is there some way I could test for this?

> I also don't see any commits. Both of these issues can be fixed in the
> schema-lazr.conf - set the isolation level to autocommit and the
> storm_cache_size to 0 for this component.

Why is autocommit better than read committed, given that we don't write
to the database? Serializable would be very very wrong....

I also admit that I don't know what I have to change in schema-lazr.conf
to change the cache size -- what do you mean by 'component'?

>> How about:
>>
>> result = self.store.find(
>> (Branch.id, Branch.unique_name),
>> Branch.unique_name.is_in(prefixes), Branch.private == False).one()
>>
>> That way result should be None if not found, and a tuple of id and unique_name
>> if it was found.
>>
>> That'll make this code a little cleaner.
>
> Shouldn't that be .any()? .one() will explode if multiple rows matched.

We want it to explode if multiple rows match.

Cheers,
mwh

Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

Michael Hudson wrote:

>>>> + if prefix in self._cache:
>>>> + branch_id, inserted_time = self._cache[prefix]
>>>> + if (self._now() < inserted_time +
>>>> + config.codehosting.branch_rewrite_cache_lifetime):
>>>> + trailing = location[len(prefix) + 1:]
>>>> + return branch_id, trailing, "HIT"
>>>> + prefixes.append(prefix)
>>>> + result = self.store.find(
>>>> + Branch,
>>>> + Branch.unique_name.is_in(prefixes), Branch.private == False)
>>>> + try:
>>>> + branch_id, unique_name = result.values(
>>>> + Branch.id, Branch.unique_name).next()
>>>> + except StopIteration:
>>>> + return None, None, "MISS"
>> How do we know the Branch was retrieved from the database and not the
>> Storm cache?
>
> We don't. Is there some way I could test for this?

Actually we do: we never retrieve a whole branch object from the
database, so there's nothing for storm to cache.

Cheers,
mwh

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'database/schema/security.cfg'
--- database/schema/security.cfg 2009-07-19 04:41:14 +0000
+++ database/schema/security.cfg 2009-07-22 03:37:42 +0000
@@ -1699,3 +1699,7 @@
1699public.person = SELECT1699public.person = SELECT
1700public.teammembership = SELECT1700public.teammembership = SELECT
1701public.teamparticipation = SELECT1701public.teamparticipation = SELECT
1702
1703[branch-rewrite]
1704type=user
1705public.branch = SELECT
17021706
=== modified file 'lib/canonical/config/schema-lazr.conf'
--- lib/canonical/config/schema-lazr.conf 2009-07-18 01:03:09 +0000
+++ lib/canonical/config/schema-lazr.conf 2009-07-22 03:37:42 +0000
@@ -365,6 +365,12 @@
365# save ourselves some load by directing them all to a HTTP url.365# save ourselves some load by directing them all to a HTTP url.
366hot_products:366hot_products:
367367
368# Branch rewrite cache lifetime.
369#
370# How long, in seconds, to cache results of the branch path -> id
371# mapping done by branch-rewrite.py for.
372branch_rewrite_cache_lifetime: 10
373
368374
369[codeimport]375[codeimport]
370# Where the Bazaar imports are stored.376# Where the Bazaar imports are stored.
371377
=== modified file 'lib/canonical/launchpad/scripts/logger.py'
--- lib/canonical/launchpad/scripts/logger.py 2009-06-25 05:30:52 +0000
+++ lib/canonical/launchpad/scripts/logger.py 2009-07-22 02:16:56 +0000
@@ -113,8 +113,8 @@
113 def __init__(self):113 def __init__(self):
114 self.buffer = StringIO()114 self.buffer = StringIO()
115115
116 def message(self, prefix, *stuff, **kw):116 def message(self, prefix, msg, *stuff, **kw):
117 self.buffer.write('%s: %s\n' % (prefix, ' '.join(stuff)))117 self.buffer.write('%s: %s\n' % (prefix, msg % stuff))
118118
119 if 'exc_info' in kw:119 if 'exc_info' in kw:
120 exception = traceback.format_exception(*sys.exc_info())120 exception = traceback.format_exception(*sys.exc_info())
121121
=== modified file 'lib/lp/codehosting/rewrite.py'
--- lib/lp/codehosting/rewrite.py 2009-07-20 21:32:15 +0000
+++ lib/lp/codehosting/rewrite.py 2009-07-22 03:24:19 +0000
@@ -5,38 +5,71 @@
5"""5"""
66
7import time7import time
8import xmlrpclib
98
10from bzrlib import urlutils9from bzrlib import urlutils
1110
12from lp.codehosting.vfs import (11from canonical.launchpad.webapp.interfaces import (
13 branch_id_to_path, BranchFileSystemClient)12 IStoreSelector, MAIN_STORE, SLAVE_FLAVOR)
13from zope.component import getUtility
14from lp.code.model.branch import Branch
15from lp.codehosting.vfs import branch_id_to_path
16
14from canonical.config import config17from canonical.config import config
15from lp.code.interfaces.codehosting import (
16 BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS)
17from canonical.launchpad.xmlrpc import faults
18from canonical.twistedsupport import extract_result
1918
20__all__ = ['BranchRewriter']19__all__ = ['BranchRewriter']
2120
2221
23class BranchRewriter:22class BranchRewriter:
2423
25 def __init__(self, logger, proxy):24 def __init__(self, logger, _now=None):
26 """25 """
2726
28 :param logger: Logger than messages about what the rewriter is doing27 :param logger: Logger than messages about what the rewriter is doing
29 will be sent to.28 will be sent to.
30 :param proxy: A blocking proxy for a branchfilesystem endpoint.29 :param proxy: A blocking proxy for a branchfilesystem endpoint.
31 """30 """
31 if _now is None:
32 self._now = time.time
33 else:
34 self._now = _now
32 self.logger = logger35 self.logger = logger
33 self.client = BranchFileSystemClient(proxy, LAUNCHPAD_ANONYMOUS, 10.0)36 self.store = getUtility(IStoreSelector).get(MAIN_STORE, SLAVE_FLAVOR)
37 self._cache = {}
3438
35 def _codebrowse_url(self, path):39 def _codebrowse_url(self, path):
36 return urlutils.join(40 return urlutils.join(
37 config.codehosting.internal_codebrowse_root,41 config.codehosting.internal_codebrowse_root,
38 path)42 path)
3943
44 def _getBranchIdAndTrailingPath(self, location):
45 """Return the branch id and trailing path for 'location'.
46
47 In addition this method returns whether the answer can from the cache
48 or from the database.
49 """
50 parts = location[1:].split('/')
51 prefixes = []
52 for i in range(1, len(parts) + 1):
53 prefix = '/'.join(parts[:i])
54 if prefix in self._cache:
55 branch_id, inserted_time = self._cache[prefix]
56 if (self._now() < inserted_time +
57 config.codehosting.branch_rewrite_cache_lifetime):
58 trailing = location[len(prefix) + 1:]
59 return branch_id, trailing, "HIT"
60 prefixes.append(prefix)
61 result = self.store.find(
62 Branch,
63 Branch.unique_name.is_in(prefixes), Branch.private == False)
64 try:
65 branch_id, unique_name = result.values(
66 Branch.id, Branch.unique_name).next()
67 except StopIteration:
68 return None, None, "MISS"
69 self._cache[unique_name] = (branch_id, self._now())
70 trailing = location[len(unique_name) + 1:]
71 return branch_id, trailing, "MISS"
72
40 def rewriteLine(self, resource_location):73 def rewriteLine(self, resource_location):
41 """Rewrite 'resource_location' to a more concrete location.74 """Rewrite 'resource_location' to a more concrete location.
4275
@@ -65,35 +98,26 @@
65 Other errors are allowed to propagate, on the assumption that the98 Other errors are allowed to propagate, on the assumption that the
66 caller will catch and log them.99 caller will catch and log them.
67 """100 """
68 T = time.time()
69 # Codebrowse generates references to its images and stylesheets101 # Codebrowse generates references to its images and stylesheets
70 # starting with "/static", so pass them on unthinkingly.102 # starting with "/static", so pass them on unthinkingly.
103 T = time.time()
104 cached = None
71 if resource_location.startswith('/static/'):105 if resource_location.startswith('/static/'):
72 return self._codebrowse_url(resource_location)106 r = self._codebrowse_url(resource_location)
73 trailingSlash = resource_location.endswith('/')107 cached = 'N/A'
74 deferred = self.client.translatePath(resource_location)108 else:
75 try:109 branch_id, trailing, cached = self._getBranchIdAndTrailingPath(
76 transport_type, info, trailing = extract_result(deferred)110 resource_location)
77 except xmlrpclib.Fault, f:111 if branch_id is None:
78 if (faults.check_fault(f, faults.PathTranslationError)
79 or faults.check_fault(f, faults.PermissionDenied)):
80 # In this situation, we'd *like* to generate a 404
81 # (PathTranslationError) or 301 (PermissionDenied) error. But
82 # we can't, so we just forward on to codebrowse which can.
83 return self._codebrowse_url(resource_location)
84 else:
85 raise
86 if transport_type == BRANCH_TRANSPORT:
87 if trailing.startswith('.bzr'):
88 r = urlutils.join(
89 config.codehosting.internal_branch_by_id_root,
90 branch_id_to_path(info['id']), trailing)
91 if trailingSlash:
92 r += '/'
93 else:
94 r = self._codebrowse_url(resource_location)112 r = self._codebrowse_url(resource_location)
95 self.logger.info(113 else:
96 "%r -> %r (%fs)", resource_location, r, time.time() - T)114 if trailing.startswith('/.bzr'):
97 return r115 r = urlutils.join(
98 else:116 config.codehosting.internal_branch_by_id_root,
99 return "NULL"117 branch_id_to_path(branch_id), trailing[1:])
118 else:
119 r = self._codebrowse_url(resource_location)
120 self.logger.info(
121 "%r -> %r (%fs, cache: %s)",
122 resource_location, r, time.time() - T, cached)
123 return r
100124
=== modified file 'lib/lp/codehosting/tests/test_rewrite.py'
--- lib/lp/codehosting/tests/test_rewrite.py 2009-07-17 00:26:05 +0000
+++ lib/lp/codehosting/tests/test_rewrite.py 2009-07-22 03:23:28 +0000
@@ -5,68 +5,91 @@
55
6__metaclass__ = type6__metaclass__ = type
77
8import re
8import os9import os
9import signal10import signal
10import subprocess11import subprocess
11import unittest12import unittest
1213
14import transaction
15
16from zope.security.proxy import removeSecurityProxy
17
13from lp.codehosting.vfs import branch_id_to_path18from lp.codehosting.vfs import branch_id_to_path
14from lp.codehosting.inmemory import InMemoryFrontend, XMLRPCWrapper
15from lp.codehosting.rewrite import BranchRewriter19from lp.codehosting.rewrite import BranchRewriter
16from canonical.config import config20from canonical.config import config
17from lp.testing import TestCase, TestCaseWithFactory21from lp.testing import FakeTime, TestCaseWithFactory
18from canonical.launchpad.scripts import QuietFakeLogger22from canonical.launchpad.scripts import BufferLogger
19from canonical.testing.layers import ZopelessAppServerLayer23from canonical.testing.layers import DatabaseFunctionalLayer
2024
2125
22class TestBranchRewriter(TestCase):26class TestBranchRewriter(TestCaseWithFactory):
27
28 layer = DatabaseFunctionalLayer
2329
24 def setUp(self):30 def setUp(self):
25 frontend = InMemoryFrontend()31 TestCaseWithFactory.setUp(self)
26 self._branchfs = frontend.getFilesystemEndpoint()32 self.fake_time = FakeTime(0)
27 self.factory = frontend.getLaunchpadObjectFactory()
2833
29 def makeRewriter(self):34 def makeRewriter(self):
30 return BranchRewriter(35 return BranchRewriter(BufferLogger(), self.fake_time.now)
31 QuietFakeLogger(), XMLRPCWrapper(self._branchfs))36
3237 def getLoggerOutput(self, rewriter):
33 def test_translateLine_found_dot_bzr(self):38 return rewriter.logger.buffer.getvalue()
39
40 def test_rewriteLine_found_dot_bzr(self):
34 # Requests for /$branch_name/.bzr/... are redirected to where the41 # Requests for /$branch_name/.bzr/... are redirected to where the
35 # branches are served from by ID.42 # branches are served from by ID.
36 rewriter = self.makeRewriter()43 rewriter = self.makeRewriter()
37 branch = self.factory.makeAnyBranch()44 branches = [
38 line = rewriter.rewriteLine("/%s/.bzr/README" % branch.unique_name)45 self.factory.makeProductBranch(),
39 self.assertEqual(46 self.factory.makePersonalBranch(),
47 self.factory.makePackageBranch()]
48 transaction.commit()
49 output = [
50 rewriter.rewriteLine("/%s/.bzr/README" % branch.unique_name)
51 for branch in branches]
52 expected = [
40 'file:///var/tmp/bzrsync/%s/.bzr/README'53 'file:///var/tmp/bzrsync/%s/.bzr/README'
41 % branch_id_to_path(branch.id),54 % branch_id_to_path(branch.id)
42 line)55 for branch in branches]
56 self.assertEqual(expected, output)
4357
44 def test_translateLine_found_not_dot_bzr(self):58 def test_rewriteLine_found_not_dot_bzr(self):
45 # Requests for /$branch_name/... that are not to .bzr directories are59 # Requests for /$branch_name/... that are not to .bzr directories are
46 # redirected to codebrowse.60 # redirected to codebrowse.
47 rewriter = self.makeRewriter()61 rewriter = self.makeRewriter()
48 branch = self.factory.makeAnyBranch()62 branches = [
49 output = rewriter.rewriteLine("/%s/changes" % branch.unique_name)63 self.factory.makeProductBranch(),
50 self.assertEqual(64 self.factory.makePersonalBranch(),
51 'http://localhost:8080/%s/changes' % branch.unique_name,65 self.factory.makePackageBranch()]
52 output)66 transaction.commit()
67 output = [
68 rewriter.rewriteLine("/%s/changes" % branch.unique_name)
69 for branch in branches]
70 expected = [
71 'http://localhost:8080/%s/changes' % branch.unique_name
72 for branch in branches]
73 self.assertEqual(expected, output)
5374
54 def test_translateLine_private(self):75 def test_rewriteLine_private(self):
55 # All requests for /$branch_name/... for private branches are76 # All requests for /$branch_name/... for private branches are
56 # rewritten to codebrowse, which will then redirect them to https and77 # rewritten to codebrowse, which will then redirect them to https and
57 # handle them there.78 # handle them there.
58 rewriter = self.makeRewriter()79 rewriter = self.makeRewriter()
59 branch = self.factory.makeBranch(private=True)80 branch = self.factory.makeAnyBranch(private=True)
60 output = rewriter.rewriteLine("/%s/changes" % branch.unique_name)81 unique_name = removeSecurityProxy(branch).unique_name
61 self.assertEqual(82 transaction.commit()
62 'http://localhost:8080/%s/changes' % branch.unique_name,83 output = [
63 output)84 rewriter.rewriteLine("/%s/changes" % unique_name),
64 output = rewriter.rewriteLine("/%s/.bzr" % branch.unique_name)85 rewriter.rewriteLine("/%s/.bzr" % unique_name)
65 self.assertEqual(86 ]
66 'http://localhost:8080/%s/.bzr' % branch.unique_name,87 self.assertEqual(
88 ['http://localhost:8080/%s/changes' % unique_name,
89 'http://localhost:8080/%s/.bzr' % unique_name],
67 output)90 output)
6891
69 def test_translateLine_static(self):92 def test_rewriteLine_static(self):
70 # Requests to /static are rewritten to codebrowse urls.93 # Requests to /static are rewritten to codebrowse urls.
71 rewriter = self.makeRewriter()94 rewriter = self.makeRewriter()
72 output = rewriter.rewriteLine("/static/foo")95 output = rewriter.rewriteLine("/static/foo")
@@ -74,7 +97,7 @@
74 'http://localhost:8080/static/foo',97 'http://localhost:8080/static/foo',
75 output)98 output)
7699
77 def test_translateLine_not_found(self):100 def test_rewriteLine_not_found(self):
78 # If the request does not map to a branch, we redirect it to101 # If the request does not map to a branch, we redirect it to
79 # codebrowse as it can generate a 404.102 # codebrowse as it can generate a 404.
80 rewriter = self.makeRewriter()103 rewriter = self.makeRewriter()
@@ -84,30 +107,86 @@
84 'http://localhost:8080%s' % not_found_path,107 'http://localhost:8080%s' % not_found_path,
85 output)108 output)
86109
110 def test_rewriteLine_logs_cache_miss(self):
111 # The first request for a branch misses the cache and logs this fact.
112 rewriter = self.makeRewriter()
113 branch = self.factory.makeAnyBranch()
114 transaction.commit()
115 rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
116 logging_output = self.getLoggerOutput(rewriter)
117 self.assertIsNot(
118 None,
119 re.match("INFO: .* -> .* (.*s, cache: MISS)", logging_output),
120 "No miss found in %r" % logging_output)
121
122 def test_rewriteLine_logs_cache_hit(self):
123 # The second request for a branch misses the cache and logs this fact.
124 rewriter = self.makeRewriter()
125 branch = self.factory.makeAnyBranch()
126 transaction.commit()
127 rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
128 rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
129 logging_output_lines = self.getLoggerOutput(rewriter).strip().split('\n')
130 self.assertEqual(2, len(logging_output_lines))
131 self.assertIsNot(
132 None,
133 re.match("INFO: .* -> .* (.*s, cache: HIT)",
134 logging_output_lines[-1]),
135 "No hit found in %r" % logging_output_lines[-1])
136
137 def test_rewriteLine_cache_expires(self):
138 # The second request for a branch misses the cache and logs this fact.
139 rewriter = self.makeRewriter()
140 branch = self.factory.makeAnyBranch()
141 transaction.commit()
142 rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
143 self.fake_time.advance(
144 config.codehosting.branch_rewrite_cache_lifetime + 1)
145 rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
146 logging_output_lines = self.getLoggerOutput(rewriter).strip().split('\n')
147 self.assertEqual(2, len(logging_output_lines))
148 self.assertIsNot(
149 None,
150 re.match("INFO: .* -> .* (.*s, cache: MISS)",
151 logging_output_lines[-1]),
152 "No miss found in %r" % logging_output_lines[-1])
153
87154
88class TestBranchRewriterScript(TestCaseWithFactory):155class TestBranchRewriterScript(TestCaseWithFactory):
89156
90 layer = ZopelessAppServerLayer157 layer = DatabaseFunctionalLayer
91158
92 def test_script(self):159 def test_script(self):
93 branch = self.factory.makeAnyBranch()160 branches = [
94 input = "/%s/.bzr/README\n" % branch.unique_name161 self.factory.makeProductBranch(),
95 expected = (162 self.factory.makePersonalBranch(),
96 "file:///var/tmp/bzrsync/%s/.bzr/README\n"163 self.factory.makePackageBranch()]
97 % branch_id_to_path(branch.id))164 input = [
98 self.layer.txn.commit()165 "/%s/.bzr/README" % branch.unique_name
166 for branch in branches] + [
167 "/%s/changes" % branch.unique_name
168 for branch in branches]
169 expected = [
170 'file:///var/tmp/bzrsync/%s/.bzr/README'
171 % branch_id_to_path(branch.id)
172 for branch in branches] + [
173 'http://localhost:8080/%s/changes' % branch.unique_name
174 for branch in branches]
175 transaction.commit()
99 script_file = os.path.join(176 script_file = os.path.join(
100 config.root, 'scripts', 'branch-rewrite.py')177 config.root, 'scripts', 'branch-rewrite.py')
101 proc = subprocess.Popen(178 proc = subprocess.Popen(
102 [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE,179 [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
103 stderr=subprocess.PIPE, bufsize=0)180 stderr=subprocess.PIPE, bufsize=0)
104 proc.stdin.write(input)181 proc.stdin.write('\n'.join(input) + '\n')
105 output = proc.stdout.readline()182 output = []
183 for i in range(len(input)):
184 output.append(proc.stdout.readline())
106 os.kill(proc.pid, signal.SIGINT)185 os.kill(proc.pid, signal.SIGINT)
107 err = proc.stderr.read()186 err = proc.stderr.read()
108 # The script produces logging output, but not to stderr.187 # The script produces logging output, but not to stderr.
109 self.assertEqual('', err)188 self.assertEqual('', err)
110 self.assertEqual(expected, output)189 self.assertEqual('\n'.join(expected) + '\n', ''.join(output))
111190
112191
113def test_suite():192def test_suite():
114193
=== modified file 'lib/lp/testing/__init__.py'
--- lib/lp/testing/__init__.py 2009-07-17 18:46:25 +0000
+++ lib/lp/testing/__init__.py 2009-07-22 02:26:16 +0000
@@ -224,10 +224,11 @@
224 self.assertTrue(expected is observed,224 self.assertTrue(expected is observed,
225 "%r is not %r" % (expected, observed))225 "%r is not %r" % (expected, observed))
226226
227 def assertIsNot(self, expected, observed):227 def assertIsNot(self, expected, observed, msg=None):
228 """Assert that `expected` is not the same object as `observed`."""228 """Assert that `expected` is not the same object as `observed`."""
229 self.assertTrue(expected is not observed,229 if msg is None:
230 "%r is %r" % (expected, observed))230 msg = "%r is %r" % (expected, observed)
231 self.assertTrue(expected is not observed, msg)
231232
232 def assertIn(self, needle, haystack):233 def assertIn(self, needle, haystack):
233 """Assert that 'needle' is in 'haystack'."""234 """Assert that 'needle' is in 'haystack'."""
234235
=== modified file 'scripts/branch-rewrite.py'
--- scripts/branch-rewrite.py 2009-06-30 16:56:07 +0000
+++ scripts/branch-rewrite.py 2009-07-22 03:36:19 +0000
@@ -14,10 +14,13 @@
1414
15import _pythonpath15import _pythonpath
1616
17import os
17import sys18import sys
18import xmlrpclib19
1920# XXX, MichaelHudson, 2009-07-22, bug=402845: This pointless import avoids a
20from lp.codehosting.vfs import BlockingProxy21# circular import killing us.
22from canonical.launchpad.database import account
23
21from lp.codehosting.rewrite import BranchRewriter24from lp.codehosting.rewrite import BranchRewriter
22from canonical.config import config25from canonical.config import config
23from lp.services.scripts.base import LaunchpadScript26from lp.services.scripts.base import LaunchpadScript
@@ -25,11 +28,6 @@
2528
26class BranchRewriteScript(LaunchpadScript):29class BranchRewriteScript(LaunchpadScript):
2730
28 def __init__(self, name):
29 LaunchpadScript.__init__(self, name)
30 proxy = xmlrpclib.ServerProxy(config.codehosting.branchfs_endpoint)
31 self.rewriter = BranchRewriter(self.logger, BlockingProxy(proxy))
32
33 def add_my_options(self):31 def add_my_options(self):
34 """Make the logging go to a file by default.32 """Make the logging go to a file by default.
3533
@@ -40,26 +38,20 @@
40 value from the config.38 value from the config.
41 """39 """
42 log_file_location = config.codehosting.rewrite_script_log_file40 log_file_location = config.codehosting.rewrite_script_log_file
41 log_file_directory = os.path.dirname(log_file_location)
42 if not os.path.isdir(log_file_directory):
43 os.makedirs(log_file_directory)
43 self.parser.defaults['log_file'] = log_file_location44 self.parser.defaults['log_file'] = log_file_location
4445
45 def run(self, use_web_security=False, implicit_begin=True,
46 isolation=None):
47 """See `LaunchpadScript.run`.
48
49 As this script does not need the component architecture or a
50 connection to the database, we override this method to avoid setting
51 them up.
52 """
53 self.main()
54
55 def main(self):46 def main(self):
47 rewriter = BranchRewriter(self.logger)
56 self.logger.debug("Starting up...")48 self.logger.debug("Starting up...")
57 while True:49 while True:
58 try:50 try:
59 line = sys.stdin.readline()51 line = sys.stdin.readline()
60 # Mod-rewrite always gives us a newline terminated string.52 # Mod-rewrite always gives us a newline terminated string.
61 if line:53 if line:
62 print self.rewriter.rewriteLine(line.strip())54 print rewriter.rewriteLine(line.strip())
63 else:55 else:
64 # Standard input has been closed, so die.56 # Standard input has been closed, so die.
65 return57 return
@@ -71,4 +63,4 @@
7163
7264
73if __name__ == '__main__':65if __name__ == '__main__':
74 BranchRewriteScript("branch-rewrite").run()66 BranchRewriteScript("branch-rewrite", dbuser='branch-rewrite').run()

Subscribers

People subscribed via source and target branches

to status/vote changes: