Merge lp:~mwhudson/launchpad/unicod-branch-names-bug-449528 into lp:launchpad

Proposed by Michael Hudson-Doyle
Status: Merged
Approved by: Tim Penhey
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~mwhudson/launchpad/unicod-branch-names-bug-449528
Merge into: lp:launchpad
Diff against target: 396 lines (+121/-29)
8 files modified
lib/lp/code/doc/branch-xmlrpc.txt (+2/-2)
lib/lp/code/model/branch.py (+1/-1)
lib/lp/code/xmlrpc/branch.py (+11/-8)
lib/lp/code/xmlrpc/codehosting.py (+11/-6)
lib/lp/code/xmlrpc/tests/test_branch.py (+63/-6)
lib/lp/code/xmlrpc/tests/test_codehosting.py (+22/-1)
lib/lp/codehosting/inmemory.py (+7/-4)
lib/lp/codehosting/vfs/branchfs.py (+4/-1)
To merge this branch: bzr merge lp:~mwhudson/launchpad/unicod-branch-names-bug-449528
Reviewer Review Type Date Requested Status
Tim Penhey (community) Approve
Review via email: mp+19572@code.launchpad.net

Commit message

Fix some OOPSes that can result from attempts to push branches containing non-ASCII names.

To post a comment you must log in.
Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

Hi there,

This branch fixes various explosions you can get when trying to use non-ascii characters in branch names. We don't allow this, but we should fail gracefully and this branch tries to get us there.

There are various things that conspire to making this annoying:

 * xmlrpclib papers over the difference between str and unicode to the extent that if you send a utf-8 encoded str containing non-ascii characters, the other side will get a unicode string.
   * in particular this applied to the 'faultString' attribute of Faults.
 * Python exceptions containing non-ascii unicode strings are a bit like dodgy hand grenades, at least in Python 2.5: they tend to blow up if you touch them.
 * There turned out to be bugs in bzr in this area, now fixed in the version we are using.

Anyway, this branch fixes most problems I found. I recommend running make run_all and trying to push branches named things like "bzr+ssh://<email address hidden>/~m%c2%a9rk/gnome-trminal/dd".

This branch has been sitting around in my repo for ages. I'll be happy to get it done!

Cheers,
mwh

Revision history for this message
Tim Penhey (thumper) wrote :

Looks good to me. I trust you've tried pushing locally - I'm not going to.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/code/doc/branch-xmlrpc.txt'
2--- lib/lp/code/doc/branch-xmlrpc.txt 2009-08-13 15:12:16 +0000
3+++ lib/lp/code/doc/branch-xmlrpc.txt 2010-02-22 01:48:21 +0000
4@@ -417,8 +417,8 @@
5 >>> results = branchset_api.resolve_lp_path('~mark/+junk/doesntexist')
6 >>> for url in results['urls']:
7 ... print url
8- bzr+ssh://bazaar.launchpad.dev/~mark/+junk/doesntexist
9- http://bazaar.launchpad.dev/~mark/+junk/doesntexist
10+ bzr+ssh://bazaar.launchpad.dev/~mark/%2Bjunk/doesntexist
11+ http://bazaar.launchpad.dev/~mark/%2Bjunk/doesntexist
12
13
14 But if the project or the user doesn't exist, Launchpad will return a fault:
15
16=== modified file 'lib/lp/code/model/branch.py'
17--- lib/lp/code/model/branch.py 2010-02-16 20:36:58 +0000
18+++ lib/lp/code/model/branch.py 2010-02-22 01:48:21 +0000
19@@ -1309,7 +1309,7 @@
20 accepted_schemes.add('sftp')
21 assert scheme in accepted_schemes, "Unknown scheme: %s" % scheme
22 host = URI(config.codehosting.supermirror_root).host
23- path = '/' + unique_name
24+ path = '/' + urlutils.escape(unique_name)
25 if suffix is not None:
26 path = os.path.join(path, suffix)
27 return str(URI(scheme=scheme, host=host, path=path))
28
29=== modified file 'lib/lp/code/xmlrpc/branch.py'
30--- lib/lp/code/xmlrpc/branch.py 2010-02-16 20:36:58 +0000
31+++ lib/lp/code/xmlrpc/branch.py 2010-02-22 01:48:21 +0000
32@@ -12,6 +12,8 @@
33 'PublicCodehostingAPI']
34
35
36+from bzrlib import urlutils
37+
38 from zope.component import getUtility
39 from zope.interface import Interface, implements
40
41@@ -100,7 +102,7 @@
42
43 try:
44 unicode_branch_url = branch_url.decode('utf-8')
45- url = IBranch['url'].validate(unicode_branch_url)
46+ IBranch['url'].validate(unicode_branch_url)
47 except LaunchpadValidationError, exc:
48 return faults.InvalidBranchUrl(branch_url, exc)
49
50@@ -238,23 +240,24 @@
51 # the model code(blech) or some automated way of reraising as faults
52 # or using a narrower range of faults (e.g. only one "NoSuch" fault).
53 except InvalidProductName, e:
54- raise faults.InvalidProductIdentifier(e.name)
55+ raise faults.InvalidProductIdentifier(urlutils.escape(e.name))
56 except NoSuchProductSeries, e:
57- raise faults.NoSuchProductSeries(e.name, e.product)
58+ raise faults.NoSuchProductSeries(
59+ urlutils.escape(e.name), e.product)
60 except NoSuchPerson, e:
61- raise faults.NoSuchPersonWithName(e.name)
62+ raise faults.NoSuchPersonWithName(urlutils.escape(e.name))
63 except NoSuchProduct, e:
64- raise faults.NoSuchProduct(e.name)
65+ raise faults.NoSuchProduct(urlutils.escape(e.name))
66 except NoSuchDistroSeries, e:
67- raise faults.NoSuchDistroSeries(e.name)
68+ raise faults.NoSuchDistroSeries(urlutils.escape(e.name))
69 except NoSuchSourcePackageName, e:
70- raise faults.NoSuchSourcePackageName(e.name)
71+ raise faults.NoSuchSourcePackageName(urlutils.escape(e.name))
72 except NoLinkedBranch, e:
73 raise faults.NoLinkedBranch(e.component)
74 except CannotHaveLinkedBranch, e:
75 raise faults.CannotHaveLinkedBranch(e.component)
76 except InvalidNamespace, e:
77- raise faults.InvalidBranchUniqueName(e.name)
78+ raise faults.InvalidBranchUniqueName(urlutils.escape(e.name))
79 return self._getResultDict(branch, suffix, supported_schemes)
80
81 def resolve_lp_path(self, path):
82
83=== modified file 'lib/lp/code/xmlrpc/codehosting.py'
84--- lib/lp/code/xmlrpc/codehosting.py 2009-10-19 05:40:52 +0000
85+++ lib/lp/code/xmlrpc/codehosting.py 2010-02-22 01:48:21 +0000
86@@ -12,6 +12,7 @@
87
88
89 import datetime
90+import urllib
91
92 import pytz
93
94@@ -232,7 +233,7 @@
95 def create_branch(requester):
96 if not branch_path.startswith('/'):
97 return faults.InvalidPath(branch_path)
98- escaped_path = unescape(branch_path.strip('/')).encode('utf-8')
99+ escaped_path = unescape(branch_path.strip('/'))
100 try:
101 namespace_name, branch_name = split_unique_name(escaped_path)
102 except ValueError:
103@@ -254,7 +255,12 @@
104 try:
105 branch = namespace.createBranch(
106 BranchType.HOSTED, branch_name, requester)
107- except (BranchCreationException, LaunchpadValidationError), e:
108+ except LaunchpadValidationError, e:
109+ msg = e.args[0]
110+ if isinstance(msg, unicode):
111+ msg = msg.encode('utf-8')
112+ return faults.PermissionDenied(msg)
113+ except BranchCreationException, e:
114 return faults.PermissionDenied(str(e))
115 else:
116 return branch.id
117@@ -294,8 +300,7 @@
118 def _serializeControlDirectory(self, requester, product_path,
119 trailing_path):
120 try:
121- namespace = lookup_branch_namespace(
122- unescape(product_path).encode('utf-8'))
123+ namespace = lookup_branch_namespace(product_path)
124 except (InvalidNamespace, NotFoundError):
125 return
126 if not ('.bzr' == trailing_path or trailing_path.startswith('.bzr/')):
127@@ -321,9 +326,9 @@
128 return faults.InvalidPath(path)
129 stripped_path = path.strip('/')
130 for first, second in iter_split(stripped_path, '/'):
131+ first = unescape(first)
132 # Is it a branch?
133- branch = getUtility(IBranchLookup).getByUniqueName(
134- unescape(first).encode('utf-8'))
135+ branch = getUtility(IBranchLookup).getByUniqueName(first)
136 if branch is not None:
137 branch = self._serializeBranch(requester, branch, second)
138 if branch is None:
139
140=== modified file 'lib/lp/code/xmlrpc/tests/test_branch.py'
141--- lib/lp/code/xmlrpc/tests/test_branch.py 2009-11-12 04:28:28 +0000
142+++ lib/lp/code/xmlrpc/tests/test_branch.py 2010-02-22 01:48:21 +0000
143@@ -11,6 +11,8 @@
144 import unittest
145 import xmlrpclib
146
147+from bzrlib import urlutils
148+
149 from zope.security.proxy import removeSecurityProxy
150
151 from canonical.config import config
152@@ -23,6 +25,9 @@
153 from canonical.testing import DatabaseFunctionalLayer
154
155
156+NON_ASCII_NAME = u'nam\N{LATIN SMALL LETTER E WITH ACUTE}'
157+
158+
159 class TestExpandURL(TestCaseWithFactory):
160 """Test the way that URLs are expanded."""
161
162@@ -54,9 +59,7 @@
163 return branch
164
165 def assertResolves(self, lp_url_path, unique_name):
166- """Assert that the given lp URL path expands to the unique name of
167- 'branch'.
168- """
169+ """Assert that `lp_url_path` path expands to `unique_name`."""
170 results = self.api.resolve_lp_path(lp_url_path)
171 # This improves the error message if results happens to be a fault.
172 if isinstance(results, faults.LaunchpadFault):
173@@ -110,7 +113,7 @@
174 self.assertResolves(
175 self.product.name, trunk_series.branch.unique_name)
176
177- def test_productDoesntExist(self):
178+ def test_product_doesnt_exist(self):
179 # Return a NoSuchProduct fault if the product doesn't exist.
180 self.assertFault(
181 'doesntexist', faults.NoSuchProduct('doesntexist'))
182@@ -141,11 +144,19 @@
183 def test_invalid_product_name(self):
184 # If we get a string that cannot be a name for a product where we
185 # expect the name of a product, we should error appropriately.
186- invalid_name = '+' + self.factory.getUniqueString()
187+ invalid_name = '_' + self.factory.getUniqueString()
188 self.assertFault(
189 invalid_name,
190 faults.InvalidProductIdentifier(invalid_name))
191
192+ def test_invalid_product_name_non_ascii(self):
193+ # lp:<non-ascii-string> returns InvalidProductIdentifier with the name
194+ # escaped.
195+ invalid_name = '_' + NON_ASCII_NAME
196+ self.assertFault(
197+ invalid_name,
198+ faults.InvalidProductIdentifier(urlutils.escape(invalid_name)))
199+
200 def test_product_and_series(self):
201 # lp:product/series expands to the branch associated with the product
202 # series 'series' on 'product'.
203@@ -184,6 +195,14 @@
204 '%s/%s' % (self.product.name, "doesntexist"),
205 faults.NoSuchProductSeries("doesntexist", self.product))
206
207+ def test_no_such_product_series_non_ascii(self):
208+ # lp:product/<non-ascii-string> returns NoSuchProductSeries with the
209+ # name escaped.
210+ self.assertFault(
211+ '%s/%s' % (self.product.name, NON_ASCII_NAME),
212+ faults.NoSuchProductSeries(
213+ urlutils.escape(NON_ASCII_NAME), self.product))
214+
215 def test_no_such_distro_series(self):
216 # Return a NoSuchDistroSeries fault if there is no series of the given
217 # name on that distribution.
218@@ -192,6 +211,14 @@
219 '%s/doesntexist/whocares' % distro.name,
220 faults.NoSuchDistroSeries("doesntexist"))
221
222+ def test_no_such_distro_series_non_ascii(self):
223+ # lp:distro/<non-ascii-string>/whatever returns NoSuchDistroSeries
224+ # with the name escaped.
225+ distro = self.factory.makeDistribution()
226+ self.assertFault(
227+ '%s/%s/whocares' % (distro.name, NON_ASCII_NAME),
228+ faults.NoSuchDistroSeries(urlutils.escape(NON_ASCII_NAME)))
229+
230 def test_no_such_source_package(self):
231 # Return a NoSuchSourcePackageName fault if there is no source package
232 # of the given name.
233@@ -201,6 +228,16 @@
234 '%s/%s/doesntexist' % (distribution.name, distroseries.name),
235 faults.NoSuchSourcePackageName('doesntexist'))
236
237+ def test_no_such_source_package_non_ascii(self):
238+ # lp:distro/series/<non-ascii-name> returns NoSuchSourcePackageName
239+ # with the name escaped.
240+ distroseries = self.factory.makeDistroRelease()
241+ distribution = distroseries.distribution
242+ self.assertFault(
243+ '%s/%s/%s' % (
244+ distribution.name, distroseries.name, NON_ASCII_NAME),
245+ faults.NoSuchSourcePackageName(urlutils.escape(NON_ASCII_NAME)))
246+
247 def test_no_linked_branch_for_source_package(self):
248 # Return a NoLinkedBranch fault if there's no linked branch for the
249 # sourcepackage.
250@@ -234,13 +271,23 @@
251 owner.name, self.product.name)
252 self.assertResolves(nonexistent_branch, nonexistent_branch)
253
254+ def test_no_such_branch_product_non_ascii(self):
255+ # A path to a branch that contains non ascii characters will never
256+ # find a branch, but it still resolves rather than erroring.
257+ owner = self.factory.makePerson()
258+ nonexistent_branch = u'~%s/%s/%s' % (
259+ owner.name, self.product.name, NON_ASCII_NAME)
260+ self.assertResolves(
261+ nonexistent_branch, urlutils.escape(nonexistent_branch))
262+
263 def test_no_such_branch_personal(self):
264 # Resolve paths to junk branches.
265 # This test added to make sure we don't raise a fault when looking for
266 # the '+junk' project, which doesn't actually exist.
267 owner = self.factory.makePerson()
268 nonexistent_branch = '~%s/+junk/doesntexist' % owner.name
269- self.assertResolves(nonexistent_branch, nonexistent_branch)
270+ self.assertResolves(
271+ nonexistent_branch, urlutils.escape(nonexistent_branch))
272
273 def test_no_such_branch_package(self):
274 # Resolve paths to package branches even if there's no branch of that
275@@ -269,6 +316,16 @@
276 nonexistent_owner_branch,
277 faults.NoSuchPersonWithName('doesntexist'))
278
279+ def test_resolve_branch_with_no_such_owner_non_ascii(self):
280+ # lp:~<non-ascii-string>/product/name returns NoSuchPersonWithName
281+ # with the name escaped.
282+ nonexistent_owner_branch = u"~%s/%s/%s" % (
283+ NON_ASCII_NAME, self.factory.getUniqueString(),
284+ self.factory.getUniqueString())
285+ self.assertFault(
286+ nonexistent_owner_branch,
287+ faults.NoSuchPersonWithName(urlutils.escape(NON_ASCII_NAME)))
288+
289 def test_too_many_segments(self):
290 # If we have more segments than are necessary to refer to a branch,
291 # then attach these segments to the resolved url.
292
293=== modified file 'lib/lp/code/xmlrpc/tests/test_codehosting.py'
294--- lib/lp/code/xmlrpc/tests/test_codehosting.py 2009-10-19 05:40:52 +0000
295+++ lib/lp/code/xmlrpc/tests/test_codehosting.py 2010-02-22 01:48:21 +0000
296@@ -676,13 +676,27 @@
297 owner = self.factory.makePerson()
298 product = self.factory.makeProduct()
299 invalid_name = 'invalid name!'
300- message = ("Invalid branch name %r. %s"
301+ message = ("Invalid branch name '%s'. %s"
302 % (invalid_name, BRANCH_NAME_VALIDATION_ERROR_MESSAGE))
303 fault = self.branchfs.createBranch(
304 owner.id, escape(
305 '/~%s/%s/%s' % (owner.name, product.name, invalid_name)))
306 self.assertFaultEqual(faults.PermissionDenied(message), fault)
307
308+ def test_createBranch_unicode_name(self):
309+ # Creating a branch with an invalid name fails.
310+ owner = self.factory.makePerson()
311+ product = self.factory.makeProduct()
312+ invalid_name = u'invalid\N{LATIN SMALL LETTER E WITH ACUTE}'
313+ message = ("Invalid branch name '%s'. %s"
314+ % (invalid_name.encode('utf-8'),
315+ str(BRANCH_NAME_VALIDATION_ERROR_MESSAGE)))
316+ fault = self.branchfs.createBranch(
317+ owner.id, escape(
318+ '/~%s/%s/%s' % (owner.name, product.name, invalid_name)))
319+ self.assertFaultEqual(
320+ faults.PermissionDenied(message), fault)
321+
322 def test_createBranch_bad_user(self):
323 # Creating a branch under a non-existent user fails.
324 owner = self.factory.makePerson()
325@@ -920,6 +934,13 @@
326 path = '/~%s/%s/no-such-branch' % (requester.name, product.name)
327 self.assertNotFound(requester, path)
328
329+ def test_translatePath_no_such_branch_non_ascii(self):
330+ requester = self.factory.makePerson()
331+ product = self.factory.makeProduct()
332+ path = u'/~%s/%s/non-asci\N{LATIN SMALL LETTER I WITH DIAERESIS}' % (
333+ requester.name, product.name)
334+ self.assertNotFound(requester, escape(path))
335+
336 def test_translatePath_private_branch(self):
337 requester = self.factory.makePerson()
338 branch = removeSecurityProxy(
339
340=== modified file 'lib/lp/codehosting/inmemory.py'
341--- lib/lp/codehosting/inmemory.py 2010-01-06 12:09:46 +0000
342+++ lib/lp/codehosting/inmemory.py 2010-02-22 01:48:21 +0000
343@@ -555,7 +555,7 @@
344 def createBranch(self, requester_id, branch_path):
345 if not branch_path.startswith('/'):
346 return faults.InvalidPath(branch_path)
347- escaped_path = unescape(branch_path.strip('/')).encode('utf-8')
348+ escaped_path = unescape(branch_path.strip('/'))
349 try:
350 namespace_path, branch_name = escaped_path.rsplit('/', 1)
351 except ValueError:
352@@ -565,7 +565,7 @@
353 owner = self._person_set.getByName(data['person'])
354 if owner is None:
355 return faults.NotFound(
356- "User/team %r does not exist." % (data['person'],))
357+ "User/team '%s' does not exist." % (data['person'],))
358 registrant = self._person_set.get(requester_id)
359 # The real code consults the branch creation policy of the product. We
360 # don't need to do so here, since the tests above this layer never
361@@ -583,7 +583,7 @@
362 product = self._product_set.getByName(data['product'])
363 if product is None:
364 return faults.NotFound(
365- "Project %r does not exist." % (data['product'],))
366+ "Project '%s' does not exist." % (data['product'],))
367 elif data['distribution'] is not None:
368 distro = self._distribution_set.getByName(data['distribution'])
369 if distro is None:
370@@ -612,7 +612,10 @@
371 sourcepackage=sourcepackage, registrant=registrant,
372 branch_type=BranchType.HOSTED).id
373 except LaunchpadValidationError, e:
374- return faults.PermissionDenied(str(e))
375+ msg = e.args[0]
376+ if isinstance(msg, unicode):
377+ msg = msg.encode('utf-8')
378+ return faults.PermissionDenied(msg)
379
380 def requestMirror(self, requester_id, branch_id):
381 self._branch_set.get(branch_id).requestMirror()
382
383=== modified file 'lib/lp/codehosting/vfs/branchfs.py'
384--- lib/lp/codehosting/vfs/branchfs.py 2010-01-20 02:15:27 +0000
385+++ lib/lp/codehosting/vfs/branchfs.py 2010-02-22 01:48:21 +0000
386@@ -612,7 +612,10 @@
387 # parent directories", which is just misleading.
388 fault = trap_fault(
389 failure, faults.NotFound, faults.PermissionDenied)
390- raise PermissionDenied(virtual_url_fragment, fault.faultString)
391+ faultString = fault.faultString
392+ if isinstance(faultString, unicode):
393+ faultString = faultString.encode('utf-8')
394+ raise PermissionDenied(virtual_url_fragment, faultString)
395
396 return deferred.addErrback(translate_fault)
397