Merge lp:~wgrant/launchpad/bug-677270 into lp:launchpad

Proposed by William Grant
Status: Merged
Merged at revision: 17866
Proposed branch: lp:~wgrant/launchpad/bug-677270
Merge into: lp:launchpad
Diff against target: 152 lines (+57/-8)
3 files modified
lib/lp/services/librarian/client.py (+6/-3)
lib/lp/services/librarianserver/db.py (+20/-2)
lib/lp/services/librarianserver/tests/test_web.py (+31/-3)
To merge this branch: bzr merge lp:~wgrant/launchpad/bug-677270
Reviewer Review Type Date Requested Status
Colin Watson (community) Approve
Review via email: mp+279974@code.launchpad.net

Commit message

Canonicalise path encoding before checking a librarian TimeLimitedToken.

Description of the change

Canonicalise path encoding before checking a librarian TimeLimitedToken, and leave tildes unescaped in librarian URLs to appease RFC 3986 and Chromium.

I'm also tempted to whitelist +, as it has no special meaning in the path. Thoughts welcome.

To post a comment you must log in.
Revision history for this message
Colin Watson (cjwatson) wrote :

IRC discussion has convinced me that we should whitelist +; doing so is harmless and would probably make e.g. dget behave more gracefully. Otherwise this looks good to me.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/services/librarian/client.py'
2--- lib/lp/services/librarian/client.py 2015-07-08 16:05:11 +0000
3+++ lib/lp/services/librarian/client.py 2015-12-10 00:38:11 +0000
4@@ -53,9 +53,12 @@
5
6 def url_path_quote(filename):
7 """Quote `filename` for use in a URL."""
8- # XXX RobertCollins 2004-09-21: Perhaps filenames with / in them
9- # should be disallowed?
10- return urllib.quote(filename).replace('/', '%2F')
11+ # RFC 3986 says ~ should not be generated escaped, but urllib.quote
12+ # predates it. Additionally, + is safe to use unescaped in paths and is
13+ # frequently used in Debian versions, so leave it alone.
14+ #
15+ # This needs to match Library.getAlias' TimeLimitedToken handling.
16+ return urllib.quote(filename, safe='/~+')
17
18
19 def get_libraryfilealias_download_path(aliasID, filename):
20
21=== modified file 'lib/lp/services/librarianserver/db.py'
22--- lib/lp/services/librarianserver/db.py 2014-09-02 02:03:37 +0000
23+++ lib/lp/services/librarianserver/db.py 2015-12-10 00:38:11 +0000
24@@ -9,9 +9,11 @@
25 ]
26
27 import hashlib
28+import urllib
29
30 from storm.expr import (
31 And,
32+ Or,
33 SQL,
34 )
35
36@@ -56,13 +58,29 @@
37 """
38 restricted = self.restricted
39 if token and path:
40- # with a token and a path we may be able to serve restricted files
41+ # With a token and a path we may be able to serve restricted files
42 # on the public port.
43+ #
44+ # The URL-encoding of the path may have changed somewhere
45+ # along the line, so reencode it canonically. LFA.filename
46+ # can't contain slashes, so they're safe to leave unencoded.
47+ # And urllib.quote erroneously excludes ~ from its safe set,
48+ # while RFC 3986 says it should be unescaped and Chromium
49+ # forcibly decodes it in any URL that it sees.
50+ #
51+ # This needs to match url_path_quote.
52+ plain_tilde_path = urllib.quote(urllib.unquote(path), safe='/~+')
53+ # XXX wgrant 2015-12-09: We used to generate URLs with
54+ # escaped tildes, so support those until the tokens are all
55+ # expired.
56+ encoded_tilde_path = urllib.quote(urllib.unquote(path), safe='/')
57 store = session_store()
58 token_found = store.find(TimeLimitedToken,
59 SQL("age(created) < interval '1 day'"),
60 TimeLimitedToken.token == hashlib.sha256(token).hexdigest(),
61- TimeLimitedToken.path == path).is_empty()
62+ Or(
63+ TimeLimitedToken.path == plain_tilde_path,
64+ TimeLimitedToken.path == encoded_tilde_path)).is_empty()
65 store.reset()
66 if token_found:
67 raise LookupError("Token stale/pruned/path mismatch")
68
69=== modified file 'lib/lp/services/librarianserver/tests/test_web.py'
70--- lib/lp/services/librarianserver/tests/test_web.py 2015-10-14 15:22:01 +0000
71+++ lib/lp/services/librarianserver/tests/test_web.py 2015-12-10 00:38:11 +0000
72@@ -15,6 +15,8 @@
73 from lazr.uri import URI
74 import pytz
75 from storm.expr import SQL
76+import testtools
77+from testtools.matchers import EndsWith
78 import transaction
79 from zope.component import getUtility
80
81@@ -48,7 +50,7 @@
82 return str(parsed.replace(path=parsed.path.replace(old, new)))
83
84
85-class LibrarianWebTestCase(unittest.TestCase):
86+class LibrarianWebTestCase(testtools.TestCase):
87 """Test the librarian's web interface."""
88 layer = LaunchpadFunctionalLayer
89 dbuser = 'librarian'
90@@ -237,13 +239,13 @@
91 self.failUnlessEqual(
92 last_modified_header, 'Tue, 30 Jan 2001 13:45:59 GMT')
93
94- def get_restricted_file_and_public_url(self):
95+ def get_restricted_file_and_public_url(self, filename='sample'):
96 # Use a regular LibrarianClient to ensure we speak to the
97 # nonrestricted port on the librarian which is where secured
98 # restricted files are served from.
99 client = LibrarianClient()
100 fileAlias = client.addFile(
101- 'sample', 12, StringIO('a' * 12), contentType='text/plain')
102+ filename, 12, StringIO('a' * 12), contentType='text/plain')
103 # Note: We're deliberately using the wrong url here: we should be
104 # passing secure=True to getURLForAlias, but to use the returned URL
105 # we would need a wildcard DNS facility patched into urlopen; instead
106@@ -333,6 +335,30 @@
107 finally:
108 fileObj.close()
109
110+ def test_restricted_with_token_encoding(self):
111+ fileAlias, url = self.get_restricted_file_and_public_url('foo~%')
112+ self.assertThat(url, EndsWith('/foo~%25'))
113+
114+ # We have the base url for a restricted file; grant access to it
115+ # for a short time.
116+ token = TimeLimitedToken.allocate(url)
117+
118+ # Now we should be able to access the file.
119+ fileObj = urlopen(url + "?token=%s" % token)
120+ try:
121+ self.assertEqual("a" * 12, fileObj.read())
122+ finally:
123+ fileObj.close()
124+
125+ # The token is valid even if the filename is encoded differently.
126+ mangled_url = url.replace('~', '%7E')
127+ self.assertNotEqual(mangled_url, url)
128+ fileObj = urlopen(mangled_url + "?token=%s" % token)
129+ try:
130+ self.assertEqual("a" * 12, fileObj.read())
131+ finally:
132+ fileObj.close()
133+
134 def test_restricted_with_expired_token(self):
135 fileAlias, url = self.get_restricted_file_and_public_url()
136 # We have the base url for a restricted file; grant access to it
137@@ -384,6 +410,7 @@
138 layer = LaunchpadZopelessLayer
139
140 def setUp(self):
141+ super(LibrarianZopelessWebTestCase, self).setUp()
142 switch_dbuser(config.librarian.dbuser)
143
144 def commit(self):
145@@ -409,6 +436,7 @@
146 layer = LaunchpadZopelessLayer
147
148 def setUp(self):
149+ super(DeletedContentTestCase, self).setUp()
150 switch_dbuser(config.librarian.dbuser)
151
152 def test_deletedContentNotFound(self):