Merge lp:~leonardr/lazr.restfulclient/retry_on_502 into lp:lazr.restfulclient

Proposed by Leonard Richardson
Status: Merged
Approved by: Brad Crittenden
Approved revision: 99
Merged at revision: not available
Proposed branch: lp:~leonardr/lazr.restfulclient/retry_on_502
Merge into: lp:lazr.restfulclient
Diff against target: 245 lines (+168/-5)
5 files modified
src/lazr/restfulclient/NEWS.txt (+7/-0)
src/lazr/restfulclient/_browser.py (+21/-2)
src/lazr/restfulclient/docs/retry.standalone.txt (+136/-0)
src/lazr/restfulclient/resource.py (+3/-2)
src/lazr/restfulclient/version.txt (+1/-1)
To merge this branch: bzr merge lp:~leonardr/lazr.restfulclient/retry_on_502
Reviewer Review Type Date Requested Status
Brad Crittenden (community) code Approve
Review via email: mp+24257@code.launchpad.net

Description of the change

This branch changes lazr.restfulclient to retry a request that results in a 502 or 503 error. As bug 380504 demonstrates, these errors are pervasive in the Launchpad web service, but they generally go away immediately. This branch makes scripts based on launchpadlib robust in the face of server-side weirdness.

To post a comment you must log in.
Revision history for this message
Brad Crittenden (bac) wrote :
Download full text (3.9 KiB)

Hi Leonard,

> === modified file 'src/lazr/restfulclient/_browser.py'
> --- src/lazr/restfulclient/_browser.py 2010-04-12 19:18:31 +0000
> +++ src/lazr/restfulclient/_browser.py 2010-04-27 19:59:13 +0000

> @@ -242,6 +246,21 @@
> self._connection = service_root.httpFactory(
> credentials, cache, timeout, proxy_info)
> self.user_agent = user_agent
> + self.max_retries = max_retries
> +
> + def _request_and_retry(self, url, method, body, headers):
> + for retry_count in range(0, self.max_retries+1):
> + response, content = self._connection.request(
> + url, method=method, body=body, headers=headers)
> + if response.status in [502, 503]:
> + # The server returned a 502 or 503. Sleep for 0, 1, 2,
> + # 4, 8, 16, ... seconds and try again.
> + sleep_for = int(2**(retry_count-1))
> + sleep(sleep_for)

Even after the terminal failure you sleep again. I guess it doesn't
matter but due to the exponential wait times you may have someone
waiting for a long time even though you've given up.

> + else:
> + break
> + # Either the request succeeded or we gave up.
> + return response, content
>
> def _request(self, url, data=None, method='GET',
> media_type='application/json', extra_headers=None):
> @@ -261,7 +280,7 @@
> if extra_headers is not None:
> headers.update(extra_headers)
> # Make the request.
> - response, content = self._connection.request(
> + response, content = self._request_and_retry(
> str(url), method=method, body=data, headers=headers)
> if response.status == 304:
> # The resource didn't change.

> === added file 'src/lazr/restfulclient/docs/retry.standalone.txt'
> --- src/lazr/restfulclient/docs/retry.standalone.txt 1970-01-01 00:00:00 +0000
> +++ src/lazr/restfulclient/docs/retry.standalone.txt 2010-04-27 19:59:13 +0000
> @@ -0,0 +1,136 @@
> +Retry requests on server error
> +******************************
> +
> +If lazr.restfulclient talks to a server that sends out a server-side
> +error with status codes 502 or 503, the client will wait a few seconds
> +and try the request again. Eventually it will give up and escalate the
> +error code in the form of an exception.
> +
> +To test this, let's simulate a lazr.restful server prone to transient
> +errors using a WSGI application.
> +
> + >>> import pkg_resources
> + >>> wadl_string = pkg_resources.resource_string(
> + ... 'wadllib.tests.data', 'launchpad-wadl.xml')
> + >>> representations = { 'application/vnd.sun.wadl+xml' : wadl_string,
> + ... 'application/json' : '{}' }
> +
> +This application will cause one request to fail for every item in its
> +BROKEN_RESPONSES list.
> +
> + >>> class BrokenApplication:
> + ... BROKEN_RESPONSES = []
> + ...
> + ... def __init__(self, environ, start_response):
> + ... self.environ = environ
> + ... self.start_response = start_response
> + ...
> + ... def __iter__(self):
> +...

Read more...

review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/lazr/restfulclient/NEWS.txt'
2--- src/lazr/restfulclient/NEWS.txt 2010-04-27 12:36:16 +0000
3+++ src/lazr/restfulclient/NEWS.txt 2010-04-27 19:59:13 +0000
4@@ -2,6 +2,13 @@
5 NEWS for lazr.restfulclient
6 ===========================
7
8+0.9.16 (2010-04-27)
9+===================
10+
11+ - If a server returns a 502 or 503 error code, lazr.restfulclient
12+ will retry its request a configurable number of times in hopes that
13+ the error is transient.
14+
15 0.9.15 (2010-04-27)
16 ====================
17
18
19=== modified file 'src/lazr/restfulclient/_browser.py'
20--- src/lazr/restfulclient/_browser.py 2010-04-12 19:18:31 +0000
21+++ src/lazr/restfulclient/_browser.py 2010-04-27 19:59:13 +0000
22@@ -34,6 +34,9 @@
23 import gzip
24 import shutil
25 import tempfile
26+# Import sleep directly into the module so we can monkey-patch it
27+# during a test.
28+from time import sleep
29 from httplib2 import (
30 FailedToDecompressContent, FileCache, Http, urlnorm)
31 import simplejson
32@@ -225,9 +228,10 @@
33 """A class for making calls to lazr.restful web services."""
34
35 NOT_MODIFIED = object()
36+ MAX_RETRIES = 6
37
38 def __init__(self, service_root, credentials, cache=None, timeout=None,
39- proxy_info=None, user_agent=None):
40+ proxy_info=None, user_agent=None, max_retries=MAX_RETRIES):
41 """Initialize, possibly creating a cache.
42
43 If no cache is provided, a temporary directory will be used as
44@@ -242,6 +246,21 @@
45 self._connection = service_root.httpFactory(
46 credentials, cache, timeout, proxy_info)
47 self.user_agent = user_agent
48+ self.max_retries = max_retries
49+
50+ def _request_and_retry(self, url, method, body, headers):
51+ for retry_count in range(0, self.max_retries+1):
52+ response, content = self._connection.request(
53+ url, method=method, body=body, headers=headers)
54+ if response.status in [502, 503]:
55+ # The server returned a 502 or 503. Sleep for 0, 1, 2,
56+ # 4, 8, 16, ... seconds and try again.
57+ sleep_for = int(2**(retry_count-1))
58+ sleep(sleep_for)
59+ else:
60+ break
61+ # Either the request succeeded or we gave up.
62+ return response, content
63
64 def _request(self, url, data=None, method='GET',
65 media_type='application/json', extra_headers=None):
66@@ -261,7 +280,7 @@
67 if extra_headers is not None:
68 headers.update(extra_headers)
69 # Make the request.
70- response, content = self._connection.request(
71+ response, content = self._request_and_retry(
72 str(url), method=method, body=data, headers=headers)
73 if response.status == 304:
74 # The resource didn't change.
75
76=== added file 'src/lazr/restfulclient/docs/retry.standalone.txt'
77--- src/lazr/restfulclient/docs/retry.standalone.txt 1970-01-01 00:00:00 +0000
78+++ src/lazr/restfulclient/docs/retry.standalone.txt 2010-04-27 19:59:13 +0000
79@@ -0,0 +1,136 @@
80+Retry requests on server error
81+******************************
82+
83+If lazr.restfulclient talks to a server that sends out a server-side
84+error with status codes 502 or 503, the client will wait a few seconds
85+and try the request again. Eventually it will give up and escalate the
86+error code in the form of an exception.
87+
88+To test this, let's simulate a lazr.restful server prone to transient
89+errors using a WSGI application.
90+
91+ >>> import pkg_resources
92+ >>> wadl_string = pkg_resources.resource_string(
93+ ... 'wadllib.tests.data', 'launchpad-wadl.xml')
94+ >>> representations = { 'application/vnd.sun.wadl+xml' : wadl_string,
95+ ... 'application/json' : '{}' }
96+
97+This application will cause one request to fail for every item in its
98+BROKEN_RESPONSES list.
99+
100+ >>> class BrokenApplication:
101+ ... BROKEN_RESPONSES = []
102+ ...
103+ ... def __init__(self, environ, start_response):
104+ ... self.environ = environ
105+ ... self.start_response = start_response
106+ ...
107+ ... def __iter__(self):
108+ ... if len(self.BROKEN_RESPONSES) > 0:
109+ ... start_response(self.BROKEN_RESPONSES.pop(),
110+ ... [('Content-type', 'text/plain')])
111+ ... yield "Sorry, I'm still broken."
112+ ... else:
113+ ... media_type = self.environ['HTTP_ACCEPT']
114+ ... content = representations[media_type]
115+ ... self.start_response(
116+ ... '200', [('Content-type', media_type)])
117+ ... yield content
118+
119+ >>> BROKEN_RESPONSES = []
120+ >>> def broken_application(environ, start_response):
121+ ... if len(BROKEN_RESPONSES) > 0:
122+ ... start_response(str(BROKEN_RESPONSES.pop()),
123+ ... [('Content-type', 'text/plain')])
124+ ... return ["Sorry, I'm still broken."]
125+ ... else:
126+ ... media_type = environ['HTTP_ACCEPT']
127+ ... content = representations[media_type]
128+ ... start_response(
129+ ... '200', [('Content-type', media_type)])
130+ ... return [content]
131+
132+ >>> def make_broken_application():
133+ ... return broken_application
134+
135+ >>> import wsgi_intercept
136+ >>> wsgi_intercept.add_wsgi_intercept(
137+ ... 'api.launchpad.dev', 80, make_broken_application)
138+ >>> BROKEN_RESPONSES = []
139+
140+ >>> from wsgi_intercept.httplib2_intercept import install
141+ >>> install()
142+
143+Here's a fake implementation of time.sleep() so that this test doesn't
144+take a really long time to run, and so we can visualize sleep() being
145+called as lazr.restfulclient retries over and over again.
146+
147+ >>> def fake_sleep(time):
148+ ... print "sleep(%s) called" % time
149+ >>> import lazr.restfulclient._browser
150+ >>> old_sleep = lazr.restfulclient._browser.sleep
151+ >>> lazr.restfulclient._browser.sleep = fake_sleep
152+
153+As it starts out, the application isn't broken at all.
154+
155+ >>> from lazr.restfulclient.resource import ServiceRoot
156+ >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
157+
158+Let's queue up one broken response. The client will sleep once and
159+try again.
160+
161+ >>> BROKEN_RESPONSES = [502]
162+ >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
163+ sleep(0) called
164+
165+Now the application will fail six times and then start working.
166+
167+ >>> BROKEN_RESPONSES = [502, 503, 502, 503, 502, 503]
168+ >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
169+ sleep(0) called
170+ sleep(1) called
171+ sleep(2) called
172+ sleep(4) called
173+ sleep(8) called
174+ sleep(16) called
175+
176+Now the application will fail seven times and then start working. But
177+the client will give up before then--it will only retry the request
178+six times.
179+
180+ >>> BROKEN_RESPONSES = [502, 503, 502, 503, 502, 503, 502]
181+ >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
182+ Traceback (most recent call last):
183+ ...
184+ HTTPError: HTTP Error 502:
185+ ...
186+
187+By increasing the 'max_retries' constructor argument, we can make the
188+application try more than six times, and eventually succeed.
189+
190+ >>> BROKEN_RESPONSES = [502, 503, 502, 503, 502, 503, 502]
191+ >>> client = ServiceRoot(None, "http://api.launchpad.dev/",
192+ ... max_retries=10)
193+ sleep(0) called
194+ sleep(1) called
195+ sleep(2) called
196+ sleep(4) called
197+ sleep(8) called
198+ sleep(16) called
199+ sleep(32) called
200+
201+Now the application will fail once and then give a 400 error. The
202+client will not retry in hopes that the 400 error will go away--400 is
203+a client error.
204+
205+ >>> BROKEN_RESPONSES = [502, 400]
206+ >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
207+ Traceback (most recent call last):
208+ ...
209+ HTTPError: HTTP Error 400:
210+ ...
211+
212+Teardown.
213+
214+ >>> wsgi_intercept.remove_wsgi_intercept("api.launchpad.dev", 80)
215+ >>> lazr.restfulclient._browser.sleep = old_sleep
216
217=== modified file 'src/lazr/restfulclient/resource.py'
218--- src/lazr/restfulclient/resource.py 2010-04-22 17:54:16 +0000
219+++ src/lazr/restfulclient/resource.py 2010-04-27 19:59:13 +0000
220@@ -380,7 +380,7 @@
221
222 def __init__(self, authorizer, service_root, cache=None,
223 timeout=None, proxy_info=None, version=None,
224- base_client_name=''):
225+ base_client_name='', max_retries=Browser.MAX_RETRIES):
226 """Root access to a lazr.restful API.
227
228 :param credentials: The credentials used to access the service.
229@@ -401,7 +401,8 @@
230 # Get the WADL definition.
231 self.credentials = authorizer
232 self._browser = Browser(
233- self, authorizer, cache, timeout, proxy_info, self._user_agent)
234+ self, authorizer, cache, timeout, proxy_info, self._user_agent,
235+ max_retries)
236 self._wadl = self._browser.get_wadl_application(self._root_uri)
237
238 # Get the root resource.
239
240=== modified file 'src/lazr/restfulclient/version.txt'
241--- src/lazr/restfulclient/version.txt 2010-04-22 17:57:00 +0000
242+++ src/lazr/restfulclient/version.txt 2010-04-27 19:59:13 +0000
243@@ -1,1 +1,1 @@
244-0.9.15
245+0.9.16

Subscribers

People subscribed via source and target branches