Merge lp:~leonardr/lazr.restfulclient/retry_on_502 into lp:lazr.restfulclient

Proposed by Leonard Richardson
Status: Merged
Approved by: Brad Crittenden
Approved revision: 99
Merged at revision: not available
Proposed branch: lp:~leonardr/lazr.restfulclient/retry_on_502
Merge into: lp:lazr.restfulclient
Diff against target: 245 lines (+168/-5)
5 files modified
src/lazr/restfulclient/NEWS.txt (+7/-0)
src/lazr/restfulclient/_browser.py (+21/-2)
src/lazr/restfulclient/docs/retry.standalone.txt (+136/-0)
src/lazr/restfulclient/resource.py (+3/-2)
src/lazr/restfulclient/version.txt (+1/-1)
To merge this branch: bzr merge lp:~leonardr/lazr.restfulclient/retry_on_502
Reviewer Review Type Date Requested Status
Brad Crittenden (community) code Approve
Review via email: mp+24257@code.launchpad.net

Description of the change

This branch changes lazr.restfulclient to retry a request that results in a 502 or 503 error. As bug 380504 demonstrates, these errors are pervasive in the Launchpad web service, but they generally go away immediately. This branch makes scripts based on launchpadlib robust in the face of server-side weirdness.

To post a comment you must log in.
Revision history for this message
Brad Crittenden (bac) wrote :
Download full text (3.9 KiB)

Hi Leonard,

> === modified file 'src/lazr/restfulclient/_browser.py'
> --- src/lazr/restfulclient/_browser.py 2010-04-12 19:18:31 +0000
> +++ src/lazr/restfulclient/_browser.py 2010-04-27 19:59:13 +0000

> @@ -242,6 +246,21 @@
> self._connection = service_root.httpFactory(
> credentials, cache, timeout, proxy_info)
> self.user_agent = user_agent
> + self.max_retries = max_retries
> +
> + def _request_and_retry(self, url, method, body, headers):
> + for retry_count in range(0, self.max_retries+1):
> + response, content = self._connection.request(
> + url, method=method, body=body, headers=headers)
> + if response.status in [502, 503]:
> + # The server returned a 502 or 503. Sleep for 0, 1, 2,
> + # 4, 8, 16, ... seconds and try again.
> + sleep_for = int(2**(retry_count-1))
> + sleep(sleep_for)

Even after the terminal failure you sleep again. I guess it doesn't
matter but due to the exponential wait times you may have someone
waiting for a long time even though you've given up.

> + else:
> + break
> + # Either the request succeeded or we gave up.
> + return response, content
>
> def _request(self, url, data=None, method='GET',
> media_type='application/json', extra_headers=None):
> @@ -261,7 +280,7 @@
> if extra_headers is not None:
> headers.update(extra_headers)
> # Make the request.
> - response, content = self._connection.request(
> + response, content = self._request_and_retry(
> str(url), method=method, body=data, headers=headers)
> if response.status == 304:
> # The resource didn't change.

> === added file 'src/lazr/restfulclient/docs/retry.standalone.txt'
> --- src/lazr/restfulclient/docs/retry.standalone.txt 1970-01-01 00:00:00 +0000
> +++ src/lazr/restfulclient/docs/retry.standalone.txt 2010-04-27 19:59:13 +0000
> @@ -0,0 +1,136 @@
> +Retry requests on server error
> +******************************
> +
> +If lazr.restfulclient talks to a server that sends out a server-side
> +error with status codes 502 or 503, the client will wait a few seconds
> +and try the request again. Eventually it will give up and escalate the
> +error code in the form of an exception.
> +
> +To test this, let's simulate a lazr.restful server prone to transient
> +errors using a WSGI application.
> +
> + >>> import pkg_resources
> + >>> wadl_string = pkg_resources.resource_string(
> + ... 'wadllib.tests.data', 'launchpad-wadl.xml')
> + >>> representations = { 'application/vnd.sun.wadl+xml' : wadl_string,
> + ... 'application/json' : '{}' }
> +
> +This application will cause one request to fail for every item in its
> +BROKEN_RESPONSES list.
> +
> + >>> class BrokenApplication:
> + ... BROKEN_RESPONSES = []
> + ...
> + ... def __init__(self, environ, start_response):
> + ... self.environ = environ
> + ... self.start_response = start_response
> + ...
> + ... def __iter__(self):
> +...

Read more...

review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'src/lazr/restfulclient/NEWS.txt'
--- src/lazr/restfulclient/NEWS.txt 2010-04-27 12:36:16 +0000
+++ src/lazr/restfulclient/NEWS.txt 2010-04-27 19:59:13 +0000
@@ -2,6 +2,13 @@
2NEWS for lazr.restfulclient2NEWS for lazr.restfulclient
3===========================3===========================
44
50.9.16 (2010-04-27)
6===================
7
8 - If a server returns a 502 or 503 error code, lazr.restfulclient
9 will retry its request a configurable number of times in hopes that
10 the error is transient.
11
50.9.15 (2010-04-27)120.9.15 (2010-04-27)
6====================13====================
714
815
=== modified file 'src/lazr/restfulclient/_browser.py'
--- src/lazr/restfulclient/_browser.py 2010-04-12 19:18:31 +0000
+++ src/lazr/restfulclient/_browser.py 2010-04-27 19:59:13 +0000
@@ -34,6 +34,9 @@
34import gzip34import gzip
35import shutil35import shutil
36import tempfile36import tempfile
37# Import sleep directly into the module so we can monkey-patch it
38# during a test.
39from time import sleep
37from httplib2 import (40from httplib2 import (
38 FailedToDecompressContent, FileCache, Http, urlnorm)41 FailedToDecompressContent, FileCache, Http, urlnorm)
39import simplejson42import simplejson
@@ -225,9 +228,10 @@
225 """A class for making calls to lazr.restful web services."""228 """A class for making calls to lazr.restful web services."""
226229
227 NOT_MODIFIED = object()230 NOT_MODIFIED = object()
231 MAX_RETRIES = 6
228232
229 def __init__(self, service_root, credentials, cache=None, timeout=None,233 def __init__(self, service_root, credentials, cache=None, timeout=None,
230 proxy_info=None, user_agent=None):234 proxy_info=None, user_agent=None, max_retries=MAX_RETRIES):
231 """Initialize, possibly creating a cache.235 """Initialize, possibly creating a cache.
232236
233 If no cache is provided, a temporary directory will be used as237 If no cache is provided, a temporary directory will be used as
@@ -242,6 +246,21 @@
242 self._connection = service_root.httpFactory(246 self._connection = service_root.httpFactory(
243 credentials, cache, timeout, proxy_info)247 credentials, cache, timeout, proxy_info)
244 self.user_agent = user_agent248 self.user_agent = user_agent
249 self.max_retries = max_retries
250
251 def _request_and_retry(self, url, method, body, headers):
252 for retry_count in range(0, self.max_retries+1):
253 response, content = self._connection.request(
254 url, method=method, body=body, headers=headers)
255 if response.status in [502, 503]:
256 # The server returned a 502 or 503. Sleep for 0, 1, 2,
257 # 4, 8, 16, ... seconds and try again.
258 sleep_for = int(2**(retry_count-1))
259 sleep(sleep_for)
260 else:
261 break
262 # Either the request succeeded or we gave up.
263 return response, content
245264
246 def _request(self, url, data=None, method='GET',265 def _request(self, url, data=None, method='GET',
247 media_type='application/json', extra_headers=None):266 media_type='application/json', extra_headers=None):
@@ -261,7 +280,7 @@
261 if extra_headers is not None:280 if extra_headers is not None:
262 headers.update(extra_headers)281 headers.update(extra_headers)
263 # Make the request.282 # Make the request.
264 response, content = self._connection.request(283 response, content = self._request_and_retry(
265 str(url), method=method, body=data, headers=headers)284 str(url), method=method, body=data, headers=headers)
266 if response.status == 304:285 if response.status == 304:
267 # The resource didn't change.286 # The resource didn't change.
268287
=== added file 'src/lazr/restfulclient/docs/retry.standalone.txt'
--- src/lazr/restfulclient/docs/retry.standalone.txt 1970-01-01 00:00:00 +0000
+++ src/lazr/restfulclient/docs/retry.standalone.txt 2010-04-27 19:59:13 +0000
@@ -0,0 +1,136 @@
1Retry requests on server error
2******************************
3
4If lazr.restfulclient talks to a server that sends out a server-side
5error with status codes 502 or 503, the client will wait a few seconds
6and try the request again. Eventually it will give up and escalate the
7error code in the form of an exception.
8
9To test this, let's simulate a lazr.restful server prone to transient
10errors using a WSGI application.
11
12 >>> import pkg_resources
13 >>> wadl_string = pkg_resources.resource_string(
14 ... 'wadllib.tests.data', 'launchpad-wadl.xml')
15 >>> representations = { 'application/vnd.sun.wadl+xml' : wadl_string,
16 ... 'application/json' : '{}' }
17
18This application will cause one request to fail for every item in its
19BROKEN_RESPONSES list.
20
21 >>> class BrokenApplication:
22 ... BROKEN_RESPONSES = []
23 ...
24 ... def __init__(self, environ, start_response):
25 ... self.environ = environ
26 ... self.start_response = start_response
27 ...
28 ... def __iter__(self):
29 ... if len(self.BROKEN_RESPONSES) > 0:
30 ... start_response(self.BROKEN_RESPONSES.pop(),
31 ... [('Content-type', 'text/plain')])
32 ... yield "Sorry, I'm still broken."
33 ... else:
34 ... media_type = self.environ['HTTP_ACCEPT']
35 ... content = representations[media_type]
36 ... self.start_response(
37 ... '200', [('Content-type', media_type)])
38 ... yield content
39
40 >>> BROKEN_RESPONSES = []
41 >>> def broken_application(environ, start_response):
42 ... if len(BROKEN_RESPONSES) > 0:
43 ... start_response(str(BROKEN_RESPONSES.pop()),
44 ... [('Content-type', 'text/plain')])
45 ... return ["Sorry, I'm still broken."]
46 ... else:
47 ... media_type = environ['HTTP_ACCEPT']
48 ... content = representations[media_type]
49 ... start_response(
50 ... '200', [('Content-type', media_type)])
51 ... return [content]
52
53 >>> def make_broken_application():
54 ... return broken_application
55
56 >>> import wsgi_intercept
57 >>> wsgi_intercept.add_wsgi_intercept(
58 ... 'api.launchpad.dev', 80, make_broken_application)
59 >>> BROKEN_RESPONSES = []
60
61 >>> from wsgi_intercept.httplib2_intercept import install
62 >>> install()
63
64Here's a fake implementation of time.sleep() so that this test doesn't
65take a really long time to run, and so we can visualize sleep() being
66called as lazr.restfulclient retries over and over again.
67
68 >>> def fake_sleep(time):
69 ... print "sleep(%s) called" % time
70 >>> import lazr.restfulclient._browser
71 >>> old_sleep = lazr.restfulclient._browser.sleep
72 >>> lazr.restfulclient._browser.sleep = fake_sleep
73
74As it starts out, the application isn't broken at all.
75
76 >>> from lazr.restfulclient.resource import ServiceRoot
77 >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
78
79Let's queue up one broken response. The client will sleep once and
80try again.
81
82 >>> BROKEN_RESPONSES = [502]
83 >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
84 sleep(0) called
85
86Now the application will fail six times and then start working.
87
88 >>> BROKEN_RESPONSES = [502, 503, 502, 503, 502, 503]
89 >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
90 sleep(0) called
91 sleep(1) called
92 sleep(2) called
93 sleep(4) called
94 sleep(8) called
95 sleep(16) called
96
97Now the application will fail seven times and then start working. But
98the client will give up before then--it will only retry the request
99six times.
100
101 >>> BROKEN_RESPONSES = [502, 503, 502, 503, 502, 503, 502]
102 >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
103 Traceback (most recent call last):
104 ...
105 HTTPError: HTTP Error 502:
106 ...
107
108By increasing the 'max_retries' constructor argument, we can make the
109application try more than six times, and eventually succeed.
110
111 >>> BROKEN_RESPONSES = [502, 503, 502, 503, 502, 503, 502]
112 >>> client = ServiceRoot(None, "http://api.launchpad.dev/",
113 ... max_retries=10)
114 sleep(0) called
115 sleep(1) called
116 sleep(2) called
117 sleep(4) called
118 sleep(8) called
119 sleep(16) called
120 sleep(32) called
121
122Now the application will fail once and then give a 400 error. The
123client will not retry in hopes that the 400 error will go away--400 is
124a client error.
125
126 >>> BROKEN_RESPONSES = [502, 400]
127 >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
128 Traceback (most recent call last):
129 ...
130 HTTPError: HTTP Error 400:
131 ...
132
133Teardown.
134
135 >>> wsgi_intercept.remove_wsgi_intercept("api.launchpad.dev", 80)
136 >>> lazr.restfulclient._browser.sleep = old_sleep
0137
=== modified file 'src/lazr/restfulclient/resource.py'
--- src/lazr/restfulclient/resource.py 2010-04-22 17:54:16 +0000
+++ src/lazr/restfulclient/resource.py 2010-04-27 19:59:13 +0000
@@ -380,7 +380,7 @@
380380
381 def __init__(self, authorizer, service_root, cache=None,381 def __init__(self, authorizer, service_root, cache=None,
382 timeout=None, proxy_info=None, version=None,382 timeout=None, proxy_info=None, version=None,
383 base_client_name=''):383 base_client_name='', max_retries=Browser.MAX_RETRIES):
384 """Root access to a lazr.restful API.384 """Root access to a lazr.restful API.
385385
386 :param credentials: The credentials used to access the service.386 :param credentials: The credentials used to access the service.
@@ -401,7 +401,8 @@
401 # Get the WADL definition.401 # Get the WADL definition.
402 self.credentials = authorizer402 self.credentials = authorizer
403 self._browser = Browser(403 self._browser = Browser(
404 self, authorizer, cache, timeout, proxy_info, self._user_agent)404 self, authorizer, cache, timeout, proxy_info, self._user_agent,
405 max_retries)
405 self._wadl = self._browser.get_wadl_application(self._root_uri)406 self._wadl = self._browser.get_wadl_application(self._root_uri)
406407
407 # Get the root resource.408 # Get the root resource.
408409
=== modified file 'src/lazr/restfulclient/version.txt'
--- src/lazr/restfulclient/version.txt 2010-04-22 17:57:00 +0000
+++ src/lazr/restfulclient/version.txt 2010-04-27 19:59:13 +0000
@@ -1,1 +1,1 @@
10.9.1510.9.16

Subscribers

People subscribed via source and target branches