Merge into trunk : retry_on_502 : Code : lazr.restfulclient

Status:

Merged

Approved by:

Brad Crittenden on 2010-04-27

Approved revision:

99

Merged at revision:

not available

Proposed branch:

lp:~leonardr/lazr.restfulclient/retry_on_502

Merge into:

lp:lazr.restfulclient

Diff against target:

245 lines (+168/-5)

5 files modified

src/lazr/restfulclient/NEWS.txt (+7/-0)
src/lazr/restfulclient/_browser.py (+21/-2)
src/lazr/restfulclient/docs/retry.standalone.txt (+136/-0)
src/lazr/restfulclient/resource.py (+3/-2)
src/lazr/restfulclient/version.txt (+1/-1)

To merge this branch:

bzr merge lp:~leonardr/lazr.restfulclient/retry_on_502

High

Fix Released

Link a bug report

Reviewer	Review Type	Date Requested	Status
Brad Crittenden (community)	code	2010-04-27	Approve on 2010-04-27
Review via email: mp+24257@code.launchpad.net

Description of the change

This branch changes lazr.restfulclient to retry a request that results in a 502 or 503 error. As bug 380504 demonstrates, these errors are pervasive in the Launchpad web service, but they generally go away immediately. This branch makes scripts based on launchpadlib robust in the face of server-side weirdness.

Revision history for this message

Brad Crittenden (bac) wrote on 2010-04-27:

#

Download full text (3.9 KiB)

Hi Leonard,

> === modified file 'src/lazr/restfulclient/_browser.py'
> --- src/lazr/restfulclient/_browser.py 2010-04-12 19:18:31 +0000
> +++ src/lazr/restfulclient/_browser.py 2010-04-27 19:59:13 +0000

> @@ -242,6 +246,21 @@
> self._connection = service_root.httpFactory(
> credentials, cache, timeout, proxy_info)
> self.user_agent = user_agent
> + self.max_retries = max_retries
> +
> + def _request_and_retry(self, url, method, body, headers):
> + for retry_count in range(0, self.max_retries+1):
> + response, content = self._connection.request(
> + url, method=method, body=body, headers=headers)
> + if response.status in [502, 503]:
> + # The server returned a 502 or 503. Sleep for 0, 1, 2,
> + # 4, 8, 16, ... seconds and try again.
> + sleep_for = int(2**(retry_count-1))
> + sleep(sleep_for)

Even after the final failed attempt you still sleep before returning.
It may not matter much, but because the wait times grow exponentially,
a caller could wait a long time even though you have already given up.

> + else:
> + break
> + # Either the request succeeded or we gave up.
> + return response, content
>
> def _request(self, url, data=None, method='GET',
> media_type='application/json', extra_headers=None):
> @@ -261,7 +280,7 @@
> if extra_headers is not None:
> headers.update(extra_headers)
> # Make the request.
> - response, content = self._connection.request(
> + response, content = self._request_and_retry(
> str(url), method=method, body=data, headers=headers)
> if response.status == 304:
> # The resource didn't change.

> === added file 'src/lazr/restfulclient/docs/retry.standalone.txt'
> --- src/lazr/restfulclient/docs/retry.standalone.txt 1970-01-01 00:00:00 +0000
> +++ src/lazr/restfulclient/docs/retry.standalone.txt 2010-04-27 19:59:13 +0000
> @@ -0,0 +1,136 @@
> +Retry requests on server error
> +******************************
> +
> +If lazr.restfulclient talks to a server that sends out a server-side
> +error with status codes 502 or 503, the client will wait a few seconds
> +and try the request again. Eventually it will give up and escalate the
> +error code in the form of an exception.
> +
> +To test this, let's simulate a lazr.restful server prone to transient
> +errors using a WSGI application.
> +
> + >>> import pkg_resources
> + >>> wadl_string = pkg_resources.resource_string(
> + ... 'wadllib.tests.data', 'launchpad-wadl.xml')
> + >>> representations = { 'application/vnd.sun.wadl+xml' : wadl_string,
> + ... 'application/json' : '{}' }
> +
> +This application will cause one request to fail for every item in its
> +BROKEN_RESPONSES list.
> +
> + >>> class BrokenApplication:
> + ... BROKEN_RESPONSES = []
> + ...
> + ... def __init__(self, environ, start_response):
> + ... self.environ = environ
> + ... self.start_response = start_response
> + ...
> + ... def __iter__(self):
> +...

Hi Leonard,

> === modified file 'src/lazr/restfulclient/_browser.py'
> --- src/lazr/restfulclient/_browser.py	2010-04-12 19:18:31 +0000
> +++ src/lazr/restfulclient/_browser.py	2010-04-27 19:59:13 +0000

> @@ -242,6 +246,21 @@
>          self._connection = service_root.httpFactory(
>              credentials, cache, timeout, proxy_info)
>          self.user_agent = user_agent
> +        self.max_retries = max_retries
> +
> +    def _request_and_retry(self, url, method, body, headers):
> +        for retry_count in range(0, self.max_retries+1):
> +            response, content = self._connection.request(
> +                url, method=method, body=body, headers=headers)
> +            if response.status in [502, 503]:
> +                # The server returned a 502 or 503. Sleep for 0, 1, 2,
> +                # 4, 8, 16, ... seconds and try again.
> +                sleep_for = int(2**(retry_count-1))
> +                sleep(sleep_for)

Even after the final failed attempt you still sleep before returning.
It may not matter much, but because the wait times grow exponentially,
a caller could wait a long time even though you have already given up.

> +            else:
> +                break
> +        # Either the request succeeded or we gave up.
> +        return response, content
>  
>      def _request(self, url, data=None, method='GET',
>                   media_type='application/json', extra_headers=None):
> @@ -261,7 +280,7 @@
>          if extra_headers is not None:
>              headers.update(extra_headers)
>          # Make the request.
> -        response, content = self._connection.request(
> +        response, content = self._request_and_retry(
>              str(url), method=method, body=data, headers=headers)
>          if response.status == 304:
>              # The resource didn't change.

> === added file 'src/lazr/restfulclient/docs/retry.standalone.txt'
> --- src/lazr/restfulclient/docs/retry.standalone.txt	1970-01-01 00:00:00 +0000
> +++ src/lazr/restfulclient/docs/retry.standalone.txt	2010-04-27 19:59:13 +0000
> @@ -0,0 +1,136 @@
> +Retry requests on server error
> +******************************
> +
> +If lazr.restfulclient talks to a server that sends out a server-side
> +error with status codes 502 or 503, the client will wait a few seconds
> +and try the request again. Eventually it will give up and escalate the
> +error code in the form of an exception.
> +
> +To test this, let's simulate a lazr.restful server prone to transient
> +errors using a WSGI application.
> +
> +    >>> import pkg_resources
> +    >>> wadl_string = pkg_resources.resource_string(
> +    ...     'wadllib.tests.data', 'launchpad-wadl.xml')
> +    >>> representations = { 'application/vnd.sun.wadl+xml' : wadl_string,
> +    ...                     'application/json' : '{}' }
> +
> +This application will cause one request to fail for every item in its
> +BROKEN_RESPONSES list.
> +
> +    >>> class BrokenApplication:
> +    ...     BROKEN_RESPONSES = []
> +    ...
> +    ...     def __init__(self, environ, start_response):
> +    ...         self.environ = environ
> +    ...         self.start_response = start_response
> +    ...
> +    ...     def __iter__(self):
> +    ...         if len(self.BROKEN_RESPONSES) > 0:
> +    ...             start_response(self.BROKEN_RESPONSES.pop(),
> +    ...                            [('Content-type', 'text/plain')])
> +    ...             yield "Sorry, I'm still broken."
> +    ...         else:
> +    ...             media_type = self.environ['HTTP_ACCEPT']
> +    ...             content = representations[media_type]
> +    ...             self.start_response(
> +    ...                 '200', [('Content-type', media_type)])
> +    ...             yield content

On IRC we agreed BrokenApplication may not be required now.

> +    >>> BROKEN_RESPONSES = []
> +    >>> def broken_application(environ, start_response):
> +    ...     if len(BROKEN_RESPONSES) > 0:

review: Approve (code)

lazr.restfulclient

Merge lp:~leonardr/lazr.restfulclient/retry_on_502 into lp:lazr.restfulclient

Commit message

Description of the change

Preview Diff

Subscribers

 === modified file 'src/lazr/restfulclient/NEWS.txt'
 --- src/lazr/restfulclient/NEWS.txt	2010-04-27 12:36:16 +0000
 +++ src/lazr/restfulclient/NEWS.txt	2010-04-27 19:59:13 +0000
@@ -2,6 +2,13 @@
  NEWS for lazr.restfulclient
  ===========================
++0.9.16 (2010-04-27)
++===================
++
++ - If a server returns a 502 or 503 error code, lazr.restfulclient
++   will retry its request a configurable number of times in hopes that
++   the error is transient.
++
  0.9.15 (2010-04-27)
  ====================
 === modified file 'src/lazr/restfulclient/_browser.py'
 --- src/lazr/restfulclient/_browser.py	2010-04-12 19:18:31 +0000
 +++ src/lazr/restfulclient/_browser.py	2010-04-27 19:59:13 +0000
@@ -34,6 +34,9 @@
  import gzip
  import shutil
  import tempfile
++# Import sleep directly into the module so we can monkey-patch it
++# during a test.
++from time import sleep
  from httplib2 import (
      FailedToDecompressContent, FileCache, Http, urlnorm)
  import simplejson
@@ -225,9 +228,10 @@
      """A class for making calls to lazr.restful web services."""
      NOT_MODIFIED = object()
++    MAX_RETRIES = 6
      def __init__(self, service_root, credentials, cache=None, timeout=None,
--                 proxy_info=None, user_agent=None):
++                 proxy_info=None, user_agent=None, max_retries=MAX_RETRIES):
          """Initialize, possibly creating a cache.
          If no cache is provided, a temporary directory will be used as
@@ -242,6 +246,21 @@
          self._connection = service_root.httpFactory(
              credentials, cache, timeout, proxy_info)
          self.user_agent = user_agent
++        self.max_retries = max_retries
++
++    def _request_and_retry(self, url, method, body, headers):
++        for retry_count in range(0, self.max_retries+1):
++            response, content = self._connection.request(
++                url, method=method, body=body, headers=headers)
++            if response.status in [502, 503]:
++                # The server returned a 502 or 503. Sleep for 0, 1, 2,
++                # 4, 8, 16, ... seconds and try again.
++                sleep_for = int(2**(retry_count-1))
++                sleep(sleep_for)
++            else:
++                break
++        # Either the request succeeded or we gave up.
++        return response, content
      def _request(self, url, data=None, method='GET',
                   media_type='application/json', extra_headers=None):
@@ -261,7 +280,7 @@
          if extra_headers is not None:
              headers.update(extra_headers)
          # Make the request.
--        response, content = self._connection.request(
++        response, content = self._request_and_retry(
              str(url), method=method, body=data, headers=headers)
          if response.status == 304:
              # The resource didn't change.
 === added file 'src/lazr/restfulclient/docs/retry.standalone.txt'
 --- src/lazr/restfulclient/docs/retry.standalone.txt	1970-01-01 00:00:00 +0000
 +++ src/lazr/restfulclient/docs/retry.standalone.txt	2010-04-27 19:59:13 +0000
@@ -0,0 +1,136 @@
++Retry requests on server error
++******************************
++
++If lazr.restfulclient talks to a server that sends out a server-side
++error with status codes 502 or 503, the client will wait a few seconds
++and try the request again. Eventually it will give up and escalate the
++error code in the form of an exception.
++
++To test this, let's simulate a lazr.restful server prone to transient
++errors using a WSGI application.
++
++    >>> import pkg_resources
++    >>> wadl_string = pkg_resources.resource_string(
++    ...     'wadllib.tests.data', 'launchpad-wadl.xml')
++    >>> representations = { 'application/vnd.sun.wadl+xml' : wadl_string,
++    ...                     'application/json' : '{}' }
++
++This application will cause one request to fail for every item in its
++BROKEN_RESPONSES list.
++
++    >>> class BrokenApplication:
++    ...     BROKEN_RESPONSES = []
++    ...
++    ...     def __init__(self, environ, start_response):
++    ...         self.environ = environ
++    ...         self.start_response = start_response
++    ...
++    ...     def __iter__(self):
++    ...         if len(self.BROKEN_RESPONSES) > 0:
++    ...             start_response(self.BROKEN_RESPONSES.pop(),
++    ...                            [('Content-type', 'text/plain')])
++    ...             yield "Sorry, I'm still broken."
++    ...         else:
++    ...             media_type = self.environ['HTTP_ACCEPT']
++    ...             content = representations[media_type]
++    ...             self.start_response(
++    ...                 '200', [('Content-type', media_type)])
++    ...             yield content
++
++    >>> BROKEN_RESPONSES = []
++    >>> def broken_application(environ, start_response):
++    ...     if len(BROKEN_RESPONSES) > 0:
++    ...         start_response(str(BROKEN_RESPONSES.pop()),
++    ...                        [('Content-type', 'text/plain')])
++    ...         return ["Sorry, I'm still broken."]
++    ...     else:
++    ...         media_type = environ['HTTP_ACCEPT']
++    ...         content = representations[media_type]
++    ...         start_response(
++    ...             '200', [('Content-type', media_type)])
++    ...         return [content]
++
++    >>> def make_broken_application():
++    ...     return broken_application
++
++    >>> import wsgi_intercept
++    >>> wsgi_intercept.add_wsgi_intercept(
++    ...     'api.launchpad.dev', 80, make_broken_application)
++    >>> BROKEN_RESPONSES = []
++
++    >>> from wsgi_intercept.httplib2_intercept import install
++    >>> install()
++
++Here's a fake implementation of time.sleep() so that this test doesn't
++take a really long time to run, and so we can visualize sleep() being
++called as lazr.restfulclient retries over and over again.
++
++    >>> def fake_sleep(time):
++    ...     print "sleep(%s) called" % time
++    >>> import lazr.restfulclient._browser
++    >>> old_sleep = lazr.restfulclient._browser.sleep
++    >>> lazr.restfulclient._browser.sleep = fake_sleep
++
++As it starts out, the application isn't broken at all.
++
++    >>> from lazr.restfulclient.resource import ServiceRoot
++    >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
++
++Let's queue up one broken response. The client will sleep once and
++try again.
++
++    >>> BROKEN_RESPONSES = [502]
++    >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
++    sleep(0) called
++
++Now the application will fail six times and then start working.
++
++    >>> BROKEN_RESPONSES = [502, 503, 502, 503, 502, 503]
++    >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
++    sleep(0) called
++    sleep(1) called
++    sleep(2) called
++    sleep(4) called
++    sleep(8) called
++    sleep(16) called
++
++Now the application will fail seven times and then start working. But
++the client will give up before then--it will only retry the request
++six times.
++
++    >>> BROKEN_RESPONSES = [502, 503, 502, 503, 502, 503, 502]
++    >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
++    Traceback (most recent call last):
++    ...
++    HTTPError: HTTP Error 502:
++    ...
++
++By increasing the 'max_retries' constructor argument, we can make the
++application try more than six times, and eventually succeed.
++
++    >>> BROKEN_RESPONSES = [502, 503, 502, 503, 502, 503, 502]
++    >>> client = ServiceRoot(None, "http://api.launchpad.dev/",
++    ...                      max_retries=10)
++    sleep(0) called
++    sleep(1) called
++    sleep(2) called
++    sleep(4) called
++    sleep(8) called
++    sleep(16) called
++    sleep(32) called
++
++Now the application will fail once and then give a 400 error. The
++client will not retry in hopes that the 400 error will go away--400 is
++a client error.
++
++    >>> BROKEN_RESPONSES = [502, 400]
++    >>> client = ServiceRoot(None, "http://api.launchpad.dev/")
++    Traceback (most recent call last):
++    ...
++    HTTPError: HTTP Error 400:
++    ...
++
++Teardown.
++
++    >>> wsgi_intercept.remove_wsgi_intercept("api.launchpad.dev", 80)
++    >>> lazr.restfulclient._browser.sleep = old_sleep
 === modified file 'src/lazr/restfulclient/resource.py'
 --- src/lazr/restfulclient/resource.py	2010-04-22 17:54:16 +0000
 +++ src/lazr/restfulclient/resource.py	2010-04-27 19:59:13 +0000
@@ -380,7 +380,7 @@
      def __init__(self, authorizer, service_root, cache=None,
                   timeout=None, proxy_info=None, version=None,
--                 base_client_name=''):
++                 base_client_name='', max_retries=Browser.MAX_RETRIES):
          """Root access to a lazr.restful API.
          :param credentials: The credentials used to access the service.
@@ -401,7 +401,8 @@
          # Get the WADL definition.
          self.credentials = authorizer
          self._browser = Browser(
--            self, authorizer, cache, timeout, proxy_info, self._user_agent)
++            self, authorizer, cache, timeout, proxy_info, self._user_agent,
++            max_retries)
          self._wadl = self._browser.get_wadl_application(self._root_uri)
          # Get the root resource.
 === modified file 'src/lazr/restfulclient/version.txt'
 --- src/lazr/restfulclient/version.txt	2010-04-22 17:57:00 +0000
 +++ src/lazr/restfulclient/version.txt	2010-04-27 19:59:13 +0000
@@ -1,1 +1,1 @@
--0.9.15
++0.9.16