From 45146b6c45d306cb0312fc97509b5eca4c075c95 Mon Sep 17 00:00:00 2001 From: Alex Kavanagh Date: Wed, 3 Mar 2021 14:54:47 +0000 Subject: [PATCH 1/4] Add ObjectRetrier to perform retries on openstack client calls This adds a wrapper class that detects if a callable object in any of the descendent objects raises an Exception. If so, then it retries that exception. This is to attempt to make the zaza tests a little more robust in the face of small network failures or strange restarts. This is a test, and robust logging a reporting should be used to determine whether it is covering up actual bugs rather than CI system issues. Related Bug: (zot repo)#348 --- unit_tests/utilities/test_utilities.py | 166 +++++++++++++++++++++++++ zaza/openstack/utilities/__init__.py | 128 +++++++++++++++++++ 2 files changed, 294 insertions(+) create mode 100644 unit_tests/utilities/test_utilities.py diff --git a/unit_tests/utilities/test_utilities.py b/unit_tests/utilities/test_utilities.py new file mode 100644 index 0000000..e961de1 --- /dev/null +++ b/unit_tests/utilities/test_utilities.py @@ -0,0 +1,166 @@ +# Copyright 2021 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mock + +import unit_tests.utils as ut_utils + +import zaza.openstack.utilities as utilities + + +class SomeException(Exception): + pass + + +class SomeException2(Exception): + pass + + +class SomeException3(Exception): + pass + + +class TestObjectRetrier(ut_utils.BaseTestCase): + + def test_object_wrap(self): + + class A: + + def func(self, a, b=1): + return a + b + + a = A() + wrapped_a = utilities.ObjectRetrier(a) + self.assertEqual(wrapped_a.func(3), 4) + + def test_object_multilevel_wrap(self): + + class A: + + def f1(self, a, b): + return a * b + + class B: + + @property + def f2(self): + + return A() + + b = B() + wrapped_b = utilities.ObjectRetrier(b) + self.assertEqual(wrapped_b.f2.f1(5, 6), 30) + + def test_object_wrap_number(self): + + class A: + + class_a = 5 + + def __init__(self): + self.instance_a = 10 + + def f1(self, a, b): + return a * b + + a = A() + wrapped_a = utilities.ObjectRetrier(a) + self.assertEqual(wrapped_a.class_a, 5) + self.assertEqual(wrapped_a.instance_a, 10) + + @mock.patch("time.sleep") + def test_object_wrap_exception(self, mock_sleep): + + class A: + + def func(self): + raise SomeException() + + a = A() + # retry on a specific exception + wrapped_a = utilities.ObjectRetrier(a, num_retries=1, + retry_exceptions=[SomeException]) + with self.assertRaises(SomeException): + wrapped_a.func() + + mock_sleep.assert_called_once_with(5) + + # also retry on any exception if none specified + wrapped_a = utilities.ObjectRetrier(a, num_retries=1) + mock_sleep.reset_mock() + with self.assertRaises(SomeException): + wrapped_a.func() + + mock_sleep.assert_called_once_with(5) + + # no retry if exception isn't listed. + wrapped_a = utilities.ObjectRetrier(a, num_retries=1, + retry_exceptions=[SomeException2]) + mock_sleep.reset_mock() + with self.assertRaises(SomeException): + wrapped_a.func() + + mock_sleep.assert_not_called() + + @mock.patch("time.sleep") + def test_log_called(self, mock_sleep): + + class A: + + def func(self): + raise SomeException() + + a = A() + mock_log = mock.Mock() + wrapped_a = utilities.ObjectRetrier(a, num_retries=1, log=mock_log) + with self.assertRaises(SomeException): + wrapped_a.func() + + # there should be two calls; one for the single retry and one for the + # failure. + self.assertEqual(mock_log.call_count, 2) + + @mock.patch("time.sleep") + def test_back_off_maximum(self, mock_sleep): + + class A: + + def func(self): + raise SomeException() + + a = A() + wrapped_a = utilities.ObjectRetrier(a, num_retries=3, backoff=2) + with self.assertRaises(SomeException): + wrapped_a.func() + # Note third call hits maximum wait time of 15. + mock_sleep.assert_has_calls([mock.call(5), + mock.call(10), + mock.call(15)]) + + @mock.patch("time.sleep") + def test_total_wait(self, mock_sleep): + + class A: + + def func(self): + raise SomeException() + + a = A() + wrapped_a = utilities.ObjectRetrier(a, num_retries=3, total_wait=9) + with self.assertRaises(SomeException): + wrapped_a.func() + # Note only two calls, as total wait is 9, so a 3rd retry would exceed + # that. + mock_sleep.assert_has_calls([mock.call(5), + mock.call(5)]) diff --git a/zaza/openstack/utilities/__init__.py b/zaza/openstack/utilities/__init__.py index 35b5a14..5011689 100644 --- a/zaza/openstack/utilities/__init__.py +++ b/zaza/openstack/utilities/__init__.py @@ -13,3 +13,131 @@ # limitations under the License. """Collection of utilities to support zaza tests etc.""" + + +import time + + +class ObjectRetrier(object): + """An automatic retrier for an object. + + This is designed to be used with an instance of an object. Basically, it + wraps the object and any attributes that are fetched. Essentially, it is + used to provide retries on method calls on openstack client objects in + tests to increase robustness of tests. + + Although, technically this is bad, retries can be logged with the optional + log method. + + Usage: + + # get a client that does 3 retries, waits 5 seconds between retries and + # retries on any error. + some_client = ObjectRetrier(get_some_client) + # this gets retried up to 3 times. + things = some_client.list_things() + + Note, it is quite simple. It wraps the object and on a getattr(obj, name) + it finds the name and then returns a wrapped version of that name. On a + call, it returns the value of that call. It only wraps objects in the + chain that are either callable or have a __getattr__() method. i.e. one + that can then be retried or further fetched. This means that if a.b.c() is + a chain of objects, and we just wrap 'a', then 'b' and 'c' will both be + wrapped that the 'c' object __call__() method will be the one that is + actually retried. + + Note: this means that properties that do method calls won't be retried. + This is a limitation that may be addressed in the future, if it is needed. + """ + + def __init__(self, obj, num_retries=3, initial_interval=5.0, backoff=1.0, + max_interval=15.0, total_wait=30.0, retry_exceptions=None, + log=None): + """Initialise the retrier object. + + :param obj: The object to wrap. Ought to be an instance of something + that you want to get methods on to call or be called itself. + :type obj: Any + :param num_retries: The (maximum) number of retries. May not be hit if + the total_wait time is exceeded. + :type num_retries: int + :param initial_interval: The initial or starting interval between + retries. + :type initial_interval: float + :param backoff: The exponential backoff multiple. 1 is linear. + :type backoff: float + :param max_interval: The maximum interval between retries. + If backoff is >1 then the initial_interval will never grow larger + than max_interval. + :type max_interval: float + :param retry_exceptions: The list of exceptions to retry on, or None. + If a list, then it will only retry if the exception is one of the + ones in the list. + :type retry_exceptions: List[Exception] + """ + # Note we use semi-private variable names that shouldn't clash with any + # on the actual object. + self.__obj = obj + self.__kwargs = { + 'num_retries': num_retries, + 'initial_interval': initial_interval, + 'backoff': backoff, + 'max_interval': max_interval, + 'total_wait': total_wait, + 'retry_exceptions': retry_exceptions, + 'log': log or (lambda x: None), + } + + def __getattr__(self, name): + """Get attribute; delegates to wrapped object.""" + # Note the above may generate an attribute error; we expect this and + # will fail with an attribute error. + attr = getattr(self.__obj, name) + if callable(attr) or hasattr(attr, "__getattr__"): + return ObjectRetrier(attr, **self.__kwargs) + else: + return attr + # TODO(ajkavanagh): Note detecting a property is a bit trickier. we + # can do isinstance(attr, property), but then the act of accessing it + # is what calls it. i.e. it would fail at the getattr(self.__obj, + # name) stage. The solution is to check first, and if it's a property, + # then treat it like the retrier. However, I think this is too + # complex for the first go, and to use manual retries in that instance. + + def __call__(self, *args, **kwargs): + """Call the object; delegates to the wrapped object.""" + obj = self.__obj + retry = 0 + wait = self.__kwargs['initial_interval'] + max_interval = self.__kwargs['max_interval'] + log = self.__kwargs['log'] + backoff = self.__kwargs['backoff'] + total_wait = self.__kwargs['total_wait'] + num_retries = self.__kwargs['num_retries'] + retry_exceptions = self.__kwargs['retry_exceptions'] + wait_so_far = 0 + while True: + try: + return obj(*args, **kwargs) + except Exception as e: + # if retry_exceptions is None, or the type of the exception is + # not in the list of retries, then raise an exception + # immediately. This means that if retry_exceptions is None, + # then the method is always retried. + if (retry_exceptions is not None and + type(e) not in retry_exceptions): + raise + retry += 1 + if retry > num_retries: + log("{}: exceeded number of retries, so erroring out" + .format(str(obj))) + raise e + log("{}: call failed: retrying in {} seconds" + .format(str(obj), wait)) + time.sleep(wait) + wait_so_far += wait + if wait_so_far >= total_wait: + raise e + wait = wait * backoff + if wait > max_interval: + wait = max_interval From ed8528b76a842d5729ed6f5700d4cd4270de1c8a Mon Sep 17 00:00:00 2001 From: Alex Kavanagh Date: Wed, 3 Mar 2021 14:58:27 +0000 Subject: [PATCH 2/4] Add object retrier to the LB tests for octavia --- zaza/openstack/charm_tests/octavia/tests.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/zaza/openstack/charm_tests/octavia/tests.py b/zaza/openstack/charm_tests/octavia/tests.py index 26941a2..d6bae64 100644 --- a/zaza/openstack/charm_tests/octavia/tests.py +++ b/zaza/openstack/charm_tests/octavia/tests.py @@ -25,6 +25,8 @@ import osc_lib.exceptions import zaza.openstack.charm_tests.test_utils as test_utils import zaza.openstack.utilities.openstack as openstack_utils +from zaza.openstack import ObjectRetrier + class CharmOperationTest(test_utils.OpenStackBaseTest): """Charm operation tests.""" @@ -63,12 +65,12 @@ class LBAASv2Test(test_utils.OpenStackBaseTest): def setUpClass(cls): """Run class setup for running LBaaSv2 service tests.""" super(LBAASv2Test, cls).setUpClass() - cls.keystone_client = openstack_utils.get_keystone_session_client( - cls.keystone_session) - cls.neutron_client = openstack_utils.get_neutron_session_client( - cls.keystone_session) - cls.octavia_client = openstack_utils.get_octavia_session_client( - cls.keystone_session) + cls.keystone_client = ObjectRetrier( + openstack_utils.get_keystone_session_client(cls.keystone_session)) + cls.neutron_client = ObjectRetrier( + openstack_utils.get_neutron_session_client(cls.keystone_session)) + cls.octavia_client = ObjectRetrier( + openstack_utils.get_octavia_session_client(cls.keystone_session)) cls.RESOURCE_PREFIX = 'zaza-octavia' # NOTE(fnordahl): in the event of a test failure we do not want to run From c041042fe220fa5de33f8006734c66acb8255f11 Mon Sep 17 00:00:00 2001 From: Alex Kavanagh Date: Mon, 8 Mar 2021 10:50:41 +0000 Subject: [PATCH 3/4] Add function to just retry on keystone ConnectFailure The main failure that seems to occur with clients is the ConnectFailure, which according to the docs, is retry-able. Thus provide a function that adds that exception condition automatically. Also fix the import problem in octavia test for the object retrier that is being used to validate the ObjectRetrier feature. --- unit_tests/utilities/test_utilities.py | 20 ++++++++++++++++++ zaza/openstack/charm_tests/octavia/tests.py | 2 +- zaza/openstack/utilities/__init__.py | 23 +++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/unit_tests/utilities/test_utilities.py b/unit_tests/utilities/test_utilities.py index e961de1..a78600c 100644 --- a/unit_tests/utilities/test_utilities.py +++ b/unit_tests/utilities/test_utilities.py @@ -164,3 +164,23 @@ class TestObjectRetrier(ut_utils.BaseTestCase): # that. mock_sleep.assert_has_calls([mock.call(5), mock.call(5)]) + + @mock.patch("time.sleep") + def test_retry_on_connect_failure(self, mock_sleep): + + class A: + + def func1(self): + raise SomeException() + + def func2(self): + raise utilities.ConnectFailure() + + a = A() + wrapped_a = utilities.retry_on_connect_failure(a, num_retries=2) + with self.assertRaises(SomeException): + wrapped_a.func1() + mock_sleep.assert_not_called() + with self.assertRaises(utilities.ConnectFailure): + wrapped_a.func2() + mock_sleep.assert_has_calls([mock.call(5)]) diff --git a/zaza/openstack/charm_tests/octavia/tests.py b/zaza/openstack/charm_tests/octavia/tests.py index d6bae64..9a71b49 100644 --- a/zaza/openstack/charm_tests/octavia/tests.py +++ b/zaza/openstack/charm_tests/octavia/tests.py @@ -25,7 +25,7 @@ import osc_lib.exceptions import zaza.openstack.charm_tests.test_utils as test_utils import zaza.openstack.utilities.openstack as openstack_utils -from zaza.openstack import ObjectRetrier +from zaza.openstack.utilities import ObjectRetrier class CharmOperationTest(test_utils.OpenStackBaseTest): diff --git a/zaza/openstack/utilities/__init__.py b/zaza/openstack/utilities/__init__.py index 5011689..6db3ce3 100644 --- a/zaza/openstack/utilities/__init__.py +++ b/zaza/openstack/utilities/__init__.py @@ -17,6 +17,8 @@ import time +from keystoneauth1.exceptions.connection import ConnectFailure + class ObjectRetrier(object): """An automatic retrier for an object. @@ -141,3 +143,24 @@ class ObjectRetrier(object): wait = wait * backoff if wait > max_interval: wait = max_interval + + +def retry_on_connect_failure(client, **kwargs): + """Retry an object that eventually gets resolved to a call. + + Specifically, this uses ObjectRetrier but only against the + keystoneauth1.exceptions.connection.ConnectFailure exeception. + + :params client: the object that may throw and exception when called. + :type client: Any + :params **kwargs: the arguments supplied to the ObjectRetrier init method + :type **kwargs: Dict[Any] + :returns: client wrapped in an ObjectRetrier instance + :rtype: ObjectRetrier[client] + """ + kwcopy = kwargs.copy() + if 'retry_exceptions' not in kwcopy: + kwcopy['retry_exceptions'] = [] + if ConnectFailure not in kwcopy['retry_exceptions']: + kwcopy['retry_exceptions'].append(ConnectFailure) + return ObjectRetrier(client, **kwcopy) From e9f11da5df5c536eb953f4e4d6f99a68fd840079 Mon Sep 17 00:00:00 2001 From: Alex Kavanagh Date: Thu, 25 Mar 2021 14:47:42 +0000 Subject: [PATCH 4/4] Fix typo and rename class for better understanding Fixes a typo (None -> not None) and renames the class from ObjectRetrier -> ObjectRetrierWraps to make it clearer that the class instantiation is to wrap and object with the retrier code rather than do retries at that moment. --- unit_tests/utilities/test_utilities.py | 26 +++++++++++---------- zaza/openstack/charm_tests/octavia/tests.py | 8 +++---- zaza/openstack/utilities/__init__.py | 21 +++++++++-------- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/unit_tests/utilities/test_utilities.py b/unit_tests/utilities/test_utilities.py index a78600c..6af6089 100644 --- a/unit_tests/utilities/test_utilities.py +++ b/unit_tests/utilities/test_utilities.py @@ -31,7 +31,7 @@ class SomeException3(Exception): pass -class TestObjectRetrier(ut_utils.BaseTestCase): +class TestObjectRetrierWraps(ut_utils.BaseTestCase): def test_object_wrap(self): @@ -41,7 +41,7 @@ class TestObjectRetrier(ut_utils.BaseTestCase): return a + b a = A() - wrapped_a = utilities.ObjectRetrier(a) + wrapped_a = utilities.ObjectRetrierWraps(a) self.assertEqual(wrapped_a.func(3), 4) def test_object_multilevel_wrap(self): @@ -59,7 +59,7 @@ class TestObjectRetrier(ut_utils.BaseTestCase): return A() b = B() - wrapped_b = utilities.ObjectRetrier(b) + wrapped_b = utilities.ObjectRetrierWraps(b) self.assertEqual(wrapped_b.f2.f1(5, 6), 30) def test_object_wrap_number(self): @@ -75,7 +75,7 @@ class TestObjectRetrier(ut_utils.BaseTestCase): return a * b a = A() - wrapped_a = utilities.ObjectRetrier(a) + wrapped_a = utilities.ObjectRetrierWraps(a) self.assertEqual(wrapped_a.class_a, 5) self.assertEqual(wrapped_a.instance_a, 10) @@ -89,15 +89,15 @@ class TestObjectRetrier(ut_utils.BaseTestCase): a = A() # retry on a specific exception - wrapped_a = utilities.ObjectRetrier(a, num_retries=1, - retry_exceptions=[SomeException]) + wrapped_a = utilities.ObjectRetrierWraps( + a, num_retries=1, retry_exceptions=[SomeException]) with self.assertRaises(SomeException): wrapped_a.func() mock_sleep.assert_called_once_with(5) # also retry on any exception if none specified - wrapped_a = utilities.ObjectRetrier(a, num_retries=1) + wrapped_a = utilities.ObjectRetrierWraps(a, num_retries=1) mock_sleep.reset_mock() with self.assertRaises(SomeException): wrapped_a.func() @@ -105,8 +105,8 @@ class TestObjectRetrier(ut_utils.BaseTestCase): mock_sleep.assert_called_once_with(5) # no retry if exception isn't listed. - wrapped_a = utilities.ObjectRetrier(a, num_retries=1, - retry_exceptions=[SomeException2]) + wrapped_a = utilities.ObjectRetrierWraps( + a, num_retries=1, retry_exceptions=[SomeException2]) mock_sleep.reset_mock() with self.assertRaises(SomeException): wrapped_a.func() @@ -123,7 +123,8 @@ class TestObjectRetrier(ut_utils.BaseTestCase): a = A() mock_log = mock.Mock() - wrapped_a = utilities.ObjectRetrier(a, num_retries=1, log=mock_log) + wrapped_a = utilities.ObjectRetrierWraps( + a, num_retries=1, log=mock_log) with self.assertRaises(SomeException): wrapped_a.func() @@ -140,7 +141,7 @@ class TestObjectRetrier(ut_utils.BaseTestCase): raise SomeException() a = A() - wrapped_a = utilities.ObjectRetrier(a, num_retries=3, backoff=2) + wrapped_a = utilities.ObjectRetrierWraps(a, num_retries=3, backoff=2) with self.assertRaises(SomeException): wrapped_a.func() # Note third call hits maximum wait time of 15. @@ -157,7 +158,8 @@ class TestObjectRetrier(ut_utils.BaseTestCase): raise SomeException() a = A() - wrapped_a = utilities.ObjectRetrier(a, num_retries=3, total_wait=9) + wrapped_a = utilities.ObjectRetrierWraps( + a, num_retries=3, total_wait=9) with self.assertRaises(SomeException): wrapped_a.func() # Note only two calls, as total wait is 9, so a 3rd retry would exceed diff --git a/zaza/openstack/charm_tests/octavia/tests.py b/zaza/openstack/charm_tests/octavia/tests.py index 9a71b49..535d7aa 100644 --- a/zaza/openstack/charm_tests/octavia/tests.py +++ b/zaza/openstack/charm_tests/octavia/tests.py @@ -25,7 +25,7 @@ import osc_lib.exceptions import zaza.openstack.charm_tests.test_utils as test_utils import zaza.openstack.utilities.openstack as openstack_utils -from zaza.openstack.utilities import ObjectRetrier +from zaza.openstack.utilities import ObjectRetrierWraps class CharmOperationTest(test_utils.OpenStackBaseTest): @@ -65,11 +65,11 @@ class LBAASv2Test(test_utils.OpenStackBaseTest): def setUpClass(cls): """Run class setup for running LBaaSv2 service tests.""" super(LBAASv2Test, cls).setUpClass() - cls.keystone_client = ObjectRetrier( + cls.keystone_client = ObjectRetrierWraps( openstack_utils.get_keystone_session_client(cls.keystone_session)) - cls.neutron_client = ObjectRetrier( + cls.neutron_client = ObjectRetrierWraps( openstack_utils.get_neutron_session_client(cls.keystone_session)) - cls.octavia_client = ObjectRetrier( + cls.octavia_client = ObjectRetrierWraps( openstack_utils.get_octavia_session_client(cls.keystone_session)) cls.RESOURCE_PREFIX = 'zaza-octavia' diff --git a/zaza/openstack/utilities/__init__.py b/zaza/openstack/utilities/__init__.py index 6db3ce3..5798f26 100644 --- a/zaza/openstack/utilities/__init__.py +++ b/zaza/openstack/utilities/__init__.py @@ -20,7 +20,7 @@ import time from keystoneauth1.exceptions.connection import ConnectFailure -class ObjectRetrier(object): +class ObjectRetrierWraps(object): """An automatic retrier for an object. This is designed to be used with an instance of an object. Basically, it @@ -35,7 +35,7 @@ class ObjectRetrier(object): # get a client that does 3 retries, waits 5 seconds between retries and # retries on any error. - some_client = ObjectRetrier(get_some_client) + some_client = ObjectRetrierWraps(get_some_client) # this gets retried up to 3 times. things = some_client.list_things() @@ -96,7 +96,7 @@ class ObjectRetrier(object): # will fail with an attribute error. attr = getattr(self.__obj, name) if callable(attr) or hasattr(attr, "__getattr__"): - return ObjectRetrier(attr, **self.__kwargs) + return ObjectRetrierWraps(attr, **self.__kwargs) else: return attr # TODO(ajkavanagh): Note detecting a property is a bit trickier. we @@ -122,8 +122,8 @@ class ObjectRetrier(object): try: return obj(*args, **kwargs) except Exception as e: - # if retry_exceptions is None, or the type of the exception is - # not in the list of retries, then raise an exception + # if retry_exceptions is not None, or the type of the exception + # is not in the list of retries, then raise an exception # immediately. This means that if retry_exceptions is None, # then the method is always retried. if (retry_exceptions is not None and @@ -148,19 +148,20 @@ class ObjectRetrier(object): def retry_on_connect_failure(client, **kwargs): """Retry an object that eventually gets resolved to a call. - Specifically, this uses ObjectRetrier but only against the + Specifically, this uses ObjectRetrierWraps but only against the keystoneauth1.exceptions.connection.ConnectFailure exeception. :params client: the object that may throw and exception when called. :type client: Any - :params **kwargs: the arguments supplied to the ObjectRetrier init method + :params **kwargs: the arguments supplied to the ObjectRetrierWraps init + method :type **kwargs: Dict[Any] - :returns: client wrapped in an ObjectRetrier instance - :rtype: ObjectRetrier[client] + :returns: client wrapped in an ObjectRetrierWraps instance + :rtype: ObjectRetrierWraps[client] """ kwcopy = kwargs.copy() if 'retry_exceptions' not in kwcopy: kwcopy['retry_exceptions'] = [] if ConnectFailure not in kwcopy['retry_exceptions']: kwcopy['retry_exceptions'].append(ConnectFailure) - return ObjectRetrier(client, **kwcopy) + return ObjectRetrierWraps(client, **kwcopy)