diff --git a/zaza/openstack/charm_tests/octavia/diskimage_retrofit/setup.py b/zaza/openstack/charm_tests/octavia/diskimage_retrofit/setup.py index 7fbca5e..73d1aad 100644 --- a/zaza/openstack/charm_tests/octavia/diskimage_retrofit/setup.py +++ b/zaza/openstack/charm_tests/octavia/diskimage_retrofit/setup.py @@ -15,6 +15,7 @@ """Code for configuring octavia-diskimage-retrofit.""" import logging +import tenacity import zaza.model @@ -39,12 +40,20 @@ def retrofit_amphora_image(unit='octavia-diskimage-retrofit/0', if image_id: params.update({'source-image': image_id}) - # NOTE(fnordahl) ``zaza.model.run_action_on_leader`` fails here, - # apparently has to do with handling of subordinates in ``libjuju`` or - # ``juju`` itself. - action = zaza.model.run_action( - unit, - 'retrofit-image', - action_params=params, - raise_on_failure=True) + # NOTE(fnordahl) the retrofit process involves downloading packages from + # the internet and is as such susceptible to random failures due to + # internet gremlins. + for attempt in tenacity.Retrying( + stop=tenacity.stop_after_attempt(3), + wait=tenacity.wait_exponential( + multiplier=1, min=2, max=10)): + with attempt: + # NOTE(fnordahl) ``zaza.model.run_action_on_leader`` fails here, + # apparently has to do with handling of subordinates in ``libjuju`` + # or ``juju`` itself. + action = zaza.model.run_action( + unit, + 'retrofit-image', + action_params=params, + raise_on_failure=True) return action diff --git a/zaza/openstack/charm_tests/octavia/tests.py b/zaza/openstack/charm_tests/octavia/tests.py index e84ac3e..eb0b217 100644 --- a/zaza/openstack/charm_tests/octavia/tests.py +++ b/zaza/openstack/charm_tests/octavia/tests.py @@ -18,6 +18,8 @@ import logging import subprocess import tenacity +from keystoneauth1 import exceptions as keystone_exceptions +import octaviaclient.api.v2.octavia import osc_lib.exceptions import zaza.openstack.charm_tests.test_utils as test_utils @@ -80,18 +82,62 @@ class LBAASv2Test(test_utils.OpenStackBaseTest): # List of floating IPs created by this test cls.fips = [] - def resource_cleanup(self): - """Remove resources created during test execution.""" + def _remove_amphorae_instances(self): + """Remove amphorae instances forcefully. + + In some situations Octavia is unable to remove load balancer resources. + This helper can be used to remove the underlying instances. + """ + result = self.octavia_client.amphora_list() + for amphora in result.get('amphorae', []): + for server in self.nova_client.servers.list(): + if 'compute_id' in amphora and server.id == amphora[ + 'compute_id']: + try: + openstack_utils.delete_resource( + self.nova_client.servers, + server.id, + msg="server") + except AssertionError as e: + logging.warning( + 'Gave up waiting for resource cleanup: "{}"' + .format(str(e))) + + @tenacity.retry(stop=tenacity.stop_after_attempt(3), + wait=tenacity.wait_exponential( + multiplier=1, min=2, max=10)) + def resource_cleanup(self, only_local=False): + """Remove resources created during test execution. + + :param only_local: When set to true do not call parent method + :type only_local: bool + """ for lb in self.loadbalancers: - self.octavia_client.load_balancer_delete(lb['id'], cascade=True) try: - self.wait_for_lb_resource( - self.octavia_client.load_balancer_show, lb['id'], - provisioning_status='DELETED') - except osc_lib.exceptions.NotFound: - pass + self.octavia_client.load_balancer_delete( + lb['id'], cascade=True) + except octaviaclient.api.v2.octavia.OctaviaClientException as e: + logging.info('Octavia is unable to delete load balancer: "{}"' + .format(e)) + logging.info('Attempting to forcefully remove amphorae') + self._remove_amphorae_instances() + else: + try: + self.wait_for_lb_resource( + self.octavia_client.load_balancer_show, lb['id'], + provisioning_status='DELETED') + except osc_lib.exceptions.NotFound: + pass + # allow resource cleanup to be run multiple times + self.loadbalancers = [] for fip in self.fips: self.neutron_client.delete_floatingip(fip) + # allow resource cleanup to be run multiple times + self.fips = [] + + if only_local: + return + # we run the parent resource_cleanup last as it will remove instances # referenced as members in the above cleaned up load balancers super(LBAASv2Test, self).resource_cleanup() @@ -157,6 +203,7 @@ class LBAASv2Test(test_utils.OpenStackBaseTest): 'provider': provider, }}) lb = result['loadbalancer'] + self.loadbalancers.append(lb) lb_id = lb['id'] logging.info('Awaiting loadbalancer to reach provisioning_status ' @@ -283,10 +330,25 @@ class LBAASv2Test(test_utils.OpenStackBaseTest): for provider in self.get_lb_providers(self.octavia_client).keys(): logging.info('Creating loadbalancer with provider {}' .format(provider)) - lb = self._create_lb_resources(self.octavia_client, provider, - vip_subnet_id, subnet_id, - payload_ips) - self.loadbalancers.append(lb) + final_exc = None + # NOTE: we cannot use tenacity here as the method we call into + # already uses it to wait for operations to complete. + for retry in range(0, 3): + try: + lb = self._create_lb_resources(self.octavia_client, + provider, + vip_subnet_id, + subnet_id, + payload_ips) + break + except (AssertionError, + keystone_exceptions.connection.ConnectFailure) as e: + logging.info('Retrying load balancer creation, last ' + 'failure: "{}"'.format(str(e))) + self.resource_cleanup(only_local=True) + final_exc = e + else: + raise final_exc lb_fp = openstack_utils.create_floating_ip( self.neutron_client, 'ext_net', port={'id': lb['vip_port_id']}) diff --git a/zaza/openstack/charm_tests/test_utils.py b/zaza/openstack/charm_tests/test_utils.py index ee61336..250c183 100644 --- a/zaza/openstack/charm_tests/test_utils.py +++ b/zaza/openstack/charm_tests/test_utils.py @@ -495,6 +495,11 @@ class OpenStackBaseTest(BaseCharmTest): self.nova_client.servers, server.id, msg="server") + except AssertionError as e: + # Resource failed to be removed within the expected time frame, + # log this fact and carry on. + logging.warning('Gave up waiting for resource cleanup: "{}"' + .format(str(e))) except AttributeError: # Test did not define self.RESOURCE_PREFIX, ignore. pass diff --git a/zaza/openstack/utilities/openstack.py b/zaza/openstack/utilities/openstack.py index 0c31c05..1e8f625 100644 --- a/zaza/openstack/utilities/openstack.py +++ b/zaza/openstack/utilities/openstack.py @@ -690,14 +690,20 @@ def add_interface_to_netplan(server_name, mac_address): "{}\nserver_name: {}".format(body_value, unit_name, interface, mac_address, server_name)) - with tempfile.NamedTemporaryFile(mode="w") as netplan_file: - netplan_file.write(body_value) - netplan_file.flush() - model.scp_to_unit(unit_name, netplan_file.name, - '/home/ubuntu/60-dataport.yaml', user="ubuntu") - run_cmd_mv = "sudo mv /home/ubuntu/60-dataport.yaml /etc/netplan/" - model.run_on_unit(unit_name, run_cmd_mv) - model.run_on_unit(unit_name, "sudo netplan apply") + for attempt in tenacity.Retrying( + stop=tenacity.stop_after_attempt(3), + wait=tenacity.wait_exponential( + multiplier=1, min=2, max=10)): + with attempt: + with tempfile.NamedTemporaryFile(mode="w") as netplan_file: + netplan_file.write(body_value) + netplan_file.flush() + model.scp_to_unit( + unit_name, netplan_file.name, + '/home/ubuntu/60-dataport.yaml', user="ubuntu") + run_cmd_mv = "sudo mv /home/ubuntu/60-dataport.yaml /etc/netplan/" + model.run_on_unit(unit_name, run_cmd_mv) + model.run_on_unit(unit_name, "sudo netplan apply") def configure_gateway_ext_port(novaclient, neutronclient, net_id=None,