Merge pull request #418 from openstack-charmers/issue/417

Log resource cleanup failure and carry on, retry lb resource creation and various other retry fixes
This commit is contained in:
Aurelien Lourot
2020-09-24 10:11:23 +02:00
committed by GitHub
4 changed files with 110 additions and 28 deletions
@@ -15,6 +15,7 @@
"""Code for configuring octavia-diskimage-retrofit."""
import logging
import tenacity
import zaza.model
@@ -39,12 +40,20 @@ def retrofit_amphora_image(unit='octavia-diskimage-retrofit/0',
if image_id:
params.update({'source-image': image_id})
# NOTE(fnordahl) ``zaza.model.run_action_on_leader`` fails here,
# apparently has to do with handling of subordinates in ``libjuju`` or
# ``juju`` itself.
action = zaza.model.run_action(
unit,
'retrofit-image',
action_params=params,
raise_on_failure=True)
# NOTE(fnordahl) the retrofit process involves downloading packages from
# the internet and is as such susceptible to random failures due to
# internet gremlins.
for attempt in tenacity.Retrying(
stop=tenacity.stop_after_attempt(3),
wait=tenacity.wait_exponential(
multiplier=1, min=2, max=10)):
with attempt:
# NOTE(fnordahl) ``zaza.model.run_action_on_leader`` fails here,
# apparently has to do with handling of subordinates in ``libjuju``
# or ``juju`` itself.
action = zaza.model.run_action(
unit,
'retrofit-image',
action_params=params,
raise_on_failure=True)
return action
+74 -12
View File
@@ -18,6 +18,8 @@ import logging
import subprocess
import tenacity
from keystoneauth1 import exceptions as keystone_exceptions
import octaviaclient.api.v2.octavia
import osc_lib.exceptions
import zaza.openstack.charm_tests.test_utils as test_utils
@@ -80,18 +82,62 @@ class LBAASv2Test(test_utils.OpenStackBaseTest):
# List of floating IPs created by this test
cls.fips = []
def resource_cleanup(self):
"""Remove resources created during test execution."""
def _remove_amphorae_instances(self):
    """Remove amphorae instances forcefully.

    In some situations Octavia is unable to remove load balancer resources.
    This helper can be used to remove the underlying instances.

    Every Nova server whose id equals the ``compute_id`` of an amphora
    returned by ``octavia_client.amphora_list()`` is passed to
    ``openstack_utils.delete_resource``.  An ``AssertionError`` raised by
    that helper is logged as a warning and the remaining servers are still
    processed.
    """
    result = self.octavia_client.amphora_list()
    # Amphorae without a ``compute_id`` key have no instance to match.
    compute_ids = {
        amphora['compute_id']
        for amphora in result.get('amphorae', [])
        if 'compute_id' in amphora
    }
    if not compute_ids:
        # Nothing to match against, so skip the Nova round trip entirely.
        return
    # List the servers once instead of once per amphora; the listing does
    # not depend on which amphora is being looked at.
    for server in self.nova_client.servers.list():
        if server.id not in compute_ids:
            continue
        try:
            openstack_utils.delete_resource(
                self.nova_client.servers,
                server.id,
                msg="server")
        except AssertionError as e:
            # Best effort: log the failure and carry on with the next one.
            logging.warning(
                'Gave up waiting for resource cleanup: "{}"'
                .format(str(e)))
@tenacity.retry(stop=tenacity.stop_after_attempt(3),
                wait=tenacity.wait_exponential(
                    multiplier=1, min=2, max=10))
def resource_cleanup(self, only_local=False):
    """Remove resources created during test execution.

    Load balancers recorded in ``self.loadbalancers`` are deleted first,
    then the floating IPs recorded in ``self.fips``.  Both lists are reset
    afterwards so the method can be run more than once.  The whole method
    is wrapped in a tenacity retry limited to three attempts with an
    exponential wait between attempts.

    :param only_local: When set to true do not call parent method
    :type only_local: bool
    """
    for lb in self.loadbalancers:
        try:
            self.octavia_client.load_balancer_delete(
                lb['id'], cascade=True)
        except octaviaclient.api.v2.octavia.OctaviaClientException as e:
            # Octavia refused the delete; fall back to removing the
            # underlying amphora instances directly.
            logging.info('Octavia is unable to delete load balancer: "{}"'
                         .format(e))
            logging.info('Attempting to forcefully remove amphorae')
            self._remove_amphorae_instances()
        else:
            # Only wait for the DELETED status when the delete request
            # itself was accepted.
            try:
                self.wait_for_lb_resource(
                    self.octavia_client.load_balancer_show, lb['id'],
                    provisioning_status='DELETED')
            except osc_lib.exceptions.NotFound:
                # The load balancer is already gone.
                pass
    # allow resource cleanup to be run multiple times
    self.loadbalancers = []

    for fip in self.fips:
        self.neutron_client.delete_floatingip(fip)
    # allow resource cleanup to be run multiple times
    self.fips = []

    if only_local:
        return

    # we run the parent resource_cleanup last as it will remove instances
    # referenced as members in the above cleaned up load balancers
    super(LBAASv2Test, self).resource_cleanup()
@@ -157,6 +203,7 @@ class LBAASv2Test(test_utils.OpenStackBaseTest):
'provider': provider,
}})
lb = result['loadbalancer']
self.loadbalancers.append(lb)
lb_id = lb['id']
logging.info('Awaiting loadbalancer to reach provisioning_status '
@@ -283,10 +330,25 @@ class LBAASv2Test(test_utils.OpenStackBaseTest):
for provider in self.get_lb_providers(self.octavia_client).keys():
logging.info('Creating loadbalancer with provider {}'
.format(provider))
lb = self._create_lb_resources(self.octavia_client, provider,
vip_subnet_id, subnet_id,
payload_ips)
self.loadbalancers.append(lb)
final_exc = None
# NOTE: we cannot use tenacity here as the method we call into
# already uses it to wait for operations to complete.
for retry in range(0, 3):
try:
lb = self._create_lb_resources(self.octavia_client,
provider,
vip_subnet_id,
subnet_id,
payload_ips)
break
except (AssertionError,
keystone_exceptions.connection.ConnectFailure) as e:
logging.info('Retrying load balancer creation, last '
'failure: "{}"'.format(str(e)))
self.resource_cleanup(only_local=True)
final_exc = e
else:
raise final_exc
lb_fp = openstack_utils.create_floating_ip(
self.neutron_client, 'ext_net', port={'id': lb['vip_port_id']})
+5
View File
@@ -495,6 +495,11 @@ class OpenStackBaseTest(BaseCharmTest):
self.nova_client.servers,
server.id,
msg="server")
except AssertionError as e:
# Resource failed to be removed within the expected time frame,
# log this fact and carry on.
logging.warning('Gave up waiting for resource cleanup: "{}"'
.format(str(e)))
except AttributeError:
# Test did not define self.RESOURCE_PREFIX, ignore.
pass
+14 -8
View File
@@ -690,14 +690,20 @@ def add_interface_to_netplan(server_name, mac_address):
"{}\nserver_name: {}".format(body_value, unit_name,
interface, mac_address,
server_name))
with tempfile.NamedTemporaryFile(mode="w") as netplan_file:
netplan_file.write(body_value)
netplan_file.flush()
model.scp_to_unit(unit_name, netplan_file.name,
'/home/ubuntu/60-dataport.yaml', user="ubuntu")
run_cmd_mv = "sudo mv /home/ubuntu/60-dataport.yaml /etc/netplan/"
model.run_on_unit(unit_name, run_cmd_mv)
model.run_on_unit(unit_name, "sudo netplan apply")
for attempt in tenacity.Retrying(
stop=tenacity.stop_after_attempt(3),
wait=tenacity.wait_exponential(
multiplier=1, min=2, max=10)):
with attempt:
with tempfile.NamedTemporaryFile(mode="w") as netplan_file:
netplan_file.write(body_value)
netplan_file.flush()
model.scp_to_unit(
unit_name, netplan_file.name,
'/home/ubuntu/60-dataport.yaml', user="ubuntu")
run_cmd_mv = "sudo mv /home/ubuntu/60-dataport.yaml /etc/netplan/"
model.run_on_unit(unit_name, run_cmd_mv)
model.run_on_unit(unit_name, "sudo netplan apply")
def configure_gateway_ext_port(novaclient, neutronclient, net_id=None,