diff --git a/unit_tests/utilities/test_zaza_utilities_openstack.py b/unit_tests/utilities/test_zaza_utilities_openstack.py
index 6a9cbf0..f40098f 100644
--- a/unit_tests/utilities/test_zaza_utilities_openstack.py
+++ b/unit_tests/utilities/test_zaza_utilities_openstack.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import copy
+import datetime
 import io
 import mock
 import tenacity
@@ -854,6 +855,56 @@ class TestOpenStackUtils(ut_utils.BaseTestCase):
                 'myvm',
                 'org-hypervisor')
 
+    def test_wait_for_server_update_and_active(self):
+        openstack_utils.wait_for_server_update_and_active.retry.stop = \
+            tenacity.stop_after_attempt(1)
+        novaclient = mock.MagicMock()
+        servermock = mock.MagicMock()
+        servermock.updated = '2019-03-07T13:41:58Z'
+        servermock.status = 'ACTIVE'
+        novaclient.servers.find.return_value = servermock
+        # Implicit assertion that exception is not raised.
+        openstack_utils.wait_for_server_update_and_active(
+            novaclient,
+            'myvm',
+            datetime.datetime.strptime(
+                '2019-03-07T13:40:58Z',
+                '%Y-%m-%dT%H:%M:%SZ'))
+
+    def test_wait_for_server_update_and_active_fail_no_meta_update(self):
+        openstack_utils.wait_for_server_update_and_active.retry.stop = \
+            tenacity.stop_after_attempt(1)
+        novaclient = mock.MagicMock()
+        servermock = mock.MagicMock()
+        servermock.updated = '2019-03-07T13:41:58Z'
+        servermock.status = 'ACTIVE'
+        novaclient.servers.find.return_value = servermock
+        # Reference time equals server.updated, so no update is detected.
+        with self.assertRaises(exceptions.NovaGuestRestartFailed):
+            openstack_utils.wait_for_server_update_and_active(
+                novaclient,
+                'myvm',
+                datetime.datetime.strptime(
+                    '2019-03-07T13:41:58Z',
+                    '%Y-%m-%dT%H:%M:%SZ'))
+
+    def test_wait_for_server_update_and_active_fail_not_active(self):
+        openstack_utils.wait_for_server_update_and_active.retry.stop = \
+            tenacity.stop_after_attempt(1)
+        novaclient = mock.MagicMock()
+        servermock = mock.MagicMock()
+        servermock.updated = '2019-03-07T13:41:58Z'
+        servermock.status = 'NOTACTIVE'
+        novaclient.servers.find.return_value = servermock
+        # Metadata was updated but the guest status is not ACTIVE.
+        with self.assertRaises(exceptions.NovaGuestRestartFailed):
+            openstack_utils.wait_for_server_update_and_active(
+                novaclient,
+                'myvm',
+                datetime.datetime.strptime(
+                    '2019-03-07T13:40:58Z',
+                    '%Y-%m-%dT%H:%M:%SZ'))
+
     def test_enable_all_nova_services(self):
         novaclient = mock.MagicMock()
         svc_mock1 = mock.MagicMock()
diff --git a/zaza/charm_tests/masakari/tests.py b/zaza/charm_tests/masakari/tests.py
index f74ad8a..5168bbf 100644
--- a/zaza/charm_tests/masakari/tests.py
+++ b/zaza/charm_tests/masakari/tests.py
@@ -16,6 +16,7 @@
 
 """Encapsulate masakari testing."""
 
+from datetime import datetime
 import logging
 
 import novaclient
@@ -40,27 +41,80 @@ class MasakariTest(test_utils.OpenStackBaseTest):
         cls.nova_client = openstack_utils.get_nova_session_client(
             cls.keystone_session)
 
-    def test_instance_failover(self):
-        """Test masakari managed guest migration."""
-        # Launch guest
-        vm_name = 'zaza-test-instance-failover'
+    @classmethod
+    def tearDown(cls):
+        """Bring hypervisors and services back up."""
+        logging.info('Running teardown')
+        for unit in zaza.model.get_units('nova-compute',
+                                         model_name=cls.model_name):
+            zaza.configure.masakari.simulate_compute_host_recovery(
+                unit.entity_id,
+                model_name=cls.model_name)
+        zaza.utilities.openstack.enable_all_nova_services(cls.nova_client)
+        zaza.configure.masakari.enable_hosts()
+
+    def ensure_guest(self, vm_name):
+        """Return the existing guest or boot a new one.
+
+        :param vm_name: Name of guest to lookup
+        :type vm_name: str
+        :returns: Guest found by name, or the result of launching a new one
+        """
         try:
-            self.nova_client.servers.find(name=vm_name)
+            guest = self.nova_client.servers.find(name=vm_name)
             logging.info('Found existing guest')
         except novaclient.exceptions.NotFound:
             logging.info('Launching new guest')
-            zaza.configure.guest.launch_instance(
+            guest = zaza.configure.guest.launch_instance(
                 'bionic',
                 use_boot_volume=True,
                 vm_name=vm_name)
+        return guest
 
-        # Locate hypervisor hosting guest and shut it down
+    def get_guests_compute_info(self, vm_name):
+        """Return the hostname & juju unit of compute host hosting vm.
+
+        :param vm_name: Name of guest to lookup
+        :type vm_name: str
+        :returns: Hypervisor hostname and juju unit name hosting the guest
+        :rtype: (str, str)
+        """
         current_hypervisor = zaza.utilities.openstack.get_hypervisor_for_guest(
             self.nova_client,
             vm_name)
         unit_name = juju_utils.get_unit_name_from_host_name(
             current_hypervisor,
             'nova-compute')
+        return current_hypervisor, unit_name
+
+    def get_guest_qemu_pid(self, compute_unit_name, vm_uuid, model_name=None):
+        """Return the qemu pid of process running guest.
+
+        :param compute_unit_name: Juju unit name of hypervisor running guest
+        :type compute_unit_name: str
+        :param vm_uuid: Guests UUID
+        :type vm_uuid: str
+        :param model_name: Name of model running cloud.
+        :type model_name: str
+        :returns: PID of the qemu process matching vm_uuid
+        :rtype: int
+        """
+        pid_find_cmd = 'pgrep -u libvirt-qemu -f {}'.format(vm_uuid)
+        # Honour the caller's model, defaulting to the model under test.
+        out = zaza.model.run_on_unit(
+            compute_unit_name,
+            pid_find_cmd,
+            model_name=model_name or self.model_name)
+        return int(out['Stdout'].strip())
+
+    def test_instance_failover(self):
+        """Test masakari managed guest migration."""
+        # Launch guest
+        vm_name = 'zaza-test-instance-failover'
+        self.ensure_guest(vm_name)
+
+        # Locate hypervisor hosting guest and shut it down
+        current_hypervisor, unit_name = self.get_guests_compute_info(vm_name)
         zaza.configure.masakari.simulate_compute_host_failure(
             unit_name,
             model_name=self.model_name)
@@ -80,3 +134,36 @@ class MasakariTest(test_utils.OpenStackBaseTest):
             model_name=self.model_name)
         zaza.utilities.openstack.enable_all_nova_services(self.nova_client)
         zaza.configure.masakari.enable_hosts()
+
+    def test_instance_restart_on_fail(self):
+        """Test single guest crash and recovery."""
+        vm_name = 'zaza-test-instance-failover'
+        vm = self.ensure_guest(vm_name)
+        _, unit_name = self.get_guests_compute_info(vm_name)
+        logging.info('{} is running on {}'.format(vm_name, unit_name))
+        guest_pid = self.get_guest_qemu_pid(
+            unit_name,
+            vm.id,
+            model_name=self.model_name)
+        logging.info('{} pid is {}'.format(vm_name, guest_pid))
+        initial_update_time = datetime.strptime(
+            vm.updated,
+            "%Y-%m-%dT%H:%M:%SZ")
+        logging.info('Simulating vm crash of {}'.format(vm_name))
+        zaza.configure.masakari.simulate_guest_crash(
+            guest_pid,
+            unit_name,
+            model_name=self.model_name)
+        logging.info('Waiting for {} to be updated and become active'.format(
+            vm_name))
+        zaza.utilities.openstack.wait_for_server_update_and_active(
+            self.nova_client,
+            vm_name,
+            initial_update_time)
+        new_guest_pid = self.get_guest_qemu_pid(
+            unit_name,
+            vm.id,
+            model_name=self.model_name)
+        logging.info('{} pid is now {}'.format(vm_name, new_guest_pid))
+        assert new_guest_pid and new_guest_pid != guest_pid, (
+            "Restart failed or never happened")
diff --git a/zaza/configure/masakari.py b/zaza/configure/masakari.py
index f4f84ac..75c6076 100644
--- a/zaza/configure/masakari.py
+++ b/zaza/configure/masakari.py
@@ -167,3 +167,20 @@ def simulate_compute_host_recovery(unit_name, model_name):
         'start',
         ['corosync', 'pacemaker', 'nova-compute'],
         model_name)
+
+
+def simulate_guest_crash(guest_pid, compute_unit_name, model_name):
+    """Simulate a guest crashing.
+
+    :param guest_pid: PID of running qemu process for guest.
+    :type guest_pid: int
+    :param compute_unit_name: Juju name of hypervisor hosting guest (app/n)
+    :type compute_unit_name: str
+    :param model_name: Name of model unit_name resides in.
+    :type model_name: str
+    """
+    pid_kill_cmd = 'kill -9 {}'
+    zaza.model.run_on_unit(
+        compute_unit_name,
+        pid_kill_cmd.format(guest_pid),
+        model_name=model_name)
diff --git a/zaza/utilities/exceptions.py b/zaza/utilities/exceptions.py
index dc52cca..868a7c6 100644
--- a/zaza/utilities/exceptions.py
+++ b/zaza/utilities/exceptions.py
@@ -166,3 +166,9 @@ class NovaGuestMigrationFailed(Exception):
     """Nova guest migration failed."""
 
     pass
+
+
+class NovaGuestRestartFailed(Exception):
+    """Nova guest restart failed."""
+
+    pass
diff --git a/zaza/utilities/openstack.py b/zaza/utilities/openstack.py
index ad171cb..7dca504 100644
--- a/zaza/utilities/openstack.py
+++ b/zaza/utilities/openstack.py
@@ -42,6 +42,7 @@ from neutronclient.common import exceptions as neutronexceptions
 from octaviaclient.api.v2 import octavia as octaviaclient
 from swiftclient import client as swiftclient
 
+import datetime
 import io
 import juju_wait
 import logging
@@ -1922,6 +1923,37 @@ def wait_for_server_migration(nova_client, vm_name, original_hypervisor):
             current_hypervisor))
 
 
+@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, max=60),
+                reraise=True, stop=tenacity.stop_after_attempt(80),
+                retry=tenacity.retry_if_exception_type(
+                    exceptions.NovaGuestRestartFailed))
+def wait_for_server_update_and_active(nova_client, vm_name,
+                                      original_updatetime):
+    """Wait for guests metadata to be updated and for status to become active.
+
+    :param nova_client: Authenticated nova client
+    :type nova_client: novaclient.v2.client.Client
+    :param vm_name: Name of guest to monitor
+    :type vm_name: str
+    :param original_updatetime: The time the metadata was previously updated.
+    :type original_updatetime: datetime
+    :raises: exceptions.NovaGuestRestartFailed
+    """
+    server = nova_client.servers.find(name=vm_name)
+    current_updatetime = datetime.datetime.strptime(
+        server.updated,
+        "%Y-%m-%dT%H:%M:%SZ")
+    if current_updatetime <= original_updatetime or server.status != 'ACTIVE':
+        logging.info('{} Updated: {} Status: {}'.format(
+            vm_name,
+            current_updatetime,
+            server.status))
+        raise exceptions.NovaGuestRestartFailed(
+            'Restart of {} after crash failed'.format(vm_name))
+    else:
+        logging.info('SUCCESS {} has restarted'.format(vm_name))
+
+
 def enable_all_nova_services(nova_client):
     """Enable all nova services.
 