Enable masakari instance monitor tests

Add helpers and a test that check a guest which dies is resurrected
by the masakari instance monitor.
This commit is contained in:
Liam Young
2019-03-07 16:03:42 +00:00
parent ac52b0c14b
commit e9c7690af4
7 changed files with 192 additions and 7 deletions

View File

@@ -13,6 +13,7 @@
# limitations under the License.
import copy
import datetime
import io
import mock
import tenacity
@@ -854,6 +855,54 @@ class TestOpenStackUtils(ut_utils.BaseTestCase):
'myvm',
'org-hypervisor')
def test_wait_for_server_update_and_active(self):
    """Check success when the guest has been updated and is ACTIVE."""
    # Cap the retry on the function under test (not on
    # wait_for_server_migration) so a regression fails after one attempt
    # rather than backing off through the full retry schedule.
    openstack_utils.wait_for_server_update_and_active.retry.stop = \
        tenacity.stop_after_attempt(1)
    novaclient = mock.MagicMock()
    servermock = mock.MagicMock()
    # The reported update time is later than the reference time passed
    # below, and the status is ACTIVE, so the wait should succeed.
    servermock.updated = '2019-03-07T13:41:58Z'
    servermock.status = 'ACTIVE'
    novaclient.servers.find.return_value = servermock
    # Implicit assertion that exception is not raised.
    openstack_utils.wait_for_server_update_and_active(
        novaclient,
        'myvm',
        datetime.datetime.strptime(
            '2019-03-07T13:40:58Z',
            '%Y-%m-%dT%H:%M:%SZ'))
def test_wait_for_server_update_and_active_fail_no_meeta_update(self):
    """Check failure when the guest's update time has not advanced."""
    wait_fn = openstack_utils.wait_for_server_update_and_active
    # One attempt only, so the failure surfaces immediately.
    wait_fn.retry.stop = tenacity.stop_after_attempt(1)
    guest = mock.MagicMock()
    guest.updated = '2019-03-07T13:41:58Z'
    guest.status = 'ACTIVE'
    nova = mock.MagicMock()
    nova.servers.find.return_value = guest
    # Reference time equals the reported update time, i.e. no update.
    reference_time = datetime.datetime.strptime(
        '2019-03-07T13:41:58Z',
        '%Y-%m-%dT%H:%M:%SZ')
    with self.assertRaises(exceptions.NovaGuestRestartFailed):
        wait_fn(nova, 'myvm', reference_time)
def test_wait_for_server_update_and_active_fail_not_active(self):
    """Check failure when the guest was updated but is not ACTIVE."""
    wait_fn = openstack_utils.wait_for_server_update_and_active
    # One attempt only, so the failure surfaces immediately.
    wait_fn.retry.stop = tenacity.stop_after_attempt(1)
    guest = mock.MagicMock()
    guest.updated = '2019-03-07T13:41:58Z'
    guest.status = 'NOTACTIVE'
    nova = mock.MagicMock()
    nova.servers.find.return_value = guest
    # Reference time is earlier than the reported update time, so only
    # the status check can cause the failure.
    reference_time = datetime.datetime.strptime(
        '2019-03-07T13:40:58Z',
        '%Y-%m-%dT%H:%M:%SZ')
    with self.assertRaises(exceptions.NovaGuestRestartFailed):
        wait_fn(nova, 'myvm', reference_time)
def test_enable_all_nova_services(self):
novaclient = mock.MagicMock()
svc_mock1 = mock.MagicMock()

View File

@@ -16,6 +16,7 @@
"""Encapsulate masakari testing."""
from datetime import datetime
import logging
import novaclient
@@ -40,27 +41,74 @@ class MasakariTest(test_utils.OpenStackBaseTest):
cls.nova_client = openstack_utils.get_nova_session_client(
cls.keystone_session)
def test_instance_failover(self):
"""Test masakari managed guest migration."""
# Launch guest
vm_name = 'zaza-test-instance-failover'
@classmethod
def tearDown(cls):
    """Run host recovery on all compute units and re-enable services.

    Runs simulate_compute_host_recovery against every nova-compute unit
    in the model, then re-enables all nova services and masakari hosts.
    """
    logging.info('Running teardown')
    model = cls.model_name
    compute_units = zaza.model.get_units('nova-compute', model_name=model)
    for compute_unit in compute_units:
        zaza.configure.masakari.simulate_compute_host_recovery(
            compute_unit.entity_id,
            model_name=model)
    zaza.utilities.openstack.enable_all_nova_services(cls.nova_client)
    zaza.configure.masakari.enable_hosts()
def ensure_guest(self, vm_name):
    """Return the existing guest or boot a new one.

    :param vm_name: Name of guest to lookup
    :type vm_name: str
    :returns: Result of the nova lookup by name, or the result of
              launch_instance when no guest of that name exists.
    """
    try:
        # Single lookup whose result is kept, so the caller gets the
        # guest object back.
        guest = self.nova_client.servers.find(name=vm_name)
        logging.info('Found existing guest')
    except novaclient.exceptions.NotFound:
        logging.info('Launching new guest')
        guest = zaza.configure.guest.launch_instance(
            'bionic',
            use_boot_volume=True,
            vm_name=vm_name)
    return guest
# Locate hypervisor hosting guest and shut it down
def get_guests_compute_info(self, vm_name):
    """Look up the hypervisor and juju unit hosting a guest.

    :param vm_name: Name of guest to lookup
    :type vm_name: str
    :returns: Hypervisor reported for the guest and the nova-compute
              unit name derived from it.
    :rtype: tuple
    """
    hypervisor = zaza.utilities.openstack.get_hypervisor_for_guest(
        self.nova_client,
        vm_name)
    compute_unit = juju_utils.get_unit_name_from_host_name(
        hypervisor,
        'nova-compute')
    return hypervisor, compute_unit
def get_guest_qemu_pid(self, compute_unit_name, vm_uuid, model_name=None):
    """Return the qemu pid of process running guest.

    Runs pgrep on the compute unit for a libvirt-qemu owned process
    whose command line contains the guest UUID.

    :param compute_unit_name: Juju unit name of hypervisor running guest
    :type compute_unit_name: str
    :param vm_uuid: Guests UUID
    :type vm_uuid: str
    :param model_name: Name of model running cloud. Defaults to the
                       model of this test class when not supplied.
    :type model_name: str
    :returns: PID parsed from the command output.
    :rtype: int
    :raises: ValueError if the output is not a single integer (for
             example when no process, or more than one, matches).
    """
    pid_find_cmd = 'pgrep -u libvirt-qemu -f {}'.format(vm_uuid)
    out = zaza.model.run_on_unit(
        compute_unit_name,
        pid_find_cmd,
        # Honour the caller's model rather than silently ignoring it.
        model_name=model_name or self.model_name)
    return int(out['Stdout'].strip())
def test_instance_failover(self):
"""Test masakari managed guest migration."""
# Launch guest
vm_name = 'zaza-test-instance-failover'
self.ensure_guest(vm_name)
# Locate hypervisor hosting guest and shut it down
current_hypervisor, unit_name = self.get_guests_compute_info(vm_name)
zaza.configure.masakari.simulate_compute_host_failure(
unit_name,
model_name=self.model_name)
@@ -80,3 +128,36 @@ class MasakariTest(test_utils.OpenStackBaseTest):
model_name=self.model_name)
zaza.utilities.openstack.enable_all_nova_services(self.nova_client)
zaza.configure.masakari.enable_hosts()
def test_instance_restart_on_fail(self):
    """Test single guest crash and recovery.

    Kills the guest's qemu process, waits for nova to report the guest
    as updated and ACTIVE, then checks a qemu process with a different
    pid is running for the guest.
    """
    vm_name = 'zaza-test-instance-failover'
    vm = self.ensure_guest(vm_name)
    _, unit_name = self.get_guests_compute_info(vm_name)
    logging.info('{} is running on {}'.format(vm_name, unit_name))
    guest_pid = self.get_guest_qemu_pid(
        unit_name,
        vm.id,
        model_name=self.model_name)
    logging.info('{} pid is {}'.format(vm_name, guest_pid))
    # Capture the update time before the crash; the wait below needs a
    # strictly later update time to treat the guest as restarted.
    initial_update_time = datetime.strptime(
        vm.updated,
        "%Y-%m-%dT%H:%M:%SZ")
    logging.info('Simulating vm crash of {}'.format(vm_name))
    zaza.configure.masakari.simulate_guest_crash(
        guest_pid,
        unit_name,
        model_name=self.model_name)
    logging.info('Waiting for {} to be updated and become active'.format(
        vm_name))
    zaza.utilities.openstack.wait_for_server_update_and_active(
        self.nova_client,
        vm_name,
        initial_update_time)
    new_guest_pid = self.get_guest_qemu_pid(
        unit_name,
        vm.id,
        model_name=self.model_name)
    # Report the pid found after the restart, not the original one.
    logging.info('{} pid is now {}'.format(vm_name, new_guest_pid))
    assert new_guest_pid and new_guest_pid != guest_pid, (
        "Restart failed or never happened")

View File

@@ -167,3 +167,20 @@ def simulate_compute_host_recovery(unit_name, model_name):
'start',
['corosync', 'pacemaker', 'nova-compute'],
model_name)
def simulate_guest_crash(guest_pid, compute_unit_name, model_name):
    """Simulate a guest crashing by sending SIGKILL to its qemu process.

    :param guest_pid: PID of running qemu process for guest.
    :type guest_pid: int or str
    :param compute_unit_name: Juju name of hypervisor hosting guest (app/n)
    :type compute_unit_name: str
    :param model_name: Name of model unit_name resides in.
    :type model_name: str
    """
    kill_cmd = 'kill -9 {}'.format(guest_pid)
    zaza.model.run_on_unit(compute_unit_name, kill_cmd, model_name=model_name)

Binary file not shown.

View File

@@ -166,3 +166,9 @@ class NovaGuestMigrationFailed(Exception):
"""Nova guest migration failed."""
pass
class NovaGuestRestartFailed(Exception):
    """Signal that a nova guest did not restart as expected."""

View File

@@ -42,6 +42,7 @@ from neutronclient.common import exceptions as neutronexceptions
from octaviaclient.api.v2 import octavia as octaviaclient
from swiftclient import client as swiftclient
import datetime
import io
import juju_wait
import logging
@@ -1922,6 +1923,37 @@ def wait_for_server_migration(nova_client, vm_name, original_hypervisor):
current_hypervisor))
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, max=60),
                reraise=True, stop=tenacity.stop_after_attempt(80),
                retry=tenacity.retry_if_exception_type(
                    exceptions.NovaGuestRestartFailed))
def wait_for_server_update_and_active(nova_client, vm_name,
                                      original_updatetime):
    """Wait for guests metadata to be updated and for status to become active.

    Each call performs a single check. The tenacity decorator retries the
    check with exponential backoff while NovaGuestRestartFailed is raised,
    and re-raises it once the attempts are exhausted.

    :param nova_client: Authenticated nova client
    :type nova_client: novaclient.v2.client.Client
    :param vm_name: Name of guest to monitor
    :type vm_name: str
    :param original_updatetime: The time the metadata was previously updated.
    :type original_updatetime: datetime
    :raises: exceptions.NovaGuestRestartFailed
    """
    server = nova_client.servers.find(name=vm_name)
    current_updatetime = datetime.datetime.strptime(
        server.updated,
        "%Y-%m-%dT%H:%M:%SZ")
    # The guest only counts as restarted when its update time has moved
    # past the original one AND it is reported as ACTIVE.
    if current_updatetime <= original_updatetime or server.status != 'ACTIVE':
        logging.info('{} Updated: {} Status: {}'.format(
            vm_name,
            current_updatetime,
            server.status))
        raise exceptions.NovaGuestRestartFailed(
            'Restart of {} after crash failed'.format(vm_name))
    logging.info('SUCCESS {} has restarted'.format(vm_name))
def enable_all_nova_services(nova_client):
"""Enable all nova services.