Files
zaza-openstack-tests/zaza/openstack/configure/masakari.py
2019-04-25 17:46:03 +02:00

229 lines
8.0 KiB
Python

# Copyright 2019 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Configure and manage masakari.
Functions for managing masakari resources and simulating compute node loss
and recovery.
"""
import logging
import zaza.model
import zaza.openstack.utilities.openstack as openstack_utils
ROUND_ROBIN = 'round-robin'
def roundrobin_assign_hosts_to_segments(nova_client, masakari_client):
"""Assign hypervisors to segments in a round-robin fashion.
:param nova_client: Authenticated nova client
:type nova_client: novaclient.Client
:param masakari_client: Authenticated masakari client
:type masakari_client: openstack.instance_ha.v1._proxy.Proxy
"""
hypervisors = nova_client.hypervisors.list()
segment_ids = [s.uuid for s in masakari_client.segments()]
segment_ids = segment_ids * len(hypervisors)
for hypervisor in hypervisors:
target_segment = segment_ids.pop()
hostname = hypervisor.hypervisor_hostname.split('.')[0]
logging.info('Adding {} to segment {}'.format(hostname,
target_segment))
masakari_client.create_host(
name=hostname,
segment_id=target_segment,
recovery_method='auto',
control_attributes='SSH',
type='COMPUTE')
HOST_ASSIGNMENT_METHODS = {
ROUND_ROBIN: roundrobin_assign_hosts_to_segments
}
def create_segments(segment_number=1, host_assignment_method=None):
"""Create a masakari segment and populate it with hypervisors.
:param segment_number: Number of segments to create
:type segment_number: int
:param host_assignment_method: Method to use to assign hypervisors to
segments
:type host_assignment_method: f()
"""
host_assignment_method = host_assignment_method or ROUND_ROBIN
keystone_session = openstack_utils.get_overcloud_keystone_session()
nova_client = openstack_utils.get_nova_session_client(keystone_session)
masakari_client = openstack_utils.get_masakari_session_client(
keystone_session)
for segment_number in range(0, segment_number):
segment_name = 'seg{}'.format(segment_number)
logging.info('Creating segment {}'.format(segment_name))
masakari_client.create_segment(
name=segment_name,
recovery_method='auto',
service_type='COMPUTE')
HOST_ASSIGNMENT_METHODS[host_assignment_method](
nova_client,
masakari_client)
def enable_hosts(masakari_client=None):
"""Enable all hypervisors within masakari.
Enable all hosts across all segments within masakari. This does not
enable the hypervisor from a nova POV.
:param masakari_client: Authenticated masakari client
:type masakari_client: openstack.instance_ha.v1._proxy.Proxy
"""
if not masakari_client:
keystone_session = openstack_utils.get_overcloud_keystone_session()
masakari_client = openstack_utils.get_masakari_session_client(
keystone_session)
for segment in masakari_client.segments():
for host in masakari_client.hosts(segment_id=segment.uuid):
if host.on_maintenance:
logging.info("Removing maintenance mode from masakari "
"host {}".format(host.uuid))
masakari_client.update_host(
host.uuid,
segment_id=segment.uuid,
**{'on_maintenance': False})
def _svc_control(unit_name, action, services, model_name):
"""Enable/Disable services on a unit.
This is a simplistic method for controlling services, hence its private.
:param unit_name: Juju name of unit (app/n)
:type unit_name: str
:param action: systemctl action to perform on unit (start/stop etc)
:type action: str
:param services: List of services to perform action against
:type services: []
:param model_name: Name of model unit_name resides in.
:type model_name: str
"""
logging.info('{} {} on {}'.format(action.title(), services, unit_name))
cmds = []
for svc in services:
cmds.append("systemctl {} {}".format(action, svc))
zaza.model.run_on_unit(
unit_name, command=';'.join(cmds),
model_name=model_name)
def _svc_set_systemd_restart_mode(unit_name, service_name, mode, model_name):
"""Update the restart mode of the given systemd service.
:param unit_name: Juju name of unit (app/n)
:type unit_name: str
:param service_name: Name of systemd service to update
:type service_name: str
:param mode: Restart mode to switch to eg 'no', 'on-success', 'on-failure',
'on-abort' or 'always'
:type mode: str
:param model_name: Name of model unit_name resides in.
:type model_name: str
"""
# Restart options include: no, on-success, on-failure, on-abort or always
logging.info('Setting systemd restart mode for {} to {}'.format(
service_name,
mode))
cmds = [
("sed -i -e 's/^Restart=.*/Restart={}/g' "
"/lib/systemd/system/{}.service'").format(mode, service_name),
'systemctl daemon-reload']
logging.info('Running {} on {}'.format(cmds, unit_name))
zaza.model.run_on_unit(
unit_name, command=';'.join(cmds),
model_name=model_name)
def simulate_compute_host_failure(unit_name, model_name):
"""Simulate compute node failure from a masakari and nova POV.
Masakari uses corosync/pacemaker to detect failure and nova check
nova-compute. Shutting down these services causes masakari and nova to
mark them as down.
:param unit_name: Juju name of unit (app/n)
:type unit_name: str
:param model_name: Name of model unit_name resides in.
:type model_name: str
"""
logging.info('Simulating failure of compute node {}'.format(unit_name))
_svc_set_systemd_restart_mode(
unit_name,
'pacemaker_remote',
'no',
model_name)
_svc_control(
unit_name,
'stop',
['corosync', 'nova-compute'],
model_name)
logging.info('Sending pacemaker_remoted a SIGTERM')
zaza.model.run_on_unit(
unit_name,
'pkill -9 -f /usr/sbin/pacemaker_remoted',
model_name=model_name)
def simulate_compute_host_recovery(unit_name, model_name):
"""Simulate compute node recovery from a masakari and nova POV.
Masakari uses corosync/pacemaker to detect failure and nova check
nova-compute. Starting these services is a prerequisite to marking
them as recovered.
:param unit_name: Juju name of unit (app/n)
:type unit_name: str
:param model_name: Name of model unit_name resides in.
:type model_name: str
"""
logging.info('Simulating recovery of compute node {}'.format(unit_name))
_svc_set_systemd_restart_mode(
unit_name,
'pacemaker_remote',
'on-failure',
model_name)
_svc_control(
unit_name,
'start',
['corosync', 'pacemaker_remote', 'nova-compute'],
model_name)
def simulate_guest_crash(guest_pid, compute_unit_name, model_name):
"""Simulate a guest crashing.
:param guest_pid: PID of running qemu provess for guest.
:type guest_pid: str
:param compute_unit_name: Juju name of hypervisor hosting guest (app/n)
:type compute_unit_name: str
:param model_name: Name of model unit_name resides in.
:type model_name: str
"""
pid_kill_cmd = 'kill -9 {}'
zaza.model.run_on_unit(
compute_unit_name,
pid_kill_cmd.format(guest_pid),
model_name=model_name)