When the nrpe application is not deployed in the model skip the test that checks for the correct configuration of the nagios checks, this is because at the moment nrpe has no support for jammy, hence is not being deployed in the jammy-yoga bundles. Jammy support for nrpe is tracked at http://pad.lv/1968008
526 lines
21 KiB
Python
526 lines
21 KiB
Python
# Copyright 2019 Canonical Ltd.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""RabbitMQ Testing."""
|
|
|
|
import json
|
|
import logging
|
|
import time
|
|
import uuid
|
|
import unittest
|
|
|
|
import juju
|
|
import tenacity
|
|
import zaza.model
|
|
import zaza.openstack.charm_tests.test_utils as test_utils
|
|
import zaza.openstack.utilities.generic as generic_utils
|
|
|
|
from zaza.openstack.utilities.generic import get_series
|
|
from zaza.openstack.utilities.os_versions import CompareHostReleases
|
|
|
|
from . import utils as rmq_utils
|
|
from .utils import RmqNoMessageException
|
|
|
|
|
|
class RmqTests(test_utils.OpenStackBaseTest):
|
|
"""Zaza tests on a basic rabbitmq cluster deployment."""
|
|
|
|
@classmethod
|
|
def setUpClass(cls):
|
|
"""Run class setup for running tests."""
|
|
super(RmqTests, cls).setUpClass()
|
|
|
|
def _get_uuid_epoch_stamp(self):
|
|
"""Return a string based on uuid4 and epoch time.
|
|
|
|
Useful in generating test messages which need to be unique-ish.
|
|
"""
|
|
return '[{}-{}]'.format(uuid.uuid4(), time.time())
|
|
|
|
@tenacity.retry(
|
|
reraise=True,
|
|
retry=tenacity.retry_if_exception_type(RmqNoMessageException),
|
|
wait=tenacity.wait_fixed(10),
|
|
stop=tenacity.stop_after_attempt(2))
|
|
def _retry_get_amqp_message(self, check_unit, ssl=None, port=None):
|
|
return rmq_utils.get_amqp_message_by_unit(check_unit,
|
|
ssl=ssl,
|
|
port=port)
|
|
|
|
def _search_for_message(self, amqp_msg, check_unit, ssl, port,
|
|
amqp_msg_counter):
|
|
"""Search for message in message queue.
|
|
|
|
WARNING: This will consume messages until it finds the target message.
|
|
|
|
:param amqp_msg: Message to search for
|
|
:type amqp_msg: string
|
|
:param check_unit: Unit to retrieve messages from
|
|
:type check_unit: juju.unit.Unit
|
|
:param ssl: Whether to use SSL when connecting to rabbit
|
|
:type ssl: bool
|
|
:param port: Port to use when connecting to rabbit
|
|
:type port: Union[int, None]
|
|
:param amqp_msg_counter: Number in test sequence of this message.
|
|
:type amqp_msg: int
|
|
:raises: RmqNoMessageException
|
|
"""
|
|
for i in range(100):
|
|
amqp_msg_rcvd = self._retry_get_amqp_message(
|
|
check_unit,
|
|
ssl=ssl,
|
|
port=port)
|
|
if amqp_msg == amqp_msg_rcvd:
|
|
logging.info(
|
|
'Message {} received OK.'.format(amqp_msg_counter))
|
|
break
|
|
else:
|
|
logging.info('Expected: {}'.format(amqp_msg))
|
|
logging.info('Actual: {}'.format(amqp_msg_rcvd))
|
|
else:
|
|
msg = 'Message {} not found.'.format(amqp_msg_counter)
|
|
raise RmqNoMessageException(msg)
|
|
|
|
def _test_rmq_amqp_messages_all_units(self, units,
|
|
ssl=False, port=None):
|
|
"""Reusable test to send/check amqp messages to every listed rmq unit.
|
|
|
|
Reusable test to send amqp messages to every listed rmq
|
|
unit. Checks every listed rmq unit for messages.
|
|
:param units: list of units
|
|
:returns: None if successful. Raise on error.
|
|
|
|
"""
|
|
# Add test user if it does not already exist
|
|
rmq_utils.add_user(units)
|
|
|
|
# Handle ssl (includes wait-for-cluster)
|
|
if ssl:
|
|
rmq_utils.configure_ssl_on(units, port=port)
|
|
else:
|
|
rmq_utils.configure_ssl_off(units)
|
|
|
|
# Publish and get amqp messages in all possible unit combinations.
|
|
# Qty of checks == qty_of_units * (qty_of_units - 1)
|
|
assert len(units) >= 2, 'Test is useful only with 2 units or more.'
|
|
|
|
amqp_msg_counter = 1
|
|
host_names = generic_utils.get_unit_hostnames(units)
|
|
|
|
for dest_unit in units:
|
|
dest_unit_name = dest_unit.entity_id
|
|
dest_unit_host = zaza.model.get_unit_public_address(dest_unit)
|
|
dest_unit_host_name = host_names[dest_unit_name]
|
|
|
|
for check_unit in units:
|
|
check_unit_name = check_unit.entity_id
|
|
if dest_unit_name == check_unit_name:
|
|
logging.info("Skipping check for this unit to itself.")
|
|
continue
|
|
check_unit_host = zaza.model.get_unit_public_address(
|
|
check_unit)
|
|
check_unit_host_name = host_names[check_unit_name]
|
|
|
|
amqp_msg_stamp = self._get_uuid_epoch_stamp()
|
|
amqp_msg = ('Message {}@{} {}'.format(amqp_msg_counter,
|
|
dest_unit_host,
|
|
amqp_msg_stamp)).upper()
|
|
# Publish amqp message
|
|
logging.info('Publish message to: {} '
|
|
'({} {})'.format(dest_unit_host,
|
|
dest_unit_name,
|
|
dest_unit_host_name))
|
|
|
|
rmq_utils.publish_amqp_message_by_unit(dest_unit,
|
|
amqp_msg, ssl=ssl,
|
|
port=port)
|
|
|
|
# Get amqp message
|
|
logging.info('Get messages from: {} '
|
|
'({} {})'.format(check_unit_host,
|
|
check_unit_name,
|
|
check_unit_host_name))
|
|
|
|
try:
|
|
self._search_for_message(
|
|
amqp_msg,
|
|
check_unit,
|
|
ssl,
|
|
port,
|
|
amqp_msg_counter)
|
|
except RmqNoMessageException:
|
|
msg = 'Failed to retrieve message {}.'.format(
|
|
amqp_msg_counter)
|
|
raise Exception(msg)
|
|
amqp_msg_counter += 1
|
|
|
|
# Delete the test user
|
|
rmq_utils.delete_user(units)
|
|
|
|
def test_400_rmq_cluster_running_nodes(self):
|
|
"""Verify cluster status shows every cluster node as running member."""
|
|
logging.info('Checking that all units are in cluster_status '
|
|
'running nodes...')
|
|
|
|
units = zaza.model.get_units(self.application_name)
|
|
|
|
ret = rmq_utils.validate_cluster_running_nodes(units)
|
|
self.assertIsNone(ret, msg=ret)
|
|
|
|
logging.info('OK')
|
|
|
|
def test_406_rmq_amqp_messages_all_units_ssl_off(self):
|
|
"""Send (and check) amqp messages to every rmq unit.
|
|
|
|
Sends amqp messages to every rmq unit, and check every rmq
|
|
unit for messages. Uses Standard amqp tcp port, no ssl.
|
|
|
|
"""
|
|
logging.info('Checking amqp message publish/get on all units '
|
|
'(ssl off)...')
|
|
|
|
units = zaza.model.get_units(self.application_name)
|
|
self._test_rmq_amqp_messages_all_units(units, ssl=False)
|
|
logging.info('OK')
|
|
|
|
def test_408_rmq_amqp_messages_all_units_ssl_on(self):
|
|
"""Send (and check) amqp messages to every rmq unit (ssl enabled).
|
|
|
|
Sends amqp messages to every rmq unit, and check every rmq
|
|
unit for messages. Uses Standard ssl tcp port.
|
|
|
|
"""
|
|
units = zaza.model.get_units(self.application_name)
|
|
|
|
# http://pad.lv/1625044
|
|
if CompareHostReleases(get_series(units[0])) <= 'trusty':
|
|
logging.info('SKIP')
|
|
logging.info('Skipping SSL tests due to client'
|
|
' compatibility issues')
|
|
return
|
|
logging.info('Checking amqp message publish/get on all units '
|
|
'(ssl on)...')
|
|
|
|
self._test_rmq_amqp_messages_all_units(units,
|
|
ssl=True, port=5671)
|
|
logging.info('OK')
|
|
|
|
@tenacity.retry(
|
|
retry=tenacity.retry_if_result(lambda ret: ret is not None),
|
|
wait=tenacity.wait_fixed(30),
|
|
stop=tenacity.stop_after_attempt(20),
|
|
after=rmq_utils._log_tenacity_retry)
|
|
def _retry_port_knock_units(self, units, port, expect_success=True):
|
|
return generic_utils.port_knock_units(units, port,
|
|
expect_success=expect_success)
|
|
|
|
def test_412_rmq_management_plugin(self):
|
|
"""Enable and check management plugin."""
|
|
logging.info('Checking tcp socket connect to management plugin '
|
|
'port on all rmq units...')
|
|
|
|
units = zaza.model.get_units(self.application_name)
|
|
mgmt_port = 15672
|
|
|
|
# Enable management plugin
|
|
logging.info('Enabling management_plugin charm config option...')
|
|
config = {'management_plugin': 'True'}
|
|
zaza.model.set_application_config('rabbitmq-server', config)
|
|
rmq_utils.wait_for_cluster()
|
|
|
|
# Check tcp connect to management plugin port
|
|
ret = self._retry_port_knock_units(units, mgmt_port)
|
|
|
|
self.assertIsNone(ret, msg=ret)
|
|
logging.info('Connect to all units (OK)')
|
|
|
|
# Disable management plugin
|
|
logging.info('Disabling management_plugin charm config option...')
|
|
config = {'management_plugin': 'False'}
|
|
zaza.model.set_application_config('rabbitmq-server', config)
|
|
rmq_utils.wait_for_cluster()
|
|
|
|
# Negative check - tcp connect to management plugin port
|
|
logging.info('Expect tcp connect fail since charm config '
|
|
'option is disabled.')
|
|
ret = self._retry_port_knock_units(units,
|
|
mgmt_port,
|
|
expect_success=False)
|
|
|
|
self.assertIsNone(ret, msg=ret)
|
|
logging.info('Confirm mgmt port closed on all units (OK)')
|
|
|
|
@tenacity.retry(
|
|
retry=tenacity.retry_if_result(lambda ret: ret is not None),
|
|
# sleep for 2mins to allow 1min cron job to run...
|
|
wait=tenacity.wait_fixed(120),
|
|
stop=tenacity.stop_after_attempt(2))
|
|
def _retry_check_commands_on_units(self, cmds, units):
|
|
return generic_utils.check_commands_on_units(cmds, units)
|
|
|
|
def test_414_rmq_nrpe_monitors(self):
|
|
"""Check rabbimq-server nrpe monitor basic functionality."""
|
|
try:
|
|
zaza.model.get_application("nrpe")
|
|
except KeyError:
|
|
logging.warn(("Skipping as nrpe is not deployed. "
|
|
"http://pad.lv/1968008"))
|
|
return
|
|
|
|
units = zaza.model.get_units(self.application_name)
|
|
host_names = generic_utils.get_unit_hostnames(units)
|
|
|
|
# check_rabbitmq monitor
|
|
logging.info('Checking nrpe check_rabbitmq on units...')
|
|
cmds = ['egrep -oh /usr/local.* /etc/nagios/nrpe.d/'
|
|
'check_rabbitmq.cfg']
|
|
ret = self._retry_check_commands_on_units(cmds, units)
|
|
self.assertIsNone(ret, msg=ret)
|
|
|
|
# check_rabbitmq_queue monitor
|
|
logging.info('Checking nrpe check_rabbitmq_queue on units...')
|
|
cmds = ['egrep -oh /usr/local.* /etc/nagios/nrpe.d/'
|
|
'check_rabbitmq_queue.cfg']
|
|
ret = self._retry_check_commands_on_units(cmds, units)
|
|
self.assertIsNone(ret, msg=ret)
|
|
|
|
# check dat file existence
|
|
logging.info('Checking nrpe dat file existence on units...')
|
|
for u in units:
|
|
unit_host_name = host_names[u.entity_id]
|
|
|
|
cmds = [
|
|
'stat /var/lib/rabbitmq/data/{}_general_stats.dat'.format(
|
|
unit_host_name),
|
|
'stat /var/lib/rabbitmq/data/{}_queue_stats.dat'.format(
|
|
unit_host_name)
|
|
]
|
|
|
|
ret = generic_utils.check_commands_on_units(cmds, [u])
|
|
self.assertIsNone(ret, msg=ret)
|
|
|
|
logging.info('OK')
|
|
|
|
def test_910_pause_and_resume(self):
|
|
"""The services can be paused and resumed."""
|
|
logging.info('Checking pause and resume actions...')
|
|
|
|
logging.info('Waiting for the cluster to be ready')
|
|
rmq_utils.wait_for_cluster()
|
|
unit = zaza.model.get_units(self.application_name)[0]
|
|
assert unit.workload_status == "active"
|
|
|
|
logging.info('Pausing unit {}'.format(unit))
|
|
zaza.model.run_action(unit.entity_id, "pause")
|
|
logging.info('Waiting until unit {} reaches "maintenance" state'
|
|
''.format(unit))
|
|
zaza.model.block_until_unit_wl_status(unit.entity_id, "maintenance")
|
|
unit = zaza.model.get_unit_from_name(unit.entity_id)
|
|
assert unit.workload_status == "maintenance"
|
|
|
|
logging.info('Resuming unit {}'.format(unit))
|
|
zaza.model.run_action(unit.entity_id, "resume")
|
|
logging.info('Waiting until unit {} reaches "active" state'
|
|
''.format(unit))
|
|
zaza.model.block_until_unit_wl_status(unit.entity_id, "active")
|
|
unit = zaza.model.get_unit_from_name(unit.entity_id)
|
|
assert unit.workload_status == "active"
|
|
|
|
rmq_utils.wait_for_cluster()
|
|
logging.info('OK')
|
|
|
|
def test_911_cluster_status(self):
|
|
"""Test rabbitmqctl cluster_status action can be returned."""
|
|
logging.info('Checking cluster status action...')
|
|
|
|
unit = zaza.model.get_units(self.application_name)[0]
|
|
action = zaza.model.run_action(unit.entity_id, "cluster-status")
|
|
self.assertIsInstance(action, juju.action.Action)
|
|
|
|
logging.info('OK')
|
|
|
|
def test_912_check_queues(self):
|
|
"""Test rabbitmqctl check_queues action can be returned."""
|
|
logging.info('Checking cluster status action...')
|
|
|
|
unit = zaza.model.get_units(self.application_name)[0]
|
|
action = zaza.model.run_action(unit.entity_id, "check-queues")
|
|
self.assertIsInstance(action, juju.action.Action)
|
|
|
|
def test_913_list_unconsumed_queues(self):
|
|
"""Test rabbitmqctl list-unconsumed-queues action can be returned."""
|
|
logging.info('Checking list-unconsumed-queues action...')
|
|
|
|
units = zaza.model.get_units(self.application_name)
|
|
self._test_rmq_amqp_messages_all_units(units)
|
|
unit = units[0]
|
|
action = zaza.model.run_action(unit.entity_id,
|
|
'list-unconsumed-queues')
|
|
self.assertIsInstance(action, juju.action.Action)
|
|
|
|
queue_count = int(action.results['unconsumed-queue-count'])
|
|
assert queue_count > 0, 'Did not find any unconsumed queues.'
|
|
|
|
queue_name = 'test' # publish_amqp_message_by_unit default queue name
|
|
for i in range(queue_count):
|
|
queue_data = json.loads(
|
|
action.results['unconsumed-queues'][str(i)])
|
|
if queue_data['name'] == queue_name:
|
|
break
|
|
else:
|
|
assert False, 'Did not find expected queue in result.'
|
|
|
|
# Since we just reused _test_rmq_amqp_messages_all_units, we should
|
|
# have created the queue if it didn't already exist, but all messages
|
|
# should have already been consumed.
|
|
if queue_data['messages'] != 0:
|
|
logging.error(
|
|
'{} has {} remaining messages in {} instead of 0.'.format(
|
|
unit.entity_id, queue_data['messages'],
|
|
queue_data['name']))
|
|
if queue_data['messages'] >= 1:
|
|
logging.error('One message is: {}'.format(
|
|
self._retry_get_amqp_message(unit)))
|
|
assert False, 'Found unexpected message count.'
|
|
|
|
logging.info('OK')
|
|
|
|
@tenacity.retry(
|
|
retry=tenacity.retry_if_result(lambda errors: bool(errors)),
|
|
wait=tenacity.wait_fixed(10),
|
|
stop=tenacity.stop_after_attempt(2))
|
|
def _retry_check_unit_cluster_nodes(self, u, unit_node_names):
|
|
return rmq_utils.check_unit_cluster_nodes(u, unit_node_names)
|
|
|
|
@unittest.skip(
|
|
"Skipping as a significant rework is required, see "
|
|
"https://github.com/openstack-charmers/zaza-openstack-tests/issues/290"
|
|
)
|
|
def test_921_remove_and_add_unit(self):
|
|
"""Test if unit cleans up when removed from Rmq cluster.
|
|
|
|
Test if a unit correctly cleans up by removing itself from the
|
|
RabbitMQ cluster on removal.
|
|
|
|
Add the unit back to the cluster at the end of the test case to
|
|
avoid side-effects.
|
|
|
|
"""
|
|
logging.info('Checking that units correctly clean up after '
|
|
'themselves on unit removal...')
|
|
config = {'min-cluster-size': '2'}
|
|
zaza.model.set_application_config('rabbitmq-server', config)
|
|
rmq_utils.wait_for_cluster()
|
|
|
|
all_units = zaza.model.get_units(self.application_name)
|
|
removed_unit = all_units[-1]
|
|
left_units = all_units[:-1]
|
|
|
|
logging.info('Simulating unit {} removal'.format(removed_unit))
|
|
zaza.model.run_on_unit(removed_unit.entity_id, 'hooks/stop')
|
|
logging.info('Waiting until unit {} reaches "waiting" state'
|
|
''.format(removed_unit))
|
|
zaza.model.block_until_unit_wl_status(removed_unit.entity_id,
|
|
"waiting")
|
|
|
|
def check_units(units):
|
|
unit_host_names = generic_utils.get_unit_hostnames(units)
|
|
unit_node_names = []
|
|
for unit in unit_host_names:
|
|
unit_node_names.append('rabbit@{}'.format(
|
|
unit_host_names[unit]))
|
|
errors = []
|
|
|
|
for u in units:
|
|
e = self._retry_check_unit_cluster_nodes(u,
|
|
unit_node_names)
|
|
if e:
|
|
errors.append(e)
|
|
|
|
self.assertFalse(errors, msg=errors)
|
|
|
|
logging.info('Checking that all units except for {} are present'
|
|
'in the cluster'.format(removed_unit))
|
|
check_units(left_units)
|
|
|
|
logging.info('Re-adding the removed unit {} back to the cluster'
|
|
'by simulating the upgrade-charm event'
|
|
''.format(removed_unit))
|
|
# TODO(dmitriis): Fix the rabbitmq charm to add a proper way to add a
|
|
# unit back to the cluster and replace this.
|
|
zaza.model.run_on_unit(removed_unit.entity_id, 'hooks/upgrade-charm')
|
|
logging.info('Waiting until unit {} reaches "active" state'
|
|
''.format(removed_unit))
|
|
zaza.model.block_until_unit_wl_status(removed_unit.entity_id,
|
|
"active")
|
|
logging.info('Checking that all units are present in the cluster')
|
|
check_units(all_units)
|
|
|
|
logging.info('OK')
|
|
|
|
|
|
class RabbitMQDeferredRestartTest(test_utils.BaseDeferredRestartTest):
|
|
"""Deferred restart tests."""
|
|
|
|
@classmethod
|
|
def setUpClass(cls):
|
|
"""Run setup for deferred restart tests."""
|
|
super().setUpClass(application_name='rabbitmq-server')
|
|
|
|
def check_status_message_is_clear(self):
|
|
"""Check each units status message show no defeerred events."""
|
|
pattern = '(Unit is ready|Unit is ready and clustered)$'
|
|
for unit in zaza.model.get_units(self.application_name):
|
|
zaza.model.block_until_unit_wl_message_match(
|
|
unit.entity_id,
|
|
pattern)
|
|
zaza.model.block_until_all_units_idle()
|
|
|
|
def get_new_config(self):
|
|
"""Return the config key and new value to trigger a hook execution.
|
|
|
|
:returns: Config key and new value
|
|
:rtype: (str, bool)
|
|
"""
|
|
app_config = zaza.model.get_application_config(self.application_name)
|
|
new_value = str(int(
|
|
app_config['connection-backlog'].get('value', 100) + 1))
|
|
return 'connection-backlog', new_value
|
|
|
|
def run_tests(self):
|
|
"""Run deferred restart tests."""
|
|
# Trigger a config change which triggers a deferred hook.
|
|
self.run_charm_change_hook_test('config-changed')
|
|
|
|
# Trigger a package change which requires a restart
|
|
self.run_package_change_test(
|
|
'rabbitmq-server',
|
|
'rabbitmq-server')
|
|
|
|
def check_clear_restarts(self):
|
|
"""Clear and deferred restarts and check status.
|
|
|
|
Clear and deferred restarts and then check the workload status message
|
|
for each unit.
|
|
"""
|
|
# Use action to run any deferred restarts
|
|
for unit in zaza.model.get_units(self.application_name):
|
|
zaza.model.run_action(
|
|
unit.entity_id,
|
|
'restart-services',
|
|
action_params={'services': 'rabbitmq-server'})
|
|
|
|
# Check workload status no longer shows deferred restarts.
|
|
self.check_status_message_is_clear()
|