Files
zaza-openstack-tests/zaza/openstack/charm_tests/ceph/rbd_mirror/tests.py
Corey Bryant 6d53e6d758 Increase wait attempts for volume availability
Increase wait attempts for availability of the volume created from an
image in CephRBDMirrorTest. As of Nautilus this takes longer, because
the tests switched to juju storage backed by undercloud cinder, which
is slower than the prior method of directory-backed OSD devices.
2019-11-06 20:59:48 -05:00

351 lines
14 KiB
Python

# Copyright 2019 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Encapsulate ``ceph-rbd-mirror`` testing."""
import json
import logging
import re
import zaza.openstack.charm_tests.test_utils as test_utils
import zaza.model
import zaza.openstack.utilities.ceph
import zaza.openstack.utilities.openstack as openstack
from zaza.openstack.charm_tests.glance.setup import LTS_IMAGE_NAME
class CephRBDMirrorBase(test_utils.OpenStackBaseTest):
    """Base class for ``ceph-rbd-mirror`` tests."""

    @classmethod
    def setUpClass(cls):
        """Run setup for ``ceph-rbd-mirror`` tests.

        Both sites are currently mapped to the single model returned by
        ``zaza.model.get_juju_model()``; the applications of site B are told
        apart by the ``-b`` suffix appended to their application names.
        """
        super().setUpClass()
        # get ready for multi-model Zaza
        cls.site_a_model = cls.site_b_model = zaza.model.get_juju_model()
        cls.site_b_app_suffix = '-b'

    def run_status_action(self, application_name=None, model_name=None):
        """Run status action, decode and return response.

        :param application_name: Application to run action on, defaults to
                                 ``self.application_name``
        :type application_name: str
        :param model_name: Model to run in
        :type model_name: str
        :returns: The ``output`` key of the action result decoded from JSON
        :raises: KeyError if the action result has no ``output`` key
        """
        result = zaza.model.run_action_on_leader(
            application_name or self.application_name,
            'status',
            model_name=model_name,
            action_params={
                'verbose': True,
                'format': 'json',
            })
        return json.loads(result.results['output'])

    def get_pools(self):
        """Retrieve list of pools from both sites.

        :returns: Tuple with sorted list of pool names on each side.
        :rtype: tuple
        """
        site_a_pools = zaza.openstack.utilities.ceph.get_ceph_pools(
            zaza.model.get_lead_unit_name(
                'ceph-mon', model_name=self.site_a_model),
            model_name=self.site_a_model)
        site_b_pools = zaza.openstack.utilities.ceph.get_ceph_pools(
            zaza.model.get_lead_unit_name(
                'ceph-mon' + self.site_b_app_suffix,
                model_name=self.site_b_model),
            model_name=self.site_b_model)
        return sorted(site_a_pools.keys()), sorted(site_b_pools.keys())

    def wait_for_mirror_state(self, state, application_name=None,
                              model_name=None,
                              check_entries_behind_master=False,
                              require_images_in=None):
        """Wait until all images reach requested state.

        This function runs the ``status`` action and examines the data it
        returns.  The action is re-run until every pool passes the checks.
        Images whose ``state`` is empty are not compared against ``state``.

        :param state: State to expect all images to be in
        :type state: str
        :param application_name: Application to run action on
        :type application_name: str
        :param model_name: Model to run in
        :type model_name: str
        :param check_entries_behind_master: Wait for ``entries_behind_master``
                                            reported in the image description
                                            to drop to 42 or less.  Only makes
                                            sense when used with state
                                            ``up+replaying``.
        :type check_entries_behind_master: bool
        :param require_images_in: List of pools to require images in,
                                  ``None`` (the default) requires none
        :type require_images_in: list of str or None
        :returns: True on success, never returns on failure
        """
        # Avoid a shared mutable default; an empty tuple keeps the
        # membership test below valid when no pools are required.
        required_pools = require_images_in or ()
        rep = re.compile(r'.*entries_behind_master=(\d+)')
        while True:
            try:
                # encapsulate in try except to work around LP: #1820976
                pool_status = self.run_status_action(
                    application_name=application_name, model_name=model_name)
            except KeyError:
                continue
            for pool, status in pool_status.items():
                images = status.get('images', [])
                if not images and pool in required_pools:
                    # a pool that must hold images is still empty
                    break
                for image in images:
                    if image['state'] and image['state'] != state:
                        break
                    if check_entries_behind_master:
                        m = rep.match(image['description'])
                        # NOTE(fnordahl): Tactical fix for upstream Ceph
                        # Luminous bug https://tracker.ceph.com/issues/23516
                        if m and int(m.group(1)) > 42:
                            logging.info('entries_behind_master={}'
                                         .format(m.group(1)))
                            break
                else:
                    # not found here, check next pool
                    continue
                # found here, pass on to outer loop
                break
            else:
                # all images with state has expected state
                return True
class CephRBDMirrorTest(CephRBDMirrorBase):
    """Encapsulate ``ceph-rbd-mirror`` tests."""

    def test_pause_resume(self):
        """Run pause and resume tests."""
        self.pause_resume(['rbd-mirror'])

    def test_pool_broker_synced(self):
        """Validate that pools created with broker protocol are synced.

        The functional test bundle includes the ``cinder``, ``cinder-ceph`` and
        ``glance`` charms.  The ``cinder-ceph`` and ``glance`` charms will
        create pools using the ceph charms broker protocol at deploy time.
        """
        site_a_pools, site_b_pools = self.get_pools()
        self.assertEqual(site_a_pools, site_b_pools)

    def test_pool_manual_synced(self):
        """Validate that manually created pools are synced after refresh.

        The ``ceph-rbd-mirror`` charm does not get notified when the operator
        creates a pool manually without using the ceph charms broker protocol.

        To alleviate this the charm has a ``refresh-pools`` action the operator
        can call to have it discover such pools.  Validate its operation.
        """
        # use action on ceph-mon to create a pool directly in the Ceph cluster
        # without using the broker protocol
        zaza.model.run_action_on_leader(
            'ceph-mon',
            'create-pool',
            model_name=self.site_a_model,
            action_params={
                'name': 'zaza',
                'app-name': 'rbd',
            })
        # tell ceph-rbd-mirror unit on site_a to refresh list of pools
        zaza.model.run_action_on_leader(
            'ceph-rbd-mirror',
            'refresh-pools',
            model_name=self.site_a_model,
            action_params={
            })
        # wait for execution to start
        zaza.model.wait_for_agent_status(model_name=self.site_a_model)
        zaza.model.wait_for_agent_status(model_name=self.site_b_model)
        # wait for execution to finish
        zaza.model.wait_for_application_states(model_name=self.site_a_model)
        zaza.model.wait_for_application_states(model_name=self.site_b_model)
        # make sure everything is idle before we test
        zaza.model.block_until_all_units_idle(model_name=self.site_a_model)
        zaza.model.block_until_all_units_idle(model_name=self.site_b_model)
        # validate result
        site_a_pools, site_b_pools = self.get_pools()
        self.assertEqual(site_a_pools, site_b_pools)

    def test_cinder_volume_mirrored(self):
        """Validate that a volume created through Cinder is mirrored.

        For RBD Mirroring to work clients must enable the correct set of
        features when creating images.

        The RBD image feature settings are announced by the ``ceph-mon`` charm
        over the client relation when it has units related on its
        ``rbd-mirror`` endpoint.

        By creating a volume through cinder on site A, checking for presence on
        site B and subsequently comparing the contents we get a full end to end
        test.
        """
        session = openstack.get_overcloud_keystone_session()
        glance = openstack.get_glance_session_client(session)
        cinder = openstack.get_cinder_session_client(session)

        image = next(glance.images.list(name=LTS_IMAGE_NAME))

        # NOTE(fnordahl): for some reason create volume from image often fails
        # when run just after deployment is finished.  We should figure out
        # why, resolve the underlying issue and then remove this.
        #
        # We do not use tenacity here as it will interfere with tenacity used
        # in ``resource_reaches_status``
        def create_volume_from_image(cinder, image, retry=20):
            """Create a volume from image, retrying up to ``retry`` times.

            :returns: The created volume, or None when every attempt failed
            """
            for _ in range(retry):
                volume = cinder.volumes.create(
                    8, name='zaza', imageRef=image.id)
                try:
                    # Note(coreycb): stop_after_attempt is increased because
                    # using juju storage for ceph-osd backed by cinder on
                    # undercloud takes longer than the prior method of
                    # directory-backed OSD devices.
                    openstack.resource_reaches_status(
                        cinder.volumes, volume.id, msg='volume',
                        stop_after_attempt=20)
                except AssertionError:
                    logging.info('retrying')
                    # drop the failed volume before the next attempt
                    volume.delete()
                else:
                    return volume
            return None

        volume = create_volume_from_image(cinder, image)
        # Fail with a clear message instead of an AttributeError on
        # ``volume.id`` below when all creation attempts were exhausted.
        self.assertIsNotNone(
            volume, 'Unable to create volume from image, retries exhausted')

        site_a_hash = zaza.openstack.utilities.ceph.get_rbd_hash(
            zaza.model.get_lead_unit_name('ceph-mon',
                                          model_name=self.site_a_model),
            'cinder-ceph',
            'volume-{}'.format(volume.id),
            model_name=self.site_a_model)
        self.wait_for_mirror_state(
            'up+replaying',
            check_entries_behind_master=True,
            application_name=self.application_name + self.site_b_app_suffix,
            model_name=self.site_b_model)
        site_b_hash = zaza.openstack.utilities.ceph.get_rbd_hash(
            zaza.model.get_lead_unit_name('ceph-mon' + self.site_b_app_suffix,
                                          model_name=self.site_b_model),
            'cinder-ceph',
            'volume-{}'.format(volume.id),
            model_name=self.site_b_model)
        logging.info(site_a_hash)
        logging.info(site_b_hash)
        self.assertEqual(site_a_hash, site_b_hash)
class CephRBDMirrorControlledFailoverTest(CephRBDMirrorBase):
    """Encapsulate ``ceph-rbd-mirror`` controlled failover tests."""

    def test_fail_over_fall_back(self):
        """Validate controlled fail over and fall back.

        Site A is demoted and site B promoted (fail over), then site B is
        demoted and site A promoted again (fall back).  After each demote and
        promote the number of lines in the action output is compared with the
        number of pools, and the mirror state is awaited on the sites.
        Finally the pools on site B are resynced.
        """
        model_a = self.site_a_model
        model_b = self.site_b_model
        pools_a, pools_b = self.get_pools()

        charm_a = 'ceph-rbd-mirror'
        charm_b = 'ceph-rbd-mirror' + self.site_b_app_suffix
        status_app_b = self.application_name + self.site_b_app_suffix

        def run_logged(app, action, model, params=None):
            # Run the action on the leader and log whatever it returned.
            outcome = zaza.model.run_action_on_leader(
                app,
                action,
                model_name=model,
                action_params={} if params is None else params)
            logging.info(outcome.results)
            return outcome

        def pools_in_output(outcome):
            # The action output is treated as one line per pool.
            lines = outcome.results['output'].split('\n')
            return len(lines)

        def wait_both(expected):
            # Wait for the same state on site A first, then on site B.
            self.wait_for_mirror_state(expected, model_name=model_a)
            self.wait_for_mirror_state(
                expected,
                application_name=status_app_b,
                model_name=model_b)

        # fail over: demote site A ...
        demoted = pools_in_output(run_logged(charm_a, 'demote', model_a))
        self.assertEqual(len(pools_a), demoted)
        wait_both('up+unknown')

        # ... and promote site B
        promoted = pools_in_output(run_logged(charm_b, 'promote', model_b))
        self.assertEqual(len(pools_b), promoted)
        self.wait_for_mirror_state('up+replaying', model_name=model_a)
        self.wait_for_mirror_state(
            'up+stopped',
            application_name=status_app_b,
            model_name=model_b)

        # fall back: demote site B ...
        demoted = pools_in_output(run_logged(charm_b, 'demote', model_b))
        self.assertEqual(len(pools_a), demoted)
        wait_both('up+unknown')

        # ... and promote site A
        promoted = pools_in_output(run_logged(charm_a, 'promote', model_a))
        self.assertEqual(len(pools_b), promoted)
        self.wait_for_mirror_state('up+stopped', model_name=model_a)

        # resync site B and require images in the pools named below
        run_logged(
            charm_b, 'resync-pools', model_b,
            params={'i-really-mean-it': True})
        self.wait_for_mirror_state(
            'up+replaying',
            application_name=status_app_b,
            model_name=model_b,
            require_images_in=['cinder-ceph', 'glance'])
class CephRBDMirrorDisasterFailoverTest(CephRBDMirrorBase):
    """Encapsulate ``ceph-rbd-mirror`` destructive tests."""

    def test_kill_site_a_fail_over(self):
        """Validate fail over after uncontrolled shutdown of primary.

        The site A applications are removed together with their machines.
        A plain ``promote`` on site B is then expected to end up ``failed``,
        while a ``promote`` with ``force`` is expected to end up
        ``completed``.
        """
        for doomed_app in ('ceph-rbd-mirror', 'ceph-mon', 'ceph-osd'):
            zaza.model.remove_application(
                doomed_app,
                model_name=self.site_a_model,
                forcefully_remove_machines=True)

        # (action parameters, expected action status), in execution order
        promote_attempts = (
            ({}, 'failed'),
            ({'force': True}, 'completed'),
        )
        for params, expected_status in promote_attempts:
            outcome = zaza.model.run_action_on_leader(
                'ceph-rbd-mirror' + self.site_b_app_suffix,
                'promote',
                model_name=self.site_b_model,
                action_params=params)
            self.assertEqual(outcome.status, expected_status)