From d41f8b37240b794442c873327696ffe2ed9d4fbb Mon Sep 17 00:00:00 2001 From: Ionut Balutoiu Date: Wed, 20 Jan 2021 20:10:34 +0200 Subject: [PATCH 1/2] Update Ceph RBD Mirror tests The updated tests add the possibility of testing deployments with `image` RBD mirroring mode implemented as part of the Cinder Ceph Replication charm spec. --- .../charm_tests/ceph/rbd_mirror/tests.py | 308 ++++++++++++++---- 1 file changed, 242 insertions(+), 66 deletions(-) diff --git a/zaza/openstack/charm_tests/ceph/rbd_mirror/tests.py b/zaza/openstack/charm_tests/ceph/rbd_mirror/tests.py index 6c2fa1b..75a3f42 100644 --- a/zaza/openstack/charm_tests/ceph/rbd_mirror/tests.py +++ b/zaza/openstack/charm_tests/ceph/rbd_mirror/tests.py @@ -16,6 +16,9 @@ import json import logging import re +import time + +import cinderclient.exceptions as cinder_exceptions import zaza.openstack.charm_tests.test_utils as test_utils @@ -39,16 +42,20 @@ class CephRBDMirrorBase(test_utils.OpenStackBaseTest): cls.site_a_model = cls.site_b_model = zaza.model.get_juju_model() cls.site_b_app_suffix = '-b' - def run_status_action(self, application_name=None, model_name=None): + def run_status_action(self, application_name=None, model_name=None, + pools=[]): """Run status action, decode and return response.""" + action_params = { + 'verbose': True, + 'format': 'json', + } + if len(pools) > 0: + action_params['pools'] = ','.join(pools) result = zaza.model.run_action_on_leader( application_name or self.application_name, 'status', model_name=model_name, - action_params={ - 'verbose': True, - 'format': 'json', - }) + action_params=action_params) return json.loads(result.results['output']) def get_pools(self): @@ -71,7 +78,8 @@ class CephRBDMirrorBase(test_utils.OpenStackBaseTest): def wait_for_mirror_state(self, state, application_name=None, model_name=None, check_entries_behind_master=False, - require_images_in=[]): + require_images_in=[], + pools=[]): """Wait until all images reach requested state. 
This function runs the ``status`` action and examines the data it @@ -90,6 +98,9 @@ class CephRBDMirrorBase(test_utils.OpenStackBaseTest): :type check_entries_behind_master: bool :param require_images_in: List of pools to require images in :type require_images_in: list of str + :param pools: List of pools to run status on. If this is empty, the + status action will run on all the pools. + :type pools: list of str :returns: True on success, never returns on failure """ rep = re.compile(r'.*entries_behind_master=(\d+)') @@ -97,7 +108,8 @@ class CephRBDMirrorBase(test_utils.OpenStackBaseTest): try: # encapsulate in try except to work around LP: #1820976 pool_status = self.run_status_action( - application_name=application_name, model_name=model_name) + application_name=application_name, model_name=model_name, + pools=pools) except KeyError: continue for pool, status in pool_status.items(): @@ -124,6 +136,119 @@ class CephRBDMirrorBase(test_utils.OpenStackBaseTest): # all images with state has expected state return True + def get_cinder_rbd_mirroring_mode(self, + cinder_ceph_app_name='cinder-ceph'): + """Get the RBD mirroring mode for the Cinder Ceph pool. + + :returns: A string representing the RBD mirroring mode. It can be + either 'pool' or 'image'. + """ + DEFAULT_RBD_MIRRORING_MODE = 'pool' + + rbd_mirroring_mode_config = zaza.model.get_application_config( + cinder_ceph_app_name).get('rbd-mirroring-mode') + if rbd_mirroring_mode_config: + rbd_mirroring_mode = rbd_mirroring_mode_config.get( + 'value', DEFAULT_RBD_MIRRORING_MODE).lower() + else: + rbd_mirroring_mode = DEFAULT_RBD_MIRRORING_MODE + + return rbd_mirroring_mode + + def create_cinder_volume(self, session, from_image=False): + """Create Cinder Volume from image. + + :rtype: :class:`Volume`. 
+ """ + def get_glance_image(session): + glance = openstack.get_glance_session_client(session) + images = openstack.get_images_by_name(glance, CIRROS_IMAGE_NAME) + if images: + return images[0] + logging.info("Failed to find {} image, falling back to {}".format( + CIRROS_IMAGE_NAME, + LTS_IMAGE_NAME)) + return openstack.get_images_by_name(glance, LTS_IMAGE_NAME)[0] + + def create_volume_type(cinder): + try: + vol_type = cinder.volume_types.find(name='repl') + except cinder_exceptions.NotFound: + vol_type = cinder.volume_types.create('repl') + vol_type.set_keys(metadata={ + 'volume_backend_name': 'cinder-ceph', + 'replication_enabled': ' True', + }) + return vol_type + + # NOTE(fnordahl): for some reason create volume from image often fails + # when run just after deployment is finished. We should figure out + # why, resolve the underlying issue and then remove this. + # + # We do not use tenacity here as it will interfere with tenacity used + # in ``resource_reaches_status`` + def create_volume(cinder, volume_params, retry=20): + if retry < 1: + return + volume = cinder.volumes.create(**volume_params) + try: + # Note(coreycb): stop_after_attempt is increased because using + # juju storage for ceph-osd backed by cinder on undercloud + # takes longer than the prior method of directory-backed OSD + # devices. 
+ openstack.resource_reaches_status( + cinder.volumes, volume.id, msg='volume', + stop_after_attempt=20) + return volume + except AssertionError: + logging.info('retrying') + volume.delete() + return create_volume(cinder, volume_params, retry=retry - 1) + + volume_params = { + 'size': 8, + 'name': 'zaza', + } + if from_image: + volume_params['imageRef'] = get_glance_image(session).id + cinder = openstack.get_cinder_session_client(session) + if self.get_cinder_rbd_mirroring_mode() == 'image': + volume_params['volume_type'] = create_volume_type(cinder).id + + return create_volume(cinder, volume_params) + + def failover_cinder_volume_host(self, cinder_client, + backend_name='cinder-ceph', + target_backend_id='ceph', + target_status='disabled', + target_replication_status='failed-over', + timeout=300): + """Failover Cinder volume host.""" + host = 'cinder@{}'.format(backend_name) + logging.info( + 'Failover Cinder host %s to backend_id %s', + host, target_backend_id) + cinder_client.services.failover_host( + host=host, + backend_id=target_backend_id) + start = time.time() + while True: + elapsed = time.time() - start + if elapsed > timeout: + raise cinder_exceptions.TimeoutException( + obj=cinder_client.services, + action='failover_host') + service = cinder_client.services.list( + host=host, + binary='cinder-volume')[0] + if (service.status == target_status and + service.replication_status == target_replication_status): + break + time.sleep(5) + logging.info( + 'Successfully failed-over Cinder host %s to backend_id %s', + host, target_backend_id) + class CephRBDMirrorTest(CephRBDMirrorBase): """Encapsulate ``ceph-rbd-mirror`` tests.""" @@ -196,43 +321,7 @@ class CephRBDMirrorTest(CephRBDMirrorBase): test. 
""" session = openstack.get_overcloud_keystone_session() - glance = openstack.get_glance_session_client(session) - cinder = openstack.get_cinder_session_client(session) - - images = openstack.get_images_by_name(glance, CIRROS_IMAGE_NAME) - if images: - image = images[0] - else: - logging.info("Failed to find {} image, falling back to {}".format( - CIRROS_IMAGE_NAME, - LTS_IMAGE_NAME)) - image = openstack.get_images_by_name(glance, LTS_IMAGE_NAME)[0] - - # NOTE(fnordahl): for some reason create volume from image often fails - # when run just after deployment is finished. We should figure out - # why, resolve the underlying issue and then remove this. - # - # We do not use tenacity here as it will interfere with tenacity used - # in ``resource_reaches_status`` - def create_volume_from_image(cinder, image, retry=20): - if retry < 1: - return - volume = cinder.volumes.create(8, name='zaza', imageRef=image.id) - try: - # Note(coreycb): stop_after_attempt is increased because using - # juju storage for ceph-osd backed by cinder on undercloud - # takes longer than the prior method of directory-backed OSD - # devices. 
- openstack.resource_reaches_status( - cinder.volumes, volume.id, msg='volume', - stop_after_attempt=20) - return volume - except AssertionError: - logging.info('retrying') - volume.delete() - return create_volume_from_image(cinder, image, retry=retry - 1) - volume = create_volume_from_image(cinder, image) - + volume = self.create_cinder_volume(session, from_image=True) site_a_hash = zaza.openstack.utilities.ceph.get_rbd_hash( zaza.model.get_lead_unit_name('ceph-mon', model_name=self.site_a_model), @@ -258,85 +347,170 @@ class CephRBDMirrorTest(CephRBDMirrorBase): class CephRBDMirrorControlledFailoverTest(CephRBDMirrorBase): """Encapsulate ``ceph-rbd-mirror`` controlled failover tests.""" + def cinder_fail_over_fall_back(self): + """Validate controlled fail over and fall back via the Cinder API.""" + session = openstack.get_overcloud_keystone_session() + cinder = openstack.get_cinder_session_client(session) + volume = self.create_cinder_volume(session, from_image=True) + self.wait_for_mirror_state( + 'up+replaying', + check_entries_behind_master=True, + application_name=self.application_name + self.site_b_app_suffix, + model_name=self.site_b_model, + pools=['cinder-ceph']) + self.failover_cinder_volume_host( + cinder_client=cinder) + self.assertEqual(cinder.volumes.get(volume.id).status, 'available') + self.failover_cinder_volume_host( + cinder_client=cinder, + target_backend_id='default', + target_status='enabled', + target_replication_status='enabled') + self.assertEqual(cinder.volumes.get(volume.id).status, 'available') + def test_fail_over_fall_back(self): """Validate controlled fail over and fall back.""" site_a_pools, site_b_pools = self.get_pools() + site_a_action_params = {} + site_b_action_params = {} + if self.get_cinder_rbd_mirroring_mode() == 'image': + site_a_pools.remove('cinder-ceph') + site_a_action_params['pools'] = ','.join(site_a_pools) + site_b_pools.remove('cinder-ceph') + site_b_action_params['pools'] = ','.join(site_b_pools) result = 
zaza.model.run_action_on_leader( 'ceph-rbd-mirror', 'demote', model_name=self.site_a_model, - action_params={}) + action_params=site_a_action_params) logging.info(result.results) n_pools_demoted = len(result.results['output'].split('\n')) self.assertEqual(len(site_a_pools), n_pools_demoted) - self.wait_for_mirror_state('up+unknown', model_name=self.site_a_model) + self.wait_for_mirror_state( + 'up+unknown', + model_name=self.site_a_model, + pools=site_a_pools) self.wait_for_mirror_state( 'up+unknown', application_name=self.application_name + self.site_b_app_suffix, - model_name=self.site_b_model) + model_name=self.site_b_model, + pools=site_b_pools) result = zaza.model.run_action_on_leader( 'ceph-rbd-mirror' + self.site_b_app_suffix, 'promote', model_name=self.site_b_model, - action_params={}) + action_params=site_b_action_params) logging.info(result.results) n_pools_promoted = len(result.results['output'].split('\n')) self.assertEqual(len(site_b_pools), n_pools_promoted) self.wait_for_mirror_state( 'up+replaying', - model_name=self.site_a_model) + model_name=self.site_a_model, + pools=site_a_pools) self.wait_for_mirror_state( 'up+stopped', application_name=self.application_name + self.site_b_app_suffix, - model_name=self.site_b_model) + model_name=self.site_b_model, + pools=site_b_pools) result = zaza.model.run_action_on_leader( 'ceph-rbd-mirror' + self.site_b_app_suffix, 'demote', model_name=self.site_b_model, - action_params={ - }) + action_params=site_b_action_params) logging.info(result.results) n_pools_demoted = len(result.results['output'].split('\n')) self.assertEqual(len(site_a_pools), n_pools_demoted) self.wait_for_mirror_state( 'up+unknown', - model_name=self.site_a_model) + model_name=self.site_a_model, + pools=site_a_pools) self.wait_for_mirror_state( 'up+unknown', application_name=self.application_name + self.site_b_app_suffix, - model_name=self.site_b_model) + model_name=self.site_b_model, + pools=site_b_pools) result = 
zaza.model.run_action_on_leader( 'ceph-rbd-mirror', 'promote', model_name=self.site_a_model, - action_params={ - }) + action_params=site_a_action_params) logging.info(result.results) n_pools_promoted = len(result.results['output'].split('\n')) self.assertEqual(len(site_b_pools), n_pools_promoted) self.wait_for_mirror_state( 'up+stopped', - model_name=self.site_a_model) + model_name=self.site_a_model, + pools=site_a_pools) + action_params = { + 'i-really-mean-it': True, + } + if self.get_cinder_rbd_mirroring_mode() == 'image': + action_params['pools'] = site_b_action_params['pools'] result = zaza.model.run_action_on_leader( 'ceph-rbd-mirror' + self.site_b_app_suffix, 'resync-pools', model_name=self.site_b_model, - action_params={ - 'i-really-mean-it': True, - }) + action_params=action_params) logging.info(result.results) self.wait_for_mirror_state( 'up+replaying', application_name=self.application_name + self.site_b_app_suffix, model_name=self.site_b_model, - require_images_in=['cinder-ceph', 'glance']) + require_images_in=['cinder-ceph', 'glance'], + pools=site_a_pools) + if self.get_cinder_rbd_mirroring_mode() == 'image': + self.cinder_fail_over_fall_back() class CephRBDMirrorDisasterFailoverTest(CephRBDMirrorBase): """Encapsulate ``ceph-rbd-mirror`` destructive tests.""" + def forced_failover_cinder_volume_host(self, cinder_client): + """Validate forced Cinder volume host fail over.""" + def apply_cinder_workaround(): + """Set minimal timeouts / retries to the Cinder Ceph backend. + + This is needed because the failover via Cinder will try to do a + demotion of the site-a, and with the default timeouts / retries, + the operation takes an unreasonably amount of time. 
+ """ + cinder_configs = { + 'rados_connect_timeout': '1', + 'rados_connection_retries': '1', + 'rados_connection_interval': '0', + 'replication_connect_timeout': '1', + } + update_cinder_conf_cmd = ( + "import configparser; " + "config = configparser.ConfigParser(); " + "config.read('/etc/cinder/cinder.conf'); " + "{}" + "f = open('/etc/cinder/cinder.conf', 'w'); " + "config.write(f); " + "f.close()") + cmd = '' + for config in cinder_configs: + cmd += "config.set('cinder-ceph', '{0}', '{1}'); ".format( + config, cinder_configs[config]) + cmd = update_cinder_conf_cmd.format(cmd) + zaza.model.run_on_leader( + 'cinder-ceph', + 'python3 -c "{}"; systemctl restart cinder-volume'.format(cmd)) + + apply_cinder_workaround() + self.failover_cinder_volume_host(cinder_client) + + for volume in cinder_client.volumes.list(): + self.assertEqual(volume.status, 'available') + def test_kill_site_a_fail_over(self): """Validate fail over after uncontrolled shutdown of primary.""" + action_params = {} + if self.get_cinder_rbd_mirroring_mode() == 'image': + _, site_b_pools = self.get_pools() + site_b_pools.remove('cinder-ceph') + action_params['pools'] = ','.join(site_b_pools) + for application in 'ceph-rbd-mirror', 'ceph-mon', 'ceph-osd': zaza.model.remove_application( application, @@ -346,14 +520,16 @@ class CephRBDMirrorDisasterFailoverTest(CephRBDMirrorBase): 'ceph-rbd-mirror' + self.site_b_app_suffix, 'promote', model_name=self.site_b_model, - action_params={ - }) + action_params=action_params) self.assertEqual(result.status, 'failed') + action_params['force'] = True result = zaza.model.run_action_on_leader( 'ceph-rbd-mirror' + self.site_b_app_suffix, 'promote', model_name=self.site_b_model, - action_params={ - 'force': True, - }) + action_params=action_params) self.assertEqual(result.status, 'completed') + if self.get_cinder_rbd_mirroring_mode() == 'image': + session = openstack.get_overcloud_keystone_session() + cinder = openstack.get_cinder_session_client(session) + 
self.forced_failover_cinder_volume_host(cinder) From 2fefca5a406647a1efcd91e5f67441766f6976b7 Mon Sep 17 00:00:00 2001 From: Ionut Balutoiu Date: Tue, 2 Mar 2021 18:03:06 +0200 Subject: [PATCH 2/2] Code cleanup * Move general functions out of the testing class. This will make them easier to reuse. * Properly organize the tests, and add tons of docstrings and comments to have them as clear as possible. * Add `failover_cinder_volume_host` to the Zaza `utilities/openstack.py`, since this is a general purpose function. --- .../charm_tests/ceph/rbd_mirror/tests.py | 776 ++++++++++++------ zaza/openstack/utilities/openstack.py | 34 + 2 files changed, 558 insertions(+), 252 deletions(-) diff --git a/zaza/openstack/charm_tests/ceph/rbd_mirror/tests.py b/zaza/openstack/charm_tests/ceph/rbd_mirror/tests.py index 75a3f42..d8d7967 100644 --- a/zaza/openstack/charm_tests/ceph/rbd_mirror/tests.py +++ b/zaza/openstack/charm_tests/ceph/rbd_mirror/tests.py @@ -16,7 +16,6 @@ import json import logging import re -import time import cinderclient.exceptions as cinder_exceptions @@ -31,6 +30,129 @@ from zaza.openstack.charm_tests.glance.setup import ( CIRROS_IMAGE_NAME) +DEFAULT_CINDER_RBD_MIRRORING_MODE = 'pool' + + +def get_cinder_rbd_mirroring_mode(cinder_ceph_app_name='cinder-ceph'): + """Get the RBD mirroring mode for the Cinder Ceph pool. + + :param cinder_ceph_app_name: Cinder Ceph Juju application name. + :type cinder_ceph_app_name: str + :returns: A string representing the RBD mirroring mode. It can be + either 'pool' or 'image'. 
+ :rtype: str + """ + rbd_mirroring_mode_config = zaza.model.get_application_config( + cinder_ceph_app_name).get('rbd-mirroring-mode') + if rbd_mirroring_mode_config: + rbd_mirroring_mode = rbd_mirroring_mode_config.get( + 'value', DEFAULT_CINDER_RBD_MIRRORING_MODE).lower() + else: + rbd_mirroring_mode = DEFAULT_CINDER_RBD_MIRRORING_MODE + + return rbd_mirroring_mode + + +def get_glance_image(glance): + """Get the Glance image object to be used by the Ceph tests. + + It looks for the Cirros Glance image, and it's returned if it's found. + If the Cirros image is not found, it will try and find the Ubuntu + LTS image. + + :param glance: Authenticated glanceclient + :type glance: glanceclient.Client + :returns: Glance image object + :rtype: glanceclient.image + """ + images = openstack.get_images_by_name(glance, CIRROS_IMAGE_NAME) + if images: + return images[0] + logging.info("Failed to find {} image, falling back to {}".format( + CIRROS_IMAGE_NAME, + LTS_IMAGE_NAME)) + return openstack.get_images_by_name(glance, LTS_IMAGE_NAME)[0] + + +def setup_cinder_repl_volume_type(cinder, type_name='repl', + backend_name='cinder-ceph'): + """Set up the Cinder volume replication type. + + :param cinder: Authenticated cinderclient + :type cinder: cinder.Client + :param type_name: Cinder volume type name + :type type_name: str + :param backend_name: Cinder volume backend name with replication enabled. + :type backend_name: str + :returns: Cinder volume type object + :rtype: cinderclient.VolumeType + """ + try: + vol_type = cinder.volume_types.find(name=type_name) + except cinder_exceptions.NotFound: + vol_type = cinder.volume_types.create(type_name) + + vol_type.set_keys(metadata={ + 'volume_backend_name': backend_name, + 'replication_enabled': ' True', + }) + return vol_type + + +# TODO: This function should be incorporated into +# 'zaza.openstack.utilities.openstack.create_volume' helper, once the below +# flakiness comments are addressed. 
+def create_cinder_volume(cinder, name='zaza', image_id=None, type_id=None): + """Create a new Cinder volume. + + :param cinder: Authenticated cinderclient. + :type cinder: cinder.Client + :param name: Volume name. + :type name: str + :param image_id: Glance image id, if the volume is created from image. + :type image_id: str + :param type_id: Cinder Volume type id, if the volume needs to use an + explicit volume type. + :type type_id: boolean + :returns: Cinder volume + :rtype: :class:`Volume`. + """ + # NOTE(fnordahl): for some reason create volume from image often fails + # when run just after deployment is finished. We should figure out + # why, resolve the underlying issue and then remove this. + # + # We do not use tenacity here as it will interfere with tenacity used + # in ``resource_reaches_status`` + def create_volume(cinder, volume_params, retry=20): + if retry < 1: + return + volume = cinder.volumes.create(**volume_params) + try: + # Note(coreycb): stop_after_attempt is increased because using + # juju storage for ceph-osd backed by cinder on undercloud + # takes longer than the prior method of directory-backed OSD + # devices. 
+ openstack.resource_reaches_status( + cinder.volumes, volume.id, msg='volume', + stop_after_attempt=20) + return volume + except AssertionError: + logging.info('retrying') + volume.delete() + return create_volume(cinder, volume_params, retry=retry - 1) + + volume_params = { + 'size': 8, + 'name': name, + } + if image_id: + volume_params['imageRef'] = image_id + if type_id: + volume_params['volume_type'] = type_id + + return create_volume(cinder, volume_params) + + class CephRBDMirrorBase(test_utils.OpenStackBaseTest): """Base class for ``ceph-rbd-mirror`` tests.""" @@ -38,6 +160,8 @@ class CephRBDMirrorBase(test_utils.OpenStackBaseTest): def setUpClass(cls): """Run setup for ``ceph-rbd-mirror`` tests.""" super().setUpClass() + cls.cinder_ceph_app_name = 'cinder-ceph' + cls.test_cinder_volume_name = 'test-cinder-ceph-volume' # get ready for multi-model Zaza cls.site_a_model = cls.site_b_model = zaza.model.get_juju_model() cls.site_b_app_suffix = '-b' @@ -75,6 +199,21 @@ class CephRBDMirrorBase(test_utils.OpenStackBaseTest): model_name=self.site_b_model) return sorted(site_a_pools.keys()), sorted(site_b_pools.keys()) + def get_failover_pools(self): + """Get the failover Ceph pools' names, from both sites. + + If the Cinder RBD mirroring mode is 'image', the 'cinder-ceph' pool + needs to be excluded, since Cinder orchestrates the failover then. + + :returns: Tuple with site-a pools and site-b pools. 
+ :rtype: Tuple[List[str], List[str]] + """ + site_a_pools, site_b_pools = self.get_pools() + if get_cinder_rbd_mirroring_mode(self.cinder_ceph_app_name) == 'image': + site_a_pools.remove(self.cinder_ceph_app_name) + site_b_pools.remove(self.cinder_ceph_app_name) + return site_a_pools, site_b_pools + def wait_for_mirror_state(self, state, application_name=None, model_name=None, check_entries_behind_master=False, @@ -136,118 +275,40 @@ class CephRBDMirrorBase(test_utils.OpenStackBaseTest): # all images with state has expected state return True - def get_cinder_rbd_mirroring_mode(self, - cinder_ceph_app_name='cinder-ceph'): - """Get the RBD mirroring mode for the Cinder Ceph pool. + def setup_test_cinder_volume(self): + """Set up the test Cinder volume into the Ceph RBD mirror environment. - :returns: A string representing the RBD mirroring mode. It can be - either 'pool' or 'image'. - """ - DEFAULT_RBD_MIRRORING_MODE = 'pool' + If the volume already exists, then it's returned. - rbd_mirroring_mode_config = zaza.model.get_application_config( - cinder_ceph_app_name).get('rbd-mirroring-mode') - if rbd_mirroring_mode_config: - rbd_mirroring_mode = rbd_mirroring_mode_config.get( - 'value', DEFAULT_RBD_MIRRORING_MODE).lower() - else: - rbd_mirroring_mode = DEFAULT_RBD_MIRRORING_MODE - - return rbd_mirroring_mode - - def create_cinder_volume(self, session, from_image=False): - """Create Cinder Volume from image. + Also, if the Cinder RBD mirroring mode is 'image', the volume will + use an explicit volume type with the appropriate replication flags. + Otherwise, it is just a simple Cinder volume using the default backend. + :returns: Cinder volume :rtype: :class:`Volume`. 
""" - def get_glance_image(session): - glance = openstack.get_glance_session_client(session) - images = openstack.get_images_by_name(glance, CIRROS_IMAGE_NAME) - if images: - return images[0] - logging.info("Failed to find {} image, falling back to {}".format( - CIRROS_IMAGE_NAME, - LTS_IMAGE_NAME)) - return openstack.get_images_by_name(glance, LTS_IMAGE_NAME)[0] + session = openstack.get_overcloud_keystone_session() + cinder = openstack.get_cinder_session_client(session, version=3) - def create_volume_type(cinder): - try: - vol_type = cinder.volume_types.find(name='repl') - except cinder_exceptions.NotFound: - vol_type = cinder.volume_types.create('repl') - vol_type.set_keys(metadata={ - 'volume_backend_name': 'cinder-ceph', - 'replication_enabled': ' True', - }) - return vol_type + try: + return cinder.volumes.find(name=self.test_cinder_volume_name) + except cinder_exceptions.NotFound: + logging.info("Test Cinder volume doesn't exist. Creating it") - # NOTE(fnordahl): for some reason create volume from image often fails - # when run just after deployment is finished. We should figure out - # why, resolve the underlying issue and then remove this. - # - # We do not use tenacity here as it will interfere with tenacity used - # in ``resource_reaches_status`` - def create_volume(cinder, volume_params, retry=20): - if retry < 1: - return - volume = cinder.volumes.create(**volume_params) - try: - # Note(coreycb): stop_after_attempt is increased because using - # juju storage for ceph-osd backed by cinder on undercloud - # takes longer than the prior method of directory-backed OSD - # devices. 
- openstack.resource_reaches_status( - cinder.volumes, volume.id, msg='volume', - stop_after_attempt=20) - return volume - except AssertionError: - logging.info('retrying') - volume.delete() - return create_volume(cinder, volume_params, retry=retry - 1) - - volume_params = { - 'size': 8, - 'name': 'zaza', + glance = openstack.get_glance_session_client(session) + image = get_glance_image(glance) + kwargs = { + 'cinder': cinder, + 'name': self.test_cinder_volume_name, + 'image_id': image.id, } - if from_image: - volume_params['imageRef'] = get_glance_image(session).id - cinder = openstack.get_cinder_session_client(session) - if self.get_cinder_rbd_mirroring_mode() == 'image': - volume_params['volume_type'] = create_volume_type(cinder).id + if get_cinder_rbd_mirroring_mode(self.cinder_ceph_app_name) == 'image': + volume_type = setup_cinder_repl_volume_type( + cinder, + backend_name=self.cinder_ceph_app_name) + kwargs['type_id'] = volume_type.id - return create_volume(cinder, volume_params) - - def failover_cinder_volume_host(self, cinder_client, - backend_name='cinder-ceph', - target_backend_id='ceph', - target_status='disabled', - target_replication_status='failed-over', - timeout=300): - """Failover Cinder volume host.""" - host = 'cinder@{}'.format(backend_name) - logging.info( - 'Failover Cinder host %s to backend_id %s', - host, target_backend_id) - cinder_client.services.failover_host( - host=host, - backend_id=target_backend_id) - start = time.time() - while True: - elapsed = time.time() - start - if elapsed > timeout: - raise cinder_exceptions.TimeoutException( - obj=cinder_client.services, - action='failover_host') - service = cinder_client.services.list( - host=host, - binary='cinder-volume')[0] - if (service.status == target_status and - service.replication_status == target_replication_status): - break - time.sleep(5) - logging.info( - 'Successfully failed-over Cinder host %s to backend_id %s', - host, target_backend_id) + return 
create_cinder_volume(**kwargs) class CephRBDMirrorTest(CephRBDMirrorBase): @@ -320,8 +381,7 @@ class CephRBDMirrorTest(CephRBDMirrorBase): site B and subsequently comparing the contents we get a full end to end test. """ - session = openstack.get_overcloud_keystone_session() - volume = self.create_cinder_volume(session, from_image=True) + volume = self.setup_test_cinder_volume() site_a_hash = zaza.openstack.utilities.ceph.get_rbd_hash( zaza.model.get_lead_unit_name('ceph-mon', model_name=self.site_a_model), @@ -333,6 +393,8 @@ class CephRBDMirrorTest(CephRBDMirrorBase): check_entries_behind_master=True, application_name=self.application_name + self.site_b_app_suffix, model_name=self.site_b_model) + logging.info('Checking the Ceph RBD hashes of the primary and ' + 'the secondary Ceph images') site_b_hash = zaza.openstack.utilities.ceph.get_rbd_hash( zaza.model.get_lead_unit_name('ceph-mon' + self.site_b_app_suffix, model_name=self.site_b_model), @@ -347,189 +409,399 @@ class CephRBDMirrorTest(CephRBDMirrorBase): class CephRBDMirrorControlledFailoverTest(CephRBDMirrorBase): """Encapsulate ``ceph-rbd-mirror`` controlled failover tests.""" - def cinder_fail_over_fall_back(self): - """Validate controlled fail over and fall back via the Cinder API.""" + def execute_failover_juju_actions(self, + primary_site_app_name, + primary_site_model, + primary_site_pools, + secondary_site_app_name, + secondary_site_model, + secondary_site_pools): + """Execute the failover Juju actions. + + The failover / failback via Juju actions shares the same workflow. The + failback is just a failover with sites in reversed order. + + This function encapsulates the tasks to failover a primary site to + a secondary site: + 1. Demote primary site + 2. Validation of the primary site demotion + 3. Promote secondary site + 4. Validation of the secondary site promotion + + :param primary_site_app_name: Primary site Ceph RBD mirror app name. 
+ :type primary_site_app_name: str + :param primary_site_model: Primary site Juju model name. + :type primary_site_model: str + :param primary_site_pools: Primary site pools. + :type primary_site_pools: List[str] + :param secondary_site_app_name: Secondary site Ceph RBD mirror + app name. + :type secondary_site_app_name: str + :param secondary_site_model: Secondary site Juju model name. + :type secondary_site_model: str + :param secondary_site_pools: Secondary site pools. + :type secondary_site_pools: List[str] + """ + # Check if primary and secondary pools sizes are the same. + self.assertEqual(len(primary_site_pools), len(secondary_site_pools)) + + # Run the 'demote' Juju action against the primary site pools. + logging.info('Demoting {} from model {}.'.format( + primary_site_app_name, primary_site_model)) + result = zaza.model.run_action_on_leader( + primary_site_app_name, + 'demote', + model_name=primary_site_model, + action_params={ + 'pools': ','.join(primary_site_pools) + }) + logging.info(result.results) + self.assertEqual(int(result.results['Code']), 0) + + # Validate that the demoted pools count matches the total primary site + # pools count. + n_pools_demoted = len(result.results['output'].split('\n')) + self.assertEqual(len(primary_site_pools), n_pools_demoted) + + # At this point, both primary and secondary sites are demoted. Validate + # that the Ceph images, from both sites, report 'up+unknown', since + # there isn't a primary site at the moment. + logging.info('Waiting until {} is demoted.'.format( + primary_site_app_name)) + self.wait_for_mirror_state( + 'up+unknown', + application_name=primary_site_app_name, + model_name=primary_site_model, + pools=primary_site_pools) + self.wait_for_mirror_state( + 'up+unknown', + application_name=secondary_site_app_name, + model_name=secondary_site_model, + pools=secondary_site_pools) + + # Run the 'promote' Juju against the secondary site. 
+ logging.info('Promoting {} from model {}.'.format( + secondary_site_app_name, secondary_site_model)) + result = zaza.model.run_action_on_leader( + secondary_site_app_name, + 'promote', + model_name=secondary_site_model, + action_params={ + 'pools': ','.join(secondary_site_pools) + }) + logging.info(result.results) + self.assertEqual(int(result.results['Code']), 0) + + # Validate that the promoted pools count matches the total secondary + # site pools count. + n_pools_promoted = len(result.results['output'].split('\n')) + self.assertEqual(len(secondary_site_pools), n_pools_promoted) + + # Validate that the Ceph images from the newly promoted site + # report 'up+stopped' state (which is reported by primary Ceph images). + logging.info('Waiting until {} is promoted.'.format( + secondary_site_app_name)) + self.wait_for_mirror_state( + 'up+stopped', + application_name=secondary_site_app_name, + model_name=secondary_site_model, + pools=secondary_site_pools) + + # Validate that the Ceph images from site-a report 'up+replaying' + # (which is reported by secondary Ceph images). + self.wait_for_mirror_state( + 'up+replaying', + check_entries_behind_master=True, + application_name=primary_site_app_name, + model_name=primary_site_model, + pools=primary_site_pools) + + def test_100_cinder_failover(self): + """Validate controlled failover via the Cinder API. + + This test only makes sense if Cinder RBD mirroring mode is 'image'. + It will return early, if this is not the case. 
+ """ + cinder_rbd_mirroring_mode = get_cinder_rbd_mirroring_mode( + self.cinder_ceph_app_name) + if cinder_rbd_mirroring_mode != 'image': + logging.warning( + "Skipping 'test_100_cinder_failover' since Cinder RBD " + "mirroring mode is {}.".format(cinder_rbd_mirroring_mode)) + return + session = openstack.get_overcloud_keystone_session() - cinder = openstack.get_cinder_session_client(session) - volume = self.create_cinder_volume(session, from_image=True) + cinder = openstack.get_cinder_session_client(session, version=3) + + # Check if the Cinder volume host is available with replication + # enabled. + host = 'cinder@{}'.format(self.cinder_ceph_app_name) + svc = cinder.services.list(host=host, binary='cinder-volume')[0] + self.assertEqual(svc.replication_status, 'enabled') + self.assertEqual(svc.status, 'enabled') + + # Setup the test Cinder volume + volume = self.setup_test_cinder_volume() + + # Check if the volume is properly mirrored self.wait_for_mirror_state( 'up+replaying', check_entries_behind_master=True, application_name=self.application_name + self.site_b_app_suffix, model_name=self.site_b_model, - pools=['cinder-ceph']) - self.failover_cinder_volume_host( - cinder_client=cinder) + pools=[self.cinder_ceph_app_name]) + + # Execute the Cinder volume failover + openstack.failover_cinder_volume_host( + cinder=cinder, + backend_name=self.cinder_ceph_app_name, + target_backend_id='ceph', + target_status='disabled', + target_replication_status='failed-over') + + # Check if the test volume is still available after failover self.assertEqual(cinder.volumes.get(volume.id).status, 'available') - self.failover_cinder_volume_host( - cinder_client=cinder, + + def test_101_cinder_failback(self): + """Validate controlled failback via the Cinder API. + + This test only makes sense if Cinder RBD mirroring mode is 'image'. + It will return early, if this is not the case. 
+
+        The test needs to be executed when the Cinder volume host is already
+        failed-over with the test volume on it.
+        """
+        cinder_rbd_mirroring_mode = self.get_cinder_rbd_mirroring_mode(
+            self.cinder_ceph_app_name)
+        if cinder_rbd_mirroring_mode != 'image':
+            logging.warning(
+                "Skipping 'test_101_cinder_failback' since Cinder RBD "
+                "mirroring mode is {}.".format(cinder_rbd_mirroring_mode))
+            return
+
+        session = openstack.get_overcloud_keystone_session()
+        cinder = openstack.get_cinder_session_client(session, version=3)
+
+        # Check if the Cinder volume host is already failed-over
+        host = 'cinder@{}'.format(self.cinder_ceph_app_name)
+        svc = cinder.services.list(host=host, binary='cinder-volume')[0]
+        self.assertEqual(svc.replication_status, 'failed-over')
+        self.assertEqual(svc.status, 'disabled')
+
+        # Check if the test Cinder volume is already present. The method
+        # 'cinder.volumes.find' raises 404 if the volume is not found.
+        volume = cinder.volumes.find(name=self.test_cinder_volume_name)
+
+        # Execute the Cinder volume failback
+        openstack.failover_cinder_volume_host(
+            cinder=cinder,
+            backend_name=self.cinder_ceph_app_name,
             target_backend_id='default',
             target_status='enabled',
             target_replication_status='enabled')
+
+        # Check if the test volume is still available after failback
         self.assertEqual(cinder.volumes.get(volume.id).status, 'available')
 
-    def test_fail_over_fall_back(self):
-        """Validate controlled fail over and fall back."""
-        site_a_pools, site_b_pools = self.get_pools()
-        site_a_action_params = {}
-        site_b_action_params = {}
-        if self.get_cinder_rbd_mirroring_mode() == 'image':
-            site_a_pools.remove('cinder-ceph')
-            site_a_action_params['pools'] = ','.join(site_a_pools)
-            site_b_pools.remove('cinder-ceph')
-            site_b_action_params['pools'] = ','.join(site_b_pools)
+    def test_200_juju_failover(self):
+        """Validate controlled failover via Juju actions."""
+        # Get the Ceph pools needed to failover
+        site_a_pools, site_b_pools = 
self.get_failover_pools() + + # Execute the failover Juju actions with the appropriate parameters. + site_b_app_name = self.application_name + self.site_b_app_suffix + self.execute_failover_juju_actions( + primary_site_app_name=self.application_name, + primary_site_model=self.site_a_model, + primary_site_pools=site_a_pools, + secondary_site_app_name=site_b_app_name, + secondary_site_model=self.site_b_model, + secondary_site_pools=site_b_pools) + + def test_201_juju_failback(self): + """Validate controlled failback via Juju actions.""" + # Get the Ceph pools needed to failback + site_a_pools, site_b_pools = self.get_failover_pools() + + # Execute the failover Juju actions with the appropriate parameters. + # The failback operation is just a failover with sites in reverse + # order. + site_b_app_name = self.application_name + self.site_b_app_suffix + self.execute_failover_juju_actions( + primary_site_app_name=site_b_app_name, + primary_site_model=self.site_b_model, + primary_site_pools=site_b_pools, + secondary_site_app_name=self.application_name, + secondary_site_model=self.site_a_model, + secondary_site_pools=site_a_pools) + + def test_203_juju_resync(self): + """Validate the 'resync-pools' Juju action. + + The 'resync-pools' Juju action is meant to flag Ceph images from the + secondary site to re-sync against the Ceph images from the primary + site. + + This use case is useful when the Ceph secondary images are out of sync. + """ + # Get the Ceph pools needed to failback + _, site_b_pools = self.get_failover_pools() + + # Run the 'resync-pools' Juju action against the pools from site-b. + # This will make sure that the Ceph images from site-b are properly + # synced with the primary images from site-a. 
+ site_b_app_name = self.application_name + self.site_b_app_suffix + logging.info('Re-syncing {} from model {}'.format( + site_b_app_name, self.site_b_model)) result = zaza.model.run_action_on_leader( - 'ceph-rbd-mirror', - 'demote', - model_name=self.site_a_model, - action_params=site_a_action_params) - logging.info(result.results) - n_pools_demoted = len(result.results['output'].split('\n')) - self.assertEqual(len(site_a_pools), n_pools_demoted) - self.wait_for_mirror_state( - 'up+unknown', - model_name=self.site_a_model, - pools=site_a_pools) - self.wait_for_mirror_state( - 'up+unknown', - application_name=self.application_name + self.site_b_app_suffix, - model_name=self.site_b_model, - pools=site_b_pools) - result = zaza.model.run_action_on_leader( - 'ceph-rbd-mirror' + self.site_b_app_suffix, - 'promote', - model_name=self.site_b_model, - action_params=site_b_action_params) - logging.info(result.results) - n_pools_promoted = len(result.results['output'].split('\n')) - self.assertEqual(len(site_b_pools), n_pools_promoted) - self.wait_for_mirror_state( - 'up+replaying', - model_name=self.site_a_model, - pools=site_a_pools) - self.wait_for_mirror_state( - 'up+stopped', - application_name=self.application_name + self.site_b_app_suffix, - model_name=self.site_b_model, - pools=site_b_pools) - result = zaza.model.run_action_on_leader( - 'ceph-rbd-mirror' + self.site_b_app_suffix, - 'demote', - model_name=self.site_b_model, - action_params=site_b_action_params) - logging.info(result.results) - n_pools_demoted = len(result.results['output'].split('\n')) - self.assertEqual(len(site_a_pools), n_pools_demoted) - self.wait_for_mirror_state( - 'up+unknown', - model_name=self.site_a_model, - pools=site_a_pools) - self.wait_for_mirror_state( - 'up+unknown', - application_name=self.application_name + self.site_b_app_suffix, - model_name=self.site_b_model, - pools=site_b_pools) - result = zaza.model.run_action_on_leader( - 'ceph-rbd-mirror', - 'promote', - 
model_name=self.site_a_model, - action_params=site_a_action_params) - logging.info(result.results) - n_pools_promoted = len(result.results['output'].split('\n')) - self.assertEqual(len(site_b_pools), n_pools_promoted) - self.wait_for_mirror_state( - 'up+stopped', - model_name=self.site_a_model, - pools=site_a_pools) - action_params = { - 'i-really-mean-it': True, - } - if self.get_cinder_rbd_mirroring_mode() == 'image': - action_params['pools'] = site_b_action_params['pools'] - result = zaza.model.run_action_on_leader( - 'ceph-rbd-mirror' + self.site_b_app_suffix, + site_b_app_name, 'resync-pools', model_name=self.site_b_model, - action_params=action_params) + action_params={ + 'pools': ','.join(site_b_pools), + 'i-really-mean-it': True, + }) logging.info(result.results) + self.assertEqual(int(result.results['Code']), 0) + + # Validate that the Ceph images from site-b report 'up+replaying' + # (which is reported by secondary Ceph images). And check that images + # exist in Cinder and Glance pools. self.wait_for_mirror_state( 'up+replaying', - application_name=self.application_name + self.site_b_app_suffix, + check_entries_behind_master=True, + application_name=site_b_app_name, model_name=self.site_b_model, - require_images_in=['cinder-ceph', 'glance'], - pools=site_a_pools) - if self.get_cinder_rbd_mirroring_mode() == 'image': - self.cinder_fail_over_fall_back() + require_images_in=[self.cinder_ceph_app_name, 'glance'], + pools=site_b_pools) class CephRBDMirrorDisasterFailoverTest(CephRBDMirrorBase): """Encapsulate ``ceph-rbd-mirror`` destructive tests.""" - def forced_failover_cinder_volume_host(self, cinder_client): - """Validate forced Cinder volume host fail over.""" - def apply_cinder_workaround(): - """Set minimal timeouts / retries to the Cinder Ceph backend. + def apply_cinder_ceph_workaround(self): + """Set minimal timeouts / retries to the Cinder Ceph backend. 
-            This is needed because the failover via Cinder will try to do a
-            demotion of the site-a, and with the default timeouts / retries,
-            the operation takes an unreasonably amount of time.
-            """
-            cinder_configs = {
-                'rados_connect_timeout': '1',
-                'rados_connection_retries': '1',
-                'rados_connection_interval': '0',
-                'replication_connect_timeout': '1',
-            }
-            update_cinder_conf_cmd = (
-                "import configparser; "
-                "config = configparser.ConfigParser(); "
-                "config.read('/etc/cinder/cinder.conf'); "
-                "{}"
-                "f = open('/etc/cinder/cinder.conf', 'w'); "
-                "config.write(f); "
-                "f.close()")
-            cmd = ''
-            for config in cinder_configs:
-                cmd += "config.set('cinder-ceph', '{0}', '{1}'); ".format(
-                    config, cinder_configs[config])
-            cmd = update_cinder_conf_cmd.format(cmd)
-            zaza.model.run_on_leader(
-                'cinder-ceph',
-                'python3 -c "{}"; systemctl restart cinder-volume'.format(cmd))
+        This is needed because the failover via Cinder API will try to do a
+        demotion of the site-a. However, when site-a is down, and with the
+        default timeouts / retries, the operation takes an unreasonable amount
+        of time (or sometimes it never finishes).
+        """
+        # These new config options need to be set under the Cinder Ceph backend
+        # section in the main Cinder config file.
+        # At the moment, we don't have the possibility of using Juju config
+        # to set these options. And also, it's not even a good practice to
+        # have them in production.
+        # These should be set only to do the Ceph failover via Cinder API, and
+        # they need to be removed after.
+        configs = {
+            'rados_connect_timeout': '1',
+            'rados_connection_retries': '1',
+            'rados_connection_interval': '0',
+            'replication_connect_timeout': '1',
+        }
 
-        apply_cinder_workaround()
-        self.failover_cinder_volume_host(cinder_client)
+        # Small Python script that will be executed via Juju run to update
+        # the Cinder config file.
+ update_cinder_conf_script = ( + "import configparser; " + "config = configparser.ConfigParser(); " + "config.read('/etc/cinder/cinder.conf'); " + "{}" + "f = open('/etc/cinder/cinder.conf', 'w'); " + "config.write(f); " + "f.close()") + set_cmd = '' + for cfg_name in configs: + set_cmd += "config.set('{0}', '{1}', '{2}'); ".format( + self.cinder_ceph_app_name, cfg_name, configs[cfg_name]) + script = update_cinder_conf_script.format(set_cmd) - for volume in cinder_client.volumes.list(): - self.assertEqual(volume.status, 'available') + # Run the workaround script via Juju run + zaza.model.run_on_leader( + self.cinder_ceph_app_name, + 'python3 -c "{}"; systemctl restart cinder-volume'.format(script)) - def test_kill_site_a_fail_over(self): - """Validate fail over after uncontrolled shutdown of primary.""" - action_params = {} - if self.get_cinder_rbd_mirroring_mode() == 'image': - _, site_b_pools = self.get_pools() - site_b_pools.remove('cinder-ceph') - action_params['pools'] = ','.join(site_b_pools) - - for application in 'ceph-rbd-mirror', 'ceph-mon', 'ceph-osd': + def kill_primary_site(self): + """Simulate an unexpected primary site shutdown.""" + logging.info('Killing the Ceph primary site') + for application in ['ceph-rbd-mirror', 'ceph-mon', 'ceph-osd']: zaza.model.remove_application( application, model_name=self.site_a_model, forcefully_remove_machines=True) + + def test_100_forced_juju_failover(self): + """Validate Ceph failover via Juju when the primary site is down. + + * Kill the primary site + * Execute the forced failover via Juju actions + """ + # Get the site-b Ceph pools that need to be promoted + _, site_b_pools = self.get_failover_pools() + site_b_app_name = self.application_name + self.site_b_app_suffix + + # Simulate primary site unexpected shutdown + self.kill_primary_site() + + # Try and promote the site-b to primary. 
         result = zaza.model.run_action_on_leader(
-            'ceph-rbd-mirror' + self.site_b_app_suffix,
+            site_b_app_name,
             'promote',
             model_name=self.site_b_model,
-            action_params=action_params)
+            action_params={
+                'pools': ','.join(site_b_pools),
+            })
+        self.assertEqual(int(result.results['Code']), 0)
+
+        # The site-b 'promote' Juju action is expected to fail, because the
+        # primary site is down.
         self.assertEqual(result.status, 'failed')
-        action_params['force'] = True
+
+        # Retry to promote site-b using the 'force' Juju action parameter.
         result = zaza.model.run_action_on_leader(
-            'ceph-rbd-mirror' + self.site_b_app_suffix,
+            site_b_app_name,
             'promote',
             model_name=self.site_b_model,
-            action_params=action_params)
+            action_params={
+                'force': True,
+                'pools': ','.join(site_b_pools),
+            })
+        self.assertEqual(int(result.results['Code']), 0)
+
+        # Validate successful Juju action execution
         self.assertEqual(result.status, 'completed')
-        if self.get_cinder_rbd_mirroring_mode() == 'image':
-            session = openstack.get_overcloud_keystone_session()
-            cinder = openstack.get_cinder_session_client(session)
-            self.forced_failover_cinder_volume_host(cinder)
+
+    def test_200_forced_cinder_failover(self):
+        """Validate Ceph failover via Cinder when the primary site is down.
+
+        This test only makes sense if Cinder RBD mirroring mode is 'image'.
+        It will return early, if this is not the case.
+
+        This assumes that the primary site is already killed.
+        """
+        cinder_rbd_mirroring_mode = self.get_cinder_rbd_mirroring_mode(
+            self.cinder_ceph_app_name)
+        if cinder_rbd_mirroring_mode != 'image':
+            logging.warning(
+                "Skipping 'test_200_forced_cinder_failover' since "
+                "Cinder RBD mirroring mode is {}.".format(
+                    cinder_rbd_mirroring_mode))
+            return
+
+        # Make sure that the Cinder Ceph backend workaround is applied.
+ self.apply_cinder_ceph_workaround() + + session = openstack.get_overcloud_keystone_session() + cinder = openstack.get_cinder_session_client(session, version=3) + openstack.failover_cinder_volume_host( + cinder=cinder, + backend_name=self.cinder_ceph_app_name, + target_backend_id='ceph', + target_status='disabled', + target_replication_status='failed-over') + + # Check that the Cinder volumes are still available after forced + # failover. + for volume in cinder.volumes.list(): + self.assertEqual(volume.status, 'available') diff --git a/zaza/openstack/utilities/openstack.py b/zaza/openstack/utilities/openstack.py index d277656..4ed75b9 100644 --- a/zaza/openstack/utilities/openstack.py +++ b/zaza/openstack/utilities/openstack.py @@ -2489,6 +2489,40 @@ def attach_volume(nova, volume_id, instance_id): device='/dev/vdx') +def failover_cinder_volume_host(cinder, backend_name='cinder-ceph', + target_backend_id='ceph', + target_status='disabled', + target_replication_status='failed-over'): + """Failover Cinder volume host with replication enabled. + + :param cinder: Authenticated cinderclient + :type cinder: cinder.Client + :param backend_name: Cinder volume backend name with + replication enabled. + :type backend_name: str + :param target_backend_id: Failover target Cinder backend id. + :type target_backend_id: str + :param target_status: Target Cinder volume status after failover. + :type target_status: str + :param target_replication_status: Target Cinder volume replication + status after failover. 
+ :type target_replication_status: str + :raises: AssertionError + """ + host = 'cinder@{}'.format(backend_name) + logging.info('Failover Cinder volume host %s to backend_id %s', + host, target_backend_id) + cinder.services.failover_host(host=host, backend_id=target_backend_id) + for attempt in tenacity.Retrying( + retry=tenacity.retry_if_exception_type(AssertionError), + stop=tenacity.stop_after_attempt(10), + wait=tenacity.wait_exponential(multiplier=1, min=2, max=10)): + with attempt: + svc = cinder.services.list(host=host, binary='cinder-volume')[0] + assert svc.status == target_status + assert svc.replication_status == target_replication_status + + def create_volume_backup(cinder, volume_id, name=None): """Create cinder volume backup.