From 5579f2f110b8b08097c6a7b189669886a10cb378 Mon Sep 17 00:00:00 2001
From: Alvaro Uria <alvaro.uria@canonical.com>
Date: Mon, 20 Jul 2020 19:24:33 +0200
Subject: [PATCH 01/17] HaclusterTest: support scaleback test on bionic

---
 zaza/openstack/charm_tests/hacluster/tests.py | 69 +++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index 199d915..3ae13d1 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -21,6 +21,7 @@ import os
 
 import zaza.openstack.charm_tests.test_utils as test_utils
 import zaza.openstack.configure.hacluster
+import zaza.utilities.juju as juju_utils
 
 
 class HaclusterTest(test_utils.OpenStackBaseTest):
@@ -69,3 +70,71 @@ class HaclusterTest(test_utils.OpenStackBaseTest):
 
         self._toggle_maintenance_and_wait('true')
         self._toggle_maintenance_and_wait('false')
+
+    def test_930_scaleback_bionic(self):
+        """Remove a unit, recalculate quorum and add a new one."""
+        principle_app = 'keystone'
+        principle_units = zaza.model.get_status().applications[
+            principle_app]['units']
+        self.assertEqual(len(principle_units), 3)
+        doomed_principle = sorted(principle_units.keys())[0]
+        series = juju_utils.get_machine_series(
+            principle_units[doomed_principle].machine)
+        if series != 'bionic':
+            logging.debug("noop - only run test in bionic")
+            logging.info('SKIP')
+            return
+
+        doomed_unit = juju_utils.get_subordinate_units(
+            [doomed_principle], charm_name='hac')[0]
+
+        logging.info('Pausing unit {}'.format(doomed_unit))
+        zaza.model.run_action(
+            doomed_unit,
+            'pause',
+            raise_on_failure=True)
+        logging.info('OK')
+
+        logging.info('Resuming unit {}'.format(doomed_unit))
+        zaza.model.run_action(
+            doomed_unit,
+            'resume',
+            raise_on_failure=True)
+        logging.info('OK')
+
+        logging.info('Removing {}'.format(doomed_principle))
+        zaza.model.destroy_unit(
+            principle_app,
+            doomed_principle,
+            wait_disappear=True)
+        logging.info('OK')
+
+        logging.info('Updating corosync ring')
+        zaza.model.run_action_on_leader(
+            self.application_name,
+            'update-ring',
+            action_params={'i-really-mean-it': True},
+            raise_on_failure=True)
+
+        _states = {
+            self.application_name: {
+                "workload-status": "blocked",
+                "workload-status-message":
+                    "Insufficient peer units for ha cluster (require 3)"
+            },
+            'keystone': {
+                "workload-status": "blocked",
+                "workload-status-message": "Database not initialised",
+            },
+        }
+        zaza.model.wait_for_application_states(states=_states)
+        zaza.model.block_until_all_units_idle()
+        logging.info('OK')
+
+        logging.info('Adding a hacluster unit')
+        zaza.model.add_unit(principle_app, wait_appear=True)
+        _states = {self.application_name: {
+            "workload-status": "active",
+            "workload-status-message": "Unit is ready and clustered"}}
+        zaza.model.wait_for_application_states(states=_states)
+        logging.debug('OK')

From 573acf9d1c0e2490589a4246b90083fc4113fdc9 Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Thu, 27 Aug 2020 11:54:42 +0200
Subject: [PATCH 02/17] Move new scaleback test to new test class

---
 zaza/openstack/charm_tests/hacluster/tests.py | 44 ++++++++++---------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index 3ae13d1..7715a4a 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -24,15 +24,19 @@ import zaza.openstack.configure.hacluster
 import zaza.utilities.juju as juju_utils
 
 
-class HaclusterTest(test_utils.OpenStackBaseTest):
-    """hacluster tests."""
+class HaclusterBaseTest(test_utils.OpenStackBaseTest):
+    """Base class for hacluster tests."""
 
     @classmethod
     def setUpClass(cls):
         """Run class setup for running hacluster tests."""
-        super(HaclusterTest, cls).setUpClass()
+        super(HaclusterBaseTest, cls).setUpClass()
         cls.vip = os.environ.get("TEST_VIP00")
 
+
+class HaclusterTest(HaclusterBaseTest):
+    """hacluster tests."""
+
     def test_900_action_cleanup(self):
         """The services can be cleaned up."""
         zaza.model.run_action_on_leader(
@@ -71,22 +75,20 @@ class HaclusterTest(test_utils.OpenStackBaseTest):
         self._toggle_maintenance_and_wait('true')
         self._toggle_maintenance_and_wait('false')
 
-    def test_930_scaleback_bionic(self):
+
+class HaclusterScalebackTest(HaclusterBaseTest):
+    """hacluster scaleback tests."""
+
+    _PRINCIPLE_APP = 'keystone'
+
+    def test_930_scaleback(self):
         """Remove a unit, recalculate quorum and add a new one."""
-        principle_app = 'keystone'
         principle_units = zaza.model.get_status().applications[
-            principle_app]['units']
+            self._PRINCIPLE_APP]['units']
         self.assertEqual(len(principle_units), 3)
         doomed_principle = sorted(principle_units.keys())[0]
-        series = juju_utils.get_machine_series(
-            principle_units[doomed_principle].machine)
-        if series != 'bionic':
-            logging.debug("noop - only run test in bionic")
-            logging.info('SKIP')
-            return
-
         doomed_unit = juju_utils.get_subordinate_units(
-            [doomed_principle], charm_name='hac')[0]
+            [doomed_principle], charm_name=self.application_name)[0]
 
         logging.info('Pausing unit {}'.format(doomed_unit))
         zaza.model.run_action(
@@ -104,7 +106,7 @@ class HaclusterTest(test_utils.OpenStackBaseTest):
 
         logging.info('Removing {}'.format(doomed_principle))
         zaza.model.destroy_unit(
-            principle_app,
+            self._PRINCIPLE_APP,
             doomed_principle,
             wait_disappear=True)
         logging.info('OK')
@@ -116,25 +118,25 @@ class HaclusterTest(test_utils.OpenStackBaseTest):
             action_params={'i-really-mean-it': True},
             raise_on_failure=True)
 
-        _states = {
+        expected_states = {
             self.application_name: {
                 "workload-status": "blocked",
                 "workload-status-message":
                     "Insufficient peer units for ha cluster (require 3)"
             },
-            'keystone': {
+            self._PRINCIPLE_APP: {
                 "workload-status": "blocked",
                 "workload-status-message": "Database not initialised",
             },
         }
-        zaza.model.wait_for_application_states(states=_states)
+        zaza.model.wait_for_application_states(states=expected_states)
         zaza.model.block_until_all_units_idle()
         logging.info('OK')
 
         logging.info('Adding a hacluster unit')
-        zaza.model.add_unit(principle_app, wait_appear=True)
-        _states = {self.application_name: {
+        zaza.model.add_unit(self._PRINCIPLE_APP, wait_appear=True)
+        expected_states = {self.application_name: {
             "workload-status": "active",
             "workload-status-message": "Unit is ready and clustered"}}
-        zaza.model.wait_for_application_states(states=_states)
+        zaza.model.wait_for_application_states(states=expected_states)
         logging.debug('OK')

From e48da475a2fe4a4b7d6b210def38aacfee05b83e Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Thu, 27 Aug 2020 12:57:46 +0200
Subject: [PATCH 03/17] No need to resume hacluster unit before scaling down

---
 zaza/openstack/charm_tests/hacluster/tests.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index 7715a4a..28f5248 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -97,13 +97,6 @@ class HaclusterScalebackTest(HaclusterBaseTest):
             raise_on_failure=True)
         logging.info('OK')
 
-        logging.info('Resuming unit {}'.format(doomed_unit))
-        zaza.model.run_action(
-            doomed_unit,
-            'resume',
-            raise_on_failure=True)
-        logging.info('OK')
-
         logging.info('Removing {}'.format(doomed_principle))
         zaza.model.destroy_unit(
             self._PRINCIPLE_APP,

From b72855ca96d15a11c43e8b56a63a7f968e91d847 Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Fri, 28 Aug 2020 13:15:31 +0200
Subject: [PATCH 04/17] Make HaclusterScalebackTest runnable against any charm

---
 zaza/openstack/charm_tests/hacluster/tests.py | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index 28f5248..76eced8 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -79,16 +79,18 @@ class HaclusterTest(HaclusterBaseTest):
 class HaclusterScalebackTest(HaclusterBaseTest):
     """hacluster scaleback tests."""
 
-    _PRINCIPLE_APP = 'keystone'
+    _PRINCIPLE_APP_NAME = 'keystone'
+    _HACLUSTER_APP_NAME = 'hacluster'
+    _HACLUSTER_CHARM_NAME = 'hacluster'
 
     def test_930_scaleback(self):
         """Remove a unit, recalculate quorum and add a new one."""
         principle_units = zaza.model.get_status().applications[
-            self._PRINCIPLE_APP]['units']
+            self._PRINCIPLE_APP_NAME]['units']
         self.assertEqual(len(principle_units), 3)
         doomed_principle = sorted(principle_units.keys())[0]
         doomed_unit = juju_utils.get_subordinate_units(
-            [doomed_principle], charm_name=self.application_name)[0]
+            [doomed_principle], charm_name=self._HACLUSTER_CHARM_NAME)[0]
 
         logging.info('Pausing unit {}'.format(doomed_unit))
         zaza.model.run_action(
@@ -99,25 +101,25 @@ class HaclusterScalebackTest(HaclusterBaseTest):
 
         logging.info('Removing {}'.format(doomed_principle))
         zaza.model.destroy_unit(
-            self._PRINCIPLE_APP,
+            self._PRINCIPLE_APP_NAME,
             doomed_principle,
             wait_disappear=True)
         logging.info('OK')
 
         logging.info('Updating corosync ring')
         zaza.model.run_action_on_leader(
-            self.application_name,
+            self._HACLUSTER_APP_NAME,
             'update-ring',
             action_params={'i-really-mean-it': True},
             raise_on_failure=True)
 
         expected_states = {
-            self.application_name: {
+            self._HACLUSTER_APP_NAME: {
                 "workload-status": "blocked",
                 "workload-status-message":
                     "Insufficient peer units for ha cluster (require 3)"
             },
-            self._PRINCIPLE_APP: {
+            self._PRINCIPLE_APP_NAME: {
                 "workload-status": "blocked",
                 "workload-status-message": "Database not initialised",
             },
@@ -127,8 +129,8 @@ class HaclusterScalebackTest(HaclusterBaseTest):
         logging.info('OK')
 
         logging.info('Adding a hacluster unit')
-        zaza.model.add_unit(self._PRINCIPLE_APP, wait_appear=True)
-        expected_states = {self.application_name: {
+        zaza.model.add_unit(self._PRINCIPLE_APP_NAME, wait_appear=True)
+        expected_states = {self._HACLUSTER_APP_NAME: {
             "workload-status": "active",
             "workload-status-message": "Unit is ready and clustered"}}
         zaza.model.wait_for_application_states(states=expected_states)

From 14f7e3848cb5989912769e3a7c47345cd1f56624 Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Tue, 8 Sep 2020 12:51:53 +0200
Subject: [PATCH 05/17] Remove hacluster test option hacluster_app_name

---
 zaza/openstack/charm_tests/hacluster/tests.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index 2728a68..e02832b 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -85,7 +85,6 @@ class HaclusterScalebackTest(HaclusterBaseTest):
         super(HaclusterScalebackTest, cls).setUpClass()
         test_config = cls.test_config['tests_options']['hacluster']
         cls._principle_app_name = test_config['principle-app-name']
-        cls._hacluster_app_name = test_config['hacluster-app-name']
         cls._hacluster_charm_name = test_config['hacluster-charm-name']
 
     def test_930_scaleback(self):
@@ -121,8 +120,9 @@ class HaclusterScalebackTest(HaclusterBaseTest):
         logging.info('OK')
 
         logging.info('Updating corosync ring')
+        hacluster_app_name = other_hacluster_unit.application
         zaza.model.run_action_on_leader(
-            self._hacluster_app_name,
+            hacluster_app_name,
             'update-ring',
             action_params={'i-really-mean-it': True},
             raise_on_failure=True)
@@ -132,7 +132,7 @@ class HaclusterScalebackTest(HaclusterBaseTest):
         logging.info('OK')
 
         logging.info('Waiting for model to settle')
-        expected_states = {self._hacluster_app_name: {
+        expected_states = {hacluster_app_name: {
             "workload-status": "active",
             "workload-status-message": "Unit is ready and clustered"}}
         zaza.model.wait_for_application_states(states=expected_states)

From 11f3060f6b1564d6497cbfc2b2f80251bcc27ca3 Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Wed, 9 Sep 2020 10:08:37 +0200
Subject: [PATCH 06/17] Fix unit-name -> app-name conversion

---
 zaza/openstack/charm_tests/hacluster/tests.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index e02832b..29751e1 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -120,7 +120,8 @@ class HaclusterScalebackTest(HaclusterBaseTest):
         logging.info('OK')
 
         logging.info('Updating corosync ring')
-        hacluster_app_name = other_hacluster_unit.application
+        hacluster_app_name = zaza.model.get_unit_from_name(
+            other_hacluster_unit).application
         zaza.model.run_action_on_leader(
             hacluster_app_name,
             'update-ring',

From 69b61c63f4ca647e597c17a98baa33d1fe9ee0a7 Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Fri, 27 Nov 2020 12:04:43 +0100
Subject: [PATCH 07/17] Add HaclusterScaleBackAndForthTest

---
 zaza/openstack/charm_tests/hacluster/tests.py | 62 ++++++++++++++++++-
 1 file changed, 61 insertions(+), 1 deletion(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index 29751e1..99272ed 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -77,7 +77,67 @@ class HaclusterTest(HaclusterBaseTest):
 
 
 class HaclusterScalebackTest(HaclusterBaseTest):
-    """hacluster scaleback tests."""
+    """hacluster scaleback tests.
+
+    Use for testing older releases where lp:1400481 wasn't fixed yet.
+    Superseded by HaclusterScaleBackAndForthTest.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        """Run class setup for running hacluster scaleback tests."""
+        super(HaclusterScalebackTest, cls).setUpClass()
+        test_config = cls.test_config['tests_options']['hacluster']
+        cls._principle_app_name = test_config['principle-app-name']
+        cls._hacluster_charm_name = test_config['hacluster-charm-name']
+
+    def test_930_scaleback(self):
+        """Remove a unit and add a new one."""
+        principle_units = sorted(zaza.model.get_status().applications[
+            self._principle_app_name]['units'].keys())
+        self.assertEqual(len(principle_units), 3)
+        doomed_principle_unit = principle_units[0]
+        other_principle_unit = principle_units[1]
+        doomed_hacluster_unit = juju_utils.get_subordinate_units(
+            [doomed_principle_unit], charm_name=self._hacluster_charm_name)[0]
+        other_hacluster_unit = juju_utils.get_subordinate_units(
+            [other_principle_unit], charm_name=self._hacluster_charm_name)[0]
+
+        logging.info('Pausing unit {}'.format(doomed_hacluster_unit))
+        zaza.model.run_action(
+            doomed_hacluster_unit,
+            'pause',
+            raise_on_failure=True)
+        logging.info('OK')
+
+        logging.info('Removing {}'.format(doomed_principle_unit))
+        zaza.model.destroy_unit(
+            self._principle_app_name,
+            doomed_principle_unit,
+            wait_disappear=True)
+        logging.info('OK')
+
+        logging.info('Waiting for model to settle')
+        zaza.model.block_until_unit_wl_status(other_hacluster_unit, 'blocked')
+        zaza.model.block_until_unit_wl_status(other_principle_unit, 'blocked')
+        zaza.model.block_until_all_units_idle()
+        logging.info('OK')
+
+        logging.info('Adding an hacluster unit')
+        zaza.model.add_unit(self._principle_app_name, wait_appear=True)
+        logging.info('OK')
+
+        logging.info('Waiting for model to settle')
+        zaza.model.block_until_unit_wl_status(other_hacluster_unit, 'active')
+        # NOTE(lourot): the principle application remains blocked after scaling
+        # back up until lp:1400481 is solved.
+        zaza.model.block_until_unit_wl_status(other_principle_unit, 'blocked')
+        zaza.model.block_until_all_units_idle()
+        logging.debug('OK')
+
+
+class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
+    """hacluster tests scaling back and forth."""
 
     @classmethod
     def setUpClass(cls):

From a3248185f147d04774c3e8f2e32c84d386be385c Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Fri, 27 Nov 2020 13:52:44 +0100
Subject: [PATCH 08/17] Fix copy-paste mistake

---
 zaza/openstack/charm_tests/hacluster/tests.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index 99272ed..517cde2 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -141,8 +141,8 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
 
     @classmethod
     def setUpClass(cls):
-        """Run class setup for running hacluster scaleback tests."""
-        super(HaclusterScalebackTest, cls).setUpClass()
+        """Run class setup for running hacluster tests."""
+        super(HaclusterScaleBackAndForthTest, cls).setUpClass()
         test_config = cls.test_config['tests_options']['hacluster']
         cls._principle_app_name = test_config['principle-app-name']
         cls._hacluster_charm_name = test_config['hacluster-charm-name']

From 1c4d2e05aff4e56859afec4ce80b4e079a41175e Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Thu, 18 Feb 2021 09:51:00 +0100
Subject: [PATCH 09/17] Let test_930_scaleback remove 2 units instead of 1

---
 zaza/openstack/charm_tests/hacluster/tests.py | 57 +++++++++++--------
 1 file changed, 34 insertions(+), 23 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index 28d0383..b1c0713 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -137,7 +137,10 @@ class HaclusterScalebackTest(HaclusterBaseTest):
 
 
 class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
-    """hacluster tests scaling back and forth."""
+    """hacluster tests scaling back and forth.
+
+    Supersedes HaclusterScalebackTest.
+    """
 
     @classmethod
     def setUpClass(cls):
@@ -148,38 +151,46 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
         cls._hacluster_charm_name = test_config['hacluster-charm-name']
 
     def test_930_scaleback(self):
-        """Remove a unit, recalculate quorum and add a new one."""
+        """Remove two units, recalculate quorum and re-add two units.
+
+        NOTE(lourot): before lp:1400481 was fixed, quorum wasn't recalculated
+        when removing units. Within a cluster of 3 units, removing two units
+        and re-adding them led to a situation where the expected quorum became
+        5 and was still considered unreached.
+        """
         principle_units = sorted(zaza.model.get_status().applications[
             self._principle_app_name]['units'].keys())
         self.assertEqual(len(principle_units), 3)
-        doomed_principle_unit = principle_units[0]
-        other_principle_unit = principle_units[1]
-        doomed_hacluster_unit = juju_utils.get_subordinate_units(
-            [doomed_principle_unit], charm_name=self._hacluster_charm_name)[0]
+        doomed_principle_units = principle_units[:2]
+        other_principle_unit = principle_units[2]
+        doomed_hacluster_units = juju_utils.get_subordinate_units(
+            doomed_principle_units, charm_name=self._hacluster_charm_name)
         other_hacluster_unit = juju_utils.get_subordinate_units(
             [other_principle_unit], charm_name=self._hacluster_charm_name)[0]
 
-        logging.info('Pausing unit {}'.format(doomed_hacluster_unit))
-        zaza.model.run_action(
-            doomed_hacluster_unit,
-            'pause',
-            raise_on_failure=True)
-        logging.info('OK')
+        for doomed_hacluster_unit in doomed_hacluster_units:
+            logging.info('Pausing unit {}'.format(doomed_hacluster_unit))
+            zaza.model.run_action(
+                doomed_hacluster_unit,
+                'pause',
+                raise_on_failure=True)
 
-        logging.info('Removing {}'.format(doomed_principle_unit))
-        zaza.model.destroy_unit(
-            self._principle_app_name,
-            doomed_principle_unit,
-            wait_disappear=True)
-        logging.info('OK')
+        for doomed_principle_unit in doomed_principle_units:
+            logging.info('Removing {}'.format(doomed_principle_unit))
+            zaza.model.destroy_unit(
+                self._principle_app_name,
+                doomed_principle_unit,
+                wait_disappear=True)
 
         logging.info('Waiting for model to settle')
         zaza.model.block_until_unit_wl_status(other_hacluster_unit, 'blocked')
-        zaza.model.block_until_unit_wl_status(other_principle_unit, 'blocked')
+        # NOTE(lourot): the principle unit (usually a keystone unit) isn't
+        # guaranteed to be blocked, so we don't validate that.
         zaza.model.block_until_all_units_idle()
-        logging.info('OK')
 
         logging.info('Updating corosync ring')
+        # NOTE(lourot): with Juju >= 2.8 this isn't actually necessary as it
+        # has already been done in hacluster's hanode departing hook:
         hacluster_app_name = zaza.model.get_unit_from_name(
             other_hacluster_unit).application
         zaza.model.run_action_on_leader(
@@ -188,9 +199,9 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
             action_params={'i-really-mean-it': True},
             raise_on_failure=True)
 
-        logging.info('Adding an hacluster unit')
-        zaza.model.add_unit(self._principle_app_name, wait_appear=True)
-        logging.info('OK')
+        for article in ('an', 'another'):
+            logging.info('Adding {} hacluster unit'.format(article))
+            zaza.model.add_unit(self._principle_app_name, wait_appear=True)
 
         logging.info('Waiting for model to settle')
         expected_states = {hacluster_app_name: {

From f598eb93c29f0c7aec99ac22f1d5d5e5ec7cd7e1 Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Thu, 18 Feb 2021 10:38:45 +0100
Subject: [PATCH 10/17] Improve comments

---
 zaza/openstack/charm_tests/hacluster/tests.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index b1c0713..710d3e7 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -185,14 +185,14 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
         logging.info('Waiting for model to settle')
         zaza.model.block_until_unit_wl_status(other_hacluster_unit, 'blocked')
         # NOTE(lourot): the principle unit (usually a keystone unit) isn't
-        # guaranteed to be blocked, so we don't validate that.
+        # guaranteed to be blocked, so we don't validate that here.
         zaza.model.block_until_all_units_idle()
 
         logging.info('Updating corosync ring')
-        # NOTE(lourot): with Juju >= 2.8 this isn't actually necessary as it
-        # has already been done in hacluster's hanode departing hook:
         hacluster_app_name = zaza.model.get_unit_from_name(
             other_hacluster_unit).application
+        # NOTE(lourot): with Juju >= 2.8 this isn't actually necessary as it
+        # has already been done in hacluster's hanode departing hook:
         zaza.model.run_action_on_leader(
             hacluster_app_name,
             'update-ring',

From c00aa18ffbd03dfad9b196b86a54b51e1ba3ef7e Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Thu, 18 Feb 2021 16:22:43 +0100
Subject: [PATCH 11/17] Assert that corosync considers all nodes to be online

---
 zaza/openstack/charm_tests/hacluster/tests.py | 37 +++++++++++++------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index 710d3e7..f720d20 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -153,20 +153,22 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
     def test_930_scaleback(self):
         """Remove two units, recalculate quorum and re-add two units.
 
-        NOTE(lourot): before lp:1400481 was fixed, quorum wasn't recalculated
-        when removing units. Within a cluster of 3 units, removing two units
-        and re-adding them led to a situation where the expected quorum became
-        5 and was still considered unreached.
+        NOTE(lourot): before lp:1400481 was fixed, the corosync ring wasn't
+        recalculated when removing units. So within a cluster of 3 units,
+        removing two units and re-adding them led to a situation where corosync
+        considers having 3 nodes online out of 5, instead of just 3 out of 3.
+        This test covers this scenario.
         """
         principle_units = sorted(zaza.model.get_status().applications[
             self._principle_app_name]['units'].keys())
         self.assertEqual(len(principle_units), 3)
         doomed_principle_units = principle_units[:2]
-        other_principle_unit = principle_units[2]
+        surviving_principle_unit = principle_units[2]
         doomed_hacluster_units = juju_utils.get_subordinate_units(
             doomed_principle_units, charm_name=self._hacluster_charm_name)
-        other_hacluster_unit = juju_utils.get_subordinate_units(
-            [other_principle_unit], charm_name=self._hacluster_charm_name)[0]
+        surviving_hacluster_unit = juju_utils.get_subordinate_units(
+            [surviving_principle_unit],
+            charm_name=self._hacluster_charm_name)[0]
 
         for doomed_hacluster_unit in doomed_hacluster_units:
             logging.info('Pausing unit {}'.format(doomed_hacluster_unit))
@@ -183,14 +185,15 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
                 wait_disappear=True)
 
         logging.info('Waiting for model to settle')
-        zaza.model.block_until_unit_wl_status(other_hacluster_unit, 'blocked')
+        zaza.model.block_until_unit_wl_status(surviving_hacluster_unit,
+                                              'blocked')
         # NOTE(lourot): the principle unit (usually a keystone unit) isn't
         # guaranteed to be blocked, so we don't validate that here.
         zaza.model.block_until_all_units_idle()
 
         logging.info('Updating corosync ring')
         hacluster_app_name = zaza.model.get_unit_from_name(
-            other_hacluster_unit).application
+            surviving_hacluster_unit).application
         # NOTE(lourot): with Juju >= 2.8 this isn't actually necessary as it
         # has already been done in hacluster's hanode departing hook:
         zaza.model.run_action_on_leader(
@@ -200,7 +203,7 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
             raise_on_failure=True)
 
         for article in ('an', 'another'):
-            logging.info('Adding {} hacluster unit'.format(article))
+            logging.info('Re-adding {} hacluster unit'.format(article))
             zaza.model.add_unit(self._principle_app_name, wait_appear=True)
 
         logging.info('Waiting for model to settle')
@@ -208,4 +211,16 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
             "workload-status": "active",
             "workload-status-message": "Unit is ready and clustered"}}
         zaza.model.wait_for_application_states(states=expected_states)
-        logging.debug('OK')
+
+        # At this point if the corosync ring has been properly updated, there
+        # shouldn't be any trace of the deleted units anymore:
+        logging.info('Checking that corosync considers all nodes to be online')
+        cmd = 'sudo crm status'
+        result = zaza.model.run_on_unit(surviving_hacluster_unit, cmd)
+        code = result.get('Code')
+        if code != '0':
+            raise zaza.model.CommandRunFailed(cmd, result)
+        output = result.get('Stdout').strip()
+        logging.debug('Output received: {}'.format(output))
+        self.assertNotIn('OFFLINE', output,
+                         "corosync shouldn't list any offline node")

From 8c819814fb01741575bee97ab74d202b00947123 Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Thu, 18 Feb 2021 10:50:53 +0100
Subject: [PATCH 12/17] Don't wait for principle to get back to active

---
 zaza/openstack/charm_tests/hacluster/tests.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index f720d20..f340022 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -130,8 +130,7 @@ class HaclusterScalebackTest(HaclusterBaseTest):
         logging.info('Waiting for model to settle')
         zaza.model.block_until_unit_wl_status(other_hacluster_unit, 'active')
         # NOTE(lourot): the principle application sometimes remain blocked
-        # after scaling back up until lp:1400481 is solved.
-        # zaza.model.block_until_unit_wl_status(other_principle_unit, 'active')
+        # after scaling back up.
         zaza.model.block_until_all_units_idle()
         logging.debug('OK')
 
@@ -207,10 +206,15 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
             zaza.model.add_unit(self._principle_app_name, wait_appear=True)
 
         logging.info('Waiting for model to settle')
-        expected_states = {hacluster_app_name: {
-            "workload-status": "active",
-            "workload-status-message": "Unit is ready and clustered"}}
-        zaza.model.wait_for_application_states(states=expected_states)
+        # NOTE(lourot): the principle charm may remain blocked here. This seems
+        # to happen often when it is keystone and has a mysql-router as other
+        # subordinate charm. The keystone units seems to often remain blocked
+        # with 'Database not initialised'. This is not the hacluster charm's
+        # fault and this is why we don't validate here that the entire model
+        # goes back to active/idle.
+        zaza.model.block_until_unit_wl_status(surviving_hacluster_unit,
+                                              'active')
+        zaza.model.block_until_all_units_idle()
 
         # At this point if the corosync ring has been properly updated, there
         # shouldn't be any trace of the deleted units anymore:

From 20bd5dfb5908f79b6e2c1c19b5e6e893baad86ee Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Thu, 25 Feb 2021 13:05:30 +0100
Subject: [PATCH 13/17] Remove obsolete comment

The automatic corosync cleanup on Juju >= 2.8 turned out
not to work and has been removed from charm-hacluster.
The 'update-ring' action is now always needed.
---
 zaza/openstack/charm_tests/hacluster/tests.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index f340022..ea613ec 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -193,8 +193,6 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
         logging.info('Updating corosync ring')
         hacluster_app_name = zaza.model.get_unit_from_name(
             surviving_hacluster_unit).application
-        # NOTE(lourot): with Juju >= 2.8 this isn't actually necessary as it
-        # has already been done in hacluster's hanode departing hook:
         zaza.model.run_action_on_leader(
             hacluster_app_name,
             'update-ring',

From c80f5fdef8fe80564406b084961d5a6b4f7e03a0 Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Wed, 3 Mar 2021 10:50:53 +0100
Subject: [PATCH 14/17] Make HaclusterScaleBackAndForthTest less stressful

Removing and re-adding two units out of a cluster of three
so fast seems to be too stressful for corosync/pacemaker,
which makes the test fail in rare cases.  Removing and
re-adding one unit only is enough in order to validate the
functionality.  This also makes the test execution time
much shorter.
---
 zaza/openstack/charm_tests/hacluster/tests.py | 46 +++++++++----------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index ea613ec..b365960 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -154,40 +154,39 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
 
         NOTE(lourot): before lp:1400481 was fixed, the corosync ring wasn't
         recalculated when removing units. So within a cluster of 3 units,
-        removing two units and re-adding them led to a situation where corosync
-        considers having 3 nodes online out of 5, instead of just 3 out of 3.
+        removing a unit and re-adding one led to a situation where corosync
+        considers having 3 nodes online out of 4, instead of just 3 out of 3.
         This test covers this scenario.
         """
         principle_units = sorted(zaza.model.get_status().applications[
             self._principle_app_name]['units'].keys())
         self.assertEqual(len(principle_units), 3)
-        doomed_principle_units = principle_units[:2]
-        surviving_principle_unit = principle_units[2]
-        doomed_hacluster_units = juju_utils.get_subordinate_units(
-            doomed_principle_units, charm_name=self._hacluster_charm_name)
+        surviving_principle_unit = principle_units[0]
+        doomed_principle_unit = principle_units[1]
         surviving_hacluster_unit = juju_utils.get_subordinate_units(
             [surviving_principle_unit],
             charm_name=self._hacluster_charm_name)[0]
+        doomed_hacluster_unit = juju_utils.get_subordinate_units(
+            [doomed_principle_unit],
+            charm_name=self._hacluster_charm_name)[0]
 
-        for doomed_hacluster_unit in doomed_hacluster_units:
-            logging.info('Pausing unit {}'.format(doomed_hacluster_unit))
-            zaza.model.run_action(
-                doomed_hacluster_unit,
-                'pause',
-                raise_on_failure=True)
+        logging.info('Pausing unit {}'.format(doomed_hacluster_unit))
+        zaza.model.run_action(
+            doomed_hacluster_unit,
+            'pause',
+            raise_on_failure=True)
 
-        for doomed_principle_unit in doomed_principle_units:
-            logging.info('Removing {}'.format(doomed_principle_unit))
-            zaza.model.destroy_unit(
-                self._principle_app_name,
-                doomed_principle_unit,
-                wait_disappear=True)
+        logging.info('Removing {}'.format(doomed_principle_unit))
+        zaza.model.destroy_unit(
+            self._principle_app_name,
+            doomed_principle_unit,
+            wait_disappear=True)
 
         logging.info('Waiting for model to settle')
         zaza.model.block_until_unit_wl_status(surviving_hacluster_unit,
                                               'blocked')
-        # NOTE(lourot): the principle unit (usually a keystone unit) isn't
-        # guaranteed to be blocked, so we don't validate that here.
+        # NOTE(lourot): the surviving principle units (usually keystone units)
+        # aren't guaranteed to be blocked, so we don't validate that here.
         zaza.model.block_until_all_units_idle()
 
         logging.info('Updating corosync ring')
@@ -199,9 +198,8 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
             action_params={'i-really-mean-it': True},
             raise_on_failure=True)
 
-        for article in ('an', 'another'):
-            logging.info('Re-adding {} hacluster unit'.format(article))
-            zaza.model.add_unit(self._principle_app_name, wait_appear=True)
+        logging.info('Re-adding an hacluster unit')
+        zaza.model.add_unit(self._principle_app_name, wait_appear=True)
 
         logging.info('Waiting for model to settle')
         # NOTE(lourot): the principle charm may remain blocked here. This seems
@@ -215,7 +213,7 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
         zaza.model.block_until_all_units_idle()
 
         # At this point if the corosync ring has been properly updated, there
-        # shouldn't be any trace of the deleted units anymore:
+        # shouldn't be any trace of the deleted unit anymore:
         logging.info('Checking that corosync considers all nodes to be online')
         cmd = 'sudo crm status'
         result = zaza.model.run_on_unit(surviving_hacluster_unit, cmd)

From 0931ebb456574fb3b4b25344e3ed26861eca7429 Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Wed, 3 Mar 2021 10:56:15 +0100
Subject: [PATCH 15/17] Fix docstring

---
 zaza/openstack/charm_tests/hacluster/tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index b365960..66d4f45 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -150,7 +150,7 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
         cls._hacluster_charm_name = test_config['hacluster-charm-name']
 
     def test_930_scaleback(self):
-        """Remove two units, recalculate quorum and re-add two units.
+        """Remove one unit, recalculate quorum and re-add one unit.
 
         NOTE(lourot): before lp:1400481 was fixed, the corosync ring wasn't
         recalculated when removing units. So within a cluster of 3 units,

From 93a48133b35e20d79251922c321d97b1ddf03e4e Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Fri, 5 Mar 2021 12:12:00 +0100
Subject: [PATCH 16/17] Add assertions to HaclusterScaleBackAndForthTest

---
 zaza/openstack/charm_tests/hacluster/tests.py | 35 +++++++++++++++----
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index 66d4f45..53c7392 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -189,6 +189,10 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
         # aren't guaranteed to be blocked, so we don't validate that here.
         zaza.model.block_until_all_units_idle()
 
+        # At this point the corosync ring hasn't been updated yet, so it should
+        # still remember the deleted unit:
+        self.__assert_some_corosync_nodes_are_offline(surviving_hacluster_unit)
+
         logging.info('Updating corosync ring')
         hacluster_app_name = zaza.model.get_unit_from_name(
             surviving_hacluster_unit).application
@@ -198,6 +202,10 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
             action_params={'i-really-mean-it': True},
             raise_on_failure=True)
 
+        # At this point if the corosync ring has been properly updated, there
+        # shouldn't be any trace of the deleted unit anymore:
+        self.__assert_all_corosync_nodes_are_online(surviving_hacluster_unit)
+
         logging.info('Re-adding an hacluster unit')
         zaza.model.add_unit(self._principle_app_name, wait_appear=True)
 
@@ -212,15 +220,30 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
                                               'active')
         zaza.model.block_until_all_units_idle()
 
-        # At this point if the corosync ring has been properly updated, there
-        # shouldn't be any trace of the deleted unit anymore:
+        # At this point the corosync ring should still not contain any offline
+        # node:
+        self.__assert_all_corosync_nodes_are_online(surviving_hacluster_unit)
+
+    def __assert_some_corosync_nodes_are_offline(self, hacluster_unit):
+        logging.info('Checking that corosync considers at least one node to '
+                     'be offline')
+        output = self._get_crm_status(hacluster_unit)
+        self.assertIn('OFFLINE', output,
+                      "corosync should list at least one offline node")
+
+    def __assert_all_corosync_nodes_are_online(self, hacluster_unit):
         logging.info('Checking that corosync considers all nodes to be online')
+        output = self._get_crm_status(hacluster_unit)
+        self.assertNotIn('OFFLINE', output,
+                         "corosync shouldn't list any offline node")
+
+    @staticmethod
+    def _get_crm_status(hacluster_unit):
         cmd = 'sudo crm status'
-        result = zaza.model.run_on_unit(surviving_hacluster_unit, cmd)
+        result = zaza.model.run_on_unit(hacluster_unit, cmd)
         code = result.get('Code')
         if code != '0':
             raise zaza.model.CommandRunFailed(cmd, result)
         output = result.get('Stdout').strip()
-        logging.debug('Output received: {}'.format(output))
-        self.assertNotIn('OFFLINE', output,
-                         "corosync shouldn't list any offline node")
+        logging.debug('crm output received: {}'.format(output))
+        return output

From aafdc4070f0984d87dea05008de9a86e0beed920 Mon Sep 17 00:00:00 2001
From: Aurelien Lourot <aurelien.lourot@canonical.com>
Date: Fri, 5 Mar 2021 18:08:54 +0100
Subject: [PATCH 17/17] Workaround for lp:1874719

---
 zaza/openstack/charm_tests/hacluster/tests.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/zaza/openstack/charm_tests/hacluster/tests.py b/zaza/openstack/charm_tests/hacluster/tests.py
index 53c7392..5f46565 100644
--- a/zaza/openstack/charm_tests/hacluster/tests.py
+++ b/zaza/openstack/charm_tests/hacluster/tests.py
@@ -220,8 +220,17 @@ class HaclusterScaleBackAndForthTest(HaclusterBaseTest):
                                               'active')
         zaza.model.block_until_all_units_idle()
 
-        # At this point the corosync ring should still not contain any offline
-        # node:
+        # Because of lp:1874719 the corosync ring may show a mysterious offline
+        # 'node1' node. We clean up the ring by re-running the 'update-ring'
+        # action:
+        logging.info('Updating corosync ring - workaround for lp:1874719')
+        zaza.model.run_action_on_leader(
+            hacluster_app_name,
+            'update-ring',
+            action_params={'i-really-mean-it': True},
+            raise_on_failure=True)
+
+        # At this point the corosync ring should not contain any offline node:
         self.__assert_all_corosync_nodes_are_online(surviving_hacluster_unit)
 
     def __assert_some_corosync_nodes_are_offline(self, hacluster_unit):