From 5ab5bb834448c0e957ad04fb9c3a634678f5b9ca Mon Sep 17 00:00:00 2001 From: Martin Kalcok Date: Wed, 28 Sep 2022 19:05:49 +0200 Subject: [PATCH 1/5] Tests for ovn-central downscaling and cluster status --- zaza/openstack/charm_tests/ovn/tests.py | 133 +++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 deletion(-) diff --git a/zaza/openstack/charm_tests/ovn/tests.py b/zaza/openstack/charm_tests/ovn/tests.py index cbff6ec..2f0015b 100644 --- a/zaza/openstack/charm_tests/ovn/tests.py +++ b/zaza/openstack/charm_tests/ovn/tests.py @@ -19,7 +19,7 @@ import logging import juju import tenacity - +import yaml import zaza import zaza.model @@ -701,3 +701,134 @@ class OVNCentralDeferredRestartTest( self.run_package_change_test( 'ovn-central', 'ovn-central') + + +class OVNCentralDownscaleTests(test_utils.BaseCharmTest): + """Tests for cluster-status and cluster-kick actions.""" + + SB_CMD = "ovs-appctl -t /var/run/ovn/ovnsb_db.ctl {}" + NB_CMD = "ovs-appctl -t /var/run/ovn/ovnnb_db.ctl {}" + + def _cluster_status_action(self): + """Return Southbound and Northbound cluster status. + + This function returns data as reported by "cluster-status" action + parsed into two dictionaries in the following order: + "Southbound status", "Northbound status" + """ + yaml_load_err = "Status of '{}' could not be loaded as yaml:\n{}" + status_raw = zaza.model.run_action_on_leader("ovn-central", + "cluster-status") + status_data = status_raw.data["results"] + # Verify expected items in the action result + self.assertIn("northbound-cluster", status_data) + self.assertIn("southbound-cluster", status_data) + + try: + nb_status = yaml.safe_load(status_data["northbound-cluster"]) + except yaml.YAMLError: + self.fail(yaml_load_err.format("northbound-cluster", + status_data["northbound-cluster"])) + try: + sb_status = yaml.safe_load(status_data["southbound-cluster"]) + except yaml.YAMLError: + self.fail(yaml_load_err.format("southbound-cluster", + status_data["southbound-cluster"])) + + return sb_status, nb_status + + def test_cluster_status(self): + """Test that cluster-status action returns expected results.""" + application = zaza.model.get_application("ovn-central") + sb_status, nb_status = self._cluster_status_action() + + # Verify that cluster status includes "Servers" field with correct type + for status in (nb_status, sb_status): + self.assertIn("Servers", status) + self.assertIsInstance(status["Servers"], dict) + + # Verify that units and their Server IDs are properly paired + expected_mapping = {} + for unit in application.units: + unit_name = unit.entity_id + nb_status_cmd = self.NB_CMD.format("cluster/status OVN_Northbound") + sb_status_cmd = self.SB_CMD.format("cluster/status OVN_Southbound") + nb_cluster_status = zaza.model.run_on_unit(unit_name, + nb_status_cmd) + sb_cluster_status = zaza.model.run_on_unit(unit_name, + sb_status_cmd) + nb_id = nb_cluster_status["Stdout"].splitlines()[0] + sb_id = sb_cluster_status["Stdout"].splitlines()[0] + expected_mapping[unit_name] = {"sb_id": sb_id, "nb_id": nb_id} + + for unit_name, unit_data in expected_mapping.items(): + sb_id = unit_data["sb_id"] + nb_id = unit_data["nb_id"] + self.assertEqual(sb_status["Servers"][sb_id]["Unit"], unit_name) + self.assertEqual(nb_status["Servers"][nb_id]["Unit"], unit_name) + + def test_cluster_kick(self): + """Test forcefully removing a member of an ovn cluster. + + If unit fails to remove itself gracefully from the + Southbound/Northbound OVN clusters, it can be kicked using + "cluster-kick" action. This test simulates such scenario by removing + contents of "/var/run/ovn/*" to mess with OVN communication before + removal of the unit which prevents the unit from gracefully leaving + the OVN cluster. + """ + application = zaza.model.get_application("ovn-central") + removed_unit = application.units[-1].entity_id + missing_unit_err = ("Failed to perform kick test. Unit {} is already" + " missing from the {} cluster status") + sb_status, nb_status = self._cluster_status_action() + + for server_id, server_data in sb_status["Servers"].items(): + if server_data["Unit"] == removed_unit: + removed_sb_id = server_id + break + else: + self.fail(missing_unit_err.format(removed_unit, "Southbound")) + + for server_id, server_data in nb_status["Servers"].items(): + if server_data["Unit"] == removed_unit: + removed_nb_id = server_id + break + else: + self.fail(missing_unit_err.format(removed_unit, "Northbound")) + + logging.info("Killing OVN services on %s unit" % removed_unit) + zaza.model.run_on_unit(removed_unit, "rm -rf /var/run/ovn/*") + + logging.info("Removing unit %s", removed_unit) + zaza.model.destroy_unit(application.entity_id, removed_unit) + zaza.model.wait_for_application_states( + states={"ovn-central": {"workload-status": "active"}} + ) + + # Verify that Server IDs of the removed unit are no longer associated + # with the units ID and show "UNKNOWN" instead + sb_status, nb_status = self._cluster_status_action() + + self.assertEqual(sb_status["Servers"][removed_sb_id]["Unit"], + "UNKNOWN") + self.assertEqual(nb_status["Servers"][removed_nb_id]["Unit"], + "UNKNOWN") + + logging.info("Requesting kick of removed servers (Southbound ID: %s, " + "Northbound ID: %s) from OVN clusters", + removed_sb_id, + removed_nb_id) + action_params = {"sb-server-id": removed_sb_id, + "nb-server-id": removed_nb_id, + "i-really-mean-it": True} + zaza.model.run_action_on_leader("ovn-central", + "cluster-kick", + action_params=action_params) + + # Verify that Server IDs of the removed unit are completely removed + # from the cluster status + sb_status, nb_status = self._cluster_status_action() + + self.assertNotIn(removed_sb_id, sb_status["Servers"]) + self.assertNotIn(removed_nb_id, nb_status["Servers"]) From 9e727657314183d3ab1b770c7f9336c0f30ac081 Mon Sep 17 00:00:00 2001 From: Martin Kalcok Date: Thu, 13 Oct 2022 04:41:36 +0200 Subject: [PATCH 2/5] Updated tests to add their own unit for destruction and added test for downscaling. --- zaza/openstack/charm_tests/ovn/tests.py | 169 ++++++++++++++++++------ 1 file changed, 129 insertions(+), 40 deletions(-) diff --git a/zaza/openstack/charm_tests/ovn/tests.py b/zaza/openstack/charm_tests/ovn/tests.py index 2f0015b..0e49b6a 100644 --- a/zaza/openstack/charm_tests/ovn/tests.py +++ b/zaza/openstack/charm_tests/ovn/tests.py @@ -721,31 +721,99 @@ class OVNCentralDownscaleTests(test_utils.BaseCharmTest): "cluster-status") status_data = status_raw.data["results"] # Verify expected items in the action result - self.assertIn("northbound-cluster", status_data) - self.assertIn("southbound-cluster", status_data) + self.assertIn("ovnnb", status_data) + self.assertIn("ovnsb", status_data) try: - nb_status = yaml.safe_load(status_data["northbound-cluster"]) + nb_status = yaml.safe_load(status_data["ovnnb"]) except yaml.YAMLError: self.fail(yaml_load_err.format("northbound-cluster", - status_data["northbound-cluster"])) + status_data["ovnnb"])) try: - sb_status = yaml.safe_load(status_data["southbound-cluster"]) + sb_status = yaml.safe_load(status_data["ovnsb"]) except yaml.YAMLError: self.fail(yaml_load_err.format("southbound-cluster", - status_data["southbound-cluster"])) + status_data["ovnsb"])) return sb_status, nb_status + @staticmethod + def _add_unit(number_of_units=1): + """Add specified number of units to ovn-central application. + + This function also waits unit the application reaches active state. + """ + zaza.model.add_unit( + "ovn-central", + count=number_of_units, + wait_appear=True + ) + zaza.model.wait_for_application_states() + + @staticmethod + def _remove_unit(unit_name): + """Add specified unit from ovn-central application. + + This function also waits unit the application reaches active state. + """ + zaza.model.destroy_unit("ovn-central", unit_name) + zaza.model.wait_for_application_states() + + def _assert_servers_cleanly_removed(self, sb_id, nb_id): + """Assert that specified members were removed from cluster. + + This checks that they are no longer listed in cluster_status + and that there are no missing server. + :param sb_id: ID of a Southbound server that should no longer be + present + :type sb_id: str + :param nb_id: ID of a Northbound server that should no longer be + present + :type nb_id: str + """ + sb_status, nb_status = self._cluster_status_action() + + self.assertNotIn(sb_id, sb_status["unit_map"]) + self.assertNotIn("UNKNOWN", sb_status["unit_map"]) + self.assertNotIn(nb_id, nb_status["unit_map"]) + self.assertNotIn("UNKNOWN", nb_status["unit_map"]) + + def _get_server_ids(self, unit_name): + """Return SB and NB server id belonging to the servers on the unit. + + :return: Southbound and Northbound IDs (in this order) + :rtype: str, str + """ + missing_unit_err = ("Failed to find Server IDs. Unit {} is already" + " missing from the {} cluster status") + sb_status, nb_status = self._cluster_status_action() + + for unit, server_id in sb_status["unit_map"].items(): + if unit_name == unit: + sb_id = server_id + break + else: + self.fail(missing_unit_err.format(unit_name, "Southbound")) + + for unit, server_id in nb_status["unit_map"].items(): + if unit_name == unit: + nb_id = server_id + break + else: + self.fail(missing_unit_err.format(unit_name, "Northbound")) + + return sb_id, nb_id + def test_cluster_status(self): """Test that cluster-status action returns expected results.""" application = zaza.model.get_application("ovn-central") sb_status, nb_status = self._cluster_status_action() - # Verify that cluster status includes "Servers" field with correct type + # Verify that cluster status includes "unit_map" field with correct + # type for status in (nb_status, sb_status): - self.assertIn("Servers", status) - self.assertIsInstance(status["Servers"], dict) + self.assertIn("unit_map", status) + self.assertIsInstance(status["unit_map"], dict) # Verify that units and their Server IDs are properly paired expected_mapping = {} @@ -764,8 +832,8 @@ class OVNCentralDownscaleTests(test_utils.BaseCharmTest): for unit_name, unit_data in expected_mapping.items(): sb_id = unit_data["sb_id"] nb_id = unit_data["nb_id"] - self.assertEqual(sb_status["Servers"][sb_id]["Unit"], unit_name) - self.assertEqual(nb_status["Servers"][nb_id]["Unit"], unit_name) + self.assertEqual(sb_status["unit_map"][unit_name], sb_id) + self.assertEqual(nb_status["unit_map"][unit_name], nb_id) def test_cluster_kick(self): """Test forcefully removing a member of an ovn cluster. @@ -777,43 +845,26 @@ class OVNCentralDownscaleTests(test_utils.BaseCharmTest): removal of the unit which prevents the unit from gracefully leaving the OVN cluster. """ + logging.info("Add a ovn-central unit to be kicked") + self._add_unit() application = zaza.model.get_application("ovn-central") removed_unit = application.units[-1].entity_id - missing_unit_err = ("Failed to perform kick test. Unit {} is already" - " missing from the {} cluster status") - sb_status, nb_status = self._cluster_status_action() - - for server_id, server_data in sb_status["Servers"].items(): - if server_data["Unit"] == removed_unit: - removed_sb_id = server_id - break - else: - self.fail(missing_unit_err.format(removed_unit, "Southbound")) - - for server_id, server_data in nb_status["Servers"].items(): - if server_data["Unit"] == removed_unit: - removed_nb_id = server_id - break - else: - self.fail(missing_unit_err.format(removed_unit, "Northbound")) + removed_sb_id, removed_nb_id = self._get_server_ids(removed_unit) logging.info("Killing OVN services on %s unit" % removed_unit) zaza.model.run_on_unit(removed_unit, "rm -rf /var/run/ovn/*") logging.info("Removing unit %s", removed_unit) - zaza.model.destroy_unit(application.entity_id, removed_unit) - zaza.model.wait_for_application_states( - states={"ovn-central": {"workload-status": "active"}} - ) + self._remove_unit(removed_unit) # Verify that Server IDs of the removed unit are no longer associated - # with the units ID and show "UNKNOWN" instead + # with the units ID and show in "UNKNOWN" instead sb_status, nb_status = self._cluster_status_action() - self.assertEqual(sb_status["Servers"][removed_sb_id]["Unit"], - "UNKNOWN") - self.assertEqual(nb_status["Servers"][removed_nb_id]["Unit"], - "UNKNOWN") + self.assertNotIn(removed_sb_id, sb_status["unit_map"]) + self.assertIn(removed_sb_id, sb_status["unit_map"]["UNKNOWN"]) + self.assertNotIn(removed_nb_id, nb_status["unit_map"]) + self.assertIn(removed_nb_id, nb_status["unit_map"]["UNKNOWN"]) logging.info("Requesting kick of removed servers (Southbound ID: %s, " "Northbound ID: %s) from OVN clusters", @@ -828,7 +879,45 @@ class OVNCentralDownscaleTests(test_utils.BaseCharmTest): # Verify that Server IDs of the removed unit are completely removed # from the cluster status - sb_status, nb_status = self._cluster_status_action() + self._assert_servers_cleanly_removed(removed_sb_id, removed_nb_id) - self.assertNotIn(removed_sb_id, sb_status["Servers"]) - self.assertNotIn(removed_nb_id, nb_status["Servers"]) + def test_cluster_downscale(self): + """Test unit's graceful departure from OVN cluster. + + When ovn-central unit is removed. It should automatically leave from + OVN clusters (Northbound and Southbound) as well. + """ + logging.info("Adding units needed for downscaling test.") + self._add_unit(2) + leader_status = "leader:" + application = zaza.model.get_application("ovn-central") + + logging.info("Removing unit that hosts OVN follower server.") + for unit in application.units: + if leader_status not in unit.workload_status_message: + non_leader_unit = unit.entity_id + break + else: + non_leader_unit = "" + + if not non_leader_unit: + self.fail("Did not find a unit that's not an OVN cluster leader.") + + non_leader_sb, non_leader_nb = self._get_server_ids(non_leader_unit) + self._remove_unit(non_leader_unit) + self._assert_servers_cleanly_removed(non_leader_sb, non_leader_nb) + + logging.info("Removing unit that hosts OVN leader server.") + for unit in application.units: + if leader_status in unit.workload_status_message: + leader_unit = unit.entity_id + break + else: + leader_unit = "" + + if not non_leader_unit: + self.fail("Did not find a unit that's an OVN cluster leader.") + + leader_sb, leader_nb = self._get_server_ids(leader_unit) + self._remove_unit(leader_unit) + self._assert_servers_cleanly_removed(leader_sb, leader_nb) From 570a9f1a5719ff84e6b79a0fca46bd422449b0c2 Mon Sep 17 00:00:00 2001 From: Martin Kalcok Date: Fri, 21 Oct 2022 14:29:55 +0200 Subject: [PATCH 3/5] Include fix for race condition when using `wait_for_application_states` In some testing environment, `wait_for_application_states()` can execute before juju starts actually performing changes on the units, causing it to return immediately. --- zaza/openstack/charm_tests/ovn/tests.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/zaza/openstack/charm_tests/ovn/tests.py b/zaza/openstack/charm_tests/ovn/tests.py index 0e49b6a..b4cb7cf 100644 --- a/zaza/openstack/charm_tests/ovn/tests.py +++ b/zaza/openstack/charm_tests/ovn/tests.py @@ -623,6 +623,7 @@ class OVSOVNMigrationTest(test_utils.BaseCharmTest): except KeyError: pass zaza.model.wait_for_agent_status() + zaza.model.block_until_all_units_idle() zaza.model.wait_for_application_states( states=self.target_deploy_status) @@ -706,8 +707,8 @@ class OVNCentralDeferredRestartTest( class OVNCentralDownscaleTests(test_utils.BaseCharmTest): """Tests for cluster-status and cluster-kick actions.""" - SB_CMD = "ovs-appctl -t /var/run/ovn/ovnsb_db.ctl {}" - NB_CMD = "ovs-appctl -t /var/run/ovn/ovnnb_db.ctl {}" + SB_CMD = "ovn-appctl -t /var/run/ovn/ovnsb_db.ctl {}" + NB_CMD = "ovn-appctl -t /var/run/ovn/ovnnb_db.ctl {}" def _cluster_status_action(self): """Return Southbound and Northbound cluster status. @@ -741,7 +742,7 @@ class OVNCentralDownscaleTests(test_utils.BaseCharmTest): def _add_unit(number_of_units=1): """Add specified number of units to ovn-central application. - This function also waits unit the application reaches active state. + This function also waits until the application reaches active state. """ zaza.model.add_unit( "ovn-central", @@ -752,11 +753,13 @@ class OVNCentralDownscaleTests(test_utils.BaseCharmTest): @staticmethod def _remove_unit(unit_name): - """Add specified unit from ovn-central application. + """Remove specified unit from ovn-central application. - This function also waits unit the application reaches active state. + This function also waits until the application reaches active state + again. """ zaza.model.destroy_unit("ovn-central", unit_name) + zaza.model.block_until_all_units_idle() zaza.model.wait_for_application_states() def _assert_servers_cleanly_removed(self, sb_id, nb_id): From a0abb64826d43e619a4f6576594a85cdab274db9 Mon Sep 17 00:00:00 2001 From: Martin Kalcok Date: Mon, 24 Oct 2022 11:49:12 +0200 Subject: [PATCH 4/5] Run `update-status` before searching for OVN leader --- zaza/openstack/charm_tests/ovn/tests.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/zaza/openstack/charm_tests/ovn/tests.py b/zaza/openstack/charm_tests/ovn/tests.py index b4cb7cf..d3baf85 100644 --- a/zaza/openstack/charm_tests/ovn/tests.py +++ b/zaza/openstack/charm_tests/ovn/tests.py @@ -896,6 +896,10 @@ class OVNCentralDownscaleTests(test_utils.BaseCharmTest): application = zaza.model.get_application("ovn-central") logging.info("Removing unit that hosts OVN follower server.") + # Run `update-status` hook. This updates the workload status message, + # helping us to correctly identify unit that does not host OVN leader + # servers. + zaza.run(application.run("hooks/update-status")) for unit in application.units: if leader_status not in unit.workload_status_message: non_leader_unit = unit.entity_id @@ -911,6 +915,9 @@ class OVNCentralDownscaleTests(test_utils.BaseCharmTest): self._assert_servers_cleanly_removed(non_leader_sb, non_leader_nb) logging.info("Removing unit that hosts OVN leader server.") + # Run `update-status` hook. This updates the workload status message, + # helping us to correctly identify unit that hosts OVN leader server. + zaza.run(application.run("hooks/update-status")) for unit in application.units: if leader_status in unit.workload_status_message: leader_unit = unit.entity_id @@ -918,7 +925,7 @@ class OVNCentralDownscaleTests(test_utils.BaseCharmTest): else: leader_unit = "" - if not non_leader_unit: + if not leader_unit: self.fail("Did not find a unit that's an OVN cluster leader.") leader_sb, leader_nb = self._get_server_ids(leader_unit) From dadac36ccff4b63d6b40d9361d869e5ead588a03 Mon Sep 17 00:00:00 2001 From: Martin Kalcok Date: Tue, 22 Nov 2022 14:30:10 +0100 Subject: [PATCH 5/5] Determine OVN leader/follower status based on `cluster-status` action. --- zaza/openstack/charm_tests/ovn/tests.py | 65 ++++++++++++++----------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/zaza/openstack/charm_tests/ovn/tests.py b/zaza/openstack/charm_tests/ovn/tests.py index d3baf85..9516678 100644 --- a/zaza/openstack/charm_tests/ovn/tests.py +++ b/zaza/openstack/charm_tests/ovn/tests.py @@ -807,6 +807,30 @@ class OVNCentralDownscaleTests(test_utils.BaseCharmTest): return sb_id, nb_id + def _get_unit_hosting_ovn(self, leader): + """Return ID of a unit with at least one OVN server leader/follower. + + :param leader: If `True`, this method returns ID of a unit that host + at least one leader. Otherwise, the ID of a unit hosting at least + one follower will be returned. + :type leader: bool + :return: ID of a unit hosting OVN leader/follower (based on the + `leader` param) + :rtype: str + """ + # It's sufficient to parse only one of the cluster statuses To + # determine if unit holds at least one leader or one follower. + cluster_status, _ = self._cluster_status_action() + leader_id = cluster_status["leader"] + if leader_id == "self": + leader_id = cluster_status["server_id"][:4] + + for unit_id, server_id in cluster_status["unit_map"].items(): + if (server_id == leader_id) == leader: + return unit_id + else: + self.fail("Test failed to locate unit that hosts OVN leader.") + def test_cluster_status(self): """Test that cluster-status action returns expected results.""" application = zaza.model.get_application("ovn-central") @@ -892,41 +916,24 @@ class OVNCentralDownscaleTests(test_utils.BaseCharmTest): """ logging.info("Adding units needed for downscaling test.") self._add_unit(2) - leader_status = "leader:" - application = zaza.model.get_application("ovn-central") - logging.info("Removing unit that hosts OVN follower server.") - # Run `update-status` hook. This updates the workload status message, - # helping us to correctly identify unit that does not host OVN leader - # servers. - zaza.run(application.run("hooks/update-status")) - for unit in application.units: - if leader_status not in unit.workload_status_message: - non_leader_unit = unit.entity_id - break - else: - non_leader_unit = "" - - if not non_leader_unit: - self.fail("Did not find a unit that's not an OVN cluster leader.") + # Remove unit hosting at least one follower + non_leader_unit = self._get_unit_hosting_ovn(leader=False) + logging.info( + "Removing unit (%s) that hosts OVN follower server.", + non_leader_unit + ) non_leader_sb, non_leader_nb = self._get_server_ids(non_leader_unit) self._remove_unit(non_leader_unit) self._assert_servers_cleanly_removed(non_leader_sb, non_leader_nb) - logging.info("Removing unit that hosts OVN leader server.") - # Run `update-status` hook. This updates the workload status message, - # helping us to correctly identify unit that hosts OVN leader server. - zaza.run(application.run("hooks/update-status")) - for unit in application.units: - if leader_status in unit.workload_status_message: - leader_unit = unit.entity_id - break - else: - leader_unit = "" - - if not leader_unit: - self.fail("Did not find a unit that's an OVN cluster leader.") + # Remove unit hosting at least one leader + leader_unit = self._get_unit_hosting_ovn(leader=True) + logging.info( + "Removing unit (%s) that hosts OVN leader server.", + leader_unit + ) leader_sb, leader_nb = self._get_server_ids(leader_unit) self._remove_unit(leader_unit)