Merge pull request #933 from mkalcok/ovn-downscale

Tests for ovn-central downscaling and cluster status
This commit is contained in:
Frode Nordahl
2022-11-28 12:02:52 +01:00
committed by GitHub

View File

@@ -19,7 +19,7 @@ import logging
import juju
import tenacity
import yaml
import zaza
import zaza.model
@@ -623,6 +623,7 @@ class OVSOVNMigrationTest(test_utils.BaseCharmTest):
except KeyError:
pass
zaza.model.wait_for_agent_status()
zaza.model.block_until_all_units_idle()
zaza.model.wait_for_application_states(
states=self.target_deploy_status)
@@ -701,3 +702,239 @@ class OVNCentralDeferredRestartTest(
self.run_package_change_test(
'ovn-central',
'ovn-central')
class OVNCentralDownscaleTests(test_utils.BaseCharmTest):
    """Tests for ovn-central downscaling and cluster management actions.

    Covers the "cluster-status" and "cluster-kick" actions as well as the
    departure of ovn-central units from the Southbound/Northbound OVN
    clusters when the application is scaled down.
    """

    # Templates for commands sent to the Southbound/Northbound database
    # control sockets; "{}" is replaced with the ovn-appctl sub-command.
    SB_CMD = "ovn-appctl -t /var/run/ovn/ovnsb_db.ctl {}"
    NB_CMD = "ovn-appctl -t /var/run/ovn/ovnnb_db.ctl {}"

    def _cluster_status_action(self):
        """Return Southbound and Northbound cluster status.

        This function returns data as reported by "cluster-status" action
        parsed from YAML, in the following order:
        "Southbound status", "Northbound status"

        The test fails if either the "ovnsb" or "ovnnb" item is missing
        from the action result or if it can not be parsed as YAML.

        :return: Parsed Southbound and Northbound status (in this order)
        :rtype: tuple
        """
        yaml_load_err = "Status of '{}' could not be loaded as yaml:\n{}"
        status_raw = zaza.model.run_action_on_leader("ovn-central",
                                                     "cluster-status")
        status_data = status_raw.data["results"]
        # Verify expected items in the action result
        self.assertIn("ovnnb", status_data)
        self.assertIn("ovnsb", status_data)

        parsed = {}
        for key, label in (("ovnnb", "northbound-cluster"),
                           ("ovnsb", "southbound-cluster")):
            try:
                parsed[key] = yaml.safe_load(status_data[key])
            except yaml.YAMLError:
                self.fail(yaml_load_err.format(label, status_data[key]))

        return parsed["ovnsb"], parsed["ovnnb"]

    @staticmethod
    def _mapped_server_ids(cluster_status):
        """Return server IDs that "unit_map" pairs with a named unit.

        "unit_map" is keyed by unit name, so a server ID has to be looked
        up among its values. The "UNKNOWN" entry is skipped because it does
        not describe a unit.

        :param cluster_status: Parsed status of one OVN cluster
        :type cluster_status: dict
        :return: Server IDs currently associated with a unit
        :rtype: list
        """
        return [server_id
                for unit, server_id in cluster_status["unit_map"].items()
                if unit != "UNKNOWN"]

    @staticmethod
    def _add_unit(number_of_units=1):
        """Add specified number of units to ovn-central application.

        After the units appear, this function blocks on
        `zaza.model.wait_for_application_states()` called with its default
        arguments.

        :param number_of_units: How many units to add
        :type number_of_units: int
        """
        zaza.model.add_unit(
            "ovn-central",
            count=number_of_units,
            wait_appear=True
        )
        zaza.model.wait_for_application_states()

    @staticmethod
    def _remove_unit(unit_name):
        """Remove specified unit from ovn-central application.

        After the unit is destroyed, this function waits for all units to
        become idle and then blocks on
        `zaza.model.wait_for_application_states()` called with its default
        arguments.

        :param unit_name: Name of the unit to remove
        :type unit_name: str
        """
        zaza.model.destroy_unit("ovn-central", unit_name)
        zaza.model.block_until_all_units_idle()
        zaza.model.wait_for_application_states()

    def _assert_servers_cleanly_removed(self, sb_id, nb_id):
        """Assert that specified members were removed from cluster.

        This checks that they are no longer listed in cluster_status
        and that there are no missing servers.

        :param sb_id: ID of a Southbound server that should no longer be
            present
        :type sb_id: str
        :param nb_id: ID of a Northbound server that should no longer be
            present
        :type nb_id: str
        """
        sb_status, nb_status = self._cluster_status_action()

        for server_id, status in ((sb_id, sb_status), (nb_id, nb_status)):
            # Membership test on "unit_map" itself would only look at unit
            # names (the keys), so compare against the mapped server IDs.
            self.assertNotIn(server_id, self._mapped_server_ids(status))
            self.assertNotIn("UNKNOWN", status["unit_map"])

    def _get_server_ids(self, unit_name):
        """Return SB and NB server id belonging to the servers on the unit.

        The test fails if the unit is not listed in the "unit_map" of either
        cluster status.

        :param unit_name: Name of the unit whose server IDs are requested
        :type unit_name: str
        :return: Southbound and Northbound IDs (in this order)
        :rtype: str, str
        """
        missing_unit_err = ("Failed to find Server IDs. Unit {} is already"
                            " missing from the {} cluster status")
        sb_status, nb_status = self._cluster_status_action()

        server_ids = []
        for status, cluster in ((sb_status, "Southbound"),
                                (nb_status, "Northbound")):
            unit_map = status["unit_map"]
            if unit_name not in unit_map:
                self.fail(missing_unit_err.format(unit_name, cluster))
            server_ids.append(unit_map[unit_name])

        sb_id, nb_id = server_ids
        return sb_id, nb_id

    def _get_unit_hosting_ovn(self, leader):
        """Return ID of a unit with at least one OVN server leader/follower.

        The test fails if no matching unit is found.

        :param leader: If `True`, this method returns ID of a unit that host
            at least one leader. Otherwise, the ID of a unit hosting at least
            one follower will be returned.
        :type leader: bool
        :return: ID of a unit hosting OVN leader/follower (based on the
            `leader` param)
        :rtype: str
        """
        # It's sufficient to parse only one of the cluster statuses to
        # determine if unit holds at least one leader or one follower.
        cluster_status, _ = self._cluster_status_action()
        leader_id = cluster_status["leader"]
        if leader_id == "self":
            # The reporting server is the leader; "unit_map" values are
            # compared against the first four characters of its ID.
            leader_id = cluster_status["server_id"][:4]

        for unit_id, server_id in cluster_status["unit_map"].items():
            if unit_id == "UNKNOWN":
                # Not a unit; it must never be returned as one.
                continue
            if (server_id == leader_id) == leader:
                return unit_id

        role = "leader" if leader else "follower"
        self.fail(
            "Test failed to locate unit that hosts OVN {}.".format(role))

    def test_cluster_status(self):
        """Test that cluster-status action returns expected results."""
        application = zaza.model.get_application("ovn-central")
        sb_status, nb_status = self._cluster_status_action()

        # Verify that cluster status includes "unit_map" field with correct
        # type
        for status in (nb_status, sb_status):
            self.assertIn("unit_map", status)
            self.assertIsInstance(status["unit_map"], dict)

        # Verify that units and their Server IDs are properly paired
        nb_status_cmd = self.NB_CMD.format("cluster/status OVN_Northbound")
        sb_status_cmd = self.SB_CMD.format("cluster/status OVN_Southbound")
        expected_mapping = {}
        for unit in application.units:
            unit_name = unit.entity_id
            nb_cluster_status = zaza.model.run_on_unit(unit_name,
                                                       nb_status_cmd)
            sb_cluster_status = zaza.model.run_on_unit(unit_name,
                                                       sb_status_cmd)
            nb_lines = nb_cluster_status["Stdout"].splitlines()
            sb_lines = sb_cluster_status["Stdout"].splitlines()
            self.assertTrue(
                nb_lines,
                "No output from '{}' on {}".format(nb_status_cmd, unit_name))
            self.assertTrue(
                sb_lines,
                "No output from '{}' on {}".format(sb_status_cmd, unit_name))
            # The first line of the output is compared with the ID reported
            # by the action for this unit.
            expected_mapping[unit_name] = {"sb_id": sb_lines[0],
                                           "nb_id": nb_lines[0]}

        for unit_name, unit_data in expected_mapping.items():
            self.assertIn(unit_name, sb_status["unit_map"])
            self.assertIn(unit_name, nb_status["unit_map"])
            self.assertEqual(sb_status["unit_map"][unit_name],
                             unit_data["sb_id"])
            self.assertEqual(nb_status["unit_map"][unit_name],
                             unit_data["nb_id"])

    def test_cluster_kick(self):
        """Test forcefully removing a member of an ovn cluster.

        If unit fails to remove itself gracefully from the
        Southbound/Northbound OVN clusters, it can be kicked using
        "cluster-kick" action. This test simulates such scenario by removing
        contents of "/var/run/ovn/*" to mess with OVN communication before
        removal of the unit which prevents the unit from gracefully leaving
        the OVN cluster.
        """
        logging.info("Add a ovn-central unit to be kicked")
        self._add_unit()
        application = zaza.model.get_application("ovn-central")
        # NOTE(review): assumes the freshly added unit is the last item of
        # `application.units` - confirm that this ordering is guaranteed.
        removed_unit = application.units[-1].entity_id
        removed_sb_id, removed_nb_id = self._get_server_ids(removed_unit)

        logging.info("Killing OVN services on %s unit", removed_unit)
        zaza.model.run_on_unit(removed_unit, "rm -rf /var/run/ovn/*")

        logging.info("Removing unit %s", removed_unit)
        self._remove_unit(removed_unit)

        # Verify that Server IDs of the removed unit are no longer associated
        # with any unit and show in "UNKNOWN" instead
        sb_status, nb_status = self._cluster_status_action()
        for server_id, status in ((removed_sb_id, sb_status),
                                  (removed_nb_id, nb_status)):
            self.assertNotIn(server_id, self._mapped_server_ids(status))
            self.assertIn("UNKNOWN", status["unit_map"])
            self.assertIn(server_id, status["unit_map"]["UNKNOWN"])

        logging.info("Requesting kick of removed servers (Southbound ID: %s, "
                     "Northbound ID: %s) from OVN clusters",
                     removed_sb_id,
                     removed_nb_id)
        action_params = {"sb-server-id": removed_sb_id,
                         "nb-server-id": removed_nb_id,
                         "i-really-mean-it": True}
        zaza.model.run_action_on_leader("ovn-central",
                                        "cluster-kick",
                                        action_params=action_params)

        # Verify that Server IDs of the removed unit are completely removed
        # from the cluster status
        self._assert_servers_cleanly_removed(removed_sb_id, removed_nb_id)

    def test_cluster_downscale(self):
        """Test unit's graceful departure from OVN cluster.

        When an ovn-central unit is removed, it should automatically leave
        the OVN clusters (Northbound and Southbound) as well.
        """
        logging.info("Adding units needed for downscaling test.")
        self._add_unit(2)

        # First remove a unit hosting at least one follower, then a unit
        # hosting at least one leader.
        for hosts_leader, role in ((False, "follower"), (True, "leader")):
            unit = self._get_unit_hosting_ovn(leader=hosts_leader)
            logging.info(
                "Removing unit (%s) that hosts OVN %s server.",
                unit,
                role
            )
            # IDs have to be collected before the unit is removed; they are
            # no longer resolvable from the cluster status afterwards.
            sb_id, nb_id = self._get_server_ids(unit)
            self._remove_unit(unit)
            self._assert_servers_cleanly_removed(sb_id, nb_id)