Restart blocked mysql routers (#565)

LP Bug #1918953 [0] was resolved on the mysql-innodb-cluster side using coordinated delayed action. Since then we have seen a similar issue in CI [1] after the pause and resume test. Mysql-router hangs with:
2021-04-21 20:42:05 metadata_cache WARNING [7f3f968d5700] Instance '192.168.254.18:3306' [72b4ac2c-a2dd-11eb-82a5-fa163e5a4b7e] of replicaset 'default' is unreachable. Increasing metadata cache refresh frequency.

This cannot be fixed from the cluster side. I will be looking into solutions on the mysql-router side. But in the meantime, to unblock the mysql-innodb-cluster gate, this change restarts blocked MySQL routers.

[0] https://bugs.launchpad.net/charm-mysql-router/+bug/1918953
[1] https://openstack-ci-reports.ubuntu.com/artifacts/test_charm_pipeline_func_full/openstack/charm-mysql-innodb-cluster/786514/3/8479/consoleText.test_charm_func_full_11494.txt
This commit is contained in:
David Ames
2021-04-23 09:26:52 -07:00
committed by GitHub
parent a05263f111
commit 5d533fba6a
2 changed files with 85 additions and 0 deletions

View File

@@ -113,6 +113,56 @@ class MySQLBaseTest(test_utils.OpenStackBaseTest):
if _primary_ip in unit.public_address:
return unit
def get_blocked_mysql_routers(self):
    """Get blocked mysql routers.

    Runs the update-status hook on every mysql-router unit first so that
    the workload status read afterwards is current, then inspects the
    subordinates of each principal unit and collects those belonging to
    a mysql-router application whose workload status contains "blocked".

    A mysql-router application that is not subordinate to any principal
    contributes no units to the result.

    :returns: List of blocked mysql-router unit names
    :rtype: List[str]
    """
    # Look the applications up once; both passes below use the same set.
    router_applications = self.get_applications_with_substring_in_name(
        "mysql-router")

    # Make sure mysql-router units are up to date
    # We cannot assume they are as there is up to a five minute delay
    mysql_router_units = [
        unit.entity_id
        for application in router_applications
        for unit in zaza.model.get_units(application)]
    self.run_update_status_hooks(mysql_router_units)

    # Get up to date status
    status = zaza.model.get_status().applications
    blocked_mysql_routers = []
    # Check if the units are blocked
    for application in router_applications:
        # Subordinate dance with primary
        # There is no status[application]["units"] for subordinates, so
        # the subordinate units are reached through every principal the
        # application is related to, not just the first one.
        for principal in status[application].subordinate_to:
            principal_units = status[principal].units
            for appunit in principal_units:
                subordinates = principal_units[appunit].subordinates
                for subunit in subordinates.keys():
                    # A principal unit can carry subordinates of other
                    # applications; only report the ones that belong to
                    # this mysql-router application.
                    if subunit.rpartition("/")[0] != application:
                        continue
                    workload = subordinates[subunit].workload_status
                    if "blocked" in workload.status:
                        blocked_mysql_routers.append(subunit)
    return blocked_mysql_routers
def restart_blocked_mysql_routers(self):
    """Restart the service on every blocked mysql-router unit.

    The blocked units are obtained from ``get_blocked_mysql_routers``.
    For each one a warning is logged and ``systemctl restart`` is run on
    the unit.

    :returns: None
    :rtype: None
    """
    for router_unit in self.get_blocked_mysql_routers():
        # The part of the unit name before the last "/" is used as the
        # name of the systemd service to restart.
        service_name, _, _ = router_unit.rpartition("/")
        logging.warning(
            "Restarting blocked mysql-router unit {}".format(router_unit))
        restart_cmd = "systemctl restart {}".format(service_name)
        zaza.model.run_on_unit(router_unit, restart_cmd)
class MySQLCommonTests(MySQLBaseTest):
"""Common mysql charm tests."""
@@ -169,6 +219,15 @@ class MySQLCommonTests(MySQLBaseTest):
"""
with self.pause_resume(self.services):
logging.info("Testing pause resume")
logging.info("Wait till model is idle ...")
zaza.model.block_until_all_units_idle()
# If there are any blocked mysql routers restart them.
self.restart_blocked_mysql_routers()
assert not self.get_blocked_mysql_routers(), (
"Should no longer be blocked mysql-router units")
logging.info("Passed pause and resume test.")

View File

@@ -573,6 +573,32 @@ class BaseCharmTest(unittest.TestCase):
return self.test_config.get('tests_options', {}).get(
'.'.join(caller_path + [key]), default)
def get_applications_with_substring_in_name(self, substring):
    """Return the names of model applications that contain a substring.

    The application names are read from the current model status.

    :param substring: String to search for in application names
    :type substring: str
    :returns: List of matching applications
    :rtype: List
    """
    application_names = model.get_status().applications.keys()
    return [name for name in application_names if substring in name]
def run_update_status_hooks(self, units):
    """Execute the update-status hook on each of the given units.

    The hooks are run one unit at a time, in the order given.

    :param units: List of unit names or unit.entity_id
    :type units: List[str]
    :returns: None
    :rtype: None
    """
    hook_cmd = "hooks/update-status"
    for unit_name in units:
        model.run_on_unit(unit_name, hook_cmd)
class OpenStackBaseTest(BaseCharmTest):
"""Generic helpers for testing OpenStack API charms."""