From 0f9fc9c49b47725b251f7f87b4ad0d2c82f08d4d Mon Sep 17 00:00:00 2001 From: Liam Young Date: Tue, 30 Jul 2019 09:25:42 +0000 Subject: [PATCH] Only auto-resolve update-status hooks If a unit is rebooted while executing a hook, Juju will mark it as errored when it comes back. To work around this in tests, units in an error state are resolved on reboot. However, this could mask a genuine error. So, to minimise the risk, only resolve units that went into an error state while running update-status: the system was in a stable state prior to reboot, so that should have been the only hook running. A charm going into an error state on reboot applies equally to subordinates, so use the same logic for hacluster. block_until_all_units_idle can return while a subordinate hook is still queued, so check for update-status errors when manually queuing an update-status run via action. --- zaza/openstack/charm_tests/mysql/tests.py | 33 ++++++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/zaza/openstack/charm_tests/mysql/tests.py b/zaza/openstack/charm_tests/mysql/tests.py index 2d175c8..6c10c64 100644 --- a/zaza/openstack/charm_tests/mysql/tests.py +++ b/zaza/openstack/charm_tests/mysql/tests.py @@ -246,6 +246,22 @@ class PerconaClusterColdStartTest(PerconaClusterTest): cls.machines = ( juju_utils.get_machine_uuids_for_application(cls.application)) + def resolve_update_status_errors(self): + """Resolve update-status hook errors. + + This should *only* be used after an instance hard reboot to handle the + situation where an update-status hook was running when the unit was + rebooted. + """ + zaza.model.resolve_units( + application_name='percona-cluster', + erred_hook='update-status', + wait=True) + zaza.model.resolve_units( + application_name='hacluster', + erred_hook='update-status', + wait=True) + def test_100_cold_start_bootstrap(self): """Bootstrap a non-leader node.
@@ -274,19 +290,28 @@ class PerconaClusterColdStartTest(PerconaClusterTest): for uuid in self.machines: self.nova_client.servers.start(uuid) + for unit in zaza.model.get_units(self.application): + zaza.model.block_until_unit_wl_status( + unit.entity_id, + 'unknown', + negate_match=True) + logging.debug("Wait till model is idle ...") # XXX If a hook was executing on a unit when it was powered off # it comes back in an error state. try: zaza.model.block_until_all_units_idle() except zaza.model.UnitError: - zaza.model.resolve_units( - application_name='percona-cluster', - wait=True) + self.resolve_update_status_errors() zaza.model.block_until_all_units_idle() + logging.debug("Wait for application states ...") for unit in zaza.model.get_units(self.application): - zaza.model.run_on_unit(unit.entity_id, "hooks/update-status") + try: + zaza.model.run_on_unit(unit.entity_id, "hooks/update-status") + except zaza.model.UnitError: + self.resolve_update_status_errors() + zaza.model.run_on_unit(unit.entity_id, "hooks/update-status") states = {"percona-cluster": { "workload-status": "blocked", "workload-status-message": "MySQL is down"}}