Only auto-resolve update-status hooks

If a unit is rebooted while executing a hook, Juju will mark it as
errored when it comes back. To work around this in tests, units in
an error state are resolved on reboot. However, this could mask a
genuine error. So, to minimise the risk, only resolve units that
went into an error state while running update-status: the system
was in a stable state prior to reboot, so that should have been the
only hook running.

The problem of a charm going into an error state on reboot applies
equally to subordinates, so use the same logic for hacluster.

block_until_all_units_idle can return while a subordinate hook is
still queued, so also check for update-status errors when manually
queuing an update-status run via an action.
This commit is contained in:
Liam Young
2019-07-30 09:25:42 +00:00
parent 20402e4f30
commit 0f9fc9c49b

View File

@@ -246,6 +246,22 @@ class PerconaClusterColdStartTest(PerconaClusterTest):
cls.machines = (
juju_utils.get_machine_uuids_for_application(cls.application))
def resolve_update_status_errors(self):
    """Resolve units that errored while running the update-status hook.

    Only call this after an instance hard reboot: it handles the case
    where an update-status hook was running when the unit was rebooted.
    The percona-cluster application is handled first, then hacluster;
    both calls are restricted to erred_hook='update-status'.
    """
    # Same call for the principal application and its subordinate, in the
    # same order as before.
    for app_name in ('percona-cluster', 'hacluster'):
        zaza.model.resolve_units(
            application_name=app_name,
            erred_hook='update-status',
            wait=True)
def test_100_cold_start_bootstrap(self):
"""Bootstrap a non-leader node.
@@ -274,19 +290,28 @@ class PerconaClusterColdStartTest(PerconaClusterTest):
for uuid in self.machines:
self.nova_client.servers.start(uuid)
for unit in zaza.model.get_units(self.application):
zaza.model.block_until_unit_wl_status(
unit.entity_id,
'unknown',
negate_match=True)
logging.debug("Wait till model is idle ...")
# XXX If a hook was executing on a unit when it was powered off
# it comes back in an error state.
try:
zaza.model.block_until_all_units_idle()
except zaza.model.UnitError:
zaza.model.resolve_units(
application_name='percona-cluster',
wait=True)
self.resolve_update_status_errors()
zaza.model.block_until_all_units_idle()
logging.debug("Wait for application states ...")
for unit in zaza.model.get_units(self.application):
zaza.model.run_on_unit(unit.entity_id, "hooks/update-status")
try:
zaza.model.run_on_unit(unit.entity_id, "hooks/update-status")
except zaza.model.UnitError:
self.resolve_update_status_errors()
zaza.model.run_on_unit(unit.entity_id, "hooks/update-status")
states = {"percona-cluster": {
"workload-status": "blocked",
"workload-status-message": "MySQL is down"}}