test_utils: Error recovery on huge page enablement

Rebooting units can cause them to go into an error state[0],
and it seems to pop up in our CI from time to time when we
enable huge pages on a machine.
We can try to recover from this at least once before
giving up on the test run.

[0] https://bugs.launchpad.net/juju/+bug/2077936

Signed-off-by: Martin Kalcok <martin.kalcok@canonical.com>
This commit is contained in:
Martin Kalcok
2025-05-01 10:59:33 +02:00
parent 2f20669ceb
commit e4e1d18624

View File

@@ -733,12 +733,32 @@ class BaseCharmTest(unittest.TestCase):
logging.info('Checking CPU topology on {}'.format(unit.name))
self.assert_unit_cpu_topology(unit, nr_1g_hugepages)
logging.info('Enabling hugepages on {}'.format(unit.name))
zaza.utilities.machine_os.enable_hugepages(
unit, nr_1g_hugepages, model_name=self.model_name)
logging.info('Enabling unsafe VFIO NOIOMMU mode on {}'
.format(unit.name))
zaza.utilities.machine_os.enable_vfio_unsafe_noiommu_mode(
unit, model_name=self.model_name)
try:
zaza.utilities.machine_os.enable_hugepages(
unit, nr_1g_hugepages, model_name=self.model_name)
except zaza.model.UnitError:
logging.warn(f'Unit {unit.name} went into error state during'
' huge pages enablement. Attempting to recover.'
' Possible cause:'
' https://bugs.launchpad.net/juju/+bug/2077936')
zaza.model.resolve_units()
try:
logging.info('Enabling unsafe VFIO NOIOMMU mode on {}'
.format(unit.name))
zaza.utilities.machine_os.enable_vfio_unsafe_noiommu_mode(
unit, model_name=self.model_name)
model.wait_for_application_states(
model_name=self.model_name,
states=self.test_config.get('target_deploy_status', {}))
except zaza.model.UnitError:
logging.warn(f'Unit {unit.name} went into error state while'
' setting VFIO NOIOMMU mode. Attempting to'
' recover. Possible cause:'
' https://bugs.launchpad.net/juju/+bug/2077936')
zaza.model.resolve_units()
zaza.utilities.machine_os.enable_vfio_unsafe_noiommu_mode(
unit, model_name=self.model_name)
def disable_hugepages_vfio_on_hvs_in_vms(self):
"""Disable hugepages and unsafe VFIO NOIOMMU on virtual hypervisors."""