From 2f20669ceb64cbad5dbc5b75d20f8f56a59f3240 Mon Sep 17 00:00:00 2001 From: Martin Kalcok Date: Wed, 30 Apr 2025 17:30:25 +0200 Subject: [PATCH 1/2] ovn: COS integration tests. These tests ensure that OVN Chassis can relate to the COS via grafana-agent. Signed-off-by: Martin Kalcok --- zaza/openstack/charm_tests/cos/setup.py | 143 ++++++++++++++++++++++++ zaza/openstack/charm_tests/ovn/tests.py | 99 ++++++++++++++++ 2 files changed, 242 insertions(+) create mode 100644 zaza/openstack/charm_tests/cos/setup.py diff --git a/zaza/openstack/charm_tests/cos/setup.py b/zaza/openstack/charm_tests/cos/setup.py new file mode 100644 index 0000000..cd5076c --- /dev/null +++ b/zaza/openstack/charm_tests/cos/setup.py @@ -0,0 +1,143 @@ +# Copyright 2025 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Run configuration phase for cos-agent charm.""" +import logging + +from dataclasses import dataclass +from typing import List + +import zaza.charm_lifecycle.utils as lifecycle_utils +import zaza.controller +import zaza.model + +from juju.client._definitions import ApplicationOfferAdminDetails +from zaza import sync_wrapper + + +GRAFANA_OFFER_ALIAS = "cos-grafana" +PROMETHEUS_OFFER_ALIAS = "cos-prometheus" + + +@dataclass +class CosOffer: + """Collection of information about cross-model relation offer. + + :param interface: Interface which at least one endpoint in the offer must + implement. + :param role: Role of the interface. 
Either 'provider' or 'requirer' + :param alias: Alias under which the offer is consumed + """ + + interface: str + role: str + alias: str + + +COS_OFFERS = [ + CosOffer("prometheus_remote_write", "provider", PROMETHEUS_OFFER_ALIAS), + CosOffer("grafana_dashboard", "requirer", GRAFANA_OFFER_ALIAS), +] + + +async def async_list_offers(model: str) -> List[ApplicationOfferAdminDetails]: + """Return a list of cross-model relations offered by the model. + + :param model: Name of the model that's searched for the offers + + :returns: List of offers + """ + controller = zaza.controller.Controller() + await controller.connect() + offer_data = await controller.list_offers(model) + await controller.disconnect() + return offer_data.get("results", []) + + +async def async_consume_cos_offers(consumer_model_name: str) -> List[CosOffer]: + """Consume cross-model relations offers provided by COS model. + + Any offer that contains endpoint with correct interface and a role + (defined by COS_OFFERS) will be consumed. + + :param consumer_model_name: Name of the model that should consume offers + + :returns: List of CosOffer that were consumed + """ + consumed_offers = [] + consumer = await zaza.model.get_model(consumer_model_name) + + for model_name in await zaza.controller.async_list_models(): + for offer in await async_list_offers(model_name): + for endpoint in offer.endpoints: + for cos_ep in COS_OFFERS: + if ( + endpoint.interface == cos_ep.interface and + endpoint.role == cos_ep.role + ): + logging.info( + f"Consuming offer: {offer.offer_url}" + f" under alias {cos_ep.alias}" + ) + await consumer.consume(offer.offer_url, cos_ep.alias) + consumed_offers.append(cos_ep) + + return consumed_offers + + +consume_cos_offers = sync_wrapper(async_consume_cos_offers) + + +async def async_relate_grafana_agent( + model_name: str, cos_offers: List[CosOffer] +) -> None: + """Relate application grafana-agent to the offered COS applications. 
+ + :param model_name: Name of the model in which grafana-agent resides. + :param cos_offers: List of cross-model relation offers to which + grafana-agent should be related. + + :returns: None + """ + model = await zaza.model.get_model(model_name) + for cos_ep in cos_offers: + logging.info(f"Relating grafana-agent to offer {cos_ep.alias}") + await model.integrate("grafana-agent", cos_ep.alias) + + +relate_grafana_agent = sync_wrapper(async_relate_grafana_agent) + + +def try_relate_to_cos(): + """Attempt to relate grafana-agent with COS applications.""" + logging.info( + "Attempting to relate grafana-agent to COS via cross-model relations" + ) + model = zaza.model.get_juju_model() + cos_offers = consume_cos_offers(model) + if cos_offers: + relate_grafana_agent(model, cos_offers) + zaza.model.wait_for_agent_status() + test_config = lifecycle_utils.get_charm_config(fatal=False) + test_config['target_deploy_status']['grafana-agent'][ + 'workload-status' + ] = 'active' + zaza.model.wait_for_application_states( + states=test_config.get("target_deploy_status", {}), timeout=7200 + ) + else: + logging.warn( + "No COS cross-model relation offers found. grafana-agent" + " will remain blocked" + ) diff --git a/zaza/openstack/charm_tests/ovn/tests.py b/zaza/openstack/charm_tests/ovn/tests.py index 7b131bd..c6a7721 100644 --- a/zaza/openstack/charm_tests/ovn/tests.py +++ b/zaza/openstack/charm_tests/ovn/tests.py @@ -24,10 +24,109 @@ import yaml import zaza import zaza.model +import zaza.openstack.charm_tests.ceph.mon.integration as cos_integration import zaza.openstack.charm_tests.test_utils as test_utils import zaza.openstack.utilities.generic as generic_utils import zaza.utilities.juju +from zaza.openstack.charm_tests.cos.setup import GRAFANA_OFFER_ALIAS + + +class BaseCosIntegrationTest(test_utils.BaseCharmTest): + """Tests to verify that OVN charms are successfully related to COS. + + The integration with COS is facilitated via grafana-agent charm. 
+ """ + + GRAFANA_AGENT = 'grafana-agent' + GRAFANA_CREDENTIALS = {} + + # Class variables below need to be overridden in child classes + APPLICATION_NAME = "" + DASHBOARD = "" + PROM_QUERY = "" + + @classmethod + def setUpClass(cls, model_alias=None): + """Run class setup for running OVN COS integration tests.""" + super(BaseCosIntegrationTest, cls).setUpClass( + cls.APPLICATION_NAME, model_alias) + + app_data = zaza.model.get_application(cls.GRAFANA_AGENT) + units = list(app_data.units) + for unit in units: + if unit.workload_status == 'blocked': + raise Exception(f"Application {cls.GRAFANA_AGENT} is in" + " blocked state and is probably not related" + " to the COS.") + + cos_model = None + for remote_app in app_data.model.remote_applications.values(): + if remote_app.name == GRAFANA_OFFER_ALIAS: + offer_url = juju.offerendpoints.parse_offer_url( + remote_app.offer_url + ) + cos_model = offer_url.model + break + else: + raise Exception("COS model offering Grafana relation not found") + + cls.GRAFANA_CREDENTIALS = zaza.model.run_action_on_leader( + "grafana", "get-admin-password", model_name=cos_model, + raise_on_failure=True + ).results + + # Wait for maximum of about 2 minutes for metrics to show up in prometheus + @tenacity.retry(wait=tenacity.wait_exponential(min=1, max=60), + reraise=True, stop=tenacity.stop_after_attempt(8)) + def _prometheus_scrape_check(self, prom_url, query): + response = requests.get( + f"{prom_url}/query", params={"query": query}, verify=False + ) + data = response.json() + logging.debug(data) + if data["status"] != "success": + raise Exception("Query failed: " + f"{data.get('error', 'Unknown error')}") + if not data['data']['result']: + raise Exception(f"Metric '{query}' not found in Prometheus") + + def test_prometheus_scraping(self): + """Test that prometheus successfully scrapes OVN metrics.""" + prom_url = cos_integration.get_prom_api_url("grafana-agent") + try: + self._prometheus_scrape_check(prom_url, self.PROM_QUERY) + except 
Exception as exc: + self.fail(exc) + + def test_grafana_dashboards(self): + """Test that grafana dashboard got successfully imported.""" + dashboards = cos_integration.get_dashboards( + self.GRAFANA_CREDENTIALS['url'], + 'admin', + self.GRAFANA_CREDENTIALS['admin-password'], + ) + + for dashboard in dashboards: + if dashboard['title'] == self.DASHBOARD: + break + else: + self.fail(f"Grafana dashboard '{self.DASHBOARD}' not found.") + + +class ChassisCosIntegrationTest(BaseCosIntegrationTest): + """Variant of COS integration tests for OVN Chassis.""" + + APPLICATION_NAME = 'ovn-chassis' + DASHBOARD = 'Juju: OVN Chassis' + PROM_QUERY = 'ovs_up' + + +class DedicatedChassisCosIntegrationTest(ChassisCosIntegrationTest): + """Variant of COS integration tests for OVN Dedicated Chassis.""" + + APPLICATION_NAME = 'ovn-dedicated-chassis' + class BaseCharmOperationTest(test_utils.BaseCharmTest): """Base OVN Charm operation tests.""" From e4e1d18624ba7800c9e57f22a00ed2b7b78903a3 Mon Sep 17 00:00:00 2001 From: Martin Kalcok Date: Thu, 1 May 2025 10:59:33 +0200 Subject: [PATCH 2/2] test_utils: Error recovery on huge page enablement Rebooting units can cause them to go into an error state[0] and it seems to pop up in our CI from time to time when we enable huge pages on a machine. We can try to recover from this at least once before giving up on the test run. 
[0] https://bugs.launchpad.net/juju/+bug/2077936 Signed-off-by: Martin Kalcok --- zaza/openstack/charm_tests/test_utils.py | 32 +++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/zaza/openstack/charm_tests/test_utils.py b/zaza/openstack/charm_tests/test_utils.py index c6873a6..bc3d1de 100644 --- a/zaza/openstack/charm_tests/test_utils.py +++ b/zaza/openstack/charm_tests/test_utils.py @@ -733,12 +733,32 @@ class BaseCharmTest(unittest.TestCase): logging.info('Checking CPU topology on {}'.format(unit.name)) self.assert_unit_cpu_topology(unit, nr_1g_hugepages) logging.info('Enabling hugepages on {}'.format(unit.name)) - zaza.utilities.machine_os.enable_hugepages( - unit, nr_1g_hugepages, model_name=self.model_name) - logging.info('Enabling unsafe VFIO NOIOMMU mode on {}' - .format(unit.name)) - zaza.utilities.machine_os.enable_vfio_unsafe_noiommu_mode( - unit, model_name=self.model_name) + try: + zaza.utilities.machine_os.enable_hugepages( + unit, nr_1g_hugepages, model_name=self.model_name) + except zaza.model.UnitError: + logging.warn(f'Unit {unit.name} went into error state during' + ' huge pages enablement. Attempting to recover.' + ' Possible cause:' + ' https://bugs.launchpad.net/juju/+bug/2077936') + zaza.model.resolve_units() + + try: + logging.info('Enabling unsafe VFIO NOIOMMU mode on {}' + .format(unit.name)) + zaza.utilities.machine_os.enable_vfio_unsafe_noiommu_mode( + unit, model_name=self.model_name) + model.wait_for_application_states( + model_name=self.model_name, + states=self.test_config.get('target_deploy_status', {})) + except zaza.model.UnitError: + logging.warn(f'Unit {unit.name} went into error state while' + ' setting VFIO NOIOMMU mode. Attempting to' + ' recover. 
Possible cause:' + ' https://bugs.launchpad.net/juju/+bug/2077936') + zaza.model.resolve_units() + zaza.utilities.machine_os.enable_vfio_unsafe_noiommu_mode( + unit, model_name=self.model_name) def disable_hugepages_vfio_on_hvs_in_vms(self): """Disable hugepages and unsafe VFIO NOIOMMU on virtual hypervisors."""