Add OVS to OVN migration tests

2020-06-24 10:59:40 +02:00
parent 4ae1b39ed2
commit 3c457e4fbb
2 changed files with 372 additions and 0 deletions
@@ -0,0 +1,139 @@
+# Copyright 2020 Canonical Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Code for configuring OVN tests."""
+
+import logging
+
+import zaza
+
+import zaza.openstack.charm_tests.test_utils as test_utils
+
+
+class _OVNSetupHelper(test_utils.BaseCharmTest):
+    """Helper class to get at the common `config_change` helper.
+
+    The class is not a test in itself, it subclasses the base charm test
+    class so that the configuration steps below can use `config_change`.
+    """
+
+    @staticmethod
+    def _get_instance_mtu_from_global_physnet_mtu():
+        """Calculate instance mtu from Neutron API global-physnet-mtu.
+
+        :returns: Value for instance mtu after migration.
+        :rtype: int
+        """
+        n_api_config = zaza.model.get_application_config('neutron-api')
+
+        # NOTE: we would have to adjust this calculation if we use IPv6 tunnel
+        # endpoints
+        GENEVE_ENCAP_OVERHEAD = 38
+        IP4_HEADER_SIZE = 20
+        return int(n_api_config['global-physnet-mtu']['value']) - (
+            GENEVE_ENCAP_OVERHEAD + IP4_HEADER_SIZE)
+
+    def _set_config_if_app_present(self, app, cfg):
+        """Apply configuration to an application if it exists in the model.
+
+        The intent is to change the config and not restore it. We accomplish
+        that by passing in the same value for default and alternate to the
+        `config_change` helper.
+
+        The reason for using the `config_change` helper for this is that it
+        already deals with all the permutations of config already being set
+        etc and does not get into trouble if the test bundle already have the
+        values we try to set.
+
+        :param app: Name of application to configure.
+        :type app: str
+        :param cfg: Configuration keys and the values to set for them.
+        :type cfg: dict
+        :returns: True when the application was present and configured,
+                  False when the application is not present in the model.
+        :rtype: bool
+        """
+        try:
+            # Only the presence check is guarded. A KeyError raised while
+            # actually applying the configuration must not be mistaken for
+            # the application being absent from the model.
+            zaza.model.get_application(app)
+        except KeyError:
+            return False
+
+        for k, v in cfg.items():
+            logging.info('Setting `{}` to "{}" on "{}"...'
+                         .format(k, v, app))
+        with self.config_change(cfg, cfg, app):
+            # Nothing to do while the config is applied, see docstring.
+            pass
+        return True
+
+    def configure_ngw_novs(self):
+        """Configure n-ovs and n-gw units.
+
+        Applications that are not present in the model are skipped.
+        """
+        cfg = {
+            # To be able to successfully clean up after the Neutron agents we
+            # need to use the 'openvswitch' `firewall-driver`.
+            'firewall-driver': 'openvswitch',
+            # To be able to have instances successfully survive the migration
+            # without communication issues we need to lower the MTU announced
+            # to instances prior to migration.
+            #
+            # NOTE: In a real world scenario the end user would configure the
+            # MTU at least 24 hrs prior to doing the migration to allow
+            # instances to reconfigure as they renew the DHCP lease.
+            #
+            # NOTE: For classic n-gw topologies the `instance-mtu` config
+            # is a NOOP on neutron-openvswitch units, but that is ok.
+            'instance-mtu': self._get_instance_mtu_from_global_physnet_mtu()
+        }
+        for app in ('neutron-gateway', 'neutron-openvswitch'):
+            self._set_config_if_app_present(app, cfg)
+
+    def configure_ovn_mappings(self):
+        """Copy mappings from n-gw or n-ovs application.
+
+        :raises: RuntimeError when none of the source applications provide
+                 the mappings, or none of the destination applications are
+                 present in the model.
+        """
+        dst_apps = ('ovn-dedicated-chassis', 'ovn-chassis')
+        src_apps = ('neutron-gateway', 'neutron-openvswitch')
+        ovn_cfg = {}
+        for app in src_apps:
+            try:
+                app_cfg = zaza.model.get_application_config(app)
+                ovn_cfg['bridge-interface-mappings'] = app_cfg[
+                    'data-port']['value']
+                ovn_cfg['ovn-bridge-mappings'] = app_cfg[
+                    'bridge-mappings']['value']
+                # Use values from neutron-gateway when present, otherwise use
+                # values from neutron-openvswitch
+                break
+            except KeyError:
+                pass
+        else:
+            raise RuntimeError(
+                'None of the expected apps ({}) are present in the model.'
+                .format(src_apps)
+            )
+
+        # Set values only on ovn-dedicated-chassis when present, otherwise we
+        # set them on ovn-chassis. `any` stops at the first application that
+        # was successfully configured.
+        if not any(self._set_config_if_app_present(app, ovn_cfg)
+                   for app in dst_apps):
+            raise RuntimeError(
+                'None of the expected apps ({}) are present in the model.'
+                .format(dst_apps)
+            )
+
+
+def pre_migration_configuration():
+    """Prepare an already deployed legacy model for the OVN migration.
+
+    NOTE: The configuration is deliberately applied post-deploy and after the
+    initial network configuration has been done. This is an important part of
+    the test as we need to prove that our end users would be successful in
+    doing this in the wild.
+    """
+    # A helper class instance lets us leverage the common setup code and the
+    # `config_change` helper.
+    setup_helper = _OVNSetupHelper()
+    setup_helper.setUpClass()
+
+    # Step 1: `firewall-driver` and `instance-mtu` on n-gw and n-ovs units.
+    setup_helper.configure_ngw_novs()
+
+    # Step 2: mappings are copied from the n-gw or n-ovs application over to
+    # ovn-dedicated-chassis or ovn-chassis.
+    setup_helper.configure_ovn_mappings()
@@ -15,8 +15,13 @@
 """Encapsulate OVN testing."""

 import logging
+import tenacity

+import juju
+
+import zaza
 import zaza.openstack.charm_tests.test_utils as test_utils
+import zaza.openstack.utilities.generic as generic_utils
 import zaza.openstack.utilities.openstack as openstack_utils


@@ -71,3 +76,231 @@ class ChassisCharmOperationTest(BaseCharmOperationTest):
        cls.services = [
            'ovn-controller',
        ]
+
+
+class OVSOVNMigrationTest(test_utils.BaseCharmTest):
+    """OVS to OVN migration tests."""
+
+    def setUp(self):
+        """Perform migration steps prior to validation."""
+        super(OVSOVNMigrationTest, self).setUp()
+        # These steps here due to them having to be executed once and in a
+        # specific order prior to running any tests. The steps should still
+        # be idempotent if at all possible as a courtesy to anyone iterating
+        # on the test code.
+        #
+        # NOTE: unittest creates a new instance of the class for every test
+        # method, so the "already done" marker and the shared target deploy
+        # status are stored on the class and not on the instance.
+        cls = type(self)
+        if getattr(cls, 'one_time_init_done', False):
+            logging.debug('Skipping migration steps as they have already '
+                          'run.')
+            return
+        logging.info('Performing migration steps.')
+
+        # as we progress through the steps our target deploy status changes
+        # store it on the class so the individual methods can update when
+        # appropriate.
+        cls.target_deploy_status = self.test_config.get(
+            'target_deploy_status', {})
+
+        # Stop Neutron agents on hypervisors
+        self._pause_units('neutron-openvswitch')
+
+        # Add the neutron-api-plugin-ovn subordinate which will make the
+        # `neutron-api-plugin-ovn` unit appear in the deployment.
+        #
+        # NOTE: The OVN drivers will not be activated until we change the
+        # value for the `manage-neutron-plugin-legacy-mode` config.
+        self._add_neutron_api_plugin_ovn_subordinate_relation()
+
+        # Adjust MTU on overlay networks
+        #
+        # Prior to this the end user will already have lowered the MTU on their
+        # running instances through the use of the `instance-mtu` configuration
+        # option and manual reconfiguration of instances that do not use DHCP.
+        #
+        # We update the value for the MTU on the overlay networks at this point
+        # in time because:
+        #
+        # - Agents are paused and will not actually reconfigure the networks.
+        #
+        # - Making changes to non-Geneve networks are prohibited as soon as the
+        #   OVN drivers are activated.
+        #
+        # - Get the correct MTU value into the OVN database on first sync.
+        #
+        #   - This will be particularly important for any instances using
+        #     stateless IPv6 autoconfiguration (SLAAC) as there is currently
+        #     no config knob to feed MTU information into the legacy ML2+OVS
+        #     `radvd` configuration or the native OVN RA.
+        #
+        #   - Said instances will reconfigure their IPv6 MTU as soon as they
+        #     receive an RA with correct MTU when OVN takes over control.
+        self._run_migrate_mtu_action()
+
+        # Set `manage-neutron-plugin-legacy-mode` to False, which activates
+        # the OVN drivers.
+        #
+        # NOTE(fnordahl): until we sync/repair the OVN DB this will make the
+        # `neutron-server` log errors. However we need the neutron unit to be
+        # unpaused while doing this to have the configuration rendered. The
+        # configuration is consumed by the `neutron-ovn-db-sync` tool.
+        self._configure_neutron_api()
+
+        # Stop the Neutron server prior to OVN DB sync/repair
+        self._pause_units('neutron-api')
+
+        # Sync the OVN DB
+        self._run_migrate_ovn_db_action()
+        # Perform the optional morphing of Neutron DB action
+        self._run_offline_neutron_morph_db_action()
+        self._resume_units('neutron-api')
+
+        # Run `cleanup` action on neutron-openvswitch units/hypervisors
+        self._run_cleanup_action()
+
+        # Start the OVN controller on hypervisors
+        #
+        # NOTE(fnordahl): it is very important to have run cleanup prior to
+        # starting these, if you don't do that it is almost guaranteed that
+        # you will program the network to a state of infinite loop.
+        self._resume_units('ovn-chassis')
+
+        # And we should be off to the races
+
+        cls.one_time_init_done = True
+
+    def _add_neutron_api_plugin_ovn_subordinate_relation(self):
+        """Relate neutron-api-plugin-ovn to neutron-api.
+
+        When the relation can not be added we verify that this is because it
+        already exists, which keeps the step idempotent.
+        """
+        try:
+            logging.info('Adding relation neutron-api-plugin-ovn '
+                         '-> neutron-api')
+            zaza.model.add_relation(
+                'neutron-api-plugin-ovn', 'neutron-plugin',
+                'neutron-api:neutron-plugin-api-subordinate')
+            zaza.model.wait_for_agent_status()
+            # Wait on the status tracked by this class so that units paused
+            # by earlier steps are expected to be in their paused state.
+            zaza.model.wait_for_application_states(
+                states=self.target_deploy_status)
+        except juju.errors.JujuAPIError:
+            # we were not able to add the relation, let's make sure it's
+            # because it's already there
+            assert (zaza.model.get_relation_id(
+                'neutron-api-plugin-ovn', 'neutron-api',
+                remote_interface_name='neutron-plugin-api-subordinate')
+                is not None), 'Unable to add relation required for test'
+            logging.info('--> On the other hand, did not need to add the '
+                         'relation as it was already there.')
+
+    def _configure_neutron_api(self):
+        """Set configuration option `manage-neutron-plugin-legacy-mode`."""
+        logging.info('Configuring `manage-neutron-plugin-legacy-mode` for '
+                     'neutron-api...')
+        n_api_config = {
+            'manage-neutron-plugin-legacy-mode': False,
+        }
+        # Passing the same dictionary as default and alternate config makes
+        # the change stick when the context manager exits.
+        with self.config_change(
+                n_api_config, n_api_config, 'neutron-api'):
+            logging.info('done')
+
+    def _run_action_on_plugin_leader(self, action_name):
+        """Run action on neutron-api-plugin-ovn/leader and assert it ran OK.
+
+        :param action_name: Name of action to run.
+        :type action_name: str
+        """
+        generic_utils.assertActionRanOK(
+            zaza.model.run_action_on_leader(
+                'neutron-api-plugin-ovn',
+                action_name,
+                action_params={
+                    'i-really-mean-it': True},
+                raise_on_failure=True,
+            )
+        )
+
+    def _run_offline_neutron_morph_db_action(self):
+        """Run the optional `offline-neutron-morph-db` action."""
+        logging.info('Running the optional `offline-neutron-morph-db` action '
+                     'on neutron-api-plugin-ovn/leader')
+        self._run_action_on_plugin_leader('offline-neutron-morph-db')
+
+    def _run_migrate_ovn_db_action(self):
+        """Run the `migrate-ovn-db` action."""
+        logging.info('Running `migrate-ovn-db` action on '
+                     'neutron-api-plugin-ovn/leader')
+        self._run_action_on_plugin_leader('migrate-ovn-db')
+
+    # Charm readiness is no guarantee for API being ready to serve requests.
+    # https://bugs.launchpad.net/charm-neutron-api/+bug/1854518
+    @tenacity.retry(wait=tenacity.wait_exponential(min=5, max=60),
+                    reraise=True, stop=tenacity.stop_after_attempt(3))
+    def _run_migrate_mtu_action(self):
+        """Run the `migrate-mtu` action, retried up to three attempts."""
+        logging.info('Running `migrate-mtu` action on '
+                     'neutron-api-plugin-ovn/leader')
+        self._run_action_on_plugin_leader('migrate-mtu')
+
+    def _pause_units(self, application):
+        """Pause all units of application and expect them to stay paused.
+
+        :param application: Name of application to pause units of.
+        :type application: str
+        """
+        logging.info('Pausing {} units'.format(application))
+        zaza.model.run_action_on_units(
+            [unit.entity_id
+                for unit in zaza.model.get_units(application)],
+            'pause',
+            raise_on_failure=True,
+        )
+        # Record the paused state so that subsequent waits for application
+        # states expect it.
+        self.target_deploy_status.update(
+            {
+                application: {
+                    'workload-status': 'maintenance',
+                    'workload-status-message': 'Paused',
+                },
+            },
+        )
+
+    def _run_cleanup_action(self):
+        """Run the `cleanup` action on all neutron-openvswitch units."""
+        logging.info('Running `cleanup` action on neutron-openvswitch units.')
+        zaza.model.run_action_on_units(
+            [unit.entity_id
+                for unit in zaza.model.get_units('neutron-openvswitch')],
+            'cleanup',
+            action_params={
+                'i-really-mean-it': True},
+            raise_on_failure=True,
+        )
+
+    def _resume_units(self, application):
+        """Resume all units of application and drop its paused expectation.
+
+        :param application: Name of application to resume units of.
+        :type application: str
+        """
+        logging.info('Resuming {} units'.format(application))
+        zaza.model.run_action_on_units(
+            [unit.entity_id
+                for unit in zaza.model.get_units(application)],
+            'resume',
+            raise_on_failure=True,
+        )
+        # The application may have been deployed paused without any status
+        # override being recorded for it, in which case there is nothing to
+        # remove and that must not fail an otherwise successful resume.
+        self.target_deploy_status.pop(application, None)
+
+    def test_ovs_ovn_migration(self):
+        """Test migration of existing Neutron ML2+OVS deployment to OVN.
+
+        The test should be run after deployment and validation of a legacy
+        deployment combined with subsequent run of a network connectivity test
+        on instances created prior to the migration.
+        """
+        # The setUp method of this test class will perform the migration steps.
+        # The tests.yaml is programmed to do further validation after the
+        # migration.
+
+        # Reset the n-gw and n-ovs instance-mtu configuration option so it does
+        # not influence how further tests are executed.
+        reset_config_keys = ['instance-mtu']
+        for app in ('neutron-gateway', 'neutron-openvswitch'):
+            try:
+                zaza.model.reset_application_config(app, reset_config_keys)
+                logging.info('Reset configuration to default on "{}" for "{}"'
+                             .format(app, reset_config_keys))
+            except KeyError:
+                # Presumably the application is not part of the model; skip.
+                pass
+        zaza.model.wait_for_agent_status()
+        zaza.model.wait_for_application_states(
+            states=self.target_deploy_status)