From b5e250513b15de7575efd444dcfa79f1f648ce07 Mon Sep 17 00:00:00 2001 From: David Ames Date: Tue, 21 Jan 2020 10:07:01 -0800 Subject: [PATCH] Fix for NDR testing race condition Previous attempts to fix LP Bug#1784083 added a workaround (commit 820ed808), which is being removed here. The root cause seems to be upstream in the dragent. It may never have been envisioned that the agent would be run by itself the way the charm does. As a result, even if neutron-api completes its amqp relation first, neutron-dynamic-routing can still see oslo_messaging.exceptions.MessagingTimeout errors. Some operation must occur against neutron before dragent is truly ready, i.e. some post-deploy openstack command, so it is outside the purview of the charm. This change adds a late service restart. Partial-Bug: #1841459 --- .../utilities/test_zaza_utilities_generic.py | 25 +++++++++++++ .../charm_tests/dragent/configure.py | 11 ++++++ zaza/openstack/configure/bgp_speaker.py | 35 ------------------- zaza/openstack/utilities/generic.py | 26 ++++++++++++++ 4 files changed, 62 insertions(+), 35 deletions(-) diff --git a/unit_tests/utilities/test_zaza_utilities_generic.py b/unit_tests/utilities/test_zaza_utilities_generic.py index 3fb26da..d18b646 100644 --- a/unit_tests/utilities/test_zaza_utilities_generic.py +++ b/unit_tests/utilities/test_zaza_utilities_generic.py @@ -666,3 +666,28 @@ class TestGenericUtils(ut_utils.BaseTestCase): result = generic_utils.check_commands_on_units(cmds, _units) self.assertIsNotNone(result) + + def test_systemctl(self): + self.patch_object(generic_utils.model, "get_unit_from_name") + self.patch_object(generic_utils.model, "run_on_unit") + _unit = mock.MagicMock() + _unit.entity_id = "unit/2" + _command = "stop" + _service = "servicename" + _systemctl = "/bin/systemctl {} {}".format(_command, _service) + self.run_on_unit.return_value = {"Code": 0} + self.get_unit_from_name.return_value = _unit + + # With Unit object + generic_utils.systemctl(_unit, _service, command=_command) 
+ self.run_on_unit.assert_called_with(_unit.entity_id, _systemctl) + + # With string name unit + generic_utils.systemctl(_unit.entity_id, _service, command=_command) + self.run_on_unit.assert_called_with(_unit.entity_id, _systemctl) + + # Failed return code + self.run_on_unit.return_value = {"Code": 1} + with self.assertRaises(AssertionError): + generic_utils.systemctl( + _unit.entity_id, _service, command=_command) diff --git a/zaza/openstack/charm_tests/dragent/configure.py b/zaza/openstack/charm_tests/dragent/configure.py index 6d6e075..f509609 100644 --- a/zaza/openstack/charm_tests/dragent/configure.py +++ b/zaza/openstack/charm_tests/dragent/configure.py @@ -16,6 +16,8 @@ """Setup for BGP deployments.""" +import logging +import zaza.model from zaza.openstack.configure import ( network, bgp_speaker, @@ -86,6 +88,15 @@ def setup(): # Confugre the overcloud network network.setup_sdn(network_config, keystone_session=keystone_session) + + # LP Bugs #1784083 and #1841459, require a late restart of the + # neutron-bgp-dragent service + logging.warning("Due to LP Bugs #1784083 and #1841459, we require a late " + "restart of the neutron-bgp-dragent service before " + "setting up BGP.") + for unit in zaza.model.get_units("neutron-dynamic-routing"): + generic_utils.systemctl(unit, "neutron-bgp-dragent", command="restart") + # Configure BGP bgp_speaker.setup_bgp_speaker( peer_application_name=DEFAULT_PEER_APPLICATION_NAME, diff --git a/zaza/openstack/configure/bgp_speaker.py b/zaza/openstack/configure/bgp_speaker.py index 2f5dbb9..4fd9587 100755 --- a/zaza/openstack/configure/bgp_speaker.py +++ b/zaza/openstack/configure/bgp_speaker.py @@ -19,7 +19,6 @@ import argparse import logging import sys -import neutronclient from zaza.openstack.utilities import ( cli as cli_utils, openstack as openstack_utils, @@ -99,40 +98,6 @@ def setup_bgp_speaker(peer_application_name, keystone_session=None): "Advertised floating IP: {}".format( floating_ip["floating_ip_address"])) - # 
NOTE(fnordahl): As a workaround for LP: #1784083 remove BGP speaker from - # dragent and add it back. - logging.info( - "Waiting for Neutron agent 'neutron-bgp-dragent' to appear...") - keystone_session = openstack_utils.get_overcloud_keystone_session() - neutron_client = openstack_utils.get_neutron_session_client( - keystone_session) - agents = openstack_utils.neutron_agent_appears(neutron_client, - 'neutron-bgp-dragent') - agent_id = None - for agent in agents.get('agents', []): - agent_id = agent.get('id', None) - if agent_id is not None: - break - logging.info( - 'Waiting for BGP speaker to appear on agent "{}"...'.format(agent_id)) - bgp_speakers = openstack_utils.neutron_bgp_speaker_appears_on_agent( - neutron_client, agent_id) - logging.info( - "Removing and adding back bgp-speakers to agent (LP: #1784083)...") - while True: - try: - for bgp_speaker in bgp_speakers.get('bgp_speakers', []): - bgp_speaker_id = bgp_speaker.get('id', None) - logging.info('removing "{}" from "{}"' - ''.format(bgp_speaker_id, agent_id)) - neutron_client.remove_bgp_speaker_from_dragent( - agent_id, bgp_speaker_id) - except neutronclient.common.exceptions.NotFound as e: - logging.info('Exception: "{}"'.format(e)) - break - neutron_client.add_bgp_speaker_to_dragent( - agent_id, {'bgp_speaker_id': bgp_speaker_id}) - def run_from_cli(): """Run BGP Speaker setup from CLI. diff --git a/zaza/openstack/utilities/generic.py b/zaza/openstack/utilities/generic.py index dbeb7d3..7b673d0 100644 --- a/zaza/openstack/utilities/generic.py +++ b/zaza/openstack/utilities/generic.py @@ -824,3 +824,29 @@ def get_series(unit): result = model.run_on_unit(unit.entity_id, "lsb_release -cs") return result['Stdout'].strip() + + +def systemctl(unit, service, command="restart"): + """Run systemctl command on a unit. + + :param unit: Unit object or unit name + :type unit: Union[Unit,string] + :param service: Name of service to act on + :type service: string + :param command: Name of command. i.e. 
start, stop, restart + :type command: string + :raises: AssertionError if the command is unsuccessful + :returns: None if successful + """ + cmd = "/bin/systemctl {} {}".format(command, service) + + # Check if this is a unit object or string name of a unit + try: + unit.entity_id + except AttributeError: + unit = model.get_unit_from_name(unit) + + result = model.run_on_unit( + unit.entity_id, cmd) + assert int(result['Code']) == 0, ( + "{} of {} on {} failed".format(command, service, unit.entity_id))