From a69d828e6938c0423b2cd6f73833676be9c13464 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Ferr=C3=A3o?= <2031761+viniciusferrao@users.noreply.github.com> Date: Sat, 2 May 2026 22:56:39 -0300 Subject: [PATCH 1/8] Remove eventlet dependency, migrate to asyncio/concurrent.futures Replace eventlet.greenpool with concurrent.futures.ThreadPoolExecutor in the BMC discovery script, using as_completed() for proper exception propagation and main-thread result aggregation to avoid race conditions. Remove dead eventlet socket compatibility code (.fd attribute checks) from the IPMI session layer, and clean up stale eventlet references in comments across the codebase. Closes: xcat2/confluent#197 --- .../aiohmi/ipmi/private/session.py | 21 ++--------- confluent_server/confluent/debugger.py | 2 - confluent_server/confluent/syncfiles.py | 5 +-- misc/cfg-dhcp-redfish-bmcs-by-switch.py | 37 ++++++++++++------- 4 files changed, 29 insertions(+), 36 deletions(-) diff --git a/confluent_server/aiohmi/ipmi/private/session.py b/confluent_server/aiohmi/ipmi/private/session.py index 23e15db6..ce66361d 100644 --- a/confluent_server/aiohmi/ipmi/private/session.py +++ b/confluent_server/aiohmi/ipmi/private/session.py @@ -57,12 +57,8 @@ except AttributeError: # in case of congestion initialtimeout = 0.5 # the thread in which all IO will be performed -# While the model as-is works fine for it's own coroutine -# structure, when combined with threading or something like -# eventlet, it becomes difficult for the calling code to cope -# This thread will tuck away the threading situation such that -# calling code doesn't have to do any gymnastics to cope with -# the nature of things. +# This thread tucks away the threading situation such that +# calling code doesn't have to do any gymnastics. iothread = None # whether io thread is yet ready to work iothreadready = False @@ -186,24 +182,13 @@ async def _io_wait(timeout, myaddr=None, evq=None): evq.append(evt) deadline = timeout + _monotonic_time() ioqueue.append((deadline, evt, myaddr)) - # Unfortunately, at least with eventlet patched threading, the wait() - # is a somewhat busy wait if given a deadline. Workaround by having - # it piggy back on the select() in the io thread, which is a truly - # lazy wait even with eventlet involvement if deadline < selectdeadline: - intsock = iosockets[0] - if hasattr(intsock, 'fd'): - # if in eventlet, go for the true sendto, which is less glitchy - intsock = intsock.fd - intsock.sendto(b'\x01', (myself, iosockets[0].getsockname()[1])) + iosockets[0].sendto(b'\x01', (myself, iosockets[0].getsockname()[1])) await evt.wait() def _io_sendto(mysocket, packet, sockaddr): - # Want sendto to act reasonably sane.. mysocket.setblocking(1) - if hasattr(mysocket, 'fd'): - mysocket = mysocket.fd try: mysocket.sendto(packet, sockaddr) except Exception: diff --git a/confluent_server/confluent/debugger.py b/confluent_server/confluent/debugger.py index 85ec283b..557835a2 100644 --- a/confluent_server/confluent/debugger.py +++ b/confluent_server/confluent/debugger.py @@ -5,8 +5,6 @@ import socket import sys import confluent.tasks as tasks -#this will ultimately fill the role of the 'backdoor' of eventlet - # since we have to asyncio up the input and output, we use InteractiveInterpreter and handle the # input ourselves, since code is not asyncio friendly in and of itself #code.InteractiveConsole().interact() diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index 2c7edf96..426c0021 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -199,9 +199,8 @@ async def sync_list_to_node(sl, node, suffixes, peerip=None): 'rsync', '-rvLD', targdir + '/', 'root@[{}]:/'.format(targip)) except Exception as e: if 'CalledProcessError' not in repr(e): - # https://github.com/eventlet/eventlet/issues/413 - # for some reason, can't catch the calledprocesserror normally - # for this exception, implement a hack workaround + # CalledProcessError can't be caught normally in some contexts, + # so check via repr as a workaround raise unreadablefiles = [] for root, dirnames, filenames in os.walk(targdir): diff --git a/misc/cfg-dhcp-redfish-bmcs-by-switch.py b/misc/cfg-dhcp-redfish-bmcs-by-switch.py index fe304ae3..a0249641 100755 --- a/misc/cfg-dhcp-redfish-bmcs-by-switch.py +++ b/misc/cfg-dhcp-redfish-bmcs-by-switch.py @@ -32,18 +32,17 @@ import sys sys.path.append('/opt/confluent/lib/python') +import concurrent.futures import confluent.client as cli -import eventlet.greenpool import gzip import io import json import os import struct import subprocess +import pyghmi.util.webclient as webclient import time -webclient = eventlet.import_patched('pyghmi.util.webclient') - bmcsbyuuid = {} def checkfish(addr, mac): @@ -57,10 +56,10 @@ def checkfish(addr, mac): try: body = json.loads(body) except json.decoder.JSONDecodeError: - return + return None uuid = body.get('UUID', None) if not uuid: - return + return None #This part is needed if a bmc sticks 'wire format' uuid in the json body #Should be skipped for bmcs that present it sanely uuidparts = uuid.split('-') @@ -68,14 +67,10 @@ def checkfish(addr, mac): uuidparts[1] = '{:04x}'.format(struct.unpack('!H', struct.pack(' Date: Sat, 2 May 2026 23:07:54 -0300 Subject: [PATCH 2/8] Remove python3-eventlet from build deps and clean up stale references Drop python3-eventlet from the Ubuntu Noble build Dockerfile. Clean up remaining greenthread/greenlet terminology in comments across aiohmi IPMI modules, consoleserver, macmap, and the IPMI plugin. Remove a commented-out GreenPool reference in macmap. --- build/arm/noble/Dockerfile | 2 +- confluent_server/aiohmi/ipmi/command.py | 3 +-- confluent_server/aiohmi/ipmi/console.py | 2 +- confluent_server/aiohmi/ipmi/private/session.py | 2 +- confluent_server/aiohmi/ipmi/private/simplesession.py | 2 +- confluent_server/confluent/consoleserver.py | 3 +-- confluent_server/confluent/networking/macmap.py | 1 - .../confluent/plugins/hardwaremanagement/ipmi.py | 10 +++------- 8 files changed, 9 insertions(+), 16 deletions(-) diff --git a/build/arm/noble/Dockerfile b/build/arm/noble/Dockerfile index e145de1f..37d60446 100644 --- a/build/arm/noble/Dockerfile +++ b/build/arm/noble/Dockerfile @@ -3,7 +3,7 @@ ADD stdeb.patch /tmp/ ADD buildapt.sh /bin/ ADD distributions.tmpl /bin/ RUN ["apt-get", "update"] -RUN ["apt-get", "install", "-y", "reprepro", "python3-stdeb", "gnupg-agent", "devscripts", "debhelper", "libsoap-lite-perl", "libdbi-perl", "quilt", "git", "python3-pyparsing", "python3-dnspython", "python3-eventlet", "python3-netifaces", "python3-paramiko", "dh-python", "libjson-perl", "ronn", "alien", "gcc", "make"] +RUN ["apt-get", "install", "-y", "reprepro", "python3-stdeb", "gnupg-agent", "devscripts", "debhelper", "libsoap-lite-perl", "libdbi-perl", "quilt", "git", "python3-pyparsing", "python3-dnspython", "python3-netifaces", "python3-paramiko", "dh-python", "libjson-perl", "ronn", "alien", "gcc", "make"] RUN ["mkdir", "-p", "/sources/git/"] RUN ["mkdir", "-p", "/debs/"] RUN ["mkdir", "-p", "/apt/"] diff --git a/confluent_server/aiohmi/ipmi/command.py b/confluent_server/aiohmi/ipmi/command.py index 32d6ab1d..94abca49 100644 --- a/confluent_server/aiohmi/ipmi/command.py +++ b/confluent_server/aiohmi/ipmi/command.py @@ -138,8 +138,7 @@ class Command(object): :param bmc: hostname or ip address of the BMC (default is local) :param userid: username to use to connect (default to no user) :param password: password to connect to the BMC (defaults to no password) - :param onlogon: function to run when logon completes in an asynchronous - fashion. This will result in a greenthread behavior. + :param onlogon: function to run when logon completes asynchronously. :param kg: Optional parameter to use if BMC has a particular Kg configured :param verifycallback: For OEM extensions that use HTTPS, this function will be used to evaluate the certificate. diff --git a/confluent_server/aiohmi/ipmi/console.py b/confluent_server/aiohmi/ipmi/console.py index 6479b938..294a84a9 100644 --- a/confluent_server/aiohmi/ipmi/console.py +++ b/confluent_server/aiohmi/ipmi/console.py @@ -421,7 +421,7 @@ class Console(object): If a caller is a simple little utility, provide a function to eternally run the event loop. More complicated usage would be expected to provide their own event loop behavior, though this could be used - within the greenthread implementation of caller's choice if desired. + within the async implementation of caller's choice if desired. """ # wait_for_rsp promises to return a false value when no sessions are # alive anymore diff --git a/confluent_server/aiohmi/ipmi/private/session.py b/confluent_server/aiohmi/ipmi/private/session.py index ce66361d..5ab6d704 100644 --- a/confluent_server/aiohmi/ipmi/private/session.py +++ b/confluent_server/aiohmi/ipmi/private/session.py @@ -847,7 +847,7 @@ class Session(object): # within a process. In this way, synchronous usage of the interface # plays well with asynchronous use. In fact, this produces the # behavior of only the constructor needing a callback. From then on, - # synchronous usage of the class acts in a greenthread style governed + # synchronous usage of the class acts in a coroutine style governed # by order of data on the network await self.awaitresponse(retry, netfn + 1, command) lastresponse = self.lastresponse diff --git a/confluent_server/aiohmi/ipmi/private/simplesession.py b/confluent_server/aiohmi/ipmi/private/simplesession.py index a4f01e95..9ac8d898 100644 --- a/confluent_server/aiohmi/ipmi/private/simplesession.py +++ b/confluent_server/aiohmi/ipmi/private/simplesession.py @@ -486,7 +486,7 @@ class Session(object): # within a process. In this way, synchronous usage of the interface # plays well with asynchronous use. In fact, this produces the # behavior of only the constructor needing a callback. From then on, - # synchronous usage of the class acts in a greenthread style governed + # synchronous usage of the class acts in a coroutine style governed # by order of data on the network self.awaitresponse(retry) lastresponse = self.lastresponse diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index e0e876e3..31f25452 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -462,8 +462,7 @@ class ConsoleHandler(object): self._attribwatcher = None async def get_console_output(self, data): - # Spawn as a greenthread, return control as soon as possible - # to the console object + # Return control as soon as possible to the console object await self._handle_console_output(data) async def attachsession(self, session): diff --git a/confluent_server/confluent/networking/macmap.py b/confluent_server/confluent/networking/macmap.py index aa578e48..54f00123 100644 --- a/confluent_server/confluent/networking/macmap.py +++ b/confluent_server/confluent/networking/macmap.py @@ -587,7 +587,6 @@ async def _full_updatemacmap(configmanager): if switch not in switches: del _macsbyswitch[switch] switchauth = get_switchcreds(configmanager, switches) - #pool = GreenPool(64) tsks = [] for sa in switchauth: tsks.append(_map_switch(sa)) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index 9776a500..8b9f9a68 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -98,13 +98,9 @@ def get_pci_text_from_ids(subdevice, subvendor, device, vendor): return vendorstr, devstr -# There is something not right with the RLocks used in pyghmi when -# greenthreads comes into play. It seems like sometimes on acquire, -# it calls _get_ident and it isn't the id(greenlet) and so -# a thread deadlocks itself due to identity crisis? -# However, since we are not really threaded, the operations being protected -# are not actually dangerously multiplexed... so we can replace with -# a null context manager for now +# Since we are single-threaded via asyncio, the operations being protected +# by RLocks are not actually dangerously multiplexed, so we can replace +# with a null context manager class NullLock(object): def donothing(self, *args, **kwargs): From 0b1c40aa4ac978bcc8cc2fde1a0e527754717b00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Ferr=C3=A3o?= <2031761+viniciusferrao@users.noreply.github.com> Date: Sun, 3 May 2026 01:44:12 -0300 Subject: [PATCH 3/8] Remove stale comments that restated the obvious Drop NullLock rationale comment (referenced removed library), consoleserver greenthread spawn comment, and update syncfiles CalledProcessError workaround comment. --- confluent_server/confluent/consoleserver.py | 1 - confluent_server/confluent/plugins/hardwaremanagement/ipmi.py | 3 --- confluent_server/confluent/syncfiles.py | 4 ++-- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index 31f25452..ea0329d0 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -462,7 +462,6 @@ class ConsoleHandler(object): self._attribwatcher = None async def get_console_output(self, data): - # Return control as soon as possible to the console object await self._handle_console_output(data) async def attachsession(self, session): diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index 8b9f9a68..d711679a 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -98,9 +98,6 @@ def get_pci_text_from_ids(subdevice, subvendor, device, vendor): return vendorstr, devstr -# Since we are single-threaded via asyncio, the operations being protected -# by RLocks are not actually dangerously multiplexed, so we can replace -# with a null context manager class NullLock(object): def donothing(self, *args, **kwargs): diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index 426c0021..ddfbfc7c 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -199,8 +199,8 @@ async def sync_list_to_node(sl, node, suffixes, peerip=None): 'rsync', '-rvLD', targdir + '/', 'root@[{}]:/'.format(targip)) except Exception as e: if 'CalledProcessError' not in repr(e): - # CalledProcessError can't be caught normally in some contexts, - # so check via repr as a workaround + # CalledProcessError can't be caught normally through + # asyncio subprocess, so check via repr as a workaround raise unreadablefiles = [] for root, dirnames, filenames in os.walk(targdir): From 52f2086319eb4ae9d24646559d044d14580c758b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Ferr=C3=A3o?= <2031761+viniciusferrao@users.noreply.github.com> Date: Sun, 3 May 2026 01:45:43 -0300 Subject: [PATCH 4/8] Replace eventlet CalledProcessError workaround with proper catch The repr() check existed because eventlet broke normal exception catching. With eventlet removed, catch CalledProcessError directly. --- confluent_server/confluent/syncfiles.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index ddfbfc7c..4713effe 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -17,6 +17,7 @@ import asyncio import glob import os +import subprocess import shutil import tempfile import confluent.sshutil as sshutil @@ -197,11 +198,7 @@ async def sync_list_to_node(sl, node, suffixes, peerip=None): targip = peerip output, stderr = await util.check_output( 'rsync', '-rvLD', targdir + '/', 'root@[{}]:/'.format(targip)) - except Exception as e: - if 'CalledProcessError' not in repr(e): - # CalledProcessError can't be caught normally through - # asyncio subprocess, so check via repr as a workaround - raise + except subprocess.CalledProcessError: unreadablefiles = [] for root, dirnames, filenames in os.walk(targdir): for filename in filenames: From aafd6967bab61c0884e165965774894f4055ec6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Ferr=C3=A3o?= <2031761+viniciusferrao@users.noreply.github.com> Date: Sun, 3 May 2026 01:46:35 -0300 Subject: [PATCH 5/8] Clean up iothread design rationale comment --- confluent_server/aiohmi/ipmi/private/session.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/confluent_server/aiohmi/ipmi/private/session.py b/confluent_server/aiohmi/ipmi/private/session.py index 5ab6d704..492162f0 100644 --- a/confluent_server/aiohmi/ipmi/private/session.py +++ b/confluent_server/aiohmi/ipmi/private/session.py @@ -56,9 +56,8 @@ except AttributeError: # session. This will be randomized to stagger out retries # in case of congestion initialtimeout = 0.5 -# the thread in which all IO will be performed -# This thread tucks away the threading situation such that -# calling code doesn't have to do any gymnastics. +# the thread in which all IO will be performed, so that +# calling code doesn't have to manage threading directly iothread = None # whether io thread is yet ready to work iothreadready = False From 1964d4a4ca36cc54bf1be3fb291288475e9de95d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Ferr=C3=A3o?= <2031761+viniciusferrao@users.noreply.github.com> Date: Sun, 3 May 2026 01:50:30 -0300 Subject: [PATCH 6/8] Fix unbound exception variable in CalledProcessError handler --- confluent_server/confluent/syncfiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index 4713effe..db851d74 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -198,7 +198,7 @@ async def sync_list_to_node(sl, node, suffixes, peerip=None): targip = peerip output, stderr = await util.check_output( 'rsync', '-rvLD', targdir + '/', 'root@[{}]:/'.format(targip)) - except subprocess.CalledProcessError: + except subprocess.CalledProcessError as e: unreadablefiles = [] for root, dirnames, filenames in os.walk(targdir): for filename in filenames: From 5e26f48e108c7548ea7d41107ff0a86a65f39e59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Ferr=C3=A3o?= <2031761+viniciusferrao@users.noreply.github.com> Date: Tue, 5 May 2026 17:11:22 -0300 Subject: [PATCH 7/8] Restore debugger eventlet backdoor comment per maintainer request --- confluent_server/confluent/debugger.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/debugger.py b/confluent_server/confluent/debugger.py index 557835a2..85ec283b 100644 --- a/confluent_server/confluent/debugger.py +++ b/confluent_server/confluent/debugger.py @@ -5,6 +5,8 @@ import socket import sys import confluent.tasks as tasks +#this will ultimately fill the role of the 'backdoor' of eventlet + # since we have to asyncio up the input and output, we use InteractiveInterpreter and handle the # input ourselves, since code is not asyncio friendly in and of itself #code.InteractiveConsole().interact() From 73152001339e1640dbde94d415a1d1c7a4dcaecf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Ferr=C3=A3o?= <2031761+viniciusferrao@users.noreply.github.com> Date: Wed, 6 May 2026 19:48:04 -0300 Subject: [PATCH 8/8] Replace pyghmi.util.webclient with aiohmi.util.webclient --- misc/cfg-dhcp-redfish-bmcs-by-switch.py | 64 +++++++++++-------------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/misc/cfg-dhcp-redfish-bmcs-by-switch.py b/misc/cfg-dhcp-redfish-bmcs-by-switch.py index a0249641..7b9fe422 100755 --- a/misc/cfg-dhcp-redfish-bmcs-by-switch.py +++ b/misc/cfg-dhcp-redfish-bmcs-by-switch.py @@ -30,32 +30,23 @@ # recommend, but hopefully can be useful reference material +import asyncio import sys sys.path.append('/opt/confluent/lib/python') -import concurrent.futures +import aiohmi.util.webclient as webclient import confluent.client as cli -import gzip -import io import json import os import struct import subprocess -import pyghmi.util.webclient as webclient import time bmcsbyuuid = {} -def checkfish(addr, mac): - wc = webclient.SecureHTTPConnection(addr, 443, verifycallback=lambda x: True) - wc.connect() - wc.request('GET', '/redfish/v1') - rsp = wc.getresponse() - body = rsp.read() - if body[:2] == b'\x1f\x8b': - body = gzip.GzipFile(fileobj=io.BytesIO(body)).read() - try: - body = json.loads(body) - except json.decoder.JSONDecodeError: +async def checkfish(addr, mac): + wc = webclient.WebConnection(addr, 443, verifycallback=lambda x: True) + body = await wc.grab_json_response('/redfish/v1') + if not body: return None uuid = body.get('UUID', None) if not uuid: @@ -70,6 +61,27 @@ def checkfish(addr, mac): return (uuid, mac, addr) +async def probe_bmcs(mactonode, mactoips): + tasks = [] + macs = [] + for mac in sorted(mactonode): + tasks.append(checkfish(mactoips[mac], mac)) + macs.append(mac) + results = await asyncio.gather(*tasks, return_exceptions=True) + for mac, result in zip(macs, results): + if isinstance(result, Exception): + sys.stderr.write('Failed to probe {}: {}\n'.format( + mactoips[mac], result)) + continue + if result is None: + continue + uuid, mac, addr = result + if uuid in bmcsbyuuid: + bmcsbyuuid[uuid]['bmcs'][mac] = addr + else: + bmcsbyuuid[uuid] = {'bmcs': {mac: addr}} + + if __name__ == '__main__': with open('/var/lib/dhcpd/dhcpd.leases', 'r') as leasefile: leases = leasefile.read() @@ -90,7 +102,7 @@ if __name__ == '__main__': currip = None inlease = False # warm up arp tables and fdb - pings = {} + pings = {} for mac in mactoips: pings[mac] = subprocess.Popen(['ping', '-c', '1', mactoips[mac]], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) for mac in pings: @@ -110,25 +122,7 @@ if __name__ == '__main__': for inf in macinfo: if inf.get('possiblenode', None): mactonode[mac] = inf['possiblenode'] - with concurrent.futures.ThreadPoolExecutor() as executor: - futures = {} - for mac in sorted(mactonode): - futures[executor.submit(checkfish, mactoips[mac], mac)] = mac - for future in concurrent.futures.as_completed(futures): - mac = futures[future] - try: - result = future.result() - except Exception as e: - sys.stderr.write('Failed to probe {}: {}\n'.format( - mactoips[mac], e)) - continue - if result is None: - continue - uuid, mac, addr = result - if uuid in bmcsbyuuid: - bmcsbyuuid[uuid]['bmcs'][mac] = addr - else: - bmcsbyuuid[uuid] = {'bmcs': {mac: addr}} + asyncio.run(probe_bmcs(mactonode, mactoips)) for uuid in sorted(bmcsbyuuid): macd = bmcsbyuuid[uuid]['bmcs'] macs = sorted(macd)