From e2bb72cc14caf054b7b27caefa3462be59aa6382 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 8 Jan 2025 15:59:48 -0500 Subject: [PATCH 001/413] Allow Unix socket for http socket If service.cfg has: [http] bindhost = /var/run/confluent/httpapi Then it will use the cited path to create a unix socket instead of a network socket. --- confluent_server/confluent/httpapi.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index f3c0b1af..5048b8f8 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -1195,8 +1195,17 @@ def serve(bind_host, bind_port): sock = None while not sock: try: - sock = eventlet.listen( - (bind_host, bind_port, 0, 0), family=socket.AF_INET6) + if '/' in bind_host: + try: + os.remove(bind_host) + except Exception: + pass + sock = eventlet.listen( + bind_host, family=socket.AF_UNIX) + os.chmod(bind_host, 0o666) + else: + sock = eventlet.listen( + (bind_host, bind_port, 0, 0), family=socket.AF_INET6) except socket.error as e: if e.errno != 98: raise From 32bc5afe032bb8bcdf0e183e0cf1bf063ea9f7ba Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 13 Jan 2025 12:07:39 -0500 Subject: [PATCH 002/413] Generate error on node/group misdeletion If requesting to delete a group from a node when that node is not a member of that group, generate an error. Similarly to delete a node from a group. 
--- confluent_server/confluent/config/configmanager.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index fa4cbccc..506ed85a 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -1986,6 +1986,9 @@ class ConfigManager(object): delnodes = noderange.NodeRange( attribmap[group][attr]['remove'], config=self).nodes + for node in delnodes: + if node not in currnodes: + raise ValueError('node "{0}" is not a member of {1}'.format(node, group)) attribmap[group][attr] = [ x for x in currnodes if x not in delnodes] if not isinstance(attribmap[group][attr], list): @@ -2442,6 +2445,9 @@ class ConfigManager(object): elif attribmap[node]['groups'].get('remove', False): delgroups = attribmap[node]['groups'][ 'remove'].split(',') + for group in delgroups: + if group not in currgroups: + raise ValueError("node {0} is not a member of group {1}".format(node, group)) newgroups = [ x for x in currgroups if x not in delgroups] attribmap[node]['groups'] = newgroups From cdfb76de57e5c4d23a8f141e86adc8a0cba3ac53 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 14 Jan 2025 08:59:17 -0500 Subject: [PATCH 003/413] Try alternate invocation for handshake Newer versions of websocket change internal call, and we must follow. This is a consequence of the library providing no means to customize the TLS handling, so we have to dig in a little to get that customization. 
--- confluent_server/confluent/plugins/console/openbmc.py | 5 ++++- confluent_server/confluent/plugins/console/tsmsol.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/plugins/console/openbmc.py b/confluent_server/confluent/plugins/console/openbmc.py index a67d9b23..e4b00bd3 100644 --- a/confluent_server/confluent/plugins/console/openbmc.py +++ b/confluent_server/confluent/plugins/console/openbmc.py @@ -83,7 +83,10 @@ class WrappedWebSocket(wso): if not self._certverify(bincert): raise pygexc.UnrecognizedCertificate('Unknown certificate', bincert) try: - self.handshake_response = websocket._handshake.handshake(self.sock, *addrs, **options) + try: + self.handshake_response = websocket._handshake.handshake(self.sock, *addrs, **options) + except TypeError: + self.handshake_response = websocket._handshake.handshake(self.sock, url, *addrs, **options) if self.handshake_response.status in websocket._handshake.SUPPORTED_REDIRECT_STATUSES: options['redirect_limit'] = options.pop('redirect_limit', 3) - 1 if options['redirect_limit'] < 0: diff --git a/confluent_server/confluent/plugins/console/tsmsol.py b/confluent_server/confluent/plugins/console/tsmsol.py index 5e662ebd..55ef4a90 100644 --- a/confluent_server/confluent/plugins/console/tsmsol.py +++ b/confluent_server/confluent/plugins/console/tsmsol.py @@ -82,7 +82,10 @@ class WrappedWebSocket(wso): if not self._certverify(bincert): raise pygexc.UnrecognizedCertificate('Unknown certificate', bincert) try: - self.handshake_response = websocket._handshake.handshake(self.sock, *addrs, **options) + try: + self.handshake_response = websocket._handshake.handshake(self.sock, *addrs, **options) + except TypeError: + self.handshake_response = websocket._handshake.handshake(self.sock, url, *addrs, **options) if self.handshake_response.status in websocket._handshake.SUPPORTED_REDIRECT_STATUSES: options['redirect_limit'] = options.pop('redirect_limit', 3) - 1 if 
options['redirect_limit'] < 0: From fb8675ddc51757f0d65434678a063de1cc4f07a9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 21 Jan 2025 10:10:40 -0500 Subject: [PATCH 004/413] Fix SMM3 discovery by switch --- confluent_server/confluent/discovery/handlers/smm3.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/discovery/handlers/smm3.py b/confluent_server/confluent/discovery/handlers/smm3.py index 7e663dc2..8d93cc3e 100644 --- a/confluent_server/confluent/discovery/handlers/smm3.py +++ b/confluent_server/confluent/discovery/handlers/smm3.py @@ -25,6 +25,8 @@ getaddrinfo = eventlet.support.greendns.getaddrinfo class NodeHandler(redfishbmc.NodeHandler): devname = 'SMM3' + maxmacs = 18 # support an enclosure, but try to avoid catching daisy chain + is_enclosure = True def scan(self): attrs = self.info.get('attributes', {}) From 24f0ff5221c3a67ad6ff141179355abb40712de0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 21 Jan 2025 16:48:42 -0500 Subject: [PATCH 005/413] Add scripts to adopt a node to confluent SSH --- misc/adoptnode.sh | 23 +++++++++++++++++++++++ misc/finalizeadopt.sh | 32 ++++++++++++++++++++++++++++++++ misc/prepadopt.sh | 25 +++++++++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100755 misc/adoptnode.sh create mode 100644 misc/finalizeadopt.sh create mode 100644 misc/prepadopt.sh diff --git a/misc/adoptnode.sh b/misc/adoptnode.sh new file mode 100755 index 00000000..ad230186 --- /dev/null +++ b/misc/adoptnode.sh @@ -0,0 +1,23 @@ +#!/bin/bash +TARGNODE=$1 +TARGPROF=$2 +if [ -z "$TARGNODE" ] ; then + echo "Target node must be specified" + exit 1 +fi +if [ -z "$TARGPROF" ]; then + echo "Target profile must be specified" + exit 1 +fi +nodedefine $TARGNODE deployment.apiarmed=once deployment.profile=$TARGPROF deployment.useinsecureprotocols= deployment.pendingprofile=$TARGPROF +cat /var/lib/confluent/public/site/ssh/*pubkey | ssh $TARGNODE "mkdir -p /root/.ssh/; cat - >> 
/root/.ssh/authorized_keys" +ssh $TARGNODE mkdir -p /etc/confluent /opt/confluent/bin +cat /var/lib/confluent/public/site/tls/*.pem | ssh $TARGNODE "cat - >> /etc/confluent/ca.pem" +cat /var/lib/confluent/public/site/tls/*.pem | ssh $TARGNODE "cat - >> /etc/pki/ca-trust/source/anchors/confluent.pem" +nodeattrib $TARGNODE id.uuid=$(ssh $TARGNODE cat /sys/devices/virtual/dmi/id/product_uuid) +scp prepadopt.sh $TARGNODE:/tmp/ +scp finalizeadopt.sh $TARGNODE:/tmp/ +ssh $TARGNODE bash /tmp/prepadopt.sh $TARGNODE $TARGPROF +nodeattrib $TARGNODE deployment.pendingprofile= +nodeapply $TARGNODE -k +ssh $TARGNODE sh /tmp/finalizeadopt.sh diff --git a/misc/finalizeadopt.sh b/misc/finalizeadopt.sh new file mode 100644 index 00000000..7b9413d1 --- /dev/null +++ b/misc/finalizeadopt.sh @@ -0,0 +1,32 @@ +#!/bin/bash +if ! grep ^HostbasedAuthentication /etc/ssh/sshd_config > /dev/null; then + echo HostbasedAuthentication yes >> /etc/ssh/sshd_config + echo HostbasedUsesNameFromPacketOnly yes >> /etc/ssh/sshd_config + echo IgnoreRhosts no >> /etc/ssh/sshd_config +fi +for certfile in /etc/ssh/*cert*; do + if ! grep $certfile /etc/ssh/sshd_config > /dev/null; then + echo HostCertificate $certfile >> /etc/ssh/sshd_config + fi +done +if [ -d /etc/ssh/ssh_config.d/ ]; then + cat > /etc/ssh/ssh_config.d/01-confluent.conf << EOF +Host * + HostbasedAuthentication yes + EnableSSHKeysign yes + HostbasedKeyTypes *ed25519* +EOF +else + if ! 
grep EnableSSHKeysign /etc/ssh/ssh_config > /dev/null; then + cat >> /etc/ssh/ssh_config << EOF +Host * + HostbasedAuthentication yes + EnableSSHKeysign yes + HostbasedKeyTypes *ed25519* +EOF + fi +fi +restorecon -r /etc/ssh +restorecon /root/.shosts + +systemctl restart sshd diff --git a/misc/prepadopt.sh b/misc/prepadopt.sh new file mode 100644 index 00000000..b47602d8 --- /dev/null +++ b/misc/prepadopt.sh @@ -0,0 +1,25 @@ +#!/bin/bash +TARGNODE=$1 +TARGPROF=$2 +TMPDIR=$(mktemp -d) +cd $TMPDIR +DEPLOYSRV=$(echo $SSH_CLIENT|awk '{print $1}') +UDEPLOYSRV=$DEPLOYSRV +if [[ "$DEPLOYSRV" = *":"* ]]; then + UDEPLOYSRV="[$DEPLOYSRV]" +fi +update-ca-trust +mkdir -p /etc/confluent +curl -sg https://$UDEPLOYSRV/confluent-public/os/$TARGPROF/boot/initramfs/addons.cpio > addons.cpio +curl -sg https://$UDEPLOYSRV/confluent-public/os/$TARGPROF/scripts/functions > /etc/confluent/functions +cpio -dumi < addons.cpio +systemctl status firewalld >& /dev/null && FWACTIVE=1 +if [ "$FWACTIVE" == 1 ]; then systemctl stop firewalld; fi +opt/confluent/bin/copernicus > /etc/confluent/confluent.info +opt/confluent/bin/clortho $TARGNODE $DEPLOYSRV > /etc/confluent/confluent.apikey +if [ "$FWACTIVE" == 1 ]; then systemctl start firewalld; fi +cp opt/confluent/bin/apiclient /opt/confluent/bin +curl -sg -H "CONFLUENT_APIKEY: $(cat /etc/confluent/confluent.apikey)" -H "CONFLUENT_NODENAME: $TARGNODE" https://$UDEPLOYSRV/confluent-api/self/deploycfg2 > /etc/confluent/confluent.deploycfg +# python3 /opt/confluent/bin/apiclient /confluent-api/self/deploycfg2 > /etc/confluent/confluent.deploycfg +cd - +echo rm -rf $TMPDIR From d4fbd021adf1d5ca97e23d960acd7af8bd1454a4 Mon Sep 17 00:00:00 2001 From: Tinashe Date: Wed, 22 Jan 2025 09:49:46 -0500 Subject: [PATCH 006/413] l2traceroute --- .../bin/{l2traceroute => nodel2traceroute} | 5 ++- confluent_client/doc/man/l2traceroute.ronn | 38 ----------------- .../doc/man/nodel2traceroute.ronn | 42 +++++++++++++++++++ 3 files changed, 46 insertions(+), 39 
deletions(-) rename confluent_client/bin/{l2traceroute => nodel2traceroute} (98%) delete mode 100644 confluent_client/doc/man/l2traceroute.ronn create mode 100644 confluent_client/doc/man/nodel2traceroute.ronn diff --git a/confluent_client/bin/l2traceroute b/confluent_client/bin/nodel2traceroute similarity index 98% rename from confluent_client/bin/l2traceroute rename to confluent_client/bin/nodel2traceroute index e8f9705e..233f3cc8 100755 --- a/confluent_client/bin/l2traceroute +++ b/confluent_client/bin/nodel2traceroute @@ -61,7 +61,10 @@ def get_neighbors(switch): switch_neigbors = [] url = '/networking/neighbors/by-switch/{0}/by-peername/'.format(switch) for neighbor in session.read(url): - switch = neighbor['item']['href'].strip('/') + try: + switch = neighbor['item']['href'].strip('/') + except: + continue if switch in all_switches: switch_neigbors.append(switch) return switch_neigbors diff --git a/confluent_client/doc/man/l2traceroute.ronn b/confluent_client/doc/man/l2traceroute.ronn deleted file mode 100644 index 16318567..00000000 --- a/confluent_client/doc/man/l2traceroute.ronn +++ /dev/null @@ -1,38 +0,0 @@ -l2traceroute(8) -- returns the layer 2 route through an Ethernet network managed by confluent given 2 end points. -============================== -## SYNOPSIS -`l2traceroute [options] ` - -## DESCRIPTION -**l2traceroute** is a command that returns the layer 2 route for the configered interfaces in nodeattrib. -It can also be used with the -i and -e options to check against specific interfaces on the endpoints. 
- - -## PREREQUISITES -**l2traceroute** the net..switch attributes have to be set on the end points if endpoint is not a switch - - -## OPTIONS -* ` -e` EFACE, --eface=INTERFACE - interface to check against for the second end point -* ` -i` INTERFACE, --interface=INTERFACE - interface to check against for the first end point -* ` -c` CUMULUS, --cumulus=CUMULUS - return layer 2 route through cumulus switches only -* `-h`, `--help`: - Show help message and exit - - -## EXAMPLES - * Checking route between two nodes: - `# l2traceroute_client n244 n1851` - `n244 to n1851: ['switch114']` - -* Checking route from one node to multiple nodes: - `# l2traceroute_client n244 n1833,n1851` - `n244 to n1833: ['switch114', 'switch7', 'switch32', 'switch253', 'switch85', 'switch72', 'switch21', 'switch2', 'switch96', 'switch103', 'switch115'] - n244 to n1851: ['switch114']` - - - - diff --git a/confluent_client/doc/man/nodel2traceroute.ronn b/confluent_client/doc/man/nodel2traceroute.ronn new file mode 100644 index 00000000..a5a1a428 --- /dev/null +++ b/confluent_client/doc/man/nodel2traceroute.ronn @@ -0,0 +1,42 @@ +nodel2traceroute(8) -- returns the layer 2 route through an Ethernet network managed by confluent given 2 end points. +============================== +## SYNOPSIS +`nodel2traceroute [options] ` + +## DESCRIPTION +**nodel2traceroute** is a command that returns the layer 2 route for the configered interfaces in nodeattrib. +It can also be used with the -i and -e options to check against specific interfaces on the endpoints. If the +--interface or --eface option are not used then the command will check for routes against all the defined +interfaces in nodeattrib (net.*.switch) for the nodes. 
+ + + +## PREREQUISITES +**nodel2traceroute** the net..switch attributes have to be set on the end points if endpoint is not a switch + + +## OPTIONS +* ` -e` EFACE, --eface=INTERFACE + interface to check against for the second end point or end points if using checking against multiple nodes +* ` -i` INTERFACE, --interface=INTERFACE + interface to check against for the first end point +* ` -c` CUMULUS, --cumulus=CUMULUS + return layer 2 route through cumulus switches only +* `-h`, `--help`: + Show help message and exit + + +## EXAMPLES + * Checking route between two nodes: + `# nodel2traceroute n244 n1851` + `n244 to n1851: ['switch114']` + +* Checking route from one node to multiple nodes: + `# nodel2traceroute n244 n1833,n1851` + `n244 to n1833: ['switch114', 'switch7', 'switch32', 'switch253', 'switch85', 'switch72', 'switch21', 'switch2', 'switch96', 'switch103', 'switch115'] + n244 to n1851: ['switch114']` + + + + + From e3d70f351d65d1c91a52e1966f64d645e016fced Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 22 Jan 2025 15:44:49 -0500 Subject: [PATCH 007/413] Provide internal URL shortening service Permit users to have very long profile names, and provide a URL shortening service to bridge the gap for fixed-width field limitations in DHCP/PXE. 
--- .../confluent/discovery/protocols/pxe.py | 44 ++++++++++++++----- confluent_server/confluent/httpapi.py | 15 ++++++- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/confluent_server/confluent/discovery/protocols/pxe.py b/confluent_server/confluent/discovery/protocols/pxe.py index 64e64c79..2be13883 100644 --- a/confluent_server/confluent/discovery/protocols/pxe.py +++ b/confluent_server/confluent/discovery/protocols/pxe.py @@ -22,6 +22,7 @@ # option 97 = UUID (wireformat) +import base64 import confluent.config.configmanager as cfm import confluent.collective.manager as collective import confluent.noderange as noderange @@ -35,6 +36,7 @@ import eventlet import eventlet.green.socket as socket import eventlet.green.select as select import netifaces +import os import struct import time import traceback @@ -165,6 +167,18 @@ pxearchs = { } +shorturls = {} +def register_shorturl(url): + urlid = base64.urlsafe_b64encode(os.urandom(3)) + while urlid in shorturls: + urlid = base64.urlsafe_b64encode(os.urandom(3)) + urlid = urlid.decode() + shorturls[urlid] = url + returl = '/'.join(url.split('/')[:3]) + returl += '/confluent-api/boot/su/' + urlid + '/' + os.path.basename(url) + return returl + + uuidmap = {} macmap = {} attribwatcher = None @@ -369,12 +383,15 @@ def proxydhcp(handler, nodeguess): elif disco['arch'] == 'uefi-aarch64': bootfile = b'confluent/aarch64/ipxe.efi' if len(bootfile) > 127: - log.log( - {'info': 'Boot offer cannot be made to {0} as the ' - 'profile name "{1}" is {2} characters longer than is supported ' - 'for this boot method.'.format( - node, profile, len(bootfile) - 127)}) - continue + if bootfile.startswith(b'http'): + bootfile = register_shorturl(bootfile.decode('utf8')).encode('utf8') + else: + log.log( + {'info': 'Boot offer cannot be made to {0} as the ' + 'profile name "{1}" is {2} characters longer than is supported ' + 'for this boot method.'.format( + node, profile, len(bootfile) - 127)}) + continue rpv[:240] = 
rqv[:240].tobytes() rpv[0:1] = b'\x02' rpv[108:108 + len(bootfile)] = bootfile @@ -797,12 +814,15 @@ def reply_dhcp4(node, info, packet, cfg, reqview, httpboot, cfd, profile, sock=N if not isinstance(bootfile, bytes): bootfile = bootfile.encode('utf8') if len(bootfile) > 127: - log.log( - {'info': 'Boot offer cannot be made to {0} as the ' - 'profile name "{1}" is {2} characters longer than is supported ' - 'for this boot method.'.format( - node, profile, len(bootfile) - 127)}) - return + if bootfile.startswith(b'http'): + bootfile = register_shorturl(bootfile.decode('utf8')).encode('utf8') + else: + log.log( + {'info': 'Boot offer cannot be made to {0} as the ' + 'profile name "{1}" is {2} characters longer than is supported ' + 'for this boot method.'.format( + node, profile, len(bootfile) - 127)}) + return repview[108:108 + len(bootfile)] = bootfile elif info.get('architecture', None) == 'uefi-aarch64' and packet.get(77, None) == b'iPXE': if not profile: diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index 5048b8f8..e2144235 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -30,6 +30,7 @@ import confluent.config.attributes as attribs import confluent.config.configmanager as configmanager import confluent.consoleserver as consoleserver import confluent.discovery.core as disco +import confluent.discovery.protocols.pxe as pxe import confluent.forwarder as forwarder import confluent.exceptions as exc import confluent.log as log @@ -640,6 +641,8 @@ def resourcehandler(env, start_response): yield '500 - ' + str(e) return + + def resourcehandler_backend(env, start_response): """Function to handle new wsgi requests """ @@ -648,7 +651,7 @@ def resourcehandler_backend(env, start_response): ('Pragma', 'no-cache'), ('X-Content-Type-Options', 'nosniff'), ('Content-Security-Policy', "default-src 'self'"), - ('X-XSS-Protection', '1; mode=block'), ('X-Frame-Options', 'deny'), + 
('X-XySS-Protection', '1; mode=block'), ('X-Frame-Options', 'deny'), ('Strict-Transport-Security', 'max-age=86400'), ('X-Permitted-Cross-Domain-Policies', 'none')] reqbody = None @@ -671,6 +674,16 @@ def resourcehandler_backend(env, start_response): request = env['PATH_INFO'].split('/') if not request[0]: request = request[1:] + if request[1] == 'su': # shorturl + targurl = pxe.shorturls.get(request[2], None) + if not targurl: + start_response('404 Not Found', headers) + yield '' + return + headers.append(('Location', targurl)) + start_response('302 Found', headers) + yield '' + return if len(request) != 4: start_response('400 Bad Request', headers) yield '' From 67aaee3b4e6ba8f433a57dfa4462f074d0fa8ca9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 24 Jan 2025 07:58:31 -0500 Subject: [PATCH 008/413] Adapt to the bond modes When the team modes were defined in attributes, it was based on the teamd names. Since the ecosystem abandoned teamd, we went back to bond. However, we neglected to map all the names to the closest bond type equivalent. Change confignet to do the mapping. 
--- confluent_osdeploy/common/profile/scripts/confignet | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 71c156a7..f9818c62 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -301,6 +301,12 @@ class WickedManager(object): class NetworkManager(object): + bondtypes = { + 'lacp': '802.3ad', + 'loadbalance': 'balance-alb', + 'roundrobin': 'balance-rr', + 'activebackup:' 'active-backup', + } def __init__(self, devtypes, deploycfg): self.deploycfg = deploycfg self.connections = {} @@ -401,8 +407,8 @@ class NetworkManager(object): for arg in cmdargs: cargs.append(arg) cargs.append(cmdargs[arg]) - if stgs['team_mode'] == 'lacp': - stgs['team_mode'] = '802.3ad' + if stgs['team_mode'] in self.bondtypes: + stgs['team_mode'] = self.bondtypes[stgs['team_mode']] subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond', 'con-name', cname, 'connection.interface-name', cname, 'bond.options', 'mode={}'.format(stgs['team_mode'])] + cargs) for iface in cfg['interfaces']: self.add_team_member(cname, iface) From 79d5a637a7184bc55717405ca3708df9aefe1ccc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 24 Jan 2025 11:12:25 -0500 Subject: [PATCH 009/413] Correct syntax error in confignet --- confluent_osdeploy/common/profile/scripts/confignet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index f9818c62..64d3dbc0 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -305,7 +305,7 @@ class NetworkManager(object): 'lacp': '802.3ad', 'loadbalance': 'balance-alb', 'roundrobin': 'balance-rr', - 'activebackup:' 'active-backup', + 'activebackup': 'active-backup', } def 
__init__(self, devtypes, deploycfg): self.deploycfg = deploycfg From b89ae4d74a13de5c2c914dd886f81bdf3757dc02 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 27 Jan 2025 12:58:42 -0500 Subject: [PATCH 010/413] Fix bytes being stored in db on identity image use --- confluent_server/confluent/selfservice.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/selfservice.py b/confluent_server/confluent/selfservice.py index 2486a71e..d206337e 100644 --- a/confluent_server/confluent/selfservice.py +++ b/confluent_server/confluent/selfservice.py @@ -135,6 +135,8 @@ def handle_request(env, start_response): return righthmac = hmac.new(hmackey, cryptkey, hashlib.sha256).digest() if righthmac == crypthmac: + if not isinstance(cryptkey, str): + cryptkey = cryptkey.decode() cfgupdate = {nodename: {'crypted.selfapikey': {'hashvalue': cryptkey}}} cfg.set_node_attributes(cfgupdate) cfg.clear_node_attributes([nodename], ['secret.selfapiarmtoken']) From 0fadb00acf1eccf94ffc3f504ac8f677e5fabba3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 30 Jan 2025 08:03:36 -0500 Subject: [PATCH 011/413] Pass through slot geometry if provided --- confluent_server/confluent/plugins/info/layout.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/confluent_server/confluent/plugins/info/layout.py b/confluent_server/confluent/plugins/info/layout.py index ca7f120c..71f288d3 100644 --- a/confluent_server/confluent/plugins/info/layout.py +++ b/confluent_server/confluent/plugins/info/layout.py @@ -84,13 +84,14 @@ def retrieve(nodes, element, configmanager, inputdata): else: allnodedata[enclosure]['children'] = enclosuremap[enclosure] needheight = set([]) + needslots = set(enclosuremap) for node in allnodedata: if 'height' not in allnodedata[node]: needheight.add(node) - needheight = ','.join(needheight) - if needheight: + needheightrange = ','.join(needheight.union(needslots)) + if needheightrange: for rsp in core.handle_path( - 
'/noderange/{0}/description'.format(needheight), + '/noderange/{0}/description'.format(needheightrange), 'retrieve', configmanager, inputdata=None): if not hasattr(rsp, 'kvpairs'): @@ -98,7 +99,10 @@ def retrieve(nodes, element, configmanager, inputdata): continue kvp = rsp.kvpairs for node in kvp: - allnodedata[node]['height'] = kvp[node]['height'] + if node in needheight: + allnodedata[node]['height'] = kvp[node]['height'] + if node in needslots: + allnodedata[node]['slots'] = kvp[node]['slots'] for node in allnodedata: if 'height' not in allnodedata[node]: allnodedata[node]['height'] = 1 From 7493cc5d48088376638c7a97c84e70ad0a643d59 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 30 Jan 2025 09:37:10 -0500 Subject: [PATCH 012/413] Normalize enclosure handling --- .../confluent/plugins/info/layout.py | 59 ++++++++++++++++--- 1 file changed, 52 insertions(+), 7 deletions(-) diff --git a/confluent_server/confluent/plugins/info/layout.py b/confluent_server/confluent/plugins/info/layout.py index 71f288d3..8604eaeb 100644 --- a/confluent_server/confluent/plugins/info/layout.py +++ b/confluent_server/confluent/plugins/info/layout.py @@ -15,6 +15,22 @@ import confluent.core as core import confluent.messages as msg +def baytonumber(bay): + if not bay: + return None + try: + return int(bay) + except ValueError: + if len(bay) == 2: + # Treat a hexadecimal system as a leading decimal digit and letter compile + # 1a == slot 1, 1b == slot 2, 2a == slot 1, etc.. 
+ try: + tmp = int(bay, 16) + return (2 * (tmp >> 4) - 1) + ((tmp & 15) % 10) + except ValueError: + return None + return None + def retrieve(nodes, element, configmanager, inputdata): locationinfo = configmanager.get_node_attributes(nodes, (u'enclosure.manager', u'enclosure.bay', u'location.rack', @@ -24,6 +40,7 @@ def retrieve(nodes, element, configmanager, inputdata): allnodedata = {} needenclosures = set([]) locatednodes = set([]) + needcoord = {} for node in locationinfo: nodeinfo = locationinfo[node] rack = nodeinfo.get(u'location.rack', {}).get('value', '') @@ -31,17 +48,23 @@ def retrieve(nodes, element, configmanager, inputdata): row = nodeinfo.get(u'location.row', {}).get('value', '') enclosure = nodeinfo.get(u'enclosure.manager', {}).get('value', None) bay = nodeinfo.get(u'enclosure.bay', {}).get('value', None) + height = nodeinfo.get(u'location.height', {}).get('value', None) if enclosure: if enclosure not in enclosuremap: - enclosuremap[enclosure] = {} - enclosuremap[enclosure][bay] = node + enclosuremap[enclosure] = {'bays': {}, 'coordinates': {}} + bay = baytonumber(bay) + if bay is None: + continue + bay = f'{bay}' + enclosuremap[enclosure]['bays'][bay] = node + needcoord[node] = enclosure if u: if row not in rackmap: rackmap[row] = {} if rack not in rackmap[row]: rackmap[row][rack] = {} - rackmap[row][rack][u] = {'node': enclosure, 'children': enclosuremap[enclosure]} + rackmap[row][rack][u] = {'node': enclosure, 'children': enclosuremap[enclosure]['bays'], 'nodecoordinates': enclosuremap[enclosure]['coordinates']} allnodedata[enclosure] = rackmap[row][rack][u] if height: allnodedata[enclosure]['height'] = height @@ -66,7 +89,7 @@ def retrieve(nodes, element, configmanager, inputdata): row = nodeinfo.get(u'location.row', {}).get('value', '') height = nodeinfo.get(u'location.height', {}).get('value', None) if u: - allnodedata[enclosure] = {'node': enclosure, 'children': enclosuremap[enclosure]} + allnodedata[enclosure] = {'node': enclosure, 
'children': enclosuremap[enclosure]['bays'], 'nodecoordinates': enclosuremap[enclosure]['coordinates']} if height: allnodedata[enclosure]['height'] = height if row not in rackmap: @@ -82,13 +105,14 @@ def retrieve(nodes, element, configmanager, inputdata): if enclosure not in allnodedata: results['errors'].append('Enclosure {} is missing required location information'.format(enclosure)) else: - allnodedata[enclosure]['children'] = enclosuremap[enclosure] + allnodedata[enclosure]['children'] = enclosuremap[enclosure]['bays'] + allnodedata[enclosure]['nodecoordinates'] = enclosuremap[enclosure]['coordinates'] needheight = set([]) needslots = set(enclosuremap) for node in allnodedata: if 'height' not in allnodedata[node]: needheight.add(node) - needheightrange = ','.join(needheight.union(needslots)) + needheightrange = ','.join(needheight.union(needslots).union(needcoord)) if needheightrange: for rsp in core.handle_path( '/noderange/{0}/description'.format(needheightrange), @@ -101,8 +125,29 @@ def retrieve(nodes, element, configmanager, inputdata): for node in kvp: if node in needheight: allnodedata[node]['height'] = kvp[node]['height'] - if node in needslots: + if node in needslots and 'slots' in kvp[node]: allnodedata[node]['slots'] = kvp[node]['slots'] + if node in needcoord and 'slotcoord' in kvp[node]: + enclosuremap[needcoord[node]]['coordinates'][node] = kvp[node]['slotcoord'] + del needcoord[node] + for enclosure in enclosuremap: + if 'slots' not in allnodedata[enclosure]: + # if slots not described by chassis, assume a double-wide form factor + allnodedata[enclosure]['slots'] = [2, allnodedata[enclosure]['height']] + for node in needcoord: # have to fill in based on heuristic absent of specific data + enclosure = needcoord[node] + currslot = None + for bay in enclosuremap[enclosure]['bays']: + if enclosuremap[enclosure]['bays'][bay] == node: + currslot = int(bay) + if currslot is None: + continue + if enclosure in allnodedata and 'slots' in 
allnodedata[enclosure]: + dimensions = allnodedata[enclosure]['slots'] + if dimensions[0] > dimensions[1]: + enclosuremap[enclosure]['coordinates'][node] = [(currslot - 1) // dimensions[1] + 1, (currslot - 1) % dimensions[1] + 1] + else: + enclosuremap[enclosure]['coordinates'][node] = [(currslot - 1) % dimensions[0] + 1, (currslot - 1) // dimensions[0] + 1] for node in allnodedata: if 'height' not in allnodedata[node]: allnodedata[node]['height'] = 1 From e536789c9de15428002c781274cd04d45e63ac21 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 30 Jan 2025 14:27:42 -0500 Subject: [PATCH 013/413] Mitigate send of duplicate replies If an unrelated network interface shares a vlan with an otherwise pertinent interface, defer and be silent to avoid confusion on the line. --- .../confluent/discovery/protocols/pxe.py | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/discovery/protocols/pxe.py b/confluent_server/confluent/discovery/protocols/pxe.py index 2be13883..8491933c 100644 --- a/confluent_server/confluent/discovery/protocols/pxe.py +++ b/confluent_server/confluent/discovery/protocols/pxe.py @@ -459,7 +459,10 @@ def snoop(handler, protocol=None, nodeguess=None): try: # Just need some delay, picked a prime number so that overlap with other # timers might be reduced, though it really is probably nothing - ready = select.select([net4, net6], [], [], None) + ready = select.select([net4, net6], [], [], 1) + for txid in list(_recent_txids): + if _recent_txids[txid] < time.time(): + del _recent_txids[txid] if not ready or not ready[0]: continue for netc in ready[0]: @@ -732,6 +735,7 @@ def get_my_duid(): _myuuid = uuid.uuid4().bytes return _myuuid +_recent_txids = {} def reply_dhcp4(node, info, packet, cfg, reqview, httpboot, cfd, profile, sock=None, requestor=None): replen = 275 # default is going to be 286 @@ -766,6 +770,7 @@ def reply_dhcp4(node, info, packet, cfg, reqview, httpboot, cfd, profile, 
sock=N repview = repview[28:] repview[0:1] = b'\x02' repview[1:10] = reqview[1:10] # duplicate txid, hwlen, and others + thistxid = bytes(repview[4:8]) repview[10:11] = b'\x80' # always set broadcast repview[28:44] = reqview[28:44] # copy chaddr field relayip = reqview[24:28].tobytes() @@ -783,7 +788,7 @@ def reply_dhcp4(node, info, packet, cfg, reqview, httpboot, cfd, profile, sock=N log.log({'error': nicerr}) if niccfg.get('ipv4_broken', False): # Received a request over a nic with no ipv4 configured, ignore it - log.log({'error': 'Skipping boot reply to {0} due to no viable IPv4 configuration on deployment system'.format(node)}) + log.log({'error': 'Skipping boot reply to {0} due to no viable IPv4 configuration on deployment system on interface index "{}"'.format(node, info['netinfo']['ifidx'])}) return clipn = None if niccfg['ipv4_method'] == 'firmwarenone': @@ -903,12 +908,26 @@ def reply_dhcp4(node, info, packet, cfg, reqview, httpboot, cfd, profile, sock=N boottype = 'HTTP' else: boottype = 'PXE' + deferanswer = None if clipn: + _recent_txids[thistxid] = time.time() + 1 ipinfo = 'with static address {0}'.format(niccfg['ipv4_address']) else: + # use txid to track + # defer sending for a second if otherwise unserved... 
+ deferanswer = thistxid ipinfo = 'without address, served from {0}'.format(myip) if relayipa: ipinfo += ' (relayed to {} via {})'.format(relayipa, requestor[0]) + eventlet.spawn(send_rsp, repview, replen, requestor, relayip, reqview, info, deferanswer, isboot, node, boottype, ipinfo) + + +def send_rsp(repview, replen, requestor, relayip, reqview, info, defertxid, isboot, node, boottype, ipinfo): + if defertxid: + eventlet.sleep(0.5) + if defertxid in _recent_txids: + log.log({'info': 'Skipping reply for {} over interface {} due to better offer being made over other interface'.format(node, info['netinfo']['ifidx'])}) + return if isboot: log.log({ 'info': 'Offering {0} boot {1} to {2}'.format(boottype, ipinfo, node)}) From e901559644d3292ab26725011937e8e1d1b99027 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 30 Jan 2025 15:25:10 -0500 Subject: [PATCH 014/413] Add mechanism to explicitly ignore nics for netboot A service.cfg configuration can be applied to ignore nics for netboot # cat /etc/confluent/service.cfg #[http] #bindhost = /var/run/confluent/httpapi [netboot] ignorenics=enp65s0f1np1,enp65s0f3np3 --- .../confluent/discovery/protocols/pxe.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/protocols/pxe.py b/confluent_server/confluent/discovery/protocols/pxe.py index 8491933c..d248d4b1 100644 --- a/confluent_server/confluent/discovery/protocols/pxe.py +++ b/confluent_server/confluent/discovery/protocols/pxe.py @@ -23,6 +23,7 @@ # option 97 = UUID (wireformat) import base64 +import confluent.config.conf as inifile import confluent.config.configmanager as cfm import confluent.collective.manager as collective import confluent.noderange as noderange @@ -410,12 +411,18 @@ def proxydhcp(handler, nodeguess): def start_proxydhcp(handler, nodeguess=None): eventlet.spawn_n(proxydhcp, handler, nodeguess) - +ignorenics = None def snoop(handler, protocol=None, nodeguess=None): + global 
ignorenics #TODO(jjohnson2): ipv6 socket and multicast for DHCPv6, should that be #prominent #TODO(jjohnson2): enable unicast replies. This would suggest either # injection into the neigh table before OFFER or using SOCK_RAW. + ignorenics = inifile.get_option('netboot', 'ignorenics') + if ignorenics: + if not isinstance(ignorenics, bytes): + ignorenics = ignorenics.encode() + ignorenics = ignorenics.split(b',') start_proxydhcp(handler, nodeguess) tracelog = log.Logger('trace') global attribwatcher @@ -478,6 +485,14 @@ def snoop(handler, protocol=None, nodeguess=None): _, level, typ = struct.unpack('QII', cmsgarr[:16]) if level == socket.IPPROTO_IP and typ == IP_PKTINFO: idx, recv = struct.unpack('II', cmsgarr[16:24]) + if ignorenics: + ignore = False + for nic in ignorenics: + if libc.if_nametoindex(nic) == idx: + ignore = True + break # ignore DHCP from ignored NIC + if ignore: + continue recv = ipfromint(recv) rqv = memoryview(rawbuffer)[:i] client = (ipfromint(clientaddr.sin_addr.s_addr), socket.htons(clientaddr.sin_port)) @@ -633,6 +648,7 @@ def check_reply(node, info, packet, sock, cfg, reqview, addr, requestor): requestor = ('0.0.0.0', None) if requestor[0] == '0.0.0.0' and not info.get('uuid', None): return # ignore DHCP from local non-PXE segment + httpboot = info.get('architecture', None) == 'uefi-httpboot' cfd = cfg.get_node_attributes(node, ('deployment.*', 'collective.managercandidates')) profile, stgprofile = get_deployment_profile(node, cfg, cfd) From b9f40513962cad0f9bb468f8d44f4e48aa8b29f6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 3 Feb 2025 16:40:57 -0500 Subject: [PATCH 015/413] Export variables set in confluent functions --- .../el7-diskless/profiles/default/scripts/functions | 1 + confluent_osdeploy/el7/profiles/default/scripts/functions | 1 + .../el8-diskless/profiles/default/scripts/functions | 1 + confluent_osdeploy/el8/profiles/default/scripts/functions | 1 + .../el9-diskless/profiles/default/scripts/functions | 1 + 
confluent_osdeploy/genesis/profiles/default/scripts/functions | 1 + .../suse15-diskless/profiles/default/scripts/functions | 1 + confluent_osdeploy/suse15/profiles/hpc/scripts/functions | 1 + confluent_osdeploy/suse15/profiles/server/scripts/functions | 1 + .../ubuntu18.04/profiles/default/scripts/functions | 1 + .../ubuntu20.04-diskless/profiles/default/scripts/functions | 1 + .../ubuntu20.04/profiles/default/scripts/functions | 1 + .../ubuntu22.04/profiles/default/scripts/functions | 1 + 13 files changed, 13 insertions(+) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/functions b/confluent_osdeploy/el7-diskless/profiles/default/scripts/functions index 026697b7..f68f3a5e 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/functions +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/functions @@ -53,6 +53,7 @@ function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export confluent_profile confluent_mgr nodename } fetch_remote() { diff --git a/confluent_osdeploy/el7/profiles/default/scripts/functions b/confluent_osdeploy/el7/profiles/default/scripts/functions index 026697b7..f68f3a5e 100644 --- a/confluent_osdeploy/el7/profiles/default/scripts/functions +++ b/confluent_osdeploy/el7/profiles/default/scripts/functions @@ -53,6 +53,7 @@ function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export confluent_profile confluent_mgr nodename } fetch_remote() { diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/functions b/confluent_osdeploy/el8-diskless/profiles/default/scripts/functions index 026697b7..f68f3a5e 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/functions +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/functions @@ -53,6 +53,7 @@ 
function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export confluent_profile confluent_mgr nodename } fetch_remote() { diff --git a/confluent_osdeploy/el8/profiles/default/scripts/functions b/confluent_osdeploy/el8/profiles/default/scripts/functions index 026697b7..f68f3a5e 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/functions +++ b/confluent_osdeploy/el8/profiles/default/scripts/functions @@ -53,6 +53,7 @@ function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export confluent_profile confluent_mgr nodename } fetch_remote() { diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/functions b/confluent_osdeploy/el9-diskless/profiles/default/scripts/functions index 026697b7..f68f3a5e 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/functions +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/functions @@ -53,6 +53,7 @@ function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export confluent_profile confluent_mgr nodename } fetch_remote() { diff --git a/confluent_osdeploy/genesis/profiles/default/scripts/functions b/confluent_osdeploy/genesis/profiles/default/scripts/functions index 911eb01f..97c3d54d 100644 --- a/confluent_osdeploy/genesis/profiles/default/scripts/functions +++ b/confluent_osdeploy/genesis/profiles/default/scripts/functions @@ -23,6 +23,7 @@ function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export nodename confluent_mgr confluent_profile } fetch_remote() { diff --git 
a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/functions b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/functions index 026697b7..f68f3a5e 100644 --- a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/functions +++ b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/functions @@ -53,6 +53,7 @@ function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export confluent_profile confluent_mgr nodename } fetch_remote() { diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/functions b/confluent_osdeploy/suse15/profiles/hpc/scripts/functions index 026697b7..f68f3a5e 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/scripts/functions +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/functions @@ -53,6 +53,7 @@ function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export confluent_profile confluent_mgr nodename } fetch_remote() { diff --git a/confluent_osdeploy/suse15/profiles/server/scripts/functions b/confluent_osdeploy/suse15/profiles/server/scripts/functions index 026697b7..f68f3a5e 100644 --- a/confluent_osdeploy/suse15/profiles/server/scripts/functions +++ b/confluent_osdeploy/suse15/profiles/server/scripts/functions @@ -53,6 +53,7 @@ function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export confluent_profile confluent_mgr nodename } fetch_remote() { diff --git a/confluent_osdeploy/ubuntu18.04/profiles/default/scripts/functions b/confluent_osdeploy/ubuntu18.04/profiles/default/scripts/functions index 026697b7..f68f3a5e 100644 --- a/confluent_osdeploy/ubuntu18.04/profiles/default/scripts/functions +++ b/confluent_osdeploy/ubuntu18.04/profiles/default/scripts/functions @@ 
-53,6 +53,7 @@ function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export confluent_profile confluent_mgr nodename } fetch_remote() { diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/functions b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/functions index 170cb897..5df56438 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/functions +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/functions @@ -50,6 +50,7 @@ function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export confluent_mgr confluent_profile nodename } fetch_remote() { diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/functions b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/functions index 026697b7..f68f3a5e 100644 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/functions +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/functions @@ -53,6 +53,7 @@ function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export confluent_profile confluent_mgr nodename } fetch_remote() { diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/functions b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/functions index 026697b7..f68f3a5e 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/functions +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/functions @@ -53,6 +53,7 @@ function set_confluent_vars() { if [ -z "$confluent_profile" ]; then confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') fi + export confluent_profile confluent_mgr nodename } 
fetch_remote() { From 564e136dc58205cdc19f8e5915d8998cd3b625b9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 4 Feb 2025 09:11:53 -0500 Subject: [PATCH 016/413] Always provide a badreadings output, even if empty --- .../confluent/plugins/hardwaremanagement/redfish.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index f89d9a09..c7c5f5d4 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -1186,13 +1186,12 @@ class IpmiHandler(object): health = response['health'] health = _str_health(health) self.output.put(msg.HealthSummary(health, self.node)) - if 'badreadings' in response: - badsensors = [] - for reading in response['badreadings']: - if hasattr(reading, 'health'): - reading.health = _str_health(reading.health) - badsensors.append(reading) - self.output.put(msg.SensorReadings(badsensors, name=self.node)) + badsensors = [] + for reading in response.get('badreadings', []): + if hasattr(reading, 'health'): + reading.health = _str_health(reading.health) + badsensors.append(reading) + self.output.put(msg.SensorReadings(badsensors, name=self.node)) else: raise exc.InvalidArgumentException('health is read-only') From 2fcfbe97742c6a3aaf30786b0625ed279963d0fa Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 5 Feb 2025 16:57:26 -0500 Subject: [PATCH 017/413] Fix multi-session access to shell Shell sessions are now wired up to vtbufferd The shellserver now correctly accounts for sessions being started. The sockapi now correctly allows the client to specify/attach to a specific session id. 
--- confluent_server/confluent/shellserver.py | 48 +++++++++++++++++------ confluent_server/confluent/sockapi.py | 2 +- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/confluent_server/confluent/shellserver.py b/confluent_server/confluent/shellserver.py index 3dbf475a..4e81ec2b 100644 --- a/confluent_server/confluent/shellserver.py +++ b/confluent_server/confluent/shellserver.py @@ -22,14 +22,37 @@ import confluent.consoleserver as consoleserver import confluent.exceptions as exc import confluent.messages as msg +import eventlet +import time activesessions = {} +_reaper = None + +def reapsessions(): + while True: + eventlet.sleep(30) + for clientid in activesessions: + currcli = activesessions[clientid] + for sesshdl in list(currcli): + currsess = currcli[sesshdl] + if currsess.numusers == 0 and currsess.expiry < time.time(): + currsess.close() + del activesessions[clientid][sesshdl] class _ShellHandler(consoleserver.ConsoleHandler): _plugin_path = '/nodes/{0}/_shell/session' _genwatchattribs = False _logtobuffer = False + def __init__(self, node, configmanager, width=80, height=24, prefix=''): + super().__init__(node, configmanager, width, height) + self.termprefix = prefix + self.numusers = 0 + global _reaper + if _reaper is None: + _reaper = eventlet.spawn(reapsessions) + + def check_collective(self, attrvalue): return @@ -37,13 +60,13 @@ class _ShellHandler(consoleserver.ConsoleHandler): # suppress logging through proving a stub 'log' function return - def feedbuffer(self, data): - return - #return super().feedbuffer(data) + #def feedbuffer(self, data): + # return + # #return super().feedbuffer(data) - def get_recent(self): - retdata, connstate = super(_ShellHandler, self).get_recent() - return '', connstate + #def get_recent(self): + # retdata, connstate = super(_ShellHandler, self).get_recent() + # return '', connstate def _got_disconnected(self): self.connectstate = 'closed' @@ -106,7 +129,7 @@ class 
ShellSession(consoleserver.ConsoleSession): def connect_session(self): global activesessions tenant = self.configmanager.tenant - if (self.configmanager.tenant, self.node) not in activesessions: + if (self.configmanager.tenant, self.node, self.username) not in activesessions: activesessions[(tenant, self.node, self.username)] = {} if self.sessionid is None: self.sessionid = 1 @@ -114,15 +137,16 @@ class ShellSession(consoleserver.ConsoleSession): self.sessionid += 1 self.sessionid = str(self.sessionid) if self.sessionid not in activesessions[(tenant, self.node, self.username)]: - activesessions[(tenant, self.node, self.username)][self.sessionid] = _ShellHandler(self.node, self.configmanager, width=self.width, height=self.height) + activesessions[(tenant, self.node, self.username)][self.sessionid] = _ShellHandler(self.node, self.configmanager, width=self.width, height=self.height, prefix='s_{}_{}'.format(self.username, self.sessionid)) self.conshdl = activesessions[(self.configmanager.tenant, self.node, self.username)][self.sessionid] + self.conshdl.numusers += 1 def destroy(self): try: - activesessions[(self.configmanager.tenant, self.node, - self.username)][self.sessionid].close() - del activesessions[(self.configmanager.tenant, self.node, - self.username)][self.sessionid] + self.conshdl.numusers -= 1 + if self.conshdl.numusers == 0: + self.conshdl.expiry = time.time() + 120 + except KeyError: pass super(ShellSession, self).destroy() diff --git a/confluent_server/confluent/sockapi.py b/confluent_server/confluent/sockapi.py index 86534767..48b1065d 100644 --- a/confluent_server/confluent/sockapi.py +++ b/confluent_server/confluent/sockapi.py @@ -301,7 +301,7 @@ def start_term(authname, cfm, connection, params, path, authdata, skipauth): node=node, configmanager=cfm, username=authname, datacallback=ccons.sendall, skipreplay=skipreplay) elif len(elems) >= 6 and elems[3:5] == ['shell', 'sessions']: - if len(elems) == 7: + if len(elems) == 6 and elems[5]: sessionid = 
elems[5] else: sessionid = None From 52497d7d9571fe2521c39ac2ff19d4c9024f469a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 6 Feb 2025 10:44:59 -0500 Subject: [PATCH 018/413] Broaden except clause on automation check For whatever reason, we can't seem to specifically catch the CalledProcessError and have to resort to generic Exception. --- confluent_server/bin/confluent_selfcheck | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/bin/confluent_selfcheck b/confluent_server/bin/confluent_selfcheck index f1de6c71..fe45f637 100755 --- a/confluent_server/bin/confluent_selfcheck +++ b/confluent_server/bin/confluent_selfcheck @@ -223,7 +223,7 @@ if __name__ == '__main__': try: sshutil.prep_ssh_key('/etc/confluent/ssh/automation') print('OK') - except subprocess.CalledProcessError: + except Exception: emprint('Failed to load confluent automation key, syncfiles and profile ansible plays will not work (Example resolution: osdeploy initialize -a)') os.kill(int(sshutil.agent_pid), signal.SIGTERM) fprint('Checking for blocked insecure boot: ') From 9b59c2fadb1ebd5a9d7962b4f3a3e3112b69edd3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 6 Feb 2025 13:25:39 -0500 Subject: [PATCH 019/413] Have httpapi support multiple shell sessions --- confluent_server/confluent/httpapi.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index e2144235..2fbc700b 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -527,7 +527,11 @@ def wsock_handler(ws): else: delimit = '/shell/sessions' shellsession = True - node = targ.split(delimit, 1)[0] + nodesess = targ.split(delimit, 1) + node = nodesess[0] + sessidx = None + if len(nodesess) == 2 and len(nodesess[1]) > 1: + sessidx = nodesess[1][1:] node = node.rsplit('/', 1)[-1] auditmsg = {'operation': 'start', 'target': targ, 'user': 
util.stringify(username)} @@ -538,7 +542,7 @@ def wsock_handler(ws): node=node, configmanager=cfgmgr, username=username, skipreplay=skipreplay, datacallback=datacallback, - width=width, height=height) + width=width, height=height, sessionid=sessidx) else: consession = consoleserver.ConsoleSession( node=node, configmanager=cfgmgr, From 82fe75e45707878d0a951223f1e84251a0e0cf01 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 6 Feb 2025 15:59:29 -0500 Subject: [PATCH 020/413] Add aliases to attrib clear Support aliases when specified in clearing. --- .../confluent/plugins/configuration/attributes.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/confluent_server/confluent/plugins/configuration/attributes.py b/confluent_server/confluent/plugins/configuration/attributes.py index 2c9a6ac9..6986227b 100644 --- a/confluent_server/confluent/plugins/configuration/attributes.py +++ b/confluent_server/confluent/plugins/configuration/attributes.py @@ -17,6 +17,7 @@ import ast import confluent.exceptions as exc import confluent.messages as msg import confluent.config.attributes as allattributes +import confluent.config.configmanager as configmod import confluent.util as util from fnmatch import fnmatch @@ -284,6 +285,9 @@ def update_nodes(nodes, element, configmanager, inputdata): clearattribs.append(attrib) else: foundattrib = False + for candattrib in configmod._attraliases: + if fnmatch(candattrib, attrib): + attrib = configmod._attraliases[candattrib] for candattrib in allattributes.node: if fnmatch(candattrib, attrib): clearattribs.append(candattrib) From 543a42edd62c4c32236090ac868df4fff436f2df Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 6 Feb 2025 16:30:06 -0500 Subject: [PATCH 021/413] Disable SELinux policy in EL diskless images The SELinux policies do not currently work in a diskless build, disable by default, though a user may try to enable it manually after build. 
--- imgutil/imgutil | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index 276ff601..b1698c79 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -224,7 +224,7 @@ def capture_remote(args): sys.exit(1) oscat = finfo['oscategory'] subprocess.check_call(['ssh', '-o', 'LogLevel=QUIET', '-t', targ, 'python3', '/run/imgutil/capenv/imgutil', 'capturelocal']) - utillib = __file__.replace('bin/imgutil', 'lib/imgutil') + utillib = __file__.replace('bin/imgutil', 'lib/imgutil') if oscat.startswith('ubuntu'): utillib = os.path.join(utillib, '{}/initramfs-tools/'.format(oscat)) if not os.path.exists(utillib): @@ -752,6 +752,10 @@ class ElHandler(OsHandler): subprocess.check_call(['yum'] + self.yumargs) else: subprocess.check_call(['yum', '-y'] + self.yumargs) + with open(os.path.join(self.targpath, 'etc/selinux/config'), 'r') as seconfigin: + seconfig = seconfigin.read().replace('SELINUX=enforcing', 'SELINUX=disabled') + with open(os.path.join(self.targpath, 'etc/selinux/config'), 'w') as seconfigout: + seconfigout.write(seconfig) with open('/proc/mounts') as mountinfo: for line in mountinfo.readlines(): if line.startswith('selinuxfs '): @@ -761,7 +765,7 @@ class ElHandler(OsHandler): def relabel_targdir(self): subprocess.check_call( - ['setfiles', '-r', self.targpath, + ['setfiles', '-r', self.targpath, '/etc/selinux/targeted/contexts/files/file_contexts', self.targpath]) From 76d455650102153f5e5b80bbde8ba71ba1266f0a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 7 Feb 2025 10:19:54 -0500 Subject: [PATCH 022/413] Add a go version of genpasshmac --- confluent_osdeploy/utils/gopasshmac/go.mod | 10 +++++ confluent_osdeploy/utils/gopasshmac/go.sum | 4 ++ confluent_osdeploy/utils/gopasshmac/main.go | 49 +++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 confluent_osdeploy/utils/gopasshmac/go.mod create mode 100644 confluent_osdeploy/utils/gopasshmac/go.sum create mode 100644 
confluent_osdeploy/utils/gopasshmac/main.go diff --git a/confluent_osdeploy/utils/gopasshmac/go.mod b/confluent_osdeploy/utils/gopasshmac/go.mod new file mode 100644 index 00000000..83303fa8 --- /dev/null +++ b/confluent_osdeploy/utils/gopasshmac/go.mod @@ -0,0 +1,10 @@ +module genpasshmac + +go 1.22 + +toolchain go1.23.6 + +require ( + github.com/go-crypt/crypt v0.3.2 // indirect + github.com/go-crypt/x v0.3.2 // indirect +) diff --git a/confluent_osdeploy/utils/gopasshmac/go.sum b/confluent_osdeploy/utils/gopasshmac/go.sum new file mode 100644 index 00000000..91d036f4 --- /dev/null +++ b/confluent_osdeploy/utils/gopasshmac/go.sum @@ -0,0 +1,4 @@ +github.com/go-crypt/crypt v0.3.2 h1:I4i0u2g8X9bxCXIjvv19BDVXqQbddDQrURCJrOyyJos= +github.com/go-crypt/crypt v0.3.2/go.mod h1:U0YhpCizEtaVC4gVfUUN0qGn1Z6+e3at+B5uLYx/sV0= +github.com/go-crypt/x v0.3.2 h1:m2wn2+8tp28V4yDiW5NSTiyNSXnCoTs1R1+H+cAJA3M= +github.com/go-crypt/x v0.3.2/go.mod h1:uelN9rbD2e2eqE8KA26B9R6OQ0TdM6msWdPsoMM1ZFk= diff --git a/confluent_osdeploy/utils/gopasshmac/main.go b/confluent_osdeploy/utils/gopasshmac/main.go new file mode 100644 index 00000000..788b2d6c --- /dev/null +++ b/confluent_osdeploy/utils/gopasshmac/main.go @@ -0,0 +1,49 @@ +package main + +import ( + "flag" + //"fmt" + "github.com/go-crypt/crypt/algorithm/shacrypt" + "os" + "crypto/rand" + "encoding/base64" + "crypto/hmac" + "crypto/sha256" +) + +func main() { + hmackeyfile := flag.String("k", "", "Key file for HMAC calculation") + passfile := flag.String("p", "", "File to write generated password to") + cryptfile := flag.String("c", "", "File to write crypted form of key to") + hmacfile := flag.String("m", "", "File to write HMAC value to") + flag.Parse() + randbytes := make([]byte, 36) + _, err := rand.Read(randbytes) + if err != nil { + panic(err) + } + newpasswd := base64.StdEncoding.EncodeToString(randbytes) + hasher, err := shacrypt.New(shacrypt.WithVariant(shacrypt.VariantSHA256), shacrypt.WithIterations(5000)) + if err != nil { + 
panic(err) + } + + digest, err := hasher.Hash(newpasswd) + if err != nil { + panic(err) + } + cryptdata := []byte(digest.Encode()) + err = os.WriteFile(*passfile, []byte(newpasswd), 0600) + if err != nil { panic(err )} + err = os.WriteFile(*cryptfile, cryptdata, 0600) + if err != nil { panic(err )} + keydata, err := os.ReadFile(*hmackeyfile) + if err != nil { panic(err )} + hmacer := hmac.New(sha256.New, keydata) + hmacer.Write(cryptdata) + hmacresult := hmacer.Sum(nil) + hmacout := []byte(base64.StdEncoding.EncodeToString(hmacresult)) + err = os.WriteFile(*hmacfile, hmacout, 0600) + if err != nil { panic(err )} +} + From e87d6652ca53962f9edb14679eefb4a8357ba12b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 11 Feb 2025 08:35:43 -0500 Subject: [PATCH 023/413] Fix type of height when pulled from attributes location.height was left as string, which fouled further processing. --- confluent_server/confluent/plugins/info/layout.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/plugins/info/layout.py b/confluent_server/confluent/plugins/info/layout.py index 8604eaeb..2bc80e5b 100644 --- a/confluent_server/confluent/plugins/info/layout.py +++ b/confluent_server/confluent/plugins/info/layout.py @@ -67,7 +67,7 @@ def retrieve(nodes, element, configmanager, inputdata): rackmap[row][rack][u] = {'node': enclosure, 'children': enclosuremap[enclosure]['bays'], 'nodecoordinates': enclosuremap[enclosure]['coordinates']} allnodedata[enclosure] = rackmap[row][rack][u] if height: - allnodedata[enclosure]['height'] = height + allnodedata[enclosure]['height'] = int(height) else: # need to see if enclosure lands in the map naturally or need to pull it needenclosures.add(enclosure) elif u: @@ -78,7 +78,7 @@ def retrieve(nodes, element, configmanager, inputdata): rackmap[row][rack][u] = {'node': node} allnodedata[node] = rackmap[row][rack][u] if height: - allnodedata[node]['height'] = height + allnodedata[node]['height'] = 
int(height) locatednodes.add(node) cfgenc = needenclosures - locatednodes locationinfo = configmanager.get_node_attributes(cfgenc, (u'location.rack', u'location.row', u'location.u', u'location.height')) @@ -91,7 +91,7 @@ def retrieve(nodes, element, configmanager, inputdata): if u: allnodedata[enclosure] = {'node': enclosure, 'children': enclosuremap[enclosure]['bays'], 'nodecoordinates': enclosuremap[enclosure]['coordinates']} if height: - allnodedata[enclosure]['height'] = height + allnodedata[enclosure]['height'] = int(height) if row not in rackmap: rackmap[row] = {} if rack not in rackmap[row]: From 825cacde0e8f6de18244322a5663b34f9282d95a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 12 Feb 2025 09:52:23 -0500 Subject: [PATCH 024/413] Fix relay dhcp behavior The refactor for multiple nics on same vlan omitted a required parameter. --- confluent_server/confluent/discovery/protocols/pxe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/discovery/protocols/pxe.py b/confluent_server/confluent/discovery/protocols/pxe.py index d248d4b1..853e0b8d 100644 --- a/confluent_server/confluent/discovery/protocols/pxe.py +++ b/confluent_server/confluent/discovery/protocols/pxe.py @@ -935,10 +935,10 @@ def reply_dhcp4(node, info, packet, cfg, reqview, httpboot, cfd, profile, sock=N ipinfo = 'without address, served from {0}'.format(myip) if relayipa: ipinfo += ' (relayed to {} via {})'.format(relayipa, requestor[0]) - eventlet.spawn(send_rsp, repview, replen, requestor, relayip, reqview, info, deferanswer, isboot, node, boottype, ipinfo) + eventlet.spawn(send_rsp, repview, replen, requestor, relayip, reqview, info, deferanswer, isboot, node, boottype, ipinfo, sock) -def send_rsp(repview, replen, requestor, relayip, reqview, info, defertxid, isboot, node, boottype, ipinfo): +def send_rsp(repview, replen, requestor, relayip, reqview, info, defertxid, isboot, node, boottype, ipinfo, sock): if defertxid: 
eventlet.sleep(0.5) if defertxid in _recent_txids: From 5fda02b9e069c93e20ff46bce43ce5c0dd7c937a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 20 Feb 2025 12:54:55 -0500 Subject: [PATCH 025/413] Repeat NIC check loop on Ubuntu 18.04 deployments --- .../S25confluentinit | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/confluent_osdeploy/ubuntu18.04/initramfs/lib/debian-installer-startup.d/S25confluentinit b/confluent_osdeploy/ubuntu18.04/initramfs/lib/debian-installer-startup.d/S25confluentinit index 8759d669..4d0c79df 100644 --- a/confluent_osdeploy/ubuntu18.04/initramfs/lib/debian-installer-startup.d/S25confluentinit +++ b/confluent_osdeploy/ubuntu18.04/initramfs/lib/debian-installer-startup.d/S25confluentinit @@ -55,25 +55,28 @@ if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then if [ ! -z "$v4gw" ]; then setdebopt netcfg/get_gateway $v4gw string fi - for NICGUESS in $(ip link|grep LOWER_UP|grep -v LOOPBACK|cut -d ' ' -f 2 | sed -e 's/:$//'); do - ip addr add dev $NICGUESS $v4addr - if [ ! -z "$v4gw" ]; then - ip route add default via $v4gw - fi - for dsrv in $deploysrvs; do - if wget https://$dsrv/confluent-public/ --tries=1 --timeout=1 -O /dev/null > /dev/null 2>&1; then - deploysrvs=$dsrv - NIC=$NICGUESS - setdebopt netcfg/choose_interface $NIC select + NIC="" + while [ -z "$NIC" ]; do + for NICGUESS in $(ip link|grep LOWER_UP|grep -v LOOPBACK|cut -d ' ' -f 2 | sed -e 's/:$//'); do + ip addr add dev $NICGUESS $v4addr + if [ ! 
-z "$v4gw" ]; then + ip route add default via $v4gw + fi + for dsrv in $deploysrvs; do + if wget https://$dsrv/confluent-public/ --tries=1 --timeout=1 -O /dev/null > /dev/null 2>&1; then + deploysrvs=$dsrv + NIC=$NICGUESS + setdebopt netcfg/choose_interface $NIC select + break + fi + done + if [ -z "$NIC" ]; then + ip -4 a flush dev $NICGUESS + else break fi done - if [ -z "$NIC" ]; then - ip -4 a flush dev $NICGUESS - else - break - fi - done + done #TODO: nameservers elif [ "$v4cfgmeth" = "dhcp" ]; then setdebopt netcfg/disable_dhcp false boolean From 3a3f3a961d3c3ed74469f8d068ad997c545f5ca9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 20 Feb 2025 15:42:49 -0500 Subject: [PATCH 026/413] Add SMM3 to chained SMM logic --- confluent_server/confluent/discovery/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index 41767a77..40a2d38c 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -1087,7 +1087,7 @@ def get_nodename(cfg, handler, info): # Ok, see if it is something with a chassis-uuid and discover by # chassis nodename = get_nodename_from_enclosures(cfg, info) - if not nodename and handler.devname == 'SMM': + if not nodename and handler.devname in ('SMM', 'SMM3'): nodename = get_nodename_from_chained_smms(cfg, handler, info) if not nodename: # as a last resort, search switches for info # This is the slowest potential operation, so we hope for the @@ -1095,7 +1095,7 @@ def get_nodename(cfg, handler, info): nodename, macinfo = macmap.find_nodeinfo_by_mac(info['hwaddr'], cfg) maccount = macinfo['maccount'] if nodename: - if handler.devname == 'SMM': + if handler.devname in ('SMM', 'SMM3'): nl = list(cfg.filter_node_attributes( 'enclosure.extends=' + nodename)) if nl: @@ -1118,7 +1118,7 @@ def get_nodename(cfg, handler, info): return None, None if (nodename and not 
handler.discoverable_by_switch(macinfo['maccount'])): - if handler.devname == 'SMM': + if handler.devname in ('SMM', 'SMM3'): errorstr = 'Attempt to discover SMM by switch, but chained ' \ 'topology or incorrect net attributes detected, ' \ 'which is not compatible with switch discovery ' \ From 67bacc9934af50c371432b8ec5f095ac1c9814b3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 21 Feb 2025 15:25:41 -0500 Subject: [PATCH 027/413] Add sample script for bringing up a host interface --- misc/prepfish.py | 258 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 misc/prepfish.py diff --git a/misc/prepfish.py b/misc/prepfish.py new file mode 100644 index 00000000..0d5a4a23 --- /dev/null +++ b/misc/prepfish.py @@ -0,0 +1,258 @@ +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import ctypes +import fcntl +import json +from select import select +import glob +import socket +import struct +import os +import subprocess +import sys +import time + +class IpmiMsg(ctypes.Structure): + _fields_ = [('netfn', ctypes.c_ubyte), + ('cmd', ctypes.c_ubyte), + ('data_len', ctypes.c_short), + ('data', ctypes.POINTER(ctypes.c_ubyte))] + + +class IpmiSystemInterfaceAddr(ctypes.Structure): + _fields_ = [('addr_type', ctypes.c_int), + ('channel', ctypes.c_short), + ('lun', ctypes.c_ubyte)] + + +class IpmiRecv(ctypes.Structure): + _fields_ = [('recv_type', ctypes.c_int), + ('addr', ctypes.POINTER(IpmiSystemInterfaceAddr)), + ('addr_len', ctypes.c_uint), + ('msgid', ctypes.c_long), + ('msg', IpmiMsg)] + + +class IpmiReq(ctypes.Structure): + _fields_ = [('addr', ctypes.POINTER(IpmiSystemInterfaceAddr)), + ('addr_len', ctypes.c_uint), + ('msgid', ctypes.c_long), + ('msg', IpmiMsg)] + + +_IONONE = 0 +_IOWRITE = 1 +_IOREAD = 2 +IPMICTL_SET_MY_ADDRESS_CMD = ( + _IOREAD << 30 | ctypes.sizeof(ctypes.c_uint) << 16 + | ord('i') << 8 | 17) # from ipmi.h +IPMICTL_SEND_COMMAND = ( + _IOREAD << 30 | ctypes.sizeof(IpmiReq) << 16 + | ord('i') << 8 | 13) # from ipmi.h +# next is really IPMICTL_RECEIVE_MSG_TRUNC, but will only use that +IPMICTL_RECV = ( + (_IOWRITE | _IOREAD) << 30 | ctypes.sizeof(IpmiRecv) << 16 + | ord('i') << 8 | 11) # from ipmi.h +BMC_SLAVE_ADDR = ctypes.c_uint(0x20) +CURRCHAN = 0xf +ADDRTYPE = 0xc + +def get_nicname_from_dmi(): + for fi in glob.glob('/sys/firmware/dmi/entries/42-*/raw'): + dmit = memoryview(open(fi, 'rb').read()) + if dmit[0] != 42: + continue + if dmit[1] < 0xb: + continue + if dmit[4] != 0x40: # only supporting network host interface + continue + ifdatalen = dmit[5] + ifdata = dmit[6:6+ifdatalen] + if ifdata[0] != 2: + continue + idvend, idprod = struct.unpack(' Date: Tue, 25 Feb 2025 11:00:16 -0500 Subject: [PATCH 028/413] allow users to choose which lom port to use for bmc shared mode --- 
.../genesis/profiles/default/scripts/configbmc | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/confluent_osdeploy/genesis/profiles/default/scripts/configbmc b/confluent_osdeploy/genesis/profiles/default/scripts/configbmc index 4062b7af..24c3905a 100644 --- a/confluent_osdeploy/genesis/profiles/default/scripts/configbmc +++ b/confluent_osdeploy/genesis/profiles/default/scripts/configbmc @@ -210,16 +210,23 @@ def set_port_tsm(s, port, model): def set_port_xcc(s, port, model): + if '_' in port: + port_type = port.split('_')[0] + port_number = port.split('_')[1] # this could be the number or the form sfp28 || rj45 + port = port_type oport = port if port.lower() == 'dedicated': port = b'\x01' elif port.lower() in ('ml2', 'ocp'): port = b'\x02\x00' - elif port.lower() == 'lom': - if model == '7x58': - port = b'\x00\x02' - else: - port = b'\x00\x00' + elif port.lower() == 'lom': # potentially have to change to port_form.lower() == 'sfp28 || or rj45 // if it is two port sfp28 it is usually the first port that is ncsi enabled' + if port_number == '1' or port_number.lower() == 'sfp28': + port = b'\x05\x00' + elif port_number == '3' or port_number.lower() == 'rj45': + if model == '7x58': + port = b'\x00\x02' + else: + port = b'\x00\x00' else: port = port.split(' ') port = bytes(bytearray([int(x) for x in port])) From a595abe9e64d30a1583d6efd2c402ff7148c741b Mon Sep 17 00:00:00 2001 From: Tinashe Date: Tue, 25 Feb 2025 12:13:18 -0500 Subject: [PATCH 029/413] also allow users to just specify lom and we use the first port --- .../genesis/profiles/default/scripts/configbmc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/confluent_osdeploy/genesis/profiles/default/scripts/configbmc b/confluent_osdeploy/genesis/profiles/default/scripts/configbmc index 24c3905a..7073ddd5 100644 --- a/confluent_osdeploy/genesis/profiles/default/scripts/configbmc +++ b/confluent_osdeploy/genesis/profiles/default/scripts/configbmc @@ -227,6 +227,11 @@ def 
set_port_xcc(s, port, model): port = b'\x00\x02' else: port = b'\x00\x00' + else: + if model == '7x58': + port = b'\x00\x02' + else: + port = b'\x00\x00' else: port = port.split(' ') port = bytes(bytearray([int(x) for x in port])) From f62c0db678655b59505200697eb55aa70773298c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 25 Feb 2025 15:29:22 -0500 Subject: [PATCH 030/413] Remove ssh_config directive not supported by EL7wq --- misc/finalizeadopt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/finalizeadopt.sh b/misc/finalizeadopt.sh index 7b9413d1..9a49a3ae 100644 --- a/misc/finalizeadopt.sh +++ b/misc/finalizeadopt.sh @@ -22,7 +22,7 @@ else Host * HostbasedAuthentication yes EnableSSHKeysign yes - HostbasedKeyTypes *ed25519* +# HostbasedKeyTypes *ed25519* EOF fi fi From 75a0f44a362534dbd77e5a721d855824963108d3 Mon Sep 17 00:00:00 2001 From: Hengli Kuang Date: Fri, 28 Feb 2025 13:53:28 +0800 Subject: [PATCH 031/413] add a loop to find the accessible deployer when the method is not static --- .../ubuntu20.04/initramfs/scripts/init-premount/confluent | 6 ++++++ .../ubuntu22.04/initramfs/scripts/init-premount/confluent | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent index 528b27d6..b3fcdef6 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent @@ -56,6 +56,12 @@ if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then echo $NIC > /tmp/autodetectnic else configure_networking + for dsrv in $deploysrvs; do + if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then + deploysrvs=$dsrv + break + fi + done fi MGR=$deploysrvs NODENAME=$(grep ^nodename: /tmp/idntmnt/cnflnt.yml | awk '{print $2}') diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent 
b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent index 6315ba5d..f15aff01 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent @@ -56,6 +56,12 @@ if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then echo $NIC > /tmp/autodetectnic else configure_networking + for dsrv in $deploysrvs; do + if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then + deploysrvs=$dsrv + break + fi + done fi MGR=$deploysrvs NODENAME=$(grep ^nodename: /tmp/idntmnt/cnflnt.yml | awk '{print $2}') From 44a30686cb535fcc9084a9b083d822cf7e07168d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 28 Feb 2025 08:53:26 -0500 Subject: [PATCH 032/413] Add Fedora 41 scripted install support --- .../el8/profiles/default/initprofile.sh | 5 ++++- confluent_server/confluent/osimage.py | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/el8/profiles/default/initprofile.sh b/confluent_osdeploy/el8/profiles/default/initprofile.sh index 680ffda7..fa9c20ab 100644 --- a/confluent_osdeploy/el8/profiles/default/initprofile.sh +++ b/confluent_osdeploy/el8/profiles/default/initprofile.sh @@ -1,5 +1,8 @@ #!/bin/sh -sed -i 's/centos/CentOS/; s/rhel/Red Hat Enterprise Linux/; s/oraclelinux/Oracle Linux/; s/alma/AlmaLinux/' $2/profile.yaml +sed -i 's/centos/CentOS/; s/rhel/Red Hat Enterprise Linux/; s/oraclelinux/Oracle Linux/; s/alma/AlmaLinux/;s/fedora/Fedora Linux/' $2/profile.yaml +if grep Fedora $2/profile.yaml > /dev/null; then + sed -i 's/@^minimal-environment/#/' $2/packagelist +fi ln -s $1/images/pxeboot/vmlinuz $2/boot/kernel && \ ln -s $1/images/pxeboot/initrd.img $2/boot/initramfs/distribution mkdir -p $2/boot/efi/boot diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index dcfdf6ec..595785e6 100644 --- a/confluent_server/confluent/osimage.py +++ 
b/confluent_server/confluent/osimage.py @@ -310,6 +310,27 @@ def check_rocky(isoinfo): arch = arch.decode('utf-8') return {'name': 'rocky-{0}-{1}'.format(ver, arch), 'method': EXTRACT, 'category': cat} +fedoracatmap = { + '41': 'el10', +} +def check_fedora(isoinfo): + if '.discinfo' not in isoinfo[1]: + return None + prodinfo = isoinfo[1]['.discinfo'] + prodlines = prodinfo.split(b'\n') + if len(prodlines) < 3: + return None + prod = prodlines[1].split()[0] + if prod != b'Fedora': + return None + arch = prodlines[2] + ver = prodlines[1].split()[-1] + if not isinstance(arch, str): + arch = arch.decode('utf-8') + ver = ver.decode('utf-8') + if ver not in fedoracatmap: + return None + return {'name': 'fedora-{0}-{1}'.format(ver, arch), 'method': EXTRACT, 'category': fedoracatmap[ver]} def check_alma(isoinfo): ver = None From 0d4da78f05653d98859582f5932b4b9248db8440 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 3 Mar 2025 10:51:10 -0500 Subject: [PATCH 033/413] Add certificate handling to prepfish.py --- misc/prepfish.py | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/misc/prepfish.py b/misc/prepfish.py index 0d5a4a23..00c5c2e1 100644 --- a/misc/prepfish.py +++ b/misc/prepfish.py @@ -24,6 +24,8 @@ import os import subprocess import sys import time +import ssl +import socket class IpmiMsg(ctypes.Structure): _fields_ = [('netfn', ctypes.c_ubyte), @@ -193,6 +195,13 @@ class Session(object): fcntl.ioctl(self.ipmidev, IPMICTL_RECV, self.rsp) return self.parsed_rsp +class Verifier(object): + def __init__(self, fprint): + self._fprint = fprint + + def validate(self, certificate): + return hashlib.sha256(certificate).digest() == self._fprint + def dotwait(): sys.stderr.write('.') @@ -204,15 +213,15 @@ def disable_host_interface(): rsp = s.raw_command(netfn=0xc, command=1, data=(1, 0xc1, 0)) def get_redfish_creds(): - os.makedirs('/run/fauxonecli', exist_ok=True, mode=0o700) - s = Session('/dev/ipmi0') 
+ os.makedirs('/run/redfish', exist_ok=True, mode=0o700) try: - with open('/run/fauxonecli/credentials', 'rb') as credin: + with open('/run/redfish/credentials', 'rb') as credin: cred = credin.read() except FileNotFoundError: + s = Session('/dev/ipmi0') rsp = s.raw_command(netfn=0x2c, command=2, data=(0x52, 0xa5)) cred = bytes(rsp['data']) - with open('/run/fauxonecli/credentials', 'wb') as credout: + with open('/run/redfish/credentials', 'wb') as credout: credout.write(cred) if cred[0] == 0x52: cred = cred[1:] @@ -220,6 +229,21 @@ def get_redfish_creds(): return creds +def get_redfish_fingerprint(): + os.makedirs('/run/redfish', exist_ok=True, mode=0o700) + try: + with open('/run/redfish/fingerprint', 'rb') as certin: + fprint = certin.read() + except FileNotFoundError: + s = Session('/dev/ipmi0') + rsp = s.raw_command(0x2c, 1, data=(0x52, 1)) + if rsp['data'][:2] == b'\x52\x01': + fprint = rsp['data'][2:] + with open('/run/redfish/fingerprint', 'wb') as printout: + printout.write(fprint) + return fprint + + def enable_host_interface(): s = Session('/dev/ipmi0') rsp = s.raw_command(netfn=0xc, command=2, data=(1, 0xc1, 0, 0)) @@ -246,9 +270,23 @@ def enable_host_interface(): sys.stderr.flush() return bmctarg +def store_redfish_cert(bmc): + fprint = get_redfish_fingerprint() + verifier = Verifier(fprint) + peercert = None + with socket.create_connection((bmc, 443)) as plainsock: + finsock = ssl.wrap_socket(plainsock, cert_reqs=ssl.CERT_NONE) # to allow fprint based cert + peercert = finsock.getpeercert(binary_form=True) + if not verifier.validate(peercert): + raise Exception("Mismatched certificate") + if peercert: + with open('/run/redfish/cert.der', 'wb') as certout: + certout.write(peercert) + def main(): bmcuser, bmcpass = get_redfish_creds() bmc = enable_host_interface() + store_redfish_cert(bmc) print('Redfish user: {}'.format(bmcuser.decode())) print('Redfish password: {}'.format(bmcpass.decode())) print('Redfish host: https://[{}]/'.format(bmc)) From 
f06d9a81e7188e775344919f3cbcf914d54b5ae2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 4 Mar 2025 10:04:30 -0500 Subject: [PATCH 034/413] Have sshd only generate ed25519 during initramfs phase of diskless. sshd-keygen service will come later with other keys, if desired. --- .../usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index 9b885e82..19489b43 100644 --- a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -292,7 +292,7 @@ if [[ $confluent_websrv == *:* ]] && [[ $confluent_websrv != "["* ]]; then confluent_websrv="[$confluent_websrv]" fi echo -n "Initializing ssh..." -ssh-keygen -A +ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -C '' -N '' for pubkey in /etc/ssh/ssh_host*key.pub; do certfile=${pubkey/.pub/-cert.pub} privfile=${pubkey%.pub} From c73352a293823ae6506005879d37eba2608a5724 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 4 Mar 2025 10:50:48 -0500 Subject: [PATCH 035/413] Add filter for bmc interface People have been putting the BMC interface as a net section, to aid in their information and confluent2hosts. Tolerate that by dropping net entries that match the hardwaremanagement.manager attribute. 
--- confluent_server/confluent/netutil.py | 37 +++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py index e5384f5d..92cc0bfa 100644 --- a/confluent_server/confluent/netutil.py +++ b/confluent_server/confluent/netutil.py @@ -317,6 +317,20 @@ def add_netmask(ncfg): def get_full_net_config(configmanager, node, serverip=None): cfd = configmanager.get_node_attributes(node, ['net.*']) cfd = cfd.get(node, {}) + bmc = configmanager.get_node_attributes( + node, 'hardwaremanagement.manager').get(node, {}).get( + 'hardwaremanagement.manager', {}).get('value', None) + bmc4 = None + bmc6 = None + if bmc: + try: + bmc4 = socket.getaddrinfo(bmc, 0, socket.AF_INET, socket.SOCK_DGRAM)[0][-1][0] + except Exception: + pass + try: + bmc6 = socket.getaddrinfo(bmc, 0, socket.AF_INET6, socket.SOCK_DGRAM)[0][-1][0] + except Exception: + pass attribs = {} for attrib in cfd: val = cfd[attrib].get('value', None) @@ -346,6 +360,12 @@ def get_full_net_config(configmanager, node, serverip=None): for netname in sorted(attribs): ppool.spawn(nm.process_attribs, netname, attribs[netname]) ppool.waitall() + for iface in list(nm.myattribs): + if bmc4 and nm.myattribs[iface].get('ipv4_address', None) == bmc4: + del nm.myattribs[iface] + continue + if bmc6 and nm.myattribs[iface].get('ipv6_address', None) == bmc6: + del nm.myattribs[iface] retattrs = {} if None in nm.myattribs: retattrs['default'] = nm.myattribs[None] @@ -454,6 +474,19 @@ def get_nic_config(configmanager, node, ip=None, mac=None, ifidx=None, clientipn = socket.inet_pton(clientfam, clientip) nodenetattribs = configmanager.get_node_attributes( node, 'net*').get(node, {}) + bmc = configmanager.get_node_attributes( + node, 'hardwaremanagement.manager').get(node, {}).get('hardwaremanagement.manager', {}).get('value', None) + bmc4 = None + bmc6 = None + if bmc: + try: + bmc4 = socket.getaddrinfo(bmc, 0, socket.AF_INET, 
socket.SOCK_DGRAM)[0][-1][0] + except Exception: + pass + try: + bmc6 = socket.getaddrinfo(bmc, 0, socket.AF_INET6, socket.SOCK_DGRAM)[0][-1][0] + except Exception: + pass cfgbyname = {} for attrib in nodenetattribs: segs = attrib.split('.') @@ -554,6 +587,10 @@ def get_nic_config(configmanager, node, ip=None, mac=None, ifidx=None, continue candgw = cfgbyname[candidate].get('ipv{}_gateway'.format(nver), None) if candip: + if bmc4 and candip == bmc4: + continue + if bmc6 and candip == bmc6: + continue try: for inf in socket.getaddrinfo(candip, 0, fam, socket.SOCK_STREAM): candipn = socket.inet_pton(fam, inf[-1][0]) From cea87d012cf4d1cb4079e5e325789b36ec59342f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 4 Mar 2025 10:51:02 -0500 Subject: [PATCH 036/413] Fix missing import from prepfish example. --- misc/prepfish.py | 1 + 1 file changed, 1 insertion(+) diff --git a/misc/prepfish.py b/misc/prepfish.py index 00c5c2e1..5c6ece7a 100644 --- a/misc/prepfish.py +++ b/misc/prepfish.py @@ -18,6 +18,7 @@ import fcntl import json from select import select import glob +import hashlib import socket import struct import os From 02ec40092e16523a3bb5c8475e4f45dd71c02ee7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 4 Mar 2025 15:12:59 -0500 Subject: [PATCH 037/413] Have genesis optionally GUI capable Have Genesis work with newer distribution base, also support seatd/sway when genesis contains it. --- .../initramfs/opt/confluent/bin/rungenesis | 19 +++++++++++++++---- .../lib/dracut/hooks/cmdline/10-genesis.sh | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis index ebf0a380..de9b4305 100644 --- a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis +++ b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis @@ -1,5 +1,20 @@ #!/bin/bash +if [ ! 
-x /usr/libexec/platform-python ]; then + ln -s /usr/bin/python3 /usr/libexec/platform-python +fi export LANG=en_US.utf8 + +if [ -x /usr/bin/seatd-launch -a -x /usr/bin/sway ]; then + export XDG_RUNTIME_DIR=/run/users/0 + mkdir -p $XDG_RUNTIME_DIR + sed -i '/^output /d' /etc/sway/config + echo 'exec foot -t XTERM -T Terminal tmux a' > /etc/sway/config.d/genesis +else + (while :; do TERM=linux tmux a <> /dev/tty1 >&0 2>&1; done) & +fi + + + mkdir -p /etc/confluent if ! grep console= /proc/cmdline >& /dev/null; then autocons=$(/opt/confluent/bin/autocons) @@ -9,13 +24,9 @@ if ! grep console= /proc/cmdline >& /dev/null; then if [ ! -z "$autocons" ]; then echo "Using $(cat /tmp/01-autocons.conf)" (while :; do TERM=xterm-256color tmux a <> $autocons >&0 2>&1; done) & - (while :; do TERM=linux tmux a <> /dev/tty1 >&0 2>&1; done) & - else - (while :; do TERM=linux tmux a <> /dev/console >&0 2>&1; done) & fi else (while :; do TERM=xterm-256color tmux a <> /dev/console >&0 2>&1; done) & - (while :; do TERM=linux tmux a <> /dev/tty1 >&0 2>&1; done) & fi (while :; do TERM=linux tmux <> /dev/tty2 >&0 2>&1; done) & echo -n "udevd: " diff --git a/confluent_osdeploy/genesis/initramfs/usr/lib/dracut/hooks/cmdline/10-genesis.sh b/confluent_osdeploy/genesis/initramfs/usr/lib/dracut/hooks/cmdline/10-genesis.sh index b5cfd961..6f25d910 100644 --- a/confluent_osdeploy/genesis/initramfs/usr/lib/dracut/hooks/cmdline/10-genesis.sh +++ b/confluent_osdeploy/genesis/initramfs/usr/lib/dracut/hooks/cmdline/10-genesis.sh @@ -7,7 +7,7 @@ echo PS1="'"'[genesis running on \H \w]$ '"'" >> ~/.bash_profile mkdir -p /etc/ssh mkdir -p /var/tmp/ mkdir -p /var/empty/sshd -sed -i '/^root:x/d' /etc/passwd +sed -i '/^root:/d' /etc/passwd echo root:x:0:0::/:/bin/bash >> /etc/passwd echo sshd:x:30:30:SSH User:/var/empty/sshd:/sbin/nologin >> /etc/passwd tmux new-session -d sh /opt/confluent/bin/rungenesis From f458c15677c33c00e1e357d10dacf814b5ccfa72 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 4 
Mar 2025 16:11:28 -0500 Subject: [PATCH 038/413] Correct launching of GUI in genesis --- .../genesis/initramfs/opt/confluent/bin/rungenesis | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis index de9b4305..1d87e978 100644 --- a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis +++ b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis @@ -8,7 +8,8 @@ if [ -x /usr/bin/seatd-launch -a -x /usr/bin/sway ]; then export XDG_RUNTIME_DIR=/run/users/0 mkdir -p $XDG_RUNTIME_DIR sed -i '/^output /d' /etc/sway/config - echo 'exec foot -t XTERM -T Terminal tmux a' > /etc/sway/config.d/genesis + echo 'exec foot -t xterm -T Terminal tmux a' > /etc/sway/config.d/genesis + (while :; do seatd-launch sway <> /dev/tty1 >& /dev/null; done) & else (while :; do TERM=linux tmux a <> /dev/tty1 >&0 2>&1; done) & fi From 2bc347fc2a3828097e6f3d66f759aa8c6730c4e9 Mon Sep 17 00:00:00 2001 From: Markus Hilger Date: Wed, 5 Mar 2025 04:17:45 +0100 Subject: [PATCH 039/413] Support FQDN first (Fix #167) --- confluent_client/bin/confluent2hosts | 9 ++++++++- confluent_client/doc/man/confluent2hosts.ronn | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/confluent_client/bin/confluent2hosts b/confluent_client/bin/confluent2hosts index b467e5cc..d21bc643 100644 --- a/confluent_client/bin/confluent2hosts +++ b/confluent_client/bin/confluent2hosts @@ -118,6 +118,7 @@ def main(): ap.add_argument('-a', '--attrib', help='Pull ip addresses and hostnames from attribute database', action='store_true') ap.add_argument('-i', '--ip', help='Expression to generate addresses (e.g. 172.16.1.{n1} or fd2b:246f:8a50::{n1:x})') ap.add_argument('-n', '--name', help='Expression for name to add ({node}-compute, etc). 
If unspecified, "{node} {node}.{dns.domain}" will be used', action='append') + ap.add_argument('-f', '--fqdn-first', help='Put the FQDN first in the hosts entries', action='store_true') args = ap.parse_args() c = client.Command() if args.name: @@ -173,7 +174,13 @@ def main(): break else: for name in list(names): - names.append('{0}.{1}'.format(name, mydomain)) + fqdn = '{0}.{1}'.format(name, mydomain) + if args.fqdn_first: + # Insert FQDN at the beginning if --fqdn-first flag is set + names.insert(0, fqdn) + else: + # Otherwise, append FQDN at the end (original behavior) + names.append(fqdn) names = ' '.join(names) merger.add_entry(ipdb[node][currnet], names) merger.write_out('/etc/whatnowhosts') diff --git a/confluent_client/doc/man/confluent2hosts.ronn b/confluent_client/doc/man/confluent2hosts.ronn index a20c870d..e57401be 100644 --- a/confluent_client/doc/man/confluent2hosts.ronn +++ b/confluent_client/doc/man/confluent2hosts.ronn @@ -13,7 +13,7 @@ noderange. There are two general approaches. It can be used ad-hoc, using -i and -n to specify the address and name portions respectively. This accepts the standard confluent expression syntax, allowing for things like 172.30.1.{n1} or {node}.{dns.domain} or {bmc}. -It can also read from the confluent db, using `-a`. In this mode, each net.. group is pulled together into hosts lines. ipv4_address and ipv6_address fields are associated with the corresponding hostname attributes. +It can also read from the confluent db, using `-a`. In this mode, each net.. group is pulled together into hosts lines. ipv4_address and ipv6_address fields are associated with the corresponding hostname attributes. You can use `-f` to put the FQDN first. 
## EXAMPLES From 7b4063a42fa225a513899e4479a1976d9c28a5d7 Mon Sep 17 00:00:00 2001 From: Tinashe Date: Wed, 5 Mar 2025 11:12:31 -0500 Subject: [PATCH 040/413] modify redirection code for rhel --- .../common/profile/scripts/sample/consoleredirect | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/sample/consoleredirect b/confluent_osdeploy/common/profile/scripts/sample/consoleredirect index 4ebc3a8f..1b80fa70 100644 --- a/confluent_osdeploy/common/profile/scripts/sample/consoleredirect +++ b/confluent_osdeploy/common/profile/scripts/sample/consoleredirect @@ -2,7 +2,7 @@ is_suse=false is_rhel=false if test -f /boot/efi/EFI/redhat/grub.cfg; then - grubcfg="/boot/efi/EFI/redhat/grub.cfg" + grubcfg="/etc/default/grub" grub2-mkconfig -o $grubcfg is_rhel=true elif test -f /boot/efi/EFI/sle_hpc/grub.cfg; then @@ -45,5 +45,6 @@ fi # Working on Redhat if $is_rhel; then - sed -i 's,^serial,#serial, ; s,^terminal,#terminal,' $grubcfg + sed -i '/^GRUB_TERMINAL/s/serial //' $grubcfg + grub2-mkconfig -o /boot/grub2/grub.cfg fi \ No newline at end of file From 1bfad11ee53799e8db4d7c4672bc6c7639486c0c Mon Sep 17 00:00:00 2001 From: Tinashe Date: Wed, 5 Mar 2025 11:19:34 -0500 Subject: [PATCH 041/413] remove-mkconfig --- confluent_osdeploy/common/profile/scripts/sample/consoleredirect | 1 - 1 file changed, 1 deletion(-) diff --git a/confluent_osdeploy/common/profile/scripts/sample/consoleredirect b/confluent_osdeploy/common/profile/scripts/sample/consoleredirect index 1b80fa70..d761817e 100644 --- a/confluent_osdeploy/common/profile/scripts/sample/consoleredirect +++ b/confluent_osdeploy/common/profile/scripts/sample/consoleredirect @@ -3,7 +3,6 @@ is_rhel=false if test -f /boot/efi/EFI/redhat/grub.cfg; then grubcfg="/etc/default/grub" - grub2-mkconfig -o $grubcfg is_rhel=true elif test -f /boot/efi/EFI/sle_hpc/grub.cfg; then grubcfg="/boot/efi/EFI/sle_hpc/grub.cfg" From e5b1b5d3a0876ac33f83f2609a9bf89a3ad40022 Mon Sep 
17 00:00:00 2001 From: Markus Hilger Date: Wed, 5 Mar 2025 03:31:02 +0100 Subject: [PATCH 042/413] Implement YAML support for confluentdbutil (fixes #152) --- confluent_client/doc/man/confluentdbutil.ronn | 3 + confluent_server/bin/confluentdbutil | 16 +- .../confluent/config/configmanager.py | 143 ++++++++++++++---- 3 files changed, 129 insertions(+), 33 deletions(-) diff --git a/confluent_client/doc/man/confluentdbutil.ronn b/confluent_client/doc/man/confluentdbutil.ronn index abb47566..b73ce8f5 100644 --- a/confluent_client/doc/man/confluentdbutil.ronn +++ b/confluent_client/doc/man/confluentdbutil.ronn @@ -38,5 +38,8 @@ the json files (password protected, removed from the files, or unprotected). keys do not change and as such they do not require incremental backup. +* `-y`, `--yaml + Use YAML instead of JSON as file format + * `-h`, `--help`: Show help message and exit diff --git a/confluent_server/bin/confluentdbutil b/confluent_server/bin/confluentdbutil index e74c2ab4..37e95c18 100755 --- a/confluent_server/bin/confluentdbutil +++ b/confluent_server/bin/confluentdbutil @@ -50,6 +50,8 @@ argparser.add_option('-s', '--skipkeys', action='store_true', 'protected keys.json file, and only the protected ' 'data is needed. 
keys do not change and as such ' 'they do not require incremental backup') +argparser.add_option('-y', '--yaml', action='store_true', + help='Use YAML instead of JSON as file format') (options, args) = argparser.parse_args() if len(args) != 2 or args[0] not in ('dump', 'restore', 'merge'): argparser.print_help() @@ -73,9 +75,16 @@ if args[0] in ('restore', 'merge'): cfm.init(stateless) cfm.statelessmode = stateless skipped = {'nodes': [], 'nodegroups': []} + + # Use the format parameter based on the --yaml option + format = 'yaml' if options.yaml else 'json' + cfm.restore_db_from_directory( dumpdir, password, - merge="skip" if args[0] == 'merge' else False, skipped=skipped) + merge="skip" if args[0] == 'merge' else False, + skipped=skipped, + format=format) + if skipped['nodes']: skippedn = ','.join(skipped['nodes']) print('The following nodes were skipped during merge: ' @@ -114,8 +123,11 @@ elif args[0] == 'dump': main._initsecurity(conf.get_config()) if not os.path.exists(dumpdir): os.makedirs(dumpdir) + + # Use the format parameter based on the --yaml option + format = 'yaml' if options.yaml else 'json' cfm.dump_db_to_directory(dumpdir, password, options.redact, - options.skipkeys) + options.skipkeys, format=format) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 506ed85a..e505e49c 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -102,7 +102,7 @@ try: unicode except NameError: unicode = str - +import yaml _masterkey = None _masterintegritykey = None @@ -2945,12 +2945,30 @@ def _dump_keys(password, dojson=True): return keydata -def restore_db_from_directory(location, password, merge=False, skipped=None): +def restore_db_from_directory(location, password, merge=False, skipped=None, format='json'): + """Restore database from a directory + + :param location: Directory containing the configuration + :param password: 
Password to decrypt sensitive data + :param merge: If True, merge with existing configuration + :param skipped: List of elements to skip during restore + :param format: Format of the files ('json' [default] or 'yaml') + """ + if format not in ('json', 'yaml'): + raise ValueError("Format must be 'json' or 'yaml'") + kdd = None try: - with open(os.path.join(location, 'keys.json'), 'r') as cfgfile: + keys_file = os.path.join(location, f'keys.{format}') + with open(keys_file, 'r') as cfgfile: keydata = cfgfile.read() - kdd = json.loads(keydata) + if format == 'json': + kdd = json.loads(keydata) + else: + kdd = yaml.safe_load(keydata) + if kdd is None: + raise ValueError(f"Invalid or empty YAML content in {keys_file}") + if merge: if 'cryptkey' in kdd: kdd['cryptkey'] = _parse_key(kdd['cryptkey'], password) @@ -2959,59 +2977,122 @@ def restore_db_from_directory(location, password, merge=False, skipped=None): else: kdd['integritykey'] = None # GCM else: + if format == 'json': + _restore_keys(keydata, password) + else: + # Convert YAML to JSON string for _restore_keys + _restore_keys(json.dumps(kdd), password) kdd = None - _restore_keys(keydata, password) except IOError as e: if e.errno == 2: raise Exception("Cannot restore without keys, this may be a " "redacted dump") if not merge: try: - moreglobals = json.load(open(os.path.join(location, 'globals.json'))) - for globvar in moreglobals: - set_global(globvar, moreglobals[globvar]) + globals_file = os.path.join(location, f'globals.{format}') + with open(globals_file, 'r') as globin: + if format == 'json': + moreglobals = json.load(globin) + else: + moreglobals = yaml.safe_load(globin) + if moreglobals is None: + raise ValueError(f"Invalid or empty YAML content in {globals_file}") + + for globvar in moreglobals: + set_global(globvar, moreglobals[globvar]) except IOError as e: if e.errno != 2: raise try: - collective = json.load(open(os.path.join(location, 'collective.json'))) - _cfgstore['collective'] = {} - for coll in 
collective: - add_collective_member(coll, collective[coll]['address'], - collective[coll]['fingerprint']) + collective_file = os.path.join(location, f'collective.{format}') + with open(collective_file, 'r') as collin: + if format == 'json': + collective = json.load(collin) + else: + collective = yaml.safe_load(collin) + if collective is None: + raise ValueError(f"Invalid or empty YAML content in {collective_file}") + + _cfgstore['collective'] = {} + for coll in collective: + add_collective_member(coll, collective[coll]['address'], + collective[coll]['fingerprint']) except IOError as e: if e.errno != 2: raise - with open(os.path.join(location, 'main.json'), 'r') as cfgfile: + main_file = os.path.join(location, f'main.{format}') + with open(main_file, 'r') as cfgfile: cfgdata = cfgfile.read() + if format == 'yaml': + # Convert YAML to JSON string for _load_from_json + yaml_data = yaml.safe_load(cfgdata) + if yaml_data is None: + raise ValueError(f"Invalid or empty YAML content in {main_file}") + cfgdata = json.dumps(yaml_data) ConfigManager(tenant=None)._load_from_json(cfgdata, merge=merge, keydata=kdd, skipped=skipped) ConfigManager.wait_for_sync(True) - -def dump_db_to_directory(location, password, redact=None, skipkeys=False): +def dump_db_to_directory(location, password, redact=None, skipkeys=False, format='json'): + """Dump database to a directory + + :param location: Directory to store the configuration + :param password: Password to protect sensitive data + :param redact: If True, redact sensitive data + :param skipkeys: If True, skip dumping keys + :param format: Format to use for dumping ('json' [default] or 'yaml') + """ + if format not in ('json', 'yaml'): + raise ValueError("Format must be 'json' or 'yaml'") + + # Handle keys file if not redact and not skipkeys: - with open(os.path.join(location, 'keys.json'), 'w') as cfgfile: - cfgfile.write(_dump_keys(password)) + with open(os.path.join(location, f'keys.{format}'), 'w') as cfgfile: + if format == 
'json': + cfgfile.write(_dump_keys(password)) + else: + keydata = _dump_keys(password, dojson=False) + yaml.dump(keydata, cfgfile, default_flow_style=False) cfgfile.write('\n') - with open(os.path.join(location, 'main.json'), 'wb') as cfgfile: - cfgfile.write(ConfigManager(tenant=None)._dump_to_json(redact=redact)) - cfgfile.write(b'\n') + + # Handle main config + main_data = ConfigManager(tenant=None)._dump_to_json(redact=redact) + with open(os.path.join(location, f'main.{format}'), 'wb' if format == 'json' else 'w') as cfgfile: + if format == 'json': + cfgfile.write(main_data) + cfgfile.write(b'\n') + else: + # Convert JSON to Python object, then dump as YAML + yaml.dump(json.loads(main_data.decode('utf-8')), cfgfile, default_flow_style=False) + + # Handle collective data if 'collective' in _cfgstore: - with open(os.path.join(location, 'collective.json'), 'w') as cfgfile: - cfgfile.write(json.dumps(_cfgstore['collective'])) - cfgfile.write('\n') + with open(os.path.join(location, f'collective.{format}'), 'w') as cfgfile: + if format == 'json': + cfgfile.write(json.dumps(_cfgstore['collective'])) + cfgfile.write('\n') + else: + yaml.dump(_cfgstore['collective'], cfgfile, default_flow_style=False) + + # Handle globals bkupglobals = get_globals() if bkupglobals: - with open(os.path.join(location, 'globals.json'), 'w') as globout: - json.dump(bkupglobals, globout) + with open(os.path.join(location, f'globals.{format}'), 'w') as globout: + if format == 'json': + json.dump(bkupglobals, globout) + else: + yaml.dump(bkupglobals, globout, default_flow_style=False) + + # Handle tenants try: for tenant in os.listdir( os.path.join(ConfigManager._cfgdir, '/tenants/')): - with open(os.path.join(location, 'tenants', tenant, - 'main.json'), 'w') as cfgfile: - cfgfile.write(ConfigManager(tenant=tenant)._dump_to_json( - redact=redact)) - cfgfile.write('\n') + tenant_data = ConfigManager(tenant=tenant)._dump_to_json(redact=redact) + with open(os.path.join(location, 'tenants', 
tenant, f'main.{format}'), 'wb' if format == 'json' else 'w') as cfgfile: + if format == 'json': + cfgfile.write(tenant_data) + cfgfile.write(b'\n') + else: + yaml.dump(json.loads(tenant_data.decode('utf-8')), cfgfile, default_flow_style=False) except OSError: pass From 95952b523175d6f56d34751165e0da658ad0a978 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 5 Mar 2025 13:13:34 -0500 Subject: [PATCH 043/413] Detect active nic in multi-nic BMC discovery --- .../discovery/handlers/redfishbmc.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/handlers/redfishbmc.py b/confluent_server/confluent/discovery/handlers/redfishbmc.py index 5f3c34fb..b8764361 100644 --- a/confluent_server/confluent/discovery/handlers/redfishbmc.py +++ b/confluent_server/confluent/discovery/handlers/redfishbmc.py @@ -276,7 +276,25 @@ class NodeHandler(generic.NodeHandler): continue actualnics.append(candnic) if len(actualnics) != 1: - raise Exception("Multi-interface BMCs are not supported currently") + compip = self.ipaddr + if ':' in compip: + compip = compip.split('%')[0] + ipkey = 'IPv6Addresses' + else: + ipkey = 'IPv6Addresses' + actualnic = None + for curractnic in actualnics: + currnicinfo = wc.grab_json_response(curractnic) + for targipaddr in currnicinfo.get(ipkey, []): + targipaddr = targipaddr.get('Address', 'Z') + if compip == targipaddr: + actualnic = curractnic + break + if actualnic: + break + else: + raise Exception("Unable to detect active NIC of multi-nic bmc") + actualnics = [actualnic] currnet = wc.grab_json_response(actualnics[0]) netconfig = netutil.get_nic_config(self.configmanager, nodename, ip=newip) newconfig = { From b4b011663e8a5c96ef72380b257919bcfd4c6d37 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 5 Mar 2025 17:14:28 -0500 Subject: [PATCH 044/413] Handle more forms of confluent headers Some frameworks won't allow headers through, normalize case and normalize _ presence. 
--- confluent_server/confluent/httpapi.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index 2fbc700b..0b2abb4d 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -629,8 +629,11 @@ def wsock_handler(ws): def resourcehandler(env, start_response): for hdr in env['headers_raw']: - if hdr[0].startswith('CONFLUENT_'): - env['HTTP_' + hdr[0]] = hdr[1] + if hdr[0].lower().startswith('confluent'): + hdrname = hdr[0].upper() + if '_' not in hdrname: + hdrname = hdrname.replace('CONFLUENT', 'CONFLUENT_') + env['HTTP_' + hdrname] = hdr[1] try: if 'HTTP_SEC_WEBSOCKET_VERSION' in env: for rsp in wsock_handler(env, start_response): From 28c929aec677573653569c750b26965782cce624 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 5 Mar 2025 17:14:55 -0500 Subject: [PATCH 045/413] Have a draft apiclient in golang --- confluent_osdeploy/utils/goapiclient/go.mod | 5 ++ confluent_osdeploy/utils/goapiclient/go.sum | 4 ++ confluent_osdeploy/utils/goapiclient/main.go | 57 ++++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 confluent_osdeploy/utils/goapiclient/go.mod create mode 100644 confluent_osdeploy/utils/goapiclient/go.sum create mode 100644 confluent_osdeploy/utils/goapiclient/main.go diff --git a/confluent_osdeploy/utils/goapiclient/go.mod b/confluent_osdeploy/utils/goapiclient/go.mod new file mode 100644 index 00000000..47f2cb65 --- /dev/null +++ b/confluent_osdeploy/utils/goapiclient/go.mod @@ -0,0 +1,5 @@ +module confluentapiclient + +go 1.22 + +toolchain go1.23.6 diff --git a/confluent_osdeploy/utils/goapiclient/go.sum b/confluent_osdeploy/utils/goapiclient/go.sum new file mode 100644 index 00000000..91d036f4 --- /dev/null +++ b/confluent_osdeploy/utils/goapiclient/go.sum @@ -0,0 +1,4 @@ +github.com/go-crypt/crypt v0.3.2 h1:I4i0u2g8X9bxCXIjvv19BDVXqQbddDQrURCJrOyyJos= +github.com/go-crypt/crypt 
v0.3.2/go.mod h1:U0YhpCizEtaVC4gVfUUN0qGn1Z6+e3at+B5uLYx/sV0= +github.com/go-crypt/x v0.3.2 h1:m2wn2+8tp28V4yDiW5NSTiyNSXnCoTs1R1+H+cAJA3M= +github.com/go-crypt/x v0.3.2/go.mod h1:uelN9rbD2e2eqE8KA26B9R6OQ0TdM6msWdPsoMM1ZFk= diff --git a/confluent_osdeploy/utils/goapiclient/main.go b/confluent_osdeploy/utils/goapiclient/main.go new file mode 100644 index 00000000..91ae02c3 --- /dev/null +++ b/confluent_osdeploy/utils/goapiclient/main.go @@ -0,0 +1,57 @@ +package main + +import ( + "flag" + "fmt" + "io" + "os" + "net/http" + "crypto/x509" + "crypto/tls" +) + +func main() { + certauthority := flag.String("c", "/etc/confluent/ca.pem", "Certificate authorities to use, in PEM format") + targurl := flag.String("u", "", "Url to connect to") + keyfile := flag.String("k", "/etc/confluent/confluent.apikey", "Confluent API key file") + nodename := flag.String("n", "", "Node Name") + usejson := flag.Bool("j", false, "Use JSON") + flag.Parse() + certpool := x509.NewCertPool() + currcacerts, err := os.ReadFile(*certauthority) + if err != nil { + panic(err) + } + confluentapikey, err := os.ReadFile(*keyfile) + if confluentapikey[len(confluentapikey) - 1] == 0xa { + confluentapikey = confluentapikey[:len(confluentapikey)-1] + } + if err != nil { + panic(err) + } + certpool.AppendCertsFromPEM(currcacerts) + + client := &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{ + RootCAs: certpool, + }, + }, + } + rq, err := http.NewRequest(http.MethodGet, *targurl, nil) + if err != nil { panic(err )} + if *usejson { rq.Header.Set("Accept", "application/json") } + if *nodename == "" { + *nodename, err = os.Hostname() + } + rq.Header.Set("CONFLUENT_NODENAME", *nodename) + fmt.Println(string(confluentapikey)) + rq.Header.Set("CONFLUENT_APIKEY", string(confluentapikey)) + if err != nil { panic(err )} + rsp, err := client.Do(rq) + if err != nil { panic(err )} + rspdata, err := io.ReadAll(rsp.Body) + rsptxt := string(rspdata) + fmt.Println(rsptxt) +} + From 
13a649310096fde45aaff107b3ebfa337b4dad7e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 7 Mar 2025 17:16:13 -0500 Subject: [PATCH 046/413] Add a general utility for confluent in golang --- .../utils/confusebox/apiclient.go | 133 ++++++++++++++++++ .../utils/confusebox/genpasshmac.go | 44 ++++++ confluent_osdeploy/utils/confusebox/go.mod | 7 + confluent_osdeploy/utils/confusebox/go.sum | 4 + confluent_osdeploy/utils/confusebox/main.go | 86 +++++++++++ 5 files changed, 274 insertions(+) create mode 100644 confluent_osdeploy/utils/confusebox/apiclient.go create mode 100644 confluent_osdeploy/utils/confusebox/genpasshmac.go create mode 100644 confluent_osdeploy/utils/confusebox/go.mod create mode 100644 confluent_osdeploy/utils/confusebox/go.sum create mode 100644 confluent_osdeploy/utils/confusebox/main.go diff --git a/confluent_osdeploy/utils/confusebox/apiclient.go b/confluent_osdeploy/utils/confusebox/apiclient.go new file mode 100644 index 00000000..81360a0c --- /dev/null +++ b/confluent_osdeploy/utils/confusebox/apiclient.go @@ -0,0 +1,133 @@ +package main + +import ( + "bytes" + "fmt" + "io" + "os" + "net/http" + "crypto/x509" + "crypto/tls" + "strings" + "errors" +) +type ApiClient struct { + server string + urlserver string + apikey string + nodename string + webclient *http.Client +} + +func NewApiClient(cafile string, keyfile string, nodename string, server string) (*ApiClient, error) { + currcacerts, err := os.ReadFile(cafile) + if err != nil { + return nil, err + } + cacerts := x509.NewCertPool() + cacerts.AppendCertsFromPEM(currcacerts) + apikey := []byte("") + if keyfile != "" { + apikey, err = os.ReadFile(keyfile) + if err != nil { + return nil, err + } + if apikey[len(apikey) - 1] == 0xa { + apikey = apikey[:len(apikey)-1] + } + } + if nodename == "" { + cinfo, err := os.ReadFile("/etc/confluent/confliuent.info") + if err != nil { + nodename, err = os.Hostname() + if err != nil { return nil, err } + } + cinfolines := bytes.Split(cinfo, 
[]byte("\n")) + if bytes.Contains(cinfolines[0], []byte("NODENAME")) { + cnodebytes := bytes.Split(cinfolines[0], []byte(" ")) + nodename = string(cnodebytes[0]) + } + } + urlserver := server + if strings.Contains(server, ":") { + if strings.Contains(server, "%") && !strings.Contains(server, "%25") { + server = strings.Replace(server, "%", "%25", 1) + } + urlserver = fmt.Sprintf("[%s]", server) + if strings.Contains(server, "%") { + server = server[:strings.Index(server, "%")] + } + } + webclient := &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{ + RootCAs: cacerts, + ServerName: server, + }, + }, + } + vc := ApiClient{server, urlserver, string(apikey), nodename, webclient} + return &vc, nil +} + +func (apiclient *ApiClient) RegisterKey(crypted string, hmac string) (error) { + cryptbytes := []byte(crypted) + _, err := apiclient.request("/confluent-api/self/registerapikey", "", &cryptbytes, "", hmac) + return err +} + +func (apiclient *ApiClient) Fetch(url string, outputfile string, mime string) (error) { + outp, err := os.Create(outputfile) + if err != nil { return err } + defer outp.Close() + rsp, err := apiclient.request(url, mime, nil, "", "") + if err != nil { return err } + _, err = io.Copy(outp, rsp) + return err +} + +func (apiclient *ApiClient) GrabText(url string, mime string) (string, error){ + rsp, err := apiclient.request(url, mime, nil, "", "") + if err != nil { return "", err } + rspdata, err := io.ReadAll(rsp) + if err != nil { return "", err } + rsptxt := string(rspdata) + return rsptxt, nil +} + +func (apiclient *ApiClient) request(url string, mime string, body *[]byte, method string, hmac string) (io.ReadCloser, error) { + if ! 
strings.Contains(url, "https://") { + url = fmt.Sprintf("https://%s%s", apiclient.urlserver, url) + } + if method == "" { + if body != nil { + method = http.MethodPost + } else { + method = http.MethodGet + } + } + var err error + var rq *http.Request + if body == nil { + rq, err = http.NewRequest(method, url, nil) + } else { + rq, err = http.NewRequest(method, url, bytes.NewBuffer(*body)) + } + if err != nil { return nil, err } + if (mime != "") { rq.Header.Set("Accept", mime) } + rq.Header.Set("CONFLUENT_NODENAME", apiclient.nodename) + if len(hmac) > 0 { + rq.Header.Set("CONFLUENT_CRYPTHMAC", hmac) + } else { + + rq.Header.Set("CONFLUENT_APIKEY", apiclient.apikey) + } + rsp, err := apiclient.webclient.Do(rq) + if err != nil { return nil, err } + if rsp.StatusCode >= 300 { + err = errors.New(rsp.Status) + return nil, err + } + return rsp.Body, err +} + diff --git a/confluent_osdeploy/utils/confusebox/genpasshmac.go b/confluent_osdeploy/utils/confusebox/genpasshmac.go new file mode 100644 index 00000000..3e8fabf0 --- /dev/null +++ b/confluent_osdeploy/utils/confusebox/genpasshmac.go @@ -0,0 +1,44 @@ +package main + +import ( + "bytes" + "github.com/go-crypt/crypt/algorithm/shacrypt" + "os" + "crypto/rand" + "encoding/base64" + "crypto/hmac" + "crypto/sha256" +) + +func genpasshmac(hmackeyfile string) (string, string, string, error) { + randbytes := make([]byte, 36) + _, err := rand.Read(randbytes) + if err != nil { + panic(err) + } + password := base64.StdEncoding.EncodeToString(randbytes) + hasher, err := shacrypt.New(shacrypt.WithVariant(shacrypt.VariantSHA256), shacrypt.WithIterations(5000)) + if err != nil { + panic(err) + } + + digest, err := hasher.Hash(password) + if err != nil { + panic(err) + } + cryptpass := digest.Encode() + hmackey, err := os.ReadFile(hmackeyfile) + if err != nil { return "", "", "", err } + keylines := bytes.Split(hmackey, []byte("\n")) + if bytes.Contains(keylines[0], []byte("apitoken:")) { + keyparts := bytes.Split(keylines[0], 
[]byte(" ")) + hmackey = keyparts[1] + } + + hmacer := hmac.New(sha256.New, hmackey) + hmacer.Write([]byte(cryptpass)) + hmacresult := hmacer.Sum(nil) + hmacout := base64.StdEncoding.EncodeToString(hmacresult) + return password, cryptpass, hmacout, nil +} + diff --git a/confluent_osdeploy/utils/confusebox/go.mod b/confluent_osdeploy/utils/confusebox/go.mod new file mode 100644 index 00000000..424bbb1c --- /dev/null +++ b/confluent_osdeploy/utils/confusebox/go.mod @@ -0,0 +1,7 @@ +module confusebox + +go 1.23.6 + +require github.com/go-crypt/crypt v0.3.3 + +require github.com/go-crypt/x v0.3.4 // indirect diff --git a/confluent_osdeploy/utils/confusebox/go.sum b/confluent_osdeploy/utils/confusebox/go.sum new file mode 100644 index 00000000..cae66a5c --- /dev/null +++ b/confluent_osdeploy/utils/confusebox/go.sum @@ -0,0 +1,4 @@ +github.com/go-crypt/crypt v0.3.3 h1:mBSh8U+vwDm3V+UHNMQqsxV0clzlvKbLcJXcafYFpCs= +github.com/go-crypt/crypt v0.3.3/go.mod h1:ex5C1b58/tzCW6/rJfcdf5Y2TjgzmWVtX57sjpN3pUQ= +github.com/go-crypt/x v0.3.4 h1:zgpaI55VOAbkkRup9+tLaZ02IWTV/xz63tohoY0t9+Y= +github.com/go-crypt/x v0.3.4/go.mod h1:+uHWqfzD3S6YWxm18/Qp+4VcuBb0Le9dGUhX0zaWicU= diff --git a/confluent_osdeploy/utils/confusebox/main.go b/confluent_osdeploy/utils/confusebox/main.go new file mode 100644 index 00000000..1cc87413 --- /dev/null +++ b/confluent_osdeploy/utils/confusebox/main.go @@ -0,0 +1,86 @@ +package main + +import ( + "bytes" + "flag" + "os" + "fmt" +) + +func main() { + var nodename string + var cacerts string + var apikey string + var usejson bool + var confluentsrv string + hmacreg := flag.NewFlagSet("hmacregister", flag.ExitOnError) + hmacreg.StringVar(&apikey, "k", "/etc/confluent/apikey", "Output file for the api key") + hmacKey := hmacreg.String("i", "", "Identity yaml file") + hmacreg.StringVar(&cacerts, "c", "/etc/confluent/ca.pem", "Certeficate authorities to use in PEM") + hmacreg.StringVar(&nodename, "n", "", "Node name") + hmacreg.StringVar(&confluentsrv, "s", "", 
"Confluent server to request from") + + invokeapi := flag.NewFlagSet("invoke", flag.ExitOnError) + invokeapi.StringVar(&nodename, "n", "", "Node name") + + invokeapi.StringVar(&cacerts, "c", "/etc/confluent/ca.pem", "Certeficate authorities to use in PEM") + invokeapi.StringVar(&apikey, "k", "/etc/confluent/confluent.apikey", "File containing Confluent API key") + invokeapi.BoolVar(&usejson, "j", false, "Request JSON formatted reply") + outputfile := invokeapi.String("o", "", "Filename to store download to") + invokeapi.StringVar(&confluentsrv, "s", "", "Confluent server to request from") + + if confluentsrv == "" { + dcfg, err := os.ReadFile("/etc/confluent/confluent.deploycfg") + if err == nil { + dcfglines := bytes.Split(dcfg, []byte("\n")) + for _, dcfgline := range(dcfglines) { + dkeyval := bytes.Split(dcfgline, []byte(" ")) + if bytes.Contains(dkeyval[0], []byte("deploy_server")) && (bytes.Contains(dkeyval[1], []byte(".")) || bytes.Contains(dkeyval[1], []byte(":"))) { + confluentsrv = string(dkeyval[1]) + } + } + + + } else { + _, err := os.ReadFile("/etc/confluent/confluent.info") + if err != nil { + panic("Unable to determine Confluent server") + } + } + } + + if len(os.Args) < 2 { + panic("Insufficient arguments, no subcommand") + } + switch os.Args[1] { + case "hmacregister": + hmacreg.Parse(os.Args[2:]) + password, crypted, hmac, err := genpasshmac(*hmacKey) + if err != nil { panic(err) } + //apiclient(cacerts, "/confluent-api/self/registerapikey", apikey, nodename, usejson) + apiclient, err := NewApiClient(cacerts, "", nodename, confluentsrv) + if err != nil { panic(err) } + err = apiclient.RegisterKey(crypted, hmac) + if err != nil { panic(err) } + outp, err := os.Create(apikey) + if err != nil { panic(err) } + defer outp.Close() + outp.Write([]byte(password)) + case "invoke": + invokeapi.Parse(os.Args[2:]) + apiclient, err := NewApiClient(cacerts, apikey, nodename, confluentsrv) + if err != nil { panic(err) } + mime := "" + if usejson { + mime = 
"application/json" + } + if *outputfile != "" { + apiclient.Fetch(invokeapi.Arg(0), *outputfile, mime) + } + rsp, err := apiclient.GrabText(invokeapi.Arg(0), mime) + if err != nil { panic(err) } + fmt.Println(rsp) + default: + panic("Unrecognized subcommand") + } +} From 1f97a5e67d7144ca4f590b60d402a9f530d94ef6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 7 Mar 2025 18:01:55 -0500 Subject: [PATCH 047/413] Add a Makefile for building --- confluent_osdeploy/utils/confusebox/Makefile | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 confluent_osdeploy/utils/confusebox/Makefile diff --git a/confluent_osdeploy/utils/confusebox/Makefile b/confluent_osdeploy/utils/confusebox/Makefile new file mode 100644 index 00000000..8501cc39 --- /dev/null +++ b/confluent_osdeploy/utils/confusebox/Makefile @@ -0,0 +1,7 @@ +all: confusebox + +confusebox: *.go + go build -ldflags "-w -s" -gcflags=all="-l" -trimpath + +clean: + rm confusebox From 060b81e205fc4b46bf1d5844f333de3d532bd72c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 11 Mar 2025 08:39:28 -0400 Subject: [PATCH 048/413] Fix documentation error in attributes --- confluent_server/confluent/config/attributes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index ff2aa90a..4f5ed01b 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -557,7 +557,7 @@ node = { 'description': 'Indicates that this interface should be a team and what mode or runner to use when teamed. ' 'If this covers a deployment interface, one of the member interfaces may be brought up as ' 'a standalone interface until deployment is complete, as supported by the OS deployment profile. ' - 'To support this scenario, the switch should be set up to allow independent operation of member ports123654 (e.g. 
lacp bypass mode or fallback mode).', + 'To support this scenario, the switch should be set up to allow independent operation of member ports (e.g. lacp bypass mode or fallback mode).', 'validvalues': ('lacp', 'loadbalance', 'roundrobin', 'activebackup', 'none') }, 'power.pdu': { From d2011261ab2c9f6284d73e68bcf149ff2e054174 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 11 Mar 2025 09:58:51 -0400 Subject: [PATCH 049/413] Enable creation of bond and dependent tags in one iteration Have a second pass to check interfaces that may be created by the first pass. --- .../common/profile/scripts/confignet | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 64d3dbc0..5bf0871b 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -367,7 +367,7 @@ class NetworkManager(object): args.append(bondcfg[parm]) subprocess.check_call(['nmcli', 'c', 'm', team] + args) - def apply_configuration(self, cfg): + def apply_configuration(self, cfg, lastchance=False): cmdargs = {} cmdargs['connection.autoconnect'] = 'yes' stgs = cfg['settings'] @@ -418,8 +418,9 @@ class NetworkManager(object): iname = list(cfg['interfaces'])[0] ctype = self.devtypes.get(iname, None) if not ctype: - sys.stderr.write("Warning, no device found for interface_name ({0}), skipping setup\n".format(iname)) - return + if lastchance: + sys.stderr.write("Warning, no device found for interface_name ({0}), skipping setup\n".format(iname)) + return 1 if stgs.get('vlan_id', None): vlan = stgs['vlan_id'] if ctype == 'infiniband': @@ -543,8 +544,21 @@ if __name__ == '__main__': nm = NetworkManager(devtypes, dc) elif os.path.exists('/usr/sbin/wicked'): nm = WickedManager() + retrynics = [] for netn in netname_to_interfaces: - nm.apply_configuration(netname_to_interfaces[netn]) + redo = 
nm.apply_configuration(netname_to_interfaces[netn]) + if redo == 1: + retrynics.append(netn) + if retrynics: + idxmap, devtypes = map_idx_to_name() + if os.path.exists('/usr/sbin/netplan'): + nm = NetplanManager(dc) + if os.path.exists('/usr/bin/nmcli'): + nm = NetworkManager(devtypes, dc) + elif os.path.exists('/usr/sbin/wicked'): + nm = WickedManager() + for netn in retrynics: + nm.apply_configuration(netname_to_interfaces[netn], lastchance=True) if havefirewall: subprocess.check_call(['systemctl', 'start', 'firewalld']) await_tentative() From 9136341bdae68774ac182e3764a88b833fd37632 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 11 Mar 2025 11:38:05 -0400 Subject: [PATCH 050/413] Tolerate different proxy pass configurations The stock reverse proxy configuration strips the leading '/confluent-api/' from the URL. However, when doing a custom reverse proxy set up, one may preserve full path without knowing which way to go. Since '/confluent-api/' will never be used inside the api, just strip it when detected to tolerate either of the likely proxy pass behaviors. 
--- confluent_server/confluent/httpapi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index 0b2abb4d..c47722d4 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -628,6 +628,8 @@ def wsock_handler(ws): def resourcehandler(env, start_response): + if env['PATH_INFO'].startswith('/confluent-api'): + env['PATH_INFO'] = env['PATH_INFO'].replace('/confluent-api', '') for hdr in env['headers_raw']: if hdr[0].lower().startswith('confluent'): hdrname = hdr[0].upper() From 9cecaab055007c0a126c828627fe57ff1c30e85f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 11 Mar 2025 15:04:36 -0400 Subject: [PATCH 051/413] Fix confluent server identification using -s instead of file --- confluent_osdeploy/utils/confusebox/Makefile | 1 + confluent_osdeploy/utils/confusebox/main.go | 46 ++++++++++++-------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/confluent_osdeploy/utils/confusebox/Makefile b/confluent_osdeploy/utils/confusebox/Makefile index 8501cc39..38787071 100644 --- a/confluent_osdeploy/utils/confusebox/Makefile +++ b/confluent_osdeploy/utils/confusebox/Makefile @@ -2,6 +2,7 @@ all: confusebox confusebox: *.go go build -ldflags "-w -s" -gcflags=all="-l" -trimpath + upx --brute confusebox clean: rm confusebox diff --git a/confluent_osdeploy/utils/confusebox/main.go b/confluent_osdeploy/utils/confusebox/main.go index 1cc87413..3ff86563 100644 --- a/confluent_osdeploy/utils/confusebox/main.go +++ b/confluent_osdeploy/utils/confusebox/main.go @@ -7,6 +7,26 @@ import ( "fmt" ) +func get_confluent_server() (string, error) { + var confluentsrv string + dcfg, err := os.ReadFile("/etc/confluent/confluent.deploycfg") + if err == nil { + dcfglines := bytes.Split(dcfg, []byte("\n")) + for _, dcfgline := range(dcfglines) { + dkeyval := bytes.Split(dcfgline, []byte(" ")) + if bytes.Contains(dkeyval[0], []byte("deploy_server")) && 
(bytes.Contains(dkeyval[1], []byte(".")) || bytes.Contains(dkeyval[1], []byte(":"))) { + confluentsrv = string(dkeyval[1]) + return confluentsrv, nil + } + } + } else { + _, err := os.ReadFile("/etc/confluent/confluent.info") + if err != nil { + return "Unable to determine Confluent server", err + } + } + return "", err +} func main() { var nodename string var cacerts string @@ -29,32 +49,18 @@ func main() { outputfile := invokeapi.String("o", "", "Filename to store download to") invokeapi.StringVar(&confluentsrv, "s", "", "Confluent server to request from") - if confluentsrv == "" { - dcfg, err := os.ReadFile("/etc/confluent/confluent.deploycfg") - if err == nil { - dcfglines := bytes.Split(dcfg, []byte("\n")) - for _, dcfgline := range(dcfglines) { - dkeyval := bytes.Split(dcfgline, []byte(" ")) - if bytes.Contains(dkeyval[0], []byte("deploy_server")) && (bytes.Contains(dkeyval[1], []byte(".")) || bytes.Contains(dkeyval[1], []byte(":"))) { - confluentsrv = string(dkeyval[1]) - } - } - } else { - _, err := os.ReadFile("/etc/confluent/confluent.info") - if err != nil { - panic("Unable to determine Confluent server") - } - } - } - if len(os.Args) < 2 { panic("Insufficient arguments, no subcommand") } switch os.Args[1] { case "hmacregister": + var err error hmacreg.Parse(os.Args[2:]) + if confluentsrv == "" { + confluentsrv, err = get_confluent_server() + } password, crypted, hmac, err := genpasshmac(*hmacKey) if err != nil { panic(err) } //apiclient(cacerts, "/confluent-api/self/registerapikey", apikey, nodename, usejson) @@ -67,7 +73,11 @@ func main() { defer outp.Close() outp.Write([]byte(password)) case "invoke": + var err error invokeapi.Parse(os.Args[2:]) + if confluentsrv == "" { + confluentsrv, err = get_confluent_server() + } apiclient, err := NewApiClient(cacerts, apikey, nodename, confluentsrv) if err != nil { panic(err) } mime := "" From 9123d2f2e014c2213b5ffa79aafc2f6334b0e820 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 11 Mar 2025 15:35:35 
-0400 Subject: [PATCH 052/413] Add ability to post bodys to HTTP requests --- confluent_osdeploy/utils/confusebox/apiclient.go | 15 ++++++++------- confluent_osdeploy/utils/confusebox/main.go | 16 ++++++++++++++-- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/confluent_osdeploy/utils/confusebox/apiclient.go b/confluent_osdeploy/utils/confusebox/apiclient.go index 81360a0c..bbbab0e5 100644 --- a/confluent_osdeploy/utils/confusebox/apiclient.go +++ b/confluent_osdeploy/utils/confusebox/apiclient.go @@ -72,22 +72,23 @@ func NewApiClient(cafile string, keyfile string, nodename string, server string) func (apiclient *ApiClient) RegisterKey(crypted string, hmac string) (error) { cryptbytes := []byte(crypted) - _, err := apiclient.request("/confluent-api/self/registerapikey", "", &cryptbytes, "", hmac) + cryptbuffer := bytes.NewBuffer(cryptbytes) + _, err := apiclient.request("/confluent-api/self/registerapikey", "", cryptbuffer, "", hmac) return err } -func (apiclient *ApiClient) Fetch(url string, outputfile string, mime string) (error) { +func (apiclient *ApiClient) Fetch(url string, outputfile string, mime string, body io.Reader) (error) { outp, err := os.Create(outputfile) if err != nil { return err } defer outp.Close() - rsp, err := apiclient.request(url, mime, nil, "", "") + rsp, err := apiclient.request(url, mime, body, "", "") if err != nil { return err } _, err = io.Copy(outp, rsp) return err } -func (apiclient *ApiClient) GrabText(url string, mime string) (string, error){ - rsp, err := apiclient.request(url, mime, nil, "", "") +func (apiclient *ApiClient) GrabText(url string, mime string, body io.Reader) (string, error){ + rsp, err := apiclient.request(url, mime, body, "", "") if err != nil { return "", err } rspdata, err := io.ReadAll(rsp) if err != nil { return "", err } @@ -95,7 +96,7 @@ func (apiclient *ApiClient) GrabText(url string, mime string) (string, error){ return rsptxt, nil } -func (apiclient *ApiClient) request(url string, mime 
string, body *[]byte, method string, hmac string) (io.ReadCloser, error) { +func (apiclient *ApiClient) request(url string, mime string, body io.Reader, method string, hmac string) (io.ReadCloser, error) { if ! strings.Contains(url, "https://") { url = fmt.Sprintf("https://%s%s", apiclient.urlserver, url) } @@ -111,7 +112,7 @@ func (apiclient *ApiClient) request(url string, mime string, body *[]byte, metho if body == nil { rq, err = http.NewRequest(method, url, nil) } else { - rq, err = http.NewRequest(method, url, bytes.NewBuffer(*body)) + rq, err = http.NewRequest(method, url, body) } if err != nil { return nil, err } if (mime != "") { rq.Header.Set("Accept", mime) } diff --git a/confluent_osdeploy/utils/confusebox/main.go b/confluent_osdeploy/utils/confusebox/main.go index 3ff86563..9ae23959 100644 --- a/confluent_osdeploy/utils/confusebox/main.go +++ b/confluent_osdeploy/utils/confusebox/main.go @@ -4,6 +4,7 @@ import ( "bytes" "flag" "os" + "io" "fmt" ) @@ -48,6 +49,8 @@ func main() { invokeapi.BoolVar(&usejson, "j", false, "Request JSON formatted reply") outputfile := invokeapi.String("o", "", "Filename to store download to") invokeapi.StringVar(&confluentsrv, "s", "", "Confluent server to request from") + invokedata := invokeapi.String("d", "", "Data to submit") + invokedatafile := invokeapi.String("i", "", "File containing data to submit") @@ -74,7 +77,16 @@ func main() { outp.Write([]byte(password)) case "invoke": var err error + var body io.Reader + body = nil invokeapi.Parse(os.Args[2:]) + if *invokedata != "" { + body = bytes.NewBuffer([]byte(*invokedata)) + } + if *invokedatafile != "" { + body, err = os.Open(*invokedatafile) + if err != nil { panic(err) } + } if confluentsrv == "" { confluentsrv, err = get_confluent_server() } @@ -85,9 +97,9 @@ func main() { mime = "application/json" } if *outputfile != "" { - apiclient.Fetch(invokeapi.Arg(0), *outputfile, mime) + apiclient.Fetch(invokeapi.Arg(0), *outputfile, mime, body) } - rsp, err := 
apiclient.GrabText(invokeapi.Arg(0), mime) + rsp, err := apiclient.GrabText(invokeapi.Arg(0), mime, body) if err != nil { panic(err) } fmt.Println(rsp) default: From 7d7baf0f77ad1c5010988b61bed968a5abd69cbd Mon Sep 17 00:00:00 2001 From: Tinashe Date: Wed, 12 Mar 2025 12:51:42 -0400 Subject: [PATCH 053/413] handle empty timestample --- confluent_client/bin/nodeeventlog | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/confluent_client/bin/nodeeventlog b/confluent_client/bin/nodeeventlog index 680ea400..80d64357 100755 --- a/confluent_client/bin/nodeeventlog +++ b/confluent_client/bin/nodeeventlog @@ -78,8 +78,11 @@ exitcode = 0 def format_event(evt): retparts = [] if 'timestamp' in evt and evt['timestamp'] is not None: - display = dt.strptime(evt['timestamp'], '%Y-%m-%dT%H:%M:%S') - retparts.append(display.strftime('%m/%d/%Y %H:%M:%S')) + try: + display = dt.strptime(evt['timestamp'], '%Y-%m-%dT%H:%M:%S') + retparts.append(display.strftime('%m/%d/%Y %H:%M:%S')) + except ValueError: + display = '' dscparts = [] if evt.get('log_id', None): retparts.append(evt['log_id'] + ':') From 58608016c48dff0ed2f4630983571ec564012f4e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 12 Mar 2025 13:23:00 -0400 Subject: [PATCH 054/413] Add wait for disk bringup to allow media based co-opt of genesis --- .../genesis/initramfs/opt/confluent/bin/rungenesis | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis index 1d87e978..88b8d39e 100644 --- a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis +++ b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis @@ -55,6 +55,13 @@ mkdir -p /etc/pki/tls/certs cat /tls/*.pem > /etc/pki/tls/certs/ca-bundle.crt TRIES=0 touch /etc/confluent/confluent.info +TRIES=5 +echo -n "Waitiing for disks..." +while [ ! 
-e /dev/disk/by-label ] && [ $TRIES -gt 0 ]; do + sleep 1 + TRIES=$((TRIES - 1)) +done +echo "Done" if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then mkdir -p /media/ident mount /dev/disk/by-label/CNFLNT_IDNT /media/ident From 6402861f4ca40771302d0e86a3b2004f2a876431 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 13 Mar 2025 14:22:26 -0400 Subject: [PATCH 055/413] Provide custom node secret attributes This allows for confluent to pass node secret data through. --- .../confluent/config/configmanager.py | 16 ++++++++++------ confluent_server/confluent/httpapi.py | 4 ++-- .../plugins/configuration/attributes.py | 4 ++-- confluent_server/confluent/selfservice.py | 4 +++- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index e505e49c..af9c8a39 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -136,7 +136,7 @@ def attrib_supports_expression(attrib): if not isinstance(attrib, str): attrib = attrib.decode('utf8') attrib = _attraliases.get(attrib, attrib) - if attrib.startswith('secret.') or attrib.startswith('crypted.'): + if attrib.startswith('secret.') or attrib.startswith('crypted.') or attrib.startswith('custom.nodesecret.'): return False return True @@ -1108,6 +1108,10 @@ class _ExpressionFormat(string.Formatter): field_name = val parsed = ast.parse(field_name) val = self._handle_ast_node(parsed.body[0].value) + try: + val = int(val) + except Exception: + pass return format(val, format_spec) def _handle_ast_node(self, node): @@ -1373,7 +1377,7 @@ class ConfigManager(object): attribute, match = expression.split('=') else: raise Exception('Invalid Expression') - if attribute.startswith('secret.'): + if attribute.startswith('secret.') or attribute.startswith('custom.nodesecret.'): raise Exception('Filter by secret attributes is not supported') if 
attribute_name_is_invalid(attribute): raise ValueError( @@ -2023,10 +2027,10 @@ class ConfigManager(object): newdict = {'value': attribmap[group][attr]} else: newdict = attribmap[group][attr] - if keydata and attr.startswith('secret.') and 'cryptvalue' in newdict: + if keydata and (attr.startswith('secret.') or attr.startswith('custom.nodesecret.')) and 'cryptvalue' in newdict: newdict['value'] = decrypt_value(newdict['cryptvalue'], keydata['cryptkey'], keydata['integritykey']) del newdict['cryptvalue'] - if 'value' in newdict and attr.startswith("secret."): + if 'value' in newdict and (attr.startswith('secret.') or attr.startswith('custom.nodesecret.')): newdict['cryptvalue'] = crypt_value(newdict['value']) del newdict['value'] if 'value' in newdict and attr.startswith("crypted."): @@ -2485,10 +2489,10 @@ class ConfigManager(object): # add check here, skip None attributes if newdict is None: continue - if keydata and attrname.startswith('secret.') and 'cryptvalue' in newdict: + if keydata and (attrname.startswith('secret.') or attrname.startswith('custom.nodesecret.')) and 'cryptvalue' in newdict: newdict['value'] = decrypt_value(newdict['cryptvalue'], keydata['cryptkey'], keydata['integritykey']) del newdict['cryptvalue'] - if 'value' in newdict and attrname.startswith("secret."): + if 'value' in newdict and (attrname.startswith('secret.') or attrname.startswith('custom.nodesecret.')): newdict['cryptvalue'] = crypt_value(newdict['value']) del newdict['value'] if 'value' in newdict and attrname.startswith("crypted."): diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index c47722d4..b2d45f0c 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -97,7 +97,7 @@ def group_creation_resources(): for attr in sorted(attribs.node): if attr == 'groups': continue - if attr.startswith("secret."): + if attr.startswith('secret.') or attr.startswith('custom.nodesecret.'): yield 
confluent.messages.CryptedAttributes( kv={attr: None}, desc=attribs.node[attr]['description']).html() + '
\n' @@ -116,7 +116,7 @@ def node_creation_resources(): yield confluent.messages.Attributes( kv={'name': None}, desc="Name of the node").html() + '
' for attr in sorted(attribs.node): - if attr.startswith("secret."): + if attr.startswith('secret.') or attr.startswith('custom.nodesecret.'): yield confluent.messages.CryptedAttributes( kv={attr: None}, desc=attribs.node[attr]['description']).html() + '
\n' diff --git a/confluent_server/confluent/plugins/configuration/attributes.py b/confluent_server/confluent/plugins/configuration/attributes.py index 6986227b..2a8b2bbb 100644 --- a/confluent_server/confluent/plugins/configuration/attributes.py +++ b/confluent_server/confluent/plugins/configuration/attributes.py @@ -59,7 +59,7 @@ def retrieve_nodegroup(nodegroup, element, configmanager, inputdata, clearwarnby val['desc'] = 'The noderange this group is expanded ' \ 'to when used in noderange, exclusive with static ' \ 'nodes' - if attribute.startswith('secret.') or attribute.startswith('crypted.'): + if attribute.startswith('secret.') or attribute.startswith('crypted.') or attribute.startswith('custom.nodesecret.'): yield msg.CryptedAttributes( kv={attribute: val}, desc=allattributes.node[attribute]['description']) @@ -121,7 +121,7 @@ def retrieve_nodes(nodes, element, configmanager, inputdata, clearwarnbynode): val = [] else: # no setting, provide a blank val = {'value': None} - if attribute.startswith('secret.') or attribute.startswith('crypted.'): + if attribute.startswith('secret.') or attribute.startswith('crypted.') or attribute.startswith('custom.nodesecret.'): yield msg.CryptedAttributes( node, {attribute: val}, allattributes.node.get( diff --git a/confluent_server/confluent/selfservice.py b/confluent_server/confluent/selfservice.py index d206337e..bb619f78 100644 --- a/confluent_server/confluent/selfservice.py +++ b/confluent_server/confluent/selfservice.py @@ -262,12 +262,14 @@ def handle_request(env, start_response): start_response('200 OK', (('Content-Type', retype),)) yield dumper(res) elif env['PATH_INFO'] == '/self/myattribs': - cfd = cfg.get_node_attributes(nodename, '*').get(nodename, {}) + cfd = cfg.get_node_attributes(nodename, '*', decrypt=True).get(nodename, {}) rsp = {} for k in cfd: if k.startswith('secret') or k.startswith('crypt') or 'value' not in cfd[k] or not cfd[k]['value']: continue rsp[k] = cfd[k]['value'] + if isinstance(rsp[k], 
bytes): + rsp[k] = rsp[k].decode() start_response('200 OK', (('Conntent-Type', retype),)) yield dumper(rsp) elif env['PATH_INFO'] == '/self/netcfg': From 7d83a920a23f2d125e3d7b30282dc0099a0aff6b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 13 Mar 2025 15:59:10 -0400 Subject: [PATCH 056/413] Add mechanism for configurable ikvm response This allows for more flexible ikvm handling with newer pyghmi. --- confluent_server/confluent/core.py | 9 ++++++- confluent_server/confluent/messages.py | 2 +- .../plugins/hardwaremanagement/ipmi.py | 26 +++++++++++++++++++ .../plugins/hardwaremanagement/redfish.py | 25 ++++++++++++++++++ 4 files changed, 60 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index 48af7b70..14cea46a 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -434,7 +434,14 @@ def _init_core(): 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', }), - 'ikvm': PluginRoute({'handler': 'ikvm'}), + 'ikvm': PluginRoute({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + 'ikvm_methods': PluginRoute({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), }, 'description': PluginRoute({ 'pluginattrs': ['hardwaremanagement.method'], diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index 70b2ca21..e8ea972f 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -952,7 +952,7 @@ class InputIdentImage(ConfluentInputMessage): class InputIkvmParams(ConfluentInputMessage): keyname = 'method' - valid_values = ['unix', 'wss'] + valid_values = ['unix', 'wss', 'url'] class InputIdentifyMessage(ConfluentInputMessage): valid_values = set([ diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index 32fabefe..e5219352 100644 --- 
a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -38,6 +38,7 @@ ipmicommand = eventlet.import_patched('pyghmi.ipmi.command') import socket import ssl import traceback +import confluent.vinzmanager as vinzmanager if not hasattr(ssl, 'SSLEOFError'): @@ -175,6 +176,7 @@ def sanitize_invdata(indata): class IpmiCommandWrapper(ipmicommand.Command): def __init__(self, node, cfm, **kwargs): + self.confluentbmcname = kwargs['bmc'] self.cfm = cfm self.node = node self.sensormap = {} @@ -592,6 +594,10 @@ class IpmiHandler(object): self.handle_servicedata_fetch() elif self.element == ['description']: self.handle_description() + elif self.element == ['console', 'ikvm_methods']: + self.handle_ikvm_methods() + elif self.element == ['console', 'ikvm']: + self.handle_ikvm() else: raise Exception('Not Implemented') @@ -1610,6 +1616,26 @@ class IpmiHandler(object): dsc = self.ipmicmd.get_description() self.output.put(msg.KeyValueData(dsc, self.node)) + def handle_ikvm_methods(self): + dsc = self.ipmicmd.get_ikvm_methods() + dsc = {'ikvm_methods': dsc} + self.output.put(msg.KeyValueData(dsc, self.node)) + + def handle_ikvm(self): + methods = self.ipmicmd.get_ikvm_methods() + if 'openbmc' in methods: + url = vinzmanager.get_url(self.node, self.inputdata) + self.output.put(msg.ChildCollection(url)) + return + launchdata = self.ipmicmd.get_ikvm_launchdata() + if 'url' in launchdata and not launchdata['url'].startswith('https://'): + mybmc = self.ipmicmd.confluentbmcname + if ':' in mybmc and not '[' in mybmc: + mybmc = '[{}]'.format(mybmc) + launchdata['url'] = 'https://{}{}'.format(mybmc, launchdata['url']) + self.output.put(msg.KeyValueData(launchdata, self.node)) + + def handle_graphical_console(self): args = self.ipmicmd.get_graphical_console() m = msg.GraphicalConsole(self.node, *args) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py 
b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index c7c5f5d4..6901c39b 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import confluent.vinzmanager as vinzmanager import confluent.exceptions as exc import confluent.firmwaremanager as firmwaremanager import confluent.messages as msg @@ -153,6 +154,7 @@ def sanitize_invdata(indata): class IpmiCommandWrapper(ipmicommand.Command): def __init__(self, node, cfm, **kwargs): + self.confluentbmcname = kwargs['bmc'] #kwargs['pool'] = eventlet.greenpool.GreenPool(4) #Some BMCs at the time of this writing crumble under the weight #of 4 concurrent requests. For now give up on this optimization. @@ -449,6 +451,10 @@ class IpmiHandler(object): self.handle_servicedata_fetch() elif self.element == ['description']: self.handle_description() + elif self.element == ['console', 'ikvm_methods']: + self.handle_ikvm_methods() + elif self.element == ['console', 'ikvm']: + self.handle_ikvm() else: raise Exception('Not Implemented') @@ -1467,6 +1473,25 @@ class IpmiHandler(object): dsc = self.ipmicmd.get_description() self.output.put(msg.KeyValueData(dsc, self.node)) + def handle_ikvm_methods(self): + dsc = self.ipmicmd.get_ikvm_methods() + dsc = {'ikvm_methods': dsc} + self.output.put(msg.KeyValueData(dsc, self.node)) + + def handle_ikvm(self): + methods = self.ipmicmd.get_ikvm_methods() + if 'openbmc' in methods: + url = vinzmanager.get_url(self.node, self.inputdata) + self.output.put(msg.ChildCollection(url)) + return + launchdata = self.ipmicmd.get_ikvm_launchdata() + if 'url' in launchdata and not launchdata['url'].startswith('https://'): + mybmc = self.ipmicmd.confluentbmcname + if ':' in mybmc and not '[' in mybmc: + mybmc = '[{}]'.format(mybmc) + launchdata['url'] = 'https://{}{}'.format(mybmc, 
launchdata['url']) + self.output.put(msg.KeyValueData(launchdata, self.node)) + def _str_health(health): if isinstance(health, str): From 1e463367fe09c01471fdc160f208e3ac60812755 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 18 Mar 2025 15:52:37 -0400 Subject: [PATCH 057/413] Switch Ubuntu initramfs to ssl The busybox wget invocation of openssl is broken. Override by stubbing it out to let openssl pick the verify hostname instead of wget specified one, which is incorrect. --- .../initramfs/scripts/init-premount/confluent | 11 +++++++++-- .../initramfs/scripts/init-premount/confluent | 11 +++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent index b3fcdef6..ff3ac42b 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent @@ -88,8 +88,15 @@ if [ ! -z "$cons" ]; then fi echo "Preparing to deploy $osprofile from $MGR" echo $osprofile > /custom-installation/confluent/osprofile -echo URL=http://${MGR}/confluent-public/os/$osprofile/distribution/install.iso >> /conf/param.conf -fcmdline="$(cat /custom-installation/confluent/cmdline.orig) url=http://${MGR}/confluent-public/os/$osprofile/distribution/install.iso" +mv /usr/bin/openssl /usr/bin/ossl +echo '#!/bin/sh' > /usr/bin/openssl +echo 'args=$*' >> /usr/bin/openssl +echo 'args=$(echo $args|sed -e "s/-verify_hostname.*//")' >> /usr/bin/openssl +echo 'exec /usr/bin/ossl $args' >> /usr/bin/openssl +chmod +x /usr/bin/openssl +cp /tls/* /etc/ssl/certs/ +echo URL=https://${MGR}:443/confluent-public/os/$osprofile/distribution/install.iso >> /conf/param.conf +fcmdline="$(cat /custom-installation/confluent/cmdline.orig) url=https://${MGR}:443/confluent-public/os/$osprofile/distribution/install.iso" if [ !
-z "$cons" ]; then fcmdline="$fcmdline console=${cons#/dev/}" fi diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent index f15aff01..1e0de226 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent @@ -1,3 +1,4 @@ +cp /tls/* /etc/ssl/certs/ cd /sys/class/net for nic in *; do ip link set $nic up @@ -91,8 +92,14 @@ echo $osprofile > /custom-installation/confluent/osprofile . /etc/os-release DIRECTISO=$(blkid -t TYPE=iso9660 |grep -Ei ' LABEL="Ubuntu-Server '$VERSION_ID) if [ -z "$DIRECTISO" ]; then - echo URL=http://${MGR}/confluent-public/os/$osprofile/distribution/install.iso >> /conf/param.conf - fcmdline="$(cat /custom-installation/confluent/cmdline.orig) url=http://${MGR}/confluent-public/os/$osprofile/distribution/install.iso" + mv /usr/bin/openssl /usr/bin/ossl + echo '#!/bin/sh' > /usr/bin/openssl + echo 'args=$*' >> /usr/bin/openssl + echo 'args=$(echo $args|sed -e "s/-verify_hostname.*//")' >> /usr/bin/openssl + echo 'exec /usr/bin/ossl $args' >> /usr/bin/openssl + chmod +x /usr/bin/openssl + echo URL=https://${MGR}:443/confluent-public/os/$osprofile/distribution/install.iso >> /conf/param.conf + fcmdline="$(cat /custom-installation/confluent/cmdline.orig) url=https://${MGR}:443/confluent-public/os/$osprofile/distribution/install.iso" fi if [ ! 
-z "$cons" ]; then fcmdline="$fcmdline console=${cons#/dev/}" From e25b3acd98f65843fe66c16ea529d541a1e9421f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 19 Mar 2025 09:41:21 -0400 Subject: [PATCH 058/413] Fix onboot.d in genesis profiles --- confluent_osdeploy/genesis/profiles/default/scripts/onboot.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_osdeploy/genesis/profiles/default/scripts/onboot.sh b/confluent_osdeploy/genesis/profiles/default/scripts/onboot.sh index 65347eab..12398ad3 100644 --- a/confluent_osdeploy/genesis/profiles/default/scripts/onboot.sh +++ b/confluent_osdeploy/genesis/profiles/default/scripts/onboot.sh @@ -10,6 +10,9 @@ # present run_remote_python syncfileclient +run_remote_parts onboot.d + + # Induce execution of remote configuration, e.g. ansible plays in ansible/onboot.d/ run_remote_config onboot From ef68259745a2425dbb5adee36c8eece9de491e84 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 19 Mar 2025 12:41:50 -0400 Subject: [PATCH 059/413] Provide more full fixup of openssl invocation in wget For IPv4 and IPv6, strip the ':443' for arguments where it doesn't make sense. For IPv6, strip out [, ], and '%' from those arguments. 
--- .../initramfs/scripts/init-premount/confluent | 21 ++++++++++++++----- .../initramfs/scripts/init-premount/confluent | 19 +++++++++++++---- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent index ff3ac42b..297e5c3f 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent @@ -1,4 +1,5 @@ cd /sys/class/net +cp /tls/* /etc/ssl/certs/ for nic in *; do ip link set $nic up done @@ -89,12 +90,22 @@ fi echo "Preparing to deploy $osprofile from $MGR" echo $osprofile > /custom-installation/confluent/osprofile mv /usr/bin/openssl /usr/bin/ossl -echo '#!/bin/sh' > /usr/bin/openssl -echo 'args=$*' >> /usr/bin/openssl -echo 'args=$(echo $args|sed -e "s/-verify_hostname.*//")' >> /usr/bin/openssl -echo 'exec /usr/bin/ossl $args' >> /usr/bin/openssl +cat > /usr/bin/openssl << 'EOF' +#!/bin/sh +AMENDARGS=0 +nargs="" +for arg in $*; do + if [ "$arg" == "-servername" ]; then + AMENDARGS=1 + fi + if [ "$AMENDARGS" == "1" ]; then + arg=$(echo $arg|sed -e 's/:443$//' -e 's/\[//' -e 's/\]//' -e 's/%.*//') + fi + nargs="$nargs $arg" +done +exec /usr/bin/ossl $nargs +EOF chmod +x /usr/bin/openssl -cp /tls/* /etc/ssl/certs/ echo URL=https://${MGR}:443/confluent-public/os/$osprofile/distribution/install.iso >> /conf/param.conf fcmdline="$(cat /custom-installation/confluent/cmdline.orig) url=https://${MGR}:443/confluent-public/os/$osprofile/distribution/install.iso" if [ ! 
-z "$cons" ]; then diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent index 1e0de226..302b6657 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent @@ -93,10 +93,21 @@ echo $osprofile > /custom-installation/confluent/osprofile DIRECTISO=$(blkid -t TYPE=iso9660 |grep -Ei ' LABEL="Ubuntu-Server '$VERSION_ID) if [ -z "$DIRECTISO" ]; then mv /usr/bin/openssl /usr/bin/ossl - echo '#!/bin/sh' > /usr/bin/openssl - echo 'args=$*' >> /usr/bin/openssl - echo 'args=$(echo $args|sed -e "s/-verify_hostname.*//")' >> /usr/bin/openssl - echo 'exec /usr/bin/ossl $args' >> /usr/bin/openssl + cat > /usr/bin/openssl << 'EOF' +#!/bin/sh +AMENDARGS=0 +nargs="" +for arg in $*; do + if [ "$arg" == "-servername" ]; then + AMENDARGS=1 + fi + if [ "$AMENDARGS" == "1" ]; then + arg=$(echo $arg|sed -e 's/:443$//' -e 's/\[//' -e 's/\]//' -e 's/%.*//') + fi + nargs="$nargs $arg" +done +exec /usr/bin/ossl $nargs +EOF chmod +x /usr/bin/openssl echo URL=https://${MGR}:443/confluent-public/os/$osprofile/distribution/install.iso >> /conf/param.conf fcmdline="$(cat /custom-installation/confluent/cmdline.orig) url=https://${MGR}:443/confluent-public/os/$osprofile/distribution/install.iso" From 29915acaebc108825d8460391efc7b610fcec2d3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 19 Mar 2025 15:31:51 -0400 Subject: [PATCH 060/413] Provide API to query update readiness --- confluent_server/confluent/core.py | 4 ++++ .../confluent/plugins/hardwaremanagement/ipmi.py | 10 ++++++++++ .../confluent/plugins/hardwaremanagement/redfish.py | 10 ++++++++++ 3 files changed, 24 insertions(+) diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index 14cea46a..59615f64 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py
@@ -490,6 +490,10 @@ def _init_core(): 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', }), + 'updatestatus': PluginCollection({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), 'updates': { 'active': PluginCollection({ 'pluginattrs': ['hardwaremanagement.method'], diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index e5219352..16b59152 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -572,6 +572,8 @@ class IpmiHandler(object): self.handle_configuration() elif self.element[:3] == ['inventory', 'firmware', 'updates']: self.handle_update() + elif self.element[:3] == ['inventory', 'firmware', 'updatestatus']: + self.handle_update_status() elif self.element[0] == 'inventory': self.handle_inventory() elif self.element == ['media', 'attach']: @@ -978,6 +980,14 @@ class IpmiHandler(object): if errorneeded: self.output.put(errorneeded) + def handle_update_status(self): + activeupdates = firmwaremanager.list_updates([self.node], None, []) + if activeupdates: + self.output.put(msg.KeyValueData({'status': 'active'}, self.node)) + else: + status = self.ipmicmd.get_update_status() + self.output.put(msg.KeyValueData({'status': status}, self.node)) + def handle_inventory(self): if self.element[1] == 'firmware': if len(self.element) == 3: diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 6901c39b..39fb3df3 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -431,6 +431,8 @@ class IpmiHandler(object): self.handle_configuration() elif self.element[:3] == ['inventory', 'firmware', 'updates']: self.handle_update() + elif self.element[:3] == ['inventory', 
'firmware', 'updatestatus']: + self.handle_update_status() elif self.element[0] == 'inventory': self.handle_inventory() elif self.element == ['media', 'attach']: @@ -835,6 +837,14 @@ class IpmiHandler(object): if errorneeded: self.output.put(errorneeded) + def handle_update_status(self): + activeupdates = firmwaremanager.list_updates([self.node], None, []) + if activeupdates: + self.output.put(msg.KeyValueData({'status': 'active'}, self.node)) + else: + status = self.ipmicmd.get_update_status() + self.output.put(msg.KeyValueData({'status': status}, self.node)) + def handle_inventory(self): if self.element[1] == 'firmware': if len(self.element) == 3: From 3a9b75839b64c270910d472b93ff0e60521b2cb3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 19 Mar 2025 16:08:42 -0400 Subject: [PATCH 061/413] Add another error code for XCC user rename refusal Yet another error to represent rename refusal --- confluent_server/confluent/discovery/handlers/xcc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index d5a5998f..39920b3c 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -479,7 +479,7 @@ class NodeHandler(immhandler.NodeHandler): {'UserName': username}, method='PATCH') if status != 200: rsp = json.loads(rsp) - if rsp.get('error', {}).get('code', 'Unknown') in ('Base.1.8.GeneralError', 'Base.1.12.GeneralError', 'Base.1.14.GeneralError'): + if rsp.get('error', {}).get('code', 'Unknown') in ('Base.1.8.GeneralError', 'Base.1.12.GeneralError', 'Base.1.14.GeneralError', 'Base.1.18.GeneralError'): if tries: eventlet.sleep(4) elif tmpaccount: From 02bd26e7d23d9c0092cc2e0b96de35ea34826bde Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 19 Mar 2025 16:17:31 -0400 Subject: [PATCH 062/413] Correct updatestatus to be resource, not collection ---
confluent_server/confluent/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index 59615f64..61a03d05 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -490,7 +490,7 @@ def _init_core(): 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', }), - 'updatestatus': PluginCollection({ + 'updatestatus': PluginRoute({ 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', }), From 5fb04126e661c7147c41c5dfcf39c423f25a236f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 19 Mar 2025 16:43:19 -0400 Subject: [PATCH 063/413] Fix tracking of 'active' updates in update status --- confluent_server/confluent/plugins/hardwaremanagement/ipmi.py | 2 +- .../confluent/plugins/hardwaremanagement/redfish.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index 16b59152..7e5d7a18 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -981,7 +981,7 @@ class IpmiHandler(object): self.output.put(errorneeded) def handle_update_status(self): - activeupdates = firmwaremanager.list_updates([self.node], None, []) + activeupdates = list(firmwaremanager.list_updates([self.node], None, [])) if activeupdates: self.output.put(msg.KeyValueData({'status': 'active'}, self.node)) else: diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 39fb3df3..7f60dd88 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -838,7 +838,7 @@ class IpmiHandler(object): self.output.put(errorneeded) def handle_update_status(self): - 
activeupdates = firmwaremanager.list_updates([self.node], None, []) + activeupdates = list(firmwaremanager.list_updates([self.node], None, [])) if activeupdates: self.output.put(msg.KeyValueData({'status': 'active'}, self.node)) else: From f6e9691b7f338c1fa337b87494dcf890b85960a6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 20 Mar 2025 08:21:47 -0400 Subject: [PATCH 064/413] Amend arguments on IPv4 invocation It turns out that when busybox invokes openssl for IPv4, it does not pass a servername field. In this case, start amending arguments after '-verify' instead, to catch the verify_ip argument correctly. --- .../ubuntu20.04/initramfs/scripts/init-premount/confluent | 2 +- .../ubuntu22.04/initramfs/scripts/init-premount/confluent | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent index 297e5c3f..a974f04d 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent @@ -95,7 +95,7 @@ cat > /usr/bin/openssl << 'EOF' AMENDARGS=0 nargs="" for arg in $*; do - if [ "$arg" == "-servername" ]; then + if [ "$arg" == "-servername" -o "$arg" == "-verify" ]; then AMENDARGS=1 fi if [ "$AMENDARGS" == "1" ]; then diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent index 302b6657..725560de 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent @@ -98,7 +98,7 @@ if [ -z "$DIRECTISO" ]; then AMENDARGS=0 nargs="" for arg in $*; do - if [ "$arg" == "-servername" ]; then + if [ "$arg" == "-servername" -o "$arg" == "-verify" ]; then AMENDARGS=1 fi if [ "$AMENDARGS" == "1" ]; then From 
e375c956ed90ef91ad0c66d32b3b605138135b95 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 20 Mar 2025 09:32:30 -0400 Subject: [PATCH 065/413] Provide command line access to the updatestatus --- confluent_client/bin/nodefirmware | 17 +++++++++++++++-- confluent_client/doc/man/nodefirmware.ronn | 5 ++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/confluent_client/bin/nodefirmware b/confluent_client/bin/nodefirmware index 465314d7..0c6a0958 100755 --- a/confluent_client/bin/nodefirmware +++ b/confluent_client/bin/nodefirmware @@ -56,7 +56,7 @@ components = ['all'] argparser = optparse.OptionParser( usage="Usage: " - "%prog [list][update [--backup ]]|[]") + "%prog [list][updatestatus][update [--backup ]]|[]") argparser.add_option('-b', '--backup', action='store_true', help='Target a backup bank rather than primary') argparser.add_option('-m', '--maxnodes', type='int', @@ -65,14 +65,18 @@ argparser.add_option('-m', '--maxnodes', type='int', (options, args) = argparser.parse_args() upfile = None +querystatus = False try: noderange = args[0] if len(args) > 1: if args[1] == 'update': upfile = args[2] else: + comps = [] if args[1] == 'list': comps = args[2:] + elif args[1] == 'updatestatus': + querystatus = True else: comps = args[1:] components = [] @@ -171,7 +175,16 @@ def show_firmware(session): try: session = client.Command() - if upfile is None: + if querystatus: + for res in session.read( + '/noderange/{0}/inventory/firmware/updatestatus'.format(noderange)): + for node in res.get('databynode', {}): + currstat = res['databynode'][node].get('status', None) + if currstat: + print('{}: {}'.format(node, currstat)) + else: + print(repr(res)) + elif upfile is None: show_firmware(session) else: update_firmware(session, upfile) diff --git a/confluent_client/doc/man/nodefirmware.ronn b/confluent_client/doc/man/nodefirmware.ronn index bd55f847..b8925976 100644 --- a/confluent_client/doc/man/nodefirmware.ronn +++ 
b/confluent_client/doc/man/nodefirmware.ronn @@ -3,7 +3,7 @@ nodefirmware(8) -- Report firmware information on confluent nodes ## SYNOPSIS -`nodefirmware [list][update [--backup ]]|[]` +`nodefirmware [list][updatestatus][update [--backup ]]|[]` ## DESCRIPTION @@ -17,6 +17,9 @@ not be relevant to redfish. Additionally, the Lenovo XCC makes certain information available over IPMI that is not otherwise available (for example the FPGA version where applicable). +The updatestatus argument will describe the state of firmware updates on the +nodes. + In the update form, it accepts a single file and attempts to update it using the out of band facilities. Firmware updates can end in one of three states: From 24e419568a2793d5045eee52d24194a093409aa2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 21 Mar 2025 09:12:41 -0400 Subject: [PATCH 066/413] Remove spurious output from stdout log --- confluent_server/confluent/discovery/handlers/xcc.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index 39920b3c..a7feaa93 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -671,8 +671,6 @@ class NodeHandler(immhandler.NodeHandler): statargs[currkey + attribsuffix] = statargs[currkey] del statargs[currkey] netset, status = wc.grab_json_response_with_status('/api/dataset', statargs) - print(repr(netset)) - print(repr(status)) elif self.ipaddr.startswith('fe80::'): self.configmanager.set_node_attributes( From 84881cc6be4dfe4092641cda941d44a92e8bec5c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 21 Mar 2025 09:12:57 -0400 Subject: [PATCH 067/413] Fix invoke to a file to not repeat the request --- confluent_osdeploy/utils/confusebox/main.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/utils/confusebox/main.go 
b/confluent_osdeploy/utils/confusebox/main.go index 9ae23959..edc1c48b 100644 --- a/confluent_osdeploy/utils/confusebox/main.go +++ b/confluent_osdeploy/utils/confusebox/main.go @@ -97,11 +97,13 @@ func main() { mime = "application/json" } if *outputfile != "" { - apiclient.Fetch(invokeapi.Arg(0), *outputfile, mime, body) + err := apiclient.Fetch(invokeapi.Arg(0), *outputfile, mime, body) + if err != nil { panic(err) } + } else { + rsp, err := apiclient.GrabText(invokeapi.Arg(0), mime, body) + if err != nil { panic(err) } + fmt.Println(rsp) } - rsp, err := apiclient.GrabText(invokeapi.Arg(0), mime, body) - if err != nil { panic(err) } - fmt.Println(rsp) default: panic("Unrecognized subcommand") } From fae266bf61b95083bd3f5ad3cb840bdb2518f5e9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 24 Mar 2025 16:28:36 -0400 Subject: [PATCH 068/413] Refresh genesis for 3.13 release --- confluent_client/bin/dir2img | 58 +++++++++++++++++++++---------- genesis/97genesis/installkernel | 1 + genesis/97genesis/module-setup.sh | 22 ++++++++++++ genesis/buildgenesis.sh | 4 +-- genesis/firefox/firefoxlibs | 49 ++++++++++++++++++++++++++ genesis/firefox/makelayer.sh | 6 ++++ 6 files changed, 119 insertions(+), 21 deletions(-) create mode 100644 genesis/97genesis/module-setup.sh create mode 100644 genesis/firefox/firefoxlibs create mode 100644 genesis/firefox/makelayer.sh diff --git a/confluent_client/bin/dir2img b/confluent_client/bin/dir2img index 0100cf20..debb5b71 100644 --- a/confluent_client/bin/dir2img +++ b/confluent_client/bin/dir2img @@ -8,30 +8,35 @@ import os import subprocess import sys -def create_image(directory, image, label=None): - ents = 0 - datasz = 512 - for dir in os.walk(sys.argv[1]): - ents += 1 - for filen in dir[2]: +def create_image(directory, image, label=None, esize=0, totalsize=None): + + if totalsize: + datasz = totalsize * 1048576 + else: + ents = 0 + datasz = 512 + (esize * 1048576) + for dir in os.walk(sys.argv[1]): ents += 1 - filename = 
os.path.join(dir[0], filen) - currsz = os.path.getsize(filename) - # assuming up to 65k cluster - currsz = (currsz // 512 +1) * 512 - datasz += currsz - datasz += ents * 32768 - datasz = datasz // 16384 + 1 + for filen in dir[2]: + ents += 1 + filename = os.path.join(dir[0], filen) + currsz = os.path.getsize(filename) + # assuming up to 65k cluster + currsz = (currsz // 512 + 1) * 512 + datasz += currsz + datasz += ents * 32768 + datasz = datasz // 65536 + 1 with open(image, 'wb') as imgfile: - imgfile.seek(datasz * 16384 - 1) + imgfile.seek(datasz * 65536 - 1) imgfile.write(b'\x00') if label: + # 4 heads, 32 sectors, means 65k per track subprocess.check_call(['mformat', '-i', image, '-v', label, '-r', '16', '-d', '1', '-t', str(datasz), - '-s', '16','-h', '2', '::']) + '-s', '32','-h', '4', '::']) else: subprocess.check_call(['mformat', '-i', image, '-r', '16', '-d', '1', '-t', - str(datasz), '-s', '16','-h', '2', '::']) + str(datasz), '-s', '32','-h', '4', '::']) # Some clustered filesystems will have the lock from mformat # linger after close (mformat doesn't unlock) # do a blocking wait for shared lock and then explicitly @@ -56,6 +61,21 @@ if __name__ == '__main__': sys.argv[0])) sys.exit(1) label = None - if len(sys.argv) > 3: - label = sys.argv[3] - create_image(sys.argv[1], sys.argv[2], label) + args = sys.argv + esize = 0 + try: + earg = args.index('-e') + esize = int(args[earg + 1]) + args = args[:earg] + args[earg +2:] + except ValueError: + pass + totsize = None + try: + earg = args.index('-s') + totsize = int(args[earg + 1]) + args = args[:earg] + args[earg +2:] + except ValueError: + pass + if len(args) > 3: + label = args[3] + create_image(args[1], args[2], label, esize, totsize) diff --git a/genesis/97genesis/installkernel b/genesis/97genesis/installkernel index a41482d2..570e8c15 100644 --- a/genesis/97genesis/installkernel +++ b/genesis/97genesis/installkernel @@ -1,4 +1,5 @@ #!/bin/sh +instmods virtio_net instmods e1000 e1000e igb sfc mlx5_ib 
mlx5_core mlx4_en cxgb3 cxgb4 tg3 bnx2 bnx2x bna ixgb ixgbe qlge mptsas mpt2sas mpt3sas megaraid_sas ahci xhci-hcd sd_mod pmcraid be2net vfat ext3 ext4 usb_storage scsi_wait_scan ipmi_si ipmi_devintf qlcnic xfs instmods nvme instmods cdc_ether r8152 diff --git a/genesis/97genesis/module-setup.sh b/genesis/97genesis/module-setup.sh new file mode 100644 index 00000000..d8cf1d76 --- /dev/null +++ b/genesis/97genesis/module-setup.sh @@ -0,0 +1,22 @@ +#!/usr/bin/bash + +# called by dracut +check() { + return 0 +} +install() { + . $moddir/install-base + #. $moddir/install-gui + + if [ -d /usr/lib64/python3.13/ ]; then + . $moddir/install-python313 + elif [ -d /usr/lib64/python3.9/ ]; then + . $moddir/install-python39 + + fi +} + +installkernel() { + . $moddir/installkernel +} + diff --git a/genesis/buildgenesis.sh b/genesis/buildgenesis.sh index 8e0de608..8d34b9d8 100644 --- a/genesis/buildgenesis.sh +++ b/genesis/buildgenesis.sh @@ -1,8 +1,8 @@ pushd $(dirname $0) rm -rf licenses cp -a 97genesis /usr/lib/dracut/modules.d/ -cat /usr/lib/dracut/modules.d/97genesis/install-* > /usr/lib/dracut/modules.d/97genesis/install -chmod +x /usr/lib/dracut/modules.d/97genesis/install /usr/lib/dracut/modules.d/97genesis/installkernel +#cat /usr/lib/dracut/modules.d/97genesis/install-* > /usr/lib/dracut/modules.d/97genesis/install +chmod +x /usr/lib/dracut/modules.d/97genesis/install* mkdir -p boot/initramfs mkdir -p boot/efi/boot dracut --no-early-microcode --xz -N -m "genesis base" -f boot/initramfs/distribution $(uname -r) diff --git a/genesis/firefox/firefoxlibs b/genesis/firefox/firefoxlibs new file mode 100644 index 00000000..7be57132 --- /dev/null +++ b/genesis/firefox/firefoxlibs @@ -0,0 +1,49 @@ +usr/lib64/libgtk-3.so.0 +usr/lib64/libgdk-3.so.0 +usr/lib64/libcairo-gobject.so.2 +usr/lib64/libgdk_pixbuf-2.0.so.0 +usr/lib64/libatk-1.0.so.0 +usr/lib64/libepoxy.so.0 +usr/lib64/libXi.so.6 +usr/lib64/libatk-bridge-2.0.so.0 +usr/lib64/libcloudproviders.so.0 
+usr/lib64/libtracker-sparql-3.0.so.0 +usr/lib64/libXfixes.so.3 +usr/lib64/libgdk_pixbuf-2.0.so.0 +usr/lib64/libcairo-gobject.so.2 +usr/lib64/libepoxy.so.0 +usr/lib64/libwayland-egl.so.1 +usr/lib64/libXi.so.6 +usr/lib64/libXcursor.so.1 +usr/lib64/libXdamage.so.1 +usr/lib64/libXfixes.so.3 +usr/lib64/libXcomposite.so.1 +usr/lib64/libXrandr.so.2 +usr/lib64/libXinerama.so.1 +usr/lib64/libjpeg.so.62 +usr/lib64/libatspi.so.0 +usr/lib64/libdbus-1.so.3 +usr/lib64/libjson-glib-1.0.so.0 +usr/lib64/libasound.so.2 +usr/lib64/libplc4.so +usr/lib64/libssl3.so +usr/lib64/libsmime3.so +usr/lib64/libnss3.so +usr/lib64/libnssutil3.so +usr/lib64/libwebp.so.7 +usr/lib64/libwebpdemux.so.2 +usr/lib64/libvpx.so.9 +usr/lib64/libX11-xcb.so.1 +usr/lib64/libplds4.so +usr/lib64/libplds4.so +usr/lib64/libplds4.so +usr/lib64/libsharpyuv.so.0 +usr/lib64/libsoftokn3.so +etc/alternatives/libnssckbi.so.x86_64 +usr/lib64/pkcs11/p11-kit-trust.so +etc/pki +usr/share/pki +usr/lib64/libnssckbi.so +usr/lib64/libtasn1.so.6 +usr/lib64/libtasn1.so.6.6.4 +usr/lib64/libnspr4.so diff --git a/genesis/firefox/makelayer.sh b/genesis/firefox/makelayer.sh new file mode 100644 index 00000000..1d42621f --- /dev/null +++ b/genesis/firefox/makelayer.sh @@ -0,0 +1,6 @@ +cd $(dirname $0) +mydir=$(pwd) +cd - +cd / +tar -czvhf /tmp/firefox.tgz usr/bin/firefox usr/lib64/firefox $(cat $mydir/firefoxlibs) +cd - From 21f4d2e5c288b60117e0e8173a5cdfdebf2e132b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 24 Mar 2025 16:29:51 -0400 Subject: [PATCH 069/413] Remove opa from genesis --- genesis/97genesis/install-base | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/genesis/97genesis/install-base b/genesis/97genesis/install-base index 7fe1a976..22b91b37 100644 --- a/genesis/97genesis/install-base +++ b/genesis/97genesis/install-base @@ -23,11 +23,21 @@ dracut_install killall chown chroot dd expr kill parted rsync shutdown sort bloc dracut_install /etc/udev/hwdb.bin dracut_install 
/usr/share/hwdata/pci.ids dracut_install ibstat ibstatus -dracut_install opainfo dracut_install /usr/lib/udev/rules.d/10-dm.rules /usr/sbin/dmsetup /usr/lib/udev/rules.d/95-dm-notify.rules +#dracut_install opainfo #dracut_install /usr/lib/opa-fm/bin/opafmd #dracut_install /usr/sbin/opensm /usr/libexec/opensm-launch dracut_install /usr/lib64/libibverbs/libhfi1verbs-rdmav34.so /etc/libibverbs.d/hfi1verbs.driver /etc/libibverbs.d/mlx4.driver /etc/libibverbs.d/mlx5.driver /usr/lib64/libibverbs/libmlx4-rdmav34.so /usr/lib64/libibverbs/libmlx5-rdmav34.so -inst /usr/lib/dracut/modules.d/40network/net-lib.sh /lib/net-lib.sh +if [ -x /usr/libexec/openssh/sshd-session ]; then + dracut_install /usr/libexec/openssh/sshd-session +fi +if [ -e /usr/lib/dracut/modules.d/40network/net-lib.sh ]; then + inst /usr/lib/dracut/modules.d/40network/net-lib.sh /lib/net-lib.sh +else + inst /usr/lib/dracut/modules.d/45net-lib/net-lib.sh /lib/net-lib.sh +fi +if [ -e /etc/crypto-policies/back-ends/opensslcnf.config ]; then + dracut_install /etc/crypto-policies/back-ends/opensslcnf.config +fi #dracut_install mstflint This would have mlx update support, but it incurs gigantic dependencies, libicudata From 5ab02c31ee3c220a7df220b17f1fd84357c5c1db Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 24 Mar 2025 16:34:15 -0400 Subject: [PATCH 070/413] Add python 3.13 option for genesis --- genesis/97genesis/install-python313 | 202 ++++++++++++++++++ .../{install-python => install-python39} | 0 2 files changed, 202 insertions(+) create mode 100644 genesis/97genesis/install-python313 rename genesis/97genesis/{install-python => install-python39} (100%) diff --git a/genesis/97genesis/install-python313 b/genesis/97genesis/install-python313 new file mode 100644 index 00000000..3c72525f --- /dev/null +++ b/genesis/97genesis/install-python313 @@ -0,0 +1,202 @@ +#!/bin/sh +#strace /usr/libexec/platform-python -c 'import hashlib; import socket; import argparse; import socket; import os; import 
http.client; import http.cookies; import subprocess; import base64; import ctypes; import struct; import urllib.parse; import shlex; import configparser' +#dracut_install /usr/libexec/platform-python +dracut_install /usr/bin/python3 +dracut_install /etc/localtime +dracut_install /lib64/libffi.so.8 +dracut_install /lib64/libssl.so.3 +dracut_install /usr/lib64/python3.13/os.py +dracut_install /usr/lib64/gconv/gconv-modules.cache +dracut_install /usr/lib64/python3.13 +dracut_install /usr/lib64/python3.13/collections +dracut_install /usr/lib64/python3.13/ctypes +dracut_install /usr/lib64/python3.13/email +dracut_install /usr/lib64/python3.13/encodings +dracut_install /usr/lib64/python3.13/http +dracut_install /usr/lib64/python3.13/lib-dynload +dracut_install /usr/lib64/python3.13/lib-dynload/fcntl.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/binascii.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/_bisect.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/_blake2.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/_ctypes.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/_hashlib.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/_heapq.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/math.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/_posixsubprocess.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/_random.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/select.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/_socket.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/_ssl.cpython-313-x86_64-linux-gnu.so +dracut_install 
/usr/lib64/python3.13/lib-dynload/_struct.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/unicodedata.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/site-packages +dracut_install /usr/lib64/python3.13/urllib +dracut_install /usr/lib64/python3.13/re/__init__.py +dracut_install /usr/lib64/python3.13/re/_casefix.py +dracut_install /usr/lib64/python3.13/re/_compiler.py +dracut_install /usr/lib64/python3.13/re/_constants.py +dracut_install /usr/lib64/python3.13/re/_parser.py +dracut_install /usr/lib64/python3.13/importlib/_abc.py +cp -a /usr/lib64/python3.13/_pyrepl $initdir/usr/lib64/python3.13/_pyrepl +cp -a /usr/lib64/python3.13/re $initdir/usr/lib64/python3.13/re + +dracut_install /usr/lib/locale/en_US.utf8/LC_ADDRESS +dracut_install /usr/lib/locale/en_US.utf8/LC_COLLATE +dracut_install /usr/lib/locale/en_US.utf8/LC_CTYPE +dracut_install /usr/lib/locale/en_US.utf8/LC_IDENTIFICATION +dracut_install /usr/lib/locale/en_US.utf8/LC_MEASUREMENT +dracut_install /usr/lib/locale/en_US.utf8/LC_MESSAGES +dracut_install /usr/lib/locale/en_US.utf8/LC_MESSAGES/SYS_LC_MESSAGES +dracut_install /usr/lib/locale/en_US.utf8/LC_MONETARY +dracut_install /usr/lib/locale/en_US.utf8/LC_NAME +dracut_install /usr/lib/locale/en_US.utf8/LC_NUMERIC +dracut_install /usr/lib/locale/en_US.utf8/LC_PAPER +dracut_install /usr/lib/locale/en_US.utf8/LC_TELEPHONE +dracut_install /usr/lib/locale/en_US.utf8/LC_TIME +dracut_install /usr/lib/python3.13/site-packages +dracut_install /usr/lib64/python3.13/argparse.py +dracut_install /usr/lib64/python3.13/codecs.py +dracut_install /usr/lib64/python3.13/encodings/aliases.py +dracut_install /usr/lib64/python3.13/encodings/utf_8.py +dracut_install /usr/lib64/python3.13/encodings/latin_1.py +dracut_install /usr/lib64/python3.13/encodings/ascii.py +dracut_install /usr/lib64/python3.13/encodings/idna.py +dracut_install /usr/lib64/python3.13/io.py +dracut_install /usr/lib64/python3.13/_weakrefset.py 
+dracut_install /usr/lib64/python3.13/weakref.py +dracut_install /usr/lib64/python3.13/site.py +dracut_install /usr/lib64/python3.13/stat.py +dracut_install /usr/lib64/python3.13/posixpath.py +dracut_install /usr/lib64/python3.13/genericpath.py +dracut_install /usr/lib64/python3.13/_sitebuiltins.py +dracut_install /usr/lib64/python3.13/_sysconfigdata__linux_x86_64-linux-gnu.py +dracut_install /usr/lib64/python3.13/encodings/__init__.py +dracut_install /usr/lib64/python3.13/socket.py +dracut_install /usr/lib64/python3.13/selectors.py +dracut_install /usr/share/locale/locale.alias +dracut_install /usr/lib64/python3.13/collections/__init__.py +dracut_install /usr/lib64/python3.13/operator.py +dracut_install /usr/lib64/python3.13/keyword.py +dracut_install /usr/lib64/python3.13/heapq.py +dracut_install /usr/lib64/python3.13/reprlib.py +dracut_install /usr/lib64/python3.13/enum.py +dracut_install /usr/lib64/python3.13/types.py +dracut_install /usr/lib64/python3.13/functools.py +dracut_install /usr/lib64/python3.13/http/client.py +dracut_install /usr/lib64/python3.13/email/parser.py +dracut_install /usr/lib64/python3.13/email/feedparser.py +dracut_install /usr/lib64/python3.13/sre_compile.py +dracut_install /usr/lib64/python3.13/sre_parse.py +dracut_install /usr/lib64/python3.13/sre_constants.py +dracut_install /usr/lib64/python3.13/copyreg.py +dracut_install /usr/lib64/python3.13/email/errors.py +dracut_install /usr/lib64/python3.13/email/_policybase.py +dracut_install /usr/lib64/python3.13/email/header.py +dracut_install /usr/lib64/python3.13/email/quoprimime.py +dracut_install /usr/lib64/python3.13/string.py +dracut_install /usr/lib64/python3.13/stringprep.py +dracut_install /usr/lib64/python3.13/email/base64mime.py +dracut_install /usr/lib64/python3.13/base64.py +dracut_install /usr/lib64/python3.13/struct.py +dracut_install /usr/lib64/python3.13/email/charset.py +dracut_install /usr/lib64/python3.13/email/encoders.py +dracut_install /usr/lib64/python3.13/quopri.py 
+dracut_install /usr/lib64/python3.13/email/utils.py +dracut_install /usr/lib64/python3.13/random.py +dracut_install /usr/lib64/python3.13/warnings.py +dracut_install /usr/lib64/python3.13/hashlib.py +dracut_install /usr/lib64/python3.13/bisect.py +dracut_install /usr/lib64/python3.13/datetime.py +dracut_install /usr/lib64/python3.13/urllib/parse.py +dracut_install /usr/lib64/python3.13/email/_parseaddr.py +dracut_install /usr/lib64/python3.13/calendar.py +dracut_install /usr/lib64/python3.13/locale.py +dracut_install /usr/lib64/python3.13/email/message.py +dracut_install /usr/lib64/python3.13/email/_encoded_words.py +dracut_install /usr/lib64/python3.13/email/iterators.py +dracut_install /usr/lib64/python3.13/http/__init__.py +dracut_install /usr/lib64/python3.13/http/cookies.py +dracut_install /usr/lib64/python3.13/argparse.py +dracut_install /usr/lib64/python3.13/copy.py +dracut_install /usr/lib64/python3.13/textwrap.py +dracut_install /usr/lib64/python3.13/gettext.py +dracut_install /usr/lib64/python3.13/subprocess.py +dracut_install /usr/lib64/python3.13/signal.py +dracut_install /usr/lib64/python3.13/threading.py +dracut_install /usr/lib64/python3.13/traceback.py + dracut_install /usr/lib64/python3.13/traceback.py +dracut_install /usr/lib64/python3.13/linecache.py +dracut_install /usr/lib64/python3.13/tokenize.py +dracut_install /usr/lib64/python3.13/token.py +dracut_install /usr/lib64/python3.13/shlex.py +dracut_install /usr/lib64/python3.13/configparser.py +dracut_install /usr/lib64/python3.13/lib-dynload/readline.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/ctypes/__init__.py +dracut_install /usr/lib64/python3.13/ctypes/_endian.py +dracut_install /usr/lib64/python3.13/ctypes/util.py +dracut_install /usr/lib64/python3.13/ssl.py +dracut_install /usr/lib64/python3.13/ipaddress.py +dracut_install /usr/lib/locale/en_US.utf8/LC_ADDRESS +dracut_install /usr/lib/locale/en_US.utf8/LC_IDENTIFICATION +dracut_install 
/usr/lib/locale/en_US.utf8/LC_MEASUREMENT +dracut_install /usr/lib/locale/en_US.utf8/LC_MESSAGES/SYS_LC_MESSAGES +dracut_install /usr/lib/locale/en_US.utf8/LC_MONETARY +dracut_install /usr/lib/locale/en_US.utf8/LC_TELEPHONE +dracut_install /usr/lib/locale/en_US.utf8/LC_TIME +dracut_install /usr/lib/locale/en_US.utf8/LC_COLLATE +dracut_install /usr/lib/locale/en_US.utf8/LC_CTYPE +dracut_install /usr/lib/locale/en_US.utf8/LC_NAME +dracut_install /usr/lib/locale/en_US.utf8/LC_NUMERIC +dracut_install /usr/lib/locale/en_US.utf8/LC_PAPER +dracut_install /usr/lib64/python3.13/json/__init__.py /usr/lib64/python3.13/json/decoder.py /usr/lib64/python3.13/json/encoder.py /usr/lib64/python3.13/json/scanner.py /usr/lib64/python3.13/json/tool.py /usr/lib64/python3.13/lib-dynload/_json.cpython-313-x86_64-linux-gnu.so + +# ansible dependencies +dracut_install /usr/lib64/python3.13/runpy.py +dracut_install /usr/lib64/python3.13/importlib/__init__.py +dracut_install /usr/lib64/python3.13/importlib/_bootstrap.py +dracut_install /usr/lib64/python3.13/importlib/_bootstrap_external.py +dracut_install /usr/lib64/python3.13/importlib/machinery.py +dracut_install /usr/lib64/python3.13/importlib/util.py +dracut_install /usr/lib64/python3.13/contextlib.py +dracut_install /usr/lib64/python3.13/pkgutil.py +dracut_install /usr/lib64/python3.13/shutil.py +dracut_install /usr/lib64/python3.13/fnmatch.py +dracut_install /usr/lib64/python3.13/tempfile.py +dracut_install /usr/lib64/python3.13/encodings/cp437.pyc +dracut_install /usr/lib64/python3.13/lib-dynload/zlib.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/grp.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/lib-dynload/array.cpython-313-x86_64-linux-gnu.so +dracut_install /usr/lib64/python3.13/__future__.py +dracut_install /usr/lib64/python3.13/platform.py +dracut_install /usr/lib64/python3.13/logging/__init__.py +dracut_install /usr/lib64/python3.13/logging/config.py +dracut_install 
/usr/lib64/python3.13/logging/handlers.py +dracut_install /usr/lib64/python3.13/optparse.py +dracut_install /usr/lib64/python3.13/ast.py +dracut_install /usr/lib64/python3.13/multiprocessing/__init__.py +dracut_install /usr/lib64/python3.13/multiprocessing/connection.py +dracut_install /usr/lib64/python3.13/multiprocessing/context.py +dracut_install /usr/lib64/python3.13/multiprocessing/dummy/__init__.py +dracut_install /usr/lib64/python3.13/multiprocessing/dummy/connection.py +dracut_install /usr/lib64/python3.13/multiprocessing/forkserver.py +dracut_install /usr/lib64/python3.13/multiprocessing/heap.py +dracut_install /usr/lib64/python3.13/multiprocessing/managers.py +dracut_install /usr/lib64/python3.13/multiprocessing/pool.py +dracut_install /usr/lib64/python3.13/multiprocessing/popen_fork.py +dracut_install /usr/lib64/python3.13/multiprocessing/popen_forkserver.py +dracut_install /usr/lib64/python3.13/multiprocessing/popen_spawn_posix.py +dracut_install /usr/lib64/python3.13/multiprocessing/popen_spawn_win32.py +dracut_install /usr/lib64/python3.13/multiprocessing/process.py +dracut_install /usr/lib64/python3.13/multiprocessing/queues.py +dracut_install /usr/lib64/python3.13/multiprocessing/reduction.py +dracut_install /usr/lib64/python3.13/multiprocessing/resource_sharer.py +dracut_install /usr/lib64/python3.13/multiprocessing/sharedctypes.py +dracut_install /usr/lib64/python3.13/multiprocessing/spawn.py +dracut_install /usr/lib64/python3.13/multiprocessing/synchronize.py +dracut_install /usr/lib64/python3.13/multiprocessing/util.py +dracut_install /usr/lib64/python3.13/pickle.py +dracut_install /usr/lib64/python3.13/_compat_pickle.py +dracut_install /usr/lib64/python3.13/queue.py +dracut_install /usr/lib64/python3.13/glob.py +dracut_install /usr/lib64/python3.13/getpass.py + diff --git a/genesis/97genesis/install-python b/genesis/97genesis/install-python39 similarity index 100% rename from genesis/97genesis/install-python rename to 
genesis/97genesis/install-python39 From 1cf2a5339aeb190ca1d72f3d4cebf4cd886b803f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 24 Mar 2025 16:34:51 -0400 Subject: [PATCH 071/413] Move sftp server to a more appropriate location --- genesis/97genesis/install-base | 1 + genesis/97genesis/install-python39 | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis/97genesis/install-base b/genesis/97genesis/install-base index 22b91b37..40a734e8 100644 --- a/genesis/97genesis/install-base +++ b/genesis/97genesis/install-base @@ -8,6 +8,7 @@ dracut_install openssl tar ipmitool cpio xz gzip lsmod ethtool dracut_install modprobe touch echo cut wc bash uniq grep ip hostname dracut_install awk egrep dirname bc expr sort dracut_install ssh sshd vi reboot lspci parted tmux mkfs mkfs.ext4 mkfs.xfs xfs_db mkswap +dracut_install /usr/libexec/openssh/sftp-server dracut_install efibootmgr dracut_install du df ssh-keygen scp clear dhclient lldpd lldpcli tee dracut_install /lib64/libnss_dns.so.2 /lib64/libnss_dns.so.2 /lib64/libnss_myhostname.so.2 diff --git a/genesis/97genesis/install-python39 b/genesis/97genesis/install-python39 index c5dd6db7..ab9e2c86 100644 --- a/genesis/97genesis/install-python39 +++ b/genesis/97genesis/install-python39 @@ -217,4 +217,3 @@ dracut_install /usr/lib64/python3.9/distutils/util.py dracut_install /usr/lib64/python3.9/distutils/version.py dracut_install /usr/lib64/python3.9/distutils/versionpredicate.py dracut_install /usr/lib64/python3.9/getpass.py -dracut_install /usr/libexec/openssh/sftp-server From 62e081cd723f87af2c4527be5ced91256c97ad8b Mon Sep 17 00:00:00 2001 From: Sorin Toderica Date: Tue, 25 Mar 2025 09:50:40 +0200 Subject: [PATCH 072/413] Modify apiclient utility, for esxi7 only, to check if the uplink of vSwitch0 is up and if not, to try to replaces it with a different vmnic, that is up --- .../initramfs/opt/confluent/bin/apiclient | 86 +++++++++++++++++++ .../esxi7/initramfs/bin/dcuiweasel | 1 + 2 files changed, 87 
insertions(+) diff --git a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient index b64052c9..7e78a5b8 100644 --- a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient +++ b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient @@ -14,6 +14,8 @@ import ssl import sys import struct import time +import re +import json class InvalidApiKey(Exception): pass @@ -412,8 +414,91 @@ class HTTPSClient(client.HTTPConnection, object): self.node, [self.host], errout=self.errout) raise Exception(rsp.read()) +def get_current_vmnic_vswitch(): + uplinkmatch = re.compile(r'^\s*Uplinks:\s*(.*)') + switchmatch = re.compile(r'^\s*Name:\s*(.*)') + + vswinfo = subprocess.check_output(['localcli', 'network', 'vswitch', 'standard', 'list']).decode() + + vmnic = None + vswitch_name = None + + for info in vswinfo.split('\n'): + name_match = switchmatch.match(info) + if name_match: + vswitch_name = name_match.group(1).strip() + + upinfo = uplinkmatch.match(info) + if upinfo: + vmnic = upinfo.group(1).strip() + if vmnic and 'vusb0' not in vmnic: + return vswitch_name, vmnic + + return vswitch_name, vmnic + +def get_available_nics(): + nicinfo = subprocess.check_output(['localcli', 'network', 'nic', 'list']).decode('utf8').split('\n') + available_nics = {} + + # Skip headers and separators + parsing_started = False + for line in nicinfo: + if re.match(r'^-+', line): + parsing_started = True + continue + if not parsing_started or not line.strip(): + continue + + parts = re.split(r'\s{2,}', line.strip()) + if len(parts) >= 5: + nic_name = parts[0] + nic_status = parts[4] # "Link Status" este al 5-lea câmp + available_nics[nic_name] = nic_status + + return available_nics + +def is_esxi(): + return os.path.isdir("/etc/vmware") + +def fix_vswitch(): + if is_esxi(): + start_time = time.time() + while True: + + current_vswitch, current_vmnic = get_current_vmnic_vswitch() + if current_vswitch is None: + 
raise RuntimeError("Panic: current vswitch is None") + if current_vmnic is None: + raise RuntimeError("Panic: current vmnic is None") + + available_nics = get_available_nics() + + if current_vmnic and available_nics.get(current_vmnic) == 'Up': + break + + new_vmnic = next((nic for nic, status in available_nics.items() if status == 'Up'), None) + + if new_vmnic and new_vmnic != current_vmnic: + subprocess.check_call(['localcli', 'network', 'vswitch', 'standard', 'uplink', 'remove', + '--uplink-name', current_vmnic, '--vswitch-name', current_vswitch]) + subprocess.check_call(['localcli', 'network', 'vswitch', 'standard', 'uplink', 'add', + '--uplink-name', new_vmnic, '--vswitch-name', current_vswitch]) + elif not new_vmnic: + if time.time() - start_time > 300: + break + time.sleep(5) + + time.sleep(5) + if __name__ == '__main__': data = None + + if '-f' in sys.argv: + try: + fix_vswitch() + except Exception as e: + print(f"fix_vswitch() error: {e}") + sys.argv.remove('-f') usejson = False if '-j' in sys.argv: usejson = True @@ -468,6 +553,7 @@ if __name__ == '__main__': outf.write(chunk) chunk = reader.read(16384) sys.exit(0) + client = HTTPSClient(usejson, errout=errout, phmac=phmac, checkonly=checkonly) if waitfor: status = 201 diff --git a/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel b/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel index f88c730d..f9e02624 100644 --- a/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel +++ b/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel @@ -6,6 +6,7 @@ if [ ! 
-f /var/run/vmware/show-esx-shell-login ]; then chvt 2 /etc/init.d/ESXShell start fi +/opt/confluent/bin/apiclient -f uuid=$(vsish -e get /hardware/bios/dmiInfo|grep -A15 UUID|sed -e 's/.*://'|sed -e ':a;N;$!ba;s/\n//g' | sed -e 's/ *0x//g') uuid=${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12} kargs=$(vsish -e get /system/bootCmdLine|grep "command line:") From 249ed5d9be1c4cf283dfa86370957ed52ce0a630 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 25 Mar 2025 11:19:38 -0400 Subject: [PATCH 073/413] Add script to try to sort out extra license material --- genesis/fetchlicenses | 44 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 genesis/fetchlicenses diff --git a/genesis/fetchlicenses b/genesis/fetchlicenses new file mode 100644 index 00000000..0d36d89d --- /dev/null +++ b/genesis/fetchlicenses @@ -0,0 +1,44 @@ +#!/bin/bash +set -e +cd $(dirname $0) +mydir=$(pwd) +cd - >& /dev/null +wget https://www.apache.org/licenses/LICENSE-2.0 -O /usr/share/licenses/nss/LICENSE.APACHE +if [ ! 
-f /root/rpmbuild/BUILD/openssh-9.9p1/openbsd-compat/blowfish.c ]; then + echo "ERROR: openssh source must be extracted" + exit 1 +fi +head -n 37 /root/rpmbuild/BUILD/openssh-9.9p1/openbsd-compat/blowfish.c > /usr/share/licenses/openssh/COPYING.blowfish +wget https://www.gnu.org/licenses/old-licenses/gpl-2.0.txt -O /usr/share/licenses/bc/COPYING.GPLv2 +cp /root/rpmbuild/BUILD/bind-9.16.23/LICENSE /usr/share/licenses/bind-license/LICENSE || exit 1 +wget https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt -O /usr/share/licenses/procps-ng/COPYING.LIBv2.1 +cp /root/rpmbuild/BUILD/perl-5.32.1/dist/ExtUtils-CBuilder/LICENSE /usr/share/licenses/perl-libs/LICENSE || exit 1 +head -n 31 /root/rpmbuild/BUILD/Linux-PAM-1.5.1/conf/pam_conv1/pam_conv_y.h > /usr/share/licenses/pam/COPYING.bison || exit 1 +mkdir -p /usr/share/licenses/pcre2 +head -n 25 /root/rpmbuild/BUILD/pcre-8.44/sljit/sljitLir.h > /usr/share/licenses/pcre/LICENSE.BSD2 || exit 1 +head -n 25 /root/rpmbuild/BUILD/pcre2-10.40/src/sljit/sljitLir.h > /usr/share/licenses/pcre2/LICENSE.BSD2 + +cp -a $mydir/exlicenses/* /usr/share/licenses/ +mkdir -p /usr/share/licenses/perl +head -n 67 /root/rpmbuild/BUILD/perl-5.32.1/regexec.c | tail -n 25 > /usr/share/licenses/perl/COPYING.regexec +cp /root/rpmbuild/BUILD/lz4-1.9.3/LICENSE /usr/share/licenses/lz4/LICENSE +cp /usr/share/doc/lz4-libs/LICENSE /usr/share/licenses/lz4/LICENSE.BSD +cp /root/rpmbuild/BUILD/libgcrypt-1.10.0/README /usr/share/licenses/libgcrypt/README +cp /root/rpmbuild/BUILD/libgcrypt-1.10.0/LICENSES /usr/share/licenses/libgcrypt/LICENSES +mkdir -p /usr/share/licenses/sqlite +wget https://raw.githubusercontent.com/sqlite/sqlite/master/LICENSE.md -O /usr/share/licenses/sqlite/LICENSE.md +cd /root/rpmbuild/BUILD/tmux-3.2a +mkdir -p /usr/share/licenses/tmux +python3 ~/confluent/genesis/extracttmuxlicenses.py > /usr/share/licenses/tmux/NOTICE +cp COPYING README /usr/share/licenses/tmux/ +cd - +mkdir -p /usr/share/licenses/xfsprogs/ +cp 
/root/rpmbuild/BUILD/xfsprogs-6.4.0/LICENSES/* /usr/share/licenses/xfsprogs/ +mkdir -p /usr/share/licenses/kernel-extra/ +cp -a /root/rpmbuild/BUILD/kernel-*/linux-*/LICENSES/* /usr/share/licenses/kernel-extra/ +cp /usr/share/licenses/krb5-libs/LICENSE /usr/share/licenses/krb5-libs/NOTICE +mkdir -p /usr/share/licenses/libdb +cp /root/rpmbuild/BUILD/db-5.3.28/lang/sql/odbc/debian/copyright /usr/share/licenses/libdb/copyright +head -n 105 $(pwd)/util-linux-2.37.4/sys-utils/hwclock-parse-date.c|tail -n 34 > /usr/share/licenses/util-linux/COPYING.GPLv3 + + From 4677f2c806e1689e2d28347e24562160eae7895c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 25 Mar 2025 11:20:58 -0400 Subject: [PATCH 074/413] Bump genesis version for next release --- genesis/confluent-genesis.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis/confluent-genesis.spec b/genesis/confluent-genesis.spec index e652ed5e..4fd80bb2 100644 --- a/genesis/confluent-genesis.spec +++ b/genesis/confluent-genesis.spec @@ -1,5 +1,5 @@ %define arch x86_64 -Version: 3.12.0 +Version: 3.13.0 Release: 1 Name: confluent-genesis-%{arch} BuildArch: noarch From 72c030995fac99b6f9192f9b3f8216be5f19dcbf Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 27 Mar 2025 09:50:19 -0400 Subject: [PATCH 075/413] Tolerate errors during register If a condition breaks unicast query, keep going. 
--- confluent_server/confluent/discovery/core.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index 40a2d38c..0f4deca0 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -529,13 +529,17 @@ def register_remote_addrs(addresses, configmanager): nd = { 'addresses': [(addr, 443)] } - sd = ssdp.check_fish(('/DeviceDescription.json', nd)) - if not sd: + try: + sd = ssdp.check_fish(('/DeviceDescription.json', nd)) + if not sd: + return addr, False + + sd['hwaddr'] = sd['attributes']['mac-address'] + nh = xcc.NodeHandler(sd, configmanager) + nh.scan() + detected(nh.info) + except Exception: return addr, False - sd['hwaddr'] = sd['attributes']['mac-address'] - nh = xcc.NodeHandler(sd, configmanager) - nh.scan() - detected(nh.info) return addr, True rpool = eventlet.greenpool.GreenPool(512) for count in iterate_addrs(addresses, True): From ce5c1c925eb4e6e262b4c5dc83db4c3c7354cd87 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 27 Mar 2025 10:00:44 -0400 Subject: [PATCH 076/413] Adapt to register XCC3 Handle XCC3 differences in the register scenario. 
--- confluent_server/confluent/discovery/core.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index 0f4deca0..790379b6 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -533,9 +533,14 @@ def register_remote_addrs(addresses, configmanager): sd = ssdp.check_fish(('/DeviceDescription.json', nd)) if not sd: return addr, False - - sd['hwaddr'] = sd['attributes']['mac-address'] - nh = xcc.NodeHandler(sd, configmanager) + if 'macaddress' in sd['attributes']: + sd['hwaddr'] = sd['attributes']['macaddress'] + else: + sd['hwaddr'] = sd['attributes']['mac-address'] + if 'lenovo-xcc3' in sd['services']: + nh = xcc3.NodeHandler(sd, configmanager) + elif 'lenovo-xcc' in sd['services']: + nh = xcc.NodeHandler(sd, configmanager) nh.scan() detected(nh.info) except Exception: From b596de93a0a06cb392152be6d24f69557ab7c858 Mon Sep 17 00:00:00 2001 From: Tinashe Date: Thu, 27 Mar 2025 11:09:25 -0400 Subject: [PATCH 077/413] seperate into oses --- .../default/scripts/sample/consoleredirect | 15 +++++++++++++++ .../profiles/hpc}/scripts/sample/consoleredirect | 14 ++------------ 2 files changed, 17 insertions(+), 12 deletions(-) create mode 100644 confluent_osdeploy/el9-diskless/profiles/default/scripts/sample/consoleredirect rename confluent_osdeploy/{common/profile => suse15/profiles/hpc}/scripts/sample/consoleredirect (69%) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/sample/consoleredirect b/confluent_osdeploy/el9-diskless/profiles/default/scripts/sample/consoleredirect new file mode 100644 index 00000000..60143ae5 --- /dev/null +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/sample/consoleredirect @@ -0,0 +1,15 @@ +is_rhel=false + +if test -f /boot/efi/EFI/redhat/grub.cfg; then + grubcfg="/etc/default/grub" + is_rhel=true +else + echo "Expected File missing: 
Check if os redhat" + exit +fi + +# Working on Redhat +if $is_rhel; then + sed -i '/^GRUB_TERMINAL/s/serial //' $grubcfg + grub2-mkconfig -o /boot/grub2/grub.cfg +fi \ No newline at end of file diff --git a/confluent_osdeploy/common/profile/scripts/sample/consoleredirect b/confluent_osdeploy/suse15/profiles/hpc/scripts/sample/consoleredirect similarity index 69% rename from confluent_osdeploy/common/profile/scripts/sample/consoleredirect rename to confluent_osdeploy/suse15/profiles/hpc/scripts/sample/consoleredirect index d761817e..270d24b7 100644 --- a/confluent_osdeploy/common/profile/scripts/sample/consoleredirect +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/sample/consoleredirect @@ -1,15 +1,11 @@ is_suse=false -is_rhel=false -if test -f /boot/efi/EFI/redhat/grub.cfg; then - grubcfg="/etc/default/grub" - is_rhel=true -elif test -f /boot/efi/EFI/sle_hpc/grub.cfg; then +if test -f /boot/efi/EFI/sle_hpc/grub.cfg; then grubcfg="/boot/efi/EFI/sle_hpc/grub.cfg" grub2-mkconfig -o $grubcfg is_suse=true else - echo "Expected File missing: Check if os sle_hpc or redhat" + echo "Expected File missing: Check if os sle_hpc" exit fi @@ -41,9 +37,3 @@ if $is_suse; then done sed -i 's,^terminal,#terminal,' $grubcfg fi - -# Working on Redhat -if $is_rhel; then - sed -i '/^GRUB_TERMINAL/s/serial //' $grubcfg - grub2-mkconfig -o /boot/grub2/grub.cfg -fi \ No newline at end of file From fc0cc41b9058fe7263c33de30d3f7f6be57faabf Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 27 Mar 2025 16:37:25 -0400 Subject: [PATCH 078/413] Commence work on NXAPI support --- confluent_server/confluent/networking/nxos.py | 230 ++++++++++++++++++ 1 file changed, 230 insertions(+) create mode 100644 confluent_server/confluent/networking/nxos.py diff --git a/confluent_server/confluent/networking/nxos.py b/confluent_server/confluent/networking/nxos.py new file mode 100644 index 00000000..4ed22d8f --- /dev/null +++ b/confluent_server/confluent/networking/nxos.py @@ -0,0 +1,230 @@ + 
+import pyghmi.util.webclient as webclient +import confluent.util as util +import time +from pprint import pprint + +_healthmap = { + 'normal': 'ok', + 'minor': 'warning', + 'major': 'critical', +} + +def add_sensedata(component, sensedata, name=None): + senseinfo = {} + if 'eqptSensor' in component: + attrs = component['eqptSensor']['attributes'] + senseinfo['name'] = attrs['descr'] + senseinfo['value'] = attrs['tempValue'] + units = attrs['unit'] + if units == 'Celsius': + units = '°C' + senseinfo['units'] = units + senseinfo['health'] = _healthmap.get(attrs['operSt'], attrs['operSt']) + elif 'eqptFtSlot' in component: + attrs = component['eqptFtSlot']['attributes'] + name = '{} {}'.format(attrs['descr'], attrs['physId']) + elif 'eqptFan' in component: + attrs = component['eqptFan']['attributes'] + if name: + senseinfo['name'] = '{}/{}'.format(name, attrs['id']) + else: + senseinfo['name'] = '{} {}'.format(attrs['descr'], attrs['id']) + senseinfo['value'] = attrs['speedInRpm'] + senseinfo['units'] = 'RPM' + senseinfo['health'] = attrs['operSt'] + elif 'eqptPsu' in component: + attrs = component['eqptPsu']['attributes'] + senseinfo['name'] = 'PSU {} Output Current'.format(attrs['id']) + senseinfo['value'] = attrs['drawnCurr'] + senseinfo['units'] = 'A' + sensedata.append(senseinfo) + senseinfo['name'] = 'PSU {} Input Current'.format(attrs['id']) + senseinfo['value'] = attrs['inputCurr'] + senseinfo['units'] = 'A' + sensedata.append(senseinfo) + senseinfo['name'] = 'PSU {} Output Voltage'.format(attrs['id']) + senseinfo['value'] = attrs['volt'] + senseinfo['units'] = 'V' + sensedata.append(senseinfo) + elif 'eqptPsuSlot' in component: + attrs = component['eqptPsuSlot']['attributes'] + senseinfo['name'] = 'PSU Slot {}'.format(attrs['physId']) + senseinfo['health'] = 'ok' + senseinfo['states'] = ['Present'] + if attrs['operSt'] == 'empty': + senseinfo['health'] = 'critical' + senseinfo['states'] = 'Absent' + sensedata.append(senseinfo) + if senseinfo: + 
sensedata.append(senseinfo) + for key in component: + if 'children' in component[key]: + for child in component[key]['children']: + add_sensedata(child, sensedata, name) + + + + + + +class NxApiClient: + def __init__(self, switch, user, password, configmanager): + self.cachedurls = {} + if configmanager: + cv = util.TLSCertVerifier( + configmanager, switch, 'pubkeys.tls_hardwaremanager' + ).verify_cert + else: + cv = lambda x: True + self.user = user + self.password = password + self.wc = webclient.SecureHTTPConnection(switch, port=443, verifycallback=cv) + self.login() + + def login(self): + payload = {'aaaUser': + {'attributes': + {'name': self.user, + 'pwd': self.password}}} + rsp = self.wc.grab_json_response_with_status('/api/mo/aaaLogin.json', payload) + if rsp[1] != 200: + raise Exception("Failed authenticating") + rsp = rsp[0] + self.authtoken = rsp['imdata'][0]['aaaLogin']['attributes']['token'] + self.wc.cookies['Apic-Cookie'] = self.authtoken + + def get_firmware(self): + firmdata = {} + for imdata in self.grab_imdata('/api/mo/sys/showversion.json'): + attrs = imdata['sysmgrShowVersion']['attributes'] + firmdata['NX-OS'] = {'version': attrs['nxosVersion'], 'date': attrs['nxosCompileTime']} + firmdata['BIOS'] = {'version': attrs['biosVersion'], 'date': attrs['biosCompileTime']} + return firmdata + + + + def get_serial(self): + for imdata in self.grab_imdata('/api/mo/sys/ch.json'): + for keyn in imdata: + currinfo = imdata[keyn] + model = currinfo.get('model', 'Unknown') + serial = currinfo.get('ser', 'Unknown') + modelname = currinfo.get('descr', 'Uknonwn') + + self.wc.grab_json_response_with_status('/api/mo/sys.json') + rsp['imdata'][0]['topSystem']['attributes'][serial] + + def get_sensors(self): + sensedata = [] + for imdata in self.grab_imdata('/api/mo/sys/ch.json?rsp-subtree=full'): + hwinfo = imdata['eqptCh']['children'] + for component in hwinfo: + add_sensedata(component, sensedata) + return sensedata + + def get_inventory(self): + invdata = [] + 
for imdata in self.grab_imdata('/api/mo/sys/ch.json?rsp-subtree=full'): + hwinfo = imdata['eqptCh'] + chattr = hwinfo['attributes'] + invinfo = {'name': 'System', 'present': True} + invinfo['information'] = { + 'Manufacturer': chattr['vendor'], + 'Serial Number': chattr['ser'], + 'Product name': chattr['descr'], + 'Model': chattr['model'], + 'Revision': chattr['rev'], + } + invdata.append(invinfo) + for comp in hwinfo['children']: + if 'eqptPsuSlot' in comp: + attrs = comp['eqptPsuSlot']['attributes'] + name = '{} {}'.format(attrs['descr'], attrs['id']) + if attrs['operSt'] == 'empty': + invinfo = {'name': name, 'present': False} + else: + invinfo = {'name': name, 'present': True} + psuinfo = comp['eqptPsuSlot']['children'][0]['eqptPsu']['attributes'] + invinfo['information'] = { + 'Manufacturer': psuinfo['vendor'], + 'Model': psuinfo['model'] + } + invdata.append(invinfo) + return invdata + + + + + + + def grab(self, url, cache=True, retry=True): + if cache is True: + cache = 1 + if cache: + if url in self.cachedurls: + if self.cachedurls[url][1] > time.monotonic() - cache: + return self.cachedurls[url][0] + rsp = self.wc.grab_json_response_with_status(url) + if rsp[1] == 403 and retry: + self.login() + return self.grab(url, cache, False) + if rsp[1] != 200: + raise Exception("Error making request") + self.cachedurls[url] = rsp[0], time.monotonic() + return rsp[0] + + def grab_imdata(self, url): + response = self.grab(url) + for imdata in response['imdata']: + yield imdata + + def get_mac_table(self): + macdict = {} + for macinfo in self.grab_imdata('/api/mo/sys/mac/table.json?rsp-subtree=full'): + mactable = macinfo['l2MacAddressTable']['children'] + for macent in mactable: + mace = macent['l2MacAddressEntry']['attributes'] + if mace['port'] in macdict: + macdict[mace['port']].append(mace['macAddress']) + else: + macdict[mace['port']] = [mace['macAddress']] + return macdict + + + def get_lldp(self): + lldpbyport = {} + for lldpimdata in 
self.grab_imdata('/api/mo/sys/lldp/inst.json?rsp-subtree=full'): + lldpdata = lldpimdata['lldpInst']['children'] + for lldpinfo in lldpdata: + if 'lldpIf' not in lldpinfo: + continue + port_id = lldpinfo['lldpIf']['attributes']['id'] + for child in lldpinfo['lldpIf'].get('children', []): + if 'lldpAdjEp' not in child: + continue + record = child['lldpAdjEp']['attributes'] + lldpinfo = { + 'verified': True, # over TLS + 'peerdescription': record['sysDesc'], + 'peername': record['sysName'], + 'peerchassisid': record['chassisIdV'], + 'peerportid': record['portIdV'], + 'portid': port_id, + 'port': port_id, + } + lldpbyport[port_id] = lldpinfo + return lldpbyport + + +if __name__ == '__main__': + import sys + import os + myuser = os.environ['SWITCHUSER'] + mypass = os.environ['SWITCHPASS'] + na = NxApiClient(sys.argv[1], myuser, mypass, None) + pprint(na.get_firmware()) + pprint(na.get_lldp()) + pprint(na.get_mac_table()) + pprint(na.get_inventory()) + pprint(na.get_sensors()) From 586261ddcae68a414710e2944d5885c5564e0e90 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 27 Mar 2025 16:42:28 -0400 Subject: [PATCH 079/413] Fix messed up PSU in nxos --- confluent_server/confluent/networking/nxos.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/networking/nxos.py b/confluent_server/confluent/networking/nxos.py index 4ed22d8f..c42a997c 100644 --- a/confluent_server/confluent/networking/nxos.py +++ b/confluent_server/confluent/networking/nxos.py @@ -39,14 +39,17 @@ def add_sensedata(component, sensedata, name=None): senseinfo['value'] = attrs['drawnCurr'] senseinfo['units'] = 'A' sensedata.append(senseinfo) + senseinfo = {} senseinfo['name'] = 'PSU {} Input Current'.format(attrs['id']) senseinfo['value'] = attrs['inputCurr'] senseinfo['units'] = 'A' sensedata.append(senseinfo) + senseinfo = {} senseinfo['name'] = 'PSU {} Output Voltage'.format(attrs['id']) senseinfo['value'] = attrs['volt'] senseinfo['units'] = 'V' 
sensedata.append(senseinfo) + senseinfo = {} elif 'eqptPsuSlot' in component: attrs = component['eqptPsuSlot']['attributes'] senseinfo['name'] = 'PSU Slot {}'.format(attrs['physId']) @@ -55,7 +58,6 @@ def add_sensedata(component, sensedata, name=None): if attrs['operSt'] == 'empty': senseinfo['health'] = 'critical' senseinfo['states'] = 'Absent' - sensedata.append(senseinfo) if senseinfo: sensedata.append(senseinfo) for key in component: From 659f87877dadc6a05ab6f872b119d13a246aae5d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 28 Mar 2025 08:29:18 -0400 Subject: [PATCH 080/413] Rename the NX-API library --- confluent_server/confluent/networking/{nxos.py => nxapi.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename confluent_server/confluent/networking/{nxos.py => nxapi.py} (100%) diff --git a/confluent_server/confluent/networking/nxos.py b/confluent_server/confluent/networking/nxapi.py similarity index 100% rename from confluent_server/confluent/networking/nxos.py rename to confluent_server/confluent/networking/nxapi.py From 7419dbcf71f70eb874ff17dbba7774f3b001242a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 28 Mar 2025 09:26:41 -0400 Subject: [PATCH 081/413] Fix unpatched imports of webclient --- confluent_server/confluent/networking/nxapi.py | 5 +++-- confluent_server/confluent/plugins/console/openbmc.py | 2 +- .../confluent/plugins/hardwaremanagement/cooltera.py | 2 +- .../confluent/plugins/hardwaremanagement/eatonpdu.py | 3 ++- .../confluent/plugins/hardwaremanagement/enlogic.py | 2 +- .../confluent/plugins/hardwaremanagement/geist.py | 3 ++- 6 files changed, 10 insertions(+), 7 deletions(-) diff --git a/confluent_server/confluent/networking/nxapi.py b/confluent_server/confluent/networking/nxapi.py index c42a997c..19e841dd 100644 --- a/confluent_server/confluent/networking/nxapi.py +++ b/confluent_server/confluent/networking/nxapi.py @@ -1,8 +1,8 @@ -import pyghmi.util.webclient as webclient import confluent.util as util 
import time -from pprint import pprint +import eventlet +webclient = eventlet.import_patched('pyghmi.util.webclient') _healthmap = { 'normal': 'ok', @@ -222,6 +222,7 @@ class NxApiClient: if __name__ == '__main__': import sys import os + from pprint import pprint myuser = os.environ['SWITCHUSER'] mypass = os.environ['SWITCHPASS'] na = NxApiClient(sys.argv[1], myuser, mypass, None) diff --git a/confluent_server/confluent/plugins/console/openbmc.py b/confluent_server/confluent/plugins/console/openbmc.py index e4b00bd3..56535c52 100644 --- a/confluent_server/confluent/plugins/console/openbmc.py +++ b/confluent_server/confluent/plugins/console/openbmc.py @@ -25,9 +25,9 @@ import confluent.log as log import confluent.util as util import pyghmi.exceptions as pygexc import pyghmi.redfish.command as rcmd -import pyghmi.util.webclient as webclient import eventlet import eventlet.green.ssl as ssl +webclient = eventlet.import_patched('pyghmi.util.webclient') try: websocket = eventlet.import_patched('websocket') wso = websocket.WebSocket diff --git a/confluent_server/confluent/plugins/hardwaremanagement/cooltera.py b/confluent_server/confluent/plugins/hardwaremanagement/cooltera.py index c6e4b070..80265878 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/cooltera.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/cooltera.py @@ -13,7 +13,6 @@ # limitations under the License. 
from xml.etree.ElementTree import fromstring as rfromstring -import pyghmi.util.webclient as wc import confluent.util as util import confluent.messages as msg import confluent.exceptions as exc @@ -21,6 +20,7 @@ import eventlet.green.time as time import eventlet.green.socket as socket import eventlet.greenpool as greenpool import eventlet +wc = eventlet.import_patched('pyghmi.util.webclient') try: import Cookie httplib = eventlet.import_patched('httplib') diff --git a/confluent_server/confluent/plugins/hardwaremanagement/eatonpdu.py b/confluent_server/confluent/plugins/hardwaremanagement/eatonpdu.py index 4c3d4654..5ef16311 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/eatonpdu.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/eatonpdu.py @@ -18,13 +18,14 @@ import confluent.messages as msg import confluent.exceptions as exc import eventlet import eventlet.green.socket as socket -import pyghmi.util.webclient as wc +wc = eventlet.import_patched('pyghmi.util.webclient') import confluent.util as util import re import hashlib import json import time + def simplify_name(name): return name.lower().replace(' ', '_').replace('/', '-').replace( '_-_', '-') diff --git a/confluent_server/confluent/plugins/hardwaremanagement/enlogic.py b/confluent_server/confluent/plugins/hardwaremanagement/enlogic.py index 196b79df..de9b13d5 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/enlogic.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/enlogic.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import pyghmi.util.webclient as wc import confluent.util as util import confluent.messages as msg import confluent.exceptions as exc import eventlet.green.time as time import eventlet import eventlet.greenpool as greenpool +wc = eventlet.import_patched('pyghmi.util.webclient') diff --git a/confluent_server/confluent/plugins/hardwaremanagement/geist.py b/confluent_server/confluent/plugins/hardwaremanagement/geist.py index 3f086115..f2e0418c 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/geist.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/geist.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pyghmi.util.webclient as wc + import confluent.util as util import confluent.messages as msg import confluent.exceptions as exc import eventlet.green.time as time import eventlet import eventlet.greenpool as greenpool +wc = eventlet.import_patched('pyghmi.util.webclient') From 2514507b87620924cfa08136d083383417acc859 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 28 Mar 2025 13:34:03 -0400 Subject: [PATCH 082/413] Add node operations against Nexus switch This enables the commands to work that one would expect. 
--- .../confluent/networking/nxapi.py | 41 ++++---- .../plugins/hardwaremanagement/nxos.py | 95 +++++++++++++++++++ 2 files changed, 113 insertions(+), 23 deletions(-) create mode 100644 confluent_server/confluent/plugins/hardwaremanagement/nxos.py diff --git a/confluent_server/confluent/networking/nxapi.py b/confluent_server/confluent/networking/nxapi.py index 19e841dd..2db62a4d 100644 --- a/confluent_server/confluent/networking/nxapi.py +++ b/confluent_server/confluent/networking/nxapi.py @@ -20,7 +20,10 @@ def add_sensedata(component, sensedata, name=None): if units == 'Celsius': units = '°C' senseinfo['units'] = units - senseinfo['health'] = _healthmap.get(attrs['operSt'], attrs['operSt']) + senseinfo['health'] = _healthmap.get(attrs['operSt'], 'unknown') + if senseinfo['health'] == 'unknown': + print(senseinfo['health'] + ' not recognized') + senseinfo['health'] = 'critical' elif 'eqptFtSlot' in component: attrs = component['eqptFtSlot']['attributes'] name = '{} {}'.format(attrs['descr'], attrs['physId']) @@ -52,6 +55,8 @@ def add_sensedata(component, sensedata, name=None): senseinfo = {} elif 'eqptPsuSlot' in component: attrs = component['eqptPsuSlot']['attributes'] + senseinfo['value'] = None + senseinfo['units'] = None senseinfo['name'] = 'PSU Slot {}'.format(attrs['physId']) senseinfo['health'] = 'ok' senseinfo['states'] = ['Present'] @@ -66,10 +71,6 @@ def add_sensedata(component, sensedata, name=None): add_sensedata(child, sensedata, name) - - - - class NxApiClient: def __init__(self, switch, user, password, configmanager): self.cachedurls = {} @@ -104,19 +105,6 @@ class NxApiClient: firmdata['BIOS'] = {'version': attrs['biosVersion'], 'date': attrs['biosCompileTime']} return firmdata - - - def get_serial(self): - for imdata in self.grab_imdata('/api/mo/sys/ch.json'): - for keyn in imdata: - currinfo = imdata[keyn] - model = currinfo.get('model', 'Unknown') - serial = currinfo.get('ser', 'Unknown') - modelname = currinfo.get('descr', 'Uknonwn') - - 
self.wc.grab_json_response_with_status('/api/mo/sys.json') - rsp['imdata'][0]['topSystem']['attributes'][serial] - def get_sensors(self): sensedata = [] for imdata in self.grab_imdata('/api/mo/sys/ch.json?rsp-subtree=full'): @@ -125,6 +113,18 @@ class NxApiClient: add_sensedata(component, sensedata) return sensedata + def get_health(self): + healthdata = {'health': 'ok', 'sensors': []} + for sensor in self.get_sensors(): + currhealth = sensor.get('health', 'ok') + if currhealth != 'ok': + healthdata['sensors'].append(sensor) + if sensor['health'] == 'critical': + healthdata['health'] = 'critical' + elif sensor['health'] == 'warning' and healthdata['health'] != 'critical': + healthdata['health'] = 'warning' + return healthdata + def get_inventory(self): invdata = [] for imdata in self.grab_imdata('/api/mo/sys/ch.json?rsp-subtree=full'): @@ -155,11 +155,6 @@ class NxApiClient: invdata.append(invinfo) return invdata - - - - - def grab(self, url, cache=True, retry=True): if cache is True: cache = 1 diff --git a/confluent_server/confluent/plugins/hardwaremanagement/nxos.py b/confluent_server/confluent/plugins/hardwaremanagement/nxos.py new file mode 100644 index 00000000..df5779e8 --- /dev/null +++ b/confluent_server/confluent/plugins/hardwaremanagement/nxos.py @@ -0,0 +1,95 @@ +import confluent.networking.nxapi as nxapi +import eventlet +import eventlet.queue as queue +import eventlet.greenpool as greenpool +import confluent.messages as msg +import traceback + + +def retrieve_node(node, element, user, pwd, configmanager, inputdata, results): + try: + retrieve_node_backend(node, element, user, pwd, configmanager, inputdata, results) + except Exception as e: + print(traceback.format_exc()) + print(repr(e)) + +def simplify_name(name): + return name.lower().replace(' ', '_').replace('/', '-').replace( + '_-_', '-') + +def retrieve_node_backend(node, element, user, pwd, configmanager, inputdata, results): + cli = nxapi.NxApiClient(node, user, pwd, configmanager) + if 
element == ['power', 'state']: # client initted successfully, must be on + results.put(msg.PowerState(node, 'on')) + elif element == ['health', 'hardware']: + hinfo = cli.get_health() + results.put(msg.HealthSummary(hinfo.get('health', 'unknown'), name=node)) + results.put(msg.SensorReadings(hinfo.get('sensors', []), name=node)) + elif element[:3] == ['inventory', 'hardware', 'all']: + if len(element) == 3: + results.put(msg.ChildCollection('all')) + return + invinfo = cli.get_inventory() + if invinfo: + results.put(msg.KeyValueData({'inventory': invinfo}, node)) + elif element[:3] == ['inventory', 'firmware', 'all']: + if len(element) == 3: + results.put(msg.ChildCollection('all')) + return + fwinfo = [] + for fwnam, fwdat in cli.get_firmware().items(): + fwinfo.append({fwnam: fwdat}) + if fwinfo: + results.put(msg.Firmware(fwinfo, node)) + elif element == ['sensors', 'hardware', 'all']: + sensors = cli.get_sensors() + for sensor in sensors: + results.put(msg.ChildCollection(simplify_name(sensor['name']))) + elif element[:3] == ['sensors', 'hardware', 'all']: + sensors = cli.get_sensors() + for sensor in sensors: + if element[-1] == 'all' or simplify_name(sensor['name']) == element[-1]: + results.put(msg.SensorReadings([sensor], node)) + else: + print(repr(element)) + + +def retrieve(nodes, element, configmanager, inputdata): + results = queue.LightQueue() + workers = set([]) + creds = configmanager.get_node_attributes( + nodes, ['secret.hardwaremanagementuser', 'secret.hardwaremanagementpassword'], decrypt=True) + for node in nodes: + cred = creds.get(node, {}) + user = cred.get('secret.hardwaremanagementuser', {}).get('value') + pwd = cred.get('secret.hardwaremanagementpassword', {}).get('value') + try: + user = user.decode() + pwd = pwd.decode() + except Exception: + pass + if not user or not pwd: + yield msg.ConfluentTargetInvalidCredentials(node) + continue + workers.add(eventlet.spawn(retrieve_node, node, element, user, pwd, configmanager, inputdata, 
results)) + while workers: + try: + datum = results.get(block=True, timeout=10) + while datum: + if datum: + yield datum + datum = results.get_nowait() + except queue.Empty: + pass + eventlet.sleep(0.001) + for t in list(workers): + if t.dead: + workers.discard(t) + try: + while True: + datum = results.get_nowait() + if datum: + yield datum + except queue.Empty: + pass + From 92ac49b561bfa416fe20802a0b96ecde010b28a5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 28 Mar 2025 17:28:42 -0400 Subject: [PATCH 083/413] Add NXAPI backend for mac table support. --- .../confluent/networking/macmap.py | 60 +++++++++++++++---- .../confluent/networking/nxapi.py | 6 ++ 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/confluent_server/confluent/networking/macmap.py b/confluent_server/confluent/networking/macmap.py index cf6012c5..94e7bf5d 100644 --- a/confluent_server/confluent/networking/macmap.py +++ b/confluent_server/confluent/networking/macmap.py @@ -54,6 +54,7 @@ import confluent.exceptions as exc import confluent.log as log import confluent.messages as msg import confluent.noderange as noderange +import confluent.networking.nxapi as nxapi import confluent.util as util from eventlet.greenpool import GreenPool import eventlet.green.subprocess as subprocess @@ -151,19 +152,54 @@ def _nodelookup(switch, ifname): return _switchportmap[switch][portdesc] return None - -def _affluent_map_switch(args): +_fastbackends = {} +def _fast_map_switch(args): switch, password, user, cfgm = args + macdata = None + backend = _fastbackends.get(switch, None) kv = util.TLSCertVerifier(cfgm, switch, - 'pubkeys.tls_hardwaremanager').verify_cert - wc = webclient.SecureHTTPConnection( - switch, 443, verifycallback=kv, timeout=5) - wc.set_basic_credentials(user, password) - macs, retcode = wc.grab_json_response_with_status('/affluent/macs/by-port') - if retcode != 200: - raise Exception("No affluent detected") - _macsbyswitch[switch] = macs + 
'pubkeys.tls_hardwaremanager').verify_cert + if not backend: + wc = webclient.SecureHTTPConnection( + switch, 443, verifycallback=kv, timeout=5) + wc.set_basic_credentials(user, password) + macdata, retcode = wc.grab_json_response_with_status('/affluent/macs/by-port') + if retcode == 200: + _fastbackends[switch] = 'affluent' + else: + apicheck, retcode = wc.grab_json_response_with_status('/api/') + if retcode == 400: + if apicheck.startswith(b'{"imdata":['): + _fastbackends[switch] = 'nxapi' + backend = _fastbackends.get(switch, None) + if backend == 'affluent': + return _affluent_map_switch(switch, password, user, cfgm, macdata) + elif backend == 'nxapi': + return _nxapi_map_switch(switch, password, user, cfgm) + raise Exception("No fast backend match") +def _nxapi_map_switch(switch, password, user, cfgm): + cli = nxapi.NxApiClient(switch, user, password, cfgm) + mt = cli.get_mac_table() + _macsbyswitch[switch] = mt + _fast_backend_fixup(mt, switch) + + + +def _affluent_map_switch(switch, password, user, cfgm, macs): + if not macs: + kv = util.TLSCertVerifier(cfgm, switch, + 'pubkeys.tls_hardwaremanager').verify_cert + wc = webclient.SecureHTTPConnection( + switch, 443, verifycallback=kv, timeout=5) + wc.set_basic_credentials(user, password) + macs, retcode = wc.grab_json_response_with_status('/affluent/macs/by-port') + if retcode != 200: + raise Exception("No affluent detected") + _macsbyswitch[switch] = macs + _fast_backend_fixup(macs, switch) + +def _fast_backend_fixup(macs, switch): for iface in macs: nummacs = len(macs[iface]) for mac in macs[iface]: @@ -267,13 +303,13 @@ def _map_switch_backend(args): user = None if switch not in noaffluent: try: - return _affluent_map_switch(args) + return _fast_map_switch(args) except exc.PubkeyInvalid: log.log({'error': 'While trying to gather ethernet mac addresses ' 'from {0}, the TLS certificate failed validation. 
' 'Clear pubkeys.tls_hardwaremanager if this was ' 'expected due to reinstall or new certificate'.format(switch)}) - except Exception: + except Exception as e: pass mactobridge, ifnamemap, bridgetoifmap = _offload_map_switch( switch, password, user) diff --git a/confluent_server/confluent/networking/nxapi.py b/confluent_server/confluent/networking/nxapi.py index 2db62a4d..fe7827ed 100644 --- a/confluent_server/confluent/networking/nxapi.py +++ b/confluent_server/confluent/networking/nxapi.py @@ -82,6 +82,11 @@ class NxApiClient: cv = lambda x: True self.user = user self.password = password + try: + self.user = self.user.decode() + self.password = self.password.decode() + except Exception: + pass self.wc = webclient.SecureHTTPConnection(switch, port=443, verifycallback=cv) self.login() @@ -182,6 +187,7 @@ class NxApiClient: mactable = macinfo['l2MacAddressTable']['children'] for macent in mactable: mace = macent['l2MacAddressEntry']['attributes'] + mace['macAddress'] = mace['macAddress'].lower() if mace['port'] in macdict: macdict[mace['port']].append(mace['macAddress']) else: From df2c6a4e1846139bf6ed1a88965fefaca8c30188 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 28 Mar 2025 17:30:49 -0400 Subject: [PATCH 084/413] Fix states of absent PSU in NX-API --- confluent_server/confluent/networking/nxapi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/networking/nxapi.py b/confluent_server/confluent/networking/nxapi.py index fe7827ed..9978110c 100644 --- a/confluent_server/confluent/networking/nxapi.py +++ b/confluent_server/confluent/networking/nxapi.py @@ -62,7 +62,7 @@ def add_sensedata(component, sensedata, name=None): senseinfo['states'] = ['Present'] if attrs['operSt'] == 'empty': senseinfo['health'] = 'critical' - senseinfo['states'] = 'Absent' + senseinfo['states'] = ['Absent'] if senseinfo: sensedata.append(senseinfo) for key in component: From 48921c4ef01910d29e5146e2d583e6e46ae458db Mon Sep 17 00:00:00 
2001 From: Jarrod Johnson Date: Mon, 31 Mar 2025 15:12:29 -0400 Subject: [PATCH 085/413] Quick scanner to do ssdp scan --- misc/ssdpscan.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 misc/ssdpscan.py diff --git a/misc/ssdpscan.py b/misc/ssdpscan.py new file mode 100644 index 00000000..c296f91c --- /dev/null +++ b/misc/ssdpscan.py @@ -0,0 +1,42 @@ + +from select import select +import socket +import sys +import socket + +def scan_nicname(nicname): + idx = int(open('/sys/class/net/{}/ifindex'.format(nicname)).read()) + return scan_nic(idx) + +def scan_nic(nicidx): + known_peers = {} + srvs = {} + s6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + s6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1) + s6.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) + s6.bind(('::', 0)) + msg = b'M-SEARCH * HTTP/1.1\r\nHOST: [ff02::c]:1900\r\nMAN: "ssdp:discover"\r\nST: urn:dmtf-org:service:redfish-rest:1\r\nMX: 3\r\n\r\n' + s6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_MULTICAST_IF, nicidx) + x = [False,] + tries=5 + s6.sendto(msg, ('ff02::c', 1900)) + x = select((s6,), (), (), 3.0) + while x[0]: + (rsp, peer) = s6.recvfrom(9000) + x = select((s6,), (), (), 0.5) + if peer in known_peers: + continue + known_peers[peer] = 1 + if '%' not in peer[0]: + peer = list(peer) + peer[0] = '{}%{}'.format(peer[0], nicidx) + print("Received Redfish response from {}".format(peer[0])) + + + +def main(): + scan_nicname(sys.argv[1]) + + +if __name__ == '__main__': + main() From b6653651788d93892ee88888c23cb5c0d9bdf09a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 1 Apr 2025 08:18:51 -0400 Subject: [PATCH 086/413] Start with esxi7 contents as base for esxi9 --- confluent_osdeploy/confluent_osdeploy.spec.tmpl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/confluent_osdeploy.spec.tmpl b/confluent_osdeploy/confluent_osdeploy.spec.tmpl index 26beb74f..7eafc23f 100644 --- 
a/confluent_osdeploy/confluent_osdeploy.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy.spec.tmpl @@ -77,12 +77,14 @@ cd .. cp -a esxi7out esxi6out cp -a esxi7 esxi6 cp -a esxi7out esxi8out +cp -a esxi7out esxi9out cp -a esxi7 esxi8 +cp -a esxi7 esxi9 %install mkdir -p %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ cp LICENSE %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ -for os in rhvh4 el7 el8 el9 el10 genesis suse15 ubuntu20.04 ubuntu18.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 coreos; do +for os in rhvh4 el7 el8 el9 el10 genesis suse15 ubuntu20.04 ubuntu18.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 esxi9 coreos; do mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/profiles cp ${os}out/addons.* %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs From c8ed877fda583541534d049ed126dd5584601a96 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 1 Apr 2025 14:05:46 -0400 Subject: [PATCH 087/413] Make clearer api grant errors --- .../scripts/casper-bottom/99confluent | 8 ++++ confluent_osdeploy/utils/clortho.c | 2 +- confluent_server/confluent/networking/lldp.py | 48 ++++++++++++++++++- .../confluent/networking/macmap.py | 16 +------ 4 files changed, 56 insertions(+), 18 deletions(-) diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/casper-bottom/99confluent b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/casper-bottom/99confluent index d629cf32..90a7fd56 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/casper-bottom/99confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/casper-bottom/99confluent @@ -28,7 +28,15 @@ if [ -e /tmp/cnflnthmackeytmp ]; then DEVICE=$(cat /tmp/autodetectnic) IP=done else + APIKEY= chroot . 
custom-installation/confluent/bin/clortho $NODENAME $MGR > /root/custom-installation/confluent/confluent.apikey + APIKEY=$(cat /root/custom-installation/confluent.apikey) + while [ -z "$APIKEY" ]; do + echo "Failure trying to get confluent node token registered, check nodedeploy status, retrying in 5 seconds..." + sleep 5 + chroot . custom-installation/confluent/bin/clortho $NODENAME $MGR > /root/custom-installation/confluent/confluent.apikey + APIKEY=$(cat /root/custom-installation/confluent.apikey) + done MGR=[$MGR] nic=$(grep ^MANAGER /custom-installation/confluent/confluent.info|grep fe80::|sed -e s/.*%//|head -n 1) nic=$(ip link |grep ^$nic:|awk '{print $2}') diff --git a/confluent_osdeploy/utils/clortho.c b/confluent_osdeploy/utils/clortho.c index 887deee2..6d6789d8 100644 --- a/confluent_osdeploy/utils/clortho.c +++ b/confluent_osdeploy/utils/clortho.c @@ -246,6 +246,6 @@ int main(int argc, char* argv[]) { buffer[0] = 255; ret = read(sock, buffer, 2); } - fprintf(stderr, "Password was not accepted\n"); + fprintf(stderr, "Confluent API token grant denied by server\n"); exit(1); } diff --git a/confluent_server/confluent/networking/lldp.py b/confluent_server/confluent/networking/lldp.py index e181d46f..ad556d55 100644 --- a/confluent_server/confluent/networking/lldp.py +++ b/confluent_server/confluent/networking/lldp.py @@ -34,6 +34,7 @@ if __name__ == '__main__': import sys import confluent.config.configmanager as cfm import base64 +import confluent.networking.nxapi as nxapi import confluent.exceptions as exc import confluent.log as log import confluent.messages as msg @@ -174,11 +175,54 @@ def _init_lldp(data, iname, idx, idxtoportid, switch): data[iname] = {'port': iname, 'portid': str(idxtoportid[idx]), 'chassisid': _chassisidbyswitch[switch]} -def _extract_neighbor_data_affluent(switch, user, password, cfm, lldpdata): +_fastbackends = {} +def detect_backend(switch, verifier) + backend = _fastbackends.get(switch, None) + if backend: + return backend + wc = 
webclient.SecureHTTPConnection( + switch, 443, verifycallback=verifier, timeout=5) + wc.set_basic_credentials(user, password) + apicheck, retcode = wc.grab_json_response_with_status('/affluent/') + if retcode == 401 and apicheck == b'{}': + _fastbackends[switch] = 'affluent' + else: + apicheck, retcode = wc.grab_json_response_with_status('/api/') + if retcode == 400 and apicheck.startswith(b'{"imdata":['): + _fastbackends[switch] = 'nxapi' + return _fastbackends.get(switch, None) + +def _extract_neighbor_data_https(switch, user, password, cfm, lldpdata): kv = util.TLSCertVerifier(cfm, switch, 'pubkeys.tls_hardwaremanager').verify_cert + backend = detect_backend(switch, kv) + if not backend: + raise Exception("No HTTPS backend identified") wc = webclient.SecureHTTPConnection( switch, 443, verifycallback=kv, timeout=5) + if backend == 'affluent': + return _extract_neighbor_data_affluent(switch, user, password, cfm, lldpdata, wc) + elif backend == 'nxapi': + return _nxapi_map_switch(switch, password, user, cfgm) + + + +def _extract_neighbor_data_nxapi(switch, user, password, cfm, lldpdata, wc): + cli = nxapi.NxApiClient(switch, user, password, cfm) + lldipinfo = cli.get_lldp() + for port in lldpinfo: + portdata = lldpinfo[port] + peerid = '{0}.{1}'.format( + portdata.get('peerchassisid', '').replace(':', '-').replace('/', '-'), + portdata.get('peerportid', '').replace(':', '-').replace('/', '-'), + ) + _extract_extended_desc(portdata, portdata['peerdescription'], True) + + + mt = cli.get_mac_table() + _macsbyswitch[switch] = mt + _fast_backend_fixup(mt, switch) +def _extract_neighbor_data_affluent(switch, user, password, cfm, lldpdata, wc): wc.set_basic_credentials(user, password) neighdata = wc.grab_json_response('/affluent/lldp/all') chassisid = neighdata['chassis']['id'] @@ -219,7 +263,7 @@ def _extract_neighbor_data_b(args): return lldpdata = {'!!vintage': now} try: - return _extract_neighbor_data_affluent(switch, user, password, cfm, lldpdata) + return 
_extract_neighbor_data_https(switch, user, password, cfm, lldpdata) except Exception: pass conn = snmp.Session(switch, password, user) diff --git a/confluent_server/confluent/networking/macmap.py b/confluent_server/confluent/networking/macmap.py index 94e7bf5d..32f4d52d 100644 --- a/confluent_server/confluent/networking/macmap.py +++ b/confluent_server/confluent/networking/macmap.py @@ -152,26 +152,12 @@ def _nodelookup(switch, ifname): return _switchportmap[switch][portdesc] return None -_fastbackends = {} def _fast_map_switch(args): switch, password, user, cfgm = args macdata = None - backend = _fastbackends.get(switch, None) kv = util.TLSCertVerifier(cfgm, switch, 'pubkeys.tls_hardwaremanager').verify_cert - if not backend: - wc = webclient.SecureHTTPConnection( - switch, 443, verifycallback=kv, timeout=5) - wc.set_basic_credentials(user, password) - macdata, retcode = wc.grab_json_response_with_status('/affluent/macs/by-port') - if retcode == 200: - _fastbackends[switch] = 'affluent' - else: - apicheck, retcode = wc.grab_json_response_with_status('/api/') - if retcode == 400: - if apicheck.startswith(b'{"imdata":['): - _fastbackends[switch] = 'nxapi' - backend = _fastbackends.get(switch, None) + backend = lldp.detect_backend(switch, kv) if backend == 'affluent': return _affluent_map_switch(switch, password, user, cfgm, macdata) elif backend == 'nxapi': From e5f588d2b7cbaf93922409863c7e90316b30b528 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 1 Apr 2025 14:24:59 -0400 Subject: [PATCH 088/413] Fixup work to add nxapi for neighbor api backend --- confluent_server/confluent/networking/lldp.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/confluent_server/confluent/networking/lldp.py b/confluent_server/confluent/networking/lldp.py index ad556d55..6a969759 100644 --- a/confluent_server/confluent/networking/lldp.py +++ b/confluent_server/confluent/networking/lldp.py @@ -176,7 +176,7 @@ def _init_lldp(data, iname, idx, 
idxtoportid, switch): 'chassisid': _chassisidbyswitch[switch]} _fastbackends = {} -def detect_backend(switch, verifier) +def detect_backend(switch, verifier): backend = _fastbackends.get(switch, None) if backend: return backend @@ -217,11 +217,10 @@ def _extract_neighbor_data_nxapi(switch, user, password, cfm, lldpdata, wc): portdata.get('peerportid', '').replace(':', '-').replace('/', '-'), ) _extract_extended_desc(portdata, portdata['peerdescription'], True) + _neighbypeerid[peerid] = portdata + lldpdata[port] = portdata + _neighdata[switch] = lldpdata - - mt = cli.get_mac_table() - _macsbyswitch[switch] = mt - _fast_backend_fixup(mt, switch) def _extract_neighbor_data_affluent(switch, user, password, cfm, lldpdata, wc): wc.set_basic_credentials(user, password) neighdata = wc.grab_json_response('/affluent/lldp/all') @@ -248,7 +247,7 @@ def _extract_neighbor_data_affluent(switch, user, password, cfm, lldpdata, wc): _extract_extended_desc(portdata, portdata['peerdescription'], True) _neighbypeerid[peerid] = portdata lldpdata[localport] = portdata - neighdata[switch] = lldpdata + _neighdata[switch] = lldpdata def _extract_neighbor_data_b(args): From 85b19acf5fbe8d75c7d7d00bf093da9db06c77e3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 1 Apr 2025 15:22:36 -0400 Subject: [PATCH 089/413] Fix NXAPI neighbor table API backend --- confluent_server/confluent/networking/lldp.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/confluent_server/confluent/networking/lldp.py b/confluent_server/confluent/networking/lldp.py index 6a969759..5f17ab80 100644 --- a/confluent_server/confluent/networking/lldp.py +++ b/confluent_server/confluent/networking/lldp.py @@ -182,14 +182,13 @@ def detect_backend(switch, verifier): return backend wc = webclient.SecureHTTPConnection( switch, 443, verifycallback=verifier, timeout=5) - wc.set_basic_credentials(user, password) apicheck, retcode = wc.grab_json_response_with_status('/affluent/') - if retcode == 
401 and apicheck == b'{}': + if retcode == 401 and apicheck.startswith(b'{}'): _fastbackends[switch] = 'affluent' else: apicheck, retcode = wc.grab_json_response_with_status('/api/') if retcode == 400 and apicheck.startswith(b'{"imdata":['): - _fastbackends[switch] = 'nxapi' + _fastbackends[switch] = 'nxapi' return _fastbackends.get(switch, None) def _extract_neighbor_data_https(switch, user, password, cfm, lldpdata): @@ -203,19 +202,20 @@ def _extract_neighbor_data_https(switch, user, password, cfm, lldpdata): if backend == 'affluent': return _extract_neighbor_data_affluent(switch, user, password, cfm, lldpdata, wc) elif backend == 'nxapi': - return _nxapi_map_switch(switch, password, user, cfgm) + return _extract_neighbor_data_nxapi(switch, user, password, cfm, lldpdata, wc) def _extract_neighbor_data_nxapi(switch, user, password, cfm, lldpdata, wc): cli = nxapi.NxApiClient(switch, user, password, cfm) - lldipinfo = cli.get_lldp() + lldpinfo = cli.get_lldp() for port in lldpinfo: portdata = lldpinfo[port] peerid = '{0}.{1}'.format( portdata.get('peerchassisid', '').replace(':', '-').replace('/', '-'), portdata.get('peerportid', '').replace(':', '-').replace('/', '-'), ) + portdata['peerid'] = peerid _extract_extended_desc(portdata, portdata['peerdescription'], True) _neighbypeerid[peerid] = portdata lldpdata[port] = portdata @@ -263,7 +263,7 @@ def _extract_neighbor_data_b(args): lldpdata = {'!!vintage': now} try: return _extract_neighbor_data_https(switch, user, password, cfm, lldpdata) - except Exception: + except Exception as e: pass conn = snmp.Session(switch, password, user) sid = None From 98add92a203acdd8a40575eeceb642d98e31ec20 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 1 Apr 2025 15:26:37 -0400 Subject: [PATCH 090/413] Correct the path to the api key during ubuntu installation --- .../ubuntu22.04/initramfs/scripts/casper-bottom/99confluent | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/casper-bottom/99confluent b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/casper-bottom/99confluent index 90a7fd56..d2ccb5db 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/casper-bottom/99confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/casper-bottom/99confluent @@ -30,12 +30,12 @@ if [ -e /tmp/cnflnthmackeytmp ]; then else APIKEY= chroot . custom-installation/confluent/bin/clortho $NODENAME $MGR > /root/custom-installation/confluent/confluent.apikey - APIKEY=$(cat /root/custom-installation/confluent.apikey) + APIKEY=$(cat /root/custom-installation/confluent/confluent.apikey) while [ -z "$APIKEY" ]; do echo "Failure trying to get confluent node token registered, check nodedeploy status, retrying in 5 seconds..." sleep 5 chroot . custom-installation/confluent/bin/clortho $NODENAME $MGR > /root/custom-installation/confluent/confluent.apikey - APIKEY=$(cat /root/custom-installation/confluent.apikey) + APIKEY=$(cat /root/custom-installation/confluent/confluent.apikey) done MGR=[$MGR] nic=$(grep ^MANAGER /custom-installation/confluent/confluent.info|grep fe80::|sed -e s/.*%//|head -n 1) From df6818a3cc5732cec905af1a26fc518aadf6bacb Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 1 Apr 2025 15:59:02 -0400 Subject: [PATCH 091/413] Fix refactoring of detect_backend to lldp module --- confluent_server/confluent/networking/macmap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/networking/macmap.py b/confluent_server/confluent/networking/macmap.py index 32f4d52d..1bff9e5f 100644 --- a/confluent_server/confluent/networking/macmap.py +++ b/confluent_server/confluent/networking/macmap.py @@ -43,7 +43,7 @@ if __name__ == '__main__': import confluent.snmputil as snmp -from confluent.networking.lldp import _handle_neighbor_query, get_fingerprint +from confluent.networking.lldp import detect_backend, _handle_neighbor_query, get_fingerprint 
from confluent.networking.netutil import get_switchcreds, list_switches, get_portnamemap import eventlet.green.select as select @@ -157,7 +157,7 @@ def _fast_map_switch(args): macdata = None kv = util.TLSCertVerifier(cfgm, switch, 'pubkeys.tls_hardwaremanager').verify_cert - backend = lldp.detect_backend(switch, kv) + backend = detect_backend(switch, kv) if backend == 'affluent': return _affluent_map_switch(switch, password, user, cfgm, macdata) elif backend == 'nxapi': From b21d8b75e07d66149c1f6c9abcaf3805ae6e097c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 2 Apr 2025 09:50:15 -0400 Subject: [PATCH 092/413] Incorporate block device into retry loop Have block devices checked for identity information in a loop with network source search. Block devices may be delayed for various reasons. The previous method could be bypassed by fast block device cutting off slow device enumeration. It also incurred a delay for the network install case. --- .../initramfs/scripts/init-premount/confluent | 123 +++++++++-------- .../initramfs/scripts/init-premount/confluent | 124 +++++++++--------- 2 files changed, 121 insertions(+), 126 deletions(-) diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent index a974f04d..964869d7 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent @@ -6,77 +6,74 @@ done mkdir -p /custom-installation cp -a /opt/confluent /custom-installation touch /custom-installation/confluent/confluent.info -TRIES=5 -while [ ! 
-e /dev/disk/by-label ] && [ $TRIES -gt 0 ]; do - sleep 2 - TRIES=$((TRIES - 1)) -done -if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then - tmnt=/tmp/idntmnt - mkdir -p /tmp/identdata/ - mkdir -p $tmnt - tcfg=/tmp/idnttmp - mount /dev/disk/by-label/CNFLNT_IDNT $tmnt - cp -a $tmnt/* /tmp/identdata/ - cd $tmnt - deploysrvs=$(sed -n '/^deploy_servers:/,/^[^-]/p' cnflnt.yml |grep ^-|sed -e 's/^- //'|grep -v :) - sed -n '/^net_cfgs:/,/^[^- ]/{/^[^- ]/!p}' cnflnt.yml |sed -n '/^-/,/^-/{/^-/!p}'| sed -e 's/^[- ]*//'> $tcfg - autoconfigmethod=$(grep ^ipv4_method: $tcfg) - autoconfigmethod=${autoconfigmethod#ipv4_method: } - . /scripts/functions - if [ "$autoconfigmethod" = "static" ]; then - MYIP=$(grep ^ipv4_address: $tcfg | awk '{print $2}'|sed -e s'!/.*!!') - v4addr=$(grep ^ipv4_address: $tcfg|cut -d: -f 2|sed -e 's/ //') - MYGW=$(grep ^ipv4_gateway: $tcfg | awk '{print $2}') - if [ "$MYGW" = "null" ]; then - MYGW="" - fi - MYNM=$(grep ^ipv4_netmask: $tcfg | awk '{print $2}') - NIC="" - while [ -z "$NIC" ]; do - for NICGUESS in $(ip link|grep LOWER_UP|grep -v LOOPBACK|cut -d ' ' -f 2 | sed -e 's/:$//'); do - ip addr add dev $NICGUESS $v4addr - if [ ! -z "$MYGW" ]; then - ip route add default via $MYGW - fi - for dsrv in $deploysrvs; do - if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then - deploysrvs=$dsrv - NIC=$NICGUESS +while ! grep NODENAME /custom-installation/confluent/confluent.info; do + if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then + tmnt=/tmp/idntmnt + mkdir -p /tmp/identdata/ + mkdir -p $tmnt + tcfg=/tmp/idnttmp + mount /dev/disk/by-label/CNFLNT_IDNT $tmnt + cp -a $tmnt/* /tmp/identdata/ + cd $tmnt + deploysrvs=$(sed -n '/^deploy_servers:/,/^[^-]/p' cnflnt.yml |grep ^-|sed -e 's/^- //'|grep -v :) + sed -n '/^net_cfgs:/,/^[^- ]/{/^[^- ]/!p}' cnflnt.yml |sed -n '/^-/,/^-/{/^-/!p}'| sed -e 's/^[- ]*//'> $tcfg + autoconfigmethod=$(grep ^ipv4_method: $tcfg) + autoconfigmethod=${autoconfigmethod#ipv4_method: } + . 
/scripts/functions + if [ "$autoconfigmethod" = "static" ]; then + MYIP=$(grep ^ipv4_address: $tcfg | awk '{print $2}'|sed -e s'!/.*!!') + v4addr=$(grep ^ipv4_address: $tcfg|cut -d: -f 2|sed -e 's/ //') + MYGW=$(grep ^ipv4_gateway: $tcfg | awk '{print $2}') + if [ "$MYGW" = "null" ]; then + MYGW="" + fi + MYNM=$(grep ^ipv4_netmask: $tcfg | awk '{print $2}') + NIC="" + while [ -z "$NIC" ]; do + for NICGUESS in $(ip link|grep LOWER_UP|grep -v LOOPBACK|cut -d ' ' -f 2 | sed -e 's/:$//'); do + ip addr add dev $NICGUESS $v4addr + if [ ! -z "$MYGW" ]; then + ip route add default via $MYGW + fi + for dsrv in $deploysrvs; do + if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then + deploysrvs=$dsrv + NIC=$NICGUESS + break + fi + done + if [ -z "$NIC" ]; then + ip -4 a flush dev $NICGUESS + else break fi done - if [ -z "$NIC" ]; then - ip -4 a flush dev $NICGUESS - else + done + ipconfig -d $MYIP::$MYGW:$MYNM::$NIC + echo $NIC > /tmp/autodetectnic + else + configure_networking + for dsrv in $deploysrvs; do + if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then + deploysrvs=$dsrv break fi done - done - ipconfig -d $MYIP::$MYGW:$MYNM::$NIC - echo $NIC > /tmp/autodetectnic + fi + MGR=$deploysrvs + NODENAME=$(grep ^nodename: /tmp/idntmnt/cnflnt.yml | awk '{print $2}') + echo "NODENAME: $NODENAME" >> /custom-installation/confluent/confluent.info + echo "MANAGER: $MGR" >> /custom-installation/confluent/confluent.info + echo "EXTMGRINFO: $MGR||1" >> /custom-installation/confluent/confluent.info + hmackeyfile=/tmp/cnflnthmackeytmp + echo -n $(grep ^apitoken: cnflnt.yml|awk '{print $2}') > $hmackeyfile + cd - + umount $tmnt else - configure_networking - for dsrv in $deploysrvs; do - if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then - deploysrvs=$dsrv - break - fi - done - fi - MGR=$deploysrvs - NODENAME=$(grep ^nodename: /tmp/idntmnt/cnflnt.yml | awk '{print $2}') - echo "NODENAME: $NODENAME" >> /custom-installation/confluent/confluent.info - echo 
"MANAGER: $MGR" >> /custom-installation/confluent/confluent.info - echo "EXTMGRINFO: $MGR||1" >> /custom-installation/confluent/confluent.info - hmackeyfile=/tmp/cnflnthmackeytmp - echo -n $(grep ^apitoken: cnflnt.yml|awk '{print $2}') > $hmackeyfile - cd - - umount $tmnt -else - while ! grep NODENAME /custom-installation/confluent/confluent.info; do /opt/confluent/bin/copernicus -t > /custom-installation/confluent/confluent.info - done + fi +done +if [ -z "$MGR" ]; then MGR="[$(grep MANAGER: /custom-installation/confluent/confluent.info | head -n 1 | awk '{print $2}')]" fi osprofile=$(sed -e 's/.*osprofile=//' -e 's/ .*//' /proc/cmdline) diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent index 725560de..995fb086 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent @@ -6,77 +6,75 @@ done mkdir -p /custom-installation cp -a /opt/confluent /custom-installation touch /custom-installation/confluent/confluent.info -TRIES=5 -while [ ! -e /dev/disk/by-label ] && [ $TRIES -gt 0 ]; do - sleep 2 - TRIES=$((TRIES - 1)) -done -if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then - tmnt=/tmp/idntmnt - mkdir -p /tmp/identdata/ - mkdir -p $tmnt - tcfg=/tmp/idnttmp - mount /dev/disk/by-label/CNFLNT_IDNT $tmnt - cp -a $tmnt/* /tmp/identdata/ - cd $tmnt - deploysrvs=$(sed -n '/^deploy_servers:/,/^[^-]/p' cnflnt.yml |grep ^-|sed -e 's/^- //'|grep -v :) - sed -n '/^net_cfgs:/,/^[^- ]/{/^[^- ]/!p}' cnflnt.yml |sed -n '/^-/,/^-/{/^-/!p}'| sed -e 's/^[- ]*//'> $tcfg - autoconfigmethod=$(grep ^ipv4_method: $tcfg) - autoconfigmethod=${autoconfigmethod#ipv4_method: } - . 
/scripts/functions - if [ "$autoconfigmethod" = "static" ]; then - MYIP=$(grep ^ipv4_address: $tcfg | awk '{print $2}'|sed -e s'!/.*!!') - v4addr=$(grep ^ipv4_address: $tcfg|cut -d: -f 2|sed -e 's/ //') - MYGW=$(grep ^ipv4_gateway: $tcfg | awk '{print $2}') - if [ "$MYGW" = "null" ]; then - MYGW="" - fi - MYNM=$(grep ^ipv4_netmask: $tcfg | awk '{print $2}') - NIC="" - while [ -z "$NIC" ]; do - for NICGUESS in $(ip link|grep LOWER_UP|grep -v LOOPBACK|cut -d ' ' -f 2 | sed -e 's/:$//'); do - ip addr add dev $NICGUESS $v4addr - if [ ! -z "$MYGW" ]; then - ip route add default via $MYGW - fi - for dsrv in $deploysrvs; do - if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then - deploysrvs=$dsrv - NIC=$NICGUESS +MGR="" +while ! grep NODENAME /custom-installation/confluent/confluent.info; do + if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then + tmnt=/tmp/idntmnt + mkdir -p /tmp/identdata/ + mkdir -p $tmnt + tcfg=/tmp/idnttmp + mount /dev/disk/by-label/CNFLNT_IDNT $tmnt + cp -a $tmnt/* /tmp/identdata/ + cd $tmnt + deploysrvs=$(sed -n '/^deploy_servers:/,/^[^-]/p' cnflnt.yml |grep ^-|sed -e 's/^- //'|grep -v :) + sed -n '/^net_cfgs:/,/^[^- ]/{/^[^- ]/!p}' cnflnt.yml |sed -n '/^-/,/^-/{/^-/!p}'| sed -e 's/^[- ]*//'> $tcfg + autoconfigmethod=$(grep ^ipv4_method: $tcfg) + autoconfigmethod=${autoconfigmethod#ipv4_method: } + . /scripts/functions + if [ "$autoconfigmethod" = "static" ]; then + MYIP=$(grep ^ipv4_address: $tcfg | awk '{print $2}'|sed -e s'!/.*!!') + v4addr=$(grep ^ipv4_address: $tcfg|cut -d: -f 2|sed -e 's/ //') + MYGW=$(grep ^ipv4_gateway: $tcfg | awk '{print $2}') + if [ "$MYGW" = "null" ]; then + MYGW="" + fi + MYNM=$(grep ^ipv4_netmask: $tcfg | awk '{print $2}') + NIC="" + while [ -z "$NIC" ]; do + for NICGUESS in $(ip link|grep LOWER_UP|grep -v LOOPBACK|cut -d ' ' -f 2 | sed -e 's/:$//'); do + ip addr add dev $NICGUESS $v4addr + if [ ! 
-z "$MYGW" ]; then + ip route add default via $MYGW + fi + for dsrv in $deploysrvs; do + if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then + deploysrvs=$dsrv + NIC=$NICGUESS + break + fi + done + if [ -z "$NIC" ]; then + ip -4 a flush dev $NICGUESS + else break fi done - if [ -z "$NIC" ]; then - ip -4 a flush dev $NICGUESS - else + done + ipconfig -d $MYIP::$MYGW:$MYNM::$NIC + echo $NIC > /tmp/autodetectnic + else + configure_networking + for dsrv in $deploysrvs; do + if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then + deploysrvs=$dsrv break fi done - done - ipconfig -d $MYIP::$MYGW:$MYNM::$NIC - echo $NIC > /tmp/autodetectnic + fi + MGR=$deploysrvs + NODENAME=$(grep ^nodename: /tmp/idntmnt/cnflnt.yml | awk '{print $2}') + echo "NODENAME: $NODENAME" >> /custom-installation/confluent/confluent.info + echo "MANAGER: $MGR" >> /custom-installation/confluent/confluent.info + echo "EXTMGRINFO: $MGR||1" >> /custom-installation/confluent/confluent.info + hmackeyfile=/tmp/cnflnthmackeytmp + echo -n $(grep ^apitoken: cnflnt.yml|awk '{print $2}') > $hmackeyfile + cd - + umount $tmnt else - configure_networking - for dsrv in $deploysrvs; do - if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then - deploysrvs=$dsrv - break - fi - done - fi - MGR=$deploysrvs - NODENAME=$(grep ^nodename: /tmp/idntmnt/cnflnt.yml | awk '{print $2}') - echo "NODENAME: $NODENAME" >> /custom-installation/confluent/confluent.info - echo "MANAGER: $MGR" >> /custom-installation/confluent/confluent.info - echo "EXTMGRINFO: $MGR||1" >> /custom-installation/confluent/confluent.info - hmackeyfile=/tmp/cnflnthmackeytmp - echo -n $(grep ^apitoken: cnflnt.yml|awk '{print $2}') > $hmackeyfile - cd - - umount $tmnt -else - while ! 
grep NODENAME /custom-installation/confluent/confluent.info; do /opt/confluent/bin/copernicus -t > /custom-installation/confluent/confluent.info - done + fi +done +if [ -z "$MGR" ]; then MGR="[$(grep MANAGER: /custom-installation/confluent/confluent.info | head -n 1 | awk '{print $2}')]" fi osprofile=$(sed -e 's/.*osprofile=//' -e 's/ .*//' /proc/cmdline) From b1ba1720b98bab0bbcd204a33c8c59759108e481 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 2 Apr 2025 11:10:33 -0400 Subject: [PATCH 093/413] Suppress scary message from apiclient when asked to just do -f. --- confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient index 7e78a5b8..61eddf8b 100644 --- a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient +++ b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient @@ -499,6 +499,7 @@ if __name__ == '__main__': except Exception as e: print(f"fix_vswitch() error: {e}") sys.argv.remove('-f') + sys.exit(0) usejson = False if '-j' in sys.argv: usejson = True From 1fa2baacb78a9733ee5cf7a649f2b0c99a5821d0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 3 Apr 2025 13:06:37 -0400 Subject: [PATCH 094/413] Support debian style lib layout --- genesis/97genesis/install-base | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/genesis/97genesis/install-base b/genesis/97genesis/install-base index 40a734e8..731c3fb1 100644 --- a/genesis/97genesis/install-base +++ b/genesis/97genesis/install-base @@ -1,6 +1,11 @@ #!/bin/sh +if [ -d /usr/lib/x86_64-linux-gnu ]; then # Debian style + IMPLIBDIR=lib/x86_64-linux-gnu +else + IMPLIBDIR=lib64 +fi dracut_install mktemp -dracut_install /lib64/libtss2-tcti-device.so.0 +dracut_install /$IMPLIBDIR/libtss2-tcti-device.so.0 dracut_install tpm2_create tpm2_pcrread tpm2_createpolicy 
tpm2_createprimary dracut_install tpm2_load tpm2_unseal tpm2_getcap tpm2_evictcontrol dracut_install tpm2_pcrextend tpm2_policypcr tpm2_flushcontext tpm2_startauthsession @@ -11,12 +16,12 @@ dracut_install ssh sshd vi reboot lspci parted tmux mkfs mkfs.ext4 mkfs.xfs xfs_ dracut_install /usr/libexec/openssh/sftp-server dracut_install efibootmgr dracut_install du df ssh-keygen scp clear dhclient lldpd lldpcli tee -dracut_install /lib64/libnss_dns.so.2 /lib64/libnss_dns.so.2 /lib64/libnss_myhostname.so.2 -dracut_install ldd uptime /usr/lib64/libnl-3.so.200 +dracut_install /$IMPLIBDIR/libnss_dns.so.2 /$IMPLIBDIR/libnss_dns.so.2 /$IMPLIBDIR/libnss_myhostname.so.2 +dracut_install ldd uptime /usr/$IMPLIBDIR/libnl-3.so.200 dracut_install poweroff date /etc/nsswitch.conf /etc/services /etc/protocols dracut_install /usr/share/terminfo/x/xterm /usr/share/terminfo/l/linux /usr/share/terminfo/v/vt100 /usr/share/terminfo/x/xterm-color /usr/share/terminfo/s/screen /usr/share/terminfo/x/xterm-256color /usr/share/terminfo/p/putty-256color /usr/share/terminfo/p/putty /usr/share/terminfo/d/dumb dracut_install chmod whoami head tail basename /etc/redhat-release ping tr /usr/share/hwdata/usb.ids -dracut_install dmidecode /usr/lib64/libstdc++.so.6 +dracut_install dmidecode /usr/$IMPLIBDIR/libstdc++.so.6 dracut_install ps free find inst /bin/bash /bin/sh #inst_hook cmdline 10 $moddir/cmdline.sh # moved to addons.cpio @@ -28,7 +33,7 @@ dracut_install /usr/lib/udev/rules.d/10-dm.rules /usr/sbin/dmsetup /usr/lib/udev #dracut_install opainfo #dracut_install /usr/lib/opa-fm/bin/opafmd #dracut_install /usr/sbin/opensm /usr/libexec/opensm-launch -dracut_install /usr/lib64/libibverbs/libhfi1verbs-rdmav34.so /etc/libibverbs.d/hfi1verbs.driver /etc/libibverbs.d/mlx4.driver /etc/libibverbs.d/mlx5.driver /usr/lib64/libibverbs/libmlx4-rdmav34.so /usr/lib64/libibverbs/libmlx5-rdmav34.so +dracut_install /usr/$IMPLIBDIR/libibverbs/libhfi1verbs-rdmav34.so /etc/libibverbs.d/hfi1verbs.driver 
/etc/libibverbs.d/mlx4.driver /etc/libibverbs.d/mlx5.driver /usr/$IMPLIBDIR/libibverbs/libmlx4-rdmav34.so /usr/$IMPLIBDIR/libibverbs/libmlx5-rdmav34.so if [ -x /usr/libexec/openssh/sshd-session ]; then dracut_install /usr/libexec/openssh/sshd-session fi From 53760ac576d7f5a00578373796e81ca88d9f7121 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 3 Apr 2025 13:11:56 -0400 Subject: [PATCH 095/413] More changes to support debian genesis host --- genesis/97genesis/install-base | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/genesis/97genesis/install-base b/genesis/97genesis/install-base index 731c3fb1..1ac2cfa5 100644 --- a/genesis/97genesis/install-base +++ b/genesis/97genesis/install-base @@ -13,23 +13,35 @@ dracut_install openssl tar ipmitool cpio xz gzip lsmod ethtool dracut_install modprobe touch echo cut wc bash uniq grep ip hostname dracut_install awk egrep dirname bc expr sort dracut_install ssh sshd vi reboot lspci parted tmux mkfs mkfs.ext4 mkfs.xfs xfs_db mkswap -dracut_install /usr/libexec/openssh/sftp-server +if [ -x /usr/libexec/openssh/sftp-server ]; then + dracut_install /usr/libexec/openssh/sftp-server +else + dracut_install /usr/lib/sftp-server +fi dracut_install efibootmgr dracut_install du df ssh-keygen scp clear dhclient lldpd lldpcli tee dracut_install /$IMPLIBDIR/libnss_dns.so.2 /$IMPLIBDIR/libnss_dns.so.2 /$IMPLIBDIR/libnss_myhostname.so.2 dracut_install ldd uptime /usr/$IMPLIBDIR/libnl-3.so.200 dracut_install poweroff date /etc/nsswitch.conf /etc/services /etc/protocols dracut_install /usr/share/terminfo/x/xterm /usr/share/terminfo/l/linux /usr/share/terminfo/v/vt100 /usr/share/terminfo/x/xterm-color /usr/share/terminfo/s/screen /usr/share/terminfo/x/xterm-256color /usr/share/terminfo/p/putty-256color /usr/share/terminfo/p/putty /usr/share/terminfo/d/dumb -dracut_install chmod whoami head tail basename /etc/redhat-release ping tr /usr/share/hwdata/usb.ids +dracut_install chmod whoami head tail 
basename ping tr /usr/share/hwdata/usb.ids +if [ -e /etc/redhat-release ]; then + dracut_install /etc/redhat_release +fi dracut_install dmidecode /usr/$IMPLIBDIR/libstdc++.so.6 dracut_install ps free find inst /bin/bash /bin/sh #inst_hook cmdline 10 $moddir/cmdline.sh # moved to addons.cpio dracut_install killall chown chroot dd expr kill parted rsync shutdown sort blockdev findfs insmod lvm -dracut_install /etc/udev/hwdb.bin +if [ -e /etc/udev/hwdb.bin ]; then + dracut_install /etc/udev/hwdb.bin +else + dracut_install /usr/lib/udev/hwdb.bin +fi + dracut_install /usr/share/hwdata/pci.ids dracut_install ibstat ibstatus -dracut_install /usr/lib/udev/rules.d/10-dm.rules /usr/sbin/dmsetup /usr/lib/udev/rules.d/95-dm-notify.rules +dracut_install /usr/lib/udev/rules.d/*-dm.rules /usr/sbin/dmsetup /usr/lib/udev/rules.d/95-dm-notify.rules #dracut_install opainfo #dracut_install /usr/lib/opa-fm/bin/opafmd #dracut_install /usr/sbin/opensm /usr/libexec/opensm-launch From 5f7a5b18bffd7d2b90d21e9f2e7696f43b3b0fbf Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 3 Apr 2025 14:28:25 -0400 Subject: [PATCH 096/413] Add Sway to genesis install assets --- genesis/97genesis/install-gui | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 genesis/97genesis/install-gui diff --git a/genesis/97genesis/install-gui b/genesis/97genesis/install-gui new file mode 100644 index 00000000..344686cb --- /dev/null +++ b/genesis/97genesis/install-gui @@ -0,0 +1,10 @@ +dracut_install /usr/bin/sway /usr/bin/foot +dracut_install swaynag swaymsg +cp -a /etc/sway $initdir/etc/sway +cp -a /usr/share/X11 $initdir/usr/share/X11 +cp -a /usr/share/fonts $initdir/usr/share/fonts +dracut_install /usr/share/glvnd/egl_vendor.d/50_mesa.json +dracut_install seatd seatd-launch +if [ -e /usr/lib/sysusers.d/seatd.conf ]; then + dracut_install /usr/lib/sysusers.d/seatd.conf +fi From 9980414160e88041c864e4d08303fd60b0d9ef6c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 3 Apr 2025 
14:30:25 -0400 Subject: [PATCH 097/413] Hook gui in genesis build if detected --- genesis/97genesis/module-setup.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/genesis/97genesis/module-setup.sh b/genesis/97genesis/module-setup.sh index d8cf1d76..46260d97 100644 --- a/genesis/97genesis/module-setup.sh +++ b/genesis/97genesis/module-setup.sh @@ -6,7 +6,9 @@ check() { } install() { . $moddir/install-base - #. $moddir/install-gui + if [ -x /usr/bin/sway ]; then + . $moddir/install-gui + fi if [ -d /usr/lib64/python3.13/ ]; then . $moddir/install-python313 From 65760bb678ee0a6c29157321b89494e14bce58e8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 3 Apr 2025 14:49:24 -0400 Subject: [PATCH 098/413] Break locale to a separate file --- genesis/97genesis/install-locale | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 genesis/97genesis/install-locale diff --git a/genesis/97genesis/install-locale b/genesis/97genesis/install-locale new file mode 100644 index 00000000..d4efd712 --- /dev/null +++ b/genesis/97genesis/install-locale @@ -0,0 +1,14 @@ +dracut_install /usr/lib/locale/en_US.utf8/LC_ADDRESS +dracut_install /usr/lib/locale/en_US.utf8/LC_COLLATE +dracut_install /usr/lib/locale/en_US.utf8/LC_CTYPE +dracut_install /usr/lib/locale/en_US.utf8/LC_IDENTIFICATION +dracut_install /usr/lib/locale/en_US.utf8/LC_MEASUREMENT +dracut_install /usr/lib/locale/en_US.utf8/LC_MESSAGES +dracut_install /usr/lib/locale/en_US.utf8/LC_MESSAGES/SYS_LC_MESSAGES +dracut_install /usr/lib/locale/en_US.utf8/LC_MONETARY +dracut_install /usr/lib/locale/en_US.utf8/LC_NAME +dracut_install /usr/lib/locale/en_US.utf8/LC_NUMERIC +dracut_install /usr/lib/locale/en_US.utf8/LC_PAPER +dracut_install /usr/lib/locale/en_US.utf8/LC_TELEPHONE +dracut_install /usr/lib/locale/en_US.utf8/LC_TIME +dracut_install /usr/share/locale/locale.alias From bf03d8dc82dbf529c08a6b84a978f513309705fa Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 3 Apr 2025 
14:51:58 -0400 Subject: [PATCH 099/413] Pull locale file into genesis build --- genesis/97genesis/module-setup.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/genesis/97genesis/module-setup.sh b/genesis/97genesis/module-setup.sh index 46260d97..afba1fca 100644 --- a/genesis/97genesis/module-setup.sh +++ b/genesis/97genesis/module-setup.sh @@ -10,6 +10,7 @@ install() { . $moddir/install-gui fi + . $moddir/install-locale if [ -d /usr/lib64/python3.13/ ]; then . $moddir/install-python313 elif [ -d /usr/lib64/python3.9/ ]; then From 799fff10ffdefc8bca0ed78484fd42a4d1ed85a9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 3 Apr 2025 14:58:09 -0400 Subject: [PATCH 100/413] Handle different locale layouts --- genesis/97genesis/install-locale | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/genesis/97genesis/install-locale b/genesis/97genesis/install-locale index d4efd712..21b216c9 100644 --- a/genesis/97genesis/install-locale +++ b/genesis/97genesis/install-locale @@ -1,3 +1,7 @@ +if [ -e /usr/lib/locale/locale-archive ]; then + dracut_install /usr/lib/locale/locale-archive +fi +if [ -d /usr/lib/locale/en_US.utf8 ]; then dracut_install /usr/lib/locale/en_US.utf8/LC_ADDRESS dracut_install /usr/lib/locale/en_US.utf8/LC_COLLATE dracut_install /usr/lib/locale/en_US.utf8/LC_CTYPE @@ -12,3 +16,4 @@ dracut_install /usr/lib/locale/en_US.utf8/LC_PAPER dracut_install /usr/lib/locale/en_US.utf8/LC_TELEPHONE dracut_install /usr/lib/locale/en_US.utf8/LC_TIME dracut_install /usr/share/locale/locale.alias +fi From 71ddbb88fc6d68b8ffa3fcd9025f1ce382f938e7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 8 Apr 2025 09:25:19 -0400 Subject: [PATCH 101/413] If doing GUI in Genesis, defer until after udevd udev is needed to run in some scenarios for seatd/sway to function correctly. 
--- .../initramfs/opt/confluent/bin/rungenesis | 18 +++++++++--------- .../usr/lib/dracut/hooks/cmdline/10-genesis.sh | 4 +++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis index 88b8d39e..107d48fb 100644 --- a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis +++ b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis @@ -1,18 +1,11 @@ #!/bin/bash +mkdir -p /usr/libexec /run/sshd if [ ! -x /usr/libexec/platform-python ]; then ln -s /usr/bin/python3 /usr/libexec/platform-python fi export LANG=en_US.utf8 -if [ -x /usr/bin/seatd-launch -a -x /usr/bin/sway ]; then - export XDG_RUNTIME_DIR=/run/users/0 - mkdir -p $XDG_RUNTIME_DIR - sed -i '/^output /d' /etc/sway/config - echo 'exec foot -t xterm -T Terminal tmux a' > /etc/sway/config.d/genesis - (while :; do seatd-launch sway <> /dev/tty1 >& /dev/null; done) & -else - (while :; do TERM=linux tmux a <> /dev/tty1 >&0 2>&1; done) & -fi +(while :; do TERM=linux tmux a <> /dev/tty1 >&0 2>&1; done) & @@ -41,6 +34,13 @@ modprobe ib_umad modprobe hfi1 modprobe mlx5_ib echo "done" +if [ -x /usr/bin/seatd-launch -a -x /usr/bin/sway ]; then + export XDG_RUNTIME_DIR=/run/users/0 + mkdir -p $XDG_RUNTIME_DIR + sed -i '/^output /d' /etc/sway/config + echo 'exec foot -t xterm -T Terminal tmux a' > /etc/sway/config.d/genesis + (while :; do seatd-launch sway <> /dev/tty1 >& /dev/null; done) & +fi cat > /etc/ssh/sshd_config << EOF Port 22 Port 3389 diff --git a/confluent_osdeploy/genesis/initramfs/usr/lib/dracut/hooks/cmdline/10-genesis.sh b/confluent_osdeploy/genesis/initramfs/usr/lib/dracut/hooks/cmdline/10-genesis.sh index 6f25d910..c658fce8 100644 --- a/confluent_osdeploy/genesis/initramfs/usr/lib/dracut/hooks/cmdline/10-genesis.sh +++ b/confluent_osdeploy/genesis/initramfs/usr/lib/dracut/hooks/cmdline/10-genesis.sh @@ -2,6 +2,8 @@ root=1 rootok=1 
netroot=genesis clear +mount -t cgroup2 cgroup2 /sys/fs/cgroup +mount -t efivarfs efivarfs /sys/firmware/efi/efivars echo PS1="'"'[genesis running on \H \w]$ '"'" >> ~/.bashrc echo PS1="'"'[genesis running on \H \w]$ '"'" >> ~/.bash_profile mkdir -p /etc/ssh @@ -10,7 +12,7 @@ mkdir -p /var/empty/sshd sed -i '/^root:/d' /etc/passwd echo root:x:0:0::/:/bin/bash >> /etc/passwd echo sshd:x:30:30:SSH User:/var/empty/sshd:/sbin/nologin >> /etc/passwd -tmux new-session -d sh /opt/confluent/bin/rungenesis +tmux new-session -d bash /opt/confluent/bin/rungenesis while :; do sleep 86400 done From b3b852a9e18b6ba79a99ccd6028929a9dcabb1ad Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 8 Apr 2025 10:39:35 -0400 Subject: [PATCH 102/413] Add Ubuntu GUI considerations for genesis --- genesis/97genesis/install-gui | 18 ++- genesis/97genesis/install-python312 | 194 ++++++++++++++++++++++++++++ genesis/97genesis/installkernel | 4 + genesis/97genesis/module-setup.sh | 2 + 4 files changed, 216 insertions(+), 2 deletions(-) create mode 100644 genesis/97genesis/install-python312 diff --git a/genesis/97genesis/install-gui b/genesis/97genesis/install-gui index 344686cb..1f55c446 100644 --- a/genesis/97genesis/install-gui +++ b/genesis/97genesis/install-gui @@ -5,6 +5,20 @@ cp -a /usr/share/X11 $initdir/usr/share/X11 cp -a /usr/share/fonts $initdir/usr/share/fonts dracut_install /usr/share/glvnd/egl_vendor.d/50_mesa.json dracut_install seatd seatd-launch -if [ -e /usr/lib/sysusers.d/seatd.conf ]; then - dracut_install /usr/lib/sysusers.d/seatd.conf +if [ -e /usr/lib/x86_64-linux-gnu/libEGL_mesa.so.0 ]; then + dracut_install /usr/lib/x86_64-linux-gnu/libEGL_mesa.so.0 +fi +if [ -e /usr/lib/sysusers.d/seatd.conf ]; then + dracut_install /usr/lib/sysusers.d/seatd.conf +fi +if grep Ubuntu /etc/os-release > /dev/null; then + dracut_install /usr/share/libinput/* /etc/fonts/fonts.conf /etc/fonts/conf.d/* /usr/bin/libinput /usr/libexec/libinput/* /usr/bin/lsof + dracut_install 
/usr/lib/udev/hwdb.d/60-input-id.hwdb + dracut_install /usr/lib/udev/libinput-fuzz-to-zero + dracut_install /usr/lib/udev/libinput-fuzz-extract + dracut_install /usr/lib/udev/libinput-device-group + dracut_install /usr/lib/udev/rules.d/60-input-id.rules + dracut_install /usr/lib/udev/rules.d/90-libinput-fuzz-override.rules + dracut_install /usr/lib/udev/rules.d/80-libinput-device-groups.rules + dracut_install /usr/lib/udev/rules.d/60-persistent-input.rules fi diff --git a/genesis/97genesis/install-python312 b/genesis/97genesis/install-python312 new file mode 100644 index 00000000..d2b171f5 --- /dev/null +++ b/genesis/97genesis/install-python312 @@ -0,0 +1,194 @@ +dracut_install /usr/bin/python3 +dracut_install /etc/ld.so.cache +dracut_install /etc/localtime +dracut_install /lib/x86_64-linux-gnu/libc.so.6 +dracut_install /lib/x86_64-linux-gnu/libcrypto.so.3 +dracut_install /lib/x86_64-linux-gnu/libexpat.so.1 +dracut_install /lib/x86_64-linux-gnu/libffi.so.8 +dracut_install /lib/x86_64-linux-gnu/libm.so.6 +dracut_install /lib/x86_64-linux-gnu/libssl.so.3 +dracut_install /lib/x86_64-linux-gnu/libz.so.1 +dracut_install /usr/lib/locale/C.utf8/LC_CTYPE +dracut_install /usr/lib/locale/locale-archive +dracut_install /usr/lib/python3.12/__pycache__/_weakrefset.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/argparse.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/base64.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/bisect.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/calendar.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/configparser.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/contextlib.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/copyreg.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/datetime.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/enum.cpython-312.pyc +dracut_install 
/usr/lib/python3.12/__pycache__/functools.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/gettext.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/ipaddress.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/keyword.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/locale.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/operator.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/quopri.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/random.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/reprlib.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/selectors.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/shlex.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/signal.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/sitecustomize.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/socket.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/ssl.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/string.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/struct.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/subprocess.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/threading.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/types.cpython-312.pyc +dracut_install /usr/lib/python3.12/__pycache__/warnings.cpython-312.pyc +dracut_install /usr/lib/python3.12/collections/__pycache__/__init__.cpython-312.pyc +dracut_install /usr/lib/python3.12/collections/__pycache__/abc.cpython-312.pyc +dracut_install /usr/lib/python3.12/ctypes/__pycache__/__init__.cpython-312.pyc +dracut_install /usr/lib/python3.12/ctypes/__pycache__/_endian.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/__init__.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/_encoded_words.cpython-312.pyc 
+dracut_install /usr/lib/python3.12/email/__pycache__/_parseaddr.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/_policybase.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/base64mime.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/charset.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/encoders.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/errors.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/feedparser.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/header.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/iterators.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/message.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/parser.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/quoprimime.cpython-312.pyc +dracut_install /usr/lib/python3.12/email/__pycache__/utils.cpython-312.pyc +dracut_install /usr/lib/python3.12/encodings/__pycache__/__init__.cpython-312.pyc +dracut_install /usr/lib/python3.12/encodings/__pycache__/aliases.cpython-312.pyc +dracut_install /usr/lib/python3.12/encodings/__pycache__/utf_8.cpython-312.pyc +dracut_install /usr/lib/python3.12/http/__pycache__/__init__.cpython-312.pyc +dracut_install /usr/lib/python3.12/http/__pycache__/client.cpython-312.pyc +dracut_install /usr/lib/python3.12/http/__pycache__/cookies.cpython-312.pyc +dracut_install /usr/lib/python3.12/importlib +dracut_install /usr/lib/python3.12/importlib/__pycache__/__init__.cpython-312.pyc +dracut_install /usr/lib/python3.12/importlib/__pycache__/_abc.cpython-312.pyc +dracut_install /usr/lib/python3.12/lib-dynload +dracut_install /usr/lib/python3.12/lib-dynload/_ctypes.cpython-312-x86_64-linux-gnu.so +dracut_install /usr/lib/python3.12/lib-dynload/_ssl.cpython-312-x86_64-linux-gnu.so +dracut_install 
/usr/lib/python3.12/re/__pycache__/__init__.cpython-312.pyc +dracut_install /usr/lib/python3.12/re/__pycache__/_casefix.cpython-312.pyc +dracut_install /usr/lib/python3.12/re/__pycache__/_compiler.cpython-312.pyc +dracut_install /usr/lib/python3.12/re/__pycache__/_constants.cpython-312.pyc +dracut_install /usr/lib/python3.12/re/__pycache__/_parser.cpython-312.pyc +dracut_install /usr/lib/python3.12/urllib/__pycache__/__init__.cpython-312.pyc +dracut_install /usr/lib/python3.12/urllib/__pycache__/parse.cpython-312.pyc +dracut_install /usr/lib/python3/dist-packages/__pycache__/apport_python_hook.cpython-312.pyc +dracut_install /usr/lib/python3/dist-packages/_distutils_hack/__pycache__/__init__.cpython-312.pyc +dracut_install /usr/lib/python3/dist-packages/distutils-precedence.pth +dracut_install /usr/lib/python3/dist-packages/zope.interface-6.1-nspkg.pth +dracut_install /usr/lib/ssl/openssl.cnf +dracut_install /usr/lib/x86_64-linux-gnu/gconv/gconv-modules.cache +dracut_install /usr/local/lib/python3.12/dist-packages +dracut_install /usr/share/locale/locale.alias +dracut_install /usr/lib/python3.12/os.py /usr/lib/python3.12/encodings/__init__.py +dracut_install /usr/bin/python3 +dracut_install /usr/lib/python3.12/_weakrefset.py +dracut_install /usr/lib/python3.12/argparse.py +dracut_install /usr/lib/python3.12/base64.py +dracut_install /usr/lib/python3.12/bisect.py +dracut_install /usr/lib/python3.12/calendar.py +dracut_install /usr/lib/python3.12/collections +dracut_install /usr/lib/python3.12/collections/__init__.py +dracut_install /usr/lib/python3.12/collections/abc.py +dracut_install /usr/lib/python3.12/configparser.py +dracut_install /usr/lib/python3.12/contextlib.py +dracut_install /usr/lib/python3.12/copyreg.py +dracut_install /usr/lib/python3.12/ctypes +dracut_install /usr/lib/python3.12/ctypes/__init__.py +dracut_install /usr/lib/python3.12/ctypes/_endian.py +dracut_install /usr/lib/python3.12/datetime.py +dracut_install /usr/lib/python3.12/email 
+dracut_install /usr/lib/python3.12/email/__init__.py +dracut_install /usr/lib/python3.12/email/_encoded_words.py +dracut_install /usr/lib/python3.12/email/_parseaddr.py +dracut_install /usr/lib/python3.12/email/_policybase.py +dracut_install /usr/lib/python3.12/email/base64mime.py +dracut_install /usr/lib/python3.12/email/charset.py +dracut_install /usr/lib/python3.12/email/encoders.py +dracut_install /usr/lib/python3.12/email/errors.py +dracut_install /usr/lib/python3.12/email/feedparser.py +dracut_install /usr/lib/python3.12/email/header.py +dracut_install /usr/lib/python3.12/email/iterators.py +dracut_install /usr/lib/python3.12/email/message.py +dracut_install /usr/lib/python3.12/email/parser.py +dracut_install /usr/lib/python3.12/email/quoprimime.py +dracut_install /usr/lib/python3.12/email/utils.py +dracut_install /usr/lib/python3.12/encodings +dracut_install /usr/lib/python3.12/encodings/__init__.py +dracut_install /usr/lib/python3.12/encodings/aliases.py +dracut_install /usr/lib/python3.12/encodings/utf_8.py +dracut_install /usr/lib/python3.12/enum.py +dracut_install /usr/lib/python3.12/functools.py +dracut_install /usr/lib/python3.12/gettext.py +dracut_install /usr/lib/python3.12/http +dracut_install /usr/lib/python3.12/http/__init__.py +dracut_install /usr/lib/python3.12/http/client.py +dracut_install /usr/lib/python3.12/http/cookies.py +dracut_install /usr/lib/python3.12/importlib +dracut_install /usr/lib/python3.12/importlib/__init__.py +dracut_install /usr/lib/python3.12/importlib/_abc.py +dracut_install /usr/lib/python3.12/ipaddress.py +dracut_install /usr/lib/python3.12/keyword.py +dracut_install /usr/lib/python3.12/lib-dynload +dracut_install /usr/lib/python3.12/lib-dynload/_ctypes.cpython-312-x86_64-linux-gnu.so +dracut_install /usr/lib/python3.12/lib-dynload/_ssl.cpython-312-x86_64-linux-gnu.so +dracut_install /usr/lib/python3.12/locale.py +dracut_install /usr/lib/python3.12/operator.py +dracut_install /usr/lib/python3.12/os.py +dracut_install 
/usr/lib/python3.12/quopri.py +dracut_install /usr/lib/python3.12/random.py +dracut_install /usr/lib/python3.12/re +dracut_install /usr/lib/python3.12/re/__init__.py +dracut_install /usr/lib/python3.12/re/_casefix.py +dracut_install /usr/lib/python3.12/re/_compiler.py +dracut_install /usr/lib/python3.12/re/_constants.py +dracut_install /usr/lib/python3.12/re/_parser.py +dracut_install /usr/lib/python3.12/reprlib.py +dracut_install /usr/lib/python3.12/selectors.py +dracut_install /usr/lib/python3.12/shlex.py +dracut_install /usr/lib/python3.12/signal.py +dracut_install /usr/lib/python3.12/sitecustomize.py +dracut_install /usr/lib/python3.12/socket.py +dracut_install /usr/lib/python3.12/ssl.py +dracut_install /usr/lib/python3.12/string.py +dracut_install /usr/lib/python3.12/struct.py +dracut_install /usr/lib/python3.12/subprocess.py +dracut_install /usr/lib/python3.12/threading.py +dracut_install /usr/lib/python3.12/types.py +dracut_install /usr/lib/python3.12/urllib +dracut_install /usr/lib/python3.12/urllib/__init__.py +dracut_install /usr/lib/python3.12/urllib/parse.py +dracut_install /usr/lib/python3.12/warnings.py +dracut_install /usr/lib/python3/dist-packages +dracut_install /usr/lib/python3/dist-packages/_distutils_hack/__init__.py +dracut_install /usr/lib/python3/dist-packages/apport_python_hook.py +dracut_install /usr/lib/python3/dist-packages/distutils-precedence.pth +dracut_install /usr/lib/python3/dist-packages/zope.interface-6.1-nspkg.pth +dracut_install /usr/lib/python3/dist-packages/zope/__init__.py +dracut_install /usr/lib/python3.12/ctypes/wintypes.py +dracut_install /usr/lib/python3.12/ctypes/__pycache__/wintypes.cpython-312.pyc +dracut_install /usr/lib/python3.12/ctypes/__pycache__/_aix.cpython-312.pyc +dracut_install /usr/lib/python3.12/ctypes/__pycache__/_endian.cpython-312.pyc +dracut_install /usr/lib/python3.12/ctypes/__pycache__/__init__.cpython-312.pyc +dracut_install /usr/lib/python3.12/ctypes/__pycache__/util.cpython-312.pyc +dracut_install 
/usr/lib/python3.12/ctypes/_aix.py +dracut_install /usr/lib/python3.12/ctypes/__init__.py +dracut_install /usr/lib/python3.12/ctypes/_endian.py +dracut_install /usr/lib/python3.12/ctypes/util.py +dracut_install /usr/lib/python3.12/shutil.py +dracut_install /usr/lib/python3.12/fnmatch.py /usr/lib/python3.12/tempfile.py /usr/lib/python3.12/_weakrefset.py /usr/lib/python3.12/weakref.py /usr/lib/python3.12/glob.py +dracut_install /usr/lib/python3.12/json/__init__.py +dracut_install /usr/lib/python3.12/json/__pycache__ +dracut_install /usr/lib/python3.12/json/decoder.py +dracut_install /usr/lib/python3.12/json/encoder.py +dracut_install /usr/lib/python3.12/json/scanner.py +dracut_install /usr/lib/python3.12/json/tool.py +dracut_install /usr/lib/python3.12/lib-dynload/_json.cpython-312-x86_64-linux-gnu.so +dracut_install /usr/lib/python3.12/encodings/idna.py /usr/lib/python3.12/stringprep.py diff --git a/genesis/97genesis/installkernel b/genesis/97genesis/installkernel index 570e8c15..2d58a290 100644 --- a/genesis/97genesis/installkernel +++ b/genesis/97genesis/installkernel @@ -1,4 +1,8 @@ #!/bin/sh +if grep Ubuntu /etc/os-release > /dev/null; then # must include specific drivers + instmods hid usbhid hid_generic xhci_pci xhci_pci_renesas + instmods virtio_gpu ast bochs dmi_sysfs +fi instmods virtio_net instmods e1000 e1000e igb sfc mlx5_ib mlx5_core mlx4_en cxgb3 cxgb4 tg3 bnx2 bnx2x bna ixgb ixgbe qlge mptsas mpt2sas mpt3sas megaraid_sas ahci xhci-hcd sd_mod pmcraid be2net vfat ext3 ext4 usb_storage scsi_wait_scan ipmi_si ipmi_devintf qlcnic xfs instmods nvme diff --git a/genesis/97genesis/module-setup.sh b/genesis/97genesis/module-setup.sh index afba1fca..600b92db 100644 --- a/genesis/97genesis/module-setup.sh +++ b/genesis/97genesis/module-setup.sh @@ -13,6 +13,8 @@ install() { . $moddir/install-locale if [ -d /usr/lib64/python3.13/ ]; then . $moddir/install-python313 + if [ -d /usr/lib/python3.12/ ]; then + . 
$moddir/install-python312 elif [ -d /usr/lib64/python3.9/ ]; then . $moddir/install-python39 From 9cc3c96f6a1a5e3988e04b81a76b3d52912afd59 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 8 Apr 2025 10:40:07 -0400 Subject: [PATCH 103/413] Fetch fingerprint before credentials Some implementations choose to close the certificate command after granting user/password. Make sure we get the certificate first. --- misc/prepfish.py | 1 + 1 file changed, 1 insertion(+) diff --git a/misc/prepfish.py b/misc/prepfish.py index 5c6ece7a..921108f1 100644 --- a/misc/prepfish.py +++ b/misc/prepfish.py @@ -285,6 +285,7 @@ def store_redfish_cert(bmc): certout.write(peercert) def main(): + get_redfish_fingerprint() bmcuser, bmcpass = get_redfish_creds() bmc = enable_host_interface() store_redfish_cert(bmc) From 0e3543c4aa7fda712cfe1d151a509d059e888c12 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 8 Apr 2025 10:51:29 -0400 Subject: [PATCH 104/413] Fix elif clause in module-setup --- genesis/97genesis/module-setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis/97genesis/module-setup.sh b/genesis/97genesis/module-setup.sh index 600b92db..d81990fb 100644 --- a/genesis/97genesis/module-setup.sh +++ b/genesis/97genesis/module-setup.sh @@ -13,7 +13,7 @@ install() { . $moddir/install-locale if [ -d /usr/lib64/python3.13/ ]; then . $moddir/install-python313 - if [ -d /usr/lib/python3.12/ ]; then + elif [ -d /usr/lib/python3.12/ ]; then . $moddir/install-python312 elif [ -d /usr/lib64/python3.9/ ]; then . 
$moddir/install-python39 From 672bc73756faf3300125c1381db5936b2c89a428 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 8 Apr 2025 14:10:40 -0400 Subject: [PATCH 105/413] Fix for potential hangs on race condition with task exit --- .../plugins/hardwaremanagement/enclosure.py | 9 ++++++--- .../confluent/plugins/hardwaremanagement/pdu.py | 16 +++++++++++----- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py b/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py index a59422c0..c835c852 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py @@ -59,9 +59,12 @@ def update(nodes, element, configmanager, inputdata): for encmgr in baysbyencmgr: gp.spawn_n(reseat_bays, encmgr, baysbyencmgr[encmgr], configmanager, rspq) while gp.running(): - nrsp = rspq.get() - if nrsp is not None: - yield nrsp + try: + nrsp = rspq.get(timeout=0.1) + if nrsp is not None: + yield nrsp + except queue.Empty: + continue while not rspq.empty(): nrsp = rspq.get() if nrsp is not None: diff --git a/confluent_server/confluent/plugins/hardwaremanagement/pdu.py b/confluent_server/confluent/plugins/hardwaremanagement/pdu.py index 3db21636..d3c0f049 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/pdu.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/pdu.py @@ -53,9 +53,12 @@ def retrieve(nodes, element, configmanager, inputdata): for pdu in relpdus: gp.spawn(readpdu, pdu, relpdus[pdu], configmanager, rspq) while gp.running(): - nrsp = rspq.get() - if not isinstance(nrsp, TaskDone): + try: + nrsp = rspq.get(timeout=0.1) + if nrsp is not None and not isinstance(nrsp, TaskDone): yield nrsp + except queue.Empty: + continue while not rspq.empty(): nrsp = rspq.get() if not isinstance(nrsp, TaskDone): @@ -115,9 +118,12 @@ def update(nodes, element, configmanager, inputdata): for pdu 
in relpdus: gp.spawn(updatepdu, pdu, relpdus[pdu], configmanager, inputdata, rspq) while gp.running(): - nrsp = rspq.get() - if not isinstance(nrsp, TaskDone): - yield nrsp + try: + nrsp = rspq.get(timeout=0.1) + if nrsp is not None and not isinstance(nrsp, TaskDone): + yield nrsp + except queue.Empty: + continue while not rspq.empty(): nrsp = rspq.get() if not isinstance(nrsp, TaskDone): From 5d60a6a427f3ad9512b160a26d904b76bd028522 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 8 Apr 2025 14:53:36 -0400 Subject: [PATCH 106/413] Fix indentation in pdu module --- confluent_server/confluent/plugins/hardwaremanagement/pdu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/pdu.py b/confluent_server/confluent/plugins/hardwaremanagement/pdu.py index d3c0f049..1da24daa 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/pdu.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/pdu.py @@ -55,8 +55,8 @@ def retrieve(nodes, element, configmanager, inputdata): while gp.running(): try: nrsp = rspq.get(timeout=0.1) - if nrsp is not None and not isinstance(nrsp, TaskDone): - yield nrsp + if nrsp is not None and not isinstance(nrsp, TaskDone): + yield nrsp except queue.Empty: continue while not rspq.empty(): From 699efd2f4f5cfb9166fbe42fed9c06f04d7f39d9 Mon Sep 17 00:00:00 2001 From: Markus Hilger Date: Wed, 9 Apr 2025 02:37:07 +0200 Subject: [PATCH 107/413] Show valid values from attributes.py in man pages --- confluent_client/addattribs.py | 17 +++++++++++------ confluent_server/confluent/config/attributes.py | 8 +++----- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/confluent_client/addattribs.py b/confluent_client/addattribs.py index 0deece66..db9e4acb 100644 --- a/confluent_client/addattribs.py +++ b/confluent_client/addattribs.py @@ -14,11 +14,16 @@ import shutil shutil.copyfile('doc/man/nodeattrib.ronn.tmpl', 'doc/man/nodeattrib.ronn') 
shutil.copyfile('doc/man/nodegroupattrib.ronn.tmpl', 'doc/man/nodegroupattrib.ronn') -with open('doc/man/nodeattrib.ronn', 'a') as outf: - for field in sorted(attr.node): - outf.write('\n* `{0}`:\n {1}\n'.format(field, attr.node[field]['description'])) -with open('doc/man/nodegroupattrib.ronn', 'a') as outf: - for field in sorted(attr.node): - outf.write('\n* `{0}`:\n {1}\n'.format(field, attr.node[field]['description'])) +def append_attributes(filename): + with open(filename, 'a') as outf: + for field in sorted(attr.node): + outf.write('\n* `{0}`:\n {1}\n'.format(field, attr.node[field]['description'])) + # Optionally write valid values if they exist + for key, values in attr.node[field].items(): + if key.startswith('valid'): + values_formatted = ', '.join("'{0}'".format(v) for v in values) + outf.write(f'\n Valid values: {values_formatted}\n') +append_attributes('doc/man/nodeattrib.ronn') +append_attributes('doc/man/nodegroupattrib.ronn') diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index 4f5ed01b..a6ce0e96 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -265,8 +265,7 @@ node = { }, 'discovery.policy': { 'description': 'Policy to use for auto-configuration of discovered ' - 'and identified nodes. Valid values are "manual", ' - '"permissive", or "open". "manual" means nodes are ' + 'and identified nodes. "manual" means nodes are ' 'detected, but not autoconfigured until a user ' 'approves. "permissive" indicates to allow discovery, ' 'so long as the node has no existing public key. ' @@ -361,9 +360,8 @@ node = { # 'to suppress serial console configuration') # }, 'console.logging': { - 'description': ('Indicate logging level to apply to console. Valid ' - 'values are currently "full", "interactive", "memory", and ' - '"none". Defaults to "full".'), + 'description': ('Indicate logging level to apply to console. 
' + 'Defaults to "full".'), 'validvalues': ('full', 'memory', 'interactive', 'none'), }, 'console.method': { From 7001f0d827de07129791a9db2f4fa615810654ba Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 9 Apr 2025 09:49:10 -0400 Subject: [PATCH 108/413] Add encoding for vfat usage in ubuntu genesis --- genesis/97genesis/installkernel | 1 + 1 file changed, 1 insertion(+) diff --git a/genesis/97genesis/installkernel b/genesis/97genesis/installkernel index 2d58a290..11372451 100644 --- a/genesis/97genesis/installkernel +++ b/genesis/97genesis/installkernel @@ -2,6 +2,7 @@ if grep Ubuntu /etc/os-release > /dev/null; then # must include specific drivers instmods hid usbhid hid_generic xhci_pci xhci_pci_renesas instmods virtio_gpu ast bochs dmi_sysfs + instmods nls_iso8859-1 fi instmods virtio_net instmods e1000 e1000e igb sfc mlx5_ib mlx5_core mlx4_en cxgb3 cxgb4 tg3 bnx2 bnx2x bna ixgb ixgbe qlge mptsas mpt2sas mpt3sas megaraid_sas ahci xhci-hcd sd_mod pmcraid be2net vfat ext3 ext4 usb_storage scsi_wait_scan ipmi_si ipmi_devintf qlcnic xfs From 9744e0d1b0cf17ee793532bcb96419bd368d0328 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 9 Apr 2025 12:19:16 -0400 Subject: [PATCH 109/413] Accept XCC and BMC for aliases of each other --- .../confluent/plugins/hardwaremanagement/ipmi.py | 12 +++++++++++- .../confluent/plugins/hardwaremanagement/redfish.py | 12 +++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index 7e5d7a18..16878d95 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -55,6 +55,15 @@ def get_dns_txt(qstring): return eventlet.support.greendns.resolver.query( qstring, 'TXT')[0].strings[0].replace('i=', '') +def match_aliases(first, second): + aliases = { + ('bmc', 'xcc') + } + for alias in aliases: + if 
first in alias and second in alias: + return True + return False + def get_pci_text_from_ids(subdevice, subvendor, device, vendor): fqpi = '{0}.{1}.{2}.{3}'.format(subdevice, subvendor, device, vendor) if fqpi in pci_cache: @@ -960,7 +969,8 @@ class IpmiHandler(object): complist = () if component == 'all' else (component,) for id, data in self.ipmicmd.get_firmware(complist): if (component in ('core', 'all') or - component == simplify_name(id)): + component == simplify_name(id) or + match_aliases(component, simplify_name(id))): items.append({id: data}) except ssl.SSLEOFError: errorneeded = msg.ConfluentNodeError( diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 7f60dd88..2a2d26f5 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -46,6 +46,15 @@ def get_dns_txt(qstring): return eventlet.support.greendns.resolver.query( qstring, 'TXT')[0].strings[0].replace('i=', '') +def match_aliases(first, second): + aliases = { + ('bmc', 'xcc') + } + for alias in aliases: + if first in alias and second in alias: + return True + return False + def get_pci_text_from_ids(subdevice, subvendor, device, vendor): fqpi = '{0}.{1}.{2}.{3}'.format(subdevice, subvendor, device, vendor) if fqpi in pci_cache: @@ -817,7 +826,8 @@ class IpmiHandler(object): complist = () if component == 'all' else (component,) for id, data in self.ipmicmd.get_firmware(complist): if (component in ('core', 'all') or - component == simplify_name(id)): + component == simplify_name(id) or + match_aliases(component, simplify_name(id))): items.append({id: data}) except ssl.SSLEOFError: errorneeded = msg.ConfluentNodeError( From 2e60ca13b773c6813acf36ceba6862d2c722ce41 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 9 Apr 2025 13:27:29 -0400 Subject: [PATCH 110/413] Try to add gpgkey to local repository This is 
needed for things like followup imgutil --- .../el8/profiles/default/scripts/add_local_repositories | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories b/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories index 79b0b6c5..21271a91 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories +++ b/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories @@ -6,6 +6,7 @@ except ImportError: import importlib.util import importlib.machinery import sys +import glob modloader = importlib.machinery.SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient') modspec = importlib.util.spec_from_file_location('apiclient', '/opt/confluent/bin/apiclient', loader=modloader) apiclient = importlib.util.module_from_spec(modspec) @@ -41,6 +42,7 @@ try: except AttributeError: f = cStringIO.StringIO(cfgdata) c.readfp(f) +gpgkeys = glob.glob('/etc/pki/rpm-gpg/RPM-GG-KEY-*') for sec in c.sections(): if sec.startswith('variant-'): try: @@ -56,3 +58,5 @@ for sec in c.sections(): repopath = repopath[1:] repout.write('baseurl=https://{}/confluent-public/os/{}/distribution/{}\n'.format(server, profile, repopath)) repout.write('enabled=1\n') + if gpgkeys: + repout.write('gpgkey=file://' + gpgkeys[0] + '\n') From 66265d170aef13a3ecc7ecd5a0077012c7eadb67 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 9 Apr 2025 16:06:12 -0400 Subject: [PATCH 111/413] Catch general reseat errors --- .../confluent/plugins/hardwaremanagement/enclosure.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py b/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py index c835c852..08aa6052 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py @@ -33,6 +33,8 @@ def reseat_bays(encmgr, bays, configmanager, 
rspq): rspq.put(msg.ConfluentNodeError(node, str(uf))) except exc.TargetEndpointUnreachable as uf: rspq.put(msg.ConfluentNodeError(node, str(uf))) + except Exception as e: + rspq.put(msg.ConfluentNodeError(node, str(e))) finally: rspq.put(None) From 5e72a8b3c0fa3ecabd3af3792db9078345e3a211 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 9 Apr 2025 16:29:57 -0400 Subject: [PATCH 112/413] Handle reseat with '1a/1b' type bay description This fixes ability to reseat newer chassis when using the coordinate specification for bay location. --- confluent_server/confluent/messages.py | 2 +- .../confluent/plugins/hardwaremanagement/enclosure.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index e8ea972f..f74f027a 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -1060,7 +1060,7 @@ class InputReseatMessage(ConfluentInputMessage): keyname = 'reseat' def is_valid_key(self, key): - return key in self.valid_values or isinstance(key, int) + return key in self.valid_values or isinstance(key, int) or len(key) < 4 class InputBMCReset(ConfluentInputMessage): diff --git a/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py b/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py index 08aa6052..8665386a 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py @@ -27,7 +27,7 @@ def reseat_bays(encmgr, bays, configmanager, rspq): for rsp in core.handle_path( '/nodes/{0}/_enclosure/reseat_bay'.format(encmgr), 'update', configmanager, - inputdata={'reseat': int(encbay)}): + inputdata={'reseat': encbay}): rspq.put(rsp) except pygexc.UnsupportedFunctionality as uf: rspq.put(msg.ConfluentNodeError(node, str(uf))) From ddf92445141060b5612ad1424b00fe58f0d310a2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: 
Wed, 9 Apr 2025 17:00:56 -0400 Subject: [PATCH 113/413] Correct typo in add_local_repositories --- .../el8/profiles/default/scripts/add_local_repositories | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories b/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories index 21271a91..f1b423f0 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories +++ b/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories @@ -42,7 +42,7 @@ try: except AttributeError: f = cStringIO.StringIO(cfgdata) c.readfp(f) -gpgkeys = glob.glob('/etc/pki/rpm-gpg/RPM-GG-KEY-*') +gpgkeys = glob.glob('/etc/pki/rpm-gpg/RPM-GPG-KEY-*') for sec in c.sections(): if sec.startswith('variant-'): try: From 9174ad651f7253fbff3e472ed91a1121f52d0fd8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 11 Apr 2025 08:38:51 -0400 Subject: [PATCH 114/413] Fallback to mac Some systems do not have UUIDs. Which is unfortunate, but usually a system mac address does well enough. 
--- .../common/initramfs/opt/confluent/bin/apiclient | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient index 61eddf8b..efc0a562 100644 --- a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient +++ b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient @@ -91,8 +91,11 @@ def scan_confluents(): confluentuuid = line.split(': ')[1] msg += '/confluentuuid=' + confluentuuid break - with open('/sys/devices/virtual/dmi/id/product_uuid') as uuidin: - msg += '/uuid=' + uuidin.read().strip() + try: + with open('/sys/devices/virtual/dmi/id/product_uuid') as uuidin: + msg += '/uuid=' + uuidin.read().strip() + except Exception: + pass for addrf in glob.glob('/sys/class/net/*/address'): with open(addrf) as addrin: hwaddr = addrin.read().strip() From 1985525cc0c6bf53287ff8a84526538523425395 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 11 Apr 2025 12:13:30 -0400 Subject: [PATCH 115/413] Add all gpgkeys to local repositories gpgkey can take multiple, and better to specify them all instead of just one. 
--- .../el8/profiles/default/scripts/add_local_repositories | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories b/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories index f1b423f0..ff0d27e3 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories +++ b/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories @@ -59,4 +59,5 @@ for sec in c.sections(): repout.write('baseurl=https://{}/confluent-public/os/{}/distribution/{}\n'.format(server, profile, repopath)) repout.write('enabled=1\n') if gpgkeys: - repout.write('gpgkey=file://' + gpgkeys[0] + '\n') + gpgkeyvals = ['file://{}'.format(x) for x in gpgkeys] + repout.write('gpgkey=' + ' '.join(gpgkeyvals) + '\n') From 90f4a2a062c0b06b82a27cc9b94f73431bb9a257 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 11 Apr 2025 12:55:01 -0400 Subject: [PATCH 116/413] Improve text console behaviors of Ubuntu and RedHat RedHat makes grub redundantly handle serial output that firmware already handles. If we detect EFI firmware and SPCR ACPI table and connected serial port, that suggests that firmware will handle. Ubuntu hates serial console by default, amend it so it can actually work for serial users. --- .../common/profile/scripts/autoconsole | 117 ++++++++++++++++++ .../el8/profiles/default/scripts/post.sh | 5 + .../profiles/default/scripts/post.sh | 4 + 3 files changed, 126 insertions(+) create mode 100644 confluent_osdeploy/common/profile/scripts/autoconsole diff --git a/confluent_osdeploy/common/profile/scripts/autoconsole b/confluent_osdeploy/common/profile/scripts/autoconsole new file mode 100644 index 00000000..b37e1df6 --- /dev/null +++ b/confluent_osdeploy/common/profile/scripts/autoconsole @@ -0,0 +1,117 @@ +#!/usr/bin/python3 + +# This script evaluates whether firmware redirection is likely. It uses three cues: +# - Does the system offer up SPCR? 
This would indicate that the firmware is doing serial output. +# Otherwise, there's no indication that the firmware cares about serial console. +# - Is the system EFI? BIOS implementations may not intercept text draw calls after POST exit, +# thus even when BIOS tells us serial port is in use, it may not be doing anything when +# grub would be running +# - Is the serial port connected? In the event that firmware indicates serial port, but +# serial port is not reporting DCD, then it doesn't look like a comfortable enough scenario + +import fcntl +import os +import os.path +import struct +import subprocess +import termios + + +addrtoname = { + 0x3f8: '/dev/ttyS0', + 0x2f8: '/dev/ttyS1', + 0x3e8: '/dev/ttyS2', + 0x2e8: '/dev/ttyS3', +} +speedmap = { + 0: None, + 3: 9600, + 4: 19200, + 6: 57600, + 7: 115200, +} + +termiobaud = { + 9600: termios.B9600, + 19200: termios.B19200, + 57600: termios.B57600, + 115200: termios.B115200, +} + + +def deserialize_grub_rh(): + if 'console=ttyS' in open('/proc/cmdline').read(): + return None # User manually indicated serial config + # they own the grub behavior too for now + grublines = [] + with open('/etc/default/grub') as grubin: + grublines = grubin.read().split('\n') + with open('/etc/default/grub', 'w') as grubout: + for grubline in grublines: + if grubline.startswith('GRUB_TERMINAL'): + grubline = grubline.replace('serial ', '') + grubout.write(grubline + '\n') + subprocess.check_call(['grub2-mkconfig', '-o', '/boot/grub2/grub.cfg']) + +def fixup_ubuntu_grub_serial(): + # Ubuntu aggressively tries to graphics up + # grub. We will counter that for serial + # They also aggressively hide UI and + # block ability to interject. 
We will + # compromise and lean on nodeboot setup + # as a means to give someone reasonable shot at + # the short timeout + with open('/etc/default/grub') as grubin: + grublines = grubin.read().split('\n') + with open('/etc/default/grub', 'w') as grubout: + for grubline in grublines: + if grubline.startswith('GRUB_TIMEOUT_STYLE=hidden'): + grubline = 'GRUB_TIMEOUT_STYLE=menu' + elif grubline.startswith('GRUB_TIMEOUT=0'): + grubline = 'GRUB_TIMEOUT=2' + elif grubline.startswith('#GRUB_TERMINAL=console'): + grubline = grubline.replace('#', '') + grubout.write(grubline + '\n') + subprocess.check_call(['update-grub']) + +def get_serial_config(): + if not os.path.exists('/sys/firmware/efi'): + return None + spcr = open("/sys/firmware/acpi/tables/SPCR", "rb") + spcr = bytearray(spcr.read()) + if spcr[8] != 2 or spcr[36] != 0 or spcr[40] != 1: + return None + address = struct.unpack(' /target/etc/confluent/ca.pem cat /target/etc/confluent/tls/*.pem > /target/usr/local/share/ca-certificates/confluent.crt cat /target/etc/confluent/tls/*.pem > /etc/confluent/ca.pem chroot /target update-ca-certificates + +# Ubuntu mangles grub function for serial users, undo that mangling +chroot /target bash -c "source /etc/confluent/functions; run_remote_python autoconsole" + chroot /target bash -c "source /etc/confluent/functions; run_remote_python syncfileclient" chroot /target bash -c "source /etc/confluent/functions; run_remote_python confignet" chroot /target bash -c "source /etc/confluent/functions; run_remote_parts post.d" From 1ec08336e69b97f717be769e4e616dfd43c240ab Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 11 Apr 2025 13:18:41 -0400 Subject: [PATCH 117/413] Add notation on how to opt out of ubuntu install internet connect --- .../ubuntu22.04/profiles/default/autoinstall/user-data | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/autoinstall/user-data 
b/confluent_osdeploy/ubuntu22.04/profiles/default/autoinstall/user-data index 5b6c9894..de07bb82 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/autoinstall/user-data +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/autoinstall/user-data @@ -1,5 +1,13 @@ #cloud-config autoinstall: +# The following can help an Ubuntu system skip install-time updates +# Only uncomment if you know you really want to do this or plan to manage the updates +# a different way. +# +# updates: security +# apt: +# disable_suites: [security] +# fallback: offline-install version: 1 early-commands: - /custom-installation/pre.sh From 49ac3487c24dc055f225130abdd869367b6b16cd Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 11 Apr 2025 13:23:45 -0400 Subject: [PATCH 118/413] Fix bad indentation in add_local_repositories --- .../el8/profiles/default/scripts/add_local_repositories | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories b/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories index ff0d27e3..c3bc7e68 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories +++ b/confluent_osdeploy/el8/profiles/default/scripts/add_local_repositories @@ -60,4 +60,4 @@ for sec in c.sections(): repout.write('enabled=1\n') if gpgkeys: gpgkeyvals = ['file://{}'.format(x) for x in gpgkeys] - repout.write('gpgkey=' + ' '.join(gpgkeyvals) + '\n') + repout.write('gpgkey=' + ' '.join(gpgkeyvals) + '\n') From e9372a4d344b2ed0c5a4be0b501414d2d528b925 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 11 Apr 2025 15:10:28 -0400 Subject: [PATCH 119/413] Provide means for nodeping to use original name on -s --- confluent_client/bin/nodeping | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/confluent_client/bin/nodeping b/confluent_client/bin/nodeping index b9c45340..1140a6bd 100755 --- a/confluent_client/bin/nodeping +++ 
b/confluent_client/bin/nodeping @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2016-2017 Lenovo @@ -42,6 +42,8 @@ def run(): usage="Usage: %prog [options] noderange") argparser.add_option('-f', '-c', '--count', type='int', default=168, help='Number of commands to run at a time') + argparser.add_option('-o', '--origname', action='store_true', + help='Use original nodename in print out even if substituted') argparser.add_option('-s', '--substitutename', help='Use a different name other than the nodename for ping, with {}, it is the entire name evaluated as an expression, otherwise it is used as a suffix') # among other things, FD_SETSIZE limits. Besides, spawning too many @@ -83,7 +85,10 @@ def run(): cmdv = ['ping', '-c', '1', '-W', '1', pingnode] if currprocs < concurrentprocs: currprocs += 1 - run_cmdv(pingnode, cmdv, all, pipedesc) + if options.origname: + run_cmdv(node, cmdv, all, pipedesc) + else: + run_cmdv(pingnode, cmdv, all, pipedesc) else: pendingexecs.append((pingnode, cmdv)) if not all or exitcode: From 507e6fa9acdf05856decec60973d41f3a0d73254 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 11 Apr 2025 16:05:08 -0400 Subject: [PATCH 120/413] Ensure bash runs the genesis_bootstrap from media --- .../genesis/initramfs/opt/confluent/bin/rungenesis | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis index 107d48fb..362617ac 100644 --- a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis +++ b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis @@ -66,14 +66,14 @@ if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then mkdir -p /media/ident mount /dev/disk/by-label/CNFLNT_IDNT /media/ident if [ -e /media/ident/genesis_bootstrap.sh ]; then - exec sh /media/ident/genesis_bootstrap.sh + exec bash 
/media/ident/genesis_bootstrap.sh fi fi if [ -e /dev/disk/by-label/GENESIS-X86 ]; then mkdir -p /media/genesis mount /dev/disk/by-label/GENESIS-X86 /media/genesis if [ -e /media/genesis/genesis_bootstrap.sh ]; then - exec sh /media/genesis/genesis_bootstrap.sh + exec bash /media/genesis/genesis_bootstrap.sh fi fi cd /sys/class/net From 6d1da859915d7c03eaa93d511d6501c63f75d4df Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 11 Apr 2025 17:13:19 -0400 Subject: [PATCH 121/413] Implement screenshot via nodeconsole -s This will grab screenshots from Lenovo systems and output them to the console, using the kitty image protocol. --- confluent_client/bin/nodeconsole | 23 +++++++++++++++++++ confluent_server/confluent/core.py | 4 ++++ confluent_server/confluent/messages.py | 7 ++++++ .../plugins/hardwaremanagement/ipmi.py | 19 +++++++++++++++ .../plugins/hardwaremanagement/redfish.py | 19 +++++++++++++++ 5 files changed, 72 insertions(+) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index f05d5783..82b95583 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import base64 import optparse import os import subprocess @@ -46,6 +47,8 @@ argparser.add_option('-l', '--log', action='store_true', default=False, argparser.add_option('-T', '--Timestamp', action='store_true', default=False, help= 'Dump log in stdout with timestamps') +argparser.add_option('-s', '--screenshot', action='store_true', default=False, + help='Attempt to grab screenshot and render using kitty image protocol') argparser.add_option('-w','--windowed', action='store_true', default=False, help='Open terminal windows for each node. 
The ' 'environment variable NODECONSOLE_WINDOWED_COMMAND ' @@ -69,6 +72,16 @@ argparser.add_option('-w','--windowed', action='store_true', default=False, (options, args) = argparser.parse_args() +def kitty_draw(data): + while data: + chunk, data = data[:4096], data[4096:] + m = 1 if data else 0 + sys.stdout.write('\x1b_Ga=T,f=100,m={};'.format(m)) + sys.stdout.write(chunk.decode('utf8')) + sys.stdout.write('\x1b\\') + sys.stdout.flush() + sys.stdout.write('\n') + pass_through_args = [] killcon = False try: @@ -106,6 +119,16 @@ if options.Timestamp: logreader.dump_to_console(logname) sys.exit(0) +if options.screenshot: + sess = client.Command() + for res in sess.read('/noderange/{}/console/ikvm_screenshot'.format(args[0])): + for node in res.get('databynode', {}): + imgdata = res['databynode'][node]['image']['imgdata'] + sys.stdout.write('{}: '.format(node)) + kitty_draw(imgdata.encode()) + sys.stdout.write('\n') + sys.exit(0) + def kill(noderange): sess = client.Command() envstring=os.environ.get('NODECONSOLE_WINDOWED_COMMAND') diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index 61a03d05..ef1c33ee 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -442,6 +442,10 @@ def _init_core(): 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', }), + 'ikvm_screenshot': PluginRoute({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), }, 'description': PluginRoute({ 'pluginattrs': ['hardwaremanagement.method'], diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index f74f027a..62830400 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -18,6 +18,7 @@ # This module implements client/server messages emitted from plugins. # Things are defined here to 'encourage' developers to coordinate information # format. 
This is also how different data formats are supported +import base64 import confluent.exceptions as exc import confluent.config.configmanager as cfm import confluent.config.conf as cfgfile @@ -1882,6 +1883,12 @@ class GraphicalConsole(ConfluentMessage): else: self.kvpairs = {name: {'Launcher': kv}} +class ScreenShot(ConfluentMessage): + readonly = True + def __init__(self, imgdata, node, imgformat=None): + self.kvpairs = {node: {'image': {'imgformat': imgformat, 'imgdata': base64.b64encode(imgdata)}}} + + class CryptedAttributes(Attributes): defaulttype = 'password' diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index 16878d95..ed4236c9 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -28,6 +28,7 @@ import eventlet.greenpool as greenpool import eventlet.queue as queue import eventlet.support.greendns from fnmatch import fnmatch +import io import os import pwd import pyghmi.constants as pygconstants @@ -51,6 +52,14 @@ except NameError: pci_cache = {} +class RetainedIO(io.BytesIO): + # Need to retain buffer after close + def __init__(self): + self.resultbuffer = None + def close(self): + self.resultbuffer = self.getbuffer() + super().close() + def get_dns_txt(qstring): return eventlet.support.greendns.resolver.query( qstring, 'TXT')[0].strings[0].replace('i=', '') @@ -607,6 +616,8 @@ class IpmiHandler(object): self.handle_description() elif self.element == ['console', 'ikvm_methods']: self.handle_ikvm_methods() + elif self.element == ['console', 'ikvm_screenshot']: + self.handle_ikvm_screenshot() elif self.element == ['console', 'ikvm']: self.handle_ikvm() else: @@ -1641,6 +1652,14 @@ class IpmiHandler(object): dsc = {'ikvm_methods': dsc} self.output.put(msg.KeyValueData(dsc, self.node)) + def handle_ikvm_screenshot(self): + # good background for the webui, and kitty + imgdata = 
RetainedIO() + imgformat = self.ipmicmd.get_screenshot(imgdata) + imgdata = imgdata.getvalue() + if imgdata: + self.output.put(msg.ScreenShot(imgdata, self.node, imgformat=imgformat)) + def handle_ikvm(self): methods = self.ipmicmd.get_ikvm_methods() if 'openbmc' in methods: diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 2a2d26f5..0f2cd5ae 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -27,6 +27,7 @@ import eventlet.greenpool as greenpool import eventlet.queue as queue import eventlet.support.greendns from fnmatch import fnmatch +import io import os import pwd import pyghmi.constants as pygconstants @@ -42,6 +43,14 @@ if not hasattr(ssl, 'SSLEOFError'): pci_cache = {} +class RetainedIO(io.BytesIO): + # Need to retain buffer after close + def __init__(self): + self.resultbuffer = None + def close(self): + self.resultbuffer = self.getbuffer() + super().close() + def get_dns_txt(qstring): return eventlet.support.greendns.resolver.query( qstring, 'TXT')[0].strings[0].replace('i=', '') @@ -464,6 +473,8 @@ class IpmiHandler(object): self.handle_description() elif self.element == ['console', 'ikvm_methods']: self.handle_ikvm_methods() + elif self.element == ['console', 'ikvm_screenshot']: + self.handle_ikvm_screenshot() elif self.element == ['console', 'ikvm']: self.handle_ikvm() else: @@ -1498,6 +1509,14 @@ class IpmiHandler(object): dsc = {'ikvm_methods': dsc} self.output.put(msg.KeyValueData(dsc, self.node)) + def handle_ikvm_screenshot(self): + # good background for the webui, and kitty + imgdata = RetainedIO() + imgformat = self.ipmicmd.get_screenshot(imgdata) + imgdata = imgdata.getvalue() + if imgdata: + self.output.put(msg.ScreenShot(imgdata, self.node, imgformat=imgformat)) + def handle_ikvm(self): methods = self.ipmicmd.get_ikvm_methods() if 'openbmc' in 
methods: From 35e3ca1f1febdeb6be69c1e434e0c65e8ee5263c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 14 Apr 2025 07:53:40 -0400 Subject: [PATCH 122/413] Have screenshots become normal text Base64 comes out as ASCII bytes, change to text for json handling. --- confluent_server/confluent/messages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index 62830400..16879638 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -1886,7 +1886,7 @@ class GraphicalConsole(ConfluentMessage): class ScreenShot(ConfluentMessage): readonly = True def __init__(self, imgdata, node, imgformat=None): - self.kvpairs = {node: {'image': {'imgformat': imgformat, 'imgdata': base64.b64encode(imgdata)}}} + self.kvpairs = {node: {'image': {'imgformat': imgformat, 'imgdata': base64.b64encode(imgdata).decode()}}} class CryptedAttributes(Attributes): From f11473c7361513a36569f4b98d71a6310c12d8cc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 14 Apr 2025 10:25:25 -0400 Subject: [PATCH 123/413] Numerous fixes for shell server On exit, clear the terminal buffer and invalidate the session. This avoids the web ui being very attached to a closed, dead session, and leaking stale buffer to a reused sessionid. For confetty, treat starting a shell session more like starting a console session. If an attempt to resize a dead session is attempted, ignore failures. 
--- confluent_client/bin/confetty | 16 ++++++++++------ confluent_server/confluent/plugins/shell/ssh.py | 6 +++++- confluent_server/confluent/shellserver.py | 10 ++++++++-- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/confluent_client/bin/confetty b/confluent_client/bin/confetty index e1126df8..e14a55ed 100755 --- a/confluent_client/bin/confetty +++ b/confluent_client/bin/confetty @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation @@ -654,13 +654,17 @@ def quitconfetty(code=0, fullexit=False, fixterm=True): def get_session_node(shellargs): # straight to node console if len(shellargs) == 1 and ' ' not in shellargs[0]: - return shellargs[0] + targ = "/nodes/%s/console/session" % shellargs[0] + return targ, shellargs[0] if len(shellargs) == 2 and shellargs[0] == 'start': args = [s for s in shellargs[1].split('/') if s] if len(args) == 4 and args[0] == 'nodes' and args[2] == 'console' and \ args[3] == 'session': - return args[1] - return None + return shellargs[1], args[1] + if len(args) == 5 and args[0] == 'nodes' and args[2] == 'shell' and \ + args[3] == 'sessions': + return shellargs[1], args[1] + return None, None def run_inline_command(path, arg, completion, **kwargs): @@ -917,10 +921,10 @@ def main(): doexit = False inconsole = False pendingcommand = "" - session_node = get_session_node(shellargs) + targ, session_node = get_session_node(shellargs) if session_node is not None: consoleonly = True - do_command("start /nodes/%s/console/session" % session_node, netserver) + do_command("start %s" % targ, netserver) doexit = True elif shellargs: do_command(shellargs, netserver) diff --git a/confluent_server/confluent/plugins/shell/ssh.py b/confluent_server/confluent/plugins/shell/ssh.py index f802f842..cbb8586e 100644 --- a/confluent_server/confluent/plugins/shell/ssh.py +++ b/confluent_server/confluent/plugins/shell/ssh.py @@ -93,7 +93,10 @@ class 
SshShell(conapi.Console): self.height = height if not self.connected: return - self.shell.resize_pty(width=width, height=height) + try: + self.shell.resize_pty(width=width, height=height) + except Exception: + pass def recvdata(self): while self.connected: @@ -254,6 +257,7 @@ class SshShell(conapi.Console): self.shell.sendall(data) def close(self): + self.connected = False if self.ssh is not None: self.ssh.close() self.datacallback = None diff --git a/confluent_server/confluent/shellserver.py b/confluent_server/confluent/shellserver.py index 4e81ec2b..386f7f56 100644 --- a/confluent_server/confluent/shellserver.py +++ b/confluent_server/confluent/shellserver.py @@ -73,6 +73,7 @@ class _ShellHandler(consoleserver.ConsoleHandler): self._send_rcpts({'connectstate': self.connectstate}) for session in list(self.livesessions): session.destroy() + self.feedbuffer('\x1bc') @@ -136,9 +137,14 @@ class ShellSession(consoleserver.ConsoleSession): while str(self.sessionid) in activesessions[(tenant, self.node, self.username)]: self.sessionid += 1 self.sessionid = str(self.sessionid) - if self.sessionid not in activesessions[(tenant, self.node, self.username)]: + conshdl = activesessions[(tenant, self.node, self.username)].get(self.sessionid, None) + if conshdl and conshdl.connectstate == 'closed': + del activesessions[(tenant, self.node, self.username)][self.sessionid] + conshdl = None + if not conshdl: activesessions[(tenant, self.node, self.username)][self.sessionid] = _ShellHandler(self.node, self.configmanager, width=self.width, height=self.height, prefix='s_{}_{}'.format(self.username, self.sessionid)) - self.conshdl = activesessions[(self.configmanager.tenant, self.node, self.username)][self.sessionid] + conshdl = activesessions[(self.configmanager.tenant, self.node, self.username)][self.sessionid] + self.conshdl = conshdl self.conshdl.numusers += 1 def destroy(self): From e27f07ac36f925c97623e09a1d368bde3ec7961e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 15 
Apr 2025 14:25:03 -0400 Subject: [PATCH 124/413] Use IPv4 address for ikvm when fe80 is the local bmc The fe80:: is hopeless, try to send ipv4 just in case. Technically speaking, the user may be using a different address or real ipv6 and the ipv4 guess might fail, but it probably won't. --- confluent_server/confluent/plugins/hardwaremanagement/ipmi.py | 3 +++ .../confluent/plugins/hardwaremanagement/redfish.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index ed4236c9..02e324bb 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -1669,6 +1669,9 @@ class IpmiHandler(object): launchdata = self.ipmicmd.get_ikvm_launchdata() if 'url' in launchdata and not launchdata['url'].startswith('https://'): mybmc = self.ipmicmd.confluentbmcname + if mybmc.startswith('fe80::'): # link local, need to adjust + lancfg = self.ipmicmd.get_net_configuration() + mybmc = lancfg['ipv4_address'].split('/')[0] if ':' in mybmc and not '[' in mybmc: mybmc = '[{}]'.format(mybmc) launchdata['url'] = 'https://{}{}'.format(mybmc, launchdata['url']) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 0f2cd5ae..03edd4a6 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -1526,6 +1526,9 @@ class IpmiHandler(object): launchdata = self.ipmicmd.get_ikvm_launchdata() if 'url' in launchdata and not launchdata['url'].startswith('https://'): mybmc = self.ipmicmd.confluentbmcname + if mybmc.startswith('fe80::'): # link local, need to adjust + lancfg = self.ipmicmd.get_net_configuration() + mybmc = lancfg['ipv4_address'].split('/')[0] if ':' in mybmc and not '[' in mybmc: mybmc = 
'[{}]'.format(mybmc) launchdata['url'] = 'https://{}{}'.format(mybmc, launchdata['url']) From a138bef55103b688cc4d54e8164613a6bacaabcc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 15 Apr 2025 15:40:17 -0400 Subject: [PATCH 125/413] Do not worry about failure to reply to a SSDP confluent request --- confluent_server/confluent/discovery/protocols/ssdp.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/protocols/ssdp.py b/confluent_server/confluent/discovery/protocols/ssdp.py index 28f14fe8..d8acf753 100644 --- a/confluent_server/confluent/discovery/protocols/ssdp.py +++ b/confluent_server/confluent/discovery/protocols/ssdp.py @@ -297,7 +297,10 @@ def snoop(handler, byehandler=None, protocol=None, uuidlookup=None): continue if not isinstance(reply, bytes): reply = reply.encode('utf8') - s.sendto(reply, peer) + try: + s.sendto(reply, peer) + except Exception: + pass break r = select.select((net4, net6), (), (), 0.2) if r: From e5da8c01a934ce09e76d6a59df596fe3f06e280f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 15 Apr 2025 15:51:12 -0400 Subject: [PATCH 126/413] Do not attempt to print non-existent data. 
--- confluent_client/bin/nodeconsole | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 82b95583..ddfbd0e3 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -123,10 +123,11 @@ if options.screenshot: sess = client.Command() for res in sess.read('/noderange/{}/console/ikvm_screenshot'.format(args[0])): for node in res.get('databynode', {}): - imgdata = res['databynode'][node]['image']['imgdata'] - sys.stdout.write('{}: '.format(node)) - kitty_draw(imgdata.encode()) - sys.stdout.write('\n') + imgdata = res['databynode'][node].get('image', {}).get('imgdata', None) + if imgdata: + sys.stdout.write('{}: '.format(node)) + kitty_draw(imgdata.encode()) + sys.stdout.write('\n') sys.exit(0) def kill(noderange): From 9823ffc12dadf8f4ad82ee1b5ee20395f5448df7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 16 Apr 2025 09:46:48 -0400 Subject: [PATCH 127/413] Fix collective serialization of screenshot messages --- confluent_server/confluent/messages.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index 16879638..6dbe031f 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -87,7 +87,13 @@ def _htmlify_structure(indict): def msg_deserialize(packed): - m = msgpack.unpackb(packed, raw=False) + try: + m = msgpack.unpackb(packed, raw=False) + except UnicodeDecodeError: # binary data, likely imagedata + # strings will be made binary, so binary messages + # must tolerate either string or bytes + m = msgpack.unpackb(packed) + m[0] = m[0].decode() cls = globals()[m[0]] if issubclass(cls, ConfluentMessage) or issubclass(cls, ConfluentNodeError): return cls(*m[1:]) @@ -1885,7 +1891,13 @@ class GraphicalConsole(ConfluentMessage): class ScreenShot(ConfluentMessage): readonly = True + def 
__init__(self, imgdata, node, imgformat=None): + if isinstance(node, bytes): + node = node.decode() + if isinstance(imgformat, bytes): + imgformat = imgformat.decode() + self.myargs = (imgdata, node, imgformat) self.kvpairs = {node: {'image': {'imgformat': imgformat, 'imgdata': base64.b64encode(imgdata).decode()}}} From e46b4ede6dfc63fa09e5c30e7fcd76e8c1041044 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 16 Apr 2025 12:50:59 -0400 Subject: [PATCH 128/413] Implement a CONFLUENT_IMAGE_PROTOCOL env variable This directs CLI with image output to use a preferred protocol. This is retroactively applied to stats. Currently we prefer kitty, as it seems to be the most widely supported. Though some things only support iterm, so that's an option. And some only support sixel, but the user has to be the one to figure out adding pysixel dependency. --- confluent_client/bin/nodeconsole | 44 +++++++++++++++++++++++++++++++- confluent_client/bin/stats | 4 ++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index ddfbd0e3..aafdb1f7 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -32,6 +32,20 @@ import time import socket import re +try: + # sixel is optional, attempt to import but stub out if unavailable + import io + import sixel + + class DumbWriter(sixel.SixelWriter): + def restore_position(self, output): + return +except ImportError: + class DumbWriter(): + def draw(self, imgfile): + sys.stderr.write("PySixel not detected, Sixel format display not supported\n") + + confettypath = os.path.join(os.path.dirname(sys.argv[0]), 'confetty') argparser = optparse.OptionParser( usage="Usage: %prog [options] [kill][-- [passthroughoptions]]", @@ -72,6 +86,34 @@ argparser.add_option('-w','--windowed', action='store_true', default=False, (options, args) = argparser.parse_args() + +def draw_image(data): + imageformat = os.environ.get('CONFLUENT_IMAGE_PROTOCOL', 
'kitty') + if imageformat == 'sixel': + sixel_draw(data) + elif imageformat == 'iterm': + iterm_draw(data) + else: + kitty_draw(data) + + +def sixel_draw(data): + bindata = base64.b64decode(data) + binfile = io.BytesIO() + binfile.write(bindata) + binfile.seek(0) + DumbWriter().draw(binfile) + +def iterm_draw(data): + bindata = base64.b64decode(data) + datalen = len(bindata) + sys.stdout.write( + '\x1b]1337;File=inline=1;size={}:'.format(datalen)) + sys.stdout.write(data.decode('utf8')) + sys.stdout.write('\a') + sys.stdout.write('\n') + sys.stdout.flush() + def kitty_draw(data): while data: chunk, data = data[:4096], data[4096:] @@ -126,7 +168,7 @@ if options.screenshot: imgdata = res['databynode'][node].get('image', {}).get('imgdata', None) if imgdata: sys.stdout.write('{}: '.format(node)) - kitty_draw(imgdata.encode()) + draw_image(imgdata.encode()) sys.stdout.write('\n') sys.exit(0) diff --git a/confluent_client/bin/stats b/confluent_client/bin/stats index 94af75db..0893fadb 100755 --- a/confluent_client/bin/stats +++ b/confluent_client/bin/stats @@ -72,6 +72,8 @@ def plot(gui, output, plotdata, bins, fmt): tdata = io.BytesIO() plt.savefig(tdata) if not gui and not output: + if fmt == 'environment': + fmt = os.environ.get('CONFLUENT_IMAGE_PROTOCOL', 'kitty') if fmt == 'sixel': writer = DumbWriter() writer.draw(tdata) @@ -108,7 +110,7 @@ aparser = argparse.ArgumentParser(description='Quick access to common statistics aparser.add_argument('-c', type=int, default=0, help='Column number to analyze (default is last column)') aparser.add_argument('-d', default=None, help='Value used to separate columns') aparser.add_argument('-x', default=False, action='store_true', help='Output histogram in graphical format') -aparser.add_argument('-f', default='sixel', help='Format for histogram output (sixel/iterm/kitty)') +aparser.add_argument('-f', default='environment', help='Format for histogram output (sixel/iterm/kitty)') aparser.add_argument('-s', default=0, help='Number of 
header lines to skip before processing') aparser.add_argument('-g', default=False, action='store_true', help='Open histogram in separate graphical window') aparser.add_argument('-o', default=None, help='Output histogram to the specified filename in PNG format') From 656dea0929300810f1b6fd75f157330140f9d60d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 16 Apr 2025 15:34:06 -0400 Subject: [PATCH 129/413] Add error for failure to get screenshot Usually this is due to the target not having a license key, in the case of Lenovo equipment. --- confluent_client/bin/nodeconsole | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index aafdb1f7..c2443649 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -167,6 +167,9 @@ if options.screenshot: for node in res.get('databynode', {}): imgdata = res['databynode'][node].get('image', {}).get('imgdata', None) if imgdata: + if len(imgdata) < 32: # We were subjected to error + sys.stderr.write(f'{node}: Unable to get screenshot\n') + continue sys.stdout.write('{}: '.format(node)) draw_image(imgdata.encode()) sys.stdout.write('\n') From 69240ef49299026d19087942c64c0444f792f63b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 17 Apr 2025 08:30:20 -0400 Subject: [PATCH 130/413] Add Fedora 42 scripted install support --- .../usr/lib/dracut/hooks/cmdline/01-confluent.sh | 8 +++++++- .../usr/lib/dracut/hooks/pre-pivot/01-confluent.sh | 3 +++ confluent_server/confluent/osimage.py | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/cmdline/01-confluent.sh b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/cmdline/01-confluent.sh index bc327610..84882ba4 100644 --- a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/cmdline/01-confluent.sh +++ b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/cmdline/01-confluent.sh @@ -1,7 +1,13 @@ 
#!/bin/bash echo -n "" >> /tmp/net.ifaces echo -n "" > /tmp/01-autocons.devnode -cat /tls/*.0 >> /etc/pki/tls/certs/ca-bundle.crt +BUNDLENAME=/etc/pki/tls/certs/ca-bundle.crt +if [ ! -e "$BUNDLENAME" ]; then + BUNDLENAME=/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem + mkdir -p /etc/pki/tls/certs + ln -s $BUNDLENAME /etc/pki/tls/certs/ca-bundle.crt +fi +cat /tls/*.0 >> $BUNDLENAME if ! grep console= /proc/cmdline >& /dev/null; then autocons=$(/opt/confluent/bin/autocons) if [ -n "$autocons" ]; then diff --git a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-pivot/01-confluent.sh b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-pivot/01-confluent.sh index 89963143..cde90c3d 100644 --- a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-pivot/01-confluent.sh +++ b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-pivot/01-confluent.sh @@ -1,5 +1,8 @@ #!/bin/bash BUNDLENAME=/sysroot/etc/pki/tls/certs/ca-bundle.crt +if [ ! -e "$BUNDLENAME" ]; then + BUNDLENAME=/sysroot/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem +fi while [ -h $BUNDLENAME ]; do BUNDLENAME=/sysroot/$(readlink $BUNDLENAME) done diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 595785e6..8feefead 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -312,6 +312,7 @@ def check_rocky(isoinfo): fedoracatmap = { '41': 'el10', + '42': 'el10', } def check_fedora(isoinfo): if '.discinfo' not in isoinfo[1]: From 082a20f7761d328c19b8e701bdb2040735b98388 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 17 Apr 2025 10:34:11 -0400 Subject: [PATCH 131/413] Add mechanism to refresh screenshot in nodeconsole For a single node, provide a way to cleanly redraw a screen to keep an eye on it. 
--- confluent_client/bin/nodeconsole | 88 ++++++++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 10 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index c2443649..3ee218ee 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -29,8 +29,12 @@ import confluent.client as client import confluent.sortutil as sortutil import confluent.logreader as logreader import time +import select import socket import re +import tty +import termios +import fcntl try: # sixel is optional, attempt to import but stub out if unavailable @@ -63,6 +67,9 @@ argparser.add_option('-T', '--Timestamp', action='store_true', default=False, argparser.add_option('-s', '--screenshot', action='store_true', default=False, help='Attempt to grab screenshot and render using kitty image protocol') +argparser.add_option('-i', '--interval', type='float', + help='Interval in seconds to redraw the screenshot. Currently only ' + 'works for one node') argparser.add_option('-w','--windowed', action='store_true', default=False, help='Open terminal windows for each node. 
The ' 'environment variable NODECONSOLE_WINDOWED_COMMAND ' @@ -86,6 +93,50 @@ argparser.add_option('-w','--windowed', action='store_true', default=False, (options, args) = argparser.parse_args() +oldtcattr = None +oldfl = None + +def get_coords(): + sys.stdout.write('\x1b[6n') # + sys.stdout.flush() + gotreply = select.select([sys.stdin,], [], [], 0.250)[0] + if gotreply: + response = '' + while select.select([sys.stdin,], [], [], 0.1)[0] and 'R' not in response: + response += sys.stdin.read() + coords = response.replace('R', '').split('[')[1].split(';') + #sys.stdout.write('\x1b[{}:{}H'.format(*coords)) + +def direct_console(): + global oldtcattr + global oldfl + oldtcattr = termios.tcgetattr(sys.stdin.fileno()) + oldfl = fcntl.fcntl(sys.stdin.fileno(), fcntl.F_GETFL) + tty.setraw(sys.stdin.fileno()) + fcntl.fcntl(sys.stdin.fileno(), fcntl.F_SETFL, oldfl | os.O_NONBLOCK) + +def indirect_console(): + fcntl.fcntl(sys.stdin.fileno(), fcntl.F_SETFL, oldfl & ~os.O_NONBLOCK) + termios.tcsetattr(sys.stdin.fileno(), termios.TCSANOW, oldtcattr) + +cursor_saved = False +def sticky_cursor(): + global cursor_saved +# get cursor restore_position + if sys.stdin.isatty() and not cursor_saved: + try: + direct_console() + sys.stdout.write('\x1b7') + cursor_saved = True + finally: + indirect_console() + elif cursor_saved: + try: + direct_console() + sys.stdout.write('\x1b8') + finally: + indirect_console() + def draw_image(data): imageformat = os.environ.get('CONFLUENT_IMAGE_PROTOCOL', 'kitty') @@ -163,16 +214,33 @@ if options.Timestamp: if options.screenshot: sess = client.Command() - for res in sess.read('/noderange/{}/console/ikvm_screenshot'.format(args[0])): - for node in res.get('databynode', {}): - imgdata = res['databynode'][node].get('image', {}).get('imgdata', None) - if imgdata: - if len(imgdata) < 32: # We were subjected to error - sys.stderr.write(f'{node}: Unable to get screenshot\n') - continue - sys.stdout.write('{}: '.format(node)) - draw_image(imgdata.encode()) - 
sys.stdout.write('\n') + firstnodename = None + dorefresh = True + if options.interval is not None: + sys.stdout.write('\x1bc') + while dorefresh: + for res in sess.read('/noderange/{}/console/ikvm_screenshot'.format(args[0])): + for node in res.get('databynode', {}): + if not firstnodename: + firstnodename = node + imgdata = res['databynode'][node].get('image', {}).get('imgdata', None) + if imgdata: + if len(imgdata) < 32: # We were subjected to error + sys.stderr.write(f'{node}: Unable to get screenshot\n') + continue + if options.interval is not None: + if node != firstnodename: + sys.stderr.write('Multiple nodes not supported for interval') + sys.exit(1) + sticky_cursor() + sys.stdout.write('{}: '.format(node)) + draw_image(imgdata.encode()) + sys.stdout.write('\n') + if options.interval is None: + dorefresh = False + else: + dorefresh = True + time.sleep(options.interval) sys.exit(0) def kill(noderange): From 999a9c3acf23921c78d6d7830b8e7d2eadc0e3e1 Mon Sep 17 00:00:00 2001 From: Tinashe Date: Thu, 17 Apr 2025 15:34:45 -0400 Subject: [PATCH 132/413] remove-consoleredirect --- .../default/scripts/sample/consoleredirect | 15 ------- .../hpc/scripts/sample/consoleredirect | 39 ------------------- 2 files changed, 54 deletions(-) delete mode 100644 confluent_osdeploy/el9-diskless/profiles/default/scripts/sample/consoleredirect delete mode 100644 confluent_osdeploy/suse15/profiles/hpc/scripts/sample/consoleredirect diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/sample/consoleredirect b/confluent_osdeploy/el9-diskless/profiles/default/scripts/sample/consoleredirect deleted file mode 100644 index 60143ae5..00000000 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/sample/consoleredirect +++ /dev/null @@ -1,15 +0,0 @@ -is_rhel=false - -if test -f /boot/efi/EFI/redhat/grub.cfg; then - grubcfg="/etc/default/grub" - is_rhel=true -else - echo "Expected File missing: Check if os redhat" - exit -fi - -# Working on Redhat -if $is_rhel; 
then - sed -i '/^GRUB_TERMINAL/s/serial //' $grubcfg - grub2-mkconfig -o /boot/grub2/grub.cfg -fi \ No newline at end of file diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/sample/consoleredirect b/confluent_osdeploy/suse15/profiles/hpc/scripts/sample/consoleredirect deleted file mode 100644 index 270d24b7..00000000 --- a/confluent_osdeploy/suse15/profiles/hpc/scripts/sample/consoleredirect +++ /dev/null @@ -1,39 +0,0 @@ -is_suse=false - -if test -f /boot/efi/EFI/sle_hpc/grub.cfg; then - grubcfg="/boot/efi/EFI/sle_hpc/grub.cfg" - grub2-mkconfig -o $grubcfg - is_suse=true -else - echo "Expected File missing: Check if os sle_hpc" - exit -fi - -# working on SUSE -if $is_suse; then - start=false - num_line=0 - lines_to_edit=() - while read line; do - ((num_line++)) - if [[ $line == *"grub_platform"* ]]; then - start=true - fi - if $start; then - if [[ $line != "#"* ]];then - lines_to_edit+=($num_line) - fi - fi - if [[ ${#line} -eq 2 && $line == *"fi" ]]; then - if $start; then - start=false - fi - fi - done < grub_cnf.cfg - - for line_num in "${lines_to_edit[@]}"; do - line_num+="s" - sed -i "${line_num},^,#," $grubcfg - done - sed -i 's,^terminal,#terminal,' $grubcfg -fi From 0cfdfbdfa4c4283b8d2b8d50617cef222c335395 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 18 Apr 2025 11:19:12 -0400 Subject: [PATCH 133/413] Add tar.zstd capability zstd is much faster and not too much bigger than xz --- genesis/97genesis/install-base | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis/97genesis/install-base b/genesis/97genesis/install-base index 1ac2cfa5..e6c3d3a4 100644 --- a/genesis/97genesis/install-base +++ b/genesis/97genesis/install-base @@ -9,7 +9,7 @@ dracut_install /$IMPLIBDIR/libtss2-tcti-device.so.0 dracut_install tpm2_create tpm2_pcrread tpm2_createpolicy tpm2_createprimary dracut_install tpm2_load tpm2_unseal tpm2_getcap tpm2_evictcontrol dracut_install tpm2_pcrextend tpm2_policypcr tpm2_flushcontext tpm2_startauthsession 
-dracut_install openssl tar ipmitool cpio xz gzip lsmod ethtool +dracut_install openssl tar ipmitool cpio zstd xz gzip lsmod ethtool dracut_install modprobe touch echo cut wc bash uniq grep ip hostname dracut_install awk egrep dirname bc expr sort dracut_install ssh sshd vi reboot lspci parted tmux mkfs mkfs.ext4 mkfs.xfs xfs_db mkswap From 94af42031e326c00608436033d92f14971cf5a09 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 22 Apr 2025 10:35:14 -0400 Subject: [PATCH 134/413] Provide screenshot tiling with interval support Only for kitty graphics protocol. Also, attempt to use pillow to convert, if available. Kitty itself needs this, Konsole can work either way. It currently does not preserve aspect ratio, to do that we pretty much need to do some work with pillow. If we specify just the height, then ratio is preserved, but it won't honor the designed bounding box on wide screenshots. Also Konsole won't even honor just one scaling factor. So the better thing would be to determine the aspect ratio, which needs pillow. 
--- confluent_client/bin/nodeconsole | 129 +++++++++++++++++--- confluent_client/confluent/screensqueeze.py | 8 +- 2 files changed, 117 insertions(+), 20 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 3ee218ee..fbcaabc9 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -35,6 +35,11 @@ import re import tty import termios import fcntl +import confluent.screensqueeze as sq +try: + from PIL import Image +except ImportError: + Image = None try: # sixel is optional, attempt to import but stub out if unavailable @@ -119,6 +124,29 @@ def indirect_console(): fcntl.fcntl(sys.stdin.fileno(), fcntl.F_SETFL, oldfl & ~os.O_NONBLOCK) termios.tcsetattr(sys.stdin.fileno(), termios.TCSANOW, oldtcattr) +def determine_tile_size(numnodes): + cheight, cwidth, pixwidth, pixheight = sq.get_screengeom() + ratio = (pixwidth / 16) / (pixheight / 10) + bestdeviation = None + bestdims = [] + for i in range(1, numnodes + 1): + number = numnodes + while number % i != 0: + number += 1 + columns = i + rows = number // i + deviation = abs(ratio - (columns / rows)) + if bestdeviation is None: + bestdeviation = deviation + bestdims = [columns, rows] + elif deviation < bestdeviation: + bestdeviation = deviation + bestdims = [columns, rows] + cellswide = cwidth // bestdims[0] + cellshigh = cheight // bestdims[1] + bestdims = bestdims + [cellswide, cellshigh] + return bestdims + cursor_saved = False def sticky_cursor(): global cursor_saved @@ -138,14 +166,14 @@ def sticky_cursor(): indirect_console() -def draw_image(data): +def draw_image(data, width, height): imageformat = os.environ.get('CONFLUENT_IMAGE_PROTOCOL', 'kitty') if imageformat == 'sixel': sixel_draw(data) elif imageformat == 'iterm': iterm_draw(data) else: - kitty_draw(data) + kitty_draw(data, width, height) def sixel_draw(data): @@ -165,11 +193,30 @@ def iterm_draw(data): sys.stdout.write('\n') sys.stdout.flush() -def kitty_draw(data): +def 
kitty_draw(data, width, height): + if Image: + bindata = base64.b64decode(data) + binfile = io.BytesIO() + binfile.write(bindata) + binfile.seek(0) + img = Image.open(binfile) + outfile = io.BytesIO() + img.save(outfile, format='PNG') + data = base64.b64encode(outfile.getbuffer()) + preamble = '\x1b_Ga=T,f=100' + if height: + preamble += f',r={height - 2},c={width}' + #sys.stdout.write(repr(preamble)) + #sys.stdout.write('\xb[{}D'.format(len(repr(preamble)))) + #return + first = True while data: chunk, data = data[:4096], data[4096:] m = 1 if data else 0 - sys.stdout.write('\x1b_Ga=T,f=100,m={};'.format(m)) + if first: + sys.stdout.write('{},m={};'.format(preamble, m)) + else: + sys.stdout.write('\x1b_Gm={};'.format(m)) sys.stdout.write(chunk.decode('utf8')) sys.stdout.write('\x1b\\') sys.stdout.flush() @@ -212,8 +259,47 @@ if options.Timestamp: logreader.dump_to_console(logname) sys.exit(0) +def prep_node_tile(node): + currcolcell, currrowcell = nodepositions[node] + if currcolcell: + sys.stdout.write(f'\x1b[{currcolcell}C') + if currrowcell: + sys.stdout.write(f'\x1b[{currrowcell}B') + sys.stdout.write(node) + sys.stdout.write('\x1b[{}D'.format(len(node))) + sys.stdout.write(f'\x1b[1B') + +def reset_cursor(node): + currcolcell, currrowcell = nodepositions[node] + if currcolcell: + sys.stdout.write(f'\x1b[{currcolcell}D') + sys.stdout.write(f'\x1b[{currrowcell + 1}A') + + +nodepositions = {} if options.screenshot: + cwidth = None + cheight = None sess = client.Command() + if options.tile: + allnodes = [] + numnodes = 0 + for res in sess.read('/noderange/{}/nodes/'.format(args[0])): + allnodes.append(res['item']['href'].replace('/', '')) + numnodes += 1 + cols, rows, cwidth, cheight = determine_tile_size(numnodes) + currcol = 1 + currcolcell = 0 + currrowcell = 0 + for node in allnodes: + nodepositions[node] = currcolcell, currrowcell + if currcol < cols: + currcol += 1 + currcolcell += cwidth + else: + currcol = 1 + currcolcell = 0 + currrowcell += cheight 
firstnodename = None dorefresh = True if options.interval is not None: @@ -228,19 +314,28 @@ if options.screenshot: if len(imgdata) < 32: # We were subjected to error sys.stderr.write(f'{node}: Unable to get screenshot\n') continue - if options.interval is not None: - if node != firstnodename: - sys.stderr.write('Multiple nodes not supported for interval') - sys.exit(1) - sticky_cursor() - sys.stdout.write('{}: '.format(node)) - draw_image(imgdata.encode()) - sys.stdout.write('\n') - if options.interval is None: - dorefresh = False - else: - dorefresh = True - time.sleep(options.interval) + if node in nodepositions: + prep_node_tile(node) + else: + if options.interval is not None: + if node != firstnodename: + sys.stderr.write('Multiple nodes not supported for interval') + sys.exit(1) + sticky_cursor() + sys.stdout.write('{}: '.format(node)) + draw_image(imgdata.encode(), cwidth, cheight - 1) + sys.stdout.write(f'\x1b[{cwidth}D') + sys.stdout.write(f'\x1b[{cheight - 1}A') + if node in nodepositions: + reset_cursor(node) + else: + sys.stdout.write('\n') + sys.stdout.flush() + if options.interval is None: + dorefresh = False + else: + dorefresh = True + time.sleep(options.interval) sys.exit(0) def kill(noderange): diff --git a/confluent_client/confluent/screensqueeze.py b/confluent_client/confluent/screensqueeze.py index 2fbeddc6..60aecc21 100644 --- a/confluent_client/confluent/screensqueeze.py +++ b/confluent_client/confluent/screensqueeze.py @@ -18,8 +18,9 @@ import struct import termios def get_screengeom(): - return struct.unpack('hh', fcntl.ioctl(sys.stdout, termios.TIOCGWINSZ, - b'....')) + # returns height in cells, width in cells, width in pixels, height in pixels + return struct.unpack('hhhh', fcntl.ioctl(sys.stdout, termios.TIOCGWINSZ, + b'........')) class ScreenPrinter(object): def __init__(self, noderange, client, textlen=4): @@ -58,7 +59,7 @@ class ScreenPrinter(object): def drawscreen(self, node=None): if self.squeeze: - currheight, currwidth = 
get_screengeom() + currheight, currwidth, _, _ = get_screengeom() currheight -= 2 if currheight < 1: currheight = 1 @@ -120,6 +121,7 @@ if __name__ == '__main__': c = client.Command() p = ScreenPrinter('d1-d12', c) p.set_output('d3', 'Upload: 67%') + p.set_output('d7', 'Upload: 67%') From bfdd6a56f65e97eb3401ed509b5e02465fc86839 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 22 Apr 2025 10:58:25 -0400 Subject: [PATCH 135/413] Add iterm tiling support Also, block sixel attempts, since that is not implemented. --- confluent_client/bin/nodeconsole | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index fbcaabc9..01bbd98f 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -171,7 +171,7 @@ def draw_image(data, width, height): if imageformat == 'sixel': sixel_draw(data) elif imageformat == 'iterm': - iterm_draw(data) + iterm_draw(data, width, height) else: kitty_draw(data, width, height) @@ -183,11 +183,15 @@ def sixel_draw(data): binfile.seek(0) DumbWriter().draw(binfile) -def iterm_draw(data): +def iterm_draw(data, width, height): + if not height: + height = 'auto' + if not width: + width = 'auto' bindata = base64.b64decode(data) datalen = len(bindata) sys.stdout.write( - '\x1b]1337;File=inline=1;size={}:'.format(datalen)) + '\x1b]1337;File=inline=1;width={};height={};size={}:'.format(width,height,datalen)) sys.stdout.write(data.decode('utf8')) sys.stdout.write('\a') sys.stdout.write('\n') @@ -282,6 +286,10 @@ if options.screenshot: cheight = None sess = client.Command() if options.tile: + imageformat = os.environ.get('CONFLUENT_IMAGE_PROTOCOL', 'kitty') + if imageformat not in ('kitty', 'iterm'): + sys.stderr.write('Tiled screenshots only supported with kitty or iterm protocol') + sys.exit(1) allnodes = [] numnodes = 0 for res in sess.read('/noderange/{}/nodes/'.format(args[0])): @@ -323,10 +331,11 @@ if 
options.screenshot: sys.exit(1) sticky_cursor() sys.stdout.write('{}: '.format(node)) - draw_image(imgdata.encode(), cwidth, cheight - 1) - sys.stdout.write(f'\x1b[{cwidth}D') - sys.stdout.write(f'\x1b[{cheight - 1}A') + # one row is used by our own name, so cheight - 1 for that allowance + draw_image(imgdata.encode(), cwidth, cheight - 1 if cheight else cheight) if node in nodepositions: + sys.stdout.write(f'\x1b[{cwidth}D') + sys.stdout.write(f'\x1b[{cheight - 1}A') reset_cursor(node) else: sys.stdout.write('\n') From 05ffc9da10a0b09c9007caffa3c6ae841c24da20 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 22 Apr 2025 16:01:26 -0400 Subject: [PATCH 136/413] Constrain aspect ratio When parceling out the screen real estate, avoid either the height or the width from getting way out of proportion. Better to let screen be unused than abuse it to distort the aspect ratio too much. --- confluent_client/bin/nodeconsole | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 01bbd98f..36aa8807 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -125,8 +125,18 @@ def indirect_console(): termios.tcsetattr(sys.stdin.fileno(), termios.TCSANOW, oldtcattr) def determine_tile_size(numnodes): + # for now, smash everything to a common aspect ratio. 16:11 + # is pretty much wrong for everything, making 4:3 a bit too wide + # and 16:9 significantly too narrow, but it is serviceable + # An improvement could come with us owning the scaling + # instead of delegating to Kitty, which says if we specify both, + # we get stretching. 
In theory we should be able to get aspect correct + # from kitty by omitting, but: + # then we don't know how much to move the cursor left after draw_image + # Konsole won't scale at all with only partial scaling specified cheight, cwidth, pixwidth, pixheight = sq.get_screengeom() - ratio = (pixwidth / 16) / (pixheight / 10) + # 16:12 is to roughly account for the 'titles' of the tiles + ratio = (pixwidth / 16) / (pixheight / 12) bestdeviation = None bestdims = [] for i in range(1, numnodes + 1): @@ -144,6 +154,14 @@ def determine_tile_size(numnodes): bestdims = [columns, rows] cellswide = cwidth // bestdims[0] cellshigh = cheight // bestdims[1] + tilewidth = cellswide * pixwidth / cwidth + tileheight = cellshigh * pixheight / cheight + if tilewidth > (tileheight * 16 / 11): + tilewidth = tileheight * 16 / 11 + cellswide = int(tilewidth // (pixwidth / cwidth)) + if tileheight > (tilewidth * 11 /16): + tileheight = tilewidth * 11 / 16 + cellshigh = int(tileheight // (pixheight / cheight)) bestdims = bestdims + [cellswide, cellshigh] return bestdims From a69113222fabf792a7659d3ac258993ec9671c7c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 23 Apr 2025 09:34:44 -0400 Subject: [PATCH 137/413] Fix positioning errors in tiled console display It turns out that specifying height and width explicitly does not guarantee that the image protocols will actually fill the specified space. Notably iterm will honor aspect ratio (which is good), but leave the cursor where the image would naturally leave it (which is difficult with relative positioning). Previously, relative positioning was used as a workaround for the fact that save/restore or any absolute positioning may be fouled by incurring scroll. To make cursor save/restore work, we determine the total rows and print newlines enough to incur scroll and then move cursor back up. This lets us use save/restore to ignore cursor movement by the image. 
--- confluent_client/bin/nodeconsole | 46 +++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 36aa8807..86faa68b 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -162,7 +162,13 @@ def determine_tile_size(numnodes): if tileheight > (tilewidth * 11 /16): tileheight = tilewidth * 11 / 16 cellshigh = int(tileheight // (pixheight / cheight)) - bestdims = bestdims + [cellswide, cellshigh] + bestdims = bestdims + [cellswide, cellshigh, cellshigh * bestdims[1]] + # incur any scrolling we might get. This allows us to accurately + # save/restore cursor or even get coordinates without scrolling fouling + # the desired target + sys.stdout.write('\n' * bestdims[4]) + sys.stdout.flush() + cursor_up(bestdims[4]) return bestdims cursor_saved = False @@ -183,6 +189,18 @@ def sticky_cursor(): finally: indirect_console() +def cursor_up(count=1): + sys.stdout.write(f'\x1b[{count}A') +def cursor_down(count=1): + sys.stdout.write(f'\x1b[{count}B') +def cursor_right(count=1): + sys.stdout.write(f'\x1b[{count}C') +def cursor_left(count=1): + sys.stdout.write(f'\x1b[{count}D') +def cursor_save(): + sys.stdout.write('\x1b7') +def cursor_restore(): + sys.stdout.write('\x1b8') def draw_image(data, width, height): imageformat = os.environ.get('CONFLUENT_IMAGE_PROTOCOL', 'kitty') @@ -212,7 +230,6 @@ def iterm_draw(data, width, height): '\x1b]1337;File=inline=1;width={};height={};size={}:'.format(width,height,datalen)) sys.stdout.write(data.decode('utf8')) sys.stdout.write('\a') - sys.stdout.write('\n') sys.stdout.flush() def kitty_draw(data, width, height): @@ -227,7 +244,7 @@ def kitty_draw(data, width, height): data = base64.b64encode(outfile.getbuffer()) preamble = '\x1b_Ga=T,f=100' if height: - preamble += f',r={height - 2},c={width}' + preamble += f',r={height},c={width}' #sys.stdout.write(repr(preamble)) 
#sys.stdout.write('\xb[{}D'.format(len(repr(preamble)))) #return @@ -242,7 +259,6 @@ def kitty_draw(data, width, height): sys.stdout.write(chunk.decode('utf8')) sys.stdout.write('\x1b\\') sys.stdout.flush() - sys.stdout.write('\n') pass_through_args = [] killcon = False @@ -284,18 +300,18 @@ if options.Timestamp: def prep_node_tile(node): currcolcell, currrowcell = nodepositions[node] if currcolcell: - sys.stdout.write(f'\x1b[{currcolcell}C') + cursor_right(currcolcell) if currrowcell: - sys.stdout.write(f'\x1b[{currrowcell}B') + cursor_down(currrowcell) sys.stdout.write(node) - sys.stdout.write('\x1b[{}D'.format(len(node))) - sys.stdout.write(f'\x1b[1B') + cursor_left(len(node)) + cursor_down() def reset_cursor(node): currcolcell, currrowcell = nodepositions[node] if currcolcell: - sys.stdout.write(f'\x1b[{currcolcell}D') - sys.stdout.write(f'\x1b[{currrowcell + 1}A') + cursor_left(currcolcell) + cursor_up(currrowcell + 1) nodepositions = {} @@ -313,7 +329,7 @@ if options.screenshot: for res in sess.read('/noderange/{}/nodes/'.format(args[0])): allnodes.append(res['item']['href'].replace('/', '')) numnodes += 1 - cols, rows, cwidth, cheight = determine_tile_size(numnodes) + cols, rows, cwidth, cheight, numrows = determine_tile_size(numnodes) currcol = 1 currcolcell = 0 currrowcell = 0 @@ -326,10 +342,10 @@ if options.screenshot: currcol = 1 currcolcell = 0 currrowcell += cheight + elif options.interval is not None: + sys.stdout.write('\x1bc') firstnodename = None dorefresh = True - if options.interval is not None: - sys.stdout.write('\x1bc') while dorefresh: for res in sess.read('/noderange/{}/console/ikvm_screenshot'.format(args[0])): for node in res.get('databynode', {}): @@ -342,6 +358,7 @@ if options.screenshot: continue if node in nodepositions: prep_node_tile(node) + cursor_save() else: if options.interval is not None: if node != firstnodename: @@ -352,8 +369,7 @@ if options.screenshot: # one row is used by our own name, so cheight - 1 for that allowance 
draw_image(imgdata.encode(), cwidth, cheight - 1 if cheight else cheight) if node in nodepositions: - sys.stdout.write(f'\x1b[{cwidth}D') - sys.stdout.write(f'\x1b[{cheight - 1}A') + cursor_restore() reset_cursor(node) else: sys.stdout.write('\n') From af1659dafde5a1f7c2a646f5b1e5fca2819ba615 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 23 Apr 2025 09:44:10 -0400 Subject: [PATCH 138/413] Have nodeconsole exit below screenshots --- confluent_client/bin/nodeconsole | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 86faa68b..176e815e 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -313,9 +313,9 @@ def reset_cursor(node): cursor_left(currcolcell) cursor_up(currrowcell + 1) - nodepositions = {} -if options.screenshot: +def do_screenshot(): + global numrows cwidth = None cheight = None sess = client.Command() @@ -381,6 +381,15 @@ if options.screenshot: time.sleep(options.interval) sys.exit(0) +if options.screenshot: + try: + do_screenshot() + except KeyboardInterrupt: + pass + finally: + cursor_down(numrows) + sys.stdout.write('\n') + sys.exit(0) def kill(noderange): sess = client.Command() envstring=os.environ.get('NODECONSOLE_WINDOWED_COMMAND') From b5540dd3952690e9b9f0b2dd5e315c48d34e1500 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 23 Apr 2025 09:48:54 -0400 Subject: [PATCH 139/413] Hide cursor during screenshot run --- confluent_client/bin/nodeconsole | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 176e815e..49c7dee3 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -201,6 +201,10 @@ def cursor_save(): sys.stdout.write('\x1b7') def cursor_restore(): sys.stdout.write('\x1b8') +def cursor_hide(): + sys.stdout.write('\x1b[?25l') +def cursor_show(): + sys.stdout.write('\x1b[?25h') def 
draw_image(data, width, height): imageformat = os.environ.get('CONFLUENT_IMAGE_PROTOCOL', 'kitty') @@ -383,10 +387,12 @@ def do_screenshot(): if options.screenshot: try: + cursor_hide() do_screenshot() except KeyboardInterrupt: pass finally: + cursor_show() cursor_down(numrows) sys.stdout.write('\n') sys.exit(0) From 977d272c56cf1e5dd2bc4ab33a3bbbe697bc3e59 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 23 Apr 2025 09:55:45 -0400 Subject: [PATCH 140/413] Fix untiled nodeconsole screenshot --- confluent_client/bin/nodeconsole | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 49c7dee3..83e071ad 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -318,6 +318,7 @@ def reset_cursor(node): cursor_up(currrowcell + 1) nodepositions = {} +numrows = 0 def do_screenshot(): global numrows cwidth = None From 690980c064b55fe9fdcefc5454856873167a4619 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 23 Apr 2025 10:28:42 -0400 Subject: [PATCH 141/413] Always specify miimon=100 in networkmanager bonds We already do this for SUSE, it makes sense as a default. 
--- confluent_osdeploy/common/profile/scripts/confignet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 5bf0871b..0c93485b 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -409,7 +409,7 @@ class NetworkManager(object): cargs.append(cmdargs[arg]) if stgs['team_mode'] in self.bondtypes: stgs['team_mode'] = self.bondtypes[stgs['team_mode']] - subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond', 'con-name', cname, 'connection.interface-name', cname, 'bond.options', 'mode={}'.format(stgs['team_mode'])] + cargs) + subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond', 'con-name', cname, 'connection.interface-name', cname, 'bond.options', 'miimon=100,mode={}'.format(stgs['team_mode'])] + cargs) for iface in cfg['interfaces']: self.add_team_member(cname, iface) subprocess.check_call(['nmcli', 'c', 'u', cname]) From 52b0ae179ec77dc3c805019b039ba00a2850b618 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 23 Apr 2025 12:33:40 -0400 Subject: [PATCH 142/413] Background console disconnect on node removal --- confluent_server/confluent/consoleserver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index 6dba9b9c..7337743e 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -575,12 +575,12 @@ def disconnect_node(node, configmanager): def _nodechange(added, deleting, renamed, configmanager): for node in deleting: - disconnect_node(node, configmanager) + eventlet.spawn(disconnect_node, node, configmanager) for node in renamed: disconnect_node(node, configmanager) - connect_node(renamed[node], configmanager) + eventlet.spawn(connect_node, renamed[node], configmanager) for node in added: - 
connect_node(node, configmanager) + eventlet.spawn(connect_node, node, configmanager) def _start_tenant_sessions(cfm): From 0c8799f4dd378f81610f0fef7793aaf759d36930 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 23 Apr 2025 12:50:44 -0400 Subject: [PATCH 143/413] Favor more utilization of bottom screenshot row When we have the grid size, cut off any extra columns so long as it doesn't gain a row. --- confluent_client/bin/nodeconsole | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 83e071ad..02c00e81 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -152,6 +152,13 @@ def determine_tile_size(numnodes): elif deviation < bestdeviation: bestdeviation = deviation bestdims = [columns, rows] + # ok, the above algorithm can still pick things like + # 1 2 3 + # 4 + # So we will let it pick the number of rows, and + # then see if we can chop columns and still fit + while (bestdims[0] - 1) * bestdims[1] >= numnodes: + bestdims[0] = bestdims[0] - 1 cellswide = cwidth // bestdims[0] cellshigh = cheight // bestdims[1] tilewidth = cellswide * pixwidth / cwidth From 1553af0f41ca5c6a98d5f4c07de6ef216c411948 Mon Sep 17 00:00:00 2001 From: Wera Grzeda Date: Thu, 24 Apr 2025 10:47:38 +0200 Subject: [PATCH 144/413] NTP servers fix for stateless images for RHEL 7-9 Node attribute ntp.servers in nodeattrib can now be used in stateless images modified: confluent_osdeploy/el7-diskless/profiles/default/scripts/onboot.sh modified: confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh modified: confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh --- .../profiles/default/scripts/onboot.sh | 26 +++++++++++++++++ .../profiles/default/scripts/onboot.sh | 28 +++++++++++++++++++ .../profiles/default/scripts/onboot.sh | 24 +++++++++++++++- 3 files changed, 77 insertions(+), 1 deletion(-) diff --git 
a/confluent_osdeploy/el7-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el7-diskless/profiles/default/scripts/onboot.sh index c047dcb8..3c8eea08 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/onboot.sh +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/onboot.sh @@ -5,11 +5,37 @@ # noted below so custom commands are executed before # the script notifies confluent that install is fully complete. + +ntpsrvs="" nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') confluent_apikey=$(cat /etc/confluent/confluent.apikey) confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') timedatectl set-timezone $(grep ^timezone: /etc/confluent/confluent.deploycfg|awk '{print $2}') + + +if grep ^ntpservers: /etc/confluent/confluent.deploycfg > /dev/null; then + for ntpsrv in $(sed -n '/^ntpservers:/,/^[^-]/p' /etc/confluent/confluent.deploycfg|sed 1d|sed '$d' | sed -e 's/^- //'); do + echo "server ${ntpsrv} iburst " >> /tmp/timeservers + done +fi + +if [ -f /tmp/timeservers ]; then + +ntpsrvs=$(cat /tmp/timeservers) + +sed -i "1,/^pool * /c\\ + +${ntpsrvs//$'\n'/\\$'\n'}" /etc/chrony.conf + + +systemctl restart chronyd + +rm -f /tmp/timeservers +fi + + + export nodename confluent_mgr confluent_profile . /etc/confluent/functions mkdir -p /var/log/confluent diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh index b2c0d1b3..65b13ff1 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh @@ -5,6 +5,7 @@ # noted below so custom commands are executed before # the script notifies confluent that install is fully complete. 
+ntpsrvs="" nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') confluent_apikey=$(cat /etc/confluent/confluent.apikey) v4meth=$(grep ^ipv4_method: /etc/confluent/confluent.deploycfg|awk '{print $2}') @@ -17,6 +18,33 @@ fi confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') timedatectl set-timezone $(grep ^timezone: /etc/confluent/confluent.deploycfg|awk '{print $2}') hostnamectl set-hostname $nodename + + +if grep ^ntpservers: /etc/confluent/confluent.deploycfg > /dev/null; then + for ntpsrv in $(sed -n '/^ntpservers:/,/^[^-]/p' /etc/confluent/confluent.deploycfg|sed 1d|sed '$d' | sed -e 's/^- //'); do + echo "server ${ntpsrv} iburst " >> /tmp/timeservers + done +fi + +if [ -f /tmp/timeservers ]; then + +ntpsrvs=$(cat /tmp/timeservers) + +sed -i "1,/^pool * /c\\ +${ntpsrvs//$'\n'/\\$'\n'}" /etc/chrony.conf + + +systemctl restart chronyd + +rm -f /tmp/timeservers + +fi + + + + + + export nodename confluent_mgr confluent_profile . /etc/confluent/functions mkdir -p /var/log/confluent diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh index b2c0d1b3..80f95870 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh @@ -4,7 +4,7 @@ # completed. It is best to edit the middle of the file as # noted below so custom commands are executed before # the script notifies confluent that install is fully complete. 
- +ntpsrvs="" nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') confluent_apikey=$(cat /etc/confluent/confluent.apikey) v4meth=$(grep ^ipv4_method: /etc/confluent/confluent.deploycfg|awk '{print $2}') @@ -17,6 +17,28 @@ fi confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') timedatectl set-timezone $(grep ^timezone: /etc/confluent/confluent.deploycfg|awk '{print $2}') hostnamectl set-hostname $nodename + + +if grep ^ntpservers: /etc/confluent/confluent.deploycfg > /dev/null; then + for ntpsrv in $(sed -n '/^ntpservers:/,/^[^-]/p' /etc/confluent/confluent.deploycfg|sed 1d|sed '$d' | sed -e 's/^- //'); do + echo "server ${ntpsrv} iburst " >> /tmp/timeservers + done +fi + +if [ -f /tmp/timeservers ]; then + +ntpsrvs=$(cat /tmp/timeservers) + +sed -i "1,/^pool * /c\\ + +${ntpsrvs//$'\n'/\\$'\n'}" /etc/chrony.conf + + +systemctl restart chronyd + +rm -f /tmp/timeservers +fi + export nodename confluent_mgr confluent_profile . /etc/confluent/functions mkdir -p /var/log/confluent From 9f51e256ce4b1ecf9648cd4afc5bb67cfbf6d420 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 24 Apr 2025 08:08:27 -0400 Subject: [PATCH 145/413] Activate vinz on access if doing collective If a node has not been asked to open any locally managed video consoles before it was asked to open a peer managed console, it would fail to start the needed vinz service. Work around this by detecting that scenario and giving the vinz subsystem a chance to fix itself. 
--- confluent_server/confluent/core.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index ef1c33ee..e25e82d2 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -74,7 +74,7 @@ import uuid import yaml import shutil - +vinz = None pluginmap = {} dispatch_plugins = (b'ipmi', u'ipmi', b'redfish', u'redfish', b'tsmsol', u'tsmsol', b'geist', u'geist', b'deltapdu', u'deltapdu', b'eatonpdu', u'eatonpdu', b'affluent', u'affluent', b'cnos', u'cnos', b'enos', u'enos') @@ -213,6 +213,7 @@ def handle_deployment(configmanager, inputdata, pathcomponents, with open('/var/lib/confluent/public/os/{}/profile.yaml'.format(profname)) as profyaml: profinfo = yaml.safe_load(profyaml) profinfo['name'] = profname + #check if boot.ipxe is older than profile.yaml yield msg.KeyValueData(profinfo) return elif len(pathcomponents) == 3: @@ -972,6 +973,7 @@ def _forward_rsp(connection, res): def handle_node_request(configmanager, inputdata, operation, pathcomponents, autostrip=True): + global vinz if log.logfull: raise exc.TargetResourceUnavailable('Filesystem full, free up space and restart confluent service') iscollection = False @@ -1090,6 +1092,10 @@ def handle_node_request(configmanager, inputdata, operation, plugpath = plugroute['default'] if plugpath in dispatch_plugins: cfm.check_quorum() + if pathcomponents == ['console', 'ikvm']: + if not vinz: + import confluent.vinzmanager as vinz + vinz.assure_vinz() manager = nodeattr[node].get('collective.manager', {}).get( 'value', None) if manager: From b4ef1b484a0955c046200fced6b6ed22b6610a85 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 25 Apr 2025 08:55:10 -0400 Subject: [PATCH 146/413] Amend syncfiles address selection. A node with private, unroutable addresses relative to the deployment server may cause the deployment server to select an unroutable address. Address this with two strategies. 
First, if any of the addresses appear local to the deployment server networks, prefer those and filter out unroutable. Secondly, if a node is purely remote, and thus all addresses routable, then make all the addresses a candidate. However, since the client can't possibly be using fe80::, we can replace the principal list with just the clientip, provided it appears in the principal list. --- confluent_server/confluent/selfservice.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/selfservice.py b/confluent_server/confluent/selfservice.py index bb619f78..14c8dd0e 100644 --- a/confluent_server/confluent/selfservice.py +++ b/confluent_server/confluent/selfservice.py @@ -52,7 +52,7 @@ def listdump(input): return retval -def get_extra_names(nodename, cfg, myip=None): +def get_extra_names(nodename, cfg, myip=None, preferadjacent=False): names = set(['127.0.0.1', '::1', 'localhost', 'localhost.localdomain']) dnsinfo = cfg.get_node_attributes(nodename, ('dns.*', 'net.*hostname')) dnsinfo = dnsinfo.get(nodename, {}) @@ -74,11 +74,19 @@ def get_extra_names(nodename, cfg, myip=None): ncfgs.append(fncfg.get('default', {})) for ent in fncfg.get('extranets', []): ncfgs.append(fncfg['extranets'][ent]) + addall = True + routedaddrs = set([]) for ncfg in ncfgs: for nip in (ncfg.get('ipv4_address', None), ncfg.get('ipv6_address', None)): if nip: nip = nip.split('/', 1)[0] - names.add(nip) + if not preferadjacent or netutil.address_is_local(nip): + names.add(nip) + addall = False + else: + routedaddrs.add(nip) + if addall: + names.update(routedaddrs) return names def handle_request(env, start_response): @@ -520,7 +528,9 @@ def handle_request(env, start_response): return elif env['PATH_INFO'].startswith('/self/remotesyncfiles'): if 'POST' == operation: - pals = get_extra_names(nodename, cfg, myip) + pals = get_extra_names(nodename, cfg, myip, preferadjacent=True) + if clientip in pals: + pals = [clientip] result = 
syncfiles.start_syncfiles( nodename, cfg, json.loads(reqbody), pals) start_response(result[0], ()) From 0bdcaecf82d986afe53a0a38ea3958d87b341d4e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 25 Apr 2025 08:57:50 -0400 Subject: [PATCH 147/413] Do not include localhost in syncfiles consideration localhost was added to ssh principals, but should not be used as a candidate in syncfiles. The syncfileclient should already be filtering this possibility, but best to filter it everywhere that makes sense. --- confluent_server/confluent/selfservice.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/selfservice.py b/confluent_server/confluent/selfservice.py index 14c8dd0e..de8eb832 100644 --- a/confluent_server/confluent/selfservice.py +++ b/confluent_server/confluent/selfservice.py @@ -52,8 +52,11 @@ def listdump(input): return retval -def get_extra_names(nodename, cfg, myip=None, preferadjacent=False): - names = set(['127.0.0.1', '::1', 'localhost', 'localhost.localdomain']) +def get_extra_names(nodename, cfg, myip=None, preferadjacent=False, addlocalhost=True): + if addlocalhost: + names = set(['127.0.0.1', '::1', 'localhost', 'localhost.localdomain']) + else: + names = set([]) dnsinfo = cfg.get_node_attributes(nodename, ('dns.*', 'net.*hostname')) dnsinfo = dnsinfo.get(nodename, {}) domain = dnsinfo.get('dns.domain', {}).get('value', None) @@ -528,7 +531,7 @@ def handle_request(env, start_response): return elif env['PATH_INFO'].startswith('/self/remotesyncfiles'): if 'POST' == operation: - pals = get_extra_names(nodename, cfg, myip, preferadjacent=True) + pals = get_extra_names(nodename, cfg, myip, preferadjacent=True, addlocalhost=False) if clientip in pals: pals = [clientip] result = syncfiles.start_syncfiles( From 85249ae71b91d77e0b3495427107692acbd3f526 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 25 Apr 2025 13:19:38 -0400 Subject: [PATCH 148/413] Basic VCenter plugin This provides 
nodeinventory (mac and -s most interestingly), nodepower, nodesetboot (and by extension, nodedeploy -n), and nodeconsole (console.method=vcenter). --- .../confluent/config/attributes.py | 2 +- .../plugins/hardwaremanagement/vcenter.py | 315 ++++++++++++++++++ 2 files changed, 316 insertions(+), 1 deletion(-) create mode 100644 confluent_server/confluent/plugins/hardwaremanagement/vcenter.py diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index a6ce0e96..bd08b39c 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -369,7 +369,7 @@ node = { 'the managed node. If not specified, then console ' 'is disabled. "ipmi" should be specified for most ' 'systems if console is desired.'), - 'validvalues': ('ssh', 'ipmi', 'openbmc', 'tsmsol'), + 'validvalues': ('ssh', 'ipmi', 'openbmc', 'tsmsol', 'vcenter'), }, # 'virtualization.host': { # 'description': ('Hypervisor where this node does/should reside'), diff --git a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py new file mode 100644 index 00000000..8dc6aa60 --- /dev/null +++ b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py @@ -0,0 +1,315 @@ + +import codecs +import confluent.util as util +import confluent.messages as msg +import eventlet +import json +import struct +webclient = eventlet.import_patched('pyghmi.util.webclient') +import eventlet.green.socket as socket +import eventlet +import confluent.interface.console as conapi + + +def fixuuid(baduuid): + # VMWare changes the endian stuff in BIOS + uuidprefix = (baduuid[:8], baduuid[9:13], baduuid[14:18]) + a = codecs.encode(struct.pack(' 0: + portid = rsp[0][0]['port'] + rsp = self.wc.grab_json_response_with_status(f'/api/vcenter/vm/{vm}/hardware/serial/{portid}') + if rsp[1] == 200: + if rsp[0]['backing']['type'] != 'NETWORK_SERVER': + return + 
netloc = rsp[0]['backing']['network_location'] + portnum = netloc.split(':')[-1] + tlsenabled = False + if netloc.startswith('telnets'): + tlsenabled = True + hostinfo = self.get_vm_host(vm) + hostname = hostinfo['name'] + rsp[0] + return { + 'server': hostname, + 'port': portnum, + 'tls': tlsenabled, + } + + def get_vm_bootdev(self, vm): + vm = self.index_vm(vm) + rsp = self.wc.grab_json_response_with_status(f'/api/vcenter/vm/{vm}/hardware/boot') + if rsp[0]['enter_setup_mode']: + return 'setup' + rsp = self.wc.grab_json_response_with_status(f'/api/vcenter/vm/{vm}/hardware/boot/device') + if rsp[0][0]['type'] == 'ETHERNET': + return 'network' + return 'default' + + def get_vm_power(self, vm): + vm = self.index_vm(vm) + rsp = self.wc.grab_json_response(f'/api/vcenter/vm/{vm}/power') + if rsp['state'] == 'POWERED_ON': + return 'on' + if rsp['state'] == 'POWERED_OFF': + return 'off' + if rsp['state'] == 'SUSPENDED': + return 'suspended' + raise Exception("Unknown response {}".format(repr(rsp))) + + def set_vm_power(self, vm, state): + vm = self.index_vm(vm) + if state == 'boot': + current = self.get_vm_power(vm) + if current == 'on': + state = 'reset' + else: + state = 'start' + elif state == 'on': + state = 'start' + elif state == 'off': + state = 'stop' + rsp = self.wc.grab_json_response_with_status(f'/api/vcenter/vm/{vm}/power?action={state}', method='POST') + + + def set_vm_bootdev(self, vm, bootdev): + vm = self.index_vm(vm) + self.wc.set_header('Content-Type', 'application/json') + try: + bootdevs = [] + entersetup = False + if bootdev == 'setup': + entersetup = True + elif bootdev == 'default': + # In theory, we should be able to send an empty device list. + # However, vmware api counter to documentation seems to just ignore + # such a request. 
So instead we just go "disk first" + # and rely upon fast fail/retry to take us to a normal place + currdisks, rcode = self.wc.grab_json_response_with_status(f'/api/vcenter/vm/{vm}/hardware/disk') + currdisks = [x['disk'] for x in currdisks] + bootdevs.append({'type': 'DISK', 'disks': currdisks}) + elif bootdev in ('net', 'network'): + currnics, rcode = self.wc.grab_json_response_with_status(f'/api/vcenter/vm/{vm}/hardware/ethernet') + for nic in currnics: + bootdevs.append({'type': 'ETHERNET', 'nic': nic['nic']}) + payload = {'devices': bootdevs} + rsp = self.wc.grab_json_response_with_status(f'/api/vcenter/vm/{vm}/hardware/boot/device', + payload, + method='PUT') + rsp = self.wc.grab_json_response_with_status(f'/api/vcenter/vm/{vm}/hardware/boot', + {'enter_setup_mode': entersetup}, + method='PATCH') + finally: + del self.wc.stdheaders['Content-Type'] + + +def prep_vcsa_clients(nodes, configmanager): + cfginfo = configmanager.get_node_attributes(nodes, ['hardwaremanagement.manager', 'secret.hardwaremanagementuser', 'secret.hardwaremanagementpassword'], decrypt=True) + clientsbyvcsa = {} + clientsbynode = {} + for node in nodes: + cfg = cfginfo[node] + currvcsa = cfg['hardwaremanagement.manager']['value'] + if currvcsa not in clientsbyvcsa: + user = cfg.get('secret.hardwaremanagementuser', {}).get('value', None) + passwd = cfg.get('secret.hardwaremanagementpassword', {}).get('value', None) + clientsbyvcsa[currvcsa] = VmwApiClient(currvcsa, user, passwd, configmanager) + clientsbynode[node] = clientsbyvcsa[currvcsa] + return clientsbynode + +def retrieve(nodes, element, configmanager, inputdata): + clientsbynode = prep_vcsa_clients(nodes, configmanager) + for node in nodes: + currclient = clientsbynode[node] + if element == ['power', 'state']: + yield msg.PowerState(node, currclient.get_vm_power(node)) + elif element == ['boot', 'nextdevice']: + yield msg.BootDevice(node, currclient.get_vm_bootdev(node)) + elif element[:2] == ['inventory', 'hardware'] and 
len(element) == 4: + for rsp in currclient.get_vm_inventory(node): + yield rsp + + + + +def update(nodes, element, configmanager, inputdata): + clientsbynode = prep_vcsa_clients(nodes, configmanager) + for node in nodes: + currclient = clientsbynode[node] + if element == ['power', 'state']: + currclient.set_vm_power(node, inputdata.powerstate(node)) + yield msg.PowerState(node, currclient.get_vm_power(node)) + elif element == ['boot', 'nextdevice']: + currclient.set_vm_bootdev(node, inputdata.bootdevice(node)) + yield msg.BootDevice(node, currclient.get_vm_bootdev(node)) + +# assume this is only console for now +def create(nodes, element, configmanager, inputdata): + clientsbynode = prep_vcsa_clients(nodes, configmanager) + for node in nodes: + serialdata = clientsbynode[node].get_vm_serial(node) + return VmConsole(serialdata['server'], serialdata['port'], serialdata['tls']) + + + +if __name__ == '__main__': + import sys + import os + from pprint import pprint + myuser = os.environ['VMWUSER'] + mypass = os.environ['VMWPASS'] + vc = VmwApiClient(sys.argv[1], myuser, mypass, None) + vm = sys.argv[2] + if sys.argv[3] == 'setboot': + vc.set_vm_bootdev(vm, sys.argv[4]) + vc.get_vm_bootdev(vm) + elif sys.argv[3] == 'power': + vc.set_vm_power(vm, sys.argv[4]) + elif sys.argv[3] == 'getinfo': + vc.get_vm(vm) + print("Bootdev: " + vc.get_vm_bootdev(vm)) + print("Power: " + vc.get_vm_power(vm)) + print("Serial: " + repr(vc.get_vm_serial(vm))) From 3b46ccb5b10a12a9a612cd2c513b8d40366d6fbc Mon Sep 17 00:00:00 2001 From: Tinashe Date: Fri, 25 Apr 2025 16:55:25 -0400 Subject: [PATCH 149/413] make nodeconsole kill more consistent --- confluent_client/bin/nodeconsole | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 02c00e81..ee4acf52 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -404,6 +404,7 @@ if options.screenshot: 
cursor_down(numrows) sys.stdout.write('\n') sys.exit(0) + def kill(noderange): sess = client.Command() envstring=os.environ.get('NODECONSOLE_WINDOWED_COMMAND') @@ -411,29 +412,23 @@ def kill(noderange): envstring = 'xterm' nodes = [] - for res in sess.read('/noderange/{0}/nodes/'.format(args[0])): + for res in sess.read('/noderange/{0}/nodes/'.format(noderange)): node = res.get('item', {}).get('href', '/').replace('/', '') if not node: sys.stderr.write(res.get('error', repr(res)) + '\n') sys.exit(1) nodes.append(node) - for node in nodes: - s=socket.socket(socket.AF_UNIX) - winid=None + for node in nodes: + command = "ps auxww | grep {0} | grep console | egrep '\\b{1}\\b' | grep -v grep | awk '{{print $2}}'".format(envstring, node) + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = process.communicate() try: - win=subprocess.Popen(['xwininfo', '-tree', '-root'], stdout=subprocess.PIPE) - wintr=win.communicate()[0] - for line in wintr.decode('utf-8').split('\n'): - if 'console: {0}'.format(node) in line or 'confetty' in line: - win_obj = [ele for ele in line.split(' ') if ele.strip()] - winid = win_obj[0] - except: - print("Error: cannot retrieve window id of node {}".format(node)) - - if winid: - ps_data=subprocess.Popen(['xkill', '-id', winid ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - + process_id = stdout.decode('utf-8').split()[0] + except IndexError: + sys.stderr.write(node + ": console window not found \n") + continue + subprocess.Popen(["kill", process_id], stdout=subprocess.PIPE, stderr=subprocess.PIPE) sys.exit(0) def handle_geometry(envlist, sizegeometry, side_pad=0, top_pad=0, first=False): From 74355e522053b9f84eb79413ca34a4d400a7f98a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 25 Apr 2025 18:41:01 -0400 Subject: [PATCH 150/413] Implement partial telnet for VMWare The vmware serial support demands more proper telnet support, provide some support for negotiating 
do/don't, will/won't opcodes. --- .../plugins/hardwaremanagement/vcenter.py | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py index 8dc6aa60..1f95f5f2 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py @@ -38,6 +38,7 @@ class VmConsole(conapi.Console): self.socket.sendall(data) def close(self): + self.connected = False if self.socket: self.socket.close() @@ -46,12 +47,38 @@ class VmConsole(conapi.Console): try: pendingdata = self.socket.recv(1024) except Exception as e: - print(repr(e)) pendingdata = '' if pendingdata == '': + self.connected = False self.datacallback(conapi.ConsoleEvent.Disconnect) return - self.datacallback(pendingdata) + reply = b'' + while pendingdata and pendingdata[0] == 255: + cmd = pendingdata[1] + if cmd == 255: + pendingdata = pendingdata[1:] + break + subcmd = pendingdata[2] + if cmd == 253: # DO + # binary, suppress go ohaed + if subcmd in (0, 3): + reply += b'\xff\xfb' + bytes([subcmd]) # will + else: + reply += b'\xff\xfc' + bytes([subcmd]) # won't do anything else + pendingdata = pendingdata[3:] + elif cmd == 251: # will + # binary, suppress go ahead, echo + if subcmd in (0, 1, 3): + reply += b'\xff\xfd' + bytes([subcmd]) # do the implemented things + else: + reply += B'\xff\xfe' + bytes([subcmd]) # don't do others' + pendingdata = pendingdata[3:] + else: + raise Exception(repr(pendingdata[:3])) + if reply: + self.write(reply) + if pendingdata: + self.datacallback(pendingdata) From 20b4f46699edf163c8332242852ca45d32341327 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Sun, 27 Apr 2025 10:32:28 -0400 Subject: [PATCH 151/413] Add screenshot ikvm to vcenter plugin --- .../plugins/hardwaremanagement/vcenter.py | 37 ++++++++++++++++++- 1 file changed, 35 insertions(+), 2 
deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py index 1f95f5f2..05196f20 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py @@ -9,8 +9,17 @@ webclient = eventlet.import_patched('pyghmi.util.webclient') import eventlet.green.socket as socket import eventlet import confluent.interface.console as conapi +import io +class RetainedIO(io.BytesIO): + # Need to retain buffer after close + def __init__(self): + self.resultbuffer = None + def close(self): + self.resultbuffer = self.getbuffer() + super().close() + def fixuuid(baduuid): # VMWare changes the endian stuff in BIOS uuidprefix = (baduuid[:8], baduuid[9:13], baduuid[14:18]) @@ -112,6 +121,15 @@ class VmwApiClient: del self.wc.stdheaders['Authorization'] self.wc.set_header('vmware-api-session-id', body) + def get_screenshot(self, vm, outfile): + vm = self.index_vm(vm) + url = f'/screen?id={vm}' + wc = self.wc.dupe() + wc.set_basic_credentials(self.user, self.password) + fd = webclient.FileDownloader(wc, url, outfile) + fd.start() + fd.join() + def list_vms(self): rsp = self.wc.grab_json_response('/api/vcenter/vm') self.vmlist = {} @@ -212,8 +230,11 @@ class VmwApiClient: if rsp[0]['enter_setup_mode']: return 'setup' rsp = self.wc.grab_json_response_with_status(f'/api/vcenter/vm/{vm}/hardware/boot/device') - if rsp[0][0]['type'] == 'ETHERNET': - return 'network' + try: + if rsp[0][0]['type'] == 'ETHERNET': + return 'network' + except IndexError: + pass return 'default' def get_vm_power(self, vm): @@ -298,6 +319,18 @@ def retrieve(nodes, element, configmanager, inputdata): elif element[:2] == ['inventory', 'hardware'] and len(element) == 4: for rsp in currclient.get_vm_inventory(node): yield rsp + elif element == ['console', 'ikvm_methods']: + dsc = {'ikvm_methods': ['screenshot']} + yield msg.KeyValueData(dsc, 
node) + elif element == ['console', 'ikvm_screenshot']: + # good background for the webui, and kitty + imgdata = RetainedIO() + imgformat = currclient.get_screenshot(node, imgdata) + imgdata = imgdata.getvalue() + if imgdata: + yield msg.ScreenShot(imgdata, node, imgformat=imgformat) + + From 4c90d431b640f16b0ca867ad07c556e93b8c1736 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 28 Apr 2025 17:17:24 -0400 Subject: [PATCH 152/413] Add preliminary proxmox node support --- .../confluent/config/attributes.py | 3 + .../plugins/hardwaremanagement/proxmox.py | 266 ++++++++++++++++++ 2 files changed, 269 insertions(+) create mode 100644 confluent_server/confluent/plugins/hardwaremanagement/proxmox.py diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index bd08b39c..248063f2 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -637,6 +637,9 @@ node = { 'description': ('Fingerprint of the TLS certificate recognized as' 'belonging to the hardware manager of the server'), }, + 'pubkeys.tls': { + 'description': ('Fingerprint of the TLS certificate for service running on host.'), + }, 'pubkeys.ssh': { 'description': ('Fingerprint of the SSH key of the OS running on the ' 'system.'), diff --git a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py new file mode 100644 index 00000000..9953bc29 --- /dev/null +++ b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py @@ -0,0 +1,266 @@ + +import codecs +import confluent.util as util +import confluent.messages as msg +import eventlet +import json +import struct +webclient = eventlet.import_patched('pyghmi.util.webclient') +import eventlet.green.socket as socket +import eventlet +import confluent.interface.console as conapi +import io +import urllib.parse as urlparse + +class RetainedIO(io.BytesIO): + # Need to 
retain buffer after close + def __init__(self): + self.resultbuffer = None + def close(self): + self.resultbuffer = self.getbuffer() + super().close() + +class PmxConsole(conapi.Console): + pass + # this more closely resembles OpenBMC.., websocket based and all + +class PmxApiClient: + def __init__(self, server, user, password, configmanager): + self.user = user + self.password = password + if configmanager: + cv = util.TLSCertVerifier( + configmanager, server, 'pubkeys.tls' + ).verify_cert + else: + cv = lambda x: True + + try: + self.user = self.user.decode() + self.password = self.password.decode() + except Exception: + pass + self.wc = webclient.SecureHTTPConnection(server, port=8006, verifycallback=cv) + self.vmmap = {} + self.login() + self.vmlist = {} + self.vmbyid = {} + + def login(self): + loginform = { + 'username': self.user, + 'password': self.password, + } + loginbody = urlparse.urlencode(loginform) + rsp = self.wc.grab_json_response_with_status('/api2/json/access/ticket', loginbody) + self.wc.cookies['PVEAuthCookie'] = rsp[0]['data']['ticket'] + self.wc.set_header('CSRFPreventionToken', rsp[0]['data']['CSRFPreventionToken']) + + + def get_screenshot(self, vm, outfile): + raise Exception("Not implemented") + + def map_vms(self): + rsp = self.wc.grab_json_response('/api2/json/cluster/resources') + for datum in rsp.get('data', []): + if datum['type'] == 'qemu': + self.vmmap[datum['name']] = (datum['node'], datum['id']) + return self.vmmap + + + def get_vm(self, vm): + if vm not in self.vmmap: + self.map_vms() + return self.vmmap[vm] + + + def get_vm_inventory(self, vm): + host, guest = self.get_vm(vm) + cfg = self.wc.grab_json_response(f'/api2/json/nodes/{host}/{guest}/pending') + myuuid = None + sysinfo = {'name': 'System', 'present': True, 'information': { + 'Product name': 'Proxmox qemu virtual machine', + 'Manufacturer': 'qemu' + }} + invitems = [sysinfo] + for datum in cfg['data']: + if datum['key'] == 'smbios1': + smbios = datum['value'] + for 
smbio in smbios.split(','): + if '=' in smbio: + k, v = smbio.split('=') + if k == 'uuid': + sysinfo['information']['UUID'] = v + elif datum['key'].startswith('net'): + label = 'Network adapter {}'.format(datum['key']) + niccfg = datum['value'] + cfgparts = niccfg.split(',') + nicmodel, mac = cfgparts[0].split('=') + invitems.append({ + 'present': True, + 'name': label, + 'information': { + 'Type': 'Ethernet', + 'Model': nicmodel, + 'MAC Address 1': mac, + } + }) + yield msg.KeyValueData({'inventory': invitems}, vm) + + + def get_vm_serial(self, vm): + # This would be termproxy + # Example url + #wss:///api2/json/nodes/{host}/{guest}/vncwebsocket?port=5900&vncticket=URLENCODEDTICKET + raise Exception('TODO') + + def get_vm_bootdev(self, vm): + host, guest = self.get_vm(vm) + cfg = self.wc.grab_json_response(f'/api2/json/nodes/{host}/{guest}/pending') + for datum in cfg['data']: + if datum['key'] == 'boot': + bootseq = datum.get('pending', datum['value']) + for kv in bootseq.split(','): + k, v = kv.split('=') + if k == 'order': + bootdev = v.split(';')[0] + if bootdev.startswith('net'): + return 'network' + return 'default' + + + def get_vm_power(self, vm): + host, guest = self.get_vm(vm) + rsp = self.wc.grab_json_response(f'/api2/json/nodes/{host}/{guest}/status/current') + rsp = rsp['data'] + currstatus = rsp["qmpstatus"] # stopped, "running" + if currstatus == 'running': + return 'on' + elif currstatus == 'stopped': + return off + raise Exception("Unknnown response to status query") + + def set_vm_power(self, vm, state): + host, guest = self.get_vm(vm) + if state == 'boot': + current = self.get_vm_power(vm) + if current == 'on': + state = 'reset' + else: + state = 'start' + elif state == 'on': + state = 'start' + elif state == 'off': + state = 'stop' + rsp = self.wc.grab_json_response_with_status(f'/api2/json/nodes/{host}/{guest}/status/{state}', method='POST') + print(repr(rsp)) + + def set_vm_bootdev(self, vm, bootdev): + host, guest = self.get_vm(vm) + if 
bootdev not in ('net', 'network', 'default'): + raise Exception('Requested boot device not supported') + cfg = self.wc.grab_json_response(f'/api2/json/nodes/{host}/{guest}/pending') + nonnetdevs = [] + netdevs = [] + for datum in cfg['data']: + if datum['key'] == 'boot': + bootseq = datum.get('pending', datum['value']) + for item in bootseq.split(','): + if item.startswith('order='): + bootdevs = item.replace('order=', '').split(';') + for cbootdev in bootdevs: + if cbootdev.startswith('net'): + netdevs.append(cbootdev) + else: + nonnetdevs.append(cbootdev) + if bootdev in ('net', 'network'): + newbootdevs = netdevs + nonnetdevs + else: + newbootdevs = nonnetdevs + netdevs + neworder = 'order=' + ';'.join(newbootdevs) + self.wc.set_header('Content-Type', 'application/json') + try: + self.wc.grab_json_response_with_status(f'/api2/json/nodes/{host}/{guest}/config', {'boot': neworder}, method='PUT') + finally: + del self.wc.stdheaders['Content-Type'] + + +def prep_proxmox_clients(nodes, configmanager): + cfginfo = configmanager.get_node_attributes(nodes, ['hardwaremanagement.manager', 'secret.hardwaremanagementuser', 'secret.hardwaremanagementpassword'], decrypt=True) + clientsbypmx = {} + clientsbynode = {} + for node in nodes: + cfg = cfginfo[node] + currpmx = cfg['hardwaremanagement.manager']['value'] + if currpmx not in clientsbypmx: + user = cfg.get('secret.hardwaremanagementuser', {}).get('value', None) + passwd = cfg.get('secret.hardwaremanagementpassword', {}).get('value', None) + clientsbypmx[currpmx] = PmxApiClient(currpmx, user, passwd, configmanager) + clientsbynode[node] = clientsbypmx[currpmx] + return clientsbynode + +def retrieve(nodes, element, configmanager, inputdata): + clientsbynode = prep_proxmox_clients(nodes, configmanager) + for node in nodes: + currclient = clientsbynode[node] + if element == ['power', 'state']: + yield msg.PowerState(node, currclient.get_vm_power(node)) + elif element == ['boot', 'nextdevice']: + yield msg.BootDevice(node, 
currclient.get_vm_bootdev(node)) + elif element[:2] == ['inventory', 'hardware'] and len(element) == 4: + for rsp in currclient.get_vm_inventory(node): + yield rsp + elif element == ['console', 'ikvm_methods']: + dsc = {'ikvm_methods': ['screenshot']} + yield msg.KeyValueData(dsc, node) + elif element == ['console', 'ikvm_screenshot']: + # good background for the webui, and kitty + imgdata = RetainedIO() + imgformat = currclient.get_screenshot(node, imgdata) + imgdata = imgdata.getvalue() + if imgdata: + yield msg.ScreenShot(imgdata, node, imgformat=imgformat) + + + + + + +def update(nodes, element, configmanager, inputdata): + clientsbynode = prep_proxmox_clients(nodes, configmanager) + for node in nodes: + currclient = clientsbynode[node] + if element == ['power', 'state']: + currclient.set_vm_power(node, inputdata.powerstate(node)) + yield msg.PowerState(node, currclient.get_vm_power(node)) + elif element == ['boot', 'nextdevice']: + currclient.set_vm_bootdev(node, inputdata.bootdevice(node)) + yield msg.BootDevice(node, currclient.get_vm_bootdev(node)) + +# assume this is only console for now +def create(nodes, element, configmanager, inputdata): + clientsbynode = prep_vcsa_clients(nodes, configmanager) + for node in nodes: + serialdata = clientsbynode[node].get_vm_serial(node) + return VmConsole(serialdata['server'], serialdata['port'], serialdata['tls']) + + + +if __name__ == '__main__': + import sys + import os + from pprint import pprint + myuser = os.environ['PMXUSER'] + mypass = os.environ['PMXPASS'] + vc = PmxApiClient(sys.argv[1], myuser, mypass, None) + vm = sys.argv[2] + if sys.argv[3] == 'setboot': + vc.set_vm_bootdev(vm, sys.argv[4]) + vc.get_vm_bootdev(vm) + elif sys.argv[3] == 'power': + vc.set_vm_power(vm, sys.argv[4]) + elif sys.argv[3] == 'getinfo': + print(repr(list(vc.get_vm_inventory(vm)))) + print("Bootdev: " + vc.get_vm_bootdev(vm)) + print("Power: " + vc.get_vm_power(vm)) + #print("Serial: " + repr(vc.get_vm_serial(vm))) From 
797b5fc478b50c12eee72aae7330d5efae05dc1e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 28 Apr 2025 19:09:56 -0400 Subject: [PATCH 153/413] Fix proxmox power reporting while off --- .../confluent/plugins/hardwaremanagement/proxmox.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py index 9953bc29..593b81e8 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py @@ -137,7 +137,7 @@ class PmxApiClient: if currstatus == 'running': return 'on' elif currstatus == 'stopped': - return off + return 'off' raise Exception("Unknnown response to status query") def set_vm_power(self, vm, state): @@ -153,7 +153,6 @@ class PmxApiClient: elif state == 'off': state = 'stop' rsp = self.wc.grab_json_response_with_status(f'/api2/json/nodes/{host}/{guest}/status/{state}', method='POST') - print(repr(rsp)) def set_vm_bootdev(self, vm, bootdev): host, guest = self.get_vm(vm) From 6552c90e4da8d1420be015c4806f88d22102ccac Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 29 Apr 2025 09:52:19 -0400 Subject: [PATCH 154/413] Clean up busy loop and other oddity with vcenter console Fix it so that common conditions do not trigger trace logs and busy loops. 
--- .../confluent/plugins/hardwaremanagement/vcenter.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py index 05196f20..724c9b5d 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py @@ -38,8 +38,11 @@ class VmConsole(conapi.Console): self.socket = None def connect(self, callback): + try: + self.socket = socket.create_connection((self.host, self.port)) + except Exception: + callback(conapi.ConsoleEvent.Disconnect) self.connected = True - self.socket = socket.create_connection((self.host, self.port)) self.datacallback = callback self.recvr = eventlet.spawn(self.recvdata) @@ -56,8 +59,8 @@ class VmConsole(conapi.Console): try: pendingdata = self.socket.recv(1024) except Exception as e: - pendingdata = '' - if pendingdata == '': + pendingdata = b'' + if pendingdata == b'': self.connected = False self.datacallback(conapi.ConsoleEvent.Disconnect) return From f89cc5863f02fe61ba20d3c70296d6b025370e38 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 29 Apr 2025 09:52:47 -0400 Subject: [PATCH 155/413] Add text console support for Proxmox guests --- .../plugins/hardwaremanagement/proxmox.py | 152 +++++++++++++++++- 1 file changed, 146 insertions(+), 6 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py index 593b81e8..33771544 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py @@ -11,6 +11,14 @@ import eventlet import confluent.interface.console as conapi import io import urllib.parse as urlparse +import eventlet.green.ssl as ssl + + +try: + websocket = eventlet.import_patched('websocket') + wso = websocket.WebSocket 
+except Exception: + wso = object class RetainedIO(io.BytesIO): # Need to retain buffer after close @@ -20,14 +28,138 @@ class RetainedIO(io.BytesIO): self.resultbuffer = self.getbuffer() super().close() +class WrappedWebSocket(wso): + + def set_verify_callback(self, callback): + self._certverify = callback + + def connect(self, url, **options): + + add_tls = url.startswith('wss://') + if add_tls: + hostname, port, resource, _ = websocket._url.parse_url(url) + if hostname[0] != '[' and ':' in hostname: + hostname = '[{0}]'.format(hostname) + if resource[0] != '/': + resource = '/{0}'.format(resource) + url = 'ws://{0}:8006{1}'.format(hostname,resource) + else: + return super(WrappedWebSocket, self).connect(url, **options) + self.sock_opt.timeout = options.get('timeout', self.sock_opt.timeout) + self.sock, addrs = websocket._http.connect(url, self.sock_opt, websocket._http.proxy_info(**options), + options.pop('socket', None)) + self.sock = ssl.wrap_socket(self.sock, cert_reqs=ssl.CERT_NONE) + # The above is supersedeed by the _certverify, which provides + # known-hosts style cert validaiton + bincert = self.sock.getpeercert(binary_form=True) + if not self._certverify(bincert): + raise pygexc.UnrecognizedCertificate('Unknown certificate', bincert) + try: + try: + self.handshake_response = websocket._handshake.handshake(self.sock, *addrs, **options) + except TypeError: + self.handshake_response = websocket._handshake.handshake(self.sock, url, *addrs, **options) + if self.handshake_response.status in websocket._handshake.SUPPORTED_REDIRECT_STATUSES: + options['redirect_limit'] = options.pop('redirect_limit', 3) - 1 + if options['redirect_limit'] < 0: + raise Exception('Redirect limit hit') + url = self.handshake_response.headers['location'] + self.sock.close() + return self.connect(url, **options) + self.connected = True + except: + if self.sock: + self.sock.close() + self.sock = None + raise + + class PmxConsole(conapi.Console): - pass - # this more closely resembles 
OpenBMC.., websocket based and all + def __init__(self, consdata, node, configmanager, apiclient): + self.ws = None + self.consdata = consdata + self.nodeconfig = configmanager + self.connected = False + self.bmc = consdata['server'] + self.node = node + self.recvr = None + self.apiclient = apiclient + + def recvdata(self): + while self.connected: + try: + pendingdata = self.ws.recv() + except websocket.WebSocketConnectionClosedException: + pendingdata = '' + if pendingdata == '': + self.datacallback(conapi.ConsoleEvent.Disconnect) + return + self.datacallback(pendingdata) + + def connect(self, callback): + if self.apiclient.get_vm_power(self.node) != 'on': + callback(conapi.ConsoleEvent.Disconnect) + return + # socket = new WebSocket(socketURL, 'binary'); - subprotocol binary + # client handshake is: + # socket.send(PVE.UserName + ':' + ticket + "\n"); + + # Peer sends 'OK' on handshake, other than that it's direct pass through + # send '2' every 30 seconds for keepalive + # data is xmitted with 0::data + # resize is sent with 1:columns:rows:"" + self.datacallback = callback + kv = util.TLSCertVerifier( + self.nodeconfig, self.node, 'pubkeys.tls_hardwaremanager').verify_cert + bmc = self.bmc + if '%' in self.bmc: + prefix = self.bmc.split('%')[0] + bmc = prefix + ']' + self.ws = WrappedWebSocket(host=bmc) + self.ws.set_verify_callback(kv) + ticket = self.consdata['ticket'] + user = self.consdata['user'] + port = self.consdata['port'] + urlticket = urlparse.quote(ticket) + host = self.consdata['host'] + guest = self.consdata['guest'] + pac = self.consdata['pac'] # fortunately, we terminate this on our end, but it does kind of reduce the value of the + # 'ticket' approach, as the general cookie must be provided as cookie along with the VNC ticket + self.ws.connect(f'wss://{self.bmc}:8006/api2/json/nodes/{host}/{guest}/vncwebsocket?port={port}&vncticket={urlticket}', + host=bmc, cookie=f'PVEAuthCookie={pac}', # cookie='XSRF-TOKEN={0}; 
SESSION={1}'.format(wc.cookies['XSRF-TOKEN'], wc.cookies['SESSION']), + subprotocols=['binary']) + self.ws.send(f'{user}:{ticket}\n') + data = self.ws.recv() + if data == b'OK': + self.ws.recv() # swallow the 'starting serial terminal' message + self.connected = True + self.recvr = eventlet.spawn(self.recvdata) + else: + print(repr(data)) + return + + def write(self, data): + try: + dlen = str(len(data)) + data = data.decode() + self.ws.send('0:' + dlen + ':' + data) + except websocket.WebSocketConnectionClosedException: + self.datacallback(conapi.ConsoleEvent.Disconnect) + + def close(self): + if self.recvr: + self.recvr.kill() + self.recvr = None + if self.ws: + self.ws.close() + self.connected = False + self.datacallback = None class PmxApiClient: def __init__(self, server, user, password, configmanager): self.user = user self.password = password + self.pac = None if configmanager: cv = util.TLSCertVerifier( configmanager, server, 'pubkeys.tls' @@ -40,6 +172,7 @@ class PmxApiClient: self.password = self.password.decode() except Exception: pass + self.server = server self.wc = webclient.SecureHTTPConnection(server, port=8006, verifycallback=cv) self.vmmap = {} self.login() @@ -54,6 +187,7 @@ class PmxApiClient: loginbody = urlparse.urlencode(loginform) rsp = self.wc.grab_json_response_with_status('/api2/json/access/ticket', loginbody) self.wc.cookies['PVEAuthCookie'] = rsp[0]['data']['ticket'] + self.pac = rsp[0]['data']['ticket'] self.wc.set_header('CSRFPreventionToken', rsp[0]['data']['CSRFPreventionToken']) @@ -111,8 +245,14 @@ class PmxApiClient: def get_vm_serial(self, vm): # This would be termproxy # Example url - #wss:///api2/json/nodes/{host}/{guest}/vncwebsocket?port=5900&vncticket=URLENCODEDTICKET - raise Exception('TODO') + host, guest = self.get_vm(vm) + rsp = self.wc.grab_json_response_with_status(f'/api2/json/nodes/{host}/{guest}/termproxy', method='POST') + consdata = rsp[0]['data'] + consdata['server'] = self.server + consdata['host'] = host + 
consdata['guest'] = guest + consdata['pac'] = self.pac + return consdata def get_vm_bootdev(self, vm): host, guest = self.get_vm(vm) @@ -238,10 +378,10 @@ def update(nodes, element, configmanager, inputdata): # assume this is only console for now def create(nodes, element, configmanager, inputdata): - clientsbynode = prep_vcsa_clients(nodes, configmanager) + clientsbynode = prep_proxmox_clients(nodes, configmanager) for node in nodes: serialdata = clientsbynode[node].get_vm_serial(node) - return VmConsole(serialdata['server'], serialdata['port'], serialdata['tls']) + return PmxConsole(serialdata, node, configmanager, clientsbynode[node]) From 80bf0c9bfa14466e01bd2cbac604c66f44f509f1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 29 Apr 2025 09:53:19 -0400 Subject: [PATCH 156/413] Clean up a relatively common potential error in XC scan Do not clutter logs with this condition. --- confluent_server/confluent/discovery/handlers/xcc.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index a7feaa93..77535ce3 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -92,7 +92,10 @@ class NodeHandler(immhandler.NodeHandler): ip, port = self.get_web_port_and_ip() c = webclient.SecureHTTPConnection(ip, port, verifycallback=self.validate_cert) - i = c.grab_json_response('/api/providers/logoninfo') + try: + i = c.grab_json_response('/api/providers/logoninfo') + except Exception: + return modelname = i.get('items', [{}])[0].get('machine_name', None) if modelname: self.info['modelname'] = modelname From 0b5be6858672eb57bf43e5d6532e01a112bd70e6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 29 Apr 2025 09:53:58 -0400 Subject: [PATCH 157/413] Give media bootstrap more of a chance in genesis If the USB is slow to enumerate, keep trying during the network scan to let 
the USB have a chance to interject if the network is unwilling. --- .../initramfs/opt/confluent/bin/rungenesis | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis index 362617ac..4d9a92ad 100644 --- a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis +++ b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis @@ -79,6 +79,24 @@ fi cd /sys/class/net echo -n "Scanning for network configuration..." while ! grep ^EXTMGRINFO: /etc/confluent/confluent.info | awk -F'|' '{print $3}' | grep 1 >& /dev/null && [ "$TRIES" -lt 30 ]; do + if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then + echo "" + echo "Media bootstrap detected, switching to media based boot" + mkdir -p /media/ident + mount /dev/disk/by-label/CNFLNT_IDNT /media/ident + if [ -e /media/ident/genesis_bootstrap.sh ]; then + exec bash /media/ident/genesis_bootstrap.sh + fi + fi + if [ -e /dev/disk/by-label/GENESIS-X86 ]; then + echo "" + echo "Media bootstrap detected, switching to media based boot" + mkdir -p /media/genesis + mount /dev/disk/by-label/GENESIS-X86 /media/genesis + if [ -e /media/genesis/genesis_bootstrap.sh ]; then + exec bash /media/genesis/genesis_bootstrap.sh + fi + fi TRIES=$((TRIES + 1)) for i in *; do ip link set $i up From d4d9b015b8c2bf34fbaf23a4b6aa89e033472a4b Mon Sep 17 00:00:00 2001 From: Tinashe Date: Tue, 29 Apr 2025 09:54:43 -0400 Subject: [PATCH 158/413] document nodeconsole --- confluent_client/doc/man/nodeconsole.ronn | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/confluent_client/doc/man/nodeconsole.ronn b/confluent_client/doc/man/nodeconsole.ronn index 44286d4b..1325b2bf 100644 --- a/confluent_client/doc/man/nodeconsole.ronn +++ b/confluent_client/doc/man/nodeconsole.ronn @@ -34,6 +34,10 @@ console process which will result in the console window closing. 
Dump the log with Timpstamps on the current, local log in /var/log/confluent/consoles. If in collective mode, this only makes sense to use on the current collective manager at this time. + +* `-s`, `--screenshot`: + Attempt to grab screenshot and render using kitty + image protocol. * `-w`, `--windowed`: Open terminal windows for each node. The From fd4c2686a4b3be51bbf6074f92a876c1e6810f94 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 29 Apr 2025 10:19:08 -0400 Subject: [PATCH 159/413] Add telnets support to vmware console This allows the console to be credibly secured, provided the current host is a node to hold the known hosts... --- .../plugins/hardwaremanagement/vcenter.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py index 724c9b5d..e7140d7a 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py @@ -7,6 +7,7 @@ import json import struct webclient = eventlet.import_patched('pyghmi.util.webclient') import eventlet.green.socket as socket +import eventlet.green.ssl as ssl import eventlet import confluent.interface.console as conapi import io @@ -30,18 +31,30 @@ def fixuuid(baduuid): return '-'.join(uuid).lower() class VmConsole(conapi.Console): - def __init__(self, host, port, tls): - if tls: - raise Exception('TODO') # need to have a framework for storing host certificate + def __init__(self, host, port, tls, configmanager=None): + self.tls = tls self.host = host self.port = port self.socket = None + self.nodeconfig = configmanager def connect(self, callback): try: self.socket = socket.create_connection((self.host, self.port)) except Exception: callback(conapi.ConsoleEvent.Disconnect) + if self.tls: + if not self.nodeconfig: + raise Exception('config manager instance required for TLS 
operation') + kv = util.TLSCertVerifier( + self.nodeconfig, self.host, 'pubkeys.tls').verify_cert + sock = ssl.wrap_socket(self.socket, cert_reqs=ssl.CERT_NONE) + # The above is supersedeed by the _certverify, which provides + # known-hosts style cert validaiton + bincert = sock.getpeercert(binary_form=True) + if not kv(bincert): + raise pygexc.UnrecognizedCertificate('Unknown certificate', bincert) + self.socket = sock self.connected = True self.datacallback = callback self.recvr = eventlet.spawn(self.recvdata) @@ -354,7 +367,7 @@ def create(nodes, element, configmanager, inputdata): clientsbynode = prep_vcsa_clients(nodes, configmanager) for node in nodes: serialdata = clientsbynode[node].get_vm_serial(node) - return VmConsole(serialdata['server'], serialdata['port'], serialdata['tls']) + return VmConsole(serialdata['server'], serialdata['port'], serialdata['tls'], configmanager) From f797ede15fb41ca692f9ecd46f7c588f19ede4da Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 29 Apr 2025 15:44:13 -0400 Subject: [PATCH 160/413] Handle 503 from a stopped confluent member If a confluent collective member is stopped, then the HTTPS check passes. If we end up with a 503 indicating the other end has a missing confluent, fall back to the loop to check for other living collective members. 
--- .../initramfs/opt/confluent/bin/apiclient | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient index efc0a562..cade07c2 100644 --- a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient +++ b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient @@ -233,6 +233,7 @@ def get_apikey(nodename, hosts, errout=None): class HTTPSClient(client.HTTPConnection, object): def __init__(self, usejson=False, port=443, host=None, errout=None, phmac=None, checkonly=False): + self.ignorehosts = set([]) self.phmac = phmac self.errout = None if errout: @@ -320,6 +321,8 @@ class HTTPSClient(client.HTTPConnection, object): ctx.check_hostname = True for timeo in (0.1, 5): for host in hosts: + if host in self.ignorehosts: + continue try: addrinf = socket.getaddrinfo(host, self.port)[0] psock = socket.socket(addrinf[0]) @@ -415,6 +418,12 @@ class HTTPSClient(client.HTTPConnection, object): akfile.write('') self.stdheaders['CONFLUENT_APIKEY'] = get_apikey( self.node, [self.host], errout=self.errout) + if rsp.status == 503: # confluent is down, but the server running confluent is otherwise up + authed = False + self.ignorehosts.add(self.host) + host = self.check_connections() + client.HTTPConnection.__init__(self, host, self.port) + raise Exception(rsp.read()) def get_current_vmnic_vswitch(): @@ -558,13 +567,13 @@ if __name__ == '__main__': chunk = reader.read(16384) sys.exit(0) - client = HTTPSClient(usejson, errout=errout, phmac=phmac, checkonly=checkonly) + mclient = HTTPSClient(usejson, errout=errout, phmac=phmac, checkonly=checkonly) if waitfor: status = 201 while status != waitfor: - status, rsp = client.grab_url_with_status(sys.argv[1], data) + status, rsp = mclient.grab_url_with_status(sys.argv[1], data) sys.stdout.write(rsp.decode()) elif checkonly: - sys.stdout.write(client.check_connections()) + 
sys.stdout.write(mclient.check_connections()) else: - sys.stdout.write(client.grab_url(sys.argv[1], data).decode()) + sys.stdout.write(mclient.grab_url(sys.argv[1], data).decode()) From 71f75dbc3168cc77b34f068b72b85a0aeb69288f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 29 Apr 2025 16:13:33 -0400 Subject: [PATCH 161/413] Provide specific error on empty noderanges for expressions For commands such as nodeshell and noderun, provide a more useful message when a noderange is empty. --- confluent_server/confluent/plugins/configuration/attributes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/confluent/plugins/configuration/attributes.py b/confluent_server/confluent/plugins/configuration/attributes.py index 2a8b2bbb..14607af5 100644 --- a/confluent_server/confluent/plugins/configuration/attributes.py +++ b/confluent_server/confluent/plugins/configuration/attributes.py @@ -209,6 +209,9 @@ def update_nodegroup(group, element, configmanager, inputdata): def _expand_expression(nodes, configmanager, inputdata): + if not nodes: + raise exc.InvalidArgumentException( + 'Specified noderange contains no nodes') expression = inputdata.get_attributes(list(nodes)[0]) if type(expression) is dict: expression = expression['expression'] From 5cc70846fd466c5b71e2b6a3bc2792a94ee7c8b8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 30 Apr 2025 15:50:37 -0400 Subject: [PATCH 162/413] Fix asymmetric ipv4/ipv6 attribute PXE response If a nic were aliased *and* the node had attributes for ipv6 but used host resolution for ipv4 identity, it was possible for PXE to pick the wrong way to respond. 
Instruct netutil to specifically consider only the matching family for the PXE/HTTP boot context --- confluent_server/confluent/discovery/protocols/pxe.py | 7 +++++-- confluent_server/confluent/netutil.py | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/confluent_server/confluent/discovery/protocols/pxe.py b/confluent_server/confluent/discovery/protocols/pxe.py index 853e0b8d..11922793 100644 --- a/confluent_server/confluent/discovery/protocols/pxe.py +++ b/confluent_server/confluent/discovery/protocols/pxe.py @@ -673,7 +673,7 @@ def reply_dhcp6(node, addr, cfg, packet, cfd, profile, sock): if not myaddrs: log.log({'info': 'Unable to provide IPv6 boot services to {0}, no viable IPv6 configuration on interface index "{1}" to respond through.'.format(node, addr[-1])}) return - niccfg = netutil.get_nic_config(cfg, node, ifidx=addr[-1]) + niccfg = netutil.get_nic_config(cfg, node, ifidx=addr[-1], onlyfamily=socket.AF_INET6) ipv6addr = niccfg.get('ipv6_address', None) ipv6prefix = niccfg.get('ipv6_prefix', None) ipv6method = niccfg.get('ipv6_method', 'static') @@ -798,7 +798,7 @@ def reply_dhcp4(node, info, packet, cfg, reqview, httpboot, cfd, profile, sock=N relayipa = socket.inet_ntoa(relayip) gateway = None netmask = None - niccfg = netutil.get_nic_config(cfg, node, ifidx=info['netinfo']['ifidx'], relayipn=relayip) + niccfg = netutil.get_nic_config(cfg, node, ifidx=info['netinfo']['ifidx'], relayipn=relayip, onlyfamily=socket.AF_INET) nicerr = niccfg.get('error_msg', False) if nicerr: log.log({'error': nicerr}) @@ -827,6 +827,9 @@ def reply_dhcp4(node, info, packet, cfg, reqview, httpboot, cfd, profile, sock=N myipn = niccfg['deploy_server'] if not myipn: myipn = info['netinfo']['recvip'] + if niccfg['ipv4_address'] == myipn: + log.log({'error': 'Unable to serve {0} due to duplicated address between node and interface index "{}"'.format(node, info['netinfo']['ifidx'])}) + return if httpboot: proto = 'https' if insecuremode == 'never' else 
'http' bootfile = '{0}://{1}/confluent-public/os/{2}/boot.img'.format( diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py index 92cc0bfa..dbd0859d 100644 --- a/confluent_server/confluent/netutil.py +++ b/confluent_server/confluent/netutil.py @@ -429,7 +429,7 @@ def noneify(cfgdata): # if switch and port available, that should match. def get_nic_config(configmanager, node, ip=None, mac=None, ifidx=None, serverip=None, relayipn=b'\x00\x00\x00\x00', - clientip=None): + clientip=None, onlyfamily=None): """Fetch network configuration parameters for a nic For a given node and interface, find and retrieve the pertinent network @@ -508,9 +508,11 @@ def get_nic_config(configmanager, node, ip=None, mac=None, ifidx=None, 'ipv6_method': None, } myaddrs = [] + if onlyfamily is None: + onlyfamily = 0 if ifidx is not None: dhcprequested = False - myaddrs = get_my_addresses(ifidx) + myaddrs = get_my_addresses(ifidx, family=onlyfamily) v4broken = True v6broken = True for addr in myaddrs: From 0c0cac140d98529ab7ae955ce01fb23978319ed5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 30 Apr 2025 17:06:20 -0400 Subject: [PATCH 163/413] Add debian profile material Implement Debian 12 installation --- .../S25confluentinit | 214 +++++-- .../debian/profiles/default/initprofile.sh | 7 + .../debian/profiles/default/preseed.cfg | 27 + .../debian/profiles/default/profile.yaml | 3 + .../debian/profiles/default/scripts/confignet | 565 ++++++++++++++++++ .../default/scripts/firstboot.service | 11 + .../profiles/default/scripts/firstboot.sh | 22 + .../debian/profiles/default/scripts/functions | 209 +++++++ .../debian/profiles/default/scripts/post.sh | 67 +++ .../debian/profiles/default/scripts/pre.sh | 92 ++- .../profiles/default/scripts/prechroot.sh | 19 + .../debian/profiles/default/scripts/setupssh | 43 ++ .../profiles/default/scripts/pre.sh | 1 + 13 files changed, 1218 insertions(+), 62 deletions(-) create mode 100644 
confluent_osdeploy/debian/profiles/default/initprofile.sh create mode 100644 confluent_osdeploy/debian/profiles/default/preseed.cfg create mode 100644 confluent_osdeploy/debian/profiles/default/profile.yaml create mode 100644 confluent_osdeploy/debian/profiles/default/scripts/confignet create mode 100644 confluent_osdeploy/debian/profiles/default/scripts/firstboot.service create mode 100755 confluent_osdeploy/debian/profiles/default/scripts/firstboot.sh create mode 100644 confluent_osdeploy/debian/profiles/default/scripts/functions create mode 100755 confluent_osdeploy/debian/profiles/default/scripts/post.sh mode change 100644 => 100755 confluent_osdeploy/debian/profiles/default/scripts/pre.sh create mode 100644 confluent_osdeploy/debian/profiles/default/scripts/prechroot.sh create mode 100644 confluent_osdeploy/debian/profiles/default/scripts/setupssh diff --git a/confluent_osdeploy/debian/initramfs/lib/debian-installer-startup.d/S25confluentinit b/confluent_osdeploy/debian/initramfs/lib/debian-installer-startup.d/S25confluentinit index 42f5ab97..227441c1 100644 --- a/confluent_osdeploy/debian/initramfs/lib/debian-installer-startup.d/S25confluentinit +++ b/confluent_osdeploy/debian/initramfs/lib/debian-installer-startup.d/S25confluentinit @@ -14,65 +14,127 @@ setdebopt() { echo d-i $1 $3 $2 >> /preseed.cfg } -dhuuid=$(reverse_uuid $(cat /sys/devices/virtual/dmi/id/product_uuid)) -dhcpid=$(mktemp) mkdir -p /etc/confluent -cp /tls/* /etc/ssl/certs/ -for nic in $(ip link | grep mtu|grep -v LOOPBACK|cut -d: -f 2|sed -e 's/ //'); do - ip link set $nic up +for i in /sys/class/net/*; do + ip link set $(basename $i) up done -for nic in $(ip link | grep mtu|grep -v LOOPBACK|grep LOWER_UP|cut -d: -f 2|sed -e 's/ //'); do - if udhcpc -i $nic -p $dhcpid -t 2 -T 2 -n -x 93:0007 -x 97:00$dhuuid -q; then - /opt/confluent/bin/copernicus > /etc/confluent/confluent.info - if grep ^MANAGER:.*\\. /etc/confluent/confluent.info ; then - break - fi +TRIES=5 +while [ ! 
-e /dev/disk ] && [ $TRIES -gt 0 ]; do + sleep 2 + TRIES=$((TRIES - 1)) +done +for i in /sys/class/net/*; do + ip link set $(basename $i) down + udevadm info $i | grep ID_NET_DRIVER=cdc_ether > /dev/null && continue + ip link set $(basename $i) up +done +cp -a /tls/* /etc/ssl/certs/ +mkdir -p /etc/confluent +if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then + tmnt=$(mktemp -d) + tcfg=$(mktemp) + mount /dev/disk/by-label/CNFLNT_IDNT $tmnt + cd $tmnt + deploysrvs=$(sed -n '/^deploy_servers:/,/^[^-]/p' cnflnt.yml |grep ^-|sed -e 's/^- //'|grep -v :) + nodename=$(grep ^nodename: cnflnt.yml|cut -f 2 -d ' ') + echo NODENAME: $nodename > /etc/confluent/confluent.info + sed -n '/^net_cfgs:/,/^[^- ]/{/^[^- ]/!p}' cnflnt.yml |sed -n '/^-/,/^-/{/^-/!p}'| sed -e 's/^[- ]*//'> $tcfg + autoconfigmethod=$(grep ^ipv4_method: $tcfg) + autoconfigmethod=${autoconfigmethod#ipv4_method: } + if [ "$autoconfigmethod" = "static" ]; then + setdebopt netcfg/disable_dhcp true boolean + v4addr=$(grep ^ipv4_address: $tcfg|cut -d: -f 2|sed -e 's/ //') + v4gw=$(grep ^ipv4_gateway: $tcfg|cut -d: -f 2| sed -e 's/ //') + if [ "$v4gw" = "null" ]; then + v4gw="" + fi + v4nm=$(grep ^ipv4_netmask: $tcfg|cut -d: -f 2|sed -e 's/ //') + setdebopt netcfg/get_netmask $v4nm string + setdebopt netcfg/get_ipaddress ${v4addr%/*} string + setdebopt netcfg/confirm_static true boolean + if [ ! -z "$v4gw" ]; then + setdebopt netcfg/get_gateway $v4gw string + fi + NIC="" + while [ -z "$NIC" ]; do + for NICGUESS in $(ip link|grep LOWER_UP|grep -v LOOPBACK|cut -d ' ' -f 2 | sed -e 's/:$//'); do + ip addr add dev $NICGUESS $v4addr + if [ ! 
-z "$v4gw" ]; then + ip route add default via $v4gw + fi + for dsrv in $deploysrvs; do + if wget https://$dsrv/confluent-public/ --tries=1 --timeout=1 -O /dev/null > /dev/null 2>&1; then + deploysrvs=$dsrv + NIC=$NICGUESS + setdebopt netcfg/choose_interface $NIC select + break + fi + done + if [ -z "$NIC" ]; then + ip -4 a flush dev $NICGUESS + else + break + fi + done + done + #TODO: nameservers + elif [ "$v4cfgmeth" = "dhcp" ]; then + setdebopt netcfg/disable_dhcp false boolean + setdebopt netcfg/confirm_static false boolean + for NICGUESS in $(ip link|grep LOWER_UP|grep -v LOOPBACK|cut -d ' ' -f 2 | sed -e 's/:$//'); do + udhcpc $NICGUESS + done + for dsrv in $deploysrvs; do + if wget https://$dsrv/confluent-public/ --tries=1 --timeout=1 -O /dev/null > /dev/null 2>&1; then + deploysrvs=$dsrv + fi + done fi - ip -4 flush dev $nic -done -mgr=$(grep ^MANAGER:.*\\. /etc/confluent/confluent.info|head -n 1|cut -d: -f 2|sed -e 's/ //') -nodename=$(grep ^NODENAME: /etc/confluent/confluent.info|head -n 1|cut -d: -f 2|sed -e 's/ //') -/opt/confluent/bin/clortho $nodename $mgr > /etc/confluent/confluent.apikey + mgr=$deploysrvs + ln -s /opt/confluent/bin/clortho /opt/confluent/bin/genpasshmac + hmackeyfile=/tmp/cnflnthmackeytmp + passfile=/tmp/cnflnttmppassfile + passcrypt=/tmp/cnflntcryptfile + hmacfile=/tmp/cnflnthmacfile + echo -n $(grep ^apitoken: cnflnt.yml|cut -d ' ' -f 2) > $hmackeyfile + /opt/confluent/bin/genpasshmac $passfile $passcrypt $hmacfile $hmackeyfile + wget --header="CONFLUENT_NODENAME: $nodename" --header="CONFLUENT_CRYPTHMAC: $(cat $hmacfile)" --post-file=$passcrypt https://$mgr/confluent-api/self/registerapikey -O - --quiet + cp $passfile /etc/confluent/confluent.apikey + nic=$NIC +else + dhuuid=$(reverse_uuid $(cat /sys/devices/virtual/dmi/id/product_uuid)) + dhcpid=$(mktemp) + mkdir -p /etc/confluent + cp /tls/* /etc/ssl/certs/ + cat /tls/*.pem >> /etc/confluent/ca.pem + for nic in $(ip link | grep mtu|grep -v LOOPBACK|cut -d: -f 2|sed -e 's/ //'); 
do + ip link set $nic up + done + for nic in $(ip link | grep mtu|grep -v LOOPBACK|grep LOWER_UP|cut -d: -f 2|sed -e 's/ //'); do + if udhcpc -i $nic -p $dhcpid -t 2 -T 2 -n -x 93:0007 -x 97:00$dhuuid -q; then + /opt/confluent/bin/copernicus > /etc/confluent/confluent.info + if grep ^MANAGER:.*\\. /etc/confluent/confluent.info ; then + break + fi + fi + ip -4 flush dev $nic + done + mgr=$(grep ^MANAGER:.*\\. /etc/confluent/confluent.info|head -n 1|cut -d: -f 2|sed -e 's/ //') + nodename=$(grep ^NODENAME: /etc/confluent/confluent.info|head -n 1|cut -d: -f 2|sed -e 's/ //') + /opt/confluent/bin/clortho $nodename $mgr > /etc/confluent/confluent.apikey +fi apikey=$(cat /etc/confluent/confluent.apikey) cd /etc/confluent wget --header="CONFLUENT_NODENAME: $nodename" --header="CONFLUENT_APIKEY: $apikey" https://$mgr/confluent-api/self/deploycfg cd - predir=$(mktemp -d) cd $predir +cp /etc/confluent/deploycfg /etc/confluent/confluent.deploycfg profile=$(grep ^profile: /etc/confluent/deploycfg|cut -d ' ' -f 2) -wget https://$mgr/confluent-public/os/$profile/scripts/pre.sh -chmod u+x pre.sh -wget https://$mgr/confluent-public/os/$profile/preseed.cfg -mv preseed.cfg / -setdebopt auto-install/enable true boolean -setdebopt partman/early_command $predir/pre.sh string -cd - -ip -4 a flush dev $nic -setdebopt netcfg/choose_interface $nic select -setdebopt netcfg/get_hostname $nodename string -v4cfgmeth=$(grep ipv4_method: /etc/confluent/deploycfg |cut -d: -f 2|sed -e 's/ //') -if [ "$v4cfgmeth" = "static" ]; then - setdebopt netcfg/disable_dhcp true boolean - v4addr=$(grep ^ipv4_address: /etc/confluent/deploycfg|cut -d: -f 2|sed -e 's/ //') - v4gw=$(grep ^ipv4_gateway: /etc/confluent/deploycfg|cut -d: -f 2| sed -e 's/ //') - if [ "$v4gw" = "null" ]; then - v4gw="" - fi - v4nm=$(grep ^ipv4_netmask: /etc/confluent/deploycfg|cut -d: -f 2|sed -e 's/ //') - setdebopt netcfg/get_netmask $v4nm string - setdebopt netcfg/get_ipaddress $v4addr string - setdebopt netcfg/confirm_static true 
boolean - if [ ! -z "$v4gw" ]; then - setdebopt netcfg/get_gateway $v4gw string - fi - namesrvs=$(sed -n '/^nameservers:/,/^[^-]/p' /etc/confluent/deploycfg|grep ^- | cut -d ' ' -f 2|sed -e 's/ //') - for namesrv in "$namesrvs"; do - setdebopt netcfg/get_nameservers $namesrv string - done -elif [ "$v4cfgmeth" = "dhcp" ]; then - setdebopt netcfg/disable_dhcp false boolean - setdebopt netcfg/confirm_static false boolean -fi +namesrvs=$(sed -n '/^nameservers:/,/^[^-]/p' /etc/confluent/deploycfg|grep ^- | cut -d ' ' -f 2|sed -e 's/ //') +for namesrv in "$namesrvs"; do + setdebopt netcfg/get_nameservers $namesrv string +done rootpass=$(grep ^rootpassword: /etc/confluent/deploycfg|cut -d ' ' -f 2|sed -e 's/ //') if [ "$rootpass" = null ] || [ -z "$rootpass" ]; then setdebopt passwd/root-login false boolean @@ -84,9 +146,8 @@ setdebopt time/zone $(grep ^timezone: /etc/confluent/deploycfg|cut -d ' ' -f 2|s ntpsrvs=$(sed -n '/^ntpservers:/,/^[^-]/p' /etc/confluent/deploycfg|grep ^- | cut -d ' ' -f 2|sed -e 's/ //') for ntpsrv in "$ntpsrvs"; do setdebopt clock-setup/ntp true boolean - setdebopt clock-setup/ntep-server $ntpsrv string + setdebopt clock-setup/ntp-server $ntpsrv string done -#setdebopt console-setup/layoutcode $(grep ^keymap: /etc/confluent/deploycfg|cut -d ' ' -f 2) string setdebopt debian-installer/locale $(grep ^locale: /etc/confluent/deploycfg|cut -d ' ' -f 2) select domainname=$(grep ^dnsdomain: /etc/confluent/deploycfg|cut -d ' ' -f 2) if [ ! 
-z "$domainname" ] && [ "$domainname" != "null" ]; then @@ -95,3 +156,54 @@ fi + +wget https://$mgr/confluent-public/os/$profile/scripts/pre.sh +chmod u+x pre.sh +wget https://$mgr/confluent-public/os/$profile/scripts/prechroot.sh +chmod u+x prechroot.sh +wget https://$mgr/confluent-public/os/$profile/scripts/post.sh +chmod u+x post.sh +wget https://$mgr/confluent-public/os/$profile/preseed.cfg +cat preseed.cfg >> /preseed.cfg +echo $mgr > /etc/confluent/deployer +setdebopt auto-install/enable true boolean +setdebopt partman/early_command $predir/pre.sh string +setdebopt preseed/late_command $predir/prechroot.sh string +mv $predir/post.sh /tmp/ +cd - +ip -4 a flush dev $nic +setdebopt netcfg/choose_interface $nic select +setdebopt netcfg/get_hostname $nodename string +setdebopt netcfg/hostname $nodename string +setdebopt mirror/protocol https string +setdebopt mirror/country manual string +setdebopt mirror/https/hostname deb.debian.org string +setdebopt mirror/https/directory /debian/ string +setdebopt mirror/protocol https string +setdebopt mirror/https/proxy "" string +#setdebopt apt-setup/security_host $mgr string +if [ ! -e /dev/disk/by-label/CNFLNT_IDNT ]; then + v4cfgmeth=$(grep ipv4_method: /etc/confluent/deploycfg |cut -d: -f 2|sed -e 's/ //') + if [ "$v4cfgmeth" = "static" ]; then + setdebopt netcfg/disable_dhcp true boolean + v4addr=$(grep ^ipv4_address: /etc/confluent/deploycfg|cut -d: -f 2|sed -e 's/ //') + v4gw=$(grep ^ipv4_gateway: /etc/confluent/deploycfg|cut -d: -f 2| sed -e 's/ //') + if [ "$v4gw" = "null" ]; then + v4gw="" + fi + v4nm=$(grep ^ipv4_netmask: /etc/confluent/deploycfg|cut -d: -f 2|sed -e 's/ //') + setdebopt netcfg/get_netmask $v4nm string + setdebopt netcfg/get_ipaddress $v4addr string + setdebopt netcfg/confirm_static true boolean + if [ ! 
-z "$v4gw" ]; then + setdebopt netcfg/get_gateway $v4gw string + fi + namesrvs=$(sed -n '/^nameservers:/,/^[^-]/p' /etc/confluent/deploycfg|grep ^- | cut -d ' ' -f 2|sed -e 's/ //') + for namesrv in "$namesrvs"; do + setdebopt netcfg/get_nameservers $namesrv string + done + elif [ "$vpcfgmeth" = "dhcp" ]; then + setdebopt netcfg/disable_dhcp false boolean + setdebopt netcfg/confirm_static false boolean + fi +fi diff --git a/confluent_osdeploy/debian/profiles/default/initprofile.sh b/confluent_osdeploy/debian/profiles/default/initprofile.sh new file mode 100644 index 00000000..9a2705f2 --- /dev/null +++ b/confluent_osdeploy/debian/profiles/default/initprofile.sh @@ -0,0 +1,7 @@ +#!/bin/sh +sed -i 's/label: debian/label: Debian/' $2/profile.yaml && \ +ln -s $1/linux $2/boot/kernel && \ +ln -s $1/initrd.gz $2/boot/initramfs/distribution && \ +mkdir -p $2/boot/efi/boot && \ +mcopy -i $1/boot/grub/efi.img ::/efi/boot/* $2/boot/efi/boot + diff --git a/confluent_osdeploy/debian/profiles/default/preseed.cfg b/confluent_osdeploy/debian/profiles/default/preseed.cfg new file mode 100644 index 00000000..81986a85 --- /dev/null +++ b/confluent_osdeploy/debian/profiles/default/preseed.cfg @@ -0,0 +1,27 @@ +d-i anna/choose_modules string openssh-server-udeb +d-i partman-auto/method string regular +d-i partman-lvm/device_remove_lvm boolean true +d-i partman-md/device_remove_md boolean true +d-i partman-auto/expert_recipe_file string /tmp/partitionfile +d-i partman/confirm_write_new_label boolean true +d-i partman/choose_partition select finish +d-i partman/confirm boolean true +d-i partman/confirm_nooverwrite boolean true +d-i passwd/make-user boolean false +d-i clock-setup/utc boolean true +d-i apt-setup/multiverse boolean false +d-i apt-setup/universe boolean false +d-i apt-setup/backports boolean false +d-i apt-setup/updates boolean false +d-i grub-installer/only_debian boolean true +tasksel tasksel/first multiselect standard +d-i pkgsel/include string openssh-server curl +d-i 
pkgsel/update-policy select none +d-i pkgsel/updatedb boolean false +d-i finish-install/reboot_in_progress note +popularity-contest popularity-contest/participate boolean false +d-i partman-auto/method string lvm +d-i partman-auto/choose_recipe select atomic +d-i partman-lvm/confirm boolean true +d-i partman-lvm/confirm_nooverwrite boolean true +d-i partman-auto-lvm/guided_size string max diff --git a/confluent_osdeploy/debian/profiles/default/profile.yaml b/confluent_osdeploy/debian/profiles/default/profile.yaml new file mode 100644 index 00000000..b76cdfbf --- /dev/null +++ b/confluent_osdeploy/debian/profiles/default/profile.yaml @@ -0,0 +1,3 @@ +label: %%DISTRO%% %%VERSION%% %%ARCH%% (Default Profile) +kernelargs: quiet osprofile=%%PROFILE%% +#installedargs: example # These arguments would be added to the installed system diff --git a/confluent_osdeploy/debian/profiles/default/scripts/confignet b/confluent_osdeploy/debian/profiles/default/scripts/confignet new file mode 100644 index 00000000..5bf0871b --- /dev/null +++ b/confluent_osdeploy/debian/profiles/default/scripts/confignet @@ -0,0 +1,565 @@ +#!/usr/bin/python + +import glob +import json +import os +import socket +import sys +import time +import shlex +import subprocess +try: + import yaml +except ImportError: + pass +try: + from importlib.machinery import SourceFileLoader + def load_source(mod, path): + return SourceFileLoader(mod, path).load_module() +except ImportError: + from imp import load_source + +try: + apiclient = load_source('apiclient', '/opt/confluent/bin/apiclient') +except IOError: + apiclient = load_source('apiclient', '/etc/confluent/apiclient') + +def add_lla(iface, mac): + pieces = mac.split(':') + initbyte = int(pieces[0], 16) ^ 2 + lla = 'fe80::{0:x}{1}:{2}ff:fe{3}:{4}{5}/64'.format(initbyte, pieces[1], pieces[2], pieces[3], pieces[4], pieces[5]) + try: + with open('/proc/sys/net/ipv6/conf/{0}/disable_ipv6'.format(iface), 'w') as setin: + setin.write('0') + 
subprocess.check_call(['ip', 'addr', 'add', 'dev', iface, lla, 'scope', 'link']) + except Exception: + return None + return lla + +#cli = apiclient.HTTPSClient(json=True) +#c = cli.grab_url_with_status('/confluent-api/self/netcfg') +def add_missing_llas(): + #NetworkManager goes out of its way to suppress ipv6 lla, so will just add some + added = {} + linkinfo = subprocess.check_output(['ip', '-br', 'l']).decode('utf8') + ifaces = {} + for line in linkinfo.split('\n'): + line = line.strip().split() + if not line or 'LOOPBACK' in line[-1] or 'NO-CARRIER' in line[-1]: + continue + if 'UP' not in line[-1]: + subprocess.call(['ip', 'link', 'set', line[0], 'up']) + ifaces[line[0]] = line[2] + ips = {} + ipinfo = subprocess.check_output(['ip', '-br', '-6', 'a']).decode('utf8') + for line in ipinfo.split('\n'): + line = line.strip().split(None, 2) + if not line: + continue + ips[line[0]] = line[2] + for iface in ifaces: + for addr in ips.get(iface, '').split(): + if addr.startswith('fe80::'): + break + else: + newlla = add_lla(iface, ifaces[iface]) + if newlla: + added[iface] = newlla + return added + +def rm_tmp_llas(tmpllas): + for iface in tmpllas: + subprocess.check_call(['ip', 'addr', 'del', 'dev', iface, tmpllas[iface]]) + +def await_tentative(): + maxwait = 10 + while b'tentative' in subprocess.check_output(['ip', 'a']): + if maxwait == 0: + break + maxwait -= 1 + time.sleep(1) + +def map_idx_to_name(): + map = {} + devtype = {} + prevdev = None + for line in subprocess.check_output(['ip', 'l']).decode('utf8').splitlines(): + if line.startswith(' ') and 'link/' in line: + typ = line.split()[0].split('/')[1] + devtype[prevdev] = typ if typ != 'ether' else 'ethernet' + if line.startswith(' '): + continue + idx, iface, rst = line.split(':', 2) + prevdev = iface.strip() + rst = rst.split() + try: + midx = rst.index('master') + continue + except ValueError: + pass + idx = int(idx) + iface = iface.strip() + map[idx] = iface + return map, devtype + + +def 
get_interface_name(iname, settings): + explicitname = settings.get('interface_names', None) + if explicitname: + return explicitname + if settings.get('current_nic', False): + return iname + return None + +class NetplanManager(object): + def __init__(self, deploycfg): + self.cfgbydev = {} + self.read_connections() + self.deploycfg = deploycfg + + def read_connections(self): + for plan in glob.glob('/etc/netplan/*.y*ml'): + with open(plan) as planfile: + planinfo = yaml.safe_load(planfile) + if not planinfo: + continue + nicinfo = planinfo.get('network', {}).get('ethernets', {}) + for devname in nicinfo: + if devname == 'lo': + continue + if 'gateway4' in nicinfo[devname]: + # normalize deprecated syntax on read in + gw4 = nicinfo[devname]['gateway4'] + del nicinfo[devname]['gateway4'] + routeinfo = nicinfo[devname].get('routes', []) + for ri in routeinfo: + if ri.get('via', None) == gw4 and ri.get('to', None) in ('default', '0.0.0.0/0', '0/0'): + break + else: + routeinfo.append({ + 'to': 'default', + 'via': gw4 + }) + nicinfo[devname]['routes'] = routeinfo + self.cfgbydev[devname] = nicinfo[devname] + + def apply_configuration(self, cfg): + devnames = cfg['interfaces'] + if len(devnames) != 1: + raise Exception('Multi-nic team/bonds not yet supported') + stgs = cfg['settings'] + needcfgapply = False + for devname in devnames: + needcfgwrite = False + # ipv6_method missing at uconn... 
+ if stgs.get('ipv6_method', None) == 'static': + curraddr = stgs['ipv6_address'] + currips = self.getcfgarrpath([devname, 'addresses']) + if curraddr not in currips: + needcfgwrite = True + currips.append(curraddr) + if stgs.get('ipv4_method', None) == 'static': + curraddr = stgs['ipv4_address'] + currips = self.getcfgarrpath([devname, 'addresses']) + if curraddr not in currips: + needcfgwrite = True + currips.append(curraddr) + gws = [] + gws.append(stgs.get('ipv4_gateway', None)) + gws.append(stgs.get('ipv6_gateway', None)) + for gwaddr in gws: + if gwaddr: + cfgroutes = self.getcfgarrpath([devname, 'routes']) + for rinfo in cfgroutes: + if rinfo.get('via', None) == gwaddr: + break + else: + needcfgwrite = True + cfgroutes.append({'via': gwaddr, 'to': 'default'}) + dnsips = self.deploycfg.get('nameservers', []) + dnsdomain = self.deploycfg.get('dnsdomain', '') + if dnsips: + currdnsips = self.getcfgarrpath([devname, 'nameservers', 'addresses']) + for dnsip in dnsips: + if dnsip and dnsip not in currdnsips: + needcfgwrite = True + currdnsips.append(dnsip) + if dnsdomain: + currdnsdomain = self.getcfgarrpath([devname, 'nameservers', 'search']) + if dnsdomain not in currdnsdomain: + needcfgwrite = True + currdnsdomain.append(dnsdomain) + if needcfgwrite: + needcfgapply = True + newcfg = {'network': {'version': 2, 'ethernets': {devname: self.cfgbydev[devname]}}} + oumask = os.umask(0o77) + with open('/etc/netplan/{0}-confluentcfg.yaml'.format(devname), 'w') as planout: + planout.write(yaml.dump(newcfg)) + os.umask(oumask) + if needcfgapply: + subprocess.call(['netplan', 'apply']) + + def getcfgarrpath(self, devpath): + currptr = self.cfgbydev + for k in devpath[:-1]: + if k not in currptr: + currptr[k] = {} + currptr = currptr[k] + if devpath[-1] not in currptr: + currptr[devpath[-1]] = [] + return currptr[devpath[-1]] + + + +class WickedManager(object): + def __init__(self): + self.teamidx = 0 + self.read_connections() + + def read_connections(self): + 
self.cfgbydev = {} + for ifcfg in glob.glob('/etc/sysconfig/network/ifcfg-*'): + devname = ifcfg.replace('/etc/sysconfig/network/ifcfg-', '') + if devname == 'lo': + continue + currcfg = {} + self.cfgbydev[devname] = currcfg + for cfg in open(ifcfg).read().splitlines(): + cfg = cfg.split('#', 1)[0] + try: + kv = ' '.join(shlex.split(cfg)).split('=', 1) + except Exception: + # unparseable line, likely having something we can't handle + del self.cfgbydev[devname] + if len(kv) != 2: + continue + k, v = kv + k = k.strip() + v = v.strip() + currcfg[k] = v + + def apply_configuration(self, cfg): + stgs = cfg['settings'] + ipcfg = 'STARTMODE=auto\n' + routecfg = '' + bootproto4 = stgs.get('ipv4_method', 'none') + bootproto6 = stgs.get('ipv6_method', 'none') + if bootproto4 == 'dhcp' and bootproto6 == 'dhcp': + ipcfg += 'BOOTPROTO=dhcp\n' + elif bootproto4 == 'dhcp': + ipcfg += 'BOOTPROTO=dhcp4\n' + elif bootproto6 == 'dhcp': + ipcfg += 'BOOTPROTO=dhcp6\n' + else: + ipcfg += 'BOOTPROTO=static\n' + if stgs.get('ipv4_address', None): + ipcfg += 'IPADDR=' + stgs['ipv4_address'] + '\n' + v4gw = stgs.get('ipv4_gateway', None) + if stgs.get('ipv6_address', None): + ipcfg += 'IPADDR_V6=' + stgs['ipv6_address'] + '\n' + v6gw = stgs.get('ipv6_gateway', None) + cname = None + if len(cfg['interfaces']) > 1: # creating new team + if not stgs.get('team_mode', None): + sys.stderr.write("Warning, multiple interfaces ({0}) without a team_mode, skipping setup\n".format(','.join(cfg['interfaces']))) + return + if not stgs.get('connection_name', None): + stgs['connection_name'] = 'bond{0}'.format(self.teamidx) + self.teamidx += 1 + cname = stgs['connection_name'] + with open('/etc/sysconfig/network/ifcfg-{0}'.format(cname), 'w') as teamout: + teamout.write(ipcfg) + if stgs['team_mode'] == 'lacp': + stgs['team_mode'] = '802.3ad' + teamout.write("BONDING_MODULE_OPTS='mode={0} miimon=100'\nBONDING_MASTER=yes\n".format(stgs['team_mode'])) + idx = 1 + for iface in cfg['interfaces']: + 
subprocess.call(['wicked', 'ifdown', iface]) + try: + os.remove('/etc/sysconfig/network/ifcfg-{0}'.format(iface)) + os.remove('/etc/sysconfig/network/ifroute-{0}'.format(iface)) + except OSError: + pass + teamout.write('BONDING_SLAVE{0}={1}\n'.format(idx, iface)) + idx += 1 + else: + cname = list(cfg['interfaces'])[0] + priorcfg = self.cfgbydev.get(cname, {}) + for cf in priorcfg: + if cf.startswith('TEAM_'): + ipcfg += '{0}={1}\n'.format(cf, priorcfg[cf]) + with open('/etc/sysconfig/network/ifcfg-{0}'.format(cname), 'w') as iout: + iout.write(ipcfg) + if v4gw: + routecfg += 'default {0} - {1}\n'.format(v4gw, cname) + if v6gw: + routecfg += 'default {0} - {1}\n'.format(v6gw, cname) + if routecfg: + with open('/etc/sysconfig/network/ifroute-{0}'.format(cname), 'w') as routeout: + routeout.write(routecfg) + subprocess.call(['wicked', 'ifup', cname]) + + +class NetworkManager(object): + bondtypes = { + 'lacp': '802.3ad', + 'loadbalance': 'balance-alb', + 'roundrobin': 'balance-rr', + 'activebackup': 'active-backup', + } + def __init__(self, devtypes, deploycfg): + self.deploycfg = deploycfg + self.connections = {} + self.uuidbyname = {} + self.uuidbydev = {} + self.connectiondetail = {} + self.read_connections() + self.teamidx = 0 + self.devtypes = devtypes + + def read_connections(self): + self.connections = {} + self.uuidbyname = {} + self.uuidbydev = {} + self.connectiondetail = {} + ci = subprocess.check_output(['nmcli', '-t', 'c']).decode('utf8') + for inf in ci.splitlines(): + n, u, t, dev = inf.split(':') + if n == 'NAME': + continue + if dev == '--': + dev = None + self.uuidbyname[n] = u + if dev: + self.uuidbydev[dev] = u + self.connections[u] = {'name': n, 'uuid': u, 'type': t, 'dev': dev} + deats = {} + for deat in subprocess.check_output(['nmcli', 'c', 's', u]).decode('utf8').splitlines(): + k, v = deat.split(':', 1) + v = v.strip() + if v == '--': + continue + if '(default)' in v: + continue + deats[k] = v + self.connectiondetail[u] = deats + + + def 
add_team_member(self, team, member): + bondcfg = {} + if member in self.uuidbydev: + myuuid = self.uuidbydev[member] + deats = self.connectiondetail[myuuid] + currteam = deats.get('connection.master', None) + if currteam == team: + return + for stg in ('ipv4.dhcp-hostname', 'ipv4.dns', 'ipv6.dns', 'ipv6.dhcp-hostname'): + if deats.get(stg, None): + bondcfg[stg] = deats[stg] + if member in self.uuidbyname: + subprocess.check_call(['nmcli', 'c', 'del', self.uuidbyname[member]]) + devtype = self.devtypes.get(member, 'bond-slave') + subprocess.check_call(['nmcli', 'c', 'add', 'type', devtype, 'master', team, 'con-name', member, 'connection.interface-name', member]) + if bondcfg: + args = [] + for parm in bondcfg: + args.append(parm) + args.append(bondcfg[parm]) + subprocess.check_call(['nmcli', 'c', 'm', team] + args) + + def apply_configuration(self, cfg, lastchance=False): + cmdargs = {} + cmdargs['connection.autoconnect'] = 'yes' + stgs = cfg['settings'] + cmdargs['ipv6.method'] = stgs.get('ipv6_method', 'link-local') + if stgs.get('ipv6_address', None): + cmdargs['ipv6.addresses'] = stgs['ipv6_address'] + cmdargs['ipv4.method'] = stgs.get('ipv4_method', 'disabled') + if stgs.get('ipv4_address', None): + cmdargs['ipv4.addresses'] = stgs['ipv4_address'] + if stgs.get('ipv4_gateway', None): + cmdargs['ipv4.gateway'] = stgs['ipv4_gateway'] + if stgs.get('ipv6_gateway', None): + cmdargs['ipv6.gateway'] = stgs['ipv6_gateway'] + dnsips = self.deploycfg.get('nameservers', []) + if not dnsips: + dnsips = [] + dns4 = [] + dns6 = [] + for dnsip in dnsips: + if '.' in dnsip: + dns4.append(dnsip) + elif ':' in dnsip: + dns6.append(dnsip) + if dns4: + cmdargs['ipv4.dns'] = ','.join(dns4) + if dns6: + cmdargs['ipv6.dns'] = ','.join(dns6) + if len(cfg['interfaces']) > 1: # team time.. should be.. 
+ if not cfg['settings'].get('team_mode', None): + sys.stderr.write("Warning, multiple interfaces ({0}) without a team_mode, skipping setup\n".format(','.join(cfg['interfaces']))) + return + if not cfg['settings'].get('connection_name', None): + cfg['settings']['connection_name'] = 'team{0}'.format(self.teamidx) + self.teamidx += 1 + cname = cfg['settings']['connection_name'] + cargs = [] + for arg in cmdargs: + cargs.append(arg) + cargs.append(cmdargs[arg]) + if stgs['team_mode'] in self.bondtypes: + stgs['team_mode'] = self.bondtypes[stgs['team_mode']] + subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond', 'con-name', cname, 'connection.interface-name', cname, 'bond.options', 'mode={}'.format(stgs['team_mode'])] + cargs) + for iface in cfg['interfaces']: + self.add_team_member(cname, iface) + subprocess.check_call(['nmcli', 'c', 'u', cname]) + else: + cname = stgs.get('connection_name', None) + iname = list(cfg['interfaces'])[0] + ctype = self.devtypes.get(iname, None) + if not ctype: + if lastchance: + sys.stderr.write("Warning, no device found for interface_name ({0}), skipping setup\n".format(iname)) + return 1 + if stgs.get('vlan_id', None): + vlan = stgs['vlan_id'] + if ctype == 'infiniband': + vlan = '0x{0}'.format(vlan) if not vlan.startswith('0x') else vlan + cmdargs['infiniband.parent'] = iname + cmdargs['infiniband.p-key'] = vlan + iname = '{0}.{1}'.format(iname, vlan[2:]) + elif ctype == 'ethernet': + ctype = 'vlan' + cmdargs['vlan.parent'] = iname + cmdargs['vlan.id'] = vlan + iname = '{0}.{1}'.format(iname, vlan) + else: + sys.stderr.write("Warning, unknown interface_name ({0}) device type ({1}) for VLAN/PKEY, skipping setup\n".format(iname, ctype)) + return + cname = iname if not cname else cname + u = self.uuidbyname.get(cname, None) + cargs = [] + for arg in cmdargs: + cargs.append(arg) + cargs.append(cmdargs[arg]) + if u: + subprocess.check_call(['nmcli', 'c', 'm', u, 'connection.interface-name', iname] + cargs) + 
subprocess.check_call(['nmcli', 'c', 'u', u]) + else: + subprocess.check_call(['nmcli', 'c', 'add', 'type', ctype, 'con-name', cname, 'connection.interface-name', iname] + cargs) + self.read_connections() + u = self.uuidbyname.get(cname, None) + if u: + subprocess.check_call(['nmcli', 'c', 'u', u]) + + + +if __name__ == '__main__': + havefirewall = subprocess.call(['systemctl', 'status', 'firewalld']) + havefirewall = havefirewall == 0 + if havefirewall: + subprocess.check_call(['systemctl', 'stop', 'firewalld']) + tmpllas = add_missing_llas() + await_tentative() + idxmap, devtypes = map_idx_to_name() + netname_to_interfaces = {} + myaddrs = apiclient.get_my_addresses() + srvs, _ = apiclient.scan_confluents() + doneidxs = set([]) + dc = None + if not srvs: # the multicast scan failed, fallback to deploycfg cfg file + with open('/etc/confluent/confluent.deploycfg', 'r') as dci: + for cfgline in dci.read().split('\n'): + if cfgline.startswith('deploy_server:'): + srvs = [cfgline.split()[1]] + break + for srv in srvs: + try: + s = socket.create_connection((srv, 443)) + except socket.error: + continue + myname = s.getsockname() + s.close() + if len(myname) == 4: + curridx = myname[-1] + else: + myname = myname[0] + myname = socket.inet_pton(socket.AF_INET, myname) + for addr in myaddrs: + if myname == addr[1].tobytes(): + curridx = addr[-1] + if curridx in doneidxs: + continue + for tries in (1, 2, 3): + try: + status, nc = apiclient.HTTPSClient(usejson=True, host=srv).grab_url_with_status('/confluent-api/self/netcfg') + break + except Exception: + if tries == 3: + raise + time.sleep(1) + continue + nc = json.loads(nc) + if not dc: + for tries in (1, 2, 3): + try: + status, dc = apiclient.HTTPSClient(usejson=True, host=srv).grab_url_with_status('/confluent-api/self/deploycfg2') + break + except Exception: + if tries == 3: + raise + time.sleep(1) + continue + dc = json.loads(dc) + iname = get_interface_name(idxmap[curridx], nc.get('default', {})) + if iname: + for iname 
in iname.split(','): + if 'default' in netname_to_interfaces: + netname_to_interfaces['default']['interfaces'].add(iname) + else: + netname_to_interfaces['default'] = {'interfaces': set([iname]), 'settings': nc['default']} + for netname in nc.get('extranets', {}): + uname = '_' + netname + iname = get_interface_name(idxmap[curridx], nc['extranets'][netname]) + if iname: + for iname in iname.split(','): + if uname in netname_to_interfaces: + netname_to_interfaces[uname]['interfaces'].add(iname) + else: + netname_to_interfaces[uname] = {'interfaces': set([iname]), 'settings': nc['extranets'][netname]} + doneidxs.add(curridx) + if 'default' in netname_to_interfaces: + for netn in netname_to_interfaces: + if netn == 'default': + continue + netname_to_interfaces['default']['interfaces'] -= netname_to_interfaces[netn]['interfaces'] + if not netname_to_interfaces['default']['interfaces']: + del netname_to_interfaces['default'] + # Make sure VLAN/PKEY connections are created last + netname_to_interfaces = dict(sorted(netname_to_interfaces.items(), key=lambda item: 'vlan_id' in item[1]['settings'])) + rm_tmp_llas(tmpllas) + if os.path.exists('/usr/sbin/netplan'): + nm = NetplanManager(dc) + if os.path.exists('/usr/bin/nmcli'): + nm = NetworkManager(devtypes, dc) + elif os.path.exists('/usr/sbin/wicked'): + nm = WickedManager() + retrynics = [] + for netn in netname_to_interfaces: + redo = nm.apply_configuration(netname_to_interfaces[netn]) + if redo == 1: + retrynics.append(netn) + if retrynics: + idxmap, devtypes = map_idx_to_name() + if os.path.exists('/usr/sbin/netplan'): + nm = NetplanManager(dc) + if os.path.exists('/usr/bin/nmcli'): + nm = NetworkManager(devtypes, dc) + elif os.path.exists('/usr/sbin/wicked'): + nm = WickedManager() + for netn in retrynics: + nm.apply_configuration(netname_to_interfaces[netn], lastchance=True) + if havefirewall: + subprocess.check_call(['systemctl', 'start', 'firewalld']) + await_tentative() + diff --git 
a/confluent_osdeploy/debian/profiles/default/scripts/firstboot.service b/confluent_osdeploy/debian/profiles/default/scripts/firstboot.service new file mode 100644 index 00000000..209a95e6 --- /dev/null +++ b/confluent_osdeploy/debian/profiles/default/scripts/firstboot.service @@ -0,0 +1,11 @@ +[Unit] +Description=First Boot Process +Requires=network-online.target +After=network-online.target + +[Service] +ExecStart=/opt/confluent/bin/firstboot.sh + +[Install] +WantedBy=multi-user.target + diff --git a/confluent_osdeploy/debian/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/debian/profiles/default/scripts/firstboot.sh new file mode 100755 index 00000000..e960d6e0 --- /dev/null +++ b/confluent_osdeploy/debian/profiles/default/scripts/firstboot.sh @@ -0,0 +1,22 @@ +#!/bin/bash +echo "Confluent first boot is running" +HOME=$(getent passwd $(whoami)|cut -d: -f 6) +export HOME +#cp -a /etc/confluent/ssh/* /etc/ssh/ +#systemctl restart sshd +rootpw=$(grep ^rootpassword: /etc/confluent/confluent.deploycfg |awk '{print $2}') +if [ ! -z "$rootpw" -a "$rootpw" != "null" ]; then + echo root:$rootpw | chpasswd -e +fi +nodename=$(grep ^NODENAME: /etc/confluent/confluent.info | awk '{print $2}') +confluent_apikey=$(cat /etc/confluent/confluent.apikey) +confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg |awk '{print $2}') +while ! 
ping -c 1 $confluent_mgr >& /dev/null; do + sleep 1 +done +source /etc/confluent/functions + +run_remote_parts firstboot.d +run_remote_config firstboot.d +systemctl disable firstboot +curl -f -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" -X POST -d "status: complete" https://$confluent_mgr/confluent-api/self/updatestatus diff --git a/confluent_osdeploy/debian/profiles/default/scripts/functions b/confluent_osdeploy/debian/profiles/default/scripts/functions new file mode 100644 index 00000000..f68f3a5e --- /dev/null +++ b/confluent_osdeploy/debian/profiles/default/scripts/functions @@ -0,0 +1,209 @@ +#!/bin/bash +function test_mgr() { + whost=$1 + if [[ "$whost" == *:* ]] && [[ "$whost" != *[* ]] ; then + whost="[$whost]" + fi + if curl -gs https://${whost}/confluent-api/ > /dev/null; then + return 0 + fi + return 1 +} + +function confluentpython() { + if [ -x /usr/libexec/platform-python ]; then + /usr/libexec/platform-python $* + elif [ -x /usr/bin/python3 ]; then + /usr/bin/python3 $* + elif [ -x /usr/bin/python ]; then + /usr/bin/python $* + elif [ -x /usr/bin/python2 ]; then + /usr/bin/python2 $* + fi +} + +function set_confluent_vars() { + if [ -z "$nodename" ]; then + nodename=$(grep ^NODENAME: /etc/confluent/confluent.info | awk '{print $2}') + fi + if [[ "$confluent_mgr" == *"%"* ]]; then + confluent_mgr="" + fi + if [ -z "$confluent_mgr" ]; then + confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') + if ! test_mgr $confluent_mgr; then + confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') + if [[ "$confluent_mgr" = *":"* ]]; then + confluent_mgr="[$confluent_mgr]" + fi + fi + if ! 
test_mgr $confluent_mgr; then + BESTMGRS=$(grep ^EXTMGRINFO: /etc/confluent/confluent.info | grep '|1$' | sed -e 's/EXTMGRINFO: //' -e 's/|.*//') + OKMGRS=$(grep ^EXTMGRINFO: /etc/confluent/confluent.info | grep '|0$' | sed -e 's/EXTMGRINFO: //' -e 's/|.*//') + for confluent_mgr in $BESTMGRS $OKMGRS; do + if [[ $confluent_mgr == *":"* ]]; then + confluent_mgr="[$confluent_mgr]" + fi + if test_mgr $confluent_mgr; then + break + fi + done + fi + fi + if [ -z "$confluent_profile" ]; then + confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') + fi + export confluent_profile confluent_mgr nodename +} + +fetch_remote() { + curlargs="" + if [ -f /etc/confluent/ca.pem ]; then + curlargs=" --cacert /etc/confluent/ca.pem" + fi + set_confluent_vars + mkdir -p $(dirname $1) + whost=$confluent_mgr + if [[ "$whost" == *:* ]] && [[ "$whost" != *[* ]] ; then + whost="[$whost]" + fi + curl -gf -sS $curlargs https://$whost/confluent-public/os/$confluent_profile/scripts/$1 > $1 + if [ $? 
!= 0 ]; then echo $1 failed to download; return 1; fi +} + +source_remote_parts() { + confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + apiclient=/opt/confluent/bin/apiclient + if [ -f /etc/confluent/apiclient ]; then + apiclient=/etc/confluent/apiclient + fi + scriptlist=$(confluentpython $apiclient /confluent-api/self/scriptlist/$1|sed -e 's/^- //') + for script in $scriptlist; do + source_remote $1/$script + done + rm -rf $confluentscripttmpdir + unset confluentscripttmpdir +} + +run_remote_parts() { + confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + apiclient=/opt/confluent/bin/apiclient + if [ -f /etc/confluent/apiclient ]; then + apiclient=/etc/confluent/apiclient + fi + scriptlist=$(confluentpython $apiclient /confluent-api/self/scriptlist/$1|sed -e 's/^- //') + for script in $scriptlist; do + run_remote $1/$script + done + rm -rf $confluentscripttmpdir + unset confluentscripttmpdir +} + +source_remote() { + set_confluent_vars + unsettmpdir=0 + echo + echo '---------------------------------------------------------------------------' + echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ + if [ -z "$confluentscripttmpdir" ]; then + confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unsettmpdir=1 + fi + echo Sourcing from $confluentscripttmpdir + cd $confluentscripttmpdir + fetch_remote $1 + if [ $? 
!= 0 ]; then echo $1 failed to download; return 1; fi + chmod +x $1 + cmd=$1 + shift + source ./$cmd + cd - > /dev/null + if [ "$unsettmpdir" = 1 ]; then + rm -rf $confluentscripttmpdir + unset confluentscripttmpdir + unsettmpdir=0 + fi + rm -rf $confluentscripttmpdir + return $retcode +} + +run_remote() { + requestedcmd="'$*'" + unsettmpdir=0 + set_confluent_vars + echo + echo '---------------------------------------------------------------------------' + echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ + if [ -z "$confluentscripttmpdir" ]; then + confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unsettmpdir=1 + fi + echo Executing in $confluentscripttmpdir + cd $confluentscripttmpdir + fetch_remote $1 + if [ $? != 0 ]; then echo $requestedcmd failed to download; return 1; fi + chmod +x $1 + cmd=$1 + if [ -x /usr/bin/chcon ]; then + chcon system_u:object_r:bin_t:s0 $cmd + fi + shift + ./$cmd $* + retcode=$? + if [ $retcode -ne 0 ]; then + echo "$requestedcmd exited with code $retcode" + fi + cd - > /dev/null + if [ "$unsettmpdir" = 1 ]; then + rm -rf $confluentscripttmpdir + unset confluentscripttmpdir + unsettmpdir=0 + fi + return $retcode +} + +run_remote_python() { + echo + set_confluent_vars + if [ -f /etc/confluent/ca.pem ]; then + curlargs=" --cacert /etc/confluent/ca.pem" + fi + echo '---------------------------------------------------------------------------' + echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ + confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + echo Executing in $confluentscripttmpdir + cd $confluentscripttmpdir + mkdir -p $(dirname $1) + whost=$confluent_mgr + if [[ "$whost" == *:* ]] && [[ "$whost" != *[* ]] ; then + whost="[$whost]" + fi + curl -gf -sS $curlargs https://$whost/confluent-public/os/$confluent_profile/scripts/$1 > $1 + if [ $? 
!= 0 ]; then echo "'$*'" failed to download; return 1; fi + confluentpython $* + retcode=$? + echo "'$*' exited with code $retcode" + cd - > /dev/null + rm -rf $confluentscripttmpdir + unset confluentscripttmpdir + return $retcode +} + +run_remote_config() { + echo + set_confluent_vars + apiclient=/opt/confluent/bin/apiclient + if [ -f /etc/confluent/apiclient ]; then + apiclient=/etc/confluent/apiclient + fi + echo '---------------------------------------------------------------------------' + echo Requesting to run remote configuration for "'$*'" from $confluent_mgr under profile $confluent_profile + confluentpython $apiclient /confluent-api/self/remoteconfig/"$*" -d {} + confluentpython $apiclient /confluent-api/self/remoteconfig/status -w 204 + echo + echo 'Completed remote configuration' + echo '---------------------------------------------------------------------------' + return +} +#If invoked as a command, use the arguments to actually run a function +(return 0 2>/dev/null) || $1 "${@:2}" diff --git a/confluent_osdeploy/debian/profiles/default/scripts/post.sh b/confluent_osdeploy/debian/profiles/default/scripts/post.sh new file mode 100755 index 00000000..f30d980f --- /dev/null +++ b/confluent_osdeploy/debian/profiles/default/scripts/post.sh @@ -0,0 +1,67 @@ +#!/bin/bash +mkdir -p /run/sshd +mkdir -p /root/.ssh +cat /tmp/ssh/*pubkey >> /root/.ssh/authorized_keys +cat /tmp/ssh/*.ca | sed -e s/^/'@cert-authority * '/ >> /etc/ssh/ssh_known_hosts +chmod 700 /etc/confluent +chmod go-rwx /etc/confluent/* +sshconf=/etc/ssh/ssh_config +if [ -d /etc/ssh/ssh_config.d/ ]; then + sshconf=/etc/ssh/ssh_config.d/01-confluent.conf +fi +echo 'Host *' >> $sshconf +echo ' HostbasedAuthentication yes' >> $sshconf +echo ' EnableSSHKeysign yes' >> $sshconf +echo ' HostbasedKeyTypes *ed25519*' >> $sshconf +/usr/sbin/sshd +confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | awk '{print $2}') +mkdir -p /opt/confluent/bin +python3 /opt/confluent/bin/apiclient 
/confluent-public/os/$confluent_profile/scripts/firstboot.sh > /opt/confluent/bin/firstboot.sh +chmod +x /opt/confluent/bin/firstboot.sh +python3 /opt/confluent/bin/apiclient /confluent-public/os/$confluent_profile/scripts/firstboot.service > /etc/systemd/system/firstboot.service +systemctl enable firstboot +python3 /opt/confluent/bin/apiclient /confluent-public/os/$confluent_profile/scripts/functions > /etc/confluent/functions +source /etc/confluent/functions +python3 /opt/confluent/bin/apiclient /confluent-api/self/nodelist | sed -e s/'^- //' > /tmp/allnodes +cp /tmp/allnodes /root/.shosts +cp /tmp/allnodes /etc/ssh/shosts.equiv +if grep ^ntpservers: /etc/confluent/confluent.deploycfg > /dev/null; then + ntps=$(sed -n '/^ntpservers:/,/^[^-]/p' /etc/confluent/confluent.deploycfg|sed 1d|sed '$d' | sed -e 's/^- //' | paste -sd ' ') + sed -i "s/#NTP=/NTP=$ntps/" /etc/systemd/timesyncd.conf +fi +textcons=$(grep ^textconsole: /etc/confluent/confluent.deploycfg |awk '{print $2}') +updategrub=0 +if [ "$textcons" = "true" ] && ! grep console= /proc/cmdline > /dev/null; then + cons="" + if [ -f /tmp/autocons.info ]; then + cons=$(cat /tmp/autocons.info) + fi + if [ ! -z "$cons" ]; then + sed -i 's/GRUB_CMDLINE_LINUX="\([^"]*\)"/GRUB_CMDLINE_LINUX="\1 console='${cons#/dev/}'"/' /etc/default/grub + updategrub=1 + fi +fi +kargs=$(python3 /opt/confluent/bin/apiclient /confluent-public/os/$confluent_profile/profile.yaml | grep ^installedargs: | sed -e 's/#.*//') +if [ ! -z "$kargs" ]; then + sed -i 's/GRUB_CMDLINE_LINUX="\([^"]*\)"/GRUB_CMDLINE_LINUX="\1 '"${kargs}"'"/' /etc/default/grub +fi + +if [ 1 = $updategrub ]; then + update-grub +fi + +if [ -e /sys/firmware/efi ]; then + bootnum=$(efibootmgr | grep ubuntu | sed -e 's/ .*//' -e 's/\*//' -e s/Boot//) + if [ ! 
-z "$bootnum" ]; then + currboot=$(efibootmgr | grep ^BootOrder: | awk '{print $2}') + nextboot=$(echo $currboot| awk -F, '{print $1}') + [ "$nextboot" = "$bootnum" ] || efibootmgr -o $bootnum,$currboot + efibootmgr -D + fi +fi +run_remote_python syncfileclient +run_remote_parts post.d +run_remote_config post + +python3 /opt/confluent/bin/apiclient /confluent-api/self/updatestatus -d 'status: staged' + diff --git a/confluent_osdeploy/debian/profiles/default/scripts/pre.sh b/confluent_osdeploy/debian/profiles/default/scripts/pre.sh old mode 100644 new mode 100755 index 85347f59..6e8b9c4c --- a/confluent_osdeploy/debian/profiles/default/scripts/pre.sh +++ b/confluent_osdeploy/debian/profiles/default/scripts/pre.sh @@ -1,18 +1,88 @@ -anna-install openssh-server-udeb -mkdir -p ~/.ssh/ -cat /ssh/*pubkey > ~/.ssh/authorized_keys -ssh-keygen -A -mgr=$(grep ^MANAGER:.*\\. /etc/confluent/confluent.info|head -n 1|cut -d: -f 2|sed -e 's/ //') -nodename=$(grep ^NODENAME: /etc/confluent/confluent.info|head -n 1|cut -d: -f 2|sed -e 's/ //') +#!/bin/sh +## Use the following option to add additional boot parameters for the +## installed system (if supported by the bootloader installer). +## Note: options passed to the installer will be added automatically. 
+#d-i debian-installer/add-kernel-opts string [from profile.yaml] +deploycfg=/etc/confluent/confluent.deploycfg +mgr=$(cat /etc/confluent/deployer) + +cryptboot=$(grep encryptboot: $deploycfg|sed -e 's/^encryptboot: //') +if [ "$cryptboot" != "" ] && [ "$cryptboot" != "none" ] && [ "$cryptboot" != "null" ]; then + echo "****Encrypted boot requested, but not implemented for this OS, halting install" > /dev/console + [ -f '/tmp/autoconsdev' ] && (echo "****Encryptod boot requested, but not implemented for this OS,halting install" >> $(cat /tmp/autoconsdev)) + while :; do sleep 86400; done +fi +cat > /usr/lib/live-installer.d/confluent-certs << EOF +#!/bin/sh +cp /tls/* /target/etc/ssl/certs/ +cat /tls/*.pem >> /target/etc/ssl/certs/ca-certificates.crt +EOF +chmod a+x /usr/lib/live-installer.d/confluent-certs +mkdir -p /.ssh/ +cat /ssh/*pubkey > /.ssh/authorized_keys +mkdir -p /etc/ssh +nodename=$(grep ^NODENAME: /etc/confluent/confluent.info|cut -d ' ' -f 2) apikey=$(cat /etc/confluent/confluent.apikey) +ssh-keygen -A for pubkey in /etc/ssh/ssh_host*key.pub; do - certfile=${pubkey%.pub}-cert.pub + certfile=$(echo $pubkey | sed -e s/.pub/-cert.pub/) keyfile=${pubkey%.pub} - wget --post-file=$pubkey --header='CONFLUENT_NODENAME: '$nodename --header="CONFLUENT_APIKEY: $apikey" https://$mgr/confluent-api/self/sshcert -O $certfile + wget --header="CONFLUENT_NODENAME: $nodename" --header="CONFLUENT_APIKEY: $apikey" --post-file=$pubkey https://$mgr/confluent-api/self/sshcert -O $certfile --quiet echo HostKey $keyfile >> /etc/ssh/sshd_config echo HostCertificate $certfile >> /etc/ssh/sshd_config done - -echo sshd:x:939:939::/: >> /etc/passwd +if [ -e /tmp/installdisk ]; then + instdisk=$(cat /tmp/installdisk) +else + for blockdev in $(ls /sys/class/block/); do + shortname=$(basename $blockdev) + if [ "$shortname" != "${shortname%loop*}" ]; then + continue + fi + udevadm info --query=property /dev/$shortname |grep DEVTYPE=disk > /dev/null || continue # ignore partitions + 
udevadm info --query=property /dev/$shortname |grep DM_NAME > /dev/null && continue # not a real disk + sz=$(cat /sys/block/$shortname/size 2> /dev/null) + [ -z "$sz" ] && continue + [ $sz -lt 1048576 ] && continue # Too small + [ -z "$firstdisk" ] && firstdisk=$shortname + if udevadm info --query=property /dev/$shortname|grep ID_MODEL=| sed -e s/' '/_/g | grep -iE '(thinksystem_m.2|m.2_nvme_2-bay_raid_kit)' > /dev/null; then + instdisk=$shortname + break + fi + if udevadm info --query=property /dev/$shortname|grep MD_CONTAINER=imsm; then + sraid=$sortname + else + drv=$(udevadm info -a /dev/sdb|grep DRIVERS==|grep -Ev '""|"sd"' | sed -e s/.*=// -e s/'"'//g) + if [ "ahci" = "$drv" -a -z "$onbdisk" ]; then + onbdisk=$shortname + elif [ "megaraid" = "$drv" -a -z "$rdisk" ]; then + rdisk=$shortname + fi + fi + done +fi +if [ -z "$instdisk" ]; then + if [ ! -z "$sraid"]; then + instdisk=$sraid + elif [ ! -z "$onbdisk" ]; then + instdisk=$onbdisk + elif [ ! -z "$rdisk" ]; then + instdisk=$rdisk + else + instdisk=$firstdisk + fi +fi +if [ ! -z "$instdisk" ]; then + debconf-set partman-auto/disk /dev/$instdisk + debconf-set grub-installer/bootdev /dev/$instdisk +fi +echo HostbasedAuthentication yes >> /etc/ssh/sshd_config +echo HostbasedUsesNameFromPacketOnly yes >> /etc/ssh/sshd_config +echo IgnoreRhosts no >> /etc/ssh/sshd_config +echo sshd:x:1:1::/run/sshd:/bin/false >> /etc/passwd /usr/sbin/sshd - +wget --header="CONFLUENT_NODENAME: $nodename" --header="CONFLUENT_APIKEY: $apikey" https://$mgr/confluent-api/self/nodelist -O /tmp/allnodes --quiet +#kill -HUP $(ps | grep -v grep | grep /usr/sbin/sshd | sed -e 's/^ *//'|cut -d ' ' -f 1) +#curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/getinstalldisk > /tmp/getinstalldisk +#python3 /tmp/getinstalldisk +#sed -i s!%%INSTALLDISK%%!/dev/$(cat /tmp/installdisk)! 
/autoinstall.yaml diff --git a/confluent_osdeploy/debian/profiles/default/scripts/prechroot.sh b/confluent_osdeploy/debian/profiles/default/scripts/prechroot.sh new file mode 100644 index 00000000..0824aead --- /dev/null +++ b/confluent_osdeploy/debian/profiles/default/scripts/prechroot.sh @@ -0,0 +1,19 @@ +#!/bin/sh +mount -o bind /sys /target/sys +mount -o bind /dev /target/dev +mount -o bind /dev/pts /target/dev/pts +mount -o bind /proc /target/proc +mount -o bind /dev/pts /target/dev/pts +mount -o bind /run /target/run +cp -a /etc/confluent /target/etc/confluent +cp -a /opt/confluent /target/opt/confluent +mv /tmp/post.sh /target/tmp/ +cp -a /ssh /tls /target/tmp +cat /tls/*.pem >> /target/etc/confluent/ca.pem +cp -a /etc/ssh/ssh_host_* /target/etc/ssh/ +grep HostCertificate /etc/ssh/sshd_config >> /target/etc/ssh/sshd_config +echo Port 2222 >> /etc/ssh/sshd_config +kill -HUP $(ps |grep -v grep|grep sshd|grep /usr|sed -e s/' root.*//') +cp /tls/* /target/etc/ssl/certs/ +cat /tls/*.pem >> /target/etc/ssl/certs/ca-certificates.crt +chroot /target bash /tmp/post.sh diff --git a/confluent_osdeploy/debian/profiles/default/scripts/setupssh b/confluent_osdeploy/debian/profiles/default/scripts/setupssh new file mode 100644 index 00000000..06ae3e01 --- /dev/null +++ b/confluent_osdeploy/debian/profiles/default/scripts/setupssh @@ -0,0 +1,43 @@ +[ -f /lib/confluent/functions ] && . /lib/confluent/functions +[ -f /etc/confluent/functions ] && . /etc/confluent/functions +[ -f /opt/confluent/bin/apiclient ] && confapiclient=/opt/confluent/bin/apiclient +[ -f /etc/confluent/apiclient ] && confapiclient=/etc/confluent/apiclient +for pubkey in /etc/ssh/ssh_host*key.pub; do + if [ "$pubkey" = /etc/ssh/ssh_host_key.pub ]; then + continue + fi + certfile=${pubkey/.pub/-cert.pub} + rm $certfile + confluentpython $confapiclient /confluent-api/self/sshcert $pubkey -o $certfile +done +if [ -d /etc/ssh/sshd_config.d/ -a ! 
-e /etc/ssh/sshd_config.d/90-confluent.conf ]; then + for cert in /etc/ssh/ssh*-cert.pub; do + echo HostCertificate $cert >> /etc/ssh/sshd_config.d/90-confluent.conf + done + echo HostbasedAuthentication yes >> /etc/ssh/sshd_config.d/90-confluent.conf + echo HostbasedUsesNameFromPacketOnly yes >> /etc/ssh/sshd_config.d/90-confluent.conf + echo IgnoreRhosts no >> /etc/ssh/sshd_config.d/90-confluent.conf +fi + +TMPDIR=$(mktemp -d) +cd $TMPDIR +confluentpython $confapiclient /confluent-public/site/initramfs.tgz -o initramfs.tgz +tar xf initramfs.tgz +for ca in ssh/*.ca; do + LINE=$(cat $ca) + cp -af /etc/ssh/ssh_known_hosts /etc/ssh/ssh_known_hosts.new + grep -v "$LINE" /etc/ssh/ssh_known_hosts > /etc/ssh/ssh_known_hosts.new + echo '@cert-authority *' $LINE >> /etc/ssh/ssh_known_hosts.new + mv /etc/ssh/ssh_known_hosts.new /etc/ssh/ssh_known_hosts +done +for pubkey in ssh/*.*pubkey; do + LINE=$(cat $pubkey) + cp -af /root/.ssh/authorized_keys /root/.ssh/authorized_keys.new + grep -v "$LINE" /root/.ssh/authorized_keys > /root/.ssh/authorized_keys.new + echo "$LINE" >> /root/.ssh/authorized_keys.new + mv /root/.ssh/authorized_keys.new /root/.ssh/authorized_keys +done +confluentpython $confapiclient /confluent-api/self/nodelist | sed -e 's/^- //' > /etc/ssh/shosts.equiv +cat /etc/ssh/shosts.equiv > /root/.shosts +cd - +rm -rf $TMPDIR diff --git a/confluent_osdeploy/ubuntu18.04/profiles/default/scripts/pre.sh b/confluent_osdeploy/ubuntu18.04/profiles/default/scripts/pre.sh index de2cf5e2..b480cce0 100755 --- a/confluent_osdeploy/ubuntu18.04/profiles/default/scripts/pre.sh +++ b/confluent_osdeploy/ubuntu18.04/profiles/default/scripts/pre.sh @@ -85,3 +85,4 @@ wget --header="CONFLUENT_NODENAME: $nodename" --header="CONFLUENT_APIKEY: $apike #curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/getinstalldisk > /tmp/getinstalldisk #python3 /tmp/getinstalldisk #sed -i s!%%INSTALLDISK%%!/dev/$(cat /tmp/installdisk)! 
/autoinstall.yaml +umount /media From 71f5ce2b29c2408e39542c985f35a7f51c9da7f7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 1 May 2025 09:25:05 -0400 Subject: [PATCH 164/413] Add deployment lock mechanism This allows users to opt into disabling setting further profile changes. Nodes may be 'unlocked' (normal), 'autolock' (will lock on next completion), or 'locked' (unable to change the pending OS profile) --- confluent_client/bin/nodedeploy | 10 ++++++++ .../confluent/config/attributes.py | 7 ++++++ confluent_server/confluent/core.py | 3 +++ confluent_server/confluent/messages.py | 14 +++++++++++ .../plugins/configuration/attributes.py | 24 +++++++++++++++++++ confluent_server/confluent/selfservice.py | 4 ++++ 6 files changed, 62 insertions(+) diff --git a/confluent_client/bin/nodedeploy b/confluent_client/bin/nodedeploy index 15e78f37..1e172fea 100755 --- a/confluent_client/bin/nodedeploy +++ b/confluent_client/bin/nodedeploy @@ -117,6 +117,16 @@ def main(args): else: sys.stderr.write('No deployment profiles available, try osdeploy import or imgutil capture\n') sys.exit(1) + lockednodes = [] + for lockinfo in c.read('/noderange/{0}/deployment/lock'.format(args.noderange)): + for node in lockinfo.get('databynode', {}): + lockstate = lockinfo['databynode'][node]['lock']['value'] + if lockstate == 'locked': + lockednodes.append(node) + if lockednodes: + sys.stderr.write('Requested noderange has nodes with locked deployment: ' + ','.join(lockednodes)) + sys.stderr.write('\n') + sys.exit(1) armonce(args.noderange, c) setpending(args.noderange, args.profile, c) else: diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index 248063f2..4f6531bd 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -215,6 +215,13 @@ node = { 'Using this requires that collective members be ' 'defined as nodes for noderange expansion') }, + 'deployment.lock': 
{ + 'description': ('Indicates whether deployment actions should be impeded. ' + 'If locked, it indicates that a pending profile should not be applied. ' + 'If "autolock", then locked will be set when current pending deployment completes. ' + ), + 'validlist': ('autolock', 'locked') + }, 'deployment.pendingprofile': { 'description': ('An OS profile that is pending deployment. This indicates to ' 'the network boot subsystem what should be offered when a potential ' diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index e25e82d2..0e754b9f 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -453,6 +453,9 @@ def _init_core(): 'default': 'ipmi', }), 'deployment': { + 'lock': PluginRoute({ + 'handler': 'attributes' + }), 'ident_image': PluginRoute({ 'handler': 'identimage' }) diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index 6dbe031f..04ca43f7 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -574,6 +574,8 @@ def get_input_message(path, operation, inputdata, nodes=None, multinode=False, elif '/'.join(path).startswith( 'configuration/management_controller/licenses') and inputdata: return InputLicense(path, nodes, inputdata, configmanager) + elif path == ['deployment', 'lock'] and inputdata: + return InputDeploymentLock(path, nodes, inputdata) elif path == ['deployment', 'ident_image']: return InputIdentImage(path, nodes, inputdata) elif path == ['console', 'ikvm']: @@ -957,6 +959,18 @@ class InputIdentImage(ConfluentInputMessage): keyname = 'ident_image' valid_values = ['create'] +class InputDeploymentLock(ConfluentInputMessage): + keyname = 'lock' + valid_values = ['autolock', 'unlocked', 'locked'] + +class DeploymentLock(ConfluentChoiceMessage): + valid_values = set([ + 'autolock', + 'locked', + 'unlocked', + ]) + keyname = 'lock' + class InputIkvmParams(ConfluentInputMessage): keyname = 'method' 
valid_values = ['unix', 'wss', 'url'] diff --git a/confluent_server/confluent/plugins/configuration/attributes.py b/confluent_server/confluent/plugins/configuration/attributes.py index 14607af5..434d6c50 100644 --- a/confluent_server/confluent/plugins/configuration/attributes.py +++ b/confluent_server/confluent/plugins/configuration/attributes.py @@ -109,6 +109,13 @@ def retrieve_nodegroup(nodegroup, element, configmanager, inputdata, clearwarnby def retrieve_nodes(nodes, element, configmanager, inputdata, clearwarnbynode): attributes = configmanager.get_node_attributes(nodes) + if element[-1] == 'lock': + for node in nodes: + lockstate = attributes.get(node, {}).get('deployment.lock', {}).get('value', None) + if lockstate not in ('locked', 'autolock'): + lockstate = 'unlocked' + yield msg.DeploymentLock(node, lockstate) + return if element[-1] == 'all': for node in util.natural_sort(nodes): if clearwarnbynode and node in clearwarnbynode: @@ -247,12 +254,20 @@ def yield_rename_resources(namemap, isnode): else: yield msg.RenamedResource(node, namemap[node]) +def update_locks(nodes, configmanager, inputdata): + for node in nodes: + updatestate = inputdata.inputbynode[node] + configmanager.set_node_attributes({node: {'deployment.lock': updatestate}}) + yield msg.DeploymentLock(node, updatestate) + def update_nodes(nodes, element, configmanager, inputdata): updatedict = {} if not nodes: raise exc.InvalidArgumentException( 'No action to take, noderange is empty (if trying to define ' 'group attributes, use nodegroupattrib)') + if element[-1] == 'lock': + return update_locks(nodes, configmanager, inputdata) if element[-1] == 'check': for node in nodes: check = inputdata.get_attributes(node, allattributes.node) @@ -273,6 +288,15 @@ def update_nodes(nodes, element, configmanager, inputdata): configmanager.rename_nodes(namemap) return yield_rename_resources(namemap, isnode=True) clearwarnbynode = {} + for node in nodes: + updatenode = inputdata.get_attributes(node, 
allattributes.node) + if updatenode and 'deployment.lock' in updatenode: + raise exc.InvalidArgumentException('Deployment lock must be manipulated by {node}/deployment/lock api') + if updatenode and ('deployment.pendingprofile' in updatenode or 'deployment.apiarmed' in updatenode): + lockcheck = configmanager.get_node_attributes(node, 'deployment.lock') + lockstate = lockcheck.get(node, {}).get('deployment.lock', {}).get('value', None) + if lockstate == 'locked': + raise exc.InvalidArgumentException('Request to set deployment for a node that has locked deployment') for node in nodes: updatenode = inputdata.get_attributes(node, allattributes.node) clearattribs = [] diff --git a/confluent_server/confluent/selfservice.py b/confluent_server/confluent/selfservice.py index de8eb832..6df8ff17 100644 --- a/confluent_server/confluent/selfservice.py +++ b/confluent_server/confluent/selfservice.py @@ -490,6 +490,10 @@ def handle_request(env, start_response): updates['deployment.pendingprofile'] = {'value': ''} if targattr == 'deployment.profile': updates['deployment.stagedprofile'] = {'value': ''} + dls = cfg.get_node_attributes(nodename, 'deployment.lock') + dls = dls.get(nodename, {}).get('deployment.lock', {}).get('value', None) + if dls == 'autolock': + updates['deployment.lock'] = 'locked' currprof = currattr.get(targattr, {}).get('value', '') if currprof != pending: updates[targattr] = {'value': pending} From 62988117f134662b3eca11e2c1994dc7a17eab85 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 1 May 2025 09:39:18 -0400 Subject: [PATCH 165/413] Another iteration toward debian bookworm support --- .../confluent_osdeploy.spec.tmpl | 4 ++-- confluent_server/confluent/osimage.py | 21 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/confluent_osdeploy.spec.tmpl b/confluent_osdeploy/confluent_osdeploy.spec.tmpl index 7eafc23f..c7f42215 100644 --- a/confluent_osdeploy/confluent_osdeploy.spec.tmpl +++ 
b/confluent_osdeploy/confluent_osdeploy.spec.tmpl @@ -30,7 +30,7 @@ This contains support utilities for enabling deployment of x86_64 architecture s ln -s el8 el9 cp -a el8 el10 mv el10/initramfs/usr el10/initramfs/var -for os in rhvh4 el7 genesis el8 suse15 ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9 el10; do +for os in rhvh4 el7 genesis el8 suse15 debian ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9 el10; do mkdir ${os}out cd ${os}out if [ -d ../${os}bin ]; then @@ -84,7 +84,7 @@ cp -a esxi7 esxi9 %install mkdir -p %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ cp LICENSE %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ -for os in rhvh4 el7 el8 el9 el10 genesis suse15 ubuntu20.04 ubuntu18.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 esxi9 coreos; do +for os in rhvh4 el7 el8 el9 el10 genesis suse15 ubuntu20.04 debian ubuntu18.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 esxi9 coreos; do mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/profiles cp ${os}out/addons.* %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 8feefead..c4401c34 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -433,6 +433,27 @@ def check_esxi(isoinfo): 'category': 'esxi{0}'.format(version.split('.', 1)[0]) } +def check_debian(isoinfo): + if '.disk/info' not in isoinfo[1]: + return None + diskinfo = isoinfo[1]['.disk/info'] + diskbits = diskinfo.split(b' ') + if diskbits[0] == b'Debian': + if b'mini.iso' not in diskbits: + raise Exception("Debian only supports the 'netboot mini.iso' type images") + version = diskbits[2].decode() + arch = diskbits[4].decode() + if arch != 'amd64': + raise Exception("Unsupported debian architecture {}".format(arch)) + arch = 'x86_64' + name = 'debian-{0}-{1}'.format(version, arch) + return { + 
'name': name, + 'method': EXTRACT, + 'category': 'debian', + } + + def check_ubuntu(isoinfo): if '.disk/info' not in isoinfo[1]: return None From b3ffd632a53614b65e09df796e2e95ebfcba03fd Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 1 May 2025 10:23:42 -0400 Subject: [PATCH 166/413] Add proxmox ve example scripts to debian profile --- .../debian/profiles/default/initprofile.sh | 1 + .../default/scripts/firstboot.d/.gitignore | 0 .../profiles/default/scripts/post.d/.gitignore | 0 .../scripts/proxmox/proxmoxve.firstboot | 4 ++++ .../default/scripts/proxmox/proxmoxve.post | 18 ++++++++++++++++++ confluent_server/confluent/osimage.py | 11 ++++++++++- 6 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 confluent_osdeploy/debian/profiles/default/scripts/firstboot.d/.gitignore create mode 100644 confluent_osdeploy/debian/profiles/default/scripts/post.d/.gitignore create mode 100644 confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.firstboot create mode 100644 confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.post diff --git a/confluent_osdeploy/debian/profiles/default/initprofile.sh b/confluent_osdeploy/debian/profiles/default/initprofile.sh index 9a2705f2..09f95806 100644 --- a/confluent_osdeploy/debian/profiles/default/initprofile.sh +++ b/confluent_osdeploy/debian/profiles/default/initprofile.sh @@ -3,5 +3,6 @@ sed -i 's/label: debian/label: Debian/' $2/profile.yaml && \ ln -s $1/linux $2/boot/kernel && \ ln -s $1/initrd.gz $2/boot/initramfs/distribution && \ mkdir -p $2/boot/efi/boot && \ +rm $2/distribution && \ mcopy -i $1/boot/grub/efi.img ::/efi/boot/* $2/boot/efi/boot diff --git a/confluent_osdeploy/debian/profiles/default/scripts/firstboot.d/.gitignore b/confluent_osdeploy/debian/profiles/default/scripts/firstboot.d/.gitignore new file mode 100644 index 00000000..e69de29b diff --git a/confluent_osdeploy/debian/profiles/default/scripts/post.d/.gitignore 
b/confluent_osdeploy/debian/profiles/default/scripts/post.d/.gitignore new file mode 100644 index 00000000..e69de29b diff --git a/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.firstboot b/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.firstboot new file mode 100644 index 00000000..a6dcac35 --- /dev/null +++ b/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.firstboot @@ -0,0 +1,4 @@ +#!/bin/bash +# Add this to firstboot.d +apt-get install proxmox-ve postfix open-iscsi chrony + diff --git a/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.post b/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.post new file mode 100644 index 00000000..f4ba3fa2 --- /dev/null +++ b/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.post @@ -0,0 +1,18 @@ +#!/bin/bash +# This script would run in post.d +# +echo "deb [arch=amd64] http://download.proxmox.com/debian/pve bookworm pve-no-subscription" > /etc/apt/sources.list.d/pve-install-repo.list +wget https://enterprise.proxmox.com/debian/proxmox-release-bookworm.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg +sum=$(sha512sum /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg) +if [ "$sum" -ne "7da6fe34168adc6e479327ba517796d4702fa2f8b4f0a9833f5ea6e6b48f6507a6da403a274fe201595edc86a84463d50383d07f64bdde2e3658108db7d6dc87" ]; then + echo "Mismatch in fingerprint!" 
+ exit 1 +fi +apt-get update && apt-get full-upgrade + +apt-get install proxmox-default-kernel +apt-get remove linux-image-amd64 'linux-image-6.1*' +update-grub +apt-get remove os-probec + + diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index c4401c34..ed2a3a41 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -441,8 +441,17 @@ def check_debian(isoinfo): if diskbits[0] == b'Debian': if b'mini.iso' not in diskbits: raise Exception("Debian only supports the 'netboot mini.iso' type images") - version = diskbits[2].decode() + major = diskbits[2].decode() arch = diskbits[4].decode() + buildtag = diskbits[-1].decode().strip() # 20230607+deb12u10 + minor = '0' + if '+' in buildtag: + _, variant = buildtag.split('+') + variant = variant.replace('deb', '') + if 'u' in variant: + minor = variant.split('u')[1] + version = '{0}.{1}'.format(major, minor) + if arch != 'amd64': raise Exception("Unsupported debian architecture {}".format(arch)) arch = 'x86_64' From 5be99d995a0d20ff4586dbe11d427fbdc467a3e0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 1 May 2025 11:44:35 -0400 Subject: [PATCH 167/413] Fix up VM power handling For both proxmox and vmware, properly model the 'oldstate' feature. For proxmox, further: - Wait for power change to actually take effect - Change reset to a cycle, to help nodesetboot actually work correctly. 
--- .../plugins/hardwaremanagement/proxmox.py | 28 +++++++++++++++++-- .../plugins/hardwaremanagement/vcenter.py | 9 ++++-- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py index 33771544..bbe4ae9c 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py @@ -12,6 +12,7 @@ import confluent.interface.console as conapi import io import urllib.parse as urlparse import eventlet.green.ssl as ssl +import eventlet try: @@ -282,17 +283,38 @@ class PmxApiClient: def set_vm_power(self, vm, state): host, guest = self.get_vm(vm) + current = None + newstate = '' + targstate = state + if targstate == 'boot': + targstate = 'on' if state == 'boot': current = self.get_vm_power(vm) if current == 'on': state = 'reset' + newstate = 'reset' else: state = 'start' elif state == 'on': state = 'start' elif state == 'off': state = 'stop' - rsp = self.wc.grab_json_response_with_status(f'/api2/json/nodes/{host}/{guest}/status/{state}', method='POST') + if state == 'reset': # check for pending config + cfg = self.wc.grab_json_response(f'/api2/json/nodes/{host}/{guest}/pending') + for datum in cfg['data']: + if datum['key'] == 'boot' and 'pending' in datum: + self.set_vm_power(vm, 'off') + self.set_vm_power(vm, 'on') + state = '' + newstate = 'reset' + if state: + rsp = self.wc.grab_json_response_with_status(f'/api2/json/nodes/{host}/{guest}/status/{state}', method='POST') + if state and state != 'reset': + newstate = self.get_vm_power(vm) + while newstate != targstate: + eventlet.sleep(0.1) + newstate = self.get_vm_power(vm) + return newstate, current def set_vm_bootdev(self, vm, bootdev): host, guest = self.get_vm(vm) @@ -370,8 +392,8 @@ def update(nodes, element, configmanager, inputdata): for node in nodes: currclient = clientsbynode[node] if element == 
['power', 'state']: - currclient.set_vm_power(node, inputdata.powerstate(node)) - yield msg.PowerState(node, currclient.get_vm_power(node)) + newstate, oldstate = currclient.set_vm_power(node, inputdata.powerstate(node)) + yield msg.PowerState(node, newstate, oldstate) elif element == ['boot', 'nextdevice']: currclient.set_vm_bootdev(node, inputdata.bootdevice(node)) yield msg.BootDevice(node, currclient.get_vm_bootdev(node)) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py index e7140d7a..00f6c2bc 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py @@ -265,18 +265,23 @@ class VmwApiClient: raise Exception("Unknown response {}".format(repr(rsp))) def set_vm_power(self, vm, state): + current = None + targstate = state vm = self.index_vm(vm) if state == 'boot': current = self.get_vm_power(vm) if current == 'on': state = 'reset' + targstate = state else: + targstate = 'on' state = 'start' elif state == 'on': state = 'start' elif state == 'off': state = 'stop' rsp = self.wc.grab_json_response_with_status(f'/api/vcenter/vm/{vm}/power?action={state}', method='POST') + return targstate, current def set_vm_bootdev(self, vm, bootdev): @@ -356,8 +361,8 @@ def update(nodes, element, configmanager, inputdata): for node in nodes: currclient = clientsbynode[node] if element == ['power', 'state']: - currclient.set_vm_power(node, inputdata.powerstate(node)) - yield msg.PowerState(node, currclient.get_vm_power(node)) + newstate, oldstate = currclient.set_vm_power(node, inputdata.powerstate(node)) + yield msg.PowerState(node, newstate, oldstate) elif element == ['boot', 'nextdevice']: currclient.set_vm_bootdev(node, inputdata.bootdevice(node)) yield msg.BootDevice(node, currclient.get_vm_bootdev(node)) From fbea510fc1fd4aba52ecc61a2c05fc8c83695a83 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson 
Date: Thu, 1 May 2025 12:13:27 -0400 Subject: [PATCH 168/413] Have nodeconsole respond to resize When doing the screenshot tiling with interval, sanely handle resizing the terminal. --- confluent_client/bin/nodeconsole | 69 ++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 16 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index ee4acf52..2b8314d8 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -30,6 +30,7 @@ import confluent.sortutil as sortutil import confluent.logreader as logreader import time import select +import signal import socket import re import tty @@ -54,7 +55,6 @@ except ImportError: def draw(self, imgfile): sys.stderr.write("PySixel not detected, Sixel format display not supported\n") - confettypath = os.path.join(os.path.dirname(sys.argv[0]), 'confetty') argparser = optparse.OptionParser( usage="Usage: %prog [options] [kill][-- [passthroughoptions]]", @@ -326,10 +326,34 @@ def reset_cursor(node): nodepositions = {} numrows = 0 +cwidth = 0 +cheight = 0 +imagedatabynode = {} + +def redraw(): + for node in imagedatabynode: + imgdata = imagedatabynode[node] + if node in nodepositions: + prep_node_tile(node) + cursor_save() + else: + if options.interval is not None: + if node != firstnodename: + sys.stderr.write('Multiple nodes not supported for interval') + sys.exit(1) + sticky_cursor() + sys.stdout.write('{}: '.format(node)) + # one row is used by our own name, so cheight - 1 for that allowance + draw_image(imgdata.encode(), cwidth, cheight - 1 if cheight else cheight) + if node in nodepositions: + cursor_restore() + reset_cursor(node) + else: + sys.stdout.write('\n') + sys.stdout.flush() + def do_screenshot(): global numrows - cwidth = None - cheight = None sess = client.Command() if options.tile: imageformat = os.environ.get('CONFLUENT_IMAGE_PROTOCOL', 'kitty') @@ -341,19 +365,31 @@ def do_screenshot(): for res in 
sess.read('/noderange/{}/nodes/'.format(args[0])): allnodes.append(res['item']['href'].replace('/', '')) numnodes += 1 - cols, rows, cwidth, cheight, numrows = determine_tile_size(numnodes) - currcol = 1 - currcolcell = 0 - currrowcell = 0 - for node in allnodes: - nodepositions[node] = currcolcell, currrowcell - if currcol < cols: - currcol += 1 - currcolcell += cwidth - else: - currcol = 1 - currcolcell = 0 - currrowcell += cheight + def do_resize(a=None, b=None): + if a: + # on a window resize, clear the old stuff + # ideally we'd retain the images and redraw them + sys.stdout.write('\x1bc') + global numrows + global cwidth + global cheight + cols, rows, cwidth, cheight, numrows = determine_tile_size(numnodes) + currcol = 1 + currcolcell = 0 + currrowcell = 0 + for node in allnodes: + nodepositions[node] = currcolcell, currrowcell + if currcol < cols: + currcol += 1 + currcolcell += cwidth + else: + currcol = 1 + currcolcell = 0 + currrowcell += cheight + if a: + redraw() + do_resize() + signal.signal(signal.SIGWINCH, do_resize) elif options.interval is not None: sys.stdout.write('\x1bc') firstnodename = None @@ -368,6 +404,7 @@ def do_screenshot(): if len(imgdata) < 32: # We were subjected to error sys.stderr.write(f'{node}: Unable to get screenshot\n') continue + imagedatabynode[node] = imgdata if node in nodepositions: prep_node_tile(node) cursor_save() From bfadc284b085ba1aa603476c6bfc754b6b2e3264 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 2 May 2025 08:57:29 -0400 Subject: [PATCH 169/413] Expand documentation of graphical nodeconsole usage. 
--- confluent_client/doc/man/nodeconsole.ronn | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/confluent_client/doc/man/nodeconsole.ronn b/confluent_client/doc/man/nodeconsole.ronn index 1325b2bf..26eedc23 100644 --- a/confluent_client/doc/man/nodeconsole.ronn +++ b/confluent_client/doc/man/nodeconsole.ronn @@ -21,9 +21,15 @@ console process which will result in the console window closing. ## OPTIONS +* `-i N`, `--interval`: + For screenshot mode, fetch new screenshots and overwrite old screenshots every N seconds. + For example, `nodeconsole r3u[21:24] -tsi 3` will tile screenshots of r3u21 through r3u24 and + refresh them every 3 seconds. + * `-t`, `--tile`: - Use tmux to arrange consoles of the given noderange into a tiled layout on - the terminal screen + For text consoles, use tmux to arrange consoles of the given noderange into a tiled layout on + the terminal screen. If using 'screenshot' mode, divide the terminal and display the images + in a grid. * `-l`, `--log`: Perform a log reply on the current, local log in /var/log/confluent/consoles. @@ -36,8 +42,12 @@ console process which will result in the console window closing. manager at this time. * `-s`, `--screenshot`: - Attempt to grab screenshot and render using kitty - image protocol. + Attempt to grab screenshot(s) and render using a terminal + image protocol. The image protocol defaults to kitty, and + can be selected by CONFLUENT_IMAGE_PROTOCOL environment variable. + Supported protocols are kitty, iterm, and, sixel (sixel only + if PySixel is installed). This only presents screenshots, there + is no input supported to graphical consoles from a terminal. * `-w`, `--windowed`: Open terminal windows for each node. 
The From 89437ee761c8e454fcfdb4a3520a5fda10ef2ba1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 2 May 2025 13:19:56 -0400 Subject: [PATCH 170/413] Fix Proxmox sample scripts for post/firstboot --- .../default/scripts/proxmox/proxmoxve.firstboot | 3 ++- .../profiles/default/scripts/proxmox/proxmoxve.post | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.firstboot b/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.firstboot index a6dcac35..b3e0433e 100644 --- a/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.firstboot +++ b/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.firstboot @@ -1,4 +1,5 @@ #!/bin/bash # Add this to firstboot.d -apt-get install proxmox-ve postfix open-iscsi chrony +export DEBIAN_FRONTEND=noninteractive +apt-get -y install proxmox-ve postfix open-iscsi chrony < /dev/null diff --git a/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.post b/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.post index f4ba3fa2..fc413997 100644 --- a/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.post +++ b/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.post @@ -1,18 +1,20 @@ #!/bin/bash # This script would run in post.d # +export DEBIAN_FRONTEND=noninteractive echo "deb [arch=amd64] http://download.proxmox.com/debian/pve bookworm pve-no-subscription" > /etc/apt/sources.list.d/pve-install-repo.list wget https://enterprise.proxmox.com/debian/proxmox-release-bookworm.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg sum=$(sha512sum /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg) if [ "$sum" -ne "7da6fe34168adc6e479327ba517796d4702fa2f8b4f0a9833f5ea6e6b48f6507a6da403a274fe201595edc86a84463d50383d07f64bdde2e3658108db7d6dc87" ]; then echo "Mismatch in fingerprint!" 
+ rm /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg exit 1 fi -apt-get update && apt-get full-upgrade +apt-get update && apt-get -y full-upgrade < /dev/null -apt-get install proxmox-default-kernel -apt-get remove linux-image-amd64 'linux-image-6.1*' +apt-get -y install proxmox-default-kernel < /dev/null +apt-get -y remove linux-image-amd64 'linux-image-6.1*' < /dev/null update-grub -apt-get remove os-probec +apt-get -y remove os-prober < /dev/null From 57ada3728c0d45b0bdc1593c72d268e2e8c65baf Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 2 May 2025 16:18:28 -0400 Subject: [PATCH 171/413] Change default host to localhost only Also, fix support for IPv4 addresses as bind_host --- confluent_server/confluent/httpapi.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index b2d45f0c..9ac44d5e 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -1226,8 +1226,9 @@ def serve(bind_host, bind_port): bind_host, family=socket.AF_UNIX) os.chmod(bind_host, 0o666) else: + addrinfo = socket.getaddrinfo(bind_host, bind_port)[0] sock = eventlet.listen( - (bind_host, bind_port, 0, 0), family=socket.AF_INET6) + addrinfo[-1], family=addrinfo[0]) except socket.error as e: if e.errno != 98: raise @@ -1251,7 +1252,7 @@ def serve(bind_host, bind_port): class HttpApi(object): def __init__(self, bind_host=None, bind_port=None): self.server = None - self.bind_host = bind_host or '::' + self.bind_host = bind_host or '127.0.0.1' self.bind_port = bind_port or 4005 def start(self): From d0ae7b46aec0d6d6740fb28a84c458a8da238faa Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 2 May 2025 16:21:47 -0400 Subject: [PATCH 172/413] Disable the HTML API explorer It may be the case that no one uses it. 
--- confluent_server/confluent/httpapi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index 9ac44d5e..97909710 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -1091,6 +1091,8 @@ def resourcehandler_backend(env, start_response): yield e.get_error_body() def _assemble_html(responses, resource, querydict, url, extension): + yield 'HTML API Explorer is discontinued, notify developers if you want this back' + return yield '' \ 'Confluent REST Explorer: ' + url + '' \ '
127: if bootfile.startswith(b'http'): - bootfile = register_shorturl(bootfile.decode('utf8')).encode('utf8') + bootfile = register_shorturl(bootfile.decode('utf8'), can302, bootshorturl, bootfilename).encode('utf8') else: log.log( {'info': 'Boot offer cannot be made to {0} as the ' diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index 97909710..db521cc1 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -684,14 +684,26 @@ def resourcehandler_backend(env, start_response): if not request[0]: request = request[1:] if request[1] == 'su': # shorturl - targurl = pxe.shorturls.get(request[2], None) + targurl, can302, relurl, bootfilename = pxe.shorturls.get(request[2], (None, None, None, None)) if not targurl: start_response('404 Not Found', headers) yield '' return - headers.append(('Location', targurl)) - start_response('302 Found', headers) - yield '' + if can302: # Maximum transparency helps iPXE and whatever else know the most + headers.append(('Location', targurl)) + start_response('302 Found', headers) + yield '' + else: # The user agent is too dumb, check headers for server side redirects + delegatemethod = env.get('HTTP_X_DELEGATE_METHOD', None) + if delegatemethod == 'accel': + headers = [('Content-Type', 'application/octet-stream')] + headers.append(('X-Accel-Redirect', relurl)) + start_response('200 OK', headers) + yield '' + else: + start_response('502 Bad Gateway', headers) + yield 'URL shortening for a limited client without proxy advertised accel support' + log.log({'error': f'Profile name exceeded DHCP limits, and reverse proxy capabilities not detected, switch to the nginx configuration or shorten the profile name: {relurl}'}) return if len(request) != 4: start_response('400 Bad Request', headers) From d382bd51822e356a068033c290bc9486b13dfcb1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 13 May 2025 14:23:38 -0400 Subject: [PATCH 183/413] Provide cleaner 
recovery for image decode failure When doing the kitty support with pillow installed, recover from a condition, creating a stub 'screenshot' to reflect the error. --- confluent_client/bin/nodeconsole | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 2b8314d8..bbb0d29e 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -243,13 +243,27 @@ def iterm_draw(data, width, height): sys.stdout.write('\a') sys.stdout.flush() +svgared= ';eNrtwgEJAAAAAqD+n64DTVBsUlVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVXVd2vglgY=' + def kitty_draw(data, width, height): if Image: bindata = base64.b64decode(data) binfile = io.BytesIO() binfile.write(bindata) binfile.seek(0) - img = Image.open(binfile) + try: + img = 
Image.open(binfile) + except Exception as e: + errstr = 'Error rendering image: ' + str(e) + sys.stdout.write(errstr) + cursor_left(len(errstr)) + sys.stdout.write('\x1b_Ga=T,f=24,s=640,v=480,z=-1,o=z') + if height: + sys.stdout.write(f',r={height},c={width}') + sys.stdout.write(svgared) + sys.stdout.write('\x1b\\') + sys.stdout.flush() + return outfile = io.BytesIO() img.save(outfile, format='PNG') data = base64.b64encode(outfile.getbuffer()) From 315d1d9b5ebdaaec0381cafb3cc4bdfcdb7db8f5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 13 May 2025 15:39:40 -0400 Subject: [PATCH 184/413] Change to common image from text for errors This allows easier management of rendering of text. Also add a black and white border to make tiling more delineated. --- confluent_client/bin/nodeconsole | 55 +++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index bbb0d29e..42790c46 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -38,7 +38,7 @@ import termios import fcntl import confluent.screensqueeze as sq try: - from PIL import Image + from PIL import Image, ImageDraw except ImportError: Image = None @@ -213,6 +213,27 @@ def cursor_hide(): def cursor_show(): sys.stdout.write('\x1b[?25h') +def draw_text(text, width, height): + if Image: + maxfntsize = 256 + nerr = Image.new(mode='RGB', size=(1024,768), color='green') + nd = ImageDraw.Draw(nerr) + for txtpiece in text.split('\n'): + fntsize = 8 + while nd.textlength(txtpiece, font_size=fntsize) < 896: + fntsize += 1 + fntsize -= 1 + if fntsize < maxfntsize: + maxfntsize = fntsize + nd.text((64, 64), text, font_size=maxfntsize) + outfile = io.BytesIO() + nerr.save(outfile, format='PNG') + data = base64.b64encode(outfile.getbuffer()) + draw_image(data, width, height) + else: + sys.stdout.write(text) + cursor_left(len(txt)) + def draw_image(data, width, height): imageformat = 
os.environ.get('CONFLUENT_IMAGE_PROTOCOL', 'kitty') if imageformat == 'sixel': @@ -243,7 +264,6 @@ def iterm_draw(data, width, height): sys.stdout.write('\a') sys.stdout.flush() -svgared= ';eNrtwgEJAAAAAqD+n64DTVBsUlVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVXVd2vglgY=' def kitty_draw(data, width, height): if Image: @@ -254,18 +274,14 @@ def kitty_draw(data, width, height): try: img = Image.open(binfile) except Exception as e: - errstr = 'Error rendering image: ' + str(e) - sys.stdout.write(errstr) - cursor_left(len(errstr)) - sys.stdout.write('\x1b_Ga=T,f=24,s=640,v=480,z=-1,o=z') - if height: - sys.stdout.write(f',r={height},c={width}') - sys.stdout.write(svgared) - sys.stdout.write('\x1b\\') - sys.stdout.flush() - return + errstr = 'Error rendering image:\n' + str(e) + return draw_text(errstr, width, height) + nimg = Image.new(mode='RGB', 
size=(img.width + 4, img.height + 4), color='black') + nd = ImageDraw.Draw(nimg) + nd.rectangle((0, 0, nimg.width - 1, nimg.height -1), outline='white', width=1) + nimg.paste(img, box=(2, 2)) outfile = io.BytesIO() - img.save(outfile, format='PNG') + nimg.save(outfile, format='PNG') data = base64.b64encode(outfile.getbuffer()) preamble = '\x1b_Ga=T,f=100' if height: @@ -328,8 +344,8 @@ def prep_node_tile(node): cursor_right(currcolcell) if currrowcell: cursor_down(currrowcell) - sys.stdout.write(node) - cursor_left(len(node)) + sys.stdout.write('▏' + node) + cursor_left(len(node) + 1) cursor_down() def reset_cursor(node): @@ -415,9 +431,9 @@ def do_screenshot(): firstnodename = node imgdata = res['databynode'][node].get('image', {}).get('imgdata', None) if imgdata: + errorstr = '' if len(imgdata) < 32: # We were subjected to error - sys.stderr.write(f'{node}: Unable to get screenshot\n') - continue + errorstr = f'Unable to get screenshot' imagedatabynode[node] = imgdata if node in nodepositions: prep_node_tile(node) @@ -430,7 +446,10 @@ def do_screenshot(): sticky_cursor() sys.stdout.write('{}: '.format(node)) # one row is used by our own name, so cheight - 1 for that allowance - draw_image(imgdata.encode(), cwidth, cheight - 1 if cheight else cheight) + if errorstr: + draw_text(errorstr, cwidth, cheight -1 if cheight else cheight) + else: + draw_image(imgdata.encode(), cwidth, cheight - 1 if cheight else cheight) if node in nodepositions: cursor_restore() reset_cursor(node) From 7cb6b1ac35d7d40abe7aa373f4b7b3853ac8ede0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 13 May 2025 16:46:18 -0400 Subject: [PATCH 185/413] Fix autoconsole error on SPCR-free EFI implementations --- confluent_osdeploy/common/profile/scripts/autoconsole | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_osdeploy/common/profile/scripts/autoconsole b/confluent_osdeploy/common/profile/scripts/autoconsole index b37e1df6..421c2e8a 100644 --- 
a/confluent_osdeploy/common/profile/scripts/autoconsole +++ b/confluent_osdeploy/common/profile/scripts/autoconsole @@ -77,6 +77,8 @@ def fixup_ubuntu_grub_serial(): def get_serial_config(): if not os.path.exists('/sys/firmware/efi'): return None + if not os.path.exists('/sys/firmware/acpi/tables/SPCR'): + return None spcr = open("/sys/firmware/acpi/tables/SPCR", "rb") spcr = bytearray(spcr.read()) if spcr[8] != 2 or spcr[36] != 0 or spcr[40] != 1: From 08738713c94ed06b84ec673621e396eb339fbbce Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 15 May 2025 08:39:45 -0400 Subject: [PATCH 186/413] Handle NLMSG_DONE wherever it may appear in reply Some kernels may bundle the NLMSG_DONE in the last useful system call, unlike the previous norm of sending it as a single message in a terminating system call. --- confluent_server/confluent/neighutil.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/neighutil.py b/confluent_server/confluent/neighutil.py index cd08e8bd..d1713f31 100644 --- a/confluent_server/confluent/neighutil.py +++ b/confluent_server/confluent/neighutil.py @@ -48,14 +48,15 @@ def _update_neigh(): ndmsg= b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' s.sendall(nlhdr + ndmsg) neightable = {} + inprogress = True try: - while True: + while inprogress: pdata = s.recv(65536) v = memoryview(pdata) - if struct.unpack('H', v[4:6])[0] == 3: - break while len(v): length, typ = struct.unpack('IH', v[:6]) + if typ == 3: + inprogress = False if typ == 28: hlen = struct.calcsize('BIHBB') _, idx, state, flags, typ = struct.unpack('BIHBB', v[16:16+hlen]) From ef46b6cabd0627429b313f3874882e0b9e916a56 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 15 May 2025 10:00:03 -0400 Subject: [PATCH 187/413] Take ownership of image scaling This fixes the imposed border being mangled, and also allows Konsole to present decent looking scaling since it no longer has to scale. 
--- confluent_client/bin/nodeconsole | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 42790c46..8c920b6a 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -213,19 +213,28 @@ def cursor_hide(): def cursor_show(): sys.stdout.write('\x1b[?25h') +def get_pix_dimensions(width, height): + cheight, cwidth, pixwidth, pixheight = sq.get_screengeom() + imgwidth = int(pixwidth / cwidth * width) + imgheight = int(pixheight / cheight * height) + return imgwidth, imgheight + def draw_text(text, width, height): if Image: maxfntsize = 256 - nerr = Image.new(mode='RGB', size=(1024,768), color='green') + imgwidth, imgheight = get_px_dimensions(width, height) + nerr = Image.new(mode='RGB', size=(imgwidth, imgwidth), color='green') nd = ImageDraw.Draw(nerr) for txtpiece in text.split('\n'): fntsize = 8 - while nd.textlength(txtpiece, font_size=fntsize) < 896: + while nd.textlength(txtpiece, font_size=fntsize) < int(imgwidth * 0.90): fntsize += 1 fntsize -= 1 if fntsize < maxfntsize: maxfntsize = fntsize - nd.text((64, 64), text, font_size=maxfntsize) + hmargin = int(imgwidth * 0.05) + vmargin = int(imgheight * 0.10) + nd.text((hmargin, vmargin), text, font_size=maxfntsize) outfile = io.BytesIO() nerr.save(outfile, format='PNG') data = base64.b64encode(outfile.getbuffer()) @@ -276,6 +285,8 @@ def kitty_draw(data, width, height): except Exception as e: errstr = 'Error rendering image:\n' + str(e) return draw_text(errstr, width, height) + imgwidth, imgheight = get_pix_dimensions(width, height) + img = img.resize((imgwidth - 4, imgheight - 4)) nimg = Image.new(mode='RGB', size=(img.width + 4, img.height + 4), color='black') nd = ImageDraw.Draw(nimg) nd.rectangle((0, 0, nimg.width - 1, nimg.height -1), outline='white', width=1) From 8111a135542c34f28520239de839a9303227dbda Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 15 May 2025 
11:00:44 -0400 Subject: [PATCH 188/413] Reuse image preprocessing for iterm and kitty Both can benefit, and lets iterm handling scale the way we would like to scale it. --- confluent_client/bin/nodeconsole | 47 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 8c920b6a..7e9dba50 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -222,8 +222,8 @@ def get_pix_dimensions(width, height): def draw_text(text, width, height): if Image: maxfntsize = 256 - imgwidth, imgheight = get_px_dimensions(width, height) - nerr = Image.new(mode='RGB', size=(imgwidth, imgwidth), color='green') + imgwidth, imgheight = get_pix_dimensions(width, height) + nerr = Image.new(mode='RGB', size=(imgwidth, imgheight), color='green') nd = ImageDraw.Draw(nerr) for txtpiece in text.split('\n'): fntsize = 8 @@ -235,16 +235,36 @@ def draw_text(text, width, height): hmargin = int(imgwidth * 0.05) vmargin = int(imgheight * 0.10) nd.text((hmargin, vmargin), text, font_size=maxfntsize) + nd.rectangle((0, 0, nerr.width - 1, nerr.height -1), outline='white', width=1) outfile = io.BytesIO() nerr.save(outfile, format='PNG') data = base64.b64encode(outfile.getbuffer()) - draw_image(data, width, height) + draw_image(data, width, height, doscale=False) else: sys.stdout.write(text) cursor_left(len(txt)) -def draw_image(data, width, height): +def draw_image(data, width, height, doscale=True): imageformat = os.environ.get('CONFLUENT_IMAGE_PROTOCOL', 'kitty') + if doscale and Image and width: + bindata = base64.b64decode(data) + binfile = io.BytesIO() + binfile.write(bindata) + binfile.seek(0) + try: + img = Image.open(binfile) + except Exception as e: + errstr = 'Error rendering image:\n' + str(e) + return draw_text(errstr, width, height) + imgwidth, imgheight = get_pix_dimensions(width, height) + img = img.resize((imgwidth - 4, imgheight - 4)) + nimg = 
Image.new(mode='RGB', size=(img.width + 4, img.height + 4), color='black') + nd = ImageDraw.Draw(nimg) + nd.rectangle((0, 0, nimg.width - 1, nimg.height -1), outline='white', width=1) + nimg.paste(img, box=(2, 2)) + outfile = io.BytesIO() + nimg.save(outfile, format='PNG') + data = base64.b64encode(outfile.getbuffer()) if imageformat == 'sixel': sixel_draw(data) elif imageformat == 'iterm': @@ -275,25 +295,6 @@ def iterm_draw(data, width, height): def kitty_draw(data, width, height): - if Image: - bindata = base64.b64decode(data) - binfile = io.BytesIO() - binfile.write(bindata) - binfile.seek(0) - try: - img = Image.open(binfile) - except Exception as e: - errstr = 'Error rendering image:\n' + str(e) - return draw_text(errstr, width, height) - imgwidth, imgheight = get_pix_dimensions(width, height) - img = img.resize((imgwidth - 4, imgheight - 4)) - nimg = Image.new(mode='RGB', size=(img.width + 4, img.height + 4), color='black') - nd = ImageDraw.Draw(nimg) - nd.rectangle((0, 0, nimg.width - 1, nimg.height -1), outline='white', width=1) - nimg.paste(img, box=(2, 2)) - outfile = io.BytesIO() - nimg.save(outfile, format='PNG') - data = base64.b64encode(outfile.getbuffer()) preamble = '\x1b_Ga=T,f=100' if height: preamble += f',r={height},c={width}' From ee53ee47c16c0e661aba2a43d766df540df7bae4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 15 May 2025 16:30:12 -0400 Subject: [PATCH 189/413] Fix loss of uppercase during reverse noderange Reverse noderang was changing names to lower case, fix to preserve case. 
--- confluent_server/confluent/noderange.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index 7657292c..53cebf20 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -57,7 +57,7 @@ def humanify_nodename(nodename): def unnumber_nodename(nodename): # stub out numbers - chunked = ["{}" if text.isdigit() else text.lower() + chunked = ["{}" if text.isdigit() else text for text in re.split(numregex, nodename)] return chunked From 11939c4d5712ed05a5d4e62e337120df8f6da6be Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 16 May 2025 09:44:57 -0400 Subject: [PATCH 190/413] Preserve aspect ratio if Pillow available Since we are controlling the scaling, we can manage things more precisely and get the aspect ratio right. --- confluent_client/bin/nodeconsole | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 7e9dba50..36f21e21 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -257,10 +257,21 @@ def draw_image(data, width, height, doscale=True): errstr = 'Error rendering image:\n' + str(e) return draw_text(errstr, width, height) imgwidth, imgheight = get_pix_dimensions(width, height) - img = img.resize((imgwidth - 4, imgheight - 4)) - nimg = Image.new(mode='RGB', size=(img.width + 4, img.height + 4), color='black') + nimg = Image.new(mode='RGBA', size=(imgwidth, imgheight)) + imgwidth -= 4 + imgheight -= 4 + hscalefact = imgwidth / img.width + vscalefact = imgheight / img.height + if hscalefact < vscalefact: + rzwidth = imgwidth + rzheight = int(img.height * hscalefact) + else: + rzwidth = int(img.width * vscalefact) + rzheight = imgheight + img = img.resize((rzwidth, rzheight)) nd = ImageDraw.Draw(nimg) - nd.rectangle((0, 0, nimg.width - 1, nimg.height -1), outline='white', 
width=1) + nd.rectangle((1, 1, rzwidth + 2, rzheight + 2), outline='black', width=1) + nd.rectangle((0, 0, rzwidth + 3, rzheight + 3), outline='white', width=1) nimg.paste(img, box=(2, 2)) outfile = io.BytesIO() nimg.save(outfile, format='PNG') From 6bebae1d0be176e76a8664717f84219321c5ab3f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 20 May 2025 10:39:57 -0400 Subject: [PATCH 191/413] Switch several node commands to epoll Improve ability to handle high fanout amounts by using epoll instead of select. --- confluent_client/bin/nodeapply | 31 ++++++++++++++++++----------- confluent_client/bin/nodeping | 36 ++++++++++++++++++++++------------ confluent_client/bin/noderun | 32 ++++++++++++++++++------------ confluent_client/bin/nodeshell | 31 ++++++++++++++++++----------- 4 files changed, 84 insertions(+), 46 deletions(-) diff --git a/confluent_client/bin/nodeapply b/confluent_client/bin/nodeapply index 2e798742..bf4b9a53 100755 --- a/confluent_client/bin/nodeapply +++ b/confluent_client/bin/nodeapply @@ -68,6 +68,7 @@ def run(): currprocs = 0 all = set([]) + poller = select.epoll() pipedesc = {} pendingexecs = deque() exitcode = 0 @@ -102,19 +103,23 @@ def run(): cmdv = ['ssh', sshnode] + cmdvbase + cmdstorun[0] if currprocs < concurrentprocs: currprocs += 1 - run_cmdv(sshnode, cmdv, all, pipedesc) + run_cmdv(sshnode, cmdv, all, poller, pipedesc) else: pendingexecs.append((sshnode, cmdv)) if not all or exitcode: sys.exit(exitcode) - rdy, _, _ = select.select(all, [], [], 10) + rdy = poller.poll(10) while all: pernodeout = {} for r in rdy: + r = r[0] desc = pipedesc[r] + r = desc['file'] node = desc['node'] data = True - while data and select.select([r], [], [], 0)[0]: + singlepoller = select.epoll() + singlepoller.register(r, select.EPOLLIN) + while data and singlepoller.poll(0): data = r.readline() if data: if desc['type'] == 'stdout': @@ -131,15 +136,17 @@ def run(): if ret is not None: exitcode = exitcode | ret all.discard(r) + poller.unregister(r) 
r.close() if desc['type'] == 'stdout': if idxbynode[node] < len(cmdstorun): cmdv = ['ssh', sshnode] + cmdvbase + cmdstorun[idxbynode[node]] idxbynode[node] += 1 - run_cmdv(node, cmdv, all, pipedesc) + run_cmdv(node, cmdv, all, poller, pipedesc) elif pendingexecs: node, cmdv = pendingexecs.popleft() - run_cmdv(node, cmdv, all, pipedesc) + run_cmdv(node, cmdv, all, poller. pipedesc) + singlepoller.close() for node in sortutil.natural_sort(pernodeout): for line in pernodeout[node]: line = client.stringify(line) @@ -147,19 +154,21 @@ def run(): sys.stdout.write('{0}: {1}'.format(node, line)) sys.stdout.flush() if all: - rdy, _, _ = select.select(all, [], [], 10) + rdy = poller.poll(10) sys.exit(exitcode) -def run_cmdv(node, cmdv, all, pipedesc): +def run_cmdv(node, cmdv, all, poller, pipedesc): nopen = subprocess.Popen( cmdv, stdin=devnull, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - pipedesc[nopen.stdout] = {'node': node, 'popen': nopen, - 'type': 'stdout'} - pipedesc[nopen.stderr] = {'node': node, 'popen': nopen, - 'type': 'stderr'} + pipedesc[nopen.stdout.fileno()] = {'node': node, 'popen': nopen, + 'type': 'stdout', 'file': nopen.stdout} + pipedesc[nopen.stderr.fileno()] = {'node': node, 'popen': nopen, + 'type': 'stderr', 'file': nopen.stderr} all.add(nopen.stdout) + poller.register(nopen.stdout, select.EPOLLIN) all.add(nopen.stderr) + poller.register(nopen.stderr, select.EPOLLIN) if __name__ == '__main__': diff --git a/confluent_client/bin/nodeping b/confluent_client/bin/nodeping index 1140a6bd..25d60c81 100755 --- a/confluent_client/bin/nodeping +++ b/confluent_client/bin/nodeping @@ -58,6 +58,7 @@ def run(): currprocs = 0 all = set([]) + poller = select.epoll() pipedesc = {} pendingexecs = deque() exitcode = 0 @@ -86,21 +87,28 @@ def run(): if currprocs < concurrentprocs: currprocs += 1 if options.origname: - run_cmdv(node, cmdv, all, pipedesc) + run_cmdv(node, cmdv, all, poller, pipedesc) else: - run_cmdv(pingnode, cmdv, all, pipedesc) + 
run_cmdv(pingnode, cmdv, all, poller, pipedesc) else: - pendingexecs.append((pingnode, cmdv)) + if options.origname: + pendingexecs.append((node, cmdv)) + else: + pendingexecs.append((pingnode, cmdv)) if not all or exitcode: sys.exit(exitcode) - rdy, _, _ = select.select(all, [], [], 10) + rdy = poller.poll(10) while all: pernodeout = {} for r in rdy: + r = r[0] desc = pipedesc[r] + r = desc['file'] node = desc['node'] data = True - while data and select.select([r], [], [], 0)[0]: + singlepoller = select.epoll() + singlepoller.register(r, select.EPOLLIN) + while data and singlepoller.poll(0): data = r.readline() if not data: pop = desc['popen'] @@ -108,6 +116,7 @@ def run(): if ret is not None: exitcode = exitcode | ret all.discard(r) + poller.unregister(r) r.close() if desc['type'] == 'stdout': if ret: @@ -116,7 +125,8 @@ def run(): print('{0}: ping'.format(node)) if pendingexecs: node, cmdv = pendingexecs.popleft() - run_cmdv(node, cmdv, all, pipedesc) + run_cmdv(node, cmdv, all, poller, pipedesc) + singlepoller.close() for node in sortutil.natural_sort(pernodeout): for line in pernodeout[node]: line = client.stringify(line) @@ -126,19 +136,21 @@ def run(): sys.stdout.write('{0}: {1}'.format(node, line)) sys.stdout.flush() if all: - rdy, _, _ = select.select(all, [], [], 10) + rdy = poller.poll(10) sys.exit(exitcode) -def run_cmdv(node, cmdv, all, pipedesc): +def run_cmdv(node, cmdv, all, poller, pipedesc): nopen = subprocess.Popen( cmdv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - pipedesc[nopen.stdout] = {'node': node, 'popen': nopen, - 'type': 'stdout'} - pipedesc[nopen.stderr] = {'node': node, 'popen': nopen, - 'type': 'stderr'} + pipedesc[nopen.stdout.fileno()] = {'node': node, 'popen': nopen, + 'type': 'stdout', 'file': nopen.stdout} + pipedesc[nopen.stderr.fileno()] = {'node': node, 'popen': nopen, + 'type': 'stderr', 'file': nopen.stderr} all.add(nopen.stdout) + poller.register(nopen.stdout, select.EPOLLIN) all.add(nopen.stderr) + 
poller.register(nopen.stderr, select.EPOLLIN) if __name__ == '__main__': diff --git a/confluent_client/bin/noderun b/confluent_client/bin/noderun index df0d4a77..c3367394 100755 --- a/confluent_client/bin/noderun +++ b/confluent_client/bin/noderun @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2016-2017 Lenovo @@ -67,6 +67,7 @@ def run(): currprocs = 0 all = set([]) + poller = select.epoll() pipedesc = {} pendingexecs = deque() exitcode = 0 @@ -84,19 +85,23 @@ def run(): cmdv = shlex.split(cmd) if currprocs < concurrentprocs: currprocs += 1 - run_cmdv(node, cmdv, all, pipedesc) + run_cmdv(node, cmdv, all, poller, pipedesc) else: pendingexecs.append((node, cmdv)) if not all or exitcode: sys.exit(exitcode) - rdy, _, _ = select.select(all, [], [], 10) + rdy = poller.poll(10) while all: pernodeout = {} for r in rdy: + r = r[0] desc = pipedesc[r] + r = desc['file'] node = desc['node'] data = True - while data and select.select([r], [], [], 0)[0]: + singlepoller = select.epoll() + singlepoller.register(r, select.EPOLLIN) + while data and singlepoller.poll(0): data = r.readline() if data: if desc['type'] == 'stdout': @@ -116,10 +121,12 @@ def run(): if ret is not None: exitcode = exitcode | ret all.discard(r) + poller.unregister(r) r.close() if desc['type'] == 'stdout' and pendingexecs: node, cmdv = pendingexecs.popleft() - run_cmdv(node, cmdv, all, pipedesc) + run_cmdv(node, cmdv, all, poller, pipedesc) + singlepoller.close() for node in sortutil.natural_sort(pernodeout): for line in pernodeout[node]: line = client.stringify(line) @@ -129,11 +136,11 @@ def run(): sys.stdout.write('{0}: {1}'.format(node, line)) sys.stdout.flush() if all: - rdy, _, _ = select.select(all, [], [], 10) + rdy = poller.poll(10) sys.exit(exitcode) -def run_cmdv(node, cmdv, all, pipedesc): +def run_cmdv(node, cmdv, all, poller, pipedesc): try: nopen = subprocess.Popen( cmdv, stdin=devnull, stdout=subprocess.PIPE, 
stderr=subprocess.PIPE) @@ -142,13 +149,14 @@ def run_cmdv(node, cmdv, all, pipedesc): sys.stderr.write('{0}: Unable to find local executable file "{1}"'.format(node, cmdv[0])) return raise - pipedesc[nopen.stdout] = {'node': node, 'popen': nopen, - 'type': 'stdout'} - pipedesc[nopen.stderr] = {'node': node, 'popen': nopen, - 'type': 'stderr'} + pipedesc[nopen.stdout.fileno()] = {'node': node, 'popen': nopen, + 'type': 'stdout', 'file': nopen.stdout} + pipedesc[nopen.stderr.fileno()] = {'node': node, 'popen': nopen, + 'type': 'stderr', 'file': nopen.stderr} all.add(nopen.stdout) + poller.register(nopen.stdout, select.EPOLLIN) all.add(nopen.stderr) - + poller.register(nopen.stderr, select.EPOLLIN) if __name__ == '__main__': run() diff --git a/confluent_client/bin/nodeshell b/confluent_client/bin/nodeshell index f22c1993..90ab89d9 100755 --- a/confluent_client/bin/nodeshell +++ b/confluent_client/bin/nodeshell @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2016-2017 Lenovo @@ -109,6 +109,7 @@ def run(): ex = exp.get('databynode', ()) for node in ex: cmdparms.append((node, ex[node]['value'])) + poller = select.epoll() for node, cmd in cmdparms: sshnode = nodemap.get(node, node) if not isinstance(cmd, str) and not isinstance(cmd, bytes): @@ -121,19 +122,23 @@ def run(): cmdv += [sshnode, cmd] if currprocs < concurrentprocs: currprocs += 1 - run_cmdv(node, cmdv, all, pipedesc) + run_cmdv(node, cmdv, all, poller, pipedesc) else: pendingexecs.append((node, cmdv)) if not all or exitcode: sys.exit(exitcode) - rdy, _, _ = select.select(all, [], [], 10) + rdy = poller.poll(10) while all: pernodeout = {} for r in rdy: + r = r[0] desc = pipedesc[r] + r = desc['file'] node = desc['node'] data = True - while data and select.select([r], [], [], 0)[0]: + singlepoller = select.epoll() + singlepoller.register(r, select.EPOLLIN) + while data and singlepoller.poll(0): data = r.readline() if data: if desc['type'] == 
'stdout': @@ -153,10 +158,12 @@ def run(): if ret is not None: exitcode = exitcode | ret all.discard(r) + poller.unregister(r) r.close() if desc['type'] == 'stdout' and pendingexecs: node, cmdv = pendingexecs.popleft() - run_cmdv(node, cmdv, all, pipedesc) + run_cmdv(node, cmdv, all, poller, pipedesc) + singlepoller.close() for node in sortutil.natural_sort(pernodeout): for line in pernodeout[node]: line = client.stringify(line) @@ -167,19 +174,21 @@ def run(): sys.stdout.write('{0}: {1}'.format(node, line)) sys.stdout.flush() if all: - rdy, _, _ = select.select(all, [], [], 10) + rdy = poller.poll(10) sys.exit(exitcode) -def run_cmdv(node, cmdv, all, pipedesc): +def run_cmdv(node, cmdv, all, poller, pipedesc): nopen = subprocess.Popen( cmdv, stdin=devnull, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - pipedesc[nopen.stdout] = {'node': node, 'popen': nopen, - 'type': 'stdout'} - pipedesc[nopen.stderr] = {'node': node, 'popen': nopen, - 'type': 'stderr'} + pipedesc[nopen.stdout.fileno()] = {'node': node, 'popen': nopen, + 'type': 'stdout', 'file': nopen.stdout} + pipedesc[nopen.stderr.fileno()] = {'node': node, 'popen': nopen, + 'type': 'stderr', 'file': nopen.stderr} all.add(nopen.stdout) all.add(nopen.stderr) + poller.register(nopen.stdout, select.EPOLLIN) + poller.register(nopen.stderr, select.EPOLLIN) if __name__ == '__main__': From 8722e6658353e37a8005277c2d2f00cb323135da Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 21 May 2025 10:00:47 -0400 Subject: [PATCH 192/413] Try a different dev build versioning Python 3.12 refuses to use the current versioning --- confluent_client/confluent_client.spec.tmpl | 2 +- confluent_server/confluent_server.spec.tmpl | 2 +- confluent_server/makesetup | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/confluent_client/confluent_client.spec.tmpl b/confluent_client/confluent_client.spec.tmpl index ee786175..ca2d0b92 100644 --- a/confluent_client/confluent_client.spec.tmpl +++ 
b/confluent_client/confluent_client.spec.tmpl @@ -1,7 +1,7 @@ %define name confluent_client %define version #VERSION# %define fversion %{lua: -sv, _ = string.gsub("#VERSION#", "[~+]", "-") +sv, _ = string.gsub("#VERSION#", "[~]", "-") print(sv) } %define release 1 diff --git a/confluent_server/confluent_server.spec.tmpl b/confluent_server/confluent_server.spec.tmpl index 1fb62d71..83f3452b 100644 --- a/confluent_server/confluent_server.spec.tmpl +++ b/confluent_server/confluent_server.spec.tmpl @@ -1,7 +1,7 @@ %define name confluent_server %define version #VERSION# %define fversion %{lua: -sv, _ = string.gsub("#VERSION#", "[~+]", "-") +sv, _ = string.gsub("#VERSION#", "[~]", "-") print(sv) } %define release 1 diff --git a/confluent_server/makesetup b/confluent_server/makesetup index a34438d3..33b08654 100755 --- a/confluent_server/makesetup +++ b/confluent_server/makesetup @@ -6,7 +6,7 @@ if [ "$NUMCOMMITS" != "$VERSION" ]; then LASTNUM=$((LASTNUM+1)) FIRSTPART=$(echo $VERSION|rev|cut -d . -f 2- |rev) VERSION=${FIRSTPART}.${LASTNUM} - VERSION=$VERSION~dev$NUMCOMMITS+`git describe|cut -d- -f 3` + VERSION=$VERSION.dev$NUMCOMMITS+`git describe|cut -d- -f 3` fi echo $VERSION > VERSION sed -e "s/#VERSION#/$VERSION/" setup.py.tmpl > setup.py From 1a679ab6eb74b16d89697f6a8eafa664bd3ce38b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 22 May 2025 13:45:26 -0400 Subject: [PATCH 193/413] Improvements to nodediscover For one, understand 'ip' to potentially mean 'bmc' for list to assign convenience. Parallelize handling of csv importing to improve performance. 
Only call rescan once per bulk assign --- confluent_client/bin/nodediscover | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/confluent_client/bin/nodediscover b/confluent_client/bin/nodediscover index da8b0b21..fc5889b4 100755 --- a/confluent_client/bin/nodediscover +++ b/confluent_client/bin/nodediscover @@ -123,7 +123,7 @@ def process_header(header): fields.append('serial') elif datum == 'uuid': fields.append('uuid') - elif datum in ('bmc', 'imm', 'xcc'): + elif datum in ('bmc', 'imm', 'xcc', 'ip'): fields.append('hardwaremanagement.manager') elif datum in ('bmc gateway', 'xcc gateway', 'imm gateway'): fields.append('net.bmc.ipv4_gateway') @@ -191,6 +191,7 @@ def import_csv(options, session): if field in unique_fields: unique_data[field] = set([]) broken = False + alldata=[] for record in records: currfields = list(fields) nodedatum = {} @@ -207,9 +208,15 @@ def import_csv(options, session): nodedatum[currfield] = datum if not datum_complete(nodedatum): sys.exit(1) + alldata.append(nodedatum) + allthere = True + for nodedatum in alldata: if not search_record(nodedatum, options, session) and not broken: + allthere = False blocking_scan(session) - if not search_record(nodedatum, options, session): + break + for nodedatum in alldata: + if not allthere and not search_record(nodedatum, options, session): sys.stderr.write( "Could not match the following data: " + repr(nodedatum) + '\n') @@ -230,8 +237,12 @@ def import_csv(options, session): print('Defined ' + res['created']) else: print(repr(res)) + child = os.fork() + if child: + continue for mac in maclist: - for res in session.update('/discovery/by-mac/{0}'.format(mac), + mysess = client.Command() + for res in mysess.update('/discovery/by-mac/{0}'.format(mac), {'node': nodename}): if 'error' in res: sys.stderr.write(res['error'] + '\n') @@ -241,6 +252,12 @@ def import_csv(options, session): print('Discovered ' + res['assigned']) else: print(repr(res)) + sys.exit(0) + while 
True: + try: + os.wait() + except ChildProcessError: + break if exitcode: sys.exit(exitcode) From 94dc266cd478438330d1207939ccd763a98bde75 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 22 May 2025 13:57:16 -0400 Subject: [PATCH 194/413] Add neighbor overflow check to confluent_selfcheck A common issue in larger layer 2 configurations is for the neighbor table to be undersized for the number of nodes. Detect this manifesting and present a message. --- confluent_server/bin/confluent_selfcheck | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/confluent_server/bin/confluent_selfcheck b/confluent_server/bin/confluent_selfcheck index 74b50c91..f3ad8605 100755 --- a/confluent_server/bin/confluent_selfcheck +++ b/confluent_server/bin/confluent_selfcheck @@ -27,6 +27,15 @@ import signal import confluent.collective.manager as collective import confluent.noderange as noderange +def check_neigh_overflow(): + dmesgout = subprocess.check_output(['dmesg']) + if b'_cache: neighbor table overflow!' in subprocess.check_output(['dmesg']): + return True + return False + #dmesg snippets + #[1203637.865870] neighbour: ndisc_cache: neighbor table overflow! + #[1205244.122606] neighbour: arp_cache: neighbor table overflow! 
+ def fprint(txt): sys.stdout.write(txt) @@ -202,6 +211,11 @@ if __name__ == '__main__': emprint('Failed access, if selinux is enabled, `setsebool -P httpd_can_network_connect=1`, otherwise check web proxy configuration') else: emprint('Not Running (Example resolution: systemctl enable httpd --now)') + fprint('IP neighbor table issue check:') + if check_neigh_overflow(): + emprint('ARP/Neighbor table problem detected, evaluate and increase net.ipv*.neigh.default.gc_thresh*') + else: + print('OK') fprint('TFTP Status: ') if tftp_works(): print('OK') From 19c4dc71dbea22b81cb292d761986b4407f6bafd Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 22 May 2025 14:30:58 -0400 Subject: [PATCH 195/413] Add recognition of RHEL10 media --- confluent_server/confluent/osimage.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index ed2a3a41..ca1d787b 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -622,6 +622,9 @@ def check_rhel(isoinfo): ver = entry.split('-')[2] arch = entry.split('.')[-2] break + elif 'redhat-release-10' in entry: + ver = entry.split('-')[2] + arch = entry.split('.')[-2] else: if '.discinfo' in isoinfo[1]: prodinfo = isoinfo[1]['.discinfo'] From 40f3ca73c4c72d78aaf20a1dfae3edc7ce3830fc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 22 May 2025 16:59:16 -0400 Subject: [PATCH 196/413] Fix mistake in RHEL10 recognition commit --- confluent_server/confluent/osimage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index ca1d787b..39cb3810 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -625,6 +625,7 @@ def check_rhel(isoinfo): elif 'redhat-release-10' in entry: ver = entry.split('-')[2] arch = entry.split('.')[-2] + break else: if '.discinfo' in isoinfo[1]: prodinfo = 
isoinfo[1]['.discinfo'] From 250de6133ddbd75dfa933b01038d96b8d24a5cb5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 27 May 2025 15:46:29 -0400 Subject: [PATCH 197/413] Stop trying to save a fingerprint that didn't exist --- misc/prepfish.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/misc/prepfish.py b/misc/prepfish.py index 921108f1..aee229a0 100644 --- a/misc/prepfish.py +++ b/misc/prepfish.py @@ -240,8 +240,8 @@ def get_redfish_fingerprint(): rsp = s.raw_command(0x2c, 1, data=(0x52, 1)) if rsp['data'][:2] == b'\x52\x01': fprint = rsp['data'][2:] - with open('/run/redfish/fingerprint', 'wb') as printout: - printout.write(fprint) + with open('/run/redfish/fingerprint', 'wb') as printout: + printout.write(fprint) return fprint From 4fd7021581b09380546e01b43dcb398f55597f39 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 28 May 2025 11:15:22 -0400 Subject: [PATCH 198/413] Add EL10 distributions --- confluent_server/confluent/osimage.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 39cb3810..f3d174c1 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -301,6 +301,11 @@ def check_rocky(isoinfo): arch = entry.split('.')[-2] cat = 'el9' break + if 'rocky-release-10' in entry: + ver = entry.split('-')[2] + arch = entry.split('.')[-2] + cat = 'el10' + break else: return None if arch == 'noarch' and '.discinfo' in isoinfo[1]: @@ -349,6 +354,11 @@ def check_alma(isoinfo): arch = entry.split('.')[-2] cat = 'el9' break + elif 'almalinux-release-10' in entry: + ver = entry.split('-')[2] + arch = entry.split('.')[-2] + cat = 'el10' + break elif 'almalinux-kitten-release-10' in entry: ver = entry.split('-')[3] arch = entry.split('.')[-2] From 8d8db070ebdd8c1ab8160c8c834951009a1aa5cc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 30 May 2025 15:19:42 -0400 Subject: [PATCH 199/413] 
Fix mistake in ctypes call in userutil --- confluent_server/confluent/userutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/userutil.py b/confluent_server/confluent/userutil.py index 6d7019fc..05fc5a3e 100644 --- a/confluent_server/confluent/userutil.py +++ b/confluent_server/confluent/userutil.py @@ -4,7 +4,7 @@ import confluent.util as util import grp import pwd import os -libc = cdll.LoadLibrary(find_library('libc')) +libc = cdll.LoadLibrary(find_library('c')) _getgrouplist = libc.getgrouplist _getgrouplist.restype = c_int32 From a1a144d211eff058bdfd8f580baa5b400c4b550f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 30 May 2025 15:48:15 -0400 Subject: [PATCH 200/413] Implement plugin managed VNC To extend beyond the OpenBmc wrapped dialect of VNC, provide mechanism for plugins to provide arbitrary cookie, password, url, and protocols parameters. Implement for ProxMox. --- .../plugins/hardwaremanagement/proxmox.py | 80 ++++++-- confluent_server/confluent/vinzmanager.py | 180 ++++++++++++------ 2 files changed, 191 insertions(+), 69 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py index bbe4ae9c..af5ff149 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py @@ -1,4 +1,5 @@ +import confluent.vinzmanager as vinzmanager import codecs import confluent.util as util import confluent.messages as msg @@ -29,6 +30,42 @@ class RetainedIO(io.BytesIO): self.resultbuffer = self.getbuffer() super().close() +class KvmConnection: + def __init__(self, consdata): + #self.ws = WrappedWebSocket(host=bmc) + #self.ws.set_verify_callback(kv) + ticket = consdata['ticket'] + user = consdata['user'] + port = consdata['port'] + urlticket = urlparse.quote(ticket) + host = consdata['host'] + guest = consdata['guest'] + pac = 
consdata['pac'] # fortunately, we terminate this on our end, but it does kind of reduce the value of the + # 'ticket' approach, as the general cookie must be provided as cookie along with the VNC ticket + hosturl = host + if ':' in hosturl: + hosturl = '[' + hosturl + ']' + self.url = f'/api2/json/nodes/{host}/{guest}/vncwebsocket?port={port}&vncticket={urlticket}' + self.fprint = consdata['fprint'] + self.cookies = { + 'PVEAuthCookie': pac, + } + self.protos = ['binary'] + self.host = host + self.portnum = 8006 + self.password = consdata['ticket'] + + +class KvmConnHandler: + def __init__(self, pmxclient, node): + self.pmxclient = pmxclient + self.node = node + + def connect(self): + consdata = self.pmxclient.get_vm_ikvm(self.node) + consdata['fprint'] = self.pmxclient.fprint + return KvmConnection(consdata) + class WrappedWebSocket(wso): def set_verify_callback(self, callback): @@ -175,6 +212,7 @@ class PmxApiClient: pass self.server = server self.wc = webclient.SecureHTTPConnection(server, port=8006, verifycallback=cv) + self.fprint = configmanager.get_node_attributes(server, 'pubkeys.tls').get(server, {}).get('pubkeys.tls', {}).get('value', None) self.vmmap = {} self.login() self.vmlist = {} @@ -243,11 +281,15 @@ class PmxApiClient: yield msg.KeyValueData({'inventory': invitems}, vm) + def get_vm_ikvm(self, vm): + return self.get_vm_consproxy(vm, 'vnc') + def get_vm_serial(self, vm): - # This would be termproxy - # Example url + return self.get_vm_consproxy(vm, 'term') + + def get_vm_consproxy(self, vm, constype): host, guest = self.get_vm(vm) - rsp = self.wc.grab_json_response_with_status(f'/api2/json/nodes/{host}/{guest}/termproxy', method='POST') + rsp = self.wc.grab_json_response_with_status(f'/api2/json/nodes/{host}/{guest}/{constype}proxy', method='POST') consdata = rsp[0]['data'] consdata['server'] = self.server consdata['host'] = host @@ -372,20 +414,12 @@ def retrieve(nodes, element, configmanager, inputdata): for rsp in 
currclient.get_vm_inventory(node): yield rsp elif element == ['console', 'ikvm_methods']: - dsc = {'ikvm_methods': ['screenshot']} + dsc = {'ikvm_methods': ['vnc']} yield msg.KeyValueData(dsc, node) elif element == ['console', 'ikvm_screenshot']: # good background for the webui, and kitty - imgdata = RetainedIO() - imgformat = currclient.get_screenshot(node, imgdata) - imgdata = imgdata.getvalue() - if imgdata: - yield msg.ScreenShot(imgdata, node, imgformat=imgformat) - - - - - + yield msg.ConfluentNodeError(node, "vnc available, screenshot not available") + return def update(nodes, element, configmanager, inputdata): clientsbynode = prep_proxmox_clients(nodes, configmanager) @@ -397,11 +431,29 @@ def update(nodes, element, configmanager, inputdata): elif element == ['boot', 'nextdevice']: currclient.set_vm_bootdev(node, inputdata.bootdevice(node)) yield msg.BootDevice(node, currclient.get_vm_bootdev(node)) + elif element == ['console', 'ikvm']: + try: + currclient = clientsbynode[node] + url = vinzmanager.get_url(node, inputdata, nodeparmcallback=KvmConnHandler(currclient, node).connect) + except Exception as e: + print(repr(e)) + return + yield msg.ChildCollection(url) + return # assume this is only console for now def create(nodes, element, configmanager, inputdata): clientsbynode = prep_proxmox_clients(nodes, configmanager) for node in nodes: + if element == ['console', 'ikvm']: + try: + currclient = clientsbynode[node] + url = vinzmanager.get_url(node, inputdata, nodeparmcallback=KvmConnHandler(currclient, node).connect) + except Exception as e: + print(repr(e)) + return + yield msg.ChildCollection(url) + return serialdata = clientsbynode[node].get_vm_serial(node) return PmxConsole(serialdata, node, configmanager, clientsbynode[node]) diff --git a/confluent_server/confluent/vinzmanager.py b/confluent_server/confluent/vinzmanager.py index f9511676..8462ac6e 100644 --- a/confluent_server/confluent/vinzmanager.py +++ b/confluent_server/confluent/vinzmanager.py 
@@ -47,7 +47,9 @@ def assure_vinz(): startingup = False _unix_by_nodename = {} -def get_url(nodename, inputdata): +_nodeparms = {} +def get_url(nodename, inputdata, nodeparmcallback=None): + _nodeparms[nodename] = nodeparmcallback method = inputdata.inputbynode[nodename] assure_vinz() if method == 'wss': @@ -89,56 +91,120 @@ def close_session(sessionid): 'X-XSRF-TOKEN': wc.cookies['XSRF-TOKEN']}) -def send_grant(conn, nodename): - cfg = configmanager.ConfigManager(None) - c = cfg.get_node_attributes( - nodename, - ['secret.hardwaremanagementuser', - 'secret.hardwaremanagementpassword', - 'hardwaremanagement.manager'], decrypt=True) - bmcuser = c.get(nodename, {}).get( - 'secret.hardwaremanagementuser', {}).get('value', None) - bmcpass = c.get(nodename, {}).get( - 'secret.hardwaremanagementpassword', {}).get('value', None) - bmc = c.get(nodename, {}).get( - 'hardwaremanagement.manager', {}).get('value', None) - if bmcuser and bmcpass and bmc: - kv = util.TLSCertVerifier(cfg, nodename, - 'pubkeys.tls_hardwaremanager').verify_cert - wc = webclient.SecureHTTPConnection(bmc, 443, verifycallback=kv) - if not isinstance(bmcuser, str): - bmcuser = bmcuser.decode() - if not isinstance(bmcpass, str): - bmcpass = bmcpass.decode() - rsp = wc.grab_json_response_with_status( - '/login', {'data': [bmcuser, bmcpass]}, - headers={'Content-Type': 'application/json', - 'Accept': 'application/json'}) - sessionid = wc.cookies['SESSION'] - sessiontok = wc.cookies['XSRF-TOKEN'] +def send_grant(conn, nodename, rqtype): + parmcallback = _nodeparms.get(nodename, None) + cookies = {} + protos = [] + passwd = None + sessionid = os.urandom(8).hex() + while sessionid in _usersessions: + sessionid = os.urandom(8).hex() + if parmcallback: # plugin that handles the specifics of the vnc wrapping + if rqtype == 1: + raise Exception("Plugin managed login data not supported with legacy grant request") + cxnmgr = parmcallback() _usersessions[sessionid] = { - 'webclient': wc, + 'cxnmgr': cxnmgr, 
'nodename': nodename, } - url = '/kvm/0' - fprintinfo = cfg.get_node_attributes(nodename, 'pubkeys.tls_hardwaremanager') - fprint = fprintinfo.get( - nodename, {}).get('pubkeys.tls_hardwaremanager', {}).get('value', None) - if not fprint: - return + url = cxnmgr.url + fprint = cxnmgr.fprint + cookies = cxnmgr.cookies + protos = cxnmgr.protos + host = cxnmgr.host + portnum = cxnmgr.portnum + passwd = cxnmgr.password + #url, fprint, cookies, protos = parmcallback(nodename) + else: + # original openbmc dialect + portnum = 443 + cfg = configmanager.ConfigManager(None) + c = cfg.get_node_attributes( + nodename, + ['secret.hardwaremanagementuser', + 'secret.hardwaremanagementpassword', + 'hardwaremanagement.manager'], decrypt=True) + bmcuser = c.get(nodename, {}).get( + 'secret.hardwaremanagementuser', {}).get('value', None) + bmcpass = c.get(nodename, {}).get( + 'secret.hardwaremanagementpassword', {}).get('value', None) + host = c.get(nodename, {}).get( + 'hardwaremanagement.manager', {}).get('value', None) + if bmcuser and bmcpass and host: + kv = util.TLSCertVerifier(cfg, nodename, + 'pubkeys.tls_hardwaremanager').verify_cert + wc = webclient.SecureHTTPConnection(host, 443, verifycallback=kv) + if not isinstance(bmcuser, str): + bmcuser = bmcuser.decode() + if not isinstance(bmcpass, str): + bmcpass = bmcpass.decode() + rsp = wc.grab_json_response_with_status( + '/login', {'data': [bmcuser, bmcpass]}, + headers={'Content-Type': 'application/json', + 'Accept': 'application/json'}) + cookies['SESSION'] = wc.cookies['SESSION'] + cookies['XSRF-TOKEN'] = wc.cookies['XSRF-TOKEN'] + if rqtype == 1: + # unfortunately, the original protocol failed to + # provide a means for separate tracking bmc side + # and confluent side + # chances are pretty good still + sessionid = wc.cookies['SESSION'] + sessiontok = wc.cookies['XSRF-TOKEN'] + protos.append(sessiontok) + _usersessions[sessionid] = { + 'webclient': wc, + 'nodename': nodename, + } + url = '/kvm/0' + fprintinfo = 
cfg.get_node_attributes(nodename, 'pubkeys.tls_hardwaremanager') + fprint = fprintinfo.get( + nodename, {}).get('pubkeys.tls_hardwaremanager', {}).get('value', None) + if not fprint: + return + if '$' in fprint: fprint = fprint.split('$', 1)[1] - fprint = bytes.fromhex(fprint) - conn.send(struct.pack('!BI', 1, len(bmc))) - conn.send(bmc.encode()) - conn.send(struct.pack('!I', len(sessionid))) - conn.send(sessionid.encode()) + fprint = bytes.fromhex(fprint) + conn.send(struct.pack('!BI', rqtype, len(host))) + conn.send(host.encode()) + conn.send(struct.pack('!I', len(sessionid))) + conn.send(sessionid.encode()) + if rqtype == 1: conn.send(struct.pack('!I', len(sessiontok))) conn.send(sessiontok.encode()) conn.send(struct.pack('!I', len(fprint))) conn.send(fprint) conn.send(struct.pack('!I', len(url))) conn.send(url.encode()) - conn.send(b'\xff') + else: # newer TLV style protocol + conn.send(struct.pack('!H', portnum)) + conn.send(struct.pack('!BI', 4, len(url))) + conn.send(url.encode()) + for cook in cookies: + v = cookies[cook] + totlen = len(cook) + len(v) + 4 + conn.send(struct.pack('!BIH', 1, totlen, len(cook.encode()))) + conn.send(cook.encode()) + conn.send(struct.pack('!H', len(v.encode()))) + conn.send(v.encode()) + for proto in protos: + conn.send(struct.pack('!BI', 2, len(proto.encode()))) + conn.send(proto.encode()) + conn.send(struct.pack('!BI', 3, len(fprint))) + conn.send(fprint) + if passwd: + conn.send(struct.pack('!BI', 5, len(passwd.encode()[:8]))) + conn.send(passwd.encode()[:8]) + conn.send(b'\xff') + +def recv_exact(conn, n): + retdata = b'' + while len(retdata) < n: + currdata = conn.recv(n - len(retdata)) + if not currdata: + raise Exception("Error receiving") + retdata += currdata + return retdata def evaluate_request(conn): allow = False @@ -149,33 +215,37 @@ def evaluate_request(conn): pid, uid, gid = struct.unpack('iII', creds) if uid != os.getuid(): return - rqcode, fieldlen = struct.unpack('!BI', conn.recv(5)) - authtoken = 
conn.recv(fieldlen).decode() + rqcode, fieldlen = struct.unpack('!BI', recv_exact(conn, 5)) + authtoken = recv_exact(conn, fieldlen).decode() if authtoken != _vinztoken: return if rqcode == 2: # disconnect notification - fieldlen = struct.unpack('!I', conn.recv(4))[0] - sessionid = conn.recv(fieldlen).decode() + fieldlen = struct.unpack('!I', recv_exact(conn, 4))[0] + sessionid = recv_exact(conn, fieldlen).decode() close_session(sessionid) conn.recv(1) # digest 0xff - if rqcode == 1: # request for new connection - fieldlen = struct.unpack('!I', conn.recv(4))[0] - nodename = conn.recv(fieldlen).decode() + # if rqcode == 3: # new form connection request + # this will generalize things, to allow describing + # arbitrary cookies and subprotocols + # for the websocket connection + if rqcode in (1, 3): # request for new connection + fieldlen = struct.unpack('!I', recv_exact(conn, 4))[0] + nodename = recv_exact(conn, fieldlen).decode() idtype = struct.unpack('!B', conn.recv(1))[0] if idtype == 1: - usernum = struct.unpack('!I', conn.recv(4))[0] + usernum = struct.unpack('!I', recv_exact(conn, 4))[0] if usernum == 0: # root is a special guy - send_grant(conn, nodename) + send_grant(conn, nodename, rqcode) return try: authname = pwd.getpwuid(usernum).pw_name except Exception: return elif idtype == 2: - fieldlen = struct.unpack('!I', conn.recv(4))[0] - sessionid = conn.recv(fieldlen) - fieldlen = struct.unpack('!I', conn.recv(4))[0] - sessiontok = conn.recv(fieldlen) + fieldlen = struct.unpack('!I', recv_exact(conn, 4))[0] + sessionid = recv_exact(conn, fieldlen) + fieldlen = struct.unpack('!I', recv_exact(conn, 4))[0] + sessiontok = recv_exact(conn, fieldlen) try: authname = httpapi.get_user_for_session(sessionid, sessiontok) except Exception: @@ -186,7 +256,7 @@ def evaluate_request(conn): if authname: allow = auth.authorize(authname, f'/nodes/{nodename}/console/ikvm') if allow: - send_grant(conn, nodename) + send_grant(conn, nodename, rqcode) finally: conn.close() From 
7aaa3506797fabc6d282fe40dc821e0aaeca0da2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 30 May 2025 15:48:32 -0400 Subject: [PATCH 201/413] Fix missing import from stats --- confluent_client/bin/stats | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_client/bin/stats b/confluent_client/bin/stats index 0893fadb..3dc2dfb0 100755 --- a/confluent_client/bin/stats +++ b/confluent_client/bin/stats @@ -19,6 +19,7 @@ import argparse import base64 import csv import io +import os import numpy as np import sys From 6a90e1cc77d3e7e67982fe172cd210da56424a3f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 30 May 2025 16:26:34 -0400 Subject: [PATCH 202/413] Implement a VNC to screenshot For Proxmox, since no convenient screenshot mechanism is available, instead do vnc. --- confluent_client/bin/nodeconsole | 97 ++++++++++++++----- .../plugins/hardwaremanagement/proxmox.py | 1 - 2 files changed, 74 insertions(+), 24 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 36f21e21..f8b41324 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -447,38 +447,35 @@ def do_screenshot(): sys.stdout.write('\x1bc') firstnodename = None dorefresh = True + vnconly = set([]) while dorefresh: for res in sess.read('/noderange/{}/console/ikvm_screenshot'.format(args[0])): for node in res.get('databynode', {}): + errorstr = '' if not firstnodename: firstnodename = node + error = res['databynode'][node].get('error') + if error and 'vnc available' in error: + vnconly.add(node) + continue + elif error: + errorstr = error imgdata = res['databynode'][node].get('image', {}).get('imgdata', None) if imgdata: - errorstr = '' if len(imgdata) < 32: # We were subjected to error errorstr = f'Unable to get screenshot' - imagedatabynode[node] = imgdata - if node in nodepositions: - prep_node_tile(node) - cursor_save() - else: - if options.interval is not None: - if node != firstnodename: - 
sys.stderr.write('Multiple nodes not supported for interval') - sys.exit(1) - sticky_cursor() - sys.stdout.write('{}: '.format(node)) - # one row is used by our own name, so cheight - 1 for that allowance - if errorstr: - draw_text(errorstr, cwidth, cheight -1 if cheight else cheight) - else: - draw_image(imgdata.encode(), cwidth, cheight - 1 if cheight else cheight) - if node in nodepositions: - cursor_restore() - reset_cursor(node) - else: - sys.stdout.write('\n') - sys.stdout.flush() + if errorstr or imgdata: + draw_node(node, imgdata, errorstr, firstnodename, cwidth, cheight) + if asyncvnc: + urlbynode = {} + for node in vnconly: + for res in sess.update(f'/nodes/{node}/console/ikvm', {'method': 'unix'}): + url = res.get('item', {}).get('href') + if url: + urlbynode[node] = url + draw_vnc_grabs(urlbynode, cwidth, cheight) + elif vnconly: + sys.stderr.write("Require asyncvnc installed to do VNC screenshotting\n") if options.interval is None: dorefresh = False else: @@ -486,6 +483,60 @@ def do_screenshot(): time.sleep(options.interval) sys.exit(0) +try: + import asyncio, asyncvnc +except ImportError: + asyncvnc = None + +def draw_vnc_grabs(urlbynode, cwidth, cheight): + asyncio.run(grab_vncs(urlbynode, cwidth, cheight)) +async def grab_vncs(urlbynode, cwidth, cheight): + tasks = [] + for node in urlbynode: + url = urlbynode[node] + tasks.append(asyncio.create_task(do_vnc_screenshot(node, url, cwidth, cheight))) + await asyncio.gather(*tasks) + +async def my_opener(host, port): + # really, host is the unix + return await asyncio.open_unix_connection(host) + +async def do_vnc_screenshot(node, url, cwidth, cheight): + async with asyncvnc.connect(url, opener=my_opener) as client: + # Retrieve pixels as a 3D numpy array + pixels = await client.screenshot() + # Save as PNG using PIL/pillow + image = Image.fromarray(pixels) + outfile = io.BytesIO() + image.save(outfile, format='PNG') + imgdata = base64.b64encode(outfile.getbuffer()).decode() + if imgdata: + 
draw_node(node, imgdata, '', '', cwidth, cheight) + +def draw_node(node, imgdata, errorstr, firstnodename, cwidth, cheight): + imagedatabynode[node] = imgdata + if node in nodepositions: + prep_node_tile(node) + cursor_save() + else: + if options.interval is not None: + if node != firstnodename: + sys.stderr.write('Multiple nodes not supported for interval') + sys.exit(1) + sticky_cursor() + sys.stdout.write('{}: '.format(node)) + # one row is used by our own name, so cheight - 1 for that allowance + if errorstr: + draw_text(errorstr, cwidth, cheight -1 if cheight else cheight) + else: + draw_image(imgdata.encode(), cwidth, cheight - 1 if cheight else cheight) + if node in nodepositions: + cursor_restore() + reset_cursor(node) + else: + sys.stdout.write('\n') + sys.stdout.flush() + if options.screenshot: try: cursor_hide() diff --git a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py index af5ff149..f3694513 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py @@ -419,7 +419,6 @@ def retrieve(nodes, element, configmanager, inputdata): elif element == ['console', 'ikvm_screenshot']: # good background for the webui, and kitty yield msg.ConfluentNodeError(node, "vnc available, screenshot not available") - return def update(nodes, element, configmanager, inputdata): clientsbynode = prep_proxmox_clients(nodes, configmanager) From d063f50a9c2a7fd6393d15d65d53d8f83c6162a3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 6 Jun 2025 10:52:27 -0400 Subject: [PATCH 203/413] Fix a possible breakage due to transmit error SSDP snoop could have been brought down by a non-viable transmit, tolerate that failure. 
--- confluent_server/confluent/discovery/protocols/ssdp.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/protocols/ssdp.py b/confluent_server/confluent/discovery/protocols/ssdp.py index d8acf753..447aaf5f 100644 --- a/confluent_server/confluent/discovery/protocols/ssdp.py +++ b/confluent_server/confluent/discovery/protocols/ssdp.py @@ -250,7 +250,10 @@ def snoop(handler, byehandler=None, protocol=None, uuidlookup=None): reply = 'HTTP/1.1 200 OK\r\n\r\nCONFLUENT: PRESENT\r\n' if not isinstance(reply, bytes): reply = reply.encode('utf8') - s.sendto(reply, peer) + try: + s.sendto(reply, peer) + except Exception: + break elif query.startswith('uuid='): curruuid = query.split('=', 1)[1].lower() node = uuidlookup(curruuid) From 169fd976ce6dbe58e00527b25b27c6f77db839bf Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 6 Jun 2025 10:56:35 -0400 Subject: [PATCH 204/413] Compensate for out of sync resize behavior During the async vnc behavior, a copy of the geometry variables are used that are out of sync. Workaround by forcing a resize again after the async tasks conclude. 
--- confluent_client/bin/nodeconsole | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index f8b41324..960c784c 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -404,8 +404,9 @@ def redraw(): else: sys.stdout.write('\n') sys.stdout.flush() - +resized = False def do_screenshot(): + global resized global numrows sess = client.Command() if options.tile: @@ -418,8 +419,11 @@ def do_screenshot(): for res in sess.read('/noderange/{}/nodes/'.format(args[0])): allnodes.append(res['item']['href'].replace('/', '')) numnodes += 1 + resized = False def do_resize(a=None, b=None): + global resized if a: + resized = True # on a window resize, clear the old stuff # ideally we'd retain the images and redraw them sys.stdout.write('\x1bc') @@ -474,6 +478,9 @@ def do_screenshot(): if url: urlbynode[node] = url draw_vnc_grabs(urlbynode, cwidth, cheight) + if resized: + do_resize(True) + resized = False elif vnconly: sys.stderr.write("Require asyncvnc installed to do VNC screenshotting\n") if options.interval is None: From 5f9250c492e015811436263f8abea94c734b0749 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 6 Jun 2025 20:14:07 -0400 Subject: [PATCH 205/413] Add el10 clause --- confluent_server/confluent_server.spec.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent_server.spec.tmpl b/confluent_server/confluent_server.spec.tmpl index 83f3452b..670757fc 100644 --- a/confluent_server/confluent_server.spec.tmpl +++ b/confluent_server/confluent_server.spec.tmpl @@ -24,7 +24,7 @@ Requires: python-pyghmi >= 1.5.71, python-eventlet, python-greenlet, python-pycr %if "%{dist}" == ".el8" Requires: python3-pyghmi >= 1.5.71, python3-eventlet, python3-greenlet, python3-pycryptodomex >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dns, python3-netifaces, python3-pyasn1 >= 
0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-enum34, python3-asn1crypto, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-yaml openssl iproute %else -%if "%{dist}" == ".el9" +%if "%{dist}" == ".el9" || "%{dist}" == ".el10" Requires: python3-pyghmi >= 1.5.71, python3-eventlet, python3-greenlet, python3-pycryptodomex >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dns, python3-webauthn, python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-yaml openssl iproute %else Requires: python3-dbm,python3-pyghmi >= 1.5.71, python3-eventlet, python3-greenlet, python3-pycryptodome >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dnspython, python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-PyYAML openssl iproute From c7d41f8a4b991417c7181fd85b6bda9ea7cfc839 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 9 Jun 2025 14:12:31 -0400 Subject: [PATCH 206/413] Support and prefer psutil The netifaces library seems dead, we can use psutil instead which seems more popular. 
--- .../confluent/discovery/protocols/pxe.py | 13 ++- confluent_server/confluent/netutil.py | 107 +++++++++++++----- confluent_server/confluent/util.py | 22 +++- confluent_server/confluent_server.spec.tmpl | 6 +- 4 files changed, 109 insertions(+), 39 deletions(-) diff --git a/confluent_server/confluent/discovery/protocols/pxe.py b/confluent_server/confluent/discovery/protocols/pxe.py index 5cb9a43c..56d870d6 100644 --- a/confluent_server/confluent/discovery/protocols/pxe.py +++ b/confluent_server/confluent/discovery/protocols/pxe.py @@ -36,7 +36,11 @@ import ctypes.util import eventlet import eventlet.green.socket as socket import eventlet.green.select as select -import netifaces +try: + import psutil +except ImportError: + psutil = None + import netifaces import os import struct import time @@ -136,7 +140,12 @@ def idxtoname(idx): _idxtobcast = {} def get_bcastaddr(idx): if idx not in _idxtobcast: - bc = netifaces.ifaddresses(idxtoname(idx))[17][0]['broadcast'] + if psutil: + for addr in psutil.net_if_addrs()[idxtoname(idx)]: + if addr.family == socket.AF_PACKET: + bc = addr.broadcast + else: + bc = netifaces.ifaddresses(idxtoname(idx))[17][0]['broadcast'] bc = bytearray([int(x, 16) for x in bc.split(':')]) _idxtobcast[idx] = bc return _idxtobcast[idx] diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py index 2e580624..e7f303ba 100644 --- a/confluent_server/confluent/netutil.py +++ b/confluent_server/confluent/netutil.py @@ -18,7 +18,10 @@ import confluent.exceptions as exc import codecs -import netifaces +try: + import psutil +except ImportError: + import netifaces import struct import eventlet.green.socket as socket import eventlet.support.greendns @@ -32,9 +35,18 @@ def msg_align(len): return (len + 3) & ~3 def mask_to_cidr(mask): - maskn = socket.inet_pton(socket.AF_INET, mask) - maskn = struct.unpack('!I', maskn)[0] cidr = 32 + fam = socket.AF_INET + fmt = + if ':' in mask: # ipv6 + fam = socket.AF_INET6 + cidr = 128 + 
maskn = socket.inet_pton(fam, mask) + if len(maskn) == 4 + maskn = struct.unpack('!I', maskn)[0] + else: + first, second = struct.unpack('!QQ', maskn) + maskn = first << 64 | second while maskn & 0b1 == 0 and cidr > 0: cidr -= 1 maskn >>= 1 @@ -101,16 +113,25 @@ def ipn_is_local(ipn): def address_is_local(address): - for iface in netifaces.interfaces(): - for i4 in netifaces.ifaddresses(iface).get(2, []): - cidr = mask_to_cidr(i4['netmask']) - if ip_on_same_subnet(i4['addr'], address, cidr): - return True - for i6 in netifaces.ifaddresses(iface).get(10, []): - cidr = int(i6['netmask'].split('/')[1]) - laddr = i6['addr'].split('%')[0] - if ip_on_same_subnet(laddr, address, cidr): - return True + if psutil: + ifas = psutil.net_if_addrs() + for iface in ifas: + for addr in ifas[iface]: + if addr.family in (socket.AF_INET, socket.AF_INET6): + cidr = mask_to_cidr(addr.netmask) + if ip_on_same_subnet(addr.address, address, cidr): + return True + else: + for iface in netifaces.interfaces(): + for i4 in netifaces.ifaddresses(iface).get(2, []): + cidr = mask_to_cidr(i4['netmask']) + if ip_on_same_subnet(i4['addr'], address, cidr): + return True + for i6 in netifaces.ifaddresses(iface).get(10, []): + cidr = int(i6['netmask'].split('/')[1]) + laddr = i6['addr'].split('%')[0] + if ip_on_same_subnet(laddr, address, cidr): + return True return False @@ -126,20 +147,35 @@ def _rebuildidxmap(): def myiptonets(svrip): - fam = netifaces.AF_INET + fam = socket.AF_INET if ':' in svrip: - fam = netifaces.AF_INET6 + fam = socket.AF_INET6 relevantnic = None - for iface in netifaces.interfaces(): - for addr in netifaces.ifaddresses(iface).get(fam, []): - addr = addr.get('addr', '') - addr = addr.split('%')[0] - if addresses_match(addr, svrip): - relevantnic = iface - break - else: - continue - break + if psutil: + ifas = psutil.net_if_addrs() + for iface in ifas: + for addr in ifas[iface]: + if addr.fam != fam: + continue + addr = addr.address + addr = addr.split('%')[0] + if 
addresses_match(addr, svrip): + relevantnic = iface + break + else: + continue + break + else: + for iface in netifaces.interfaces(): + for addr in netifaces.ifaddresses(iface).get(fam, []): + addr = addr.get('addr', '') + addr = addr.split('%')[0] + if addresses_match(addr, svrip): + relevantnic = iface + break + else: + continue + break return inametonets(relevantnic) @@ -150,11 +186,22 @@ def _iftonets(ifidx): return inametonets(ifidx) def inametonets(iname): - addrs = netifaces.ifaddresses(iname) - try: - addrs = addrs[netifaces.AF_INET] - except KeyError: - return + addrs = [] + if psutil: + ifaces = psutil.net_if_addrs() + if iname not in ifaces: + return + for iface in ifaces: + for addrent in ifaces[iface]: + if addrent.family != socket.AF_INET: + continue + addrs.append({'addr': addrent.address, 'netmask': addrent.netmask}) + else: + addrs = netifaces.ifaddresses(iname) + try: + addrs = addrs[netifaces.AF_INET] + except KeyError: + return for addr in addrs: ip = struct.unpack('!I', socket.inet_aton(addr['addr']))[0] mask = struct.unpack('!I', socket.inet_aton(addr['netmask']))[0] diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index 462ec930..cb6c2973 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -20,7 +20,10 @@ import base64 import confluent.exceptions as cexc import confluent.log as log import hashlib -import netifaces +try: + import psutil +except ImportError: + import netifaces import os import re import socket @@ -85,11 +88,18 @@ def list_interface_indexes(): def list_ips(): # Used for getting addresses to indicate the multicast address # as well as getting all the broadcast addresses - for iface in netifaces.interfaces(): - addrs = netifaces.ifaddresses(iface) - if netifaces.AF_INET in addrs: - for addr in addrs[netifaces.AF_INET]: - yield addr + if psutil: + ifas = psutil.net_if_addrs() + for intf in ifas: + for addr in ifas[intf]: + if addr.family == socket.AF_INET 
and addr.broadcast: + yield {'broadcast': addr.broadcast, 'addr': addr.address} + else: + for iface in netifaces.interfaces(): + addrs = netifaces.ifaddresses(iface) + if netifaces.AF_INET in addrs: + for addr in addrs[netifaces.AF_INET]: + yield addr def randomstring(length=20): """Generate a random string of requested length diff --git a/confluent_server/confluent_server.spec.tmpl b/confluent_server/confluent_server.spec.tmpl index 670757fc..17b3da2c 100644 --- a/confluent_server/confluent_server.spec.tmpl +++ b/confluent_server/confluent_server.spec.tmpl @@ -24,14 +24,18 @@ Requires: python-pyghmi >= 1.5.71, python-eventlet, python-greenlet, python-pycr %if "%{dist}" == ".el8" Requires: python3-pyghmi >= 1.5.71, python3-eventlet, python3-greenlet, python3-pycryptodomex >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dns, python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-enum34, python3-asn1crypto, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-yaml openssl iproute %else -%if "%{dist}" == ".el9" || "%{dist}" == ".el10" +%if "%{dist}" == ".el9" Requires: python3-pyghmi >= 1.5.71, python3-eventlet, python3-greenlet, python3-pycryptodomex >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dns, python3-webauthn, python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-yaml openssl iproute %else +%if "%{dist}" == ".el10" +Requires: python3-pyghmi >= 1.5.71, python3-eventlet, python3-greenlet, python3-pycryptodomex >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dns, python3-webauthn, python3-psutil, python3-pyasn1 
>= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-yaml openssl iproute +%else Requires: python3-dbm,python3-pyghmi >= 1.5.71, python3-eventlet, python3-greenlet, python3-pycryptodome >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dnspython, python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-PyYAML openssl iproute %endif %endif %endif +%endif Vendor: Lenovo Url: https://github.com/lenovo/confluent From 65b613219ec14015a64549acc9c06105912ec064 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 9 Jun 2025 14:33:12 -0400 Subject: [PATCH 207/413] Amend mistake in previous commit --- confluent_server/confluent/netutil.py | 1 - 1 file changed, 1 deletion(-) diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py index e7f303ba..9552d673 100644 --- a/confluent_server/confluent/netutil.py +++ b/confluent_server/confluent/netutil.py @@ -37,7 +37,6 @@ def msg_align(len): def mask_to_cidr(mask): cidr = 32 fam = socket.AF_INET - fmt = if ':' in mask: # ipv6 fam = socket.AF_INET6 cidr = 128 From 59dc7b54269150c5e4a8c1e8dc649630b907e843 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 9 Jun 2025 14:45:43 -0400 Subject: [PATCH 208/413] Fix another error in the psutils work --- confluent_server/confluent/netutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py index 9552d673..4ff5b29e 100644 --- a/confluent_server/confluent/netutil.py +++ b/confluent_server/confluent/netutil.py @@ -41,7 +41,7 @@ def mask_to_cidr(mask): fam = socket.AF_INET6 cidr 
= 128 maskn = socket.inet_pton(fam, mask) - if len(maskn) == 4 + if len(maskn) == 4: maskn = struct.unpack('!I', maskn)[0] else: first, second = struct.unpack('!QQ', maskn) From dcfb028ba901949b862fbe9237cc069ee5c8e373 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 9 Jun 2025 15:57:02 -0400 Subject: [PATCH 209/413] Add popular virtual machine storage drivers to imgutil --- imgutil/el9/dracut/installkernel | 1 + 1 file changed, 1 insertion(+) diff --git a/imgutil/el9/dracut/installkernel b/imgutil/el9/dracut/installkernel index 2f9b41ec..cb62b510 100644 --- a/imgutil/el9/dracut/installkernel +++ b/imgutil/el9/dracut/installkernel @@ -5,6 +5,7 @@ instmods nvme instmods cdc_ether r8152 instmods r8169 instmods vmxnet3 virtio_net +instmods virtio_scsi vmw_pvscsi instmods mptctl instmods mlx4_ib mlx5_ub ib_umad ib_ipoib instmods ice i40e hfi1 bnxt_en qed qede From 26f3ee539f6751052ebd44be82e1cbe928995df6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 10 Jun 2025 13:01:08 -0400 Subject: [PATCH 210/413] Add el10 to imgutil spec --- imgutil/confluent_imgutil.spec.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imgutil/confluent_imgutil.spec.tmpl b/imgutil/confluent_imgutil.spec.tmpl index f7dea7a7..5d49bbad 100644 --- a/imgutil/confluent_imgutil.spec.tmpl +++ b/imgutil/confluent_imgutil.spec.tmpl @@ -10,7 +10,7 @@ BuildRoot: /tmp/ %if "%{dist}" == ".el8" Requires: squashfs-tools cpio %else -%if "%{dist}" == ".el9" +%if "%{dist}" == ".el9" || "${dist}" == ".el10" Requires: squashfs-tools cpio %else %if "%{dist}" == ".el7" From 7dd5c36e787bbd671f648a0b06c7aff7b42dd995 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 10 Jun 2025 13:25:15 -0400 Subject: [PATCH 211/413] Remove EL7, add EL10 to the spec for imgutil --- imgutil/confluent_imgutil.spec.tmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imgutil/confluent_imgutil.spec.tmpl b/imgutil/confluent_imgutil.spec.tmpl index 5d49bbad..b3aa3bc9 100644 
--- a/imgutil/confluent_imgutil.spec.tmpl +++ b/imgutil/confluent_imgutil.spec.tmpl @@ -10,10 +10,10 @@ BuildRoot: /tmp/ %if "%{dist}" == ".el8" Requires: squashfs-tools cpio %else -%if "%{dist}" == ".el9" || "${dist}" == ".el10" +%if "%{dist}" == ".el9" Requires: squashfs-tools cpio %else -%if "%{dist}" == ".el7" +%if "%{dist}" == ".el10" Requires: squashfs-tools cpio %else Requires: squashfs From 071433a60ad4e37f6c102b6b4e63a9e20a54d8ae Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 11 Jun 2025 08:00:58 -0400 Subject: [PATCH 212/413] Handle underscore in volume name Only the first underscore would be in the volume group name, the rest would be a part of the volume name, which should be excluded. --- .../el9-diskless/profiles/default/scripts/image2disk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index ccf36036..4a08716a 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -373,7 +373,7 @@ def install_to_disk(imgpath): if fs['device'].startswith('/dev/mapper'): oldvgname = fs['device'].rsplit('/', 1)[-1] # if node has - then /dev/mapper will double up the hypen - if '_' in oldvgname and '-' in oldvgname.split('_')[-1]: + if '_' in oldvgname and '-' in oldvgname.split('_', 1)[-1]: oldvgname = oldvgname.rsplit('-', 1)[0].replace('--', '-') osname = oldvgname.split('_')[0] nodename = socket.gethostname().split('.')[0] From 45fa229f9fb31519b0c7e86064feb83f739d94e8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 11 Jun 2025 14:24:25 -0400 Subject: [PATCH 213/413] Reduce columns to fit in X tiling Check if right side of window will fit instead of left side.
--- confluent_client/bin/nodeconsole | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 960c784c..7cb00f2c 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -701,7 +701,6 @@ if options.windowed: screenheight -= wmyo currx = window_width curry = 0 - maxcol = int(screenwidth/window_width) for node in sortutil.natural_sort(nodes): if options.tile and envlist[0] == 'xterm': @@ -709,7 +708,7 @@ if options.windowed: corrected_y = curry xgeometry = '{0}+{1}+{2}'.format(sizegeometry, corrected_x, corrected_y) currx += window_width - if currx >= screenwidth: + if currx + window_width >= screenwidth: currx=0 curry += window_height if curry > screenheight: From 6b94a8fa22f0774606b81d97cc2af348e428f380 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 11 Jun 2025 15:19:52 -0400 Subject: [PATCH 214/413] Add openssh-keysign to el10 distributions --- confluent_osdeploy/el8/profiles/default/initprofile.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_osdeploy/el8/profiles/default/initprofile.sh b/confluent_osdeploy/el8/profiles/default/initprofile.sh index fa9c20ab..bbc619ce 100644 --- a/confluent_osdeploy/el8/profiles/default/initprofile.sh +++ b/confluent_osdeploy/el8/profiles/default/initprofile.sh @@ -3,6 +3,9 @@ sed -i 's/centos/CentOS/; s/rhel/Red Hat Enterprise Linux/; s/oraclelinux/Oracle if grep Fedora $2/profile.yaml > /dev/null; then sed -i 's/@^minimal-environment/#/' $2/packagelist fi +if grep ^label: $2/profile.yaml | grep 10 > /dev/null; then + echo 'echo openssh-keysign >> /tmp/addonpackages' > $2/scripts/pre.d/enablekeysign +fi ln -s $1/images/pxeboot/vmlinuz $2/boot/kernel && \ ln -s $1/images/pxeboot/initrd.img $2/boot/initramfs/distribution mkdir -p $2/boot/efi/boot From 0ec5cf5c5e8dd70aa95ad312c266990eb314dae7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 11 Jun 2025 15:48:23 -0400 Subject: [PATCH 
215/413] Make the keysign pre script readable --- confluent_osdeploy/el8/profiles/default/initprofile.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/el8/profiles/default/initprofile.sh b/confluent_osdeploy/el8/profiles/default/initprofile.sh index bbc619ce..8a25fe6e 100644 --- a/confluent_osdeploy/el8/profiles/default/initprofile.sh +++ b/confluent_osdeploy/el8/profiles/default/initprofile.sh @@ -5,6 +5,7 @@ if grep Fedora $2/profile.yaml > /dev/null; then fi if grep ^label: $2/profile.yaml | grep 10 > /dev/null; then echo 'echo openssh-keysign >> /tmp/addonpackages' > $2/scripts/pre.d/enablekeysign + chmod 644 $2/scripts/pre.d/enablekeysign fi ln -s $1/images/pxeboot/vmlinuz $2/boot/kernel && \ ln -s $1/images/pxeboot/initrd.img $2/boot/initramfs/distribution From 7d49c5f9be51027fdaf441f15d7f424330cdf05f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 11 Jun 2025 15:48:53 -0400 Subject: [PATCH 216/413] Do not error out on listing profiles/distributions before any exist --- confluent_server/confluent/osimage.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index f3d174c1..75a64e77 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -31,6 +31,7 @@ READFILES = set([ '.DISCINFO', '.discinfo', 'zipl.prm', + 'sources/idwbinfo.txt', ]) HEADERSUMS = set([b'\x85\xeddW\x86\xc5\xbdhx\xbe\x81\x18X\x1e\xb4O\x14\x9d\x11\xb7C8\x9b\x97R\x0c-\xb8Ht\xcb\xb3']) @@ -774,10 +775,16 @@ def printit(info): def list_distros(): - return sorted(os.listdir('/var/lib/confluent/distributions')) + try: + return sorted(os.listdir('/var/lib/confluent/distributions')) + except FileNotFoundError: + return [] def list_profiles(): - return sorted(os.listdir('/var/lib/confluent/public/os/')) + try: + return sorted(os.listdir('/var/lib/confluent/public/os/')) + except FileNotFoundError: + return [] def 
get_profile_label(profile): with open('/var/lib/confluent/public/os/{0}/profile.yaml') as metadata: From 5a96c7a20c58d8bbabf4b82331e6885860cee6df Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 11 Jun 2025 15:57:39 -0400 Subject: [PATCH 217/413] Change to grep -E This avoids a obsolescence message in rpm update --- confluent_server/confluent_server.spec.tmpl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/confluent_server/confluent_server.spec.tmpl b/confluent_server/confluent_server.spec.tmpl index 17b3da2c..7b2afd8a 100644 --- a/confluent_server/confluent_server.spec.tmpl +++ b/confluent_server/confluent_server.spec.tmpl @@ -83,10 +83,10 @@ chown confluent:confluent /etc/confluent /var/lib/confluent /var/log/confluent / sysctl -p /usr/lib/sysctl.d/confluent.conf >& /dev/null NEEDCHOWN=0 NEEDSTART=0 -find /etc/confluent -uid 0 | egrep '.*' > /dev/null && NEEDCHOWN=1 -find /var/log/confluent -uid 0 | egrep '.*' > /dev/null && NEEDCHOWN=1 -find /var/run/confluent -uid 0 | egrep '.*' > /dev/null && NEEDCHOWN=1 -find /var/cache/confluent -uid 0 | egrep '.*' > /dev/null && NEEDCHOWN=1 +find /etc/confluent -uid 0 | grep -E '.*' > /dev/null && NEEDCHOWN=1 +find /var/log/confluent -uid 0 | grep -E '.*' > /dev/null && NEEDCHOWN=1 +find /var/run/confluent -uid 0 | grep -E '.*' > /dev/null && NEEDCHOWN=1 +find /var/cache/confluent -uid 0 | grep -E '.*' > /dev/null && NEEDCHOWN=1 if [ $NEEDCHOWN = 1 ]; then if systemctl is-active confluent > /dev/null; then NEEDSTART=1 From dcd59667e411261bd1d6ff0630edda3f139cdefc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 11 Jun 2025 16:19:13 -0400 Subject: [PATCH 218/413] Add a secondary copernicus loop to diskless Sometimes 30 seconds just isn't enough. 
--- .../usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index 19489b43..9764d971 100644 --- a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -102,6 +102,14 @@ while ! grep ^EXTMGRINFO: /etc/confluent/confluent.info | awk -F'|' '{print $3}' ip link set $i up done /opt/confluent/bin/copernicus -t > /etc/confluent/confluent.info + echo -n . +done +TRIES=0 +while ! grep ^NODENAME: /etc/confluent/confluent.info >& /dev/null && [ "$TRIES" -lt 300 ]; do + sleep 0.5 + echo -n . + /opt/confluent/bin/copernicus -t > /etc/confluent/confluent.info + TRIES=$((TRIES + 1)) done cd / nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') From cb1f06fecfcab0c2ed69713f80ee3049ea272204 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 17 Jun 2025 10:14:09 -0400 Subject: [PATCH 219/413] Add EL10 Diskless --- .../confluent_osdeploy-aarch64.spec.tmpl | 9 +- .../confluent_osdeploy.spec.tmpl | 2 +- .../hooks/cmdline/10-confluentdiskless.sh | 327 ++++++++++ .../default/scripts/add_local_repositories | 58 ++ .../profiles/default/scripts/firstboot.custom | 4 + .../default/scripts/firstboot.service | 11 + .../profiles/default/scripts/firstboot.sh | 49 ++ .../profiles/default/scripts/functions | 209 +++++++ .../profiles/default/scripts/getinstalldisk | 99 ++++ .../profiles/default/scripts/image2disk.py | 561 ++++++++++++++++++ .../profiles/default/scripts/imageboot.sh | 132 +++++ .../profiles/default/scripts/installimage | 49 ++ .../profiles/default/scripts/onboot.custom | 0 .../profiles/default/scripts/onboot.service | 11 + .../profiles/default/scripts/onboot.sh | 66 
+++ .../profiles/default/scripts/post.sh | 53 ++ .../profiles/default/scripts/syncfileclient | 307 ++++++++++ imgutil/el10/dracut/install | 35 ++ imgutil/el10/dracut/installkernel | 14 + imgutil/el10/dracut/module-setup.sh | 10 + imgutil/el10/pkglist | 22 + imgutil/el10/pkglist.aarch64 | 22 + imgutil/imgutil | 6 +- 23 files changed, 2049 insertions(+), 7 deletions(-) create mode 100644 confluent_osdeploy/el10-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/add_local_repositories create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.custom create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.service create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.sh create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/functions create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/getinstalldisk create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/image2disk.py create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/installimage create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.custom create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.service create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.sh create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/post.sh create mode 100644 confluent_osdeploy/el10-diskless/profiles/default/scripts/syncfileclient create mode 100644 imgutil/el10/dracut/install create mode 100644 imgutil/el10/dracut/installkernel create mode 100644 imgutil/el10/dracut/module-setup.sh create mode 100644 imgutil/el10/pkglist create mode 100644 
imgutil/el10/pkglist.aarch64 diff --git a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl index fb6f6ddc..b6cf3826 100644 --- a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl @@ -26,11 +26,14 @@ mkdir -p opt/confluent/bin mkdir -p stateless-bin cp -a el8bin/* . ln -s el8 el9 -for os in rhvh4 el7 genesis el8 suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9; do +ln -s el8 el10 +for os in rhvh4 el7 genesis el8 suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9 el10; do mkdir ${os}out cd ${os}out if [ -d ../${os}bin ]; then cp -a ../${os}bin/opt . + elif [ $os = el10 ]; then + cp -a ../el9bin/opt . else cp -a ../opt . fi @@ -40,7 +43,7 @@ for os in rhvh4 el7 genesis el8 suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreo mv ../addons.cpio . cd .. done -for os in el7 el8 suse15 el9 ubuntu20.04; do +for os in el7 el8 suse15 el9 el10 ubuntu20.04; do mkdir ${os}disklessout cd ${os}disklessout if [ -d ../${os}bin ]; then @@ -76,7 +79,7 @@ cp -a esxi7 esxi8 %install mkdir -p %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ #cp LICENSE %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ -for os in rhvh4 el7 el8 el9 genesis suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 coreos; do +for os in rhvh4 el7 el8 el9 el10 genesis suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 coreos; do mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs/aarch64/ cp ${os}out/addons.* %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs/aarch64/ if [ -d ${os}disklessout ]; then diff --git a/confluent_osdeploy/confluent_osdeploy.spec.tmpl b/confluent_osdeploy/confluent_osdeploy.spec.tmpl index c7f42215..a6cf95e5 100644 --- a/confluent_osdeploy/confluent_osdeploy.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy.spec.tmpl @@ -46,7 +46,7 @@ for os in rhvh4 el7 genesis el8 suse15 debian ubuntu18.04 
ubuntu20.04 ubuntu22.0 mv ../addons.cpio . cd .. done -for os in el7 el8 suse15 el9 ubuntu20.04 ubuntu22.04 ubuntu24.04; do +for os in el7 el8 suse15 el9 el10 ubuntu20.04 ubuntu22.04 ubuntu24.04; do mkdir ${os}disklessout cd ${os}disklessout if [ -d ../${os}bin ]; then diff --git a/confluent_osdeploy/el10-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el10-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh new file mode 100644 index 00000000..41675c36 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -0,0 +1,327 @@ +get_remote_apikey() { + while [ -z "$confluent_apikey" ]; do + /opt/confluent/bin/clortho $nodename $confluent_mgr > /etc/confluent/confluent.apikey + if grep ^SEALED: /etc/confluent/confluent.apikey > /dev/null; then + # we don't support remote sealed api keys anymore + echo > /etc/confluent/confluent.apikey + fi + confluent_apikey=$(cat /etc/confluent/confluent.apikey) + if [ -z "$confluent_apikey" ]; then + echo "Unable to acquire node api key, set deployment.apiarmed=once on node '$nodename', retrying..." + if [ ! -z "$autoconsdev" ]; then echo "Unable to acquire node api key, set deployment.apiarmed=once on node '$nodename', retrying..." 
> $autoconsdev; fi + sleep 10 + elif [ -c /dev/tpmrm0 ]; then + tmpdir=$(mktemp -d) + cd $tmpdir + tpm2_startauthsession --session=session.ctx + tpm2_policypcr -Q --session=session.ctx --pcr-list="sha256:15" --policy=pcr15.sha256.policy + tpm2_createprimary -G ecc -Q --key-context=prim.ctx + (echo -n "CONFLUENT_APIKEY:";cat /etc/confluent/confluent.apikey) | tpm2_create -Q --policy=pcr15.sha256.policy --public=data.pub --private=data.priv -i - -C prim.ctx + tpm2_load -Q --parent-context=prim.ctx --public=data.pub --private=data.priv --name=confluent.apikey --key-context=data.ctx + tpm2_evictcontrol -Q -c data.ctx + tpm2_flushcontext session.ctx + cd - > /dev/null + rm -rf $tmpdir + fi + done +} +root=1 +rootok=1 +netroot=confluent +echo -ne '\033[H\033[2J\033[3J' +mkdir -p /etc/ssh +mkdir -p /var/tmp/ +mkdir -p /var/empty/sshd +mkdir -p /usr/share/empty.sshd +mkdir -p /etc/confluent +sed -i '/^root:x/d' /etc/passwd +echo root:x:0:0::/:/bin/bash >> /etc/passwd +echo sshd:x:30:30:SSH User:/var/empty/sshd:/sbin/nologin >> /etc/passwd + +if ! grep console= /proc/cmdline >& /dev/null; then + autocons=$(/opt/confluent/bin/autocons) + autoconsdev=${autocons%,*} + autocons=${autocons##*/} + echo "Automatic console configured for $autocons" +fi +echo "Initializing confluent diskless environment" +echo -n "udevd: " +/usr/lib/systemd/systemd-udevd --daemon +echo -n "Loading drivers..." 
+udevadm trigger +udevadm trigger --type=devices --action=add +udevadm settle +modprobe ib_ipoib +modprobe ib_umad +modprobe hfi1 +modprobe mlx5_ib +echo "done" +cat > /etc/ssh/sshd_config << EOF +Port 2222 +Subsystem sftp /usr/libexec/openssh/sftp-server +PermitRootLogin yes +AuthorizedKeysFile .ssh/authorized_keys +EOF +mkdir /root/.ssh +mkdir /.ssh +cat /ssh/*pubkey > /root/.ssh/authorized_keys 2>/dev/null +cp /root/.ssh/authorized_keys /.ssh/ +cat /tls/*.pem > /etc/confluent/ca.pem +mkdir -p /etc/pki/tls/certs +cat /tls/*.pem > /etc/pki/tls/certs/ca-bundle.crt +TRIES=0 +oldumask=$(umask) +umask 0077 +tpmdir=$(mktemp -d) +cd $tpmdir +lasthdl="" +if [ -c /dev/tpmrm0 ]; then + for hdl in $(tpm2_getcap handles-persistent|awk '{print $2}'); do + tpm2_startauthsession --policy-session --session=session.ctx + tpm2_policypcr -Q --session=session.ctx --pcr-list="sha256:15" --policy=pcr15.sha256.policy + unsealeddata=$(tpm2_unseal --auth=session:session.ctx -Q -c $hdl 2>/dev/null) + tpm2_flushcontext session.ctx + if [[ $unsealeddata == "CONFLUENT_APIKEY:"* ]]; then + confluent_apikey=${unsealeddata#CONFLUENT_APIKEY:} + echo $confluent_apikey > /etc/confluent/confluent.apikey + if [ -n "$lasthdl" ]; then + tpm2_evictcontrol -c $lasthdl + fi + lasthdl=$hdl + fi + done +fi +cd - > /dev/null +rm -rf $tpmdir +touch /etc/confluent/confluent.info +cd /sys/class/net +echo -n "Scanning for network configuration..." +while ! grep ^EXTMGRINFO: /etc/confluent/confluent.info | awk -F'|' '{print $3}' | grep 1 >& /dev/null && [ "$TRIES" -lt 30 ]; do + TRIES=$((TRIES + 1)) + for i in *; do + ip link set $i up + done + /opt/confluent/bin/copernicus -t > /etc/confluent/confluent.info + echo -n . +done +TRIES=0 +while ! grep ^NODENAME: /etc/confluent/confluent.info >& /dev/null && [ "$TRIES" -lt 300 ]; do + sleep 0.5 + echo -n . 
+ /opt/confluent/bin/copernicus -t > /etc/confluent/confluent.info + TRIES=$((TRIES + 1)) +done +cd / +nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') +hostname $nodename +confluent_mgr=$(grep '^EXTMGRINFO:.*1$' /etc/confluent/confluent.info | head -n 1 | awk -F': ' '{print $2}' | awk -F'|' '{print $1}') +if [ -z "$confluent_mgr" ]; then + confluent_mgr=$(grep ^MANAGER: /etc/confluent/confluent.info|head -n 1 | awk '{print $2}') +fi +if [[ $confluent_mgr == *%* ]]; then + echo $confluent_mgr | awk -F% '{print $2}' > /tmp/confluent.ifidx + ifidx=$(cat /tmp/confluent.ifidx) + ifname=$(ip link |grep ^$ifidx:|awk '{print $2}') + ifname=${ifname%:} +fi + +ready=0 +while [ $ready = "0" ]; do + get_remote_apikey + if [[ $confluent_mgr == *:* ]] && [[ $confluent_mgr != "["* ]]; then + confluent_mgr="[$confluent_mgr]" + fi + tmperr=$(mktemp) + curl -sSf -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_mgr/confluent-api/self/deploycfg2 > /etc/confluent/confluent.deploycfg 2> $tmperr + if grep 401 $tmperr > /dev/null; then + confluent_apikey="" + if [ -n "$lasthdl" ]; then + tpm2_evictcontrol -c $lasthdl + fi + confluent_mgr=${confluent_mgr#[} + confluent_mgr=${confluent_mgr%]} + elif grep 'SSL' $tmperr > /dev/null; then + confluent_mgr=${confluent_mgr#[} + confluent_mgr=${confluent_mgr%]} + echo 'Failure establishing TLS conneection to '$confluent_mgr' (try `osdeploy initialize -t` on the deployment server)' + if [ ! -z "$autoconsdev" ]; then echo 'Failure establishing TLS conneection to '$confluent_mgr' (try `osdeploy initialize -t` on the deployment server)' > $autoconsdev; fi + sleep 10 + else + ready=1 + fi + rm $tmperr +done +if [ ! 
-z "$autocons" ] && grep "textconsole: true" /etc/confluent/confluent.deploycfg > /dev/null; then /opt/confluent/bin/autocons -c > /dev/null; fi +if [ -c /dev/tpmrm0 ]; then + tpm2_pcrextend 15:sha256=2fbe96c50dde38ce9cd2764ddb79c216cfbcd3499568b1125450e60c45dd19f2 +fi +umask $oldumask +mkdir -p /run/NetworkManager/system-connections +cat > /run/NetworkManager/system-connections/$ifname.nmconnection << EOC +[connection] +EOC +echo id=${ifname} >> /run/NetworkManager/system-connections/$ifname.nmconnection +echo uuid=$(uuidgen) >> /run/NetworkManager/system-connections/$ifname.nmconnection +linktype=$(ip link show dev ${ifname}|grep link/|awk '{print $1}') +if [ "$linktype" = link/infiniband ]; then + linktype="infiniband" +else + linktype="ethernet" +fi +echo type=$linktype >> /run/NetworkManager/system-connections/$ifname.nmconnection +cat >> /run/NetworkManager/system-connections/$ifname.nmconnection << EOC +autoconnect-retries=1 +EOC +echo interface-name=$ifname >> /run/NetworkManager/system-connections/$ifname.nmconnection +cat >> /run/NetworkManager/system-connections/$ifname.nmconnection << EOC +multi-connect=1 +permissions= +wait-device-timeout=60000 + +EOC +if [ "$linktype" = infiniband ]; then +cat >> /run/NetworkManager/system-connections/$ifname.nmconnection << EOC +[infiniband] +transport-mode=datagram + +EOC +fi +autoconfigmethod=$(grep ^ipv4_method: /etc/confluent/confluent.deploycfg |awk '{print $2}') +auto6configmethod=$(grep ^ipv6_method: /etc/confluent/confluent.deploycfg |awk '{print $2}') +if [ "$autoconfigmethod" = "dhcp" ]; then + echo -n "Attempting to use dhcp to bring up $ifname..." 
+ dhcpcd $ifname + echo "Complete:" + ip addr show dev $ifname + confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg| awk '{print $2}') +elif [ "$autoconfigmethod" = "static" ]; then + confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg| awk '{print $2}') + v4addr=$(grep ^ipv4_address: /etc/confluent/confluent.deploycfg) + v4addr=${v4addr#ipv4_address: } + v4gw=$(grep ^ipv4_gateway: /etc/confluent/confluent.deploycfg) + v4gw=${v4gw#ipv4_gateway: } + if [ "$v4gw" = "null" ]; then + v4gw="" + fi + v4nm=$(grep ^prefix: /etc/confluent/confluent.deploycfg) + v4nm=${v4nm#prefix: } + echo "Setting up $ifname as static at $v4addr/$v4nm" + ip addr add dev $ifname $v4addr/$v4nm + if [ ! -z "$v4gw" ]; then + ip route add default via $v4gw + fi + echo '[ipv4]' >> /run/NetworkManager/system-connections/$ifname.nmconnection + echo address1=$v4addr/$v4nm >> /run/NetworkManager/system-connections/$ifname.nmconnection + if [ ! -z "$v4gw" ]; then + echo gateway=$v4gw >> /run/NetworkManager/system-connections/$ifname.nmconnection + fi + nameserversec=0 + nameservers="" + while read -r entry; do + if [ $nameserversec = 1 ]; then + if [[ $entry == "-"*.* ]]; then + nameservers="$nameservers"${entry#- }";" + continue + fi + fi + nameserversec=0 + if [ "${entry%:*}" = "nameservers" ]; then + nameserversec=1 + continue + fi + done < /etc/confluent/confluent.deploycfg + echo dns=$nameservers >> /run/NetworkManager/system-connections/$ifname.nmconnection + dnsdomain=$(grep ^dnsdomain: /etc/confluent/confluent.deploycfg) + dnsdomain=${dnsdomain#dnsdomain: } + echo dns-search=$dnsdomain >> /run/NetworkManager/system-connections/$ifname.nmconnection + cat >> /run/NetworkManager/system-connections/$ifname.nmconnection << EOC +may-fail=false +method=manual + +[ipv6] +addr-gen-mode=eui64 +method=auto + +EOC +elif [ "$auto6configmethod" = "static" ]; then + confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg| awk '{print $2}') + 
v6addr=$(grep ^ipv6_address: /etc/confluent/confluent.deploycfg) + v6addr=${v6addr#ipv6_address: } + v6gw=$(grep ^ipv6_gateway: /etc/confluent/confluent.deploycfg) + v6gw=${v6gw#ipv6_gateway: } + if [ "$v6gw" = "null" ]; then + v6gw="" + fi + v6nm=$(grep ^ipv6_prefix: /etc/confluent/confluent.deploycfg) + v6nm=${v6nm#ipv6_prefix: } + echo "Setting up $ifname as static at $v6addr/$v6nm" + ip addr add dev $ifname $v6addr/$v6nm + + cat >> /run/NetworkManager/system-connections/$ifname.nmconnection << EOC +[ipv4] +dhcp-timeout=90 +dhcp-vendor-class-identifier=anaconda-Linux +method=disabled + +[ipv6] +addr-gen-mode=eui64 +method=manual +may-fail=false +EOC + echo address1=$v6addr/$v6nm >> /run/NetworkManager/system-connections/$ifname.nmconnection + if [ ! -z "$v6gw" ]; then + ip route add default via $v6gw + echo gateway=$v6gw >> /run/NetworkManager/system-connections/$ifname.nmconnection + fi + nameserversec=0 + nameservers="" + while read -r entry; do + if [ $nameserversec = 1 ]; then + if [[ $entry == "-"*:* ]]; then + nameservers="$nameservers"${entry#- }";" + continue + fi + fi + nameserversec=0 + if [ "${entry%:*}" = "nameservers" ]; then + nameserversec=1 + continue + fi + done < /etc/confluent/confluent.deploycfg + echo dns=$nameservers >> /run/NetworkManager/system-connections/$ifname.nmconnection + dnsdomain=$(grep ^dnsdomain: /etc/confluent/confluent.deploycfg) + dnsdomain=${dnsdomain#dnsdomain: } + echo dns-search=$dnsdomain >> /run/NetworkManager/system-connections/$ifname.nmconnection +fi +echo '[proxy]' >> /run/NetworkManager/system-connections/$ifname.nmconnection +chmod 600 /run/NetworkManager/system-connections/*.nmconnection +confluent_websrv=$confluent_mgr +if [[ $confluent_websrv == *:* ]] && [[ $confluent_websrv != "["* ]]; then + confluent_websrv="[$confluent_websrv]" +fi +echo -n "Initializing ssh..." 
+ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -C '' -N '' +for pubkey in /etc/ssh/ssh_host*key.pub; do + certfile=${pubkey/.pub/-cert.pub} + privfile=${pubkey%.pub} + curl -sf -X POST -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" -d @$pubkey https://$confluent_websrv/confluent-api/self/sshcert > $certfile + if [ -s $certfile ]; then + echo HostCertificate $certfile >> /etc/ssh/sshd_config + fi + echo HostKey $privfile >> /etc/ssh/sshd_config +done +/usr/sbin/sshd +confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg| awk '{print $2}') +confluent_proto=$(grep ^protocol: /etc/confluent/confluent.deploycfg| awk '{print $2}') +confluent_urls="" +for addr in $(grep ^MANAGER: /etc/confluent/confluent.info|awk '{print $2}'|sed -e s/%/%25/); do + if [[ $addr == *:* ]]; then + confluent_urls="$confluent_urls $confluent_proto://[$addr]/confluent-public/os/$confluent_profile/rootimg.sfs" + else + confluent_urls="$confluent_urls $confluent_proto://$addr/confluent-public/os/$confluent_profile/rootimg.sfs" + fi +done +mkdir -p /etc/confluent +curl -sf https://$confluent_websrv/confluent-public/os/$confluent_profile/scripts/functions > /etc/confluent/functions +. 
/etc/confluent/functions +source_remote imageboot.sh diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/add_local_repositories b/confluent_osdeploy/el10-diskless/profiles/default/scripts/add_local_repositories new file mode 100644 index 00000000..fb26d5ef --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/add_local_repositories @@ -0,0 +1,58 @@ +try: + import configparser +except ImportError: + import ConfigParser as configparser + import cStringIO +import importlib.util +import importlib.machinery +import sys +modloader = importlib.machinery.SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient') +modspec = importlib.util.spec_from_file_location('apiclient', '/opt/confluent/bin/apiclient', loader=modloader) +apiclient = importlib.util.module_from_spec(modspec) +modspec.loader.exec_module(apiclient) +repo = None +server = None +v4cfg = None +server4 = None +server6 = None +profile = None +with open('/etc/confluent/confluent.deploycfg') as dplcfgfile: + lines = dplcfgfile.read().split('\n') + for line in lines: + if line.startswith('deploy_server:'): + _, server4 = line.split(' ', 1) + if line.startswith('deploy_server_v6:'): + _, server6 = line.split(' ', 1) + if line.startswith('profile: '): + _, profile = line.split(' ', 1) + if line.startswith('ipv4_method: '): + _, v4cfg = line.split(' ', 1) +if v4cfg == 'static' or v4cfg =='dhcp': + server = server4 +if not server: + server = '[{}]'.format(server6) + +path = '/confluent-public/os/{0}/distribution/'.format(profile) +clnt = apiclient.HTTPSClient() +cfgdata = clnt.grab_url(path + '.treeinfo').decode() +c = configparser.ConfigParser() +try: + c.read_string(cfgdata) +except AttributeError: + f = cStringIO.StringIO(cfgdata) + c.readfp(f) +for sec in c.sections(): + if sec.startswith('variant-'): + try: + repopath = c.get(sec, 'repository') + except Exception: + continue + _, varname = sec.split('-', 1) + reponame = 
'/etc/yum.repos.d/local-{0}.repo'.format(varname.lower()) + with open(reponame, 'w') as repout: + repout.write('[local-{0}]\n'.format(varname.lower())) + repout.write('name=Local install repository for {0}\n'.format(varname)) + if repopath[0] == '.': + repopath = repopath[1:] + repout.write('baseurl=https://{}/confluent-public/os/{}/distribution/{}\n'.format(server, profile, repopath)) + repout.write('enabled=1\n') diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.custom b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.custom new file mode 100644 index 00000000..eea34051 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.custom @@ -0,0 +1,4 @@ +. /etc/confluent/functions +# This is a convenient place to keep customizations separate from modifying the stock scripts +# While modification of the stock scripts is fine, it may be easier to rebase to a newer +# stock profile if the '.custom' files are used. diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.service b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.service new file mode 100644 index 00000000..209a95e6 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.service @@ -0,0 +1,11 @@ +[Unit] +Description=First Boot Process +Requires=network-online.target +After=network-online.target + +[Service] +ExecStart=/opt/confluent/bin/firstboot.sh + +[Install] +WantedBy=multi-user.target + diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.sh new file mode 100644 index 00000000..fabb9385 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.sh @@ -0,0 +1,49 @@ +#!/bin/sh + +# This script is executed on the first boot after install has +# completed. 
It is best to edit the middle of the file as +# noted below so custom commands are executed before +# the script notifies confluent that install is fully complete. + +HOME=$(getent passwd $(whoami)|cut -d: -f 6) +export HOME +nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') +confluent_apikey=$(cat /etc/confluent/confluent.apikey) +confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg|awk '{print $2}') +if [ -z "$confluent_mgr" ] || [ "$confluent_mgr" == "null" ] || ! ping -c 1 $confluent_mgr >& /dev/null; then + confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') +fi +confluent_websrv=$confluent_mgr +if [[ "$confluent_mgr" == *:* ]]; then + confluent_websrv="[$confluent_mgr]" +fi +confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') +export nodename confluent_mgr confluent_profile confluent_websrv +. /etc/confluent/functions +( +exec >> /var/log/confluent/confluent-firstboot.log +exec 2>> /var/log/confluent/confluent-firstboot.log +chmod 600 /var/log/confluent/confluent-firstboot.log +while ! ping -c 1 $confluent_mgr >& /dev/null; do + sleep 1 +done + +if [ ! -f /etc/confluent/firstboot.ran ]; then + touch /etc/confluent/firstboot.ran + + cat /etc/confluent/tls/*.pem >> /etc/pki/tls/certs/ca-bundle.crt + + run_remote firstboot.custom + # Firstboot scripts may be placed into firstboot.d, e.g. firstboot.d/01-firstaction.sh, firstboot.d/02-secondaction.sh + run_remote_parts firstboot.d + + # Induce execution of remote configuration, e.g. ansible plays in ansible/firstboot.d/ + run_remote_config firstboot.d +fi + +curl -X POST -d 'status: complete' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_websrv/confluent-api/self/updatestatus +systemctl disable firstboot +rm /etc/systemd/system/firstboot.service +rm /etc/confluent/firstboot.ran +) & +tail --pid $! 
-F /var/log/confluent/confluent-firstboot.log > /dev/console diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/functions b/confluent_osdeploy/el10-diskless/profiles/default/scripts/functions new file mode 100644 index 00000000..f68f3a5e --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/functions @@ -0,0 +1,209 @@ +#!/bin/bash +function test_mgr() { + whost=$1 + if [[ "$whost" == *:* ]] && [[ "$whost" != *[* ]] ; then + whost="[$whost]" + fi + if curl -gs https://${whost}/confluent-api/ > /dev/null; then + return 0 + fi + return 1 +} + +function confluentpython() { + if [ -x /usr/libexec/platform-python ]; then + /usr/libexec/platform-python $* + elif [ -x /usr/bin/python3 ]; then + /usr/bin/python3 $* + elif [ -x /usr/bin/python ]; then + /usr/bin/python $* + elif [ -x /usr/bin/python2 ]; then + /usr/bin/python2 $* + fi +} + +function set_confluent_vars() { + if [ -z "$nodename" ]; then + nodename=$(grep ^NODENAME: /etc/confluent/confluent.info | awk '{print $2}') + fi + if [[ "$confluent_mgr" == *"%"* ]]; then + confluent_mgr="" + fi + if [ -z "$confluent_mgr" ]; then + confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') + if ! test_mgr $confluent_mgr; then + confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') + if [[ "$confluent_mgr" = *":"* ]]; then + confluent_mgr="[$confluent_mgr]" + fi + fi + if ! 
test_mgr $confluent_mgr; then + BESTMGRS=$(grep ^EXTMGRINFO: /etc/confluent/confluent.info | grep '|1$' | sed -e 's/EXTMGRINFO: //' -e 's/|.*//') + OKMGRS=$(grep ^EXTMGRINFO: /etc/confluent/confluent.info | grep '|0$' | sed -e 's/EXTMGRINFO: //' -e 's/|.*//') + for confluent_mgr in $BESTMGRS $OKMGRS; do + if [[ $confluent_mgr == *":"* ]]; then + confluent_mgr="[$confluent_mgr]" + fi + if test_mgr $confluent_mgr; then + break + fi + done + fi + fi + if [ -z "$confluent_profile" ]; then + confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') + fi + export confluent_profile confluent_mgr nodename +} + +fetch_remote() { + curlargs="" + if [ -f /etc/confluent/ca.pem ]; then + curlargs=" --cacert /etc/confluent/ca.pem" + fi + set_confluent_vars + mkdir -p $(dirname $1) + whost=$confluent_mgr + if [[ "$whost" == *:* ]] && [[ "$whost" != *[* ]] ; then + whost="[$whost]" + fi + curl -gf -sS $curlargs https://$whost/confluent-public/os/$confluent_profile/scripts/$1 > $1 + if [ $? 
!= 0 ]; then echo $1 failed to download; return 1; fi +} + +source_remote_parts() { + confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + apiclient=/opt/confluent/bin/apiclient + if [ -f /etc/confluent/apiclient ]; then + apiclient=/etc/confluent/apiclient + fi + scriptlist=$(confluentpython $apiclient /confluent-api/self/scriptlist/$1|sed -e 's/^- //') + for script in $scriptlist; do + source_remote $1/$script + done + rm -rf $confluentscripttmpdir + unset confluentscripttmpdir +} + +run_remote_parts() { + confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + apiclient=/opt/confluent/bin/apiclient + if [ -f /etc/confluent/apiclient ]; then + apiclient=/etc/confluent/apiclient + fi + scriptlist=$(confluentpython $apiclient /confluent-api/self/scriptlist/$1|sed -e 's/^- //') + for script in $scriptlist; do + run_remote $1/$script + done + rm -rf $confluentscripttmpdir + unset confluentscripttmpdir +} + +source_remote() { + set_confluent_vars + unsettmpdir=0 + echo + echo '---------------------------------------------------------------------------' + echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ + if [ -z "$confluentscripttmpdir" ]; then + confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unsettmpdir=1 + fi + echo Sourcing from $confluentscripttmpdir + cd $confluentscripttmpdir + fetch_remote $1 + if [ $? 
!= 0 ]; then echo $1 failed to download; return 1; fi + chmod +x $1 + cmd=$1 + shift + source ./$cmd + cd - > /dev/null + if [ "$unsettmpdir" = 1 ]; then + rm -rf $confluentscripttmpdir + unset confluentscripttmpdir + unsettmpdir=0 + fi + rm -rf $confluentscripttmpdir + return $retcode +} + +run_remote() { + requestedcmd="'$*'" + unsettmpdir=0 + set_confluent_vars + echo + echo '---------------------------------------------------------------------------' + echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ + if [ -z "$confluentscripttmpdir" ]; then + confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unsettmpdir=1 + fi + echo Executing in $confluentscripttmpdir + cd $confluentscripttmpdir + fetch_remote $1 + if [ $? != 0 ]; then echo $requestedcmd failed to download; return 1; fi + chmod +x $1 + cmd=$1 + if [ -x /usr/bin/chcon ]; then + chcon system_u:object_r:bin_t:s0 $cmd + fi + shift + ./$cmd $* + retcode=$? + if [ $retcode -ne 0 ]; then + echo "$requestedcmd exited with code $retcode" + fi + cd - > /dev/null + if [ "$unsettmpdir" = 1 ]; then + rm -rf $confluentscripttmpdir + unset confluentscripttmpdir + unsettmpdir=0 + fi + return $retcode +} + +run_remote_python() { + echo + set_confluent_vars + if [ -f /etc/confluent/ca.pem ]; then + curlargs=" --cacert /etc/confluent/ca.pem" + fi + echo '---------------------------------------------------------------------------' + echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ + confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + echo Executing in $confluentscripttmpdir + cd $confluentscripttmpdir + mkdir -p $(dirname $1) + whost=$confluent_mgr + if [[ "$whost" == *:* ]] && [[ "$whost" != *[* ]] ; then + whost="[$whost]" + fi + curl -gf -sS $curlargs https://$whost/confluent-public/os/$confluent_profile/scripts/$1 > $1 + if [ $? 
!= 0 ]; then echo "'$*'" failed to download; return 1; fi + confluentpython $* + retcode=$? + echo "'$*' exited with code $retcode" + cd - > /dev/null + rm -rf $confluentscripttmpdir + unset confluentscripttmpdir + return $retcode +} + +run_remote_config() { + echo + set_confluent_vars + apiclient=/opt/confluent/bin/apiclient + if [ -f /etc/confluent/apiclient ]; then + apiclient=/etc/confluent/apiclient + fi + echo '---------------------------------------------------------------------------' + echo Requesting to run remote configuration for "'$*'" from $confluent_mgr under profile $confluent_profile + confluentpython $apiclient /confluent-api/self/remoteconfig/"$*" -d {} + confluentpython $apiclient /confluent-api/self/remoteconfig/status -w 204 + echo + echo 'Completed remote configuration' + echo '---------------------------------------------------------------------------' + return +} +#If invoked as a command, use the arguments to actually run a function +(return 0 2>/dev/null) || $1 "${@:2}" diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el10-diskless/profiles/default/scripts/getinstalldisk new file mode 100644 index 00000000..c954a254 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/getinstalldisk @@ -0,0 +1,99 @@ +#!/usr/bin/python3 +import subprocess +import os + +class DiskInfo(object): + def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") + self.name = devname + self.wwn = None + self.path = None + self.model = '' + self.size = 0 + self.driver = '' + self.mdcontainer = '' + self.subsystype = '' + devnode = '/dev/{0}'.format(devname) + qprop = subprocess.check_output( + ['udevadm', 'info', '--query=property', devnode]) + if not isinstance(qprop, str): + qprop = qprop.decode('utf8') + for prop in qprop.split('\n'): + if '=' not in prop: + continue + k, v = prop.split('=', 1) + if k == 'DEVTYPE' 
and v != 'disk': + raise Exception('Not a disk') + elif k == 'DM_NAME': + raise Exception('Device Mapper') + elif k == 'ID_MODEL': + self.model = v + elif k == 'DEVPATH': + self.path = v + elif k == 'ID_WWN': + self.wwn = v + elif k == 'MD_CONTAINER': + self.mdcontainer = v + attrs = subprocess.check_output(['udevadm', 'info', '-a', devnode]) + if not isinstance(attrs, str): + attrs = attrs.decode('utf8') + for attr in attrs.split('\n'): + if '==' not in attr: + continue + k, v = attr.split('==', 1) + k = k.strip() + if k == 'ATTRS{size}': + self.size = v.replace('"', '') + elif (k == 'DRIVERS' and not self.driver + and v not in ('"sd"', '""')): + self.driver = v.replace('"', '') + elif k == 'ATTRS{subsystype}': + self.subsystype = v.replace('"', '') + if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': + raise Exception("No driver detected") + if os.path.exists('/sys/block/{0}/size'.format(self.name)): + with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: + self.size = int(sizesrc.read()) * 512 + if int(self.size) < 536870912: + raise Exception("Device too small for install") + + @property + def priority(self): + if self.model.lower() in ('m.2 nvme 2-bay raid kit', 'thinksystem_m.2_vd', 'thinksystem m.2', 'thinksystem_m.2'): + return 0 + if 'imsm' in self.mdcontainer: + return 1 + if self.driver == 'ahci': + return 2 + if self.driver.startswith('megaraid'): + return 3 + if self.driver.startswith('mpt'): + return 4 + return 99 + + def __repr__(self): + return repr({ + 'name': self.name, + 'path': self.path, + 'wwn': self.wwn, + 'driver': self.driver, + 'size': self.size, + 'model': self.model, + }) + + +def main(): + disks = [] + for disk in sorted(os.listdir('/sys/class/block')): + try: + disk = DiskInfo(disk) + disks.append(disk) + except Exception as e: + print("Skipping {0}: {1}".format(disk, str(e))) + nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + if nd: + open('/tmp/installdisk', 
'w').write(nd[0]) + +if __name__ == '__main__': + main() diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el10-diskless/profiles/default/scripts/image2disk.py new file mode 100644 index 00000000..4a08716a --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/image2disk.py @@ -0,0 +1,561 @@ +#!/usr/bin/python3 +import glob +import json +import os +import re +import time +import shutil +import socket +import stat +import struct +import sys +import subprocess +import traceback + +bootuuid = None +vgname = 'localstorage' +oldvgname = None + +def convert_lv(oldlvname): + if oldvgname is None: + return None + return oldlvname.replace(oldvgname, vgname) + +def get_partname(devname, idx): + if devname[-1] in '0123456789': + return '{}p{}'.format(devname, idx) + else: + return '{}{}'.format(devname, idx) + +def get_next_part_meta(img, imgsize): + if img.tell() == imgsize: + return None + pathlen = struct.unpack('!H', img.read(2))[0] + mountpoint = img.read(pathlen).decode('utf8') + jsonlen = struct.unpack('!I', img.read(4))[0] + metadata = json.loads(img.read(jsonlen).decode('utf8')) + img.seek(16, 1) # skip the two 64-bit values we don't use, they are in json + nextlen = struct.unpack('!H', img.read(2))[0] + img.seek(nextlen, 1) # skip filesystem type + nextlen = struct.unpack('!H', img.read(2))[0] + img.seek(nextlen, 1) # skip orig devname (redundant with json) + nextlen = struct.unpack('!H', img.read(2))[0] + img.seek(nextlen, 1) # skip padding + nextlen = struct.unpack('!Q', img.read(8))[0] + img.seek(nextlen, 1) # go to next section + return metadata + +def get_multipart_image_meta(img): + img.seek(0, 2) + imgsize = img.tell() + img.seek(16) + seekamt = img.read(1) + img.seek(struct.unpack('B', seekamt)[0], 1) + partinfo = get_next_part_meta(img, imgsize) + while partinfo: + yield partinfo + partinfo = get_next_part_meta(img, imgsize) + +def get_image_metadata(imgpath): + with 
open(imgpath, 'rb') as img: + header = img.read(16) + if header == b'\x63\x7b\x9d\x26\xb7\xfd\x48\x30\x89\xf9\x11\xcf\x18\xfd\xff\xa1': + for md in get_multipart_image_meta(img): + if md.get('device', '').startswith('/dev/zram'): + continue + yield md + else: + # plausible filesystem structure to apply to a nominally "diskless" image + yield {'mount': '/', 'filesystem': 'xfs', 'minsize': 39513563136, 'initsize': 954128662528, 'flags': 'rw,seclabel,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota', 'device': '/dev/mapper/root', 'compressed_size': 27022069760} + yield {'mount': '/boot', 'filesystem': 'xfs', 'minsize': 232316928, 'initsize': 1006632960, 'flags': 'rw,seclabel,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota', 'device': '/dev/nvme1n1p2', 'compressed_size': 171462656} + yield {'mount': '/boot/efi', 'filesystem': 'vfat', 'minsize': 7835648, 'initsize': 627900416, 'flags': 'rw,relatime,fmask=0077,dmask=0077,codepage=437,iocharset=ascii,shortname=winnt,errors=remount-ro', 'device': '/dev/nvme1n1p1', 'compressed_size': 1576960} + #raise Exception('Installation from single part image not supported') + +class PartedRunner(): + def __init__(self, disk): + self.disk = disk + + def run(self, command, check=True): + command = command.split() + command = ['parted', '-a', 'optimal', '-s', self.disk] + command + if check: + return subprocess.check_output(command).decode('utf8') + else: + return subprocess.run(command, stdout=subprocess.PIPE).stdout.decode('utf8') + +def fixup(rootdir, vols): + devbymount = {} + for vol in vols: + devbymount[vol['mount']] = vol['targetdisk'] + fstabfile = os.path.join(rootdir, 'etc/fstab') + if os.path.exists(fstabfile): + with open(fstabfile) as tfile: + fstab = tfile.read().split('\n') + else: + #diskless image, need to invent fstab + fstab = [ + "#ORIGFSTAB#/dev/mapper/root# / xfs defaults 0 0", + "#ORIGFSTAB#UUID=aaf9e0f9-aa4d-4d74-9e75-3537620cfe23# /boot xfs defaults 0 0", + "#ORIGFSTAB#UUID=C21D-B881# /boot/efi vfat 
umask=0077,shortname=winnt 0 2", + "#ORIGFSTAB#/dev/mapper/swap# none swap defaults 0 0", + ] + while not fstab[0]: + fstab = fstab[1:] + if os.path.exists(os.path.join(rootdir, '.autorelabel')): + os.unlink(os.path.join(rootdir, '.autorelabel')) + with open(fstabfile, 'w') as tfile: + for tab in fstab: + entry = tab.split() + if tab.startswith('#ORIGFSTAB#'): + if entry[1] in devbymount: + targetdev = devbymount[entry[1]] + if targetdev.startswith('/dev/{}/'.format(vgname)): + entry[0] = targetdev + else: + uuid = subprocess.check_output(['blkid', '-s', 'UUID', '-o', 'value', targetdev]).decode('utf8') + uuid = uuid.strip() + entry[0] = 'UUID={}'.format(uuid) + elif entry[2] == 'swap': + entry[0] = '/dev/mapper/{}-swap'.format(vgname.replace('-', '--')) + entry[0] = entry[0].ljust(42) + entry[1] = entry[1].ljust(16) + entry[3] = entry[3].ljust(28) + tab = '\t'.join(entry) + tfile.write(tab + '\n') + with open(os.path.join(rootdir, 'etc/hostname'), 'w') as nameout: + nameout.write(socket.gethostname() + '\n') + selinuxconfig = os.path.join(rootdir, 'etc/selinux/config') + policy = None + if os.path.exists(selinuxconfig): + with open(selinuxconfig) as cfgin: + sec = cfgin.read().split('\n') + for l in sec: + l = l.split('#', 1)[0] + if l.startswith('SELINUXTYPE='): + _, policy = l.split('=') + for sshkey in glob.glob(os.path.join(rootdir, 'etc/ssh/*_key*')): + os.unlink(sshkey) + for sshkey in glob.glob('/etc/ssh/*_key*'): + newkey = os.path.join(rootdir, sshkey[1:]) + shutil.copy2(sshkey, newkey) + finfo = os.stat(sshkey) + os.chown(newkey, finfo[stat.ST_UID], finfo[stat.ST_GID]) + for ifcfg in glob.glob(os.path.join(rootdir, 'etc/sysconfig/network-scripts/*')): + os.unlink(ifcfg) + for ifcfg in glob.glob(os.path.join(rootdir, 'etc/NetworkManager/system-connections/*')): + os.unlink(ifcfg) + for ifcfg in glob.glob('/run/NetworkManager/system-connections/*'): + newcfg = ifcfg.split('/')[-1] + newcfg = os.path.join(rootdir, 
'etc/NetworkManager/system-connections/{0}'.format(newcfg)) + shutil.copy2(ifcfg, newcfg) + rootconfluentdir = os.path.join(rootdir, 'etc/confluent/') + if os.path.exists(rootconfluentdir): + shutil.rmtree(rootconfluentdir) + shutil.copytree('/etc/confluent', rootconfluentdir) + if policy: + sys.stdout.write('Applying SELinux labeling...') + sys.stdout.flush() + subprocess.check_call(['setfiles', '-r', rootdir, os.path.join(rootdir, 'etc/selinux/{}/contexts/files/file_contexts'.format(policy)), os.path.join(rootdir, 'etc')]) + subprocess.check_call(['setfiles', '-r', rootdir, os.path.join(rootdir, 'etc/selinux/{}/contexts/files/file_contexts'.format(policy)), os.path.join(rootdir, 'opt')]) + sys.stdout.write('Done\n') + sys.stdout.flush() + for metafs in ('proc', 'sys', 'dev'): + subprocess.check_call(['mount', '-o', 'bind', '/{}'.format(metafs), os.path.join(rootdir, metafs)]) + if os.path.exists(os.path.join(rootdir, 'etc/lvm/devices/system.devices')): + os.remove(os.path.join(rootdir, 'etc/lvm/devices/system.devices')) + grubsyscfg = os.path.join(rootdir, 'etc/sysconfig/grub') + if not os.path.exists(grubsyscfg): + grubsyscfg = os.path.join(rootdir, 'etc/default/grub') + kcmdline = os.path.join(rootdir, 'etc/kernel/cmdline') + if os.path.exists(kcmdline): + with open(kcmdline) as kcmdlinein: + kcmdlinecontent = kcmdlinein.read() + newkcmdlineent = [] + for ent in kcmdlinecontent.split(): + if ent.startswith('resume='): + newkcmdlineent.append('resume={}'.format(newswapdev)) + elif ent.startswith('root='): + newkcmdlineent.append('root={}'.format(newrootdev)) + elif ent.startswith('rd.lvm.lv='): + ent = convert_lv(ent) + if ent: + newkcmdlineent.append(ent) + else: + newkcmdlineent.append(ent) + with open(kcmdline, 'w') as kcmdlineout: + kcmdlineout.write(' '.join(newkcmdlineent) + '\n') + for loadent in glob.glob(os.path.join(rootdir, 'boot/loader/entries/*.conf')): + with open(loadent) as loadentin: + currentry = loadentin.read().split('\n') + with 
open(loadent, 'w') as loadentout: + for cfgline in currentry: + cfgparts = cfgline.split() + if not cfgparts or cfgparts[0] != 'options': + loadentout.write(cfgline + '\n') + continue + newcfgparts = [cfgparts[0]] + for cfgpart in cfgparts[1:]: + if cfgpart.startswith('root='): + newcfgparts.append('root={}'.format(newrootdev)) + elif cfgpart.startswith('resume='): + newcfgparts.append('resume={}'.format(newswapdev)) + elif cfgpart.startswith('rd.lvm.lv='): + cfgpart = convert_lv(cfgpart) + if cfgpart: + newcfgparts.append(cfgpart) + else: + newcfgparts.append(cfgpart) + loadentout.write(' '.join(newcfgparts) + '\n') + if os.path.exists(grubsyscfg): + with open(grubsyscfg) as defgrubin: + defgrub = defgrubin.read().split('\n') + else: + defgrub = [ + 'GRUB_TIMEOUT=5', + 'GRUB_DISTRIBUTOR="$(sed ' + "'s, release .*$,,g'" + ' /etc/system-release)"', + 'GRUB_DEFAULT=saved', + 'GRUB_DISABLE_SUBMENU=true', + 'GRUB_TERMINAL=""', + 'GRUB_SERIAL_COMMAND=""', + 'GRUB_CMDLINE_LINUX="crashkernel=1G-4G:192M,4G-64G:256M,64G-:512M rd.lvm.lv=vg/root rd.lvm.lv=vg/swap"', + 'GRUB_DISABLE_RECOVERY="true"', + 'GRUB_ENABLE_BLSCFG=true', + ] + if not os.path.exists(os.path.join(rootdir, "etc/kernel/cmdline")): + with open(os.path.join(rootdir, "etc/kernel/cmdline"), "w") as cmdlineout: + cmdlineout.write("root=/dev/mapper/localstorage-root rd.lvm.lv=localstorage/root") + with open(grubsyscfg, 'w') as defgrubout: + for gline in defgrub: + gline = gline.split() + newline = [] + for ent in gline: + if ent.startswith('resume='): + newline.append('resume={}'.format(newswapdev)) + elif ent.startswith('root='): + newline.append('root={}'.format(newrootdev)) + elif ent.startswith('rd.lvm.lv='): + ent = convert_lv(ent) + if ent: + newline.append(ent) + elif '""' in ent: + newline.append('""') + else: + newline.append(ent) + defgrubout.write(' '.join(newline) + '\n') + grubcfg = subprocess.check_output(['find', os.path.join(rootdir, 'boot'), '-name', 
'grub.cfg']).decode('utf8').strip().replace(rootdir, '/').replace('//', '/') + grubcfg = grubcfg.split('\n') + if not grubcfg[-1]: + grubcfg = grubcfg[:-1] + if len(grubcfg) == 1: + grubcfg = grubcfg[0] + elif not grubcfg: + grubcfg = '/boot/grub2/grub.cfg' + paths = glob.glob(os.path.join(rootdir, 'boot/efi/EFI/*')) + for path in paths: + with open(os.path.join(path, 'grub.cfg'), 'w') as stubgrubout: + stubgrubout.write("search --no-floppy --root-dev-only --fs-uuid --set=dev " + bootuuid + "\nset prefix=($dev)/grub2\nexport $prefix\nconfigfile $prefix/grub.cfg\n") + else: + for gcfg in grubcfg: + rgcfg = os.path.join(rootdir, gcfg[1:]) # gcfg has a leading / to get rid of + if os.stat(rgcfg).st_size > 256: + grubcfg = gcfg + else: + with open(rgcfg, 'r') as gin: + tgrubcfg = gin.read() + tgrubcfg = tgrubcfg.split('\n') + if 'search --no-floppy --fs-uuid --set=dev' in tgrubcfg[0]: + tgrubcfg[0] = 'search --no-floppy --fs-uuid --set=dev ' + bootuuid + with open(rgcfg, 'w') as gout: + for gcline in tgrubcfg: + gout.write(gcline) + gout.write('\n') + try: + subprocess.check_call(['chroot', rootdir, 'grub2-mkconfig', '-o', grubcfg]) + except Exception as e: + print(repr(e)) + print(rootdir) + print(grubcfg) + time.sleep(86400) + newroot = None + with open('/etc/shadow') as shadowin: + shents = shadowin.read().split('\n') + for shent in shents: + shent = shent.split(':') + if not shent: + continue + if shent[0] == 'root' and shent[1] not in ('*', '!!', ''): + newroot = shent[1] + if newroot: + shlines = None + with open(os.path.join(rootdir, 'etc/shadow')) as oshadow: + shlines = oshadow.read().split('\n') + with open(os.path.join(rootdir, 'etc/shadow'), 'w') as oshadow: + for line in shlines: + if line.startswith('root:'): + line = line.split(':') + line[1] = newroot + line = ':'.join(line) + oshadow.write(line + '\n') + partnum = None + targblock = None + for vol in vols: + if vol['mount'] == '/boot/efi': + targdev = vol['targetdisk'] + partnum = re.search('(\d+)$', 
targdev).group(1) + targblock = re.search('(.*)\d+$', targdev).group(1) + if targblock: + if targblock.endswith('p') and 'nvme' in targblock: + targblock = targblock[:-1] + shimpath = subprocess.check_output(['find', os.path.join(rootdir, 'boot/efi'), '-name', 'shimx64.efi']).decode('utf8').strip() + shimpath = shimpath.replace(rootdir, '/').replace('/boot/efi', '').replace('//', '/').replace('/', '\\') + subprocess.check_call(['efibootmgr', '-c', '-d', targblock, '-l', shimpath, '--part', partnum]) + + try: + os.makedirs(os.path.join(rootdir, 'opt/confluent/bin')) + except Exception: + pass + shutil.copy2('/opt/confluent/bin/apiclient', os.path.join(rootdir, 'opt/confluent/bin/apiclient')) + #other network interfaces + + +def had_swap(): + if not os.path.exists('/etc/fstab'): + # diskless source, assume swap + return True + with open('/etc/fstab') as tabfile: + tabs = tabfile.read().split('\n') + for tab in tabs: + tab = tab.split() + if len(tab) < 3: + continue + if tab[2] == 'swap': + return True + return False + +newrootdev = None +newswapdev = None +def install_to_disk(imgpath): + global bootuuid + global newrootdev + global newswapdev + global vgname + global oldvgname + lvmvols = {} + deftotsize = 0 + mintotsize = 0 + deflvmsize = 0 + minlvmsize = 0 + biggestsize = 0 + biggestfs = None + plainvols = {} + allvols = [] + swapsize = 0 + if had_swap(): + with open('/proc/meminfo') as meminfo: + swapsize = meminfo.read().split('\n')[0] + swapsize = int(swapsize.split()[1]) + if swapsize < 2097152: + swapsize = swapsize * 2 + elif swapsize > 8388608 and swapsize < 67108864: + swapsize = swapsize * 0.5 + elif swapsize >= 67108864: + swapsize = 33554432 + swapsize = int(swapsize * 1024) + deftotsize = swapsize + mintotsize = swapsize + for fs in get_image_metadata(imgpath): + allvols.append(fs) + deftotsize += fs['initsize'] + mintotsize += fs['minsize'] + if fs['initsize'] > biggestsize: + biggestfs = fs + biggestsize = fs['initsize'] + if 
fs['device'].startswith('/dev/mapper'): + oldvgname = fs['device'].rsplit('/', 1)[-1] + # if node has - then /dev/mapper will double up the hypen + if '_' in oldvgname and '-' in oldvgname.split('_', 1)[-1]: + oldvgname = oldvgname.rsplit('-', 1)[0].replace('--', '-') + osname = oldvgname.split('_')[0] + nodename = socket.gethostname().split('.')[0] + vgname = '{}_{}'.format(osname, nodename) + lvmvols[fs['device'].replace('/dev/mapper/', '')] = fs + deflvmsize += fs['initsize'] + minlvmsize += fs['minsize'] + else: + plainvols[int(re.search('(\d+)$', fs['device'])[0])] = fs + with open('/tmp/installdisk') as diskin: + instdisk = diskin.read() + instdisk = '/dev/' + instdisk + parted = PartedRunner(instdisk) + dinfo = parted.run('unit s print', check=False) + dinfo = dinfo.split('\n') + sectors = 0 + sectorsize = 0 + for inf in dinfo: + if inf.startswith('Disk {0}:'.format(instdisk)): + _, sectors = inf.split(': ') + sectors = int(sectors.replace('s', '')) + if inf.startswith('Sector size (logical/physical):'): + _, sectorsize = inf.split(':') + sectorsize = sectorsize.split('/')[0] + sectorsize = sectorsize.replace('B', '') + sectorsize = int(sectorsize) + # for now, only support resizing/growing the largest partition + minexcsize = deftotsize - biggestfs['initsize'] + mintotsize = deftotsize - biggestfs['initsize'] + biggestfs['minsize'] + minsectors = mintotsize // sectorsize + if sectors < (minsectors + 65536): + raise Exception('Disk too small to fit image') + biggestsectors = sectors - (minexcsize // sectorsize) + biggestsize = sectorsize * biggestsectors + parted.run('mklabel gpt') + curroffset = 2048 + for volidx in sorted(plainvols): + vol = plainvols[volidx] + if vol is not biggestfs: + size = vol['initsize'] // sectorsize + else: + size = biggestsize // sectorsize + size += 2047 - (size % 2048) + end = curroffset + size + if end > sectors: + end = sectors + parted.run('mkpart primary {}s {}s'.format(curroffset, end)) + vol['targetdisk'] = 
get_partname(instdisk, volidx) + if vol['mount'] == '/': + newrootdev = vol['targetdisk'] + curroffset += size + 1 + if not lvmvols: + if swapsize: + swapsize = swapsize // sectorsize + swapsize += 2047 - (size % 2048) + end = curroffset + swapsize + if end > sectors: + end = sectors + parted.run('mkpart swap {}s {}s'.format(curroffset, end)) + newswapdev = get_partname(instdisk, volidx + 1) + subprocess.check_call(['mkswap', newswapdev]) + else: + parted.run('mkpart lvm {}s 100%'.format(curroffset)) + lvmpart = get_partname(instdisk, volidx + 1) + subprocess.check_call(['pvcreate', '-ff', '-y', lvmpart]) + subprocess.check_call(['vgcreate', vgname, lvmpart]) + vginfo = subprocess.check_output(['vgdisplay', vgname, '--units', 'b']).decode('utf8') + vginfo = vginfo.split('\n') + pesize = 0 + pes = 0 + for infline in vginfo: + infline = infline.split() + if len(infline) >= 3 and infline[:2] == ['PE', 'Size']: + pesize = int(infline[2]) + if len(infline) >= 5 and infline[:2] == ['Free', 'PE']: + pes = int(infline[4]) + takeaway = swapsize // pesize + for volidx in lvmvols: + vol = lvmvols[volidx] + if vol is biggestfs: + continue + takeaway += vol['initsize'] // pesize + takeaway += 1 + biggestextents = pes - takeaway + for volidx in lvmvols: + vol = lvmvols[volidx] + if vol is biggestfs: + extents = biggestextents + else: + extents = vol['initsize'] // pesize + extents += 1 + if vol['mount'] == '/': + lvname = 'root' + + else: + lvname = vol['mount'].replace('/', '_') + subprocess.check_call(['lvcreate', '-l', '{}'.format(extents), '-y', '-n', lvname, vgname]) + vol['targetdisk'] = '/dev/{}/{}'.format(vgname, lvname) + if vol['mount'] == '/': + newrootdev = vol['targetdisk'] + if swapsize: + subprocess.check_call(['lvcreate', '-y', '-l', '{}'.format(swapsize // pesize), '-n', 'swap', vgname]) + subprocess.check_call(['mkswap', '/dev/{}/swap'.format(vgname)]) + newswapdev = '/dev/{}/swap'.format(vgname) + os.makedirs('/run/imginst/targ') + for vol in allvols: + with 
open(vol['targetdisk'], 'wb') as partition: + partition.write(b'\x00' * 1 * 1024 * 1024) + subprocess.check_call(['mkfs.{}'.format(vol['filesystem']), vol['targetdisk']]) + subprocess.check_call(['mount', vol['targetdisk'], '/run/imginst/targ']) + source = vol['mount'].replace('/', '_') + source = '/run/imginst/sources/' + source + if not os.path.exists(source): + source = '/run/imginst/sources/_' + vol['mount'] + blankfsstat = os.statvfs('/run/imginst/targ') + blankused = (blankfsstat.f_blocks - blankfsstat.f_bfree) * blankfsstat.f_bsize + sys.stdout.write('\nWriting {0}: '.format(vol['mount'])) + with subprocess.Popen(['cp', '-ax', source + '/.', '/run/imginst/targ']) as copier: + stillrunning = copier.poll() + lastprogress = 0.0 + while stillrunning is None: + currfsstat = os.statvfs('/run/imginst/targ') + currused = (currfsstat.f_blocks - currfsstat.f_bfree) * currfsstat.f_bsize + currused -= blankused + with open('/proc/meminfo') as meminf: + for line in meminf.read().split('\n'): + if line.startswith('Dirty:'): + _, dirty, _ = line.split() + dirty = int(dirty) * 1024 + progress = (currused - dirty) / vol['minsize'] + if progress < lastprogress: + progress = lastprogress + if progress > 0.99: + progress = 0.99 + lastprogress = progress + progress = progress * 100 + sys.stdout.write('\x1b[1K\rWriting {0}: {1:3.2f}%'.format(vol['mount'], progress)) + sys.stdout.flush() + time.sleep(0.5) + stillrunning = copier.poll() + if stillrunning != 0: + raise Exception("Error copying volume") + with subprocess.Popen(['sync']) as syncrun: + stillrunning = syncrun.poll() + while stillrunning is None: + with open('/proc/meminfo') as meminf: + for line in meminf.read().split('\n'): + if line.startswith('Dirty:'): + _, dirty, _ = line.split() + dirty = int(dirty) * 1024 + progress = (vol['minsize'] - dirty) / vol['minsize'] + if progress < lastprogress: + progress = lastprogress + if progress > 0.99: + progress = 0.99 + lastprogress = progress + progress = progress * 100 + 
sys.stdout.write('\x1b[1K\rWriting {0}: {1:3.2f}%'.format(vol['mount'], progress)) + sys.stdout.flush() + time.sleep(0.5) + stillrunning = syncrun.poll() + sys.stdout.write('\x1b[1K\rDone writing {0}'.format(vol['mount'])) + sys.stdout.write('\n') + sys.stdout.flush() + if vol['mount'] == '/boot': + tbootuuid = subprocess.check_output(['blkid', vol['targetdisk']]) + if b'UUID="' in tbootuuid: + bootuuid = tbootuuid.split(b'UUID="', 1)[1].split(b'"')[0].decode('utf8') + + + + + subprocess.check_call(['umount', '/run/imginst/targ']) + for vol in allvols: + subprocess.check_call(['mount', vol['targetdisk'], '/run/imginst/targ/' + vol['mount']]) + fixup('/run/imginst/targ', allvols) + + +if __name__ == '__main__': + try: + install_to_disk(os.environ['mountsrc']) + except Exception: + traceback.print_exc() + time.sleep(86400) + raise diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh new file mode 100644 index 00000000..5f4c3189 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh @@ -0,0 +1,132 @@ +. 
/lib/dracut-lib.sh +confluent_whost=$confluent_mgr +if [[ "$confluent_whost" == *:* ]] && [[ "$confluent_whost" != "["* ]]; then + confluent_whost="[$confluent_mgr]" +fi +mkdir -p /mnt/remoteimg /mnt/remote /mnt/overlay +if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then + mount -t tmpfs untethered /mnt/remoteimg + curl https://$confluent_whost/confluent-public/os/$confluent_profile/rootimg.sfs -o /mnt/remoteimg/rootimg.sfs +else + confluent_urls="$confluent_urls https://$confluent_whost/confluent-public/os/$confluent_profile/rootimg.sfs" + /opt/confluent/bin/urlmount $confluent_urls /mnt/remoteimg +fi +/opt/confluent/bin/confluent_imginfo /mnt/remoteimg/rootimg.sfs > /tmp/rootimg.info +loopdev=$(losetup -f) +export mountsrc=$loopdev +losetup -r $loopdev /mnt/remoteimg/rootimg.sfs +if grep '^Format: confluent_crypted' /tmp/rootimg.info > /dev/null; then + while ! curl -sf -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $(cat /etc/confluent/confluent.apikey)" https://$confluent_whost/confluent-api/self/profileprivate/pending/rootimg.key > /tmp/rootimg.key; do + echo "Unable to retrieve private key from $confluent_mgr (verify that confluent can access /var/lib/confluent/private/os/$confluent_profile/pending/rootimg.key)" + sleep 1 + done + cipher=$(head -n 1 /tmp/rootimg.key) + key=$(tail -n 1 /tmp/rootimg.key) + len=$(wc -c /mnt/remoteimg/rootimg.sfs | awk '{print $1}') + len=$(((len-4096)/512)) + dmsetup create cryptimg --table "0 $len crypt $cipher $key 0 $loopdev 8" + /opt/confluent/bin/confluent_imginfo /dev/mapper/cryptimg > /tmp/rootimg.info + mountsrc=/dev/mapper/cryptimg +fi + +if grep '^Format: squashfs' /tmp/rootimg.info > /dev/null; then + mount -o ro $mountsrc /mnt/remote +elif grep '^Format: confluent_multisquash' /tmp/rootimg.info; then + tail -n +3 /tmp/rootimg.info | awk '{gsub("/", "_"); print "echo 0 " $4 " linear '$mountsrc' " $3 " | dmsetup create mproot" $7}' > /tmp/setupmount.sh + . 
/tmp/setupmount.sh + cat /tmp/setupmount.sh |awk '{printf "mount /dev/mapper/"$NF" "; sub("mproot", ""); gsub("_", "/"); print "/mnt/remote"$NF}' > /tmp/mountparts.sh + . /tmp/mountparts.sh +fi + + +#mount -t tmpfs overlay /mnt/overlay +modprobe zram +memtot=$(grep ^MemTotal: /proc/meminfo|awk '{print $2}') +memtot=$((memtot/2))$(grep ^MemTotal: /proc/meminfo | awk '{print $3'}) +echo $memtot > /sys/block/zram0/disksize +mkfs.xfs /dev/zram0 > /dev/null +mount -o discard /dev/zram0 /mnt/overlay +if [ ! -f /tmp/mountparts.sh ]; then + mkdir -p /mnt/overlay/upper /mnt/overlay/work + mount -t overlay -o upperdir=/mnt/overlay/upper,workdir=/mnt/overlay/work,lowerdir=/mnt/remote disklessroot /sysroot +else + for srcmount in $(cat /tmp/mountparts.sh | awk '{print $3}'); do + mkdir -p /mnt/overlay${srcmount}/upper /mnt/overlay${srcmount}/work + mount -t overlay -o upperdir=/mnt/overlay${srcmount}/upper,workdir=/mnt/overlay${srcmount}/work,lowerdir=${srcmount} disklesspart /sysroot${srcmount#/mnt/remote} + done +fi +mkdir -p /sysroot/etc/ssh +mkdir -p /sysroot/etc/confluent +mkdir -p /sysroot/root/.ssh +cp /root/.ssh/* /sysroot/root/.ssh +chmod 700 /sysroot/root/.ssh +cp /etc/confluent/* /sysroot/etc/confluent/ +cp /etc/ssh/*key* /sysroot/etc/ssh/ +for pubkey in /etc/ssh/ssh_host*key.pub; do + certfile=${pubkey/.pub/-cert.pub} + privfile=${pubkey%.pub} + if [ -s $certfile ]; then + echo HostCertificate $certfile >> /sysroot/etc/ssh/sshd_config + fi + echo HostKey $privfile >> /sysroot/etc/ssh/sshd_config +done + +mkdir -p /sysroot/dev /sysroot/sys /sysroot/proc /sysroot/run +if [ ! -z "$autocons" ]; then + autocons=${autocons%,*} + mkdir -p /run/systemd/generator/getty.target.wants + ln -s /usr/lib/systemd/system/serial-getty@.service /run/systemd/generator/getty.target.wants/serial-getty@${autocons}.service +fi +while [ ! 
-e /sysroot/sbin/init ]; do + echo "Failed to access root filesystem or it is missing /sbin/init" + echo "System should be accessible through ssh at port 2222 with the appropriate key" + while [ ! -e /sysroot/sbin/init ]; do + sleep 1 + done +done +rootpassword=$(grep ^rootpassword: /etc/confluent/confluent.deploycfg) +rootpassword=${rootpassword#rootpassword: } +if [ "$rootpassword" = "null" ]; then + rootpassword="" +fi + +if [ ! -z "$rootpassword" ]; then + sed -i "s@root:[^:]*:@root:$rootpassword:@" /sysroot/etc/shadow +fi +for i in /ssh/*.ca; do + echo '@cert-authority *' $(cat $i) >> /sysroot/etc/ssh/ssh_known_hosts +done +echo HostbasedAuthentication yes >> /sysroot/etc/ssh/sshd_config +echo HostbasedUsesNameFromPacketOnly yes >> /sysroot/etc/ssh/sshd_config +echo IgnoreRhosts no >> /sysroot/etc/ssh/sshd_config +sshconf=/sysroot/etc/ssh/ssh_config +if [ -d /sysroot/etc/ssh/ssh_config.d/ ]; then + sshconf=/sysroot/etc/ssh/ssh_config.d/01-confluent.conf +fi +echo 'Host *' >> $sshconf +echo ' HostbasedAuthentication yes' >> $sshconf +echo ' EnableSSHKeysign yes' >> $sshconf +echo ' HostbasedKeyTypes *ed25519*' >> $sshconf +curl -sf -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $(cat /etc/confluent/confluent.apikey)" https://$confluent_whost/confluent-api/self/nodelist > /sysroot/etc/ssh/shosts.equiv +cp /sysroot/etc/ssh/shosts.equiv /sysroot/root/.shosts +chmod 640 /sysroot/etc/ssh/*_key +cp /tls/*.pem /sysroot/etc/pki/ca-trust/source/anchors/ +chroot /sysroot/ update-ca-trust +curl -sf https://$confluent_whost/confluent-public/os/$confluent_profile/scripts/onboot.service > /sysroot/etc/systemd/system/onboot.service +mkdir -p /sysroot/opt/confluent/bin +curl -sf https://$confluent_whost/confluent-public/os/$confluent_profile/scripts/onboot.sh > /sysroot/opt/confluent/bin/onboot.sh +chmod +x /sysroot/opt/confluent/bin/onboot.sh +cp /opt/confluent/bin/apiclient /sysroot/opt/confluent/bin +ln -s /etc/systemd/system/onboot.service 
/sysroot/etc/systemd/system/multi-user.target.wants/onboot.service +cp /etc/confluent/functions /sysroot/etc/confluent/functions +if grep installtodisk /proc/cmdline > /dev/null; then + . /etc/confluent/functions + run_remote installimage + exec reboot -f +fi +mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs +ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ +mv /lib/firmware /lib/firmware-ramfs +ln -s /sysroot/lib/firmware /lib/firmware +kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) +exec /opt/confluent/bin/start_root diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/installimage b/confluent_osdeploy/el10-diskless/profiles/default/scripts/installimage new file mode 100644 index 00000000..c461173b --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/installimage @@ -0,0 +1,49 @@ +#!/bin/bash +. /etc/confluent/functions +# the image will be used to deploy itself +# provide both access to image (for parsing metadata) +# and existing mounts of image (to take advantage of caching) +mount -o bind /sys /sysroot/sys +mount -o bind /dev /sysroot/dev +mount -o bind /proc /sysroot/proc +mount -o bind /run /sysroot/run + + +if [ ! -f /tmp/mountparts.sh ]; then + mkdir -p /sysroot/run/imginst/sources/_ + mount -o bind /mnt/remote /sysroot/run/imginst/sources/_ +else + for srcmount in $(cat /tmp/mountparts.sh | awk '{print $2}'); do + srcname=${srcmount#/dev/mapper/mproot} + srcdir=$(echo $srcmount | sed -e 's!/dev/mapper/mproot!/mnt/remote!' -e 's!_!/!g') + mkdir -p /sysroot/run/imginst/sources/$srcname + mount -o bind $srcdir /sysroot/run/imginst/sources/$srcname + done +fi +cd /sysroot/run +chroot /sysroot/ bash -c "source /etc/confluent/functions; run_remote_python getinstalldisk" +chroot /sysroot/ bash -c "source /etc/confluent/functions; run_remote_parts pre.d" +if [ ! 
-f /sysroot/tmp/installdisk ]; then + echo 'Unable to find a suitable installation target device, ssh to port 2222 to investigate' + while [ ! -f /sysroot/tmp/installdisk ]; do + sleep 1 + done +fi +lvm vgchange -a n +/sysroot/usr/sbin/wipefs -a /dev/$(cat /sysroot/tmp/installdisk) +udevadm control -e +if [ -f /sysroot/etc/lvm/devices/system.devices ]; then + rm /sysroot/etc/lvm/devices/system.devices +fi +chroot /sysroot /usr/lib/systemd/systemd-udevd --daemon +chroot /sysroot bash -c "source /etc/confluent/functions; run_remote_python image2disk.py" +echo "Port 22" >> /etc/ssh/sshd_config +echo 'Match LocalPort 22' >> /etc/ssh/sshd_config +echo ' ChrootDirectory /sysroot/run/imginst/targ' >> /etc/ssh/sshd_config +kill -HUP $(cat /run/sshd.pid) +cp /sysroot/etc/pki/ca-trust/source/anchors/* /sysroot/run/imginst/targ/etc/pki/ca-trust/source/anchors/ +chroot /sysroot/run/imginst/targ update-ca-trust + +chroot /sysroot/run/imginst/targ bash -c "source /etc/confluent/functions; run_remote post.sh" +chroot /sysroot bash -c "umount \$(tac /proc/mounts|awk '{print \$2}'|grep ^/run/imginst/targ)" + diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.custom b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.custom new file mode 100644 index 00000000..e69de29b diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.service b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.service new file mode 100644 index 00000000..f9235033 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.service @@ -0,0 +1,11 @@ +[Unit] +Description=Confluent onboot hook +Requires=network-online.target +After=network-online.target + +[Service] +ExecStart=/opt/confluent/bin/onboot.sh + +[Install] +WantedBy=multi-user.target + diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.sh new file mode 
100644 index 00000000..80f95870 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.sh @@ -0,0 +1,66 @@ +#!/bin/sh + +# This script is executed on each boot as it is +# completed. It is best to edit the middle of the file as +# noted below so custom commands are executed before +# the script notifies confluent that install is fully complete. +ntpsrvs="" +nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') +confluent_apikey=$(cat /etc/confluent/confluent.apikey) +v4meth=$(grep ^ipv4_method: /etc/confluent/confluent.deploycfg|awk '{print $2}') +if [ "$v4meth" = "null" -o -z "$v4meth" ]; then + confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg|awk '{print $2}') +fi +if [ -z "$confluent_mgr" ]; then + confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') +fi +confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') +timedatectl set-timezone $(grep ^timezone: /etc/confluent/confluent.deploycfg|awk '{print $2}') +hostnamectl set-hostname $nodename + + +if grep ^ntpservers: /etc/confluent/confluent.deploycfg > /dev/null; then + for ntpsrv in $(sed -n '/^ntpservers:/,/^[^-]/p' /etc/confluent/confluent.deploycfg|sed 1d|sed '$d' | sed -e 's/^- //'); do + echo "server ${ntpsrv} iburst " >> /tmp/timeservers + done +fi + +if [ -f /tmp/timeservers ]; then + +ntpsrvs=$(cat /tmp/timeservers) + +sed -i "1,/^pool * /c\\ + +${ntpsrvs//$'\n'/\\$'\n'}" /etc/chrony.conf + + +systemctl restart chronyd + +rm -f /tmp/timeservers +fi + +export nodename confluent_mgr confluent_profile +. /etc/confluent/functions +mkdir -p /var/log/confluent +chmod 700 /var/log/confluent +exec >> /var/log/confluent/confluent-onboot.log +exec 2>> /var/log/confluent/confluent-onboot.log +chmod 600 /var/log/confluent/confluent-onboot.log +tail -f /var/log/confluent/confluent-onboot.log > /dev/console & +logshowpid=$! 
+ +rpm --import /etc/pki/rpm-gpg/* + +run_remote_python add_local_repositories +run_remote_python syncfileclient +run_remote_python confignet + +run_remote onboot.custom +# onboot scripts may be placed into onboot.d, e.g. onboot.d/01-firstaction.sh, onboot.d/02-secondaction.sh +run_remote_parts onboot.d + +# Induce execution of remote configuration, e.g. ansible plays in ansible/onboot.d/ +run_remote_config onboot.d + +#curl -X POST -d 'status: booted' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_mgr/confluent-api/self/updatestatus +kill $logshowpid diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/post.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/post.sh new file mode 100644 index 00000000..914a12c3 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/post.sh @@ -0,0 +1,53 @@ +#!/bin/sh + +# This script is executed 'chrooted' into a cloned disk target before rebooting +# +if [ -f /etc/dracut.conf.d/diskless.conf ]; then + rm /etc/dracut.conf.d/diskless.conf +fi +for kver in /lib/modules/*; do kver=$(basename $kver); kernel-install add $kver /boot/vmlinuz-$kver; done +nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') +confluent_apikey=$(cat /etc/confluent/confluent.apikey) +confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') +confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg|awk '{print $2}') +if [ -z "$confluent_mgr" ] || [ "$confluent_mgr" == "null" ] || ! ping -c 1 $confluent_mgr >& /dev/null; then + confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') +fi +confluent_websrv=$confluent_mgr +if [[ "$confluent_mgr" == *:* ]]; then + confluent_websrv="[$confluent_mgr]" +fi +export nodename confluent_mgr confluent_profile confluent_websrv +. 
/etc/confluent/functions +run_remote setupssh +mkdir -p /var/log/confluent +chmod 700 /var/log/confluent +exec >> /var/log/confluent/confluent-post.log +exec 2>> /var/log/confluent/confluent-post.log +chmod 600 /var/log/confluent/confluent-post.log +tail -f /var/log/confluent/confluent-post.log > /dev/console & +logshowpid=$! +curl -f https://$confluent_websrv/confluent-public/os/$confluent_profile/scripts/firstboot.service > /etc/systemd/system/firstboot.service +mkdir -p /opt/confluent/bin +curl -f https://$confluent_websrv/confluent-public/os/$confluent_profile/scripts/firstboot.sh > /opt/confluent/bin/firstboot.sh +chmod +x /opt/confluent/bin/firstboot.sh +systemctl enable firstboot +selinuxpolicy=$(grep ^SELINUXTYPE /etc/selinux/config |awk -F= '{print $2}') +if [ ! -z "$selinuxpolicy" ]; then + setfiles /etc/selinux/${selinuxpolicy}/contexts/files/file_contexts /etc/ +fi +run_remote_python syncfileclient +run_remote post.custom +# post scripts may be placed into post.d, e.g. post.d/01-firstaction.sh, post.d/02-secondaction.sh +run_remote_parts post.d + +# Induce execution of remote configuration, e.g. 
ansible plays in ansible/post.d/ +run_remote_config post.d + +# rebuild initrd, pick up new drivers if needed +dracut -f /boot/initramfs-$(uname -r).img $(uname -r) + +curl -sf -X POST -d 'status: staged' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_websrv/confluent-api/self/updatestatus + +kill $logshowpid + diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el10-diskless/profiles/default/scripts/syncfileclient new file mode 100644 index 00000000..5f2efc5e --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/syncfileclient @@ -0,0 +1,307 @@ +#!/usr/bin/python3 +import random +import time +import subprocess +import importlib +import tempfile +import json +import os +import shutil +import pwd +import grp +import sys +from importlib.machinery import SourceFileLoader +try: + apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() +except FileNotFoundError: + apiclient = SourceFileLoader('apiclient', '/etc/confluent/apiclient').load_module() + + +def partitionhostsline(line): + comment = '' + try: + cmdidx = line.index('#') + comment = line[cmdidx:] + line = line[:cmdidx].strip() + except ValueError: + pass + if not line: + return '', [], comment + ipaddr, names = line.split(maxsplit=1) + names = names.split() + return ipaddr, names, comment + +class HostMerger(object): + def __init__(self): + self.byip = {} + self.byname = {} + self.sourcelines = [] + self.targlines = [] + + def read_source(self, sourcefile): + with open(sourcefile, 'r') as hfile: + self.sourcelines = hfile.read().split('\n') + while not self.sourcelines[-1]: + self.sourcelines = self.sourcelines[:-1] + for x in range(len(self.sourcelines)): + line = self.sourcelines[x] + currip, names, comment = partitionhostsline(line) + if currip: + self.byip[currip] = x + for name in names: + self.byname[name] = x + + def read_target(self, targetfile): + 
with open(targetfile, 'r') as hfile: + lines = hfile.read().split('\n') + if not lines[-1]: + lines = lines[:-1] + for y in range(len(lines)): + line = lines[y] + currip, names, comment = partitionhostsline(line) + if currip in self.byip: + x = self.byip[currip] + if self.sourcelines[x] is None: + # have already consumed this enntry + continue + self.targlines.append(self.sourcelines[x]) + self.sourcelines[x] = None + continue + for name in names: + if name in self.byname: + x = self.byname[name] + if self.sourcelines[x] is None: + break + self.targlines.append(self.sourcelines[x]) + self.sourcelines[x] = None + break + else: + self.targlines.append(line) + + def write_out(self, targetfile): + while not self.targlines[-1]: + self.targlines = self.targlines[:-1] + if not self.targlines: + break + while not self.sourcelines[-1]: + self.sourcelines = self.sourcelines[:-1] + if not self.sourcelines: + break + with open(targetfile, 'w') as hosts: + for line in self.targlines: + hosts.write(line + '\n') + for line in self.sourcelines: + if line is not None: + hosts.write(line + '\n') + + +class CredMerger: + def __init__(self): + try: + with open('/etc/login.defs', 'r') as ldefs: + defs = ldefs.read().split('\n') + except FileNotFoundError: + defs = [] + lkup = {} + self.discardnames = {} + self.shadowednames = {} + for line in defs: + try: + line = line[:line.index('#')] + except ValueError: + pass + keyval = line.split() + if len(keyval) < 2: + continue + lkup[keyval[0]] = keyval[1] + self.uidmin = int(lkup.get('UID_MIN', 1000)) + self.uidmax = int(lkup.get('UID_MAX', 60000)) + self.gidmin = int(lkup.get('GID_MIN', 1000)) + self.gidmax = int(lkup.get('GID_MAX', 60000)) + self.shadowlines = None + + def read_passwd(self, source, targfile=False): + self.read_generic(source, self.uidmin, self.uidmax, targfile) + + def read_group(self, source, targfile=False): + self.read_generic(source, self.gidmin, self.gidmax, targfile) + + def read_generic(self, source, minid, maxid, 
targfile): + if targfile: + self.targdata = [] + else: + self.sourcedata = [] + with open(source, 'r') as inputfile: + for line in inputfile.read().split('\n'): + try: + name, _, uid, _ = line.split(':', 3) + uid = int(uid) + except ValueError: + continue + if targfile: + if uid < minid or uid > maxid: + self.targdata.append(line) + else: + self.discardnames[name] = 1 + else: + if name[0] in ('+', '#', '@'): + self.sourcedata.append(line) + elif uid >= minid and uid <= maxid: + self.sourcedata.append(line) + + def read_shadow(self, source): + self.shadowlines = [] + try: + with open(source, 'r') as inshadow: + for line in inshadow.read().split('\n'): + try: + name, _ = line.split(':' , 1) + except ValueError: + continue + if name in self.discardnames: + continue + self.shadowednames[name] = 1 + self.shadowlines.append(line) + except FileNotFoundError: + return + + def write_out(self, outfile): + with open(outfile, 'w') as targ: + for line in self.targdata: + targ.write(line + '\n') + for line in self.sourcedata: + targ.write(line + '\n') + if outfile == '/etc/passwd': + if self.shadowlines is None: + self.read_shadow('/etc/shadow') + with open('/etc/shadow', 'w') as shadout: + for line in self.shadowlines: + shadout.write(line + '\n') + for line in self.sourcedata: + name, _ = line.split(':', 1) + if name[0] in ('+', '#', '@'): + continue + if name in self.shadowednames: + continue + shadout.write(name + ':!:::::::\n') + if outfile == '/etc/group': + if self.shadowlines is None: + self.read_shadow('/etc/gshadow') + with open('/etc/gshadow', 'w') as shadout: + for line in self.shadowlines: + shadout.write(line + '\n') + for line in self.sourcedata: + name, _ = line.split(':' , 1) + if name in self.shadowednames: + continue + shadout.write(name + ':!::\n') + +def appendonce(basepath, filename): + with open(filename, 'rb') as filehdl: + thedata = filehdl.read() + targname = filename.replace(basepath, '') + try: + with open(targname, 'rb') as filehdl: + targdata = 
filehdl.read() + except IOError: + targdata = b'' + if thedata in targdata: + return + with open(targname, 'ab') as targhdl: + targhdl.write(thedata) + +def synchronize(): + tmpdir = tempfile.mkdtemp() + appendoncedir = tempfile.mkdtemp() + try: + ac = apiclient.HTTPSClient() + myips = [] + ipaddrs = subprocess.check_output(['ip', '-br', 'a']).split(b'\n') + for line in ipaddrs: + isa = line.split() + if len(isa) < 3 or isa[1] != b'UP': + continue + for addr in isa[2:]: + if addr.startswith(b'fe80::') or addr.startswith(b'169.254'): + continue + addr = addr.split(b'/')[0] + if not isinstance(addr, str): + addr = addr.decode('utf8') + myips.append(addr) + data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) + status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) + if status >= 300: + sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) + sys.stderr.write(rsp.decode('utf8')) + sys.stderr.write('\n') + sys.stderr.flush() + return status + if status == 202: + lastrsp = '' + while status != 204: + time.sleep(1+(2*random.random())) + status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') + if not isinstance(rsp, str): + rsp = rsp.decode('utf8') + if status == 200: + lastrsp = rsp + pendpasswd = os.path.join(tmpdir, 'etc/passwd') + if os.path.exists(pendpasswd): + cm = CredMerger() + cm.read_passwd(pendpasswd, targfile=False) + cm.read_passwd('/etc/passwd', targfile=True) + cm.write_out('/etc/passwd') + pendgroup = os.path.join(tmpdir, 'etc/group') + if os.path.exists(pendgroup): + cm = CredMerger() + cm.read_group(pendgroup, targfile=False) + cm.read_group('/etc/group', targfile=True) + cm.write_out('/etc/group') + pendhosts = os.path.join(tmpdir, 'etc/hosts') + if os.path.exists(pendhosts): + cm = HostMerger() + cm.read_source(pendhosts) + cm.read_target('/etc/hosts') + cm.write_out('/etc/hosts') + for dirn in os.walk(appendoncedir): + for filen in dirn[2]: + 
appendonce(appendoncedir, os.path.join(dirn[0], filen)) + if lastrsp: + lastrsp = json.loads(lastrsp) + opts = lastrsp.get('options', {}) + for fname in opts: + uid = -1 + gid = -1 + for opt in opts[fname]: + if opt == 'owner': + try: + uid = pwd.getpwnam(opts[fname][opt]['name']).pw_uid + except KeyError: + uid = opts[fname][opt]['id'] + elif opt == 'group': + try: + gid = grp.getgrnam(opts[fname][opt]['name']).gr_gid + except KeyError: + gid = opts[fname][opt]['id'] + elif opt == 'permissions': + os.chmod(fname, int(opts[fname][opt], 8)) + if uid != -1 or gid != -1: + os.chown(fname, uid, gid) + return status + finally: + shutil.rmtree(tmpdir) + shutil.rmtree(appendoncedir) + + +if __name__ == '__main__': + status = 202 + while status not in (204, 200): + try: + status = synchronize() + except Exception as e: + sys.stderr.write(str(e)) + sys.stderr.write('\n') + sys.stderr.flush() + status = 300 + if status not in (204, 200): + time.sleep((random.random()*3)+2) diff --git a/imgutil/el10/dracut/install b/imgutil/el10/dracut/install new file mode 100644 index 00000000..9c2cd68c --- /dev/null +++ b/imgutil/el10/dracut/install @@ -0,0 +1,35 @@ +dracut_install mktemp +dracut_install /lib64/libtss2-tcti-device.so.0 +dracut_install tpm2_create tpm2_pcrread tpm2_createpolicy tpm2_createprimary +dracut_install tpm2_load tpm2_unseal tpm2_getcap tpm2_evictcontrol +dracut_install tpm2_pcrextend tpm2_policypcr tpm2_flushcontext tpm2_startauthsession +dracut_install curl openssl tar cpio gzip lsmod ethtool xz lsmod ethtool +dracut_install modprobe touch echo cut wc bash uniq grep ip hostname +dracut_install awk egrep dirname expr sort +dracut_install ssh sshd reboot parted mkfs mkfs.ext4 mkfs.xfs xfs_db mkswap +dracut_install efibootmgr uuidgen +dracut_install du df ssh-keygen scp +dracut_install /lib64/libnss_dns* +dracut_install /usr/lib64/libnl-3.so.200 +dracut_install /etc/nsswitch.conf /etc/services /etc/protocols +dracut_install chmod whoami head tail basename tr 
+dracut_install /usr/sbin/arping dhcpcd logger hostnamectl +inst /bin/bash /bin/sh +dracut_install /lib64/libfuse.so.2 +dracut_install chown chroot dd expr kill parted rsync sort blockdev findfs insmod lvm +dracut_install /usr/lib/udev/rules.d/10-dm.rules /usr/sbin/dmsetup /usr/lib/udev/rules.d/95-dm-notify.rules +dracut_install /usr/lib/udev/rules.d/60-net.rules /lib/udev/rename_device /usr/lib/systemd/network/99-default.link +dracut_install /lib64/libpthread.so.0 +dracut_install losetup # multipart support + +#this would be nfs with lock, but not needed, go nolock +dracut_install mount.nfs rpcbind rpc.statd /etc/netconfig sm-notify +dracut_install mount.nfs /etc/netconfig +inst /usr/lib/dracut/modules.d/45net-lib/net-lib.sh /lib/net-lib.sh + + + +# network mount, and disk imaging helpers can come from a second stage +# this is narrowly focused on getting network up and fetching images +# and those images may opt to do something with cloning or whatever + diff --git a/imgutil/el10/dracut/installkernel b/imgutil/el10/dracut/installkernel new file mode 100644 index 00000000..cb62b510 --- /dev/null +++ b/imgutil/el10/dracut/installkernel @@ -0,0 +1,14 @@ +#!/bin/bash +instmods nfsv3 nfs_acl nfsv4 dns_resolver lockd fscache sunrpc +instmods e1000 e1000e igb sfc mlx5_ib mlx5_core mlx4_en cxgb3 cxgb4 tg3 bnx2 bnx2x bna ixgb ixgbe qlge mptsas mpt2sas mpt3sas megaraid_sas ahci xhci-hcd sd_mod pmcraid be2net vfat ext3 ext4 usb_storage scsi_wait_scan ipmi_si ipmi_devintf qlcnic xfs +instmods nvme +instmods cdc_ether r8152 +instmods r8169 +instmods vmxnet3 virtio_net +instmods virtio_scsi vmw_pvscsi +instmods mptctl +instmods mlx4_ib mlx5_ub ib_umad ib_ipoib +instmods ice i40e hfi1 bnxt_en qed qede +instmods dm-mod dm-log raid0 raid1 raid10 raid456 dm-raid dm-thin-pool dm-crypt dm-snapshot linear dm-era +# nfs root and optionally gocryptfs +instmods fuse overlay squashfs loop zram diff --git a/imgutil/el10/dracut/module-setup.sh b/imgutil/el10/dracut/module-setup.sh new file 
mode 100644 index 00000000..63b288c6 --- /dev/null +++ b/imgutil/el10/dracut/module-setup.sh @@ -0,0 +1,10 @@ +check() { + return 0 +} +install() { + . $moddir/install +} + +installkernel() { + . $moddir/installkernel +} diff --git a/imgutil/el10/pkglist b/imgutil/el10/pkglist new file mode 100644 index 00000000..56acdd23 --- /dev/null +++ b/imgutil/el10/pkglist @@ -0,0 +1,22 @@ +dnf +hostname +irqbalance +less +sssd-client +NetworkManager +nfs-utils +numactl-libs +passwd +rootfiles +sudo +tuned +yum +initscripts +tpm2-tools +xfsprogs +e2fsprogs +fuse-libs +libnl3 +dhcpcd +openssh-keysign +chrony kernel net-tools nfs-utils openssh-server rsync tar util-linux python3 tar dracut dracut-network ethtool parted openssl openssh-clients bash vim-minimal rpm iputils lvm2 efibootmgr shim-x64.x86_64 grub2-efi-x64 attr diff --git a/imgutil/el10/pkglist.aarch64 b/imgutil/el10/pkglist.aarch64 new file mode 100644 index 00000000..0d23e958 --- /dev/null +++ b/imgutil/el10/pkglist.aarch64 @@ -0,0 +1,22 @@ +dnf +hostname +irqbalance +less +sssd-client +NetworkManager +nfs-utils +numactl-libs +passwd +rootfiles +sudo +tuned +yum +initscripts +tpm2-tools +xfsprogs +e2fsprogs +fuse-libs +libnl3 +dhcpcd +openssh-keysign +chrony kernel net-tools nfs-utils openssh-server rsync tar util-linux python3 tar dracut dracut-network ethtool parted openssl openssh-clients bash vim-minimal rpm iputils lvm2 efibootmgr shim-aa64 grub2-efi-aa64 attr diff --git a/imgutil/imgutil b/imgutil/imgutil index 019cfa4f..f5687655 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -1095,7 +1095,7 @@ def fingerprint_source_suse(files, sourcepath, args): def fingerprint_source_el(files, sourcepath, args): for filen in files: - if '-release-8' in filen or '-release-7' in filen or '-release-9' in filen: + if '-release-10' in filen or '-release-8' in filen or '-release-7' in filen or '-release-9' in filen: parts = filen.split('-') osname = '_'.join(parts[:-3]) if osname == 'centos_linux': @@ -1128,7 +1128,7 @@ 
def fingerprint_host_el(args, hostpath='/'): ts = rpm.TransactionSet(hostpath) rpms = ts.dbMatch('provides', 'system-release') for inf in rpms: - if 'el8' not in inf.release and 'el7' not in inf.release and 'el9' not in inf.release: + if 'el8' not in inf.release and 'el7' not in inf.release and 'el9' not in inf.release 'el10' not in inf.release: continue osname = inf.name version = inf.version @@ -1152,7 +1152,7 @@ def fingerprint_host_el(args, hostpath='/'): version = v except (subprocess.SubprocessError, FileNotFoundError): return None - if 'el8' not in release and 'el7' not in release and 'el9' not in release: + if 'el8' not in release and 'el7' not in release and 'el9' not in release and 'el10' not in release: return None osname = osname.replace('-release', '').replace('-', '_') if osname == 'centos_linux': From bb7e0d1d1ef28926ffa74205e9175a24be9d4f7b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 17 Jun 2025 10:27:40 -0400 Subject: [PATCH 220/413] Correct mistake in the previous commit --- imgutil/imgutil | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index f5687655..6ecc7295 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -1128,7 +1128,7 @@ def fingerprint_host_el(args, hostpath='/'): ts = rpm.TransactionSet(hostpath) rpms = ts.dbMatch('provides', 'system-release') for inf in rpms: - if 'el8' not in inf.release and 'el7' not in inf.release and 'el9' not in inf.release 'el10' not in inf.release: + if 'el8' not in inf.release and 'el7' not in inf.release and 'el9' not in inf.release and 'el10' not in inf.release: continue osname = inf.name version = inf.version From b74732ecfa0fb8885be01a4a9812bfa06db082a3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 17 Jun 2025 10:30:49 -0400 Subject: [PATCH 221/413] Change to symlinks for most of el10 diskless Most is common with el9 --- .../default/scripts/add_local_repositories | 59 +- .../profiles/default/scripts/firstboot.custom | 5 +- 
.../default/scripts/firstboot.service | 12 +- .../profiles/default/scripts/firstboot.sh | 50 +- .../profiles/default/scripts/functions | 210 +------ .../profiles/default/scripts/getinstalldisk | 100 +--- .../profiles/default/scripts/image2disk.py | 562 +----------------- .../profiles/default/scripts/installimage | 50 +- .../profiles/default/scripts/onboot.custom | 1 + .../profiles/default/scripts/onboot.service | 12 +- .../profiles/default/scripts/onboot.sh | 67 +-- .../profiles/default/scripts/post.sh | 54 +- .../profiles/default/scripts/syncfileclient | 308 +--------- 13 files changed, 13 insertions(+), 1477 deletions(-) mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/add_local_repositories mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.custom mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.service mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.sh mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/functions mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/getinstalldisk mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/image2disk.py mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/installimage mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.custom mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.service mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.sh mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/post.sh mode change 100644 => 120000 confluent_osdeploy/el10-diskless/profiles/default/scripts/syncfileclient diff --git 
a/confluent_osdeploy/el10-diskless/profiles/default/scripts/add_local_repositories b/confluent_osdeploy/el10-diskless/profiles/default/scripts/add_local_repositories deleted file mode 100644 index fb26d5ef..00000000 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/add_local_repositories +++ /dev/null @@ -1,58 +0,0 @@ -try: - import configparser -except ImportError: - import ConfigParser as configparser - import cStringIO -import importlib.util -import importlib.machinery -import sys -modloader = importlib.machinery.SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient') -modspec = importlib.util.spec_from_file_location('apiclient', '/opt/confluent/bin/apiclient', loader=modloader) -apiclient = importlib.util.module_from_spec(modspec) -modspec.loader.exec_module(apiclient) -repo = None -server = None -v4cfg = None -server4 = None -server6 = None -profile = None -with open('/etc/confluent/confluent.deploycfg') as dplcfgfile: - lines = dplcfgfile.read().split('\n') - for line in lines: - if line.startswith('deploy_server:'): - _, server4 = line.split(' ', 1) - if line.startswith('deploy_server_v6:'): - _, server6 = line.split(' ', 1) - if line.startswith('profile: '): - _, profile = line.split(' ', 1) - if line.startswith('ipv4_method: '): - _, v4cfg = line.split(' ', 1) -if v4cfg == 'static' or v4cfg =='dhcp': - server = server4 -if not server: - server = '[{}]'.format(server6) - -path = '/confluent-public/os/{0}/distribution/'.format(profile) -clnt = apiclient.HTTPSClient() -cfgdata = clnt.grab_url(path + '.treeinfo').decode() -c = configparser.ConfigParser() -try: - c.read_string(cfgdata) -except AttributeError: - f = cStringIO.StringIO(cfgdata) - c.readfp(f) -for sec in c.sections(): - if sec.startswith('variant-'): - try: - repopath = c.get(sec, 'repository') - except Exception: - continue - _, varname = sec.split('-', 1) - reponame = '/etc/yum.repos.d/local-{0}.repo'.format(varname.lower()) - with open(reponame, 'w') as repout: - 
repout.write('[local-{0}]\n'.format(varname.lower())) - repout.write('name=Local install repository for {0}\n'.format(varname)) - if repopath[0] == '.': - repopath = repopath[1:] - repout.write('baseurl=https://{}/confluent-public/os/{}/distribution/{}\n'.format(server, profile, repopath)) - repout.write('enabled=1\n') diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/add_local_repositories b/confluent_osdeploy/el10-diskless/profiles/default/scripts/add_local_repositories new file mode 120000 index 00000000..9363a791 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/add_local_repositories @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/add_local_repositories \ No newline at end of file diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.custom b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.custom deleted file mode 100644 index eea34051..00000000 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.custom +++ /dev/null @@ -1,4 +0,0 @@ -. /etc/confluent/functions -# This is a convenient place to keep customizations separate from modifying the stock scripts -# While modification of the stock scripts is fine, it may be easier to rebase to a newer -# stock profile if the '.custom' files are used. 
diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.custom b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.custom new file mode 120000 index 00000000..4c873444 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.custom @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/firstboot.custom \ No newline at end of file diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.service b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.service deleted file mode 100644 index 209a95e6..00000000 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.service +++ /dev/null @@ -1,11 +0,0 @@ -[Unit] -Description=First Boot Process -Requires=network-online.target -After=network-online.target - -[Service] -ExecStart=/opt/confluent/bin/firstboot.sh - -[Install] -WantedBy=multi-user.target - diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.service b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.service new file mode 120000 index 00000000..7c756923 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.service @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/firstboot.service \ No newline at end of file diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.sh deleted file mode 100644 index fabb9385..00000000 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/sh - -# This script is executed on the first boot after install has -# completed. It is best to edit the middle of the file as -# noted below so custom commands are executed before -# the script notifies confluent that install is fully complete. 
- -HOME=$(getent passwd $(whoami)|cut -d: -f 6) -export HOME -nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') -confluent_apikey=$(cat /etc/confluent/confluent.apikey) -confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg|awk '{print $2}') -if [ -z "$confluent_mgr" ] || [ "$confluent_mgr" == "null" ] || ! ping -c 1 $confluent_mgr >& /dev/null; then - confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') -fi -confluent_websrv=$confluent_mgr -if [[ "$confluent_mgr" == *:* ]]; then - confluent_websrv="[$confluent_mgr]" -fi -confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') -export nodename confluent_mgr confluent_profile confluent_websrv -. /etc/confluent/functions -( -exec >> /var/log/confluent/confluent-firstboot.log -exec 2>> /var/log/confluent/confluent-firstboot.log -chmod 600 /var/log/confluent/confluent-firstboot.log -while ! ping -c 1 $confluent_mgr >& /dev/null; do - sleep 1 -done - -if [ ! -f /etc/confluent/firstboot.ran ]; then - touch /etc/confluent/firstboot.ran - - cat /etc/confluent/tls/*.pem >> /etc/pki/tls/certs/ca-bundle.crt - - run_remote firstboot.custom - # Firstboot scripts may be placed into firstboot.d, e.g. firstboot.d/01-firstaction.sh, firstboot.d/02-secondaction.sh - run_remote_parts firstboot.d - - # Induce execution of remote configuration, e.g. ansible plays in ansible/firstboot.d/ - run_remote_config firstboot.d -fi - -curl -X POST -d 'status: complete' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_websrv/confluent-api/self/updatestatus -systemctl disable firstboot -rm /etc/systemd/system/firstboot.service -rm /etc/confluent/firstboot.ran -) & -tail --pid $! 
-F /var/log/confluent/confluent-firstboot.log > /dev/console diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.sh new file mode 120000 index 00000000..5d76a4b3 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/firstboot.sh @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/firstboot.sh \ No newline at end of file diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/functions b/confluent_osdeploy/el10-diskless/profiles/default/scripts/functions deleted file mode 100644 index f68f3a5e..00000000 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/functions +++ /dev/null @@ -1,209 +0,0 @@ -#!/bin/bash -function test_mgr() { - whost=$1 - if [[ "$whost" == *:* ]] && [[ "$whost" != *[* ]] ; then - whost="[$whost]" - fi - if curl -gs https://${whost}/confluent-api/ > /dev/null; then - return 0 - fi - return 1 -} - -function confluentpython() { - if [ -x /usr/libexec/platform-python ]; then - /usr/libexec/platform-python $* - elif [ -x /usr/bin/python3 ]; then - /usr/bin/python3 $* - elif [ -x /usr/bin/python ]; then - /usr/bin/python $* - elif [ -x /usr/bin/python2 ]; then - /usr/bin/python2 $* - fi -} - -function set_confluent_vars() { - if [ -z "$nodename" ]; then - nodename=$(grep ^NODENAME: /etc/confluent/confluent.info | awk '{print $2}') - fi - if [[ "$confluent_mgr" == *"%"* ]]; then - confluent_mgr="" - fi - if [ -z "$confluent_mgr" ]; then - confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') - if ! test_mgr $confluent_mgr; then - confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') - if [[ "$confluent_mgr" = *":"* ]]; then - confluent_mgr="[$confluent_mgr]" - fi - fi - if ! 
test_mgr $confluent_mgr; then - BESTMGRS=$(grep ^EXTMGRINFO: /etc/confluent/confluent.info | grep '|1$' | sed -e 's/EXTMGRINFO: //' -e 's/|.*//') - OKMGRS=$(grep ^EXTMGRINFO: /etc/confluent/confluent.info | grep '|0$' | sed -e 's/EXTMGRINFO: //' -e 's/|.*//') - for confluent_mgr in $BESTMGRS $OKMGRS; do - if [[ $confluent_mgr == *":"* ]]; then - confluent_mgr="[$confluent_mgr]" - fi - if test_mgr $confluent_mgr; then - break - fi - done - fi - fi - if [ -z "$confluent_profile" ]; then - confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg | sed -e 's/[^ ]*: //') - fi - export confluent_profile confluent_mgr nodename -} - -fetch_remote() { - curlargs="" - if [ -f /etc/confluent/ca.pem ]; then - curlargs=" --cacert /etc/confluent/ca.pem" - fi - set_confluent_vars - mkdir -p $(dirname $1) - whost=$confluent_mgr - if [[ "$whost" == *:* ]] && [[ "$whost" != *[* ]] ; then - whost="[$whost]" - fi - curl -gf -sS $curlargs https://$whost/confluent-public/os/$confluent_profile/scripts/$1 > $1 - if [ $? 
!= 0 ]; then echo $1 failed to download; return 1; fi -} - -source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - apiclient=/opt/confluent/bin/apiclient - if [ -f /etc/confluent/apiclient ]; then - apiclient=/etc/confluent/apiclient - fi - scriptlist=$(confluentpython $apiclient /confluent-api/self/scriptlist/$1|sed -e 's/^- //') - for script in $scriptlist; do - source_remote $1/$script - done - rm -rf $confluentscripttmpdir - unset confluentscripttmpdir -} - -run_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - apiclient=/opt/confluent/bin/apiclient - if [ -f /etc/confluent/apiclient ]; then - apiclient=/etc/confluent/apiclient - fi - scriptlist=$(confluentpython $apiclient /confluent-api/self/scriptlist/$1|sed -e 's/^- //') - for script in $scriptlist; do - run_remote $1/$script - done - rm -rf $confluentscripttmpdir - unset confluentscripttmpdir -} - -source_remote() { - set_confluent_vars - unsettmpdir=0 - echo - echo '---------------------------------------------------------------------------' - echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi - echo Sourcing from $confluentscripttmpdir - cd $confluentscripttmpdir - fetch_remote $1 - if [ $? 
!= 0 ]; then echo $1 failed to download; return 1; fi - chmod +x $1 - cmd=$1 - shift - source ./$cmd - cd - > /dev/null - if [ "$unsettmpdir" = 1 ]; then - rm -rf $confluentscripttmpdir - unset confluentscripttmpdir - unsettmpdir=0 - fi - rm -rf $confluentscripttmpdir - return $retcode -} - -run_remote() { - requestedcmd="'$*'" - unsettmpdir=0 - set_confluent_vars - echo - echo '---------------------------------------------------------------------------' - echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi - echo Executing in $confluentscripttmpdir - cd $confluentscripttmpdir - fetch_remote $1 - if [ $? != 0 ]; then echo $requestedcmd failed to download; return 1; fi - chmod +x $1 - cmd=$1 - if [ -x /usr/bin/chcon ]; then - chcon system_u:object_r:bin_t:s0 $cmd - fi - shift - ./$cmd $* - retcode=$? - if [ $retcode -ne 0 ]; then - echo "$requestedcmd exited with code $retcode" - fi - cd - > /dev/null - if [ "$unsettmpdir" = 1 ]; then - rm -rf $confluentscripttmpdir - unset confluentscripttmpdir - unsettmpdir=0 - fi - return $retcode -} - -run_remote_python() { - echo - set_confluent_vars - if [ -f /etc/confluent/ca.pem ]; then - curlargs=" --cacert /etc/confluent/ca.pem" - fi - echo '---------------------------------------------------------------------------' - echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - echo Executing in $confluentscripttmpdir - cd $confluentscripttmpdir - mkdir -p $(dirname $1) - whost=$confluent_mgr - if [[ "$whost" == *:* ]] && [[ "$whost" != *[* ]] ; then - whost="[$whost]" - fi - curl -gf -sS $curlargs https://$whost/confluent-public/os/$confluent_profile/scripts/$1 > $1 - if [ $? 
!= 0 ]; then echo "'$*'" failed to download; return 1; fi - confluentpython $* - retcode=$? - echo "'$*' exited with code $retcode" - cd - > /dev/null - rm -rf $confluentscripttmpdir - unset confluentscripttmpdir - return $retcode -} - -run_remote_config() { - echo - set_confluent_vars - apiclient=/opt/confluent/bin/apiclient - if [ -f /etc/confluent/apiclient ]; then - apiclient=/etc/confluent/apiclient - fi - echo '---------------------------------------------------------------------------' - echo Requesting to run remote configuration for "'$*'" from $confluent_mgr under profile $confluent_profile - confluentpython $apiclient /confluent-api/self/remoteconfig/"$*" -d {} - confluentpython $apiclient /confluent-api/self/remoteconfig/status -w 204 - echo - echo 'Completed remote configuration' - echo '---------------------------------------------------------------------------' - return -} -#If invoked as a command, use the arguments to actually run a function -(return 0 2>/dev/null) || $1 "${@:2}" diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/functions b/confluent_osdeploy/el10-diskless/profiles/default/scripts/functions new file mode 120000 index 00000000..b20bb48f --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/functions @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/functions \ No newline at end of file diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el10-diskless/profiles/default/scripts/getinstalldisk deleted file mode 100644 index c954a254..00000000 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/getinstalldisk +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/python3 -import subprocess -import os - -class DiskInfo(object): - def __init__(self, devname): - if devname.startswith('nvme') and 'c' in devname: - raise Exception("Skipping multipath devname") - self.name = devname - self.wwn = None - self.path = None - 
self.model = '' - self.size = 0 - self.driver = '' - self.mdcontainer = '' - self.subsystype = '' - devnode = '/dev/{0}'.format(devname) - qprop = subprocess.check_output( - ['udevadm', 'info', '--query=property', devnode]) - if not isinstance(qprop, str): - qprop = qprop.decode('utf8') - for prop in qprop.split('\n'): - if '=' not in prop: - continue - k, v = prop.split('=', 1) - if k == 'DEVTYPE' and v != 'disk': - raise Exception('Not a disk') - elif k == 'DM_NAME': - raise Exception('Device Mapper') - elif k == 'ID_MODEL': - self.model = v - elif k == 'DEVPATH': - self.path = v - elif k == 'ID_WWN': - self.wwn = v - elif k == 'MD_CONTAINER': - self.mdcontainer = v - attrs = subprocess.check_output(['udevadm', 'info', '-a', devnode]) - if not isinstance(attrs, str): - attrs = attrs.decode('utf8') - for attr in attrs.split('\n'): - if '==' not in attr: - continue - k, v = attr.split('==', 1) - k = k.strip() - if k == 'ATTRS{size}': - self.size = v.replace('"', '') - elif (k == 'DRIVERS' and not self.driver - and v not in ('"sd"', '""')): - self.driver = v.replace('"', '') - elif k == 'ATTRS{subsystype}': - self.subsystype = v.replace('"', '') - if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': - raise Exception("No driver detected") - if os.path.exists('/sys/block/{0}/size'.format(self.name)): - with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: - self.size = int(sizesrc.read()) * 512 - if int(self.size) < 536870912: - raise Exception("Device too small for install") - - @property - def priority(self): - if self.model.lower() in ('m.2 nvme 2-bay raid kit', 'thinksystem_m.2_vd', 'thinksystem m.2', 'thinksystem_m.2'): - return 0 - if 'imsm' in self.mdcontainer: - return 1 - if self.driver == 'ahci': - return 2 - if self.driver.startswith('megaraid'): - return 3 - if self.driver.startswith('mpt'): - return 4 - return 99 - - def __repr__(self): - return repr({ - 'name': self.name, - 'path': self.path, - 'wwn': 
self.wwn, - 'driver': self.driver, - 'size': self.size, - 'model': self.model, - }) - - -def main(): - disks = [] - for disk in sorted(os.listdir('/sys/class/block')): - try: - disk = DiskInfo(disk) - disks.append(disk) - except Exception as e: - print("Skipping {0}: {1}".format(disk, str(e))) - nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] - if nd: - open('/tmp/installdisk', 'w').write(nd[0]) - -if __name__ == '__main__': - main() diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el10-diskless/profiles/default/scripts/getinstalldisk new file mode 120000 index 00000000..0b399a6f --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/getinstalldisk @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/getinstalldisk \ No newline at end of file diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el10-diskless/profiles/default/scripts/image2disk.py deleted file mode 100644 index 4a08716a..00000000 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/image2disk.py +++ /dev/null @@ -1,561 +0,0 @@ -#!/usr/bin/python3 -import glob -import json -import os -import re -import time -import shutil -import socket -import stat -import struct -import sys -import subprocess -import traceback - -bootuuid = None -vgname = 'localstorage' -oldvgname = None - -def convert_lv(oldlvname): - if oldvgname is None: - return None - return oldlvname.replace(oldvgname, vgname) - -def get_partname(devname, idx): - if devname[-1] in '0123456789': - return '{}p{}'.format(devname, idx) - else: - return '{}{}'.format(devname, idx) - -def get_next_part_meta(img, imgsize): - if img.tell() == imgsize: - return None - pathlen = struct.unpack('!H', img.read(2))[0] - mountpoint = img.read(pathlen).decode('utf8') - jsonlen = struct.unpack('!I', img.read(4))[0] - metadata = json.loads(img.read(jsonlen).decode('utf8')) - img.seek(16, 1) 
# skip the two 64-bit values we don't use, they are in json - nextlen = struct.unpack('!H', img.read(2))[0] - img.seek(nextlen, 1) # skip filesystem type - nextlen = struct.unpack('!H', img.read(2))[0] - img.seek(nextlen, 1) # skip orig devname (redundant with json) - nextlen = struct.unpack('!H', img.read(2))[0] - img.seek(nextlen, 1) # skip padding - nextlen = struct.unpack('!Q', img.read(8))[0] - img.seek(nextlen, 1) # go to next section - return metadata - -def get_multipart_image_meta(img): - img.seek(0, 2) - imgsize = img.tell() - img.seek(16) - seekamt = img.read(1) - img.seek(struct.unpack('B', seekamt)[0], 1) - partinfo = get_next_part_meta(img, imgsize) - while partinfo: - yield partinfo - partinfo = get_next_part_meta(img, imgsize) - -def get_image_metadata(imgpath): - with open(imgpath, 'rb') as img: - header = img.read(16) - if header == b'\x63\x7b\x9d\x26\xb7\xfd\x48\x30\x89\xf9\x11\xcf\x18\xfd\xff\xa1': - for md in get_multipart_image_meta(img): - if md.get('device', '').startswith('/dev/zram'): - continue - yield md - else: - # plausible filesystem structure to apply to a nominally "diskless" image - yield {'mount': '/', 'filesystem': 'xfs', 'minsize': 39513563136, 'initsize': 954128662528, 'flags': 'rw,seclabel,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota', 'device': '/dev/mapper/root', 'compressed_size': 27022069760} - yield {'mount': '/boot', 'filesystem': 'xfs', 'minsize': 232316928, 'initsize': 1006632960, 'flags': 'rw,seclabel,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota', 'device': '/dev/nvme1n1p2', 'compressed_size': 171462656} - yield {'mount': '/boot/efi', 'filesystem': 'vfat', 'minsize': 7835648, 'initsize': 627900416, 'flags': 'rw,relatime,fmask=0077,dmask=0077,codepage=437,iocharset=ascii,shortname=winnt,errors=remount-ro', 'device': '/dev/nvme1n1p1', 'compressed_size': 1576960} - #raise Exception('Installation from single part image not supported') - -class PartedRunner(): - def __init__(self, disk): - self.disk = 
disk - - def run(self, command, check=True): - command = command.split() - command = ['parted', '-a', 'optimal', '-s', self.disk] + command - if check: - return subprocess.check_output(command).decode('utf8') - else: - return subprocess.run(command, stdout=subprocess.PIPE).stdout.decode('utf8') - -def fixup(rootdir, vols): - devbymount = {} - for vol in vols: - devbymount[vol['mount']] = vol['targetdisk'] - fstabfile = os.path.join(rootdir, 'etc/fstab') - if os.path.exists(fstabfile): - with open(fstabfile) as tfile: - fstab = tfile.read().split('\n') - else: - #diskless image, need to invent fstab - fstab = [ - "#ORIGFSTAB#/dev/mapper/root# / xfs defaults 0 0", - "#ORIGFSTAB#UUID=aaf9e0f9-aa4d-4d74-9e75-3537620cfe23# /boot xfs defaults 0 0", - "#ORIGFSTAB#UUID=C21D-B881# /boot/efi vfat umask=0077,shortname=winnt 0 2", - "#ORIGFSTAB#/dev/mapper/swap# none swap defaults 0 0", - ] - while not fstab[0]: - fstab = fstab[1:] - if os.path.exists(os.path.join(rootdir, '.autorelabel')): - os.unlink(os.path.join(rootdir, '.autorelabel')) - with open(fstabfile, 'w') as tfile: - for tab in fstab: - entry = tab.split() - if tab.startswith('#ORIGFSTAB#'): - if entry[1] in devbymount: - targetdev = devbymount[entry[1]] - if targetdev.startswith('/dev/{}/'.format(vgname)): - entry[0] = targetdev - else: - uuid = subprocess.check_output(['blkid', '-s', 'UUID', '-o', 'value', targetdev]).decode('utf8') - uuid = uuid.strip() - entry[0] = 'UUID={}'.format(uuid) - elif entry[2] == 'swap': - entry[0] = '/dev/mapper/{}-swap'.format(vgname.replace('-', '--')) - entry[0] = entry[0].ljust(42) - entry[1] = entry[1].ljust(16) - entry[3] = entry[3].ljust(28) - tab = '\t'.join(entry) - tfile.write(tab + '\n') - with open(os.path.join(rootdir, 'etc/hostname'), 'w') as nameout: - nameout.write(socket.gethostname() + '\n') - selinuxconfig = os.path.join(rootdir, 'etc/selinux/config') - policy = None - if os.path.exists(selinuxconfig): - with open(selinuxconfig) as cfgin: - sec = 
cfgin.read().split('\n') - for l in sec: - l = l.split('#', 1)[0] - if l.startswith('SELINUXTYPE='): - _, policy = l.split('=') - for sshkey in glob.glob(os.path.join(rootdir, 'etc/ssh/*_key*')): - os.unlink(sshkey) - for sshkey in glob.glob('/etc/ssh/*_key*'): - newkey = os.path.join(rootdir, sshkey[1:]) - shutil.copy2(sshkey, newkey) - finfo = os.stat(sshkey) - os.chown(newkey, finfo[stat.ST_UID], finfo[stat.ST_GID]) - for ifcfg in glob.glob(os.path.join(rootdir, 'etc/sysconfig/network-scripts/*')): - os.unlink(ifcfg) - for ifcfg in glob.glob(os.path.join(rootdir, 'etc/NetworkManager/system-connections/*')): - os.unlink(ifcfg) - for ifcfg in glob.glob('/run/NetworkManager/system-connections/*'): - newcfg = ifcfg.split('/')[-1] - newcfg = os.path.join(rootdir, 'etc/NetworkManager/system-connections/{0}'.format(newcfg)) - shutil.copy2(ifcfg, newcfg) - rootconfluentdir = os.path.join(rootdir, 'etc/confluent/') - if os.path.exists(rootconfluentdir): - shutil.rmtree(rootconfluentdir) - shutil.copytree('/etc/confluent', rootconfluentdir) - if policy: - sys.stdout.write('Applying SELinux labeling...') - sys.stdout.flush() - subprocess.check_call(['setfiles', '-r', rootdir, os.path.join(rootdir, 'etc/selinux/{}/contexts/files/file_contexts'.format(policy)), os.path.join(rootdir, 'etc')]) - subprocess.check_call(['setfiles', '-r', rootdir, os.path.join(rootdir, 'etc/selinux/{}/contexts/files/file_contexts'.format(policy)), os.path.join(rootdir, 'opt')]) - sys.stdout.write('Done\n') - sys.stdout.flush() - for metafs in ('proc', 'sys', 'dev'): - subprocess.check_call(['mount', '-o', 'bind', '/{}'.format(metafs), os.path.join(rootdir, metafs)]) - if os.path.exists(os.path.join(rootdir, 'etc/lvm/devices/system.devices')): - os.remove(os.path.join(rootdir, 'etc/lvm/devices/system.devices')) - grubsyscfg = os.path.join(rootdir, 'etc/sysconfig/grub') - if not os.path.exists(grubsyscfg): - grubsyscfg = os.path.join(rootdir, 'etc/default/grub') - kcmdline = os.path.join(rootdir, 
'etc/kernel/cmdline') - if os.path.exists(kcmdline): - with open(kcmdline) as kcmdlinein: - kcmdlinecontent = kcmdlinein.read() - newkcmdlineent = [] - for ent in kcmdlinecontent.split(): - if ent.startswith('resume='): - newkcmdlineent.append('resume={}'.format(newswapdev)) - elif ent.startswith('root='): - newkcmdlineent.append('root={}'.format(newrootdev)) - elif ent.startswith('rd.lvm.lv='): - ent = convert_lv(ent) - if ent: - newkcmdlineent.append(ent) - else: - newkcmdlineent.append(ent) - with open(kcmdline, 'w') as kcmdlineout: - kcmdlineout.write(' '.join(newkcmdlineent) + '\n') - for loadent in glob.glob(os.path.join(rootdir, 'boot/loader/entries/*.conf')): - with open(loadent) as loadentin: - currentry = loadentin.read().split('\n') - with open(loadent, 'w') as loadentout: - for cfgline in currentry: - cfgparts = cfgline.split() - if not cfgparts or cfgparts[0] != 'options': - loadentout.write(cfgline + '\n') - continue - newcfgparts = [cfgparts[0]] - for cfgpart in cfgparts[1:]: - if cfgpart.startswith('root='): - newcfgparts.append('root={}'.format(newrootdev)) - elif cfgpart.startswith('resume='): - newcfgparts.append('resume={}'.format(newswapdev)) - elif cfgpart.startswith('rd.lvm.lv='): - cfgpart = convert_lv(cfgpart) - if cfgpart: - newcfgparts.append(cfgpart) - else: - newcfgparts.append(cfgpart) - loadentout.write(' '.join(newcfgparts) + '\n') - if os.path.exists(grubsyscfg): - with open(grubsyscfg) as defgrubin: - defgrub = defgrubin.read().split('\n') - else: - defgrub = [ - 'GRUB_TIMEOUT=5', - 'GRUB_DISTRIBUTOR="$(sed ' + "'s, release .*$,,g'" + ' /etc/system-release)"', - 'GRUB_DEFAULT=saved', - 'GRUB_DISABLE_SUBMENU=true', - 'GRUB_TERMINAL=""', - 'GRUB_SERIAL_COMMAND=""', - 'GRUB_CMDLINE_LINUX="crashkernel=1G-4G:192M,4G-64G:256M,64G-:512M rd.lvm.lv=vg/root rd.lvm.lv=vg/swap"', - 'GRUB_DISABLE_RECOVERY="true"', - 'GRUB_ENABLE_BLSCFG=true', - ] - if not os.path.exists(os.path.join(rootdir, "etc/kernel/cmdline")): - with 
open(os.path.join(rootdir, "etc/kernel/cmdline"), "w") as cmdlineout: - cmdlineout.write("root=/dev/mapper/localstorage-root rd.lvm.lv=localstorage/root") - with open(grubsyscfg, 'w') as defgrubout: - for gline in defgrub: - gline = gline.split() - newline = [] - for ent in gline: - if ent.startswith('resume='): - newline.append('resume={}'.format(newswapdev)) - elif ent.startswith('root='): - newline.append('root={}'.format(newrootdev)) - elif ent.startswith('rd.lvm.lv='): - ent = convert_lv(ent) - if ent: - newline.append(ent) - elif '""' in ent: - newline.append('""') - else: - newline.append(ent) - defgrubout.write(' '.join(newline) + '\n') - grubcfg = subprocess.check_output(['find', os.path.join(rootdir, 'boot'), '-name', 'grub.cfg']).decode('utf8').strip().replace(rootdir, '/').replace('//', '/') - grubcfg = grubcfg.split('\n') - if not grubcfg[-1]: - grubcfg = grubcfg[:-1] - if len(grubcfg) == 1: - grubcfg = grubcfg[0] - elif not grubcfg: - grubcfg = '/boot/grub2/grub.cfg' - paths = glob.glob(os.path.join(rootdir, 'boot/efi/EFI/*')) - for path in paths: - with open(os.path.join(path, 'grub.cfg'), 'w') as stubgrubout: - stubgrubout.write("search --no-floppy --root-dev-only --fs-uuid --set=dev " + bootuuid + "\nset prefix=($dev)/grub2\nexport $prefix\nconfigfile $prefix/grub.cfg\n") - else: - for gcfg in grubcfg: - rgcfg = os.path.join(rootdir, gcfg[1:]) # gcfg has a leading / to get rid of - if os.stat(rgcfg).st_size > 256: - grubcfg = gcfg - else: - with open(rgcfg, 'r') as gin: - tgrubcfg = gin.read() - tgrubcfg = tgrubcfg.split('\n') - if 'search --no-floppy --fs-uuid --set=dev' in tgrubcfg[0]: - tgrubcfg[0] = 'search --no-floppy --fs-uuid --set=dev ' + bootuuid - with open(rgcfg, 'w') as gout: - for gcline in tgrubcfg: - gout.write(gcline) - gout.write('\n') - try: - subprocess.check_call(['chroot', rootdir, 'grub2-mkconfig', '-o', grubcfg]) - except Exception as e: - print(repr(e)) - print(rootdir) - print(grubcfg) - time.sleep(86400) - newroot = None - 
with open('/etc/shadow') as shadowin: - shents = shadowin.read().split('\n') - for shent in shents: - shent = shent.split(':') - if not shent: - continue - if shent[0] == 'root' and shent[1] not in ('*', '!!', ''): - newroot = shent[1] - if newroot: - shlines = None - with open(os.path.join(rootdir, 'etc/shadow')) as oshadow: - shlines = oshadow.read().split('\n') - with open(os.path.join(rootdir, 'etc/shadow'), 'w') as oshadow: - for line in shlines: - if line.startswith('root:'): - line = line.split(':') - line[1] = newroot - line = ':'.join(line) - oshadow.write(line + '\n') - partnum = None - targblock = None - for vol in vols: - if vol['mount'] == '/boot/efi': - targdev = vol['targetdisk'] - partnum = re.search('(\d+)$', targdev).group(1) - targblock = re.search('(.*)\d+$', targdev).group(1) - if targblock: - if targblock.endswith('p') and 'nvme' in targblock: - targblock = targblock[:-1] - shimpath = subprocess.check_output(['find', os.path.join(rootdir, 'boot/efi'), '-name', 'shimx64.efi']).decode('utf8').strip() - shimpath = shimpath.replace(rootdir, '/').replace('/boot/efi', '').replace('//', '/').replace('/', '\\') - subprocess.check_call(['efibootmgr', '-c', '-d', targblock, '-l', shimpath, '--part', partnum]) - - try: - os.makedirs(os.path.join(rootdir, 'opt/confluent/bin')) - except Exception: - pass - shutil.copy2('/opt/confluent/bin/apiclient', os.path.join(rootdir, 'opt/confluent/bin/apiclient')) - #other network interfaces - - -def had_swap(): - if not os.path.exists('/etc/fstab'): - # diskless source, assume swap - return True - with open('/etc/fstab') as tabfile: - tabs = tabfile.read().split('\n') - for tab in tabs: - tab = tab.split() - if len(tab) < 3: - continue - if tab[2] == 'swap': - return True - return False - -newrootdev = None -newswapdev = None -def install_to_disk(imgpath): - global bootuuid - global newrootdev - global newswapdev - global vgname - global oldvgname - lvmvols = {} - deftotsize = 0 - mintotsize = 0 - deflvmsize = 0 - 
minlvmsize = 0 - biggestsize = 0 - biggestfs = None - plainvols = {} - allvols = [] - swapsize = 0 - if had_swap(): - with open('/proc/meminfo') as meminfo: - swapsize = meminfo.read().split('\n')[0] - swapsize = int(swapsize.split()[1]) - if swapsize < 2097152: - swapsize = swapsize * 2 - elif swapsize > 8388608 and swapsize < 67108864: - swapsize = swapsize * 0.5 - elif swapsize >= 67108864: - swapsize = 33554432 - swapsize = int(swapsize * 1024) - deftotsize = swapsize - mintotsize = swapsize - for fs in get_image_metadata(imgpath): - allvols.append(fs) - deftotsize += fs['initsize'] - mintotsize += fs['minsize'] - if fs['initsize'] > biggestsize: - biggestfs = fs - biggestsize = fs['initsize'] - if fs['device'].startswith('/dev/mapper'): - oldvgname = fs['device'].rsplit('/', 1)[-1] - # if node has - then /dev/mapper will double up the hypen - if '_' in oldvgname and '-' in oldvgname.split('_', 1)[-1]: - oldvgname = oldvgname.rsplit('-', 1)[0].replace('--', '-') - osname = oldvgname.split('_')[0] - nodename = socket.gethostname().split('.')[0] - vgname = '{}_{}'.format(osname, nodename) - lvmvols[fs['device'].replace('/dev/mapper/', '')] = fs - deflvmsize += fs['initsize'] - minlvmsize += fs['minsize'] - else: - plainvols[int(re.search('(\d+)$', fs['device'])[0])] = fs - with open('/tmp/installdisk') as diskin: - instdisk = diskin.read() - instdisk = '/dev/' + instdisk - parted = PartedRunner(instdisk) - dinfo = parted.run('unit s print', check=False) - dinfo = dinfo.split('\n') - sectors = 0 - sectorsize = 0 - for inf in dinfo: - if inf.startswith('Disk {0}:'.format(instdisk)): - _, sectors = inf.split(': ') - sectors = int(sectors.replace('s', '')) - if inf.startswith('Sector size (logical/physical):'): - _, sectorsize = inf.split(':') - sectorsize = sectorsize.split('/')[0] - sectorsize = sectorsize.replace('B', '') - sectorsize = int(sectorsize) - # for now, only support resizing/growing the largest partition - minexcsize = deftotsize - 
biggestfs['initsize'] - mintotsize = deftotsize - biggestfs['initsize'] + biggestfs['minsize'] - minsectors = mintotsize // sectorsize - if sectors < (minsectors + 65536): - raise Exception('Disk too small to fit image') - biggestsectors = sectors - (minexcsize // sectorsize) - biggestsize = sectorsize * biggestsectors - parted.run('mklabel gpt') - curroffset = 2048 - for volidx in sorted(plainvols): - vol = plainvols[volidx] - if vol is not biggestfs: - size = vol['initsize'] // sectorsize - else: - size = biggestsize // sectorsize - size += 2047 - (size % 2048) - end = curroffset + size - if end > sectors: - end = sectors - parted.run('mkpart primary {}s {}s'.format(curroffset, end)) - vol['targetdisk'] = get_partname(instdisk, volidx) - if vol['mount'] == '/': - newrootdev = vol['targetdisk'] - curroffset += size + 1 - if not lvmvols: - if swapsize: - swapsize = swapsize // sectorsize - swapsize += 2047 - (size % 2048) - end = curroffset + swapsize - if end > sectors: - end = sectors - parted.run('mkpart swap {}s {}s'.format(curroffset, end)) - newswapdev = get_partname(instdisk, volidx + 1) - subprocess.check_call(['mkswap', newswapdev]) - else: - parted.run('mkpart lvm {}s 100%'.format(curroffset)) - lvmpart = get_partname(instdisk, volidx + 1) - subprocess.check_call(['pvcreate', '-ff', '-y', lvmpart]) - subprocess.check_call(['vgcreate', vgname, lvmpart]) - vginfo = subprocess.check_output(['vgdisplay', vgname, '--units', 'b']).decode('utf8') - vginfo = vginfo.split('\n') - pesize = 0 - pes = 0 - for infline in vginfo: - infline = infline.split() - if len(infline) >= 3 and infline[:2] == ['PE', 'Size']: - pesize = int(infline[2]) - if len(infline) >= 5 and infline[:2] == ['Free', 'PE']: - pes = int(infline[4]) - takeaway = swapsize // pesize - for volidx in lvmvols: - vol = lvmvols[volidx] - if vol is biggestfs: - continue - takeaway += vol['initsize'] // pesize - takeaway += 1 - biggestextents = pes - takeaway - for volidx in lvmvols: - vol = 
lvmvols[volidx] - if vol is biggestfs: - extents = biggestextents - else: - extents = vol['initsize'] // pesize - extents += 1 - if vol['mount'] == '/': - lvname = 'root' - - else: - lvname = vol['mount'].replace('/', '_') - subprocess.check_call(['lvcreate', '-l', '{}'.format(extents), '-y', '-n', lvname, vgname]) - vol['targetdisk'] = '/dev/{}/{}'.format(vgname, lvname) - if vol['mount'] == '/': - newrootdev = vol['targetdisk'] - if swapsize: - subprocess.check_call(['lvcreate', '-y', '-l', '{}'.format(swapsize // pesize), '-n', 'swap', vgname]) - subprocess.check_call(['mkswap', '/dev/{}/swap'.format(vgname)]) - newswapdev = '/dev/{}/swap'.format(vgname) - os.makedirs('/run/imginst/targ') - for vol in allvols: - with open(vol['targetdisk'], 'wb') as partition: - partition.write(b'\x00' * 1 * 1024 * 1024) - subprocess.check_call(['mkfs.{}'.format(vol['filesystem']), vol['targetdisk']]) - subprocess.check_call(['mount', vol['targetdisk'], '/run/imginst/targ']) - source = vol['mount'].replace('/', '_') - source = '/run/imginst/sources/' + source - if not os.path.exists(source): - source = '/run/imginst/sources/_' + vol['mount'] - blankfsstat = os.statvfs('/run/imginst/targ') - blankused = (blankfsstat.f_blocks - blankfsstat.f_bfree) * blankfsstat.f_bsize - sys.stdout.write('\nWriting {0}: '.format(vol['mount'])) - with subprocess.Popen(['cp', '-ax', source + '/.', '/run/imginst/targ']) as copier: - stillrunning = copier.poll() - lastprogress = 0.0 - while stillrunning is None: - currfsstat = os.statvfs('/run/imginst/targ') - currused = (currfsstat.f_blocks - currfsstat.f_bfree) * currfsstat.f_bsize - currused -= blankused - with open('/proc/meminfo') as meminf: - for line in meminf.read().split('\n'): - if line.startswith('Dirty:'): - _, dirty, _ = line.split() - dirty = int(dirty) * 1024 - progress = (currused - dirty) / vol['minsize'] - if progress < lastprogress: - progress = lastprogress - if progress > 0.99: - progress = 0.99 - lastprogress = progress - 
progress = progress * 100 - sys.stdout.write('\x1b[1K\rWriting {0}: {1:3.2f}%'.format(vol['mount'], progress)) - sys.stdout.flush() - time.sleep(0.5) - stillrunning = copier.poll() - if stillrunning != 0: - raise Exception("Error copying volume") - with subprocess.Popen(['sync']) as syncrun: - stillrunning = syncrun.poll() - while stillrunning is None: - with open('/proc/meminfo') as meminf: - for line in meminf.read().split('\n'): - if line.startswith('Dirty:'): - _, dirty, _ = line.split() - dirty = int(dirty) * 1024 - progress = (vol['minsize'] - dirty) / vol['minsize'] - if progress < lastprogress: - progress = lastprogress - if progress > 0.99: - progress = 0.99 - lastprogress = progress - progress = progress * 100 - sys.stdout.write('\x1b[1K\rWriting {0}: {1:3.2f}%'.format(vol['mount'], progress)) - sys.stdout.flush() - time.sleep(0.5) - stillrunning = syncrun.poll() - sys.stdout.write('\x1b[1K\rDone writing {0}'.format(vol['mount'])) - sys.stdout.write('\n') - sys.stdout.flush() - if vol['mount'] == '/boot': - tbootuuid = subprocess.check_output(['blkid', vol['targetdisk']]) - if b'UUID="' in tbootuuid: - bootuuid = tbootuuid.split(b'UUID="', 1)[1].split(b'"')[0].decode('utf8') - - - - - subprocess.check_call(['umount', '/run/imginst/targ']) - for vol in allvols: - subprocess.check_call(['mount', vol['targetdisk'], '/run/imginst/targ/' + vol['mount']]) - fixup('/run/imginst/targ', allvols) - - -if __name__ == '__main__': - try: - install_to_disk(os.environ['mountsrc']) - except Exception: - traceback.print_exc() - time.sleep(86400) - raise diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el10-diskless/profiles/default/scripts/image2disk.py new file mode 120000 index 00000000..f3aca670 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/image2disk.py @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/image2disk.py \ No newline at end of file diff --git 
a/confluent_osdeploy/el10-diskless/profiles/default/scripts/installimage b/confluent_osdeploy/el10-diskless/profiles/default/scripts/installimage deleted file mode 100644 index c461173b..00000000 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/installimage +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash -. /etc/confluent/functions -# the image will be used to deploy itself -# provide both access to image (for parsing metadata) -# and existing mounts of image (to take advantage of caching) -mount -o bind /sys /sysroot/sys -mount -o bind /dev /sysroot/dev -mount -o bind /proc /sysroot/proc -mount -o bind /run /sysroot/run - - -if [ ! -f /tmp/mountparts.sh ]; then - mkdir -p /sysroot/run/imginst/sources/_ - mount -o bind /mnt/remote /sysroot/run/imginst/sources/_ -else - for srcmount in $(cat /tmp/mountparts.sh | awk '{print $2}'); do - srcname=${srcmount#/dev/mapper/mproot} - srcdir=$(echo $srcmount | sed -e 's!/dev/mapper/mproot!/mnt/remote!' -e 's!_!/!g') - mkdir -p /sysroot/run/imginst/sources/$srcname - mount -o bind $srcdir /sysroot/run/imginst/sources/$srcname - done -fi -cd /sysroot/run -chroot /sysroot/ bash -c "source /etc/confluent/functions; run_remote_python getinstalldisk" -chroot /sysroot/ bash -c "source /etc/confluent/functions; run_remote_parts pre.d" -if [ ! -f /sysroot/tmp/installdisk ]; then - echo 'Unable to find a suitable installation target device, ssh to port 2222 to investigate' - while [ ! 
-f /sysroot/tmp/installdisk ]; do - sleep 1 - done -fi -lvm vgchange -a n -/sysroot/usr/sbin/wipefs -a /dev/$(cat /sysroot/tmp/installdisk) -udevadm control -e -if [ -f /sysroot/etc/lvm/devices/system.devices ]; then - rm /sysroot/etc/lvm/devices/system.devices -fi -chroot /sysroot /usr/lib/systemd/systemd-udevd --daemon -chroot /sysroot bash -c "source /etc/confluent/functions; run_remote_python image2disk.py" -echo "Port 22" >> /etc/ssh/sshd_config -echo 'Match LocalPort 22' >> /etc/ssh/sshd_config -echo ' ChrootDirectory /sysroot/run/imginst/targ' >> /etc/ssh/sshd_config -kill -HUP $(cat /run/sshd.pid) -cp /sysroot/etc/pki/ca-trust/source/anchors/* /sysroot/run/imginst/targ/etc/pki/ca-trust/source/anchors/ -chroot /sysroot/run/imginst/targ update-ca-trust - -chroot /sysroot/run/imginst/targ bash -c "source /etc/confluent/functions; run_remote post.sh" -chroot /sysroot bash -c "umount \$(tac /proc/mounts|awk '{print \$2}'|grep ^/run/imginst/targ)" - diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/installimage b/confluent_osdeploy/el10-diskless/profiles/default/scripts/installimage new file mode 120000 index 00000000..64455ac9 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/installimage @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/installimage \ No newline at end of file diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.custom b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.custom deleted file mode 100644 index e69de29b..00000000 diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.custom b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.custom new file mode 120000 index 00000000..b0dfa87a --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.custom @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/onboot.custom \ No newline at end of file diff --git 
a/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.service b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.service deleted file mode 100644 index f9235033..00000000 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.service +++ /dev/null @@ -1,11 +0,0 @@ -[Unit] -Description=Confluent onboot hook -Requires=network-online.target -After=network-online.target - -[Service] -ExecStart=/opt/confluent/bin/onboot.sh - -[Install] -WantedBy=multi-user.target - diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.service b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.service new file mode 120000 index 00000000..da70c79c --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.service @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/onboot.service \ No newline at end of file diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.sh deleted file mode 100644 index 80f95870..00000000 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/sh - -# This script is executed on each boot as it is -# completed. It is best to edit the middle of the file as -# noted below so custom commands are executed before -# the script notifies confluent that install is fully complete. 
-ntpsrvs="" -nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') -confluent_apikey=$(cat /etc/confluent/confluent.apikey) -v4meth=$(grep ^ipv4_method: /etc/confluent/confluent.deploycfg|awk '{print $2}') -if [ "$v4meth" = "null" -o -z "$v4meth" ]; then - confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg|awk '{print $2}') -fi -if [ -z "$confluent_mgr" ]; then - confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') -fi -confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') -timedatectl set-timezone $(grep ^timezone: /etc/confluent/confluent.deploycfg|awk '{print $2}') -hostnamectl set-hostname $nodename - - -if grep ^ntpservers: /etc/confluent/confluent.deploycfg > /dev/null; then - for ntpsrv in $(sed -n '/^ntpservers:/,/^[^-]/p' /etc/confluent/confluent.deploycfg|sed 1d|sed '$d' | sed -e 's/^- //'); do - echo "server ${ntpsrv} iburst " >> /tmp/timeservers - done -fi - -if [ -f /tmp/timeservers ]; then - -ntpsrvs=$(cat /tmp/timeservers) - -sed -i "1,/^pool * /c\\ - -${ntpsrvs//$'\n'/\\$'\n'}" /etc/chrony.conf - - -systemctl restart chronyd - -rm -f /tmp/timeservers -fi - -export nodename confluent_mgr confluent_profile -. /etc/confluent/functions -mkdir -p /var/log/confluent -chmod 700 /var/log/confluent -exec >> /var/log/confluent/confluent-onboot.log -exec 2>> /var/log/confluent/confluent-onboot.log -chmod 600 /var/log/confluent/confluent-onboot.log -tail -f /var/log/confluent/confluent-onboot.log > /dev/console & -logshowpid=$! - -rpm --import /etc/pki/rpm-gpg/* - -run_remote_python add_local_repositories -run_remote_python syncfileclient -run_remote_python confignet - -run_remote onboot.custom -# onboot scripts may be placed into onboot.d, e.g. onboot.d/01-firstaction.sh, onboot.d/02-secondaction.sh -run_remote_parts onboot.d - -# Induce execution of remote configuration, e.g. 
ansible plays in ansible/onboot.d/ -run_remote_config onboot.d - -#curl -X POST -d 'status: booted' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_mgr/confluent-api/self/updatestatus -kill $logshowpid diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.sh new file mode 120000 index 00000000..c6b678db --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/onboot.sh @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/onboot.sh \ No newline at end of file diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/post.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/post.sh deleted file mode 100644 index 914a12c3..00000000 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/post.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/sh - -# This script is executed 'chrooted' into a cloned disk target before rebooting -# -if [ -f /etc/dracut.conf.d/diskless.conf ]; then - rm /etc/dracut.conf.d/diskless.conf -fi -for kver in /lib/modules/*; do kver=$(basename $kver); kernel-install add $kver /boot/vmlinuz-$kver; done -nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') -confluent_apikey=$(cat /etc/confluent/confluent.apikey) -confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') -confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg|awk '{print $2}') -if [ -z "$confluent_mgr" ] || [ "$confluent_mgr" == "null" ] || ! ping -c 1 $confluent_mgr >& /dev/null; then - confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') -fi -confluent_websrv=$confluent_mgr -if [[ "$confluent_mgr" == *:* ]]; then - confluent_websrv="[$confluent_mgr]" -fi -export nodename confluent_mgr confluent_profile confluent_websrv -. 
/etc/confluent/functions -run_remote setupssh -mkdir -p /var/log/confluent -chmod 700 /var/log/confluent -exec >> /var/log/confluent/confluent-post.log -exec 2>> /var/log/confluent/confluent-post.log -chmod 600 /var/log/confluent/confluent-post.log -tail -f /var/log/confluent/confluent-post.log > /dev/console & -logshowpid=$! -curl -f https://$confluent_websrv/confluent-public/os/$confluent_profile/scripts/firstboot.service > /etc/systemd/system/firstboot.service -mkdir -p /opt/confluent/bin -curl -f https://$confluent_websrv/confluent-public/os/$confluent_profile/scripts/firstboot.sh > /opt/confluent/bin/firstboot.sh -chmod +x /opt/confluent/bin/firstboot.sh -systemctl enable firstboot -selinuxpolicy=$(grep ^SELINUXTYPE /etc/selinux/config |awk -F= '{print $2}') -if [ ! -z "$selinuxpolicy" ]; then - setfiles /etc/selinux/${selinuxpolicy}/contexts/files/file_contexts /etc/ -fi -run_remote_python syncfileclient -run_remote post.custom -# post scripts may be placed into post.d, e.g. post.d/01-firstaction.sh, post.d/02-secondaction.sh -run_remote_parts post.d - -# Induce execution of remote configuration, e.g. 
ansible plays in ansible/post.d/ -run_remote_config post.d - -# rebuild initrd, pick up new drivers if needed -dracut -f /boot/initramfs-$(uname -r).img $(uname -r) - -curl -sf -X POST -d 'status: staged' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_websrv/confluent-api/self/updatestatus - -kill $logshowpid - diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/post.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/post.sh new file mode 120000 index 00000000..61642fa5 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/post.sh @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/post.sh \ No newline at end of file diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el10-diskless/profiles/default/scripts/syncfileclient deleted file mode 100644 index 5f2efc5e..00000000 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/syncfileclient +++ /dev/null @@ -1,307 +0,0 @@ -#!/usr/bin/python3 -import random -import time -import subprocess -import importlib -import tempfile -import json -import os -import shutil -import pwd -import grp -import sys -from importlib.machinery import SourceFileLoader -try: - apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() -except FileNotFoundError: - apiclient = SourceFileLoader('apiclient', '/etc/confluent/apiclient').load_module() - - -def partitionhostsline(line): - comment = '' - try: - cmdidx = line.index('#') - comment = line[cmdidx:] - line = line[:cmdidx].strip() - except ValueError: - pass - if not line: - return '', [], comment - ipaddr, names = line.split(maxsplit=1) - names = names.split() - return ipaddr, names, comment - -class HostMerger(object): - def __init__(self): - self.byip = {} - self.byname = {} - self.sourcelines = [] - self.targlines = [] - - def read_source(self, sourcefile): - with 
open(sourcefile, 'r') as hfile: - self.sourcelines = hfile.read().split('\n') - while not self.sourcelines[-1]: - self.sourcelines = self.sourcelines[:-1] - for x in range(len(self.sourcelines)): - line = self.sourcelines[x] - currip, names, comment = partitionhostsline(line) - if currip: - self.byip[currip] = x - for name in names: - self.byname[name] = x - - def read_target(self, targetfile): - with open(targetfile, 'r') as hfile: - lines = hfile.read().split('\n') - if not lines[-1]: - lines = lines[:-1] - for y in range(len(lines)): - line = lines[y] - currip, names, comment = partitionhostsline(line) - if currip in self.byip: - x = self.byip[currip] - if self.sourcelines[x] is None: - # have already consumed this enntry - continue - self.targlines.append(self.sourcelines[x]) - self.sourcelines[x] = None - continue - for name in names: - if name in self.byname: - x = self.byname[name] - if self.sourcelines[x] is None: - break - self.targlines.append(self.sourcelines[x]) - self.sourcelines[x] = None - break - else: - self.targlines.append(line) - - def write_out(self, targetfile): - while not self.targlines[-1]: - self.targlines = self.targlines[:-1] - if not self.targlines: - break - while not self.sourcelines[-1]: - self.sourcelines = self.sourcelines[:-1] - if not self.sourcelines: - break - with open(targetfile, 'w') as hosts: - for line in self.targlines: - hosts.write(line + '\n') - for line in self.sourcelines: - if line is not None: - hosts.write(line + '\n') - - -class CredMerger: - def __init__(self): - try: - with open('/etc/login.defs', 'r') as ldefs: - defs = ldefs.read().split('\n') - except FileNotFoundError: - defs = [] - lkup = {} - self.discardnames = {} - self.shadowednames = {} - for line in defs: - try: - line = line[:line.index('#')] - except ValueError: - pass - keyval = line.split() - if len(keyval) < 2: - continue - lkup[keyval[0]] = keyval[1] - self.uidmin = int(lkup.get('UID_MIN', 1000)) - self.uidmax = int(lkup.get('UID_MAX', 60000)) 
- self.gidmin = int(lkup.get('GID_MIN', 1000)) - self.gidmax = int(lkup.get('GID_MAX', 60000)) - self.shadowlines = None - - def read_passwd(self, source, targfile=False): - self.read_generic(source, self.uidmin, self.uidmax, targfile) - - def read_group(self, source, targfile=False): - self.read_generic(source, self.gidmin, self.gidmax, targfile) - - def read_generic(self, source, minid, maxid, targfile): - if targfile: - self.targdata = [] - else: - self.sourcedata = [] - with open(source, 'r') as inputfile: - for line in inputfile.read().split('\n'): - try: - name, _, uid, _ = line.split(':', 3) - uid = int(uid) - except ValueError: - continue - if targfile: - if uid < minid or uid > maxid: - self.targdata.append(line) - else: - self.discardnames[name] = 1 - else: - if name[0] in ('+', '#', '@'): - self.sourcedata.append(line) - elif uid >= minid and uid <= maxid: - self.sourcedata.append(line) - - def read_shadow(self, source): - self.shadowlines = [] - try: - with open(source, 'r') as inshadow: - for line in inshadow.read().split('\n'): - try: - name, _ = line.split(':' , 1) - except ValueError: - continue - if name in self.discardnames: - continue - self.shadowednames[name] = 1 - self.shadowlines.append(line) - except FileNotFoundError: - return - - def write_out(self, outfile): - with open(outfile, 'w') as targ: - for line in self.targdata: - targ.write(line + '\n') - for line in self.sourcedata: - targ.write(line + '\n') - if outfile == '/etc/passwd': - if self.shadowlines is None: - self.read_shadow('/etc/shadow') - with open('/etc/shadow', 'w') as shadout: - for line in self.shadowlines: - shadout.write(line + '\n') - for line in self.sourcedata: - name, _ = line.split(':', 1) - if name[0] in ('+', '#', '@'): - continue - if name in self.shadowednames: - continue - shadout.write(name + ':!:::::::\n') - if outfile == '/etc/group': - if self.shadowlines is None: - self.read_shadow('/etc/gshadow') - with open('/etc/gshadow', 'w') as shadout: - for line in 
self.shadowlines: - shadout.write(line + '\n') - for line in self.sourcedata: - name, _ = line.split(':' , 1) - if name in self.shadowednames: - continue - shadout.write(name + ':!::\n') - -def appendonce(basepath, filename): - with open(filename, 'rb') as filehdl: - thedata = filehdl.read() - targname = filename.replace(basepath, '') - try: - with open(targname, 'rb') as filehdl: - targdata = filehdl.read() - except IOError: - targdata = b'' - if thedata in targdata: - return - with open(targname, 'ab') as targhdl: - targhdl.write(thedata) - -def synchronize(): - tmpdir = tempfile.mkdtemp() - appendoncedir = tempfile.mkdtemp() - try: - ac = apiclient.HTTPSClient() - myips = [] - ipaddrs = subprocess.check_output(['ip', '-br', 'a']).split(b'\n') - for line in ipaddrs: - isa = line.split() - if len(isa) < 3 or isa[1] != b'UP': - continue - for addr in isa[2:]: - if addr.startswith(b'fe80::') or addr.startswith(b'169.254'): - continue - addr = addr.split(b'/')[0] - if not isinstance(addr, str): - addr = addr.decode('utf8') - myips.append(addr) - data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) - status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) - if status >= 300: - sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) - sys.stderr.write(rsp.decode('utf8')) - sys.stderr.write('\n') - sys.stderr.flush() - return status - if status == 202: - lastrsp = '' - while status != 204: - time.sleep(1+(2*random.random())) - status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') - if not isinstance(rsp, str): - rsp = rsp.decode('utf8') - if status == 200: - lastrsp = rsp - pendpasswd = os.path.join(tmpdir, 'etc/passwd') - if os.path.exists(pendpasswd): - cm = CredMerger() - cm.read_passwd(pendpasswd, targfile=False) - cm.read_passwd('/etc/passwd', targfile=True) - cm.write_out('/etc/passwd') - pendgroup = os.path.join(tmpdir, 'etc/group') - if os.path.exists(pendgroup): - cm = 
CredMerger() - cm.read_group(pendgroup, targfile=False) - cm.read_group('/etc/group', targfile=True) - cm.write_out('/etc/group') - pendhosts = os.path.join(tmpdir, 'etc/hosts') - if os.path.exists(pendhosts): - cm = HostMerger() - cm.read_source(pendhosts) - cm.read_target('/etc/hosts') - cm.write_out('/etc/hosts') - for dirn in os.walk(appendoncedir): - for filen in dirn[2]: - appendonce(appendoncedir, os.path.join(dirn[0], filen)) - if lastrsp: - lastrsp = json.loads(lastrsp) - opts = lastrsp.get('options', {}) - for fname in opts: - uid = -1 - gid = -1 - for opt in opts[fname]: - if opt == 'owner': - try: - uid = pwd.getpwnam(opts[fname][opt]['name']).pw_uid - except KeyError: - uid = opts[fname][opt]['id'] - elif opt == 'group': - try: - gid = grp.getgrnam(opts[fname][opt]['name']).gr_gid - except KeyError: - gid = opts[fname][opt]['id'] - elif opt == 'permissions': - os.chmod(fname, int(opts[fname][opt], 8)) - if uid != -1 or gid != -1: - os.chown(fname, uid, gid) - return status - finally: - shutil.rmtree(tmpdir) - shutil.rmtree(appendoncedir) - - -if __name__ == '__main__': - status = 202 - while status not in (204, 200): - try: - status = synchronize() - except Exception as e: - sys.stderr.write(str(e)) - sys.stderr.write('\n') - sys.stderr.flush() - status = 300 - if status not in (204, 200): - time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el10-diskless/profiles/default/scripts/syncfileclient new file mode 120000 index 00000000..219244e6 --- /dev/null +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/syncfileclient @@ -0,0 +1 @@ +../../../../el9-diskless/profiles/default/scripts/syncfileclient \ No newline at end of file From a36040fa923abe9ea7f9b626ace92dc85aae48bb Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 17 Jun 2025 10:39:34 -0400 Subject: [PATCH 222/413] Include el10 imgutil contents --- imgutil/confluent_imgutil.spec.tmpl | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imgutil/confluent_imgutil.spec.tmpl b/imgutil/confluent_imgutil.spec.tmpl index b3aa3bc9..c15b4cc6 100644 --- a/imgutil/confluent_imgutil.spec.tmpl +++ b/imgutil/confluent_imgutil.spec.tmpl @@ -34,7 +34,7 @@ mkdir -p opt/confluent/lib/imgutil mkdir -p opt/confluent/bin mv imgutil opt/confluent/bin/ chmod a+x opt/confluent/bin/imgutil -mv ubuntu* suse15 el7 el9 el8 opt/confluent/lib/imgutil/ +mv ubuntu* suse15 el7 el9 el10 el8 opt/confluent/lib/imgutil/ mkdir -p opt/confluent/share/licenses/confluent_imgutil cp LICENSE opt/confluent/share/licenses/confluent_imgutil From 551862e85eeb280a57ddc5cd0e5c721f996db380 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 17 Jun 2025 10:53:55 -0400 Subject: [PATCH 223/413] Only start ssh if requested on cmdline --- .../usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 9 +++++++-- imgutil/el10/dracut/install | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index 9764d971..b46a22ac 100644 --- a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -42,7 +42,9 @@ if ! grep console= /proc/cmdline >& /dev/null; then autocons=$(/opt/confluent/bin/autocons) autoconsdev=${autocons%,*} autocons=${autocons##*/} - echo "Automatic console configured for $autocons" + if [ ! 
-z "$autocons" ]; then + echo "Automatic console configured for $autocons" + fi fi echo "Initializing confluent diskless environment" echo -n "udevd: " @@ -310,7 +312,10 @@ for pubkey in /etc/ssh/ssh_host*key.pub; do fi echo HostKey $privfile >> /etc/ssh/sshd_config done -/usr/sbin/sshd +if grep "debugssh" /proc/cmdline > /dev/null; then + /usr/sbin/sshd +fi +echo "done" confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg| awk '{print $2}') confluent_proto=$(grep ^protocol: /etc/confluent/confluent.deploycfg| awk '{print $2}') confluent_urls="" diff --git a/imgutil/el10/dracut/install b/imgutil/el10/dracut/install index 9c2cd68c..bface540 100644 --- a/imgutil/el10/dracut/install +++ b/imgutil/el10/dracut/install @@ -7,6 +7,7 @@ dracut_install curl openssl tar cpio gzip lsmod ethtool xz lsmod ethtool dracut_install modprobe touch echo cut wc bash uniq grep ip hostname dracut_install awk egrep dirname expr sort dracut_install ssh sshd reboot parted mkfs mkfs.ext4 mkfs.xfs xfs_db mkswap +dracut_install /usr/libexec/openssh/sshd-session dracut_install efibootmgr uuidgen dracut_install du df ssh-keygen scp dracut_install /lib64/libnss_dns* From 7fd9a207b1925f026721634276ce5a2a17762d0e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 17 Jun 2025 10:58:54 -0400 Subject: [PATCH 224/413] Switch el10 to el9 binaries for diskless The libcrypt change has to be accommodated --- confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl | 2 ++ confluent_osdeploy/confluent_osdeploy.spec.tmpl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl index b6cf3826..a78e7563 100644 --- a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl @@ -48,6 +48,8 @@ for os in el7 el8 suse15 el9 el10 ubuntu20.04; do cd ${os}disklessout if [ -d ../${os}bin ]; then cp -a ../${os}bin/opt . 
+ elif [ $os = el10 ]; then + cp -a ../el9bin/opt . else cp -a ../opt . fi diff --git a/confluent_osdeploy/confluent_osdeploy.spec.tmpl b/confluent_osdeploy/confluent_osdeploy.spec.tmpl index a6cf95e5..c53307ff 100644 --- a/confluent_osdeploy/confluent_osdeploy.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy.spec.tmpl @@ -51,6 +51,8 @@ for os in el7 el8 suse15 el9 el10 ubuntu20.04 ubuntu22.04 ubuntu24.04; do cd ${os}disklessout if [ -d ../${os}bin ]; then cp -a ../${os}bin/opt . + elif [ $os = el10 ]; then + cp -a ../el9bin/opt . else cp -a ../el8bin/opt . fi From 935691d1f32d569179ff9bc3af12808bc7612de1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 17 Jun 2025 14:20:17 -0400 Subject: [PATCH 225/413] Fix util for pre-psutil distributions --- confluent_server/confluent/util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index cb6c2973..b4aaf1e3 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -23,6 +23,7 @@ import hashlib try: import psutil except ImportError: + psutil = None import netifaces import os import re From 4009aa1aa12d20a76488770fcd291034945e85b5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 17 Jun 2025 15:09:38 -0400 Subject: [PATCH 226/413] Query the BMC rather than the SMM itself --- confluent_server/confluent/discovery/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index 790379b6..b74e26a7 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -1195,7 +1195,9 @@ def search_smms_by_cert(currsmm, cert, cfg): cd = cfg.get_node_attributes(currsmm, ['hardwaremanagement.manager', 'pubkeys.tls_hardwaremanager']) smmaddr = cd.get(currsmm, {}).get('hardwaremanagement.manager', {}).get('value', None) - wc = 
webclient.SecureHTTPConnection(currsmm, verifycallback=cv) + if not smmaddr: + smmaddr = currsmm + wc = webclient.SecureHTTPConnection(smmaddr, verifycallback=cv) neighs = wc.grab_json_response('/scripts/neighdata.json') except Exception: return None From 7bde5c42915790f0e35dc032497e91af30554c5a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 17 Jun 2025 15:43:15 -0400 Subject: [PATCH 227/413] Fix netifaces fallback If psutil is not available, continue with netifaces as before --- confluent_server/confluent/netutil.py | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py index 4ff5b29e..76dddbc2 100644 --- a/confluent_server/confluent/netutil.py +++ b/confluent_server/confluent/netutil.py @@ -21,6 +21,7 @@ import codecs try: import psutil except ImportError: + psutil = None import netifaces import struct import eventlet.green.socket as socket From e489d2d532fc6960ecdfe6a56b5f5a12ffd4e0c6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 18 Jun 2025 08:55:13 -0400 Subject: [PATCH 228/413] Fix setupssh behavior on nearly full /tmp/ --- confluent_osdeploy/common/profile/scripts/setupssh | 3 ++- confluent_osdeploy/debian/profiles/default/scripts/setupssh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/setupssh b/confluent_osdeploy/common/profile/scripts/setupssh index 06ae3e01..6215a1d7 100644 --- a/confluent_osdeploy/common/profile/scripts/setupssh +++ b/confluent_osdeploy/common/profile/scripts/setupssh @@ -7,7 +7,6 @@ for pubkey in /etc/ssh/ssh_host*key.pub; do continue fi certfile=${pubkey/.pub/-cert.pub} - rm $certfile confluentpython $confapiclient /confluent-api/self/sshcert $pubkey -o $certfile done if [ -d /etc/ssh/sshd_config.d/ -a ! 
-e /etc/ssh/sshd_config.d/90-confluent.conf ]; then @@ -25,6 +24,7 @@ confluentpython $confapiclient /confluent-public/site/initramfs.tgz -o initramfs tar xf initramfs.tgz for ca in ssh/*.ca; do LINE=$(cat $ca) + if [ -z "$LINE" ]; then continue; fi cp -af /etc/ssh/ssh_known_hosts /etc/ssh/ssh_known_hosts.new grep -v "$LINE" /etc/ssh/ssh_known_hosts > /etc/ssh/ssh_known_hosts.new echo '@cert-authority *' $LINE >> /etc/ssh/ssh_known_hosts.new @@ -32,6 +32,7 @@ for ca in ssh/*.ca; do done for pubkey in ssh/*.*pubkey; do LINE=$(cat $pubkey) + if [ -z "$LINE" ]; then continue; fi cp -af /root/.ssh/authorized_keys /root/.ssh/authorized_keys.new grep -v "$LINE" /root/.ssh/authorized_keys > /root/.ssh/authorized_keys.new echo "$LINE" >> /root/.ssh/authorized_keys.new diff --git a/confluent_osdeploy/debian/profiles/default/scripts/setupssh b/confluent_osdeploy/debian/profiles/default/scripts/setupssh index 06ae3e01..6215a1d7 100644 --- a/confluent_osdeploy/debian/profiles/default/scripts/setupssh +++ b/confluent_osdeploy/debian/profiles/default/scripts/setupssh @@ -7,7 +7,6 @@ for pubkey in /etc/ssh/ssh_host*key.pub; do continue fi certfile=${pubkey/.pub/-cert.pub} - rm $certfile confluentpython $confapiclient /confluent-api/self/sshcert $pubkey -o $certfile done if [ -d /etc/ssh/sshd_config.d/ -a ! 
-e /etc/ssh/sshd_config.d/90-confluent.conf ]; then @@ -25,6 +24,7 @@ confluentpython $confapiclient /confluent-public/site/initramfs.tgz -o initramfs tar xf initramfs.tgz for ca in ssh/*.ca; do LINE=$(cat $ca) + if [ -z "$LINE" ]; then continue; fi cp -af /etc/ssh/ssh_known_hosts /etc/ssh/ssh_known_hosts.new grep -v "$LINE" /etc/ssh/ssh_known_hosts > /etc/ssh/ssh_known_hosts.new echo '@cert-authority *' $LINE >> /etc/ssh/ssh_known_hosts.new @@ -32,6 +32,7 @@ for ca in ssh/*.ca; do done for pubkey in ssh/*.*pubkey; do LINE=$(cat $pubkey) + if [ -z "$LINE" ]; then continue; fi cp -af /root/.ssh/authorized_keys /root/.ssh/authorized_keys.new grep -v "$LINE" /root/.ssh/authorized_keys > /root/.ssh/authorized_keys.new echo "$LINE" >> /root/.ssh/authorized_keys.new From ac42c1b4c77eec18735797d7829e77d86e6d2cf9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 24 Jun 2025 13:40:40 -0400 Subject: [PATCH 229/413] Skip IPv6 lookups on IPv4 context For DHCPv4 context, it makes no sense to bother with IPv6 addresses that cannot be used. --- confluent_server/confluent/netutil.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py index 76dddbc2..6866d848 100644 --- a/confluent_server/confluent/netutil.py +++ b/confluent_server/confluent/netutil.py @@ -497,6 +497,8 @@ def get_nic_config(configmanager, node, ip=None, mac=None, ifidx=None, #TODO(jjohnson2): ip address, prefix length, mac address, # join a bond/bridge, vlan configs, etc. # also other nic criteria, physical location, driver and index... 
+ if not onlyfamily: + onlyfamily = 0 clientfam = None clientipn = None serverfam = None @@ -527,11 +529,13 @@ def get_nic_config(configmanager, node, ip=None, mac=None, ifidx=None, bmc6 = None if bmc: try: - bmc4 = socket.getaddrinfo(bmc, 0, socket.AF_INET, socket.SOCK_DGRAM)[0][-1][0] + if onlyfamily in (0, socket.AF_INET): + bmc4 = socket.getaddrinfo(bmc, 0, socket.AF_INET, socket.SOCK_DGRAM)[0][-1][0] except Exception: pass try: - bmc6 = socket.getaddrinfo(bmc, 0, socket.AF_INET6, socket.SOCK_DGRAM)[0][-1][0] + if onlyfamily in (0, socket.AF_INET6): + bmc6 = socket.getaddrinfo(bmc, 0, socket.AF_INET6, socket.SOCK_DGRAM)[0][-1][0] except Exception: pass cfgbyname = {} @@ -555,8 +559,6 @@ def get_nic_config(configmanager, node, ip=None, mac=None, ifidx=None, 'ipv6_method': None, } myaddrs = [] - if onlyfamily is None: - onlyfamily = 0 if ifidx is not None: dhcprequested = False myaddrs = get_my_addresses(ifidx, family=onlyfamily) @@ -591,13 +593,15 @@ def get_nic_config(configmanager, node, ip=None, mac=None, ifidx=None, ipbynodename = None ip6bynodename = None try: - for addr in socket.getaddrinfo(node, 0, socket.AF_INET, socket.SOCK_DGRAM): - ipbynodename = addr[-1][0] + if onlyfamily in (socket.AF_INET, 0): + for addr in socket.getaddrinfo(node, 0, socket.AF_INET, socket.SOCK_DGRAM): + ipbynodename = addr[-1][0] except socket.gaierror: pass try: - for addr in socket.getaddrinfo(node, 0, socket.AF_INET6, socket.SOCK_DGRAM): - ip6bynodename = addr[-1][0] + if onlyfamily in (socket.AF_INET6, 0): + for addr in socket.getaddrinfo(node, 0, socket.AF_INET6, socket.SOCK_DGRAM): + ip6bynodename = addr[-1][0] except socket.gaierror: pass if myaddrs: @@ -753,7 +757,7 @@ def get_addresses_by_serverip(serverip): elif ':' in serverip: fam = socket.AF_INET6 else: - raise ValueError('"{0}" is not a valid ip argument') + raise ValueError('"{0}" is not a valid ip argument'.format(serverip)) ipbytes = socket.inet_pton(fam, serverip) if ipbytes[:8] == 
b'\xfe\x80\x00\x00\x00\x00\x00\x00': myaddrs = get_my_addresses(matchlla=ipbytes) From 1f3b84cc9df9551e46a66867fd20dc67df32e313 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 24 Jun 2025 16:32:31 -0400 Subject: [PATCH 230/413] Implement Windows iso extraction If a user has custom windows categories, match them to media import. To do this, we needed to go to pycdlib, as libarchive can't do UDF-only isos. For now, this has no progress indication, but does extract it similar to most Linux distributions are done. --- confluent_server/confluent/osimage.py | 84 +++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 75a64e77..f0a197c2 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -9,6 +9,11 @@ logging.getLogger('libarchive').addHandler(logging.NullHandler()) import libarchive import hashlib import os +try: + from io import BytesIO + import pycdlib +except ImportError: + pycdlib = None import shutil import sys import time @@ -24,6 +29,7 @@ import confluent.messages as msg COPY = 1 EXTRACT = 2 +EXTRACTUDF = 4 READFILES = set([ '.disk/info', 'media.1/products', @@ -268,8 +274,33 @@ def extract_entries(entries, flags=0, callback=None, totalsize=None, extractlist return float(sizedone) / float(totalsize) -def extract_file(archfile, flags=0, callback=lambda x: None, imginfo=(), extractlist=None): +def extract_udf(archfile, callback=lambda x: None): + """Extracts a UDF archive from a file into the current directory.""" + dfd = os.dup(archfile.fileno()) + os.lseek(dfd, 0, 0) + fp = os.fdopen(dfd, 'rb') + udf = pycdlib.PyCdlib() + udf.open_fp(fp) + for dirent in udf.walk(udf_path='/'): + for filent in dirent[2]: + currfile = os.path.join(dirent[0], filent) + relfile = currfile + if currfile[0] == '/': + relfile = currfile[1:] + targfile = os.path.join('.', relfile) + if os.path.exists(targfile): + 
os.unlink(targfile) + os.makedirs(os.path.dirname(targfile), exist_ok=True) + udf.get_file_from_iso(targfile, udf_path=currfile) + udf.close() + fp.close() + return True + + +def extract_file(archfile, flags=0, callback=lambda x: None, imginfo=(), extractlist=None, method=EXTRACT): """Extracts an archive from a file into the current directory.""" + if EXTRACTUDF & method: + return extract_udf(archfile, callback) totalsize = 0 for img in imginfo: if not imginfo[img]: @@ -604,7 +635,28 @@ def check_coreos(isoinfo): 'method': EXTRACT, 'category': 'coreos'} - +def check_windows(isoinfo): + idwbinfo = isoinfo[1].get('sources/idwbinfo.txt', b'') + idwbinfo = idwbinfo.decode() + idwbinfo = idwbinfo.split('\n') + version = '' + for line in idwbinfo: + if 'BuildBranch=' in line: + branch = line.strip().split('=')[1] + if branch == 'rs5_release': + version = '2019' + elif branch == 'fe_release': + version = '2022' + elif branch == 'ge_release': + version = '2025' + category = f'windows{version}' + if version: + defprofile = '/opt/confluent/lib/osdeploy/{0}'.format(category) + if not os.path.exists(defprofile): + return None + return {'name': 'windows-{0}-x86_64'.format(version), 'method': EXTRACTUDF, 'category': category} + return None + def check_rhel(isoinfo): ver = None arch = None @@ -659,25 +711,47 @@ def check_rhel(isoinfo): def scan_iso(archive): + scanudf = False filesizes = {} filecontents = {} dfd = os.dup(archive.fileno()) os.lseek(dfd, 0, 0) try: - with libarchive.fd_reader(dfd) as reader: + with libarchive.fd_reader(dfd, ) as reader: for ent in reader: if str(ent).endswith('TRANS.TBL'): continue eventlet.sleep(0) filesizes[str(ent)] = ent.size + if str(ent) == 'README.TXT': + readmecontents = b'' + for block in ent.get_blocks(): + readmecontents += bytes(block) + if b'ISO-13346' in readmecontents: + scanudf = True if str(ent) in READFILES: filecontents[str(ent)] = b'' for block in ent.get_blocks(): filecontents[str(ent)] += bytes(block) + if scanudf: + return 
scan_udf(dfd) finally: os.close(dfd) return filesizes, filecontents +def scan_udf(dfd): + fp = os.fdopen(dfd, 'rb') + iso = pycdlib.PyCdlib() + iso.open_fp(fp) + try: + extracted = BytesIO() + iso.get_file_from_iso_fp(extracted, udf_path='/sources/idwbinfo.txt') + idwbinfo = extracted.getvalue() + return {}, {'sources/idwbinfo.txt': idwbinfo} + except Exception: + return {}, {} + + def fingerprint(archive): archive.seek(0) @@ -736,9 +810,9 @@ def import_image(filename, callback, backend=False, mfd=None, custtargpath=None, print('Importing OS to ' + targpath + ':') callback({'progress': 0.0}) pct = 0.0 - if EXTRACT & identity['method']: + if EXTRACT & identity['method'] or EXTRACTUDF & identity['method']: pct = extract_file(archive, callback=callback, imginfo=imginfo, - extractlist=identity.get('extractlist', None)) + extractlist=identity.get('extractlist', None), method=identity['method']) if COPY & identity['method']: basename = identity.get('copyto', os.path.basename(filename)) targiso = os.path.join(targpath, basename) From 61749c36492d0df94e54eac9b734ef47482cc5cf Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 25 Jun 2025 13:47:29 -0400 Subject: [PATCH 231/413] Support older Pillow libraries Some distributions bundle older pillow, and we have a way to support them. 
--- confluent_client/bin/nodeconsole | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 7cb00f2c..37382dbd 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -38,7 +38,7 @@ import termios import fcntl import confluent.screensqueeze as sq try: - from PIL import Image, ImageDraw + from PIL import Image, ImageDraw, ImageFont except ImportError: Image = None @@ -227,15 +227,17 @@ def draw_text(text, width, height): nd = ImageDraw.Draw(nerr) for txtpiece in text.split('\n'): fntsize = 8 - while nd.textlength(txtpiece, font_size=fntsize) < int(imgwidth * 0.90): + txtfont = ImageFont.truetype('DejaVuSans.ttf', size=fntsize) + while nd.textlength(txtpiece, font=txtfont) < int(imgwidth * 0.90): fntsize += 1 + txtfont = ImageFont.truetype('DejaVuSans.ttf', size=fntsize) fntsize -= 1 if fntsize < maxfntsize: maxfntsize = fntsize hmargin = int(imgwidth * 0.05) vmargin = int(imgheight * 0.10) - nd.text((hmargin, vmargin), text, font_size=maxfntsize) - nd.rectangle((0, 0, nerr.width - 1, nerr.height -1), outline='white', width=1) + nd.text((hmargin, vmargin), text, font=txtfont) + nd.rectangle((0, 0, nerr.width - 1, nerr.height -1), outline='white') outfile = io.BytesIO() nerr.save(outfile, format='PNG') data = base64.b64encode(outfile.getbuffer()) @@ -270,8 +272,8 @@ def draw_image(data, width, height, doscale=True): rzheight = imgheight img = img.resize((rzwidth, rzheight)) nd = ImageDraw.Draw(nimg) - nd.rectangle((1, 1, rzwidth + 2, rzheight + 2), outline='black', width=1) - nd.rectangle((0, 0, rzwidth + 3, rzheight + 3), outline='white', width=1) + nd.rectangle((1, 1, rzwidth + 2, rzheight + 2), outline='black') + nd.rectangle((0, 0, rzwidth + 3, rzheight + 3), outline='white') nimg.paste(img, box=(2, 2)) outfile = io.BytesIO() nimg.save(outfile, format='PNG') From 05dbbd6ce07492e994fcf865417449c3bd0d29ae Mon Sep 17 00:00:00 2001 From: 
Jarrod Johnson Date: Wed, 25 Jun 2025 16:10:26 -0400 Subject: [PATCH 232/413] Explicitly check root user keys Replace simple existence check with a check that assures the content also matches. --- confluent_server/bin/confluent_selfcheck | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/confluent_server/bin/confluent_selfcheck b/confluent_server/bin/confluent_selfcheck index f3ad8605..4ce37fb5 100755 --- a/confluent_server/bin/confluent_selfcheck +++ b/confluent_server/bin/confluent_selfcheck @@ -223,7 +223,26 @@ if __name__ == '__main__': emprint('TFTP failure, PXE will not work, though media and HTTP boot can still work. (Example resolution: osdeploy initialize -p)') fprint('SSH root user public key: ') if glob.glob('/var/lib/confluent/public/site/ssh/*.rootpubkey'): - print('OK') + if not glob.glob('/root/.ssh/id_*.pub'): + emprint('No SSH keys for root user, passwordless SSH from managers to nodes may not work (Example resolution: osdeploy initialize -u)') + for userpub in glob.glob('/root/.ssh/id_*.pub'): + with open(userpub) as f: + pubkey = f.read().strip() + for sitepubkey in glob.glob('/var/lib/confluent/public/site/ssh/*.rootpubkey'): + with open(sitepubkey) as sf: + spubkey = sf.read().strip() + for keyline in spubkey.split('\n'): + if keyline == pubkey: + print('OK') + break + else: + continue + break + else: + continue + break + else: + emprint('No matching public key found for root user (Example resolution: osdeploy initialize -u)') else: emprint('No trusted ssh keys for root user, passwordless SSH from managers to nodes may not work (Example resolution: osdeploy initialize -u)') if sshutil.sshver() > 7.6: From 5028ed9f07d9745c48e7839651f4bdcfc27e6b26 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 1 Jul 2025 15:45:24 -0400 Subject: [PATCH 233/413] Avoid set changed during iteration --- confluent_server/confluent/discovery/protocols/ssdp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/confluent_server/confluent/discovery/protocols/ssdp.py b/confluent_server/confluent/discovery/protocols/ssdp.py index 447aaf5f..acd6b84e 100644 --- a/confluent_server/confluent/discovery/protocols/ssdp.py +++ b/confluent_server/confluent/discovery/protocols/ssdp.py @@ -316,7 +316,7 @@ def snoop(handler, byehandler=None, protocol=None, uuidlookup=None): if not mac: continue _process_snoop(peer, rsp, mac, known_peers, newmacs, peerbymacaddress, byehandler, machandlers, handler) - for mac in newmacs: + for mac in list(newmacs): thehandler = machandlers.get(mac, None) if thehandler: thehandler(peerbymacaddress[mac]) From 13d9fe271250eac97ecce80aa445a572346e25ff Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 2 Jul 2025 09:55:24 -0400 Subject: [PATCH 234/413] Revert useinsecureprotocols to any group value after adoption --- misc/adoptnode.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/misc/adoptnode.sh b/misc/adoptnode.sh index ad230186..b4536608 100755 --- a/misc/adoptnode.sh +++ b/misc/adoptnode.sh @@ -19,5 +19,6 @@ scp prepadopt.sh $TARGNODE:/tmp/ scp finalizeadopt.sh $TARGNODE:/tmp/ ssh $TARGNODE bash /tmp/prepadopt.sh $TARGNODE $TARGPROF nodeattrib $TARGNODE deployment.pendingprofile= +nodeattrib $TARGNODE -c deployment.useinsecureprotocols nodeapply $TARGNODE -k ssh $TARGNODE sh /tmp/finalizeadopt.sh From 5a0b2468f6293efff59eee184a9fa1c0b8d9bea6 Mon Sep 17 00:00:00 2001 From: Markus Hilger Date: Fri, 11 Jul 2025 03:08:50 +0200 Subject: [PATCH 235/413] Update documentation link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9be6cc60..d9b3cd1c 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ If you're coming from xCAT, check out [this comparison](https://hpc.lenovo.com/u # Documentation -Confluent documentation is hosted on hpc.lenovo.com: https://hpc.lenovo.com/users/documentation/ +Confluent documentation is hosted on: https://xcat2.github.io/confluent-docs/ # Download From 
79e44e420ad46fb4d67257732e7073645c261f64 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 22 Jul 2025 09:26:33 -0400 Subject: [PATCH 236/413] Fix console handling for vcenter and proxmox plugins --- .../confluent/plugins/hardwaremanagement/proxmox.py | 4 ++-- .../confluent/plugins/hardwaremanagement/vcenter.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py index f3694513..49368c3c 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/proxmox.py @@ -454,8 +454,8 @@ def create(nodes, element, configmanager, inputdata): yield msg.ChildCollection(url) return serialdata = clientsbynode[node].get_vm_serial(node) - return PmxConsole(serialdata, node, configmanager, clientsbynode[node]) - + yield PmxConsole(serialdata, node, configmanager, clientsbynode[node]) + return if __name__ == '__main__': diff --git a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py index 00f6c2bc..bdd1366d 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/vcenter.py @@ -372,7 +372,8 @@ def create(nodes, element, configmanager, inputdata): clientsbynode = prep_vcsa_clients(nodes, configmanager) for node in nodes: serialdata = clientsbynode[node].get_vm_serial(node) - return VmConsole(serialdata['server'], serialdata['port'], serialdata['tls'], configmanager) + yield VmConsole(serialdata['server'], serialdata['port'], serialdata['tls'], configmanager) + return From 6ec072be9ddf3b34cce0817e24631228ca09d901 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 22 Jul 2025 14:25:39 -0400 Subject: [PATCH 237/413] Fix EL10 for ARM nodes --- 
confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl index a78e7563..c473cb0b 100644 --- a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl @@ -27,6 +27,7 @@ mkdir -p stateless-bin cp -a el8bin/* . ln -s el8 el9 ln -s el8 el10 +mv el10/initramfs/usr el10/initramfs/var for os in rhvh4 el7 genesis el8 suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9 el10; do mkdir ${os}out cd ${os}out From bf209a800911656d819d7e06b338c3ef3dc93ad6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 24 Jul 2025 16:20:12 -0400 Subject: [PATCH 238/413] Explicitly recognize fe80::/64 as 'local' For various reasons, it is likely/expected for fe80:: to fail the 'local' check. This is fine in most contexts, except for credserver and ssdp. ssdp already special cased fe80::, so special case fe80:: in credserver as well. 
--- confluent_server/confluent/credserver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/credserver.py b/confluent_server/confluent/credserver.py index 390179f8..89fe4ff2 100644 --- a/confluent_server/confluent/credserver.py +++ b/confluent_server/confluent/credserver.py @@ -43,6 +43,8 @@ libc = ctypes.CDLL(ctypes.util.find_library('c')) def address_is_somewhat_trusted(address, nodename, cfm): + if netutil.ip_on_same_subnet(address.split('%')[0], 'fe80::', 64): + return True if netutil.address_is_local(address): return True authnets = cfm.get_node_attributes(nodename, 'trusted.subnets') From ff0c11e91966be9791d066b77447cd4a29fe24a4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 25 Jul 2025 14:47:46 -0400 Subject: [PATCH 239/413] Add mtu to net attributes --- confluent_server/confluent/config/attributes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index 4f6531bd..4251974d 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -478,6 +478,9 @@ node = { 'This would be the default name per the deployed OS and can be a comma delimited list to denote members of ' 'a team or a single interface for VLAN/PKEY connections.' }, + 'net.mtu': { + 'description': 'MTU to apply to this connection', + }, 'net.vlan_id': { 'description': 'Ethernet VLAN or InfiniBand PKEY to use for this connection. ' 'Specify the parent device using net.interface_names.' 
From 21429c6d7d93866c4184f2200b2ec3a7ca85f45f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 25 Jul 2025 14:48:03 -0400 Subject: [PATCH 240/413] Implement mtu for networkmanager backend --- confluent_osdeploy/common/profile/scripts/confignet | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 0c93485b..5baa4951 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -381,6 +381,8 @@ class NetworkManager(object): cmdargs['ipv4.gateway'] = stgs['ipv4_gateway'] if stgs.get('ipv6_gateway', None): cmdargs['ipv6.gateway'] = stgs['ipv6_gateway'] + if stgs.get('mtu', None): + cmdargs['802-3-ethernet.mtu'] = stgs['mtu'] dnsips = self.deploycfg.get('nameservers', []) if not dnsips: dnsips = [] @@ -406,7 +408,7 @@ class NetworkManager(object): cargs = [] for arg in cmdargs: cargs.append(arg) - cargs.append(cmdargs[arg]) + cargs.append('{}'.format(cmdargs[arg])) if stgs['team_mode'] in self.bondtypes: stgs['team_mode'] = self.bondtypes[stgs['team_mode']] subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond', 'con-name', cname, 'connection.interface-name', cname, 'bond.options', 'miimon=100,mode={}'.format(stgs['team_mode'])] + cargs) @@ -441,7 +443,7 @@ class NetworkManager(object): cargs = [] for arg in cmdargs: cargs.append(arg) - cargs.append(cmdargs[arg]) + cargs.append('{}'.format(cmdargs[arg])) if u: subprocess.check_call(['nmcli', 'c', 'm', u, 'connection.interface-name', iname] + cargs) subprocess.check_call(['nmcli', 'c', 'u', u]) From 47710756a5c248376bd679ad65590ad98014f194 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 25 Jul 2025 15:01:26 -0400 Subject: [PATCH 241/413] Implement mtu for netplan backend --- confluent_osdeploy/common/profile/scripts/confignet | 3 +++ .../debian/profiles/default/scripts/confignet | 11 ++++++++--- 2 files 
changed, 11 insertions(+), 3 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 5baa4951..78126b6f 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -164,6 +164,9 @@ class NetplanManager(object): if curraddr not in currips: needcfgwrite = True currips.append(curraddr) + if stgs.get('mtu', None): + devdict = self.getcfgarrpath([devname]) + devdict['mtu'] = int(stgs['mtu']) gws = [] gws.append(stgs.get('ipv4_gateway', None)) gws.append(stgs.get('ipv6_gateway', None)) diff --git a/confluent_osdeploy/debian/profiles/default/scripts/confignet b/confluent_osdeploy/debian/profiles/default/scripts/confignet index 5bf0871b..78126b6f 100644 --- a/confluent_osdeploy/debian/profiles/default/scripts/confignet +++ b/confluent_osdeploy/debian/profiles/default/scripts/confignet @@ -164,6 +164,9 @@ class NetplanManager(object): if curraddr not in currips: needcfgwrite = True currips.append(curraddr) + if stgs.get('mtu', None): + devdict = self.getcfgarrpath([devname]) + devdict['mtu'] = int(stgs['mtu']) gws = [] gws.append(stgs.get('ipv4_gateway', None)) gws.append(stgs.get('ipv6_gateway', None)) @@ -381,6 +384,8 @@ class NetworkManager(object): cmdargs['ipv4.gateway'] = stgs['ipv4_gateway'] if stgs.get('ipv6_gateway', None): cmdargs['ipv6.gateway'] = stgs['ipv6_gateway'] + if stgs.get('mtu', None): + cmdargs['802-3-ethernet.mtu'] = stgs['mtu'] dnsips = self.deploycfg.get('nameservers', []) if not dnsips: dnsips = [] @@ -406,10 +411,10 @@ class NetworkManager(object): cargs = [] for arg in cmdargs: cargs.append(arg) - cargs.append(cmdargs[arg]) + cargs.append('{}'.format(cmdargs[arg])) if stgs['team_mode'] in self.bondtypes: stgs['team_mode'] = self.bondtypes[stgs['team_mode']] - subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond', 'con-name', cname, 'connection.interface-name', cname, 'bond.options', 
'mode={}'.format(stgs['team_mode'])] + cargs) + subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond', 'con-name', cname, 'connection.interface-name', cname, 'bond.options', 'miimon=100,mode={}'.format(stgs['team_mode'])] + cargs) for iface in cfg['interfaces']: self.add_team_member(cname, iface) subprocess.check_call(['nmcli', 'c', 'u', cname]) @@ -441,7 +446,7 @@ class NetworkManager(object): cargs = [] for arg in cmdargs: cargs.append(arg) - cargs.append(cmdargs[arg]) + cargs.append('{}'.format(cmdargs[arg])) if u: subprocess.check_call(['nmcli', 'c', 'm', u, 'connection.interface-name', iname] + cargs) subprocess.check_call(['nmcli', 'c', 'u', u]) From 8c6f36adf36606c4fe1c8cbf8cf8094d29aa2239 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 28 Jul 2025 15:55:16 -0400 Subject: [PATCH 242/413] Fixes for ansible running Newer ansible requires deferred plugin initialization explicitly, support either ansible norm. Reliably mark a play as complete even if facing a very early init problem, e.g. if ansible shebang points to a non-existant python. Fix incorrect association of stderr to tasks, and present stderr to the client exactly once. 
--- confluent_server/confluent/runansible.py | 67 +++++++++++++----------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/confluent_server/confluent/runansible.py b/confluent_server/confluent/runansible.py index 50696742..b82d8684 100644 --- a/confluent_server/confluent/runansible.py +++ b/confluent_server/confluent/runansible.py @@ -48,6 +48,7 @@ class PlayRunner(object): return avail def dump_text(self): + stderr = self.stderr retinfo = self.dump_dict() textout = '' for result in retinfo['results']: @@ -64,9 +65,9 @@ class PlayRunner(object): else: textout += result['state'] + '\n' textout += '\n' - if self.stderr: - textout += "ERRORS **********************************\n" - textout += self.stderr + if stderr: + textout += "ERRORS **********************************\n" + textout += stderr return textout def dump_json(self): @@ -80,32 +81,34 @@ class PlayRunner(object): def _really_run_playbooks(self): global anspypath - mypath = anspypath - if not mypath: - ansloc = shutil.which('ansible') - if ansloc: - with open(ansloc, 'r') as onsop: - shebang = onsop.readline() - anspypath = shebang.strip().replace('#!', '') - mypath = anspypath - if not mypath: - mypath = sys.executable - with open(os.devnull, 'w+') as devnull: - targnodes = ','.join(self.nodes) - for playfilename in self.playfiles: - worker = subprocess.Popen( - [mypath, __file__, targnodes, playfilename], - stdin=devnull, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdout, stder = worker.communicate() - self.stderr += stder.decode('utf8') - current = memoryview(stdout) - while len(current): - sz = struct.unpack('=q', current[:8])[0] - result = msgpack.unpackb(current[8:8+sz], raw=False) - self.results.append(result) - current = current[8+sz:] - self.complete = True + try: + mypath = anspypath + if not mypath: + ansloc = shutil.which('ansible') + if ansloc: + with open(ansloc, 'r') as onsop: + shebang = onsop.readline() + anspypath = shebang.strip().replace('#!', '') + mypath = 
anspypath + if not mypath: + mypath = sys.executable + with open(os.devnull, 'w+') as devnull: + targnodes = ','.join(self.nodes) + for playfilename in self.playfiles: + worker = subprocess.Popen( + [mypath, __file__, targnodes, playfilename], + stdin=devnull, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stder = worker.communicate() + self.stderr += stder.decode('utf8') + current = memoryview(stdout) + while len(current): + sz = struct.unpack('=q', current[:8])[0] + result = msgpack.unpackb(current[8:8+sz], raw=False) + self.results.append(result) + current = current[8+sz:] + finally: + self.complete = True def run_playbooks(playfiles, nodes): @@ -143,6 +146,7 @@ if __name__ == '__main__': from ansible import context from ansible.module_utils.common.collections import ImmutableDict from ansible.plugins.callback import CallbackBase + import ansible.plugins.loader import yaml class ResultsCollector(CallbackBase): @@ -161,7 +165,10 @@ if __name__ == '__main__': become=None, become_method=None, become_user=None, check=False, diff=False, verbosity=0, remote_user='root') - + try: + ansible.plugins.loader.init_plugin_loader() + except AttributeError: + pass loader = DataLoader() invman = None if os.path.exists('/etc/ansible/hosts'): From cfa16237e1f57914c883d7e9b51fd62a23c4167b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 29 Jul 2025 14:26:43 -0400 Subject: [PATCH 243/413] Dismantle initramfs by default in EL9/EL10 diskless If debugssh is not requsted, the initramfs is not needed. Do not unshare the mount namespace, leave the kernel relevant namespace 'normal' Remove some initramfs content to free up some memory. 
--- .../el9-diskless/profiles/default/scripts/imageboot.sh | 7 ++++++- confluent_osdeploy/utils/start_root.c | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh index fe53bf38..7b340d2c 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh @@ -130,4 +130,9 @@ ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ mv /lib/firmware /lib/firmware-ramfs ln -s /sysroot/lib/firmware /lib/firmware kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) -exec /opt/confluent/bin/start_root +if grep debugssh /proc/cmdline >& /dev/null; then + exec /opt/confluent/bin/start_root +else + rm -rf /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs /lib/firmware-ramfs /usr/lib64/libcrypto.so* /usr/lib64/systemd/ /kernel/ /usr/bin/ /usr/sbin/ /usr/libexec/ + exec /opt/confluent/bin/start_root -s # share mount namespace, keep kernel callbacks intact +fi diff --git a/confluent_osdeploy/utils/start_root.c b/confluent_osdeploy/utils/start_root.c index 132ef469..bd2c4d16 100644 --- a/confluent_osdeploy/utils/start_root.c +++ b/confluent_osdeploy/utils/start_root.c @@ -3,8 +3,11 @@ #include #define __USE_GNU #include +#include int main(int argc, char* argv[]) { - unshare(CLONE_NEWNS); + if (argc < 2 || strcmp(argv[1], "-s")) { + unshare(CLONE_NEWNS); + } mount("/dev", "/sysroot/dev", NULL, MS_MOVE, NULL); mount("/proc", "/sysroot/proc", NULL, MS_MOVE, NULL); mount("/sys", "/sysroot/sys", NULL, MS_MOVE, NULL); From 97e4d7c3d0fb36329547a4b033a762cc63e770af Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 29 Jul 2025 15:33:05 -0400 Subject: [PATCH 244/413] Skip sr0 if attempted --- .../el7-diskless/profiles/default/scripts/getinstalldisk | 2 ++ 
confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk | 2 ++ .../el8-diskless/profiles/default/scripts/getinstalldisk | 2 ++ confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk | 2 ++ .../el9-diskless/profiles/default/scripts/getinstalldisk | 2 ++ .../rhvh4/profiles/default/scripts/getinstalldisk | 2 ++ confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk | 2 ++ .../suse15/profiles/server/scripts/getinstalldisk | 2 ++ .../profiles/default/scripts/getinstalldisk | 2 ++ .../ubuntu20.04/profiles/default/scripts/getinstalldisk | 2 ++ .../ubuntu22.04/profiles/default/scripts/getinstalldisk | 2 ++ 11 files changed, 22 insertions(+) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk index c954a254..71a97e8e 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk @@ -52,6 +52,8 @@ class DiskInfo(object): self.subsystype = v.replace('"', '') if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") + if self.driver == 'sr': + raise Exception('cd/dvd') if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 diff --git a/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk index c954a254..71a97e8e 100644 --- a/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk @@ -52,6 +52,8 @@ class DiskInfo(object): self.subsystype = v.replace('"', '') if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") + if self.driver == 'sr': + raise Exception('cd/dvd') if 
os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk index c954a254..71a97e8e 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk @@ -52,6 +52,8 @@ class DiskInfo(object): self.subsystype = v.replace('"', '') if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") + if self.driver == 'sr': + raise Exception('cd/dvd') if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 diff --git a/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk index c954a254..71a97e8e 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk @@ -52,6 +52,8 @@ class DiskInfo(object): self.subsystype = v.replace('"', '') if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") + if self.driver == 'sr': + raise Exception('cd/dvd') if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk index c954a254..71a97e8e 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk +++ 
b/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk @@ -52,6 +52,8 @@ class DiskInfo(object): self.subsystype = v.replace('"', '') if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") + if self.driver == 'sr': + raise Exception('cd/dvd') if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 diff --git a/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk index c954a254..71a97e8e 100644 --- a/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk @@ -52,6 +52,8 @@ class DiskInfo(object): self.subsystype = v.replace('"', '') if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") + if self.driver == 'sr': + raise Exception('cd/dvd') if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk b/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk index c954a254..71a97e8e 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk @@ -52,6 +52,8 @@ class DiskInfo(object): self.subsystype = v.replace('"', '') if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") + if self.driver == 'sr': + raise Exception('cd/dvd') if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 diff --git 
a/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk b/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk index c954a254..71a97e8e 100644 --- a/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk +++ b/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk @@ -52,6 +52,8 @@ class DiskInfo(object): self.subsystype = v.replace('"', '') if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") + if self.driver == 'sr': + raise Exception('cd/dvd') if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk index c954a254..71a97e8e 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk @@ -52,6 +52,8 @@ class DiskInfo(object): self.subsystype = v.replace('"', '') if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") + if self.driver == 'sr': + raise Exception('cd/dvd') if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk index c954a254..71a97e8e 100644 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk @@ -52,6 +52,8 @@ class DiskInfo(object): self.subsystype = v.replace('"', '') if not self.driver and 'imsm' not in 
self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") + if self.driver == 'sr': + raise Exception('cd/dvd') if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk index c954a254..71a97e8e 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk @@ -52,6 +52,8 @@ class DiskInfo(object): self.subsystype = v.replace('"', '') if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") + if self.driver == 'sr': + raise Exception('cd/dvd') if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 From 16a1c4d59839a71c978f172ed92ad2fc2b9ab3ba Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 29 Jul 2025 15:45:41 -0400 Subject: [PATCH 245/413] Pull mtu into the netcfg api --- confluent_server/confluent/netutil.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py index 6866d848..e3fc5b3f 100644 --- a/confluent_server/confluent/netutil.py +++ b/confluent_server/confluent/netutil.py @@ -243,6 +243,9 @@ class NetManager(object): vlanid = attribs.get('vlan_id', None) if vlanid: myattribs['vlan_id'] = vlanid + mtuinfo = attribs.get('mtu', None) + if mtuinfo: + myattribs['mtu'] = int(mtuinfo) teammod = attribs.get('team_mode', None) if teammod: myattribs['team_mode'] = teammod From 2c43055aecb9a728a729016d0f338ebe32eedeb9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 31 Jul 2025 08:44:58 -0400 Subject: [PATCH 246/413] 
Amend handling of malformed bytes in output Additionally, fix handling of mixed numeric/string in the attribute handler. --- confluent_client/confluent/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/confluent_client/confluent/client.py b/confluent_client/confluent/client.py index a9957b96..7a3674ec 100644 --- a/confluent_client/confluent/client.py +++ b/confluent_client/confluent/client.py @@ -57,7 +57,7 @@ def stringify(instr): # Normalize unicode and bytes to 'str', correcting for # current python version if isinstance(instr, bytes) and not isinstance(instr, str): - return instr.decode('utf-8') + return instr.decode('utf-8', errors='replace') elif not isinstance(instr, bytes) and not isinstance(instr, str): return instr.encode('utf-8') return instr @@ -464,8 +464,8 @@ def printattributes(session, requestargs, showtype, nodetype, noderange, options def _sort_attrib(k): if isinstance(k[1], dict) and k[1].get('sortid', None) is not None: - return k[1]['sortid'] - return k[0] + return sortutil.naturalize_string('{}'.format(k[1]['sortid'])) + return sortutil.naturalize_string(k[0]) def print_attrib_path(path, session, requestargs, options, rename=None, attrprefix=None): exitcode = 0 From 48a0c21300913e57c0cce105b2d28571b797099e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Aug 2025 09:00:25 -0400 Subject: [PATCH 247/413] Refine getinstalldisk Reduce obvious output about skipped devices. Rule out any read-only device. Amend minimum size to 2GB. Among same priority devices, select the smallest target. 
--- .../profiles/default/scripts/getinstalldisk | 17 +++++++++++++---- .../el7/profiles/default/scripts/getinstalldisk | 17 +++++++++++++---- .../profiles/default/scripts/getinstalldisk | 17 +++++++++++++---- .../el8/profiles/default/scripts/getinstalldisk | 17 +++++++++++++---- .../profiles/default/scripts/getinstalldisk | 17 +++++++++++++---- .../profiles/default/scripts/getinstalldisk | 17 +++++++++++++---- .../suse15/profiles/hpc/scripts/getinstalldisk | 17 +++++++++++++---- .../profiles/server/scripts/getinstalldisk | 17 +++++++++++++---- .../profiles/default/scripts/getinstalldisk | 17 +++++++++++++---- .../profiles/default/scripts/getinstalldisk | 17 +++++++++++++---- .../profiles/default/scripts/getinstalldisk | 17 +++++++++++++---- 11 files changed, 143 insertions(+), 44 deletions(-) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk index 71a97e8e..dec536b7 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk @@ -2,6 +2,9 @@ import subprocess import os +class SilentException(Exception): + pass + class DiskInfo(object): def __init__(self, devname): if devname.startswith('nvme') and 'c' in devname: @@ -24,9 +27,11 @@ class DiskInfo(object): continue k, v = prop.split('=', 1) if k == 'DEVTYPE' and v != 'disk': + if v == 'partition': + raise SilentException('Partition') raise Exception('Not a disk') elif k == 'DM_NAME': - raise Exception('Device Mapper') + raise SilentException('Device Mapper') elif k == 'ID_MODEL': self.model = v elif k == 'DEVPATH': @@ -50,6 +55,8 @@ class DiskInfo(object): self.driver = v.replace('"', '') elif k == 'ATTRS{subsystype}': self.subsystype = v.replace('"', '') + elif k == 'ATTR{ro}' and v == '"1"': + raise Exception("Device is read-only") if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 
'nvm': raise Exception("No driver detected") if self.driver == 'sr': @@ -57,8 +64,8 @@ class DiskInfo(object): if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 - if int(self.size) < 536870912: - raise Exception("Device too small for install") + if int(self.size) < 2147483648: + raise Exception("Device too small for install ({}MiB)".format(int(self.size)/1024/1024)) @property def priority(self): @@ -91,9 +98,11 @@ def main(): try: disk = DiskInfo(disk) disks.append(disk) + except SilentException: + pass except Exception as e: print("Skipping {0}: {1}".format(disk, str(e))) - nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + nd = [x.name for x in sorted(disks, key=lambda x: [x.priority, x.size])] if nd: open('/tmp/installdisk', 'w').write(nd[0]) diff --git a/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk index 71a97e8e..dec536b7 100644 --- a/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk @@ -2,6 +2,9 @@ import subprocess import os +class SilentException(Exception): + pass + class DiskInfo(object): def __init__(self, devname): if devname.startswith('nvme') and 'c' in devname: @@ -24,9 +27,11 @@ class DiskInfo(object): continue k, v = prop.split('=', 1) if k == 'DEVTYPE' and v != 'disk': + if v == 'partition': + raise SilentException('Partition') raise Exception('Not a disk') elif k == 'DM_NAME': - raise Exception('Device Mapper') + raise SilentException('Device Mapper') elif k == 'ID_MODEL': self.model = v elif k == 'DEVPATH': @@ -50,6 +55,8 @@ class DiskInfo(object): self.driver = v.replace('"', '') elif k == 'ATTRS{subsystype}': self.subsystype = v.replace('"', '') + elif k == 'ATTR{ro}' and v == '"1"': + raise Exception("Device is read-only") if not self.driver and 'imsm' 
not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") if self.driver == 'sr': @@ -57,8 +64,8 @@ class DiskInfo(object): if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 - if int(self.size) < 536870912: - raise Exception("Device too small for install") + if int(self.size) < 2147483648: + raise Exception("Device too small for install ({}MiB)".format(int(self.size)/1024/1024)) @property def priority(self): @@ -91,9 +98,11 @@ def main(): try: disk = DiskInfo(disk) disks.append(disk) + except SilentException: + pass except Exception as e: print("Skipping {0}: {1}".format(disk, str(e))) - nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + nd = [x.name for x in sorted(disks, key=lambda x: [x.priority, x.size])] if nd: open('/tmp/installdisk', 'w').write(nd[0]) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk index 71a97e8e..dec536b7 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk @@ -2,6 +2,9 @@ import subprocess import os +class SilentException(Exception): + pass + class DiskInfo(object): def __init__(self, devname): if devname.startswith('nvme') and 'c' in devname: @@ -24,9 +27,11 @@ class DiskInfo(object): continue k, v = prop.split('=', 1) if k == 'DEVTYPE' and v != 'disk': + if v == 'partition': + raise SilentException('Partition') raise Exception('Not a disk') elif k == 'DM_NAME': - raise Exception('Device Mapper') + raise SilentException('Device Mapper') elif k == 'ID_MODEL': self.model = v elif k == 'DEVPATH': @@ -50,6 +55,8 @@ class DiskInfo(object): self.driver = v.replace('"', '') elif k == 'ATTRS{subsystype}': self.subsystype = v.replace('"', '') + elif k == 'ATTR{ro}' and 
v == '"1"': + raise Exception("Device is read-only") if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") if self.driver == 'sr': @@ -57,8 +64,8 @@ class DiskInfo(object): if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 - if int(self.size) < 536870912: - raise Exception("Device too small for install") + if int(self.size) < 2147483648: + raise Exception("Device too small for install ({}MiB)".format(int(self.size)/1024/1024)) @property def priority(self): @@ -91,9 +98,11 @@ def main(): try: disk = DiskInfo(disk) disks.append(disk) + except SilentException: + pass except Exception as e: print("Skipping {0}: {1}".format(disk, str(e))) - nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + nd = [x.name for x in sorted(disks, key=lambda x: [x.priority, x.size])] if nd: open('/tmp/installdisk', 'w').write(nd[0]) diff --git a/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk index 71a97e8e..dec536b7 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk @@ -2,6 +2,9 @@ import subprocess import os +class SilentException(Exception): + pass + class DiskInfo(object): def __init__(self, devname): if devname.startswith('nvme') and 'c' in devname: @@ -24,9 +27,11 @@ class DiskInfo(object): continue k, v = prop.split('=', 1) if k == 'DEVTYPE' and v != 'disk': + if v == 'partition': + raise SilentException('Partition') raise Exception('Not a disk') elif k == 'DM_NAME': - raise Exception('Device Mapper') + raise SilentException('Device Mapper') elif k == 'ID_MODEL': self.model = v elif k == 'DEVPATH': @@ -50,6 +55,8 @@ class DiskInfo(object): self.driver = v.replace('"', '') elif k == 'ATTRS{subsystype}': self.subsystype = 
v.replace('"', '') + elif k == 'ATTR{ro}' and v == '"1"': + raise Exception("Device is read-only") if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") if self.driver == 'sr': @@ -57,8 +64,8 @@ class DiskInfo(object): if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 - if int(self.size) < 536870912: - raise Exception("Device too small for install") + if int(self.size) < 2147483648: + raise Exception("Device too small for install ({}MiB)".format(int(self.size)/1024/1024)) @property def priority(self): @@ -91,9 +98,11 @@ def main(): try: disk = DiskInfo(disk) disks.append(disk) + except SilentException: + pass except Exception as e: print("Skipping {0}: {1}".format(disk, str(e))) - nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + nd = [x.name for x in sorted(disks, key=lambda x: [x.priority, x.size])] if nd: open('/tmp/installdisk', 'w').write(nd[0]) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk index 71a97e8e..dec536b7 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk @@ -2,6 +2,9 @@ import subprocess import os +class SilentException(Exception): + pass + class DiskInfo(object): def __init__(self, devname): if devname.startswith('nvme') and 'c' in devname: @@ -24,9 +27,11 @@ class DiskInfo(object): continue k, v = prop.split('=', 1) if k == 'DEVTYPE' and v != 'disk': + if v == 'partition': + raise SilentException('Partition') raise Exception('Not a disk') elif k == 'DM_NAME': - raise Exception('Device Mapper') + raise SilentException('Device Mapper') elif k == 'ID_MODEL': self.model = v elif k == 'DEVPATH': @@ -50,6 +55,8 @@ class DiskInfo(object): 
self.driver = v.replace('"', '') elif k == 'ATTRS{subsystype}': self.subsystype = v.replace('"', '') + elif k == 'ATTR{ro}' and v == '"1"': + raise Exception("Device is read-only") if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") if self.driver == 'sr': @@ -57,8 +64,8 @@ class DiskInfo(object): if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 - if int(self.size) < 536870912: - raise Exception("Device too small for install") + if int(self.size) < 2147483648: + raise Exception("Device too small for install ({}MiB)".format(int(self.size)/1024/1024)) @property def priority(self): @@ -91,9 +98,11 @@ def main(): try: disk = DiskInfo(disk) disks.append(disk) + except SilentException: + pass except Exception as e: print("Skipping {0}: {1}".format(disk, str(e))) - nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + nd = [x.name for x in sorted(disks, key=lambda x: [x.priority, x.size])] if nd: open('/tmp/installdisk', 'w').write(nd[0]) diff --git a/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk index 71a97e8e..dec536b7 100644 --- a/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk @@ -2,6 +2,9 @@ import subprocess import os +class SilentException(Exception): + pass + class DiskInfo(object): def __init__(self, devname): if devname.startswith('nvme') and 'c' in devname: @@ -24,9 +27,11 @@ class DiskInfo(object): continue k, v = prop.split('=', 1) if k == 'DEVTYPE' and v != 'disk': + if v == 'partition': + raise SilentException('Partition') raise Exception('Not a disk') elif k == 'DM_NAME': - raise Exception('Device Mapper') + raise SilentException('Device Mapper') elif k == 'ID_MODEL': self.model = v elif k 
== 'DEVPATH': @@ -50,6 +55,8 @@ class DiskInfo(object): self.driver = v.replace('"', '') elif k == 'ATTRS{subsystype}': self.subsystype = v.replace('"', '') + elif k == 'ATTR{ro}' and v == '"1"': + raise Exception("Device is read-only") if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") if self.driver == 'sr': @@ -57,8 +64,8 @@ class DiskInfo(object): if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 - if int(self.size) < 536870912: - raise Exception("Device too small for install") + if int(self.size) < 2147483648: + raise Exception("Device too small for install ({}MiB)".format(int(self.size)/1024/1024)) @property def priority(self): @@ -91,9 +98,11 @@ def main(): try: disk = DiskInfo(disk) disks.append(disk) + except SilentException: + pass except Exception as e: print("Skipping {0}: {1}".format(disk, str(e))) - nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + nd = [x.name for x in sorted(disks, key=lambda x: [x.priority, x.size])] if nd: open('/tmp/installdisk', 'w').write(nd[0]) diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk b/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk index 71a97e8e..dec536b7 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk @@ -2,6 +2,9 @@ import subprocess import os +class SilentException(Exception): + pass + class DiskInfo(object): def __init__(self, devname): if devname.startswith('nvme') and 'c' in devname: @@ -24,9 +27,11 @@ class DiskInfo(object): continue k, v = prop.split('=', 1) if k == 'DEVTYPE' and v != 'disk': + if v == 'partition': + raise SilentException('Partition') raise Exception('Not a disk') elif k == 'DM_NAME': - raise Exception('Device Mapper') + raise SilentException('Device Mapper') 
elif k == 'ID_MODEL': self.model = v elif k == 'DEVPATH': @@ -50,6 +55,8 @@ class DiskInfo(object): self.driver = v.replace('"', '') elif k == 'ATTRS{subsystype}': self.subsystype = v.replace('"', '') + elif k == 'ATTR{ro}' and v == '"1"': + raise Exception("Device is read-only") if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") if self.driver == 'sr': @@ -57,8 +64,8 @@ class DiskInfo(object): if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 - if int(self.size) < 536870912: - raise Exception("Device too small for install") + if int(self.size) < 2147483648: + raise Exception("Device too small for install ({}MiB)".format(int(self.size)/1024/1024)) @property def priority(self): @@ -91,9 +98,11 @@ def main(): try: disk = DiskInfo(disk) disks.append(disk) + except SilentException: + pass except Exception as e: print("Skipping {0}: {1}".format(disk, str(e))) - nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + nd = [x.name for x in sorted(disks, key=lambda x: [x.priority, x.size])] if nd: open('/tmp/installdisk', 'w').write(nd[0]) diff --git a/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk b/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk index 71a97e8e..dec536b7 100644 --- a/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk +++ b/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk @@ -2,6 +2,9 @@ import subprocess import os +class SilentException(Exception): + pass + class DiskInfo(object): def __init__(self, devname): if devname.startswith('nvme') and 'c' in devname: @@ -24,9 +27,11 @@ class DiskInfo(object): continue k, v = prop.split('=', 1) if k == 'DEVTYPE' and v != 'disk': + if v == 'partition': + raise SilentException('Partition') raise Exception('Not a disk') elif k == 'DM_NAME': - raise 
Exception('Device Mapper') + raise SilentException('Device Mapper') elif k == 'ID_MODEL': self.model = v elif k == 'DEVPATH': @@ -50,6 +55,8 @@ class DiskInfo(object): self.driver = v.replace('"', '') elif k == 'ATTRS{subsystype}': self.subsystype = v.replace('"', '') + elif k == 'ATTR{ro}' and v == '"1"': + raise Exception("Device is read-only") if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") if self.driver == 'sr': @@ -57,8 +64,8 @@ class DiskInfo(object): if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 - if int(self.size) < 536870912: - raise Exception("Device too small for install") + if int(self.size) < 2147483648: + raise Exception("Device too small for install ({}MiB)".format(int(self.size)/1024/1024)) @property def priority(self): @@ -91,9 +98,11 @@ def main(): try: disk = DiskInfo(disk) disks.append(disk) + except SilentException: + pass except Exception as e: print("Skipping {0}: {1}".format(disk, str(e))) - nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + nd = [x.name for x in sorted(disks, key=lambda x: [x.priority, x.size])] if nd: open('/tmp/installdisk', 'w').write(nd[0]) diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk index 71a97e8e..dec536b7 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk @@ -2,6 +2,9 @@ import subprocess import os +class SilentException(Exception): + pass + class DiskInfo(object): def __init__(self, devname): if devname.startswith('nvme') and 'c' in devname: @@ -24,9 +27,11 @@ class DiskInfo(object): continue k, v = prop.split('=', 1) if k == 'DEVTYPE' and v != 'disk': + if 
v == 'partition': + raise SilentException('Partition') raise Exception('Not a disk') elif k == 'DM_NAME': - raise Exception('Device Mapper') + raise SilentException('Device Mapper') elif k == 'ID_MODEL': self.model = v elif k == 'DEVPATH': @@ -50,6 +55,8 @@ class DiskInfo(object): self.driver = v.replace('"', '') elif k == 'ATTRS{subsystype}': self.subsystype = v.replace('"', '') + elif k == 'ATTR{ro}' and v == '"1"': + raise Exception("Device is read-only") if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") if self.driver == 'sr': @@ -57,8 +64,8 @@ class DiskInfo(object): if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 - if int(self.size) < 536870912: - raise Exception("Device too small for install") + if int(self.size) < 2147483648: + raise Exception("Device too small for install ({}MiB)".format(int(self.size)/1024/1024)) @property def priority(self): @@ -91,9 +98,11 @@ def main(): try: disk = DiskInfo(disk) disks.append(disk) + except SilentException: + pass except Exception as e: print("Skipping {0}: {1}".format(disk, str(e))) - nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + nd = [x.name for x in sorted(disks, key=lambda x: [x.priority, x.size])] if nd: open('/tmp/installdisk', 'w').write(nd[0]) diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk index 71a97e8e..dec536b7 100644 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk @@ -2,6 +2,9 @@ import subprocess import os +class SilentException(Exception): + pass + class DiskInfo(object): def __init__(self, devname): if devname.startswith('nvme') and 'c' in devname: @@ -24,9 +27,11 @@ class 
DiskInfo(object): continue k, v = prop.split('=', 1) if k == 'DEVTYPE' and v != 'disk': + if v == 'partition': + raise SilentException('Partition') raise Exception('Not a disk') elif k == 'DM_NAME': - raise Exception('Device Mapper') + raise SilentException('Device Mapper') elif k == 'ID_MODEL': self.model = v elif k == 'DEVPATH': @@ -50,6 +55,8 @@ class DiskInfo(object): self.driver = v.replace('"', '') elif k == 'ATTRS{subsystype}': self.subsystype = v.replace('"', '') + elif k == 'ATTR{ro}' and v == '"1"': + raise Exception("Device is read-only") if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") if self.driver == 'sr': @@ -57,8 +64,8 @@ class DiskInfo(object): if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 - if int(self.size) < 536870912: - raise Exception("Device too small for install") + if int(self.size) < 2147483648: + raise Exception("Device too small for install ({}MiB)".format(int(self.size)/1024/1024)) @property def priority(self): @@ -91,9 +98,11 @@ def main(): try: disk = DiskInfo(disk) disks.append(disk) + except SilentException: + pass except Exception as e: print("Skipping {0}: {1}".format(disk, str(e))) - nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + nd = [x.name for x in sorted(disks, key=lambda x: [x.priority, x.size])] if nd: open('/tmp/installdisk', 'w').write(nd[0]) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk index 71a97e8e..dec536b7 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk @@ -2,6 +2,9 @@ import subprocess import os +class SilentException(Exception): + pass + class DiskInfo(object): def __init__(self, 
devname): if devname.startswith('nvme') and 'c' in devname: @@ -24,9 +27,11 @@ class DiskInfo(object): continue k, v = prop.split('=', 1) if k == 'DEVTYPE' and v != 'disk': + if v == 'partition': + raise SilentException('Partition') raise Exception('Not a disk') elif k == 'DM_NAME': - raise Exception('Device Mapper') + raise SilentException('Device Mapper') elif k == 'ID_MODEL': self.model = v elif k == 'DEVPATH': @@ -50,6 +55,8 @@ class DiskInfo(object): self.driver = v.replace('"', '') elif k == 'ATTRS{subsystype}': self.subsystype = v.replace('"', '') + elif k == 'ATTR{ro}' and v == '"1"': + raise Exception("Device is read-only") if not self.driver and 'imsm' not in self.mdcontainer and self.subsystype != 'nvm': raise Exception("No driver detected") if self.driver == 'sr': @@ -57,8 +64,8 @@ class DiskInfo(object): if os.path.exists('/sys/block/{0}/size'.format(self.name)): with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: self.size = int(sizesrc.read()) * 512 - if int(self.size) < 536870912: - raise Exception("Device too small for install") + if int(self.size) < 2147483648: + raise Exception("Device too small for install ({}MiB)".format(int(self.size)/1024/1024)) @property def priority(self): @@ -91,9 +98,11 @@ def main(): try: disk = DiskInfo(disk) disks.append(disk) + except SilentException: + pass except Exception as e: print("Skipping {0}: {1}".format(disk, str(e))) - nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + nd = [x.name for x in sorted(disks, key=lambda x: [x.priority, x.size])] if nd: open('/tmp/installdisk', 'w').write(nd[0]) From 8cfbf40a2e08e684a374e1d09477bb5293706a55 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Aug 2025 12:02:04 -0400 Subject: [PATCH 248/413] Implement capture of extra volume groups Provide a mechanism to capture non-boot hosted volume groups and map them to specified disks via a mapping. 
--- .../profiles/default/scripts/image2disk.py | 60 ++++++++++++++++--- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index 4a08716a..3c70a178 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -11,6 +11,10 @@ import struct import sys import subprocess import traceback +try: + import yaml +except ImportError: + yaml = None bootuuid = None vgname = 'localstorage' @@ -334,13 +338,16 @@ def had_swap(): newrootdev = None newswapdev = None +vgmap = None def install_to_disk(imgpath): + global vgmap global bootuuid global newrootdev global newswapdev global vgname global oldvgname lvmvols = {} + vgmap = {} deftotsize = 0 mintotsize = 0 deflvmsize = 0 @@ -365,24 +372,30 @@ def install_to_disk(imgpath): mintotsize = swapsize for fs in get_image_metadata(imgpath): allvols.append(fs) - deftotsize += fs['initsize'] - mintotsize += fs['minsize'] - if fs['initsize'] > biggestsize: - biggestfs = fs - biggestsize = fs['initsize'] + if fs['device'].startswith('/dev/mapper'): - oldvgname = fs['device'].rsplit('/', 1)[-1] + odevname = fs['device'].rsplit('/', 1)[-1] # if node has - then /dev/mapper will double up the hypen - if '_' in oldvgname and '-' in oldvgname.split('_', 1)[-1]: - oldvgname = oldvgname.rsplit('-', 1)[0].replace('--', '-') + if '_' in odevname and '-' in odevname.split('_', 1)[-1]: + oldvgname = odevname.rsplit('-', 1)[0].replace('--', '-') osname = oldvgname.split('_')[0] nodename = socket.gethostname().split('.')[0] vgname = '{}_{}'.format(osname, nodename) - lvmvols[fs['device'].replace('/dev/mapper/', '')] = fs + elif '-' in odevname: # unique one + vgmap[odevname] = odevname.split('-')[0] + lvmvols[odevname] = fs + + continue + lvmvols[odevname] = fs deflvmsize += fs['initsize'] minlvmsize 
+= fs['minsize'] else: plainvols[int(re.search('(\d+)$', fs['device'])[0])] = fs + if fs['initsize'] > biggestsize: + biggestfs = fs + biggestsize = fs['initsize'] + deftotsize += fs['initsize'] + mintotsize += fs['minsize'] with open('/tmp/installdisk') as diskin: instdisk = diskin.read() instdisk = '/dev/' + instdisk @@ -440,6 +453,28 @@ def install_to_disk(imgpath): lvmpart = get_partname(instdisk, volidx + 1) subprocess.check_call(['pvcreate', '-ff', '-y', lvmpart]) subprocess.check_call(['vgcreate', vgname, lvmpart]) + vgroupmap = {} + if yaml and vgmap: + with open('/tmp/volumegroupmap.yml') as mapin: + vgroupmap = yaml.safe_load(mapin) + donedisks = {} + for morevolname in vgmap: + morevg = vgmap[morevolname] + if morevg not in vgroupmap: + raise Exception("No mapping defined to create volume group {}".format(morevg)) + targdisk = vgroupmap[morevg] + if targdisk not in donedisks: + moreparted = PartedRunner(targdisk) + moreparted.run('mklabel gpt') + moreparted.run('mkpart lvm 0% 100%') + morelvmpart = get_partname(targdisk, 1) + subprocess.check_call(['pvcreate', '-ff', '-y', morelvmpart]) + subprocess.check_call(['vgcreate', morevg, morelvmpart]) + donedisks[targdisk] = 1 + morelvname = morevolname.split('-', 1)[1] + subprocess.check_call(['lvcreate', '-L', '{}b'.format(lvmvols[morevolname]['initsize']), '-y', '-n', morelvname, morevg]) + lvmvols[morevolname]['targetdisk'] = '/dev/{}/{}'.format(morevg, morelvname) + vginfo = subprocess.check_output(['vgdisplay', vgname, '--units', 'b']).decode('utf8') vginfo = vginfo.split('\n') pesize = 0 @@ -452,6 +487,9 @@ def install_to_disk(imgpath): pes = int(infline[4]) takeaway = swapsize // pesize for volidx in lvmvols: + if volidx in vgmap: + # was handled previously + continue vol = lvmvols[volidx] if vol is biggestfs: continue @@ -460,6 +498,10 @@ def install_to_disk(imgpath): biggestextents = pes - takeaway for volidx in lvmvols: vol = lvmvols[volidx] + if volidx in vgmap: + # was handled previously + continue 
+ if vol is biggestfs: extents = biggestextents else: From 85ddf528a2ccb2e9e97060901973a6e0d9c31fa6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Aug 2025 15:53:38 -0400 Subject: [PATCH 249/413] Add Confluent UUID to identity image The UUID and tls material usually comes from site, but some OSes may depend entirely on the identity image, so make the UUID available that way as well. --- confluent_server/confluent/plugins/deployment/identimage.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/plugins/deployment/identimage.py b/confluent_server/confluent/plugins/deployment/identimage.py index 4f57f2b6..c41c6e8f 100644 --- a/confluent_server/confluent/plugins/deployment/identimage.py +++ b/confluent_server/confluent/plugins/deployment/identimage.py @@ -21,6 +21,7 @@ import confluent.messages as msg import confluent.netutil as netutil import eventlet.green.subprocess as subprocess +import confluent.config.configmanager as cfm import os import shutil import tempfile @@ -51,6 +52,7 @@ def create_ident_image(node, configmanager): # It would be a reasonable enhancement to list all collective server addresses # restricted by 'managercandidates' ident['deploy_servers'] = [] + ident['confluent_uuid'] = cfm.get_global('confluent_uuid') for myaddr in netutil.get_my_addresses(): myaddr = socket.inet_ntop(myaddr[0], myaddr[1]) ident['deploy_servers'].append(myaddr) From bab169269c9eeff0e1a3c1154922fb737e2e07a9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 5 Aug 2025 08:39:18 -0400 Subject: [PATCH 250/413] Fix variable mistake in error handling --- confluent_client/bin/nodeconsole | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_client/bin/nodeconsole b/confluent_client/bin/nodeconsole index 37382dbd..62ec8b8a 100755 --- a/confluent_client/bin/nodeconsole +++ b/confluent_client/bin/nodeconsole @@ -244,7 +244,7 @@ def draw_text(text, width, height): draw_image(data, width, height, doscale=False) else: 
sys.stdout.write(text) - cursor_left(len(txt)) + cursor_left(len(text)) def draw_image(data, width, height, doscale=True): imageformat = os.environ.get('CONFLUENT_IMAGE_PROTOCOL', 'kitty') From 816f3be2ed5d7a6fceacbeb8b9338b5bbe137ebc Mon Sep 17 00:00:00 2001 From: Hengli Kuang Date: Wed, 6 Aug 2025 04:31:38 -0400 Subject: [PATCH 251/413] Configure the logdirectory from the configuration file - Set the log directory using the configuration manager - Add _get_logdirectory function to retrieve the log directory path - Update _redirectoutput to use the new log directory setting --- confluent_server/confluent/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/confluent_server/confluent/main.py b/confluent_server/confluent/main.py index b0e3508a..b8f47e94 100644 --- a/confluent_server/confluent/main.py +++ b/confluent_server/confluent/main.py @@ -94,6 +94,7 @@ def _daemonize(): def _redirectoutput(): os.umask(63) + configmanager.set_global('logdirectory', _get_logdirectory()) sys.stdout = log.Logger('stdout', buffered=False) sys.stderr = log.Logger('stderr', buffered=False) @@ -340,3 +341,6 @@ def _get_connector_config(session): host = conf.get_option(session, 'bindhost') port = conf.get_int_option(session, 'bindport') return (host, port) + +def _get_logdirectory(): + return conf.get_option('globals', 'logdirectory') \ No newline at end of file From d99689f84b6163556a2014b5789cfa41e5881eed Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 6 Aug 2025 11:56:44 -0400 Subject: [PATCH 252/413] Have confluent2ansible support amending inventory Rather than writing from scratch each time, parse the existing file if present and merge new content without perturbing existing. 
--- confluent_client/bin/confluent2ansible | 63 ++++++++++++++++++-------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/confluent_client/bin/confluent2ansible b/confluent_client/bin/confluent2ansible index e6da2050..ba27e429 100644 --- a/confluent_client/bin/confluent2ansible +++ b/confluent_client/bin/confluent2ansible @@ -1,8 +1,9 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 import optparse import signal import sys import os + try: signal.signal(signal.SIGPIPE, signal.SIG_DFL) except AttributeError: @@ -23,12 +24,15 @@ def lookupdata(data, key): ret = '' return ret + def main(): argparser = optparse.OptionParser( - usage='''\n %prog noderange -o ansible.hosts - \n ''') + usage='''\n %prog noderange -o ansible.hosts -a + ''') argparser.add_option('-o', '--output', - help='Ansible hosts file') + help='Writes an Ansible hosts file') + argparser.add_option('-a', '--append', action='store_true', + help='Appends to existing hosts file') (options, args) = argparser.parse_args() try: noderange = args[0] @@ -45,24 +49,43 @@ def main(): if node not in databynode: databynode[node] = {} databynode[node].update(res['databynode'][node]) + nodesbygroup = {} - with open(options.output, 'w') as importfile: - needempty = False - for node in sortutil.natural_sort(databynode): - data = databynode[node] - if not data.get('groups', []): - importfile.write(node + '\n') - needempty = True - for g in data.get('groups', []): - if g not in nodesbygroup: - nodesbygroup[g] = set([node]) + for node in sortutil.natural_sort(databynode): + data = databynode[node] + groups = data.get('groups', []) + if not groups: + nodesbygroup.setdefault('', set()).add(node.strip().lower()) + else: + for g in groups: + nodesbygroup.setdefault(g, set()).add(node.strip().lower()) + + existing_data = {} + if options.append and os.path.exists(options.output): + current_group = '' + with open(options.output, 'r') as f: + for line in f: + line = line.strip().lower() + if not line: + continue + if 
line.startswith('[') and line.endswith(']'): + current_group = line[1:-1] + existing_data.setdefault(current_group, set()) else: - nodesbygroup[g].add(node) - if needempty: - importfile.write('\n') - for group in sortutil.natural_sort(nodesbygroup): - importfile.write('[{0}]\n'.format(group)) - for node in sortutil.natural_sort(nodesbygroup[group]): + existing_data.setdefault(current_group, set()).add(line) + + for group, nodes in nodesbygroup.items(): + nodes = {n.strip().lower() for n in nodes} + current_nodes = existing_data.get(group, set()) + new_nodes = nodes - current_nodes + if new_nodes: + existing_data.setdefault(group, set()).update(nodes) + + with open(options.output, 'w') as importfile: + for group in sortutil.natural_sort(existing_data.keys()): + if group: + importfile.write('[{0}]\n'.format(group)) + for node in sortutil.natural_sort(existing_data[group]): importfile.write('{0}\n'.format(node)) importfile.write('\n') From 63bb5f4d1b3d225d9e606872133b378ecc292ff1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 6 Aug 2025 14:15:43 -0400 Subject: [PATCH 253/413] Have routed diskless retry checking for interface If the network interface is slow to link up, wait until something comes up, ignoring infiniband for now. --- .../usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index cdcc12fd..cb7b7372 100644 --- a/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -107,7 +107,11 @@ if [ ! 
-z "$confluentsrv" ]; then /usr/libexec/nm-initrd-generator ip=:dhcp6 else confluenthttpsrv=$confluentsrv - ifname=$(ip -br link|grep LOWER_UP|grep -v UNKNOWN|head -n 1|awk '{print $1}') + ifname="" + while [ -z "$ifname" ]; do + ifname=$(ip -br link|grep LOWER_UP|grep -v ib|grep -v UNKNOWN|head -n 1|awk '{print $1}') + sleep 0.5 + done echo -n "Attempting to use dhcp to bring up $ifname..." dhclient $ifname while ! ip -br addr show dev $ifname | grep \\. > /dev/null; do From aafa65274cd55dae04d2ebc335db52340e088301 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 6 Aug 2025 14:27:05 -0400 Subject: [PATCH 254/413] Do not trip over .discinfo file contents --- confluent_server/confluent/osimage.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index f0a197c2..d92edd94 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -358,6 +358,8 @@ def check_fedora(isoinfo): prodlines = prodinfo.split(b'\n') if len(prodlines) < 3: return None + if not prodlines[1].split(): + return None prod = prodlines[1].split()[0] if prod != b'Fedora': return None From 36ce0922fcca1e9cf14a40af1754c292d34c13a0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 6 Aug 2025 14:56:12 -0400 Subject: [PATCH 255/413] Implement openEuler scripted install openEuler 24.03 seems closest to el8, so start with that baseline. Modify el8 handling to recognize a totally missing CA bundle and assume the original el8 location. Have osimage recognize openEuler media by presence of openEuler release file. 
--- .../usr/lib/dracut/hooks/cmdline/01-confluent.sh | 8 +++++--- confluent_server/confluent/osimage.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/cmdline/01-confluent.sh b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/cmdline/01-confluent.sh index 84882ba4..09ab1212 100644 --- a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/cmdline/01-confluent.sh +++ b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/cmdline/01-confluent.sh @@ -3,10 +3,12 @@ echo -n "" >> /tmp/net.ifaces echo -n "" > /tmp/01-autocons.devnode BUNDLENAME=/etc/pki/tls/certs/ca-bundle.crt if [ ! -e "$BUNDLENAME" ]; then - BUNDLENAME=/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem mkdir -p /etc/pki/tls/certs - ln -s $BUNDLENAME /etc/pki/tls/certs/ca-bundle.crt -fi + if [ -e /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem ]; then + BUNDLENAME=/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem + ln -s $BUNDLENAME /etc/pki/tls/certs/ca-bundle.crt + fi +fi cat /tls/*.0 >> $BUNDLENAME if ! 
grep console= /proc/cmdline >& /dev/null; then autocons=$(/opt/confluent/bin/autocons) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index d92edd94..8a3c2fb5 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -318,6 +318,16 @@ def extract_file(archfile, flags=0, callback=lambda x: None, imginfo=(), extract return pctdone +def check_openeuler(isoinfo): + for entry in isoinfo[0]: + if 'openEuler-release-24.03' in entry: + ver = entry.split('-')[2] + arch = entry.split('.')[-2] + cat = 'el9' + break + else: + return None + return {'name': 'openeuler-{0}-{1}'.format(ver, arch), 'method': EXTRACT, 'category': cat} def check_rocky(isoinfo): ver = None arch = None From d43de05b097e5abd5088b88b238ff7f46704f1c0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 7 Aug 2025 14:56:12 -0400 Subject: [PATCH 256/413] Present error when user tries to operate undefined PDU outlets --- confluent_server/confluent/plugins/hardwaremanagement/pdu.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/pdu.py b/confluent_server/confluent/plugins/hardwaremanagement/pdu.py index 1da24daa..4ca3f50a 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/pdu.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/pdu.py @@ -42,6 +42,8 @@ def retrieve(nodes, element, configmanager, inputdata): inletname = element[-1] outlets = get_outlets(nodes, emebs, inletname) for node in outlets: + if not outlets[node]: + yield msg.ConfluentTargetNotFound(node, 'No matching inlets defined for node in "power.*" attributes') for pgroup in outlets[node]: pdu = outlets[node][pgroup]['pdu'] outlet = outlets[node][pgroup]['outlet'] @@ -109,6 +111,8 @@ def update(nodes, element, configmanager, inputdata): gp = greenpool.GreenPool(64) outlets = get_outlets(nodes, emebs, inletname) for node in outlets: + if not outlets[node]: + yield 
msg.ConfluentTargetNotFound(node, 'No matching inlets defined for node in "power.*" attributes') for pgroup in outlets[node]: pdu = outlets[node][pgroup]['pdu'] outlet = outlets[node][pgroup]['outlet'] From 960a890530c2996fb93c8e913d830e4a6a752b74 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 7 Aug 2025 15:15:14 -0400 Subject: [PATCH 257/413] Fix discovery mac deletion to also clean associated node records --- confluent_server/confluent/discovery/core.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index b74e26a7..38c1a733 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -316,6 +316,8 @@ def list_matching_nodes(criteria): retnodes = [] for node in known_nodes: for mac in known_nodes[node]: + if mac not in known_info: + continue info = known_info[mac] if _info_matches(info, criteria): retnodes.append(node) @@ -613,7 +615,11 @@ def handle_api_request(configmanager, inputdata, operation, pathcomponents): return [msg.AssignedResource(inputdata['node'])] elif operation == 'delete': mac = _get_mac_from_query(pathcomponents) - del known_info[mac] + for node in known_nodes: + if mac in known_nodes[node]: + del known_nodes[node][mac] + if mac in known_info: + del known_info[mac] return [msg.DeletedResource(mac)] raise exc.NotImplementedException( 'Unable to {0} to {1}'.format(operation, '/'.join(pathcomponents))) From a1cf8023c6bfda3f4d6ddf6146661ed883b133b0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 8 Aug 2025 14:30:30 -0400 Subject: [PATCH 258/413] Clarify that nodesensors csv may have a different order CSV must keep sensors grouped and consistent, so it waits until it gathers all results to sort. Normally, it presents the results as quickly as the API provides it. 
This results in some different ordering for the "fast as API provides it" versus "delayed to group the values together" behavior. --- confluent_client/doc/man/nodesensors.ronn | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/confluent_client/doc/man/nodesensors.ronn b/confluent_client/doc/man/nodesensors.ronn index 483bffc3..f7dfe360 100644 --- a/confluent_client/doc/man/nodesensors.ronn +++ b/confluent_client/doc/man/nodesensors.ronn @@ -19,7 +19,9 @@ interval of 1 second is used. ## OPTIONS * `-c`, `--csv`: - Organize output into CSV format, one sensor per column. + Organize output into CSV format, one sensor per column. Note that while normally nodesensors reports + sensors in order as returned by server, CSV output enforces consistency by sorting after receiving + the results, which may have a different ordering than non-CSV usage of nodesensors. * `-i`, `--interval`=**SECONDS**: Repeat data gathering waiting, waiting the specified time between samples. Unless `-n` is From e01701bcf13a50e4d3696b9c9fd8906b5f260418 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 8 Aug 2025 15:51:45 -0400 Subject: [PATCH 259/413] Speed up log initialization Cache the directory list over a few seconds to avoid excessive filesystem calls. Also switching to a more potent regex to avoid wasting time on timestamped files. --- confluent_server/confluent/log.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/log.py b/confluent_server/confluent/log.py index ee2444c1..72e232f1 100644 --- a/confluent_server/confluent/log.py +++ b/confluent_server/confluent/log.py @@ -386,14 +386,18 @@ class TimedAndSizeRotatingFileHandler(BaseRotatingHandler): f['time'] in t_list[:-(self.backupCount - 1)]] return result + dirContents = {} def initSizeRollingCount(self): """ Init the max number of log files for current time. """ dirName, baseName = os.path.split(self.textpath) prefix = baseName + "." 
- filePaths = glob.glob(os.path.join(dirName, "%s*" % prefix)) - fileNames = [os.path.split(f)[1] for f in filePaths] + if dirName not in self.dirContents or self.dirContents[dirName][1] < time.time(): + self.dirContents[dirName] = (os.listdir(dirName), time.time() + 5) + matchexp = re.compile(f'^{prefix}\.\d+$') + fileNames = [f for f in self.dirContents[dirName][0] + if matchexp.match(f)] plen = len(prefix) for fileName in fileNames: suffix = fileName[plen:] From 902ff43a9bc88f3e2d9b62cb75553b14f79f6042 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 8 Aug 2025 17:27:26 -0400 Subject: [PATCH 260/413] Extend certificate management Provide checks for nginx config and apache configuration, perhaps even concurrently. Latch on the first match, since we are taking care of IP based SANs and subsequent server/virtualhost sections are irrelevant. Latch onto a chain file, if indicated in the apache configuration, placing our CA in the chain. For nginx, put our CA in the cert, since nginx uses the 'certificate' file as the chain. In this scenario, a cross-signed CA cert is possible. 
--- confluent_server/confluent/certutil.py | 102 +++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 7 deletions(-) diff --git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index 3b8e5ef5..a60a585e 100644 --- a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -53,22 +53,61 @@ def get_ip_addresses(): def check_apache_config(path): keypath = None certpath = None + chainpath = None with open(path, 'r') as openf: webconf = openf.read() + insection = False + # we always manipulate the first VirtualHost section + # since we are managing IP based SANs, then SNI + # can never match anything but the first VirtualHost for line in webconf.split('\n'): line = line.strip() line = line.split('#')[0] - if line.startswith('SSLCertificateFile'): - _, certpath = line.split(None, 1) - if line.startswith('SSLCertificateKeyFile'): + if not certpath and line.startswith('SSLCertificateFile'): + insection = True + if not certpath: + _, certpath = line.split(None, 1) + if not keypath and line.startswith('SSLCertificateKeyFile'): + insection = True _, keypath = line.split(None, 1) + if not chainpath and line.startswith('SSLCertificateChainFile'): + insection = True + _, chainpath = line.split(None, 1) + if insection and line.startswith(''): + break + return keypath, certpath, chainpath + +def check_nginx_config(path): + keypath = None + certpath = None + # again, we only care about the first server section + # since IP won't trigger SNI matches down the configuration + with open(path, 'r') as openf: + webconf = openf.read() + for line in webconf.split('\n'): + if keypath and certpath: + break + line = line.strip() + line = line.split('#')[0] + for segment in line.split(';'): + if not certpath and segment.startswith('ssl_certificate'): + _, certpath = segment.split(None, 1) + if not keypath and segment.startswith('ssl_certificate_key'): + _, keypath = segment.split(None, 1) + if keypath: + keypath = 
keypath.strip('"') + if certpath: + certpath = certpath.strip('"') return keypath, certpath def get_certificate_paths(): keypath = None certpath = None + chainpath = None + ngkeypath = None + ngbundlepath = None if os.path.exists('/etc/httpd/conf.d/ssl.conf'): # redhat way - keypath, certpath = check_apache_config('/etc/httpd/conf.d/ssl.conf') + keypath, certpath, chainpath = check_apache_config('/etc/httpd/conf.d/ssl.conf') if not keypath and os.path.exists('/etc/apache2'): # suse way for currpath, _, files in os.walk('/etc/apache2'): for fname in files: @@ -80,8 +119,30 @@ def get_certificate_paths(): return None, None # Ambiguous... if kploc[0]: keypath, certpath = kploc + if os.path.exists('/etc/nginx'): # nginx way + for currpath, _, files in os.walk('/etc/nginx'): + if ngkeypath: + break + for fname in files: + if not fname.endswith('.conf'): + continue + ngkeypath, ngbundlepath = check_nginx_config(os.path.join(currpath, + fname)) + if ngkeypath: + break + tlsmateriallocation = {} + if keypath: + tlsmateriallocation.setdefault('keys', []).append(keypath) + if ngkeypath: + tlsmateriallocation.setdefault('keys', []).append(ngkeypath) + if certpath: + tlsmateriallocation.setdefault('certs', []).append(certpath) + if chainpath: + tlsmateriallocation.setdefault('chains', []).append(chainpath) + if ngbundlepath: + tlsmateriallocation.setdefault('bundles', []).append(ngbundlepath) + return tlsmateriallocation - return keypath, certpath def assure_tls_ca(): keyout, certout = ('/etc/confluent/tls/cakey.pem', '/etc/confluent/tls/cacert.pem') @@ -208,8 +269,12 @@ def create_simple_ca(keyout, certout): def create_certificate(keyout=None, certout=None, csrout=None): if not keyout: - keyout, certout = get_certificate_paths() - if not keyout: + tlsmateriallocation = get_certificate_paths() + keyout = tlsmateriallocation.get('keys', [None])[0] + certout = tlsmateriallocation.get('certs', [None])[0] + if not certout: + certout = tlsmateriallocation.get('bundles', [None])[0] 
+ if not keyout or not certout: raise Exception('Unable to locate TLS certificate path automatically') assure_tls_ca() shortname = socket.gethostname().split('.')[0] @@ -291,6 +356,29 @@ def create_certificate(keyout=None, certout=None, csrout=None): '-startdate', '19700101010101Z', '-enddate', '21000101010101Z', '-extfile', extconfig ]) + for keycopy in tlsmateriallocation.get('keys', []): + if keycopy != keyout: + shutil.copy2(keyout, keycopy) + for certcopy in tlsmateriallocation.get('certs', []): + if certcopy != certout: + shutil.copy2(certout, certcopy) + cacert = None + with open('/etc/confluent/tls/cacert.pem', 'rb') as cacertfile: + cacert = cacertfile.read() + for bundlecopy in tlsmateriallocation.get('bundles', []): + if bundlecopy != certout: + shutil.copy2(certout, bundlecopy) + with open(bundlecopy, 'ab') as bundlefile: + bundlefile.write(b'\n') + bundlefile.write(cacert) + for chaincopy in tlsmateriallocation.get('chains', []): + if chaincopy != certout: + with open(chaincopy, 'wb') as chainfile: + chainfile.write(cacert) + else: + with open(chaincopy, 'ab') as chainfile: + chainfile.write(b'\n') + chainfile.write(cacert) finally: os.remove(tmpconfig) if needcsr: From 51afcc68a7edd1f6a577826a7f91e1957f6b7e2a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 11 Aug 2025 10:14:05 -0400 Subject: [PATCH 261/413] Add debian 13 support Debian 13 changes the installer initramfs to have lib be under usr, compensate. 
--- confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl | 8 ++++++-- confluent_osdeploy/confluent_osdeploy.spec.tmpl | 7 +++++-- confluent_server/confluent/osimage.py | 7 ++++++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl index c473cb0b..8e9a22a1 100644 --- a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl @@ -28,7 +28,11 @@ cp -a el8bin/* . ln -s el8 el9 ln -s el8 el10 mv el10/initramfs/usr el10/initramfs/var -for os in rhvh4 el7 genesis el8 suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9 el10; do +cp -a debian debian13 +mkdir -p debian13/initrafms/usr +mv debian13/initrafms/lib debian13/initrafms/usr/ +mv el10/initramfs/usr el10/initramfs/var +for os in rhvh4 el7 genesis el8 suse15 debian debian13 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9 el10; do mkdir ${os}out cd ${os}out if [ -d ../${os}bin ]; then @@ -82,7 +86,7 @@ cp -a esxi7 esxi8 %install mkdir -p %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ #cp LICENSE %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ -for os in rhvh4 el7 el8 el9 el10 genesis suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 coreos; do +for os in rhvh4 el7 el8 el9 el10 debian debian13 genesis suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 coreos; do mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs/aarch64/ cp ${os}out/addons.* %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs/aarch64/ if [ -d ${os}disklessout ]; then diff --git a/confluent_osdeploy/confluent_osdeploy.spec.tmpl b/confluent_osdeploy/confluent_osdeploy.spec.tmpl index c53307ff..1855b4a0 100644 --- a/confluent_osdeploy/confluent_osdeploy.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy.spec.tmpl @@ -29,8 +29,11 @@ This contains support utilities for enabling deployment of x86_64 
architecture s #cd .. ln -s el8 el9 cp -a el8 el10 +cp -a debian debian13 +mkdir -p debian13/initrafms/usr +mv debian13/initrafms/lib debian13/initrafms/usr/ mv el10/initramfs/usr el10/initramfs/var -for os in rhvh4 el7 genesis el8 suse15 debian ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9 el10; do +for os in rhvh4 el7 genesis el8 suse15 debian debian13 ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9 el10; do mkdir ${os}out cd ${os}out if [ -d ../${os}bin ]; then @@ -86,7 +89,7 @@ cp -a esxi7 esxi9 %install mkdir -p %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ cp LICENSE %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ -for os in rhvh4 el7 el8 el9 el10 genesis suse15 ubuntu20.04 debian ubuntu18.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 esxi9 coreos; do +for os in rhvh4 el7 el8 el9 el10 genesis suse15 ubuntu20.04 debian debian13 ubuntu18.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 esxi9 coreos; do mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/profiles cp ${os}out/addons.* %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 8a3c2fb5..80673bb0 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -510,10 +510,15 @@ def check_debian(isoinfo): raise Exception("Unsupported debian architecture {}".format(arch)) arch = 'x86_64' name = 'debian-{0}-{1}'.format(version, arch) + major = int(major) + if major > 12: + category = 'debian13' + else: + category = 'debian' return { 'name': name, 'method': EXTRACT, - 'category': 'debian', + 'category': category, } From c0a99f63a5d4b456a0286caf31ea809bace3d7e5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 11 Aug 2025 10:31:55 -0400 Subject: [PATCH 262/413] Correct typo in rpm spec changes --- confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl | 4 ++-- 
confluent_osdeploy/confluent_osdeploy.spec.tmpl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl index 8e9a22a1..ecc0c445 100644 --- a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl @@ -29,8 +29,8 @@ ln -s el8 el9 ln -s el8 el10 mv el10/initramfs/usr el10/initramfs/var cp -a debian debian13 -mkdir -p debian13/initrafms/usr -mv debian13/initrafms/lib debian13/initrafms/usr/ +mkdir -p debian13/initramfs/usr +mv debian13/initramfs/lib debian13/initramfs/usr/ mv el10/initramfs/usr el10/initramfs/var for os in rhvh4 el7 genesis el8 suse15 debian debian13 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9 el10; do mkdir ${os}out diff --git a/confluent_osdeploy/confluent_osdeploy.spec.tmpl b/confluent_osdeploy/confluent_osdeploy.spec.tmpl index 1855b4a0..5c977b2d 100644 --- a/confluent_osdeploy/confluent_osdeploy.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy.spec.tmpl @@ -30,8 +30,8 @@ This contains support utilities for enabling deployment of x86_64 architecture s ln -s el8 el9 cp -a el8 el10 cp -a debian debian13 -mkdir -p debian13/initrafms/usr -mv debian13/initrafms/lib debian13/initrafms/usr/ +mkdir -p debian13/initramfs/usr +mv debian13/initramfs/lib debian13/initramfs/usr/ mv el10/initramfs/usr el10/initramfs/var for os in rhvh4 el7 genesis el8 suse15 debian debian13 ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9 el10; do mkdir ${os}out From c9b9275bb162a9dd920c12c70aa12e299ab747f3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 11 Aug 2025 11:46:35 -0400 Subject: [PATCH 263/413] Support clear of custom attributes middle name attributes and custom attributes should be checked for explicitly during a clear. 
--- .../confluent/plugins/configuration/attributes.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/confluent_server/confluent/plugins/configuration/attributes.py b/confluent_server/confluent/plugins/configuration/attributes.py index 434d6c50..6ee68537 100644 --- a/confluent_server/confluent/plugins/configuration/attributes.py +++ b/confluent_server/confluent/plugins/configuration/attributes.py @@ -319,6 +319,12 @@ def update_nodes(nodes, element, configmanager, inputdata): if fnmatch(candattrib, attrib): clearattribs.append(candattrib) foundattrib = True + currnodeattrs = configmanager.get_node_attributes(node, attrib) + for matchattrib in currnodeattrs.get(node, {}): + if matchattrib != attrib: + continue + clearattribs.append(matchattrib) + foundattrib = True if not foundattrib: raise exc.InvalidArgumentException("No attribute matches '" + attrib + "' (try wildcard if trying to clear a group)") elif '*' in attrib: From fc6c1495d3b00182aa297173c8e1de33426dbb32 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 11 Aug 2025 12:10:33 -0400 Subject: [PATCH 264/413] Change comment to not have unicode Some environments don't like the UTF --- confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient index cade07c2..5bd85761 100644 --- a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient +++ b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient @@ -464,7 +464,7 @@ def get_available_nics(): parts = re.split(r'\s{2,}', line.strip()) if len(parts) >= 5: nic_name = parts[0] - nic_status = parts[4] # "Link Status" este al 5-lea câmp + nic_status = parts[4] # "Link Status" is the 5th field available_nics[nic_name] = nic_status return available_nics From 58b6a2d317f71140653e9625475ce82d0adaf5b0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson 
Date: Mon, 11 Aug 2025 12:20:10 -0400 Subject: [PATCH 265/413] Fix python2 incompatibility with apiclient --- confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient index 5bd85761..29393902 100644 --- a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient +++ b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient @@ -509,7 +509,7 @@ if __name__ == '__main__': try: fix_vswitch() except Exception as e: - print(f"fix_vswitch() error: {e}") + print("fix_vswitch() error: {}".format(e)) sys.argv.remove('-f') sys.exit(0) usejson = False From 836b629986643d0787501bb6211d7c9acb3c03d6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 11 Aug 2025 12:31:44 -0400 Subject: [PATCH 266/413] Remove unused json import from apiclient --- confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient | 1 - 1 file changed, 1 deletion(-) diff --git a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient index 29393902..d468b4d9 100644 --- a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient +++ b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient @@ -15,7 +15,6 @@ import sys import struct import time import re -import json class InvalidApiKey(Exception): pass From 465e985cc744b3b162611c680046710b058b014e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 11 Aug 2025 13:55:36 -0400 Subject: [PATCH 267/413] Extract and use HWE by default for Ubuntu 24.04 --- .../profiles/default/initprofile.sh | 21 +++++++++++++------ confluent_server/confluent/osimage.py | 13 ++++++++---- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh 
b/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh index cebcd41d..3d6127b8 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh @@ -1,14 +1,23 @@ -#!/bin/sh -sed -i 's/label: ubuntu/label: Ubuntu/' $2/profile.yaml && \ -ln -s $1/casper/vmlinuz $2/boot/kernel && \ -ln -s $1/casper/initrd $2/boot/initramfs/distribution && \ -mkdir -p $2/boot/efi/boot && \ +#!/bin/bash +set -e +sed -i 's/label: ubuntu/label: Ubuntu/' $2/profile.yaml +if [ -e $1/casper/hwe-vmlinuz ]; then + ln -s $1/casper/hwe-vmlinuz $2/boot/kernel +else + ln -s $1/casper/vmlinuz $2/boot/kernel +fi +if [ -e $1/casper/hwe-initrd ]; then + ln -s $1/casper/hwe-initrd $2/boot/initramfs/distribution +else + ln -s $1/casper/initrd $2/boot/initramfs/distribution +fi +mkdir -p $2/boot/efi/boot if [ -d $1/EFI/boot/ ]; then ln -s $1/EFI/boot/* $2/boot/efi/boot elif [ -d $1/efi/boot/ ]; then ln -s $1/efi/boot/* $2/boot/efi/boot else - echo "Unrecogrized boot contents in media" >&2 + echo "Unrecognized boot contents in media" >&2 exit 1 fi diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 80673bb0..060f8794 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -250,8 +250,13 @@ def extract_entries(entries, flags=0, callback=None, totalsize=None, extractlist for entry in entries: if str(entry).endswith('TRANS.TBL'): continue - if extractlist and str(entry).lower() not in extractlist: - continue + if extractlist: + normname = str(entry).lower() + for extent in extractlist: + if fnmatch(normname, extent): + break + else: + continue write_header(write_p, entry._entry_p) read_p = entry._archive_p while 1: @@ -553,11 +558,11 @@ def check_ubuntu(isoinfo): 'method': EXTRACT, 'category': 'ubuntu{0}'.format(major)} elif 'efi/boot/bootaa64.efi' in isoinfo[0]: - exlist = ['casper/vmlinuz', 'casper/initrd', + exlist = 
['casper/*vmlinuz', 'casper/*initrd', 'efi/boot/bootaa64.efi', 'efi/boot/grubaa64.efi' ] else: - exlist = ['casper/vmlinuz', 'casper/initrd', + exlist = ['casper/*vmlinuz', 'casper/*initrd', 'efi/boot/bootx64.efi', 'efi/boot/grubx64.efi' ] return {'name': 'ubuntu-{0}-{1}'.format(ver, arch), From 5ac0cccc4d2282245cf0d1a3c20e7515e7d55b3a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 11 Aug 2025 17:11:18 -0400 Subject: [PATCH 268/413] Update proxmoxve for trixie Have the proxmox post script adaptive between Debian 12 or 13 --- .../default/scripts/proxmox/proxmoxve.post | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.post b/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.post index fc413997..8cc89faf 100644 --- a/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.post +++ b/confluent_osdeploy/debian/profiles/default/scripts/proxmox/proxmoxve.post @@ -2,12 +2,18 @@ # This script would run in post.d # export DEBIAN_FRONTEND=noninteractive -echo "deb [arch=amd64] http://download.proxmox.com/debian/pve bookworm pve-no-subscription" > /etc/apt/sources.list.d/pve-install-repo.list -wget https://enterprise.proxmox.com/debian/proxmox-release-bookworm.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg -sum=$(sha512sum /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg) -if [ "$sum" -ne "7da6fe34168adc6e479327ba517796d4702fa2f8b4f0a9833f5ea6e6b48f6507a6da403a274fe201595edc86a84463d50383d07f64bdde2e3658108db7d6dc87" ]; then +codename=$(grep ^VERSION_CODENAME /etc/os-release | cut -d= -f2) +echo "deb [arch=amd64] http://download.proxmox.com/debian/pve $codename pve-no-subscription" > /etc/apt/sources.list.d/pve-install-repo.list +wget https://enterprise.proxmox.com/debian/proxmox-release-$codename.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-$codename.gpg +sum=$(sha512sum 
/etc/apt/trusted.gpg.d/proxmox-release-$codename.gpg) +if [ $codename == "bookworm" ]; then + expectedsum=7da6fe34168adc6e479327ba517796d4702fa2f8b4f0a9833f5ea6e6b48f6507a6da403a274fe201595edc86a84463d50383d07f64bdde2e3658108db7d6dc87 +elif [ $codename == "trixie" ]; then + expectedsum=8678f2327c49276615288d7ca11e7d296bc8a2b96946fe565a9c81e533f9b15a5dbbad210a0ad5cd46d361ff1d3c4bac55844bc296beefa4f88b86e44e69fa51 +fi +if [ "$sum" -ne "$expectedsum" ]; then echo "Mismatch in fingerprint!" - rm /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg + rm /etc/apt/trusted.gpg.d/proxmox-release-$codename.gpg exit 1 fi apt-get update && apt-get -y full-upgrade < /dev/null From dbda4f45a1c49ec2cbbd357e99a59620743eef70 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 12 Aug 2025 10:49:01 -0400 Subject: [PATCH 269/413] Add 'proxmox' to valid values for console options --- confluent_server/confluent/config/attributes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index 4251974d..5e0e97e7 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -376,7 +376,7 @@ node = { 'the managed node. If not specified, then console ' 'is disabled. 
"ipmi" should be specified for most ' 'systems if console is desired.'), - 'validvalues': ('ssh', 'ipmi', 'openbmc', 'tsmsol', 'vcenter'), + 'validvalues': ('ssh', 'ipmi', 'openbmc', 'tsmsol', 'vcenter', 'proxmox'), }, # 'virtualization.host': { # 'description': ('Hypervisor where this node does/should reside'), From a71804a13b020f81a5191b2ece87e0500f6c23ec Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 12 Aug 2025 11:47:39 -0400 Subject: [PATCH 270/413] Fix suse/ubuntu paths for apache config --- confluent_server/confluent/certutil.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index a60a585e..46ae2f69 100644 --- a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -116,9 +116,9 @@ def get_certificate_paths(): kploc = check_apache_config(os.path.join(currpath, fname)) if keypath and kploc[0] and keypath != kploc[0]: - return None, None # Ambiguous... + return {'error': 'Ambiguous...'} if kploc[0]: - keypath, certpath = kploc + keypath, certpath, chainpath = kploc if os.path.exists('/etc/nginx'): # nginx way for currpath, _, files in os.walk('/etc/nginx'): if ngkeypath: @@ -143,7 +143,6 @@ def get_certificate_paths(): tlsmateriallocation.setdefault('bundles', []).append(ngbundlepath) return tlsmateriallocation - def assure_tls_ca(): keyout, certout = ('/etc/confluent/tls/cakey.pem', '/etc/confluent/tls/cacert.pem') if not os.path.exists(certout): From 580c451945a05fdf824a29a8dee292b161b6eca2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 14 Aug 2025 07:24:28 -0400 Subject: [PATCH 271/413] Explicitly restart sshd on completion Some changes from setupssh may require sshd restart, perform it if running with the sshd unit running. 
--- confluent_osdeploy/common/profile/scripts/setupssh | 1 + confluent_osdeploy/debian/profiles/default/scripts/setupssh | 1 + 2 files changed, 2 insertions(+) diff --git a/confluent_osdeploy/common/profile/scripts/setupssh b/confluent_osdeploy/common/profile/scripts/setupssh index 6215a1d7..e6048808 100644 --- a/confluent_osdeploy/common/profile/scripts/setupssh +++ b/confluent_osdeploy/common/profile/scripts/setupssh @@ -42,3 +42,4 @@ confluentpython $confapiclient /confluent-api/self/nodelist | sed -e 's/^- //' > cat /etc/ssh/shosts.equiv > /root/.shosts cd - rm -rf $TMPDIR +systemctl try-restart sshd diff --git a/confluent_osdeploy/debian/profiles/default/scripts/setupssh b/confluent_osdeploy/debian/profiles/default/scripts/setupssh index 6215a1d7..e6048808 100644 --- a/confluent_osdeploy/debian/profiles/default/scripts/setupssh +++ b/confluent_osdeploy/debian/profiles/default/scripts/setupssh @@ -42,3 +42,4 @@ confluentpython $confapiclient /confluent-api/self/nodelist | sed -e 's/^- //' > cat /etc/ssh/shosts.equiv > /root/.shosts cd - rm -rf $TMPDIR +systemctl try-restart sshd From e7606e69bdf7c6744d4ffa3afc94e366554ac3fe Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Aug 2025 10:18:49 -0400 Subject: [PATCH 272/413] Wait for redfish account service to be ready before modifying users If an XCC is booting, it may appear before it's ready to use redfish to manage user accounts. Handle this by delaying the discovery until the service is ready. 
--- confluent_server/confluent/discovery/handlers/xcc.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index 77535ce3..23a6c437 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -477,6 +477,13 @@ class NodeHandler(immhandler.NodeHandler): tmpaccount = None while status != 200: tries -= 1 + rsp, status = wc.grab_json_response_with_status( + '/redfish/v1/AccountService/Accounts/{0}'.format(uid)) + if status >= 500: + if tries < 0: + raise Exception('Redfish account management failure') + eventlet.sleep(30) + continue rsp, status = wc.grab_json_response_with_status( '/redfish/v1/AccountService/Accounts/{0}'.format(uid), {'UserName': username}, method='PATCH') From 98e78dd43cf5ae74e8ed3d507f2cdaf2dde451ea Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Aug 2025 16:34:26 -0400 Subject: [PATCH 273/413] Reduce size of presumed diskless image for installtodisk A diskless image is likely to be significantly smaller, have it support install to smaller disks. 
--- .../el9-diskless/profiles/default/scripts/image2disk.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index 3c70a178..dc447d76 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -70,9 +70,9 @@ def get_image_metadata(imgpath): yield md else: # plausible filesystem structure to apply to a nominally "diskless" image - yield {'mount': '/', 'filesystem': 'xfs', 'minsize': 39513563136, 'initsize': 954128662528, 'flags': 'rw,seclabel,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota', 'device': '/dev/mapper/root', 'compressed_size': 27022069760} - yield {'mount': '/boot', 'filesystem': 'xfs', 'minsize': 232316928, 'initsize': 1006632960, 'flags': 'rw,seclabel,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota', 'device': '/dev/nvme1n1p2', 'compressed_size': 171462656} - yield {'mount': '/boot/efi', 'filesystem': 'vfat', 'minsize': 7835648, 'initsize': 627900416, 'flags': 'rw,relatime,fmask=0077,dmask=0077,codepage=437,iocharset=ascii,shortname=winnt,errors=remount-ro', 'device': '/dev/nvme1n1p1', 'compressed_size': 1576960} + yield {'mount': '/', 'filesystem': 'xfs', 'minsize': 4294967296, 'initsize': 954128662528, 'flags': 'rw,seclabel,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota', 'device': '/dev/mapper/root', 'compressed_size': 27022069760} + yield {'mount': '/boot', 'filesystem': 'xfs', 'minsize': 536870912, 'initsize': 1006632960, 'flags': 'rw,seclabel,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota', 'device': '/dev/nvme1n1p2', 'compressed_size': 171462656} + yield {'mount': '/boot/efi', 'filesystem': 'vfat', 'minsize': 33554432, 'initsize': 627900416, 'flags': 'rw,relatime,fmask=0077,dmask=0077,codepage=437,iocharset=ascii,shortname=winnt,errors=remount-ro', 
'device': '/dev/nvme1n1p1', 'compressed_size': 1576960} #raise Exception('Installation from single part image not supported') class PartedRunner(): From d4e91b1c7e780077fb3fcc27ffa8a2734065d1e5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 20 Aug 2025 08:49:11 -0400 Subject: [PATCH 274/413] Back port installtodisk to el8 diskless --- .../profiles/default/scripts/image2disk.py | 61 ++++++++++++++++--- .../profiles/default/scripts/installimage | 2 + .../profiles/default/scripts/post.sh | 6 +- 3 files changed, 61 insertions(+), 8 deletions(-) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py index 655aaedc..e98b15ae 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py @@ -56,7 +56,11 @@ def get_image_metadata(imgpath): for md in get_multipart_image_meta(img): yield md else: - raise Exception('Installation from single part image not supported') + yield {'mount': '/', 'filesystem': 'xfs', 'minsize': 4294967296, 'initsize': 954128662528, 'flags': 'rw,seclabel,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota', 'device': '/dev/mapper/root', 'compressed_size': 27022069760} + yield {'mount': '/boot', 'filesystem': 'xfs', 'minsize': 536870912, 'initsize': 1006632960, 'flags': 'rw,seclabel,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota', 'device': '/dev/nvme1n1p2', 'compressed_size': 171462656} + yield {'mount': '/boot/efi', 'filesystem': 'vfat', 'minsize': 33554432, 'initsize': 627900416, 'flags': 'rw,relatime,fmask=0077,dmask=0077,codepage=437,iocharset=ascii,shortname=winnt,errors=remount-ro', 'device': '/dev/nvme1n1p1', 'compressed_size': 1576960} + #raise Exception('Installation from single part image not supported') + class PartedRunner(): def __init__(self, disk): @@ -75,8 +79,17 @@ def fixup(rootdir, vols): for vol in vols: 
devbymount[vol['mount']] = vol['targetdisk'] fstabfile = os.path.join(rootdir, 'etc/fstab') - with open(fstabfile) as tfile: - fstab = tfile.read().split('\n') + if os.path.exists(fstabfile): + with open(fstabfile) as tfile: + fstab = tfile.read().split('\n') + else: + # fabricate a reference fstab + fstab = [ + "#ORIGFSTAB#/dev/mapper/root# / xfs defaults 0 0", + "#ORIGFSTAB#UUID=aaf9e0f9-aa4d-4d74-9e75-3537620cfe23# /boot xfs defaults 0 0", + "#ORIGFSTAB#UUID=C21D-B881# /boot/efi vfat umask=0077,shortname=winnt 0 2", + "#ORIGFSTAB#/dev/mapper/swap# none swap defaults 0 0", + ] while not fstab[0]: fstab = fstab[1:] if os.path.exists(os.path.join(rootdir, '.autorelabel')): @@ -126,8 +139,10 @@ def fixup(rootdir, vols): newcfg = ifcfg.split('/')[-1] newcfg = os.path.join(rootdir, 'etc/NetworkManager/system-connections/{0}'.format(newcfg)) shutil.copy2(ifcfg, newcfg) - shutil.rmtree(os.path.join(rootdir, 'etc/confluent/')) - shutil.copytree('/etc/confluent', os.path.join(rootdir, 'etc/confluent')) + rootconfluentdir = os.path.join(rootdir, 'etc/confluent/') + if os.path.exists(rootconfluentdir): + shutil.rmtree(rootconfluentdir)) + shutil.copytree('/etc/confluent', rootconfluentdir) if policy: sys.stdout.write('Applying SELinux labeling...') sys.stdout.flush() @@ -142,8 +157,24 @@ def fixup(rootdir, vols): grubsyscfg = os.path.join(rootdir, 'etc/sysconfig/grub') if not os.path.exists(grubsyscfg): grubsyscfg = os.path.join(rootdir, 'etc/default/grub') - with open(grubsyscfg) as defgrubin: - defgrub = defgrubin.read().split('\n') + if os.path.exists(grubsyscfg): + with open(grubsyscfg) as defgrubin: + defgrub = defgrubin.read().split('\n') + else: + defgrub = [ + 'GRUB_TIMEOUT=5', + 'GRUB_DISTRIBUTOR="$(sed ' + "'s, release .*$,,g'" + ' /etc/system-release)"', + 'GRUB_DEFAULT=saved', + 'GRUB_DISABLE_SUBMENU=true', + 'GRUB_TERMINAL=""', + 'GRUB_SERIAL_COMMAND=""', + 'GRUB_CMDLINE_LINUX="crashkernel=1G-4G:192M,4G-64G:256M,64G-:512M rd.lvm.lv=vg/root rd.lvm.lv=vg/swap"', 
+ 'GRUB_DISABLE_RECOVERY="true"', + 'GRUB_ENABLE_BLSCFG=true', + ] + if not os.path.exists(os.path.join(rootdir, "etc/kernel/cmdline")): + with open(os.path.join(rootdir, "etc/kernel/cmdline"), "w") as cmdlineout: + cmdlineout.write("root=/dev/mapper/localstorage-root rd.lvm.lv=localstorage/root") with open(grubsyscfg, 'w') as defgrubout: for gline in defgrub: gline = gline.split() @@ -159,6 +190,12 @@ def fixup(rootdir, vols): grubcfg = grubcfg[:-1] if len(grubcfg) == 1: grubcfg = grubcfg[0] + elif not grubcfg: + grubcfg = '/boot/grub2/grub.cfg' + paths = glob.glob(os.path.join(rootdir, 'boot/efi/EFI/*')) + for path in paths: + with open(os.path.join(path, 'grub.cfg'), 'w') as stubgrubout: + stubgrubout.write("search --no-floppy --root-dev-only --fs-uuid --set=dev " + bootuuid + "\nset prefix=($dev)/grub2\nexport $prefix\nconfigfile $prefix/grub.cfg\n") else: for gcfg in grubcfg: rgcfg = os.path.join(rootdir, gcfg[1:]) # gcfg has a leading / to get rid of @@ -214,10 +251,18 @@ def fixup(rootdir, vols): shimpath = subprocess.check_output(['find', os.path.join(rootdir, 'boot/efi'), '-name', 'shimx64.efi']).decode('utf8').strip() shimpath = shimpath.replace(rootdir, '/').replace('/boot/efi', '').replace('//', '/').replace('/', '\\') subprocess.check_call(['efibootmgr', '-c', '-d', targblock, '-l', shimpath, '--part', partnum]) + try: + os.makedirs(os.path.join(rootdir, 'opt/confluent/bin')) + except Exception: + pass + shutil.copy2('/opt/confluent/bin/apiclient', os.path.join(rootdir, 'opt/confluent/bin/apiclient')) #other network interfaces def had_swap(): + if not os.path.exists('/etc/fstab'): + # diskless source, assume swap + return True with open('/etc/fstab') as tabfile: tabs = tabfile.read().split('\n') for tab in tabs: @@ -362,6 +407,8 @@ def install_to_disk(imgpath): subprocess.check_call(['mount', vol['targetdisk'], '/run/imginst/targ']) source = vol['mount'].replace('/', '_') source = '/run/imginst/sources/' + source + if not os.path.exists(source): + 
source = '/run/imginst/sources/_' + vol['mount'] blankfsstat = os.statvfs('/run/imginst/targ') blankused = (blankfsstat.f_blocks - blankfsstat.f_bfree) * blankfsstat.f_bsize sys.stdout.write('\nWriting {0}: '.format(vol['mount'])) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/installimage b/confluent_osdeploy/el8-diskless/profiles/default/scripts/installimage index 2e791ce6..a880c3ee 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/installimage +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/installimage @@ -40,6 +40,8 @@ echo "Port 22" >> /etc/ssh/sshd_config echo 'Match LocalPort 22' >> /etc/ssh/sshd_config echo ' ChrootDirectory /sysroot/run/imginst/targ' >> /etc/ssh/sshd_config kill -HUP $(cat /run/sshd.pid) +cp /sysroot/etc/pki/ca-trust/source/anchors/* /sysroot/run/imginst/targ/etc/pki/ca-trust/source/anchors/ +chroot /sysroot/run/imginst/targ update-ca-trust chroot /sysroot/run/imginst/targ bash -c "source /etc/confluent/functions; run_remote post.sh" chroot /sysroot bash -c "umount \$(tac /proc/mounts|awk '{print \$2}'|grep ^/run/imginst/targ)" diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/post.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/post.sh index 3a52d128..a61bc67b 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/post.sh @@ -2,13 +2,17 @@ # This script is executed 'chrooted' into a cloned disk target before rebooting # - +if [ -f /etc/dracut.conf.d/diskless.conf ]; then + rm /etc/dracut.conf.d/diskless.conf +fi +for kver in /lib/modules/*; do kver=$(basename $kver); kernel-install add $kver /boot/vmlinuz-$kver; done nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') confluent_apikey=$(cat /etc/confluent/confluent.apikey) confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') confluent_profile=$(grep 
^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') export nodename confluent_mgr confluent_profile . /etc/confluent/functions +run_remote setupssh mkdir -p /var/log/confluent chmod 700 /var/log/confluent exec >> /var/log/confluent/confluent-post.log From c9c5165245a586070a1bc9230f910644707dfbc7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 20 Aug 2025 08:53:55 -0400 Subject: [PATCH 275/413] Fix syntax error in commit --- .../el8-diskless/profiles/default/scripts/image2disk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py index e98b15ae..13dbe169 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py @@ -141,7 +141,7 @@ def fixup(rootdir, vols): shutil.copy2(ifcfg, newcfg) rootconfluentdir = os.path.join(rootdir, 'etc/confluent/') if os.path.exists(rootconfluentdir): - shutil.rmtree(rootconfluentdir)) + shutil.rmtree(rootconfluentdir) shutil.copytree('/etc/confluent', rootconfluentdir) if policy: sys.stdout.write('Applying SELinux labeling...') From 9a85b9ee945b0e9910508167cecb2c2db6ed9aa8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 20 Aug 2025 09:55:26 -0400 Subject: [PATCH 276/413] Fixes for installtodisk for diskless for el9 --- .../profiles/default/scripts/image2disk.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index dc447d76..401aa7be 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -181,8 +181,10 @@ def fixup(rootdir, vols): elif ent.startswith('root='): 
newkcmdlineent.append('root={}'.format(newrootdev)) elif ent.startswith('rd.lvm.lv='): - ent = convert_lv(ent) - if ent: + nent = convert_lv(ent) + if nent: + newkcmdlineent.append(ent) + else: newkcmdlineent.append(ent) else: newkcmdlineent.append(ent) @@ -204,8 +206,10 @@ def fixup(rootdir, vols): elif cfgpart.startswith('resume='): newcfgparts.append('resume={}'.format(newswapdev)) elif cfgpart.startswith('rd.lvm.lv='): - cfgpart = convert_lv(cfgpart) - if cfgpart: + ncfgpart = convert_lv(cfgpart) + if ncfgpart: + newcfgparts.append(ncfgpart) + else: newcfgparts.append(cfgpart) else: newcfgparts.append(cfgpart) @@ -238,11 +242,11 @@ def fixup(rootdir, vols): elif ent.startswith('root='): newline.append('root={}'.format(newrootdev)) elif ent.startswith('rd.lvm.lv='): - ent = convert_lv(ent) - if ent: + nent = convert_lv(ent) + if nent: + newline.append(nent) + else newline.append(ent) - elif '""' in ent: - newline.append('""') else: newline.append(ent) defgrubout.write(' '.join(newline) + '\n') From afd2b6c219f9b06f452ca965ba2935c9f0455d49 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 20 Aug 2025 10:04:13 -0400 Subject: [PATCH 277/413] Add storage drivers to imgutil for el8 diskless --- imgutil/el8/dracut/installkernel | 1 + 1 file changed, 1 insertion(+) diff --git a/imgutil/el8/dracut/installkernel b/imgutil/el8/dracut/installkernel index 2f9b41ec..cb62b510 100644 --- a/imgutil/el8/dracut/installkernel +++ b/imgutil/el8/dracut/installkernel @@ -5,6 +5,7 @@ instmods nvme instmods cdc_ether r8152 instmods r8169 instmods vmxnet3 virtio_net +instmods virtio_scsi vmw_pvscsi instmods mptctl instmods mlx4_ib mlx5_ub ib_umad ib_ipoib instmods ice i40e hfi1 bnxt_en qed qede From 75c228dae4f2a6760077393accb2429112816d96 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 20 Aug 2025 10:10:01 -0400 Subject: [PATCH 278/413] Fix syntax mistake --- .../el9-diskless/profiles/default/scripts/image2disk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index 401aa7be..51a58c7d 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -245,7 +245,7 @@ def fixup(rootdir, vols): nent = convert_lv(ent) if nent: newline.append(nent) - else + else: newline.append(ent) else: newline.append(ent) From b32ded9c6a0c64daabb07588ec3e9859fc0be6f8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 20 Aug 2025 10:31:59 -0400 Subject: [PATCH 279/413] Fix skipping of quotation in grub config --- .../el8-diskless/profiles/default/scripts/image2disk.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py index 13dbe169..5aeaeefe 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py @@ -181,6 +181,8 @@ def fixup(rootdir, vols): newline = [] for ent in gline: if ent.startswith('resume=') or ent.startswith('rd.lvm.lv'): + if ent.endswith('"'): + newline.append('"') continue newline.append(ent) defgrubout.write(' '.join(newline) + '\n') From b2b2b5710b59c32b8487eed81b2bf63617aa810a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 20 Aug 2025 11:26:41 -0400 Subject: [PATCH 280/413] Fix up monolithing sshd_config for platforms that need it --- confluent_osdeploy/common/profile/scripts/setupssh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/confluent_osdeploy/common/profile/scripts/setupssh b/confluent_osdeploy/common/profile/scripts/setupssh index e6048808..63d5e462 100644 --- a/confluent_osdeploy/common/profile/scripts/setupssh +++ b/confluent_osdeploy/common/profile/scripts/setupssh @@ -16,6 +16,13 @@ if 
[ -d /etc/ssh/sshd_config.d/ -a ! -e /etc/ssh/sshd_config.d/90-confluent.conf echo HostbasedAuthentication yes >> /etc/ssh/sshd_config.d/90-confluent.conf echo HostbasedUsesNameFromPacketOnly yes >> /etc/ssh/sshd_config.d/90-confluent.conf echo IgnoreRhosts no >> /etc/ssh/sshd_config.d/90-confluent.conf +elif [ ! -d /etc/ssh/sshd_config.d/ ] && ! grep HostCertificate /etc/ssh/sshd_config > /dev/null; then + for cert in /etc/ssh/ssh*-cert.pub; do + echo HostCertificate $cert >> /etc/ssh/sshd_config + done + echo HostbasedAuthentication yes >> /etc/ssh/sshd_config + echo HostbasedUsesNameFromPacketOnly yes >> /etc/ssh/sshd_config + echo IgnoreRhosts no >> /etc/ssh/sshd_config fi TMPDIR=$(mktemp -d) @@ -30,6 +37,9 @@ for ca in ssh/*.ca; do echo '@cert-authority *' $LINE >> /etc/ssh/ssh_known_hosts.new mv /etc/ssh/ssh_known_hosts.new /etc/ssh/ssh_known_hosts done +mkdir -p /root/.ssh/ +chmod 700 /root/.ssh/ +touch /root/.ssh/authorized_keys for pubkey in ssh/*.*pubkey; do LINE=$(cat $pubkey) if [ -z "$LINE" ]; then continue; fi From ec3fcee7d7f3439bde2aa7cd4960551e17927299 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 20 Aug 2025 20:15:05 -0400 Subject: [PATCH 281/413] Implement updateboot for windows profiles --- confluent_server/confluent/osimage.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 060f8794..1cd7dc11 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -88,6 +88,14 @@ def update_boot(profilename): update_boot_linux(profiledir, profile, label) elif ostype == 'esxi': update_boot_esxi(profiledir, profile, label) + elif ostype == 'windows': + update_boot_windows(profiledir, profile, label) + +def update_boot_windows(profiledir, profile, label): + profname = os.path.basename(profiledir) + subprocess.check_call( + ['/opt/confluent/bin/dir2img', '{0}/boot'.format(profiledir), + '{0}/boot.img'.format(profiledir), 
profname], preexec_fn=relax_umask) def update_boot_esxi(profiledir, profile, label): profname = os.path.basename(profiledir) From 63bbe53448604f73f7c598a42ae8fe28f0a46a79 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 22 Aug 2025 08:39:40 -0400 Subject: [PATCH 282/413] Address numerous issues with 'installtodisk' for el8 Add missing pre.d directory to let user know they can use such scripts Preserve console directievs from kernelargs into installed system Retry umount during image2disk, if processes have the filesystem busy. Fix DNS behavior during post phase of installtodisk Invoke confignet properly during firstboot to set up additional interfaces. Have sshd run during the install from '/sysroot', for convenience Fix some cosmetic error output for setupssh --- .../common/profile/scripts/setupssh | 8 ++++--- .../profiles/default/scripts/firstboot.sh | 3 ++- .../profiles/default/scripts/image2disk.py | 23 +++++++++++++++---- .../profiles/default/scripts/installimage | 10 +++++++- .../profiles/default/scripts/post.sh | 2 ++ .../profiles/default/scripts/pre.d/.gitignore | 0 6 files changed, 37 insertions(+), 9 deletions(-) create mode 100644 confluent_osdeploy/el8-diskless/profiles/default/scripts/pre.d/.gitignore diff --git a/confluent_osdeploy/common/profile/scripts/setupssh b/confluent_osdeploy/common/profile/scripts/setupssh index 63d5e462..83c05fa1 100644 --- a/confluent_osdeploy/common/profile/scripts/setupssh +++ b/confluent_osdeploy/common/profile/scripts/setupssh @@ -31,9 +31,11 @@ confluentpython $confapiclient /confluent-public/site/initramfs.tgz -o initramfs tar xf initramfs.tgz for ca in ssh/*.ca; do LINE=$(cat $ca) - if [ -z "$LINE" ]; then continue; fi - cp -af /etc/ssh/ssh_known_hosts /etc/ssh/ssh_known_hosts.new - grep -v "$LINE" /etc/ssh/ssh_known_hosts > /etc/ssh/ssh_known_hosts.new + if [ -z "$LINE" ]; then continue; fi + if [ -f /etc/ssh/ssh_known_hosts ]; then + cp -af /etc/ssh/ssh_known_hosts /etc/ssh/ssh_known_hosts.new + grep -v 
"$LINE" /etc/ssh/ssh_known_hosts > /etc/ssh/ssh_known_hosts.new + fi echo '@cert-authority *' $LINE >> /etc/ssh/ssh_known_hosts.new mv /etc/ssh/ssh_known_hosts.new /etc/ssh/ssh_known_hosts done diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/firstboot.sh index 2bab4136..922dbcda 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/firstboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/firstboot.sh @@ -25,7 +25,8 @@ if [ ! -f /etc/confluent/firstboot.ran ]; then touch /etc/confluent/firstboot.ran cat /etc/confluent/tls/*.pem >> /etc/pki/tls/certs/ca-bundle.crt - + confluentpython /root/confignet + rm /root/confignet run_remote firstboot.custom # Firstboot scripts may be placed into firstboot.d, e.g. firstboot.d/01-firstaction.sh, firstboot.d/02-secondaction.sh run_remote_parts firstboot.d diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py index 5aeaeefe..a6951379 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py @@ -157,6 +157,15 @@ def fixup(rootdir, vols): grubsyscfg = os.path.join(rootdir, 'etc/sysconfig/grub') if not os.path.exists(grubsyscfg): grubsyscfg = os.path.join(rootdir, 'etc/default/grub') + currcmdline = [] + with open('/proc/cmdline') as cmdlinein: + cmdline = cmdlinein.read().strip() + for arg in cmdline.split(): + if arg.startswith('console='): + currcmdline.append(arg) + elif arg == 'quiet': + currcmdline.append(arg) + currcmdlinestr = ' '.join(currcmdline) if os.path.exists(grubsyscfg): with open(grubsyscfg) as defgrubin: defgrub = defgrubin.read().split('\n') @@ -168,13 +177,13 @@ def fixup(rootdir, vols): 'GRUB_DISABLE_SUBMENU=true', 'GRUB_TERMINAL=""', 'GRUB_SERIAL_COMMAND=""', - 
'GRUB_CMDLINE_LINUX="crashkernel=1G-4G:192M,4G-64G:256M,64G-:512M rd.lvm.lv=vg/root rd.lvm.lv=vg/swap"', + 'GRUB_CMDLINE_LINUX="{} crashkernel=1G-4G:192M,4G-64G:256M,64G-:512M rd.lvm.lv=vg/root rd.lvm.lv=vg/swap"'.format(currcmdlinestr), 'GRUB_DISABLE_RECOVERY="true"', 'GRUB_ENABLE_BLSCFG=true', ] if not os.path.exists(os.path.join(rootdir, "etc/kernel/cmdline")): with open(os.path.join(rootdir, "etc/kernel/cmdline"), "w") as cmdlineout: - cmdlineout.write("root=/dev/mapper/localstorage-root rd.lvm.lv=localstorage/root") + cmdlineout.write("{} root=/dev/mapper/localstorage-root rd.lvm.lv=localstorage/root".format(currcmdlinestr)) with open(grubsyscfg, 'w') as defgrubout: for gline in defgrub: gline = gline.split() @@ -468,8 +477,14 @@ def install_to_disk(imgpath): - - subprocess.check_call(['umount', '/run/imginst/targ']) + while True: + try: + subprocess.check_call(['umount', '/run/imginst/targ']) + except subprocess.CalledProcessError: + print("Failed to unmount /run/imginst/targ, retrying") + time.sleep(1) + else: + break for vol in allvols: subprocess.check_call(['mount', vol['targetdisk'], '/run/imginst/targ/' + vol['mount']]) fixup('/run/imginst/targ', allvols) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/installimage b/confluent_osdeploy/el8-diskless/profiles/default/scripts/installimage index a880c3ee..031b1479 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/installimage +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/installimage @@ -5,6 +5,7 @@ # and existing mounts of image (to take advantage of caching) mount -o bind /sys /sysroot/sys mount -o bind /dev /sysroot/dev +mount -o bind /dev/pts /sysroot/dev/pts mount -o bind /proc /sysroot/proc mount -o bind /run /sysroot/run @@ -21,8 +22,14 @@ else done fi cd /sysroot/run +cp /run/sshd.pid /tmp/dbgssh.pid +chroot /sysroot/ bash -c "/usr/sbin/sshd" chroot /sysroot/ bash -c "source /etc/confluent/functions; run_remote_python getinstalldisk" chroot 
/sysroot/ bash -c "source /etc/confluent/functions; run_remote_parts pre.d" +for nameserver in $(sed -n '/^nameservers:/,/^[^-]/p' /etc/confluent/confluent.deploycfg|grep ^- | cut -d ' ' -f 2|sed -e 's/ //'); do + echo "nameserver $nameserver" >> /sysroot/etc/resolv.conf +done +#chroot /sysroot/ bash -c "source /etc/confluent/functions; run_remote_python confignet" if [ ! -f /sysroot/tmp/installdisk ]; then echo 'Unable to find a suitable installation target device, ssh to port 2222 to investigate' while [ ! -f /sysroot/tmp/installdisk ]; do @@ -39,7 +46,8 @@ chroot /sysroot bash -c "source /etc/confluent/functions; run_remote_python imag echo "Port 22" >> /etc/ssh/sshd_config echo 'Match LocalPort 22' >> /etc/ssh/sshd_config echo ' ChrootDirectory /sysroot/run/imginst/targ' >> /etc/ssh/sshd_config -kill -HUP $(cat /run/sshd.pid) +kill $(cat /sysroot/var/run/sshd.pid) +kill -HUP $(cat /tmp/dbgssh.pid) cp /sysroot/etc/pki/ca-trust/source/anchors/* /sysroot/run/imginst/targ/etc/pki/ca-trust/source/anchors/ chroot /sysroot/run/imginst/targ update-ca-trust diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/post.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/post.sh index a61bc67b..23e13dfe 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/post.sh @@ -37,6 +37,8 @@ run_remote_parts post.d # Induce execution of remote configuration, e.g. 
ansible plays in ansible/post.d/ run_remote_config post.d +cd /root/ +fetch_remote confignet curl -sf -X POST -d 'status: staged' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_mgr/confluent-api/self/updatestatus kill $logshowpid diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/pre.d/.gitignore b/confluent_osdeploy/el8-diskless/profiles/default/scripts/pre.d/.gitignore new file mode 100644 index 00000000..e69de29b From 157641e37a36070207c3ab4920961eb758e83dbc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 25 Aug 2025 08:59:53 -0400 Subject: [PATCH 283/413] Fixup imported windows media Samba by default needs executable bit on files for them to be executable by windows. Only give executable bits to .exe files that are PE32, mitigating the chance the executable bit could mean anything for Linux. It could still mean something with binfmt misc hooks, but that shouldn't be done much. --- confluent_server/confluent/osimage.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 1cd7dc11..2501b62b 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -648,6 +648,32 @@ def fixup_coreos(targpath): bootimg.write(b'\x01') +def is_windows_executable(filename): + with open(filename, 'rb') as f: + header = f.read(2) + if header == b'MZ': + # seems to be DOS, but let's also make sure it is PE32 + f.seek(0x3c) + pe_offset = f.read(4) + offset = int.from_bytes(pe_offset, byteorder='little') + f.seek(offset) + pe_header = f.read(4) + if pe_header == b'PE\x00\x00': + return True + return False + + +def fixup_windows(targpath): + # windows needs the executable file to be executable, which samba + # manifests as following the executable bit + for root, _, files in os.walk(targpath): + for fname in files: + if fname.endswith('.exe'): + fpath = 
os.path.join(root, fname) + if is_windows_executable(fpath): + st = os.stat(fpath) + os.chmod(fpath, st.st_mode | 0o111) + def check_coreos(isoinfo): arch = 'x86_64' # TODO: would check magic of vmlinuz to see which arch if 'zipl.prm' in isoinfo[1]: From 1a64768fca3126b43fe9e7c1fbf9750b0f954b5b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Aug 2025 08:08:24 -0400 Subject: [PATCH 284/413] Carry forward EL* installtodisk to EL9 --- .../profiles/default/scripts/firstboot.sh | 4 +++- .../profiles/default/scripts/image2disk.py | 21 ++++++++++++++++--- .../profiles/default/scripts/installimage | 10 ++++++++- .../profiles/default/scripts/post.sh | 3 ++- .../profiles/default/scripts/pre.d/.gitignore | 0 5 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 confluent_osdeploy/el9-diskless/profiles/default/scripts/pre.d/.gitignore diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh index fabb9385..6a1c98b4 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh @@ -31,8 +31,10 @@ done if [ ! -f /etc/confluent/firstboot.ran ]; then touch /etc/confluent/firstboot.ran - cat /etc/confluent/tls/*.pem >> /etc/pki/tls/certs/ca-bundle.crt + cat /etc/confluent/tls/*.pem >> /etc/pki/tls/certs/ca-bundle.crt + confluentpython /root/confignet + rm /root/confignet run_remote firstboot.custom # Firstboot scripts may be placed into firstboot.d, e.g. 
firstboot.d/01-firstaction.sh, firstboot.d/02-secondaction.sh run_remote_parts firstboot.d diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index 51a58c7d..36ce84c6 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -170,6 +170,15 @@ def fixup(rootdir, vols): grubsyscfg = os.path.join(rootdir, 'etc/sysconfig/grub') if not os.path.exists(grubsyscfg): grubsyscfg = os.path.join(rootdir, 'etc/default/grub') + currcmdline = [] + with open('/proc/cmdline') as cmdlinein: + cmdline = cmdlinein.read().strip() + for arg in cmdline.split(): + if arg.startswith('console='): + currcmdline.append(arg) + elif arg == 'quiet': + currcmdline.append(arg) + currcmdlinestr = ' '.join(currcmdline) kcmdline = os.path.join(rootdir, 'etc/kernel/cmdline') if os.path.exists(kcmdline): with open(kcmdline) as kcmdlinein: @@ -225,13 +234,13 @@ def fixup(rootdir, vols): 'GRUB_DISABLE_SUBMENU=true', 'GRUB_TERMINAL=""', 'GRUB_SERIAL_COMMAND=""', - 'GRUB_CMDLINE_LINUX="crashkernel=1G-4G:192M,4G-64G:256M,64G-:512M rd.lvm.lv=vg/root rd.lvm.lv=vg/swap"', + 'GRUB_CMDLINE_LINUX="{}crashkernel=1G-4G:192M,4G-64G:256M,64G-:512M rd.lvm.lv=vg/root rd.lvm.lv=vg/swap"'.format(currcmdlinestr), 'GRUB_DISABLE_RECOVERY="true"', 'GRUB_ENABLE_BLSCFG=true', ] if not os.path.exists(os.path.join(rootdir, "etc/kernel/cmdline")): with open(os.path.join(rootdir, "etc/kernel/cmdline"), "w") as cmdlineout: - cmdlineout.write("root=/dev/mapper/localstorage-root rd.lvm.lv=localstorage/root") + cmdlineout.write("{} root=/dev/mapper/localstorage-root rd.lvm.lv=localstorage/root".format(currcmdlinestr)) with open(grubsyscfg, 'w') as defgrubout: for gline in defgrub: gline = gline.split() @@ -592,7 +601,13 @@ def install_to_disk(imgpath): - subprocess.check_call(['umount', '/run/imginst/targ']) + while True: + 
try: + subprocess.check_call(['umount', '/run/imginst/targ']) + break + except subprocess.CalledProcessError: + print("Failed to unmount /run/imginst/targ, retrying") + time.sleep(1) for vol in allvols: subprocess.check_call(['mount', vol['targetdisk'], '/run/imginst/targ/' + vol['mount']]) fixup('/run/imginst/targ', allvols) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage b/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage index c461173b..b342de7d 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage @@ -21,8 +21,15 @@ else done fi cd /sysroot/run +[ -f /run/sshd.pid ] && +cp /run/sshd.pid /tmp/dbgssh.pid +chroot /sysroot/ bash -c "/usr/sbin/sshd" chroot /sysroot/ bash -c "source /etc/confluent/functions; run_remote_python getinstalldisk" chroot /sysroot/ bash -c "source /etc/confluent/functions; run_remote_parts pre.d" +for nameserver in $(sed -n '/^nameservers:/,/^[^-]/p' /etc/confluent/confluent.deploycfg|grep ^- | cut -d ' ' -f 2|sed -e 's/ //'); do + echo "nameserver $nameserver" >> /sysroot/etc/resolv.conf +done + if [ ! -f /sysroot/tmp/installdisk ]; then echo 'Unable to find a suitable installation target device, ssh to port 2222 to investigate' while [ ! 
-f /sysroot/tmp/installdisk ]; do @@ -40,7 +47,8 @@ chroot /sysroot bash -c "source /etc/confluent/functions; run_remote_python imag echo "Port 22" >> /etc/ssh/sshd_config echo 'Match LocalPort 22' >> /etc/ssh/sshd_config echo ' ChrootDirectory /sysroot/run/imginst/targ' >> /etc/ssh/sshd_config -kill -HUP $(cat /run/sshd.pid) +kill $(cat /sysroot/var/run/sshd.pid) +[ -f /tmp/dbgssh.pid ] && kill -HUP $(cat /tmp/dbgssh.pid) cp /sysroot/etc/pki/ca-trust/source/anchors/* /sysroot/run/imginst/targ/etc/pki/ca-trust/source/anchors/ chroot /sysroot/run/imginst/targ update-ca-trust diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh index 914a12c3..0a09299b 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh @@ -43,7 +43,8 @@ run_remote_parts post.d # Induce execution of remote configuration, e.g. ansible plays in ansible/post.d/ run_remote_config post.d - +cd /root/ +fetch_remote confignet # rebuild initrd, pick up new drivers if needed dracut -f /boot/initramfs-$(uname -r).img $(uname -r) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/pre.d/.gitignore b/confluent_osdeploy/el9-diskless/profiles/default/scripts/pre.d/.gitignore new file mode 100644 index 00000000..e69de29b From 6be98c7e60fd7b120df5ccb516559583489db88c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Aug 2025 08:44:42 -0400 Subject: [PATCH 285/413] Fix leaking ssh-agent processes in selfcheck --- confluent_server/bin/confluent_selfcheck | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/confluent_server/bin/confluent_selfcheck b/confluent_server/bin/confluent_selfcheck index 4ce37fb5..14ced5d3 100755 --- a/confluent_server/bin/confluent_selfcheck +++ b/confluent_server/bin/confluent_selfcheck @@ -273,6 +273,8 @@ if __name__ == '__main__': emprint('Permissions 
incorrect on /etc/confluent/ssh/automation (Example resolution: chmod 600 /etc/confluent/ssh/automation)') else: emprint('Failed to load confluent automation key, syncfiles and profile ansible plays will not work (Example resolution: osdeploy initialize -a)') + if sshutil.agent_pid: + os.kill(int(sshutil.agent_pid), signal.SIGTERM) sys.exit(0) fprint('Checking for blocked insecure boot: ') if insecure_boot_attempts(): @@ -421,7 +423,9 @@ if __name__ == '__main__': else: emprint('Unknown error attempting confluent automation ssh:') sys.stderr.buffer.write(srun.stderr) - os.kill(int(sshutil.agent_pid), signal.SIGTERM) + if sshutil.agent_pid: + os.kill(int(sshutil.agent_pid), signal.SIGTERM) + sys.exit(0) else: print("Skipping node checks, no node specified (Example: confluent_selfcheck -n n1)") # possible checks: From a6a57e8590bd8bdd0ef20f6b1e875ae16b78cd05 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Aug 2025 08:59:10 -0400 Subject: [PATCH 286/413] Fix ssh operation during install in installtodisk for el9 --- .../el9-diskless/profiles/default/scripts/installimage | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage b/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage index b342de7d..ceb498d2 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage @@ -5,6 +5,7 @@ # and existing mounts of image (to take advantage of caching) mount -o bind /sys /sysroot/sys mount -o bind /dev /sysroot/dev +mount -o bind /dev/pts /sysroot/dev/pts mount -o bind /proc /sysroot/proc mount -o bind /run /sysroot/run From 87990c72c3a8080ccc34398a8322708c21bd93fc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Aug 2025 09:06:28 -0400 Subject: [PATCH 287/413] Make EL10 diskless consistent with EL9 --- .../el10-diskless/profiles/default/scripts/imageboot.sh | 8 +++++++- 1 file 
changed, 7 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh index 5f4c3189..d65b32b6 100644 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh @@ -129,4 +129,10 @@ ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ mv /lib/firmware /lib/firmware-ramfs ln -s /sysroot/lib/firmware /lib/firmware kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) -exec /opt/confluent/bin/start_root +if grep debugssh /proc/cmdline >& /dev/null; then + exec /opt/confluent/bin/start_root +else + rm -rf /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs /lib/firmware-ramfs /usr/lib64/libcrypto.so* /usr/lib64/systemd/ /kernel/ /usr/bin/ /usr/sbin/ /usr/libexec/ + exec /opt/confluent/bin/start_root -s # share mount namespace, keep kernel callbacks intact +fi + From ac8179b8672a12920f30e5cebd0528b0a2832d67 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Aug 2025 09:49:28 -0400 Subject: [PATCH 288/413] Amend swraid example script --- misc/swraid | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/misc/swraid b/misc/swraid index 836f1fb1..1391d8c8 100644 --- a/misc/swraid +++ b/misc/swraid @@ -4,10 +4,10 @@ mdadm --detail /dev/md*|grep 'Version : 1.0' >& /dev/null || ( lvm vgchange -a n mdadm -S -s NUMDEVS=$(for dev in $DEVICES; do - echo wipefs -a $dev + echo wipefs -a -f $dev done|wc -l) for dev in $DEVICES; do - wipefs -a $dev + wipefs -a -f $dev done # must use older metadata format to leave disks looking normal for uefi mdadm -C /dev/md/raid $DEVICES -n $NUMDEVS -e 1.0 -l $RAIDLEVEL @@ -15,5 +15,9 @@ mdadm -C /dev/md/raid $DEVICES -n $NUMDEVS -e 1.0 -l $RAIDLEVEL mdadm -S -s mdadm --assemble --scan ) +while [ ! -e /dev/md/raid ]; do + echo 'Waiting on array to be linked...' 
+ sleep 0.5 +done readlink /dev/md/raid|sed -e 's/.*\///' > /tmp/installdisk From a01eb64adc78b1d6f46dbcd400e8c5b236cb04ed Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Aug 2025 09:50:25 -0400 Subject: [PATCH 289/413] Remove disused function from confluent2ansible --- confluent_client/bin/confluent2ansible | 6 ------ 1 file changed, 6 deletions(-) diff --git a/confluent_client/bin/confluent2ansible b/confluent_client/bin/confluent2ansible index ba27e429..31a3971d 100644 --- a/confluent_client/bin/confluent2ansible +++ b/confluent_client/bin/confluent2ansible @@ -18,11 +18,6 @@ import confluent.client as client import confluent.sortutil as sortutil -def lookupdata(data, key): - ret = data.get(key, {}).get('value', '') - if ret is None: - ret = '' - return ret def main(): @@ -59,7 +54,6 @@ def main(): else: for g in groups: nodesbygroup.setdefault(g, set()).add(node.strip().lower()) - existing_data = {} if options.append and os.path.exists(options.output): current_group = '' From c26fba74e76d14ffe3495f484acb512f194f8904 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Aug 2025 09:52:21 -0400 Subject: [PATCH 290/413] Fix issues with EL10 installtodisk --- .../el9-diskless/profiles/default/scripts/image2disk.py | 6 +++--- .../el9-diskless/profiles/default/scripts/installimage | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index 36ce84c6..315c0827 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -318,8 +318,8 @@ def fixup(rootdir, vols): for vol in vols: if vol['mount'] == '/boot/efi': targdev = vol['targetdisk'] - partnum = re.search('(\d+)$', targdev).group(1) - targblock = re.search('(.*)\d+$', targdev).group(1) + partnum = re.search(r'(\d+)$', targdev).group(1) + 
targblock = re.search(r'(.*)\d+$', targdev).group(1) if targblock: if targblock.endswith('p') and 'nvme' in targblock: targblock = targblock[:-1] @@ -403,7 +403,7 @@ def install_to_disk(imgpath): deflvmsize += fs['initsize'] minlvmsize += fs['minsize'] else: - plainvols[int(re.search('(\d+)$', fs['device'])[0])] = fs + plainvols[int(re.search(r'(\d+)$', fs['device'])[0])] = fs if fs['initsize'] > biggestsize: biggestfs = fs biggestsize = fs['initsize'] diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage b/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage index ceb498d2..eef9da18 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage @@ -8,6 +8,9 @@ mount -o bind /dev /sysroot/dev mount -o bind /dev/pts /sysroot/dev/pts mount -o bind /proc /sysroot/proc mount -o bind /run /sysroot/run +mount -t efivarfs none /sysroot/sys/firmware/efi/efivars + + if [ ! -f /tmp/mountparts.sh ]; then @@ -24,6 +27,7 @@ fi cd /sysroot/run [ -f /run/sshd.pid ] && cp /run/sshd.pid /tmp/dbgssh.pid +chmod 0600 /sysroot/etc/ssh/ssh*key chroot /sysroot/ bash -c "/usr/sbin/sshd" chroot /sysroot/ bash -c "source /etc/confluent/functions; run_remote_python getinstalldisk" chroot /sysroot/ bash -c "source /etc/confluent/functions; run_remote_parts pre.d" From 710b24e9f5f45f482b4653a1e7c76a397f375979 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Aug 2025 11:10:43 -0400 Subject: [PATCH 291/413] Recover from dead ssh agent If the ssh-agent is gone, for whatever reason, restart it. 
--- confluent_server/confluent/sshutil.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/sshutil.py b/confluent_server/confluent/sshutil.py index eb3509d6..83215abf 100644 --- a/confluent_server/confluent/sshutil.py +++ b/confluent_server/confluent/sshutil.py @@ -7,7 +7,7 @@ import confluent.util as util import eventlet.green.subprocess as subprocess import eventlet import glob -import os +import eventlet.green.os as os import shutil import tempfile @@ -34,6 +34,7 @@ def normalize_uid(): return curruid agent_starting = False + def assure_agent(): global agent_starting global agent_pid @@ -54,7 +55,7 @@ def assure_agent(): k = k.decode('utf8') v = v.decode('utf8') if k == 'SSH_AGENT_PID': - agent_pid = v + agent_pid = int(v) os.environ[k] = v finally: agent_starting = False @@ -113,9 +114,14 @@ def initialize_ca(): adding_key = False def prep_ssh_key(keyname): global adding_key + global agent_pid while adding_key: eventlet.sleep(0.1) adding_key = True + if agent_pid: + if not os.path.exists(os.environ['SSH_AUTH_SOCK']): + agent_pid = None + ready_keys.clear() if keyname in ready_keys: adding_key = False return From 595b628e0848b529cb6b84e07cc255ceeea62bb9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Aug 2025 14:00:36 -0400 Subject: [PATCH 292/413] Validate that the agent socket actually works If agent is 'kill -9', then recover from that by reaping the now dead socket. 
--- confluent_server/confluent/sshutil.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/confluent_server/confluent/sshutil.py b/confluent_server/confluent/sshutil.py index 83215abf..05fe3cc0 100644 --- a/confluent_server/confluent/sshutil.py +++ b/confluent_server/confluent/sshutil.py @@ -5,6 +5,7 @@ import confluent.config.configmanager as cfm import confluent.collective.manager as collective import confluent.util as util import eventlet.green.subprocess as subprocess +import eventlet.green.socket as socket import eventlet import glob import eventlet.green.os as os @@ -119,6 +120,15 @@ def prep_ssh_key(keyname): eventlet.sleep(0.1) adding_key = True if agent_pid: + if os.path.exists(os.environ['SSH_AUTH_SOCK']): + try: + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.connect(os.environ['SSH_AUTH_SOCK']) + except Exception: + os.unlink(os.environ['SSH_AUTH_SOCK']) + os.rmdir(os.path.dirname(os.environ['SSH_AUTH_SOCK'])) + finally: + sock.close() if not os.path.exists(os.environ['SSH_AUTH_SOCK']): agent_pid = None ready_keys.clear() From 9defc474741af7c2c37aae2d082dec06917e57c2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 27 Aug 2025 12:29:19 -0400 Subject: [PATCH 293/413] Give pycdlib a duped filehandle Attempts to share the filehandle resulted in race conditions around closing, dedicate a dupe filehandle to pycdlib to avoid the conflict. 
--- confluent_server/confluent/osimage.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 2501b62b..89c94d08 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -790,7 +790,9 @@ def scan_iso(archive): for block in ent.get_blocks(): filecontents[str(ent)] += bytes(block) if scanudf: - return scan_udf(dfd) + ndfd = os.dup(archive.fileno()) + os.lseek(ndfd, 0, 0) + return scan_udf(ndfd) finally: os.close(dfd) return filesizes, filecontents @@ -799,14 +801,18 @@ def scan_udf(dfd): fp = os.fdopen(dfd, 'rb') iso = pycdlib.PyCdlib() iso.open_fp(fp) + imginfo = {} try: extracted = BytesIO() iso.get_file_from_iso_fp(extracted, udf_path='/sources/idwbinfo.txt') idwbinfo = extracted.getvalue() - return {}, {'sources/idwbinfo.txt': idwbinfo} + imginfo = {'sources/idwbinfo.txt': idwbinfo} except Exception: - return {}, {} - + pass + finally: + iso.close() + fp.close() + return {}, imginfo def fingerprint(archive): From f321f5610920e97c7a942c3b72532a08c4010bef Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 28 Aug 2025 08:08:30 -0400 Subject: [PATCH 294/413] Make more windows content executable Other files use the executable bit as an indication of whether to run or not. 
--- confluent_server/confluent/osimage.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 89c94d08..b1676f3d 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -668,11 +668,12 @@ def fixup_windows(targpath): # manifests as following the executable bit for root, _, files in os.walk(targpath): for fname in files: - if fname.endswith('.exe'): - fpath = os.path.join(root, fname) - if is_windows_executable(fpath): - st = os.stat(fpath) - os.chmod(fpath, st.st_mode | 0o111) + for ext in ('.exe', '.dll', '.sys', '.mui', '.efi'): + if fname.endswith(ext): + fpath = os.path.join(root, fname) + if is_windows_executable(fpath): + st = os.stat(fpath) + os.chmod(fpath, st.st_mode | 0o111) def check_coreos(isoinfo): arch = 'x86_64' # TODO: would check magic of vmlinuz to see which arch From 5905510a3209689c27beab16baf4c6ae1f86cd4d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 28 Aug 2025 08:34:07 -0400 Subject: [PATCH 295/413] Move tmp script execution out of /tmp Some environments want noexec on /tmp, this will work in such environments. 
--- .../debian/profiles/default/scripts/functions | 23 ++++++++++++------- .../profiles/default/scripts/functions | 23 ++++++++++++------- .../el7/profiles/default/scripts/functions | 23 ++++++++++++------- .../profiles/default/scripts/functions | 23 ++++++++++++------- .../el8/profiles/default/scripts/functions | 23 ++++++++++++------- .../profiles/default/scripts/functions | 23 ++++++++++++------- .../profiles/default/scripts/functions | 23 ++++++++++++------- .../suse15/profiles/hpc/scripts/functions | 23 ++++++++++++------- .../suse15/profiles/server/scripts/functions | 23 ++++++++++++------- .../profiles/default/scripts/functions | 23 ++++++++++++------- .../profiles/default/scripts/functions | 23 ++++++++++++------- .../profiles/default/scripts/functions | 23 ++++++++++++------- 12 files changed, 180 insertions(+), 96 deletions(-) diff --git a/confluent_osdeploy/debian/profiles/default/scripts/functions b/confluent_osdeploy/debian/profiles/default/scripts/functions index f68f3a5e..a88ba210 100644 --- a/confluent_osdeploy/debian/profiles/default/scripts/functions +++ b/confluent_osdeploy/debian/profiles/default/scripts/functions @@ -10,6 +10,13 @@ function test_mgr() { return 1 } +function initconfluentscriptstmp() { + if [ -z "$confluentscripttmpdir" ]; then + mkdir -p /opt/confluent/tmpexec + confluentscripttmpdir=$(mktemp -d /opt/confluent/tmpexec/confluentscripts.XXXXXXXXX) + fi +} + function confluentpython() { if [ -x /usr/libexec/platform-python ]; then /usr/libexec/platform-python $* @@ -72,7 +79,8 @@ fetch_remote() { } source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -86,7 +94,8 @@ source_remote_parts() { } run_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + 
initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -105,10 +114,7 @@ source_remote() { echo echo '---------------------------------------------------------------------------' echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi + initconfluentscriptstmp echo Sourcing from $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -135,9 +141,9 @@ run_remote() { echo '---------------------------------------------------------------------------' echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) unsettmpdir=1 fi + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -170,7 +176,8 @@ run_remote_python() { fi echo '---------------------------------------------------------------------------' echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir mkdir -p $(dirname $1) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/functions b/confluent_osdeploy/el7-diskless/profiles/default/scripts/functions index f68f3a5e..a88ba210 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/functions +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/functions @@ -10,6 +10,13 @@ function test_mgr() { return 1 } +function initconfluentscriptstmp() { + if [ -z "$confluentscripttmpdir" ]; then + mkdir -p 
/opt/confluent/tmpexec + confluentscripttmpdir=$(mktemp -d /opt/confluent/tmpexec/confluentscripts.XXXXXXXXX) + fi +} + function confluentpython() { if [ -x /usr/libexec/platform-python ]; then /usr/libexec/platform-python $* @@ -72,7 +79,8 @@ fetch_remote() { } source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -86,7 +94,8 @@ source_remote_parts() { } run_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -105,10 +114,7 @@ source_remote() { echo echo '---------------------------------------------------------------------------' echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi + initconfluentscriptstmp echo Sourcing from $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -135,9 +141,9 @@ run_remote() { echo '---------------------------------------------------------------------------' echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) unsettmpdir=1 fi + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -170,7 +176,8 @@ run_remote_python() { fi echo '---------------------------------------------------------------------------' echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - 
confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir mkdir -p $(dirname $1) diff --git a/confluent_osdeploy/el7/profiles/default/scripts/functions b/confluent_osdeploy/el7/profiles/default/scripts/functions index f68f3a5e..a88ba210 100644 --- a/confluent_osdeploy/el7/profiles/default/scripts/functions +++ b/confluent_osdeploy/el7/profiles/default/scripts/functions @@ -10,6 +10,13 @@ function test_mgr() { return 1 } +function initconfluentscriptstmp() { + if [ -z "$confluentscripttmpdir" ]; then + mkdir -p /opt/confluent/tmpexec + confluentscripttmpdir=$(mktemp -d /opt/confluent/tmpexec/confluentscripts.XXXXXXXXX) + fi +} + function confluentpython() { if [ -x /usr/libexec/platform-python ]; then /usr/libexec/platform-python $* @@ -72,7 +79,8 @@ fetch_remote() { } source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -86,7 +94,8 @@ source_remote_parts() { } run_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -105,10 +114,7 @@ source_remote() { echo echo '---------------------------------------------------------------------------' echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi + initconfluentscriptstmp echo Sourcing from $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -135,9 +141,9 @@ run_remote() { echo 
'---------------------------------------------------------------------------' echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) unsettmpdir=1 fi + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -170,7 +176,8 @@ run_remote_python() { fi echo '---------------------------------------------------------------------------' echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir mkdir -p $(dirname $1) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/functions b/confluent_osdeploy/el8-diskless/profiles/default/scripts/functions index f68f3a5e..a88ba210 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/functions +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/functions @@ -10,6 +10,13 @@ function test_mgr() { return 1 } +function initconfluentscriptstmp() { + if [ -z "$confluentscripttmpdir" ]; then + mkdir -p /opt/confluent/tmpexec + confluentscripttmpdir=$(mktemp -d /opt/confluent/tmpexec/confluentscripts.XXXXXXXXX) + fi +} + function confluentpython() { if [ -x /usr/libexec/platform-python ]; then /usr/libexec/platform-python $* @@ -72,7 +79,8 @@ fetch_remote() { } source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -86,7 +94,8 @@ source_remote_parts() { } run_remote_parts() { - confluentscripttmpdir=$(mktemp -d 
/tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -105,10 +114,7 @@ source_remote() { echo echo '---------------------------------------------------------------------------' echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi + initconfluentscriptstmp echo Sourcing from $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -135,9 +141,9 @@ run_remote() { echo '---------------------------------------------------------------------------' echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) unsettmpdir=1 fi + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -170,7 +176,8 @@ run_remote_python() { fi echo '---------------------------------------------------------------------------' echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir mkdir -p $(dirname $1) diff --git a/confluent_osdeploy/el8/profiles/default/scripts/functions b/confluent_osdeploy/el8/profiles/default/scripts/functions index f68f3a5e..a88ba210 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/functions +++ b/confluent_osdeploy/el8/profiles/default/scripts/functions @@ -10,6 +10,13 @@ function test_mgr() { return 1 } +function initconfluentscriptstmp() { + if [ -z "$confluentscripttmpdir" ]; 
then + mkdir -p /opt/confluent/tmpexec + confluentscripttmpdir=$(mktemp -d /opt/confluent/tmpexec/confluentscripts.XXXXXXXXX) + fi +} + function confluentpython() { if [ -x /usr/libexec/platform-python ]; then /usr/libexec/platform-python $* @@ -72,7 +79,8 @@ fetch_remote() { } source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -86,7 +94,8 @@ source_remote_parts() { } run_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -105,10 +114,7 @@ source_remote() { echo echo '---------------------------------------------------------------------------' echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi + initconfluentscriptstmp echo Sourcing from $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -135,9 +141,9 @@ run_remote() { echo '---------------------------------------------------------------------------' echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) unsettmpdir=1 fi + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -170,7 +176,8 @@ run_remote_python() { fi echo '---------------------------------------------------------------------------' echo Running python script "'$*'" from 
https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir mkdir -p $(dirname $1) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/functions b/confluent_osdeploy/el9-diskless/profiles/default/scripts/functions index f68f3a5e..a88ba210 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/functions +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/functions @@ -10,6 +10,13 @@ function test_mgr() { return 1 } +function initconfluentscriptstmp() { + if [ -z "$confluentscripttmpdir" ]; then + mkdir -p /opt/confluent/tmpexec + confluentscripttmpdir=$(mktemp -d /opt/confluent/tmpexec/confluentscripts.XXXXXXXXX) + fi +} + function confluentpython() { if [ -x /usr/libexec/platform-python ]; then /usr/libexec/platform-python $* @@ -72,7 +79,8 @@ fetch_remote() { } source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -86,7 +94,8 @@ source_remote_parts() { } run_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -105,10 +114,7 @@ source_remote() { echo echo '---------------------------------------------------------------------------' echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi + initconfluentscriptstmp echo Sourcing from 
$confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -135,9 +141,9 @@ run_remote() { echo '---------------------------------------------------------------------------' echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) unsettmpdir=1 fi + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -170,7 +176,8 @@ run_remote_python() { fi echo '---------------------------------------------------------------------------' echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir mkdir -p $(dirname $1) diff --git a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/functions b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/functions index f68f3a5e..a88ba210 100644 --- a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/functions +++ b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/functions @@ -10,6 +10,13 @@ function test_mgr() { return 1 } +function initconfluentscriptstmp() { + if [ -z "$confluentscripttmpdir" ]; then + mkdir -p /opt/confluent/tmpexec + confluentscripttmpdir=$(mktemp -d /opt/confluent/tmpexec/confluentscripts.XXXXXXXXX) + fi +} + function confluentpython() { if [ -x /usr/libexec/platform-python ]; then /usr/libexec/platform-python $* @@ -72,7 +79,8 @@ fetch_remote() { } source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -86,7 +94,8 
@@ source_remote_parts() { } run_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -105,10 +114,7 @@ source_remote() { echo echo '---------------------------------------------------------------------------' echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi + initconfluentscriptstmp echo Sourcing from $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -135,9 +141,9 @@ run_remote() { echo '---------------------------------------------------------------------------' echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) unsettmpdir=1 fi + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -170,7 +176,8 @@ run_remote_python() { fi echo '---------------------------------------------------------------------------' echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir mkdir -p $(dirname $1) diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/functions b/confluent_osdeploy/suse15/profiles/hpc/scripts/functions index f68f3a5e..a88ba210 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/scripts/functions +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/functions @@ -10,6 +10,13 @@ function test_mgr() { return 1 
} +function initconfluentscriptstmp() { + if [ -z "$confluentscripttmpdir" ]; then + mkdir -p /opt/confluent/tmpexec + confluentscripttmpdir=$(mktemp -d /opt/confluent/tmpexec/confluentscripts.XXXXXXXXX) + fi +} + function confluentpython() { if [ -x /usr/libexec/platform-python ]; then /usr/libexec/platform-python $* @@ -72,7 +79,8 @@ fetch_remote() { } source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -86,7 +94,8 @@ source_remote_parts() { } run_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -105,10 +114,7 @@ source_remote() { echo echo '---------------------------------------------------------------------------' echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi + initconfluentscriptstmp echo Sourcing from $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -135,9 +141,9 @@ run_remote() { echo '---------------------------------------------------------------------------' echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) unsettmpdir=1 fi + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -170,7 +176,8 @@ run_remote_python() { fi echo '---------------------------------------------------------------------------' echo Running python 
script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir mkdir -p $(dirname $1) diff --git a/confluent_osdeploy/suse15/profiles/server/scripts/functions b/confluent_osdeploy/suse15/profiles/server/scripts/functions index f68f3a5e..a88ba210 100644 --- a/confluent_osdeploy/suse15/profiles/server/scripts/functions +++ b/confluent_osdeploy/suse15/profiles/server/scripts/functions @@ -10,6 +10,13 @@ function test_mgr() { return 1 } +function initconfluentscriptstmp() { + if [ -z "$confluentscripttmpdir" ]; then + mkdir -p /opt/confluent/tmpexec + confluentscripttmpdir=$(mktemp -d /opt/confluent/tmpexec/confluentscripts.XXXXXXXXX) + fi +} + function confluentpython() { if [ -x /usr/libexec/platform-python ]; then /usr/libexec/platform-python $* @@ -72,7 +79,8 @@ fetch_remote() { } source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -86,7 +94,8 @@ source_remote_parts() { } run_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -105,10 +114,7 @@ source_remote() { echo echo '---------------------------------------------------------------------------' echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi + initconfluentscriptstmp echo Sourcing from 
$confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -135,9 +141,9 @@ run_remote() { echo '---------------------------------------------------------------------------' echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) unsettmpdir=1 fi + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -170,7 +176,8 @@ run_remote_python() { fi echo '---------------------------------------------------------------------------' echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir mkdir -p $(dirname $1) diff --git a/confluent_osdeploy/ubuntu18.04/profiles/default/scripts/functions b/confluent_osdeploy/ubuntu18.04/profiles/default/scripts/functions index f68f3a5e..a88ba210 100644 --- a/confluent_osdeploy/ubuntu18.04/profiles/default/scripts/functions +++ b/confluent_osdeploy/ubuntu18.04/profiles/default/scripts/functions @@ -10,6 +10,13 @@ function test_mgr() { return 1 } +function initconfluentscriptstmp() { + if [ -z "$confluentscripttmpdir" ]; then + mkdir -p /opt/confluent/tmpexec + confluentscripttmpdir=$(mktemp -d /opt/confluent/tmpexec/confluentscripts.XXXXXXXXX) + fi +} + function confluentpython() { if [ -x /usr/libexec/platform-python ]; then /usr/libexec/platform-python $* @@ -72,7 +79,8 @@ fetch_remote() { } source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -86,7 +94,8 @@ 
source_remote_parts() { } run_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -105,10 +114,7 @@ source_remote() { echo echo '---------------------------------------------------------------------------' echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi + initconfluentscriptstmp echo Sourcing from $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -135,9 +141,9 @@ run_remote() { echo '---------------------------------------------------------------------------' echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) unsettmpdir=1 fi + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -170,7 +176,8 @@ run_remote_python() { fi echo '---------------------------------------------------------------------------' echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir mkdir -p $(dirname $1) diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/functions b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/functions index f68f3a5e..a88ba210 100644 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/functions +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/functions @@ -10,6 +10,13 
@@ function test_mgr() { return 1 } +function initconfluentscriptstmp() { + if [ -z "$confluentscripttmpdir" ]; then + mkdir -p /opt/confluent/tmpexec + confluentscripttmpdir=$(mktemp -d /opt/confluent/tmpexec/confluentscripts.XXXXXXXXX) + fi +} + function confluentpython() { if [ -x /usr/libexec/platform-python ]; then /usr/libexec/platform-python $* @@ -72,7 +79,8 @@ fetch_remote() { } source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -86,7 +94,8 @@ source_remote_parts() { } run_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -105,10 +114,7 @@ source_remote() { echo echo '---------------------------------------------------------------------------' echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi + initconfluentscriptstmp echo Sourcing from $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -135,9 +141,9 @@ run_remote() { echo '---------------------------------------------------------------------------' echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) unsettmpdir=1 fi + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -170,7 +176,8 @@ run_remote_python() { fi echo 
'---------------------------------------------------------------------------' echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir mkdir -p $(dirname $1) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/functions b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/functions index f68f3a5e..a88ba210 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/functions +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/functions @@ -10,6 +10,13 @@ function test_mgr() { return 1 } +function initconfluentscriptstmp() { + if [ -z "$confluentscripttmpdir" ]; then + mkdir -p /opt/confluent/tmpexec + confluentscripttmpdir=$(mktemp -d /opt/confluent/tmpexec/confluentscripts.XXXXXXXXX) + fi +} + function confluentpython() { if [ -x /usr/libexec/platform-python ]; then /usr/libexec/platform-python $* @@ -72,7 +79,8 @@ fetch_remote() { } source_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -86,7 +94,8 @@ source_remote_parts() { } run_remote_parts() { - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp apiclient=/opt/confluent/bin/apiclient if [ -f /etc/confluent/apiclient ]; then apiclient=/etc/confluent/apiclient @@ -105,10 +114,7 @@ source_remote() { echo echo '---------------------------------------------------------------------------' echo Sourcing $1 from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp 
-d /tmp/confluentscripts.XXXXXXXXX) - unsettmpdir=1 - fi + initconfluentscriptstmp echo Sourcing from $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -135,9 +141,9 @@ run_remote() { echo '---------------------------------------------------------------------------' echo Running $requestedcmd from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ if [ -z "$confluentscripttmpdir" ]; then - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) unsettmpdir=1 fi + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir fetch_remote $1 @@ -170,7 +176,8 @@ run_remote_python() { fi echo '---------------------------------------------------------------------------' echo Running python script "'$*'" from https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/ - confluentscripttmpdir=$(mktemp -d /tmp/confluentscripts.XXXXXXXXX) + unset confluentscripttmpdir + initconfluentscriptstmp echo Executing in $confluentscripttmpdir cd $confluentscripttmpdir mkdir -p $(dirname $1) From 5045b46014caf5fdcd32bb7ba535bce66b4990c1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 28 Aug 2025 15:14:02 -0400 Subject: [PATCH 296/413] Switch to ISO based boot for windows Windows boot loader can be easily confused by a plurality of vfat volumes, coddle it by giving it an ISO image for now. 
--- confluent_server/confluent/osimage.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index b1676f3d..f26eb13b 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -94,8 +94,10 @@ def update_boot(profilename): def update_boot_windows(profiledir, profile, label): profname = os.path.basename(profiledir) subprocess.check_call( - ['/opt/confluent/bin/dir2img', '{0}/boot'.format(profiledir), - '{0}/boot.img'.format(profiledir), profname], preexec_fn=relax_umask) + ['/usr/bin/genisoimage', '-o', + '{0}/boot.img'.format(profiledir), '-udf', '-b', 'dvd/etfsboot.com', + '-no-emul-boot', '-eltorito-alt-boot', '-eltorito-boot', + 'dvd/efisys_noprompt.bin', '{0}/boot'.format(profiledir)], preexec_fn=relax_umask) def update_boot_esxi(profiledir, profile, label): profname = os.path.basename(profiledir) From 6d2146f2525748e9ed2b396e15f82b9b8532b2ee Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 29 Aug 2025 17:12:36 -0400 Subject: [PATCH 297/413] Provide more category based firmware query Some platforms can have a very slow category, like disks. Give CLI a way to ask for the desired categories and a chance to optimize away the uninteresting. 
--- confluent_client/bin/nodefirmware | 8 ++++++-- confluent_server/confluent/core.py | 16 ++++++++++++++++ .../confluent/plugins/hardwaremanagement/ipmi.py | 6 +++--- .../plugins/hardwaremanagement/redfish.py | 6 +++--- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/confluent_client/bin/nodefirmware b/confluent_client/bin/nodefirmware index 0c6a0958..98ddc50a 100755 --- a/confluent_client/bin/nodefirmware +++ b/confluent_client/bin/nodefirmware @@ -153,9 +153,13 @@ def show_firmware(session): firmware_shown = False nodes_matched = False for component in components: + category = 'all' + if component in ('adapters', 'disks', 'misc', 'core'): + category = component + component = 'all' for res in session.read( - '/noderange/{0}/inventory/firmware/all/{1}'.format( - noderange, component)): + '/noderange/{0}/inventory/firmware/{2}/{1}'.format( + noderange, component, category)): nodes_matched = True exitcode |= client.printerror(res) if 'databynode' not in res: diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index 0e754b9f..2375078e 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -498,6 +498,22 @@ def _init_core(): 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', }), + 'core': PluginCollection({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + 'adapters': PluginCollection({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + 'disks': PluginCollection({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + 'misc': PluginCollection({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), 'updatestatus': PluginRoute({ 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index 02e324bb..38119dd6 100644 --- 
a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -973,12 +973,12 @@ class IpmiHandler(object): for id, data in self.ipmicmd.get_firmware(): self.output.put(msg.ChildCollection(simplify_name(id))) - def read_firmware(self, component): + def read_firmware(self, component, category): items = [] errorneeded = False try: complist = () if component == 'all' else (component,) - for id, data in self.ipmicmd.get_firmware(complist): + for id, data in self.ipmicmd.get_firmware(complist, category): if (component in ('core', 'all') or component == simplify_name(id) or match_aliases(component, simplify_name(id))): @@ -1014,7 +1014,7 @@ class IpmiHandler(object): if len(self.element) == 3: return self.list_firmware() elif len(self.element) == 4: - return self.read_firmware(self.element[-1]) + return self.read_firmware(self.element[-1], self.element[-2]) elif self.element[1] == 'hardware': if len(self.element) == 3: # list things in inventory return self.list_inventory() diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 03edd4a6..bb709b40 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -830,12 +830,12 @@ class IpmiHandler(object): for id, data in self.ipmicmd.get_firmware(): self.output.put(msg.ChildCollection(simplify_name(id))) - def read_firmware(self, component): + def read_firmware(self, component, category): items = [] errorneeded = False try: complist = () if component == 'all' else (component,) - for id, data in self.ipmicmd.get_firmware(complist): + for id, data in self.ipmicmd.get_firmware(complist, category): if (component in ('core', 'all') or component == simplify_name(id) or match_aliases(component, simplify_name(id))): @@ -871,7 +871,7 @@ class IpmiHandler(object): if 
len(self.element) == 3: return self.list_firmware() elif len(self.element) == 4: - return self.read_firmware(self.element[-1]) + return self.read_firmware(self.element[-1], self.element[-2]) elif self.element[1] == 'hardware': if len(self.element) == 3: # list things in inventory return self.list_inventory() From c567bfbd179402efb6a053a7f4dfc4168feeb6f7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 2 Sep 2025 08:53:55 -0400 Subject: [PATCH 298/413] Add sysctl tune check to selfcheck Apart from the gc_thresh indirect check, perform other checks. For now, just highlight that tcp_sack being disabled can really mess with BMC connections. Since the management node may have high speed and the BMC may be behind a 100MBit link, SACK is needed to overcome the massive loss and induce TCP to rate limit appropriately. --- confluent_server/bin/confluent_selfcheck | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/confluent_server/bin/confluent_selfcheck b/confluent_server/bin/confluent_selfcheck index 14ced5d3..f2ad7ef0 100755 --- a/confluent_server/bin/confluent_selfcheck +++ b/confluent_server/bin/confluent_selfcheck @@ -27,6 +27,16 @@ import signal import confluent.collective.manager as collective import confluent.noderange as noderange +def check_sysctl_tuning(): + with open('/proc/sys/net/ipv4/tcp_sack', 'r') as f: + value = f.read().strip() + if value == '1': + print('OK') + return + else: + emprint('TCP SACK is disabled, network operations to BMCs may be particularly impacted, including firmware updates and virtual media') + + def check_neigh_overflow(): dmesgout = subprocess.check_output(['dmesg']) if b'_cache: neighbor table overflow!' 
in subprocess.check_output(['dmesg']): @@ -216,6 +226,8 @@ if __name__ == '__main__': emprint('ARP/Neighbor table problem detected, evaluate and increase net.ipv*.neigh.default.gc_thresh*') else: print('OK') + fprint('Checking sysctl tunables: ') + check_sysctl_tuning() fprint('TFTP Status: ') if tftp_works(): print('OK') From a112297e60e0d7de0eb6d6a6c02aaf821c9206da Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 2 Sep 2025 10:19:41 -0400 Subject: [PATCH 299/413] Detect ESXi editions for more specific fingerprinting --- confluent_server/confluent/osimage.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index f26eb13b..81fe2f3f 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -36,6 +36,7 @@ READFILES = set([ 'media.2/products', '.DISCINFO', '.discinfo', + 'ISOLINUX.CFG', 'zipl.prm', 'sources/idwbinfo.txt', ]) @@ -495,9 +496,24 @@ def check_esxi(isoinfo): _, version = line.split(b' ', 1) if not isinstance(version, str): version = version.decode('utf8') + edition = '' if isesxi and version: + if 'ISOLINUX.CFG' in isoinfo[1]: + for line in isoinfo[1]['ISOLINUX.CFG'].split(b'\n'): + if line.startswith(b'MENU TITLE'): + words = line.split() + if len(words) > 2: + edition = words[2].decode('utf8') + break + if edition: + for vnd in ('LNV', 'LVO', 'LVN'): + if edition.startswith(vnd): + edition = '_' + edition.split('-', 1)[1].strip() + break + else: + edition = '' return { - 'name': 'esxi-{0}'.format(version), + 'name': 'esxi-{0}{1}'.format(version, edition), 'method': EXTRACT, 'category': 'esxi{0}'.format(version.split('.', 1)[0]) } From 87a6891effee2f6d1f1866c3b71987eaa9344cc6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 3 Sep 2025 09:09:21 -0400 Subject: [PATCH 300/413] Include boot filename in ARM case ARM PXE solutions often fail to properly implement PXE, workaround by going ahead 
and including the boot filename. --- .../confluent/discovery/protocols/pxe.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/confluent_server/confluent/discovery/protocols/pxe.py b/confluent_server/confluent/discovery/protocols/pxe.py index 56d870d6..a44b0261 100644 --- a/confluent_server/confluent/discovery/protocols/pxe.py +++ b/confluent_server/confluent/discovery/protocols/pxe.py @@ -844,7 +844,7 @@ def reply_dhcp4(node, info, packet, cfg, reqview, httpboot, cfd, profile, sock=N log.log({'error': 'Unable to serve {0} due to duplicated address between node and interface index "{}"'.format(node, info['netinfo']['ifidx'])}) return can302 = True - if httpboot: + if isboot and httpboot: proto = 'https' if insecuremode == 'never' else 'http' bootfile = '{0}://{1}/confluent-public/os/{2}/boot.img'.format( proto, myipn, profile @@ -865,13 +865,16 @@ def reply_dhcp4(node, info, packet, cfg, reqview, httpboot, cfd, profile, sock=N node, profile, len(bootfile) - 127)}) return repview[108:108 + len(bootfile)] = bootfile - elif info.get('architecture', None) == 'uefi-aarch64' and packet.get(77, None) == b'iPXE': - if not profile: - profile, stgprofile = get_deployment_profile(node, cfg) - if not profile: - log.log({'info': 'No pending profile for {0}, skipping proxyDHCP eply'.format(node)}) - return - bootfile = 'http://{0}/confluent-public/os/{1}/boot.ipxe'.format(myipn, profile).encode('utf8') + elif isboot and info.get('architecture', None) == 'uefi-aarch64': + if packet.get(77, None) == b'iPXE': + if not profile: + profile, stgprofile = get_deployment_profile(node, cfg) + if not profile: + log.log({'info': 'No pending profile for {0}, skipping proxyDHCP eply'.format(node)}) + return + bootfile = 'http://{0}/confluent-public/os/{1}/boot.ipxe'.format(myipn, profile).encode('utf8') + else: + bootfile = b'confluent/aarch64/ipxe.efi' repview[108:108 + len(bootfile)] = bootfile myip = myipn myipn = socket.inet_aton(myipn) From 
29c6ce230fa4bdfb7380dfd60c8466a154c2fc02 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 4 Sep 2025 10:21:01 -0400 Subject: [PATCH 301/413] Tolerate updateboot failure on first import --- confluent_server/confluent/osimage.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 81fe2f3f..617bf0b6 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -72,7 +72,7 @@ def symlink(src, targ): raise -def update_boot(profilename): +def update_boot(profilename, initialimport=False): if profilename.startswith('/var/lib/confluent/public'): profiledir = profilename else: @@ -90,15 +90,20 @@ def update_boot(profilename): elif ostype == 'esxi': update_boot_esxi(profiledir, profile, label) elif ostype == 'windows': - update_boot_windows(profiledir, profile, label) + update_boot_windows(profiledir, profile, label, initialimport) -def update_boot_windows(profiledir, profile, label): +def update_boot_windows(profiledir, profile, label, initialimport): profname = os.path.basename(profiledir) - subprocess.check_call( - ['/usr/bin/genisoimage', '-o', - '{0}/boot.img'.format(profiledir), '-udf', '-b', 'dvd/etfsboot.com', - '-no-emul-boot', '-eltorito-alt-boot', '-eltorito-boot', - 'dvd/efisys_noprompt.bin', '{0}/boot'.format(profiledir)], preexec_fn=relax_umask) + try: + subprocess.check_call( + ['/usr/bin/genisoimage', '-o', + '{0}/boot.img'.format(profiledir), '-udf', '-b', 'dvd/etfsboot.com', + '-no-emul-boot', '-eltorito-alt-boot', '-eltorito-boot', + 'dvd/efisys_noprompt.bin', '{0}/boot'.format(profiledir)], preexec_fn=relax_umask) + except Exception: + if initialimport: + return + raise def update_boot_esxi(profiledir, profile, label): profname = os.path.basename(profiledir) @@ -1090,7 +1095,7 @@ def generate_stock_profiles(defprofile, distpath, targpath, osname, subprocess.check_call( ['sh', 
'{0}/initprofile.sh'.format(dirname), targpath, dirname]) - bootupdates.append(eventlet.spawn(update_boot, dirname)) + bootupdates.append(eventlet.spawn(update_boot, dirname, True)) profilelist.append(profname) for upd in bootupdates: upd.wait() From 8109adaabf511dcf624b714732b526b8e7dcafa1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 4 Sep 2025 15:23:03 -0400 Subject: [PATCH 302/413] Add BFB recognition to osimage parsing Recognize BFB embedded OS as a potential osdeploy target. This is toward the end of identifying the appropriate 'addons.cpio' for setting up for a bf.cfg driven bfb install. For now, it is disabled until companion os category exists. --- confluent_server/confluent/osimage.py | 57 +++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 617bf0b6..e7a9dc07 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -789,6 +789,31 @@ def check_rhel(isoinfo): major = ver.split('.', 1)[0] return {'name': 'rhel-{0}-{1}'.format(ver, arch), 'method': EXTRACT, 'category': 'el{0}'.format(major)} +def fingerprint_initramfs(archive): + curroffset = archive.tell() + dfd = os.dup(archive.fileno()) + os.lseek(dfd, curroffset, 0) + try: + with libarchive.fd_reader(dfd) as reader: + for ent in reader: + if str(ent) == 'usr/lib/initrd-release': + osrelcontents = b'' + for block in ent.get_blocks(): + osrelcontents += bytes(block) + osrelease = osrelcontents.decode('utf-8').strip() + osid = '' + osver = '' + for line in osrelease.split('\n'): + if line.startswith('ID='): + osid = line.split('=', 1)[1].strip().strip('"') + if line.startswith('VERSION_ID='): + osver = line.split('=', 1)[1].strip().strip('"') + if osid and osver: + return (osid, osver) + finally: + os.close(dfd) + return None + def scan_iso(archive): scanudf = False @@ -839,6 +864,32 @@ def scan_udf(dfd): return {}, imginfo +def parse_bfb(archive): + 
currtype = 0 + # we want to find the initramfs image (id 63) and dig around to see the OS version + while currtype != 63: + currhdr = archive.read(24) + if currhdr[:5] != b'Bf\x02\x13!': + return None + currsize = int.from_bytes(currhdr[8:12], byteorder='little') + # currsize needs to be rounded up to nearest 8 byte boundary + if currsize % 8: + currsize += 8 - (currsize % 8) + currtype = currhdr[7] + if currtype == 63: + ossig = fingerprint_initramfs(archive) + if ossig: + osinfo = { + 'name': f'bluefield_{ossig[0]}-{ossig[1]}-aarch64', + 'method': COPY, + 'category': f'bluefield_{ossig[0]}{ossig[1]}' + } + if os.path.exists(f'/opt/confluent/lib/osdeploy/{osinfo["category"]}'): + return osinfo + else: + archive.seek(currsize, os.SEEK_CUR) + return None + def fingerprint(archive): archive.seek(0) header = archive.read(32768) @@ -853,6 +904,12 @@ def fingerprint(archive): if name: return name, isoinfo[0], fun.replace('check_', '') return None + elif header[:4] == b'Bf\x02\x13': + # BFB payload for Bluefield + archive.seek(0) + imginfo = parse_bfb(archive) + if imginfo: + return imginfo, None, 'bluefield' else: sum = hashlib.sha256(header) if sum.digest() in HEADERSUMS: From c9ca199b16d2f21792d85b2247b231a83d59a05b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Sep 2025 11:17:26 -0400 Subject: [PATCH 303/413] Fix preference of netplan If netplan and nmcli both exist, the intent was to prefer netplan. However, there was a mistake that caused nmcli to be the most preferred. 
--- confluent_osdeploy/common/profile/scripts/confignet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 78126b6f..bb52eed2 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -545,7 +545,7 @@ if __name__ == '__main__': rm_tmp_llas(tmpllas) if os.path.exists('/usr/sbin/netplan'): nm = NetplanManager(dc) - if os.path.exists('/usr/bin/nmcli'): + elif os.path.exists('/usr/bin/nmcli'): nm = NetworkManager(devtypes, dc) elif os.path.exists('/usr/sbin/wicked'): nm = WickedManager() From 5f26fb73e6a78d9f5763dc6768be0914b7753aed Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Sep 2025 16:47:44 -0400 Subject: [PATCH 304/413] Enable apiclient to be more self-sufficient Provide a totally 'clortho' and 'copernicus' free behavior. This allows some flows to skip the cpio addons to go straight to python. Some scenarios demand the utilities (initramfs) and others are more awkward with the utilities, so we enable both. 
--- .../initramfs/opt/confluent/bin/apiclient | 111 +++++++++++++----- 1 file changed, 80 insertions(+), 31 deletions(-) diff --git a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient index d468b4d9..dc2d8e4b 100644 --- a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient +++ b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient @@ -3,6 +3,7 @@ try: import http.client as client except ImportError: import httplib as client +import base64 import ctypes import ctypes.util import glob @@ -15,6 +16,12 @@ import sys import struct import time import re +import hmac +import hashlib +try: + import json +except ImportError: + json = None class InvalidApiKey(Exception): pass @@ -72,7 +79,7 @@ def get_my_addresses(): return addrs -def scan_confluents(): +def scan_confluents(confuuid=None): srvs = {} s6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) s6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1) @@ -84,12 +91,13 @@ def scan_confluents(): s4.bind(('0.0.0.0', 1900)) doneidxs = set([]) msg = 'M-SEARCH * HTTP/1.1\r\nST: urn:xcat.org:service:confluent:' - with open('/etc/confluent/confluent.deploycfg') as dcfg: - for line in dcfg.read().split('\n'): - if line.startswith('confluent_uuid:'): - confluentuuid = line.split(': ')[1] - msg += '/confluentuuid=' + confluentuuid - break + if not confuuid: + with open('/etc/confluent/confluent.deploycfg') as dcfg: + for line in dcfg.read().split('\n'): + if line.startswith('confluent_uuid:'): + confluentuuid = line.split(': ')[1] + msg += '/confluentuuid=' + confluentuuid + break try: with open('/sys/devices/virtual/dmi/id/product_uuid') as uuidin: msg += '/uuid=' + uuidin.read().strip() @@ -126,6 +134,7 @@ def scan_confluents(): srvlist = [] if r: r = r[0] + nodename = None while r: for s in r: (rsp, peer) = s.recvfrom(9000) @@ -133,6 +142,7 @@ def scan_confluents(): current = None for line in rsp: if 
line.startswith(b'NODENAME: '): + nodename = line.replace(b'NODENAME: ', b'').strip().decode('utf8') current = {} elif line.startswith(b'DEFAULTNET: 1'): current['isdefault'] = True @@ -148,16 +158,32 @@ def scan_confluents(): r = select.select((s4, s6), (), (), 2) if r: r = r[0] + if not os.path.exists('/etc/confluent/confluent.info'): + with open('/etc/confluent/confluent.info', 'w+') as cinfo: + if nodename: + cinfo.write('NODENAME: {0}\n'.format(nodename)) + for srv in srvlist: + cinfo.write('MANAGER: {0}\n'.format(srv)) return srvlist, srvs -def get_net_apikey(nodename, mgr): +def get_net_apikey(nodename, mgr, hmackey=None, confuuid=None): alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789./' newpass = ''.join([alpha[x >> 2] for x in bytearray(os.urandom(32))]) salt = '$5$' + ''.join([alpha[x >> 2] for x in bytearray(os.urandom(8))]) newpass = newpass.encode('utf8') salt = salt.encode('utf8') crypted = c_crypt(newpass, salt) + if hmackey: + hmacvalue = hmac.new(hmackey.encode('utf8'), crypted, hashlib.sha256).digest() + hmacvalue = base64.b64encode(hmacvalue).decode('utf8') + client = HTTPSClient(host=mgr, phmac=hmacvalue, nodename=nodename, confuuid=confuuid) + try: + status, rsp = client.grab_url_with_status('/confluent-api/self/registerapikey', data=crypted, returnrsp=True) + if status == 200: + return newpass.decode('utf8') + except Exception: + pass for addrinfo in socket.getaddrinfo(mgr, 13001, 0, socket.SOCK_STREAM): try: clisock = socket.socket(addrinfo[0], addrinfo[1]) @@ -195,7 +221,7 @@ def get_net_apikey(nodename, mgr): return '' -def get_apikey(nodename, hosts, errout=None): +def get_apikey(nodename, hosts, errout=None, hmackey=None, confuuid=None): apikey = "" if os.path.exists('/etc/confluent/confluent.apikey'): apikey = open('/etc/confluent/confluent.apikey').read().strip() @@ -204,16 +230,16 @@ def get_apikey(nodename, hosts, errout=None): while not apikey: for host in hosts: try: - apikey = get_net_apikey(nodename, host) 
+ apikey = get_net_apikey(nodename, host, hmackey=hmackey, confuuid=confuuid) except OSError: apikey = None if apikey: break else: - srvlist, _ = scan_confluents() + srvlist, _ = scan_confluents(confuuid=confuuid) for host in srvlist: try: - apikey = get_net_apikey(nodename, host) + apikey = get_net_apikey(nodename, host, hmackey=hmackey, confuuid=confuuid) except OSError: apikey = None if apikey: @@ -231,35 +257,43 @@ def get_apikey(nodename, hosts, errout=None): return apikey class HTTPSClient(client.HTTPConnection, object): - def __init__(self, usejson=False, port=443, host=None, errout=None, phmac=None, checkonly=False): + def __init__(self, usejson=False, port=443, host=None, errout=None, phmac=None, checkonly=False, hmackey=None, nodename=None, confuuid=None): self.ignorehosts = set([]) self.phmac = phmac + self.hmackey = hmackey + self.confuuid = confuuid self.errout = None + self.stdheaders = {} + if nodename: + self.stdheaders['CONFLUENT_NODENAME'] = nodename if errout: self.errout = open(errout, 'w') self.errout.flush() - self.stdheaders = {} mgtiface = None if usejson: self.stdheaders['ACCEPT'] = 'application/json' if host: self.hosts = [host] - with open('/etc/confluent/confluent.info') as cinfo: - info = cinfo.read().split('\n') - for line in info: - if line.startswith('NODENAME:'): - node = line.split(' ')[1] - self.stdheaders['CONFLUENT_NODENAME'] = node + if not nodename: + with open('/etc/confluent/confluent.info') as cinfo: + info = cinfo.read().split('\n') + for line in info: + if line.startswith('NODENAME:'): + nodename = line.split(' ')[1] + self.stdheaders['CONFLUENT_NODENAME'] = nodename else: self.hosts = [] - info = open('/etc/confluent/confluent.info').read().split('\n') + try: + info = open('/etc/confluent/confluent.info').read().split('\n') + except Exception: + info = [] havedefault = '0' plainhost = '' for line in info: host = '' if line.startswith('NODENAME:'): - node = line.split(' ')[1] - self.stdheaders['CONFLUENT_NODENAME'] = node 
+ nodename = line.split(' ')[1] + self.stdheaders['CONFLUENT_NODENAME'] = nodename if line.startswith('MANAGER:') and not host: host = line.split(' ')[1] self.hosts.append(host) @@ -294,15 +328,14 @@ class HTTPSClient(client.HTTPConnection, object): if plainhost and not self.hosts: self.hosts.append(plainhost) if self.phmac: - with open(phmac, 'r') as hmacin: - self.stdheaders['CONFLUENT_CRYPTHMAC'] = hmacin.read() + self.stdheaders['CONFLUENT_CRYPTHMAC'] = self.phmac elif not checkonly: - self.stdheaders['CONFLUENT_APIKEY'] = get_apikey(node, self.hosts, errout=self.errout) + self.stdheaders['CONFLUENT_APIKEY'] = get_apikey(nodename, self.hosts, errout=self.errout, hmackey=hmackey, confuuid=self.confuuid) if mgtiface: self.stdheaders['CONFLUENT_MGTIFACE'] = mgtiface self.port = port self.host = None - self.node = node + self.node = nodename host = self.check_connections() client.HTTPConnection.__init__(self, host, port) self.connect() @@ -342,7 +375,7 @@ class HTTPSClient(client.HTTPConnection, object): continue break if not foundsrv: - srvlist, srvs = scan_confluents() + srvlist, srvs = scan_confluents(self.confuuid) hosts = [] for srv in srvlist: if srvs[srv].get('isdefault', False): @@ -416,7 +449,7 @@ class HTTPSClient(client.HTTPConnection, object): with open('/etc/confluent/confluent.apikey', 'w+') as akfile: akfile.write('') self.stdheaders['CONFLUENT_APIKEY'] = get_apikey( - self.node, [self.host], errout=self.errout) + self.node, [self.host], errout=self.errout, hmackey=self.hmackey, confuuid=self.confuuid) if rsp.status == 503: # confluent is down, but the server running confluent is otherwise up authed = False self.ignorehosts.add(self.host) @@ -545,8 +578,24 @@ if __name__ == '__main__': phmac = sys.argv.index('-p') sys.argv.pop(phmac) phmac = sys.argv.pop(phmac) + with open(phmac, 'r') as hmacin: + phmac = hmacin.read() except ValueError: phmac = None + try: + identfile = sys.argv.index('-i') + sys.argv.pop(identfile) + identfile = 
sys.argv.pop(identfile) + with open(identfile) as idin: + data = idin.read() + identinfo = json.loads(data) + nodename = identinfo.get('nodename', None) + hmackey = identinfo.get('apitoken', None) + confuuid = identinfo.get('confluent_uuid', None) + except ValueError: + hmackey = None + nodename = None + confuuid = None try: checkonly = False idxit = sys.argv.index('-c') @@ -558,7 +607,7 @@ if __name__ == '__main__': data = open(sys.argv[-1]).read() if outbin: with open(outbin, 'ab+') as outf: - reader = HTTPSClient(usejson=usejson, errout=errout).grab_url( + reader = HTTPSClient(usejson=usejson, errout=errout, hmackey=hmackey, nodename=nodename, confuuid=confuuid).grab_url( sys.argv[1], data, returnrsp=True) chunk = reader.read(16384) while chunk: @@ -566,7 +615,7 @@ if __name__ == '__main__': chunk = reader.read(16384) sys.exit(0) - mclient = HTTPSClient(usejson, errout=errout, phmac=phmac, checkonly=checkonly) + mclient = HTTPSClient(usejson, errout=errout, phmac=phmac, checkonly=checkonly, hmackey=hmackey, nodename=nodename, confuuid=confuuid) if waitfor: status = 201 while status != waitfor: From cb5fcf077afb5b307a39d082a26640a82bf14753 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 12 Sep 2025 08:50:32 -0400 Subject: [PATCH 305/413] Fix incorrect character in release filename --- genesis/97genesis/install-base | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis/97genesis/install-base b/genesis/97genesis/install-base index e6c3d3a4..1f25e9ce 100644 --- a/genesis/97genesis/install-base +++ b/genesis/97genesis/install-base @@ -26,7 +26,7 @@ dracut_install poweroff date /etc/nsswitch.conf /etc/services /etc/protocols dracut_install /usr/share/terminfo/x/xterm /usr/share/terminfo/l/linux /usr/share/terminfo/v/vt100 /usr/share/terminfo/x/xterm-color /usr/share/terminfo/s/screen /usr/share/terminfo/x/xterm-256color /usr/share/terminfo/p/putty-256color /usr/share/terminfo/p/putty /usr/share/terminfo/d/dumb dracut_install chmod whoami 
head tail basename ping tr /usr/share/hwdata/usb.ids if [ -e /etc/redhat-release ]; then - dracut_install /etc/redhat_release + dracut_install /etc/redhat-release fi dracut_install dmidecode /usr/$IMPLIBDIR/libstdc++.so.6 dracut_install ps free find From 678bd6052a83c91aa47a1856cf9eac43a8a2fa66 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 12 Sep 2025 15:59:48 -0400 Subject: [PATCH 306/413] Correct path to util-linux in genesis build --- genesis/fetchlicenses | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis/fetchlicenses b/genesis/fetchlicenses index 0d36d89d..d054b64a 100644 --- a/genesis/fetchlicenses +++ b/genesis/fetchlicenses @@ -39,6 +39,6 @@ cp -a /root/rpmbuild/BUILD/kernel-*/linux-*/LICENSES/* /usr/share/licenses/kerne cp /usr/share/licenses/krb5-libs/LICENSE /usr/share/licenses/krb5-libs/NOTICE mkdir -p /usr/share/licenses/libdb cp /root/rpmbuild/BUILD/db-5.3.28/lang/sql/odbc/debian/copyright /usr/share/licenses/libdb/copyright -head -n 105 $(pwd)/util-linux-2.37.4/sys-utils/hwclock-parse-date.c|tail -n 34 > /usr/share/licenses/util-linux/COPYING.GPLv3 +head -n 105 /root/rpmbuild/BUILD/util-linux-2.37.4/sys-utils/hwclock-parse-date.c|tail -n 34 > /usr/share/licenses/util-linux/COPYING.GPLv3 From dd2b7be2cade202ae8e33eb0e71d9682291878d2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 12 Sep 2025 16:15:00 -0400 Subject: [PATCH 307/413] Bump genesis version --- genesis/confluent-genesis.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis/confluent-genesis.spec b/genesis/confluent-genesis.spec index 4fd80bb2..4a7afdba 100644 --- a/genesis/confluent-genesis.spec +++ b/genesis/confluent-genesis.spec @@ -1,5 +1,5 @@ %define arch x86_64 -Version: 3.13.0 +Version: 3.14.0 Release: 1 Name: confluent-genesis-%{arch} BuildArch: noarch From 7a2cb80f6a43fe32523adf85940ce69d59b458eb Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 12 Sep 2025 16:57:37 -0400 Subject: [PATCH 308/413] Make hmac 
import optional Some environments do not have this module --- .../common/initramfs/opt/confluent/bin/apiclient | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient index dc2d8e4b..d9cfb2dc 100644 --- a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient +++ b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient @@ -16,12 +16,13 @@ import sys import struct import time import re -import hmac import hashlib try: import json + import hmac except ImportError: json = None + hmac = None class InvalidApiKey(Exception): pass From ebcf7d7bf8ce11f734d768f67f1f572273702a03 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 15 Sep 2025 11:21:03 -0400 Subject: [PATCH 309/413] Refresh genesis build version --- genesis/confluent-genesis.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis/confluent-genesis.spec b/genesis/confluent-genesis.spec index 4a7afdba..32a8eed8 100644 --- a/genesis/confluent-genesis.spec +++ b/genesis/confluent-genesis.spec @@ -1,5 +1,5 @@ %define arch x86_64 -Version: 3.14.0 +Version: 3.14.1 Release: 1 Name: confluent-genesis-%{arch} BuildArch: noarch From 22c89214554448bf77875d86da2d0464da8902f1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 17 Sep 2025 09:25:40 -0400 Subject: [PATCH 310/413] Place identity files loose in directory as well Some OS deployment mechanism may wish to convey the identity information more loosely. For those, it's convenient if the files are loose instead of needing extraction from a VFAT image. 
--- confluent_server/confluent/plugins/deployment/identimage.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/confluent/plugins/deployment/identimage.py b/confluent_server/confluent/plugins/deployment/identimage.py index c41c6e8f..1fe7b624 100644 --- a/confluent_server/confluent/plugins/deployment/identimage.py +++ b/confluent_server/confluent/plugins/deployment/identimage.py @@ -62,6 +62,9 @@ def create_ident_image(node, configmanager): with open(os.path.join(tmpd, 'cnflnt.jsn'), 'w') as jsonout: json.dump(ident, jsonout) shutil.copytree('/var/lib/confluent/public/site/tls', os.path.join(tmpd, 'tls')) + mkdirp('/var/lib/confluent/private/identity_files/') + shutil.copy(os.path.join(tmpd, 'cnflnt.yml'), '/var/lib/confluent/private/identity_files/{0}.yml'.format(node)) + shutil.copy(os.path.join(tmpd, 'cnflnt.jsn'), '/var/lib/confluent/private/identity_files/{0}.json'.format(node)) mkdirp('/var/lib/confluent/private/identity_images/') imgname = '/var/lib/confluent/private/identity_images/{0}.img'.format(node) if os.path.exists(imgname): From 500cdf7535a819f8c5920846e22fd29669c99150 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 18 Sep 2025 08:55:09 -0400 Subject: [PATCH 311/413] Change boot.img to boot.iso for Windows Some things expect an iso to be named as such. This drives different handling, but there's little choice in the matter. 
--- confluent_server/confluent/osimage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index e7a9dc07..c0f3b605 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -97,7 +97,7 @@ def update_boot_windows(profiledir, profile, label, initialimport): try: subprocess.check_call( ['/usr/bin/genisoimage', '-o', - '{0}/boot.img'.format(profiledir), '-udf', '-b', 'dvd/etfsboot.com', + '{0}/boot.iso'.format(profiledir), '-udf', '-b', 'dvd/etfsboot.com', '-no-emul-boot', '-eltorito-alt-boot', '-eltorito-boot', 'dvd/efisys_noprompt.bin', '{0}/boot'.format(profiledir)], preexec_fn=relax_umask) except Exception: From 3f9a13ed6fcb53f3fee75bc7a23f7202d39154a4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 18 Sep 2025 15:14:39 -0400 Subject: [PATCH 312/413] Ensure certfile is blanked before writing to it --- confluent_osdeploy/common/profile/scripts/setupssh | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/common/profile/scripts/setupssh b/confluent_osdeploy/common/profile/scripts/setupssh index 83c05fa1..cdea86b1 100644 --- a/confluent_osdeploy/common/profile/scripts/setupssh +++ b/confluent_osdeploy/common/profile/scripts/setupssh @@ -7,6 +7,7 @@ for pubkey in /etc/ssh/ssh_host*key.pub; do continue fi certfile=${pubkey/.pub/-cert.pub} + echo -n > $certfile confluentpython $confapiclient /confluent-api/self/sshcert $pubkey -o $certfile done if [ -d /etc/ssh/sshd_config.d/ -a ! -e /etc/ssh/sshd_config.d/90-confluent.conf ]; then From 8911193acaa3f81650f9961169aab95850de40ea Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 19 Sep 2025 11:50:12 -0400 Subject: [PATCH 313/413] Implement a test with retry for basic communication confusebox is likely to be a very early utility, and the relevant network is at high risk of being merely 'partially' up.
--- .../utils/confusebox/apiclient.go | 79 +++++++++++++------ 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/confluent_osdeploy/utils/confusebox/apiclient.go b/confluent_osdeploy/utils/confusebox/apiclient.go index bbbab0e5..f4170f28 100644 --- a/confluent_osdeploy/utils/confusebox/apiclient.go +++ b/confluent_osdeploy/utils/confusebox/apiclient.go @@ -2,20 +2,23 @@ package main import ( "bytes" + "crypto/tls" + "crypto/x509" + "errors" "fmt" "io" - "os" + "net" "net/http" - "crypto/x509" - "crypto/tls" + "os" "strings" - "errors" + "time" ) + type ApiClient struct { - server string + server string urlserver string - apikey string - nodename string + apikey string + nodename string webclient *http.Client } @@ -24,7 +27,7 @@ func NewApiClient(cafile string, keyfile string, nodename string, server string) if err != nil { return nil, err } - cacerts := x509.NewCertPool() + cacerts := x509.NewCertPool() cacerts.AppendCertsFromPEM(currcacerts) apikey := []byte("") if keyfile != "" { @@ -32,7 +35,7 @@ func NewApiClient(cafile string, keyfile string, nodename string, server string) if err != nil { return nil, err } - if apikey[len(apikey) - 1] == 0xa { + if apikey[len(apikey)-1] == 0xa { apikey = apikey[:len(apikey)-1] } } @@ -40,7 +43,9 @@ func NewApiClient(cafile string, keyfile string, nodename string, server string) cinfo, err := os.ReadFile("/etc/confluent/confliuent.info") if err != nil { nodename, err = os.Hostname() - if err != nil { return nil, err } + if err != nil { + return nil, err + } } cinfolines := bytes.Split(cinfo, []byte("\n")) if bytes.Contains(cinfolines[0], []byte("NODENAME")) { @@ -48,6 +53,20 @@ func NewApiClient(cafile string, keyfile string, nodename string, server string) nodename = string(cnodebytes[0]) } } + // Test connectivity with up to 3 retries + var conn net.Conn + for i := 0; i < 3; i++ { + conn, err = net.Dial("tcp", net.JoinHostPort(server, "443")) + if err == nil { + conn.Close() + break + } + time.Sleep(5 * 
time.Second) + fmt.Print("Connection attempt failed, retrying...\n") + if i == 2 { + return nil, fmt.Errorf("failed to connect after 3 attempts: %v", err) + } + } urlserver := server if strings.Contains(server, ":") { if strings.Contains(server, "%") && !strings.Contains(server, "%25") { @@ -58,10 +77,11 @@ func NewApiClient(cafile string, keyfile string, nodename string, server string) server = server[:strings.Index(server, "%")] } } + webclient := &http.Client{ Transport: &http.Transport{ TLSClientConfig: &tls.Config{ - RootCAs: cacerts, + RootCAs: cacerts, ServerName: server, }, }, @@ -70,34 +90,42 @@ func NewApiClient(cafile string, keyfile string, nodename string, server string) return &vc, nil } -func (apiclient *ApiClient) RegisterKey(crypted string, hmac string) (error) { +func (apiclient *ApiClient) RegisterKey(crypted string, hmac string) error { cryptbytes := []byte(crypted) cryptbuffer := bytes.NewBuffer(cryptbytes) _, err := apiclient.request("/confluent-api/self/registerapikey", "", cryptbuffer, "", hmac) return err } -func (apiclient *ApiClient) Fetch(url string, outputfile string, mime string, body io.Reader) (error) { +func (apiclient *ApiClient) Fetch(url string, outputfile string, mime string, body io.Reader) error { outp, err := os.Create(outputfile) - if err != nil { return err } + if err != nil { + return err + } defer outp.Close() rsp, err := apiclient.request(url, mime, body, "", "") - if err != nil { return err } + if err != nil { + return err + } _, err = io.Copy(outp, rsp) return err } -func (apiclient *ApiClient) GrabText(url string, mime string, body io.Reader) (string, error){ +func (apiclient *ApiClient) GrabText(url string, mime string, body io.Reader) (string, error) { rsp, err := apiclient.request(url, mime, body, "", "") - if err != nil { return "", err } + if err != nil { + return "", err + } rspdata, err := io.ReadAll(rsp) - if err != nil { return "", err } + if err != nil { + return "", err + } rsptxt := string(rspdata) return 
rsptxt, nil } func (apiclient *ApiClient) request(url string, mime string, body io.Reader, method string, hmac string) (io.ReadCloser, error) { - if ! strings.Contains(url, "https://") { + if !strings.Contains(url, "https://") { url = fmt.Sprintf("https://%s%s", apiclient.urlserver, url) } if method == "" { @@ -114,8 +142,12 @@ func (apiclient *ApiClient) request(url string, mime string, body io.Reader, met } else { rq, err = http.NewRequest(method, url, body) } - if err != nil { return nil, err } - if (mime != "") { rq.Header.Set("Accept", mime) } + if err != nil { + return nil, err + } + if mime != "" { + rq.Header.Set("Accept", mime) + } rq.Header.Set("CONFLUENT_NODENAME", apiclient.nodename) if len(hmac) > 0 { rq.Header.Set("CONFLUENT_CRYPTHMAC", hmac) @@ -124,11 +156,12 @@ func (apiclient *ApiClient) request(url string, mime string, body io.Reader, met rq.Header.Set("CONFLUENT_APIKEY", apiclient.apikey) } rsp, err := apiclient.webclient.Do(rq) - if err != nil { return nil, err } + if err != nil { + return nil, err + } if rsp.StatusCode >= 300 { err = errors.New(rsp.Status) return nil, err } return rsp.Body, err } - From d7879bad5bbe265604ffa06a9e76329266d004bb Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 19 Sep 2025 15:44:55 -0400 Subject: [PATCH 314/413] Improve robustness of Ubuntu net bringup If using DHCP, have the loop to validate connectivity repeat. --- .../initramfs/scripts/init-premount/confluent | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent index 995fb086..c72a7b69 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent @@ -54,10 +54,16 @@ while ! 
grep NODENAME /custom-installation/confluent/confluent.info; do echo $NIC > /tmp/autodetectnic else configure_networking - for dsrv in $deploysrvs; do - if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then - deploysrvs=$dsrv - break + while [ -z "$NIC" ]; do + for dsrv in $deploysrvs; do + if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then + deploysrvs=$dsrv + NIC=1 + break + fi + done + if [ -z "$NIC" ]; then + echo "No connectivity to deployment servers, retrying..." fi done fi From ac7fdb3ef71333579046a0b73619244c2cf6a493 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 19 Sep 2025 15:46:18 -0400 Subject: [PATCH 315/413] Enhance message for enclosure based discovery If nodes are accidentally omitted, but present, provide a hint that may clarify the situation. --- confluent_server/confluent/discovery/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index 38c1a733..6941845c 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -1362,7 +1362,8 @@ def eval_node(cfg, handler, info, nodename, manual=False): errorstr = 'The detected node {0} was detected using switch, ' \ 'however the relevant port has too many macs learned ' \ 'for this type of device ({1}) to be discovered by ' \ - 'switch.'.format(nodename, handler.devname) + 'switch. If this should be an enclosure, make sure there are ' \ + 'defined nodes for the enclosure'.format(nodename, handler.devname) log.log({'error': errorstr}) return if not discover_node(cfg, handler, info, nodename, manual): From 184132c398bb9b412456e007d529c49f207f1fd2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 19 Sep 2025 19:41:54 -0400 Subject: [PATCH 316/413] Fix collective manager candidates not in nodelist For switch operations, need to carry over the same logic as other evaluations. 
--- confluent_server/confluent/networking/macmap.py | 5 ++++- confluent_server/confluent/networking/netutil.py | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/networking/macmap.py b/confluent_server/confluent/networking/macmap.py index 1bff9e5f..5545e09a 100644 --- a/confluent_server/confluent/networking/macmap.py +++ b/confluent_server/confluent/networking/macmap.py @@ -535,7 +535,10 @@ def _full_updatemacmap(configmanager): if incollective: candmgrs = cfg.get('collective.managercandidates', {}).get('value', None) if candmgrs: - candmgrs = noderange.NodeRange(candmgrs, configmanager).nodes + try: + candmgrs = noderange.NodeRange(candmgrs, configmanager).nodes + except Exception: + candmgrs = noderange.NodeRange(candmgrs).nodes if mycollectivename not in candmgrs: # do not think about trying to find nodes that we aren't possibly # supposed to be a manager for in a collective diff --git a/confluent_server/confluent/networking/netutil.py b/confluent_server/confluent/networking/netutil.py index a1fd6d08..48b2f028 100644 --- a/confluent_server/confluent/networking/netutil.py +++ b/confluent_server/confluent/networking/netutil.py @@ -29,7 +29,10 @@ def get_switchcreds(configmanager, switches): continue candmgrs = switchcfg.get(switch, {}).get('collective.managercandidates', {}).get('value', None) if candmgrs: - candmgrs = noderange.NodeRange(candmgrs, configmanager).nodes + try: + candmgrs = noderange.NodeRange(candmgrs, configmanager).nodes + except Exception: + candmgrs = noderange.NodeRange(candmgrs).nodes if collective.get_myname() not in candmgrs: continue switchparms = switchcfg.get(switch, {}) @@ -81,4 +84,4 @@ def get_portnamemap(conn): ifidx, ifname = vb ifidx = int(str(ifidx).rsplit('.', 1)[1]) ifnamemap[ifidx] = str(ifname) - return ifnamemap \ No newline at end of file + return ifnamemap From 97d4015b09f3df742ca18cca350eda92a155c58d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 22 Sep 2025 
15:21:53 -0400 Subject: [PATCH 317/413] Handle memory inventory without type indicated --- confluent_client/bin/nodeinventory | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/confluent_client/bin/nodeinventory b/confluent_client/bin/nodeinventory index 11472390..1fb3d151 100755 --- a/confluent_client/bin/nodeinventory +++ b/confluent_client/bin/nodeinventory @@ -49,7 +49,9 @@ def pretty(text): def print_mem_info(node, prefix, meminfo): memdescfmt = '{0}GB PC' - if meminfo['memory_type'] == 'DDR3 SDRAM': + if meminfo['memory_type'] is None: + memdescfmt = '{0}GB ' + elif meminfo['memory_type'] == 'DDR3 SDRAM': memdescfmt += '3-{1} ' elif 'DDR4' in meminfo['memory_type']: memdescfmt += '4-{1} ' From f66093680be44291673e5b53f0d7dd3f5768e951 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 25 Sep 2025 10:08:05 -0400 Subject: [PATCH 318/413] Attempt to loop on reconfiguring networking This may induce DHCP to be retried --- .../ubuntu22.04/initramfs/scripts/init-premount/confluent | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent index c72a7b69..0eeadd55 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent @@ -53,8 +53,8 @@ while ! 
grep NODENAME /custom-installation/confluent/confluent.info; do ipconfig -d $MYIP::$MYGW:$MYNM::$NIC echo $NIC > /tmp/autodetectnic else - configure_networking while [ -z "$NIC" ]; do + configure_networking for dsrv in $deploysrvs; do if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then deploysrvs=$dsrv From 29accaa49477bfd2ee99b7cff296da5f45c15a62 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 25 Sep 2025 10:09:25 -0400 Subject: [PATCH 319/413] Change grub to not prompt Sometimes grub can get stuck unexpectedly waiting for interaction. Try to get away from this by default by setting the timeout to 0. --- confluent_server/confluent/osimage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index c0f3b605..4d7838a5 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -210,7 +210,7 @@ def update_boot_linux(profiledir, profile, label): needefi = True lincmd = 'linuxefi' if needefi else 'linux' initrdcmd = 'initrdefi' if needefi else 'initrd' - grubcfg = "set timeout=5\nmenuentry '" + grubcfg = "set timeout=0\nhiddenmenu\nmenuentry '" grubcfg += label grubcfg += "' {\n " + lincmd + " /kernel " + kernelargs + "\n" initrds = [] From 3505fe36e65f1086ff7db8d0761b152e5b84b842 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 25 Sep 2025 14:07:27 -0400 Subject: [PATCH 320/413] Remove hiddenmenu This no longer applies to most grub2 --- confluent_server/confluent/osimage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 4d7838a5..5f8f68ca 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -210,7 +210,7 @@ def update_boot_linux(profiledir, profile, label): needefi = True lincmd = 'linuxefi' if needefi else 'linux' initrdcmd = 'initrdefi' if needefi else 'initrd' - 
grubcfg = "set timeout=0\nhiddenmenu\nmenuentry '" + grubcfg = "set timeout=0\nmenuentry '" grubcfg += label grubcfg += "' {\n " + lincmd + " /kernel " + kernelargs + "\n" initrds = [] From 39eb32df38eb80dd84bc0814142861bfb7fa9826 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 25 Sep 2025 15:18:18 -0400 Subject: [PATCH 321/413] Test connection on net cfg apply When network configuration is applied, wait until we can reach the deployment server again before exiting. This should make us more robust against various potential delays after changing the nature of network interfaces. --- confluent_osdeploy/common/profile/scripts/confignet | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index bb52eed2..18945a3d 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -567,4 +567,13 @@ if __name__ == '__main__': if havefirewall: subprocess.check_call(['systemctl', 'start', 'firewalld']) await_tentative() + maxwait = 10 + while maxwait: + try: + tclient = apiclient.HTTPSClient(checkonly=True) + tclient.check_connections() + break + except Exception: + maxwait -= 1 + time.sleep(1) From a480cc73df5f91376363173f0f1b540c60567123 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 25 Sep 2025 15:29:33 -0400 Subject: [PATCH 322/413] Add connectivity check to esxi ident bringup If using the identity image bringup with dhcp, be more careful about waiting for connectivity before proceeding. 
--- confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel b/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel index f9e02624..6132feec 100644 --- a/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel +++ b/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel @@ -73,6 +73,19 @@ if [ -e /tmp/confluentident/cnflnt.yml ]; then hmacfile=$(mktemp) ln -s /opt/confluent/bin/clortho /opt/confluent/bin/genpasshmac /opt/confluent/bin/genpasshmac $passfile $passcrypt $hmacfile $hmackeyfile + echo -n 'Checking connectivity to server: ' + maxwait=30 + while ! /opt/confluent/bin/apiclient -c >& /dev/null; do + echo -n '.' + sleep 1 + maxwait=$((maxwait - 1)) + if [ $maxwait -le 0 ]; then + echo "Unable to contact deployment server, verify network connectivity" + echo "A debug session has been made available on Alt-F1" + sleep 30 + maxwait=30 + done + echo echo -n 'Registering new API key with deployment server: ' /opt/confluent/bin/apiclient -p $hmacfile /confluent-api/self/registerapikey $passcrypt echo From 871685ea20445996bdfb95297da4a5d54528c0e4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 25 Sep 2025 15:49:25 -0400 Subject: [PATCH 323/413] Correct missing closure of if --- confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel b/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel index 6132feec..5bf0476f 100644 --- a/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel +++ b/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel @@ -84,6 +84,7 @@ if [ -e /tmp/confluentident/cnflnt.yml ]; then echo "A debug session has been made available on Alt-F1" sleep 30 maxwait=30 + fi done echo echo -n 'Registering new API key with deployment server: ' From 6938bba2d3cb15a4a930be6ac748110a6cdaa57b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 26 Sep 2025 13:42:29 -0400 
Subject: [PATCH 324/413] Have confignet pause until connectivity restored If we are reconfiguring network for a diskless node, wait for things to settle back in before continuing. --- .../common/profile/scripts/confignet | 17 +++++++++++++++++ .../profiles/default/scripts/onboot.sh | 2 +- .../profiles/default/scripts/onboot.sh | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 18945a3d..41deed4c 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -460,6 +460,9 @@ class NetworkManager(object): if __name__ == '__main__': + checktarg = None + if '-c' in sys.argv: + checktarg = sys.argv[sys.argv.index('-c') + 1] havefirewall = subprocess.call(['systemctl', 'status', 'firewalld']) havefirewall = havefirewall == 0 if havefirewall: @@ -576,4 +579,18 @@ if __name__ == '__main__': except Exception: maxwait -= 1 time.sleep(1) + maxwait = 10 + if checktarg: + while maxwait: + try: + addrinf = socket.getaddrinfo(checktarg, 443)[0] + psock = socket.socket(addrinf[0], socket.SOCK_STREAM) + psock.settimeout(10) + psock.connect(addrinf[4]) + psock.close() + break + except Exception: + maxwait -= 1 + time.sleep(1) + diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh index 65b13ff1..0bc3777b 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh @@ -59,7 +59,7 @@ rpm --import /etc/pki/rpm-gpg/* run_remote_python add_local_repositories run_remote_python syncfileclient -run_remote_python confignet +run_remote_python confignet -c $confluent_mgr run_remote onboot.custom # onboot scripts may be placed into onboot.d, e.g. 
onboot.d/01-firstaction.sh, onboot.d/02-secondaction.sh diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh index 80f95870..b8a55cf1 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh @@ -53,7 +53,7 @@ rpm --import /etc/pki/rpm-gpg/* run_remote_python add_local_repositories run_remote_python syncfileclient -run_remote_python confignet +run_remote_python confignet -c $confluent_mgr run_remote onboot.custom # onboot scripts may be placed into onboot.d, e.g. onboot.d/01-firstaction.sh, onboot.d/02-secondaction.sh From a4ba92a2e7911efcf24c1ae2f559d4ffd08ee799 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 1 Oct 2025 13:08:17 -0400 Subject: [PATCH 325/413] Retry network bringup ESXi may be slow in being ready for network bringup. Workaround by retrying. --- confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel b/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel index 5bf0476f..0813d588 100644 --- a/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel +++ b/confluent_osdeploy/esxi7/initramfs/bin/dcuiweasel @@ -62,8 +62,8 @@ if [ -e /tmp/confluentident/cnflnt.yml ]; then fi v4nm=$(grep ipv4_netmask: $tcfg) v4nm=${v4nm#ipv4_netmask: } - localcli network ip interface ipv4 set -i vmk0 -I $v4addr -N $v4nm -g $v4gw -t static - localcli network ip route ipv4 add -n default -g $v4gw + while ! localcli network ip interface ipv4 set -i vmk0 -I $v4addr -N $v4nm -g $v4gw -t static; do echo "Retrying..."; sleep 5; done + while ! 
localcli network ip route ipv4 add -n default -g $v4gw; do sleep 1; done fi hmackeyfile=$(mktemp) echo -n $(grep ^apitoken: /tmp/confluentident/cnflnt.yml|awk '{print $2}') > $hmackeyfile From a9d15de1564a90a46efc54dd84aba676c33f393e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 2 Oct 2025 10:55:43 -0400 Subject: [PATCH 326/413] Rework Ubuntu identity image DHCP bringup The stock Ubuntu approach was inadequate. It would DHCP out every nic and take the fastest result, and no going back. Now the CDC nic can frequently win that race. First, rmmod cdc_ether, as a scenario that is completely right out. But beyond that, let Ubuntu have one shot at multi-nic bringup. Beyond that, maintain a list of all link-up devices. If the check should fail, then start doing one nic at a time, cycling through them. Also, the openssl s_client timeout is painfully slow, use subshell and kill to speed up things. --- .../initramfs/scripts/init-premount/confluent | 35 ++++++++++++++++--- .../initramfs/scripts/init-premount/confluent | 32 +++++++++++++---- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent index 964869d7..82ff8aec 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent @@ -52,13 +52,38 @@ while ! grep NODENAME /custom-installation/confluent/confluent.info; do ipconfig -d $MYIP::$MYGW:$MYNM::$NIC echo $NIC > /tmp/autodetectnic else - configure_networking - for dsrv in $deploysrvs; do - if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then - deploysrvs=$dsrv - break + rmmod cdc_ether 2> /dev/null + while [ ! 
-f /run/confirmednic ]; do + ALLNETDEVS=$(ip a|grep LOWER_UP|grep MULTICAST|awk '{print $2}'|sed -e s/://) + rm -rf /run/net* /run/dhcpcd /var/lib/dhcpcd + for dev in $(ip a|grep MULTICAST|awk '{print $2}'|sed -e s/://); do + ip a flush $dev + echo 1 > /proc/sys/net/ipv6/conf/$dev/addr_gen_mode + echo 0 > /proc/sys/net/ipv6/conf/$dev/addr_gen_mode + done + unset DEVICE DEVICE6 IP IP6 dev + [ -z "$1" ] || DEVICE=$1 + shift + configure_networking + for dsrv in $deploysrvs; do + (if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then + echo $dsrv > /run/confirmednic + break + fi) & + chkpid=$! + ( sleep 10 && kill $chkpid ) & + timeoutpid=$! + wait $chkpid + kill $timeoutpid 2> /dev/null + unset chkpid timeoutpid + done + if [ ! -f /run/confirmednic ]; then + echo "No connectivity to deployment servers, retrying..." + [ -z "$1" ] && set -- $ALLNETDEVS fi done + deploysrvs=$(cat /run/confirmednic) + rm /run/confirmednic fi MGR=$deploysrvs NODENAME=$(grep ^nodename: /tmp/idntmnt/cnflnt.yml | awk '{print $2}') diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent index 0eeadd55..98c9129c 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent @@ -53,19 +53,39 @@ while ! grep NODENAME /custom-installation/confluent/confluent.info; do ipconfig -d $MYIP::$MYGW:$MYNM::$NIC echo $NIC > /tmp/autodetectnic else - while [ -z "$NIC" ]; do + rmmod cdc_ether 2> /dev/null + while [ ! 
-f /run/confirmednic ]; do + ALLNETDEVS=$(ip a|grep LOWER_UP|grep MULTICAST|awk '{print $2}'|sed -e s/://) + + rm -rf /run/net* /run/dhcpcd /var/lib/dhcpcd + for dev in $(ip a|grep MULTICAST|awk '{print $2}'|sed -e s/://); do + ip a flush $dev + echo 1 > /proc/sys/net/ipv6/conf/$dev/addr_gen_mode + echo 0 > /proc/sys/net/ipv6/conf/$dev/addr_gen_mode + done + unset DEVICE DEVICE6 IP IP6 dev + [ -z "$1" ] || DEVICE=$1 + shift configure_networking for dsrv in $deploysrvs; do - if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then - deploysrvs=$dsrv - NIC=1 + (if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then + echo $dsrv > /run/confirmednic break - fi + fi) & + chkpid=$! + ( sleep 10 && kill $chkpid ) & + timeoutpid=$! + wait $chkpid + kill $timeoutpid 2> /dev/null + unset chkpid timeoutpid done - if [ -z "$NIC" ]; then + if [ ! -f /run/confirmednic ]; then echo "No connectivity to deployment servers, retrying..." + [ -z "$1" ] && set -- $ALLNETDEVS fi done + deploysrvs=$(cat /run/confirmednic) + rm /run/confirmednic fi MGR=$deploysrvs NODENAME=$(grep ^nodename: /tmp/idntmnt/cnflnt.yml | awk '{print $2}') From 2d29813320845b49d3f1745837a4be61eb83dd8b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 2 Oct 2025 14:28:46 -0400 Subject: [PATCH 327/413] Store device for future use in ubuntu deployment --- .../ubuntu20.04/initramfs/scripts/init-premount/confluent | 2 ++ .../ubuntu22.04/initramfs/scripts/init-premount/confluent | 1 + 2 files changed, 3 insertions(+) diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent index 82ff8aec..1cafcd99 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent @@ -65,6 +65,7 @@ while ! 
grep NODENAME /custom-installation/confluent/confluent.info; do [ -z "$1" ] || DEVICE=$1 shift configure_networking + echo $DEVICE > /tmp/autodetectnic for dsrv in $deploysrvs; do (if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then echo $dsrv > /run/confirmednic @@ -84,6 +85,7 @@ while ! grep NODENAME /custom-installation/confluent/confluent.info; do done deploysrvs=$(cat /run/confirmednic) rm /run/confirmednic + fi MGR=$deploysrvs NODENAME=$(grep ^nodename: /tmp/idntmnt/cnflnt.yml | awk '{print $2}') diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent index 98c9129c..3d9affc6 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent @@ -67,6 +67,7 @@ while ! grep NODENAME /custom-installation/confluent/confluent.info; do [ -z "$1" ] || DEVICE=$1 shift configure_networking + echo $DEVICE > /tmp/autodetectnic for dsrv in $deploysrvs; do (if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then echo $dsrv > /run/confirmednic From 02791418bc88c82cdaf5bc458adaf8797a897e70 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 2 Oct 2025 15:45:48 -0400 Subject: [PATCH 328/413] Support attribute expansion in filenames For nodemedia, nodelicense, and nodefirmware, support for expressions in filenames was fouled when pass by filehandle was added. Restore this by adding all the files matching an expression. 
--- confluent_client/bin/nodefirmware | 25 ++++++++++++++++------ confluent_client/bin/nodelicense | 26 +++++++++++++++++------ confluent_client/bin/nodemedia | 35 ++++++++++++++++++++++--------- 3 files changed, 64 insertions(+), 22 deletions(-) diff --git a/confluent_client/bin/nodefirmware b/confluent_client/bin/nodefirmware index 98ddc50a..9ac6acdf 100755 --- a/confluent_client/bin/nodefirmware +++ b/confluent_client/bin/nodefirmware @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2016-2017 Lenovo @@ -114,11 +114,24 @@ def update_firmware(session, filename): upargs['bank'] = 'backup' noderrs = {} if session.unixdomain: - of = open(filename, 'rb') - try: - session.add_file(filename, of.fileno(), 'rb') - except Exception: - pass + filesbynode = {} + for exp in session.create('/noderange/{0}/attributes/expression'.format(noderange), + {'expression': filename}): + if 'error' in exp: + sys.stderr.write(exp['error'] + '\n') + exitcode |= exp.get('errorcode', 1) + ex = exp.get('databynode', ()) + for node in ex: + filesbynode[node] = ex[node]['value'] + if not isinstance(filesbynode[node], bytes) and not isinstance(filesbynode[node], str): + filesbynode[node] = filesbynode[node].encode('utf-8') + for node in filesbynode: + endfilename = filesbynode[node] + of = open(endfilename, 'rb') + try: + session.add_file(endfilename, of.fileno(), 'rb') + except Exception: + pass for res in session.create(resource, upargs): if 'created' not in res: for nodename in res.get('databynode', ()): diff --git a/confluent_client/bin/nodelicense b/confluent_client/bin/nodelicense index 6478d011..b13140da 100755 --- a/confluent_client/bin/nodelicense +++ b/confluent_client/bin/nodelicense @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2019 Lenovo @@ -65,16 +65,30 @@ client.check_globbing(noderange) def install_license(session, filename): global exitcode + resource 
= '/noderange/{0}/configuration/' \ 'management_controller/licenses/'.format(noderange) filename = os.path.abspath(filename) instargs = {'filename': filename} if session.unixdomain: - of = open(filename, 'rb') - try: - session.add_file(filename, of.fileno(), 'rb') - except Exception: - pass + filesbynode = {} + for exp in session.create('/noderange/{0}/attributes/expression'.format(noderange), + {'expression': filename}): + if 'error' in exp: + sys.stderr.write(exp['error'] + '\n') + exitcode |= exp.get('errorcode', 1) + ex = exp.get('databynode', ()) + for node in ex: + filesbynode[node] = ex[node]['value'] + if not isinstance(filesbynode[node], bytes) and not isinstance(filesbynode[node], str): + filesbynode[node] = filesbynode[node].encode('utf-8') + for node in filesbynode: + endfilename = filesbynode[node] + of = open(endfilename, 'rb') + try: + session.add_file(endfilename, of.fileno(), 'rb') + except Exception: + pass for res in session.create(resource, instargs): for node in res.get('databynode', []): if 'error' in res['databynode'][node]: diff --git a/confluent_client/bin/nodemedia b/confluent_client/bin/nodemedia index 74be65e5..b2130e46 100644 --- a/confluent_client/bin/nodemedia +++ b/confluent_client/bin/nodemedia @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2018 Lenovo @@ -101,22 +101,37 @@ def detach_media(noderange, media): def upload_media(noderange, media): global exitcode - if not os.path.exists(media): - sys.stderr.write('Unable to locate requested file {0}\n'.format( - media)) - sys.exit(404) + session = client.Command() output = sq.ScreenPrinter(noderange, session) filename = os.path.abspath(media) resource = '/noderange/{0}/media/uploads/'.format(noderange) + filename = os.path.abspath(filename) upargs = {'filename': filename} noderrs = {} if session.unixdomain: - of = open(filename, 'rb') - try: - session.add_file(filename, of.fileno(), 'rb') - except Exception: - pass + 
filesbynode = {} + for exp in session.create('/noderange/{0}/attributes/expression'.format(noderange), + {'expression': filename}): + if 'error' in exp: + sys.stderr.write(exp['error'] + '\n') + exitcode |= exp.get('errorcode', 1) + ex = exp.get('databynode', ()) + for node in ex: + filesbynode[node] = ex[node]['value'] + if not isinstance(filesbynode[node], bytes) and not isinstance(filesbynode[node], str): + filesbynode[node] = filesbynode[node].encode('utf-8') + for node in filesbynode: + endfilename = filesbynode[node] + if not os.path.exists(endfilename): + sys.stderr.write('Unable to locate requested file {0}\n'.format( + endfilename)) + sys.exit(404) + of = open(endfilename, 'rb') + try: + session.add_file(endfilename, of.fileno(), 'rb') + except Exception: + pass nodeurls = {} for res in session.create(resource, upargs): if 'created' not in res: From c472d96406f27d80c5ec741e693a5897feb4b632 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 6 Oct 2025 08:24:17 -0400 Subject: [PATCH 329/413] Add '-r' to nodedeploy This allows a shorthand to request a redeploy of whatever the most appropriate profile is. 
--- confluent_client/bin/nodedeploy | 83 ++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 21 deletions(-) diff --git a/confluent_client/bin/nodedeploy b/confluent_client/bin/nodedeploy index 1e172fea..7fc4d8be 100755 --- a/confluent_client/bin/nodedeploy +++ b/confluent_client/bin/nodedeploy @@ -48,7 +48,18 @@ def armonce(nr, cli): pass -def setpending(nr, profile, cli): +def setpending(nr, profile, profilebynodes, cli): + if profilebynodes: + for node in sortutil.natural_sort(profilebynodes): + prof = profilebynodes[node] + args = {'deployment.pendingprofile': prof, 'deployment.state': '', 'deployment.state_detail': ''} + if not prof.startswith('genesis-'): + args['deployment.stagedprofile'] = '' + args['deployment.profile'] = '' + for rsp in cli.update('/nodes/{0}/attributes/current'.format(node), + args): + pass + return args = {'deployment.pendingprofile': profile, 'deployment.state': '', 'deployment.state_detail': ''} if not profile.startswith('genesis-'): args['deployment.stagedprofile'] = '' @@ -69,6 +80,7 @@ def main(args): ap.add_argument('-n', '--network', help='Initiate deployment over PXE/HTTP', action='store_true') ap.add_argument('-p', '--prepareonly', help='Prepare only, skip any interaction with a BMC associated with this deployment action', action='store_true') ap.add_argument('-m', '--maxnodes', help='Specifiy a maximum nodes to be deployed') + ap.add_argument('-r', '--redeploy', help='Redeploy nodes with the current or pending profile', action='store_true') ap.add_argument('noderange', help='Set of nodes to deploy') ap.add_argument('profile', nargs='?', help='Profile name to deploy') args, extra = ap.parse_known_args(args) @@ -78,7 +90,7 @@ def main(args): if args.profile and not args.network: sys.stderr.write('-n is a required argument currently to perform an install, optionally with -p\n') return 1 - if not args.profile and args.network: + if not args.profile and args.network and not args.redeploy: sys.stderr.write('Both 
noderange and a profile name are required arguments to request a network deployment\n') return 1 if args.clear and args.profile: @@ -96,27 +108,38 @@ def main(args): if 'error' in rsp: sys.stderr.write(rsp['error'] + '\n') sys.exit(1) + profilebynode = {} if args.clear: cleararm(args.noderange, c) clearpending(args.noderange, c) - elif args.profile: - profnames = [] - for prof in c.read('/deployment/profiles/'): - profname = prof.get('item', {}).get('href', None) - if profname: - profname = profname.replace('/', '') - profnames.append(profname) - if profname == args.profile: - break - else: - sys.stderr.write('The specified profile "{}" is not an available profile\n'.format(args.profile)) - if profnames: - sys.stderr.write('The following profiles are available:\n') - for profname in profnames: - sys.stderr.write(' ' + profname + '\n') - else: - sys.stderr.write('No deployment profiles available, try osdeploy import or imgutil capture\n') - sys.exit(1) + elif args.redeploy: + hadpending = {} + for rsp in c.read('/noderange/{0}/attributes/current'.format(args.noderange)): + for node in rsp.get('databynode', {}): + nodeinfo = rsp['databynode'][node] + for attr in nodeinfo: + if attr == 'deployment.pendingprofile': + curr = nodeinfo[attr].get('value', '') + if curr: + hadpending[node] = True + profilebynode[node] = curr + if attr == 'deployment.stagedprofile': + curr = nodeinfo[attr].get('value', '') + if curr and node not in hadpending: + profilebynode[node] = curr + if attr == 'deployment.profile': + curr = nodeinfo[attr].get('value', '') + if curr and node not in profilebynode: + profilebynode[node] = curr + for lockinfo in c.read('/noderange/{0}/deployment/lock'.format(args.noderange)): + for node in lockinfo.get('databynode', {}): + lockstate = lockinfo['databynode'][node]['lock']['value'] + if lockstate == 'locked': + lockednodes.append(node) + if args.profile and profilebynode: + sys.stderr.write('The -r/--redeploy option cannot be used with a profile, it 
redeploys the current or pending profile\n') + return 1 + if args.profile or profilebynode: lockednodes = [] for lockinfo in c.read('/noderange/{0}/deployment/lock'.format(args.noderange)): for node in lockinfo.get('databynode', {}): @@ -127,8 +150,26 @@ def main(args): sys.stderr.write('Requested noderange has nodes with locked deployment: ' + ','.join(lockednodes)) sys.stderr.write('\n') sys.exit(1) + if args.profile: + profnames = [] + for prof in c.read('/deployment/profiles/'): + profname = prof.get('item', {}).get('href', None) + if profname: + profname = profname.replace('/', '') + profnames.append(profname) + if profname == args.profile: + break + else: + sys.stderr.write('The specified profile "{}" is not an available profile\n'.format(args.profile)) + if profnames: + sys.stderr.write('The following profiles are available:\n') + for profname in profnames: + sys.stderr.write(' ' + profname + '\n') + else: + sys.stderr.write('No deployment profiles available, try osdeploy import or imgutil capture\n') + sys.exit(1) armonce(args.noderange, c) - setpending(args.noderange, args.profile, c) + setpending(args.noderange, args.profile, profilebynode, c) else: databynode = {} for r in c.read('/noderange/{0}/attributes/current'.format(args.noderange)): From a1144fd49a84cc31460f20ddc181b348f9a09de2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 6 Oct 2025 10:48:35 -0400 Subject: [PATCH 330/413] Auto-intense color Terminals seem to expect 'bold or intensity' to imply intense color. There are certain terminals that steadfastly refuse to do bold and intense. So implement the logic on behalf of the remote terminal. Commonly, UEFI setup menus request bold white text on white background. This fixes such menus to be readable by explicitly requesting intense white foreground rather than normal background. For example, the kitty terminal has no 'intense on bold feature. 
--- confluent_client/bin/confetty | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/confluent_client/bin/confetty b/confluent_client/bin/confetty index e14a55ed..9463cddb 100755 --- a/confluent_client/bin/confetty +++ b/confluent_client/bin/confetty @@ -45,6 +45,7 @@ import math import getpass import optparse import os +import re import select import shlex import signal @@ -969,6 +970,9 @@ def main(): sys.stdout.write('Lost connection to server') quitconfetty(fullexit=True) +sgr_re = re.compile(r'(\x1b\[[0-9;]*m)') +sgr_parameters_re = re.compile(r'\x1b\[([0-9;]*)m') + def consume_termdata(fh, bufferonly=False): global clearpowermessage try: @@ -979,7 +983,27 @@ def consume_termdata(fh, bufferonly=False): updatestatus(data) return '' if data is not None: - data = client.stringify(data) + indata = client.stringify(data) + data = '' + for segment in sgr_re.split(indata): + if sgr_re.match(segment): # it is an sgr, analyze, maybe replace + params = [] + bold = False + for parameters in sgr_parameters_re.findall(segment): + for param in parameters.split(';'): + if param == '1': + bold = True + params.append(param) + for idx, param in enumerate(params): + try: + ival = int(param) + except ValueError: + continue + if bold and (30 <= ival <= 37): + ival += 60 + params[idx] = str(ival) + segment = '\x1b[' + ';'.join(str(p) for p in params) + 'm' + data += segment if clearpowermessage: sys.stdout.write("\x1b[2J\x1b[;H") clearpowermessage = False From 17866d7657cae4de956dfa29718b6996c8b8a926 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 6 Oct 2025 12:22:21 -0400 Subject: [PATCH 331/413] Change to only force intense if bg == fg In the interest of interfering with terminal behavior as little as possible, only apply the forced intensity if the background and foreground color are identical and would make it otherwise literally impossible to read when working as designed. 
--- confluent_client/bin/confetty | 47 ++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/confluent_client/bin/confetty b/confluent_client/bin/confetty index 9463cddb..c5373f37 100755 --- a/confluent_client/bin/confetty +++ b/confluent_client/bin/confetty @@ -972,9 +972,13 @@ def main(): sgr_re = re.compile(r'(\x1b\[[0-9;]*m)') sgr_parameters_re = re.compile(r'\x1b\[([0-9;]*)m') +fgcolor = None +bgcolor = None +fgshifted = False def consume_termdata(fh, bufferonly=False): global clearpowermessage + global fgcolor, bgcolor, fgshifted try: data = tlvdata.recv(fh) except Exception: @@ -985,23 +989,42 @@ def consume_termdata(fh, bufferonly=False): if data is not None: indata = client.stringify(data) data = '' + # this logic currently doesn't manage to catch SGRs that span multiple payloads for segment in sgr_re.split(indata): if sgr_re.match(segment): # it is an sgr, analyze, maybe replace params = [] - bold = False for parameters in sgr_parameters_re.findall(segment): for param in parameters.split(';'): - if param == '1': - bold = True - params.append(param) - for idx, param in enumerate(params): - try: - ival = int(param) - except ValueError: - continue - if bold and (30 <= ival <= 37): - ival += 60 - params[idx] = str(ival) + params.append(param) + if param == '0': + fgcolor = None + bgcolor = None + try: + ival = int(param) + except ValueError: + continue + if 40 <= ival <= 47 or 100 <= ival <= 107: + bgcolor = ival + if 30 <= ival <= 37 or 90 <= ival <= 97: + fgcolor = ival + if bgcolor is not None: + fgindicated = False + for idx, param in enumerate(params): + try: + ival = int(param) + except ValueError: + continue + if 30 <= ival <= 37 and (bgcolor % 10 == ival % 10): + fgindicated = True + fgshifted = True + ival += 60 + params[idx] = str(ival) + if not fgindicated and fgcolor is not None: + if bgcolor and (bgcolor % 10) == (fgcolor % 10): + fgshifted = True + params.append(str((fgcolor % 10) + 90)) + elif 
fgshifted: + params.append(str(fgcolor)) segment = '\x1b[' + ';'.join(str(p) for p in params) + 'm' data += segment if clearpowermessage: From 3f5d96788e6ef7895b8830262d074b1129163a81 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 6 Oct 2025 13:12:43 -0400 Subject: [PATCH 332/413] Fix handling of split SGR payloads Surprisingly frequently, the firmware stacks split right after the \x1b byte in sending data down. Defer a dangling partial sequence until more data comes in that should make it complete. --- confluent_client/bin/confetty | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/confluent_client/bin/confetty b/confluent_client/bin/confetty index c5373f37..a792fcea 100755 --- a/confluent_client/bin/confetty +++ b/confluent_client/bin/confetty @@ -975,10 +975,10 @@ sgr_parameters_re = re.compile(r'\x1b\[([0-9;]*)m') fgcolor = None bgcolor = None fgshifted = False - +pendseq = '' def consume_termdata(fh, bufferonly=False): global clearpowermessage - global fgcolor, bgcolor, fgshifted + global fgcolor, bgcolor, fgshifted, pendseq try: data = tlvdata.recv(fh) except Exception: @@ -987,9 +987,9 @@ def consume_termdata(fh, bufferonly=False): updatestatus(data) return '' if data is not None: - indata = client.stringify(data) + indata = pendseq + client.stringify(data) + pendseq = '' data = '' - # this logic currently doesn't manage to catch SGRs that span multiple payloads for segment in sgr_re.split(indata): if sgr_re.match(segment): # it is an sgr, analyze, maybe replace params = [] @@ -1027,6 +1027,19 @@ def consume_termdata(fh, bufferonly=False): params.append(str(fgcolor)) segment = '\x1b[' + ';'.join(str(p) for p in params) + 'm' data += segment + # defer any partial ansi escape sequence for a later pass + escidx = segment.rfind('\x1b[') + if escidx >= 0: + for chr in segment[escidx + 1:]: + if 0x40 <= ord(chr) <= 0x7e: + break + else: + # incomplete escape sequence, don't print it yet + data = data[:-len(segment) + 
escidx] + pendseq = segment[escidx:] + if not pendseq and segment and segment[-1] == '\x1b': + data = data[:-1] + pendseq = '\x1b' if clearpowermessage: sys.stdout.write("\x1b[2J\x1b[;H") clearpowermessage = False From f34395648e8e56c38f06a29d4bc895ba0ef51bea Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 6 Oct 2025 17:28:55 -0400 Subject: [PATCH 333/413] Add vlan_id management to redfish and ipmi plugins --- confluent_client/bin/nodeconfig | 10 ++++++++-- confluent_server/confluent/messages.py | 10 +++++++--- .../confluent/plugins/hardwaremanagement/ipmi.py | 4 +++- .../confluent/plugins/hardwaremanagement/redfish.py | 6 ++++-- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/confluent_client/bin/nodeconfig b/confluent_client/bin/nodeconfig index 4d3d17f3..1033e72e 100755 --- a/confluent_client/bin/nodeconfig +++ b/confluent_client/bin/nodeconfig @@ -1,7 +1,7 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 # vim: tabstop=4 shiftwidth=4 softtabstop=4 -# Copyright 2017 Lenovo +# Copyright 2025 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -96,6 +96,12 @@ cfgpaths = { 'bmc.static_ipv6_gateway': ( 'configuration/management_controller/net_interfaces/management', 'static_v6_gateway'), + 'bmc.vlan_id': ( + 'configuration/management_controller/net_interfaces/management', + 'vlan_id'), + 'bmc.mac_address': ( + 'configuration/management_controller/net_interfaces/management', + 'hw_addr'), 'bmc.hostname': ( 'configuration/management_controller/hostname', 'hostname'), } diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index 04ca43f7..1df368ff 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -1148,6 +1148,9 @@ class InputNetworkConfiguration(ConfluentInputMessage): if 'ipv4_gateway' not in inputdata: inputdata['ipv4_gateway'] = None + if 'vlan_id' not in inputdata: + inputdata['vlan_id'] = None + if 'ipv4_configuration' in inputdata and inputdata['ipv4_configuration']: if inputdata['ipv4_configuration'].lower() not in ['dhcp','static']: raise exc.InvalidArgumentException( @@ -1736,8 +1739,8 @@ class NetworkConfiguration(ConfluentMessage): desc = 'Network configuration' def __init__(self, name=None, ipv4addr=None, ipv4gateway=None, - ipv4cfgmethod=None, hwaddr=None, staticv6addrs=(), staticv6gateway=None): - self.myargs = (name, ipv4addr, ipv4gateway, ipv4cfgmethod, hwaddr) + ipv4cfgmethod=None, hwaddr=None, staticv6addrs=(), staticv6gateway=None, vlan_id=None): + self.myargs = (name, ipv4addr, ipv4gateway, ipv4cfgmethod, hwaddr, staticv6addrs, staticv6gateway, vlan_id) self.notnode = name is None self.stripped = False v6addrs = ','.join(staticv6addrs) @@ -1748,7 +1751,8 @@ class NetworkConfiguration(ConfluentMessage): 'ipv4_configuration': {'value': ipv4cfgmethod}, 'hw_addr': {'value': hwaddr}, 'static_v6_addresses': {'value': v6addrs}, - 'static_v6_gateway': {'value': staticv6gateway} + 'static_v6_gateway': {'value': staticv6gateway}, + 'vlan_id': {'value': vlan_id} } if self.notnode: self.kvpairs = kvpairs diff 
--git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index 38119dd6..76fca095 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -773,6 +773,7 @@ class IpmiHandler(object): hwaddr=lancfg['mac_address'], staticv6addrs=v6cfg.get('static_addrs', ''), staticv6gateway=v6cfg.get('static_gateway', ''), + vlan_id=lancfg.get('vlan_id', None) )) elif self.op == 'update': config = self.inputdata.netconfig(self.node) @@ -780,7 +781,8 @@ class IpmiHandler(object): self.ipmicmd.set_net_configuration( ipv4_address=config['ipv4_address'], ipv4_configuration=config['ipv4_configuration'], - ipv4_gateway=config['ipv4_gateway']) + ipv4_gateway=config['ipv4_gateway'], + vlan_id=config.get('vlan_id', None)) v6addrs = config.get('static_v6_addresses', None) if v6addrs is not None: v6addrs = v6addrs.split(',') diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index bb709b40..0334696f 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -632,7 +632,8 @@ class IpmiHandler(object): ipv4cfgmethod=lancfg['ipv4_configuration'], hwaddr=lancfg['mac_address'], staticv6addrs=v6cfg['static_addrs'], - staticv6gateway=v6cfg['static_gateway'] + staticv6gateway=v6cfg['static_gateway'], + vlan_id=lancfg.get('vlan_id', None) )) elif self.op == 'update': config = self.inputdata.netconfig(self.node) @@ -640,7 +641,8 @@ class IpmiHandler(object): self.ipmicmd.set_net_configuration( ipv4_address=config['ipv4_address'], ipv4_configuration=config['ipv4_configuration'], - ipv4_gateway=config['ipv4_gateway']) + ipv4_gateway=config['ipv4_gateway'], + vlan_id=config.get('vlan_id', None)) v6addrs = config.get('static_v6_addresses', None) if v6addrs is 
not None: v6addrs = v6addrs.split(',') From be2ae57a386f497dd93a6720c87609679fa123e1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 6 Oct 2025 17:34:17 -0400 Subject: [PATCH 334/413] Skip the '[' when evaluating partial ESC for terminator --- confluent_client/bin/confetty | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_client/bin/confetty b/confluent_client/bin/confetty index a792fcea..1a75e1cd 100755 --- a/confluent_client/bin/confetty +++ b/confluent_client/bin/confetty @@ -1030,7 +1030,7 @@ def consume_termdata(fh, bufferonly=False): # defer any partial ansi escape sequence for a later pass escidx = segment.rfind('\x1b[') if escidx >= 0: - for chr in segment[escidx + 1:]: + for chr in segment[escidx + 2:]: if 0x40 <= ord(chr) <= 0x7e: break else: From 179ad4e1965576094007fe68b2ef2fbc1d582301 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 8 Oct 2025 09:39:17 -0400 Subject: [PATCH 335/413] Fix IPv6 redfish config fetch for select targets Not all targets offer up static gateway --- .../confluent/plugins/hardwaremanagement/redfish.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 0334696f..96471fc2 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -632,7 +632,7 @@ class IpmiHandler(object): ipv4cfgmethod=lancfg['ipv4_configuration'], hwaddr=lancfg['mac_address'], staticv6addrs=v6cfg['static_addrs'], - staticv6gateway=v6cfg['static_gateway'], + staticv6gateway=v6cfg.get('static_gateway', None), vlan_id=lancfg.get('vlan_id', None) )) elif self.op == 'update': From 7cdfcd49136a0888ea5a28d84e48a8564c25da32 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 8 Oct 2025 09:42:17 -0400 Subject: [PATCH 336/413] Implement support for multi-manager XCC managed systems 
--- .../confluent/discovery/handlers/redfishbmc.py | 17 +++++++++++------ .../confluent/discovery/handlers/xcc3.py | 3 +++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/confluent_server/confluent/discovery/handlers/redfishbmc.py b/confluent_server/confluent/discovery/handlers/redfishbmc.py index b8764361..df94f897 100644 --- a/confluent_server/confluent/discovery/handlers/redfishbmc.py +++ b/confluent_server/confluent/discovery/handlers/redfishbmc.py @@ -68,14 +68,19 @@ class NodeHandler(generic.NodeHandler): self._srvroot = srvroot return self._srvroot + def get_manager_url(self, wc): + mgrs = self.srvroot(wc).get('Managers', {}).get('@odata.id', None) + if not mgrs: + raise Exception("No Managers resource on BMC") + rsp = wc.grab_json_response(mgrs) + if len(rsp.get('Members', [])) != 1: + raise Exception("Can not handle multiple Managers") + mgrurl = rsp['Members'][0]['@odata.id'] + return mgrurl + def mgrinfo(self, wc): if not self._mgrinfo: - mgrs = self.srvroot(wc)['Managers']['@odata.id'] - rsp = wc.grab_json_response(mgrs) - if len(rsp['Members']) != 1: - raise Exception("Can not handle multiple Managers") - mgrurl = rsp['Members'][0]['@odata.id'] - self._mgrinfo = wc.grab_json_response(mgrurl) + self._mgrinfo = wc.grab_json_response(self.get_manager_url(wc)) return self._mgrinfo diff --git a/confluent_server/confluent/discovery/handlers/xcc3.py b/confluent_server/confluent/discovery/handlers/xcc3.py index 050186e9..de2294a0 100644 --- a/confluent_server/confluent/discovery/handlers/xcc3.py +++ b/confluent_server/confluent/discovery/handlers/xcc3.py @@ -29,6 +29,9 @@ class NodeHandler(redfishbmc.NodeHandler): def get_firmware_default_account_info(self): return ('USERID', 'PASSW0RD') + def get_manager_url(self, wc): + return '/redfish/v1/Managers/1' + def scan(self): ip, port = self.get_web_port_and_ip() c = webclient.SecureHTTPConnection(ip, port, From 50243b67d579e7fcc4d7fb83fb8eb4f7caf02d43 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson 
Date: Wed, 8 Oct 2025 14:20:44 -0400 Subject: [PATCH 337/413] Add a more helpful error when bmc not set When doing a configbmc, need to present a more reasonable message about lack of address. --- confluent_server/confluent/selfservice.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/confluent_server/confluent/selfservice.py b/confluent_server/confluent/selfservice.py index 6df8ff17..c0b185af 100644 --- a/confluent_server/confluent/selfservice.py +++ b/confluent_server/confluent/selfservice.py @@ -261,6 +261,10 @@ def handle_request(env, start_response): res['bmcvlan'] = vlan bmcaddr = hmattr.get('hardwaremanagement.manager', {}).get('value', None) + if not bmcaddr: + start_response('500 Internal Server Error', []) + yield 'Missing value in hardwaremanagement.manager' + return bmcaddr = bmcaddr.split('/', 1)[0] bmcaddr = socket.getaddrinfo(bmcaddr, 0)[0] bmcaddr = bmcaddr[-1][0] From b6188683b8ce44fe703f6930e4f61977ea63f33a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 14 Oct 2025 10:22:15 -0400 Subject: [PATCH 338/413] Redirect 'xcc' to 'xcc3' for bmcconfig -c --- confluent_server/confluent/discovery/handlers/xcc.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index 23a6c437..a0846187 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -15,6 +15,7 @@ import base64 import codecs import confluent.discovery.handlers.imm as immhandler +import confluent.discovery.handlers.xcc3 as xcc3handler import confluent.exceptions as exc import confluent.netutil as netutil import confluent.util as util @@ -715,6 +716,13 @@ def remote_nodecfg(nodename, cfm): raise Exception('Cannot remote configure a system without known ' 'address') info = {'addresses': [ipaddr]} - nh = NodeHandler(info, cfm) + ipaddr = ipaddr[0] + wc = webclient.SecureHTTPConnection( 
+ ipaddr, 443, verifycallback=lambda x: True) + rsp = wc.grab_json_response('/DeviceDescription.json') + if isinstance(rsp, list): + nh = NodeHandler(info, cfm) + else: + nh = xcc3handler.NodeHandler(info, cfm) nh.config(nodename) From a73dced80bb3a6ddfffd4d8bc0c2e3bf566978ae Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 14 Oct 2025 13:19:28 -0400 Subject: [PATCH 339/413] Fix configbmc on XCC3 IPv4 based configbmc is now fixed for multi-nic XCC3 configurations. --- confluent_server/confluent/discovery/handlers/redfishbmc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/handlers/redfishbmc.py b/confluent_server/confluent/discovery/handlers/redfishbmc.py index df94f897..58b53054 100644 --- a/confluent_server/confluent/discovery/handlers/redfishbmc.py +++ b/confluent_server/confluent/discovery/handlers/redfishbmc.py @@ -286,7 +286,7 @@ class NodeHandler(generic.NodeHandler): compip = compip.split('%')[0] ipkey = 'IPv6Addresses' else: - ipkey = 'IPv6Addresses' + ipkey = 'IPv4Addresses' actualnic = None for curractnic in actualnics: currnicinfo = wc.grab_json_response(curractnic) From c5896c056e7f03dd96b0c0640dc239579e5ce3df Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 14 Oct 2025 14:30:27 -0400 Subject: [PATCH 340/413] Add facility to manage BMC CA certs For redfish at least, we can manage some BMC CA certificate trust stores. 
--- confluent_client/bin/nodecertutil | 89 +++++++++++++++++++ confluent_server/confluent/core.py | 4 + confluent_server/confluent/messages.py | 17 ++++ .../plugins/hardwaremanagement/redfish.py | 24 +++++ 4 files changed, 134 insertions(+) create mode 100644 confluent_client/bin/nodecertutil diff --git a/confluent_client/bin/nodecertutil b/confluent_client/bin/nodecertutil new file mode 100644 index 00000000..3b220b3d --- /dev/null +++ b/confluent_client/bin/nodecertutil @@ -0,0 +1,89 @@ +#!/usr/bin/python3 + +import os +import sys +from cryptography import x509 +from cryptography.hazmat.primitives import hashes + +path = os.path.dirname(os.path.realpath(__file__)) +path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) +if path.startswith('/opt'): + sys.path.append(path) +import confluent.client as client + +def removebmccacert(noderange, certid, cmd): + for res in cmd.delete(f'/noderange/{noderange}/configuration/management_controller/certificate_authorities/{certid}'): + print(repr(res)) + +def listbmccacerts(noderange, cmd): + certids = [] + for res in cmd.read(f'/noderange/{noderange}/configuration/management_controller/certificate_authorities'): + certids.append(res.get('item', {}).get('href', '')) + for certid in certids: + for res in cmd.read(f'/noderange/{noderange}/configuration/management_controller/certificate_authorities/{certid}'): + for node in res.get('databynode', {}): + certdata = res['databynode'][node].get('pem', {}).get('value', '') + summary = '' + if not certdata: + continue + san = res['databynode'][node].get('san', {}).get('value', '') + if san: + summary += f" SANs: {san}" + subject = res['databynode'][node].get('subject', {}).get('value', '') + if subject: + summary = subject + try: + cert = x509.load_pem_x509_certificate(certdata.encode()) + sha256 = cert.fingerprint(hashes.SHA256()).hex().upper() + except Exception as e: + print(f"Error processing certificate for {node}: {e}", file=sys.stderr) + continue + summary += f" 
(SHA256={sha256})" + print(f"{node}: {certid}: {summary}") + +def installbmccacert(noderange, certfile, cmd): + if certfile: + try: + with open(certfile, 'r') as f: + certdata = f.read() + except Exception as e: + print(f"Error reading certificate file: {e}", file=sys.stderr) + sys.exit(1) + + # Simple validation: check if it starts and ends with the correct PEM markers + if not (certdata.startswith("-----BEGIN CERTIFICATE-----") and certdata.strip().endswith("-----END CERTIFICATE-----")): + print("Invalid certificate format. Must be a PEM encoded certificate.", file=sys.stderr) + sys.exit(1) + payload = {'pem': certdata} + for res in cmd.update(f'/noderange/{noderange}/configuration/management_controller/certificate_authorities', payload): + print(repr(res)) + + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser(description='Node certificate utility') + parser.add_argument('noderange', help='Node range to operate on') + subparsers = parser.add_subparsers(dest='command', help='Available commands') + + # installbmccacert subcommand + install_parser = subparsers.add_parser('installbmccacert', help='Install BMC CA certificate') + install_parser.add_argument('filename', help='Certificate file to install') + + remove_parser = subparsers.add_parser('removebmccacert', help='Remove BMC CA certificate') + remove_parser.add_argument('id', help='Certificate id to remove') + + list_parser = subparsers.add_parser('listbmccacerts', help='List BMC CA certificates') + + args = parser.parse_args() + c = client.Command() + if args.command == 'installbmccacert': + installbmccacert(args.noderange, args.filename, c) + elif args.command == 'removebmccacert': + removebmccacert(args.noderange, args.id, c) + elif args.command == 'listbmccacerts': + listbmccacerts(args.noderange, c) + else: + parser.print_help() + sys.exit(1) \ No newline at end of file diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index 
2375078e..78820497 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -300,6 +300,10 @@ def _init_core(): 'default': 'ipmi', }), }, + 'certificate_authorities': PluginCollection({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), 'clear': PluginRoute({ 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index 1df368ff..50a0242e 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -517,6 +517,8 @@ def get_input_message(path, operation, inputdata, nodes=None, multinode=False, path[:4] == ['configuration', 'management_controller', 'alerts', 'destinations'] and operation != 'retrieve'): return InputAlertDestination(path, nodes, inputdata, multinode) + elif len(path) == 3 and path[:3] == ['configuration', 'management_controller', 'certificate_authorities'] and operation not in ('retrieve', 'delete'): + return InputCertificateAuthority(path, nodes, inputdata) elif path == ['identify'] and operation != 'retrieve': return InputIdentifyMessage(path, nodes, inputdata) elif path == ['events', 'hardware', 'decode']: @@ -955,6 +957,16 @@ class ConfluentInputMessage(ConfluentMessage): return key in self.valid_values +class InputCertificateAuthority(ConfluentInputMessage): + keyname = 'pem' + # anything is valid, since it is a blob of text + + def get_pem(self, node): + return self.inputbynode[node] + + def is_valid_key(self, key): + return key.strip().startswith('-----BEGIN') and '-----END' in key + class InputIdentImage(ConfluentInputMessage): keyname = 'ident_image' valid_values = ['create'] @@ -1345,6 +1357,11 @@ class ReseatResult(ConfluentChoiceMessage): keyname = 'reseat' +class CertificateAuthority(ConfluentMessage): + def __init__(self, node, pem, subject, san): + self.myargs = (node, pem, subject, san) + self.kvpairs = {node: {'pem': {'value': pem}, 
'subject': {'value': subject}, 'san': {'value': san}}} + class PowerState(ConfluentChoiceMessage): valid_values = set([ 'on', diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 96471fc2..2158e629 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -526,6 +526,8 @@ class IpmiHandler(object): def handle_configuration(self): if self.element[1:3] == ['management_controller', 'alerts']: return self.handle_alerts() + elif self.element[1:3] == ['management_controller', 'certificate_authorities']: + return self.handle_cert_authorities() elif self.element[1:3] == ['management_controller', 'users']: return self.handle_users() elif self.element[1:3] == ['management_controller', 'net_interfaces']: @@ -576,6 +578,28 @@ class IpmiHandler(object): self.pyghmi_event_to_confluent(event) self.output.put(msg.EventCollection((event,), name=self.node)) + def handle_cert_authorities(self): + if len(self.element) == 3: + if self.op == 'read': + for cert in self.ipmicmd.get_trusted_cas(): + self.output.put(msg.ChildCollection(cert['id'])) + elif self.op == 'update': + cert = self.inputdata.get_pem(self.node) + self.ipmicmd.add_trusted_ca(cert) + elif len(self.element) == 4: + certid = self.element[-1] + if self.op == 'read': + for certdata in self.ipmicmd.get_trusted_cas(): + if certdata['id'] == certid: + self.output.put(msg.CertificateAuthority( + pem=certdata['pem'], + node=self.node, + subject=certdata['subject'], + san=certdata.get('san', None))) + elif self.op == 'delete': + self.ipmicmd.del_trusted_ca(certid) + return + def handle_alerts(self): if self.element[3] == 'destinations': if len(self.element) == 4: From a43d7e11e2401f3a21e191d72a2f74e44202b358 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 15 Oct 2025 10:43:36 -0400 Subject: [PATCH 341/413] Implement an esxi 
getinstalldisk --- .../esxi7/profiles/hypervisor/kickstart | 3 +- .../hypervisor/scripts/getinstalldisk | 129 ++++++++++++++++++ .../profiles/hypervisor/scripts/modinstall | 2 + 3 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 confluent_osdeploy/esxi7/profiles/hypervisor/scripts/getinstalldisk diff --git a/confluent_osdeploy/esxi7/profiles/hypervisor/kickstart b/confluent_osdeploy/esxi7/profiles/hypervisor/kickstart index 99c2934b..a5bed980 100644 --- a/confluent_osdeploy/esxi7/profiles/hypervisor/kickstart +++ b/confluent_osdeploy/esxi7/profiles/hypervisor/kickstart @@ -1,6 +1,5 @@ accepteula -clearpart --firstdisk --overwritevmfs -install --firstdisk --overwritevmfs +%include /tmp/storagecfg %include /tmp/ksnet %include /tmp/rootpw reboot diff --git a/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/getinstalldisk b/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/getinstalldisk new file mode 100644 index 00000000..6e3c710a --- /dev/null +++ b/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/getinstalldisk @@ -0,0 +1,129 @@ +#!/usr/bin/python3 +import subprocess +import os + +class SilentException(Exception): + pass + +class DiskInfo(object): + def __init__(self, devname, devinfo): + self.name = devname + self.path = '/dev/' + devname + self.wwn = None + self.model = devinfo.get('model', 'Unknown') + self.driver = devinfo.get('adapter_driver', 'Unknown') + self.size = devinfo.get('size', 0) # in MiB + if not devinfo.get('is_local', False): + raise SilentException("Not local") + if devinfo.get('is_removable', False): + raise SilentException("Removable") + if devinfo.get('is_usb', False): + raise SilentException("USB device") + if devinfo.get('type', '').lower() in ('cd-rom',): + raise SilentException("CD-ROM device") + if self.size < 2048: + raise SilentException("Too small") + + + + + @property + def priority(self): + if self.model.lower() in ('m.2 nvme 2-bay raid kit', 'thinksystem_m.2_vd', 'thinksystem m.2', 'thinksystem_m.2'): 
+ return 0 + if self.driver == 'vmw_ahci': + return 2 + if self.driver == 'nvme_pcie': + return 3 + return 99 + + def __repr__(self): + return repr({ + 'name': self.name, + 'path': self.path, + 'wwn': self.wwn, + 'driver': self.driver, + 'size': self.size, + 'model': self.model, + }) + +def list_disks(): + current_dev = None + disks = {} + devlist = subprocess.check_output(['localcli', 'storage', 'core', 'device', 'list']) + if not isinstance(devlist, str): + devlist = devlist.decode('utf8') + devbyadp = {} + for line in devlist.split('\n'): + if not line.strip(): + continue + if not line.startswith(' '): + current_dev = line.rsplit(':', 1)[0] + if current_dev not in disks: + disks[current_dev] = {} + elif current_dev: + if ' Model:' in line: + disks[current_dev]['model'] = ' '.join(line.split()[1:]) + elif ' Driver:' in line: + disks[current_dev]['driver'] = ' '.join(line.split()[1:]) + elif ' Is Local:' in line: + disks[current_dev]['is_local'] = ' '.join(line.split()[2:]).lower() == 'true' + elif ' Is Removable:' in line: + disks[current_dev]['is_removable'] = ' '.join(line.split()[2:]).lower() == 'true' + elif ' Size:' in line: # in MiB + disks[current_dev]['size'] = int(line.split()[1]) + elif ' Is SSD:' in line: + disks[current_dev]['is_ssd'] = ' '.join(line.split()[2:]).lower() == 'true' + elif ' Is USB:' in line: + disks[current_dev]['is_usb'] = ' '.join(line.split()[2:]).lower() == 'true' + elif ' Is Removable:' in line: + disks[current_dev]['is_removable'] = ' '.join(line.split()[2:]).lower() == 'true' + elif 'Device Type:' in line: + disks[current_dev]['type'] = ' '.join(line.split()[2:]) + for dev in disks: + pathlist = subprocess.check_output(['localcli', 'storage', 'core', 'path', 'list', '--device', dev]) + if not isinstance(pathlist, str): + pathlist = pathlist.decode('utf8') + for line in pathlist.split('\n'): + if not line.strip(): + continue + if not line.startswith(' '): + continue + if ' Adapter Identifier:' in line: + adpname = ' 
'.join(line.split()[2:]) + disks[dev]['adapter_id'] = adpname + elif ' Adapter:' in line: + adp = ' '.join(line.split()[1:]) + disks[dev]['adapter'] = adp + devbyadp.setdefault(adp, []).append(dev) + for adp in devbyadp: + adaplist = subprocess.check_output(['localcli', 'storage', 'core', 'adapter', 'listdetailed', '--adapter', adp]) + if not isinstance(adaplist, str): + adaplist = adaplist.decode('utf8') + for line in adaplist.split('\n'): + if not line.strip(): + continue + if 'Driver Name:' in line: + driver = ' '.join(line.split()[2:]) + for dev in devbyadp[adp]: + disks[dev]['adapter_driver'] = driver + return disks + +def main(): + disks = [] + alldisks = list_disks() + for disk in alldisks: + try: + disks.append(DiskInfo(disk, alldisks[disk])) + except SilentException: + pass + except Exception as e: + print("Skipping {0}: {1}".format(disk, str(e))) + nd = [x.name for x in sorted(disks, key=lambda x: [x.priority, x.size])] + if nd: + with open('/tmp/storagecfg', 'w') as sc: + sc.write(f'clearpart --all --drives={nd[0]} --overwritevmfs\n') + sc.write(f'install --drive={nd[0]} --overwritevmfs\n') + +if __name__ == '__main__': + main() diff --git a/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/modinstall b/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/modinstall index ed10f3f2..1907ef30 100644 --- a/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/modinstall +++ b/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/modinstall @@ -1,9 +1,11 @@ #!/bin/sh mv /etc/confluent/confluent.deploycfg /etc/confluent/confluent.newdeploycfg /opt/confluent/bin/apiclient /confluent-public/os/$profile/scripts/makeksnet >> /tmp/makeksnet +/opt/confluent/bin/apiclient /confluent-public/os/$profile/scripts/getinstalldisk >> /tmp/getinstalldisk mv /etc/confluent/confluent.newdeploycfg /etc/confluent/confluent.deploycfg chmod +x /tmp/makeksnet /tmp/makeksnet > /tmp/ksnet +python3 /tmp/getinstalldisk rootpw=$(grep ^rootpassword: 
/etc/confluent/confluent.deploycfg|sed -e 's/^rootpassword: //') echo rootpw --iscrypted $rootpw > /tmp/rootpw export BOOT_CMDLINE=ks=/etc/confluent/ks.cfg From 4982ac1a17e7dadd3c4a997b3429d8228c7aab2a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 15 Oct 2025 16:51:21 -0400 Subject: [PATCH 342/413] Bump genesis version --- genesis/confluent-genesis.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis/confluent-genesis.spec b/genesis/confluent-genesis.spec index 32a8eed8..27880206 100644 --- a/genesis/confluent-genesis.spec +++ b/genesis/confluent-genesis.spec @@ -1,5 +1,5 @@ %define arch x86_64 -Version: 3.14.1 +Version: 3.14.2 Release: 1 Name: confluent-genesis-%{arch} BuildArch: noarch From b22c17208a212771ed654663911b88f8b7dd2f6c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 16 Oct 2025 18:30:46 -0400 Subject: [PATCH 343/413] Stop preferring HWE for now The HWE has some missing hardware support, ironically... --- .../ubuntu22.04/profiles/default/initprofile.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh index 3d6127b8..50c87545 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh @@ -1,16 +1,16 @@ #!/bin/bash set -e sed -i 's/label: ubuntu/label: Ubuntu/' $2/profile.yaml -if [ -e $1/casper/hwe-vmlinuz ]; then - ln -s $1/casper/hwe-vmlinuz $2/boot/kernel -else +#if [ -e $1/casper/hwe-vmlinuz ]; then +# ln -s $1/casper/hwe-vmlinuz $2/boot/kernel +#else ln -s $1/casper/vmlinuz $2/boot/kernel -fi -if [ -e $1/casper/hwe-initrd ]; then - ln -s $1/casper/hwe-initrd $2/boot/initramfs/distribution -else +#fi +#if [ -e $1/casper/hwe-initrd ]; then +# ln -s $1/casper/hwe-initrd $2/boot/initramfs/distribution +#else ln -s $1/casper/initrd $2/boot/initramfs/distribution -fi 
+#fi mkdir -p $2/boot/efi/boot if [ -d $1/EFI/boot/ ]; then ln -s $1/EFI/boot/* $2/boot/efi/boot From f9351484a4b12099689ded018766e926e98e3e00 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 17 Oct 2025 09:32:33 -0400 Subject: [PATCH 344/413] Add fallback if getinstalldisk detects no preferred disks --- .../esxi7/profiles/hypervisor/scripts/getinstalldisk | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/getinstalldisk b/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/getinstalldisk index 6e3c710a..c66b7103 100644 --- a/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/getinstalldisk +++ b/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/getinstalldisk @@ -124,6 +124,11 @@ def main(): with open('/tmp/storagecfg', 'w') as sc: sc.write(f'clearpart --all --drives={nd[0]} --overwritevmfs\n') sc.write(f'install --drive={nd[0]} --overwritevmfs\n') + else: + with open('/tmp/storagecfg', 'w') as sc: + sc.write(f'clearpart --firstdisk --overwritevmfs\n') + sc.write(f'install --firstdisk --overwritevmfs\n') + if __name__ == '__main__': main() From 11ff2dabfcf9cf487a37db71a25958dd625bd89c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 17 Oct 2025 10:00:38 -0400 Subject: [PATCH 345/413] Clean up kickstart networking Try to apply hostname through localcli, since hostname is unsupported through net if dhcp. Also more affirmatively indicate dhcp.
--- .../esxi7/profiles/hypervisor/scripts/makeksnet | 5 ++++- .../esxi7/profiles/hypervisor/scripts/modinstall | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/makeksnet b/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/makeksnet index eeb72ef7..7dcf966a 100644 --- a/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/makeksnet +++ b/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/makeksnet @@ -45,7 +45,10 @@ try: cfg['ipv4_gateway'] = ncfg['ipv4_gateway'] except Exception: pass -netline = 'network --hostname={0} --bootproto={1}'.format(nodename, cfg['ipv4_method']) +if cfg['ipv4_method'] == 'static': + netline = 'network --hostname={0} --bootproto={1}'.format(nodename, cfg['ipv4_method']) +else: + netline = 'network --bootproto=dhcp' if vmnic: netline += ' --device={0}'.format(vmnic) if cfg['ipv4_method'] == 'static': diff --git a/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/modinstall b/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/modinstall index 1907ef30..4dc880f8 100644 --- a/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/modinstall +++ b/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/modinstall @@ -5,6 +5,7 @@ mv /etc/confluent/confluent.deploycfg /etc/confluent/confluent.newdeploycfg mv /etc/confluent/confluent.newdeploycfg /etc/confluent/confluent.deploycfg chmod +x /tmp/makeksnet /tmp/makeksnet > /tmp/ksnet +localcli system hostname set --host $node python3 /tmp/getinstalldisk rootpw=$(grep ^rootpassword: /etc/confluent/confluent.deploycfg|sed -e 's/^rootpassword: //') echo rootpw --iscrypted $rootpw > /tmp/rootpw From 36687069aae86807bc25df2184e6bfad4dc02f11 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 21 Oct 2025 11:11:52 -0400 Subject: [PATCH 346/413] Fix ESXi8 deployment The changes for getinstalldisk assumed functionality in ESXi9. Target older functional level for our purposes. 
Also expand the fallback to cover cases where the disk interrogation fails. --- .../hypervisor/scripts/getinstalldisk | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/getinstalldisk b/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/getinstalldisk index c66b7103..3c780361 100644 --- a/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/getinstalldisk +++ b/confluent_osdeploy/esxi7/profiles/hypervisor/scripts/getinstalldisk @@ -96,22 +96,36 @@ def list_disks(): adp = ' '.join(line.split()[1:]) disks[dev]['adapter'] = adp devbyadp.setdefault(adp, []).append(dev) + adapterlist = subprocess.check_output(['localcli', 'storage', 'core', 'adapter', 'list']) + if not isinstance(adapterlist, str): + adapterlist = adapterlist.decode('utf8') + driverbyadp = {} + linenum = 0 + for line in adapterlist.split('\n'): + linenum += 1 + if not line.strip(): + continue + if linenum < 3: + continue + parts = line.split() + if len(parts) < 2: + continue + adp = parts[0] + driver = parts[1] + driverbyadp[adp] = driver for adp in devbyadp: - adaplist = subprocess.check_output(['localcli', 'storage', 'core', 'adapter', 'listdetailed', '--adapter', adp]) - if not isinstance(adaplist, str): - adaplist = adaplist.decode('utf8') - for line in adaplist.split('\n'): - if not line.strip(): - continue - if 'Driver Name:' in line: - driver = ' '.join(line.split()[2:]) - for dev in devbyadp[adp]: - disks[dev]['adapter_driver'] = driver + driver = driverbyadp.get(adp, 'Unknown') + for dev in devbyadp[adp]: + disks[dev]['adapter_driver'] = driver return disks def main(): disks = [] - alldisks = list_disks() + try: + alldisks = list_disks() + except Exception as e: + print("Error listing disks: {0}".format(str(e))) + alldisks = {} for disk in alldisks: try: disks.append(DiskInfo(disk, alldisks[disk])) From 762adb882a486e6b9687cf46d418d1a930f9b4a7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 21 
Oct 2025 13:04:30 -0400 Subject: [PATCH 347/413] Track client address on checkin When doing DHCP deployment in particular, it's good to track what the actual ip was. --- confluent_server/confluent/config/attributes.py | 3 +++ confluent_server/confluent/selfservice.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index 5e0e97e7..72518ab2 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -215,6 +215,9 @@ node = { 'Using this requires that collective members be ' 'defined as nodes for noderange expansion') }, + 'deployment.client_ip': { + 'description': ('Client IP used when most recently reporting state.') + }, 'deployment.lock': { 'description': ('Indicates whether deployment actions should be impeded. ' 'If locked, it indicates that a pending profile should not be applied. ' diff --git a/confluent_server/confluent/selfservice.py b/confluent_server/confluent/selfservice.py index c0b185af..72ffb765 100644 --- a/confluent_server/confluent/selfservice.py +++ b/confluent_server/confluent/selfservice.py @@ -466,6 +466,9 @@ def handle_request(env, start_response): statusstr = update.get('state', None) statusdetail = update.get('state_detail', None) didstateupdate = False + if statusstr or 'status' in update: + cfg.set_node_attributes({nodename: { + 'deployment.client_ip': {'value': clientip}}}) if statusstr: cfg.set_node_attributes({nodename: {'deployment.state': statusstr}}) didstateupdate = True From 3125f4171b29f9cf645aaaf6a627e7207698bf93 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 24 Oct 2025 20:02:51 -0400 Subject: [PATCH 348/413] Begin overhaul of TLS cert management Begin expanding certutil to sign other certificates from external CSRs more easily. Have certutil make the CA constraint critical. 
Have the fingerprint based validator have a mechanism to check for properly signed certificate in lieu of exact match, and update the stored fingerprint on match. Provide a means to request a custom subject when evaluating a target. Change redfish plugin to set that subject in the verifier. --- confluent_server/confluent/certutil.py | 101 +++++++++++------- .../confluent/config/attributes.py | 8 +- .../plugins/hardwaremanagement/redfish.py | 6 +- confluent_server/confluent/util.py | 98 ++++++++++++++++- 4 files changed, 172 insertions(+), 41 deletions(-) diff --git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index 46ae2f69..06831e8e 100644 --- a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -179,6 +179,16 @@ def assure_tls_ca(): finally: os.seteuid(ouid) +#def is_self_signed(pem): +# cert = ssl.PEM_cert_to_DER_cert(pem) +# return cert.get('subjectAltName', []) == cert.get('issuer', []) +# x509 certificate issuer subject comparison.. 
+#>>> b.issuer +# +#>>> b.subject +# + + def substitute_cfg(setting, key, val, newval, cfgfile, line): if key.strip() == setting: cfgfile.write(line.replace(val, newval) + '\n') @@ -266,8 +276,9 @@ def create_simple_ca(keyout, certout): finally: os.remove(tmpconfig) -def create_certificate(keyout=None, certout=None, csrout=None): - if not keyout: +def create_certificate(keyout=None, certout=None, csrfile=None, subj=None, san=None): + tlsmateriallocation = {} + if not certout: tlsmateriallocation = get_certificate_paths() keyout = tlsmateriallocation.get('keys', [None])[0] certout = tlsmateriallocation.get('certs', [None])[0] @@ -276,60 +287,64 @@ def create_certificate(keyout=None, certout=None, csrout=None): if not keyout or not certout: raise Exception('Unable to locate TLS certificate path automatically') assure_tls_ca() - shortname = socket.gethostname().split('.')[0] - longname = shortname # socket.getfqdn() - if not csrout: + if not subj: + shortname = socket.gethostname().split('.')[0] + longname = shortname # socket.getfqdn() + subj = '/CN={0}'.format(longname) + elif '/CN=' not in subj: + subj = '/CN={0}'.format(subj) + if not csrfile: subprocess.check_call( ['openssl', 'ecparam', '-name', 'secp384r1', '-genkey', '-out', keyout]) - ipaddrs = list(get_ip_addresses()) - san = ['IP:{0}'.format(x) for x in ipaddrs] - # It is incorrect to put IP addresses as DNS type. However - # there exists non-compliant clients that fail with them as IP - # san.extend(['DNS:{0}'.format(x) for x in ipaddrs]) - dnsnames = set(ipaddrs) - dnsnames.add(shortname) - for currip in ipaddrs: - dnsnames.add(socket.getnameinfo((currip, 0), 0)[0]) - for currname in dnsnames: - san.append('DNS:{0}'.format(currname)) - #san.append('DNS:{0}'.format(longname)) - san = ','.join(san) + if not san: + ipaddrs = list(get_ip_addresses()) + san = ['IP:{0}'.format(x) for x in ipaddrs] + # It is incorrect to put IP addresses as DNS type. 
However + # there exists non-compliant clients that fail with them as IP + # san.extend(['DNS:{0}'.format(x) for x in ipaddrs]) + dnsnames = set(ipaddrs) + dnsnames.add(shortname) + for currip in ipaddrs: + dnsnames.add(socket.getnameinfo((currip, 0), 0)[0]) + for currname in dnsnames: + san.append('DNS:{0}'.format(currname)) + #san.append('DNS:{0}'.format(longname)) + san = ','.join(san) sslcfg = get_openssl_conf_location() tmphdl, tmpconfig = tempfile.mkstemp() os.close(tmphdl) tmphdl, extconfig = tempfile.mkstemp() os.close(tmphdl) needcsr = False - if csrout is None: + if csrfile is None: needcsr = True - tmphdl, csrout = tempfile.mkstemp() + tmphdl, csrfile = tempfile.mkstemp() os.close(tmphdl) shutil.copy2(sslcfg, tmpconfig) try: + with open(extconfig, 'a') as cfgfile: + cfgfile.write('\nbasicConstraints=critical,CA:false\nsubjectAltName={0}'.format(san)) if needcsr: with open(tmpconfig, 'a') as cfgfile: cfgfile.write('\n[SAN]\nsubjectAltName={0}'.format(san)) - with open(extconfig, 'a') as cfgfile: - cfgfile.write('\nbasicConstraints=CA:false\nsubjectAltName={0}'.format(san)) subprocess.check_call([ - 'openssl', 'req', '-new', '-key', keyout, '-out', csrout, '-subj', - '/CN={0}'.format(longname), - '-extensions', 'SAN', '-config', tmpconfig + 'openssl', 'req', '-new', '-key', keyout, '-out', csrfile, '-subj', + subj, '-extensions', 'SAN', '-config', tmpconfig ]) - else: - # when used manually, allow the csr SAN to stand - # may add explicit subj/SAN argument, in which case we would skip copy - with open(tmpconfig, 'a') as cfgfile: - cfgfile.write('\ncopy_extensions=copy\n') - with open(extconfig, 'a') as cfgfile: - cfgfile.write('\nbasicConstraints=CA:false\n') + #else: + # # when used manually, allow the csr SAN to stand + # # may add explicit subj/SAN argument, in which case we would skip copy + # #with open(tmpconfig, 'a') as cfgfile: + # # cfgfile.write('\ncopy_extensions=copy\n') + # with open(extconfig, 'a') as cfgfile: + # 
cfgfile.write('\nbasicConstraints=CA:false\n') if os.path.exists('/etc/confluent/tls/cakey.pem'): # simple style CA in effect, make a random serial number and # hope for the best, and accept inability to backdate the cert serialnum = '0x' + ''.join(['{:02x}'.format(x) for x in bytearray(os.urandom(20))]) subprocess.check_call([ - 'openssl', 'x509', '-req', '-in', csrout, + 'openssl', 'x509', '-req', '-in', csrfile, '-CA', '/etc/confluent/tls/cacert.pem', '-CAkey', '/etc/confluent/tls/cakey.pem', '-set_serial', serialnum, '-out', certout, '-days', '27300', @@ -351,9 +366,9 @@ def create_certificate(keyout=None, certout=None, csrout=None): # with realcalock: # if we put it in server, we must lock it subprocess.check_call([ 'openssl', 'ca', '-config', cacfgfile, - '-in', csrout, '-out', certout, '-batch', '-notext', + '-in', csrfile, '-out', certout, '-batch', '-notext', '-startdate', '19700101010101Z', '-enddate', '21000101010101Z', - '-extfile', extconfig + '-extfile', extconfig, '-subj', subj ]) for keycopy in tlsmateriallocation.get('keys', []): if keycopy != keyout: @@ -381,7 +396,7 @@ def create_certificate(keyout=None, certout=None, csrout=None): finally: os.remove(tmpconfig) if needcsr: - os.remove(csrout) + os.remove(csrfile) print(extconfig) # os.remove(extconfig) @@ -389,10 +404,20 @@ if __name__ == '__main__': import sys outdir = os.getcwd() keyout = os.path.join(outdir, 'key.pem') - certout = os.path.join(outdir, sys.argv[2] + 'cert.pem') + certout = os.path.join(outdir, 'cert.pem') csrout = None + subj, san = (None, None) + try: + bindex = sys.argv.index('-b') + bmcnode = sys.argv.pop(bindex + 1) # Remove bmcnode argument + sys.argv.pop(bindex) # Remove -b flag + import confluent.config.configmanager as cfm + c = cfm.ConfigManager(None) + subj, san = util.get_bmc_subject_san(c, bmcnode) + except ValueError: + bindex = None try: csrout = sys.argv[1] except IndexError: csrout = None - create_certificate(keyout, certout, csrout) + create_certificate(keyout, 
certout, csrout, subj, san) diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index 72518ab2..26f77732 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -1,7 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation -# Copyright 2015-2019 Lenovo +# Copyright 2015-2025 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -408,6 +408,12 @@ node = { 'include / CIDR suffix to indicate subnet length, which is ' 'autodetected by default where possible.', }, + 'hardwaremanagement.manager_tls_name': { + 'description': 'A name to use in lieu of the value in hardwaremanagement.manager for ' + 'TLS certificate verification purposes. Some strategies involve a non-IP, ' + 'non-resolvable name, or this can be used to access by IP while using name-based ' + 'validation', + }, 'hardwaremanagement.method': { 'description': 'The method used to perform operations such as power ' 'control, get sensor data, get inventory, and so on. 
' diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 2158e629..1c7e211a 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -184,8 +184,12 @@ class IpmiCommandWrapper(ipmicommand.Command): (node,), ('secret.hardwaremanagementuser', 'collective.manager', 'secret.hardwaremanagementpassword', 'hardwaremanagement.manager'), self._attribschanged) + htn = cfm.get_node_attributes(node, 'hardwaremanagement.manager_tls_name') + subject = htn.get(node, {}).get('hardwaremanagement.manager_tls_name', {}).get('value', None) + if not subject: + subject = kwargs['bmc'] kv = util.TLSCertVerifier(cfm, node, - 'pubkeys.tls_hardwaremanager').verify_cert + 'pubkeys.tls_hardwaremanager', subject).verify_cert kwargs['verifycallback'] = kv try: super(IpmiCommandWrapper, self).__init__(**kwargs) diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index b4aaf1e3..4d46ee60 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -19,7 +19,9 @@ import base64 import confluent.exceptions as cexc import confluent.log as log +import glob import hashlib +import ipaddress try: import psutil except ImportError: @@ -31,6 +33,9 @@ import socket import ssl import struct import eventlet.green.subprocess as subprocess +import cryptography.x509 as x509 +import cryptography.x509.verification as verification + def mkdirp(path, mode=0o777): @@ -86,6 +91,49 @@ def list_interface_indexes(): return +def get_bmc_subject_san(configmanager, nodename, addip=None): + bmc_san = [] + subject = '' + ipas = set([]) + if addip: + ipas.add(addip) + dnsnames = set([]) + nodecfg = configmanager.get_node_attributes(nodename, + ('dns.domain', 'hardwaremanagement.manager', 'hardwaremanagement.manager_tls_name')) + bmcaddr = nodecfg.get(nodename, 
{}).get('hardwaremanagement.manager', {}).get('value', '') + domain = nodecfg.get(nodename, {}).get('dns.domain', {}).get('value', '') + isipv4 = False + if bmcaddr: + bmcaddr = bmcaddr.split('/', 1)[0] + bmcaddr = bmcaddr.split('%', 1)[0] + dnsnames.add(bmcaddr) + subject = bmcaddr + if ':' in bmcaddr: + ipas.add(bmcaddr) + dnsnames.add('{0}.ipv6-literal.net'.format(bmcaddr.replace(':', '-'))) + else: + try: + socket.inet_aton(bmcaddr) + isipv4 = True + ipas.add(bmcaddr) + except socket.error: + pass + if not isipv4: # neither ipv6 nor ipv4, should be a name + if domain and domain not in bmcaddr: + dnsnames.add('{0}.{1}'.format(bmcaddr, domain)) + bmcname = nodecfg.get(nodename, {}).get('hardwaremanagement.manager_tls_name', {}).get('value', '') + if bmcname: + subject = bmcname + dnsnames.add(bmcname) + if domain and domain not in bmcname: + dnsnames.add('{0}.{1}'.format(bmcname, domain)) + for dns in dnsnames: + bmc_san.append('DNS:{0}'.format(dns)) + for ip in ipas: + bmc_san.append('IP:{0}'.format(ip)) + return subject, ','.join(bmc_san) + + def list_ips(): # Used for getting addresses to indicate the multicast address # as well as getting all the broadcast addresses @@ -184,15 +232,51 @@ def cert_matches(fingerprint, certificate): return newfp and fingerprint == newfp +_polbuilder = None + + class TLSCertVerifier(object): - def __init__(self, configmanager, node, fieldname): + def __init__(self, configmanager, node, fieldname, subject=None): self.cfm = configmanager self.node = node self.fieldname = fieldname + self.subject = subject + + def verify_by_ca(self, certificate): + global _polbuilder + _polbuilder = None + if not _polbuilder: + certs = [] + for cert in glob.glob('/var/lib/confluent/public/site/tls/*.pem'): + with open(cert, 'rb') as certfile: + certs.extend(x509.load_pem_x509_certificates(certfile.read())) + if not certs: + return False + castore = verification.Store(certs) + _polbuilder = verification.PolicyBuilder() + eep = 
verification.ExtensionPolicy.permit_all().require_present( + x509.SubjectAlternativeName, verification.Criticality.AGNOSTIC, None).may_be_present( + x509.KeyUsage, verification.Criticality.AGNOSTIC, None) + cap = verification.ExtensionPolicy.webpki_defaults_ca().require_present( + x509.BasicConstraints, verification.Criticality.AGNOSTIC, None).may_be_present( + x509.KeyUsage, verification.Criticality.AGNOSTIC, None) + _polbuilder = _polbuilder.store(castore).extension_policies( + ee_policy=eep, ca_policy=cap) + try: + addr = ipaddress.ip_address(self.subject) + subject = x509.IPAddress(addr) + except ValueError: + subject = x509.DNSName(self.subject) + cert = x509.load_der_x509_certificate(certificate) + _polbuilder.build_server_verifier(subject).verify(cert, []) + return True + + def verify_cert(self, certificate): storedprint = self.cfm.get_node_attributes(self.node, (self.fieldname,) ) + if (self.fieldname not in storedprint[self.node] or storedprint[self.node][self.fieldname]['value'] == ''): # no stored value, check policy for next action @@ -220,6 +304,18 @@ class TLSCertVerifier(object): certificate): return True fingerprint = get_fingerprint(certificate, 'sha256') + # Mismatches, but try more traditional validation using the site CAs + if self.subject: + try: + if self.verify_by_ca(certificate): + auditlog = log.Logger('audit') + auditlog.log({'node': self.node, 'event': 'certautoupdate', + 'fingerprint': fingerprint}) + self.cfm.set_node_attributes( + {self.node: {self.fieldname: fingerprint}}) + return True + except Exception: + pass raise cexc.PubkeyInvalid( 'Mismatched certificate detected', certificate, fingerprint, self.fieldname, 'mismatch') From 6c9c58f46478231b850d1f31ea64b64590a8cd75 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Sun, 26 Oct 2025 14:57:26 -0400 Subject: [PATCH 349/413] Update certutil to prepare for broader usage For one, apply more rules from CA/B forum. 
This includes including KU and EKU extensions, marking basicConstraints critical, and randomized serial numbers. Also make the backdate and end date configurable, to allow for the BMC certs to have a more palatable validity interval. --- confluent_server/confluent/certutil.py | 27 ++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index 06831e8e..f469bd8f 100644 --- a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -2,6 +2,7 @@ import os import confluent.collective.manager as collective import confluent.util as util from os.path import exists +import datetime import shutil import socket import eventlet.green.subprocess as subprocess @@ -232,7 +233,7 @@ def create_full_ca(certout): cfgfile.write(line.strip() + '\n') continue cfgfile.write(line.strip() + '\n') - cfgfile.write('\n[CACert]\nbasicConstraints = CA:true\n\n[ca_confluent]\n') + cfgfile.write('\n[CACert]\nbasicConstraints = critical,CA:true\nkeyUsage = critical,keyCertSign,cRLSign\n[ca_confluent]\n') subprocess.check_call( ['openssl', 'ecparam', '-name', 'secp384r1', '-genkey', '-out', keyout]) @@ -267,7 +268,7 @@ def create_simple_ca(keyout, certout): if len(subj) > 68: subj = subj[:68] with open(tmpconfig, 'a') as cfgfile: - cfgfile.write('\n[CACert]\nbasicConstraints = CA:true\n') + cfgfile.write('\n[CACert]\nbasicConstraints = critical,CA:true\n') subprocess.check_call([ 'openssl', 'req', '-new', '-x509', '-key', keyout, '-days', '27300', '-out', certout, '-subj', subj, @@ -276,7 +277,18 @@ def create_simple_ca(keyout, certout): finally: os.remove(tmpconfig) -def create_certificate(keyout=None, certout=None, csrfile=None, subj=None, san=None): +def create_certificate(keyout=None, certout=None, csrfile=None, subj=None, san=None, backdate=True, days=None): + now_utc = datetime.datetime.now(datetime.timezone.utc) + if backdate: + # To deal with wildly 
off clocks, we backdate certificates. + startdate = '20000101010101Z' + else: + # apply a mild backdate anyway, even if these are supposed to be for more accurate clocks + startdate = (now_utc - datetime.timedelta(hours=24)).strftime('%Y%m%d%H%M%SZ') + if days is None: + enddate = '21000101010101Z' + else: + enddate = (now_utc + datetime.timedelta(days=days)).strftime('%Y%m%d%H%M%SZ') tlsmateriallocation = {} if not certout: tlsmateriallocation = get_certificate_paths() @@ -324,7 +336,7 @@ def create_certificate(keyout=None, certout=None, csrfile=None, subj=None, san=N shutil.copy2(sslcfg, tmpconfig) try: with open(extconfig, 'a') as cfgfile: - cfgfile.write('\nbasicConstraints=critical,CA:false\nsubjectAltName={0}'.format(san)) + cfgfile.write('\nbasicConstraints=critical,CA:false\nkeyUsage=critical,digitalSignature\nextendedKeyUsage=serverAuth,clientAuth\nsubjectAltName={0}'.format(san)) if needcsr: with open(tmpconfig, 'a') as cfgfile: cfgfile.write('\n[SAN]\nsubjectAltName={0}'.format(san)) @@ -363,11 +375,10 @@ def create_certificate(keyout=None, certout=None, csrfile=None, subj=None, san=N shutil.copy2(cacfgfile, tmpcafile) os.close(tmphdl) cacfgfile = tmpcafile - # with realcalock: # if we put it in server, we must lock it subprocess.check_call([ - 'openssl', 'ca', '-config', cacfgfile, + 'openssl', 'ca', '-config', cacfgfile, '-rand_serial', '-in', csrfile, '-out', certout, '-batch', '-notext', - '-startdate', '19700101010101Z', '-enddate', '21000101010101Z', + '-startdate', startdate, '-enddate', enddate, '-extfile', extconfig, '-subj', subj ]) for keycopy in tlsmateriallocation.get('keys', []): @@ -420,4 +431,4 @@ if __name__ == '__main__': csrout = sys.argv[1] except IndexError: csrout = None - create_certificate(keyout, certout, csrout, subj, san) + create_certificate(keyout, certout, csrout, subj, san, backdate=False, days=3650) From 24a76612ae49d15df51c79f28900e004dda88808 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 27 Oct 2025 06:41:05 
-0400 Subject: [PATCH 350/413] Use sha384 hash algorithm Some implementations reject sha256 as inadequate if ecdsa has 384 bit keylength. Bring the digest up to match the key size for the ECDSA. --- confluent_server/confluent/certutil.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index f469bd8f..74827304 100644 --- a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -242,7 +242,7 @@ def create_full_ca(certout): subprocess.check_call( ['openssl', 'ca', '-config', newcfg, '-batch', '-selfsign', '-extensions', 'CACert', '-extfile', newcfg, - '-notext', '-startdate', + '-notext', '-startdate', '-md', 'sha384', '19700101010101Z', '-enddate', '21000101010101Z', '-keyfile', keyout, '-out', '/etc/confluent/tls/ca/cacert.pem', '-in', csrout] ) @@ -378,7 +378,7 @@ def create_certificate(keyout=None, certout=None, csrfile=None, subj=None, san=N subprocess.check_call([ 'openssl', 'ca', '-config', cacfgfile, '-rand_serial', '-in', csrfile, '-out', certout, '-batch', '-notext', - '-startdate', startdate, '-enddate', enddate, + '-startdate', startdate, '-enddate', enddate, '-md', 'sha384', '-extfile', extconfig, '-subj', subj ]) for keycopy in tlsmateriallocation.get('keys', []): From 0ad7e99efefd21f469df5c563b56729872a71369 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 27 Oct 2025 08:38:14 -0400 Subject: [PATCH 351/413] Only optionally use cryptography verification Some supported distributions can't run the newer cryptography. Make it a feature that only works with newer platforms.
--- confluent_server/confluent/util.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index 4d46ee60..77fcf696 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -34,7 +34,10 @@ import ssl import struct import eventlet.green.subprocess as subprocess import cryptography.x509 as x509 -import cryptography.x509.verification as verification +try: + import cryptography.x509.verification as verification +except ImportError: + verification = None @@ -307,7 +310,7 @@ class TLSCertVerifier(object): # Mismatches, but try more traditional validation using the site CAs if self.subject: try: - if self.verify_by_ca(certificate): + if verification and self.verify_by_ca(certificate): auditlog = log.Logger('audit') auditlog.log({'node': self.node, 'event': 'certautoupdate', 'fingerprint': fingerprint}) From f0a779764d8a22c9468b0767f1a086cd6909ffe6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 28 Oct 2025 15:39:04 -0400 Subject: [PATCH 352/413] Fix ordering of digest argument The digest argument was erroneously inserted between startdate and its argument, correct this mistake.
--- confluent_server/confluent/certutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index 74827304..549c2aeb 100644 --- a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -242,7 +242,7 @@ def create_full_ca(certout): subprocess.check_call( ['openssl', 'ca', '-config', newcfg, '-batch', '-selfsign', '-extensions', 'CACert', '-extfile', newcfg, - '-notext', '-startdate', '-md', 'sha384', + '-notext', '-md', 'sha384', '-startdate', '19700101010101Z', '-enddate', '21000101010101Z', '-keyfile', keyout, '-out', '/etc/confluent/tls/ca/cacert.pem', '-in', csrout] ) From 8b12047ae0101ad9b9abb54e95d9874fc925c8e8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 31 Oct 2025 09:45:59 -0400 Subject: [PATCH 353/413] Update to handle newer XCC2 firmware --- confluent_server/confluent/discovery/handlers/xcc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index a0846187..b39574d5 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -490,7 +490,7 @@ class NodeHandler(immhandler.NodeHandler): {'UserName': username}, method='PATCH') if status != 200: rsp = json.loads(rsp) - if rsp.get('error', {}).get('code', 'Unknown') in ('Base.1.8.GeneralError', 'Base.1.12.GeneralError', 'Base.1.14.GeneralError', 'Base.1.18.GeneralError'): + if rsp.get('error', {}).get('code', 'Unknown') in ('Base.1.8.GeneralError', 'Base.1.12.GeneralError', 'Base.1.14.GeneralError', 'Base.1.18.GeneralError', 'Base.1.21.GeneralError'): if tries: eventlet.sleep(4) elif tmpaccount: @@ -522,7 +522,7 @@ class NodeHandler(immhandler.NodeHandler): if userent['users_user_name'] == user: curruser = userent break - if curruser.get('users_pass_is_sha256', 0): + if curruser 
and curruser.get('users_pass_is_sha256', 0): self._wc = None wc = self.wc nwc = wc.dupe() From ce8531375af781c52e225f7a1ca78326a7a52021 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 31 Oct 2025 09:45:59 -0400 Subject: [PATCH 354/413] Update to handle newer XCC2 firmware --- confluent_server/confluent/discovery/handlers/xcc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index a0846187..b39574d5 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -490,7 +490,7 @@ class NodeHandler(immhandler.NodeHandler): {'UserName': username}, method='PATCH') if status != 200: rsp = json.loads(rsp) - if rsp.get('error', {}).get('code', 'Unknown') in ('Base.1.8.GeneralError', 'Base.1.12.GeneralError', 'Base.1.14.GeneralError', 'Base.1.18.GeneralError'): + if rsp.get('error', {}).get('code', 'Unknown') in ('Base.1.8.GeneralError', 'Base.1.12.GeneralError', 'Base.1.14.GeneralError', 'Base.1.18.GeneralError', 'Base.1.21.GeneralError'): if tries: eventlet.sleep(4) elif tmpaccount: @@ -522,7 +522,7 @@ class NodeHandler(immhandler.NodeHandler): if userent['users_user_name'] == user: curruser = userent break - if curruser.get('users_pass_is_sha256', 0): + if curruser and curruser.get('users_pass_is_sha256', 0): self._wc = None wc = self.wc nwc = wc.dupe() From f755ba9f9182b4df0cf735d8fada15a0b7e8a29f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 31 Oct 2025 10:46:42 -0400 Subject: [PATCH 355/413] Implement method to sign BMC certificates --- confluent_server/confluent/certutil.py | 2 +- confluent_server/confluent/core.py | 14 +++++++++++ confluent_server/confluent/messages.py | 16 +++++++++++++ .../plugins/hardwaremanagement/redfish.py | 24 +++++++++++++++++++ confluent_server/confluent/util.py | 10 +++++--- 5 files changed, 62 insertions(+), 4 deletions(-) diff 
--git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index 549c2aeb..a471b2bf 100644 --- a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -296,7 +296,7 @@ def create_certificate(keyout=None, certout=None, csrfile=None, subj=None, san=N certout = tlsmateriallocation.get('certs', [None])[0] if not certout: certout = tlsmateriallocation.get('bundles', [None])[0] - if not keyout or not certout: + if (not keyout and not csrfile) or not certout: raise Exception('Unable to locate TLS certificate path automatically') assure_tls_ca() if not subj: diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index 78820497..b368efbe 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -300,6 +300,20 @@ def _init_core(): 'default': 'ipmi', }), }, + 'certificate': { + 'sign': PluginRoute({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + 'generate_csr': PluginRoute({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + 'install': PluginRoute({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + }, 'certificate_authorities': PluginCollection({ 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index 50a0242e..5967291a 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -519,6 +519,8 @@ def get_input_message(path, operation, inputdata, nodes=None, multinode=False, return InputAlertDestination(path, nodes, inputdata, multinode) elif len(path) == 3 and path[:3] == ['configuration', 'management_controller', 'certificate_authorities'] and operation not in ('retrieve', 'delete'): return InputCertificateAuthority(path, nodes, inputdata) + elif len(path) == 4 and path[:4] == ['configuration', 'management_controller', 
'certificate', 'sign'] and operation not in ('retrieve', 'delete'): + return InputSigningParameters(path, inputdata, nodes, configmanager) elif path == ['identify'] and operation != 'retrieve': return InputIdentifyMessage(path, nodes, inputdata) elif path == ['events', 'hardware', 'decode']: @@ -956,6 +958,20 @@ class ConfluentInputMessage(ConfluentMessage): def is_valid_key(self, key): return key in self.valid_values +class InputSigningParameters(InputConfigChangeSet): + + def get_days(self, node): + attribs = self.get_attributes(node) + return int(attribs['days']) + + def get_added_san(self, node): + attribs = self.get_attributes(node) + addsans = [] + for subj in attribs.get('added_san', '').split(','): + if subj: + addsans.append(subj.strip()) + return addsans + class InputCertificateAuthority(ConfluentInputMessage): keyname = 'pem' diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 1c7e211a..163ea351 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -20,6 +20,7 @@ import confluent.messages as msg import confluent.util as util import copy import errno +from confluent import certutil import eventlet import eventlet.event import eventlet.green.threading as threading @@ -37,6 +38,7 @@ ipmicommand = eventlet.import_patched('pyghmi.redfish.command') import socket import ssl import traceback +import tempfile if not hasattr(ssl, 'SSLEOFError'): ssl.SSLEOFError = None @@ -532,6 +534,8 @@ class IpmiHandler(object): return self.handle_alerts() elif self.element[1:3] == ['management_controller', 'certificate_authorities']: return self.handle_cert_authorities() + elif self.element[1:3] == ['management_controller', 'certificate']: + return self.handle_certificate() elif self.element[1:3] == ['management_controller', 'users']: return self.handle_users() elif self.element[1:3] 
== ['management_controller', 'net_interfaces']: @@ -582,6 +586,26 @@ class IpmiHandler(object): self.pyghmi_event_to_confluent(event) self.output.put(msg.EventCollection((event,), name=self.node)) + def handle_certificate(self): + self.element = self.element[3:] + if len(self.element) != 1: + raise Exception('Not implemented') + if self.element[0] == 'sign' and self.op == 'update': + csr = self.ipmicmd.get_bmc_csr() + subj, san = util.get_bmc_subject_san(self.cfm, self.node, self.inputdata.get_added_san(self.node)) + with tempfile.NamedTemporaryFile() as tmpfile: + tmpfile.write(csr.encode()) + tmpfile.flush() + certfile = tempfile.NamedTemporaryFile(delete=False) + certname = certfile.name + certfile.close() + certutil.create_certificate(None, certname, tmpfile.name, subj, san, backdate=False, + days=self.inputdata.get_days(self.node)) + with open(certname, 'rb') as certf: + cert = certf.read() + os.unlink(certname) + self.ipmicmd.install_bmc_certificate(cert) + def handle_cert_authorities(self): if len(self.element) == 3: if self.op == 'read': diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index 77fcf696..78cfcdd8 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -94,13 +94,17 @@ def list_interface_indexes(): return -def get_bmc_subject_san(configmanager, nodename, addip=None): +def get_bmc_subject_san(configmanager, nodename, addnames=()): bmc_san = [] subject = '' ipas = set([]) - if addip: - ipas.add(addip) dnsnames = set([]) + for addname in addnames: + try: + addr = ipaddress.ip_address(addname) + ipas.add(addname) + except Exception: + dnsnames.add(addname) nodecfg = configmanager.get_node_attributes(nodename, ('dns.domain', 'hardwaremanagement.manager', 'hardwaremanagement.manager_tls_name')) bmcaddr = nodecfg.get(nodename, {}).get('hardwaremanagement.manager', {}).get('value', '') From 073f6d1389042ad4102dcff52867ab444898186e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: 
Fri, 31 Oct 2025 12:04:27 -0400 Subject: [PATCH 356/413] Wire up cert signing to nodecertutil --- confluent_client/bin/nodecertutil | 15 +++++++++++++++ confluent_server/confluent/messages.py | 17 ++++++++++------- .../plugins/hardwaremanagement/redfish.py | 2 +- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/confluent_client/bin/nodecertutil b/confluent_client/bin/nodecertutil index 3b220b3d..f2e20896 100644 --- a/confluent_client/bin/nodecertutil +++ b/confluent_client/bin/nodecertutil @@ -76,6 +76,10 @@ if __name__ == '__main__': list_parser = subparsers.add_parser('listbmccacerts', help='List BMC CA certificates') + sign_bmc_parser = subparsers.add_parser('signbmccert', help='Sign BMC certificate') + sign_bmc_parser.add_argument('--days', type=int, help='Number of days the certificate is valid for') + sign_bmc_parser.add_argument('--added-names', type=str, help='Additional names to include in the certificate') + args = parser.parse_args() c = client.Command() if args.command == 'installbmccacert': @@ -84,6 +88,17 @@ if __name__ == '__main__': removebmccacert(args.noderange, args.id, c) elif args.command == 'listbmccacerts': listbmccacerts(args.noderange, c) + elif args.command == 'signbmccert': + payload = {} + if args.days is not None: + payload['days'] = args.days + else: + print("Error: --days is required for signbmccert", file=sys.stderr) + sys.exit(1) + if args.added_names: + payload['added_names'] = args.added_names + for res in c.update(f'/noderange/{args.noderange}/configuration/management_controller/certificate/sign', payload): + print(repr(res)) else: parser.print_help() sys.exit(1) \ No newline at end of file diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index 5967291a..aac7aa89 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -723,6 +723,9 @@ class InputConfigChangeSet(InputExpression): endattrs = {} for attr in attrs: origval = 
attrs[attr] + if isinstance(origval, int): + endattrs[attr] = origval + continue if isinstance(origval, bytes) or isinstance(origval, unicode): origval = {'expression': origval} if 'expression' not in origval: @@ -963,15 +966,15 @@ class InputSigningParameters(InputConfigChangeSet): def get_days(self, node): attribs = self.get_attributes(node) return int(attribs['days']) - - def get_added_san(self, node): + + def get_added_names(self, node): attribs = self.get_attributes(node) - addsans = [] - for subj in attribs.get('added_san', '').split(','): + addnames = [] + for subj in (attribs.get('added_names') or '').split(','): if subj: - addsans.append(subj.strip()) - return addsans - + addnames.append(subj.strip()) + return addnames + class InputCertificateAuthority(ConfluentInputMessage): keyname = 'pem' diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 163ea351..6c66ae04 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -592,7 +592,7 @@ class IpmiHandler(object): raise Exception('Not implemented') if self.element[0] == 'sign' and self.op == 'update': csr = self.ipmicmd.get_bmc_csr() - subj, san = util.get_bmc_subject_san(self.cfm, self.node, self.inputdata.get_added_san(self.node)) + subj, san = util.get_bmc_subject_san(self.cfm, self.node, self.inputdata.get_added_names(self.node)) with tempfile.NamedTemporaryFile() as tmpfile: tmpfile.write(csr.encode()) tmpfile.flush() From 5de063212f0430807af4fda435a287570de92f56 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 3 Nov 2025 10:43:34 -0500 Subject: [PATCH 357/413] Prepare for supporting constrained CA If asked to sign using a name constrained CA, avoid generating a certificate that would violate those constraints. 
--- confluent_server/confluent/certutil.py | 47 +++++++++++++++++++++----- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index a471b2bf..bdafdefc 100644 --- a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -7,6 +7,10 @@ import shutil import socket import eventlet.green.subprocess as subprocess import tempfile +try: + import cryptography.x509 as x509 +except ImportError: + x509 = None def mkdirp(targ): try: @@ -179,6 +183,7 @@ def assure_tls_ca(): os.symlink(certname, hashname) finally: os.seteuid(ouid) + return certout #def is_self_signed(pem): # cert = ssl.PEM_cert_to_DER_cert(pem) @@ -298,7 +303,7 @@ def create_certificate(keyout=None, certout=None, csrfile=None, subj=None, san=N certout = tlsmateriallocation.get('bundles', [None])[0] if (not keyout and not csrfile) or not certout: raise Exception('Unable to locate TLS certificate path automatically') - assure_tls_ca() + cacertname = assure_tls_ca() if not subj: shortname = socket.gethostname().split('.')[0] longname = shortname # socket.getfqdn() @@ -309,16 +314,42 @@ def create_certificate(keyout=None, certout=None, csrfile=None, subj=None, san=N subprocess.check_call( ['openssl', 'ecparam', '-name', 'secp384r1', '-genkey', '-out', keyout]) + permitdomains = [] + if x509: + # check if this CA has name constraints, and avoid violating them + with open(cacertname, 'rb') as f: + cer = x509.load_pem_x509_certificate(f.read()) + for extension in cer.extensions: + if extension.oid == x509.ExtensionOID.NAME_CONSTRAINTS: + nc = extension.value + for pname in nc.permitted_subtrees: + permitdomains.append(pname.value) if not san: ipaddrs = list(get_ip_addresses()) - san = ['IP:{0}'.format(x) for x in ipaddrs] - # It is incorrect to put IP addresses as DNS type. 
However - # there exists non-compliant clients that fail with them as IP - # san.extend(['DNS:{0}'.format(x) for x in ipaddrs]) - dnsnames = set(ipaddrs) - dnsnames.add(shortname) + if not permitdomains: + san = ['IP:{0}'.format(x) for x in ipaddrs] + # It is incorrect to put IP addresses as DNS type. However + # there exists non-compliant clients that fail with them as IP + # san.extend(['DNS:{0}'.format(x) for x in ipaddrs]) + dnsnames = set(ipaddrs) + dnsnames.add(shortname) + dnsnames.add(longname) + # nameconstraints preclude IP and shortname + san = [] + dnsnames = set() + for suffix in permitdomains: + if longname.endswith(suffix): + dnsnames.add(longname) + break + break for currip in ipaddrs: - dnsnames.add(socket.getnameinfo((currip, 0), 0)[0]) + currname = socket.getnameinfo((currip, 0), 0)[0] + for suffix in permitdomains: + if currname.endswith(suffix): + dnsnames.add(currname) + break + if not permitdomains: + dnsnames.add(currname) for currname in dnsnames: san.append('DNS:{0}'.format(currname)) #san.append('DNS:{0}'.format(longname)) From 5adb5fa780f9cf67da884465b36751504c5e7a7c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 3 Nov 2025 14:02:33 -0500 Subject: [PATCH 358/413] Automatically sign XCC certificates on discover If an XCC doesn't have a 'real' certificate, sign it with the confluent CA for 47 days. 
--- confluent_server/confluent/discovery/core.py | 6 ++- .../confluent/discovery/handlers/generic.py | 38 +++++++++++++++++++ .../discovery/handlers/redfishbmc.py | 9 +++++ .../confluent/discovery/handlers/xcc.py | 10 +++++ 4 files changed, 62 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index 6941845c..1e7aed8d 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -98,6 +98,7 @@ import eventlet import eventlet.greenpool import eventlet.semaphore + autosensors = set() scanner = None @@ -1472,7 +1473,7 @@ def discover_node(cfg, handler, info, nodename, manual): break log.log({'info': 'Discovered {0} ({1})'.format(nodename, handler.devname)}) - if nodeconfig: + if nodeconfig or handler.current_cert_self_signed(): bmcaddr = cfg.get_node_attributes(nodename, 'hardwaremanagement.manager') bmcaddr = bmcaddr.get(nodename, {}).get('hardwaremanagement.manager', {}).get('value', '') if not bmcaddr: @@ -1481,9 +1482,12 @@ def discover_node(cfg, handler, info, nodename, manual): bmcaddr = bmcaddr.split('/', 1)[0] wait_for_connection(bmcaddr) socket.getaddrinfo(bmcaddr, 443) + if nodeconfig: subprocess.check_call(['/opt/confluent/bin/nodeconfig', nodename] + nodeconfig) log.log({'info': 'Configured {0} ({1})'.format(nodename, handler.devname)}) + if handler.current_cert_self_signed(): + handler.autosign_certificate() info['discostatus'] = 'discovered' for i in pending_by_uuid.get(curruuid, []): diff --git a/confluent_server/confluent/discovery/handlers/generic.py b/confluent_server/confluent/discovery/handlers/generic.py index 2e941238..5f00639f 100644 --- a/confluent_server/confluent/discovery/handlers/generic.py +++ b/confluent_server/confluent/discovery/handlers/generic.py @@ -17,6 +17,10 @@ import errno import eventlet import socket webclient = eventlet.import_patched('pyghmi.util.webclient') +try: + import cryptography.x509 as 
x509 +except ImportError: + x509 = None class NodeHandler(object): https_supported = True @@ -59,6 +63,40 @@ class NodeHandler(object): # may occur against the target in a short while return True + def current_cert_self_signed(self): + if not x509: + return + if not self._ipaddr: + return + try: + wc = webclient.SecureHTTPConnection(self._ipaddr, verifycallback=self._savecert, port=443) + wc.connect() + wc.close() + if not self._fp: + return False + # Check if certificate is self-signed by comparing issuer and subject + cert = self._fp + certobj = x509.load_der_x509_certificate(cert) + skid = None + akid = None + for ext in certobj.extensions: + if ext.oid == x509.ExtensionOID.SUBJECT_KEY_IDENTIFIER: + skid = ext.value + elif ext.oid == x509.ExtensionOID.AUTHORITY_KEY_IDENTIFIER: + akid = ext.value + if akid: + if skid.digest == akid.key_identifier: + return True + elif certobj.issuer == certobj.subject: + return True + except Exception: + pass + return False + + def autosign_certificate(self): + # A no-op by default + return + def scan(self): # Do completely passive things to enhance data. 
# Probe is permitted to for example attempt a login diff --git a/confluent_server/confluent/discovery/handlers/redfishbmc.py b/confluent_server/confluent/discovery/handlers/redfishbmc.py index 58b53054..7573370f 100644 --- a/confluent_server/confluent/discovery/handlers/redfishbmc.py +++ b/confluent_server/confluent/discovery/handlers/redfishbmc.py @@ -23,6 +23,7 @@ try: from urllib import urlencode except ImportError: from urllib.parse import urlencode +import eventlet.green.subprocess as subprocess getaddrinfo = eventlet.support.greendns.getaddrinfo @@ -326,6 +327,14 @@ class NodeHandler(generic.NodeHandler): raise exc.TargetEndpointUnreachable( 'hardwaremanagement.manager must be set to desired address (No IPv6 Link Local detected)') + def autosign_certificate(self): + nodename = self.nodename + hwmgt_method = self.configmanager.get_node_attributes( + nodename, 'hardwaremanagement.method').get( + nodename, {}).get('hardwaremanagement.method', {}).get('value', 'ipmi') + if hwmgt_method != 'redfish': + return + subprocess.check_call(['/opt/confluent/bin/nodecertutil', nodename, 'signbmccert', '--days', '47']) def remote_nodecfg(nodename, cfm): cfg = cfm.get_node_attributes( diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index b39574d5..2637a6aa 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -29,6 +29,7 @@ import eventlet.green.socket as socket webclient = eventlet.import_patched('pyghmi.util.webclient') import struct getaddrinfo = eventlet.support.greendns.getaddrinfo +import eventlet.green.subprocess as subprocess def fixuuid(baduuid): @@ -704,6 +705,15 @@ class NodeHandler(immhandler.NodeHandler): if em: self.configmanager.set_node_attributes( {em: {'id.uuid': enclosureuuid}}) + def autosign_certificate(self): + nodename = self.nodename + hwmgt_method = self.configmanager.get_node_attributes( + nodename, 
'hardwaremanagement.method').get( + nodename, {}).get('hardwaremanagement.method', {}).get('value', 'ipmi') + if hwmgt_method != 'redfish': + return + subprocess.check_call(['/opt/confluent/bin/nodecertutil', nodename, 'signbmccert', '--days', '47']) + def remote_nodecfg(nodename, cfm): cfg = cfm.get_node_attributes( From 2826abb7abe8ac0cc5d4ee6e56e9fb0d910f7231 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 3 Nov 2025 14:21:36 -0500 Subject: [PATCH 359/413] Prune excessive leftover ext config files --- confluent_server/confluent/certutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index bdafdefc..d7c6ba16 100644 --- a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -439,7 +439,7 @@ def create_certificate(keyout=None, certout=None, csrfile=None, subj=None, san=N os.remove(tmpconfig) if needcsr: os.remove(csrfile) - print(extconfig) # os.remove(extconfig) + os.remove(extconfig) if __name__ == '__main__': From 174d20460787e618231f49f4cbf8a4f67898f8b3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 4 Nov 2025 09:58:11 -0500 Subject: [PATCH 360/413] Implement compatibility with newer pysnmp For now, terminate the async nature if newer pysnmp is detected. --- confluent_server/confluent/snmputil.py | 64 ++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/confluent_server/confluent/snmputil.py b/confluent_server/confluent/snmputil.py index 611b3831..ce6e3759 100644 --- a/confluent_server/confluent/snmputil.py +++ b/confluent_server/confluent/snmputil.py @@ -1,6 +1,6 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 -# Copyright 2016 Lenovo +# Copyright 2016-2025 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -26,16 +26,54 @@ import eventlet from eventlet.support.greendns import getaddrinfo import pysnmp.smi.error as snmperr import socket +import asyncio snmp = eventlet.import_patched('pysnmp.hlapi') +asyn = False +if not hasattr(snmp, 'UsmUserData'): + # pysnmp that dropped the sync support + import pysnmp.hlapi.v3arch.asyncio as snmp + asyn = True + import pysnmp.smi.rfc1902 as rfc1902 +def get_loop(): + try: + return asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + return loop + +def _run_coro(coro): + loop = get_loop() + fun = asyncio.wait_for(coro, None) + if loop.is_running(): + task = asyncio.ensure_future(fun) + return loop.run_until_complete(task) + return loop.run_until_complete(fun) + +async def _agen_to_list(agen): + out = [] + async for item in agen: + out.append(item) + return out + +def _sync_gen(agen): + return _run_coro(_agen_to_list(agen)) + def _get_transport(name): # Annoyingly, pysnmp does not automatically determine ipv6 v ipv4 res = getaddrinfo(name, 161, 0, socket.SOCK_DGRAM) if res[0][0] == socket.AF_INET6: - return snmp.Udp6TransportTarget(res[0][4], 2) + if asyn: + return _run_coro(snmp.Udp6TransportTarget.create(res[0][4], 2)) + else: + return snmp.Udp6TransportTarget(res[0][4], 2) else: - return snmp.UdpTransportTarget(res[0][4], 2) + if asyn: + return _run_coro(snmp.UdpTransportTarget.create(res[0][4], 2)) + else: + return snmp.UdpTransportTarget(res[0][4], 2) class Session(object): @@ -83,12 +121,22 @@ class Session(object): if '::' in oid: resolvemib = True mib, field = oid.split('::') - obj = snmp.ObjectType(snmp.ObjectIdentity(mib, field)) + if asyn: + obj = rfc1902.ObjectType(rfc1902.ObjectIdentity(mib, field)) + else: + obj = snmp.ObjectType(snmp.ObjectIdentity(mib, field)) else: - obj = snmp.ObjectType(snmp.ObjectIdentity(oid)) - - walking = snmp.bulkCmd(self.eng, self.authdata, tp, ctx, 0, 10, obj, - lexicographicMode=False, lookupMib=resolvemib) + if asyn: + obj = 
rfc1902.ObjectType(rfc1902.ObjectIdentity(oid)) + else: + obj = snmp.ObjectType(snmp.ObjectIdentity(oid)) + if asyn: + walking = snmp.bulk_walk_cmd(self.eng, self.authdata, tp, ctx, 0, 10, obj, + lexicographicMode=False, lookupMib=resolvemib) + walking = _sync_gen(walking) + else: + walking = snmp.bulkCmd(self.eng, self.authdata, tp, ctx, 0, 10, obj, + lexicographicMode=False, lookupMib=resolvemib) try: for rsp in walking: errstr, errnum, erridx, answers = rsp From 6c0d7ea60e1a806db89178a532703cafd7920546 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 4 Nov 2025 11:14:52 -0500 Subject: [PATCH 361/413] Simplify end untethered el9 diskless environment Rather than treat both as the same, since untethered has everything up front anyway, go ahead and extract the filesystem. This makes the mount look more straightforward and makes it so deletion of files from the image also frees ram. --- .../profiles/default/scripts/imageboot.sh | 43 +++++++++++++++---- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh index 7b340d2c..df89a6ce 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh @@ -3,7 +3,7 @@ confluent_whost=$confluent_mgr if [[ "$confluent_whost" == *:* ]] && [[ "$confluent_whost" != "["* ]]; then confluent_whost="[$confluent_mgr]" fi -mkdir -p /mnt/remoteimg /mnt/remote /mnt/overlay +mkdir -p /mnt/remoteimg /mnt/remote /mnt/overlay /sysroot if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then mount -t tmpfs untethered /mnt/remoteimg curl https://$confluent_whost/confluent-public/os/$confluent_profile/rootimg.sfs -o /mnt/remoteimg/rootimg.sfs @@ -45,15 +45,40 @@ memtot=$(grep ^MemTotal: /proc/meminfo|awk '{print $2}') memtot=$((memtot/2))$(grep ^MemTotal: /proc/meminfo | awk '{print $3'}) echo 
$memtot > /sys/block/zram0/disksize mkfs.xfs /dev/zram0 > /dev/null -mount -o discard /dev/zram0 /mnt/overlay -if [ ! -f /tmp/mountparts.sh ]; then - mkdir -p /mnt/overlay/upper /mnt/overlay/work - mount -t overlay -o upperdir=/mnt/overlay/upper,workdir=/mnt/overlay/work,lowerdir=/mnt/remote disklessroot /sysroot +if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then + mount -o discard /dev/zram0 /sysroot + echo -en "Decrypting and extracting root filesystem: 0%\r" + srcsz=$(du -sk /mnt/remote | awk '{print $1}') + while [ -f /mnt/remoteimg/rootimg.sfs ]; do + dstsz=$(du -sk /sysroot | awk '{print $1}') + pct=$((dstsz * 100 / srcsz)) + if [ $pct -gt 99 ]; then + pct=99 + fi + echo -en "Decrypting and extracting root filesystem: $pct%\r" + sleep 0.25 + done & + cp -ax /mnt/remote/* /sysroot/ + umount /mnt/remote + if [ -e /dev/mapper/cryptimg ]; then + dmsetup remove cryptimg + fi + losetup -d $loopdev + rm /mnt/remoteimg/rootimg.sfs + umount /mnt/remoteimg + wait + echo -e "Decrypting and extracting root filesystem: 100%" else - for srcmount in $(cat /tmp/mountparts.sh | awk '{print $3}'); do - mkdir -p /mnt/overlay${srcmount}/upper /mnt/overlay${srcmount}/work - mount -t overlay -o upperdir=/mnt/overlay${srcmount}/upper,workdir=/mnt/overlay${srcmount}/work,lowerdir=${srcmount} disklesspart /sysroot${srcmount#/mnt/remote} - done + mount -o discard /dev/zram0 /mnt/overlay + if [ ! 
-f /tmp/mountparts.sh ]; then + mkdir -p /mnt/overlay/upper /mnt/overlay/work + mount -t overlay -o upperdir=/mnt/overlay/upper,workdir=/mnt/overlay/work,lowerdir=/mnt/remote disklessroot /sysroot + else + for srcmount in $(cat /tmp/mountparts.sh | awk '{print $3}'); do + mkdir -p /mnt/overlay${srcmount}/upper /mnt/overlay${srcmount}/work + mount -t overlay -o upperdir=/mnt/overlay${srcmount}/upper,workdir=/mnt/overlay${srcmount}/work,lowerdir=${srcmount} disklesspart /sysroot${srcmount#/mnt/remote} + done + fi fi mkdir -p /sysroot/etc/ssh mkdir -p /sysroot/etc/confluent From 21155d2091105ea6ae57dbd8dfa835306e57fa8b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 4 Nov 2025 11:17:28 -0500 Subject: [PATCH 362/413] Bring untethered changes to el10 diskless --- .../profiles/default/scripts/imageboot.sh | 43 +++++++++++++++---- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh index d65b32b6..3104444d 100644 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh @@ -3,7 +3,7 @@ confluent_whost=$confluent_mgr if [[ "$confluent_whost" == *:* ]] && [[ "$confluent_whost" != "["* ]]; then confluent_whost="[$confluent_mgr]" fi -mkdir -p /mnt/remoteimg /mnt/remote /mnt/overlay +mkdir -p /mnt/remoteimg /mnt/remote /mnt/overlay /sysroot if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then mount -t tmpfs untethered /mnt/remoteimg curl https://$confluent_whost/confluent-public/os/$confluent_profile/rootimg.sfs -o /mnt/remoteimg/rootimg.sfs @@ -45,15 +45,40 @@ memtot=$(grep ^MemTotal: /proc/meminfo|awk '{print $2}') memtot=$((memtot/2))$(grep ^MemTotal: /proc/meminfo | awk '{print $3'}) echo $memtot > /sys/block/zram0/disksize mkfs.xfs /dev/zram0 > /dev/null -mount -o discard /dev/zram0 /mnt/overlay -if [ ! 
-f /tmp/mountparts.sh ]; then - mkdir -p /mnt/overlay/upper /mnt/overlay/work - mount -t overlay -o upperdir=/mnt/overlay/upper,workdir=/mnt/overlay/work,lowerdir=/mnt/remote disklessroot /sysroot +if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then + mount -o discard /dev/zram0 /sysroot + echo -en "Decrypting and extracting root filesystem: 0%\r" + srcsz=$(du -sk /mnt/remote | awk '{print $1}') + while [ -f /mnt/remoteimg/rootimg.sfs ]; do + dstsz=$(du -sk /sysroot | awk '{print $1}') + pct=$((dstsz * 100 / srcsz)) + if [ $pct -gt 99 ]; then + pct=99 + fi + echo -en "Decrypting and extracting root filesystem: $pct%\r" + sleep 0.25 + done & + cp -ax /mnt/remote/* /sysroot/ + umount /mnt/remote + if [ -e /dev/mapper/cryptimg ]; then + dmsetup remove cryptimg + fi + losetup -d $loopdev + rm /mnt/remoteimg/rootimg.sfs + umount /mnt/remoteimg + wait + echo -e "Decrypting and extracting root filesystem: 100%" else - for srcmount in $(cat /tmp/mountparts.sh | awk '{print $3}'); do - mkdir -p /mnt/overlay${srcmount}/upper /mnt/overlay${srcmount}/work - mount -t overlay -o upperdir=/mnt/overlay${srcmount}/upper,workdir=/mnt/overlay${srcmount}/work,lowerdir=${srcmount} disklesspart /sysroot${srcmount#/mnt/remote} - done + mount -o discard /dev/zram0 /mnt/overlay + if [ ! 
-f /tmp/mountparts.sh ]; then + mkdir -p /mnt/overlay/upper /mnt/overlay/work + mount -t overlay -o upperdir=/mnt/overlay/upper,workdir=/mnt/overlay/work,lowerdir=/mnt/remote disklessroot /sysroot + else + for srcmount in $(cat /tmp/mountparts.sh | awk '{print $3}'); do + mkdir -p /mnt/overlay${srcmount}/upper /mnt/overlay${srcmount}/work + mount -t overlay -o upperdir=/mnt/overlay${srcmount}/upper,workdir=/mnt/overlay${srcmount}/work,lowerdir=${srcmount} disklesspart /sysroot${srcmount#/mnt/remote} + done + fi fi mkdir -p /sysroot/etc/ssh mkdir -p /sysroot/etc/confluent From cc9a81103bad1fcfd8a3af213d773b328df5e4d2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 4 Nov 2025 15:51:22 -0500 Subject: [PATCH 363/413] Do not autosign if the corresponding cryptography is unavailable We use cryptography verification, but it's relatively new. For compatibility, we fall back to fingerprint only. This is pretty bad when inflicted on unsuspecting users on autosign, so skip autosign if cert validation would break. 
--- confluent_server/confluent/discovery/core.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index 1e7aed8d..c8018110 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -107,6 +107,11 @@ try: except NameError: unicode = str +try: + import cryptography.x509.verification as verification +except ImportError: + verification = None + class nesteddict(dict): def __missing__(self, key): @@ -1486,7 +1491,7 @@ def discover_node(cfg, handler, info, nodename, manual): subprocess.check_call(['/opt/confluent/bin/nodeconfig', nodename] + nodeconfig) log.log({'info': 'Configured {0} ({1})'.format(nodename, handler.devname)}) - if handler.current_cert_self_signed(): + if verification and handler.current_cert_self_signed(): handler.autosign_certificate() info['discostatus'] = 'discovered' From b07da455c2635a464ce8f33c19cef0bef035ee25 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 7 Nov 2025 11:22:12 -0500 Subject: [PATCH 364/413] Fix SAN generation The nameconstraint support missed a branch, fix this. 
--- confluent_server/confluent/certutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index d7c6ba16..b87a3818 100644 --- a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -334,6 +334,7 @@ def create_certificate(keyout=None, certout=None, csrfile=None, subj=None, san=N dnsnames = set(ipaddrs) dnsnames.add(shortname) dnsnames.add(longname) + else: # nameconstraints preclude IP and shortname san = [] dnsnames = set() @@ -341,7 +342,6 @@ def create_certificate(keyout=None, certout=None, csrfile=None, subj=None, san=N if longname.endswith(suffix): dnsnames.add(longname) break - break for currip in ipaddrs: currname = socket.getnameinfo((currip, 0), 0)[0] for suffix in permitdomains: From 20292cdfd0dc1e9042ba5875e918bb7bb8fb0407 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 7 Nov 2025 13:22:21 -0500 Subject: [PATCH 365/413] Do not let diskless.conf persist into EL9 diskless images It fouls run of kdump building the kdump image. 
--- .../el9-diskless/profiles/default/scripts/imageboot.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh index df89a6ce..3c83ff14 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh @@ -154,6 +154,7 @@ mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ mv /lib/firmware /lib/firmware-ramfs ln -s /sysroot/lib/firmware /lib/firmware +rm -f /sysroot/etc/dracut.conf.d/diskless.conf # remove diskless dracut from runtime, to make kdump happier kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) if grep debugssh /proc/cmdline >& /dev/null; then exec /opt/confluent/bin/start_root From d2d77ab1d5bd14208004760be9c115f49c202086 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 7 Nov 2025 13:22:21 -0500 Subject: [PATCH 366/413] Do not let diskless.conf persist into EL9 diskless images It fouls run of kdump building the kdump image. 
--- .../el9-diskless/profiles/default/scripts/imageboot.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh index 7b340d2c..544cb366 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh @@ -129,6 +129,7 @@ mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ mv /lib/firmware /lib/firmware-ramfs ln -s /sysroot/lib/firmware /lib/firmware +rm -f /sysroot/etc/dracut.conf.d/diskless.conf # remove diskless dracut from runtime, to make kdump happier kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) if grep debugssh /proc/cmdline >& /dev/null; then exec /opt/confluent/bin/start_root From 6ebb6de10743f0db13e0dbcff488013d0adcaad6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 10 Nov 2025 10:21:01 -0500 Subject: [PATCH 367/413] Allow specifiying SNMP privacy protocol Modern SNMP devices may require AES. Unfortunately, older ones may refuse AES. For compatibility, continue to default to DES, but allow AES to be indicated in attributes. 
--- .../confluent/config/attributes.py | 4 +++ confluent_server/confluent/networking/lldp.py | 14 ++++++++--- .../confluent/networking/macmap.py | 25 +++++++++---------- .../confluent/networking/netutil.py | 6 +++-- confluent_server/confluent/snmputil.py | 12 +++++++-- 5 files changed, 40 insertions(+), 21 deletions(-) diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index 26f77732..dc5b8d40 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -605,6 +605,10 @@ node = { 'description': ('SNMPv1 community string, it is highly recommended to' 'step up to SNMPv3'), }, + 'snmp.privacyprotocol': { + 'description': 'The privacy protocol to use for SNMPv3', + 'valid_values': ('aes', 'des'), + }, # 'secret.snmplocalizedkey': { # 'description': ("SNMPv3 key localized to this node's SNMP Engine id" # 'This can be used in lieu of snmppassphrase to avoid' diff --git a/confluent_server/confluent/networking/lldp.py b/confluent_server/confluent/networking/lldp.py index 5f17ab80..e2eba7e3 100644 --- a/confluent_server/confluent/networking/lldp.py +++ b/confluent_server/confluent/networking/lldp.py @@ -255,7 +255,13 @@ def _extract_neighbor_data_b(args): args are carried as a tuple, because of eventlet convenience """ - switch, password, user, cfm, force = args[:5] + # Safely unpack args with defaults to avoid IndexError + switch = args[0] if len(args) > 0 else None + password = args[1] if len(args) > 1 else None + user = args[2] if len(args) > 2 else None + cfm = args[3] if len(args) > 3 else None + privproto = args[4] if len(args) > 4 else None + force = args[5] if len(args) > 5 else False vintage = _neighdata.get(switch, {}).get('!!vintage', 0) now = util.monotonic_time() if vintage > (now - 60) and not force: @@ -265,7 +271,7 @@ def _extract_neighbor_data_b(args): return _extract_neighbor_data_https(switch, user, password, cfm, lldpdata) except Exception 
as e: pass - conn = snmp.Session(switch, password, user) + conn = snmp.Session(switch, password, user, privacy_protocol=privproto) sid = None for sysid in conn.walk('1.3.6.1.2.1.1.2'): sid = str(sysid[1][6:]) @@ -364,8 +370,8 @@ def _extract_neighbor_data(args): return _extract_neighbor_data_b(args) except Exception as e: yieldexc = False - if len(args) >= 6: - yieldexc = args[5] + if len(args) >= 7: + yieldexc = args[6] if yieldexc: return e else: diff --git a/confluent_server/confluent/networking/macmap.py b/confluent_server/confluent/networking/macmap.py index 5545e09a..a96a48ee 100644 --- a/confluent_server/confluent/networking/macmap.py +++ b/confluent_server/confluent/networking/macmap.py @@ -213,14 +213,14 @@ def _fast_backend_fixup(macs, switch): else: _nodesbymac[mac] = (nodename, nummacs) -def _offload_map_switch(switch, password, user): +def _offload_map_switch(switch, password, user, privprotocol=None): if _offloader is None: _start_offloader() evtid = random.randint(0, 4294967295) while evtid in _offloadevts: evtid = random.randint(0, 4294967295) _offloadevts[evtid] = eventlet.Event() - _offloader.stdin.write(msgpack.packb((evtid, switch, password, user), + _offloader.stdin.write(msgpack.packb((evtid, switch, password, user, privprotocol), use_bin_type=True)) _offloader.stdin.flush() result = _offloadevts[evtid].wait() @@ -280,12 +280,11 @@ def _map_switch_backend(args): # fallback if ifName is empty # global _macmap - if len(args) == 4: - switch, password, user, _ = args # 4th arg is for affluent only - if not user: - user = None - else: - switch, password = args + switch = args[0] if len(args) > 0 else None + password = args[1] if len(args) > 1 else None + user = args[2] if len(args) > 2 else None + privprotocol = args[4] if len(args) > 4 else None + if not user: # make '' be treated as None user = None if switch not in noaffluent: try: @@ -298,7 +297,7 @@ def _map_switch_backend(args): except Exception as e: pass mactobridge, ifnamemap, 
bridgetoifmap = _offload_map_switch( - switch, password, user) + switch, password, user, privprotocol) maccounts = {} bridgetoifvalid = False for mac in mactobridge: @@ -367,9 +366,9 @@ def _map_switch_backend(args): _nodesbymac[mac] = (nodename, maccounts[ifname]) _macsbyswitch[switch] = newmacs -def _snmp_map_switch_relay(rqid, switch, password, user): +def _snmp_map_switch_relay(rqid, switch, password, user, privprotocol=None): try: - res = _snmp_map_switch(switch, password, user) + res = _snmp_map_switch(switch, password, user, privprotocol) payload = msgpack.packb((rqid,) + res, use_bin_type=True) try: sys.stdout.buffer.write(payload) @@ -391,10 +390,10 @@ def _snmp_map_switch_relay(rqid, switch, password, user): finally: sys.stdout.flush() -def _snmp_map_switch(switch, password, user): +def _snmp_map_switch(switch, password, user, privprotocol=None): haveqbridge = False mactobridge = {} - conn = snmp.Session(switch, password, user) + conn = snmp.Session(switch, password, user, privacy_protocol=privprotocol) ifnamemap = get_portnamemap(conn) for vb in conn.walk('1.3.6.1.2.1.17.7.1.2.2.1.2'): haveqbridge = True diff --git a/confluent_server/confluent/networking/netutil.py b/confluent_server/confluent/networking/netutil.py index 48b2f028..65cd8236 100644 --- a/confluent_server/confluent/networking/netutil.py +++ b/confluent_server/confluent/networking/netutil.py @@ -21,7 +21,7 @@ import confluent.collective.manager as collective def get_switchcreds(configmanager, switches): switchcfg = configmanager.get_node_attributes( switches, ('secret.hardwaremanagementuser', 'secret.snmpcommunity', - 'secret.hardwaremanagementpassword', + 'secret.hardwaremanagementpassword', 'snmp.privacyprotocol', 'collective.managercandidates'), decrypt=True) switchauth = [] for switch in switches: @@ -47,7 +47,9 @@ def get_switchcreds(configmanager, switches): 'secret.hardwaremanagementuser', {}).get('value', None) if not user: user = None - switchauth.append((switch, password, user, 
configmanager)) + privacy_protocol = switchparms.get( + 'snmp.privacyprotocol', {}).get('value', None) + switchauth.append((switch, password, user, configmanager, privacy_protocol)) return switchauth diff --git a/confluent_server/confluent/snmputil.py b/confluent_server/confluent/snmputil.py index ce6e3759..6f03a31b 100644 --- a/confluent_server/confluent/snmputil.py +++ b/confluent_server/confluent/snmputil.py @@ -78,7 +78,7 @@ def _get_transport(name): class Session(object): - def __init__(self, server, secret, username=None, context=None): + def __init__(self, server, secret, username=None, context=None, privacy_protocol=None): """Create a new session to interrogate a switch If username is not given, it is assumed that @@ -97,9 +97,17 @@ class Session(object): # SNMP v2c self.authdata = snmp.CommunityData(secret, mpModel=1) else: + if privacy_protocol == 'aes': + privproto = snmp.usmAesCfb128Protocol + elif privacy_protocol in ('des', None): + privproto = snmp.usmDESPrivProtocol + else: + raise exc.ConfluentException('Unsupported SNMPv3 privacy protocol ' + '{0}'.format(privacy_protocol)) self.authdata = snmp.UsmUserData( username, authKey=secret, privKey=secret, - authProtocol=snmp.usmHMACSHAAuthProtocol) + authProtocol=snmp.usmHMACSHAAuthProtocol, + privProtocol=privproto) self.eng = snmp.SnmpEngine() def walk(self, oid): From 9148a841b592cdab2c84075f858e77a501a3118d Mon Sep 17 00:00:00 2001 From: Markus Hilger Date: Thu, 13 Nov 2025 00:45:53 +0100 Subject: [PATCH 368/413] Add documentation for custom nodeattribs --- confluent_client/doc/man/nodeattrib.ronn.tmpl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_client/doc/man/nodeattrib.ronn.tmpl b/confluent_client/doc/man/nodeattrib.ronn.tmpl index c8127ad8..33c19675 100644 --- a/confluent_client/doc/man/nodeattrib.ronn.tmpl +++ b/confluent_client/doc/man/nodeattrib.ronn.tmpl @@ -24,6 +24,8 @@ For a full list of attributes, run `nodeattrib all` against a node. 
If `-c` is specified, this will set the nodeattribute to a null value. This is different from setting the value to an empty string. +Arbitrary custom attributes can also be created with the `custom.` prefix. + Attributes may be specified by wildcard, for example `net.*switch` will report all attributes that begin with `net.` and end with `switch`. From 53c918042adbc3d5203364bdfaa2c132b02796ed Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 13 Nov 2025 14:28:25 -0500 Subject: [PATCH 369/413] Remove double-caching in tethered diskless By default, the squashfs file was being cached as well as the contents after extraction. This is superfluous pressure on the cache of the OS. However, it does help keep the image afloat through 'confignet', so leave it on until onboot completes, then reclaim cache and disable further caching. --- .../profiles/default/scripts/imageboot.sh | 13 +++++++++++++ .../profiles/default/scripts/imageboot.sh | 17 +++++++++++++++++ .../profiles/default/scripts/onboot.sh | 6 ++++++ 3 files changed, 36 insertions(+) diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh index 3104444d..90e66de7 100644 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh @@ -45,6 +45,7 @@ memtot=$(grep ^MemTotal: /proc/meminfo|awk '{print $2}') memtot=$((memtot/2))$(grep ^MemTotal: /proc/meminfo | awk '{print $3'}) echo $memtot > /sys/block/zram0/disksize mkfs.xfs /dev/zram0 > /dev/null +TETHERED=0 if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then mount -o discard /dev/zram0 /sysroot echo -en "Decrypting and extracting root filesystem: 0%\r" @@ -69,6 +70,7 @@ if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then wait echo -e "Decrypting and extracting root filesystem: 100%" else + TETHERED=1 mount -o discard /dev/zram0 /mnt/overlay if [ ! 
-f /tmp/mountparts.sh ]; then mkdir -p /mnt/overlay/upper /mnt/overlay/work @@ -154,6 +156,17 @@ ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ mv /lib/firmware /lib/firmware-ramfs ln -s /sysroot/lib/firmware /lib/firmware kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) +if [ $TETHERED -eq 1 ]; then + ( + sleep 86400 & + ONBOOTPID=$! + mkdir -p /sysroot/run/confluent + echo $ONBOOTPID > /sysroot/run/confluent/onboot_sleep.pid + wait $ONBOOTPID + losetup $loopdev --direct-io=on + dd if=/mnt/remoteimg/rootimg.sfs iflag=nocache count=0 >& /dev/null + ) & +fi if grep debugssh /proc/cmdline >& /dev/null; then exec /opt/confluent/bin/start_root else diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh index 3c83ff14..468d0a2d 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh @@ -45,6 +45,7 @@ memtot=$(grep ^MemTotal: /proc/meminfo|awk '{print $2}') memtot=$((memtot/2))$(grep ^MemTotal: /proc/meminfo | awk '{print $3'}) echo $memtot > /sys/block/zram0/disksize mkfs.xfs /dev/zram0 > /dev/null +TETHERED=0 if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then mount -o discard /dev/zram0 /sysroot echo -en "Decrypting and extracting root filesystem: 0%\r" @@ -69,6 +70,7 @@ if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then wait echo -e "Decrypting and extracting root filesystem: 100%" else + TETHERED=1 mount -o discard /dev/zram0 /mnt/overlay if [ ! 
-f /tmp/mountparts.sh ]; then mkdir -p /mnt/overlay/upper /mnt/overlay/work @@ -156,6 +158,21 @@ mv /lib/firmware /lib/firmware-ramfs ln -s /sysroot/lib/firmware /lib/firmware rm -f /sysroot/etc/dracut.conf.d/diskless.conf # remove diskless dracut from runtime, to make kdump happier kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) +if [ $TETHERED -eq 1 ]; then + # In tethered mode, the double-caching is useful to get through tricky part of + # onboot with confignet. After that, it's excessive cache usage. + # Give the onboot script a hook to have us come in and enable directio to the + # squashfs and drop the cache of the rootimg so far + ( + sleep 86400 & + ONBOOTPID=$! + mkdir -p /sysroot/run/confluent + echo $ONBOOTPID > /sysroot/run/confluent/onboot_sleep.pid + wait $ONBOOTPID + losetup $loopdev --direct-io=on + dd if=/mnt/remoteimg/rootimg.sfs iflag=nocache count=0 >& /dev/null + ) & +fi if grep debugssh /proc/cmdline >& /dev/null; then exec /opt/confluent/bin/start_root else diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh index b8a55cf1..33b2a6e4 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh @@ -62,5 +62,11 @@ run_remote_parts onboot.d # Induce execution of remote configuration, e.g. 
ansible plays in ansible/onboot.d/ run_remote_config onboot.d +if [ -f /run/confluent/onboot_sleep.pid ]; then + sleeppid=$(cat /run/confluent/onboot_sleep.pid) + kill "$sleeppid" + rm -f /run/confluent/onboot_sleep.pid +fi + #curl -X POST -d 'status: booted' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_mgr/confluent-api/self/updatestatus kill $logshowpid From 58d5209595ae4a9701df1f2a8d471524312d7884 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 13 Nov 2025 14:35:18 -0500 Subject: [PATCH 370/413] Port tethered improvments to EL8 --- .../profiles/default/scripts/imageboot.sh | 13 +++++++++++++ .../el8-diskless/profiles/default/scripts/onboot.sh | 5 +++++ 2 files changed, 18 insertions(+) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh index fe53bf38..0a3d336f 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh @@ -4,10 +4,12 @@ if [[ "$confluent_whost" == *:* ]] && [[ "$confluent_whost" != "["* ]]; then confluent_whost="[$confluent_mgr]" fi mkdir -p /mnt/remoteimg /mnt/remote /mnt/overlay +TETHERED=0 if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then mount -t tmpfs untethered /mnt/remoteimg curl https://$confluent_whost/confluent-public/os/$confluent_profile/rootimg.sfs -o /mnt/remoteimg/rootimg.sfs else + TETHERED=1 confluent_urls="$confluent_urls https://$confluent_whost/confluent-public/os/$confluent_profile/rootimg.sfs" /opt/confluent/bin/urlmount $confluent_urls /mnt/remoteimg fi @@ -130,4 +132,15 @@ ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ mv /lib/firmware /lib/firmware-ramfs ln -s /sysroot/lib/firmware /lib/firmware kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) +if [ $TETHERED -eq 1 ]; then + ( + sleep 86400 & + ONBOOTPID=$! 
+ mkdir -p /sysroot/run/confluent + echo $ONBOOTPID > /sysroot/run/confluent/onboot_sleep.pid + wait $ONBOOTPID + losetup $loopdev --direct-io=on + dd if=/mnt/remoteimg/rootimg.sfs iflag=nocache count=0 >& /dev/null + ) & +fi exec /opt/confluent/bin/start_root diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh index 0bc3777b..506225b1 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh @@ -68,5 +68,10 @@ run_remote_parts onboot.d # Induce execution of remote configuration, e.g. ansible plays in ansible/onboot.d/ run_remote_config onboot.d +if [ -f /run/confluent/onboot_sleep.pid ]; then + sleeppid=$(cat /run/confluent/onboot_sleep.pid) + kill "$sleeppid" + rm -f /run/confluent/onboot_sleep.pid +fi #curl -X POST -d 'status: booted' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_mgr/confluent-api/self/updatestatus kill $logshowpid From e1efd6a9c5c1f994268b827fc160085d2d585e8d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 13 Nov 2025 14:39:53 -0500 Subject: [PATCH 371/413] Implement new 'uncompressed' image method This allows the FS to just live, uncompressed, in cache. 
This is generally a bad idea, however: - In a hypothetically super-tuned diskless image, the lack of double-cache can offset the lack of compression - The image will have supreme read performance - It will have the most deterministic memory behavior --- .../profiles/default/scripts/imageboot.sh | 2 +- .../profiles/default/scripts/imageboot.sh | 22 ++++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh index 0a3d336f..bc0b8437 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh @@ -140,7 +140,7 @@ if [ $TETHERED -eq 1 ]; then echo $ONBOOTPID > /sysroot/run/confluent/onboot_sleep.pid wait $ONBOOTPID losetup $loopdev --direct-io=on - dd if=/mnt/remoteimg/rootimg.sfs iflag=nocache count=0 >& /dev/null + dd if=/mnt/remoteimg/rootimg.sfs iflag=nocache count=0 >& /dev/null ) & fi exec /opt/confluent/bin/start_root diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh index 468d0a2d..f2b2176a 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh @@ -4,7 +4,7 @@ if [[ "$confluent_whost" == *:* ]] && [[ "$confluent_whost" != "["* ]]; then confluent_whost="[$confluent_mgr]" fi mkdir -p /mnt/remoteimg /mnt/remote /mnt/overlay /sysroot -if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then +if [ "untethered" = "$(getarg confluent_imagemethod)" -o "uncompressed" = "$(getarg confluent_imagemethod)" ]; then mount -t tmpfs untethered /mnt/remoteimg curl https://$confluent_whost/confluent-public/os/$confluent_profile/rootimg.sfs -o /mnt/remoteimg/rootimg.sfs else @@ -40,14 +40,20 @@ fi #mount -t tmpfs 
overlay /mnt/overlay -modprobe zram -memtot=$(grep ^MemTotal: /proc/meminfo|awk '{print $2}') -memtot=$((memtot/2))$(grep ^MemTotal: /proc/meminfo | awk '{print $3'}) -echo $memtot > /sys/block/zram0/disksize -mkfs.xfs /dev/zram0 > /dev/null +if [ ! "uncompressed" = "$(getarg confluent_imagemethod)" ]; then + modprobe zram + memtot=$(grep ^MemTotal: /proc/meminfo|awk '{print $2}') + memtot=$((memtot/2))$(grep ^MemTotal: /proc/meminfo | awk '{print $3'}) + echo $memtot > /sys/block/zram0/disksize + mkfs.xfs /dev/zram0 > /dev/null +fi TETHERED=0 -if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then - mount -o discard /dev/zram0 /sysroot +if [ "untethered" = "$(getarg confluent_imagemethod)" -o "uncompressed" = "$(getarg confluent_imagemethod)" ]; then + if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then + mount -o discard /dev/zram0 /sysroot + else + mount -t tmpfs disklessroot /sysroot + fi echo -en "Decrypting and extracting root filesystem: 0%\r" srcsz=$(du -sk /mnt/remote | awk '{print $1}') while [ -f /mnt/remoteimg/rootimg.sfs ]; do From 4484216198482f675ae48d9c7218ddeb90a96efa Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 13 Nov 2025 15:24:26 -0500 Subject: [PATCH 372/413] Fix issues with the tethered memory optimizations --- .../profiles/default/scripts/imageboot.sh | 35 ++++++++++++------- .../profiles/default/scripts/imageboot.sh | 7 ++-- .../profiles/default/scripts/imageboot.sh | 20 ++++++++--- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh index 90e66de7..78041aa3 100644 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh @@ -4,7 +4,7 @@ if [[ "$confluent_whost" == *:* ]] && [[ "$confluent_whost" != "["* ]]; then confluent_whost="[$confluent_mgr]" fi mkdir -p 
/mnt/remoteimg /mnt/remote /mnt/overlay /sysroot -if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then +if [ "untethered" = "$(getarg confluent_imagemethod)" -o "uncompressed" = "$(getarg confluent_imagemethod)" ]; then mount -t tmpfs untethered /mnt/remoteimg curl https://$confluent_whost/confluent-public/os/$confluent_profile/rootimg.sfs -o /mnt/remoteimg/rootimg.sfs else @@ -40,14 +40,20 @@ fi #mount -t tmpfs overlay /mnt/overlay -modprobe zram -memtot=$(grep ^MemTotal: /proc/meminfo|awk '{print $2}') -memtot=$((memtot/2))$(grep ^MemTotal: /proc/meminfo | awk '{print $3'}) -echo $memtot > /sys/block/zram0/disksize -mkfs.xfs /dev/zram0 > /dev/null +if [ ! "uncompressed" = "$(getarg confluent_imagemethod)" ]; then + modprobe zram + memtot=$(grep ^MemTotal: /proc/meminfo|awk '{print $2}') + memtot=$((memtot/2))$(grep ^MemTotal: /proc/meminfo | awk '{print $3'}) + echo $memtot > /sys/block/zram0/disksize + mkfs.xfs /dev/zram0 > /dev/null +fi TETHERED=0 -if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then - mount -o discard /dev/zram0 /sysroot +if [ "untethered" = "$(getarg confluent_imagemethod)" -o "uncompressed" = "$(getarg confluent_imagemethod)" ]; then + if [ "untethered" = "$(getarg confluent_imagemethod)" ]; then + mount -o discard /dev/zram0 /sysroot + else + mount -t tmpfs disklessroot /sysroot + fi echo -en "Decrypting and extracting root filesystem: 0%\r" srcsz=$(du -sk /mnt/remote | awk '{print $1}') while [ -f /mnt/remoteimg/rootimg.sfs ]; do @@ -160,17 +166,22 @@ if [ $TETHERED -eq 1 ]; then ( sleep 86400 & ONBOOTPID=$! 
- mkdir -p /sysroot/run/confluent - echo $ONBOOTPID > /sysroot/run/confluent/onboot_sleep.pid + mkdir -p /run/confluent + echo $ONBOOTPID > /run/confluent/onboot_sleep.pid wait $ONBOOTPID - losetup $loopdev --direct-io=on + losetup /sysroot/$loopdev --direct-io=on dd if=/mnt/remoteimg/rootimg.sfs iflag=nocache count=0 >& /dev/null + rm -rf /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs /lib/firmware-ramfs /usr/lib64/libcrypto.so* /usr/lib64/systemd/ /kernel/ /usr/bin/ /usr/sbin/ /usr/libexec/ ) & + while [ ! -f /run/confluent/onboot_sleep.pid ]; do + sleep 0.1 + done +else + rm -rf /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs /lib/firmware-ramfs /usr/lib64/libcrypto.so* /usr/lib64/systemd/ /kernel/ /usr/bin/ /usr/sbin/ /usr/libexec/ fi if grep debugssh /proc/cmdline >& /dev/null; then exec /opt/confluent/bin/start_root else - rm -rf /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs /lib/firmware-ramfs /usr/lib64/libcrypto.so* /usr/lib64/systemd/ /kernel/ /usr/bin/ /usr/sbin/ /usr/libexec/ exec /opt/confluent/bin/start_root -s # share mount namespace, keep kernel callbacks intact fi diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh index bc0b8437..426df86c 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh @@ -136,11 +136,14 @@ if [ $TETHERED -eq 1 ]; then ( sleep 86400 & ONBOOTPID=$! - mkdir -p /sysroot/run/confluent - echo $ONBOOTPID > /sysroot/run/confluent/onboot_sleep.pid + mkdir -p /run/confluent + echo $ONBOOTPID > /run/confluent/onboot_sleep.pid wait $ONBOOTPID losetup $loopdev --direct-io=on dd if=/mnt/remoteimg/rootimg.sfs iflag=nocache count=0 >& /dev/null ) & + while [ ! 
-f /run/confluent/onboot_sleep.pid ]; do + sleep 0.1 + done fi exec /opt/confluent/bin/start_root diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh index f2b2176a..c318c0b5 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh @@ -164,6 +164,11 @@ mv /lib/firmware /lib/firmware-ramfs ln -s /sysroot/lib/firmware /lib/firmware rm -f /sysroot/etc/dracut.conf.d/diskless.conf # remove diskless dracut from runtime, to make kdump happier kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) +if grep debugssh /proc/cmdline >& /dev/null; then + debugssh=1 +else + debugssh=0 +fi if [ $TETHERED -eq 1 ]; then # In tethered mode, the double-caching is useful to get through tricky part of # onboot with confignet. After that, it's excessive cache usage. @@ -172,16 +177,23 @@ if [ $TETHERED -eq 1 ]; then ( sleep 86400 & ONBOOTPID=$! - mkdir -p /sysroot/run/confluent - echo $ONBOOTPID > /sysroot/run/confluent/onboot_sleep.pid + mkdir -p /run/confluent + echo $ONBOOTPID > /run/confluent/onboot_sleep.pid wait $ONBOOTPID - losetup $loopdev --direct-io=on + losetup /sysroot/$loopdev --direct-io=on dd if=/mnt/remoteimg/rootimg.sfs iflag=nocache count=0 >& /dev/null + if [ $debugssh -eq 0 ]; then + rm -rf /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs /lib/firmware-ramfs /usr/lib64/libcrypto.so* /usr/lib64/systemd/ /kernel/ /usr/bin/ /usr/sbin/ /usr/libexec/ + fi ) & + while [ ! 
-f /run/confluent/onboot_sleep.pid ]; do + sleep 0.1 + done +elif [ $debugssh -eq 0 ]; then + rm -rf /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs /lib/firmware-ramfs /usr/lib64/libcrypto.so* /usr/lib64/systemd/ /kernel/ /usr/bin/ /usr/sbin/ /usr/libexec/ fi if grep debugssh /proc/cmdline >& /dev/null; then exec /opt/confluent/bin/start_root else - rm -rf /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs /lib/firmware-ramfs /usr/lib64/libcrypto.so* /usr/lib64/systemd/ /kernel/ /usr/bin/ /usr/sbin/ /usr/libexec/ exec /opt/confluent/bin/start_root -s # share mount namespace, keep kernel callbacks intact fi From d20c5ac6eb236c9415636c3549b9ece96b653f17 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 13 Nov 2025 15:33:04 -0500 Subject: [PATCH 373/413] Move handling of the loop directio straight to onboot There were difficulties in the devfs after boot, just let the full system handle it. --- .../el10-diskless/profiles/default/scripts/imageboot.sh | 1 - .../el8-diskless/profiles/default/scripts/imageboot.sh | 1 - .../el8-diskless/profiles/default/scripts/onboot.sh | 4 ++++ .../el9-diskless/profiles/default/scripts/imageboot.sh | 1 - .../el9-diskless/profiles/default/scripts/onboot.sh | 4 ++++ 5 files changed, 8 insertions(+), 3 deletions(-) diff --git a/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh index 78041aa3..d6880ff4 100644 --- a/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el10-diskless/profiles/default/scripts/imageboot.sh @@ -169,7 +169,6 @@ if [ $TETHERED -eq 1 ]; then mkdir -p /run/confluent echo $ONBOOTPID > /run/confluent/onboot_sleep.pid wait $ONBOOTPID - losetup /sysroot/$loopdev --direct-io=on dd if=/mnt/remoteimg/rootimg.sfs iflag=nocache count=0 >& /dev/null rm -rf /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs /lib/firmware-ramfs /usr/lib64/libcrypto.so* 
/usr/lib64/systemd/ /kernel/ /usr/bin/ /usr/sbin/ /usr/libexec/ ) & diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh index 426df86c..38be47f7 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh @@ -139,7 +139,6 @@ if [ $TETHERED -eq 1 ]; then mkdir -p /run/confluent echo $ONBOOTPID > /run/confluent/onboot_sleep.pid wait $ONBOOTPID - losetup $loopdev --direct-io=on dd if=/mnt/remoteimg/rootimg.sfs iflag=nocache count=0 >& /dev/null ) & while [ ! -f /run/confluent/onboot_sleep.pid ]; do diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh index 506225b1..b6ebb48f 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh @@ -69,6 +69,10 @@ run_remote_parts onboot.d run_remote_config onboot.d if [ -f /run/confluent/onboot_sleep.pid ]; then + loopdev=$(losetup -j /mnt/remoteimg/rootimg.sfs|cut -d: -f 1) + if [ -n "$loopdev" ]; then + losetup "$loopdev" --direct-io=on + fi sleeppid=$(cat /run/confluent/onboot_sleep.pid) kill "$sleeppid" rm -f /run/confluent/onboot_sleep.pid diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh index c318c0b5..b48ba87f 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh @@ -180,7 +180,6 @@ if [ $TETHERED -eq 1 ]; then mkdir -p /run/confluent echo $ONBOOTPID > /run/confluent/onboot_sleep.pid wait $ONBOOTPID - losetup /sysroot/$loopdev --direct-io=on dd if=/mnt/remoteimg/rootimg.sfs iflag=nocache count=0 >& /dev/null if [ $debugssh -eq 0 ]; 
then rm -rf /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs /lib/firmware-ramfs /usr/lib64/libcrypto.so* /usr/lib64/systemd/ /kernel/ /usr/bin/ /usr/sbin/ /usr/libexec/ diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh index 33b2a6e4..7f02e1f9 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh @@ -63,6 +63,10 @@ run_remote_parts onboot.d run_remote_config onboot.d if [ -f /run/confluent/onboot_sleep.pid ]; then + loopdev=$(losetup -j /mnt/remoteimg/rootimg.sfs|cut -d: -f 1) + if [ -n "$loopdev" ]; then + losetup "$loopdev" --direct-io=on + fi sleeppid=$(cat /run/confluent/onboot_sleep.pid) kill "$sleeppid" rm -f /run/confluent/onboot_sleep.pid From 1f688ead289aadbb01b397a5cda0ca1a9caa07bc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 14 Nov 2025 17:20:06 -0500 Subject: [PATCH 374/413] Implement .replace() for attribute expressions Provide an easy to use replace() to allow removing or substituting values during expression evaluation. --- .../confluent/config/configmanager.py | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 36187373..a5132e93 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -1142,7 +1142,10 @@ class _ExpressionFormat(string.Formatter): # such as 'net.pxe.hwaddr' key = '.'
+ left.attr + key left = left.value - key = left.id + key + if isinstance(left, ast.Name): + key = left.id + key + else: + raise ValueError("Invalid AST structure: expected ast.Name at end of attribute chain") if (not key.startswith('custom.') and _get_valid_attrname(key) not in allattributes.node): raise ValueError( @@ -1203,6 +1206,34 @@ class _ExpressionFormat(string.Formatter): return strval[index] elif isinstance(node, ast.Constant): return node.value + elif isinstance(node, ast.Call): + key = '' + if isinstance(node.func, ast.Attribute): + fun_name = node.func.attr + left = node.func.value + while isinstance(left, ast.Attribute): + # Loop through, to handle multi dot expressions + # such as 'net.pxe.hwaddr' + key = '.' + left.attr + key + left = left.value + if isinstance(left, ast.Name): + key = left.id + key + else: + raise ValueError("Invalid AST structure: expected ast.Name at end of attribute chain") + else: + raise ValueError(f"Unsupported function in expression") + if fun_name == 'replace': + if len(node.args) != 2: + raise ValueError("Invalid number of arguments to replace") + arg1 = self._handle_ast_node(node.args[0]) + arg2 = self._handle_ast_node(node.args[1]) + if key in ('node', 'nodename'): + keyval = self._nodename + else: + keyval = self._expand_attribute(key).get('value', '') + return keyval.replace(arg1, arg2) + else: + raise ValueError("Unsupported function in expression") else: raise ValueError("Unrecognized expression syntax") From d3e7a49f9289f763c7bc0e31d79fa171b36f480d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Sat, 15 Nov 2025 10:32:11 -0500 Subject: [PATCH 375/413] Simplify by recursion Use _handle_ast_node to process everything before the function name in an Attribute call --- .../confluent/config/configmanager.py | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 
a5132e93..1810641a 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -1208,30 +1208,18 @@ class _ExpressionFormat(string.Formatter): return node.value elif isinstance(node, ast.Call): key = '' + baseval = '' if isinstance(node.func, ast.Attribute): fun_name = node.func.attr - left = node.func.value - while isinstance(left, ast.Attribute): - # Loop through, to handle multi dot expressions - # such as 'net.pxe.hwaddr' - key = '.' + left.attr + key - left = left.value - if isinstance(left, ast.Name): - key = left.id + key - else: - raise ValueError("Invalid AST structure: expected ast.Name at end of attribute chain") + baseval = self._handle_ast_node(node.func.value) else: - raise ValueError(f"Unsupported function in expression") + raise ValueError("Invalid function call syntax in expression") if fun_name == 'replace': if len(node.args) != 2: raise ValueError("Invalid number of arguments to replace") arg1 = self._handle_ast_node(node.args[0]) arg2 = self._handle_ast_node(node.args[1]) - if key in ('node', 'nodename'): - keyval = self._nodename - else: - keyval = self._expand_attribute(key).get('value', '') - return keyval.replace(arg1, arg2) + return baseval.replace(arg1, arg2) else: raise ValueError("Unsupported function in expression") else: From 53760ab5ddb51ff138d6d783202e5fbba1d836b4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 17 Nov 2025 11:58:04 -0500 Subject: [PATCH 376/413] Attribute feature enhancement Add expression functions upper, lower, block_number, and block_offset. Add an 'id.index' auto-attribute to yield a number for nodes. 
--- .../confluent/collective/manager.py | 1 + .../confluent/config/attributes.py | 3 + .../confluent/config/configmanager.py | 119 +++++++++++++++++- 3 files changed, 120 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index 2519cc39..cd099993 100644 --- a/confluent_server/confluent/collective/manager.py +++ b/confluent_server/confluent/collective/manager.py @@ -716,6 +716,7 @@ def become_leader(connection): if reassimilate is not None: reassimilate.kill() reassimilate = eventlet.spawn(reassimilate_missing) + cfm._init_indexes() cfm._ready = True if _assimilate_missing(skipaddr): schedule_rebalance() diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index dc5b8d40..84e527b2 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -450,6 +450,9 @@ node = { #IBM Flex)''', # 'appliesto': ['system'], # }, + 'id.index': { + 'description': 'Confluent generated numeric index for the node.', + }, 'id.model': { 'description': 'The model number of a node. 
In scenarios where there ' 'is both a name and a model number, it is generally ' diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 1810641a..fd8a97a7 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -164,6 +164,45 @@ def _mkpath(pathname): raise +def _count_freeindexes(freeindexes): + count = 0 + for idx in freeindexes: + if isinstance(idx, list): + for subidx in range(idx[0], idx[1] + 1): + count += 1 + else: + count += 1 + return count + +def _is_free_index(freeindexes, idx): + for freeidx in freeindexes: + if isinstance(freeidx, list): + if freeidx[0] <= idx <= freeidx[1]: + return True + else: + if freeidx == idx: + return True + return False + +def _remove_free_index(freeindexes, idx): + for i, freeidx in enumerate(freeindexes): + if isinstance(freeidx, list): + if freeidx[0] <= idx <= freeidx[1]: + if freeidx[0] == freeidx[1]: + del freeindexes[i] + elif freeidx[0] == idx: + freeindexes[i][0] += 1 + elif freeidx[1] == idx: + freeindexes[i][1] -= 1 + else: + freeindexes.insert(i + 1, [idx + 1, freeidx[1]]) + freeindexes[i][1] = idx - 1 + return + else: + if freeidx == idx: + del freeindexes[i] + return + def _derive_keys(password, salt): #implement our specific combination of pbkdf2 transforms to get at #key. 
We bump the iterations up because we can afford to @@ -1220,6 +1259,16 @@ class _ExpressionFormat(string.Formatter): arg1 = self._handle_ast_node(node.args[0]) arg2 = self._handle_ast_node(node.args[1]) return baseval.replace(arg1, arg2) + elif fun_name == 'upper': + return baseval.upper() + elif fun_name == 'lower': + return baseval.lower() + elif fun_name == 'block_number': + chunk_size = self._handle_ast_node(node.args[0]) + return (int(baseval) - 1) // chunk_size + 1 + elif fun_name == 'block_offset': + chunk_size = self._handle_ast_node(node.args[0]) + return (int(baseval) - 1) % chunk_size + 1 else: raise ValueError("Unsupported function in expression") else: @@ -2225,7 +2274,7 @@ class ConfigManager(object): watcher = self._nodecollwatchers[self.tenant][watcher] watcher(added=(), deleting=nodes, renamed=(), configmanager=self) changeset = {} - for node in nodes: + for node in confluent.util.natural_sort(nodes): # set a reserved attribute for the sake of the change notification # framework to trigger on changeset[node] = {'_nodedeleted': 1} @@ -2233,6 +2282,29 @@ class ConfigManager(object): if node in self._cfgstore['nodes']: self._sync_groups_to_node(node=node, groups=[], changeset=changeset) + nidx = self._cfgstore['nodes'][node].get('id.index', {}).get('value', None) + if nidx is not None: + currmaxidx = get_global('max_node_index') + freeindexes = get_global('free_node_indexes') + if not freeindexes: + freeindexes = [] + if nidx == currmaxidx - 1: + currmaxidx = currmaxidx - 1 + while _is_free_index(freeindexes, currmaxidx - 1): + _remove_free_index(freeindexes, currmaxidx - 1) + currmaxidx = currmaxidx - 1 + set_global('max_node_index', currmaxidx) + else: + lastindex = freeindexes[-1] if freeindexes else [-2, -2] + if not isinstance(lastindex, list): + lastindex = [lastindex, lastindex] + if nidx == lastindex[1] + 1: + lastindex[1] = nidx + if freeindexes: + freeindexes[-1] = lastindex + else: + freeindexes.append(nidx) + 
set_global('free_node_indexes', freeindexes) del self._cfgstore['nodes'][node] _mark_dirtykey('nodes', node, self.tenant) self._notif_attribwatchers(changeset) @@ -2510,12 +2582,29 @@ class ConfigManager(object): attrname, node) raise ValueError(errstr) attribmap[node][attrname] = attrval - for node in attribmap: + for node in confluent.util.natural_sort(attribmap): node = confluent.util.stringify(node) exprmgr = None if node not in self._cfgstore['nodes']: newnodes.append(node) - self._cfgstore['nodes'][node] = {} + freeindexes = get_global('free_node_indexes') + if not freeindexes: + freeindexes = [] + if _count_freeindexes(freeindexes) > 128: # tend to leave freed indexes disused until a lot have accumulated + if isinstance(freeindexes[0], list): + nidx = freeindexes[0][0] + freeindexes[0][0] = nidx + 1 + if freeindexes[0][0] == freeindexes[0][1]: + freeindexes[0] = freeindexes[0][0] + else: + nidx = freeindexes.pop(0) + set_global('free_node_indexes', freeindexes) + else: + nidx = get_global('max_node_index') + if nidx is None: + nidx = 0 + set_global('max_node_index', nidx + 1) + self._cfgstore['nodes'][node] = {'id.index': {'value': nidx}} cfgobj = self._cfgstore['nodes'][node] recalcexpressions = False for attrname in attribmap[node]: @@ -3148,6 +3237,29 @@ def get_globals(): bkupglobals[globvar] = _cfgstore['globals'][globvar] return bkupglobals +def _init_indexes(): + maxidx = get_global('max_node_index') + if maxidx is not None: + return + maxidx = 0 + maincfgstore = _cfgstore['main'] + nodes_without_index = [] + for node in confluent.util.natural_sort(maincfgstore.get('nodes', {})): + nidx = maincfgstore['nodes'][node].get('id.index', {}).get('value', None) + if nidx is not None: + if nidx >= maxidx: + maxidx = nidx + 1 + else: + nodes_without_index.append(node) + for node in nodes_without_index: + maincfgstore['nodes'][node]['id.index'] = {'value': maxidx} + maxidx += 1 + _mark_dirtykey('nodes', node, None) + set_global('max_node_index', maxidx) + 
set_global('free_node_indexes', []) + ConfigManager._bg_sync_to_file() + + def init(stateless=False): global _cfgstore global _ready @@ -3160,6 +3272,7 @@ def init(stateless=False): _cfgstore = {} members = list(list_collective()) if len(members) < 2: + _init_indexes() _ready = True From 61b07e0af4999e36ea313e43ba7e13a6db71a865 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 17 Nov 2025 12:05:03 -0500 Subject: [PATCH 377/413] Start index at 1 instead of 0 --- confluent_server/confluent/config/configmanager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index fd8a97a7..acf3937e 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -2602,7 +2602,7 @@ class ConfigManager(object): else: nidx = get_global('max_node_index') if nidx is None: - nidx = 0 + nidx = 1 set_global('max_node_index', nidx + 1) self._cfgstore['nodes'][node] = {'id.index': {'value': nidx}} cfgobj = self._cfgstore['nodes'][node] @@ -3241,7 +3241,7 @@ def _init_indexes(): maxidx = get_global('max_node_index') if maxidx is not None: return - maxidx = 0 + maxidx = 1 maincfgstore = _cfgstore['main'] nodes_without_index = [] for node in confluent.util.natural_sort(maincfgstore.get('nodes', {})): From 100944490cbbd0d214c3d987b7ffcf97fb021e2d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 17 Nov 2025 15:07:17 -0500 Subject: [PATCH 378/413] Fix potentially uninitialized curridx --- confluent_osdeploy/common/profile/scripts/confignet | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 41deed4c..c291fc02 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -488,6 +488,7 @@ if __name__ == 
'__main__': continue myname = s.getsockname() s.close() + curridx = None if len(myname) == 4: curridx = myname[-1] else: @@ -496,7 +497,7 @@ if __name__ == '__main__': for addr in myaddrs: if myname == addr[1].tobytes(): curridx = addr[-1] - if curridx in doneidxs: + if curridx is not None and curridx in doneidxs: continue for tries in (1, 2, 3): try: From 73216fc062cf8c28fb3f2099b234ca43ad80a68b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 18 Nov 2025 09:10:30 -0500 Subject: [PATCH 379/413] Fix architecture name mismatch Confluent went with aarch64 consistent with EL naming, but Ubuntu used debian naming, recognize and just handle that. --- confluent_server/confluent/osimage.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 5f8f68ca..b09f6505 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -1137,6 +1137,8 @@ def generate_stock_profiles(defprofile, distpath, targpath, osname, initrds = ['{0}/initramfs/{1}'.format(defprofile, initrd) for initrd in os.listdir('{0}/initramfs'.format(defprofile))] if os.path.exists('{0}/initramfs/{1}'.format(defprofile, arch)): initrds.extend(['{0}/initramfs/{1}/{2}'.format(defprofile, arch, initrd) for initrd in os.listdir('{0}/initramfs/{1}'.format(defprofile, arch))]) + elif arch == 'arm64' and os.path.exists('{0}/initramfs/aarch64'.format(defprofile)): + initrds.extend(['{0}/initramfs/aarch64/{1}'.format(defprofile, initrd) for initrd in os.listdir('{0}/initramfs/aarch64'.format(defprofile))]) for fullpath in initrds: initrd = os.path.basename(fullpath) if os.path.isdir(fullpath): From 041008a5242fbc8d202af18ab25128479dd8727a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 19 Nov 2025 15:37:29 -0500 Subject: [PATCH 380/413] Remove redundant el10 initramfs fixup --- confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) 
diff --git a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl index ecc0c445..d1e63735 100644 --- a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl @@ -27,7 +27,6 @@ mkdir -p stateless-bin cp -a el8bin/* . ln -s el8 el9 ln -s el8 el10 -mv el10/initramfs/usr el10/initramfs/var cp -a debian debian13 mkdir -p debian13/initramfs/usr mv debian13/initramfs/lib debian13/initramfs/usr/ @@ -86,6 +85,9 @@ cp -a esxi7 esxi8 %install mkdir -p %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ #cp LICENSE %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ +mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/common +cp common/initramfs/opt/confluent/bin/apiclient %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/common + for os in rhvh4 el7 el8 el9 el10 debian debian13 genesis suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 coreos; do mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs/aarch64/ cp ${os}out/addons.* %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs/aarch64/ From 4d2f36917ce8f3012546f135e4f9448d6febbc33 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 20 Nov 2025 15:49:51 -0500 Subject: [PATCH 381/413] Restore useinsecureprotocols after adopt --- misc/adoptnode.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/misc/adoptnode.sh b/misc/adoptnode.sh index b4536608..5d74d521 100755 --- a/misc/adoptnode.sh +++ b/misc/adoptnode.sh @@ -9,6 +9,8 @@ if [ -z "$TARGPROF" ]; then echo "Target profile must be specified" exit 1 fi +OLDINSECURE=$(nodeattrib TARGNODE deployment.useinsecureprotocols -b 2> /dev/null |grep -v inherited|awk '{print $3}') +nodeattrib $TARGNODE deployment.useinsecureprotocols nodedefine $TARGNODE deployment.apiarmed=once deployment.profile=$TARGPROF deployment.useinsecureprotocols= deployment.pendingprofile=$TARGPROF cat 
/var/lib/confluent/public/site/ssh/*pubkey | ssh $TARGNODE "mkdir -p /root/.ssh/; cat - >> /root/.ssh/authorized_keys" ssh $TARGNODE mkdir -p /etc/confluent /opt/confluent/bin @@ -20,5 +22,8 @@ scp finalizeadopt.sh $TARGNODE:/tmp/ ssh $TARGNODE bash /tmp/prepadopt.sh $TARGNODE $TARGPROF nodeattrib $TARGNODE deployment.pendingprofile= nodeattrib $TARGNODE -c deployment.useinsecureprotocols +if [ ! -z "$OLDINSECURE" ]; then + nodeattrib $TARGNODE $OLDINSECURE +fi nodeapply $TARGNODE -k ssh $TARGNODE sh /tmp/finalizeadopt.sh From 4f75d4942bc012982e1be67578d38b9f2881ba51 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 20 Nov 2025 16:05:22 -0500 Subject: [PATCH 382/413] Modify adoption process: Restore useinsecureprotocols if set directly on node Switch from pxe-style to identity-file based node api token for hardened node authentication --- misc/adoptnode.sh | 12 ++++++++---- misc/prepadopt.sh | 7 +++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/misc/adoptnode.sh b/misc/adoptnode.sh index 5d74d521..887ddf4b 100755 --- a/misc/adoptnode.sh +++ b/misc/adoptnode.sh @@ -9,9 +9,13 @@ if [ -z "$TARGPROF" ]; then echo "Target profile must be specified" exit 1 fi -OLDINSECURE=$(nodeattrib TARGNODE deployment.useinsecureprotocols -b 2> /dev/null |grep -v inherited|awk '{print $3}') -nodeattrib $TARGNODE deployment.useinsecureprotocols -nodedefine $TARGNODE deployment.apiarmed=once deployment.profile=$TARGPROF deployment.useinsecureprotocols= deployment.pendingprofile=$TARGPROF +OLDINSECURE=$(nodeattrib $TARGNODE deployment.useinsecureprotocols -b 2> /dev/null |grep -v inherited|awk '{print $3}') +nodedefine $TARGNODE deployment.profile=$TARGPROF deployment.useinsecureprotocols= deployment.pendingprofile=$TARGPROF +confetty set /nodes/$TARGNODE/deployment/ident_image=create +REMTMP=$(ssh $TARGNODE $(mktemp -d)) +scp /var/lib/confluent/private/identity_files/$TARGNODE.json $TARGNODE:$REMTMP +rm /var/lib/confluent/private/identity_files/$TARGNODE.* 
+rm /var/lib/confluent/private/identity_images/$TARGNODE.* cat /var/lib/confluent/public/site/ssh/*pubkey | ssh $TARGNODE "mkdir -p /root/.ssh/; cat - >> /root/.ssh/authorized_keys" ssh $TARGNODE mkdir -p /etc/confluent /opt/confluent/bin cat /var/lib/confluent/public/site/tls/*.pem | ssh $TARGNODE "cat - >> /etc/confluent/ca.pem" @@ -19,7 +23,7 @@ cat /var/lib/confluent/public/site/tls/*.pem | ssh $TARGNODE "cat - >> /etc/pki/ nodeattrib $TARGNODE id.uuid=$(ssh $TARGNODE cat /sys/devices/virtual/dmi/id/product_uuid) scp prepadopt.sh $TARGNODE:/tmp/ scp finalizeadopt.sh $TARGNODE:/tmp/ -ssh $TARGNODE bash /tmp/prepadopt.sh $TARGNODE $TARGPROF +ssh $TARGNODE bash /tmp/prepadopt.sh $TARGNODE $TARGPROF $REMTMP/$TARGNODE.json nodeattrib $TARGNODE deployment.pendingprofile= nodeattrib $TARGNODE -c deployment.useinsecureprotocols if [ ! -z "$OLDINSECURE" ]; then diff --git a/misc/prepadopt.sh b/misc/prepadopt.sh index b47602d8..19046d3e 100644 --- a/misc/prepadopt.sh +++ b/misc/prepadopt.sh @@ -1,6 +1,7 @@ #!/bin/bash TARGNODE=$1 TARGPROF=$2 +TARGIDENT=$3 TMPDIR=$(mktemp -d) cd $TMPDIR DEPLOYSRV=$(echo $SSH_CLIENT|awk '{print $1}') @@ -16,10 +17,12 @@ cpio -dumi < addons.cpio systemctl status firewalld >& /dev/null && FWACTIVE=1 if [ "$FWACTIVE" == 1 ]; then systemctl stop firewalld; fi opt/confluent/bin/copernicus > /etc/confluent/confluent.info -opt/confluent/bin/clortho $TARGNODE $DEPLOYSRV > /etc/confluent/confluent.apikey +#opt/confluent/bin/clortho $TARGNODE $DEPLOYSRV > /etc/confluent/confluent.apikey +. 
/etc/confluent/functions +confluentpython opt/confluent/bin/apiclient -i $TARGIDENT /confluent-api/self/deploycfg2 > /etc/confluent/confluent.deploycfg if [ "$FWACTIVE" == 1 ]; then systemctl start firewalld; fi cp opt/confluent/bin/apiclient /opt/confluent/bin -curl -sg -H "CONFLUENT_APIKEY: $(cat /etc/confluent/confluent.apikey)" -H "CONFLUENT_NODENAME: $TARGNODE" https://$UDEPLOYSRV/confluent-api/self/deploycfg2 > /etc/confluent/confluent.deploycfg +#curl -sg -H "CONFLUENT_APIKEY: $(cat /etc/confluent/confluent.apikey)" -H "CONFLUENT_NODENAME: $TARGNODE" https://$UDEPLOYSRV/confluent-api/self/deploycfg2 > /etc/confluent/confluent.deploycfg # python3 /opt/confluent/bin/apiclient /confluent-api/self/deploycfg2 > /etc/confluent/confluent.deploycfg cd - echo rm -rf $TMPDIR From a3b768c70f2aac4437180ca7f428409795015ad2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 20 Nov 2025 16:44:24 -0500 Subject: [PATCH 383/413] Draft bluefield deployment facilities --- confluent_osdeploy/bluefield/bfb-autoinstall | 74 +++++++++++ .../bluefield/hostscripts/bfb-autoinstall | 74 +++++++++++ .../profiles/default/bluefield.cfg.template | 71 ++++++++++ .../bluefield/profiles/default/nodedeploy-bfb | 125 ++++++++++++++++++ 4 files changed, 344 insertions(+) create mode 100644 confluent_osdeploy/bluefield/bfb-autoinstall create mode 100644 confluent_osdeploy/bluefield/hostscripts/bfb-autoinstall create mode 100644 confluent_osdeploy/bluefield/profiles/default/bluefield.cfg.template create mode 100644 confluent_osdeploy/bluefield/profiles/default/nodedeploy-bfb diff --git a/confluent_osdeploy/bluefield/bfb-autoinstall b/confluent_osdeploy/bluefield/bfb-autoinstall new file mode 100644 index 00000000..32ff47ed --- /dev/null +++ b/confluent_osdeploy/bluefield/bfb-autoinstall @@ -0,0 +1,74 @@ +#!/usr/bin/python3 +import glob +import gzip +import base64 +import os +import subprocess +import sys +import tempfile + +def collect_certificates(tmpdir): + certdata = '' + for cacert in
glob.glob(f'{tmpdir}/*.pem'): + with open(cacert, 'r') as f: + certdata += f.read() + return certdata + +def embed_certificates(incfg, certdata): + if not certdata: + raise Exception('No certificates found to embed') + incfg = incfg.replace('%CONFLUENTCERTCOLL%', certdata) + return incfg + +def embed_identity(incfg, identityjson): + incfg = incfg.replace('%IDENTJSON%', identityjson) + return incfg + +def embed_apiclient(incfg, apiclient): + with open(apiclient, 'r') as f: + apiclientdata = f.read() + compressed = gzip.compress(apiclientdata.encode()) + encoded = base64.b64encode(compressed).decode() + incfg = incfg.replace('%APICLIENTZ64%', encoded) + return incfg + +def embed_data(tmpdir, outfile): + templatefile = f'{tmpdir}/bfb.cfg.template' + with open(templatefile, 'r') as f: + incfg = f.read() + + certdata = collect_certificates(tmpdir) + incfg = embed_certificates(incfg, certdata) + + with open(f'{tmpdir}/identity.json', 'r') as f: + identityjson = f.read() + + incfg = embed_identity(incfg, identityjson) + + incfg = embed_apiclient(incfg, f'{tmpdir}/../apiclient') + + with open(outfile, 'w') as f: + f.write(incfg) + +def get_identity_json(node): + identity_file = f'/var/lib/confluent/private/site/identity_files/{node}.json' + try: + with open(identity_file, 'r') as f: + return f.read() + except FileNotFoundError: + return None + +if __name__ == '__main__': + if len(sys.argv) != 4: + print("Usage: bfb-autoinstall ") + sys.exit(1) + + node = sys.argv[1] + bfbfile = sys.argv[2] + rshim = sys.argv[3] + + os.chdir(os.path.dirname(os.path.abspath(__file__))) + currdir = os.getcwd() + tempdir = tempfile.mkdtemp(prefix=f'bfb-autoinstall-{node}-') + embed_data(f'{currdir}/{node}', f'{tempdir}/bfb.cfg') + subprocess.check_call(['bfb-install', '-b', bfbfile, '-c', f'{tempdir}/bfb.cfg', '-r', rshim]) diff --git a/confluent_osdeploy/bluefield/hostscripts/bfb-autoinstall b/confluent_osdeploy/bluefield/hostscripts/bfb-autoinstall new file mode 100644 index 
00000000..32ff47ed --- /dev/null +++ b/confluent_osdeploy/bluefield/hostscripts/bfb-autoinstall @@ -0,0 +1,74 @@ +#!/usr/bin/python3 +import glob +import gzip +import base64 +import os +import subprocess +import sys +import tempfile + +def collect_certificates(tmpdir): + certdata = '' + for cacert in glob.glob(f'{tmpdir}/*.pem'): + with open(cacert, 'r') as f: + certdata += f.read() + return certdata + +def embed_certificates(incfg, certdata): + if not certdata: + raise Exception('No certificates found to embed') + incfg = incfg.replace('%CONFLUENTCERTCOLL%', certdata) + return incfg + +def embed_identity(incfg, identityjson): + incfg = incfg.replace('%IDENTJSON%', identityjson) + return incfg + +def embed_apiclient(incfg, apiclient): + with open(apiclient, 'r') as f: + apiclientdata = f.read() + compressed = gzip.compress(apiclientdata.encode()) + encoded = base64.b64encode(compressed).decode() + incfg = incfg.replace('%APICLIENTZ64%', encoded) + return incfg + +def embed_data(tmpdir, outfile): + templatefile = f'{tmpdir}/bfb.cfg.template' + with open(templatefile, 'r') as f: + incfg = f.read() + + certdata = collect_certificates(tmpdir) + incfg = embed_certificates(incfg, certdata) + + with open(f'{tmpdir}/identity.json', 'r') as f: + identityjson = f.read() + + incfg = embed_identity(incfg, identityjson) + + incfg = embed_apiclient(incfg, f'{tmpdir}/../apiclient') + + with open(outfile, 'w') as f: + f.write(incfg) + +def get_identity_json(node): + identity_file = f'/var/lib/confluent/private/site/identity_files/{node}.json' + try: + with open(identity_file, 'r') as f: + return f.read() + except FileNotFoundError: + return None + +if __name__ == '__main__': + if len(sys.argv) != 4: + print("Usage: bfb-autoinstall ") + sys.exit(1) + + node = sys.argv[1] + bfbfile = sys.argv[2] + rshim = sys.argv[3] + + os.chdir(os.path.dirname(os.path.abspath(__file__))) + currdir = os.getcwd() + tempdir = tempfile.mkdtemp(prefix=f'bfb-autoinstall-{node}-') + 
embed_data(f'{currdir}/{node}', f'{tempdir}/bfb.cfg') + subprocess.check_call(['bfb-install', '-b', bfbfile, '-c', f'{tempdir}/bfb.cfg', '-r', rshim]) diff --git a/confluent_osdeploy/bluefield/profiles/default/bluefield.cfg.template b/confluent_osdeploy/bluefield/profiles/default/bluefield.cfg.template new file mode 100644 index 00000000..5f7b6a08 --- /dev/null +++ b/confluent_osdeploy/bluefield/profiles/default/bluefield.cfg.template @@ -0,0 +1,71 @@ +function bfb_modify_os() { + echo 'ubuntu:!' | chpasswd -e + mkdir -p /mnt/opt/confluent/bin/ + cat > /mnt/opt/confluent/bin/confluentbootstrap.sh << 'END_OF_EMBED' +#!/bin/bash + cat > /usr/local/share/ca-certificates/confluent.crt << 'END_OF_CERTS' +%CONFLUENTCERTCOLL% +END_OF_CERTS + update-ca-certificates + mkdir -p /opt/confluent/bin /etc/confluent/ + cp /usr/local/share/ca-certificates/confluent.crt /etc/confluent/ca.pem + cat > /opt/confluent/bin/apiclient.gz.b64 << 'END_OF_CLIENT' +%APICLIENTZ64% +END_OF_CLIENT + base64 -d /opt/confluent/bin/apiclient.gz.b64 | gunzip > /opt/confluent/bin/apiclient + cat > /etc/confluent/ident.json << 'END_OF_IDENT' +%IDENTJSON% +END_OF_IDENT + python3 /opt/confluent/bin/apiclient -i /etc/confluent/ident.json /confluent-api/self/deploycfg2 > /etc/confluent/confluent.deploycfg + PROFILE=$(grep ^profile: /etc/confluent/confluent.deploycfg |awk '{print $2}') + ROOTPASS=$(grep ^rootpassword: /etc/confluent/confluent.deploycfg | awk '{print $2}'|grep -v null) + if [ -n "$ROOTPASS" ]; then + echo root:$ROOTPASS | chpasswd -e + echo "ubuntu:$ROOTPASS" | chpasswd -e + else + echo 'ubuntu:!' 
| chpasswd -e + fi + python3 /opt/confluent/bin/apiclient /confluent-public/os/$PROFILE/scripts/functions > /etc/confluent/functions + touch /etc/confluent/confluent.deploycfg + bash /etc/confluent/functions run_remote_python confignet + bash /etc/confluent/functions run_remote setupssh + for cert in /etc/ssh/ssh*-cert.pub; do + if [ -s $cert ]; then + echo HostCertificate $cert >> /etc/ssh/sshd_config.d/90-confluent.conf + fi + done + mkdir -p /var/log/confluent + chmod 700 /var/log/confluent + touch /var/log/confluent/confluent-firstboot.log + touch /var/log/confluent/confluent-post.log + chmod 600 /var/log/confluent/confluent-post.log + chmod 600 /var/log/confluent/confluent-firstboot.log + exec >> /var/log/confluent/confluent-post.log + exec 2>> /var/log/confluent/confluent-post.log + bash /etc/confluent/functions run_remote_python syncfileclient + bash /etc/confluent/functions run_remote_parts post.d + bash /etc/confluent/functions run_remote_config post.d + exec >> /var/log/confluent/confluent-firstboot.log + exec 2>> /var/log/confluent/confluent-firstboot.log + bash /etc/confluent/functions run_remote_parts firstboot.d + bash /etc/confluent/functions run_remote_config firstboot.d + python3 /opt/confluent/bin/apiclient /confluent-api/self/updatestatus -d 'status: staged' + python3 /opt/confluent/bin/apiclient /confluent-api/self/updatestatus -d 'status: complete' + systemctl disable confluentbootstrap + rm /etc/systemd/system/confluentbootstrap.service +END_OF_EMBED + chmod +x /mnt/opt/confluent/bin/confluentbootstrap.sh + cat > /mnt/etc/systemd/system/confluentbootstrap.service << EOS +[Unit] +Description=First Boot Process +Requires=network-online.target +After=network-online.target + +[Service] +ExecStart=/opt/confluent/bin/confluentbootstrap.sh + +[Install] +WantedBy=multi-user.target +EOS + chroot /mnt systemctl enable confluentbootstrap +} \ No newline at end of file diff --git a/confluent_osdeploy/bluefield/profiles/default/nodedeploy-bfb 
b/confluent_osdeploy/bluefield/profiles/default/nodedeploy-bfb new file mode 100644 index 00000000..33d04cb2 --- /dev/null +++ b/confluent_osdeploy/bluefield/profiles/default/nodedeploy-bfb @@ -0,0 +1,125 @@ +#!/usr/bin/python3 + +import os +import sys +import tempfile +import glob +import shutil +import shlex +import subprocess +import select + +sys.path.append('/opt/lib/confluent/python') + +import confluent.sortutil as sortutil +import confluent.client as client + + +def prep_outdir(node): + tmpdir = tempfile.mkdtemp() + for certfile in glob.glob('/var/lib/confluent/public/site/tls/*.pem'): + basename = os.path.basename(certfile) + destfile = os.path.join(tmpdir, basename) + shutil.copy2(certfile, destfile) + subprocess.check_call(shlex.split(f'confetty set /nodes/{node}/deployment/ident_image=create')) + shutil.copy2(f'/var/lib/confluent/private/identity_files/{node}.json', os.path.join(tmpdir, 'identity.json')) + return tmpdir + +def exec_bfb_install(host, nodetorshim, bfbfile, installprocs, pipedesc, all, poller): + remotedir = subprocess.check_output(shlex.split(f'ssh {host} mktemp -d /tmp/bfb.XXXXXX')).decode().strip() + bfbbasename = os.path.basename(bfbfile) + subprocess.check_call(shlex.split(f'rsync -avz --info=progress2 {bfbfile} {host}:{remotedir}/{bfbbasename}')) + subprocess.check_call(shlex.split(f'rsync -avc --info=progress2 /opt/lib/confluent/osdeploy/bluefield/hostscripts/ {host}:{remotedir}/')) + for node in nodetorshim: + rshim = nodetorshim[node] + nodeoutdir = prep_outdir(node) + nodeprofile = subprocess.check_output(shlex.split(f'nodeattrib {node} deployment.pendingprofile')).decode().strip().split(':', 2)[2].strip() + shutil.copy2(f'/var/lib/confluent/public/os/{nodeprofile}/bfb.cfg.template', os.path.join(nodeoutdir, 'bfb.cfg.template')) + subprocess.check_call(shlex.split(f'rsync -avz {nodeoutdir}/ {host}:{remotedir}/{node}/')) + shutil.rmtree(nodeoutdir) + run_cmdv(node, shlex.split(f'ssh {host} sh /etc/confluent/functions 
confluentpython {remotedir}/bfb-autoinstall {node} {remotedir}/{bfbbasename} {rshim}'), all, poller, pipedesc) + + +def run_cmdv(node, cmdv, all, poller, pipedesc): + try: + nopen = subprocess.Popen( + cmdv, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except OSError as e: + if e.errno == 2: + sys.stderr.write('{0}: Unable to find local executable file "{1}"\n'.format(node, cmdv[0])) + return + raise + pipedesc[nopen.stdout.fileno()] = {'node': node, 'popen': nopen, + 'type': 'stdout', 'file': nopen.stdout} + pipedesc[nopen.stderr.fileno()] = {'node': node, 'popen': nopen, + 'type': 'stderr', 'file': nopen.stderr} + all.add(nopen.stdout) + poller.register(nopen.stdout, select.EPOLLIN) + all.add(nopen.stderr) + poller.register(nopen.stderr, select.EPOLLIN) + +if __name__ == '__main__': + + + if len(sys.argv) < 3: + print(f'Usage: {sys.argv[0]} [ ...]') + sys.exit(1) + + host = sys.argv[1] + bfbfile = sys.argv[2] + nodetorshim = {} + for arg in sys.argv[3:]: + node, rshim = arg.split(':') + nodetorshim[node] = rshim + + installprocs = {} + pipedesc = {} + all = set() + poller = select.epoll() + + exec_bfb_install(host, nodetorshim, bfbfile, installprocs, pipedesc, all, poller) + rdy = poller.poll(10) + pendingexecs = [] + exitcode = 0 + while all: + pernodeout = {} + for r in rdy: + r = r[0] + desc = pipedesc[r] + r = desc['file'] + node = desc['node'] + data = True + singlepoller = select.epoll() + singlepoller.register(r, select.EPOLLIN) + while data and singlepoller.poll(0): + data = r.readline() + if data: + if desc['type'] == 'stdout': + if node not in pernodeout: + pernodeout[node] = [] + pernodeout[node].append(data) + else: + data = client.stringify(data) + sys.stderr.write('{0}: {1}'.format(node, data)) + sys.stderr.flush() + else: + pop = desc['popen'] + ret = pop.poll() + if ret is not None: + exitcode = exitcode | ret + all.discard(r) + poller.unregister(r) + r.close() + if desc['type'] == 'stdout' and pendingexecs: + node, 
cmdv = pendingexecs.popleft() + run_cmdv(node, cmdv, all, poller, pipedesc) + singlepoller.close() + for node in sortutil.natural_sort(pernodeout): + for line in pernodeout[node]: + line = client.stringify(line) + sys.stdout.write('{0}: {1}'.format(node, line)) + sys.stdout.flush() + if all: + rdy = poller.poll(10) + + From ec39de3df0737d67b941d40c2c01c56e61789d80 Mon Sep 17 00:00:00 2001 From: Markus Hilger Date: Fri, 21 Nov 2025 14:16:07 +0100 Subject: [PATCH 384/413] Add bond alias to team description --- confluent_server/confluent/config/attributes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index 84e527b2..0e02d28a 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -478,17 +478,17 @@ node = { 'the discovery process to decide where to place the mac address of a detected PXE nic.', }, 'net.connection_name': { - 'description': 'Name to use when specifiying a name for connection and/or interface name for a team. This may be the name of a team interface, ' + 'description': 'Name to use when specifiying a name for connection and/or interface name for a team/bond. This may be the name of a team/bond interface, ' 'the connection name in network manager for the interface, or may be installed as an altname ' 'as supported by the respective OS deployment profiles. Default is to accept default name for ' - 'a team consistent with the respective OS, or to use the matching original port name as connection name.' + 'a team/bond consistent with the respective OS, or to use the matching original port name as connection name.' }, 'net.interface_names': { 'description': 'Interface name or comma delimited list of names to match for this interface. 
It is generally recommended ' 'to leave this blank unless needing to set up interfaces that are not on a common subnet with a confluent server, ' 'as confluent servers provide autodetection for matching the correct network definition to an interface. ' 'This would be the default name per the deployed OS and can be a comma delimited list to denote members of ' - 'a team or a single interface for VLAN/PKEY connections.' + 'a team/bond or a single interface for VLAN/PKEY connections.' }, 'net.mtu': { 'description': 'MTU to apply to this connection', @@ -574,7 +574,7 @@ node = { 'operating system', }, 'net.team_mode': { - 'description': 'Indicates that this interface should be a team and what mode or runner to use when teamed. ' + 'description': 'Indicates that this interface should be a team/bond and what mode or runner to use when teamed or bonded. ' 'If this covers a deployment interface, one of the member interfaces may be brought up as ' 'a standalone interface until deployment is complete, as supported by the OS deployment profile. ' 'To support this scenario, the switch should be set up to allow independent operation of member ports (e.g. lacp bypass mode or fallback mode).', From 224f34905308c300cd3fe7e479acbed743807613 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 25 Nov 2025 11:51:07 -0500 Subject: [PATCH 385/413] Extend autocons to more use cases If SPCR comes up blank, see if there is one and exactly one serial with carrier detect Failing that, give DMI a chance to indicate a preference, for now just SuperMicro, since they have the most inconsistent carrier detect behavior but almost always consider ttyS1 to be the answer. 
--- confluent_osdeploy/utils/autocons.c | 172 +++++++++++++++++++++++----- 1 file changed, 145 insertions(+), 27 deletions(-) diff --git a/confluent_osdeploy/utils/autocons.c b/confluent_osdeploy/utils/autocons.c index d3ac0b9a..af3bf7f1 100644 --- a/confluent_osdeploy/utils/autocons.c +++ b/confluent_osdeploy/utils/autocons.c @@ -7,6 +7,7 @@ #include #include #include +#include #define COM1 0x3f8 #define COM2 0x2f8 @@ -19,6 +20,131 @@ #define SPEED57600 6 #define SPEED115200 7 +typedef struct { + char devnode[32]; + speed_t speed; + int valid; +} serial_port_t; + +serial_port_t process_spcr() { + serial_port_t result = {0}; + char buff[128]; + int fd; + uint64_t address; + int currspeed; + + result.valid = 0; + + fd = open("/sys/firmware/acpi/tables/SPCR", O_RDONLY); + if (fd < 0) { + return result; + } + + if (read(fd, buff, 80) < 80) { + close(fd); + return result; + } + close(fd); + + if (buff[8] != 2) return result; // revision 2 + if (buff[36] != 0) return result; // 16550 only + if (buff[40] != 1) return result; // IO only + + address = *(uint64_t *)(buff + 44); + currspeed = buff[58]; + + if (address == COM1) { + strncpy(result.devnode, "/dev/ttyS0", sizeof(result.devnode)); + } else if (address == COM2) { + strncpy(result.devnode, "/dev/ttyS1", sizeof(result.devnode)); + } else if (address == COM3) { + strncpy(result.devnode, "/dev/ttyS2", sizeof(result.devnode)); + } else if (address == COM4) { + strncpy(result.devnode, "/dev/ttyS3", sizeof(result.devnode)); + } else { + return result; + } + + if (currspeed == SPEED9600) { + result.speed = B9600; + } else if (currspeed == SPEED19200) { + result.speed = B19200; + } else if (currspeed == SPEED57600) { + result.speed = B57600; + } else if (currspeed == SPEED115200) { + result.speed = B115200; + } else { + return result; + } + + result.valid = 1; + return result; +} + +serial_port_t identify_by_sys_vendor() { + serial_port_t result = {0}; + char buff[128]; + FILE *f; + + f = 
fopen("/sys/devices/virtual/dmi/id/sys_vendor", "r"); + if (f) { + if (fgets(buff, sizeof(buff), f)) { + if (strstr(buff, "Supermicro")) { + strncpy(result.devnode, "/dev/ttyS1", sizeof(result.devnode)); + result.speed = B115200; + result.valid = 1; + } + } + fclose(f); + } + return result; +} + +serial_port_t search_serial_ports() { + serial_port_t result = {0}; + DIR *dir; + struct dirent *entry; + int fd; + int status; + int numfound= 0; + + dir = opendir("/dev"); + if (!dir) { + return result; + } + + while ((entry = readdir(dir)) != NULL) { + if (strncmp(entry->d_name, "ttyS", 4) != 0) { + continue; + } + + char devpath[64]; + snprintf(devpath, sizeof(devpath), "/dev/%s", entry->d_name); + + fd = open(devpath, O_RDWR | O_NOCTTY | O_NONBLOCK); + if (fd < 0) { + continue; + } + + if (ioctl(fd, TIOCMGET, &status) == 0) { + if (status & TIOCM_CAR) { + strncpy(result.devnode, devpath, sizeof(result.devnode)); + numfound++; + result.speed = B115200; + + } + } + + close(fd); + } + + closedir(dir); + if (numfound == 1) { + result.valid = 1; + } + return result; +} + int main(int argc, char* argv[]) { struct termios tty; struct termios tty2; @@ -36,46 +162,38 @@ int main(int argc, char* argv[]) { char* offset; uint64_t address; bufflen = 0; - tmpi = open("/sys/firmware/acpi/tables/SPCR", O_RDONLY); - if (tmpi < 0) { - exit(0); - } - if (read(tmpi, buff, 80) < 80) { - exit(0); - } - close(tmpi); - if (buff[8] != 2) exit(0); //revision 2 - if (buff[36] != 0) exit(0); //16550 only - if (buff[40] != 1) exit(0); //IO only - address = *(uint64_t *)(buff + 44); - currspeed = buff[58]; - offset = buff + 10; - if (address == COM1) { - strncpy(buff, "/dev/ttyS0", 128); - } else if (address == COM2) { - strncpy(buff, "/dev/ttyS1", 128); - } else if (address == COM3) { - strncpy(buff, "/dev/ttyS2", 128); - } else if (address == COM4) { - strncpy(buff, "/dev/ttyS3", 128); - } else { + #ifndef __x86_64__ + // Only x86 needs autoconsole, other platforms have reasonable default serial 
console + exit(0); + #endif + serial_port_t spcr = process_spcr(); + if (!spcr.valid) { + spcr = search_serial_ports(); + } + if (!spcr.valid) { + spcr = identify_by_sys_vendor(); + } + if (!spcr.valid) { exit(0); } + strncpy(buff, spcr.devnode, sizeof(buff)); + offset = strchr(buff, 0); + currspeed = spcr.speed; ttyf = open(buff, O_RDWR | O_NOCTTY); if (ttyf < 0) { fprintf(stderr, "Unable to open tty\n"); exit(1); } - if (currspeed == SPEED9600) { + if (currspeed == B9600) { cspeed = B9600; strncpy(offset, ",9600", 6); - } else if (currspeed == SPEED19200) { + } else if (currspeed == B19200) { cspeed = B19200; strncpy(offset, ",19200", 7); - } else if (currspeed == SPEED57600) { + } else if (currspeed == B57600) { cspeed = B57600; strncpy(offset, ",57600", 7); - } else if (currspeed == SPEED115200) { + } else if (currspeed == B115200) { cspeed = B115200; strncpy(offset, ",115200", 8); } else { From 3cbac38d5799ed33504d9bbcb72d3b509faa9c48 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 25 Nov 2025 11:53:50 -0500 Subject: [PATCH 386/413] Also autoconsole when exactly one serial port is detected at all. 
--- confluent_osdeploy/utils/autocons.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/utils/autocons.c b/confluent_osdeploy/utils/autocons.c index af3bf7f1..83f608de 100644 --- a/confluent_osdeploy/utils/autocons.c +++ b/confluent_osdeploy/utils/autocons.c @@ -107,6 +107,7 @@ serial_port_t search_serial_ports() { int fd; int status; int numfound= 0; + int numpossible = 0; dir = opendir("/dev"); if (!dir) { @@ -127,6 +128,11 @@ serial_port_t search_serial_ports() { } if (ioctl(fd, TIOCMGET, &status) == 0) { + numpossible++; + if (numfound < 1) { + strncpy(result.devnode, devpath, sizeof(result.devnode)); + result.speed = B115200; + } if (status & TIOCM_CAR) { strncpy(result.devnode, devpath, sizeof(result.devnode)); numfound++; @@ -139,7 +145,7 @@ serial_port_t search_serial_ports() { } closedir(dir); - if (numfound == 1) { + if (numfound == 1 || numpossible == 1) { result.valid = 1; } return result; From 12d886a4f6797a9097ef50c41a2645a827015b4b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 25 Nov 2025 13:19:03 -0500 Subject: [PATCH 387/413] Add more imgutil documentation --- confluent_client/doc/man/imgutil.ronn | 139 ++++++++++++++++++++++++++ imgutil/imgutil | 2 +- 2 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 confluent_client/doc/man/imgutil.ronn diff --git a/confluent_client/doc/man/imgutil.ronn b/confluent_client/doc/man/imgutil.ronn new file mode 100644 index 00000000..939cef09 --- /dev/null +++ b/confluent_client/doc/man/imgutil.ronn @@ -0,0 +1,139 @@ +# imgutil(1) -- Work with confluent OS cloning and diskless images + +## SYNOPSIS + +`imgutil` `build` [] +`imgutil` `exec` [] [...] +`imgutil` `unpack` +`imgutil` `pack` [] +`imgutil` `capture` + +## DESCRIPTION + +**imgutil** is a utility for creating, managing, and deploying OS images for diskless boot and system cloning in a Confluent environment. 
It supports building images from scratch, capturing images from running systems, and packing/unpacking diskless profiles. + +## COMMANDS + +* `build`: + Build a new diskless image from scratch in the specified scratch directory. + +* `exec`: + Start the specified scratch directory as a container and optionally run a command inside it. + +* `unpack`: + Unpack a diskless image profile to a scratch directory for modification. + +* `pack`: + Pack a scratch directory into a diskless profile that can be deployed. + +* `capture`: + Capture an image for cloning from a running system. + +## BUILD OPTIONS + +* `-r`, `--addrepos` : + Repositories to add in addition to the main source. May be specified multiple times. + +* `-p`, `--packagelist` : + Filename of package list to replace default pkglist. + +* `-a`, `--addpackagelist` : + A list of additional packages to include. May be specified multiple times. + +* `-s`, `--source` : + Directory to pull installation from, typically a subdirectory of `/var/lib/confluent/distributions`. By default, the repositories for the build system are used. For Ubuntu, this is not supported; the build system repositories are always used. + +* `-y`, `--non-interactive`: + Avoid prompting for confirmation. + +* `-v`, `--volume` : + Directory to make available in the build environment. `-v /` will cause it to be mounted in image as `/run/external/`. `-v /:/run/root` will override the target to be `/run/root`. Something like `/var/lib/repository:-` will cause it to mount to the identical path inside the image. May be specified multiple times. + +* : + Directory to build new diskless root in. + +## EXEC OPTIONS + +* `-v`, `--volume` : + Directory to make available in the build environment. `-v /` will cause it to be mounted in image as `/run/external/`. `-v /:/run/root` will override the target to be `/run/root`. May be specified multiple times. + +* : + Directory of an unpacked diskless root. + +* : + Optional command to run (defaults to a shell). 
+ +## UNPACK OPTIONS + +* : + The diskless OS profile to unpack. + +* : + Directory to extract diskless root to. + +## PACK OPTIONS + +* `-b`, `--baseprofile` : + Profile to copy extra info from. For example, to make a new version of an existing profile, reference the previous one as baseprofile. + +* `-u`, `--unencrypted`: + Pack an unencrypted image rather than encrypting. + +* : + Directory containing diskless root. + +* : + The desired diskless OS profile name to pack the root into. + +## CAPTURE OPTIONS + +* : + Node to capture image from. + +* : + Profile name for captured image. + +## EXAMPLES + +Build a diskless image from a distribution: + + imgutil build -s alma-9.6-x86_64 /tmp/myimage + +Execute a shell in an unpacked image: + + imgutil exec /tmp/myimage + +Execute a specific command in an image: + + imgutil exec /tmp/myimage /bin/rpm -qa + +Unpack an existing profile for modification: + + imgutil unpack myprofile /tmp/myimage + +Pack a modified image into a new profile: + + imgutil pack /tmp/myimage myprofile-v2 + +Capture an image from a running node: + + imgutil capture node01 production-image + +## FILES + +* `/var/lib/confluent/public/os/`: + Default location for OS profiles. + +* `/var/lib/confluent/private/os/`: + Location for encrypted image keys and private data. + +* `/var/lib/confluent/distributions/`: + Default location for installation sources. + +## SEE ALSO + +osdeploy(8) + +## AUTHOR + +Written for the Confluent project. 
diff --git a/imgutil/imgutil b/imgutil/imgutil index 6ecc7295..0c44d8ee 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -894,7 +894,7 @@ def main(): buildp.add_argument('-p', '--packagelist', help='Filename of package list to replace default pkglist', default='') buildp.add_argument('-a', '--addpackagelist', action='append', default=[], help='A list of additional packages to include, may be specified multiple times') - buildp.add_argument('-s', '--source', help='Directory to pull installation from, typically a subdirectory of /var/lib/confluent/distributions. By default, the repositories for the build system are used.') + buildp.add_argument('-s', '--source', help='Directory to pull installation from, typically a subdirectory of /var/lib/confluent/distributions. By default, the repositories for the build system are used. For Ubuntu, this is not supported, the build system repositories are always used.') buildp.add_argument('-y', '--non-interactive', help='Avoid prompting for confirmation', action='store_true') buildp.add_argument('-v', '--volume', help='Directory to make available in the build environment. -v / will ' From c196bf9d55c65f4b61e4887da105ca3db6b5a84a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 2 Dec 2025 14:31:10 -0500 Subject: [PATCH 388/413] Fix initial startup of a new confluent The indexes change failed on a brand new install. 
--- confluent_server/confluent/config/configmanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index acf3937e..10eff81c 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -3239,7 +3239,7 @@ def get_globals(): def _init_indexes(): maxidx = get_global('max_node_index') - if maxidx is not None: + if maxidx is not None or 'main' not in _cfgstore: return maxidx = 1 maincfgstore = _cfgstore['main'] From 2464e0ff4f28a63d51335f5c57b959a11642ab31 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 2 Dec 2025 14:35:50 -0500 Subject: [PATCH 389/413] Fix location of the apiclient common resource --- confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl index d1e63735..5572b801 100644 --- a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl @@ -86,7 +86,7 @@ cp -a esxi7 esxi8 mkdir -p %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ #cp LICENSE %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/common -cp common/initramfs/opt/confluent/bin/apiclient %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/common +cp common/initramfs/opt/confluent/bin/apiclient %{buildroot}/opt/confluent/lib/osdeploy/common for os in rhvh4 el7 el8 el9 el10 debian debian13 genesis suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 coreos; do mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs/aarch64/ From 04e983a2d34291fe108e755c005dc94f2c9c2262 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 4 Dec 2025 09:52:15 -0500 Subject: [PATCH 390/413] 
Handle broader memory information being returned from confluent --- confluent_client/bin/nodeinventory | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/confluent_client/bin/nodeinventory b/confluent_client/bin/nodeinventory index 1fb3d151..6551c1b5 100755 --- a/confluent_client/bin/nodeinventory +++ b/confluent_client/bin/nodeinventory @@ -60,16 +60,21 @@ def print_mem_info(node, prefix, meminfo): elif 'DCPMM' in meminfo['memory_type']: memdescfmt = '{0}GB {1} ' meminfo['module_type'] = 'DCPMM' + elif meminfo['memory_type'] == 'HBM': + memdescfmt = '{0}GB HBM ' else: print('{0}: {1}: Unrecognized Memory'.format(node, prefix)) return if meminfo.get('ecc', False): memdescfmt += 'ECC ' - capacity = meminfo['capacity_mb'] / 1024 modtype = meminfo.get('module_type', None) if modtype: memdescfmt += modtype - memdesc = memdescfmt.format(capacity, meminfo['speed']) + if meminfo.get('capacity_mb', None): + capacity = meminfo['capacity_mb'] // 1024 + memdesc = memdescfmt.format(capacity, meminfo['speed']) + else: + memdesc = 'Unspecified Module' print('{0}: {1} description: {2}'.format(node, prefix, memdesc)) print('{0}: {1} manufacturer: {2}'.format( node, prefix, meminfo['manufacturer'])) From 75e7b9040ba9f44a4bff7152d9b56e066854f5c7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 25 Nov 2025 13:19:03 -0500 Subject: [PATCH 391/413] Add more imgutil documentation --- confluent_client/doc/man/imgutil.ronn | 139 ++++++++++++++++++++++++++ imgutil/imgutil | 2 +- 2 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 confluent_client/doc/man/imgutil.ronn diff --git a/confluent_client/doc/man/imgutil.ronn b/confluent_client/doc/man/imgutil.ronn new file mode 100644 index 00000000..939cef09 --- /dev/null +++ b/confluent_client/doc/man/imgutil.ronn @@ -0,0 +1,139 @@ +# imgutil(1) -- Work with confluent OS cloning and diskless images + +## SYNOPSIS + +`imgutil` `build` [] +`imgutil` `exec` [] [...] 
+`imgutil` `unpack` +`imgutil` `pack` [] +`imgutil` `capture` + +## DESCRIPTION + +**imgutil** is a utility for creating, managing, and deploying OS images for diskless boot and system cloning in a Confluent environment. It supports building images from scratch, capturing images from running systems, and packing/unpacking diskless profiles. + +## COMMANDS + +* `build`: + Build a new diskless image from scratch in the specified scratch directory. + +* `exec`: + Start the specified scratch directory as a container and optionally run a command inside it. + +* `unpack`: + Unpack a diskless image profile to a scratch directory for modification. + +* `pack`: + Pack a scratch directory into a diskless profile that can be deployed. + +* `capture`: + Capture an image for cloning from a running system. + +## BUILD OPTIONS + +* `-r`, `--addrepos` : + Repositories to add in addition to the main source. May be specified multiple times. + +* `-p`, `--packagelist` : + Filename of package list to replace default pkglist. + +* `-a`, `--addpackagelist` : + A list of additional packages to include. May be specified multiple times. + +* `-s`, `--source` : + Directory to pull installation from, typically a subdirectory of `/var/lib/confluent/distributions`. By default, the repositories for the build system are used. For Ubuntu, this is not supported; the build system repositories are always used. + +* `-y`, `--non-interactive`: + Avoid prompting for confirmation. + +* `-v`, `--volume` : + Directory to make available in the build environment. `-v /` will cause it to be mounted in image as `/run/external/`. `-v /:/run/root` will override the target to be `/run/root`. Something like `/var/lib/repository:-` will cause it to mount to the identical path inside the image. May be specified multiple times. + +* : + Directory to build new diskless root in. + +## EXEC OPTIONS + +* `-v`, `--volume` : + Directory to make available in the build environment. 
`-v /` will cause it to be mounted in image as `/run/external/`. `-v /:/run/root` will override the target to be `/run/root`. May be specified multiple times. + +* : + Directory of an unpacked diskless root. + +* : + Optional command to run (defaults to a shell). + +## UNPACK OPTIONS + +* : + The diskless OS profile to unpack. + +* : + Directory to extract diskless root to. + +## PACK OPTIONS + +* `-b`, `--baseprofile` : + Profile to copy extra info from. For example, to make a new version of an existing profile, reference the previous one as baseprofile. + +* `-u`, `--unencrypted`: + Pack an unencrypted image rather than encrypting. + +* : + Directory containing diskless root. + +* : + The desired diskless OS profile name to pack the root into. + +## CAPTURE OPTIONS + +* : + Node to capture image from. + +* : + Profile name for captured image. + +## EXAMPLES + +Build a diskless image from a distribution: + + imgutil build -s alma-9.6-x86_64 /tmp/myimage + +Execute a shell in an unpacked image: + + imgutil exec /tmp/myimage + +Execute a specific command in an image: + + imgutil exec /tmp/myimage /bin/rpm -qa + +Unpack an existing profile for modification: + + imgutil unpack myprofile /tmp/myimage + +Pack a modified image into a new profile: + + imgutil pack /tmp/myimage myprofile-v2 + +Capture an image from a running node: + + imgutil capture node01 production-image + +## FILES + +* `/var/lib/confluent/public/os/`: + Default location for OS profiles. + +* `/var/lib/confluent/private/os/`: + Location for encrypted image keys and private data. + +* `/var/lib/confluent/distributions/`: + Default location for installation sources. + +## SEE ALSO + +osdeploy(8) + +## AUTHOR + +Written for the Confluent project. 
diff --git a/imgutil/imgutil b/imgutil/imgutil index 6ecc7295..0c44d8ee 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -894,7 +894,7 @@ def main(): buildp.add_argument('-p', '--packagelist', help='Filename of package list to replace default pkglist', default='') buildp.add_argument('-a', '--addpackagelist', action='append', default=[], help='A list of additional packages to include, may be specified multiple times') - buildp.add_argument('-s', '--source', help='Directory to pull installation from, typically a subdirectory of /var/lib/confluent/distributions. By default, the repositories for the build system are used.') + buildp.add_argument('-s', '--source', help='Directory to pull installation from, typically a subdirectory of /var/lib/confluent/distributions. By default, the repositories for the build system are used. For Ubuntu, this is not supported, the build system repositories are always used.') buildp.add_argument('-y', '--non-interactive', help='Avoid prompting for confirmation', action='store_true') buildp.add_argument('-v', '--volume', help='Directory to make available in the build environment. 
-v / will ' From c8745292bfdb69f8f87701baeb4f84e5f073fee9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 4 Dec 2025 09:52:15 -0500 Subject: [PATCH 392/413] Handle broader memory information being returned from confluent --- confluent_client/bin/nodeinventory | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/confluent_client/bin/nodeinventory b/confluent_client/bin/nodeinventory index 1fb3d151..6551c1b5 100755 --- a/confluent_client/bin/nodeinventory +++ b/confluent_client/bin/nodeinventory @@ -60,16 +60,21 @@ def print_mem_info(node, prefix, meminfo): elif 'DCPMM' in meminfo['memory_type']: memdescfmt = '{0}GB {1} ' meminfo['module_type'] = 'DCPMM' + elif meminfo['memory_type'] == 'HBM': + memdescfmt = '{0}GB HBM ' else: print('{0}: {1}: Unrecognized Memory'.format(node, prefix)) return if meminfo.get('ecc', False): memdescfmt += 'ECC ' - capacity = meminfo['capacity_mb'] / 1024 modtype = meminfo.get('module_type', None) if modtype: memdescfmt += modtype - memdesc = memdescfmt.format(capacity, meminfo['speed']) + if meminfo.get('capacity_mb', None): + capacity = meminfo['capacity_mb'] // 1024 + memdesc = memdescfmt.format(capacity, meminfo['speed']) + else: + memdesc = 'Unspecified Module' print('{0}: {1} description: {2}'.format(node, prefix, memdesc)) print('{0}: {1} manufacturer: {2}'.format( node, prefix, meminfo['manufacturer'])) From 523c93dfc353cd4738e308a3139641bc12a9a89f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Dec 2025 08:49:27 -0500 Subject: [PATCH 393/413] Tolerate more network circumstances in bluefield deploy If the networking didn't come up well, the 'functions' routines would not be able to handle. Switch to using apiclient which is designed specifically to handle less cooperative initial network conditions. 
--- .../bluefield/profiles/default/bluefield.cfg.template | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/bluefield/profiles/default/bluefield.cfg.template b/confluent_osdeploy/bluefield/profiles/default/bluefield.cfg.template index 5f7b6a08..4fdd10ab 100644 --- a/confluent_osdeploy/bluefield/profiles/default/bluefield.cfg.template +++ b/confluent_osdeploy/bluefield/profiles/default/bluefield.cfg.template @@ -25,9 +25,14 @@ END_OF_IDENT else echo 'ubuntu:!' | chpasswd -e fi - python3 /opt/confluent/bin/apiclient /confluent-public/os/$PROFILE/scripts/functions > /etc/confluent/functions + cntmp=$(mktemp -d) + cd "$cntmp" || { echo "Failed to cd to temporary directory $cntmp"; exit 1; } touch /etc/confluent/confluent.deploycfg - bash /etc/confluent/functions run_remote_python confignet + python3 /opt/confluent/bin/apiclient /confluent-public/os/$PROFILE/scripts/confignet > confignet + python3 confignet + cd - + rm -rf "$cntmp" + python3 /opt/confluent/bin/apiclient /confluent-public/os/$PROFILE/scripts/functions > /etc/confluent/functions bash /etc/confluent/functions run_remote setupssh for cert in /etc/ssh/ssh*-cert.pub; do if [ -s $cert ]; then From b72d6c9cfc313840279c2b6dfd7969efccc4283a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 10 Dec 2025 14:14:14 -0500 Subject: [PATCH 394/413] Fix typo --- .../genesis/initramfs/opt/confluent/bin/rungenesis | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis index 4d9a92ad..febb4acf 100644 --- a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis +++ b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis @@ -56,7 +56,7 @@ cat /tls/*.pem > /etc/pki/tls/certs/ca-bundle.crt TRIES=0 touch /etc/confluent/confluent.info TRIES=5 -echo -n "Waitiing for disks..." +echo -n "Waiting for disks..." while [ ! 
-e /dev/disk/by-label ] && [ $TRIES -gt 0 ]; do sleep 1 TRIES=$((TRIES - 1)) From d7577a04a76a3abb8a67b52584fc2a0b438b6984 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 11 Dec 2025 08:46:19 -0500 Subject: [PATCH 395/413] Fix ESXi compatibility of apiclient apiclient was using Linux specific network information. Change to libc getifaddrs for better cross-platform compatibility. --- .../initramfs/opt/confluent/bin/apiclient | 147 +++++++++++++----- 1 file changed, 112 insertions(+), 35 deletions(-) diff --git a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient index d9cfb2dc..9671b206 100644 --- a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient +++ b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient @@ -47,38 +47,112 @@ c_crypt.restype = ctypes.c_char_p def get_my_addresses(): - nlhdrsz = struct.calcsize('IHHII') - ifaddrsz = struct.calcsize('BBBBI') - # RTM_GETADDR = 22 - # nlmsghdr struct: u32 len, u16 type, u16 flags, u32 seq, u32 pid - nlhdr = struct.pack('IHHII', nlhdrsz + ifaddrsz, 22, 0x301, 0, 0) - # ifaddrmsg struct: u8 family, u8 prefixlen, u8 flags, u8 scope, u32 index - ifaddrmsg = struct.pack('BBBBI', 0, 0, 0, 0, 0) - s = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, socket.NETLINK_ROUTE) - s.bind((0, 0)) - s.sendall(nlhdr + ifaddrmsg) addrs = [] - while True: - pdata = s.recv(65536) - v = memoryview(pdata) - if struct.unpack('H', v[4:6])[0] == 3: # netlink done message - break - while len(v): - length, typ = struct.unpack('IH', v[:6]) - if typ == 20: - fam, plen, _, scope, ridx = struct.unpack('BBBBI', v[nlhdrsz:nlhdrsz+ifaddrsz]) - if scope in (253, 0): - rta = v[nlhdrsz+ifaddrsz:length] - while len(rta): - rtalen, rtatyp = struct.unpack('HH', rta[:4]) - if rtalen < 4: - break - if rtatyp == 1: - addrs.append((fam, rta[4:rtalen], plen, ridx)) - rta = rta[msg_align(rtalen):] - v = v[msg_align(length):] + for ifa in get_ifaddrs(): 
+ if ifa[0] == 'ip': + addrs.append((ifa[1], ifa[2], ifa[3])) return addrs +def get_mac_addresses(): + macs = [] + for ifa in get_ifaddrs(): + if ifa[0] == 'ETHER': + macs.append((ifa[1], ifa[2])) + return macs + +def get_ifaddrs(): + class sockaddr(ctypes.Structure): + _fields_ = [ + ('sa_family', ctypes.c_uint16), + ('sa_data', ctypes.c_ubyte * 14), + ] + + class sockaddr_in(ctypes.Structure): + _fields_ = [ + ('sin_family', ctypes.c_uint16), + ('sin_port', ctypes.c_uint16), + ('sin_addr', ctypes.c_ubyte * 4), + ('sin_zero', ctypes.c_ubyte * 8), + ] + + class sockaddr_in6(ctypes.Structure): + _fields_ = [ + ('sin6_family', ctypes.c_uint16), + ('sin6_port', ctypes.c_uint16), + ('sin6_flowinfo', ctypes.c_uint32), + ('sin6_addr', ctypes.c_ubyte * 16), + ('sin6_scope_id', ctypes.c_uint32), + ] + + class sockaddr_ll(ctypes.Structure): + _fields_ = [ + ('sll_family', ctypes.c_uint16), + ('sll_protocol', ctypes.c_uint16), + ('sll_ifindex', ctypes.c_int32), + ('sll_hatype', ctypes.c_uint16), + ('sll_pkttype', ctypes.c_uint8), + ('sll_halen', ctypes.c_uint8), + ('sll_addr', ctypes.c_ubyte * 8), + ] + + class ifaddrs(ctypes.Structure): + pass + + ifaddrs._fields_ = [ + ('ifa_next', ctypes.POINTER(ifaddrs)), + ('ifa_name', ctypes.c_char_p), + ('ifa_flags', ctypes.c_uint), + ('ifa_addr', ctypes.POINTER(sockaddr)), + ('ifa_netmask', ctypes.POINTER(sockaddr)), + ('ifa_ifu', ctypes.POINTER(sockaddr)), + ('ifa_data', ctypes.c_void_p), + ] + + libc = ctypes.CDLL(ctypes.util.find_library('c')) + libc.getifaddrs.argtypes = [ctypes.POINTER(ctypes.POINTER(ifaddrs))] + libc.getifaddrs.restype = ctypes.c_int + libc.freeifaddrs.argtypes = [ctypes.POINTER(ifaddrs)] + libc.freeifaddrs.restype = None + ifap = ctypes.POINTER(ifaddrs)() + result = libc.getifaddrs(ctypes.pointer(ifap)) + if result != 0: + return [] + addresses = [] + ifa = ifap + try: + while ifa: + if ifa.contents.ifa_addr: + family = ifa.contents.ifa_addr.contents.sa_family + name = ifa.contents.ifa_name.decode('utf-8') if 
ifa.contents.ifa_name else None + if family in (socket.AF_INET, socket.AF_INET6): + # skip loopback and non-multicast interfaces + if ifa.contents.ifa_flags & 8 or not ifa.contents.ifa_flags & 0x1000: + ifa = ifa.contents.ifa_next + continue + if family == socket.AF_INET: + addr_ptr = ctypes.cast(ifa.contents.ifa_addr, ctypes.POINTER(sockaddr_in)) + addr_bytes = bytes(addr_ptr.contents.sin_addr) + addresses.append(('ip', family, addr_bytes, name)) + elif family == socket.AF_INET6: + addr_ptr = ctypes.cast(ifa.contents.ifa_addr, ctypes.POINTER(sockaddr_in6)) + addr_bytes = bytes(addr_ptr.contents.sin6_addr) + scope_id = addr_ptr.contents.sin6_scope_id + addresses.append(('ip', family, addr_bytes, scope_id)) + elif family == socket.AF_PACKET: + addr_ptr = ctypes.cast(ifa.contents.ifa_addr, ctypes.POINTER(sockaddr_ll)) + halen = addr_ptr.contents.sll_halen + if addr_ptr.contents.sll_hatype in (1, 32) and halen > 0: # ARPHRD_ETHER or ARPHRD_INFINIBAND + if addr_ptr.contents.sll_hatype == 1 and addr_ptr.contents.sll_addr[0] & 2: # skip locally administered MACs + ifa = ifa.contents.ifa_next + continue + mac_bytes = bytes(addr_ptr.contents.sll_addr[:halen]) + macaddr = ':'.join('{:02x}'.format(b) for b in mac_bytes) + addresses.append(('ETHER', name, macaddr)) + ifa = ifa.contents.ifa_next + finally: + libc.freeifaddrs(ifap) + + return addresses def scan_confluents(confuuid=None): srvs = {} @@ -92,22 +166,24 @@ def scan_confluents(confuuid=None): s4.bind(('0.0.0.0', 1900)) doneidxs = set([]) msg = 'M-SEARCH * HTTP/1.1\r\nST: urn:xcat.org:service:confluent:' - if not confuuid: + if not confuuid and os.path.exists('/etc/confluent/confluent.deploycfg'): with open('/etc/confluent/confluent.deploycfg') as dcfg: for line in dcfg.read().split('\n'): if line.startswith('confluent_uuid:'): confluentuuid = line.split(': ')[1] msg += '/confluentuuid=' + confluentuuid break + if not confuuid and os.path.exists('/confluent_uuid'): + with open('/confluent_uuid') as cuuidin: + 
confluentuuid = cuuidin.read().strip() + msg += '/confluentuuid=' + confluentuuid try: with open('/sys/devices/virtual/dmi/id/product_uuid') as uuidin: msg += '/uuid=' + uuidin.read().strip() except Exception: pass - for addrf in glob.glob('/sys/class/net/*/address'): - with open(addrf) as addrin: - hwaddr = addrin.read().strip() - msg += '/mac=' + hwaddr + for iface, hwaddr in get_mac_addresses(): + msg += '/mac=' + hwaddr msg = msg.encode('utf8') for addr in get_my_addresses(): if addr[0] == socket.AF_INET6: @@ -155,7 +231,8 @@ def scan_confluents(confuuid=None): if currip.startswith('fe80::') and '%' not in currip: currip = '{0}%{1}'.format(currip, peer[-1]) srvs[currip] = current - srvlist.append(currip) + if currip not in srvlist: + srvlist.append(currip) r = select.select((s4, s6), (), (), 2) if r: r = r[0] From 56dfb6dc6b04ef61ebecb5bcf0af9072e55fff9d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 11 Dec 2025 08:46:59 -0500 Subject: [PATCH 396/413] Fix spelling issue in man page --- confluent_client/doc/man/nodegroupattrib.ronn.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_client/doc/man/nodegroupattrib.ronn.tmpl b/confluent_client/doc/man/nodegroupattrib.ronn.tmpl index d574f8ca..c88baf64 100644 --- a/confluent_client/doc/man/nodegroupattrib.ronn.tmpl +++ b/confluent_client/doc/man/nodegroupattrib.ronn.tmpl @@ -11,7 +11,7 @@ nodegroupattrib(8) -- List or change confluent nodegroup attributes ## DESCRIPTION -`nodegroupattrip` queries the confluent server to get information about nodes. +`nodegroupattrib` queries the confluent server to get information about nodes. In the simplest form, it simply takes the given group and lists the attributes of that group. 
Contrasted with nodeattrib(8), settings managed by nodegroupattrib will be added From 31c1a865dc6db911c94b94a8a9022d22599283e6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 12 Dec 2025 09:30:56 -0500 Subject: [PATCH 397/413] Update confignet to match apiclient changes --- confluent_osdeploy/common/profile/scripts/confignet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index c291fc02..a5695afc 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -495,7 +495,7 @@ if __name__ == '__main__': myname = myname[0] myname = socket.inet_pton(socket.AF_INET, myname) for addr in myaddrs: - if myname == addr[1].tobytes(): + if myname == addr[1]: curridx = addr[-1] if curridx is not None and curridx in doneidxs: continue From a4229fc58d41900fe7910a9c4453b15f7dac1204 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 12 Dec 2025 11:18:33 -0500 Subject: [PATCH 398/413] Change name to index in apiclient confignet was using the index for ipv4 --- .../common/initramfs/opt/confluent/bin/apiclient | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient index 9671b206..1d3e5f0e 100644 --- a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient +++ b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient @@ -132,7 +132,8 @@ def get_ifaddrs(): if family == socket.AF_INET: addr_ptr = ctypes.cast(ifa.contents.ifa_addr, ctypes.POINTER(sockaddr_in)) addr_bytes = bytes(addr_ptr.contents.sin_addr) - addresses.append(('ip', family, addr_bytes, name)) + if_index = socket.if_nametoindex(name) if name else 0 + addresses.append(('ip', family, addr_bytes, if_index)) elif family == socket.AF_INET6: addr_ptr = 
ctypes.cast(ifa.contents.ifa_addr, ctypes.POINTER(sockaddr_in6)) addr_bytes = bytes(addr_ptr.contents.sin6_addr) From 1a684f20126e27dc0e0bc11759f643133b21456a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 6 Jan 2026 15:49:50 -0500 Subject: [PATCH 399/413] Ensure rpmbuild directory exists before building --- confluent_vtbufferd/buildrpm | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_vtbufferd/buildrpm b/confluent_vtbufferd/buildrpm index 9a20844d..89175fad 100755 --- a/confluent_vtbufferd/buildrpm +++ b/confluent_vtbufferd/buildrpm @@ -12,6 +12,7 @@ cp ../LICENSE NOTICE *.c *.h Makefile dist/confluent_vtbufferd-$VERSION cd dist tar czf confluent_vtbufferd-$VERSION.tar.gz confluent_vtbufferd-$VERSION cd - +mkdir -p ~/rpmbuild/SOURCES ~/rpmbuild/SPEC cp dist/confluent_vtbufferd-$VERSION.tar.gz ~/rpmbuild/SOURCES sed -e 's/#VERSION#/'$VERSION/ confluent_vtbufferd.spec.tmpl > ~/rpmbuild/SPECS/confluent_vtbufferd.spec rpmbuild -ba ~/rpmbuild/SPECS/confluent_vtbufferd.spec 2> /dev/null |grep ^Wrote: From 289c31e7ac731925d7cb313c6f9990d25e796544 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 6 Jan 2026 15:51:06 -0500 Subject: [PATCH 400/413] Ensure in expected directory to start --- confluent_vtbufferd/buildrpm | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_vtbufferd/buildrpm b/confluent_vtbufferd/buildrpm index 89175fad..c9dbd8a4 100755 --- a/confluent_vtbufferd/buildrpm +++ b/confluent_vtbufferd/buildrpm @@ -1,3 +1,4 @@ +cd $(dirname $0) VERSION=`git describe|cut -d- -f 1` NUMCOMMITS=`git describe|cut -d- -f 2` if [ "$NUMCOMMITS" != "$VERSION" ]; then From 45bc9788b4c2e7b632e36f83e43399c336e3e9fb Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 6 Jan 2026 15:51:40 -0500 Subject: [PATCH 401/413] Correct mistake in SPECS spelling --- confluent_vtbufferd/buildrpm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_vtbufferd/buildrpm b/confluent_vtbufferd/buildrpm index c9dbd8a4..c3e0d567 100755 
--- a/confluent_vtbufferd/buildrpm +++ b/confluent_vtbufferd/buildrpm @@ -13,7 +13,7 @@ cp ../LICENSE NOTICE *.c *.h Makefile dist/confluent_vtbufferd-$VERSION cd dist tar czf confluent_vtbufferd-$VERSION.tar.gz confluent_vtbufferd-$VERSION cd - -mkdir -p ~/rpmbuild/SOURCES ~/rpmbuild/SPEC +mkdir -p ~/rpmbuild/SOURCES ~/rpmbuild/SPECS cp dist/confluent_vtbufferd-$VERSION.tar.gz ~/rpmbuild/SOURCES sed -e 's/#VERSION#/'$VERSION/ confluent_vtbufferd.spec.tmpl > ~/rpmbuild/SPECS/confluent_vtbufferd.spec rpmbuild -ba ~/rpmbuild/SPECS/confluent_vtbufferd.spec 2> /dev/null |grep ^Wrote: From 6ca62cbb356c985077ea45615bf8e6f93c5a9560 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 6 Jan 2026 15:54:46 -0500 Subject: [PATCH 402/413] Provide optional output directory --- confluent_vtbufferd/buildrpm | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/confluent_vtbufferd/buildrpm b/confluent_vtbufferd/buildrpm index c3e0d567..bd1ae99b 100755 --- a/confluent_vtbufferd/buildrpm +++ b/confluent_vtbufferd/buildrpm @@ -24,5 +24,10 @@ else # Clean up the generated files in this directory rm -rf dist fi +ARCH=$(uname -m) +if [ ! -z "$1" ]; then + cp /root/prmbuild/RPMS/$ARCH/confluent_vtbufferd-*.$ARCH.rpm $1 +fi + From 488f23e3ed6c01e91a89cb41e915661c049188ad Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 6 Jan 2026 15:55:36 -0500 Subject: [PATCH 403/413] Fix spelling of rpmbuild --- confluent_vtbufferd/buildrpm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_vtbufferd/buildrpm b/confluent_vtbufferd/buildrpm index bd1ae99b..cc9e8a64 100755 --- a/confluent_vtbufferd/buildrpm +++ b/confluent_vtbufferd/buildrpm @@ -26,7 +26,7 @@ else fi ARCH=$(uname -m) if [ ! 
-z "$1" ]; then - cp /root/prmbuild/RPMS/$ARCH/confluent_vtbufferd-*.$ARCH.rpm $1 + cp /root/rpmbuild/RPMS/$ARCH/confluent_vtbufferd-*.$ARCH.rpm $1 fi From 99d10896e8eeec921262fc464d71a853a591746b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 8 Jan 2026 17:07:39 -0500 Subject: [PATCH 404/413] Fix parameter count unpack for accelerated switch interrogation --- confluent_server/confluent/networking/macmap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/networking/macmap.py b/confluent_server/confluent/networking/macmap.py index a96a48ee..56fb5654 100644 --- a/confluent_server/confluent/networking/macmap.py +++ b/confluent_server/confluent/networking/macmap.py @@ -153,7 +153,7 @@ def _nodelookup(switch, ifname): return None def _fast_map_switch(args): - switch, password, user, cfgm = args + switch, password, user, cfgm = args[:4] macdata = None kv = util.TLSCertVerifier(cfgm, switch, 'pubkeys.tls_hardwaremanager').verify_cert From 6e6ac67b3d178a496a17093dc73480dc143a723a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 13 Jan 2026 13:57:37 -0500 Subject: [PATCH 405/413] Provide some build assets Provide some dockerfiles for creating build containers --- build/arm/el10/Dockerfile | 8 +++++++ build/arm/el10/buildpackages.sh | 6 ++++++ build/arm/el10/rpmmacro | 3 +++ build/arm/el8/Dockerfile | 8 +++++++ build/arm/el8/buildpackages.sh | 6 ++++++ build/arm/el8/rpmmacro | 2 ++ build/arm/el9/Dockerfile | 10 +++++++++ build/arm/el9/buildpackages.sh | 6 ++++++ build/arm/el9/rpmmacro | 2 ++ build/arm/noble/Dockerfile | 12 +++++++++++ build/arm/noble/buildapt.sh | 21 ++++++++++++++++++ build/arm/noble/distributions.tmpl | 7 ++++++ build/arm/noble/stdeb.patch | 34 ++++++++++++++++++++++++++++++ build/buildarm | 9 ++++++++ 14 files changed, 134 insertions(+) create mode 100644 build/arm/el10/Dockerfile create mode 100644 build/arm/el10/buildpackages.sh create mode 100644 build/arm/el10/rpmmacro create mode 100644 
build/arm/el8/Dockerfile create mode 100644 build/arm/el8/buildpackages.sh create mode 100644 build/arm/el8/rpmmacro create mode 100644 build/arm/el9/Dockerfile create mode 100644 build/arm/el9/buildpackages.sh create mode 100644 build/arm/el9/rpmmacro create mode 100644 build/arm/noble/Dockerfile create mode 100644 build/arm/noble/buildapt.sh create mode 100644 build/arm/noble/distributions.tmpl create mode 100644 build/arm/noble/stdeb.patch create mode 100644 build/buildarm diff --git a/build/arm/el10/Dockerfile b/build/arm/el10/Dockerfile new file mode 100644 index 00000000..41e1732a --- /dev/null +++ b/build/arm/el10/Dockerfile @@ -0,0 +1,8 @@ +FROM almalinux:10 +RUN ["yum", "-y","update"] +RUN ["yum", "-y","install","gcc","make","rpm-build","python3-devel","python3-setuptools","createrepo","python3", "perl", "perl-DBI", "perl-JSON", "perl-XML-LibXML", "pinentry-tty", "rpm-sign", "git", "golang"] +ADD rpmmacro /root/.rpmmacros +ADD buildpackages.sh /bin/ +#VOLUME ["/rpms", "/srpms"] +CMD ["/bin/bash","/bin/buildpackages.sh"] + diff --git a/build/arm/el10/buildpackages.sh b/build/arm/el10/buildpackages.sh new file mode 100644 index 00000000..94a90f41 --- /dev/null +++ b/build/arm/el10/buildpackages.sh @@ -0,0 +1,6 @@ +for package in /srpms/*; do + rpmbuild --rebuild $package +done +find ~/rpmbuild/RPMS -type f -exec cp {} /rpms/ \; + + diff --git a/build/arm/el10/rpmmacro b/build/arm/el10/rpmmacro new file mode 100644 index 00000000..ebef7c8c --- /dev/null +++ b/build/arm/el10/rpmmacro @@ -0,0 +1,3 @@ +%_gpg_digest_algo sha256 +%_gpg_name Lenovo Scalable Infrastructure + diff --git a/build/arm/el8/Dockerfile b/build/arm/el8/Dockerfile new file mode 100644 index 00000000..e12c1715 --- /dev/null +++ b/build/arm/el8/Dockerfile @@ -0,0 +1,8 @@ +FROM almalinux:8 +RUN ["yum", "-y","update"] +RUN ["yum", "-y","install","gcc","make","rpm-build","python3-devel","python3-setuptools","createrepo","python3", "perl", "perl-DBI", "perl-JSON", "perl-Net-DNS", "perl-DB_File", 
"perl-XML-LibXML", "rpm-sign", "git", "fuse-devel","libcurl-devel"] +ADD rpmmacro /root/.rpmmacros +ADD buildpackages.sh /bin/ +#VOLUME ["/rpms", "/srpms"] +CMD ["/bin/bash","/bin/buildpackages.sh"] + diff --git a/build/arm/el8/buildpackages.sh b/build/arm/el8/buildpackages.sh new file mode 100644 index 00000000..c5d65c9a --- /dev/null +++ b/build/arm/el8/buildpackages.sh @@ -0,0 +1,6 @@ +#!/bin/bash +for package in /srpms/*; do + rpmbuild --rebuild $package +done +find ~/rpmbuild/RPMS -type f -exec cp {} /rpms/ \; + diff --git a/build/arm/el8/rpmmacro b/build/arm/el8/rpmmacro new file mode 100644 index 00000000..e7b23afb --- /dev/null +++ b/build/arm/el8/rpmmacro @@ -0,0 +1,2 @@ +%_gpg_digest_algo sha256 +%_gpg_name Lenovo Scalable Infrastructure diff --git a/build/arm/el9/Dockerfile b/build/arm/el9/Dockerfile new file mode 100644 index 00000000..9637dd46 --- /dev/null +++ b/build/arm/el9/Dockerfile @@ -0,0 +1,10 @@ +FROM almalinux:9 +RUN ["yum", "-y","update"] +RUN ["yum", "-y","install","gcc","make","rpm-build","python3-devel","python3-setuptools","createrepo","python3", "perl", "perl-DBI", "perl-JSON", "perl-Net-DNS", "perl-DB_File", "perl-XML-LibXML", "pinentry-tty", "rpm-sign", "epel-release", "git"] +RUN ["crb", "enable"] +RUN ["yum", "-y","install","fuse-devel","libcurl-devel"] +ADD rpmmacro /root/.rpmmacros +ADD buildpackages.sh /bin/ +#VOLUME ["/rpms", "/srpms"] +CMD ["/bin/bash","/bin/buildpackages.sh"] + diff --git a/build/arm/el9/buildpackages.sh b/build/arm/el9/buildpackages.sh new file mode 100644 index 00000000..c5d65c9a --- /dev/null +++ b/build/arm/el9/buildpackages.sh @@ -0,0 +1,6 @@ +#!/bin/bash +for package in /srpms/*; do + rpmbuild --rebuild $package +done +find ~/rpmbuild/RPMS -type f -exec cp {} /rpms/ \; + diff --git a/build/arm/el9/rpmmacro b/build/arm/el9/rpmmacro new file mode 100644 index 00000000..e7b23afb --- /dev/null +++ b/build/arm/el9/rpmmacro @@ -0,0 +1,2 @@ +%_gpg_digest_algo sha256 +%_gpg_name Lenovo Scalable Infrastructure 
diff --git a/build/arm/noble/Dockerfile b/build/arm/noble/Dockerfile new file mode 100644 index 00000000..e145de1f --- /dev/null +++ b/build/arm/noble/Dockerfile @@ -0,0 +1,12 @@ +FROM ubuntu:noble +ADD stdeb.patch /tmp/ +ADD buildapt.sh /bin/ +ADD distributions.tmpl /bin/ +RUN ["apt-get", "update"] +RUN ["apt-get", "install", "-y", "reprepro", "python3-stdeb", "gnupg-agent", "devscripts", "debhelper", "libsoap-lite-perl", "libdbi-perl", "quilt", "git", "python3-pyparsing", "python3-dnspython", "python3-eventlet", "python3-netifaces", "python3-paramiko", "dh-python", "libjson-perl", "ronn", "alien", "gcc", "make"] +RUN ["mkdir", "-p", "/sources/git/"] +RUN ["mkdir", "-p", "/debs/"] +RUN ["mkdir", "-p", "/apt/"] +RUN ["bash", "-c", "patch -p1 < /tmp/stdeb.patch"] +CMD ["/bin/bash", "/bin/buildapt.sh"] + diff --git a/build/arm/noble/buildapt.sh b/build/arm/noble/buildapt.sh new file mode 100644 index 00000000..600e832c --- /dev/null +++ b/build/arm/noble/buildapt.sh @@ -0,0 +1,21 @@ +#cp -a /sources/git /tmp +for builder in $(find /sources/git -name builddeb); do + cd $(dirname $builder) + ./builddeb /debs/ +done +cp /prebuilt/* /debs/ +cp /osd/*.deb /debs/ +mkdir -p /apt/conf/ +CODENAME=$(grep VERSION_CODENAME= /etc/os-release | sed -e 's/.*=//') +if [ -z "$CODENAME" ]; then + CODENAME=$(grep VERSION= /etc/os-release | sed -e 's/.*(//' -e 's/).*//') +fi +if ! 
grep $CODENAME /apt/conf/distributions; then + sed -e s/#CODENAME#/$CODENAME/ /bin/distributions.tmpl >> /apt/conf/distributions +fi +cd /apt/ +reprepro includedeb $CODENAME /debs/*.deb +for dsc in /debs/*.dsc; do + reprepro includedsc $CODENAME $dsc +done + diff --git a/build/arm/noble/distributions.tmpl b/build/arm/noble/distributions.tmpl new file mode 100644 index 00000000..b2000a43 --- /dev/null +++ b/build/arm/noble/distributions.tmpl @@ -0,0 +1,7 @@ +Origin: Lenovo HPC Packages +Label: Lenovo HPC Packages +Codename: #CODENAME# +Architectures: amd64 source +Components: main +Description: Lenovo HPC Packages + diff --git a/build/arm/noble/stdeb.patch b/build/arm/noble/stdeb.patch new file mode 100644 index 00000000..0eab2038 --- /dev/null +++ b/build/arm/noble/stdeb.patch @@ -0,0 +1,34 @@ +diff -urN t/usr/lib/python3/dist-packages/stdeb/cli_runner.py t.patch/usr/lib/python3/dist-packages/stdeb/cli_runner.py +--- t/usr/lib/python3/dist-packages/stdeb/cli_runner.py 2024-06-11 18:30:13.930328999 +0000 ++++ t.patch/usr/lib/python3/dist-packages/stdeb/cli_runner.py 2024-06-11 18:32:05.392731405 +0000 +@@ -8,7 +8,7 @@ + from ConfigParser import SafeConfigParser # noqa: F401 + except ImportError: + # python 3.x +- from configparser import SafeConfigParser # noqa: F401 ++ from configparser import ConfigParser # noqa: F401 + from distutils.util import strtobool + from distutils.fancy_getopt import FancyGetopt, translate_longopt + from stdeb.util import stdeb_cmdline_opts, stdeb_cmd_bool_opts +diff -urN t/usr/lib/python3/dist-packages/stdeb/util.py t.patch/usr/lib/python3/dist-packages/stdeb/util.py +--- t/usr/lib/python3/dist-packages/stdeb/util.py 2024-06-11 18:32:53.864776149 +0000 ++++ t.patch/usr/lib/python3/dist-packages/stdeb/util.py 2024-06-11 18:33:02.063952870 +0000 +@@ -730,7 +730,7 @@ + example. 
+ """ + +- cfg = ConfigParser.SafeConfigParser() ++ cfg = ConfigParser.ConfigParser() + cfg.read(cfg_files) + if cfg.has_section(module_name): + section_items = cfg.items(module_name) +@@ -801,7 +801,7 @@ + if len(cfg_files): + check_cfg_files(cfg_files, module_name) + +- cfg = ConfigParser.SafeConfigParser(cfg_defaults) ++ cfg = ConfigParser.ConfigParser(cfg_defaults) + for cfg_file in cfg_files: + with codecs.open(cfg_file, mode='r', encoding='utf-8') as fd: + cfg.readfp(fd) + diff --git a/build/buildarm b/build/buildarm new file mode 100644 index 00000000..73793eea --- /dev/null +++ b/build/buildarm @@ -0,0 +1,9 @@ +cd ~/confluent +git pull +rm ~/rpmbuild/RPMS/noarch/*osdeploy* +rm ~/rpmbuild/SRPMS/*osdeploy* +sh confluent_osdeploy/buildrpm-aarch64 +mkdir -p $HOME/el9/ +mkdir -p $HOME/el10/ +podman run --rm -it -v $HOME:/build el9build bash /build/confluent/confluent_vtbufferd/buildrpm /build/el9/ + From afb6356f9d9c82b1e5f9856250e0f411a9fc2a39 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 14 Jan 2026 16:29:31 -0500 Subject: [PATCH 406/413] Change ownership Container runs as internal 'root' user for now --- container/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/container/Dockerfile b/container/Dockerfile index 1d9ab420..80d3b5de 100644 --- a/container/Dockerfile +++ b/container/Dockerfile @@ -1,7 +1,8 @@ -FROM almalinux:8 +FROM almalinux:9 RUN ["yum", "-y", "update"] -RUN ["rpm", "-ivh", "https://hpc.lenovo.com/yum/latest/el8/x86_64/lenovo-hpc-yum-1-1.x86_64.rpm"] +RUN ["rpm", "-ivh", "https://hpc.lenovo.com/yum/latest/el9/x86_64/lenovo-hpc-yum-1-1.x86_64.rpm"] RUN ["yum", "-y", "install", "lenovo-confluent", "tftp-server", "openssh-clients", "openssl", "vim-enhanced", "iproute"] +RUN ["chown", "-r", "root", "/etc/confluent"] ADD runconfluent.sh /bin/ CMD ["/bin/bash", "/bin/runconfluent.sh"] From 72c486807348394975cbd8de02f2f50f79e4703a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 15 Jan 2026 09:46:23 
-0500 Subject: [PATCH 407/413] Update container with more packages, volumes, env, and alma 10 --- container/Dockerfile | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/container/Dockerfile b/container/Dockerfile index 80d3b5de..25cebdd0 100644 --- a/container/Dockerfile +++ b/container/Dockerfile @@ -1,7 +1,13 @@ -FROM almalinux:9 +FROM almalinux:10 +VOLUME /var/lib/confluent +VOLUME /var/run/confluent +VOLUME /etc/confluent +VOLUME /var/lib/tftpboot +ENV EMBEDHTTP=1 +ENV EMBEDTFTP=0 RUN ["yum", "-y", "update"] -RUN ["rpm", "-ivh", "https://hpc.lenovo.com/yum/latest/el9/x86_64/lenovo-hpc-yum-1-1.x86_64.rpm"] -RUN ["yum", "-y", "install", "lenovo-confluent", "tftp-server", "openssh-clients", "openssl", "vim-enhanced", "iproute"] +RUN ["rpm", "-ivh", "https://hpc.lenovo.com/yum/latest/el10/x86_64/lenovo-hpc-yum-1-1.x86_64.rpm"] +RUN ["yum", "-y", "install", "lenovo-confluent", "tftp-server", "openssh-clients", "openssl", "vim-enhanced", "iproute", "policycoreutils", "selinux-policy-targeted"] RUN ["chown", "-r", "root", "/etc/confluent"] ADD runconfluent.sh /bin/ CMD ["/bin/bash", "/bin/runconfluent.sh"] From ccaf22f44fd80639a107d27b1cfff81bedc78b31 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 15 Jan 2026 12:52:07 -0500 Subject: [PATCH 408/413] Add architecture handling in pkglist To handle amd64/arm64 profiles, have the pkglist allow for architecture specific qualifiers. Additionally, soften failure to accomplish selinux changes. 
--- imgutil/el10/pkglist | 6 +++++- imgutil/el9/pkglist | 7 ++++++- imgutil/imgutil | 18 ++++++++++++++++-- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/imgutil/el10/pkglist b/imgutil/el10/pkglist index 56acdd23..0bee79dd 100644 --- a/imgutil/el10/pkglist +++ b/imgutil/el10/pkglist @@ -19,4 +19,8 @@ fuse-libs libnl3 dhcpcd openssh-keysign -chrony kernel net-tools nfs-utils openssh-server rsync tar util-linux python3 tar dracut dracut-network ethtool parted openssl openssh-clients bash vim-minimal rpm iputils lvm2 efibootmgr shim-x64.x86_64 grub2-efi-x64 attr +chrony kernel net-tools nfs-utils openssh-server rsync tar util-linux python3 tar dracut dracut-network ethtool parted openssl openssh-clients bash vim-minimal rpm iputils lvm2 efibootmgr attr +%onlyarch x86_64 +shim-x64.x86_64 grub2-efi-x64 +%onlyarch aarch64 +shim-aa64.aarch64 grub2-efi-aa64 \ No newline at end of file diff --git a/imgutil/el9/pkglist b/imgutil/el9/pkglist index 8fc1dbf4..44eaaf6d 100644 --- a/imgutil/el9/pkglist +++ b/imgutil/el9/pkglist @@ -1,3 +1,4 @@ +system-release dnf hostname irqbalance @@ -17,4 +18,8 @@ xfsprogs e2fsprogs fuse-libs libnl3 -chrony kernel net-tools nfs-utils openssh-server rsync tar util-linux python3 tar dracut dracut-network ethtool parted openssl dhclient openssh-clients bash vim-minimal rpm iputils lvm2 efibootmgr shim-x64.x86_64 grub2-efi-x64 attr +chrony kernel net-tools nfs-utils openssh-server rsync tar util-linux python3 tar dracut dracut-network ethtool parted openssl dhclient openssh-clients bash vim-minimal rpm iputils lvm2 efibootmgr attr +%onlyarch x86_64 +shim-x64.x86_64 grub2-efi-x64 +%onlyarch aarch64 +shim-aa64.aarch64 grub2-efi-aa64 \ No newline at end of file diff --git a/imgutil/imgutil b/imgutil/imgutil index 0c44d8ee..acc9e0f4 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -519,9 +519,18 @@ class OsHandler(object): pkglistfile = pkglistfile[:-1] with open(pkglistfile, 'r') as pkglist: pkgs = '' + ignoremode = False for 
line in pkglist.readlines(): line = line.split('#', 1)[0].strip() - pkgs += line + ' ' + if line.startswith(r'%onlyarch '): + archs = line[len(r'%onlyarch '):].split() + if self.arch not in archs: + ignoremode = True + else: + ignoremode = False + continue + if not ignoremode: + pkgs += line + ' ' pkgs = pkgs.split() retpkgs = [] for pkg in pkgs: @@ -764,7 +773,12 @@ class ElHandler(OsHandler): if line.startswith('selinuxfs '): break else: - self.relabel_targdir() + try: + self.relabel_targdir() + except subprocess.CalledProcessError: + # Some filesystem contexts can not accommodate the selinux labels, warn that + # this failed, but allow it to proceed in case it can boot anyway + sys.stderr.write('Warning: could not relabel target filesystem for SELinux\n') def relabel_targdir(self): subprocess.check_call( From a0a58872141d1fcb8483dff87a7c249c555d6a3b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 15 Jan 2026 13:27:21 -0500 Subject: [PATCH 409/413] Fallback to filename for PE format kernels Some ARM64 kernels ship as EFI executables, but it's not obvious how to extract version numbers from those properly. 
--- .../initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh index f8b576a2..8505c6df 100644 --- a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh +++ b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh @@ -211,7 +211,7 @@ v6cfg=$(grep ^ipv6_method: /etc/confluent/confluent.deploycfg) v6cfg=${v6cfg#ipv6_method: } v4cfg=$(grep ^ipv4_method: /etc/confluent/confluent.deploycfg) v4cfg=${v4cfg#ipv4_method: } -if [ "$v4cfg" = "static" ] || [ "$v4cfg" = "dhcp" ]; then +if [ "$v4cfg" = "static" ] || [ "$v4cfg" = "dhcp" ]; then # someone might feed 'manual' in or other such nonsense mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg) mgr=${mgr#deploy_server: } fi From f8b8ce3847bd31fa2c8ca207c30b1b6e264ad991 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 15 Jan 2026 14:29:23 -0500 Subject: [PATCH 410/413] Fallback to filename for PE format kernels Some ARM64 kernels ship as EFI executables, but it's not obvious how to extract version numbers from those properly. --- imgutil/imgutil | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index acc9e0f4..0aaf7f55 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -824,11 +824,14 @@ def version_sort(iterable): def get_kern_version(filename): with open(filename, 'rb') as kernfile: - checkgzip = kernfile.read(2) - if checkgzip == b'\x1f\x8b': - # gzipped... this would probably be aarch64 + header = kernfile.read(2) + if header == b'\x1f\x8b': + # gzipped... 
we can't process this right now, # assume the filename has the version embedded return os.path.basename(filename).replace('vmlinuz-', '') + if header == b'MZ': + # PE format, no easy way to get version, assume filename has it + return os.path.basename(filename).replace('vmlinuz-', '') kernfile.seek(0x20e) offset = struct.unpack(' Date: Thu, 15 Jan 2026 14:29:31 -0500 Subject: [PATCH 411/413] Revert "Fallback to filename for PE format kernels" This reverts commit a0a58872141d1fcb8483dff87a7c249c555d6a3b. --- .../initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh index 8505c6df..f8b576a2 100644 --- a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh +++ b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh @@ -211,7 +211,7 @@ v6cfg=$(grep ^ipv6_method: /etc/confluent/confluent.deploycfg) v6cfg=${v6cfg#ipv6_method: } v4cfg=$(grep ^ipv4_method: /etc/confluent/confluent.deploycfg) v4cfg=${v4cfg#ipv4_method: } -if [ "$v4cfg" = "static" ] || [ "$v4cfg" = "dhcp" ]; then # someone might feed 'manual' in or other such nonsense +if [ "$v4cfg" = "static" ] || [ "$v4cfg" = "dhcp" ]; then mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg) mgr=${mgr#deploy_server: } fi From 048780e16d865515ba251df065fb9456b2d8a6c9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 15 Jan 2026 15:15:11 -0500 Subject: [PATCH 412/413] Explicitly mknodes for pack/unpack In some contexts, udev may be asleep at the wheel. Explicitly have dmsetup refresh the devnodes.
--- imgutil/imgutil | 2 ++ 1 file changed, 2 insertions(+) diff --git a/imgutil/imgutil b/imgutil/imgutil index 0aaf7f55..30cbd1bd 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -386,6 +386,7 @@ def encrypt_image(plainfile, cryptfile, keyfile): loopdev = subprocess.check_output(['losetup', '-f']).decode('utf8').strip() subprocess.check_call(['losetup', loopdev, cryptfile]) subprocess.check_call(['dmsetup', 'create', dmname, '--table', '0 {} crypt aes-xts-plain64 {} 0 {} 8'.format(neededblocks, key, loopdev)]) + subprocess.check_call(['dmsetup', 'mknodes', dmname]) with open('/dev/mapper/{}'.format(dmname), 'wb') as cryptout: with open(plainfile, 'rb+') as plainin: lastoffset = 0 @@ -1295,6 +1296,7 @@ def prep_decrypt(indir): tempfile.mktemp() subprocess.check_call(['dmsetup', 'create', dmname, '--table', '0 {0} crypt {1} {2} 0 {3} 8'.format( imglen, cipher, key, loopdev)]) + subprocess.check_call(['dmsetup', 'mknodes', dmname]) return '/dev/mapper/{0}'.format(dmname), loopdev From e6c19388a2e456450e07a6787b20b82f52f9d3fc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 16 Jan 2026 08:45:12 -0500 Subject: [PATCH 413/413] Add device-mapper to container build Confluent needs device-mapper for imgutil operation --- container/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/container/Dockerfile b/container/Dockerfile index 25cebdd0..4c4072ba 100644 --- a/container/Dockerfile +++ b/container/Dockerfile @@ -7,7 +7,7 @@ ENV EMBEDHTTP=1 ENV EMBEDTFTP=0 RUN ["yum", "-y", "update"] RUN ["rpm", "-ivh", "https://hpc.lenovo.com/yum/latest/el10/x86_64/lenovo-hpc-yum-1-1.x86_64.rpm"] -RUN ["yum", "-y", "install", "lenovo-confluent", "tftp-server", "openssh-clients", "openssl", "vim-enhanced", "iproute", "policycoreutils", "selinux-policy-targeted"] +RUN ["yum", "-y", "install", "lenovo-confluent", "tftp-server", "openssh-clients", "openssl", "vim-enhanced", "iproute", "policycoreutils", "selinux-policy-targeted", "device-mapper"] RUN
["chown", "-r", "root", "/etc/confluent"] ADD runconfluent.sh /bin/ CMD ["/bin/bash", "/bin/runconfluent.sh"]