From 49a504972f2b17bce0ab396758b4dd4ac798e1af Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 25 Oct 2023 14:21:55 -0400 Subject: [PATCH 1/4] Fix syntax error in confignet --- confluent_osdeploy/common/profile/scripts/confignet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index f2a2edff..7e641205 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -435,7 +435,7 @@ if __name__ == '__main__': curridx = addr[-1] if curridx in doneidxs: continue - for tries in (1, 2 3): + for tries in (1, 2, 3): try: status, nc = apiclient.HTTPSClient(usejson=True, host=srv).grab_url_with_status('/confluent-api/self/netcfg') break @@ -446,7 +446,7 @@ if __name__ == '__main__': continue nc = json.loads(nc) if not dc: - for tries in (1, 2 3): + for tries in (1, 2, 3): try: status, dc = apiclient.HTTPSClient(usejson=True, host=srv).grab_url_with_status('/confluent-api/self/deploycfg2') break From 0857716f64a294b44de4ba883dd552af33800ff5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 26 Oct 2023 08:58:37 -0400 Subject: [PATCH 2/4] Add support for normalized sensors This opens the door for normalized common sensors for clients that care about the semantics but cannot keep track of inconsistent sensor names from implementation to implementation. --- confluent_server/confluent/core.py | 14 ++++++++++++++ .../plugins/hardwaremanagement/ipmi.py | 19 +++++++++++++++++++ .../plugins/hardwaremanagement/redfish.py | 19 +++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index f70bc6ae..6ab6bd59 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -481,6 +481,20 @@ def _init_core(): 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', }), + 'normalized': { + 'inlet_temp': PluginRoute({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + 'average_cpu_temp': PluginRoute({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + 'total_power': PluginRoute({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + }, 'energy': PluginCollection({ 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index 938b69ae..06a8c444 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -861,6 +861,23 @@ class IpmiHandler(object): resourcename = sensor['name'] self.ipmicmd.sensormap[simplify_name(resourcename)] = resourcename + def read_normalized(self, sensorname): + readings = None + if sensorname == 'average_cpu_temp': + cputemp = self.ipmicmd.get_average_processor_temperature() + readings = [cputemp] + elif sensorname == 'inlet_temp': + inltemp = self.ipmicmd.get_inlet_temperature() + readings = [inltemp] + elif sensorname == 'total_power': + sensor = EmptySensor('Total Power') + sensor.states = [] + sensor.units = 'W' + sensor.value = self.ipmicmd.get_system_power_watts() + readings = [sensor] + if readings: + self.output.put(msg.SensorReadings(readings, name=self.node)) + def read_sensors(self, sensorname): if sensorname == 'all': sensors = self.ipmicmd.get_sensor_descriptions() @@ -1157,6 +1174,8 @@ class IpmiHandler(object): if len(self.element) < 3: return self.sensorcategory = self.element[2] + if self.sensorcategory == 'normalized': + return self.read_normalized(self.element[-1]) # list sensors per category if len(self.element) == 3 and self.element[-2] == 'hardware': if self.sensorcategory == 'leds': diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 20315134..f53cc393 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -712,6 +712,23 @@ class IpmiHandler(object): resourcename = sensor['name'] self.sensormap[simplify_name(resourcename)] = resourcename + def read_normalized(self, sensorname): + readings = None + if sensorname == 'average_cpu_temp': + cputemp = self.ipmicmd.get_average_processor_temperature() + readings = [cputemp] + elif sensorname == 'inlet_temp': + inltemp = self.ipmicmd.get_inlet_temperature() + readings = [inltemp] + elif sensorname == 'total_power': + sensor = EmptySensor('Total Power') + sensor.states = [] + sensor.units = 'W' + sensor.value = self.ipmicmd.get_system_power_watts() + readings = [sensor] + if readings: + self.output.put(msg.SensorReadings(readings, name=self.node)) + def read_sensors(self, sensorname): if sensorname == 'all': sensors = self.ipmicmd.get_sensor_descriptions() @@ -1012,6 +1029,8 @@ class IpmiHandler(object): if len(self.element) < 3: return self.sensorcategory = self.element[2] + if self.sensorcategory == 'normalized': + return self.read_normalized(self.element[-1]) # list sensors per category if len(self.element) == 3 and self.element[-2] == 'hardware': if self.sensorcategory == 'leds': From d0826106780c82c3b2c60441b3d3e6c85f256983 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 27 Oct 2023 13:34:52 -0400 Subject: [PATCH 3/4] Add more deep checking of node networking Whether due to the management node or node IP addresses, check if deployment can reasonably proceed using IPv4 or IPv6, and give a warning with some suggestions to check. Also, add nodeinventory -s as an example resolution for missing uuid. --- confluent_server/bin/confluent_selfcheck | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/confluent_server/bin/confluent_selfcheck b/confluent_server/bin/confluent_selfcheck index 1b504e95..cc1409cf 100755 --- a/confluent_server/bin/confluent_selfcheck +++ b/confluent_server/bin/confluent_selfcheck @@ -15,6 +15,7 @@ import confluent.sshutil as sshutil import confluent.certutil as certutil import confluent.client as client import confluent.config.configmanager as configmanager +import confluent.netutil as netutil import eventlet.green.subprocess as subprocess import tempfile import shutil @@ -244,7 +245,7 @@ if __name__ == '__main__': allok = False uuidok = True # not really, but suppress the spurious error dnsdomain = rsp.get('dns.domain', {}).get('value', '') - if ',' in dnsdomain or ' ' in dnsdomain: + if dnsdomain and (',' in dnsdomain or ' ' in dnsdomain): allok = False emprint(f'{args.node} has a dns.domain that appears to be a search instead of singular domain') uuidok = True # not really, but suppress the spurious error @@ -269,9 +270,28 @@ if __name__ == '__main__': switch_value = rsp[key].get('value',None) if switch_value and switch_value not in valid_nodes: emprint(f'{switch_value} is not a valid node name (as referenced by attribute "{key}" of node {args.node}).') + print(f"Checking network configuration for {args.node}") + cfg = configmanager.ConfigManager(None) + bootablev4nics = [] + bootablev6nics = [] + for nic in glob.glob("/sys/class/net/*/ifindex"): + idx = int(open(nic, "r").read()) + nicname = nic.split('/')[-2] + ncfg = netutil.get_nic_config(cfg, args.node, ifidx=idx) + if ncfg['ipv4_address'] or ncfg['ipv4_method'] == 'dhcp': + bootablev4nics.append(nicname) + if ncfg['ipv6_address']: + bootablev6nics.append(nicname) + if bootablev4nics: + print("{} appears to have network configuration suitable for IPv4 deployment via: {}".format(args.node, ",".join(bootablev4nics))) + elif bootablev6nics: + print('{} appears to have networking configuration suitable for IPv6 deployment via: {}'.format(args.node, ",".join(bootablev6nics))) + else: + emprint(f"{args.node} may not have any viable IP network configuration (check name resolution (DNS or hosts file) " + "and/or net.*ipv4_address, and verify that the deployment serer addresses and subnet mask/prefix length are accurate)") if not uuidok and not macok: allok = False - emprint(f'{args.node} does not have a uuid or mac address defined in id.uuid or net.*hwaddr, deployment will not work') + emprint(f'{args.node} does not have a uuid or mac address defined in id.uuid or net.*hwaddr, deployment will not work (Example resolution: nodeinventory {args.node} -s)') if allok: print(f'No issues detected with attributes of {args.node}') fprint("Checking name resolution: ") From a1ac234b73173c87a679d1f718cf4b70dd5115da Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 27 Oct 2023 15:31:14 -0400 Subject: [PATCH 4/4] Enhance error message for authentication issue during syncfiles --- confluent_server/confluent/sshutil.py | 16 +++++++++++++--- confluent_server/confluent/syncfiles.py | 2 ++ confluent_server/confluent/util.py | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/confluent_server/confluent/sshutil.py b/confluent_server/confluent/sshutil.py index 0a52fe81..d097ade1 100644 --- a/confluent_server/confluent/sshutil.py +++ b/confluent_server/confluent/sshutil.py @@ -129,11 +129,21 @@ def prep_ssh_key(keyname): ap.write('#!/bin/sh\necho $CONFLUENT_SSH_PASSPHRASE\nrm {0}\n'.format(askpass)) os.chmod(askpass, 0o700) os.environ['CONFLUENT_SSH_PASSPHRASE'] = get_passphrase() + olddisplay = os.environ.get('DISPLAY', None) + oldaskpass = os.environ.get('SSH_ASKPASS', None) os.environ['DISPLAY'] = 'NONE' os.environ['SSH_ASKPASS'] = askpass - with open(os.devnull, 'wb') as devnull: - subprocess.check_output(['ssh-add', keyname], stdin=devnull, stderr=devnull) - del os.environ['CONFLUENT_SSH_PASSPHRASE'] + try: + with open(os.devnull, 'wb') as devnull: + subprocess.check_output(['ssh-add', keyname], stdin=devnull, stderr=devnull) + finally: + del os.environ['CONFLUENT_SSH_PASSPHRASE'] + del os.environ['DISPLAY'] + del os.environ['SSH_ASKPASS'] + if olddisplay: + os.environ['DISPLAY'] = olddisplay + if oldaskpass: + os.environ['SSH_ASKPASS'] = oldaskpass ready_keys[keyname] = 1 finally: adding_key = False diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index 556d9bcf..6c11d072 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -212,6 +212,8 @@ def sync_list_to_node(sl, node, suffixes, peerip=None): unreadablefiles.append(filename.replace(targdir, '')) if unreadablefiles: raise Exception("Syncing failed due to unreadable files: " + ','.join(unreadablefiles)) + elif b'Permission denied, please try again.' in e.stderr: + raise Exception('Syncing failed due to authentication error, is the confluent automation key not set up (osdeploy initialize -a) or is there some process replacing authorized_keys on the host?') else: raise finally: diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index 1509a827..8cf9bbc9 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -42,7 +42,7 @@ def run(cmd): stdout, stderr = process.communicate() retcode = process.poll() if retcode: - raise subprocess.CalledProcessError(retcode, process.args, output=stdout) + raise subprocess.CalledProcessError(retcode, process.args, output=stdout, stderr=stderr) return stdout, stderr