From 2787e1d8620113e847e1a29abf5d9f68e7ae39f3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 23 Jul 2019 11:57:35 -0400 Subject: [PATCH 1/7] Present more data about missing entries from assign csv When doing an assign on csv, present all the missing entries rather than stopping on the first. --- confluent_client/bin/nodediscover | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/confluent_client/bin/nodediscover b/confluent_client/bin/nodediscover index b7301623..c045f20c 100755 --- a/confluent_client/bin/nodediscover +++ b/confluent_client/bin/nodediscover @@ -154,6 +154,7 @@ def import_csv(options, session): for field in fields: if field in unique_fields: unique_data[field] = set([]) + broken = False for record in records: currfields = list(fields) nodedatum = {} @@ -170,14 +171,16 @@ def import_csv(options, session): nodedatum[currfield] = datum if not datum_complete(nodedatum): sys.exit(1) - if not search_record(nodedatum, options, session): + if not search_record(nodedatum, options, session) and not broken: blocking_scan(session) if not search_record(nodedatum, options, session): sys.stderr.write( "Could not match the following data: " + repr(nodedatum) + '\n') - sys.exit(1) + broken = True nodedata.append(nodedatum) + if broken: + sys.exit(1) for datum in nodedata: maclist = search_record(datum, options, session) datum = datum_to_attrib(datum) From 64cc2416d1f2338123ae953fde87e8e360ef25c1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 24 Jul 2019 15:37:01 -0400 Subject: [PATCH 2/7] Fix list argument --- confluent_client/bin/nodelicense | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_client/bin/nodelicense b/confluent_client/bin/nodelicense index 4ef0e6b9..ecc5d6fb 100755 --- a/confluent_client/bin/nodelicense +++ b/confluent_client/bin/nodelicense @@ -50,7 +50,7 @@ try: downdir = args[2] elif args[1] == 'delete': delete = args[2] - else: + elif args[1] != 'list': argparser.print_help() 
sys.exit(1) except IndexError: From 8641885f861f8b3617517456969eac46ad6bab43 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 25 Jul 2019 08:52:47 -0400 Subject: [PATCH 3/7] Fix handling of socket error with neighbor If a system is half up, a different sort of timeout is given. Handle this and also preserve the original exception better if not one of these two. --- .../confluent/plugins/hardwaremanagement/redfish.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index e4d3d6d3..cef02cc0 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -169,7 +169,7 @@ class IpmiCommandWrapper(ipmicommand.Command): try: super(IpmiCommandWrapper, self).__init__(**kwargs) except socket.error as se: - if se[1] == 'EHOSTUNREACH': + if isinstance(se, socket.timeout) or (len(se) > 1 and se[1] == 'EHOSTUNREACH'): raise exc.TargetEndpointUnreachable('timeout') raise except pygexc.PyghmiException as pe: From 21edd8217763af1646d9fd875ca8976c53251e56 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 25 Jul 2019 09:28:55 -0400 Subject: [PATCH 4/7] Extend generic catches in redfish/ipmi Have it provide more detailed error data at a glance to short out some debug requirements. 
--- confluent_server/confluent/plugins/hardwaremanagement/ipmi.py | 2 +- .../confluent/plugins/hardwaremanagement/redfish.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index 91352bd4..a27fe007 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -434,7 +434,7 @@ def perform_request(operator, node, element, except pygexc.InvalidParameterValue as e: results.put(msg.ConfluentNodeError(node, str(e))) except Exception as e: - results.put(msg.ConfluentNodeError(node, 'Unexpected Error')) + results.put(msg.ConfluentNodeError(node, 'Unexpected Error: {0}'.format(str(e)))) traceback.print_exc() finally: results.put('Done') diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index cef02cc0..4f893a27 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -323,7 +323,7 @@ def perform_request(operator, node, element, except (pygexc.InvalidParameterValue, pygexc.RedfishError) as e: results.put(msg.ConfluentNodeError(node, str(e))) except Exception as e: - results.put(msg.ConfluentNodeError(node, 'Unexpected Error')) + results.put(msg.ConfluentNodeError(node, 'Unexpected Error: {0}'.format(str(e)))) traceback.print_exc() finally: results.put('Done') From af025f7304424e5b1074ae6c1179b50cf821379c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 25 Jul 2019 13:09:52 -0400 Subject: [PATCH 5/7] Present log name when provided in nodeeventlog Some managers combine logs, allow disambiguation through including in output. 
--- confluent_client/bin/nodeeventlog | 6 ++++-- confluent_server/confluent/messages.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/confluent_client/bin/nodeeventlog b/confluent_client/bin/nodeeventlog index 03f870c5..291f6ac4 100755 --- a/confluent_client/bin/nodeeventlog +++ b/confluent_client/bin/nodeeventlog @@ -1,7 +1,7 @@ #!/usr/bin/python # vim: tabstop=4 shiftwidth=4 softtabstop=4 -# Copyright 2015-2017 Lenovo +# Copyright 2015-2019 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -65,6 +65,8 @@ def format_event(evt): display = dt.strptime(evt['timestamp'], '%Y-%m-%dT%H:%M:%S') retparts.append(display.strftime('%m/%d/%Y %H:%M:%S')) dscparts = [] + if evt.get('log_id', None): + retparts.append(evt['log_id'] + ':') if 'component_type' in evt and evt['component_type'] is not None: dscparts.append(evt['component_type']) if 'component' in evt and evt['component'] is not None: @@ -102,4 +104,4 @@ for rsp in func('/noderange/{0}/events/hardware/log'.format(noderange)): if 'events' in thisdata: evtdata = thisdata['events'] for evt in evtdata: - print '{0}: {1}'.format(node, format_event(evt)) + print('{0}: {1}'.format(node, format_event(evt))) diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index fa0bee06..36213d28 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -1,7 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation -# Copyright 2015-2017 Lenovo +# Copyright 2015-2019 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -1249,6 +1249,7 @@ class EventCollection(ConfluentMessage): 'timestamp': event.get('timestamp', None), 'message': event.get('message', None), 'record_id': event.get('record_id', None), + 'log_id': event.get('log_id', None), } if event['severity'] not in valid_health_values: raise exc.NotImplementedException( From a251a538b0708de4bc9c69f626148ce687463ca8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 26 Jul 2019 09:25:19 -0400 Subject: [PATCH 6/7] Improve SMM discovery SMM discovery behavior has seemingly gotten more picky with time. First, switch to an IPMI-free approach if the user has a custom password. The web based approach is much less problematic than the SMM IPMI stack in this context. If the user specifies they want to use default credentials, we have no choice but to use IPMI. Omit things and shuffle order of operations to mitigate problems. It isn't perfect, but it does work eventually. --- .../confluent/discovery/handlers/bmc.py | 109 ++++++++------- .../confluent/discovery/handlers/smm.py | 127 ++++++++++++++++-- 2 files changed, 179 insertions(+), 57 deletions(-) diff --git a/confluent_server/confluent/discovery/handlers/bmc.py b/confluent_server/confluent/discovery/handlers/bmc.py index dc783f8b..4985f795 100644 --- a/confluent_server/confluent/discovery/handlers/bmc.py +++ b/confluent_server/confluent/discovery/handlers/bmc.py @@ -56,27 +56,27 @@ class NodeHandler(generic.NodeHandler): def config(self, nodename, reset=False): self._bmcconfig(nodename, reset) - def _bmcconfig(self, nodename, reset=False, customconfig=None): + def _bmcconfig(self, nodename, reset=False, customconfig=None, vc=None): # TODO(jjohnson2): set ip parameters, user/pass, alert cfg maybe # In general, try to use https automation, to make it consistent # between hypothetical secure path and today. 
+ creds = self.configmanager.get_node_attributes( + nodename, + ['secret.hardwaremanagementuser', + 'secret.hardwaremanagementpassword'], decrypt=True) + user = creds.get(nodename, {}).get( + 'secret.hardwaremanagementuser', {}).get('value', None) + passwd = creds.get(nodename, {}).get( + 'secret.hardwaremanagementpassword', {}).get('value', None) try: ic = self._get_ipmicmd() passwd = DEFAULT_PASS except pygexc.IpmiException as pi: - creds = self.configmanager.get_node_attributes( - nodename, - ['secret.hardwaremanagementuser', - 'secret.hardwaremanagementpassword'], decrypt=True) - user = creds.get(nodename, {}).get( - 'secret.hardwaremanagementuser', {}).get('value', None) havecustomcreds = False if user is not None and user != DEFAULT_USER: havecustomcreds = True else: user = DEFAULT_USER - passwd = creds.get(nodename, {}).get( - 'secret.hardwaremanagementpassword', {}).get('value', None) if passwd is not None and passwd != DEFAULT_PASS: havecustomcreds = True else: @@ -85,8 +85,8 @@ class NodeHandler(generic.NodeHandler): ic = self._get_ipmicmd(user, passwd) else: raise - if customconfig: - customconfig(ic) + if vc: + ic.register_key_handler(vc) currusers = ic.get_users() lanchan = ic.get_network_channel() userdata = ic.xraw_command(netfn=6, command=0x44, data=(lanchan, @@ -106,6 +106,55 @@ class NodeHandler(generic.NodeHandler): raise exc.TargetEndpointBadCredentials( 'secret.hardwaremanagementuser and/or ' 'secret.hardwaremanagementpassword was not configured') + newuser = cd['secret.hardwaremanagementuser']['value'] + newpass = cd['secret.hardwaremanagementpassword']['value'] + for uid in currusers: + if currusers[uid]['name'] == newuser: + # Use existing account that has been created + newuserslot = uid + if newpass != passwd: # don't mess with existing if no change + ic.set_user_password(newuserslot, password=newpass) + ic = self._get_ipmicmd(user, passwd) + if vc: + ic.register_key_handler(vc) + break + else: + newuserslot = lockedusers + 1 + if 
newuserslot < 2: + newuserslot = 2 + if newpass != passwd: # don't mess with existing if no change + ic.set_user_password(newuserslot, password=newpass) + ic.set_user_name(newuserslot, newuser) + if havecustomcreds: + ic = self._get_ipmicmd(user, passwd) + if vc: + ic.register_key_handler(vc) + #We are remote operating on the account we are + #using, no need to try to set user access + #ic.set_user_access(newuserslot, lanchan, + # privilege_level='administrator') + # Now to zap others + for uid in currusers: + if uid != newuserslot: + if uid <= lockedusers: # we cannot delete, settle for disable + ic.disable_user(uid, 'disable') + else: + # lead with the most critical thing, removing user access + ic.set_user_access(uid, channel=None, callback=False, + link_auth=False, ipmi_msg=False, + privilege_level='no_access') + # next, try to disable the password + ic.set_user_password(uid, mode='disable', password=None) + # ok, now we can be less paranoid + try: + ic.user_delete(uid) + except pygexc.IpmiException as ie: + if ie.ipmicode != 0xd5: # some response to the 0xff + # name... 
+ # the user will remain, but that is life + raise + if customconfig: + customconfig(ic) if ('hardwaremanagement.manager' in cd and cd['hardwaremanagement.manager']['value'] and not cd['hardwaremanagement.manager']['value'].startswith( @@ -134,44 +183,6 @@ class NodeHandler(generic.NodeHandler): else: raise exc.TargetEndpointUnreachable( 'hardwaremanagement.manager must be set to desired address') - newuser = cd['secret.hardwaremanagementuser']['value'] - newpass = cd['secret.hardwaremanagementpassword']['value'] - for uid in currusers: - if currusers[uid]['name'] == newuser: - # Use existing account that has been created - newuserslot = uid - if newpass != passwd: # don't mess with existing if no change - ic.set_user_password(newuserslot, password=newpass) - break - else: - newuserslot = lockedusers + 1 - if newuserslot < 2: - newuserslot = 2 - if newpass != passwd: # don't mess with existing if no change - ic.set_user_password(newuserslot, password=newpass) - ic.set_user_name(newuserslot, newuser) - ic.set_user_access(newuserslot, lanchan, - privilege_level='administrator') - # Now to zap others - for uid in currusers: - if uid != newuserslot: - if uid <= lockedusers: # we cannot delete, settle for disable - ic.disable_user(uid, 'disable') - else: - # lead with the most critical thing, removing user access - ic.set_user_access(uid, channel=None, callback=False, - link_auth=False, ipmi_msg=False, - privilege_level='no_access') - # next, try to disable the password - ic.set_user_password(uid, mode='disable', password=None) - # ok, now we can be less paranoid - try: - ic.user_delete(uid) - except pygexc.IpmiException as ie: - if ie.ipmicode != 0xd5: # some response to the 0xff - # name... 
- # the user will remain, but that is life - raise if reset: ic.reset_bmc() return ic diff --git a/confluent_server/confluent/discovery/handlers/smm.py b/confluent_server/confluent/discovery/handlers/smm.py index 6b426aab..82149588 100644 --- a/confluent_server/confluent/discovery/handlers/smm.py +++ b/confluent_server/confluent/discovery/handlers/smm.py @@ -13,7 +13,15 @@ # limitations under the License. import confluent.discovery.handlers.bmc as bmchandler +import confluent.exceptions as exc +import pyghmi.util.webclient as webclient import struct +import urllib +import eventlet.support.greendns +import confluent.netutil as netutil +getaddrinfo = eventlet.support.greendns.getaddrinfo + +from xml.etree.ElementTree import fromstring def fixuuid(baduuid): # SMM dumps it out in hex @@ -26,7 +34,7 @@ def fixuuid(baduuid): class NodeHandler(bmchandler.NodeHandler): is_enclosure = True devname = 'SMM' - maxmacs = 5 # support an enclosure, but try to avoid catching daisy chain + maxmacs = 6 # support an enclosure, but try to avoid catching daisy chain def scan(self): # the UUID is in a weird order, fix it up to match @@ -44,7 +52,7 @@ class NodeHandler(bmchandler.NodeHandler): self._fp = certificate return certificate == self._fp - def set_password_policy(self, ic): + def _webconfigrules(self, wc): rules = [] for rule in self.ruleset.split(','): if '=' not in rule: @@ -62,10 +70,95 @@ class NodeHandler(bmchandler.NodeHandler): rules.append('passwordReuseCheckNum:' + value) if rules: apirequest = 'set={0}'.format(','.join(rules)) - ic.register_key_handler(self._validate_cert) - ic.oem_init() - ic._oem.smmhandler.wc.request('POST', '/data', apirequest) - ic._oem.smmhandler.wc.getresponse().read() + wc.request('POST', '/data', apirequest) + wc.getresponse().read() + + def _webconfignet(self, wc, nodename): + cfg = self.configmanager + cd = cfg.get_node_attributes( + nodename, ['hardwaremanagement.manager']) + smmip = cd.get(nodename, {}).get('hardwaremanagement.manager', 
{}).get('value', None) + if smmip and ':' not in smmip: + smmip = getaddrinfo(smmip, 0)[0] + smmip = smmip[-1][0] + if ':' in smmip: + raise exc.NotImplementedException('IPv6 not supported') + netconfig = netutil.get_nic_config(cfg, nodename, ip=smmip) + netmask = netutil.cidr_to_mask(netconfig['prefix']) + setdata = 'set=ifIndex:0,v4DHCPEnabled:0,v4IPAddr:{0},v4NetMask:{1}'.format(smmip, netmask) + gateway = netconfig.get('ipv4_gateway', None) + if gateway: + setdata += ',v4Gateway:{0}'.format(gateway) + wc.request('POST', '/data', setdata) + rsp = wc.getresponse() + rspdata = rsp.read() + if '0' not in rspdata: + raise Exception("Error configuring SMM Network") + return + if ':' in smmip and not smmip.startswith('fe80::'): + raise exc.NotImplementedException('IPv6 configuration TODO') + if self.ipaddr.startswith('fe80::'): + cfg.set_node_attributes( + {nodename: {'hardwaremanagement.manager': self.ipaddr}}) + + def _webconfigcreds(self, username, password): + wc = webclient.SecureHTTPConnection(self.ipaddr, 443, verifycallback=self._validate_cert) + wc.connect() + authdata = { # start by trying factory defaults + 'user': 'USERID', + 'password': 'PASSW0RD', + } + headers = {'Connection': 'keep-alive', 'Content-Type': 'application/x-www-form-urlencoded'} + wc.request('POST', '/data/login', urllib.urlencode(authdata), headers) + rsp = wc.getresponse() + rspdata = rsp.read() + if 'authResult>0' not in rspdata: + # default credentials are refused, try with the actual + authdata['user'] = username + authdata['password'] = password + wc.request('POST', '/data/login', urllib.urlencode(authdata), headers) + rsp = wc.getresponse() + rspdata = rsp.read() + if 'renew_account' in rspdata: + raise Exception('Configured password has expired') + if 'authResult>0' not in rspdata: + raise Exception('Unknown username/password on SMM') + tokens = fromstring(rspdata) + st2 = tokens.findall('st2')[0].text + wc.set_header('ST2', st2) + return wc + if 'renew_account' in rspdata: + 
passwdchange = {'oripwd': 'PASSW0RD', 'newpwd': password} + tokens = fromstring(rspdata) + st2 = tokens.findall('st2')[0].text + wc.set_header('ST2', st2) + wc.request('POST', '/data/changepwd', urllib.urlencode(passwdchange)) + rsp = wc.getresponse() + rspdata = rsp.read() + authdata['password'] = password + wc.request('POST', '/data/login', urllib.urlencode(authdata), headers) + rsp = wc.getresponse() + rspdata = rsp.read() + if 'authResult>0' in rspdata: + tokens = fromstring(rspdata) + st2 = tokens.findall('st2')[0].text + wc.set_header('ST2', st2) + if username == 'USERID': + return wc + wc.request('POST', '/data', 'set=user(2,1,{0},511,,4,15,0)'.format(username)) + rsp = wc.getresponse() + rspdata = rsp.read() + wc.request('POST', '/data/logout') + rsp = wc.getresponse() + rspdata = rsp.read() + authdata['user'] = username + wc.request('POST', '/data/login', urllib.urlencode(authdata, headers)) + rsp = wc.getresponse() + rspdata = rsp.read() + tokens = fromstring(rspdata) + st2 = tokens.findall('st2')[0].text + wc.set_header('ST2', st2) + return wc def config(self, nodename): # SMM for now has to reset to assure configuration applies @@ -73,7 +166,25 @@ class NodeHandler(bmchandler.NodeHandler): nodename, 'discovery.passwordrules') self.ruleset = dpp.get(nodename, {}).get( 'discovery.passwordrules', {}).get('value', '') - ic = self._bmcconfig(nodename, customconfig=self.set_password_policy) + creds = self.configmanager.get_node_attributes( + nodename, + ['secret.hardwaremanagementuser', + 'secret.hardwaremanagementpassword'], decrypt=True) + username = creds.get(nodename, {}).get( + 'secret.hardwaremanagementuser', {}).get('value', 'USERID') + passwd = creds.get(nodename, {}).get( + 'secret.hardwaremanagementpassword', {}).get('value', 'PASSW0RD') + if passwd == 'PASSW0RD' and self.ruleset: + raise Exception('Cannot support default password and setting password rules at same time') + if passwd == 'PASSW0RD': + # We must avoid hitting the web interface due to 
forced password change, best effort + self._bmcconfig(nodename) + else: + # Switch to full web based configuration, to mitigate risks with the SMM + wc = self._webconfigcreds(username, passwd) + self._webconfigrules(wc) + self._webconfignet(wc, nodename) + # notes for smm: # POST to: @@ -88,4 +199,4 @@ class NodeHandler(bmchandler.NodeHandler): # with body user=USERID&password=Passw0rd!4321 # yields: # ok 0 index.html -# note forwardUrl, if password change needed, will indicate something else \ No newline at end of file +# note forwardUrl, if password change needed, will indicate something else From 8897842fc4052c92f6b62a8c52a9360121900613 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 26 Jul 2019 13:50:59 -0400 Subject: [PATCH 7/7] Fix SMM handler when None bmc This fixes a common scenario for using fe80 collection --- confluent_server/confluent/discovery/handlers/smm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/discovery/handlers/smm.py b/confluent_server/confluent/discovery/handlers/smm.py index 82149588..e6e2bdad 100644 --- a/confluent_server/confluent/discovery/handlers/smm.py +++ b/confluent_server/confluent/discovery/handlers/smm.py @@ -81,7 +81,7 @@ class NodeHandler(bmchandler.NodeHandler): if smmip and ':' not in smmip: smmip = getaddrinfo(smmip, 0)[0] smmip = smmip[-1][0] - if ':' in smmip: + if smmip and ':' in smmip: raise exc.NotImplementedException('IPv6 not supported') netconfig = netutil.get_nic_config(cfg, nodename, ip=smmip) netmask = netutil.cidr_to_mask(netconfig['prefix']) @@ -95,7 +95,7 @@ class NodeHandler(bmchandler.NodeHandler): if '0' not in rspdata: raise Exception("Error configuring SMM Network") return - if ':' in smmip and not smmip.startswith('fe80::'): + if smmip and ':' in smmip and not smmip.startswith('fe80::'): raise exc.NotImplementedException('IPv6 configuration TODO') if self.ipaddr.startswith('fe80::'): cfg.set_node_attributes(