From 76bfb29d60d68ee4f14dde8f251c23ef3dbc20e0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 14 Nov 2017 14:28:31 -0500 Subject: [PATCH 1/6] Try to put the brakes on too many mac discovery attempts In various scenarios, too many macs on a port can be a sign of trouble. For example, a chained SMM configuration with head on switch port, or incorrectly pointing a nodes net attributes at a switch uplink port, or defining SMMs without any nodes, causing XCCs to think they are rackmount. This sets some sanity value for avoiding problems. This is of course a mitigation, invalid scenarios could still run afoul of the limits, but it should catch a large chunk of offending scenarios. --- confluent_server/confluent/discovery/core.py | 12 ++++++++++- .../confluent/discovery/handlers/generic.py | 11 +++++++--- .../confluent/discovery/handlers/smm.py | 1 + .../confluent/networking/macmap.py | 21 +++++++++---------- 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index c072bb77..1f68f9a0 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -634,7 +634,17 @@ def get_nodename(cfg, handler, info): _map_unique_ids() nodename = nodes_by_uuid.get(curruuid, None) if not nodename: # as a last resort, search switch for info - nodename = macmap.find_node_by_mac(info['hwaddr'], cfg) + nodename, macinfo = macmap.find_nodeinfo_by_mac(info['hwaddr'], cfg) + if (nodename and + not handler.discoverable_by_switch(macinfo['maccount'])): + if handler.devname == 'SMM': + errorstr = 'Attempt to discover SMM by switch, but chained ' \ + 'topology or incorrect net attributes detected, ' \ + 'which is not compatible with switch discovery ' \ + 'of SMM, nodename would have been ' \ + '{0}'.format(nodename) + log.log({'error': errorstr}) + return None return nodename diff --git 
a/confluent_server/confluent/discovery/handlers/generic.py b/confluent_server/confluent/discovery/handlers/generic.py index 389067db..b68bab3d 100644 --- a/confluent_server/confluent/discovery/handlers/generic.py +++ b/confluent_server/confluent/discovery/handlers/generic.py @@ -19,6 +19,10 @@ webclient = eventlet.import_patched('pyghmi.util.webclient') class NodeHandler(object): https_supported = True is_enclosure = False + devname = '' + maxmacs = 2 # reasonable default, allowing for common scenario of + # shared nic in theory, but blocking enclosure managers + # and uplink ports def __init__(self, info, configmanager): self._certfailreason = None @@ -50,9 +54,10 @@ class NodeHandler(object): def preconfig(self): return - @property - def discoverable_by_switch(self): - return True + def discoverable_by_switch(self, macs): + # Given the number of macs sharing the port, is this handler + # appropriate? + return macs <= self.maxmacs def _savecert(self, certificate): self._fp = certificate diff --git a/confluent_server/confluent/discovery/handlers/smm.py b/confluent_server/confluent/discovery/handlers/smm.py index 3f9da8e6..aba470d9 100644 --- a/confluent_server/confluent/discovery/handlers/smm.py +++ b/confluent_server/confluent/discovery/handlers/smm.py @@ -26,6 +26,7 @@ def fixuuid(baduuid): class NodeHandler(bmchandler.NodeHandler): is_enclosure = True devname = 'SMM' + maxmacs = 5 # support an enclosure, but try to avoid catching daisy chain def scan(self): # the UUID is in a weird order, fix it up to match diff --git a/confluent_server/confluent/networking/macmap.py b/confluent_server/confluent/networking/macmap.py index 7bf4caa8..69cdddb3 100644 --- a/confluent_server/confluent/networking/macmap.py +++ b/confluent_server/confluent/networking/macmap.py @@ -229,33 +229,32 @@ def _map_switch_backend(args): _macsbyswitch[switch][ifname] = [mac] nodename = _nodelookup(switch, ifname) if nodename is not None: - if mac in _nodesbymac and _nodesbymac[mac] != nodename: 
+ if mac in _nodesbymac and _nodesbymac[mac][0] != nodename: # For example, listed on both a real edge port # and by accident a trunk port log.log({'error': '{0} and {1} described by ambiguous' - ' switch topology values'.format(nodename, - _nodesbymac[mac] - )}) - _nodesbymac[mac] = None + ' switch topology values'.format( + nodename, _nodesbymac[mac][0])}) + _nodesbymac[mac] = (None, None) else: - _nodesbymac[mac] = nodename + _nodesbymac[mac] = (nodename, maccounts[ifname]) switchbackoff = 30 -def find_node_by_mac(mac, configmanager): +def find_nodeinfo_by_mac(mac, configmanager): now = util.monotonic_time() if vintage and (now - vintage) < 90 and mac in _nodesbymac: - return _nodesbymac[mac] + return _nodesbymac[mac][0], {'maccount': _nodesbymac[mac][1]} # do not actually sweep switches more than once every 30 seconds # however, if there is an update in progress, wait on it for _ in update_macmap(configmanager, vintage and (now - vintage) < switchbackoff): if mac in _nodesbymac: - return _nodesbymac[mac] + return _nodesbymac[mac][0], {'maccount': _nodesbymac[mac][1]} # If update_mac bailed out, still check one last time - return _nodesbymac.get(mac, None) + return _nodesbymac.get(mac, (None, {'maccount': 0})) mapupdating = eventlet.semaphore.Semaphore() @@ -456,7 +455,7 @@ def dump_macinfo(macaddr): raise exc.NotFoundException( '{0} not found in mac table of ' 'any known switches'.format(macaddr)) - return _dump_locations(info, macaddr, _nodesbymac.get(macaddr, None)) + return _dump_locations(info, macaddr, _nodesbymac.get(macaddr, (None,))[0]) def rescan(cfg): From 9d2ec60b50f73ccc68cc34309e66fd21153995e4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 14 Nov 2017 14:59:37 -0500 Subject: [PATCH 2/6] Set executable bits on various commands --- confluent_client/bin/collate | 0 confluent_client/bin/nodebmcreset | 0 confluent_client/bin/nodeconfig | 0 confluent_client/bin/nodedefine | 0 confluent_client/bin/noderemove | 0 confluent_client/bin/nodereseat | 
0 6 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 confluent_client/bin/collate mode change 100644 => 100755 confluent_client/bin/nodebmcreset mode change 100644 => 100755 confluent_client/bin/nodeconfig mode change 100644 => 100755 confluent_client/bin/nodedefine mode change 100644 => 100755 confluent_client/bin/noderemove mode change 100644 => 100755 confluent_client/bin/nodereseat diff --git a/confluent_client/bin/collate b/confluent_client/bin/collate old mode 100644 new mode 100755 diff --git a/confluent_client/bin/nodebmcreset b/confluent_client/bin/nodebmcreset old mode 100644 new mode 100755 diff --git a/confluent_client/bin/nodeconfig b/confluent_client/bin/nodeconfig old mode 100644 new mode 100755 diff --git a/confluent_client/bin/nodedefine b/confluent_client/bin/nodedefine old mode 100644 new mode 100755 diff --git a/confluent_client/bin/noderemove b/confluent_client/bin/noderemove old mode 100644 new mode 100755 diff --git a/confluent_client/bin/nodereseat b/confluent_client/bin/nodereseat old mode 100644 new mode 100755 From 43c5ecd6aee550046f1a357c77ea32a734b269b6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 14 Nov 2017 15:16:58 -0500 Subject: [PATCH 3/6] Fix missing aliases --- confluent_client/confluent_env.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_client/confluent_env.sh b/confluent_client/confluent_env.sh index b4139b52..916634c9 100644 --- a/confluent_client/confluent_env.sh +++ b/confluent_client/confluent_env.sh @@ -15,7 +15,9 @@ export MANPATH # disabled in the parent shell. 
Instead, store the current command in a # variable and use that to check for misglobbed noderanges, which was the goal alias nodeattrib='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodeattrib' +alias nodebmcreset='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodebmcreset' alias nodeboot='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodeboot' +alias nodeconfig='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodeconfig' alias nodeconsole='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodeconsole' alias nodedefine='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodedefine' alias nodeeventlog='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodeeventlog' @@ -26,6 +28,7 @@ alias nodeidentify='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export alias nodeinventory='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodeinventory' alias nodelist='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodelist' alias nodepower='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodepower' +alias noderemove='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; noderemove' alias nodereseat='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodereseat' alias noderun='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; noderun' alias nodesensors='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodesensors' From 6d88dbb374e65de9484bad428d1923d3cadfd1a3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 14 Nov 2017 15:55:03 -0500 Subject: [PATCH 4/6] Add missing man pages Several commands did not yet have man pages, address this documentation issue. 
--- confluent_client/doc/man/nodeconfig.ronn | 33 ++++++++++++++++++++++++ confluent_client/doc/man/nodedefine.ronn | 28 ++++++++++++++++++++ confluent_client/doc/man/noderemove.ronn | 25 ++++++++++++++++++ confluent_client/doc/man/nodereseat.ronn | 19 ++++++++++++++ 4 files changed, 105 insertions(+) create mode 100644 confluent_client/doc/man/nodeconfig.ronn create mode 100644 confluent_client/doc/man/nodedefine.ronn create mode 100644 confluent_client/doc/man/noderemove.ronn create mode 100644 confluent_client/doc/man/nodereseat.ronn diff --git a/confluent_client/doc/man/nodeconfig.ronn b/confluent_client/doc/man/nodeconfig.ronn new file mode 100644 index 00000000..8f7919f2 --- /dev/null +++ b/confluent_client/doc/man/nodeconfig.ronn @@ -0,0 +1,33 @@ +nodeconfig(8) -- Show or change node configuration +================================================== + +## SYNOPSIS + +`nodeconfig [..]` +`nodeconfig [..]` + +## DESCRIPTION + +**nodeconfig** manages the configuration of nodes managed by confluent. +Rather than manipulating the confluent database, this actually modifies the +running configuration on the node firmware. Calling without '=' will show the +current value, and '=' will change the value. Network information can be +given as a node expression, as documented in the man page for nodeattribexpressions(5). 
+ +## EXAMPLES +* Showing the current IP configuration of noderange BMC/IMM/XCC: + `# nodeconfig s3,s4 bmc` + `s3: bmc.ipv4_address: 172.30.254.193/16` + `s3: bmc.ipv4_method: DHCP` + `s3: bmc.ipv4_gateway: 172.30.0.6` + `s4: bmc.ipv4_address: 172.30.254.192/16` + `s4: bmc.ipv4_method: DHCP` + `s4: bmc.ipv4_gateway: 172.30.0.6` + +* Changing nodes `s3` and `s4` to have the IP addresses 10.1.2.3 and 10.1.2.4 with a 16-bit subnet mask: + `# nodeconfig s3,s4 bmc.ipv4_address=10.1.2.{n1}/16` + +## SEE ALSO + +nodeattribexpressions(5) + diff --git a/confluent_client/doc/man/nodedefine.ronn b/confluent_client/doc/man/nodedefine.ronn new file mode 100644 index 00000000..6bd4e70c --- /dev/null +++ b/confluent_client/doc/man/nodedefine.ronn @@ -0,0 +1,28 @@ +nodedefine(8) -- Define new confluent nodes + +## SYNOPSIS + +`nodedefine [nodeattribute1=value1 ...]` + +## DESCRIPTION + +`nodedefine` allows the definition of new nodes for the confluent management +system. It has the same syntax as `nodeattrib(8)`, and the commands differ in +that `nodeattrib(8)` will error if a node does not exist. + +## EXAMPLES + +* Define two racks of nodes, named r{rack}u{u}: + `# nodedefine r1u1-r2u4` + `r1u4: created` + `r1u1: created` + `r1u2: created` + `r1u3: created` + `r2u4: created` + `r2u3: created` + `r2u2: created` + `r2u1: created` + +## SEE ALSO + +noderange(5), nodeattribexpressions(5) diff --git a/confluent_client/doc/man/noderemove.ronn b/confluent_client/doc/man/noderemove.ronn new file mode 100644 index 00000000..0fd1b48e --- /dev/null +++ b/confluent_client/doc/man/noderemove.ronn @@ -0,0 +1,25 @@ +noderemove(8) -- Remove nodes from the confluent management service +=================================================================== + +## SYNOPSIS + +`noderemove ` + +## DESCRIPTION + +`noderemove` simply removes the given noderange from the confluent database. 
+ + +## EXAMPLES + +* Remove two racks each with 4 nodes: + `# noderemove r1u1-r2u4` + `r1u4: deleted` + `r1u1: deleted` + `r1u2: deleted` + `r1u3: deleted` + `r2u4: deleted` + `r2u3: deleted` + `r2u2: deleted` + `r2u1: deleted` + diff --git a/confluent_client/doc/man/nodereseat.ronn b/confluent_client/doc/man/nodereseat.ronn new file mode 100644 index 00000000..09b45286 --- /dev/null +++ b/confluent_client/doc/man/nodereseat.ronn @@ -0,0 +1,19 @@ +nodereseat(8) -- Request a reseat of a node +============================================ + +## SYNOPSIS + +`nodereseat ` + +## DESCRIPTION + +`nodereseat` requests the enclosure manager of the current node to reseat that +node's slot. This should be equivalent to removing the system entirely from +the chassis and putting it back in, but without actually having to do so. + +## EXAMPLES + +* Reseating the node `s1`: + `# nodereseat s1` + `s1: Reseat successful` + From 16297b048fc17a4d4f9d4bb968f38af772812045 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 14 Nov 2017 16:05:03 -0500 Subject: [PATCH 5/6] Add nodegroupdefine and nodegroupremove With these, a user can now largely ignore confetty for most absolutely universal functions. 
--- confluent_client/bin/nodegroupdefine | 58 +++++++++++++++++++ confluent_client/bin/nodegroupremove | 52 +++++++++++++++++ confluent_client/confluent_env.sh | 2 + confluent_client/doc/man/nodegroupdefine.ronn | 23 ++++++++ confluent_client/doc/man/nodegroupremove.ronn | 18 ++++++ 5 files changed, 153 insertions(+) create mode 100755 confluent_client/bin/nodegroupdefine create mode 100755 confluent_client/bin/nodegroupremove create mode 100644 confluent_client/doc/man/nodegroupdefine.ronn create mode 100644 confluent_client/doc/man/nodegroupremove.ronn diff --git a/confluent_client/bin/nodegroupdefine b/confluent_client/bin/nodegroupdefine new file mode 100755 index 00000000..bf79d5e5 --- /dev/null +++ b/confluent_client/bin/nodegroupdefine @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import optparse +import os +import signal +import sys + +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass + +path = os.path.dirname(os.path.realpath(__file__)) +path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) +if path.startswith('/opt'): + sys.path.append(path) + +import confluent.client as client + +argparser = optparse.OptionParser( + usage='''\n %prog noderange attribute1=value1 attribute2=value,... 
+ \n ''') +(options, args) = argparser.parse_args() +requestargs=None +try: + noderange = args[0] +except IndexError: + argparser.print_help() + sys.exit(1) +client.check_globbing(noderange) +session = client.Command() +exitcode = 0 +attribs = {'name': noderange} +for arg in args[1:]: + key, val = arg.split('=') + attribs[key] = val +for r in session.create('/nodegroups/', attribs): + if 'error' in r: + sys.stderr.write(r['error'] + '\n') + exitcode |= 1 + if 'created' in r: + print('{0}: created'.format(r['created'])) +sys.exit(exitcode) \ No newline at end of file diff --git a/confluent_client/bin/nodegroupremove b/confluent_client/bin/nodegroupremove new file mode 100755 index 00000000..1c7c8d3f --- /dev/null +++ b/confluent_client/bin/nodegroupremove @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import optparse +import os +import signal +import sys + +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass + +path = os.path.dirname(os.path.realpath(__file__)) +path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) +if path.startswith('/opt'): + sys.path.append(path) + +import confluent.client as client + +argparser = optparse.OptionParser( + usage='''\n %prog noderange + \n ''') +(options, args) = argparser.parse_args() +if len(args) != 1: + argparser.print_help() + sys.exit(1) +noderange = args[0] +client.check_globbing(noderange) +session = client.Command() +exitcode = 0 +for r in session.delete('/nodegroups/{0}'.format(noderange)): + if 'error' in r: + sys.stderr.write(r['error'] + '\n') + exitcode |= 1 + if 'deleted' in r: + print('{0}: deleted'.format(r['deleted'])) +sys.exit(exitcode) \ No newline at end of file diff --git a/confluent_client/confluent_env.sh b/confluent_client/confluent_env.sh index 916634c9..55a0cd02 100644 --- a/confluent_client/confluent_env.sh +++ b/confluent_client/confluent_env.sh @@ -23,6 +23,8 @@ alias nodedefine='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export C alias nodeeventlog='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodeeventlog' alias nodefirmware='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodefirmware' alias nodegroupattrib='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodegroupattrib' +alias nodegroupdefine='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodegroupdefine' +alias nodegroupremove='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodegroupremove' alias nodehealth='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodehealth' alias nodeidentify='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodeidentify' alias 
nodeinventory='CURRENT_CMDLINE=$(HISTTIMEFORMAT= builtin history 1); export CURRENT_CMDLINE; nodeinventory' diff --git a/confluent_client/doc/man/nodegroupdefine.ronn b/confluent_client/doc/man/nodegroupdefine.ronn new file mode 100644 index 00000000..a990f8ff --- /dev/null +++ b/confluent_client/doc/man/nodegroupdefine.ronn @@ -0,0 +1,23 @@ +nodegroupdefine(8) -- Define new confluent node group + +## SYNOPSIS + +`nodegroupdefine [nodeattribute1=value1 ...]` + +## DESCRIPTION + +`nodegroupdefine` allows the definition of a new node group for the confluent management +service. It may only define a single group name at a time. +It has the same syntax as `nodegroupattrib(8)`, and the commands differ in +that `nodegroupattrib(8)` will error if a node group does not exist. + +## EXAMPLES + +* Create a group called `compute`: + `# nodegroupdefine compute` + `compute: created` + + +## SEE ALSO + +nodeattribexpressions(5), nodegroupattrib(8), nodegroupremove(8) diff --git a/confluent_client/doc/man/nodegroupremove.ronn b/confluent_client/doc/man/nodegroupremove.ronn new file mode 100644 index 00000000..be0a5967 --- /dev/null +++ b/confluent_client/doc/man/nodegroupremove.ronn @@ -0,0 +1,18 @@ +nodegroupremove(8) -- Remove a nodegroup from the confluent database +==================================================================== + +## SYNOPSIS + +`nodegroupremove ` + +## DESCRIPTION + +`nodegroupremove` simply removes the given single nodegroup from the confluent database. + + +## EXAMPLES + +* Remove the group called `testgroup`: + `# nodegroupremove testgroup` + `testgroup: deleted` + From e63d7f9fe3756d6f438c1a4da7d5010f1caed103 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 15 Nov 2017 09:41:01 -0500 Subject: [PATCH 6/6] Correct typo in the nodeshell command The November 6th change contained a typo. 
--- confluent_client/bin/nodeshell | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_client/bin/nodeshell b/confluent_client/bin/nodeshell index f80071d7..c1959d0d 100755 --- a/confluent_client/bin/nodeshell +++ b/confluent_client/bin/nodeshell @@ -107,7 +107,7 @@ def run(): run_cmdv(node, cmdv, all, pipedesc) for node in sortutil.natural_sort(pernodeout): for line in pernodeout[node]: - sys.stdout.ouwrite('{0}: {1}'.format(node, line)) + sys.stdout.write('{0}: {1}'.format(node, line)) sys.stdout.flush() if all: rdy, _, _ = select.select(all, [], [], 10)