From 4124e0fcc04d0694664cc7ee1c9bc0aa50d09b0b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 7 Mar 2019 15:19:17 -0500 Subject: [PATCH 01/25] Add ability for noderange to wildcard attrib names Useful for net.*attribs to search when nic is unknown. --- .../confluent/config/configmanager.py | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 9d67a6ab..8cd3b061 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -1055,24 +1055,32 @@ class ConfigManager(object): raise Exception('Invalid Expression') for node in nodes: try: - currval = self._cfgstore['nodes'][node][attribute]['value'] + currvals = [self._cfgstore['nodes'][node][attribute]['value']] except KeyError: # Let's treat 'not set' as being an empty string for this path - currval = '' - if exmatch: - if yieldmatches: - if exmatch.search(currval): - yield node + currvals = list( + [self._cfgstore['nodes'][node][x].get('value', '') + for x in fnmatch.filter(self._cfgstore['nodes'][node], attribute)]) + currvals.append('') + for currval in currvals: + if exmatch: + if yieldmatches: + if exmatch.search(currval): + yield node + break + else: + if not exmatch.search(currval): + yield node + break else: - if not exmatch.search(currval): - yield node - else: - if yieldmatches: - if match == currval: - yield node - else: - if match != currval: - yield node + if yieldmatches: + if match == currval: + yield node + break + else: + if match != currval: + yield node + break def filter_nodenames(self, expression, nodes=None): """Filter nodenames by regular expression From 269acf99433da1215ac3f89338253dbc3ae98462 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 8 Mar 2019 13:25:04 -0500 Subject: [PATCH 02/25] Add CNOS plugin for Lenovo switches --- .../plugins/hardwaremanagement/cnos.py | 95 
+++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 confluent_server/confluent/plugins/hardwaremanagement/cnos.py diff --git a/confluent_server/confluent/plugins/hardwaremanagement/cnos.py b/confluent_server/confluent/plugins/hardwaremanagement/cnos.py new file mode 100644 index 00000000..3297be05 --- /dev/null +++ b/confluent_server/confluent/plugins/hardwaremanagement/cnos.py @@ -0,0 +1,95 @@ + +# Copyright 2019 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +#Noncritical: +# - One or more temperature sensors is in the warning range; +# - A panic dump exists in flash. +#Critical: +# - One or more temperature sensors is in the failure range; +# - One or more fans are running < 100 RPM; +# - One power supply is off. 
+ + +import eventlet +import confluent.exceptions as exc +webclient = eventlet.import_patched('pyghmi.util.webclient') +import confluent.messages as msg +import confluent.util as util + +class SwitchSensor(object): + def __init__(self, name, states, value=None, health=None): + self.name = name + self.value = value + self.states = states + + +def cnos_login(node, configmanager, creds): + wc = webclient.SecureHTTPConnection(node, port=443, verifycallback=util.TLSCertVerifier( + configmanager, node, 'pubkeys.tls_hardwaremanager').verify_cert) + wc.set_basic_credentials(creds[node]['secret.hardwaremanagementuser']['value'], creds[node]['secret.hardwaremanagementpassword']['value']) + wc.request('GET', '/nos/api/login/') + rsp = wc.getresponse() + body = rsp.read() + if rsp.status == 401: # CNOS gives 401 on first attempt... + wc.request('GET', '/nos/api/login/') + rsp = wc.getresponse() + body = rsp.read() + if rsp.status >= 200 and rsp.status < 300: + return wc + raise exc.TargetEndpointBadCredentials('Unable to authenticate') + +def retrieve(nodes, element, configmanager, inputdata): + if element == ['power', 'state']: + for node in nodes: + yield msg.PowerState(node=node, state='on') + if element == ['health', 'hardware']: + creds = configmanager.get_node_attributes( + nodes, ['secret.hardwaremanagementuser', 'secret.hardwaremanagementpassword'], decrypt=True) + for node in nodes: + wc = cnos_login(node, configmanager, creds) + hinfo = wc.grab_json_response('/nos/api/sysinfo/globalhealthstatus') + summary = hinfo['status'].lower() + if summary == 'noncritical': + summary = 'warning' + yield msg.HealthSummary(summary, name=node) + state = None + badreadings = [] + if summary != 'ok': # temperature or dump or fans or psu + wc.grab_json_response('/nos/api/sysinfo/panic_dump') + switchinfo = wc.grab_json_response('/nos/api/sysinfo/panic_dump') + if switchinfo: + badreadings.append(SwitchSensor('Panicdump', ['Present'], health='warning')) + switchinfo = 
wc.grab_json_response('/nos/api/sysinfo/temperatures') + for temp in switchinfo: + if temp == 'Temperature threshold': + continue + if switchinfo[temp]['State'] != 'OK': + temphealth = switchinfo[temp]['State'].lower() + if temphealth == 'noncritical': + temphealth = 'warning' + tempval = switchinfo[temp]['Temp'] + badreadings.append(SwitchSensor(temp, [], value=tempval, health=temphealth)) + switchinfo = wc.grab_json_response('/nos/api/sysinfo/fans') + for fan in switchinfo: + if switchinfo[fan]['speed-rpm'] < 100: + badreadings.append(SwitchSensor(fan, [], value=switchinfo[fan]['speed-rpm'], health='critical')) + switchinfo = wc.grab_json_response('/nos/api/sysinfo/power') + for psu in switchinfo: + if switchinfo[psu]['State'] != 'Normal ON': + psuname = switchinfo[psu]['Name'] + badreadings.append(SwitchSensor(psuname, states=[switchinfo[psu]['State']], health='critical')) + yield msg.SensorReadings(badreadings, name=node) + From 16abf7cb64811f85bf9f6c6aa7987ee001fb85b6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 8 Mar 2019 13:38:15 -0500 Subject: [PATCH 03/25] Fix missing sub-health info on CNOS health --- confluent_server/confluent/plugins/hardwaremanagement/cnos.py | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/cnos.py b/confluent_server/confluent/plugins/hardwaremanagement/cnos.py index 3297be05..64bc9948 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/cnos.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/cnos.py @@ -34,6 +34,7 @@ class SwitchSensor(object): self.name = name self.value = value self.states = states + self.health = health def cnos_login(node, configmanager, creds): From 656e82c3fe7a9ad1a22d39591e4b0780b8e47719 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 8 Mar 2019 16:02:52 -0500 Subject: [PATCH 04/25] Speed up cnos health and add stubs Add concurrency to accelerate nodehealth and provide stubs for the as-yet unimplemented 
functionality. --- .../plugins/hardwaremanagement/cnos.py | 119 +++++++++++++----- 1 file changed, 85 insertions(+), 34 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/cnos.py b/confluent_server/confluent/plugins/hardwaremanagement/cnos.py index 64bc9948..955736b5 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/cnos.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/cnos.py @@ -24,6 +24,7 @@ import eventlet +import eventlet.queue as queue import confluent.exceptions as exc webclient = eventlet.import_patched('pyghmi.util.webclient') import confluent.messages as msg @@ -52,45 +53,95 @@ def cnos_login(node, configmanager, creds): return wc raise exc.TargetEndpointBadCredentials('Unable to authenticate') +def update(nodes, element, configmanager, inputdata): + for node in nodes: + yield msg.ConfluentNodeError(node, 'Not Implemented') + +def delete(nodes, element, configmanager, inputdata): + for node in nodes: + yield msg.ConfluentNodeError(node, 'Not Implemented') + +def create(nodes, element, configmanager, inputdata): + for node in nodes: + yield msg.ConfluentNodeError(node, 'Not Implemented') + def retrieve(nodes, element, configmanager, inputdata): + results = queue.LightQueue() + workers = set([]) if element == ['power', 'state']: for node in nodes: yield msg.PowerState(node=node, state='on') - if element == ['health', 'hardware']: + return + elif element == ['health', 'hardware']: creds = configmanager.get_node_attributes( nodes, ['secret.hardwaremanagementuser', 'secret.hardwaremanagementpassword'], decrypt=True) for node in nodes: - wc = cnos_login(node, configmanager, creds) - hinfo = wc.grab_json_response('/nos/api/sysinfo/globalhealthstatus') - summary = hinfo['status'].lower() - if summary == 'noncritical': - summary = 'warning' - yield msg.HealthSummary(summary, name=node) - state = None - badreadings = [] - if summary != 'ok': # temperature or dump or fans or psu - 
wc.grab_json_response('/nos/api/sysinfo/panic_dump') - switchinfo = wc.grab_json_response('/nos/api/sysinfo/panic_dump') - if switchinfo: - badreadings.append(SwitchSensor('Panicdump', ['Present'], health='warning')) - switchinfo = wc.grab_json_response('/nos/api/sysinfo/temperatures') - for temp in switchinfo: - if temp == 'Temperature threshold': - continue - if switchinfo[temp]['State'] != 'OK': - temphealth = switchinfo[temp]['State'].lower() - if temphealth == 'noncritical': - temphealth = 'warning' - tempval = switchinfo[temp]['Temp'] - badreadings.append(SwitchSensor(temp, [], value=tempval, health=temphealth)) - switchinfo = wc.grab_json_response('/nos/api/sysinfo/fans') - for fan in switchinfo: - if switchinfo[fan]['speed-rpm'] < 100: - badreadings.append(SwitchSensor(fan, [], value=switchinfo[fan]['speed-rpm'], health='critical')) - switchinfo = wc.grab_json_response('/nos/api/sysinfo/power') - for psu in switchinfo: - if switchinfo[psu]['State'] != 'Normal ON': - psuname = switchinfo[psu]['Name'] - badreadings.append(SwitchSensor(psuname, states=[switchinfo[psu]['State']], health='critical')) - yield msg.SensorReadings(badreadings, name=node) + workers.add(eventlet.spawn(retrieve_health, configmanager, creds, + node, results)) + else: + for node in nodes: + yield msg.ConfluentNodeError(node, 'Not Implemented') + return + currtimeout = 10 + while workers: + try: + datum = results.get(10) + while datum: + if datum: + yield datum + datum = results.get_nowait() + except queue.Empty: + pass + eventlet.sleep(0.001) + for t in list(workers): + if t.dead: + workers.discard(t) + try: + while True: + datum = results.get_nowait() + if datum: + yield datum + except queue.Empty: + pass + +def retrieve_health(configmanager, creds, node, results): + wc = cnos_login(node, configmanager, creds) + hinfo = wc.grab_json_response('/nos/api/sysinfo/globalhealthstatus') + summary = hinfo['status'].lower() + if summary == 'noncritical': + summary = 'warning' + 
results.put(msg.HealthSummary(summary, name=node)) + state = None + badreadings = [] + if summary != 'ok': # temperature or dump or fans or psu + wc.grab_json_response('/nos/api/sysinfo/panic_dump') + switchinfo = wc.grab_json_response('/nos/api/sysinfo/panic_dump') + if switchinfo: + badreadings.append( + SwitchSensor('Panicdump', ['Present'], health='warning')) + switchinfo = wc.grab_json_response('/nos/api/sysinfo/temperatures') + for temp in switchinfo: + if temp == 'Temperature threshold': + continue + if switchinfo[temp]['State'] != 'OK': + temphealth = switchinfo[temp]['State'].lower() + if temphealth == 'noncritical': + temphealth = 'warning' + tempval = switchinfo[temp]['Temp'] + badreadings.append( + SwitchSensor(temp, [], value=tempval, health=temphealth)) + switchinfo = wc.grab_json_response('/nos/api/sysinfo/fans') + for fan in switchinfo: + if switchinfo[fan]['speed-rpm'] < 100: + badreadings.append( + SwitchSensor(fan, [], value=switchinfo[fan]['speed-rpm'], + health='critical')) + switchinfo = wc.grab_json_response('/nos/api/sysinfo/power') + for psu in switchinfo: + if switchinfo[psu]['State'] != 'Normal ON': + psuname = switchinfo[psu]['Name'] + badreadings.append( + SwitchSensor(psuname, states=[switchinfo[psu]['State']], + health='critical')) + results.put(msg.SensorReadings(badreadings, name=node)) From 48079f297b066c011b316ba6f662913e7554a11d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 19 Mar 2019 15:03:06 -0400 Subject: [PATCH 05/25] Change wheezy name to python-confluent... --- confluent_server/builddeb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/builddeb b/confluent_server/builddeb index 95ffe99d..6ab5a54d 100755 --- a/confluent_server/builddeb +++ b/confluent_server/builddeb @@ -2,6 +2,9 @@ cd `dirname $0` PKGNAME=$(basename $(pwd)) DPKGNAME=$(basename $(pwd) | sed -e s/_/-/) +if grep wheezy /etc/os-release; then + DPKGNAME=python-$DPKGNAME +fi cd .. 
mkdir -p /tmp/confluent # $DPKGNAME cp -a * .git /tmp/confluent # $DPKGNAME From a3162daf623fbcbb11c3093e7d9fb6f74d4f74f7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Mar 2019 13:43:30 -0400 Subject: [PATCH 06/25] Skip pushing static config if config already matches A strategy of manually adding DHCP managed nodes produced static-baking when not desired. For now skip the baking in if the address matches. --- confluent_server/confluent/discovery/handlers/bmc.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/discovery/handlers/bmc.py b/confluent_server/confluent/discovery/handlers/bmc.py index fe86876d..389b429a 100644 --- a/confluent_server/confluent/discovery/handlers/bmc.py +++ b/confluent_server/confluent/discovery/handlers/bmc.py @@ -109,9 +109,14 @@ class NodeHandler(generic.NodeHandler): netconfig = netutil.get_nic_config(cfg, nodename, ip=newip) plen = netconfig['prefix'] newip = '{0}/{1}'.format(newip, plen) - ic.set_net_configuration(ipv4_address=newip, - ipv4_configuration='static', - ipv4_gateway=netconfig['ipv4_gateway']) + currcfg = ic.get_net_configuration() + if currcfg['ipv4_address'] != newip: + # do not change the ipv4_config if the current config looks + # like it is already accurate + ic.set_net_configuration(ipv4_address=newip, + ipv4_configuration='static', + ipv4_gateway=netconfig[ + 'ipv4_gateway']) elif self.ipaddr.startswith('fe80::'): cfg.set_node_attributes( {nodename: {'hardwaremanagement.manager': self.ipaddr}}) From 70690517de036788c7c012af19d9f773a816c2b1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Mar 2019 13:46:39 -0400 Subject: [PATCH 07/25] Change wheezy to depend on old package name --- confluent_server/builddeb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/confluent_server/builddeb b/confluent_server/builddeb index 6ab5a54d..d2208d8e 100755 --- a/confluent_server/builddeb +++ b/confluent_server/builddeb @@ -33,7 
+33,11 @@ if [ "$DPKGNAME" = "confluent-server" ]; then else sed -i 's/^\(Depends:.*\)/\1, confluent-client, python-lxml, python-eficompressor, python-pycryptodome/' debian/control fi - echo 'confluent_client confluent-client' >> debian/pydist-overrides + if grep wheezy /etc/os-release; then + echo 'confluent_client python-confluent-client' >> debian/pydist-overrides + else + echo 'confluent_client confluent-client' >> debian/pydist-overrides + fi fi head -n -1 debian/control > debian/control1 mv debian/control1 debian/control From 11ffa7a09153b33e30e01d946d9f21fbdeb489d8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Mar 2019 13:49:54 -0400 Subject: [PATCH 08/25] Fix debian build process --- confluent_server/builddeb | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/confluent_server/builddeb b/confluent_server/builddeb index d2208d8e..4382a423 100755 --- a/confluent_server/builddeb +++ b/confluent_server/builddeb @@ -2,6 +2,7 @@ cd `dirname $0` PKGNAME=$(basename $(pwd)) DPKGNAME=$(basename $(pwd) | sed -e s/_/-/) +OPKGNAME=$(basename $(pwd) | sed -e s/_/-/) if grep wheezy /etc/os-release; then DPKGNAME=python-$DPKGNAME fi @@ -27,9 +28,9 @@ python setup.py sdist > /dev/null 2>&1 py2dsc dist/*.tar.gz shopt -s extglob cd deb_dist/!(*.orig)/ -if [ "$DPKGNAME" = "confluent-server" ]; then +if [ "$OPKGNAME" = "confluent-server" ]; then if grep wheezy /etc/os-release; then - sed -i 's/^\(Depends:.*\)/\1, confluent-client, python-lxml, python-eficompressor, python-pycryptodomex/' debian/control + sed -i 's/^\(Depends:.*\)/\1, python-confluent-client, python-lxml, python-eficompressor, python-pycryptodomex/' debian/control else sed -i 's/^\(Depends:.*\)/\1, confluent-client, python-lxml, python-eficompressor, python-pycryptodome/' debian/control fi @@ -41,10 +42,10 @@ if [ "$DPKGNAME" = "confluent-server" ]; then fi head -n -1 debian/control > debian/control1 mv debian/control1 debian/control -echo 'Provides: python-'$DPKGNAME >> 
debian/control +#echo 'Provides: python-'$DPKGNAME >> debian/control #echo 'Conflicts: python-'$DPKGNAME >> debian/control -echo 'Replaces: python-'$DPKGNAME' (<<2)' >> debian/control -echo 'Breaks: python-'$DPKGNAME' (<<2)' >> debian/control +#echo 'Replaces: python-'$DPKGNAME' (<<2)' >> debian/control +#echo 'Breaks: python-'$DPKGNAME' (<<2)' >> debian/control dpkg-buildpackage -rfakeroot -uc -us -i if [ $? -ne 0 ]; then From adb4ce919e5a577c877f0f9d9ed5a3a743afd2f4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 28 Mar 2019 10:05:35 -0400 Subject: [PATCH 09/25] Fix nodegrouplist man page --- confluent_client/doc/man/nodegrouplist.ronn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_client/doc/man/nodegrouplist.ronn b/confluent_client/doc/man/nodegrouplist.ronn index 65604a12..1a22d2a7 100644 --- a/confluent_client/doc/man/nodegrouplist.ronn +++ b/confluent_client/doc/man/nodegrouplist.ronn @@ -1,4 +1,4 @@ -nodegroupattrib(8) -- List or change confluent nodegroup attributes +nodegrouplist(8) -- List the defined confluent nodegroups =================================================================== ## SYNOPSIS From 0b85fab5297072981fe3ad82b4da2e509d72d367 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 28 Mar 2019 11:03:33 -0400 Subject: [PATCH 10/25] Replace the network error with a local error This is a bit more clear about the cause when local commands fail. 
--- confluent_client/confluent/client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_client/confluent/client.py b/confluent_client/confluent/client.py index b79d865a..a9a63d7b 100644 --- a/confluent_client/confluent/client.py +++ b/confluent_client/confluent/client.py @@ -136,6 +136,8 @@ class Command(object): self.serverloc = server if os.path.isabs(self.serverloc) and os.path.exists(self.serverloc): self._connect_unix() + elif self.serverloc == '/var/run/confluent/api.sock': + raise Exception('Confluent service is not available') else: self._connect_tls() tlvdata.recv(self.connection) From 2b275cd369df8b8388853cf8210a6f84e185f5de Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 28 Mar 2019 11:12:36 -0400 Subject: [PATCH 11/25] Add wildcard documentation to nodeattrib/nodelist --- confluent_client/doc/man/nodeattrib.ronn | 9 +++++++++ confluent_client/doc/man/nodelist.ronn | 3 +++ 2 files changed, 12 insertions(+) diff --git a/confluent_client/doc/man/nodeattrib.ronn b/confluent_client/doc/man/nodeattrib.ronn index 67606ced..ca7b81e4 100644 --- a/confluent_client/doc/man/nodeattrib.ronn +++ b/confluent_client/doc/man/nodeattrib.ronn @@ -23,6 +23,9 @@ For a full list of attributes, run `nodeattrib all` against a node. If `-c` is specified, this will set the nodeattribute to a null value. This is different from setting the value to an empty string. +Attributes may be specified by wildcard, for example `net.*switch` will report +all attributes that begin with `net.` and end with `switch`. + If the word all is specified, then all available attributes are given. Omitting any attribute name or the word 'all' will display only attributes that are currently set. @@ -88,6 +91,12 @@ See nodegroupattrib(8) command on how to manage attributes on a group level. 
`n1: console.method: ` `n2: console.method: ` +* List all switches that a node is described as connected to: + `# nodeattrib d1 net.*switch` + `d1: net.mgt.switch: mgtswitch1` + `d1: net.pxe.switch: pxeswitch1` + `d1: net.switch:` + ## SEE ALSO nodegroupattrib(8), nodeattribexpressions(5) diff --git a/confluent_client/doc/man/nodelist.ronn b/confluent_client/doc/man/nodelist.ronn index e78c3713..d2fc5ff5 100644 --- a/confluent_client/doc/man/nodelist.ronn +++ b/confluent_client/doc/man/nodelist.ronn @@ -17,6 +17,9 @@ displayed. If `-b` is specified, it will also display information on how inherited and expression based attributes are defined. There is more information on node attributes in nodeattributes(5) man page. +Attributes may be specified by wildcard, for example `net.*switch` will report +all attributes that begin with `net.` and end with `switch`. + ## OPTIONS * `-b`, `--blame`: From 5c61430ccc6c9a10bfad5c4d05566d1e344c707c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 28 Mar 2019 11:14:29 -0400 Subject: [PATCH 12/25] Add wildcard documentation to noderange man page. --- confluent_client/doc/man/noderange.ronn | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_client/doc/man/noderange.ronn b/confluent_client/doc/man/noderange.ronn index 1451728a..883c6928 100644 --- a/confluent_client/doc/man/noderange.ronn +++ b/confluent_client/doc/man/noderange.ronn @@ -36,6 +36,9 @@ Also, regular expressions may be used to indicate nodes with names matching cert The other major noderange primitive is indicating nodes by some attribute value: `location.rack=7` +The attribute name may use a wildcard: +`net.*switch=switch1` + Commas can be used to indicate multiple nodes, and can mix and match any of the above primitives. 
The following can be a valid single noderange, combining any and all members of each comma separated component `n1,n2,rack1,storage,location.rack=9,~s1..,n20-n30` From ca29f6ae3532a4ce6d48b45edf36700dfa307e48 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 29 Mar 2019 14:01:36 -0400 Subject: [PATCH 13/25] Opportunistically start mac rescan on rescan While the network rescan might be too slow to hold up general rescan, at least begin a rescan of switches when a rescan is requested. --- confluent_client/bin/nodediscover | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_client/bin/nodediscover b/confluent_client/bin/nodediscover index 2c34c697..9686a503 100755 --- a/confluent_client/bin/nodediscover +++ b/confluent_client/bin/nodediscover @@ -289,6 +289,7 @@ def blocking_scan(session): list(session.update('/discovery/rescan', {'rescan': 'start'})) while(list(session.read('/discovery/rescan'))[0].get('scanning', False)): time.sleep(0.5) + list(session.update('/networking/macs/rescan', {'rescan': 'start'})) def main(): @@ -347,4 +348,4 @@ def main(): if __name__ == '__main__': - main() \ No newline at end of file + main() From 4af1f998fb2f28d8252cacce8690f884236dae4f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 1 Apr 2019 14:16:20 -0400 Subject: [PATCH 14/25] Fix nodeconfig formatting The man page did not have a hard line break. 
--- confluent_client/doc/man/nodeconfig.ronn | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_client/doc/man/nodeconfig.ronn b/confluent_client/doc/man/nodeconfig.ronn index 6cf918fc..d504fa5f 100644 --- a/confluent_client/doc/man/nodeconfig.ronn +++ b/confluent_client/doc/man/nodeconfig.ronn @@ -3,8 +3,8 @@ nodeconfig(8) -- Show or change node configuration ## SYNOPSIS -`nodeconfig [options] [..]` -`nodeconfig [options] [..]` +`nodeconfig [options] [..]` +`nodeconfig [options] [..]` ## DESCRIPTION From 9fd091daadffddc4d025c111e219b51f1e1c9ad8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 2 Apr 2019 09:36:37 -0400 Subject: [PATCH 15/25] Tolerate an XCC with downed web service Make the best of the situation by trying to continue without the policy applied. --- confluent_server/confluent/discovery/handlers/xcc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index f6ba2722..658cf747 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -77,7 +77,11 @@ class NodeHandler(immhandler.NodeHandler): ruleset['USER_GlobalMinPassReuseCycle'] = value ic.register_key_handler(self.validate_cert) ic.oem_init() - ic._oem.immhandler.wc.grab_json_response('/api/dataset', ruleset) + try: + ic._oem.immhandler.wc.grab_json_response('/api/dataset', ruleset) + except Exception as e: + print(repr(e)) + pass def config(self, nodename, reset=False): # TODO(jjohnson2): set ip parameters, user/pass, alert cfg maybe From 835d1fc0ab24da572c7184b865c6b965bf0aaf43 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 2 Apr 2019 09:39:06 -0400 Subject: [PATCH 16/25] Update the pyopenssl message It makes it more clear that a restart would be required to pull in updated dependency. 
--- confluent_server/confluent/sockapi.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/sockapi.py b/confluent_server/confluent/sockapi.py index fa5d2499..aa0b8a9f 100644 --- a/confluent_server/confluent/sockapi.py +++ b/confluent_server/confluent/sockapi.py @@ -159,7 +159,9 @@ def sessionhdl(connection, authname, skipauth=False, cert=None): 'python-pyopenssl installed or has an ' 'incorrect version installed ' '(e.g. pyOpenSSL would need to be ' - 'replaced with python-pyopenssl)'}}) + 'replaced with python-pyopenssl). ' + 'Restart confluent after updating ' + 'the dependency.'}}) return return collective.handle_connection(connection, None, request['collective'], local=True) From 7232a0c1b306c7cc6134aa0ac2d4d9eb67a0f7ec Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 3 Apr 2019 08:34:42 -0400 Subject: [PATCH 17/25] Fix SLP hangs on bad targets Have SLP timeout if there are endpoints that can half-hang a connection. --- confluent_server/confluent/discovery/protocols/slp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/discovery/protocols/slp.py b/confluent_server/confluent/discovery/protocols/slp.py index 77e642e1..2d5dc063 100644 --- a/confluent_server/confluent/discovery/protocols/slp.py +++ b/confluent_server/confluent/discovery/protocols/slp.py @@ -335,6 +335,7 @@ def _add_attributes(parsed): else: net = socket.socket(socket.AF_INET, socket.SOCK_STREAM) try: + net.settimeout(1.0) net.connect(target) except socket.error: return @@ -363,6 +364,7 @@ def query_srvtypes(target): while tries and not connected: tries -= 1 try: + net.settimeout(1.0) net.connect(target) connected = True except socket.error: From 16430e1ec93569e99f325a829a4d147bd4269f55 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 3 Apr 2019 14:23:30 -0400 Subject: [PATCH 18/25] Enhance collective usage output Collective usage output provided no hints as to how to access more detailed help. 
Amend the wording to make this more clear/obvious. --- confluent_server/bin/collective | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/confluent_server/bin/collective b/confluent_server/bin/collective index c8d31e62..27b12566 100644 --- a/confluent_server/bin/collective +++ b/confluent_server/bin/collective @@ -101,11 +101,11 @@ def main(): sl = sp.add_parser('show', help='Show information about the collective') ic = sp.add_parser('invite', help='Generate a invitation to allow a new ' 'confluent instance to join as a ' - 'collective member') + 'collective member. Run collective invite -h for more information') ic.add_argument('name', help='Name of server to invite to join the ' 'collective') - jc = sp.add_parser('join', help='Join a collective') - jc.add_argument('server', help='A server currently in the collective') + jc = sp.add_parser('join', help='Join a collective. Run collective join -h for more information') + jc.add_argument('server', help='Existing collective member that ran invite and generated a token') jc.add_argument('-i', help='Invitation provided by runniing invite on an ' 'existing collective member') cmdset = a.parse_args() From 36d5d60edc43366a7c4cc3a0ea9bdc342e890931 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 4 Apr 2019 09:49:02 -0400 Subject: [PATCH 19/25] Autofill collective.manager on discovery if not set To improve the ease of use, if an administrator has a collective but does not designate a collective.manager for a node being discovered, default to the collective member that executes the discovery. 
--- confluent_server/confluent/discovery/core.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index 0b445d64..10db839b 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -63,6 +63,7 @@ import base64 import confluent.config.configmanager as cfm +import confluent.collective.manager as collective import confluent.discovery.protocols.pxe as pxe #import confluent.discovery.protocols.ssdp as ssdp import confluent.discovery.protocols.slp as slp @@ -1055,6 +1056,14 @@ def discover_node(cfg, handler, info, nodename, manual): traceback.print_exc() return False newnodeattribs = {} + if cfm.list_collective(): + # We are in a collective, check collective.manager + cmc = cfg.get_node_attributes(nodename, 'collective.manager') + cm = cmc.get(nodename, {}).get('collective.manager', {}).get('value', None) + if not cm: + # Node is being discovered in collective, but no collective.manager, default + # to the collective member actually able to execute the discovery + newnodeattribs['collective.manager'] = collective.get_myname() if 'uuid' in info: newnodeattribs['id.uuid'] = info['uuid'] if 'serialnumber' in info: From 06b31f4845c5816ba3c8dfdb0c9070eeca7bb13c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 4 Apr 2019 10:18:06 -0400 Subject: [PATCH 20/25] Add man page for collective command --- confluent_client/doc/man/collective.ronn | 55 ++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 confluent_client/doc/man/collective.ronn diff --git a/confluent_client/doc/man/collective.ronn b/confluent_client/doc/man/collective.ronn new file mode 100644 index 00000000..2fc67ed2 --- /dev/null +++ b/confluent_client/doc/man/collective.ronn @@ -0,0 +1,55 @@ +collective(1) -- Check and manage a confluent collective +============================== + +## SYNOPSIS + +`collective invite ` +`collective 
join [-i TOKEN]` +`collective show` +`collective gencert` + + +## DESCRIPTION + +**collective** helps manage the collective mode of confluent, where multiple +confluent servers are linked together to act as one. For example, to set up +a collective to run on three servers called mgt1, mgt2, and mgt3, first install and start +confluent as usual on the three servers. On mgt1, run `collective invite mgt2` and an +invitation token will be output. On mgt2, either run `collective join mgt1` to paste +the token interactively, or `collective join mgt1 -i `. At this point, either +mgt1 or mgt2 can bring in mgt3. For example, on mgt2 run `collective invite mgt3` and +on mgt3 run `collective join mgt2 -i ` + +This can be linked together in the following manner with ssh: +on mgt1: + `# ssh mgt2 collective join mgt1 -i $(collective invite mgt2)` + +Note that a collective is only redundant with 3 or more members. The collective +will function so long as more than half of the members are online. A collective +of two members is supported, but without redundancy. + +Also note that the collective leader role is dynamic, but has no impact on interacting +with confluent. It is merely an internal role that can dynamically change depending +on circumstances. + +## OPTIONS + + * `-i`: + Provide the token as an argument rather than interactively. 
+ +## EXAMPLES + * Inviting a server called mgt2: + `# collective invite mgt2` + `bWd0MkA+BNQ6XAxMXlqJJa+EQRlihL/k9xCXnasgSQXZr989Pa1/ln7G3e1Ncxx6BMzMqqreHJVkPr2FrzjNit/UgHlg` + +* On mgt2, joining mgt1: + `# collective join mgt1 -i bWd0MkA+BNQ6XAxMXlqJJa+EQRlihL/k9xCXnasgSQXZr989Pa1/ln7G3e1Ncxx6BMzMqqreHJVkPr2FrzjNit/UgHlg` + `Success` + +* Showing the collective state: + `# collective show` + `Quorum: True` + `Leader: mgt1` + `Active collective members:` + ` mgt2` + From d6ecee955bd35fd702e1a4557da5bb48fe4bd8cc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 5 Apr 2019 09:17:45 -0400 Subject: [PATCH 21/25] Skip empty nodes list A noderange based nodegroup would have the empty nodes list cluttering the output. Skip empty nodes list in current settings. --- .../confluent/plugins/configuration/attributes.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/plugins/configuration/attributes.py b/confluent_server/confluent/plugins/configuration/attributes.py index e9b3b1fc..8c37fd8f 100644 --- a/confluent_server/confluent/plugins/configuration/attributes.py +++ b/confluent_server/confluent/plugins/configuration/attributes.py @@ -74,6 +74,8 @@ def retrieve_nodegroup(nodegroup, element, configmanager, inputdata): for attribute in sorted(list(grpcfg)): currattr = grpcfg[attribute] if attribute == 'nodes': + if not currattr: + continue desc = 'The nodes belonging to this group' elif attribute == 'noderange': desc = 'A dynamic noderange that this group refers to in noderange expansion' @@ -97,8 +99,8 @@ def retrieve_nodegroup(nodegroup, element, configmanager, inputdata): kv={attribute: currattr}, desc=desc) else: - print attribute - print repr(currattr) + print(attribute) + print(repr(currattr)) raise Exception("BUGGY ATTRIBUTE FOR NODEGROUP") From c070148aed8f7d14b62b6bfa7670205f728c79b1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 8 Apr 2019 10:14:49 -0400 Subject: [PATCH 22/25] Fix adequate check on 
inadequate IMMs --- confluent_server/confluent/discovery/handlers/imm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/handlers/imm.py b/confluent_server/confluent/discovery/handlers/imm.py index f82f4e71..2e6ccbbc 100644 --- a/confluent_server/confluent/discovery/handlers/imm.py +++ b/confluent_server/confluent/discovery/handlers/imm.py @@ -25,7 +25,7 @@ class NodeHandler(bmchandler.NodeHandler): def adequate(cls, info): # We can sometimes receive a partially initialized SLP packet # This is not adequate for being satisfied - return bool(info['attributes']) + return bool(info.get('attributes', {})) def scan(self): slpattrs = self.info.get('attributes', {}) From 1ae055fa8f0f0b07050dacb124da70588676f7a3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 11 Apr 2019 09:17:38 -0400 Subject: [PATCH 23/25] Add '-n' option to nodeshell and noderun Provide ability to suppress node prefix for nodeshell. This for example can be a quick 'makehosts' substituted and similar. --- confluent_client/bin/noderun | 14 +++++++++++--- confluent_client/bin/nodeshell | 14 +++++++++++--- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/confluent_client/bin/noderun b/confluent_client/bin/noderun index efe61a59..c931a548 100755 --- a/confluent_client/bin/noderun +++ b/confluent_client/bin/noderun @@ -39,11 +39,13 @@ import confluent.sortutil as sortutil def run(): argparser = optparse.OptionParser( - usage="Usage: %prog noderange commandexpression", + usage="Usage: %prog [options] noderange commandexpression", epilog="Expressions are the same as in attributes, e.g. " "'ipmitool -H {hardwaremanagement.manager}' will be expanded.") argparser.add_option('-f', '-c', '--count', type='int', default=168, help='Number of commands to run at a time') + argparser.add_option('-n', '--nonodeprefix', action='store_true', + help='Do not prefix output with node names') # among other things, FD_SETSIZE limits. 
Besides, spawning too many # processes can be unkind for the unaware on memory pressure and such... argparser.disable_interspersed_args() @@ -93,7 +95,10 @@ def run(): pernodeout[node] = [] pernodeout[node].append(data) else: - sys.stderr.write('{0}: {1}'.format(node, data)) + if options.nonodeprefix: + sys.stderr.write(data) + else: + sys.stderr.write('{0}: {1}'.format(node, data)) sys.stderr.flush() else: pop = desc['popen'] @@ -107,7 +112,10 @@ def run(): run_cmdv(node, cmdv, all, pipedesc) for node in sortutil.natural_sort(pernodeout): for line in pernodeout[node]: - sys.stdout.write('{0}: {1}'.format(node, line)) + if options.nonodeprefix: + sys.stdout.write(line) + else: + sys.stdout.write('{0}: {1}'.format(node, line)) sys.stdout.flush() if all: rdy, _, _ = select.select(all, [], [], 10) diff --git a/confluent_client/bin/nodeshell b/confluent_client/bin/nodeshell index d090e0ba..779bec7d 100755 --- a/confluent_client/bin/nodeshell +++ b/confluent_client/bin/nodeshell @@ -39,11 +39,13 @@ import confluent.sortutil as sortutil def run(): argparser = optparse.OptionParser( - usage="Usage: %prog noderange commandexpression", + usage="Usage: %prog [options] noderange commandexpression", epilog="Expressions are the same as in attributes, e.g. " "'ipmitool -H {hardwaremanagement.manager}' will be expanded.") argparser.add_option('-f', '-c', '--count', type='int', default=168, help='Number of commands to run at a time') + argparser.add_option('-n', '--nonodeprefix', action='store_true', + help='Do not prefix output with node names') # among other things, FD_SETSIZE limits. Besides, spawning too many # processes can be unkind for the unaware on memory pressure and such... 
argparser.disable_interspersed_args() @@ -94,7 +96,10 @@ def run(): pernodeout[node] = [] pernodeout[node].append(data) else: - sys.stderr.write('{0}: {1}'.format(node, data)) + if options.nonodeprefix: + sys.stderr.write(data) + else: + sys.stderr.write('{0}: {1}'.format(node, data)) sys.stderr.flush() else: pop = desc['popen'] @@ -108,7 +113,10 @@ def run(): run_cmdv(node, cmdv, all, pipedesc) for node in sortutil.natural_sort(pernodeout): for line in pernodeout[node]: - sys.stdout.write('{0}: {1}'.format(node, line)) + if options.nonodeprefix: + sys.stdout.write(line) + else: + sys.stdout.write('{0}: {1}'.format(node, line)) sys.stdout.flush() if all: rdy, _, _ = select.select(all, [], [], 10) From e5bbd226ff06a057f47eeaef21f70b06bd1ee59e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 15 Apr 2019 13:38:56 -0400 Subject: [PATCH 24/25] Add completion for attributes in node*attrib Make some of the tedium of the long attribute names bearable through tab completion. --- confluent_client/confluent_env.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/confluent_client/confluent_env.sh b/confluent_client/confluent_env.sh index ebc3d321..13e8ee19 100644 --- a/confluent_client/confluent_env.sh +++ b/confluent_client/confluent_env.sh @@ -190,6 +190,13 @@ _confluent_nodesupport_completion() fi } +_confluent_nodeattrib_completion() +{ + COMP_CANDIDATES=$(nodeattrib '~.>1' all | awk '{print $2}'|sed -e 's/://') + _confluent_generic_completion +} + + _confluent_nn_completion() { _confluent_get_args @@ -250,7 +257,8 @@ _confluent_ng_completion() COMPREPLY=($(compgen -W "$(confetty show /nodegroups|sed -e 's/\///' -e s/^/$PREFIX/)" -- "${COMP_WORDS[-1]}")) } -complete -F _confluent_nr_completion nodeattrib +complete -F _confluent_nodeattrib_completion nodeattrib +complete -F _confluent_nodeattrib_completion nodegroupattrib complete -F _confluent_nr_completion nodebmcreset complete -F _confluent_nodesetboot_completion nodeboot complete -F 
_confluent_nr_completion nodeconfig From 72448aa0b4ae1abcbd163ab81f81422ba534bcab Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 16 Apr 2019 08:46:50 -0400 Subject: [PATCH 25/25] Disable MIB resolution for raw requests MIB resolution turns out to be rather CPU intensive, and the current SNMP consumers don't want the resolution anyway. --- confluent_server/confluent/snmputil.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/snmputil.py b/confluent_server/confluent/snmputil.py index e862afe8..05090947 100644 --- a/confluent_server/confluent/snmputil.py +++ b/confluent_server/confluent/snmputil.py @@ -79,14 +79,16 @@ class Session(object): # overriden, but some devices only support DES) tp = _get_transport(self.server) ctx = snmp.ContextData(self.context) + resolvemib = False if '::' in oid: + resolvemib = True mib, field = oid.split('::') obj = snmp.ObjectType(snmp.ObjectIdentity(mib, field)) else: obj = snmp.ObjectType(snmp.ObjectIdentity(oid)) walking = snmp.bulkCmd(self.eng, self.authdata, tp, ctx, 0, 10, obj, - lexicographicMode=False) + lexicographicMode=False, lookupMib=resolvemib) try: for rsp in walking: errstr, errnum, erridx, answers = rsp