From 812e34f59bd854a97d00ad636f3b38abc01a1100 Mon Sep 17 00:00:00 2001 From: Arif Ali Date: Wed, 3 May 2017 13:21:36 +0100 Subject: [PATCH 01/31] Add csh/tcsh profile in /etc/profile.d --- confluent_client/confluent_env.csh | 1 + confluent_client/setup.py.tmpl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 confluent_client/confluent_env.csh diff --git a/confluent_client/confluent_env.csh b/confluent_client/confluent_env.csh new file mode 100644 index 00000000..0ace4e8d --- /dev/null +++ b/confluent_client/confluent_env.csh @@ -0,0 +1 @@ +setenv PATH /opt/confluent/bin:$PATH diff --git a/confluent_client/setup.py.tmpl b/confluent_client/setup.py.tmpl index f8768bd5..724d9473 100644 --- a/confluent_client/setup.py.tmpl +++ b/confluent_client/setup.py.tmpl @@ -11,5 +11,5 @@ setup( url='http://xcat.sf.net/', packages=['confluent'], scripts=scriptlist, - data_files=[('/etc/profile.d', ['confluent_env.sh'])], + data_files=[('/etc/profile.d', ['confluent_env.sh','confluent_env.csh'])], ) From 2055c6d698201a8be462108ed44c0f66e265b3a7 Mon Sep 17 00:00:00 2001 From: "michael.du" Date: Tue, 9 May 2017 13:28:57 +0800 Subject: [PATCH 02/31] Update MANIFEST.in fix the issue which failed to compile the confluent_client, it is due to the add confluent_env.csh in profile, but not add it in MANIFEST --- confluent_client/MANIFEST.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_client/MANIFEST.in b/confluent_client/MANIFEST.in index 2a8d2b80..a0e989c3 100644 --- a/confluent_client/MANIFEST.in +++ b/confluent_client/MANIFEST.in @@ -1 +1,2 @@ -include confluent_env.sh \ No newline at end of file +include confluent_env.sh +include confluent_env.csh From 6117a90372b1067cb25570c430711edafaac3ab9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 22 May 2017 09:24:58 -0400 Subject: [PATCH 03/31] Provide a script to summarize hangtraces files --- confluent_server/confluentdbgcli.py | 27 +++++++++++++++++++ 
.../dbgtools/processhangtraces.py | 22 +++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 confluent_server/confluentdbgcli.py create mode 100644 confluent_server/dbgtools/processhangtraces.py diff --git a/confluent_server/confluentdbgcli.py b/confluent_server/confluentdbgcli.py new file mode 100644 index 00000000..4ed804b5 --- /dev/null +++ b/confluent_server/confluentdbgcli.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2014 IBM Corporation +# Copyright 2015-2016 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import readline +import socket + +connection = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) +self.connection.connect('/var/run/confluent/dbg.sock') + +readline.parse_and_bind("tab: complete") +readline.parse_and_bind("set bell-style none") + diff --git a/confluent_server/dbgtools/processhangtraces.py b/confluent_server/dbgtools/processhangtraces.py new file mode 100644 index 00000000..18df9f33 --- /dev/null +++ b/confluent_server/dbgtools/processhangtraces.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python + + +import sys + +threadtraces = {} + +with open(sys.argv[1]) as tracefile: + traces = tracefile.read() + currtrace = None + for line in traces.split("\n"): + if line.startswith("Thread trace:"): + if currtrace is not None: + if currtrace not in threadtraces: + threadtraces[currtrace] = 0 + threadtraces[currtrace] += 1 + currtrace = line + elif currtrace is not None: + currtrace += line + '\n' +for trace in sorted(threadtraces, key=lambda x: threadtraces[x]): + print('Following stack seen {0} times'.format(threadtraces[trace])) + print(trace) From e0cc67f57afe631af4ace8fe21eb53d7cbe3e2f9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 20 Jun 2017 14:56:24 -0400 Subject: [PATCH 04/31] Implement Lenovo Thinksystem and discovery support --- confluent_client/bin/confetty | 100 ++++- confluent_client/bin/nodeattrib | 19 +- confluent_client/bin/nodeboot | 5 + confluent_client/bin/nodeeventlog | 7 +- confluent_client/bin/nodefirmware | 6 + confluent_client/bin/nodegroupattrib | 6 + confluent_client/bin/nodehealth | 5 + confluent_client/bin/nodeidentify | 5 + confluent_client/bin/nodeinventory | 8 + confluent_client/bin/nodelist | 10 +- confluent_client/bin/nodepower | 8 + confluent_client/bin/noderun | 126 ++++--- confluent_client/bin/nodesensors | 5 + confluent_client/bin/nodesetboot | 5 + confluent_client/bin/nodeshell | 126 ++++--- confluent_client/confluent/client.py | 25 +- confluent_client/confluent_env.sh | 2 + confluent_client/doc/man/confetty.ronn | 7 
+- confluent_client/doc/man/nodeattrib.ronn | 91 ++--- confluent_client/doc/man/nodeconsole.ronn | 28 +- confluent_client/doc/man/nodelist.ronn | 7 +- confluent_client/doc/man/nodesensors.ronn | 4 +- confluent_client/setup.py.tmpl | 10 +- confluent_server/buildrpm | 3 + .../confluent/config/attributes.py | 210 ++++++++--- .../confluent/config/configmanager.py | 109 ++++-- confluent_server/confluent/consoleserver.py | 199 ++++++++-- confluent_server/confluent/core.py | 84 ++++- confluent_server/confluent/exceptions.py | 6 + confluent_server/confluent/log.py | 1 + confluent_server/confluent/main.py | 2 + confluent_server/confluent/messages.py | 85 ++++- .../confluent/networking/macmap.py | 344 +++++++++++++++--- confluent_server/confluent/noderange.py | 28 +- .../plugins/configuration/attributes.py | 42 ++- .../plugins/hardwaremanagement/ipmi.py | 72 ++-- confluent_server/confluent/snmputil.py | 25 +- confluent_server/confluent/sockapi.py | 12 +- confluent_server/confluent/util.py | 47 ++- confluent_server/confluent_server.spec.tmpl | 5 +- .../dbgtools/processhangtraces.py | 13 + confluent_server/makesetup | 1 + confluent_server/setup.py.tmpl | 4 + 43 files changed, 1435 insertions(+), 472 deletions(-) diff --git a/confluent_client/bin/confetty b/confluent_client/bin/confetty index 89296ff6..cccaa45c 100755 --- a/confluent_client/bin/confetty +++ b/confluent_client/bin/confetty @@ -47,6 +47,7 @@ import optparse import os import select import shlex +import signal import socket import sys import time @@ -56,7 +57,10 @@ try: import tty except ImportError: pass - +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass exitcode = 0 consoleonly = False consolename = "" @@ -84,6 +88,32 @@ netserver = None laststate = {} +def print_help(): + print("confetty provides a filesystem like interface to confluent. " + "Navigation is done using the same commands as would be used in a " + "filesystem. 
Tab completion is supported to aid in navigation," + "as is up arrow to recall previous commands and control-r to search" + "previous command history, similar to using bash\n\n" + "The supported commands are:\n" + "cd [location] - Set the current command context, similar to a " + "working directory.\n" + "show [resource] - Present the information about the specified " + "resource, or current context if omitted.\n" + "create [resource] attributename=value attributename=value - Create " + "a new instance of a resource.\n" + "remove [resource] - Remove a resource from a list\n" + "set [resource] attributename=value attributename=value - Change " + "the specified attributes value for the given resource name\n" + "unset [resource] attributename - Clear any value for the given " + "attribute names on a resource.\n" + "start [resource] - When used on a text session resource, it " + "enters remote terminal mode. In this mode, use 'ctrl-e, c, ?' for " + "help" + ) + #TODO(jjohnson2): lookup context help for 'target' variable, perhaps + #common with the api document + + def updatestatus(stateinfo={}): status = consolename info = [] @@ -106,7 +136,7 @@ def updatestatus(stateinfo={}): if 'showtime' in laststate: showtime = laststate['showtime'] age = time.time() - laststate['showtime'] - if age > 86400: # older than one day + if age > 86400: # older than one day # disambiguate by putting date in and time info.append(time.strftime('%m-%dT%H:%M', time.localtime(showtime))) else: @@ -169,6 +199,7 @@ valid_commands = [ 'remove', 'rm', 'delete', + 'help', ] candidates = None @@ -238,7 +269,7 @@ def parse_command(command): try: args = shlex.split(command, posix=True) except ValueError as ve: - print('Error: ' + ve.message) + print('Error: ' + str(ve)) return [] return args @@ -306,7 +337,11 @@ def do_command(command, server): return argv[0] = argv[0].lower() if argv[0] == 'exit': + if os.environ['TERM'] not in ('linux'): + sys.stdout.write('\x1b]0;\x07') sys.exit(0) + elif argv[0] 
in ('help', '?'): + return print_help() elif argv[0] == 'cd': otarget = target if len(argv) > 1: @@ -348,6 +383,21 @@ def do_command(command, server): elif argv[0] in ('cat', 'show', 'ls', 'dir'): if len(argv) > 1: targpath = fullpath_target(argv[1]) + if argv[0] in ('ls', 'dir'): + if targpath[-1] != '/': + # could still be a directory, fetch the parent.. + childname = targpath[targpath.rindex('/') + 1:] + parentpath = targpath[:targpath.rindex('/') + 1] + if parentpath != '/noderange/': + # if it were /noderange/, then it's a directory + # even though parent won't tell us that + for res in session.read(parentpath, server): + try: + if res['item']['href'] == childname: + print(childname) + return + except KeyError: + pass else: targpath = target for res in session.read(targpath): @@ -418,6 +468,10 @@ def createresource(args): def makecall(callout, args): global exitcode for response in callout(*args): + if 'deleted' in response: + print("Deleted: " + response['deleted']) + if 'created' in response: + print("Created: " + response['created']) if 'error' in response: if 'errorcode' in response: exitcode = response['errorcode'] @@ -526,7 +580,11 @@ def quitconfetty(code=0, fullexit=False, fixterm=True): fcntl.fcntl(sys.stdin.fileno(), fcntl.F_SETFL, currfl ^ os.O_NONBLOCK) if oldtcattr is not None: termios.tcsetattr(sys.stdin.fileno(), termios.TCSANOW, oldtcattr) + # Request default color scheme, to undo potential weirdness of terminal + sys.stdout.write('\x1b[m') if fullexit: + if os.environ['TERM'] not in ('linux'): + sys.stdout.write('\x1b]0;\x07') sys.exit(code) else: tlvdata.send(session.connection, {'operation': 'stop', @@ -651,11 +709,11 @@ def conserver_command(filehandle, localcommand): else: print("Unknown power state.]\r") - check_power_state() + #check_power_state() elif localcommand[0] == '?': print("help]\r") - print(". disconnect\r") + print(". 
exit console\r") print("b break\r") print("o reopen\r") print("po power off\r") @@ -744,6 +802,8 @@ if sys.stdout.isatty(): readline.parse_and_bind("tab: complete") readline.parse_and_bind("set bell-style none") + dl = readline.get_completer_delims().replace('-', '') + readline.set_completer_delims(dl) readline.set_completer(completer) doexit = False @@ -767,10 +827,11 @@ def check_power_state(): global powerstate, powertime for rsp in session.read('/nodes/' + consolename + '/power/state'): if type(rsp) == dict and 'state' in rsp: - powerstate = rsp['state']['value'] + newpowerstate = rsp['state']['value'] powertime = time.time() - if powerstate == 'off': - sys.stdout.write("\r\n[powered off]\r\n") + if newpowerstate != powerstate and newpowerstate == 'off': + sys.stdout.write("\x1b[2J\x1b[;H[powered off]\r\n") + powerstate = newpowerstate elif type(rsp) == dict and '_requestdone' in rsp: break elif type(rsp) == dict: @@ -799,7 +860,12 @@ while inconsole or not doexit: updatestatus(data) continue if data is not None: - sys.stdout.write(data) + try: + sys.stdout.write(data) + except IOError: # Some times circumstances are bad + # resort to byte at a time... 
+ for d in data: + sys.stdout.write(d) now = time.time() if ('showtime' not in laststate or (now // 60) != laststate['showtime'] // 60): @@ -829,13 +895,15 @@ while inconsole or not doexit: sys.stdout.write("\r\n[remote disconnected]\r\n") break else: - myinput = fh.read() - myinput = check_escape_seq(myinput, fh) - if myinput: - tlvdata.send(session.connection, myinput) - if powerstate is None or powertime < time.time() - 60: # Check powerstate every 60 seconds - check_power_state() - + try: + myinput = fh.read() + myinput = check_escape_seq(myinput, fh) + if myinput: + tlvdata.send(session.connection, myinput) + except IOError: + pass + #if powerstate is None or powertime < time.time() - 60: # Check powerstate every 60 seconds + # check_power_state() else: currcommand = prompt() try: diff --git a/confluent_client/bin/nodeattrib b/confluent_client/bin/nodeattrib index c36942d2..5db662a1 100755 --- a/confluent_client/bin/nodeattrib +++ b/confluent_client/bin/nodeattrib @@ -19,8 +19,14 @@ __author__ = 'alin37' import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass + path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -29,13 +35,14 @@ if path.startswith('/opt'): import confluent.client as client argparser = optparse.OptionParser( - usage='''\n %prog [options] noderange [list of attributes] \ - \n %prog [options] noderange attribute1=value1,attribute2=value,... + usage='''\n %prog [-b] noderange [list of attributes] \ + \n %prog -c noderange \ + \n %prog noderange attribute1=value1 attribute2=value,... 
\n ''') argparser.add_option('-b', '--blame', action='store_true', help='Show information about how attributes inherited') argparser.add_option('-c', '--clear', action='store_true', - help='Clear variables') + help='Clear attributes') (options, args) = argparser.parse_args() @@ -46,7 +53,8 @@ try: noderange = args[0] nodelist = '/noderange/{0}/nodes/'.format(noderange) except IndexError: - nodelist = '/nodes/' + argparser.print_help() + sys.exit(1) session = client.Command() exitcode = 0 @@ -54,7 +62,7 @@ exitcode = 0 nodetype="noderange" if len(args) > 1: - if "=" in args[1]: + if "=" in args[1] or options.clear: exitcode=client.updateattrib(session,args,nodetype, noderange, options) try: # setting user output to what the user inputs @@ -65,6 +73,7 @@ if len(args) > 1: showtype = 'current' requestargs=args[2:] else: + showtype = 'all' requestargs=args[1:] except: pass diff --git a/confluent_client/bin/nodeboot b/confluent_client/bin/nodeboot index 9aa940a6..ee1f4eba 100755 --- a/confluent_client/bin/nodeboot +++ b/confluent_client/bin/nodeboot @@ -17,8 +17,13 @@ import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): diff --git a/confluent_client/bin/nodeeventlog b/confluent_client/bin/nodeeventlog index fc7a68a1..b467ad28 100755 --- a/confluent_client/bin/nodeeventlog +++ b/confluent_client/bin/nodeeventlog @@ -18,8 +18,13 @@ from datetime import datetime as dt import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -28,7 +33,7 @@ if path.startswith('/opt'): import confluent.client as client argparser = 
optparse.OptionParser( - usage="Usage: %prog [options] noderange (clear)") + usage="Usage: %prog [options] noderange [clear]") (options, args) = argparser.parse_args() try: noderange = args[0] diff --git a/confluent_client/bin/nodefirmware b/confluent_client/bin/nodefirmware index 1b51d6ec..5c14d6b4 100755 --- a/confluent_client/bin/nodefirmware +++ b/confluent_client/bin/nodefirmware @@ -17,7 +17,13 @@ import optparse import os +import signal import sys + +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): diff --git a/confluent_client/bin/nodegroupattrib b/confluent_client/bin/nodegroupattrib index f0fa7051..a7915f13 100755 --- a/confluent_client/bin/nodegroupattrib +++ b/confluent_client/bin/nodegroupattrib @@ -19,8 +19,13 @@ __author__ = 'alin37' import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -57,6 +62,7 @@ exitcode = 0 #Sets attributes if len(args) > 1: + showtype = 'all' exitcode=client.updateattrib(session,args,nodetype, nodegroups, options) try: # setting user output to what the user inputs diff --git a/confluent_client/bin/nodehealth b/confluent_client/bin/nodehealth index 294a73b4..31dc21f5 100755 --- a/confluent_client/bin/nodehealth +++ b/confluent_client/bin/nodehealth @@ -18,8 +18,13 @@ import codecs import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): diff --git a/confluent_client/bin/nodeidentify 
b/confluent_client/bin/nodeidentify index 2cbf573f..0618b12e 100755 --- a/confluent_client/bin/nodeidentify +++ b/confluent_client/bin/nodeidentify @@ -17,7 +17,12 @@ import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) diff --git a/confluent_client/bin/nodeinventory b/confluent_client/bin/nodeinventory index 83a44bd7..eeaba71e 100755 --- a/confluent_client/bin/nodeinventory +++ b/confluent_client/bin/nodeinventory @@ -17,7 +17,13 @@ import optparse import os +import signal import sys + +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -77,6 +83,8 @@ try: except IndexError: argparser.print_help() sys.exit(1) +if len(args) > 1 and args[1] == 'firm': + os.execlp('nodefirmware', 'nodefirmware', noderange) try: session = client.Command() for res in session.read('/noderange/{0}/inventory/hardware/all/all'.format( diff --git a/confluent_client/bin/nodelist b/confluent_client/bin/nodelist index ef3816e6..9892fa79 100755 --- a/confluent_client/bin/nodelist +++ b/confluent_client/bin/nodelist @@ -19,8 +19,13 @@ __author__ = 'jjohnson2,alin37' import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -30,7 +35,8 @@ import confluent.client as client def main(): argparser = optparse.OptionParser( - usage="Usage: %prog [options] noderange [list of attributes]") + usage="Usage: %prog noderange\n" + " or: %prog [options] noderange ...") argparser.add_option('-b', '--blame', 
action='store_true', help='Show information about how attributes inherited') (options, args) = argparser.parse_args() @@ -59,4 +65,4 @@ def main(): sys.exit(exitcode) if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/confluent_client/bin/nodepower b/confluent_client/bin/nodepower index 5dd0b007..c1169ea0 100755 --- a/confluent_client/bin/nodepower +++ b/confluent_client/bin/nodepower @@ -17,8 +17,13 @@ import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -43,6 +48,9 @@ if len(sys.argv) > 2: elif not sys.argv[2] in ('stat', 'state', 'status'): setstate = sys.argv[2] +if setstate not in (None, 'on', 'off', 'shutdown', 'boot', 'reset'): + argparser.print_help() + sys.exit(1) session = client.Command() exitcode = 0 session.add_precede_key('oldstate') diff --git a/confluent_client/bin/noderun b/confluent_client/bin/noderun index dbebd4b2..81b036cd 100755 --- a/confluent_client/bin/noderun +++ b/confluent_client/bin/noderun @@ -15,13 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections import deque import optparse import os import select import shlex +import signal import subprocess import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -30,57 +36,75 @@ if path.startswith('/opt'): import confluent.client as client -argparser = optparse.OptionParser( - usage="Usage: %prog node commandexpression", - epilog="Expressions are the same as in attributes, e.g. 
" - "'ipmitool -H {hardwaremanagement.manager}' will be expanded.") -argparser.disable_interspersed_args() -(options, args) = argparser.parse_args() -if len(args) < 2: - argparser.print_help() - sys.exit(1) -c = client.Command() -cmdstr = " ".join(args[1:]) +def run(): + concurrentprocs = 168 + # among other things, FD_SETSIZE limits. Besides, spawning too many + # processes can be unkind for the unaware on memory pressure and such... + argparser = optparse.OptionParser( + usage="Usage: %prog node commandexpression", + epilog="Expressions are the same as in attributes, e.g. " + "'ipmitool -H {hardwaremanagement.manager}' will be expanded.") + argparser.disable_interspersed_args() + (options, args) = argparser.parse_args() + if len(args) < 2: + argparser.print_help() + sys.exit(1) + c = client.Command() + cmdstr = " ".join(args[1:]) -nodeforpopen = {} -popens = [] -for exp in c.create('/noderange/{0}/attributes/expression'.format(args[0]), - {'expression': cmdstr}): - ex = exp['databynode'] - for node in ex: - cmd = ex[node]['value'].encode('utf-8') - cmdv = shlex.split(cmd) - nopen = subprocess.Popen( - cmdv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - popens.append(nopen) - nodeforpopen[nopen] = node + currprocs = 0 + all = set([]) + pipedesc = {} + pendingexecs = deque() -all = set([]) -pipedesc = {} -exitcode = 0 -for pop in popens: - node = nodeforpopen[pop] - pipedesc[pop.stdout] = { 'node': node, 'popen': pop, 'type': 'stdout'} - pipedesc[pop.stderr] = {'node': node, 'popen': pop, 'type': 'stderr'} - all.add(pop.stdout) - all.add(pop.stderr) -rdy, _, _ = select.select(all, [], [], 10) -while all and rdy: - for r in rdy: - data = r.readline() - desc = pipedesc[r] - if data: - node = desc['node'] - if desc['type'] == 'stdout': - sys.stdout.write('{0}: {1}'.format(node,data)) + for exp in c.create('/noderange/{0}/attributes/expression'.format(args[0]), + {'expression': cmdstr}): + ex = exp['databynode'] + for node in ex: + cmd = 
ex[node]['value'].encode('utf-8') + cmdv = shlex.split(cmd) + if currprocs < concurrentprocs: + currprocs += 1 + run_cmdv(node, cmdv, all, pipedesc) else: - sys.stderr.write('{0}: {1}'.format(node, data)) - else: - pop = desc['popen'] - ret = pop.poll() - if ret is not None: - exitcode = exitcode | ret - all.discard(r) - if all: - rdy, _, _ = select.select(all, [], [], 10) -sys.exit(exitcode) \ No newline at end of file + pendingexecs.append((node, cmdv)) + + exitcode = 0 + rdy, _, _ = select.select(all, [], [], 10) + while all: + for r in rdy: + data = r.readline() + desc = pipedesc[r] + if data: + node = desc['node'] + if desc['type'] == 'stdout': + sys.stdout.write('{0}: {1}'.format(node,data)) + else: + sys.stderr.write('{0}: {1}'.format(node, data)) + else: + pop = desc['popen'] + ret = pop.poll() + if ret is not None: + exitcode = exitcode | ret + all.discard(r) + if desc['type'] == 'stdout' and pendingexecs: + node, cmdv = pendingexecs.popleft() + run_cmdv(node, cmdv, all, pipedesc) + if all: + rdy, _, _ = select.select(all, [], [], 10) + sys.exit(exitcode) + + +def run_cmdv(node, cmdv, all, pipedesc): + nopen = subprocess.Popen( + cmdv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + pipedesc[nopen.stdout] = {'node': node, 'popen': nopen, + 'type': 'stdout'} + pipedesc[nopen.stderr] = {'node': node, 'popen': nopen, + 'type': 'stderr'} + all.add(nopen.stdout) + all.add(nopen.stderr) + + +if __name__ == '__main__': + run() \ No newline at end of file diff --git a/confluent_client/bin/nodesensors b/confluent_client/bin/nodesensors index 3c9827a3..3b7c9618 100755 --- a/confluent_client/bin/nodesensors +++ b/confluent_client/bin/nodesensors @@ -19,9 +19,14 @@ import csv import datetime import optparse import os +import signal import sys import time +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if 
path.startswith('/opt'): diff --git a/confluent_client/bin/nodesetboot b/confluent_client/bin/nodesetboot index 65f8aaeb..524d30d3 100755 --- a/confluent_client/bin/nodesetboot +++ b/confluent_client/bin/nodesetboot @@ -17,8 +17,13 @@ import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): diff --git a/confluent_client/bin/nodeshell b/confluent_client/bin/nodeshell index 74a5cf15..30f84242 100755 --- a/confluent_client/bin/nodeshell +++ b/confluent_client/bin/nodeshell @@ -15,13 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections import deque import optparse import os import select import shlex +import signal import subprocess import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -30,57 +36,75 @@ if path.startswith('/opt'): import confluent.client as client -argparser = optparse.OptionParser( - usage="Usage: %prog node commandexpression", - epilog="Expressions are the same as in attributes, e.g. " - "'ipmitool -H {hardwaremanagement.manager}' will be expanded.") -argparser.disable_interspersed_args() -(options, args) = argparser.parse_args() -if len(args) < 2: - argparser.print_help() - sys.exit(1) -c = client.Command() -cmdstr = " ".join(args[1:]) +def run(): + concurrentprocs = 168 + # among other things, FD_SETSIZE limits. Besides, spawning too many + # processes can be unkind for the unaware on memory pressure and such... + argparser = optparse.OptionParser( + usage="Usage: %prog node commandexpression", + epilog="Expressions are the same as in attributes, e.g. 
" + "'ipmitool -H {hardwaremanagement.manager}' will be expanded.") + argparser.disable_interspersed_args() + (options, args) = argparser.parse_args() + if len(args) < 2: + argparser.print_help() + sys.exit(1) + c = client.Command() + cmdstr = " ".join(args[1:]) -nodeforpopen = {} -popens = [] -for exp in c.create('/noderange/{0}/attributes/expression'.format(args[0]), - {'expression': cmdstr}): - ex = exp['databynode'] - for node in ex: - cmd = ex[node]['value'].encode('utf-8') - cmdv = ['ssh', node] + shlex.split(cmd) - nopen = subprocess.Popen( - cmdv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - popens.append(nopen) - nodeforpopen[nopen] = node + currprocs = 0 + all = set([]) + pipedesc = {} + pendingexecs = deque() -all = set([]) -pipedesc = {} -exitcode = 0 -for pop in popens: - node = nodeforpopen[pop] - pipedesc[pop.stdout] = { 'node': node, 'popen': pop, 'type': 'stdout'} - pipedesc[pop.stderr] = {'node': node, 'popen': pop, 'type': 'stderr'} - all.add(pop.stdout) - all.add(pop.stderr) -rdy, _, _ = select.select(all, [], [], 10) -while all and rdy: - for r in rdy: - data = r.readline() - desc = pipedesc[r] - if data: - node = desc['node'] - if desc['type'] == 'stdout': - sys.stdout.write('{0}: {1}'.format(node,data)) + for exp in c.create('/noderange/{0}/attributes/expression'.format(args[0]), + {'expression': cmdstr}): + ex = exp['databynode'] + for node in ex: + cmd = ex[node]['value'].encode('utf-8') + cmdv = ['ssh', node] + shlex.split(cmd) + if currprocs < concurrentprocs: + currprocs += 1 + run_cmdv(node, cmdv, all, pipedesc) else: - sys.stderr.write('{0}: {1}'.format(node, data)) - else: - pop = desc['popen'] - ret = pop.poll() - if ret is not None: - exitcode = exitcode | ret - all.discard(r) - if all: - rdy, _, _ = select.select(all, [], [], 10) -sys.exit(exitcode) \ No newline at end of file + pendingexecs.append((node, cmdv)) + + exitcode = 0 + rdy, _, _ = select.select(all, [], [], 10) + while all: + for r in rdy: + data = r.readline() + 
desc = pipedesc[r] + if data: + node = desc['node'] + if desc['type'] == 'stdout': + sys.stdout.write('{0}: {1}'.format(node,data)) + else: + sys.stderr.write('{0}: {1}'.format(node, data)) + else: + pop = desc['popen'] + ret = pop.poll() + if ret is not None: + exitcode = exitcode | ret + all.discard(r) + if desc['type'] == 'stdout' and pendingexecs: + node, cmdv = pendingexecs.popleft() + run_cmdv(node, cmdv, all, pipedesc) + if all: + rdy, _, _ = select.select(all, [], [], 10) + sys.exit(exitcode) + + +def run_cmdv(node, cmdv, all, pipedesc): + nopen = subprocess.Popen( + cmdv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + pipedesc[nopen.stdout] = {'node': node, 'popen': nopen, + 'type': 'stdout'} + pipedesc[nopen.stderr] = {'node': node, 'popen': nopen, + 'type': 'stderr'} + all.add(nopen.stdout) + all.add(nopen.stderr) + + +if __name__ == '__main__': + run() \ No newline at end of file diff --git a/confluent_client/confluent/client.py b/confluent_client/confluent/client.py index 3d14ef90..1936ab89 100644 --- a/confluent_client/confluent/client.py +++ b/confluent_client/confluent/client.py @@ -274,7 +274,7 @@ def attrrequested(attr, attrlist, seenattributes): if candidate == attr: seenattributes.add(truename) return True - elif '.' 
not in candidate and attr.startswith(candidate + '.'): + elif attr.startswith(candidate + '.'): seenattributes.add(truename) return True return False @@ -309,12 +309,12 @@ def printattributes(session, requestargs, showtype, nodetype, noderange, options '{2}'.format(node, attr, currattr['broken']) elif isinstance(currattr, list) or isinstance(currattr, tuple): - attrout = '{0}: {1}: {2}'.format(node, attr, ', '.join(map(str, currattr))) + attrout = '{0}: {1}: {2}'.format(node, attr, ','.join(map(str, currattr))) elif isinstance(currattr, dict): dictout = [] for k, v in currattr.items: dictout.append("{0}={1}".format(k, v)) - attrout = '{0}: {1}: {2}'.format(node, attr, ', '.join(map(str, dictout))) + attrout = '{0}: {1}: {2}'.format(node, attr, ','.join(map(str, dictout))) else: print ("CODE ERROR" + repr(attr)) @@ -367,28 +367,17 @@ def printgroupattributes(session, requestargs, showtype, nodetype, noderange, op attrout = '{0}: {1}: *ERROR* BROKEN EXPRESSION: ' \ '{2}'.format(noderange, attr, currattr['broken']) + elif 'expression' in currattr: + attrout = '{0}: {1}: (will derive from expression {2})'.format(noderange, attr, currattr['expression']) elif isinstance(currattr, list) or isinstance(currattr, tuple): - attrout = '{0}: {1}: {2}'.format(noderange, attr, ', '.join(map(str, currattr))) + attrout = '{0}: {1}: {2}'.format(noderange, attr, ','.join(map(str, currattr))) elif isinstance(currattr, dict): dictout = [] for k, v in currattr.items: dictout.append("{0}={1}".format(k, v)) - attrout = '{0}: {1}: {2}'.format(noderange, attr, ', '.join(map(str, dictout))) + attrout = '{0}: {1}: {2}'.format(noderange, attr, ','.join(map(str, dictout))) else: print ("CODE ERROR" + repr(attr)) - - if options.blame or 'broken' in currattr: - blamedata = [] - if 'inheritedfrom' in currattr: - blamedata.append('inherited from group {0}'.format( - currattr['inheritedfrom'] - )) - if 'expression' in currattr: - blamedata.append( - 'derived from expression "{0}"'.format( - 
currattr['expression'])) - if blamedata: - attrout += ' (' + ', '.join(blamedata) + ')' print attrout if not exitcode: if requestargs: diff --git a/confluent_client/confluent_env.sh b/confluent_client/confluent_env.sh index 01eededd..9580f10e 100644 --- a/confluent_client/confluent_env.sh +++ b/confluent_client/confluent_env.sh @@ -1,2 +1,4 @@ PATH=/opt/confluent/bin:$PATH export PATH +MANPATH=/opt/confluent/share/man:$MANPATH +export MANPATH diff --git a/confluent_client/doc/man/confetty.ronn b/confluent_client/doc/man/confetty.ronn index b7c4758b..8c4d3e60 100644 --- a/confluent_client/doc/man/confetty.ronn +++ b/confluent_client/doc/man/confetty.ronn @@ -1,9 +1,10 @@ -confetty(1) --- Interactive confluent client +confetty(8) --- Interactive confluent client ================================================= ## SYNOPSIS -`confetty` +`confetty` +`confetty ` ## DESCRIPTION @@ -33,5 +34,3 @@ commands. Start a console session indicated by **ELEMENT** (e.g. /nodes/n1/console/session) * `rm` **ELEMENT** Request removal of an element. (e.g. rm events/hardware/log clears log from a node) - - diff --git a/confluent_client/doc/man/nodeattrib.ronn b/confluent_client/doc/man/nodeattrib.ronn index dc330b0c..92f2f89b 100644 --- a/confluent_client/doc/man/nodeattrib.ronn +++ b/confluent_client/doc/man/nodeattrib.ronn @@ -1,75 +1,80 @@ -nodeattrib(1) -- List or change confluent nodes attributes +nodeattrib(8) -- List or change confluent nodes attributes ========================================================= ## SYNOPSIS -`nodeattrib` `noderange` [ current | all ] -`nodeattrib` `noderange` [-b] [...] -`nodeattrib` `noderange` [ ...] -`nodeattrib` `noderange` [-c] [ ...] +`nodeattrib [-b] [...]` +`nodeattrib [ ...]` +`nodeattrib -c ...` ## DESCRIPTION -**nodeattrib** queries the confluent server to get information about nodes. In +**nodeattrib** manages the attributes of confluent nodes. 
In the simplest form, it simply takes the given noderange(5) and lists the matching nodes, one line at a time. If a list of node attribute names are given, the value of those are also displayed. If `-b` is specified, it will also display information on -how inherited and expression based attributes are defined. There is more -information on node attributes in nodeattributes(5) man page. +how inherited and expression based attributes are defined. Attributes can be +straightforward values, or an expression as documented in nodeattribexpressions(5). +For a full list of attributes, run `nodeattrib all` against a node. If `-c` is specified, this will set the nodeattribute to a null valid. This is different from setting the value to an empty string. +Note that `nodeattrib ` will likely not provide the expected behavior. +See nodegroupattrib(8) command on how to manage attributes on a group level. + ## OPTIONS * `-b`, `--blame`: Annotate inherited and expression based attributes to show their base value. 
* `-c`, `--clear`: - Clear given nodeattributes since '' is not the same as empty + Clear specified nodeattributes ## EXAMPLES * Listing matching nodes of a simple noderange: - `# nodeattrib n1-n2` - `n1`: console.method: ipmi - `n1`: hardwaremanagement.manager: 172.30.3.1 - `n2`: console.method: ipmi - `n2`: hardwaremanagement.manager: 172.30.3.2 + `# nodeattrib n1-n2` + `n1: console.method: ipmi` + `n1: hardwaremanagement.manager: 172.30.3.1` + `n2: console.method: ipmi` + `n2: hardwaremanagement.manager: 172.30.3.2` * Getting an attribute of nodes matching a noderange: - `# nodeattrib n1,n2 hardwaremanagement.manager` - `n1: hardwaremanagement.manager: 172.30.3.1` - `n2: hardwaremanagement.manager: 172.30.3.2` + `# nodeattrib n1,n2 hardwaremanagement.manager` + `n1: hardwaremanagement.manager: 172.30.3.1` + `n2: hardwaremanagement.manager: 172.30.3.2` * Getting a group of attributes while determining what group defines them: - `# nodeattrib n1,n2 hardwaremanagement --blame` - `n1: hardwaremanagement.manager: 172.30.3.1` - `n1: hardwaremanagement.method: ipmi (inherited from group everything)` - `n1: hardwaremanagement.switch: r8e1` - `n1: hardwaremanagement.switchport: 14` - `n2: hardwaremanagement.manager: 172.30.3.2` - `n2: hardwaremanagement.method: ipmi (inherited from group everything)` - `n2: hardwaremanagement.switch: r8e1` - `n2: hardwaremanagement.switchport: 2` + `# nodeattrib n1,n2 hardwaremanagement --blame` + `n1: hardwaremanagement.manager: 172.30.3.1` + `n1: hardwaremanagement.method: ipmi (inherited from group everything)` + `n1: hardwaremanagement.switch: r8e1` + `n1: hardwaremanagement.switchport: 14` + `n2: hardwaremanagement.manager: 172.30.3.2` + `n2: hardwaremanagement.method: ipmi (inherited from group everything)` + `n2: hardwaremanagement.switch: r8e1` + `n2: hardwaremanagement.switchport: 2` - * Listing matching nodes of a simple noderange that are set: - `# nodeattrib n1-n2 current` - `n1`: console.method: ipmi - `n1`: 
hardwaremanagement.manager: 172.30.3.1 - `n2`: console.method: ipmi - `n2`: hardwaremanagement.manager: 172.30.3.2 +* Listing matching nodes of a simple noderange that are set: + `# nodeattrib n1-n2 current` + `n1: console.method: ipmi` + `n1: hardwaremanagement.manager: 172.30.3.1` + `n2: console.method: ipmi` + `n2: hardwaremanagement.manager: 172.30.3.2` - * Change attribute on nodes of a simple noderange: - `# nodeattrib n1-n2 console.method=serial` - `n1`: console.method: serial - `n1`: hardwaremanagement.manager: 172.30.3.1 - `n2`: console.method: serial - `n2`: hardwaremanagement.manager: 172.30.3.2 - - * Clear attribute on nodes of a simple noderange, if you want to retain the variable set the attribute to "": - `# nodeattrib n1-n2 -c console.method` - `# nodeattrib n1-n2 console.method` - Error: console.logging not a valid attribute +* Change attribute on nodes of a simple noderange: + `# nodeattrib n1-n2 console.method=serial` + `n1: console.method: serial` + `n1: hardwaremanagement.manager: 172.30.3.1` + `n2: console.method: serial` + `n2: hardwaremanagement.manager: 172.30.3.2` +* Clear attribute on nodes of a simple noderange, if you want to retain the variable set the attribute to "": + `# nodeattrib n1-n2 -c console.method` + `# nodeattrib n1-n2 console.method` + `n1: console.method: ` + `n2: console.method: ` +## SEE ALSO +nodegroupattrib(8), nodeattribexpressions(5) diff --git a/confluent_client/doc/man/nodeconsole.ronn b/confluent_client/doc/man/nodeconsole.ronn index c7e3eb4d..95ceeaa3 100644 --- a/confluent_client/doc/man/nodeconsole.ronn +++ b/confluent_client/doc/man/nodeconsole.ronn @@ -1,14 +1,14 @@ -nodeconsole(1) -- Open a console to a confluent node +nodeconsole(8) -- Open a console to a confluent node ===================================================== ## SYNOPSIS -`nodeconsole` `node` +`nodeconsole ` ## DESCRIPTION **nodeconsole** opens an interactive console session to a given node. This is the text or serial console of a system. 
Exiting is done by hitting `Ctrl-e`, then `c`, - then `.`. Note that console output by default is additionally logged to + then `.`. Note that console output by default is additionally logged to `/var/log/confluent/consoles/`**NODENAME**. ## ESCAPE SEQUENCE COMMANDS @@ -20,11 +20,29 @@ keystroke will be interpreted as a command. The following commands are availabl * `.`: Exit the session and return to the command prompt * `b`: + [send Break] Send a break to the remote console when possible (some console plugins may not support this) * `o`: + [reOpen] Request confluent to disconnect and reconnect to console. For example if there is suspicion that the console has gone inoperable, but would work if reconnected. +* `po`: + [Power Off] + Power off server immediately, without waiting for OS to shutdown +* `ps`: + [Power Shutdown] + Request OS shut down gracefully, and then power off +* `pb`: + [Power Boot] + Cause system to immediately boot, resetting or turning on as appropriate. + Hitting enter is required to execute the reboot rather than another pb sequence +* `pbs`: + [Power Boot Setup] + Request immediate boot ultimately landing in interactive firmware setup +* `pbn`: + [Power Boot Network] + Request immediate boot to network * `?`: Get a list of supported commands -* ``: - Abandon entering an escape sequence command +* ``: + Hit enter to skip entering a command at the escape prompt. diff --git a/confluent_client/doc/man/nodelist.ronn b/confluent_client/doc/man/nodelist.ronn index b231cc54..e78c3713 100644 --- a/confluent_client/doc/man/nodelist.ronn +++ b/confluent_client/doc/man/nodelist.ronn @@ -1,10 +1,10 @@ -nodelist(1) -- List confluent nodes and their attributes +nodelist(8) -- List confluent nodes and their attributes ========================================================= ## SYNOPSIS -`nodelist` `noderange` -`nodelist` `noderange` [-b] [...] 
+`nodelist ` +`nodelist [-b] ...` ## DESCRIPTION @@ -45,4 +45,3 @@ information on node attributes in nodeattributes(5) man page. `n2: hardwaremanagement.method: ipmi (inherited from group everything)` `n2: hardwaremanagement.switch: r8e1` `n2: hardwaremanagement.switchport: 2` - diff --git a/confluent_client/doc/man/nodesensors.ronn b/confluent_client/doc/man/nodesensors.ronn index 1f5a8467..44deb352 100644 --- a/confluent_client/doc/man/nodesensors.ronn +++ b/confluent_client/doc/man/nodesensors.ronn @@ -1,9 +1,9 @@ -nodesensors(1) --- Retrieve telemetry for sensors of confluent nodes +nodesensors(8) --- Retrieve telemetry for sensors of confluent nodes ==================================================================== ## SYNOPSIS -`nodesensors` `noderange` [-c] [-i ] [-n ] [...] +`nodesensors [-c] [-i ] [-n ] [...]` ## DESCRIPTION diff --git a/confluent_client/setup.py.tmpl b/confluent_client/setup.py.tmpl index 724d9473..59db1b33 100644 --- a/confluent_client/setup.py.tmpl +++ b/confluent_client/setup.py.tmpl @@ -1,7 +1,13 @@ from setuptools import setup import os -scriptlist = ['bin/{0}'.format(d) for d in os.listdir('bin/')] +data_files = [('/etc/profile.d', ['confluent_env.sh', 'confluent_env.csh'])] +try: + scriptlist = ['bin/{0}'.format(d) for d in os.listdir('bin/')] + data_files.append(('/opt/confluent/share/man/man5', ['man/man5/' + x for x in os.listdir('man/man5')])) + data_files.append(('/opt/confluent/share/man/man8', ['man/man8/' + x for x in os.listdir('man/man8')])) +except OSError: + pass setup( name='confluent_client', @@ -11,5 +17,5 @@ setup( url='http://xcat.sf.net/', packages=['confluent'], scripts=scriptlist, - data_files=[('/etc/profile.d', ['confluent_env.sh','confluent_env.csh'])], + data_files=data_files, ) diff --git a/confluent_server/buildrpm b/confluent_server/buildrpm index 627f7dc7..14071382 100755 --- a/confluent_server/buildrpm +++ b/confluent_server/buildrpm @@ -1,5 +1,8 @@ #!/bin/sh cd `dirname $0` +if [ -x ./makeman ]; then 
+ ./makeman +fi ./makesetup VERSION=`cat VERSION` PKGNAME=$(basename $(pwd)) diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index 468358b1..27a70c06 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -16,50 +16,69 @@ # limitations under the License. -#This defines the attributes of variou classes of things +#This defines the attributes of various classes of things # 'nic', meant to be a nested structure under node -nic = { - 'name': { - 'description': 'Name in ip/ifconfig as desired by administrator', - }, - 'port': { - 'description': 'Port that this nic connects to', - }, - 'switch': { - 'description': 'Switch that this nic connects to', - }, - 'customhardwareaddress': { - 'description': 'Mac address to push to nic', - }, - 'dnssuffix': { - 'description': ('String to place after nodename, but before' - 'Network.Domain to derive FQDN for this NIC'), - }, - 'hardwareaddress': { - 'description': 'Active mac address on this nic (factory or custom)' - }, - 'ipaddresses': { - 'description': 'Set of IPv4 and IPv6 addresses in CIDR format' - }, - 'pvid': { - 'description': 'PVID of port on switch this nic connects to', - }, - 'mtu': { - 'description': 'Requested MTU to configure on this interface', - }, - 'vlans': { - 'description': 'Tagged VLANs to apply to nic/switch', - }, - 'dhcpv4enabled': { - 'description': ('Whether DHCP should be attempted to acquire IPv4' - 'address on this interface'), - }, - 'dhcpv6enabled': { - 'description': ('Whether DHCP should be attempted to acquire IPv6' - 'address on this interface'), - }, -} +# changing mind on design, flattening to a single attribute, a *touch* less +# flexible at the top end, but much easier on the low end +# now net..attribute scheme +# similarly, leaning toward comma delimited ip addresses, since 99.99% of the +# time each nic will have one ip address +# vlan specification will need to 
be thought about a tad, each ip could be on +# a distinct vlan, but could have a vlan without an ip for sake of putting +# to a bridge. Current thought is +# vlans attribute would be comma delimited referring to the same index +# as addresses, with either 'native' or a number for vlan id +# the 'joinbridge' attribute would have some syntax like @ to indicate +# joining only a vlan of the nic to the bridge +# 'joinbond' attribute would not support vlans. + +#nic = { +# 'name': { +# 'description': 'Name in ip/ifconfig as desired by administrator', +# }, +# 'biosdevname': { +# 'description': '"biosdevname" scheme to identify the adapter. If not' +# 'mac address match is preferred, then biosdevname, then' +# 'name.', +# }, +# 'port': { +# 'description': 'Port that this nic connects to', +# }, +# 'switch': { +# 'description': 'Switch that this nic connects to', +# }, +# 'customhardwareaddress': { +# 'description': 'Mac address to push to nic', +# }, +# 'dnssuffix': { +# 'description': ('String to place after nodename, but before' +# 'Network.Domain to derive FQDN for this NIC'), +# }, +# 'hardwareaddress': { +# 'description': 'Active mac address on this nic (factory or custom)' +# }, +# 'ipaddresses': { +# 'description': 'Set of IPv4 and IPv6 addresses in CIDR format' +# }, +# 'pvid': { +# 'description': 'PVID of port on switch this nic connects to', +# }, +# 'mtu': { +# 'description': 'Requested MTU to configure on this interface', +# }, +# 'vlans': { +# 'description': 'Tagged VLANs to apply to nic/switch', +# }, +# 'dhcpv4enabled': { +# 'description': ('Whether DHCP should be attempted to acquire IPv4' +# 'address on this interface'), +# }, +# 'dhcpv6enabled': { +# 'description': ('Whether DHCP should be attempted to acquire IPv6' +# 'address on this interface'), +# }, +#} user = { 'password': { @@ -71,7 +90,6 @@ user = { node = { 'groups': { 'type': list, - 'default': 'all', 'description': ('List of static groups for which this node is ' 'considered a member'), }, @@ 
-81,6 +99,72 @@ node = { #'id': { # 'description': ('Numeric identifier for node') #}, + # autonode is the feature of generating nodes based on connectivity to + # current node. In recursive autonode, for now we just allow endpoint to + # either be a server directly *or* a server enclosure. This precludes + # for the moment a concept of nested arbitrarily deep, but for now do this. + # hypothetically, one could imagine supporting an array and 'popping' + # names until reaching end. Not worth implementing at this point. If + # a traditional switch is added, it needs some care and feeding anyway. + # If a more exciting scheme presents itself, well we won't have to +# # own discovering switches anyway. +# 'autonode.servername': { +# 'description': ('Template for creating nodenames for automatic ' +# 'creation of nodes detected as children of ' +# 'this node. For example, a node in a server ' +# 'enclosure bay or a server connected to a switch or ' +# 'an enclosure manager connected to a switch. Certain ' +# 'special template parameters are available and can ' +# 'be used alongside usual config template directives. ' +# '"discovered.nodenumber" will be replaced with the ' +# 'bay or port number where the child node is connected.' +# ), +# }, +# 'autonode.servergroups': { +# 'type': list, +# 'description': ('A list of groups to which discovered nodes will ' +# 'belong to. As in autonode.servername, "discovered." ' +# 'variable names will be substituted in special context') +# }, +# 'autonode.enclosurename': { +# 'description': ('Template for creating nodenames when the discovered ' +# 'node is an enclosure that will in turn generate nodes.' +# ) +# }, +# 'autonode.enclosuregroups': { +# 'type': list, +# 'description': ('A list of groups to which a discovered node will be' +# 'placed, presuming that node is an enclosure.') +# }, +#For now, we consider this eventuality if needed. For now emphasize paradigm +# of group membership and see how far that goes. 
+# 'autonode.copyattribs': { +# 'type': list, +# 'description': ('A list of attributes to copy from the node generator ' +# 'to the generated node. Expressions will be copied ' +# 'over without evaluation, so will be evaluated ' +# 'in the context of the generated node, rather than the' +# 'parent node. By default, an enclosure will copy over' +# 'autonode.servername, so that would not need to be ' +# 'copied ') +# }, + 'discovery.policy': { + 'description': 'Policy to use for auto-configuration of discovered ' + 'and identified nodes. Valid values are "manual", ' + '"permissive", or "open". "manual" means nodes are ' + 'detected, but not autoconfigured until a user ' + 'approves. "permissive" indicates to allow discovery, ' + 'so long as the node has no existing public key. ' + '"open" allows discovery even if a known public key ' + 'is already stored', + }, + 'info.note': { + 'description': 'A field used for administrators to make arbitrary ' + 'notations about nodes. This is meant entirely for ' + 'human use and not programmatic use, so it can be ' + 'freeform text data without concern for issues in how ' + 'the server will process it.', + }, 'location.room': { 'description': 'Room description for the node', }, @@ -195,17 +279,6 @@ node = { 'description': 'The method used to perform operations such as power ' 'control, get sensor data, get inventory, and so on. ' }, - 'hardwaremanagement.switch': { - 'description': 'The switch to which the hardware manager is connected.' - ' Only relevant if using switch based discovery via the' - ' hardware manager (Lenovo IMMs and CMMs). Not ' - 'applicable to Lenovo Flex nodes.' - }, - 'hardwaremanagement.switchport': { - 'description': 'The port of the switch that the hardware manager is ' - 'connected. See documentation of ' - 'hardwaremanagement.switch for more detail.' 
- }, 'enclosure.manager': { 'description': "The management device for this node's chassis", # 'appliesto': ['system'], @@ -223,9 +296,32 @@ node = { # 'id.serial': { # 'description': 'The manufacturer serial number of node', # }, -# 'id.uuid': { -# 'description': 'The UUID of the node as presented in DMI', -# }, + 'id.uuid': { + 'description': 'The UUID of the node as presented in DMI.', + }, + 'net.ipv4_gateway': { + 'description': 'The IPv4 gateway to use if applicable. As is the ' + 'case for other net attributes, net.eth0.ipv4_gateway ' + 'and similar is accepted.' + }, + # 'net.pxe': { 'description': 'Whether pxe will be used on this interface' + # TODO(jjohnson2): Above being 'true' will control whether mac addresses + # are stored in this nics attribute on pxe-client discovery, since + # pxe discovery is ambiguous for BMC and system on same subnet, + # or even both on the same port and same subnet + 'net.switch': { + 'description': 'An ethernet switch the node is connected to. Note ' + 'that net.* attributes may be indexed by interface. ' + 'For example instead of using net.switch, it is ' + 'possible to use net.eth0.switch and net.eth1.switch ' + 'or net.0.switch and net.1.switch to define multiple ' + 'sets of net connectivity associated with each other.' + }, + 'net.switchport': { + 'description': 'The port on the switch that corresponds to this node. ' + 'See information on net.switch for more on the ' + 'flexibility of net.* attributes.' 
+ }, # 'id.modelnumber': { # 'description': 'The manufacturer dictated model number for the node', # }, diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index d9600a3a..c4a14c39 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -72,6 +72,8 @@ import confluent.exceptions as exc import copy import cPickle import errno +import eventlet +import fnmatch import json import operator import os @@ -151,6 +153,29 @@ def _format_key(key, password=None): return {"unencryptedvalue": key} +def _do_notifier(cfg, watcher, callback): + try: + callback(nodeattribs=watcher['nodeattrs'], configmanager=cfg) + except Exception: + logException() + + +def logException(): + global tracelog + if tracelog is None: + tracelog = confluent.log.Logger('trace') + tracelog.log(traceback.format_exc(), + ltype=confluent.log.DataTypes.event, + event=confluent.log.Events.stacktrace) + + +def _do_add_watcher(watcher, added, configmanager): + try: + watcher(added=added, deleting=[], configmanager=configmanager) + except Exception: + logException() + + def init_masterkey(password=None): global _masterkey global _masterintegritykey @@ -198,6 +223,26 @@ def decrypt_value(cryptvalue, return value[0:-padsize] +def attribute_is_invalid(attrname, attrval): + if attrname.startswith('custom.'): + # No type checking or name checking is provided for custom, + # it's not possible + return False + if attrname.startswith('net.'): + # For net.* attribtues, split on the dots and put back together + # longer term we might want a generic approach, but + # right now it's just net. attributes + netattrparts = attrname.split('.') + attrname = netattrparts[0] + '.' 
+ netattrparts[-1] + if attrname not in allattributes.node: + # Otherwise, it must be in the allattributes key list + return True + if 'type' in allattributes.node[attrname]: + if not isinstance(attrval, allattributes.node[attrname]['type']): + # provide type checking for attributes with a specific type + return True + return False + def crypt_value(value, key=None, integritykey=None): @@ -372,8 +417,8 @@ class _ExpressionFormat(string.Formatter): if optype not in self._supported_ops: raise Exception("Unsupported operation") op = self._supported_ops[optype] - return op(self._handle_ast_node(node.left), - self._handle_ast_node(node.right)) + return op(int(self._handle_ast_node(node.left)), + int(self._handle_ast_node(node.right))) def _decode_attribute(attribute, nodeobj, formatter=None, decrypt=False): @@ -551,7 +596,9 @@ class ConfigManager(object): def watch_attributes(self, nodes, attributes, callback): """ - Watch a list of attributes for changes on a list of nodes + Watch a list of attributes for changes on a list of nodes. 
The + attributes may be literal, or a filename style wildcard like + 'net*.switch' :param nodes: An iterable of node names to be watching :param attributes: An iterable of attribute names to be notified about @@ -579,6 +626,10 @@ class ConfigManager(object): } else: attribwatchers[node][attribute][notifierid] = callback + if '*' in attribute: + currglobs = attribwatchers[node].get('_attrglobs', set([])) + currglobs.add(attribute) + attribwatchers[node]['_attrglobs'] = currglobs return notifierid def watch_nodecollection(self, callback): @@ -786,9 +837,11 @@ class ConfigManager(object): if decrypt is None: decrypt = self.decrypt retdict = {} - relattribs = attributes if isinstance(nodelist, str) or isinstance(nodelist, unicode): nodelist = [nodelist] + if isinstance(attributes, str) or isinstance(attributes, unicode): + attributes = [attributes] + relattribs = attributes for node in nodelist: if node not in self._cfgstore['nodes']: continue @@ -800,6 +853,10 @@ class ConfigManager(object): if attribute.startswith('_'): # skip private things continue + if '*' in attribute: + for attr in fnmatch.filter(list(cfgnodeobj), attribute): + nodeobj[attr] = _decode_attribute(attr, cfgnodeobj, + decrypt=decrypt) if attribute not in cfgnodeobj: continue # since the formatter is not passed in, the calculator is @@ -916,11 +973,8 @@ class ConfigManager(object): raise ValueError("{0} group does not exist".format(group)) for attr in attribmap[group].iterkeys(): if (attr not in ('nodes', 'noderange') and - (attr not in allattributes.node or - ('type' in allattributes.node[attr] and - not isinstance(attribmap[group][attr], - allattributes.node[attr]['type'])))): - raise ValueError("nodes attribute is invalid") + attribute_is_invalid(attr, attribmap[group][attr])): + raise ValueError("{0} attribute is invalid".format(attr)) if attr == 'nodes': if not isinstance(attribmap[group][attr], list): if type(attribmap[group][attr]) is unicode or type(attribmap[group][attr]) is str: @@ -1019,7 
+1073,7 @@ class ConfigManager(object): return notifdata = {} attribwatchers = self._attribwatchers[self.tenant] - for node in nodeattrs.iterkeys(): + for node in nodeattrs: if node not in attribwatchers: continue attribwatcher = attribwatchers[node] @@ -1032,10 +1086,21 @@ class ConfigManager(object): # to deletion, to make all watchers aware of the removed # node and take appropriate action checkattrs = attribwatcher + globattrs = {} + for attrglob in attribwatcher.get('_attrglobs', []): + for matched in fnmatch.filter(list(checkattrs), attrglob): + globattrs[matched] = attrglob for attrname in checkattrs: - if attrname not in attribwatcher: + if attrname == '_attrglobs': continue - for notifierid in attribwatcher[attrname].iterkeys(): + watchkey = attrname + # the attrib watcher could still have a glob + if attrname not in attribwatcher: + if attrname in globattrs: + watchkey = globattrs[attrname] + else: + continue + for notifierid in attribwatcher[watchkey]: if notifierid in notifdata: if node in notifdata[notifierid]['nodeattrs']: notifdata[notifierid]['nodeattrs'][node].append( @@ -1046,18 +1111,12 @@ class ConfigManager(object): else: notifdata[notifierid] = { 'nodeattrs': {node: [attrname]}, - 'callback': attribwatcher[attrname][notifierid] + 'callback': attribwatcher[watchkey][notifierid] } for watcher in notifdata.itervalues(): callback = watcher['callback'] - try: - callback(nodeattribs=watcher['nodeattrs'], configmanager=self) - except Exception: - global tracelog - if tracelog is None: - tracelog = confluent.log.Logger('trace') - tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event, - event=log.Events.stacktrace) + eventlet.spawn_n(_do_notifier, self, watcher, callback) + def del_nodes(self, nodes): if self.tenant in self._nodecollwatchers: @@ -1154,11 +1213,7 @@ class ConfigManager(object): if ('everything' in self._cfgstore['nodegroups'] and 'everything' not in attribmap[node]['groups']): attribmap[node]['groups'].append('everything') - 
elif (attrname not in allattributes.node or - ('type' in allattributes.node[attrname] and - not isinstance( - attrval, - allattributes.node[attrname]['type']))): + elif attribute_is_invalid(attrname, attrval): errstr = "{0} attribute on node {1} is invalid".format( attrname, node) raise ValueError(errstr) @@ -1206,7 +1261,7 @@ class ConfigManager(object): if self.tenant in self._nodecollwatchers: nodecollwatchers = self._nodecollwatchers[self.tenant] for watcher in nodecollwatchers.itervalues(): - watcher(added=newnodes, deleting=[], configmanager=self) + eventlet.spawn_n(_do_add_watcher, watcher, newnodes, self) self._bg_sync_to_file() #TODO: wait for synchronization to suceed/fail??) diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index d265837f..defa6089 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -1,6 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation +# Copyright 2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -20,6 +21,7 @@ # we track nodes that are actively being logged, watched, or have attached # there should be no more than one handler per node +import codecs import collections import confluent.config.configmanager as configmodule import confluent.exceptions as exc @@ -29,6 +31,7 @@ import confluent.core as plugin import confluent.util as util import eventlet import eventlet.event +import pyte import random import time import traceback @@ -37,6 +40,100 @@ _handled_consoles = {} _tracelog = None +try: + range = xrange +except NameError: + pass + +pytecolors2ansi = { + 'black': 0, + 'red': 1, + 'green': 2, + 'brown': 3, + 'blue': 4, + 'magenta': 5, + 'cyan': 6, + 'white': 7, + 'default': 9, +} +# might be able to use IBMPC map from pyte charsets, +# in that case, would have to mask out certain things (like ESC) +# in the same way that Screen's draw method would do +# for now at least get some of the arrows in there (note ESC is one +# of those arrows... so skip it... +ansichars = dict(zip((0x18, 0x19), u'\u2191\u2193')) + + +def _utf8_normalize(data, shiftin, decoder): + # first we give the stateful decoder a crack at the byte stream, + # we may come up empty in the event of a partial multibyte + try: + data = decoder.decode(data) + except UnicodeDecodeError: + # first order of business is to reset the state of + # the decoder to a clean one, so we can switch back to utf-8 + # when things change, for example going from an F1 setup menu stuck + # in the old days to a modern platform using utf-8 + decoder.setstate(codecs.getincrementaldecoder('utf-8')().getstate()) + # Ok, so we have something that is not valid UTF-8, + # our next stop is to try CP437. 
We don't try incremental + # decode, since cp437 is single byte + # replace is silly here, since there does not exist invalid c437, + # but just in case + data = data.decode('cp437', 'replace') + # Finally, the low part of ascii is valid utf-8, but we are going to be + # more interested in the cp437 versions (since this is console *output* + # not input + if shiftin is None: + data = data.translate(ansichars) + return data.encode('utf-8') + + +def pytechars2line(chars, maxlen=None): + line = '\x1b[m' # start at default params + lb = False # last bold + li = False # last italic + lu = False # last underline + ls = False # last strikethrough + lr = False # last reverse + lfg = 'default' # last fg color + lbg = 'default' # last bg color + hasdata = False + len = 1 + for charidx in range(maxlen): + char = chars[charidx] + csi = [] + if char.fg != lfg: + csi.append(30 + pytecolors2ansi[char.fg]) + lfg = char.fg + if char.bg != lbg: + csi.append(40 + pytecolors2ansi[char.bg]) + lbg = char.bg + if char.bold != lb: + lb = char.bold + csi.append(1 if lb else 22) + if char.italics != li: + li = char.italics + csi.append(3 if li else 23) + if char.underscore != lu: + lu = char.underscore + csi.append(4 if lu else 24) + if char.strikethrough != ls: + ls = char.strikethrough + csi.append(9 if ls else 29) + if char.reverse != lr: + lr = char.reverse + csi.append(7 if lr else 27) + if csi: + line += b'\x1b[' + b';'.join(['{0}'.format(x) for x in csi]) + b'm' + if not hasdata and char.data.encode('utf-8').rstrip(): + hasdata = True + line += char.data.encode('utf-8') + if maxlen and len >= maxlen: + break + len += 1 + return line, hasdata + class ConsoleHandler(object): _plugin_path = '/nodes/{0}/_console/session' @@ -44,6 +141,7 @@ class ConsoleHandler(object): _genwatchattribs = frozenset(('console.method', 'console.logging')) def __init__(self, node, configmanager): + self.clearpending = False self._dologging = True self._isondemand = False self.error = None @@ -51,14 +149,15 
@@ class ConsoleHandler(object): self.node = node self.connectstate = 'unconnected' self._isalive = True - self.buffer = bytearray() + self.buffer = pyte.Screen(100, 31) + self.termstream = pyte.ByteStream() + self.termstream.attach(self.buffer) self.livesessions = set([]) + self.utf8decoder = codecs.getincrementaldecoder('utf-8')() if self._logtobuffer: self.logger = log.Logger(node, console=True, tenant=configmanager.tenant) - (text, termstate, timestamp) = self.logger.read_recent_text(8192) - else: - (text, termstate, timestamp) = ('', 0, False) + (text, termstate, timestamp) = (b'', 0, False) # when reading from log file, we will use wall clock # it should usually match walltime. self.lasttime = 0 @@ -70,7 +169,7 @@ class ConsoleHandler(object): # wall clock has gone backwards, use current time as best # guess self.lasttime = util.monotonic_time() - self.buffer += text + self.clearbuffer() self.appmodedetected = False self.shiftin = None self.reconnect = None @@ -91,6 +190,16 @@ class ConsoleHandler(object): self.connectstate = 'connecting' eventlet.spawn(self._connect) + def feedbuffer(self, data): + try: + self.termstream.feed(data) + except StopIteration: # corrupt parser state, start over + self.termstream = pyte.ByteStream() + self.termstream.attach(self.buffer) + except Exception: + _tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event, + event=log.Events.stacktrace) + def check_isondemand(self): self._dologging = True attrvalue = self.cfgmgr.get_node_attributes( @@ -157,10 +266,18 @@ class ConsoleHandler(object): else: self._console.ping() + def clearbuffer(self): + self.feedbuffer( + '\x1bc[no replay buffer due to console.logging attribute set to ' + 'none or interactive,\r\nconnection loss, or service restart]') + self.clearpending = True + def _disconnect(self): if self.connectionthread: self.connectionthread.kill() self.connectionthread = None + # clear the terminal buffer when disconnected + self.clearbuffer() if self._console: self.log( 
logdata='console disconnected', ltype=log.DataTypes.event, @@ -200,6 +317,7 @@ class ConsoleHandler(object): _tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event, event=log.Events.stacktrace) if not isinstance(self._console, conapi.Console): + self.clearbuffer() self.connectstate = 'unconnected' self.error = 'misconfigured' self._send_rcpts({'connectstate': self.connectstate, @@ -219,6 +337,7 @@ class ConsoleHandler(object): try: self._console.connect(self.get_console_output) except exc.TargetEndpointBadCredentials: + self.clearbuffer() self.error = 'badcredentials' self.connectstate = 'unconnected' self._send_rcpts({'connectstate': self.connectstate, @@ -228,6 +347,7 @@ class ConsoleHandler(object): self.reconnect = eventlet.spawn_after(retrytime, self._connect) return except exc.TargetEndpointUnreachable: + self.clearbuffer() self.error = 'unreachable' self.connectstate = 'unconnected' self._send_rcpts({'connectstate': self.connectstate, @@ -237,6 +357,7 @@ class ConsoleHandler(object): self.reconnect = eventlet.spawn_after(retrytime, self._connect) return except Exception: + self.clearbuffer() _tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event, event=log.Events.stacktrace) self.error = 'unknown' @@ -257,6 +378,7 @@ class ConsoleHandler(object): self._send_rcpts({'connectstate': self.connectstate}) def _got_disconnected(self): + self.clearbuffer() if self.connectstate != 'unconnected': self.connectstate = 'unconnected' self.log( @@ -278,12 +400,6 @@ class ConsoleHandler(object): self.connectionthread.kill() self.connectionthread = None - def flushbuffer(self): - # Logging is handled in a different stream - # this buffer is now just for having screen redraw on - # connect - self.buffer = bytearray(self.buffer[-8192:]) - def get_console_output(self, data): # Spawn as a greenthread, return control as soon as possible # to the console object @@ -354,19 +470,18 @@ class ConsoleHandler(object): eventdata |= 2 self.log(data, eventdata=eventdata) 
self.lasttime = util.monotonic_time() - if isinstance(data, bytearray) or isinstance(data, bytes): - self.buffer += data - else: - self.buffer += data.encode('utf-8') + self.feedbuffer(data) # TODO: analyze buffer for registered events, examples: # panics # certificate signing request - if len(self.buffer) > 16384: - self.flushbuffer() - self._send_rcpts(data) + if self.clearpending: + self.clearpending = False + self.feedbuffer(b'\x1bc') + self._send_rcpts(b'\x1bc') + self._send_rcpts(_utf8_normalize(data, self.shiftin, self.utf8decoder)) def _send_rcpts(self, data): - for rcpt in self.livesessions: + for rcpt in list(self.livesessions): try: rcpt.data_handler(data) except: # No matter the reason, advance to next recipient @@ -385,7 +500,26 @@ class ConsoleHandler(object): 'connectstate': self.connectstate, 'clientcount': len(self.livesessions), } - retdata = '' + retdata = b'\x1b[H\x1b[J' # clear screen + pendingbl = b'' # pending blank lines + maxlen = 0 + for line in self.buffer.display: + line = line.rstrip() + if len(line) > maxlen: + maxlen = len(line) + for line in range(self.buffer.lines): + nline, notblank = pytechars2line(self.buffer.buffer[line], maxlen) + if notblank: + if pendingbl: + retdata += pendingbl + pendingbl = b'' + retdata += nline + '\r\n' + else: + pendingbl += nline + '\r\n' + if len(retdata) > 6: + retdata = retdata[:-2] # remove the last \r\n + retdata += b'\x1b[{0};{1}H'.format(self.buffer.cursor.y + 1, + self.buffer.cursor.x + 1) if self.shiftin is not None: # detected that terminal requested a # shiftin character set, relay that to the terminal that cannected retdata += '\x1b)' + self.shiftin @@ -393,27 +527,16 @@ class ConsoleHandler(object): retdata += '\x1b[?1h' else: retdata += '\x1b[?1l' - # an alternative would be to emulate a VT100 to know what the - # whole screen would look like - # this is one scheme to clear screen, move cursor then clear - bufidx = self.buffer.rfind('\x1b[H\x1b[J') - if bufidx >= 0: - return retdata + 
str(self.buffer[bufidx:]), connstate - # another scheme is the 2J scheme - bufidx = self.buffer.rfind('\x1b[2J') - if bufidx >= 0: - # there was some sort of clear screen event - # somewhere in the buffer, replay from that point - # in hopes that it reproduces the screen - return retdata + str(self.buffer[bufidx:]), connstate - else: - # we have no indication of last erase, play back last kibibyte - # to give some sense of context anyway - return retdata + str(self.buffer[-1024:]), connstate + return retdata, connstate def write(self, data): if self.connectstate == 'connected': - self._console.write(data) + try: + self._console.write(data) + except Exception: + _tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event, + event=log.Events.stacktrace) + self._got_disconnected() def disconnect_node(node, configmanager): diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index 608e8206..c2d647c6 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -1,7 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation -# Copyright 2015 Lenovo +# Copyright 2015-2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -33,11 +33,14 @@ # functions. 
Console is special and just get's passed through # see API.txt +import confluent import confluent.alerts as alerts import confluent.config.attributes as attrscheme +import confluent.discovery.core as disco import confluent.interface.console as console import confluent.exceptions as exc import confluent.messages as msg +import confluent.networking.macmap as macmap import confluent.noderange as noderange try: import confluent.shellmodule as shellmodule @@ -100,7 +103,8 @@ def load_plugins(): sys.path.pop(1) -rootcollections = ['noderange/', 'nodes/', 'nodegroups/', 'users/', 'events/'] +rootcollections = ['discovery/', 'events/', 'networking/', + 'noderange/', 'nodes/', 'nodegroups/', 'users/', 'version'] class PluginRoute(object): @@ -344,11 +348,14 @@ def delete_nodegroup_collection(collectionpath, configmanager): raise Exception("Not implemented") -def delete_node_collection(collectionpath, configmanager): +def delete_node_collection(collectionpath, configmanager, isnoderange): if len(collectionpath) == 2: # just node - node = collectionpath[-1] - configmanager.del_nodes([node]) - yield msg.DeletedResource(node) + nodes = [collectionpath[-1]] + if isnoderange: + nodes = noderange.NodeRange(nodes[0], configmanager).nodes + configmanager.del_nodes(nodes) + for node in nodes: + yield msg.DeletedResource(node) else: raise Exception("Not implemented") @@ -392,6 +399,7 @@ def create_group(inputdata, configmanager): configmanager.add_group_attributes(attribmap) except ValueError as e: raise exc.InvalidArgumentException(str(e)) + yield msg.CreatedResource(groupname) def create_node(inputdata, configmanager): @@ -405,6 +413,25 @@ def create_node(inputdata, configmanager): configmanager.add_node_attributes(attribmap) except ValueError as e: raise exc.InvalidArgumentException(str(e)) + yield msg.CreatedResource(nodename) + + +def create_noderange(inputdata, configmanager): + try: + noder = inputdata['name'] + del inputdata['name'] + attribmap = {} + for node in 
noderange.NodeRange(noder).nodes: + attribmap[node] = inputdata + except KeyError: + raise exc.InvalidArgumentException('name not specified') + try: + configmanager.add_node_attributes(attribmap) + except ValueError as e: + raise exc.InvalidArgumentException(str(e)) + for node in attribmap: + yield msg.CreatedResource(node) + def enumerate_collections(collections): @@ -419,7 +446,7 @@ def handle_nodegroup_request(configmanager, inputdata, if len(pathcomponents) < 2: if operation == "create": inputdata = msg.InputAttributes(pathcomponents, inputdata) - create_group(inputdata.attribs, configmanager) + return create_group(inputdata.attribs, configmanager) allgroups = list(configmanager.get_groups()) try: allgroups.sort(key=noderange.humanify_nodename) @@ -458,6 +485,16 @@ def handle_nodegroup_request(configmanager, inputdata, raise Exception("unknown case encountered") +class BadPlugin(object): + def __init__(self, node, plugin): + self.node = node + self.plugin = plugin + + def error(self, *args, **kwargs): + yield msg.ConfluentNodeError( + self.node, self.plugin + ' is not a supported plugin') + + def handle_node_request(configmanager, inputdata, operation, pathcomponents, autostrip=True): iscollection = False @@ -489,11 +526,14 @@ def handle_node_request(configmanager, inputdata, operation, # this is enumerating a list of nodes or just empty noderange if isnoderange and operation == "retrieve": return iterate_collections([]) + elif isnoderange and operation == "create": + inputdata = msg.InputAttributes(pathcomponents, inputdata) + return create_noderange(inputdata.attribs, configmanager) elif isnoderange or operation == "delete": raise exc.InvalidArgumentException() if operation == "create": inputdata = msg.InputAttributes(pathcomponents, inputdata) - create_node(inputdata.attribs, configmanager) + return create_node(inputdata.attribs, configmanager) allnodes = list(configmanager.list_nodes()) try: allnodes.sort(key=noderange.humanify_nodename) @@ -524,7 +564,8 @@ 
def handle_node_request(configmanager, inputdata, operation, raise exc.InvalidArgumentException('Custom interface required for resource') if iscollection: if operation == "delete": - return delete_node_collection(pathcomponents, configmanager) + return delete_node_collection(pathcomponents, configmanager, + isnoderange) elif operation == "retrieve": return enumerate_node_collection(pathcomponents, configmanager) else: @@ -561,7 +602,11 @@ def handle_node_request(configmanager, inputdata, operation, if attrname in nodeattr[node]: plugpath = nodeattr[node][attrname]['value'] if plugpath is not None: - hfunc = getattr(pluginmap[plugpath], operation) + try: + hfunc = getattr(pluginmap[plugpath], operation) + except KeyError: + nodesbyhandler[BadPlugin(node, plugpath).error] = [node] + continue if hfunc in nodesbyhandler: nodesbyhandler[hfunc].append(node) else: @@ -588,6 +633,14 @@ def handle_node_request(configmanager, inputdata, operation, # return stripnode(passvalues[0], nodes[0]) +def handle_discovery(pathcomponents, operation, configmanager, inputdata): + if pathcomponents[0] == 'detected': + pass + +def handle_discovery(pathcomponents, operation, configmanager, inputdata): + if pathcomponents[0] == 'detected': + pass + def handle_path(path, operation, configmanager, inputdata=None, autostrip=True): """Given a full path request, return an object. 
@@ -612,6 +665,14 @@ def handle_path(path, operation, configmanager, inputdata=None, autostrip=True): # single node request of some sort return handle_node_request(configmanager, inputdata, operation, pathcomponents, autostrip) + elif pathcomponents[0] == 'discovery': + return disco.handle_api_request( + configmanager, inputdata, operation, pathcomponents) + elif pathcomponents[0] == 'networking': + return macmap.handle_api_request( + configmanager, inputdata, operation, pathcomponents) + elif pathcomponents[0] == 'version': + return (msg.Attributes(kv={'version': confluent.__version__}),) elif pathcomponents[0] == 'users': # TODO: when non-administrator accounts exist, # they must only be allowed to see their own user @@ -646,5 +707,8 @@ def handle_path(path, operation, configmanager, inputdata=None, autostrip=True): raise exc.NotFoundException() if operation == 'update': return alerts.decode_alert(inputdata, configmanager) + elif pathcomponents[0] == 'discovery': + return handle_discovery(pathcomponents[1:], operation, configmanager, + inputdata) else: raise exc.NotFoundException() diff --git a/confluent_server/confluent/exceptions.py b/confluent_server/confluent/exceptions.py index 5bbead51..ca618b0e 100644 --- a/confluent_server/confluent/exceptions.py +++ b/confluent_server/confluent/exceptions.py @@ -81,6 +81,12 @@ class GlobalConfigError(ConfluentException): apierrorstr = 'Global configuration contains an error' +class TargetResourceUnavailable(ConfluentException): + # This is meant for scenarios like asking to read a sensor that is + # currently unavailable. 
This may be a persistent or transient state + apierrocode = 503 + apierrorstr = 'Target Resource Unavailable' + class PubkeyInvalid(ConfluentException): apierrorcode = 502 apierrorstr = '502 - Invalid certificate or key on target' diff --git a/confluent_server/confluent/log.py b/confluent_server/confluent/log.py index c63ddf12..aa1f260b 100644 --- a/confluent_server/confluent/log.py +++ b/confluent_server/confluent/log.py @@ -743,6 +743,7 @@ tracelog = None def log(logdata=None, ltype=None, event=0, eventdata=None): + global globaleventlog if globaleventlog is None: globaleventlog = Logger('events') globaleventlog.log(logdata, ltype, event, eventdata) diff --git a/confluent_server/confluent/main.py b/confluent_server/confluent/main.py index 7457dbdf..b8439b3f 100644 --- a/confluent_server/confluent/main.py +++ b/confluent_server/confluent/main.py @@ -39,6 +39,7 @@ except ImportError: #On platforms without pwd, give up on the sockapi in general and be http #only for now pass +import confluent.discovery.core as disco import eventlet dbgif = False if map(int, (eventlet.__version__.split('.'))) > [0, 18]: @@ -238,6 +239,7 @@ def run(): sock_bind_host, sock_bind_port = _get_connector_config('socket') webservice = httpapi.HttpApi(http_bind_host, http_bind_port) webservice.start() + disco.start_detection() try: sockservice = sockapi.SockApi(sock_bind_host, sock_bind_port) sockservice.start() diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index fc4a7df2..ac26d3da 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -1,7 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation -# Copyright 2015-2016 Lenovo +# Copyright 2015-2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -194,6 +194,17 @@ class ConfluentNodeError(object): raise Exception(self.error) +class ConfluentResourceUnavailable(ConfluentNodeError): + apicode = 503 + + def __init__(self, node, errstr='Unavailable'): + self.node = node + self.error = errstr + + def strip_node(self, node): + raise exc.TargetResourceUnavailable() + + class ConfluentTargetTimeout(ConfluentNodeError): apicode = 504 @@ -228,9 +239,19 @@ class ConfluentTargetInvalidCredentials(ConfluentNodeError): class DeletedResource(ConfluentMessage): + notnode = True def __init__(self, resource): - self.kvpairs = {} + self.kvpairs = {'deleted': resource} +class CreatedResource(ConfluentMessage): + notnode = True + def __init__(self, resource): + self.kvpairs = {'created': resource} + +class AssignedResource(ConfluentMessage): + notnode = True + def __init__(self, resource): + self.kvpairs = {'assigned': resource} class ConfluentChoiceMessage(ConfluentMessage): valid_values = set() @@ -325,9 +346,16 @@ class ChildCollection(LinkRelation): extension) +# TODO(jjohnson2): enhance the following to support expressions: +# InputNetworkConfiguration +# InputMCI +# InputDomainName +# InputNTPServer def get_input_message(path, operation, inputdata, nodes=None, multinode=False): if path[0] == 'power' and path[1] == 'state' and operation != 'retrieve': return InputPowerMessage(path, nodes, inputdata) + elif path == ['attributes', 'expression']: + return InputExpression(path, inputdata, nodes) elif path[0] in ('attributes', 'users') and operation != 'retrieve': return InputAttributes(path, inputdata, nodes) elif path == ['boot', 'nextdevice'] and operation != 'retrieve': @@ -387,7 +415,47 @@ class InputAlertData(ConfluentMessage): return self.alertparams +class InputExpression(ConfluentMessage): + # This is specifically designed to suppress the expansion of an expression + # so that it can make it intact to the pertinent configmanager function + def __init__(self, path, inputdata, nodes=None): + self.nodeattribs = {} + 
nestedmode = False + if not inputdata: + raise exc.InvalidArgumentException('no request data provided') + if nodes is None: + self.attribs = inputdata + return + for node in nodes: + if node in inputdata: + nestedmode = True + self.nodeattribs[node] = inputdata[node] + if nestedmode: + for key in inputdata: + if key not in nodes: + raise exc.InvalidArgumentException + else: + for node in nodes: + self.nodeattribs[node] = inputdata + + def get_attributes(self, node): + if node not in self.nodeattribs: + return {} + nodeattr = deepcopy(self.nodeattribs[node]) + return nodeattr + + class InputAttributes(ConfluentMessage): + # This is particularly designed for attributes, where a simple string + # should become either a string value or a dict with {'expression':} to + # preserve the client provided expression for posterity, rather than + # immediate consumption. + # for things like node configuration or similar, a different class is + # appropriate since it nedes to immediately expand an expression. 
+ # with that class, the 'InputExpression' and calling code in attributes.py + # might be deprecated in favor of the generic expression expander + # and a small function in attributes.py to reflect the expansion back + # to the client def __init__(self, path, inputdata, nodes=None): self.nodeattribs = {} nestedmode = False @@ -468,12 +536,13 @@ class InputCredential(ConfluentMessage): if len(path) == 4: inputdata['uid'] = path[-1] # if the operation is 'create' check if all fields are present - elif ('uid' not in inputdata or 'privilege_level' not in inputdata or - 'username' not in inputdata or 'password' not in inputdata): - raise exc.InvalidArgumentException('all fields are required') - - if 'uid' not in inputdata: - raise exc.InvalidArgumentException('uid is missing') + missingattrs = [] + for attrname in ('uid', 'privilege_level', 'username', 'password'): + if attrname not in inputdata: + missingattrs.append(attrname) + if missingattrs: + raise exc.InvalidArgumentException( + 'Required fields missing: {0}'.format(','.join(missingattrs))) if (isinstance(inputdata['uid'], str) and not inputdata['uid'].isdigit()): raise exc.InvalidArgumentException('uid must be a number') diff --git a/confluent_server/confluent/networking/macmap.py b/confluent_server/confluent/networking/macmap.py index b764929a..441b2d38 100644 --- a/confluent_server/confluent/networking/macmap.py +++ b/confluent_server/confluent/networking/macmap.py @@ -1,6 +1,6 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 -# Copyright 2016 Lenovo +# Copyright 2016-2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -31,16 +31,24 @@ # this module will provide mac to switch and full 'ifName' label # This functionality is restricted to the null tenant +if __name__ == '__main__': + import sys + import confluent.config.configmanager as cfm import confluent.exceptions as exc import confluent.log as log +import confluent.messages as msg import confluent.snmputil as snmp +import confluent.util as util from eventlet.greenpool import GreenPool +import eventlet +import eventlet.semaphore import re _macmap = {} _macsbyswitch = {} _nodesbymac = {} _switchportmap = {} +vintage = None _whitelistnames = ( @@ -90,7 +98,19 @@ def _namesmatch(switchdesc, userdesc): def _map_switch(args): try: return _map_switch_backend(args) + except UnicodeError: + log.log({'error': "Cannot resolve switch '{0}' to an address".format( + args[0])}) + except exc.TargetEndpointUnreachable: + log.log({'error': "Timeout or bad SNMPv1 community string trying to " + "reach switch '{0}'".format( + args[0])}) + except exc.TargetEndpointBadCredentials: + log.log({'error': "Bad SNMPv3 credentials for \'{0}\'".format( + args[0])}) except Exception as e: + log.log({'error': 'Unexpected condition trying to reach switch "{0}"' + ' check trace log for more'.format(args[0])}) log.logtrace() @@ -120,7 +140,13 @@ def _map_switch_backend(args): # fallback if ifName is empty # global _macmap - switch, password, user = args + if len(args) == 3: + switch, password, user = args + if not user: + user = None + else: + switch, password = args + user = None haveqbridge = False mactobridge = {} conn = snmp.Session(switch, password, user) @@ -135,12 +161,24 @@ def _map_switch_backend(args): ) mactobridge[macaddr] = int(bridgeport) if not haveqbridge: - raise exc.NotImplementedException('TODO: Bridge-MIB without QBRIDGE') + for vb in conn.walk('1.3.6.1.2.1.17.4.3.1.2'): + oid, bridgeport = vb + if not bridgeport: + continue + oid = str(oid).rsplit('.', 6) + macaddr = '{0:02x}:{1:02x}:{2:02x}:{3:02x}:{4:02x}:{5:02x}'.format( + *([int(x) 
for x in oid[-6:]]) + ) + mactobridge[macaddr] = int(bridgeport) bridgetoifmap = {} for vb in conn.walk('1.3.6.1.2.1.17.1.4.1.2'): bridgeport, ifidx = vb bridgeport = int(str(bridgeport).rsplit('.', 1)[1]) - bridgetoifmap[bridgeport] = int(ifidx) + try: + bridgetoifmap[bridgeport] = int(ifidx) + except ValueError: + # ifidx might be '', skip in such a case + continue ifnamemap = {} havenames = False for vb in conn.walk('1.3.6.1.2.1.31.1.1.1.1'): @@ -156,17 +194,41 @@ def _map_switch_backend(args): ifidx = int(str(ifidx).rsplit('.', 1)[1]) ifnamemap[ifidx] = str(ifname) maccounts = {} + bridgetoifvalid = False for mac in mactobridge: - ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]] + try: + ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]] + bridgetoifvalid = True + except KeyError: + continue if ifname not in maccounts: maccounts[ifname] = 1 else: maccounts[ifname] += 1 + if not bridgetoifvalid: + bridgetoifmap = {} + # Not a single mac address resolved to an interface index, chances are + # that the switch is broken, and the mactobridge is reporting ifidx + # instead of bridge port index + # try again, skipping the bridgetoifmap lookup + for mac in mactobridge: + try: + ifname = ifnamemap[mactobridge[mac]] + bridgetoifmap[mactobridge[mac]] = mactobridge[mac] + except KeyError: + continue + if ifname not in maccounts: + maccounts[ifname] = 1 + else: + maccounts[ifname] += 1 _macsbyswitch[switch] = {} for mac in mactobridge: # We want to merge it so that when a mac appears in multiple # places, it is captured. 
- ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]] + try: + ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]] + except KeyError: + continue if mac in _macmap: _macmap[mac].append((switch, ifname, maccounts[ifname])) else: @@ -178,14 +240,34 @@ def _map_switch_backend(args): nodename = _nodelookup(switch, ifname) if nodename is not None: if mac in _nodesbymac and _nodesbymac[mac] != nodename: - log.log({'warning': '{0} and {1} described by ambiguous' + # For example, listed on both a real edge port + # and by accident a trunk port + log.log({'error': '{0} and {1} described by ambiguous' ' switch topology values'.format(nodename, _nodesbymac[mac] )}) - _nodesbymac[mac] = nodename + _nodesbymac[mac] = None + else: + _nodesbymac[mac] = nodename -def update_macmap(configmanager): +def find_node_by_mac(mac, configmanager): + now = util.monotonic_time() + if vintage and (now - vintage) < 90 and mac in _nodesbymac: + return _nodesbymac[mac] + # do not actually sweep switches more than once every 30 seconds + # however, if there is an update in progress, wait on it + for _ in update_macmap(configmanager, vintage and (now - vintage) < 30): + if mac in _nodesbymac: + return _nodesbymac[mac] + # If update_mac bailed out, still check one last time + return _nodesbymac.get(mac, None) + + +mapupdating = eventlet.semaphore.Semaphore() + + +def update_macmap(configmanager, impatient=False): """Interrogate switches to build/update mac table Begin a rebuild process. This process is a generator that will yield @@ -193,57 +275,205 @@ def update_macmap(configmanager): recheck the cache as results become possible, rather than having to wait for the process to complete to interrogate. 
""" + if mapupdating.locked(): + while mapupdating.locked(): + eventlet.sleep(1) + yield None + return + if impatient: + return + completions = _full_updatemacmap(configmanager) + for completion in completions: + try: + yield completion + except GeneratorExit: + # the calling function has stopped caring, but we want to finish + # the sweep, background it + eventlet.spawn_n(_finish_update, completions) + raise + +def _finish_update(completions): + for _ in completions: + pass + +def _full_updatemacmap(configmanager): + global vintage global _macmap global _nodesbymac global _switchportmap - # Clear all existing entries - _macmap = {} - _nodesbymac = {} - _switchportmap = {} - if configmanager.tenant is not None: - raise exc.ForbiddenRequest('Network topology not available to tenants') - nodelocations = configmanager.get_node_attributes( - configmanager.list_nodes(), ('hardwaremanagement.switch', - 'hardwaremanagement.switchport')) - switches = set([]) - for node in nodelocations: - cfg = nodelocations[node] - if 'hardwaremanagement.switch' in cfg: - curswitch = cfg['hardwaremanagement.switch']['value'] - switches.add(curswitch) - if 'hardwaremanagement.switchport' in cfg: - portname = cfg['hardwaremanagement.switchport']['value'] - if curswitch not in _switchportmap: - _switchportmap[curswitch] = {} - if portname in _switchportmap[curswitch]: - log.log({'warning': 'Duplicate switch topology config for ' - '{0} and {1}'.format(node, - _switchportmap[ - curswitch][ - portname])}) - _switchportmap[curswitch][portname] = node - switchcfg = configmanager.get_node_attributes( - switches, ('secret.hardwaremanagementuser', - 'secret.hardwaremanagementpassword'), decrypt=True) - switchauth = [] - for switch in switches: - password = 'public' - user = None - if (switch in switchcfg and - 'secret.hardwaremanagementpassword' in switchcfg[switch]): - password = switchcfg[switch]['secret.hardwaremanagementpassword'][ - 'value'] - if 'secret.hardwaremanagementuser' in 
switchcfg[switch]: - user = switchcfg[switch]['secret.hardwaremanagementuser'][ - 'value'] - switchauth.append((switch, password, user)) - pool = GreenPool() - for res in pool.imap(_map_switch, switchauth): - yield res - print(repr(_macmap)) + global _macsbyswitch + with mapupdating: + vintage = util.monotonic_time() + # Clear all existing entries + _macmap = {} + _nodesbymac = {} + _switchportmap = {} + _macsbyswitch = {} + if configmanager.tenant is not None: + raise exc.ForbiddenRequest( + 'Network topology not available to tenants') + nodelocations = configmanager.get_node_attributes( + configmanager.list_nodes(), ('net*.switch', 'net*.switchport')) + switches = set([]) + for node in nodelocations: + cfg = nodelocations[node] + for attr in cfg: + if not attr.endswith('.switch') or 'value' not in cfg[attr]: + continue + curswitch = cfg[attr].get('value', None) + if not curswitch: + continue + switches.add(curswitch) + switchportattr = attr + 'port' + if switchportattr in cfg: + portname = cfg[switchportattr].get('value', '') + if not portname: + continue + if curswitch not in _switchportmap: + _switchportmap[curswitch] = {} + if portname in _switchportmap[curswitch]: + log.log({'error': 'Duplicate switch topology config ' + 'for {0} and {1}'.format( + node, + _switchportmap[curswitch][ + portname])}) + _switchportmap[curswitch][portname] = None + else: + _switchportmap[curswitch][portname] = node + switchcfg = configmanager.get_node_attributes( + switches, ('secret.hardwaremanagementuser', 'secret.snmpcommunity', + 'secret.hardwaremanagementpassword'), decrypt=True) + switchauth = [] + for switch in switches: + if not switch: + continue + switchparms = switchcfg.get(switch, {}) + user = None + password = switchparms.get( + 'secret.snmpcommunity', {}).get('value', None) + if not password: + password = switchparms.get( + 'secret.hardwaremanagementpassword', {}).get('value', + 'public') + user = switchparms.get( + 'secret.hardwaremanagementuser', {}).get('value', 
None) + switchauth.append((switch, password, user)) + pool = GreenPool() + for ans in pool.imap(_map_switch, switchauth): + vintage = util.monotonic_time() + yield ans + + +def _dump_locations(info, macaddr, nodename=None): + yield msg.KeyValueData({'possiblenode': nodename, 'mac': macaddr}) + retdata = {} + portinfo = [] + for location in info: + portinfo.append({'switch': location[0], + 'port': location[1], 'macsonport': location[2]}) + retdata['ports'] = sorted(portinfo, key=lambda x: x['macsonport'], + reverse=True) + yield msg.KeyValueData(retdata) + + +def handle_api_request(configmanager, inputdata, operation, pathcomponents): + if operation == 'retrieve': + return handle_read_api_request(pathcomponents) + if (operation in ('update', 'create') and + pathcomponents == ['networking', 'macs', 'rescan']): + if inputdata != {'rescan': 'start'}: + raise exc.InvalidArgumentException() + eventlet.spawn_n(rescan, configmanager) + return [msg.KeyValueData({'rescan': 'started'})] + raise exc.NotImplementedException( + 'Operation {0} on {1} not implemented'.format( + operation, '/'.join(pathcomponents))) + + +def handle_read_api_request(pathcomponents): + # TODO(jjohnson2): discovery core.py api handler design, apply it here + # to make this a less tangled mess as it gets extended + if len(pathcomponents) == 1: + return [msg.ChildCollection('macs/')] + elif len(pathcomponents) == 2: + return [msg.ChildCollection(x) for x in (# 'by-node/', + 'by-mac/', 'by-switch/', + 'rescan')] + if False and pathcomponents[2] == 'by-node': + # TODO: should be list of node names, and then under that 'by-mac' + if len(pathcomponents) == 3: + return [msg.ChildCollection(x.replace(':', '-')) + for x in sorted(list(_nodesbymac))] + elif len(pathcomponents) == 4: + macaddr = pathcomponents[-1].replace('-', ':') + return dump_macinfo(macaddr) + elif pathcomponents[2] == 'by-mac': + if len(pathcomponents) == 3: + return [msg.ChildCollection(x.replace(':', '-')) + for x in 
sorted(list(_macmap))] + elif len(pathcomponents) == 4: + return dump_macinfo(pathcomponents[-1]) + elif pathcomponents[2] == 'by-switch': + if len(pathcomponents) == 3: + return [msg.ChildCollection(x + '/') + for x in sorted(list(_macsbyswitch))] + + if len(pathcomponents) == 4: + return [msg.ChildCollection('by-port/')] + if len(pathcomponents) == 5: + switchname = pathcomponents[-2] + if switchname not in _macsbyswitch: + raise exc.NotFoundException( + 'No known macs for switch {0}'.format(switchname)) + return [msg.ChildCollection(x.replace('/', '-') + '/') + for x in sorted(list(_macsbyswitch[switchname]))] + if len(pathcomponents) == 6: + return [msg.ChildCollection('by-mac/')] + if len(pathcomponents) == 7: + switchname = pathcomponents[-4] + portname = pathcomponents[-2] + try: + if portname not in _macsbyswitch[switchname]: + portname = portname.replace('-', '/') + maclist = _macsbyswitch[switchname][portname] + except KeyError: + raise exc.NotFoundException('No known macs for switch {0} ' + 'port {1}'.format(switchname, + portname)) + return [msg.ChildCollection(x.replace(':', '-')) + for x in sorted(maclist)] + if len(pathcomponents) == 8: + return dump_macinfo(pathcomponents[-1]) + raise exc.NotFoundException('Unrecognized path {0}'.format( + '/'.join(pathcomponents))) + + +def dump_macinfo(macaddr): + macaddr = macaddr.replace('-', ':') + info = _macmap.get(macaddr, None) + if info is None: + raise exc.NotFoundException( + '{0} not found in mac table of ' + 'any known switches'.format(macaddr)) + return _dump_locations(info, macaddr, _nodesbymac.get(macaddr, None)) + + +def rescan(cfg): + for _ in update_macmap(cfg): + pass if __name__ == '__main__': - # invoke as switch community - import sys - _map_switch(sys.argv[1], sys.argv[2]) + cg = cfm.ConfigManager(None) + for res in update_macmap(cg): + print("map has updated") + if len(sys.argv) > 1: + print(repr(_macmap[sys.argv[1]])) + print(repr(_nodesbymac[sys.argv[1]])) + else: + print("Mac to Node 
lookup table: -------------------") + print(repr(_nodesbymac)) + print("Mac to location lookup table: -------------------") + print(repr(_macmap)) + print("switch to fdb lookup table: -------------------") + print(repr(_macsbyswitch)) \ No newline at end of file diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index 1899e6f2..79a77777 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -1,7 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation -# Copyright 2015 Lenovo +# Copyright 2015-2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -170,6 +170,17 @@ class NodeRange(object): def _expandstring(self, element, filternodes=None): prefix = '' + if element[0][0] in ('/', '~'): + element = ''.join(element) + nameexpression = element[1:] + if self.cfm is None: + raise Exception('Verification configmanager required') + return set(self.cfm.filter_nodenames(nameexpression, filternodes)) + elif '=' in element[0] or '!~' in element[0]: + element = ''.join(element) + if self.cfm is None: + raise Exception('Verification configmanager required') + return set(self.cfm.filter_node_attributes(element, filternodes)) for idx in xrange(len(element)): if element[idx][0] == '[': nodes = set([]) @@ -191,19 +202,10 @@ class NodeRange(object): nodes |= NodeRange( grpcfg['noderange']['value'], self.cfm).nodes return nodes - if '-' in element and ':' not in element: - return self.expandrange(element, '-') - elif ':' in element: # : range for less ambiguity + if ':' in element: # : range for less ambiguity return self.expandrange(element, ':') - elif '=' in element or '!~' in element: - if self.cfm is None: - raise Exception('Verification configmanager required') - return set(self.cfm.filter_node_attributes(element, filternodes)) - elif element[0] in ('/', '~'): - 
nameexpression = element[1:] - if self.cfm is None: - raise Exception('Verification configmanager required') - return set(self.cfm.filter_nodenames(nameexpression, filternodes)) + elif '-' in element: + return self.expandrange(element, '-') elif '+' in element: element, increment = element.split('+') try: diff --git a/confluent_server/confluent/plugins/configuration/attributes.py b/confluent_server/confluent/plugins/configuration/attributes.py index 10aa3307..11e00f8f 100644 --- a/confluent_server/confluent/plugins/configuration/attributes.py +++ b/confluent_server/confluent/plugins/configuration/attributes.py @@ -28,14 +28,16 @@ def retrieve(nodes, element, configmanager, inputdata): def retrieve_nodegroup(nodegroup, element, configmanager, inputdata): grpcfg = configmanager.get_nodegroup_attributes(nodegroup) if element == 'all': - nodes = [] - if 'nodes' in grpcfg: - nodes = list(grpcfg['nodes']) - yield msg.ListAttributes(kv={'nodes': nodes}, - desc="The nodes belonging to this group") - for attribute in sorted(allattributes.node.iterkeys()): + theattrs = set(allattributes.node).union(set(grpcfg)) + theattrs.add('nodes') + for attribute in sorted(theattrs): if attribute == 'groups': continue + if attribute == 'nodes': + yield msg.ListAttributes( + kv={'nodes': list(grpcfg.get('nodes', []))}, + desc="The nodes belonging to this group") + continue if attribute in grpcfg: val = grpcfg[attribute] else: @@ -45,13 +47,17 @@ def retrieve_nodegroup(nodegroup, element, configmanager, inputdata): kv={attribute: val}, desc=allattributes.node[attribute]['description']) elif isinstance(val, list): - raise Exception("TODO") + yield msg.ListAttributes( + kv={attribute: val}, + desc=allattributes.node.get( + attribute, {}).get('description', '')) else: yield msg.Attributes( kv={attribute: val}, - desc=allattributes.node[attribute]['description']) + desc=allattributes.node.get(attribute, {}).get( + 'description', '')) if element == 'current': - for attribute in 
sorted(grpcfg.iterkeys()): + for attribute in sorted(list(grpcfg)): currattr = grpcfg[attribute] if attribute == 'nodes': desc = 'The nodes belonging to this group' @@ -61,7 +67,7 @@ def retrieve_nodegroup(nodegroup, element, configmanager, inputdata): try: desc = allattributes.node[attribute]['description'] except KeyError: - desc = 'Unknown' + desc = '' if 'value' in currattr or 'expression' in currattr: yield msg.Attributes(kv={attribute: currattr}, desc=desc) elif 'cryptvalue' in currattr: @@ -86,7 +92,8 @@ def retrieve_nodes(nodes, element, configmanager, inputdata): attributes = configmanager.get_node_attributes(nodes) if element[-1] == 'all': for node in nodes: - for attribute in sorted(allattributes.node.iterkeys()): + theattrs = set(allattributes.node).union(set(attributes[node])) + for attribute in sorted(theattrs): if attribute in attributes[node]: # have a setting for it val = attributes[node][attribute] elif attribute == 'groups': # no setting, provide a blank @@ -96,23 +103,26 @@ def retrieve_nodes(nodes, element, configmanager, inputdata): if attribute.startswith('secret.'): yield msg.CryptedAttributes( node, {attribute: val}, - allattributes.node[attribute]['description']) + allattributes.node.get( + attribute, {}).get('description', '')) elif isinstance(val, list): yield msg.ListAttributes( node, {attribute: val}, - allattributes.node[attribute]['description']) + allattributes.node.get( + attribute, {}).get('description', '')) else: yield msg.Attributes( node, {attribute: val}, - allattributes.node[attribute]['description']) + allattributes.node.get( + attribute, {}).get('description', '')) elif element[-1] == 'current': - for node in attributes.iterkeys(): + for node in sorted(list(attributes)): for attribute in sorted(attributes[node].iterkeys()): currattr = attributes[node][attribute] try: desc = allattributes.node[attribute]['description'] except KeyError: - desc = 'Unknown' + desc = '' if 'value' in currattr or 'expression' in currattr: yield 
msg.Attributes(node, {attribute: currattr}, desc) elif 'cryptvalue' in currattr: diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index ff956a9a..c5c1d72d 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -36,7 +36,8 @@ console.session.socket.getaddrinfo = eventlet.support.greendns.getaddrinfo def exithandler(): - console.session.iothread.join() + if console.session.iothread is not None: + console.session.iothread.join() atexit.register(exithandler) @@ -52,6 +53,15 @@ sensor_categories = { } +class EmptySensor(object): + def __init__(self, name): + self.name = name + self.value = None + self.states = ['Unavailable'] + self.units = None + self.health = 'ok' + + def hex2bin(hexstring): hexvals = hexstring.split(':') if len(hexvals) < 2: @@ -300,7 +310,6 @@ def perform_requests(operator, nodes, element, cfg, inputdata): pass - def perform_request(operator, node, element, configdata, inputdata, cfg, results): try: @@ -361,7 +370,7 @@ class IpmiHandler(object): ipmisess.wait_for_rsp(180) if not (self.broken or self.loggedin): raise exc.TargetEndpointUnreachable( - "Login process to " + bmc + " died") + "Login process to " + connparams['bmc'] + " died") except socket.gaierror as ge: if ge[0] == -2: raise exc.TargetEndpointUnreachable(ge[1]) @@ -599,29 +608,31 @@ class IpmiHandler(object): self.sensormap[simplify_name(resourcename)] = resourcename def read_sensors(self, sensorname): - try: - if sensorname == 'all': - sensors = self.ipmicmd.get_sensor_descriptions() - readings = [] - for sensor in filter(self.match_sensor, sensors): - try: - reading = self.ipmicmd.get_sensor_reading( - sensor['name']) - except pygexc.IpmiException as ie: - if ie.ipmicode == 203: - continue - raise - if hasattr(reading, 'health'): - reading.health = _str_health(reading.health) - readings.append(reading) - 
self.output.put(msg.SensorReadings(readings, name=self.node)) - else: - self.make_sensor_map() - if sensorname not in self.sensormap: - self.output.put( - msg.ConfluentTargetNotFound(self.node, - 'Sensor not found')) - return + if sensorname == 'all': + sensors = self.ipmicmd.get_sensor_descriptions() + readings = [] + for sensor in filter(self.match_sensor, sensors): + try: + reading = self.ipmicmd.get_sensor_reading( + sensor['name']) + except pygexc.IpmiException as ie: + if ie.ipmicode == 203: + self.output.put(msg.SensorReadings([EmptySensor( + sensor['name'])], name=self.node)) + continue + raise + if hasattr(reading, 'health'): + reading.health = _str_health(reading.health) + readings.append(reading) + self.output.put(msg.SensorReadings(readings, name=self.node)) + else: + self.make_sensor_map() + if sensorname not in self.sensormap: + self.output.put( + msg.ConfluentTargetNotFound(self.node, + 'Sensor not found')) + return + try: reading = self.ipmicmd.get_sensor_reading( self.sensormap[sensorname]) if hasattr(reading, 'health'): @@ -629,8 +640,13 @@ class IpmiHandler(object): self.output.put( msg.SensorReadings([reading], name=self.node)) - except pygexc.IpmiException: - self.output.put(msg.ConfluentTargetTimeout(self.node)) + except pygexc.IpmiException as ie: + if ie.ipmicode == 203: + self.output.put(msg.ConfluentResourceUnavailable( + self.node, 'Unavailable' + )) + else: + self.output.put(msg.ConfluentTargetTimeout(self.node)) def list_inventory(self): try: diff --git a/confluent_server/confluent/snmputil.py b/confluent_server/confluent/snmputil.py index 9b5afd0d..ca467037 100644 --- a/confluent_server/confluent/snmputil.py +++ b/confluent_server/confluent/snmputil.py @@ -24,6 +24,7 @@ import confluent.exceptions as exc import eventlet from eventlet.support.greendns import getaddrinfo +import pysnmp.smi.error as snmperr import socket snmp = eventlet.import_patched('pysnmp.hlapi') @@ -85,14 +86,22 @@ class Session(object): walking = 
snmp.bulkCmd(self.eng, self.authdata, tp, ctx, 0, 10, obj, lexicographicMode=False) - for rsp in walking: - errstr, errnum, erridx, answers = rsp - if errstr: - raise exc.TargetEndpointUnreachable(str(errstr)) - elif errnum: - raise exc.ConfluentException(errnum.prettyPrint()) - for ans in answers: - yield ans + try: + for rsp in walking: + errstr, errnum, erridx, answers = rsp + if errstr: + errstr = str(errstr) + if errstr in ('unknownUserName', 'wrongDigest'): + raise exc.TargetEndpointBadCredentials(errstr) + # need to do bad credential versus timeout + raise exc.TargetEndpointUnreachable(errstr) + elif errnum: + raise exc.ConfluentException(errnum.prettyPrint()) + for ans in answers: + yield ans + except snmperr.WrongValueError: + raise exc.TargetEndpointBadCredentials('Invalid SNMPv3 password') + if __name__ == '__main__': diff --git a/confluent_server/confluent/sockapi.py b/confluent_server/confluent/sockapi.py index 679c6140..558c7c9b 100644 --- a/confluent_server/confluent/sockapi.py +++ b/confluent_server/confluent/sockapi.py @@ -234,8 +234,16 @@ def start_term(authname, cfm, connection, params, path, authdata, skipauth): consession.reopen() continue else: - process_request(connection, data, cfm, authdata, authname, - skipauth) + try: + process_request(connection, data, cfm, authdata, authname, + skipauth) + except Exception: + tracelog.log(traceback.format_exc(), + ltype=log.DataTypes.event, + event=log.Events.stacktrace) + send_data(connection, {'errorcode': 500, + 'error': 'Unexpected error'}) + send_data(connection, {'_requestdone': 1}) continue if not data: consession.destroy() diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index 67d1e0d0..4fd60fbf 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -1,7 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation -# Copyright 2015 Lenovo +# Copyright 2015-2017 Lenovo # # Licensed under the Apache 
License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,10 +20,36 @@ import base64 import confluent.exceptions as cexc import confluent.log as log import hashlib +import netifaces import os import struct +def list_interface_indexes(): + # Getting the interface indexes in a portable manner + # would be better, but there's difficulty from a python perspective. + # For now be linux specific + try: + for iface in os.listdir('/sys/class/net/'): + ifile = open('/sys/class/net/{0}/ifindex'.format(iface), 'r') + intidx = int(ifile.read()) + ifile.close() + yield intidx + except (IOError, OSError): + # Probably situation is non-Linux, just do limited support for + # such platforms until other people come alonge + return + + +def list_ips(): + # Used for getting addresses to indicate the multicast address + # as well as getting all the broadcast addresses + for iface in netifaces.interfaces(): + addrs = netifaces.ifaddresses(iface) + if netifaces.AF_INET in addrs: + for addr in addrs[netifaces.AF_INET]: + yield addr + def randomstring(length=20): """Generate a random string of requested length @@ -61,6 +87,23 @@ def monotonic_time(): # for now, just support POSIX systems return os.times()[4] + +def get_fingerprint(certificate, algo='sha512'): + if algo != 'sha512': + raise Exception("TODO: Non-sha512") + return 'sha512$' + hashlib.sha512(certificate).hexdigest() + + +def cert_matches(fingerprint, certificate): + if not fingerprint or not certificate: + return False + algo, _, fp = fingerprint.partition('$') + newfp = None + if algo == 'sha512': + newfp = get_fingerprint(certificate) + return newfp and fingerprint == newfp + + class TLSCertVerifier(object): def __init__(self, configmanager, node, fieldname): self.cfm = configmanager @@ -68,7 +111,7 @@ class TLSCertVerifier(object): self.fieldname = fieldname def verify_cert(self, certificate): - fingerprint = 'sha512$' + hashlib.sha512(certificate).hexdigest() + 
fingerprint = get_fingerprint(certificate) storedprint = self.cfm.get_node_attributes(self.node, (self.fieldname,) ) if self.fieldname not in storedprint[self.node]: # no stored value, check diff --git a/confluent_server/confluent_server.spec.tmpl b/confluent_server/confluent_server.spec.tmpl index 6a8c7aec..37800cdc 100644 --- a/confluent_server/confluent_server.spec.tmpl +++ b/confluent_server/confluent_server.spec.tmpl @@ -12,7 +12,7 @@ Group: Development/Libraries BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot Prefix: %{_prefix} BuildArch: noarch -Requires: python-pyghmi, python-eventlet, python-greenlet, python-crypto >= 2.6.1, confluent_client, pyparsing, python-paramiko, python-dns +Requires: python-pyghmi, python-eventlet, python-greenlet, python-crypto >= 2.6.1, confluent_client, pyparsing, python-paramiko, python-dns, python-netifaces, python2-pyasn1, python-pysnmp, python-pyte Vendor: Jarrod Johnson Url: http://xcat.sf.net/ @@ -34,7 +34,8 @@ grep -v confluent/__init__.py INSTALLED_FILES.bare > INSTALLED_FILES cat INSTALLED_FILES %post -if [ -x /usr/bin/systemctl ]; then /usr/bin/systemctl try-restart confluent; fi +if [ -x /usr/bin/systemctl ]; then /usr/bin/systemctl try-restart confluent >& /dev/null; fi +true %clean rm -rf $RPM_BUILD_ROOT diff --git a/confluent_server/dbgtools/processhangtraces.py b/confluent_server/dbgtools/processhangtraces.py index 18df9f33..9e93acfe 100644 --- a/confluent_server/dbgtools/processhangtraces.py +++ b/confluent_server/dbgtools/processhangtraces.py @@ -1,5 +1,18 @@ #!/usr/bin/env python +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys diff --git a/confluent_server/makesetup b/confluent_server/makesetup index 2ee30143..26bb712a 100755 --- a/confluent_server/makesetup +++ b/confluent_server/makesetup @@ -6,3 +6,4 @@ if [ "$NUMCOMMITS" != "$VERSION" ]; then fi echo $VERSION > VERSION sed -e "s/#VERSION#/$VERSION/" setup.py.tmpl > setup.py +echo '__version__ = "'$VERSION'"' > confluent/__init__.py diff --git a/confluent_server/setup.py.tmpl b/confluent_server/setup.py.tmpl index fdbbee84..445e2d08 100644 --- a/confluent_server/setup.py.tmpl +++ b/confluent_server/setup.py.tmpl @@ -9,6 +9,10 @@ setup( url='http://xcat.sf.net/', description='confluent systems management server', packages=['confluent', 'confluent/config', 'confluent/interface', + 'confluent/discovery/', + 'confluent/discovery/protocols/', + 'confluent/discovery/handlers/', + 'confluent/networking/', 'confluent/plugins/hardwaremanagement/', 'confluent/plugins/shell/', 'confluent/plugins/configuration/'], From f4267e6013d460b6e615d8fab12367f09791b62a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 21 Jun 2017 14:02:59 -0400 Subject: [PATCH 05/31] Add missing files from the ThinkSystem merge --- confluent_client/doc/man/buildindex.sh | 1 + confluent_client/doc/man/confluent.ronn | 14 + confluent_client/doc/man/confluentdbutil.ronn | 26 + .../doc/man/nodeattribexpressions.ronn | 64 ++ confluent_client/doc/man/nodeboot.ronn | 34 + confluent_client/doc/man/nodeeventlog.ronn | 33 + confluent_client/doc/man/nodefirmware.ronn | 30 + confluent_client/doc/man/nodegroupattrib.ronn | 42 + 
confluent_client/doc/man/nodehealth.ronn | 22 + confluent_client/doc/man/nodeidentify.ronn | 31 + confluent_client/doc/man/nodeinventory.ronn | 106 +++ confluent_client/doc/man/nodepower.ronn | 43 + confluent_client/doc/man/noderun.ronn | 53 ++ confluent_client/doc/man/nodesetboot.ronn | 69 ++ confluent_client/doc/man/nodeshell.ronn | 28 + confluent_client/makeman | 8 + confluent_server/confluent/discovery/core.py | 850 ++++++++++++++++++ .../confluent/discovery/handlers/bmc.py | 153 ++++ .../confluent/discovery/handlers/generic.py | 85 ++ .../confluent/discovery/handlers/imm.py | 46 + .../confluent/discovery/handlers/pxe.py | 39 + .../confluent/discovery/handlers/smm.py | 38 + .../confluent/discovery/handlers/xcc.py | 69 ++ .../confluent/discovery/protocols/pxe.py | 118 +++ .../confluent/discovery/protocols/slp.py | 515 +++++++++++ .../confluent/discovery/protocols/ssdp.py | 232 +++++ confluent_server/confluent/neighutil.py | 64 ++ confluent_server/confluent/netutil.py | 124 +++ confluent_server/confluent/networking/lldp.py | 131 +++ confluent_server/dbgtools/confluentdbgcli.py | 58 ++ 30 files changed, 3126 insertions(+) create mode 100644 confluent_client/doc/man/buildindex.sh create mode 100644 confluent_client/doc/man/confluent.ronn create mode 100644 confluent_client/doc/man/confluentdbutil.ronn create mode 100644 confluent_client/doc/man/nodeattribexpressions.ronn create mode 100644 confluent_client/doc/man/nodeboot.ronn create mode 100644 confluent_client/doc/man/nodeeventlog.ronn create mode 100644 confluent_client/doc/man/nodefirmware.ronn create mode 100644 confluent_client/doc/man/nodegroupattrib.ronn create mode 100644 confluent_client/doc/man/nodehealth.ronn create mode 100644 confluent_client/doc/man/nodeidentify.ronn create mode 100644 confluent_client/doc/man/nodeinventory.ronn create mode 100644 confluent_client/doc/man/nodepower.ronn create mode 100644 confluent_client/doc/man/noderun.ronn create mode 100644 
confluent_client/doc/man/nodesetboot.ronn create mode 100644 confluent_client/doc/man/nodeshell.ronn create mode 100644 confluent_client/makeman create mode 100644 confluent_server/confluent/discovery/core.py create mode 100644 confluent_server/confluent/discovery/handlers/bmc.py create mode 100644 confluent_server/confluent/discovery/handlers/generic.py create mode 100644 confluent_server/confluent/discovery/handlers/imm.py create mode 100644 confluent_server/confluent/discovery/handlers/pxe.py create mode 100644 confluent_server/confluent/discovery/handlers/smm.py create mode 100644 confluent_server/confluent/discovery/handlers/xcc.py create mode 100644 confluent_server/confluent/discovery/protocols/pxe.py create mode 100644 confluent_server/confluent/discovery/protocols/slp.py create mode 100644 confluent_server/confluent/discovery/protocols/ssdp.py create mode 100644 confluent_server/confluent/neighutil.py create mode 100644 confluent_server/confluent/netutil.py create mode 100644 confluent_server/confluent/networking/lldp.py create mode 100644 confluent_server/dbgtools/confluentdbgcli.py diff --git a/confluent_client/doc/man/buildindex.sh b/confluent_client/doc/man/buildindex.sh new file mode 100644 index 00000000..7476716a --- /dev/null +++ b/confluent_client/doc/man/buildindex.sh @@ -0,0 +1 @@ +for i in *.ronn; do echo -n `head -n 1 $i|awk '{print $1}'`; echo " $i"; done > index.txt diff --git a/confluent_client/doc/man/confluent.ronn b/confluent_client/doc/man/confluent.ronn new file mode 100644 index 00000000..633cef86 --- /dev/null +++ b/confluent_client/doc/man/confluent.ronn @@ -0,0 +1,14 @@ +confluent(8) -- Start the confluent server +========================================================= + +## SYNOPSIS + +`confluent` + +## DESCRIPTION + +**confluent** is the name of the server daemon. It is normally run +through the init subsystem rather than executed directly. All confluent +commands connect to confluent daemon. 
It provides the web interface, debug, +and unix socket connectivity. + diff --git a/confluent_client/doc/man/confluentdbutil.ronn b/confluent_client/doc/man/confluentdbutil.ronn new file mode 100644 index 00000000..e7dc9d39 --- /dev/null +++ b/confluent_client/doc/man/confluentdbutil.ronn @@ -0,0 +1,26 @@ +confluentdbutil(8) -- Backup or restore confluent database +========================================================= + +## SYNOPSIS + + +`confluentdbutil [options] ` + +## DESCRIPTION + +**confluentdbutil** is a utility to export/import the confluent attributes +to/from json files. The path is a directory that holds the json version. +In order to perform restore, the confluent service must not be running. It +is required to indicate how to treat the usernames/passwords are treated in +the json files (password protected, removed from the files, or unprotected). + +## OPTIONS + +* `-p`, `--password`: + If specified, information such as usernames and passwords will be encrypted + using the given password. +* `-r`, `--redact`: + Indicates to replace usernames and passwords with a dummy string rather + than included. +* `-u`, `--unprotected`: + The keys.json file will include the encryption keys without any protection. \ No newline at end of file diff --git a/confluent_client/doc/man/nodeattribexpressions.ronn b/confluent_client/doc/man/nodeattribexpressions.ronn new file mode 100644 index 00000000..61277bad --- /dev/null +++ b/confluent_client/doc/man/nodeattribexpressions.ronn @@ -0,0 +1,64 @@ +nodeattribexpressions(5) -- Confluent attribute expression syntax +================================================================= + +## DESCRIPTION + +In confluent, any attribute may either be a straightforward value, or an +expression to generate the value. + +An expression will contain some directives wrapped in `{}` characters. Within +`{}` are a number of potential substitute values and operations. + +The most common operation is to extract a number from the nodename. 
These +values are available as n1, n2, etc. So for example attributes for a node named +b1o2r3u4 would have {n1} as 1, {n2} as 2, {n3} as 3, and {n4} as 4. +Additionally, {n0} is special as representing the last number in a name, so in +the b1o2r3u4 example, {n0} would be 4. + +Frequently a value derives from a number in the node name, but must undergo a +transform to be useful. As an example, if we have a scheme where nodes are +numbered n1-n512, and they are arranged 1-42 in rack1, 43-84 in rack2, and so +forth, it is convenient to perform arithmetic on the extracted number. Here is +an example of codifying the above scheme, and setting the u to the remainder: + +`location.rack=rack{(n1-1)/42+1}` +`location.u={(n1-1)%42+1}` + +Note how text may be mixed into expressions, only data within {} will receive +special treatment. Here we also had to adjust by subtracting 1 and adding it +back to make the math work as expected. + +It is sometimes the case that the number must be formatted a different way, +either specifying 0 padding or converting to hexadecimal. This can be done by a +number of operators at the end to indicate formatting changes. 
+ +`{n1:02x} - Zero pad to two decimal places, and convert to hexadecimal, as mightbe used for generating MAC addresses` +`{n1:x} - Hexadecimal without padding, as may be used in a generated IPv6 address` +`{n1:X} - Uppercase hexadecimal` +`{n1:02d} - Zero pad a normal numeric representation of the number.` + +Another common element to pull into an expression is the node name in whole: + +`hardwaremanagement.manager={nodename}-imm` + +Additionally other attributes may be pulled in: + +`hardwaremanagement.switchport={location.u}` + +Multiple expressions are permissible within a single attribute: + +`hardwaremanagement.manager={nodename}-{hardwaremanagement.method}` + +A note to developers: in general the API layer will automatically recognize a +generic set attribute to string with expression syntax and import it as an +expression. For example, submitting the following JSON: + +`{ 'location.rack': '{n1}' }` + +Will auto-detect {n1} as an expression and assign it normally. If wanting to +set that value verbatim, it can either be escaped by doubling the {} or by +explicitly declaring it as a value: + +`{ 'location.rack': '{{n1}}' }` + +`{ 'location.rack': { 'value': '{n1}' } }` diff --git a/confluent_client/doc/man/nodeboot.ronn b/confluent_client/doc/man/nodeboot.ronn new file mode 100644 index 00000000..9b067556 --- /dev/null +++ b/confluent_client/doc/man/nodeboot.ronn @@ -0,0 +1,34 @@ +nodeboot(8) -- Reboot a confluent node to a specific device +========================================================= + +## SYNOPSIS + +`nodeboot ` +`nodeboot ` [net|setup] + +## DESCRIPTION + +**nodeboot** reboots nodes in a noderange. If an additional argument is given, +it sets the node to specifically boot to that as the next boot. 
+ +## EXAMPLES +* Booting n3 and n4 to the default boot behavior: + `# nodeboot n3-n4` + `n3: default` + `n4: default` + `n3: on->reset` + `n4: on->reset` + +* Booting n1 and n2 to setup menu: + `# nodeboot n1-n2 setup` + `n2: setup` + `n1: setup` + `n2: on->reset` + `n1: on->reset` + +* Booting n3 and n4 to network: + `# nodeboot n3-n4 net` + `n3: network` + `n4: network` + `n4: on->reset` + `n3: off->on` diff --git a/confluent_client/doc/man/nodeeventlog.ronn b/confluent_client/doc/man/nodeeventlog.ronn new file mode 100644 index 00000000..69e78480 --- /dev/null +++ b/confluent_client/doc/man/nodeeventlog.ronn @@ -0,0 +1,33 @@ +nodeeventlog(8) -- Pull eventlog from confluent nodes +============================================================ + +## SYNOPSIS + +`nodeeventlog ` +`nodeeventlog [clear]` + +## DESCRIPTION + +`nodeeventlog` pulls and optionally clears the event log from the requested +noderange. + +## EXAMPLES +* Pull the event log from n2 and n3: + `# nodeeventlog n2,n3` + `n2: 05/03/2017 11:44:25 Event Log Disabled - SEL Fullness - Log clear` + `n2: 05/03/2017 11:44:56 System Firmware - Progress - Unspecified` + `n3: 05/03/2017 11:44:39 Event Log Disabled - SEL Fullness - Log clear` + `n3: 05/03/2017 11:45:00 System Firmware - Progress - Unspecified` + `n3: 05/03/2017 11:47:22 System Firmware - Progress - Starting OS boot` + +* Pull and clear the event log from n2 and n3: +`# nodeeventlog n2,n3 clear` +`n2: 05/03/2017 11:44:25 Event Log Disabled - SEL Fullness - Log clear` +`n2: 05/03/2017 11:44:56 System Firmware - Progress - Unspecified` +`n2: 05/03/2017 11:48:29 System Firmware - Progress - Starting OS boot` +`n3: 05/03/2017 11:44:39 Event Log Disabled - SEL Fullness - Log clear` +`n3: 05/03/2017 11:45:00 System Firmware - Progress - Unspecified` +`n3: 05/03/2017 11:47:22 System Firmware - Progress - Starting OS boot` +`# nodeeventlog n2,n3` +`n2: 05/03/2017 11:48:48 Event Log Disabled - SEL Fullness - Log clear` +`n3: 05/03/2017 11:48:52 Event Log 
Disabled - SEL Fullness - Log clear` diff --git a/confluent_client/doc/man/nodefirmware.ronn b/confluent_client/doc/man/nodefirmware.ronn new file mode 100644 index 00000000..4aa7bf83 --- /dev/null +++ b/confluent_client/doc/man/nodefirmware.ronn @@ -0,0 +1,30 @@ +nodefirmware(8) -- Report firmware information on confluent nodes +================================================================= + +## SYNOPSIS + +`nodefirmware ` + +## DESCRIPTION + +`nodefirmware` reports various pieces of firmware on confluent nodes. + +## EXAMPLES + +* Pull firmware from a node: +`# nodefirmware r1` +`r1: IMM: 3.70 (TCOO26H 2016-11-29T05:09:51)` +`r1: IMM Backup: 1.71 (TCOO10D 2015-04-17T00:00:00)` +`r1: IMM Trusted Image: TCOO26H` +`r1: UEFI: 2.31 (TCE128I 2016-12-13T00:00:00)` +`r1: UEFI Backup: 2.20 (TCE126O)` +`r1: FPGA: 3.2.0` +`r1: Broadcom NetXtreme Gigabit Ethernet Controller Bootcode: 1.38` +`r1: Broadcom NetXtreme Gigabit Ethernet Controller MBA: 16.8.0` +`r1: Broadcom NetXtreme Gigabit Ethernet Controller Firmware Package: 0.0.0a` +`r1: ServeRAID M1215 MegaRAID Controller Firmware: 24.12.0-0038 (2016-10-20T00:00:00)` +`r1: ServeRAID M1215 Disk 28 MBF2600RC: SB2C` +`r1: ServeRAID M1215 Disk 29 MBF2600RC: SB2C` +`r1: ServeRAID M5210 Disk 0 MBF2600RC: SB2C` +`r1: ServeRAID M5210 Disk 1 MBF2600RC: SB2C` +`r1: ServeRAID M5210 Disk 2 MBF2600RC: SB2C` diff --git a/confluent_client/doc/man/nodegroupattrib.ronn b/confluent_client/doc/man/nodegroupattrib.ronn new file mode 100644 index 00000000..57ada27c --- /dev/null +++ b/confluent_client/doc/man/nodegroupattrib.ronn @@ -0,0 +1,42 @@ +nodegroupattrib(8) -- List or change confluent nodegroup attributes +=================================================================== + +## SYNOPSIS + +`nodegroupattrib [ current | all ]` +`nodegroupattrib [...]` +`nodegroupattrib [ ...]` +`nodegroupattrib [-c] [ ...]` + +## DESCRIPTION + +`nodegroupattrip` queries the confluent server to get information about nodes. 
+In the simplest form, it simply takes the given group and lists the attributes of that group. + +Contrasted with nodeattrib(8), settings managed by nodegroupattrib will be added +and removed from a node as it is added or removed from a group. If an attribute +is set using nodeattrib(8) against a noderange(5) that happens to be a group name, +nodeattrib(8) individually sets attributes directly on each individual node that is +currently a member of that group. Removing group membership or adding a new +node after using the nodeattrib(8) command will not have attributes change automatically. +It's easiest to see by using the `nodeattrib -b` to understand how +the attributes are set on the node versus a group to which a node belongs. + +## OPTIONS + +* `-c`, `--clear`: + Clear specified nodeattributes. + +## EXAMPLES + +* Show attributes of a group called `demogrp`: + `# nodegroupattrib demogrp` + `demogrp: hardwaremanagement.manager: (will derive from expression 10.30.{n0/255}.{n0%255})` + `demogrp: nodes: n12,n13,n10,n11,n9,n1,n2,n3,n4` + +* Set location.u to be the remainder of first number in node name when divided by 42: + `# nodegroupattrib demogrp location.u={n1%42}` + +## SEE ALSO + +nodeattrib(8), nodeattribexpressions(5) diff --git a/confluent_client/doc/man/nodehealth.ronn b/confluent_client/doc/man/nodehealth.ronn new file mode 100644 index 00000000..b8baa722 --- /dev/null +++ b/confluent_client/doc/man/nodehealth.ronn @@ -0,0 +1,22 @@ +nodehealth(8) -- Show health summary of confluent nodes +======================================================== + +## SYNOPSIS + +`nodehealth ` + +## DESCRIPTION + +`nodehealth` reports the current health assessment of a confluent node. It +will report either `ok`, `warning`, `critical`, or `failed`, along with +a string explaining the reason for any result other than `ok`. 
+ +## EXAMPLES + +* Pull health summary of 5 nodes: + `# nodehealth n1-n4,r1` + `n1: critical (Mezz Exp 2 Fault:Critical)` + `n3: ok` + `n2: ok` + `r1: ok` + `n4: ok` diff --git a/confluent_client/doc/man/nodeidentify.ronn b/confluent_client/doc/man/nodeidentify.ronn new file mode 100644 index 00000000..3a9cbe69 --- /dev/null +++ b/confluent_client/doc/man/nodeidentify.ronn @@ -0,0 +1,31 @@ +nodeidentify(8) -- Control the identify LED of confluent nodes +========================================================= + +## SYNOPSIS + +`nodeidentify [on|off]` + +## DESCRIPTION + +`nodeidentify` allows you to turn on or off the location LED of confluent nodes, +making it easier to determine the physical location of the nodes. The following +options are supported: + +* `on`: Turn on the identify LED +* `off`: Turn off the identify LED + +## EXAMPLES + +* Turn on the identify LED on nodes n1 through n4: + `# nodeidentify n1-n4 on` + `n1: on` + `n2: on` + `n3: on` + `n4: on` + +* Turn off the identify LED on nodes n1 through n4: + `# nodeidentify n1-n4 off` + `n1: off` + `n2: off` + `n4: off` + `n3: off` diff --git a/confluent_client/doc/man/nodeinventory.ronn b/confluent_client/doc/man/nodeinventory.ronn new file mode 100644 index 00000000..802b1c87 --- /dev/null +++ b/confluent_client/doc/man/nodeinventory.ronn @@ -0,0 +1,106 @@ +nodeinventory(8) -- Get hardware inventory of confluent node +=============================================================== + +## SYNOPSIS + +`nodeinventory ` + +## DESCRIPTION + +`nodeinventory` pulls information about hardware of a node. This includes +information such as adapters, serial numbers, processors, and memory modules, +as supported by the platform's hardware management implementation. 
+ +## EXAMPLES + +* Pulling inventory of a node named r1: + `# nodeinventory r1` + `r1: System MAC Address 1: 40:f2:e9:af:45:a0` + `r1: System MAC Address 2: 40:f2:e9:af:45:a1` + `r1: System MAC Address 3: 40:f2:e9:af:45:a2` + `r1: System MAC Address 4: 40:f2:e9:af:45:a3` + `r1: System Board manufacturer: IBM` + `r1: System Product name: System x3650 M5` + `r1: System Device ID: 32` + `r1: System Revision: 9` + `r1: System Product ID: 323` + `r1: System Board model: 00KG915` + `r1: System Device Revision: 0` + `r1: System Serial Number: E2K4831` + `r1: System Board manufacture date: 2014-10-20T12:00` + `r1: System Board serial number: Y010UF4AL0B5` + `r1: System Manufacturer: IBM` + `r1: System FRU Number: 00FK639` + `r1: System Board product name: System Board` + `r1: System Model: 5462AC1` + `r1: System UUID: 1B29CE46-765E-31A3-A3B9-B5FB934F15AB` + `r1: System Hardware Version: 0x0000` + `r1: System Manufacturer ID: 20301` + `r1: System Chassis serial number: E2K4831` + `r1: System Asset Number: ` + `r1: System Chassis type: Other` + `r1: Power Supply 1 Board model: 94Y8136` + `r1: Power Supply 1 Board manufacturer: EMER` + `r1: Power Supply 1 FRU Number: 94Y8137` + `r1: Power Supply 1 Board product name: IBM Designed Device` + `r1: Power Supply 1 Board manufacture date: 2014-11-08T00:00` + `r1: Power Supply 1 Board serial number: K13814B88ED` + `r1: Power Supply 1 Revision: 49` + `r1: Power Supply 2: Not Present` + `r1: DASD Backplane 1 Board model: 00JY139` + `r1: DASD Backplane 1 Board manufacturer: WIST` + `r1: DASD Backplane 1 FRU Number: 00FJ756` + `r1: DASD Backplane 1 Board product name: IBM Designed Device` + `r1: DASD Backplane 1 Board manufacture date: 2014-08-28T00:00` + `r1: DASD Backplane 1 Board serial number: Y011UF48W02U` + `r1: DASD Backplane 1 Revision: 0` + `r1: DASD Backplane 2: Not Present` + `r1: DASD Backplane 3: Not Present` + `r1: DASD Backplane 4: Not Present` + `r1: DASD Backplane 5 Board model: 00YJ530` + `r1: DASD Backplane 5 Board 
manufacturer: WIST` + `r1: DASD Backplane 5 FRU Number: 00AL953` + `r1: DASD Backplane 5 Board product name: IBM Designed Device` + `r1: DASD Backplane 5 Board manufacture date: 2016-02-04T00:00` + `r1: DASD Backplane 5 Board serial number: Y010UF624024` + `r1: DASD Backplane 5 Revision: 0` + `r1: DASD Backplane 6: Not Present` + `r1: CPU 1 Hardware Version: Intel(R) Xeon(R) CPU E5-2640 v3 @ 2.60GHz` + `r1: CPU 1 Asset Number: Unknown` + `r1: CPU 1 Manufacturer: Intel(R) Corporation` + `r1: CPU 2: Not Present` + `r1: ML2 Card: Not Present` + `r1: DIMM 1: Not Present` + `r1: DIMM 2: Not Present` + `r1: DIMM 3: Not Present` + `r1: DIMM 4: Not Present` + `r1: DIMM 5: Not Present` + `r1: DIMM 6: Not Present` + `r1: DIMM 7: Not Present` + `r1: DIMM 8: Not Present` + `r1: DIMM 9: Not Present` + `r1: DIMM 10: Not Present` + `r1: DIMM 11: Not Present` + `r1: DIMM 12: Not Present` + `r1: DIMM 13: Not Present` + `r1: DIMM 14: Not Present` + `r1: DIMM 15: Not Present` + `r1: DIMM 16: Not Present` + `r1: DIMM 17: Not Present` + `r1: DIMM 18: Not Present` + `r1: DIMM 19: Not Present` + `r1: DIMM 20: Not Present` + `r1: DIMM 21: Not Present` + `r1: DIMM 22: Not Present` + `r1: DIMM 23: Not Present` + `r1: DIMM 24: Not Present` + `r1: X8 PCI 1: Not Present` + `r1: X8 PCI 2: Not Present` + `r1: X8 PCI 6: Not Present` + `r1: X8 PCI 7: Not Present` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller MAC Address 1: 40:f2:e9:af:45:a0` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller MAC Address 2: 40:f2:e9:af:45:a1` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller MAC Address 3: 40:f2:e9:af:45:a2` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller MAC Address 4: 40:f2:e9:af:45:a3` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller PCI slot: 1b:00` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller location: Onboard` diff --git a/confluent_client/doc/man/nodepower.ronn b/confluent_client/doc/man/nodepower.ronn new file mode 100644 index 00000000..8249c95b --- 
/dev/null +++ b/confluent_client/doc/man/nodepower.ronn @@ -0,0 +1,43 @@ +nodepower(8) -- Check or change power state of confluent nodes +========================================================= + +## SYNOPSIS + +`nodepower ` +`nodepower [on|off|boot|shutdown|reset|status]` + +## DESCRIPTION + +**nodepower** with only a noderange will retrieve current power state of nodes +through confluent. When given an additional argument, it will request a change +to the power state of the nodes. The following arguments are recognized: + +* `on`: Turn on the specified noderange. Nothing will happen to nodes of +the noderange that are already on. +* `off`: Immediately turn off the specified noderange, without waiting for OS +to shutdown. Nothing will happen to nodes of the noderange that are already off. +* `boot`: Immediately boot a system. This will power on nodes of the noderange +that are off, and reset nodes of the noderange that are on. The previous state +will be reflected in the output. +* `shutdown`: Request the OS gracefully shut down. Nothing will happen for +nodes that are off, and nodes will not shutdown if the OS fails to gracefully +respond. +* `reset`: Request immediate reset of nodes of the noderange. Nodes that are +off will not react to this request. +* `status`: Behave identically to having no argument passed at all. 
+ +## EXAMPLES +* Get power state of nodes n1 through n4: + `# nodepower n1-n4` + `n1: on` + `n2: on` + `n3: on` + `n4: off` + + +* Forcing a reboot of nodes n1-n4: + `# nodepower n1-n4 boot` + `n3: on->reset` + `n1: on->reset` + `n2: on->reset` + `n4: off->on` diff --git a/confluent_client/doc/man/noderun.ronn b/confluent_client/doc/man/noderun.ronn new file mode 100644 index 00000000..626bb685 --- /dev/null +++ b/confluent_client/doc/man/noderun.ronn @@ -0,0 +1,53 @@ +noderun(8) -- Run arbitrary commands per node in a noderange +============================================================= + +## SYNOPSIS + +`noderun ` + +## DESCRIPTION + +`noderun` will take a given command and execute it in parallel once per node +in the specified noderange. Attribute expressions as documented in +nodeattribexpressions(5) are expanded prior to execution of the command. For +noderun, the commands are locally executed. To execute commands on the nodes +themselves, see nodeshell(8). + +## EXAMPLES + +* Run ping against nodes n1 through n4: + `# noderun n1-n4 ping -c 1 {nodename}` + `n3: PING n3 (172.30.2.3) 56(84) bytes of data.` + `n3: 64 bytes from n3 (172.30.2.3): icmp_seq=1 ttl=64 time=0.387 ms` + `n3: ` + `n3: --- n3 ping statistics ---` + `n3: 1 packets transmitted, 1 received, 0% packet loss, time 0ms` + `n3: rtt min/avg/max/mdev = 0.387/0.387/0.387/0.000 ms` + `n4: PING n4 (172.30.2.4) 56(84) bytes of data.` + `n4: 64 bytes from n4 (172.30.2.4): icmp_seq=1 ttl=64 time=0.325 ms` + `n4: ` + `n4: --- n4 ping statistics ---` + `n4: 1 packets transmitted, 1 received, 0% packet loss, time 0ms` + `n4: rtt min/avg/max/mdev = 0.325/0.325/0.325/0.000 ms` + `n2: PING n2 (172.30.2.2) 56(84) bytes of data.` + `n2: From odin (172.30.0.6) icmp_seq=1 Destination Host Unreachable` + `n2: ` + `n2: --- n2 ping statistics ---` + `n2: 1 packets transmitted, 0 received, +1 errors, 100% packet loss, time 3000ms` + `n2: ` + `n1: PING n1 (172.30.2.1) 56(84) bytes of data.` + `n1: ` + `n1: --- n1 
ping statistics ---` + `n1: 1 packets transmitted, 0 received, 100% packet loss, time 10000ms` + `n1: ` + +* Run an ipmitool raw command against the management controllers of n1 through n4: + `# noderun n1-n4 ipmitool -I lanplus -U USERID -E -H {hardwaremanagement.manager} raw 0 1` + `n3: 01 10 00` + `n1: 01 10 00` + `n4: 01 10 00` + `n2: 01 10 00` + +## SEE ALSO + +nodeshell(8) diff --git a/confluent_client/doc/man/nodesetboot.ronn b/confluent_client/doc/man/nodesetboot.ronn new file mode 100644 index 00000000..349a9a47 --- /dev/null +++ b/confluent_client/doc/man/nodesetboot.ronn @@ -0,0 +1,69 @@ +nodesetboot(8) -- Check or set next boot device for noderange +==================================================== + +## SYNOPSIS + +`nodesetboot ` +`nodesetboot [options] [default|cd|network|setup|hd]` + +## DESCRIPTION + +Requests that the next boot occur from the specified device. Unless otherwise +specified, this is a one time boot option, and does not change the normal boot +behavior of the system. This is useful for taking a system that normally boots +to the hard drive and starting a network install, or to go into the firmware +setup menu without having to hit a keystroke at the correct time on the console. + +Generally, it's a bit more convenient and direct to use the nodeboot(8) command, +which will follow up the boot device with an immediate power directive to take +effect. The `nodesetboot` command is still useful, particularly if you want +to use `nodesetboot setup` and then initiate a reboot from within +the operating system with ssh or similar rather than using the remote hardware +control. + +## OPTIONS + +* `-b`, `--bios`: + For a system that supports both BIOS and UEFI style boot, request BIOS style + boot if supported (some platforms will UEFI boot with this flag anyway). + +* `-p`, `--persist`: + For a system that supports it, mark the boot override to persist rather than + be a one time change. Many systems do not support this functionality. 
+ +* `default`: + Request a normal default boot with no particular device override + +* `cd`: + Request boot from media. Note that this can include physical CD, + remote media mounted as CD/DVD, and detachable hard disks drives such as usb + key devices. + +* `network`: + Request boot to network + +* `setup`: + Request to enter the firmware configuration menu (e.g. F1 setup) on next boot. + +* `hd`: + Boot straight to hard disk drive + +## EXAMPLES + +* Set next boot to setup for four nodes: + `# nodesetboot n1-n4 setup` + `n1: setup` + `n3: setup` + `n2: setup` + `n4: setup` + +* Check boot override settings on four nodes: + `# nodesetboot n1-n4` + `n1: setup` + `n2: setup` + `n3: setup` + `n4: setup` + +## SEE ALSO + +nodeboot(8) diff --git a/confluent_client/doc/man/nodeshell.ronn b/confluent_client/doc/man/nodeshell.ronn new file mode 100644 index 00000000..7dd8c5ed --- /dev/null +++ b/confluent_client/doc/man/nodeshell.ronn @@ -0,0 +1,28 @@ +nodeshell(8) -- Execute command on many nodes in a noderange through ssh +========================================================================= + +## SYNOPSIS + +`nodeshell ` + +## DESCRIPTION + +Allows execution of a command on many nodes in parallel. Like noderun(8), it +accepts and interpolates confluent attribute expressions as documented in +nodeattribexpressions(5). 
+ +## EXAMPLES + +* Running `echo hi` on for nodes: + `# nodeshell n1-n4 echo hi` + `n1: hi` + `n2: hi` + `n3: hi` + `n4: hi` + +* Setting a new static ip address temporarily on secondary interface of four nodes: + `# nodeshell n1-n4 ifconfig eth1 172.30.93.{n1}` + +## SEE ALSO + +noderun(8) diff --git a/confluent_client/makeman b/confluent_client/makeman new file mode 100644 index 00000000..feb804a0 --- /dev/null +++ b/confluent_client/makeman @@ -0,0 +1,8 @@ +#!/bin/sh +cd `dirname $0`/doc/man +mkdir -p ../../man/man5 +mkdir -p ../../man/man8 +ronn -r *.ronn +mv *.5 ../../man/man5/ +mv *.8 ../../man/man8/ + diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py new file mode 100644 index 00000000..11e25666 --- /dev/null +++ b/confluent_server/confluent/discovery/core.py @@ -0,0 +1,850 @@ +# Copyright 2016-2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This manages the detection and auto-configuration of nodes. +# Discovery sources may implement scans and may be passive or may provide +# both. + +# The phases and actions: +# - Detect - Notice the existance of a potentially supported target +# - Potentially apply a secure replacement for default credential +# (perhaps using some key identifier combined with some string +# denoting temporary use, and use confluent master integrity key +# to generate a password in a formulaic way?) +# - Do some universal reconfiguration if applicable (e.g. 
if something is +# part of an enclosure with an optionally enabled enclosure manager, +# check and request enclosure manager enablement +# - Throughout all of this, at this phase no sensitive data is divulged, +# only using credentials that are factory default or equivalent to +# factory default +# - Request transition to Locate +# - Locate - Use available cues to ascertain the physical location. This may +# be mac address lookup through switch or correlated by a server +# enclosure manager. If the location data suggests a node identity, +# then proceed to the 'verify' state +# - Verify - Given the current information and candidate upstream verifier, +# verify the authenticity of the servers claim in an automated way +# if possible. A few things may happen at this juncture +# - Verification outright fails (confirmed negative response) +# - Audit log entry created, element is not *allowed* to +# proceed +# - Verification not possible (neither good or bad) +# - If security policy is set to low, proceed to 'Manage' +# - Otherwise, log the detection event and stop (user +# would then manually bless the endpoint if applicable +# - Verification succeeds +# - If security policy is set to strict (or manual, whichever +# word works best, note the successfull verification, but +# do not manage +# - Otherwise, proceed to 'Manage' +# -Pre-configure - Given data up to this point, try to do some pre-config. +# For example, if located and X, then check for S, enable S +# This happens regardless of verify, as verify may depend on +# S +# - Manage +# - Create the node if autonode (Deferred) +# - If there is not a defined ip address, collect the current LLA and use +# that value. 
+# - If no username/password defined, generate a unique password, 20 bytes +# long, written to pass most complexity rules (15 random bytes, base64, +# retry until uppercase, lowercase, digit, and symbol all present) +# - Apply defined configuration to endpoint + +import confluent.config.configmanager as cfm +import confluent.discovery.protocols.pxe as pxe +#import confluent.discovery.protocols.ssdp as ssdp +import confluent.discovery.protocols.slp as slp +import confluent.discovery.handlers.imm as imm +import confluent.discovery.handlers.pxe as pxeh +import confluent.discovery.handlers.smm as smm +import confluent.discovery.handlers.xcc as xcc +import confluent.exceptions as exc +import confluent.log as log +import confluent.messages as msg +import confluent.networking.macmap as macmap +import confluent.noderange as noderange +import confluent.util as util +import traceback + +import eventlet +import eventlet.semaphore + +class nesteddict(dict): + + def __missing__(self, key): + v = self[key] = nesteddict() + return v + +nodehandlers = { + 'service:lenovo-smm': smm, + 'service:management-hardware.Lenovo:lenovo-xclarity-controller': xcc, + 'service:management-hardware.IBM:integrated-management-module2': imm, + 'pxe-client': pxeh, +} + +servicenames = { + 'pxe-client': 'pxe-client', + 'service:lenovo-smm': 'lenovo-smm', + 'service:management-hardware.Lenovo:lenovo-xclarity-controller': 'lenovo-xcc', + 'service:management-hardware.IBM:integrated-management-module2': 'lenovo-imm2', +} + +servicebyname = { + 'pxe-client': 'pxe-client', + 'lenovo-smm': 'service:lenovo-smm', + 'lenovo-xcc': 'service:management-hardware.Lenovo:lenovo-xclarity-controller', + 'lenovo-imm2': 'service:management-hardware.IBM:integrated-management-module2', +} +# Passive-only auto-detection protocols: +# PXE + +# Both passive and active +# SLP (passive mode listens for SLP DA and unicast interrogation of the system) +# mDNS +# SSD + +# Also there are location providers +# Switch +# chassis +# 
chassis may in turn describe more chassis + +# We normalize discovered node data to the following pieces of information: +# * Detected node name (if available, from switch discovery or similar or +# auto generated node name. +# * Model number +# * Model name +# * Serial number +# * System UUID (in x86 space, specifically whichever UUID would be in DMI) +# * Network interfaces and addresses +# * Switch connectivity information +# * enclosure information +# * Management TLS fingerprint if validated (switch publication or enclosure) +# * System TLS fingerprint if validated (switch publication or system manager) + + +#TODO: by serial, by uuid, by node +known_info = {} +known_services = {} +known_serials = {} +known_nodes = nesteddict() +unknown_info = {} +pending_nodes = {} + + +def send_discovery_datum(info): + addresses = info.get('addresses', []) + yield msg.KeyValueData({'nodename': info.get('nodename', '')}) + yield msg.KeyValueData({'ipaddrs': [x[0] for x in addresses]}) + yield msg.KeyValueData({'serialnumber': info.get('serialnumber', '')}) + yield msg.KeyValueData({'modelnumber': info.get('modelnumber', '')}) + yield msg.KeyValueData({'macs': [info.get('hwaddr', '')]}) + types = [] + for infotype in info.get('services', []): + if infotype in servicenames: + types.append(servicenames[infotype]) + yield msg.KeyValueData({'types': types}) + + +def _info_matches(info, criteria): + model = criteria.get('by-model', None) + devtype = criteria.get('by-type', None) + node = criteria.get('by-node', None) + serial = criteria.get('by-serial', None) + status = criteria.get('by-state', None) + if model and info.get('modelnumber', None) != model: + return False + if devtype and devtype not in info.get('services', []): + return False + if node and info.get('nodename', None) != node: + return False + if serial and info.get('serialnumber', None) != serial: + return False + if status and info.get('discostatus', None) != status: + return False + return True + + +def 
list_matching_nodes(criteria): + retnodes = [] + for node in known_nodes: + for mac in known_nodes[node]: + info = known_info[mac] + if _info_matches(info, criteria): + retnodes.append(node) + break + retnodes.sort(key=noderange.humanify_nodename) + return [msg.ChildCollection(node + '/') for node in retnodes] + + +def list_matching_serials(criteria): + for serial in sorted(list(known_serials)): + info = known_serials[serial] + if _info_matches(info, criteria): + yield msg.ChildCollection(serial + '/') + + +def list_matching_states(criteria): + return [msg.ChildCollection(x) for x in ('discovered/', 'identified/', + 'unidentified/')] + +def list_matching_macs(criteria): + for mac in sorted(list(known_info)): + info = known_info[mac] + if _info_matches(info, criteria): + yield msg.ChildCollection(mac.replace(':', '-')) + + +def list_matching_types(criteria): + rettypes = [] + for infotype in known_services: + typename = servicenames[infotype] + if ('by-model' not in criteria or + criteria['by-model'] in known_services[infotype]): + rettypes.append(typename) + return [msg.ChildCollection(typename + '/') + for typename in sorted(rettypes)] + + +def list_matching_models(criteria): + for model in sorted(list(detected_models())): + if ('by-type' not in criteria or + model in known_services[criteria['by-type']]): + yield msg.ChildCollection(model + '/') + + +def show_info(mac): + mac = mac.replace('-', ':') + if mac not in known_info: + raise exc.NotFoundException(mac + ' not a known mac address') + for i in send_discovery_datum(known_info[mac]): + yield i + + +list_info = { + 'by-node': list_matching_nodes, + 'by-serial': list_matching_serials, + 'by-type': list_matching_types, + 'by-model': list_matching_models, + 'by-mac': list_matching_macs, + 'by-state': list_matching_states, +} + +multi_selectors = set([ + 'by-type', + 'by-model', + 'by-state', +]) + + +node_selectors = set([ + 'by-node', + #'by-uuid', + 'by-serial', +]) + + +single_selectors = set([ + 'by-mac', +]) 
+ + +def _parameterize_path(pathcomponents): + listrequested = False + childcoll = True + if len(pathcomponents) % 2 == 1: + listrequested = pathcomponents[-1] + pathcomponents = pathcomponents[:-1] + pathit = iter(pathcomponents) + keyparams = {} + validselectors = multi_selectors | node_selectors | single_selectors + for key, val in zip(pathit, pathit): + if key not in validselectors: + raise exc.NotFoundException('{0} is not valid here'.format(key)) + if key == 'by-type': + keyparams[key] = servicebyname.get(val, None) + else: + keyparams[key] = val + validselectors.discard(key) + if key in single_selectors: + childcoll = False + validselectors = set([]) + elif key in node_selectors: + validselectors = single_selectors | set([]) + return validselectors, keyparams, listrequested, childcoll + + +def handle_api_request(configmanager, inputdata, operation, pathcomponents): + if operation == 'retrieve': + return handle_read_api_request(pathcomponents) + elif (operation in ('update', 'create') and + pathcomponents == ['discovery', 'rescan']): + if inputdata != {'rescan': 'start'}: + raise exc.InvalidArgumentException() + rescan() + return (msg.KeyValueData({'rescan': 'started'}),) + elif (operation in ('update', 'create')): + if 'node' not in inputdata: + raise exc.InvalidArgumentException('Missing node name in input') + _, queryparms, _, _ = _parameterize_path(pathcomponents[1:]) + if 'by-mac' not in queryparms: + raise exc.InvalidArgumentException('Must target using "by-mac"') + mac = queryparms['by-mac'].replace('-', ':') + if mac not in known_info: + raise exc.NotFoundException('{0} not found'.format(mac)) + info = known_info[mac] + handler = info['handler'].NodeHandler(info, configmanager) + eval_node(configmanager, handler, info, inputdata['node'], + manual=True) + return [msg.AssignedResource(inputdata['node'])] + raise exc.NotImplementedException( + 'Unable to {0} to {1}'.format(operation, '/'.join(pathcomponents))) + + +def 
handle_read_api_request(pathcomponents): + # TODO(jjohnson2): This should be more generalized... + # odd indexes into components are 'by-'*, even indexes + # starting at 2 are parameters to previous index + subcats, queryparms, indexof, coll = _parameterize_path(pathcomponents[1:]) + if len(pathcomponents) == 1: + dirlist = [msg.ChildCollection(x + '/') for x in sorted(list(subcats))] + dirlist.append(msg.ChildCollection('rescan')) + return dirlist + if not coll: + return show_info(queryparms['by-mac']) + if not indexof: + return [msg.ChildCollection(x + '/') for x in sorted(list(subcats))] + if indexof not in list_info: + raise exc.NotFoundException('{0} is not found'.format(indexof)) + return list_info[indexof](queryparms) + + +def detected_services(): + for srv in known_services: + yield servicenames[srv] + + +def detected_models(): + knownmodels = set([]) + for info in known_info: + info = known_info[info] + if 'modelnumber' in info and info['modelnumber'] not in knownmodels: + knownmodels.add(info['modelnumber']) + yield info['modelnumber'] + + +def _recheck_nodes(nodeattribs, configmanager): + global rechecker + _map_unique_ids(nodeattribs) + # for the nodes whose attributes have changed, consider them as potential + # strangers + for node in nodeattribs: + if node in known_nodes: + for somemac in known_nodes[node]: + unknown_info[somemac] = known_nodes[node][somemac] + unknown_info[somemac]['discostatus'] = 'unidentified' + # Now we go through ones we did not find earlier + for mac in list(unknown_info): + try: + _recheck_single_unknown(configmanager, mac) + except Exception: + traceback.print_exc() + continue + # now we go through ones that were identified, but could not pass + # policy or hadn't been able to verify key + for nodename in pending_nodes: + info = pending_nodes[nodename] + handler = info['handler'].NodeHandler(info, configmanager) + eventlet.spawn_n(eval_node, configmanager, handler, info, nodename) + + +def 
_recheck_single_unknown(configmanager, mac): + global rechecker + global rechecktime + info = unknown_info.get(mac, None) + if not info: + return + if info['handler'] != pxeh and not info.get('addresses', None): + log.log({'info': 'Missing address information in ' + repr(info)}) + return + handler = info['handler'].NodeHandler(info, configmanager) + if handler.https_supported and not handler.https_cert: + if handler.cert_fail_reason == 'unreachable': + log.log( + { + 'info': '{0} with hwaddr {1} is not reachable at {2}' + ''.format( + handler.devname, info['hwaddr'], handler.ipaddr + )}) + # addresses data is bad, clear it, to force repair next + # opportunity + info['addresses'] = [] + # TODO(jjohnson2): rescan due to bad peer addr data? + # not just wait around for the next announce + return + log.log( + { + 'info': '{0} with hwaddr {1} at address {2} is not yet running ' + 'https, will examine later'.format( + handler.devname, info['hwaddr'], handler.ipaddr + )}) + if rechecker is not None and rechecktime > util.monotonic_time() + 60: + rechecker.cancel() + # if cancel did not result in dead, then we are in progress + if rechecker is None or rechecker.dead: + rechecktime = util.monotonic_time() + 60 + rechecker = eventlet.spawn_after(60, _periodic_recheck, + configmanager) + return + nodename = get_nodename(configmanager, handler, info) + if nodename: + if handler.https_supported: + dp = configmanager.get_node_attributes([nodename], + ('pubkeys.tls_hardwaremanager',)) + lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager', + {}).get('value', None) + if util.cert_matches(lastfp, handler.https_cert): + info['nodename'] = nodename + known_nodes[nodename][info['hwaddr']] = info + info['discostatus'] = 'discovered' + return # already known, no need for more + eventlet.spawn_n(eval_node, configmanager, handler, info, nodename) + + +def safe_detected(info): + eventlet.spawn_n(eval_detected, info) + + +def eval_detected(info): + try: + return detected(info) + 
def detected(info):
    """Handle a fresh detection event from a discovery protocol (SLP/PXE).

    Classifies the endpoint by advertised service, captures serial/model
    data when present, and either records it as unknown/pending or proceeds
    to node evaluation.

    :param info: dict from the protocol snooper; must contain 'hwaddr' and
                 'services' (others like 'attributes'/'addresses' optional)
    """
    global rechecker
    global rechecktime
    if 'hwaddr' not in info:
        return  # For now, require hwaddr field to proceed
        # later, manual and CMM discovery may act on SN and/or UUID
    for service in info['services']:
        if nodehandlers.get(service, None):
            if service not in known_services:
                known_services[service] = set([])
            handler = nodehandlers[service]
            info['handler'] = handler
            break
    else:  # no nodehandler, ignore for now
        return
    try:
        snum = info['attributes']['enclosure-serial-number'][0].rstrip()
        if snum:
            info['serialnumber'] = snum
            known_serials[info['serialnumber']] = info
    except (KeyError, IndexError):
        pass
    try:
        info['modelnumber'] = info['attributes']['enclosure-machinetype-model'][0]
        known_services[service].add(info['modelnumber'])
    except (KeyError, IndexError):
        pass
    if info['hwaddr'] in known_info and 'addresses' in info:
        # we should tee these up for parsing when an enclosure comes up
        # also when switch config parameters change, should discard
        # and there's also if wiring is fixed...
        # of course could periodically revisit known_nodes
        # replace potentially stale address info
        #TODO(jjohnson2): remove this
        # temporary workaround for XCC not doing SLP DA over dedicated port
        # bz 93219, fix submitted, but not in builds yet
        # strictly speaking, going ipv4 only legitimately is mistreated here,
        # but that should be an edge case
        oldaddr = known_info[info['hwaddr']].get('addresses', [])
        for addr in info['addresses']:
            if addr[0].startswith('fe80::'):
                break
        else:
            for addr in oldaddr:
                if addr[0].startswith('fe80::'):
                    info['addresses'].append(addr)
        if known_info[info['hwaddr']].get(
                'addresses', []) == info['addresses']:
            # if the ip addresses match, then assume no changes
            # now something resetting to defaults could, in theory
            # have the same address, but need to be reset
            # in that case, however, a user can clear pubkeys to force a check
            return
    known_info[info['hwaddr']] = info
    cfg = cfm.ConfigManager(None)
    handler = handler.NodeHandler(info, cfg)
    if handler.https_supported and not handler.https_cert:
        if handler.cert_fail_reason == 'unreachable':
            log.log(
                {
                    'info': '{0} with hwaddr {1} is not reachable at {2}'
                            ''.format(
                        handler.devname, info['hwaddr'], handler.ipaddr
                    )})
            info['addresses'] = []
            return
        log.log(
            {'info': '{0} with hwaddr {1} at address {2} is not yet running '
                     'https, will examine later'.format(
                handler.devname, info['hwaddr'], handler.ipaddr
            )})
        if rechecker is not None and rechecktime > util.monotonic_time() + 60:
            rechecker.cancel()
        if rechecker is None or rechecker.dead:
            rechecktime = util.monotonic_time() + 60
            rechecker = eventlet.spawn_after(60, _periodic_recheck, cfg)
        unknown_info[info['hwaddr']] = info
        # FIX: was the misspelled 'unidentfied'; every other site in this
        # module assigns/compares 'unidentified'
        info['discostatus'] = 'unidentified'
        #TODO, eventlet spawn after to recheck sooner, or somehow else
        # influence periodic recheck to shorten delay?
        return
    nodename = get_nodename(cfg, handler, info)
    if nodename and handler.https_supported:
        dp = cfg.get_node_attributes([nodename],
                                     ('pubkeys.tls_hardwaremanager',))
        lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
                                          {}).get('value', None)
        if util.cert_matches(lastfp, handler.https_cert):
            info['nodename'] = nodename
            known_nodes[nodename][info['hwaddr']] = info
            info['discostatus'] = 'discovered'
            return  # already known, no need for more
    #TODO(jjohnson2): We might have to get UUID for certain searches...
    #for now defer probe until inside eval_node. We might not have
    #a nodename without probe in the future.
    if nodename:
        eval_node(cfg, handler, info, nodename)
    else:
        log.log(
            {'info': 'Detected unknown {0} with hwaddr {1} at '
                     'address {2}'.format(
                handler.devname, info['hwaddr'], handler.ipaddr
            )})
        info['discostatus'] = 'unidentified'
        unknown_info[info['hwaddr']] = info


def get_nodename(cfg, handler, info):
    """Map detection info to a configured node name, or None if unknown.

    Non-https handlers (PXE) are matched by UUID then MAC; https handlers
    by TLS certificate fingerprint then MAC.
    """
    if not handler.https_supported:
        curruuid = info['uuid']
        nodename = nodes_by_uuid.get(curruuid, None)
        if nodename is None:
            # TODO: if there are too many matches on port for a
            # given type, error!  Can't just arbitrarily limit,
            # shared nic with vms is possible and valid
            nodename = macmap.find_node_by_mac(info['hwaddr'], cfg)
        return nodename
    currcert = handler.https_cert
    if not currcert:
        info['discofailure'] = 'nohttps'
        return None
    currprint = util.get_fingerprint(currcert)
    nodename = nodes_by_fprint.get(currprint, None)
    if not nodename:
        nodename = macmap.find_node_by_mac(info['hwaddr'], cfg)
    return nodename
def eval_node(cfg, handler, info, nodename, manual=False):
    """Evaluate a detected endpoint against the candidate node name.

    Probes/preconfigures the endpoint, disambiguates enclosure-vs-member
    situations using enclosure.manager/enclosure.bay attributes, and hands
    off to discover_node.  When ``manual`` is set, errors raise
    InvalidArgumentException instead of only being logged.
    """
    try:
        handler.probe()  # unicast interrogation as possible to get more data
        # for now, we search switch only, ideally we search cmm, smm, and
        # switch concurrently
        # do some preconfig, for example, to bring a SMM online if applicable
        handler.preconfig()
    except Exception as e:
        unknown_info[info['hwaddr']] = info
        info['discostatus'] = 'unidentified'
        # FIX: error message misspelled 'occured'
        errorstr = 'An error occurred during discovery, check the ' \
                   'trace and stderr logs, mac was {0} and ip was {1}' \
                   ', the node or the containing enclosure was {2}' \
                   ''.format(info['hwaddr'], handler.ipaddr, nodename)
        traceback.print_exc()
        if manual:
            raise exc.InvalidArgumentException(errorstr)
        log.log({'error': errorstr})
        return
    # first, if had a bay, it was in an enclosure.  If it was discovered by
    # switch, it is probably the enclosure manager and not
    # the node directly.  switch is ambiguous and we should leave it alone
    if 'enclosure.bay' in info and handler.is_enclosure:
        unknown_info[info['hwaddr']] = info
        info['discostatus'] = 'unidentified'
        log.log({'error': 'Something that is an enclosure reported a bay, '
                          'not possible'})
        if manual:
            raise exc.InvalidArgumentException()
        return
    nl = list(cfg.filter_node_attributes('enclosure.manager=' + nodename))
    if not handler.is_enclosure and nl:
        # The specified node is an enclosure (has nodes mapped to it), but
        # what we are talking to is *not* an enclosure
        if 'enclosure.bay' not in info:
            unknown_info[info['hwaddr']] = info
            info['discostatus'] = 'unidentified'
            errorstr = '{2} with mac {0} is in {1}, but unable to ' \
                       'determine bay number'.format(info['hwaddr'],
                                                     nodename,
                                                     handler.ipaddr)
            if manual:
                raise exc.InvalidArgumentException(errorstr)
            log.log({'error': errorstr})
            return
        # search for nodes fitting our description using filters
        # lead with the most specific to have a small second pass
        nl = cfg.filter_node_attributes(
            'enclosure.bay=' + info['enclosure.bay'], nl)
        nl = list(nl)
        if len(nl) != 1:
            info['discofailure'] = 'ambigconfig'
            if len(nl):
                errorstr = 'The following nodes have duplicate ' \
                           'enclosure attributes: ' + ','.join(nl)
            else:
                errorstr = 'The {0} in enclosure {1} bay {2} does not ' \
                           'seem to be a defined node ({3})'.format(
                               handler.devname, nodename,
                               info['enclosure.bay'],
                               handler.ipaddr,
                           )
            if manual:
                raise exc.InvalidArgumentException(errorstr)
            log.log({'error': errorstr})
            unknown_info[info['hwaddr']] = info
            info['discostatus'] = 'unidentified'
            return
        nodename = nl[0]
        if not discover_node(cfg, handler, info, nodename, manual):
            # store it as pending, assuming blocked on enclosure
            # assurance...
            pending_nodes[nodename] = info
    else:
        # we can and did accurately discover by switch or in enclosure
        if not discover_node(cfg, handler, info, nodename, manual):
            pending_nodes[nodename] = info
def discover_node(cfg, handler, info, nodename, manual):
    """Attempt the actual discovery/claim of ``nodename``.

    Gatekept by the node's ``discovery.policy`` attribute ('open' or
    'permissive'), unless ``manual`` is set.  Returns True when the node is
    considered discovered, False when blocked by policy, a fingerprint
    mismatch under 'permissive', or a configuration error.
    """
    known_nodes[nodename][info['hwaddr']] = info
    if info['hwaddr'] in unknown_info:
        del unknown_info[info['hwaddr']]
    info['discostatus'] = 'identified'
    dp = cfg.get_node_attributes(
        [nodename], ('discovery.policy',
                     'pubkeys.tls_hardwaremanager'))
    policy = dp.get(nodename, {}).get('discovery.policy', {}).get(
        'value', None)
    lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
                                      {}).get('value', None)
    # TODO(jjohnson2): permissive requires we guarantee storage of
    # the pubkeys, which is deferred for a little bit
    # Also, 'secure', when we have the needed infrastructure done
    # in some product or another.
    if (policy == 'permissive' and handler.https_supported and lastfp and
            not manual):
        # A stored fingerprint exists; under 'permissive' a replacement
        # device must not silently take over the identity.
        info['discofailure'] = 'fingerprint'
        log.log({'info': 'Detected replacement of {0} with existing '
                         'fingerprint and permissive discovery policy, not '
                         'doing discovery unless discovery.policy=open or '
                         'pubkeys.tls_hardwaremanager attribute is cleared '
                         'first'.format(nodename)})
        return False  # With a permissive policy, do not discover new
    elif policy in ('open', 'permissive') or manual:
        info['nodename'] = nodename
        if not handler.https_supported:
            # use uuid based scheme in lieu of tls cert, ideally only
            # for stateless 'discovery' targets like pxe, where data does not
            # change
            # NOTE(review): assumes 'uuid' is always present in info on this
            # path (true for the pxe snooper in this patch) — TODO confirm
            # for any future non-https handler
            if info['uuid'] in known_pxe_uuids:
                return True
            uuidinfo = cfg.get_node_attributes(nodename, 'id.uuid')
            known_pxe_uuids[info['uuid']] = nodename
            # TODO(jjohnson2): This is messing with the attrib database
            # so it should only be possible if policy is 'open'
            #
            if manual or policy == 'open':
                olduuid = uuidinfo.get(nodename, {}).get('id.uuid', None)
                if 'uuid' in info and \
                        info['uuid'] != olduuid:
                    cfg.set_node_attributes(
                        {nodename: {'id.uuid': info['uuid']}})
            log.log({'info': 'Detected {0} ({1} with mac {2})'.format(
                nodename, handler.devname, info['hwaddr'])})
            return True
        elif manual or not util.cert_matches(lastfp, handler.https_cert):
            # only 'discover' if it is not the same as last time
            try:
                handler.config(nodename)
            except Exception as e:
                info['discofailure'] = 'bug'
                log.log(
                    {'error':
                         'Error encountered trying to set up {0}, {1}'.format(
                             nodename, str(e))})
                traceback.print_exc()
                return False
            newnodeattribs = {}
            if 'uuid' in info:
                newnodeattribs['id.uuid'] = info['uuid']
            if handler.https_cert:
                newnodeattribs['pubkeys.tls_hardwaremanager'] = \
                    util.get_fingerprint(handler.https_cert)
            if newnodeattribs:
                cfg.set_node_attributes({nodename: newnodeattribs})
            log.log({'info': 'Discovered {0} ({1})'.format(nodename,
                                                           handler.devname)})
        info['discostatus'] = 'discovered'
        return True
    log.log({'info': 'Detected {0}, but discovery.policy is not set to a '
                     'value allowing discovery (open or permissive)'.format(
        nodename)})
    info['discofailure'] = 'policy'
    return False


# Watcher handle for attribute changes on all nodes; replaced whenever the
# node collection changes.
attribwatcher = None
# Greenthread currently servicing a nodelist change, if any.
nodeaddhandler = None
# Set when a nodelist change arrives while one is already being serviced.
needaddhandled = False


def _handle_nodelist_change(configmanager):
    """Service a node-collection change; re-run if more arrived meanwhile."""
    global needaddhandled
    global nodeaddhandler
    _recheck_nodes((), configmanager)
    if needaddhandled:
        needaddhandled = False
        nodeaddhandler = eventlet.spawn(_handle_nodelist_change, configmanager)
    else:
        nodeaddhandler = None


def newnodes(added, deleting, configmanager):
    """watch_nodecollection callback: refresh the attribute watcher and
    schedule a recheck of outstanding detection data."""
    global attribwatcher
    global needaddhandled
    global nodeaddhandler
    configmanager.remove_watcher(attribwatcher)
    allnodes = configmanager.list_nodes()
    attribwatcher = configmanager.watch_attributes(
        allnodes, ('discovery.policy', 'net*.switch',
                   'hardwaremanagement.manager', 'net*.switchport', 'id.uuid',
                   'pubkeys.tls_hardwaremanager'), _recheck_nodes)
    if nodeaddhandler:
        needaddhandled = True
    else:
        nodeaddhandler = eventlet.spawn(_handle_nodelist_change, configmanager)
# Deferred recheck greenthread and its scheduled (monotonic) fire time.
rechecker = None
rechecktime = None
rechecklock = eventlet.semaphore.Semaphore()


def _periodic_recheck(configmanager):
    """Re-evaluate unknown/pending detection data on a timer."""
    global rechecker
    global rechecktime
    rechecker = None
    # There shouldn't be anything causing this to double up, but just in case
    # use a semaphore to absolutely guarantee this doesn't multiply
    with rechecklock:
        try:
            _recheck_nodes((), configmanager)
        except Exception:
            traceback.print_exc()
            log.log({'error': 'Unexpected error during discovery, check debug '
                              'logs'})
        # if rechecker is set, it means that an accelerated schedule
        # for rechecker was requested in the course of recheck_nodes
        if rechecker is None:
            rechecktime = util.monotonic_time() + 900
            rechecker = eventlet.spawn_after(900, _periodic_recheck,
                                             configmanager)


def rescan():
    """Refresh identity maps and kick an active SLP scan."""
    _map_unique_ids()
    eventlet.spawn_n(slp.active_scan, safe_detected)


def start_detection():
    """Start passive discovery: attribute watchers, SLP and PXE snoopers,
    and the periodic recheck timer."""
    global attribwatcher
    global rechecker
    # FIX: rechecktime was assigned below without a global declaration,
    # creating a dead local and leaving the module-level rechecktime None
    # (which detected() later compares against monotonic time).
    global rechecktime
    _map_unique_ids()
    cfg = cfm.ConfigManager(None)
    allnodes = cfg.list_nodes()
    attribwatcher = cfg.watch_attributes(
        allnodes, ('discovery.policy', 'net*.switch',
                   'hardwaremanagement.manager', 'net*.switchport', 'id.uuid',
                   'pubkeys.tls_hardwaremanager'), _recheck_nodes)
    cfg.watch_nodecollection(newnodes)
    eventlet.spawn_n(slp.snoop, safe_detected)
    eventlet.spawn_n(pxe.snoop, safe_detected)
    if rechecker is None:
        rechecktime = util.monotonic_time() + 900
        rechecker = eventlet.spawn_after(900, _periodic_recheck, cfg)
    # eventlet.spawn_n(ssdp.snoop, safe_detected)


# Fast-lookup maps from TLS fingerprint / UUID to node name.
nodes_by_fprint = {}
nodes_by_uuid = {}
known_pxe_uuids = {}


def _map_unique_ids(nodes=None):
    """Rebuild the uuid and fingerprint lookup maps from the attribute db.

    :param nodes: optional iterable of node names to (re)map; defaults to
                  all nodes.
    """
    global nodes_by_uuid
    global nodes_by_fprint
    # NOTE(review): resetting both maps here makes the stale-entry removal
    # below a no-op, and discards mappings for nodes outside ``nodes`` when
    # a subset is passed — behavior preserved, but worth confirming against
    # the (unseen) _recheck_nodes caller.
    nodes_by_uuid = {}
    nodes_by_fprint = {}
    # Map current known ids based on uuid and fingerprints for fast lookup
    cfg = cfm.ConfigManager(None)
    if nodes is None:
        nodes = cfg.list_nodes()
    bigmap = cfg.get_node_attributes(nodes,
                                     ('id.uuid',
                                      'pubkeys.tls_hardwaremanager'))
    uuid_by_nodes = {}
    fprint_by_nodes = {}
    for uuid in nodes_by_uuid:
        node = nodes_by_uuid[uuid]
        if node in bigmap:
            uuid_by_nodes[node] = uuid
    for fprint in nodes_by_fprint:
        node = nodes_by_fprint[fprint]
        if node in bigmap:
            fprint_by_nodes[node] = fprint
    for node in bigmap:
        # drop any stale mapping for this node before re-adding
        if node in uuid_by_nodes:
            del nodes_by_uuid[uuid_by_nodes[node]]
        if node in fprint_by_nodes:
            del nodes_by_fprint[fprint_by_nodes[node]]
        uuid = bigmap[node].get('id.uuid', {}).get('value', None)
        if uuid:
            nodes_by_uuid[uuid] = node
        fprint = bigmap[node].get(
            'pubkeys.tls_hardwaremanager', {}).get('value', None)
        if fprint:
            nodes_by_fprint[fprint] = node
    # PXE-discovered uuids have no attribute entry yet; keep them resolvable
    for uuid in known_pxe_uuids:
        if uuid not in nodes_by_uuid:
            nodes_by_uuid[uuid] = known_pxe_uuids[uuid]


if __name__ == '__main__':
    start_detection()
    while True:
        eventlet.sleep(30)
import confluent.discovery.handlers.generic as generic
import confluent.exceptions as exc
import confluent.netutil as netutil
import eventlet.support.greendns

# Provide foundation for general IPMI device configuration

import pyghmi.exceptions as pygexc
# Patch pyghmi's ipmi session plumbing to cooperate with eventlet greenthreads
ipmicommand = eventlet.import_patched('pyghmi.ipmi.command')
ipmicommand.session.select = eventlet.green.select
ipmicommand.session.threading = eventlet.green.threading
ipmicommand.session.socket.getaddrinfo = eventlet.support.greendns.getaddrinfo
getaddrinfo = eventlet.support.greendns.getaddrinfo

# Factory-default IPMI credentials tried first during configuration
DEFAULT_USER = 'USERID'
DEFAULT_PASS = 'PASSW0RD'


class NodeHandler(generic.NodeHandler):
    """Discovery handler for generic IPMI BMCs.

    Provides ``config`` to provision credentials and network settings on a
    newly discovered BMC via pyghmi.
    """

    def _get_ipmicmd(self, user=DEFAULT_USER, password=DEFAULT_PASS):
        # Open an IPMI session to the detected address with given credentials
        return ipmicommand.Command(self.ipaddr, user, password)

    def __init__(self, info, configmanager):
        super(NodeHandler, self).__init__(info, configmanager)

    def probe(self):
        return
        # TODO(jjohnson2): probe serial number and uuid

    def config(self, nodename, reset=False):
        """Apply confluent's stored credentials/network config to the BMC.

        :param nodename: node whose attributes supply the desired config
        :param reset: when True, reset the BMC after configuration
        :raises exc.TargetEndpointBadCredentials: when no user/password
            attributes are stored for the node
        :raises exc.TargetEndpointUnreachable: when no usable management
            address can be determined
        """
        # TODO(jjohnson2): set ip parameters, user/pass, alert cfg maybe
        # In general, try to use https automation, to make it consistent
        # between hypothetical secure path and today.
        try:
            # First try the factory-default credentials
            ic = self._get_ipmicmd()
            passwd = DEFAULT_PASS
        except pygexc.IpmiException as pi:
            # Defaults failed; fall back to node-specific stored credentials
            creds = self.configmanager.get_node_attributes(
                nodename,
                ['secret.hardwaremanagementuser',
                 'secret.hardwaremanagementpassword'], decrypt=True)
            user = creds.get(nodename, {}).get(
                'secret.hardwaremanagementuser', {}).get('value', None)
            havecustomcreds = False
            if user is not None and user != DEFAULT_USER:
                havecustomcreds = True
            else:
                user = DEFAULT_USER
            passwd = creds.get(nodename, {}).get(
                'secret.hardwaremanagementpassword', {}).get('value', None)
            if passwd is not None and passwd != DEFAULT_PASS:
                havecustomcreds = True
            else:
                passwd = DEFAULT_PASS
            if havecustomcreds:
                ic = self._get_ipmicmd(user, passwd)
            else:
                # nothing else to try; re-raise the original login failure
                raise
        currusers = ic.get_users()
        lanchan = ic.get_network_channel()
        # Get Channel Access style user capability data (netfn 6, cmd 0x44):
        # low 6 bits of the first three bytes are max / enabled / fixed-name
        # user counts
        userdata = ic.xraw_command(netfn=6, command=0x44, data=(lanchan,
                                                                1))
        userdata = bytearray(userdata['data'])
        maxusers = userdata[0] & 0b111111
        enabledusers = userdata[1] & 0b111111
        lockedusers = userdata[2] & 0b111111
        cfg = self.configmanager
        cd = cfg.get_node_attributes(
            nodename, ['secret.hardwaremanagementuser',
                       'secret.hardwaremanagementpassword',
                       'hardwaremanagement.manager'], True)
        cd = cd.get(nodename, {})
        if ('secret.hardwaremanagementuser' not in cd or
                'secret.hardwaremanagementpassword' not in cd):
            raise exc.TargetEndpointBadCredentials(
                'Missing user and/or password')
        if ('hardwaremanagement.manager' in cd and
                cd['hardwaremanagement.manager']['value'] and
                not cd['hardwaremanagement.manager']['value'].startswith(
                    'fe80::')):
            # A desired routable management address is configured; assign it
            newip = cd['hardwaremanagement.manager']['value']
            newipinfo = getaddrinfo(newip, 0)[0]
            # This getaddrinfo is repeated in get_nic_config, could be
            # optimized, albeit with a more convoluted api..
            newip = newipinfo[-1][0]
            if ':' in newip:
                raise exc.NotImplementedException('IPv6 remote config TODO')
            netconfig = netutil.get_nic_config(cfg, nodename, ip=newip)
            plen = netconfig['prefix']
            newip = '{0}/{1}'.format(newip, plen)
            ic.set_net_configuration(ipv4_address=newip,
                                     ipv4_configuration='static',
                                     ipv4_gateway=netconfig['ipv4_gateway'])
        elif self.ipaddr.startswith('fe80::'):
            # No desired address configured, but the link-local address is
            # stable enough to record as the manager address
            cfg.set_node_attributes(
                {nodename: {'hardwaremanagement.manager': self.ipaddr}})
        else:
            raise exc.TargetEndpointUnreachable(
                'hardwaremanagement.manager must be set to desired address')
        newuser = cd['secret.hardwaremanagementuser']['value']
        newpass = cd['secret.hardwaremanagementpassword']['value']
        for uid in currusers:
            if currusers[uid]['name'] == newuser:
                # Use existing account that has been created
                newuserslot = uid
                break
        else:
            # No existing account: take the first slot after the fixed-name
            # users (slot 1 is reserved/anonymous in IPMI, so never below 2)
            newuserslot = lockedusers + 1
            if newuserslot < 2:
                newuserslot = 2
            ic.set_user_name(newuserslot, newuser)
        ic.set_user_access(newuserslot, lanchan,
                           privilege_level='administrator')
        if newpass != passwd:  # don't mess with existing if no change
            ic.set_user_password(newuserslot, password=newpass)
        # Now to zap others
        for uid in currusers:
            if uid != newuserslot:
                if uid <= lockedusers:  # we cannot delete, settle for disable
                    ic.disable_user(uid, 'disable')
                else:
                    # lead with the most critical thing, removing user access
                    ic.set_user_access(uid, channel=None, callback=False,
                                       link_auth=False, ipmi_msg=False,
                                       privilege_level='no_access')
                    # next, try to disable the password
                    ic.set_user_password(uid, mode='disable', password=None)
                    # ok, now we can be less paranoid
                    try:
                        ic.user_delete(uid)
                    except pygexc.IpmiException as ie:
                        if ie.ipmicode != 0xd5:  # some response to the 0xff
                            # name...
                            # the user will remain, but that is life
                            raise
        if reset:
            ic.reset_bmc()
        return
import errno


class NodeHandler(object):
    """Base class for discovery handlers.

    Holds the raw detection info, picks a target address, and lazily
    retrieves the endpoint's https certificate for identification.
    (``webclient`` is bound at module scope via eventlet.import_patched.)
    """
    https_supported = True
    is_enclosure = False

    def __init__(self, info, configmanager):
        self._certfailreason = None
        self._fp = None
        self.info = info
        self.configmanager = configmanager
        # Prefer a link-local address when one was advertised, since it is
        # the most stable identifier; otherwise settle for the first entry.
        saddrs = info['addresses']
        linklocal = [sa for sa in saddrs if sa[0].startswith('fe80')]
        chosen = linklocal[0] if linklocal else saddrs[0]
        self.ipaddr = chosen[0]

    def probe(self):
        """Directly interrogate the endpoint for extra data (serial, uuid).

        The base handler gathers nothing.
        """
        return

    def preconfig(self):
        """Hook for pre-discovery setup; no-op in the base handler."""
        return

    @property
    def discoverable_by_switch(self):
        return True

    def _savecert(self, certificate):
        # verifycallback target: remember the presented cert and accept it
        self._fp = certificate
        return True

    @property
    def cert_fail_reason(self):
        """Why the last certificate fetch failed: 'refused'/'unreachable'."""
        if self._certfailreason == 1:
            return 'refused'
        if self._certfailreason == 2:
            return 'unreachable'

    @property
    def https_cert(self):
        """The endpoint's https certificate, fetched once and cached.

        Returns None on connection failure, recording the reason for
        ``cert_fail_reason``.
        """
        if self._fp:
            return self._fp
        ip = '[{0}]'.format(self.ipaddr) if ':' in self.ipaddr else self.ipaddr
        wc = webclient.SecureHTTPConnection(ip, verifycallback=self._savecert)
        try:
            wc.connect()
        except IOError as ie:
            if ie.errno == errno.ECONNREFUSED:
                self._certfailreason = 1
            else:
                # EHOSTUNREACH and any other IOError count as unreachable
                self._certfailreason = 2
            return None
        except Exception:
            self._certfailreason = 2
            return None
        return self._fp
+ +import confluent.discovery.handlers.bmc as bmchandler +import pyghmi.exceptions as pygexc +import pyghmi.ipmi.private.util as pygutil + + +class NodeHandler(bmchandler.NodeHandler): + devname = 'IMM' + + def probe(self): + try: + ipmicmd = self._get_ipmicmd() + guiddata = ipmicmd.xraw_command(netfn=6, command=8) + self.info['uuid'] = pygutil.decode_wireformat_uuid( + guiddata['data']) + ipmicmd.oem_init() + bayid = ipmicmd._oem.immhandler.get_property( + '/v2/cmm/sp/7') + if not bayid: + return + # + self.info['enclosure.bay'] = bayid + # enclosure.bay only happens for Flex, nextscale doesn't do it + # this way + except pygexc.IpmiException as ie: + print(repr(ie)) + raise + + +# TODO(jjohnson2): web based init config for future prevalidated cert scheme +# def config(self, nodename): +# return + diff --git a/confluent_server/confluent/discovery/handlers/pxe.py b/confluent_server/confluent/discovery/handlers/pxe.py new file mode 100644 index 00000000..2b43ffbd --- /dev/null +++ b/confluent_server/confluent/discovery/handlers/pxe.py @@ -0,0 +1,39 @@ +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This contains functionality for passive detection and, one day, active +# response to pxe + + +import confluent.discovery.handlers.generic as generic + +class NodeHandler(generic.NodeHandler): + https_supported = False + is_enclosure = False + devname = 'PXE' + + def __init__(self, info, configmanager): + self.ipaddr = '' + self.cfm = configmanager + + @property + def cert_fail_reason(self): + return 'unsupported' + + @property + def https_cert(self): + return None + + def config(self, nodename): + return diff --git a/confluent_server/confluent/discovery/handlers/smm.py b/confluent_server/confluent/discovery/handlers/smm.py new file mode 100644 index 00000000..954c8e9d --- /dev/null +++ b/confluent_server/confluent/discovery/handlers/smm.py @@ -0,0 +1,38 @@ +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import confluent.discovery.handlers.bmc as bmchandler + +class NodeHandler(bmchandler.NodeHandler): + is_enclosure = True + devname = 'SMM' + + def config(self, nodename): + # SMM for now has to reset to assure configuration applies + super(NodeHandler, self).config(nodename) + +# notes for smm: +# POST to: +# https://172.30.254.160/data/changepwd +# oripwd=PASSW0RD&newpwd=Passw0rd!4321 +# got response: +# 0-ChangePwdlogin.htmlok +# requires relogin +# https://172.30.254.160/index.html +# post to: +# https://172.30.254.160/data/login +# with body user=USERID&password=Passw0rd!4321 +# yields: +# ok 0 index.html +# note forwardUrl, if password change needed, will indicate something else \ No newline at end of file diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py new file mode 100644 index 00000000..f12e1569 --- /dev/null +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -0,0 +1,69 @@ +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import confluent.discovery.handlers.bmc as bmchandler +import pyghmi.exceptions as pygexc +import pyghmi.ipmi.private.util as pygutil + + +class NodeHandler(bmchandler.NodeHandler): + devname = 'XCC' + + def probe(self): + try: + ipmicmd = self._get_ipmicmd() + guiddata = ipmicmd.xraw_command(netfn=6, command=8) + self.info['uuid'] = pygutil.decode_wireformat_uuid( + guiddata['data']) + ipmicmd.oem_init() + bayid = ipmicmd._oem.immhandler.get_property( + '/v2/cmm/sp/7') + if not bayid: + return + self.info['enclosure.bay'] = bayid + smmid = ipmicmd._oem.immhandler.get_property( + '/v2/ibmc/smm/chassis/uuid') + if not smmid: + return + smmid = smmid.lower().replace(' ', '') + smmid = '{0}-{1}-{2}-{3}-{4}'.format(smmid[:8], smmid[8:12], + smmid[12:16], smmid[16:20], + smmid[20:]) + self.info['enclosure.uuid'] = smmid + self.info['enclosure.type'] = 'smm' + except pygexc.IpmiException as ie: + print(repr(ie)) + raise + + def preconfig(self): + # attempt to enable SMM + #it's normal to get a 'not supported' (193) for systems without an SMM + ipmicmd = None + try: + ipmicmd = self._get_ipmicmd() + ipmicmd.xraw_command(netfn=0x3a, command=0xf1, data=(1,)) + except pygexc.IpmiException as e: + if e.ipmicode != 193: + # raise an issue if anything other than to be expected + raise + #TODO: decide how to clean out if important + #as it stands, this can step on itself + #if ipmicmd: + # ipmicmd.ipmi_session.logout() + + +# TODO(jjohnson2): web based init config for future prevalidated cert scheme +# def config(self, nodename): +# return + diff --git a/confluent_server/confluent/discovery/protocols/pxe.py b/confluent_server/confluent/discovery/protocols/pxe.py new file mode 100644 index 00000000..cbb1f87a --- /dev/null +++ b/confluent_server/confluent/discovery/protocols/pxe.py @@ -0,0 +1,118 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
# We can listen to port 69 with SO_REUSEADDR to snoop port 69 *even* if dhcp
# is running (because the other dhcp servers do it already)

# Goal is to detect and act on a DHCPDISCOVER, without actually having to do
# any offer

# option 97 = UUID (wireformat)

import struct
# NOTE: the module also imports eventlet.green.socket as socket at top of
# file; snoop() uses it at call time.

# PXE client system architecture (DHCP option 93) values we recognize.
# Keys are bytes so comparisons work on both Python 2 and 3.
pxearchs = {
    b'\x00\x00': 'bios-x86',
    b'\x00\x07': 'uefi-x64',
    b'\x00\x09': 'uefi-x64',
    b'\x00\x0b': 'uefi-aarch64',
}


def decode_uuid(rawguid):
    """Render a wire-format UUID (DHCP option 97, RFC 4578) as text.

    Per the SMBIOS convention the first three fields (time_low, time_mid,
    time_hi) are little-endian on the wire; the remainder is big-endian.

    FIX: the original unpacked only the tail half (with no explicit byte
    order) and referenced an undefined name ``bebytes``, raising NameError
    on every PXE detection.
    """
    lebytes = struct.unpack_from('<IHH', bytes(rawguid[:8]))
    bebytes = struct.unpack_from('>HHI', bytes(rawguid[8:]))
    return '{0:08X}-{1:04X}-{2:04X}-{3:04X}-{4:04X}{5:08X}'.format(
        lebytes[0], lebytes[1], lebytes[2], bebytes[0], bebytes[1], bebytes[2])


def find_info_in_options(rq, optidx):
    """Walk DHCP options starting at optidx, extracting uuid and arch.

    Returns (uuid, arch); either may be None.  Aborts early if the message
    type option shows this is not a DISCOVER.
    """
    uuid = None
    arch = None
    try:
        while uuid is None or arch is None:
            if rq[optidx] == 53:  # DHCP message type
                # we want only length 1 and only discover (type 1)
                if rq[optidx + 1] != 1 or rq[optidx + 2] != 1:
                    return uuid, arch
                optidx += 3
            elif rq[optidx] == 97:  # client machine identifier (uuid)
                if rq[optidx + 1] != 17:
                    # 16 bytes of uuid and one reserved byte
                    return uuid, arch
                if rq[optidx + 2] != 0:  # the reserved byte should be zero,
                    # anything else would be a new spec that we don't know yet
                    return uuid, arch
                uuid = decode_uuid(rq[optidx + 3:optidx + 19])
                optidx += 19
            elif rq[optidx] == 93:  # client system architecture
                if rq[optidx + 1] != 2:
                    return uuid, arch
                archraw = bytes(rq[optidx + 2:optidx + 4])
                if archraw in pxearchs:
                    arch = pxearchs[archraw]
                optidx += 4
            else:
                optidx += rq[optidx + 1] + 2
    except IndexError:
        return uuid, arch
    return uuid, arch


def snoop(handler):
    """Passively watch port 67 for DHCPDISCOVERs carrying PXE options.

    Calls ``handler`` with a detection info dict for each suitable request.
    """
    #TODO(jjohnson2): ipv6 socket and multicast for DHCPv6, should that be
    #prominent
    #TODO(jjohnson2): IP_PKTINFO, recvmsg to get the destination ip, per
    #proxydhcp.c from xCAT
    net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    net4.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    net4.bind(('', 67))
    while True:
        (rq, peer) = net4.recvfrom(9000)
        # if we have a small packet, just skip, it can't possibly hold enough
        # data and avoids some downstream IndexErrors that would be messy
        # with try/except
        if len(rq) < 64:
            continue
        rq = bytearray(rq)
        if rq[0] == 1:  # Boot request
            addrlen = rq[2]
            if addrlen > 16:  # max address size in bootp is 16 bytes
                continue
            netaddr = rq[28:28 + addrlen]
            netaddr = ':'.join(['{0:02x}'.format(x) for x in netaddr])
            optidx = 0
            try:
                # FIX: search for the DHCP magic cookie with a bytes pattern;
                # a str argument to bytearray.index breaks on Python 3
                optidx = rq.index(b'\x63\x82\x53\x63') + 4
            except ValueError:
                continue
            uuid, arch = find_info_in_options(rq, optidx)
            if uuid is None:
                continue
            # We will fill out service to have something to byte into,
            # but the nature of the beast is that we do not have peers,
            # so that will not be present for a pxe snoop
            handler({'hwaddr': netaddr, 'uuid': uuid, 'architecture': arch,
                     'services': ('pxe-client',)})


if __name__ == '__main__':
    def testsnoop(info):
        print(repr(info))
    snoop(testsnoop)
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import confluent.neighutil as neighutil +import confluent.util as util +import os +import random +import eventlet.green.select as select +import eventlet.green.socket as socket +import struct +import subprocess + + +_slp_services = set([ + 'service:management-hardware.IBM:integrated-management-module2', + 'service:lenovo-smm', + 'service:management-hardware.Lenovo:lenovo-xclarity-controller', +]) + +# SLP has a lot of ambition that was unfulfilled in practice. +# So we have a static footer here to always use 'DEFAULT' scope, no LDAP +# predicates, and no authentication for service requests +srvreqfooter = b'\x00\x07DEFAULT\x00\x00\x00\x00' +# An empty instance of the attribute list extension +# which is defined in RFC 3059, used to indicate support for that capability +attrlistext = b'\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00' + + +def _parse_slp_header(packet): + packet = bytearray(packet) + if len(packet) < 16 or packet[0] != 2: + # discard packets that are obviously useless + return None + parsed = { + 'function': packet[1], + } + (offset, parsed['xid'], langlen) = struct.unpack('!IHH', + bytes(b'\x00' + packet[7:14])) + parsed['lang'] = packet[14:14 + langlen].decode('utf-8') + parsed['payload'] = packet[14 + langlen:] + if offset: + parsed['offset'] = 14 + langlen + parsed['extoffset'] = offset + return parsed + + +def _pop_url(payload): + urllen = struct.unpack('!H', bytes(payload[3:5]))[0] + url = bytes(payload[5:5+urllen]).decode('utf-8') + if payload[5+urllen] != 0: + raise Exception('Auth blocks unsupported') + payload = payload[5+urllen+1:] + 
return url, payload + + +def _parse_SrvRply(parsed): + """ Modify passed dictionary to have parsed data + + + :param parsed: + :return: + """ + payload = parsed['payload'] + ecode, ucount = struct.unpack('!HH', bytes(payload[0:4])) + if ecode: + parsed['errorcode'] = ecode + payload = payload[4:] + parsed['urls'] = [] + while ucount: + ucount -= 1 + url, payload = _pop_url(payload) + parsed['urls'].append(url) + + +def _parse_slp_packet(packet, peer, rsps, xidmap): + parsed = _parse_slp_header(packet) + if not parsed: + return + addr = peer[0] + if '%' in addr: + addr = addr[:addr.index('%')] + mac = None + if addr in neighutil.neightable: + identifier = neighutil.neightable[addr] + mac = identifier + else: + identifier = addr + if (identifier, parsed['xid']) in rsps: + # avoid obviously duplicate entries + parsed = rsps[(identifier, parsed['xid'])] + else: + rsps[(identifier, parsed['xid'])] = parsed + if mac and 'hwaddr' not in parsed: + parsed['hwaddr'] = mac + if parsed['xid'] in xidmap: + parsed['services'] = [xidmap[parsed['xid']]] + if 'addresses' in parsed: + if peer not in parsed['addresses']: + parsed['addresses'].append(peer) + else: + parsed['addresses'] = [peer] + if parsed['function'] == 2: # A service reply + _parse_SrvRply(parsed) + + +def _v6mcasthash(srvtype): + # The hash algorithm described by RFC 3111 + nums = bytearray(srvtype.encode('utf-8')) + hashval = 0 + for i in nums: + hashval *= 33 + hashval += i + hashval &= 0xffff # only need to track the lowest 16 bits + hashval &= 0x3ff + hashval |= 0x1000 + return '{0:x}'.format(hashval) + + +def _generate_slp_header(payload, multicast, functionid, xid, extoffset=0): + if multicast: + flags = 0x2000 + else: + flags = 0 + packetlen = len(payload) + 16 # we have a fixed 16 byte header supported + if extoffset: # if we have an offset, add 16 to account for this function + # generating a 16 byte header + extoffset += 16 + if packetlen > 1400: + # For now, we aren't intending to support large SLP 
def _generate_request_payload(srvtype, multicast, xid, prlist=''):
    """Build a complete SLP SrvRqst (function 1) message.

    :param srvtype: service type string to query for
    :param multicast: whether the request will be sent multicast
    :param xid: transaction id to stamp into the header
    :param prlist: previous-responder list, empty by default
    :return: header plus body, ready to transmit
    """
    prbytes = prlist.encode('utf-8')
    stbytes = srvtype.encode('utf-8')
    body = bytearray()
    # each string is carried as a 16-bit length followed by its UTF-8 bytes
    body += struct.pack('!H', len(prbytes)) + prbytes
    body += struct.pack('!H', len(stbytes)) + stbytes
    # static scope/predicate/SPI trailer ('DEFAULT' scope, no predicate/auth)
    body += srvreqfooter
    # the RFC 3059 attribute-list extension starts right after the base body
    extoffset = len(body)
    body += attrlistext
    header = _generate_slp_header(body, multicast, functionid=1, xid=xid,
                                  extoffset=extoffset)
    return header + body
work with link local + v6addrs.append(('ff01::1:' + v6hash, 427, 0, 0)) + v6addrs.append(('ff02::1:' + v6hash, 427, 0, 0)) + for idx in util.list_interface_indexes(): + # IPv6 multicast is by index, so lead with that + net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_MULTICAST_IF, idx) + for sa in v6addrs: + try: + net.sendto(data, sa) + except socket.error: + # if we hit an interface without ipv6 multicast, + # this can cause an error, skip such an interface + # case in point, 'lo' + pass + for i4 in util.list_ips(): + if 'broadcast' not in i4: + continue + addr = i4['addr'] + bcast = i4['broadcast'] + net4.setsockopt(socket.IPPROTO_IP, socket.IP_MULTICAST_IF, + socket.inet_aton(addr)) + net4.sendto(data, ('239.255.255.253', 427)) + net4.sendto(data, (bcast, 427)) + + +def _grab_rsps(socks, rsps, interval, xidmap): + r, _, _ = select.select(socks, (), (), interval) + while r: + for s in r: + (rsp, peer) = s.recvfrom(9000) + neighutil.refresh_neigh() + _parse_slp_packet(rsp, peer, rsps, xidmap) + r, _, _ = select.select(socks, (), (), interval) + + + +def _parse_attrlist(attrstr): + attribs = {} + while attrstr: + if attrstr[0] == '(': + if ')' not in attrstr: + attribs['INCOMPLETE'] = True + return attribs + currattr = attrstr[1:attrstr.index(')')] + if '=' not in currattr: # Not allegedly kosher, but still.. 
+ currattr = currattr.decode('utf-8') + attribs[currattr] = None + else: + attrname, attrval = currattr.split('=') + attrname = attrname.decode('utf-8') + attribs[attrname] = [] + for val in attrval.split(','): + try: + val = val.decode('utf-8') + except UnicodeDecodeError: + val = '*DECODEERROR*' + if val[:3] == '\\FF': # we should make this bytes + finalval = bytearray([]) + for bnum in attrval[3:].split('\\'): + if bnum == '': + continue + finalval.append(int(bnum, 16)) + val = finalval + if 'uuid' in attrname and len(val) == 16: + lebytes = struct.unpack_from( + 'HHI', buffer(val[8:])) + val = '{0:08X}-{1:04X}-{2:04X}-{3:04X}-' \ + '{4:04X}{5:08X}'.format( + lebytes[0], lebytes[1], lebytes[2], bebytes[0], + bebytes[1], bebytes[2] + ) + attribs[attrname].append(val) + attrstr = attrstr[attrstr.index(')'):] + elif attrstr[0] == ',': + attrstr = attrstr[1:] + elif ',' in attrstr: + currattr = attrstr[:attrstr.index(',')] + attribs[currattr] = None + attrstr = attrstr[attrstr.index(','):] + else: + currattr = attrstr + attribs[currattr] = None + attrstr = None + return attribs + + +def _parse_attrs(data, parsed): + headinfo = _parse_slp_header(data) + if headinfo['function'] != 7 or headinfo['xid'] != parsed['xid']: + return + payload = headinfo['payload'] + if struct.unpack('!H', bytes(payload[:2]))[0] != 0: + return + length = struct.unpack('!H', bytes(payload[2:4]))[0] + attrstr = bytes(payload[4:4+length]) + parsed['attributes'] = _parse_attrlist(attrstr) + + +def _add_attributes(parsed): + attrq = _generate_attr_request(parsed['services'][0], parsed['xid']) + target = None + # prefer reaching out to an fe80 if present, to be highly robust + # in face of network changes + for addr in parsed['addresses']: + if addr[0].startswith('fe80'): + target = addr + # however if no fe80 seen, roll with the first available address + if not target: + target = parsed['addresses'][0] + if len(target) == 4: + net = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + else: + 
def snoop(handler):
    """Watch for SLP activity

    handler will be called with a dictionary of relevant attributes

    :param handler: callable receiving one discovery dict per responding peer
    :return: does not return; loops forever
    """
    # pick up devices that are already online before waiting for traffic
    active_scan(handler)
    net = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
    net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
    # SLP's IPv6 multicast groups: interface-local and link-local
    slpg = socket.inet_pton(socket.AF_INET6, 'ff01::123')
    slpg2 = socket.inet_pton(socket.AF_INET6, 'ff02::123')
    for i6idx in util.list_interface_indexes():
        # group membership is per interface index
        mreq = slpg + struct.pack('=I', i6idx)
        net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, mreq)
        mreq = slpg2 + struct.pack('=I', i6idx)
        net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, mreq)
    net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    net.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    net4.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    for i4 in util.list_ips():
        if 'broadcast' not in i4:
            continue
        # join the IPv4 SLP multicast group via each broadcast-capable addr
        slpmcast = socket.inet_aton('239.255.255.253') + \
            socket.inet_aton(i4['addr'])
        try:
            net4.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP,
                            slpmcast)
        except socket.error as e:
            if e.errno != 98:
                raise
            # socket in use can occur when aliased ipv4 are encountered
    net.bind(('', 427))
    net4.bind(('', 427))

    while True:
        newmacs = set([])
        r, _, _ = select.select((net, net4), (), (), 60)
        # clear known_peers and peerbymacaddress
        # to avoid stale info getting in...
        # rely upon the select(0.2) to catch rapid fire and aggregate ip
        # addresses that come close together
        # calling code needs to understand deeper context, as snoop
        # will now yield dupe info over time
        known_peers = set([])
        peerbymacaddress = {}
        neighutil.update_neigh()
        while r:
            for s in r:
                (rsp, peer) = s.recvfrom(9000)
                ip = peer[0].partition('%')[0]  # strip ipv6 scope id if any
                # only track peers whose mac we can resolve from the
                # neighbor table
                if ip not in neighutil.neightable:
                    continue
                if peer in known_peers:
                    continue
                known_peers.add(peer)
                mac = neighutil.neightable[ip]
                if mac in peerbymacaddress:
                    peerbymacaddress[mac]['addresses'].append(peer)
                else:
                    q = query_srvtypes(peer)
                    if not q or not q[0]:
                        # SLP might have started and not ready yet
                        # ignore for now
                        known_peers.discard(peer)
                        continue
                    peerbymacaddress[mac] = {
                        'services': q,
                        'addresses': [peer],
                    }
                newmacs.add(mac)
            r, _, _ = select.select((net, net4), (), (), 0.2)
        for mac in newmacs:
            # fixed xid of 1 for the follow-up attribute request
            peerbymacaddress[mac]['xid'] = 1
            _add_attributes(peerbymacaddress[mac])
            peerbymacaddress[mac]['hwaddr'] = mac
            handler(peerbymacaddress[mac])
+ _grab_rsps((net, net4), rsps, 1, xidmap) + # now to analyze and flesh out the responses + for id in rsps: + _add_attributes(rsps[id]) + del rsps[id]['payload'] + del rsps[id]['function'] + del rsps[id]['xid'] + yield rsps[id] + + +if __name__ == '__main__': + def testsnoop(a): + print(repr(a)) + snoop(testsnoop) \ No newline at end of file diff --git a/confluent_server/confluent/discovery/protocols/ssdp.py b/confluent_server/confluent/discovery/protocols/ssdp.py new file mode 100644 index 00000000..de543697 --- /dev/null +++ b/confluent_server/confluent/discovery/protocols/ssdp.py @@ -0,0 +1,232 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def snoop(handler, byehandler=None):
    """Watch for SSDP notify messages

    The handler shall be called on any service coming online.
    byehandler is called whenever a system advertises that it is departing.
    If no byehandler is specified, byebye messages are ignored. The handler is
    given (as possible), the mac address, a list of viable sockaddrs to
    reference the peer, and the notification type (e.g.
    'urn:dmtf-org:service:redfish-rest:1'

    :param handler: A handler for online notifications from network
    :param byehandler: Optional handler for devices going off the network
    :return: does not return; loops forever
    """
    # Normally, I like using v6/v4 agnostic socket. However, since we are
    # dabbling in multicast wizardry here, such sockets can cause big problems,
    # so we will have two distinct sockets
    known_peers = set([])
    net6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
    net6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
    for ifidx in util.list_interface_indexes():
        # join the SSDP v6 multicast group on every interface
        v6grp = ssdp6mcast + struct.pack('=I', ifidx)
        net6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, v6grp)
    net6.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    for i4 in util.list_ips():
        # join the SSDP v4 multicast group via each local address
        ssdp4mcast = socket.inet_pton(socket.AF_INET, mcastv4addr) + \
            socket.inet_aton(i4['addr'])
        net4.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP,
                        ssdp4mcast)
    net4.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    net4.bind(('', 1900))
    net6.bind(('', 1900))
    peerbymacaddress = {}
    while True:
        newmacs = set([])
        machandlers = {}
        r, _, _ = select.select((net4, net6), (), (), 60)
        neighutil.update_neigh()
        while r:
            for s in r:
                (rsp, peer) = s.recvfrom(9000)
                rsp = rsp.split('\r\n')
                method, _, _ = rsp[0].split(' ', 2)
                if method == 'NOTIFY':
                    ip = peer[0].partition('%')[0]  # strip ipv6 scope id
                    # only consider peers resolvable to a mac address
                    if ip not in neighutil.neightable:
                        continue
                    if peer in known_peers:
                        continue
                    mac = neighutil.neightable[ip]
                    known_peers.add(peer)
                    newmacs.add(mac)
                    if mac in peerbymacaddress:
                        peerbymacaddress[mac]['peers'].append(peer)
                    else:
                        peerbymacaddress[mac] = {
                            'hwaddr': mac,
                            'peers': [peer],
                        }
                    peerdata = peerbymacaddress[mac]
                    for headline in rsp[1:]:
                        if not headline:
                            continue
                        header, _, value = headline.partition(':')
                        header = header.strip()
                        value = value.strip()
                        if header == 'NT':
                            # notification type, i.e. the advertised service
                            peerdata['service'] = value
                        elif header == 'NTS':
                            # alive vs byebye decides which handler fires
                            if value == 'ssdp:byebye':
                                machandlers[mac] = byehandler
                            elif value == 'ssdp:alive':
                                machandlers[mac] = handler
            # short select to aggregate bursts of notifies
            r, _, _ = select.select((net4, net6), (), (), 0.1)
        for mac in newmacs:
            # byehandler may be None, in which case byebyes are dropped here
            thehandler = machandlers.get(mac, None)
            if thehandler:
                thehandler(peerbymacaddress[mac])
ValueError: + return + myurl = None + if code == '200': + if nid in peerdata: + peerdatum = peerdata[nid] + else: + peerdatum = { + 'peers': [peer], + 'hwaddr': mac, + } + peerdata[nid] = peerdatum + for headline in headlines[1:]: + if not headline: + continue + header, _, value = headline.partition(':') + header = header.strip() + value = value.strip() + if header == 'AL' or header == 'LOCATION': + myurl = value + if 'urls' not in peerdatum: + peerdatum['urls'] = [value] + elif value not in peerdatum['urls']: + peerdatum['urls'].append(value) + elif header == 'ST': + if 'services' not in peerdatum: + peerdatum['services'] = [value] + elif value not in peerdatum['services']: + peerdatum['services'].append(value) + + + +if __name__ == '__main__': + + for rsp in scan(['urn:dmtf-org:service:redfish-rest:1']): + print(repr(rsp)) + def fun(a): + print(repr(a)) + def byefun(a): + print('bye' + repr(a)) + snoop(fun, byefun) diff --git a/confluent_server/confluent/neighutil.py b/confluent_server/confluent/neighutil.py new file mode 100644 index 00000000..9da1d195 --- /dev/null +++ b/confluent_server/confluent/neighutil.py @@ -0,0 +1,64 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2016 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A consolidated manage of neighbor table information management. 
# ip address (string) -> mac address (string), rebuilt by update_neigh()
neightable = {}
# os.times()[4] timestamp of the last table refresh
neightime = 0

import re

# loose shape check that the parsed column really is a mac address
_validmac = re.compile('..:..:..:..:..:..')


def update_neigh():
    """Rebuild the ip->mac neighbor table by parsing `ip neigh` output."""
    global neightable
    global neightime
    neightable = {}
    # no 'ip' tool on windows; leave the table empty there
    if os.name == 'nt':
        return
    ipn = subprocess.Popen(['ip', 'neigh'], stdin=subprocess.PIPE,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
    (neighdata, err) = ipn.communicate()
    for entry in neighdata.split('\n'):
        # column 0 is the ip address, column 4 the lladdr (mac); lines
        # without at least five columns can't carry a mac
        entry = entry.split(' ')
        if len(entry) < 5 or not entry[4]:
            continue
        if entry[0] in ('192.168.0.100', '192.168.70.100', '192.168.70.125'):
            # Note that these addresses are common static ip addresses
            # that are hopelessly ambiguous if there are many
            # so ignore such entries and move on
            # ideally the system network steers clear of this landmine of
            # a subnet, but just in case
            continue
        if not _validmac.match(entry[4]):
            continue
        neightable[entry[0]] = entry[4]
    # os.times()[4] is elapsed real time; used as the staleness clock
    neightime = os.times()[4]


def refresh_neigh():
    """Re-run update_neigh() when the cached table is over 30 seconds old."""
    global neightime
    if os.name == 'nt':
        return
    if os.times()[4] > (neightime + 30):
        update_neigh()
def ip_on_same_subnet(first, second, prefix):
    """Return True when two addresses fall in the same subnet.

    :param first: first address (IPv4 or IPv6 literal, or resolvable name)
    :param second: second address; must resolve to the same family as first
    :param prefix: subnet prefix length in bits
    :return: True if both addresses share the prefix-length network
    """
    firstinfo = socket.getaddrinfo(first, None, 0, socket.SOCK_STREAM)[0]
    secondinfo = socket.getaddrinfo(second, None, 0, socket.SOCK_STREAM)[0]
    family = firstinfo[0]
    # different address families can never share a subnet
    if family != secondinfo[0]:
        return False
    if family == socket.AF_INET:
        addrlen = 32
    elif family == socket.AF_INET6:
        addrlen = 128
    else:
        raise Exception("Unknown address family {0}".format(family))
    # convert each packed address to an integer for mask arithmetic
    firstnum = int(codecs.encode(
        bytes(socket.inet_pton(family, firstinfo[-1][0])), 'hex'), 16)
    secondnum = int(codecs.encode(
        bytes(socket.inet_pton(family, secondinfo[-1][0])), 'hex'), 16)
    netmask = (2 ** prefix - 1) << (addrlen - prefix)
    return firstnum & netmask == secondnum & netmask
def get_prefix_len_for_ip(ip):
    """Determine the prefix length for an address via the route table.

    IPv4 addresses are matched against /proc/net/route; anything that is
    not IPv4 is assumed to be IPv6 and reported as /64 for now.

    Fixes relative to the original: an IPv6 literal made
    getaddrinfo(..., AF_INET) raise gaierror before the inet_aton fallback
    could return 64, and the /proc/net/route file handle leaked if parsing
    raised.

    :param ip: address (or resolvable name) to classify
    :return: prefix length in bits
    """
    # for now, we'll use the system route table
    # later may provide for configuration lookup to override the route
    # table
    try:
        ip = getaddrinfo(ip, 0, socket.AF_INET)[0][-1][0]
    except socket.gaierror:
        # not resolvable as IPv4 at all; fall back as for other non-v4 input
        return 64
    try:
        ipn = socket.inet_aton(ip)
    except socket.error:  # For now, assume 64 for ipv6
        return 64
    # It comes out big endian, regardless of host arch
    ipn = struct.unpack('>I', ipn)[0]
    with open('/proc/net/route') as rf:
        ri = rf.read()
    for rl in ri.split('\n')[1:]:
        if not rl:
            continue
        rd = rl.split('\t')
        if rd[1] == '00000000':  # default gateway, not useful for this
            continue
        # don't have big endian to look at, assume that it is host endian
        maskn = struct.unpack('I', struct.pack('>I', int(rd[7], 16)))[0]
        netn = struct.unpack('I', struct.pack('>I', int(rd[1], 16)))[0]
        if ipn & maskn == netn:
            # count the contiguous set bits of the netmask
            nbits = 0
            while maskn:
                nbits += 1
                maskn = maskn << 1 & 0xffffffff
            return nbits
    # NOTE(review): 'exc' is never imported in this module, so as written
    # this raises NameError instead of NotImplementedException; the module
    # needs 'import confluent.exceptions as exc' for this path to work
    raise exc.NotImplementedException("Non local addresses not supported")
file diff --git a/confluent_server/confluent/networking/lldp.py b/confluent_server/confluent/networking/lldp.py new file mode 100644 index 00000000..3ff04ebf --- /dev/null +++ b/confluent_server/confluent/networking/lldp.py @@ -0,0 +1,131 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2016 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This provides the implementation of locating MAC addresses on ethernet +# switches. It is, essentially, a port of 'MacMap.pm' to confluent. +# However, there are enhancements. +# For one, each switch interrogation is handled in an eventlet 'thread' +# For another, MAC addresses are checked in the dictionary on every +# switch return, rather than waiting for all switches to check in +# (which makes it more responsive when there is a missing or bad switch) +# Also, we track the quantity, actual ifName value, and provide a mechanism +# to detect ambiguous result (e.g. if two matches are found, can log an error +# rather than doing the wrong one, complete with the detected ifName value). 
+# Further, the map shall be available to all facets of the codebase, not just +# the discovery process, so that the cached data maintenance will pay off +# for direct queries + +# Provides support for viewing and processing lldp data for switches + +import confluent.exceptions as exc +import confluent.log as log +import confluent.snmputil as snmp +from eventlet.greenpool import GreenPool +import re + +# The interesting OIDs are: +# 1.0.8802.1.1.2.1.3.7.1.4 - Lookup of LLDP index id to description +# Yet another fun fact, the LLDP port index frequent +# does *not* map to ifName, like a sane +# implementation would do. Assume ifName equality +# but provide a way for 1.3.6.1.2.1.1 indicated +# ids to provide custom functions +# (1.0.8802.1.1.2.1.3.7.1.2 - theoretically this process is only very useful +# if this is '5' meaning 'same as ifName per +# 802.1AB-2005, however at *least* 7 has +# been observed to produce same results +# For now we'll optimistically assume +# equality to ifName +# 1.0.8802.1.1.2.1.4.1.1 - The information about the remote systems attached +# indexed by time index, local port, and an +# incrementing value +# 1.0.8802.1.1.2.1.4.1.1.5 - chassis id - in theory might have been useful, in +# practice limited as the potential to correlate +# to other contexts is limited. As a result, +# our strategy will be to ignore this and focus +# instead on bridge-mib/qbridge-mib indicate data +# a potential exception would be pulling in things +# that are fundamentally network equipment, +# where significant ambiguity may exist. 
+# While in a 'host' scenario, there is ambiguity +# it is more controlled (virtual machines are given +# special treatment, and strategies exist for +# disambiguating shared management/data port, and +# other functions do not interact with our discovery +# framework +# # 1.0.8802.1.1.2.1.4.1.1.9 - SysName - could be handy hint in some scenarios +# # 1.0.8802.1.1.2.1.4.1.1.10 - SysDesc - good stuff + + +def lenovoname(idx, desc): + if desc.isdigit(): + return 'Ethernet' + str(idx) + return desc + +nameoverrides = [ + (re.compile('20301\..*'), lenovoname), +] + + +def _lldpdesc_to_ifname(switchid, idx, desc): + for tform in nameoverrides: + if tform[0].match(switchid): + desc = tform[1](idx, desc) + return desc + + +def _extract_neighbor_data_b(args): + """Build LLDP data about elements connected to switch + + args are carried as a tuple, because of eventlet convenience + """ + switch, password, user = args + conn = snmp.Session(switch, password, user) + sid = None + lldpdata = {} + for sysid in conn.walk('1.3.6.1.2.1.1.2'): + sid = str(sysid[1][6:]) + idxtoifname = {} + for oidindex in conn.walk('1.0.8802.1.1.2.1.3.7.1.4'): + idx = oidindex[0][-1] + idxtoifname[idx] = _lldpdesc_to_ifname(sid, idx, str(oidindex[1])) + for remotedesc in conn.walk('1.0.8802.1.1.2.1.4.1.1.10'): + iname = idxtoifname[remotedesc[0][-2]] + lldpdata[iname] = {'description': str(remotedesc[1])} + for remotename in conn.walk('1.0.8802.1.1.2.1.4.1.1.9'): + iname = idxtoifname[remotename[0][-2]] + if iname not in lldpdata: + lldpdata[iname] = {} + lldpdata[iname]['name'] = str(remotename[1]) + for remoteid in conn.walk('1.0.8802.1.1.2.1.4.1.1.5'): + iname = idxtoifname[remoteid[0][-2]] + if iname not in lldpdata: + lldpdata[iname] = {} + lldpdata[iname]['chassisid'] = str(remoteid[1]) + print(repr(lldpdata)) + + +def _extract_neighbor_data(args): + try: + _extract_neighbor_data_b(args) + except Exception: + log.logtrace() + +if __name__ == '__main__': + # a quick one-shot test, args are switch 
and snmpv1 string for now + # (should do three argument form for snmpv3 test + import sys + _extract_neighbor_data((sys.argv[1], sys.argv[2])) diff --git a/confluent_server/dbgtools/confluentdbgcli.py b/confluent_server/dbgtools/confluentdbgcli.py new file mode 100644 index 00000000..04ba8783 --- /dev/null +++ b/confluent_server/dbgtools/confluentdbgcli.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Note that this script has a high chance of breaking confluent, so +# do not be surprised if confluent crashes as you exit... 
+ +import select +import socket +import readline +import sys +import threading + +readline.parse_and_bind('tab: complete') +conn = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) +conn.connect('/var/run/confluent/dbg.sock') + +pendingoutput = None + +class GetInput(threading.Thread): + def run(self): + global pendingoutput + while True: + try: + pendingoutput = raw_input('') + except EOFError: + pendingoutput = False + break + + +inputthread = GetInput() +inputthread.start() +while True: + r, _, _ = select.select((conn,), (), (), 0.1) + if conn in r: + sys.stdout.write(conn.recv(1)) + if pendingoutput is not None: + if pendingoutput is False: + conn.shutdown(socket.SHUT_WR) + sys.exit(1) + else: + conn.sendall(pendingoutput + '\n') + pendingoutput = None + sys.stdout.flush() From bc1b1a9676e54d96dba57659dcae89771679bb69 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 21 Jun 2017 14:30:54 -0400 Subject: [PATCH 06/31] Update nodeshell documentation on stdout/stderr --- confluent_client/doc/man/nodeshell.ronn | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_client/doc/man/nodeshell.ronn b/confluent_client/doc/man/nodeshell.ronn index 7dd8c5ed..888e8f51 100644 --- a/confluent_client/doc/man/nodeshell.ronn +++ b/confluent_client/doc/man/nodeshell.ronn @@ -9,7 +9,8 @@ nodeshell(8) -- Execute command on many nodes in a noderange through ssh Allows execution of a command on many nodes in parallel. Like noderun(8), it accepts and interpolates confluent attribute expressions as documented in -nodeattribexpressions(5). +nodeattribexpressions(5). `nodeshell` provides stdout as stdout and stderr +as stderr, unlike psh which combines all stdout and stderr into stdout. ## EXAMPLES From 1526a9b92d208223d74bfcf2ecdb52c366705a37 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 22 Jun 2017 15:42:30 -0400 Subject: [PATCH 07/31] Add functions to stub out globbing noderanges use [], and bash can do undesirable things. 
Since these commands by and large do not reference files, or at least files likely to be globbed, disable globbing. --- confluent_client/confluent_env.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/confluent_client/confluent_env.sh b/confluent_client/confluent_env.sh index 9580f10e..2d6231bc 100644 --- a/confluent_client/confluent_env.sh +++ b/confluent_client/confluent_env.sh @@ -2,3 +2,19 @@ PATH=/opt/confluent/bin:$PATH export PATH MANPATH=/opt/confluent/share/man:$MANPATH export MANPATH +alias confetty='set -f;confetty';confetty(){ command confetty ; set +f;} +alias nodeattrib='set -f;nodeattrib';nodeattrib(){ command nodeattrib ; set +f;} +alias nodeboot='set -f;nodeboot';nodeboot(){ command nodeboot ; set +f;} +alias nodeconsole='set -f;nodeconsole';nodeconsole(){ command nodeconsole ; set +f;} +alias nodeeventlog='set -f;nodeeventlog';nodeeventlog(){ command nodeeventlog ; set +f;} +alias nodefirmware='set -f;nodefirmware';nodefirmware(){ command nodefirmware ; set +f;} +alias nodegroupattrib='set -f;nodegroupattrib';nodegroupattrib(){ command nodegroupattrib ; set +f;} +alias nodehealth='set -f;nodehealth';nodehealth(){ command nodehealth ; set +f;} +alias nodeidentify='set -f;nodeidentify';nodeidentify(){ command nodeidentify ; set +f;} +alias nodeinventory='set -f;nodeinventory';nodeinventory(){ command nodeinventory ; set +f;} +alias nodelist='set -f;nodelist';nodelist(){ command nodelist ; set +f;} +alias nodepower='set -f;nodepower';nodepower(){ command nodepower ; set +f;} +alias noderun='set -f;noderun';noderun(){ command noderun ; set +f;} +alias nodesensors='set -f;nodesensors';nodesensors(){ command nodesensors ; set +f;} +alias nodesetboot='set -f;nodesetboot';nodesetboot(){ command nodesetboot ; set +f;} +alias nodeshell='set -f;nodeshell';nodeshell(){ command nodeshell ; set +f;} From e4186841b4699ea22effa9352329c835a6838d2d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 22 Jun 2017 15:50:24 -0400 Subject: 
[PATCH 08/31] Correct the stub functions to work --- confluent_client/confluent_env.sh | 32 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/confluent_client/confluent_env.sh b/confluent_client/confluent_env.sh index 2d6231bc..b8cfddc8 100644 --- a/confluent_client/confluent_env.sh +++ b/confluent_client/confluent_env.sh @@ -2,19 +2,19 @@ PATH=/opt/confluent/bin:$PATH export PATH MANPATH=/opt/confluent/share/man:$MANPATH export MANPATH -alias confetty='set -f;confetty';confetty(){ command confetty ; set +f;} -alias nodeattrib='set -f;nodeattrib';nodeattrib(){ command nodeattrib ; set +f;} -alias nodeboot='set -f;nodeboot';nodeboot(){ command nodeboot ; set +f;} -alias nodeconsole='set -f;nodeconsole';nodeconsole(){ command nodeconsole ; set +f;} -alias nodeeventlog='set -f;nodeeventlog';nodeeventlog(){ command nodeeventlog ; set +f;} -alias nodefirmware='set -f;nodefirmware';nodefirmware(){ command nodefirmware ; set +f;} -alias nodegroupattrib='set -f;nodegroupattrib';nodegroupattrib(){ command nodegroupattrib ; set +f;} -alias nodehealth='set -f;nodehealth';nodehealth(){ command nodehealth ; set +f;} -alias nodeidentify='set -f;nodeidentify';nodeidentify(){ command nodeidentify ; set +f;} -alias nodeinventory='set -f;nodeinventory';nodeinventory(){ command nodeinventory ; set +f;} -alias nodelist='set -f;nodelist';nodelist(){ command nodelist ; set +f;} -alias nodepower='set -f;nodepower';nodepower(){ command nodepower ; set +f;} -alias noderun='set -f;noderun';noderun(){ command noderun ; set +f;} -alias nodesensors='set -f;nodesensors';nodesensors(){ command nodesensors ; set +f;} -alias nodesetboot='set -f;nodesetboot';nodesetboot(){ command nodesetboot ; set +f;} -alias nodeshell='set -f;nodeshell';nodeshell(){ command nodeshell ; set +f;} +alias confetty='set -f;confetty';confetty(){ command confetty "$@"; set +f;} +alias nodeattrib='set -f;nodeattrib';nodeattrib(){ command nodeattrib "$@"; set +f;} +alias 
nodeboot='set -f;nodeboot';nodeboot(){ command nodeboot "$@"; set +f;} +alias nodeconsole='set -f;nodeconsole';nodeconsole(){ command nodeconsole "$@"; set +f;} +alias nodeeventlog='set -f;nodeeventlog';nodeeventlog(){ command nodeeventlog "$@"; set +f;} +alias nodefirmware='set -f;nodefirmware';nodefirmware(){ command nodefirmware "$@"; set +f;} +alias nodegroupattrib='set -f;nodegroupattrib';nodegroupattrib(){ command nodegroupattrib "$@"; set +f;} +alias nodehealth='set -f;nodehealth';nodehealth(){ command nodehealth "$@"; set +f;} +alias nodeidentify='set -f;nodeidentify';nodeidentify(){ command nodeidentify "$@"; set +f;} +alias nodeinventory='set -f;nodeinventory';nodeinventory(){ command nodeinventory "$@"; set +f;} +alias nodelist='set -f;nodelist';nodelist(){ command nodelist "$@"; set +f;} +alias nodepower='set -f;nodepower';nodepower(){ command nodepower "$@"; set +f;} +alias noderun='set -f;noderun';noderun(){ command noderun "$@"; set +f;} +alias nodesensors='set -f;nodesensors';nodesensors(){ command nodesensors "$@"; set +f;} +alias nodesetboot='set -f;nodesetboot';nodesetboot(){ command nodesetboot "$@"; set +f;} +alias nodeshell='set -f;nodeshell';nodeshell(){ command nodeshell "$@"; set +f;} From 9078fb01c49244c52319351d2ef9f34a3a2ebf16 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 23 Jun 2017 08:45:34 -0400 Subject: [PATCH 09/31] Fix missing data from 1.5.0 merge --- confluent_client/makeman | 0 confluent_server/confluent/discovery/__init__.py | 0 confluent_server/confluent/discovery/handlers/__init__.py | 0 confluent_server/confluent/discovery/protocols/__init__.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 confluent_client/makeman create mode 100644 confluent_server/confluent/discovery/__init__.py create mode 100644 confluent_server/confluent/discovery/handlers/__init__.py create mode 100644 confluent_server/confluent/discovery/protocols/__init__.py diff --git a/confluent_client/makeman 
b/confluent_client/makeman old mode 100644 new mode 100755 diff --git a/confluent_server/confluent/discovery/__init__.py b/confluent_server/confluent/discovery/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/confluent_server/confluent/discovery/handlers/__init__.py b/confluent_server/confluent/discovery/handlers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/confluent_server/confluent/discovery/protocols/__init__.py b/confluent_server/confluent/discovery/protocols/__init__.py new file mode 100644 index 00000000..e69de29b From 8fab1ce9ebb5b8f6bbf56e7fbb776c9324949c62 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 26 Jun 2017 08:46:22 -0400 Subject: [PATCH 10/31] Fix permissions on buildindex.sh --- confluent_client/doc/man/buildindex.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 confluent_client/doc/man/buildindex.sh diff --git a/confluent_client/doc/man/buildindex.sh b/confluent_client/doc/man/buildindex.sh old mode 100644 new mode 100755 From 8126ec3791b9879111880a4f9e6874ccf492f81c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Sat, 10 Jun 2017 17:29:48 -0400 Subject: [PATCH 11/31] Implement 'memory' console.logging scheme Provide way for VT to be maintained without log to file. 
--- confluent_server/confluent/consoleserver.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index defa6089..bcb39dd7 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -208,11 +208,12 @@ class ConsoleHandler(object): self._isondemand = False elif 'console.logging' not in attrvalue[self.node]: self._isondemand = False - elif (attrvalue[self.node]['console.logging']['value'] not in ( - 'full', '')): - self._isondemand = True - elif (attrvalue[self.node]['console.logging']['value']) == 'none': - self._dologging = False + else: + if (attrvalue[self.node]['console.logging']['value'] not in ( + 'full', '', 'buffer')): + self._isondemand = True + if (attrvalue[self.node]['console.logging']['value']) in ('none', 'memory'): + self._dologging = False def get_buffer_age(self): """Return age of buffered data @@ -247,7 +248,7 @@ class ConsoleHandler(object): return else: self._ondemand() - if logvalue == 'none': + if logvalue in ('none', 'memory'): self._dologging = False if not self._isondemand or self.livesessions: eventlet.spawn(self._connect) From ff6e4d7ab6adc04e9e2934f01415e29584f54045 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 26 Jun 2017 09:23:41 -0400 Subject: [PATCH 12/31] Fix up slp.py for windows compatibility python in Windows does not have a required constant in socket, hard bake the value if not defined in the module. 
--- .../confluent/discovery/protocols/slp.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/confluent_server/confluent/discovery/protocols/slp.py b/confluent_server/confluent/discovery/protocols/slp.py index f87ce3e8..c037a815 100644 --- a/confluent_server/confluent/discovery/protocols/slp.py +++ b/confluent_server/confluent/discovery/protocols/slp.py @@ -38,6 +38,12 @@ srvreqfooter = b'\x00\x07DEFAULT\x00\x00\x00\x00' # which is defined in RFC 3059, used to indicate support for that capability attrlistext = b'\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00' +try: + IPPROTO_IPV6 = socket.IPPROTO_IPV6 +except AttributeError: + IPPROTO_IPV6 = 41 # Assume Windows value if socket is missing it + + def _parse_slp_header(packet): packet = bytearray(packet) @@ -197,7 +203,7 @@ def _find_srvtype(net, net4, srvtype, addresses, xid): v6addrs.append(('ff02::1:' + v6hash, 427, 0, 0)) for idx in util.list_interface_indexes(): # IPv6 multicast is by index, so lead with that - net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_MULTICAST_IF, idx) + net.setsockopt(IPPROTO_IPV6, socket.IPV6_MULTICAST_IF, idx) for sa in v6addrs: try: net.sendto(data, sa) @@ -378,14 +384,14 @@ def snoop(handler): """ active_scan(handler) net = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) - net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1) + net.setsockopt(IPPROTO_IPV6, socket.IPV6_V6ONLY, 1) slpg = socket.inet_pton(socket.AF_INET6, 'ff01::123') slpg2 = socket.inet_pton(socket.AF_INET6, 'ff02::123') for i6idx in util.list_interface_indexes(): mreq = slpg + struct.pack('=I', i6idx) - net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, mreq) + net.setsockopt(IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, mreq) mreq = slpg2 + struct.pack('=I', i6idx) - net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, mreq) + net.setsockopt(IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, mreq) net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) net.setsockopt(socket.SOL_SOCKET, 
socket.SO_REUSEADDR, 1) net4.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) @@ -483,7 +489,7 @@ def scan(srvtypes=_slp_services, addresses=None): # must make the best of it # Some platforms/config default to IPV6ONLY, we are doing IPv4 # too, so force it - net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + #net.setsockopt(IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) # we are going to do broadcast, so allow that... initxid = random.randint(0, 32768) xididx = 0 From 86ed339b48e32e0c48c41e7a63e9d1a06cddc3b8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 9 Jun 2017 12:24:35 -0400 Subject: [PATCH 13/31] Add localonly mode to SLP Sometimes in a likely mismatched IP situation, some SLP things will manage to reply and slow down. For now in the case of mismatched IPv4 being likely, provide a mode fixated on link local. --- confluent_server/confluent/discovery/protocols/slp.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/protocols/slp.py b/confluent_server/confluent/discovery/protocols/slp.py index c037a815..65111011 100644 --- a/confluent_server/confluent/discovery/protocols/slp.py +++ b/confluent_server/confluent/discovery/protocols/slp.py @@ -28,6 +28,8 @@ _slp_services = set([ 'service:management-hardware.IBM:integrated-management-module2', 'service:lenovo-smm', 'service:management-hardware.Lenovo:lenovo-xclarity-controller', + 'service:management-hardware.IBM:chassis-management-module', + 'service:management-hardware.Lenovo:chassis-management-module', ]) # SLP has a lot of ambition that was unfulfilled in practice. 
@@ -468,7 +470,7 @@ def active_scan(handler): handler(scanned) -def scan(srvtypes=_slp_services, addresses=None): +def scan(srvtypes=_slp_services, addresses=None, localonly=False): """Find targets providing matching requested srvtypes This is a generator that will iterate over respondants to the SrvType @@ -508,6 +510,12 @@ def scan(srvtypes=_slp_services, addresses=None): _grab_rsps((net, net4), rsps, 1, xidmap) # now to analyze and flesh out the responses for id in rsps: + if localonly: + for addr in rsps[id]['addresses']: + if 'fe80' in addr[0]: + break + else: + continue _add_attributes(rsps[id]) del rsps[id]['payload'] del rsps[id]['function'] From a3c06a00bf2a43fbda93be6999d578a8534b52c8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 27 Jun 2017 14:04:26 -0400 Subject: [PATCH 14/31] Fix confetty behavior when TERM not set Particularly in non-interactive ssh, TERM is not set. Assume empty string if TERM is not set. --- confluent_client/bin/confetty | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/confluent_client/bin/confetty b/confluent_client/bin/confetty index cccaa45c..ef392d35 100755 --- a/confluent_client/bin/confetty +++ b/confluent_client/bin/confetty @@ -143,7 +143,7 @@ def updatestatus(stateinfo={}): info.append(time.strftime('%H:%M', time.localtime(showtime))) if info: status += ' [' + ','.join(info) + ']' - if os.environ['TERM'] not in ('linux'): + if os.environ.get('TERM', '') not in ('linux'): sys.stdout.write('\x1b]0;console: %s\x07' % status) sys.stdout.flush() @@ -175,7 +175,7 @@ def recurse_format(datum, levels=0): def prompt(): - if os.environ['TERM'] not in ('linux'): + if os.environ.get('TERM', '') not in ('linux'): sys.stdout.write('\x1b]0;confetty: %s\x07' % target) try: return raw_input(target + ' -> ') @@ -337,7 +337,7 @@ def do_command(command, server): return argv[0] = argv[0].lower() if argv[0] == 'exit': - if os.environ['TERM'] not in ('linux'): + if os.environ.get('TERM', '') not in ('linux'): 
sys.stdout.write('\x1b]0;\x07') sys.exit(0) elif argv[0] in ('help', '?'): @@ -583,7 +583,7 @@ def quitconfetty(code=0, fullexit=False, fixterm=True): # Request default color scheme, to undo potential weirdness of terminal sys.stdout.write('\x1b[m') if fullexit: - if os.environ['TERM'] not in ('linux'): + if os.environ.get('TERM', '') not in ('linux'): sys.stdout.write('\x1b]0;\x07') sys.exit(code) else: From a894624403334e803bfc0dd1c497d029c285bbbf Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 27 Jun 2017 14:48:33 -0400 Subject: [PATCH 15/31] Fix python 2.6 compatibility in log python 2.6 requires a numeric argument to {} --- confluent_server/confluent/log.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/log.py b/confluent_server/confluent/log.py index aa1f260b..bf468827 100644 --- a/confluent_server/confluent/log.py +++ b/confluent_server/confluent/log.py @@ -449,11 +449,11 @@ class TimedAndSizeRotatingFileHandler(BaseRotatingHandler): odtfn = dtfn append=1 while os.path.exists(dbfn): - dbfn = odbfn + '.{}'.format(append) + dbfn = odbfn + '.{0}'.format(append) append += 1 append=1 while os.path.exists(dtfn): - dtfn = odtfn + '.{}'.format(append) + dtfn = odtfn + '.{0}'.format(append) append += 1 if os.path.exists(self.binpath): os.rename(self.binpath, dbfn) From ea9caa470b6d8b14597c2e06e9ca2c4f895cfa19 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 27 Jun 2017 15:06:59 -0400 Subject: [PATCH 16/31] Fix unexpected error on retrieving nodegroup attributes Detect and more specifically report error message when an unknown group is used for nodegroup commands. 
--- confluent_server/confluent/core.py | 3 +- confluent_server/confluent/exceptions.py | 34 +++++++++++-------- .../plugins/configuration/attributes.py | 9 ++++- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index c2d647c6..e5003ef9 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -363,7 +363,8 @@ def delete_node_collection(collectionpath, configmanager, isnoderange): def enumerate_nodegroup_collection(collectionpath, configmanager): nodegroup = collectionpath[1] if not configmanager.is_nodegroup(nodegroup): - raise exc.NotFoundException("Invalid element requested") + raise exc.NotFoundException( + 'Invalid nodegroup: {0} not found'.format(nodegroup)) del collectionpath[0:2] collection = nested_lookup(nodegroupresources, collectionpath) return iterate_resources(collection) diff --git a/confluent_server/confluent/exceptions.py b/confluent_server/confluent/exceptions.py index ca618b0e..47f9efdf 100644 --- a/confluent_server/confluent/exceptions.py +++ b/confluent_server/confluent/exceptions.py @@ -1,7 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation -# Copyright 2015 Lenovo +# Copyright 2015-2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -21,75 +21,81 @@ import json class ConfluentException(Exception): apierrorcode = 500 - apierrorstr = 'Unexpected Error' + _apierrorstr = 'Unexpected Error' def get_error_body(self): - errstr = ' - '.join((self.apierrorstr, str(self))) + errstr = ' - '.join((self._apierrorstr, str(self))) return json.dumps({'error': errstr }) + @property + def apierrorstr(self): + if str(self): + return self._apierrorstr + ' - ' + str(self) + return self._apierrorstr + class NotFoundException(ConfluentException): # Something that could be construed as a name was not found # basically, picture an http error code 404 apierrorcode = 404 - apierrorstr = 'Request path not recognized' + _apierrorstr = 'Target not found' class InvalidArgumentException(ConfluentException): # Something from the remote client wasn't correct # like http code 400 apierrorcode = 400 - apierrorstr = 'Bad Request' + _apierrorstr = 'Bad Request' class TargetEndpointUnreachable(ConfluentException): # A target system was unavailable. For example, a BMC # was unreachable. http code 504 apierrorcode = 504 - apierrorstr = 'Unreachable Target' + _apierrorstr = 'Unreachable Target' class TargetEndpointBadCredentials(ConfluentException): # target was reachable, but authentication/authorization # failed apierrorcode = 502 - apierrorstr = 'Bad Credentials' + _apierrorstr = 'Bad Credentials' class LockedCredentials(ConfluentException): # A request was performed that required a credential, but the credential # store is locked - apierrorstr = 'Credential store locked' + _apierrorstr = 'Credential store locked' class ForbiddenRequest(ConfluentException): # The client request is not allowed by authorization engine apierrorcode = 403 - apierrorstr = 'Forbidden' + _apierrorstr = 'Forbidden' class NotImplementedException(ConfluentException): # The current configuration/plugin is unable to perform # the requested task. 
http code 501 apierrorcode = 501 - apierrorstr = '501 - Not Implemented' + _apierrorstr = '501 - Not Implemented' class GlobalConfigError(ConfluentException): # The configuration in the global config file is not right - apierrorstr = 'Global configuration contains an error' + _apierrorstr = 'Global configuration contains an error' class TargetResourceUnavailable(ConfluentException): # This is meant for scenarios like asking to read a sensor that is # currently unavailable. This may be a persistent or transient state apierrocode = 503 - apierrorstr = 'Target Resource Unavailable' + _apierrorstr = 'Target Resource Unavailable' class PubkeyInvalid(ConfluentException): apierrorcode = 502 - apierrorstr = '502 - Invalid certificate or key on target' + _apierrorstr = '502 - Invalid certificate or key on target' def __init__(self, text, certificate, fingerprint, attribname, event): super(PubkeyInvalid, self).__init__(self, text) @@ -106,7 +112,7 @@ class PubkeyInvalid(ConfluentException): class LoggedOut(ConfluentException): apierrorcode = 401 - apierrorstr = '401 - Logged out' + _apierrorstr = '401 - Logged out' def get_error_body(self): return '{"loggedout": 1}' diff --git a/confluent_server/confluent/plugins/configuration/attributes.py b/confluent_server/confluent/plugins/configuration/attributes.py index 11e00f8f..1139f3f5 100644 --- a/confluent_server/confluent/plugins/configuration/attributes.py +++ b/confluent_server/confluent/plugins/configuration/attributes.py @@ -1,4 +1,5 @@ # Copyright 2014 IBM Corporation +# Copyright 2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -26,7 +27,13 @@ def retrieve(nodes, element, configmanager, inputdata): def retrieve_nodegroup(nodegroup, element, configmanager, inputdata): - grpcfg = configmanager.get_nodegroup_attributes(nodegroup) + try: + grpcfg = configmanager.get_nodegroup_attributes(nodegroup) + except KeyError: + if not configmanager.is_nodegroup(nodegroup): + raise exc.NotFoundException( + 'Invalid nodegroup: {0} not found'.format(nodegroup)) + raise if element == 'all': theattrs = set(allattributes.node).union(set(grpcfg)) theattrs.add('nodes') From 3bcf236744b8151e16a2ccf858fa31d8bc18ed17 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 28 Jun 2017 10:13:50 -0400 Subject: [PATCH 17/31] Fix serial numbers with leading spaces Sometimes the string may have leading spaces, remove those for sane presentation. --- confluent_server/confluent/discovery/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index 11e25666..6a256114 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -448,7 +448,7 @@ def detected(info): else: # no nodehandler, ignore for now return try: - snum = info['attributes']['enclosure-serial-number'][0].rstrip() + snum = info['attributes']['enclosure-serial-number'][0].strip() if snum: info['serialnumber'] = snum known_serials[info['serialnumber']] = info From dc23793d32d8412dacbbca7e7f0559ad86ac9ca8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 28 Jun 2017 16:13:45 -0400 Subject: [PATCH 18/31] Reorder SLP services Make the most important one likely to be first, so that the attributes filled out later to fixate on the most relevant. 
--- confluent_server/confluent/discovery/protocols/slp.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/protocols/slp.py b/confluent_server/confluent/discovery/protocols/slp.py index 65111011..966db891 100644 --- a/confluent_server/confluent/discovery/protocols/slp.py +++ b/confluent_server/confluent/discovery/protocols/slp.py @@ -443,8 +443,15 @@ def snoop(handler): # ignore for now known_peers.discard(peer) continue + # we want to prioritize the very well known services + svcs = [] + for svc in q: + if svc in _slp_services: + svcs.insert(0, svc) + else: + svcs.append(svc) peerbymacaddress[mac] = { - 'services': q, + 'services': svcs, 'addresses': [peer], } newmacs.add(mac) From 5dc2d8c8be7168e4f589f16ff872ee70195ff816 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 28 Jun 2017 16:21:57 -0400 Subject: [PATCH 19/31] Prefer SLP data on probe if available Avoid attempting login on probe if at all possible. --- .../confluent/discovery/handlers/xcc.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index f12e1569..1337f33a 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -15,6 +15,8 @@ import confluent.discovery.handlers.bmc as bmchandler import pyghmi.exceptions as pygexc import pyghmi.ipmi.private.util as pygutil +import string +import struct class NodeHandler(bmchandler.NodeHandler): @@ -22,6 +24,31 @@ class NodeHandler(bmchandler.NodeHandler): def probe(self): try: + slpattrs = self.info.get('attributes', {}) + try: + ff = slpattrs.get('enclosure-form-factor', [''])[0] + except IndexError: + return + if ff != 'dense-computing': + # do not probe unless it's a dense platform + return + wronguuid = slpattrs.get('node-uuid', [''])[0] + if wronguuid: + # we need to fix the first three 
portions of the uuid + uuidprefix = wronguuid.split('-')[:3] + uuidprefix = struct.pack( + ' Date: Thu, 29 Jun 2017 10:17:17 -0400 Subject: [PATCH 20/31] Add a stateless configmanager mode Certain embodiments of confluent may not want to persist configuration. Enable an opt-in full-volatile config mode. --- .../confluent/config/configmanager.py | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index c4a14c39..74a9f949 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -90,6 +90,8 @@ _masterintegritykey = None _dirtylock = threading.RLock() _config_areas = ('nodegroups', 'nodes', 'usergroups', 'users') tracelog = None +statelessmode = False +_cfgstore = None def _mkpath(pathname): try: @@ -495,6 +497,8 @@ class ConfigManager(object): def __init__(self, tenant, decrypt=False, username=None): global _cfgstore + if _cfgstore is None: + init() self.decrypt = decrypt self.current_user = username if tenant is None: @@ -1408,6 +1412,8 @@ class ConfigManager(object): @classmethod def _bg_sync_to_file(cls): + if statelessmode: + return with cls._syncstate: if cls._syncrunning: cls._writepending = True @@ -1421,6 +1427,8 @@ class ConfigManager(object): @classmethod def _sync_to_file(cls): + if statelessmode: + return if 'dirtyglobals' in _cfgstore: with _dirtylock: dirtyglobals = copy.deepcopy(_cfgstore['dirtyglobals']) @@ -1566,11 +1574,15 @@ def dump_db_to_directory(location, password, redact=None): except OSError: pass - -try: - ConfigManager._read_from_path() -except IOError: - _cfgstore = {} +def init(stateless=False): + global _cfgstore + if stateless: + _cfgstore = {} + return + try: + ConfigManager._read_from_path() + except IOError: + _cfgstore = {} # some unit tests worth implementing: From 0745ab0fdf130322c003cb5574d58b55bd821b90 Mon Sep 17 00:00:00 2001 
From: Jarrod Johnson Date: Fri, 9 Jun 2017 18:57:30 -0400 Subject: [PATCH 21/31] Auto-encode unicode if needed If unicode comes in to be crypted and fails, be explicit about utf-8 encoding. --- confluent_server/confluent/config/configmanager.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 74a9f949..87816be7 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -261,7 +261,10 @@ def crypt_value(value, neededpad = 16 - (len(value) % 16) pad = chr(neededpad) * neededpad value += pad - cryptval = crypter.encrypt(value) + try: + cryptval = crypter.encrypt(value) + except TypeError: + cryptval = crypter.encrypt(value.encode('utf-8')) hmac = HMAC.new(integritykey, cryptval, SHA256).digest() return iv, cryptval, hmac From 1116fed1e7d710aaabbb241985fa553f531dc054 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 29 Jun 2017 10:22:45 -0400 Subject: [PATCH 22/31] Minor style fix --- confluent_server/confluent/discovery/core.py | 2 +- confluent_server/confluent/discovery/protocols/slp.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index 6a256114..b2c6ff4f 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -847,4 +847,4 @@ def _map_unique_ids(nodes=None): if __name__ == '__main__': start_detection() while True: - eventlet.sleep(30) \ No newline at end of file + eventlet.sleep(30) diff --git a/confluent_server/confluent/discovery/protocols/slp.py b/confluent_server/confluent/discovery/protocols/slp.py index 966db891..b84b0e27 100644 --- a/confluent_server/confluent/discovery/protocols/slp.py +++ b/confluent_server/confluent/discovery/protocols/slp.py @@ -533,4 +533,4 @@ def 
scan(srvtypes=_slp_services, addresses=None, localonly=False): if __name__ == '__main__': def testsnoop(a): print(repr(a)) - snoop(testsnoop) \ No newline at end of file + snoop(testsnoop) From fa6650a072484425d6080aee7a65932cd02a4577 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 29 Jun 2017 10:33:10 -0400 Subject: [PATCH 23/31] Non-linux fallback for list_interface_indexes() When errors suggest we are not in linux, make a best effort using netifaces. --- confluent_server/confluent/util.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index 4fd60fbf..5e2d6e1e 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -37,7 +37,14 @@ def list_interface_indexes(): yield intidx except (IOError, OSError): # Probably situation is non-Linux, just do limited support for - # such platforms until other people come alonge + # such platforms until other people come along + for iface in netifaces.interfaces(): + addrinfo = netifaces.ifaddresses(iface).get(socket.AF_INET6, []) + for addr in addrinfo: + v6addr = addr.get('addr', '').partition('%')[2] + if v6addr: + yield(int(v6addr)) + break return From c2115f4df9494d51895e90fdaa1a08723c836bb1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 29 Jun 2017 13:08:23 -0400 Subject: [PATCH 24/31] Fix get/set global with the stateless mode change Since supporting stateless made the init() optional, the *_global functions would not work if called first. Correct this oversight. 
--- confluent_server/confluent/config/configmanager.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 87816be7..4ce49aba 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -302,6 +302,8 @@ def get_global(globalname): :param globalname: The global parameter name to read """ + if _cfgstore is None: + init() try: return _cfgstore['globals'][globalname] except KeyError: @@ -318,6 +320,8 @@ def set_global(globalname, value): :param globalname: The global parameter name to store :param value: The value to set the global parameter to. """ + if _cfgstore is None: + init() with _dirtylock: if 'dirtyglobals' not in _cfgstore: _cfgstore['dirtyglobals'] = set() From 5a2e4a669befc2de1a774b1a997d6ea82d64c1c3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 29 Jun 2017 16:58:02 -0400 Subject: [PATCH 25/31] Add a library for confluent version of 'xcoll' Add a diff function with colorized intra-line diff info --- confluent_client/confluent/textgroup.py | 106 ++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 confluent_client/confluent/textgroup.py diff --git a/confluent_client/confluent/textgroup.py b/confluent_client/confluent/textgroup.py new file mode 100644 index 00000000..507571f0 --- /dev/null +++ b/confluent_client/confluent/textgroup.py @@ -0,0 +1,106 @@ +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import difflib +import sys + +try: + range = xrange +except NameError: + pass + + +def _colorize_line(orig, mask): + highlighted = False + newline = orig[0] + for i in range(1, len(orig)): + if i > len(mask) - 1: + if highlighted: + newline += '\x1b[0m' + newline += orig[i:] + break + if highlighted and mask[i] == ' ': + highlighted = False + newline += '\x1b[0m' + elif not highlighted and mask[i] != ' ': + highlighted = True + newline += '\x1b[31m' + newline += orig[i] + newline += '\x1b[0m' + return newline + + +def colordiff(first, second): + diffdata = list(difflib.ndiff(first, second)) + for i in range(len(diffdata)): + if i < len(diffdata) - 1 and diffdata[i + 1].startswith('?'): + yield _colorize_line(diffdata[i], diffdata[i + 1]) + elif diffdata[i].startswith('?'): + continue + else: + yield diffdata[i] + + +class GroupedData(object): + + def __init__(self): + self.bynode = {} + self.byoutput = {} + + def generate_byoutput(self): + self.byoutput = {} + for n in self.bynode: + output = '\n'.join(self.bynode[n]) + if output not in self.byoutput: + self.byoutput[output] = set([n]) + else: + self.byoutput[output].add(n) + + def add_line(self, node, line): + if node not in self.bynode: + self.bynode[node] = [line] + else: + self.bynode[node].append(line) + + def print_deviants(self, output=sys.stdout, skipmodal=True): + self.generate_byoutput() + modaloutput = None + ismodal = True + for outdata in reversed( + sorted(self.byoutput, key=lambda x: len(self.byoutput[x]))): + if modaloutput is None: + modaloutput = outdata + if skipmodal: + skipmodal = False + ismodal = False + continue + output.write('====================================\n') + output.write(','.join(sorted(self.byoutput[outdata]))) + output.write('\n====================================\n') + if ismodal: + ismodal = False + output.write(outdata) + else: + 
output.write('\n'.join(colordiff(modaloutput.split('\n'), + outdata.split('\n')))) + output.write('\n\n') + output.flush() + +if __name__ == '__main__': + groupoutput = GroupedData() + for line in sys.stdin.read().split('\n'): + if not line: + continue + groupoutput.add_line(*line.split(': ', 1)) + groupoutput.print_deviants() \ No newline at end of file From a922de2ed69ec7b6d9ee6911e15da8a260c6b32e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 18 Jul 2017 14:17:39 -0400 Subject: [PATCH 26/31] Have nodeboot continue on non-failed nodes If even one of a noderange failed to complete the set boot device step, none would progress. Change the behavior so that exit code still happens and processing does stop on timed out nodes, but go ahead and reboot those that were fine. --- confluent_client/bin/nodeboot | 12 ++++++------ confluent_client/confluent/client.py | 10 ++++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/confluent_client/bin/nodeboot b/confluent_client/bin/nodeboot index ee1f4eba..6edd7bf9 100755 --- a/confluent_client/bin/nodeboot +++ b/confluent_client/bin/nodeboot @@ -61,11 +61,11 @@ if options.biosmode: else: bootmode = 'uefi' +errnodes = set([]) rc = session.simple_noderange_command(noderange, '/boot/nextdevice', bootdev, bootmode=bootmode, - persistent=options.persist) - -if rc: - sys.exit(rc) -else: - sys.exit(session.simple_noderange_command(noderange, '/power/state', 'boot')) \ No newline at end of file + persistent=options.persist, + errnodes=errnodes) +noderange = noderange + ',-(' + ','.join(errnodes) + ')' +rc |= session.simple_noderange_command(noderange, '/power/state', 'boot') +sys.exit(rc) diff --git a/confluent_client/confluent/client.py b/confluent_client/confluent/client.py index 1936ab89..f9283dd0 100644 --- a/confluent_client/confluent/client.py +++ b/confluent_client/confluent/client.py @@ -77,7 +77,7 @@ class Command(object): def add_precede_key(self, keyname): self._prevkeyname = keyname - def 
handle_results(self, ikey, rc, res): + def handle_results(self, ikey, rc, res, errnodes=None): if 'error' in res: sys.stderr.write('Error: {0}\n'.format(res['error'])) if 'errorcode' in res: @@ -89,6 +89,8 @@ class Command(object): res = res['databynode'] for node in res: if 'error' in res[node]: + if errnodes is not None: + errnodes.add(node) sys.stderr.write('{0}: Error: {1}\n'.format( node, res[node]['error'])) if 'errorcode' in res[node]: @@ -110,7 +112,7 @@ class Command(object): return rc def simple_noderange_command(self, noderange, resource, input=None, - key=None, **kwargs): + key=None, errnodes=None, **kwargs): try: rc = 0 if resource[0] == '/': @@ -123,12 +125,12 @@ class Command(object): if input is None: for res in self.read('/noderange/{0}/{1}'.format( noderange, resource)): - rc = self.handle_results(ikey, rc, res) + rc = self.handle_results(ikey, rc, res, errnodes) else: kwargs[ikey] = input for res in self.update('/noderange/{0}/{1}'.format( noderange, resource), kwargs): - rc = self.handle_results(ikey, rc, res) + rc = self.handle_results(ikey, rc, res, errnodes) return rc except KeyboardInterrupt: print('') From 2795dfe7b9c36d7a5a6b9348be0fce32bb915065 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 18 Jul 2017 15:37:37 -0400 Subject: [PATCH 27/31] Only mod noderange if bad nodes detected This was triggering a defect in noderange. The defect should be fixed, but in the meantime, avoid tripping over it and looking weird anyway. 
--- confluent_client/bin/nodeboot | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_client/bin/nodeboot b/confluent_client/bin/nodeboot index 6edd7bf9..f8aa1adf 100755 --- a/confluent_client/bin/nodeboot +++ b/confluent_client/bin/nodeboot @@ -66,6 +66,7 @@ rc = session.simple_noderange_command(noderange, '/boot/nextdevice', bootdev, bootmode=bootmode, persistent=options.persist, errnodes=errnodes) -noderange = noderange + ',-(' + ','.join(errnodes) + ')' +if errnodes: + noderange = noderange + ',-(' + ','.join(errnodes) + ')' rc |= session.simple_noderange_command(noderange, '/power/state', 'boot') sys.exit(rc) From 60756d9b4127aeb2a78e871972f0f675e6e601ed Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 20 Jul 2017 10:12:53 -0400 Subject: [PATCH 28/31] Fix handling of numeric enclosure.bay enclosure.bay is integer rather than string now. Fix the filter to use format, which is more robust in numeric versus string anyway. Also, consistently make the underlying data integer rather than sometimes string. 
--- confluent_server/confluent/discovery/core.py | 2 +- confluent_server/confluent/discovery/handlers/xcc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index b2c6ff4f..e8fd1fa9 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -608,7 +608,7 @@ def eval_node(cfg, handler, info, nodename, manual=False): # search for nodes fitting our description using filters # lead with the most specific to have a small second pass nl = cfg.filter_node_attributes( - 'enclosure.bay=' + info['enclosure.bay'], nl) + 'enclosure.bay={0}'.format(info['enclosure.bay']), nl) nl = list(nl) if len(nl) != 1: info['discofailure'] = 'ambigconfig' diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index 1337f33a..de80b9f2 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -58,7 +58,7 @@ class NodeHandler(bmchandler.NodeHandler): '/v2/cmm/sp/7') if not bayid: return - self.info['enclosure.bay'] = bayid + self.info['enclosure.bay'] = int(bayid) smmid = ipmicmd._oem.immhandler.get_property( '/v2/ibmc/smm/chassis/uuid') if not smmid: From ba9ea1acd8f83104dca195a94f010ec226029fe7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 20 Jul 2017 10:20:22 -0400 Subject: [PATCH 29/31] Treat empty string same as undefined If an administrator clears the cert fingerprint, they will likely set it to ''. In such a case, go down the 'no fingerprint' path rather than reject it. 
--- confluent_server/confluent/util.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index 5e2d6e1e..3e41bd86 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -121,8 +121,9 @@ class TLSCertVerifier(object): fingerprint = get_fingerprint(certificate) storedprint = self.cfm.get_node_attributes(self.node, (self.fieldname,) ) - if self.fieldname not in storedprint[self.node]: # no stored value, check - # policy for next action + if (self.fieldname not in storedprint[self.node] or + storedprint[self.node][self.fieldname]['value'] == ''): + # no stored value, check policy for next action newpolicy = self.cfm.get_node_attributes(self.node, ('pubkeys.addpolicy',)) if ('pubkeys.addpolicy' in newpolicy[self.node] and From 1dd40d36a153a08ffd114afe6d81ac673da740f6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 20 Jul 2017 11:05:06 -0400 Subject: [PATCH 30/31] Breakup logentries exceeding 65k The data length of a log entry must not exceed 65k. If an attempt is made to log that much, break it up and duplicate the records. It may make sense to indicate a continuation explicitly, but for now just extend. 
--- confluent_server/confluent/log.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/confluent_server/confluent/log.py b/confluent_server/confluent/log.py index bf468827..575d06bf 100644 --- a/confluent_server/confluent/log.py +++ b/confluent_server/confluent/log.py @@ -540,6 +540,12 @@ class Logger(object): tstamp = entry[1] data = entry[2] evtdata = entry[3] + if len(data) > 65535: + # our max log entry is 65k, take only the first 65k and put + # rest back on as a continuation + entry[2] = data[65535:] + self.logentries.appendleft(entry) + data = data[:65535] textdate = '' if self.isconsole and ltype != 2: textdate = time.strftime( From 0bf21238aa4fb171651de758b9a60b86dc520e79 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 25 Jul 2017 12:07:18 -0400 Subject: [PATCH 31/31] Add bay number to IMM and XCC The bay number can be opportunisticly grabbed, provide that info in the discovery api. In future, should add 'by-bay' once we have enclosure data as well. --- confluent_server/confluent/discovery/core.py | 3 + .../confluent/discovery/handlers/generic.py | 6 ++ .../confluent/discovery/handlers/imm.py | 61 ++++++++++++++---- .../confluent/discovery/handlers/xcc.py | 62 ++----------------- 4 files changed, 65 insertions(+), 67 deletions(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index e8fd1fa9..4934f728 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -148,6 +148,8 @@ def send_discovery_datum(info): yield msg.KeyValueData({'ipaddrs': [x[0] for x in addresses]}) yield msg.KeyValueData({'serialnumber': info.get('serialnumber', '')}) yield msg.KeyValueData({'modelnumber': info.get('modelnumber', '')}) + if 'enclosure.bay' in info: + yield msg.KeyValueData({'bay': int(info['enclosure.bay'])}) yield msg.KeyValueData({'macs': [info.get('hwaddr', '')]}) types = [] for infotype in info.get('services', []): @@ -488,6 +490,7 @@ def 
detected(info): known_info[info['hwaddr']] = info cfg = cfm.ConfigManager(None) handler = handler.NodeHandler(info, cfg) + handler.scan() if handler.https_supported and not handler.https_cert: if handler.cert_fail_reason == 'unreachable': log.log( diff --git a/confluent_server/confluent/discovery/handlers/generic.py b/confluent_server/confluent/discovery/handlers/generic.py index be5a2a57..d13108d4 100644 --- a/confluent_server/confluent/discovery/handlers/generic.py +++ b/confluent_server/confluent/discovery/handlers/generic.py @@ -36,6 +36,12 @@ class NodeHandler(object): self.ipaddr = targsa[0] return + def scan(self): + # Do completely passive things to enhance data. + # Probe is permitted to for example attempt a login + # scan *only* does what it can without a login attempt + return + def probe(self): # Use appropriate direct strategy to gather data such as # serial number and uuid to flesh out data as needed diff --git a/confluent_server/confluent/discovery/handlers/imm.py b/confluent_server/confluent/discovery/handlers/imm.py index 23feded7..66cc4c82 100644 --- a/confluent_server/confluent/discovery/handlers/imm.py +++ b/confluent_server/confluent/discovery/handlers/imm.py @@ -15,13 +15,50 @@ import confluent.discovery.handlers.bmc as bmchandler import pyghmi.exceptions as pygexc import pyghmi.ipmi.private.util as pygutil - +import string +import struct class NodeHandler(bmchandler.NodeHandler): devname = 'IMM' - def probe(self): + def scan(self): + slpattrs = self.info.get('attributes', {}) + self.isdense = False try: + ff = slpattrs.get('enclosure-form-factor', [''])[0] + except IndexError: + return + if ff not in ('dense-computing', 'BC2'): + # do not probe unless it's a dense platform + return + self.isdense = True + wronguuid = slpattrs.get('node-uuid', [''])[0] + if wronguuid: + # we need to fix the first three portions of the uuid + uuidprefix = wronguuid.split('-')[:3] + uuidprefix = struct.pack( + '