diff --git a/confluent_client/MANIFEST.in b/confluent_client/MANIFEST.in index 2a8d2b80..a0e989c3 100644 --- a/confluent_client/MANIFEST.in +++ b/confluent_client/MANIFEST.in @@ -1 +1,2 @@ -include confluent_env.sh \ No newline at end of file +include confluent_env.sh +include confluent_env.csh diff --git a/confluent_client/bin/confetty b/confluent_client/bin/confetty index 89296ff6..ef392d35 100755 --- a/confluent_client/bin/confetty +++ b/confluent_client/bin/confetty @@ -47,6 +47,7 @@ import optparse import os import select import shlex +import signal import socket import sys import time @@ -56,7 +57,10 @@ try: import tty except ImportError: pass - +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass exitcode = 0 consoleonly = False consolename = "" @@ -84,6 +88,32 @@ netserver = None laststate = {} +def print_help(): + print("confetty provides a filesystem like interface to confluent. " + "Navigation is done using the same commands as would be used in a " + "filesystem. Tab completion is supported to aid in navigation," + "as is up arrow to recall previous commands and control-r to search" + "previous command history, similar to using bash\n\n" + "The supported commands are:\n" + "cd [location] - Set the current command context, similar to a " + "working directory.\n" + "show [resource] - Present the information about the specified " + "resource, or current context if omitted.\n" + "create [resource] attributename=value attributename=value - Create " + "a new instance of a resource.\n" + "remove [resource] - Remove a resource from a list\n" + "set [resource] attributename=value attributename=value - Change " + "the specified attributes value for the given resource name\n" + "unset [resource] attributename - Clear any value for the given " + "attribute names on a resource.\n" + "start [resource] - When used on a text session resource, it " + "enters remote terminal mode. In this mode, use 'ctrl-e, c, ?' 
for " + "help" + ) + #TODO(jjohnson2): lookup context help for 'target' variable, perhaps + #common with the api document + + def updatestatus(stateinfo={}): status = consolename info = [] @@ -106,14 +136,14 @@ def updatestatus(stateinfo={}): if 'showtime' in laststate: showtime = laststate['showtime'] age = time.time() - laststate['showtime'] - if age > 86400: # older than one day + if age > 86400: # older than one day # disambiguate by putting date in and time info.append(time.strftime('%m-%dT%H:%M', time.localtime(showtime))) else: info.append(time.strftime('%H:%M', time.localtime(showtime))) if info: status += ' [' + ','.join(info) + ']' - if os.environ['TERM'] not in ('linux'): + if os.environ.get('TERM', '') not in ('linux'): sys.stdout.write('\x1b]0;console: %s\x07' % status) sys.stdout.flush() @@ -145,7 +175,7 @@ def recurse_format(datum, levels=0): def prompt(): - if os.environ['TERM'] not in ('linux'): + if os.environ.get('TERM', '') not in ('linux'): sys.stdout.write('\x1b]0;confetty: %s\x07' % target) try: return raw_input(target + ' -> ') @@ -169,6 +199,7 @@ valid_commands = [ 'remove', 'rm', 'delete', + 'help', ] candidates = None @@ -238,7 +269,7 @@ def parse_command(command): try: args = shlex.split(command, posix=True) except ValueError as ve: - print('Error: ' + ve.message) + print('Error: ' + str(ve)) return [] return args @@ -306,7 +337,11 @@ def do_command(command, server): return argv[0] = argv[0].lower() if argv[0] == 'exit': + if os.environ.get('TERM', '') not in ('linux'): + sys.stdout.write('\x1b]0;\x07') sys.exit(0) + elif argv[0] in ('help', '?'): + return print_help() elif argv[0] == 'cd': otarget = target if len(argv) > 1: @@ -348,6 +383,21 @@ def do_command(command, server): elif argv[0] in ('cat', 'show', 'ls', 'dir'): if len(argv) > 1: targpath = fullpath_target(argv[1]) + if argv[0] in ('ls', 'dir'): + if targpath[-1] != '/': + # could still be a directory, fetch the parent.. 
+ childname = targpath[targpath.rindex('/') + 1:] + parentpath = targpath[:targpath.rindex('/') + 1] + if parentpath != '/noderange/': + # if it were /noderange/, then it's a directory + # even though parent won't tell us that + for res in session.read(parentpath, server): + try: + if res['item']['href'] == childname: + print(childname) + return + except KeyError: + pass else: targpath = target for res in session.read(targpath): @@ -418,6 +468,10 @@ def createresource(args): def makecall(callout, args): global exitcode for response in callout(*args): + if 'deleted' in response: + print("Deleted: " + response['deleted']) + if 'created' in response: + print("Created: " + response['created']) if 'error' in response: if 'errorcode' in response: exitcode = response['errorcode'] @@ -526,7 +580,11 @@ def quitconfetty(code=0, fullexit=False, fixterm=True): fcntl.fcntl(sys.stdin.fileno(), fcntl.F_SETFL, currfl ^ os.O_NONBLOCK) if oldtcattr is not None: termios.tcsetattr(sys.stdin.fileno(), termios.TCSANOW, oldtcattr) + # Request default color scheme, to undo potential weirdness of terminal + sys.stdout.write('\x1b[m') if fullexit: + if os.environ.get('TERM', '') not in ('linux'): + sys.stdout.write('\x1b]0;\x07') sys.exit(code) else: tlvdata.send(session.connection, {'operation': 'stop', @@ -651,11 +709,11 @@ def conserver_command(filehandle, localcommand): else: print("Unknown power state.]\r") - check_power_state() + #check_power_state() elif localcommand[0] == '?': print("help]\r") - print(". disconnect\r") + print(". 
exit console\r") print("b break\r") print("o reopen\r") print("po power off\r") @@ -744,6 +802,8 @@ if sys.stdout.isatty(): readline.parse_and_bind("tab: complete") readline.parse_and_bind("set bell-style none") + dl = readline.get_completer_delims().replace('-', '') + readline.set_completer_delims(dl) readline.set_completer(completer) doexit = False @@ -767,10 +827,11 @@ def check_power_state(): global powerstate, powertime for rsp in session.read('/nodes/' + consolename + '/power/state'): if type(rsp) == dict and 'state' in rsp: - powerstate = rsp['state']['value'] + newpowerstate = rsp['state']['value'] powertime = time.time() - if powerstate == 'off': - sys.stdout.write("\r\n[powered off]\r\n") + if newpowerstate != powerstate and newpowerstate == 'off': + sys.stdout.write("\x1b[2J\x1b[;H[powered off]\r\n") + powerstate = newpowerstate elif type(rsp) == dict and '_requestdone' in rsp: break elif type(rsp) == dict: @@ -799,7 +860,12 @@ while inconsole or not doexit: updatestatus(data) continue if data is not None: - sys.stdout.write(data) + try: + sys.stdout.write(data) + except IOError: # Some times circumstances are bad + # resort to byte at a time... 
+ for d in data: + sys.stdout.write(d) now = time.time() if ('showtime' not in laststate or (now // 60) != laststate['showtime'] // 60): @@ -829,13 +895,15 @@ while inconsole or not doexit: sys.stdout.write("\r\n[remote disconnected]\r\n") break else: - myinput = fh.read() - myinput = check_escape_seq(myinput, fh) - if myinput: - tlvdata.send(session.connection, myinput) - if powerstate is None or powertime < time.time() - 60: # Check powerstate every 60 seconds - check_power_state() - + try: + myinput = fh.read() + myinput = check_escape_seq(myinput, fh) + if myinput: + tlvdata.send(session.connection, myinput) + except IOError: + pass + #if powerstate is None or powertime < time.time() - 60: # Check powerstate every 60 seconds + # check_power_state() else: currcommand = prompt() try: diff --git a/confluent_client/bin/nodeattrib b/confluent_client/bin/nodeattrib index c36942d2..5db662a1 100755 --- a/confluent_client/bin/nodeattrib +++ b/confluent_client/bin/nodeattrib @@ -19,8 +19,14 @@ __author__ = 'alin37' import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass + path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -29,13 +35,14 @@ if path.startswith('/opt'): import confluent.client as client argparser = optparse.OptionParser( - usage='''\n %prog [options] noderange [list of attributes] \ - \n %prog [options] noderange attribute1=value1,attribute2=value,... + usage='''\n %prog [-b] noderange [list of attributes] \ + \n %prog -c noderange \ + \n %prog noderange attribute1=value1 attribute2=value,... 
\n ''') argparser.add_option('-b', '--blame', action='store_true', help='Show information about how attributes inherited') argparser.add_option('-c', '--clear', action='store_true', - help='Clear variables') + help='Clear attributes') (options, args) = argparser.parse_args() @@ -46,7 +53,8 @@ try: noderange = args[0] nodelist = '/noderange/{0}/nodes/'.format(noderange) except IndexError: - nodelist = '/nodes/' + argparser.print_help() + sys.exit(1) session = client.Command() exitcode = 0 @@ -54,7 +62,7 @@ exitcode = 0 nodetype="noderange" if len(args) > 1: - if "=" in args[1]: + if "=" in args[1] or options.clear: exitcode=client.updateattrib(session,args,nodetype, noderange, options) try: # setting user output to what the user inputs @@ -65,6 +73,7 @@ if len(args) > 1: showtype = 'current' requestargs=args[2:] else: + showtype = 'all' requestargs=args[1:] except: pass diff --git a/confluent_client/bin/nodeboot b/confluent_client/bin/nodeboot index 9aa940a6..f8aa1adf 100755 --- a/confluent_client/bin/nodeboot +++ b/confluent_client/bin/nodeboot @@ -17,8 +17,13 @@ import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -56,11 +61,12 @@ if options.biosmode: else: bootmode = 'uefi' +errnodes = set([]) rc = session.simple_noderange_command(noderange, '/boot/nextdevice', bootdev, bootmode=bootmode, - persistent=options.persist) - -if rc: - sys.exit(rc) -else: - sys.exit(session.simple_noderange_command(noderange, '/power/state', 'boot')) \ No newline at end of file + persistent=options.persist, + errnodes=errnodes) +if errnodes: + noderange = noderange + ',-(' + ','.join(errnodes) + ')' +rc |= session.simple_noderange_command(noderange, '/power/state', 'boot') +sys.exit(rc) diff --git a/confluent_client/bin/nodeeventlog 
b/confluent_client/bin/nodeeventlog index fc7a68a1..b467ad28 100755 --- a/confluent_client/bin/nodeeventlog +++ b/confluent_client/bin/nodeeventlog @@ -18,8 +18,13 @@ from datetime import datetime as dt import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -28,7 +33,7 @@ if path.startswith('/opt'): import confluent.client as client argparser = optparse.OptionParser( - usage="Usage: %prog [options] noderange (clear)") + usage="Usage: %prog [options] noderange [clear]") (options, args) = argparser.parse_args() try: noderange = args[0] diff --git a/confluent_client/bin/nodefirmware b/confluent_client/bin/nodefirmware index 1b51d6ec..5c14d6b4 100755 --- a/confluent_client/bin/nodefirmware +++ b/confluent_client/bin/nodefirmware @@ -17,7 +17,13 @@ import optparse import os +import signal import sys + +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): diff --git a/confluent_client/bin/nodegroupattrib b/confluent_client/bin/nodegroupattrib index f0fa7051..a7915f13 100755 --- a/confluent_client/bin/nodegroupattrib +++ b/confluent_client/bin/nodegroupattrib @@ -19,8 +19,13 @@ __author__ = 'alin37' import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -57,6 +62,7 @@ exitcode = 0 #Sets attributes if len(args) > 1: + showtype = 'all' exitcode=client.updateattrib(session,args,nodetype, nodegroups, options) try: # setting user output to what the 
user inputs diff --git a/confluent_client/bin/nodehealth b/confluent_client/bin/nodehealth index 294a73b4..31dc21f5 100755 --- a/confluent_client/bin/nodehealth +++ b/confluent_client/bin/nodehealth @@ -18,8 +18,13 @@ import codecs import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): diff --git a/confluent_client/bin/nodeidentify b/confluent_client/bin/nodeidentify index 2cbf573f..0618b12e 100755 --- a/confluent_client/bin/nodeidentify +++ b/confluent_client/bin/nodeidentify @@ -17,7 +17,12 @@ import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) diff --git a/confluent_client/bin/nodeinventory b/confluent_client/bin/nodeinventory index 83a44bd7..eeaba71e 100755 --- a/confluent_client/bin/nodeinventory +++ b/confluent_client/bin/nodeinventory @@ -17,7 +17,13 @@ import optparse import os +import signal import sys + +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -77,6 +83,8 @@ try: except IndexError: argparser.print_help() sys.exit(1) +if len(args) > 1 and args[1] == 'firm': + os.execlp('nodefirmware', 'nodefirmware', noderange) try: session = client.Command() for res in session.read('/noderange/{0}/inventory/hardware/all/all'.format( diff --git a/confluent_client/bin/nodelist b/confluent_client/bin/nodelist index ef3816e6..9892fa79 100755 --- a/confluent_client/bin/nodelist +++ b/confluent_client/bin/nodelist @@ -19,8 +19,13 @@ __author__ = 'jjohnson2,alin37' 
import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -30,7 +35,8 @@ import confluent.client as client def main(): argparser = optparse.OptionParser( - usage="Usage: %prog [options] noderange [list of attributes]") + usage="Usage: %prog noderange\n" + " or: %prog [options] noderange ...") argparser.add_option('-b', '--blame', action='store_true', help='Show information about how attributes inherited') (options, args) = argparser.parse_args() @@ -59,4 +65,4 @@ def main(): sys.exit(exitcode) if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/confluent_client/bin/nodepower b/confluent_client/bin/nodepower index 5dd0b007..c1169ea0 100755 --- a/confluent_client/bin/nodepower +++ b/confluent_client/bin/nodepower @@ -17,8 +17,13 @@ import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -43,6 +48,9 @@ if len(sys.argv) > 2: elif not sys.argv[2] in ('stat', 'state', 'status'): setstate = sys.argv[2] +if setstate not in (None, 'on', 'off', 'shutdown', 'boot', 'reset'): + argparser.print_help() + sys.exit(1) session = client.Command() exitcode = 0 session.add_precede_key('oldstate') diff --git a/confluent_client/bin/noderun b/confluent_client/bin/noderun index dbebd4b2..81b036cd 100755 --- a/confluent_client/bin/noderun +++ b/confluent_client/bin/noderun @@ -15,13 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from collections import deque import optparse import os import select import shlex +import signal import subprocess import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -30,57 +36,75 @@ if path.startswith('/opt'): import confluent.client as client -argparser = optparse.OptionParser( - usage="Usage: %prog node commandexpression", - epilog="Expressions are the same as in attributes, e.g. " - "'ipmitool -H {hardwaremanagement.manager}' will be expanded.") -argparser.disable_interspersed_args() -(options, args) = argparser.parse_args() -if len(args) < 2: - argparser.print_help() - sys.exit(1) -c = client.Command() -cmdstr = " ".join(args[1:]) +def run(): + concurrentprocs = 168 + # among other things, FD_SETSIZE limits. Besides, spawning too many + # processes can be unkind for the unaware on memory pressure and such... + argparser = optparse.OptionParser( + usage="Usage: %prog node commandexpression", + epilog="Expressions are the same as in attributes, e.g. 
" + "'ipmitool -H {hardwaremanagement.manager}' will be expanded.") + argparser.disable_interspersed_args() + (options, args) = argparser.parse_args() + if len(args) < 2: + argparser.print_help() + sys.exit(1) + c = client.Command() + cmdstr = " ".join(args[1:]) -nodeforpopen = {} -popens = [] -for exp in c.create('/noderange/{0}/attributes/expression'.format(args[0]), - {'expression': cmdstr}): - ex = exp['databynode'] - for node in ex: - cmd = ex[node]['value'].encode('utf-8') - cmdv = shlex.split(cmd) - nopen = subprocess.Popen( - cmdv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - popens.append(nopen) - nodeforpopen[nopen] = node + currprocs = 0 + all = set([]) + pipedesc = {} + pendingexecs = deque() -all = set([]) -pipedesc = {} -exitcode = 0 -for pop in popens: - node = nodeforpopen[pop] - pipedesc[pop.stdout] = { 'node': node, 'popen': pop, 'type': 'stdout'} - pipedesc[pop.stderr] = {'node': node, 'popen': pop, 'type': 'stderr'} - all.add(pop.stdout) - all.add(pop.stderr) -rdy, _, _ = select.select(all, [], [], 10) -while all and rdy: - for r in rdy: - data = r.readline() - desc = pipedesc[r] - if data: - node = desc['node'] - if desc['type'] == 'stdout': - sys.stdout.write('{0}: {1}'.format(node,data)) + for exp in c.create('/noderange/{0}/attributes/expression'.format(args[0]), + {'expression': cmdstr}): + ex = exp['databynode'] + for node in ex: + cmd = ex[node]['value'].encode('utf-8') + cmdv = shlex.split(cmd) + if currprocs < concurrentprocs: + currprocs += 1 + run_cmdv(node, cmdv, all, pipedesc) else: - sys.stderr.write('{0}: {1}'.format(node, data)) - else: - pop = desc['popen'] - ret = pop.poll() - if ret is not None: - exitcode = exitcode | ret - all.discard(r) - if all: - rdy, _, _ = select.select(all, [], [], 10) -sys.exit(exitcode) \ No newline at end of file + pendingexecs.append((node, cmdv)) + + exitcode = 0 + rdy, _, _ = select.select(all, [], [], 10) + while all: + for r in rdy: + data = r.readline() + desc = pipedesc[r] + if data: + 
node = desc['node'] + if desc['type'] == 'stdout': + sys.stdout.write('{0}: {1}'.format(node,data)) + else: + sys.stderr.write('{0}: {1}'.format(node, data)) + else: + pop = desc['popen'] + ret = pop.poll() + if ret is not None: + exitcode = exitcode | ret + all.discard(r) + if desc['type'] == 'stdout' and pendingexecs: + node, cmdv = pendingexecs.popleft() + run_cmdv(node, cmdv, all, pipedesc) + if all: + rdy, _, _ = select.select(all, [], [], 10) + sys.exit(exitcode) + + +def run_cmdv(node, cmdv, all, pipedesc): + nopen = subprocess.Popen( + cmdv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + pipedesc[nopen.stdout] = {'node': node, 'popen': nopen, + 'type': 'stdout'} + pipedesc[nopen.stderr] = {'node': node, 'popen': nopen, + 'type': 'stderr'} + all.add(nopen.stdout) + all.add(nopen.stderr) + + +if __name__ == '__main__': + run() \ No newline at end of file diff --git a/confluent_client/bin/nodesensors b/confluent_client/bin/nodesensors index 3c9827a3..3b7c9618 100755 --- a/confluent_client/bin/nodesensors +++ b/confluent_client/bin/nodesensors @@ -19,9 +19,14 @@ import csv import datetime import optparse import os +import signal import sys import time +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): diff --git a/confluent_client/bin/nodesetboot b/confluent_client/bin/nodesetboot index 65f8aaeb..524d30d3 100755 --- a/confluent_client/bin/nodesetboot +++ b/confluent_client/bin/nodesetboot @@ -17,8 +17,13 @@ import optparse import os +import signal import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): diff --git a/confluent_client/bin/nodeshell b/confluent_client/bin/nodeshell index 
74a5cf15..30f84242 100755 --- a/confluent_client/bin/nodeshell +++ b/confluent_client/bin/nodeshell @@ -15,13 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections import deque import optparse import os import select import shlex +import signal import subprocess import sys +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): @@ -30,57 +36,75 @@ if path.startswith('/opt'): import confluent.client as client -argparser = optparse.OptionParser( - usage="Usage: %prog node commandexpression", - epilog="Expressions are the same as in attributes, e.g. " - "'ipmitool -H {hardwaremanagement.manager}' will be expanded.") -argparser.disable_interspersed_args() -(options, args) = argparser.parse_args() -if len(args) < 2: - argparser.print_help() - sys.exit(1) -c = client.Command() -cmdstr = " ".join(args[1:]) +def run(): + concurrentprocs = 168 + # among other things, FD_SETSIZE limits. Besides, spawning too many + # processes can be unkind for the unaware on memory pressure and such... + argparser = optparse.OptionParser( + usage="Usage: %prog node commandexpression", + epilog="Expressions are the same as in attributes, e.g. 
" + "'ipmitool -H {hardwaremanagement.manager}' will be expanded.") + argparser.disable_interspersed_args() + (options, args) = argparser.parse_args() + if len(args) < 2: + argparser.print_help() + sys.exit(1) + c = client.Command() + cmdstr = " ".join(args[1:]) -nodeforpopen = {} -popens = [] -for exp in c.create('/noderange/{0}/attributes/expression'.format(args[0]), - {'expression': cmdstr}): - ex = exp['databynode'] - for node in ex: - cmd = ex[node]['value'].encode('utf-8') - cmdv = ['ssh', node] + shlex.split(cmd) - nopen = subprocess.Popen( - cmdv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - popens.append(nopen) - nodeforpopen[nopen] = node + currprocs = 0 + all = set([]) + pipedesc = {} + pendingexecs = deque() -all = set([]) -pipedesc = {} -exitcode = 0 -for pop in popens: - node = nodeforpopen[pop] - pipedesc[pop.stdout] = { 'node': node, 'popen': pop, 'type': 'stdout'} - pipedesc[pop.stderr] = {'node': node, 'popen': pop, 'type': 'stderr'} - all.add(pop.stdout) - all.add(pop.stderr) -rdy, _, _ = select.select(all, [], [], 10) -while all and rdy: - for r in rdy: - data = r.readline() - desc = pipedesc[r] - if data: - node = desc['node'] - if desc['type'] == 'stdout': - sys.stdout.write('{0}: {1}'.format(node,data)) + for exp in c.create('/noderange/{0}/attributes/expression'.format(args[0]), + {'expression': cmdstr}): + ex = exp['databynode'] + for node in ex: + cmd = ex[node]['value'].encode('utf-8') + cmdv = ['ssh', node] + shlex.split(cmd) + if currprocs < concurrentprocs: + currprocs += 1 + run_cmdv(node, cmdv, all, pipedesc) else: - sys.stderr.write('{0}: {1}'.format(node, data)) - else: - pop = desc['popen'] - ret = pop.poll() - if ret is not None: - exitcode = exitcode | ret - all.discard(r) - if all: - rdy, _, _ = select.select(all, [], [], 10) -sys.exit(exitcode) \ No newline at end of file + pendingexecs.append((node, cmdv)) + + exitcode = 0 + rdy, _, _ = select.select(all, [], [], 10) + while all: + for r in rdy: + data = r.readline() + 
desc = pipedesc[r] + if data: + node = desc['node'] + if desc['type'] == 'stdout': + sys.stdout.write('{0}: {1}'.format(node,data)) + else: + sys.stderr.write('{0}: {1}'.format(node, data)) + else: + pop = desc['popen'] + ret = pop.poll() + if ret is not None: + exitcode = exitcode | ret + all.discard(r) + if desc['type'] == 'stdout' and pendingexecs: + node, cmdv = pendingexecs.popleft() + run_cmdv(node, cmdv, all, pipedesc) + if all: + rdy, _, _ = select.select(all, [], [], 10) + sys.exit(exitcode) + + +def run_cmdv(node, cmdv, all, pipedesc): + nopen = subprocess.Popen( + cmdv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + pipedesc[nopen.stdout] = {'node': node, 'popen': nopen, + 'type': 'stdout'} + pipedesc[nopen.stderr] = {'node': node, 'popen': nopen, + 'type': 'stderr'} + all.add(nopen.stdout) + all.add(nopen.stderr) + + +if __name__ == '__main__': + run() \ No newline at end of file diff --git a/confluent_client/confluent/client.py b/confluent_client/confluent/client.py index 3d14ef90..f9283dd0 100644 --- a/confluent_client/confluent/client.py +++ b/confluent_client/confluent/client.py @@ -77,7 +77,7 @@ class Command(object): def add_precede_key(self, keyname): self._prevkeyname = keyname - def handle_results(self, ikey, rc, res): + def handle_results(self, ikey, rc, res, errnodes=None): if 'error' in res: sys.stderr.write('Error: {0}\n'.format(res['error'])) if 'errorcode' in res: @@ -89,6 +89,8 @@ class Command(object): res = res['databynode'] for node in res: if 'error' in res[node]: + if errnodes is not None: + errnodes.add(node) sys.stderr.write('{0}: Error: {1}\n'.format( node, res[node]['error'])) if 'errorcode' in res[node]: @@ -110,7 +112,7 @@ class Command(object): return rc def simple_noderange_command(self, noderange, resource, input=None, - key=None, **kwargs): + key=None, errnodes=None, **kwargs): try: rc = 0 if resource[0] == '/': @@ -123,12 +125,12 @@ class Command(object): if input is None: for res in 
self.read('/noderange/{0}/{1}'.format( noderange, resource)): - rc = self.handle_results(ikey, rc, res) + rc = self.handle_results(ikey, rc, res, errnodes) else: kwargs[ikey] = input for res in self.update('/noderange/{0}/{1}'.format( noderange, resource), kwargs): - rc = self.handle_results(ikey, rc, res) + rc = self.handle_results(ikey, rc, res, errnodes) return rc except KeyboardInterrupt: print('') @@ -274,7 +276,7 @@ def attrrequested(attr, attrlist, seenattributes): if candidate == attr: seenattributes.add(truename) return True - elif '.' not in candidate and attr.startswith(candidate + '.'): + elif attr.startswith(candidate + '.'): seenattributes.add(truename) return True return False @@ -309,12 +311,12 @@ def printattributes(session, requestargs, showtype, nodetype, noderange, options '{2}'.format(node, attr, currattr['broken']) elif isinstance(currattr, list) or isinstance(currattr, tuple): - attrout = '{0}: {1}: {2}'.format(node, attr, ', '.join(map(str, currattr))) + attrout = '{0}: {1}: {2}'.format(node, attr, ','.join(map(str, currattr))) elif isinstance(currattr, dict): dictout = [] for k, v in currattr.items: dictout.append("{0}={1}".format(k, v)) - attrout = '{0}: {1}: {2}'.format(node, attr, ', '.join(map(str, dictout))) + attrout = '{0}: {1}: {2}'.format(node, attr, ','.join(map(str, dictout))) else: print ("CODE ERROR" + repr(attr)) @@ -367,28 +369,17 @@ def printgroupattributes(session, requestargs, showtype, nodetype, noderange, op attrout = '{0}: {1}: *ERROR* BROKEN EXPRESSION: ' \ '{2}'.format(noderange, attr, currattr['broken']) + elif 'expression' in currattr: + attrout = '{0}: {1}: (will derive from expression {2})'.format(noderange, attr, currattr['expression']) elif isinstance(currattr, list) or isinstance(currattr, tuple): - attrout = '{0}: {1}: {2}'.format(noderange, attr, ', '.join(map(str, currattr))) + attrout = '{0}: {1}: {2}'.format(noderange, attr, ','.join(map(str, currattr))) elif isinstance(currattr, dict): dictout = [] for k, 
v in currattr.items: dictout.append("{0}={1}".format(k, v)) - attrout = '{0}: {1}: {2}'.format(noderange, attr, ', '.join(map(str, dictout))) + attrout = '{0}: {1}: {2}'.format(noderange, attr, ','.join(map(str, dictout))) else: print ("CODE ERROR" + repr(attr)) - - if options.blame or 'broken' in currattr: - blamedata = [] - if 'inheritedfrom' in currattr: - blamedata.append('inherited from group {0}'.format( - currattr['inheritedfrom'] - )) - if 'expression' in currattr: - blamedata.append( - 'derived from expression "{0}"'.format( - currattr['expression'])) - if blamedata: - attrout += ' (' + ', '.join(blamedata) + ')' print attrout if not exitcode: if requestargs: diff --git a/confluent_client/confluent/textgroup.py b/confluent_client/confluent/textgroup.py new file mode 100644 index 00000000..507571f0 --- /dev/null +++ b/confluent_client/confluent/textgroup.py @@ -0,0 +1,106 @@ +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import difflib +import sys + +try: + range = xrange +except NameError: + pass + + +def _colorize_line(orig, mask): + highlighted = False + newline = orig[0] + for i in range(1, len(orig)): + if i > len(mask) - 1: + if highlighted: + newline += '\x1b[0m' + newline += orig[i:] + break + if highlighted and mask[i] == ' ': + highlighted = False + newline += '\x1b[0m' + elif not highlighted and mask[i] != ' ': + highlighted = True + newline += '\x1b[31m' + newline += orig[i] + newline += '\x1b[0m' + return newline + + +def colordiff(first, second): + diffdata = list(difflib.ndiff(first, second)) + for i in range(len(diffdata)): + if i < len(diffdata) - 1 and diffdata[i + 1].startswith('?'): + yield _colorize_line(diffdata[i], diffdata[i + 1]) + elif diffdata[i].startswith('?'): + continue + else: + yield diffdata[i] + + +class GroupedData(object): + + def __init__(self): + self.bynode = {} + self.byoutput = {} + + def generate_byoutput(self): + self.byoutput = {} + for n in self.bynode: + output = '\n'.join(self.bynode[n]) + if output not in self.byoutput: + self.byoutput[output] = set([n]) + else: + self.byoutput[output].add(n) + + def add_line(self, node, line): + if node not in self.bynode: + self.bynode[node] = [line] + else: + self.bynode[node].append(line) + + def print_deviants(self, output=sys.stdout, skipmodal=True): + self.generate_byoutput() + modaloutput = None + ismodal = True + for outdata in reversed( + sorted(self.byoutput, key=lambda x: len(self.byoutput[x]))): + if modaloutput is None: + modaloutput = outdata + if skipmodal: + skipmodal = False + ismodal = False + continue + output.write('====================================\n') + output.write(','.join(sorted(self.byoutput[outdata]))) + output.write('\n====================================\n') + if ismodal: + ismodal = False + output.write(outdata) + else: + output.write('\n'.join(colordiff(modaloutput.split('\n'), + outdata.split('\n')))) + output.write('\n\n') + output.flush() + +if __name__ == 
'__main__': + groupoutput = GroupedData() + for line in sys.stdin.read().split('\n'): + if not line: + continue + groupoutput.add_line(*line.split(': ', 1)) + groupoutput.print_deviants() \ No newline at end of file diff --git a/confluent_client/confluent_env.csh b/confluent_client/confluent_env.csh new file mode 100644 index 00000000..0ace4e8d --- /dev/null +++ b/confluent_client/confluent_env.csh @@ -0,0 +1 @@ +setenv PATH /opt/confluent/bin:$PATH diff --git a/confluent_client/confluent_env.sh b/confluent_client/confluent_env.sh index 01eededd..b8cfddc8 100644 --- a/confluent_client/confluent_env.sh +++ b/confluent_client/confluent_env.sh @@ -1,2 +1,20 @@ PATH=/opt/confluent/bin:$PATH export PATH +MANPATH=/opt/confluent/share/man:$MANPATH +export MANPATH +alias confetty='set -f;confetty';confetty(){ command confetty "$@"; set +f;} +alias nodeattrib='set -f;nodeattrib';nodeattrib(){ command nodeattrib "$@"; set +f;} +alias nodeboot='set -f;nodeboot';nodeboot(){ command nodeboot "$@"; set +f;} +alias nodeconsole='set -f;nodeconsole';nodeconsole(){ command nodeconsole "$@"; set +f;} +alias nodeeventlog='set -f;nodeeventlog';nodeeventlog(){ command nodeeventlog "$@"; set +f;} +alias nodefirmware='set -f;nodefirmware';nodefirmware(){ command nodefirmware "$@"; set +f;} +alias nodegroupattrib='set -f;nodegroupattrib';nodegroupattrib(){ command nodegroupattrib "$@"; set +f;} +alias nodehealth='set -f;nodehealth';nodehealth(){ command nodehealth "$@"; set +f;} +alias nodeidentify='set -f;nodeidentify';nodeidentify(){ command nodeidentify "$@"; set +f;} +alias nodeinventory='set -f;nodeinventory';nodeinventory(){ command nodeinventory "$@"; set +f;} +alias nodelist='set -f;nodelist';nodelist(){ command nodelist "$@"; set +f;} +alias nodepower='set -f;nodepower';nodepower(){ command nodepower "$@"; set +f;} +alias noderun='set -f;noderun';noderun(){ command noderun "$@"; set +f;} +alias nodesensors='set -f;nodesensors';nodesensors(){ command nodesensors "$@"; set +f;} 
+alias nodesetboot='set -f;nodesetboot';nodesetboot(){ command nodesetboot "$@"; set +f;} +alias nodeshell='set -f;nodeshell';nodeshell(){ command nodeshell "$@"; set +f;} diff --git a/confluent_client/doc/man/buildindex.sh b/confluent_client/doc/man/buildindex.sh new file mode 100755 index 00000000..7476716a --- /dev/null +++ b/confluent_client/doc/man/buildindex.sh @@ -0,0 +1 @@ +for i in *.ronn; do echo -n `head -n 1 $i|awk '{print $1}'`; echo " $i"; done > index.txt diff --git a/confluent_client/doc/man/confetty.ronn b/confluent_client/doc/man/confetty.ronn index b7c4758b..8c4d3e60 100644 --- a/confluent_client/doc/man/confetty.ronn +++ b/confluent_client/doc/man/confetty.ronn @@ -1,9 +1,10 @@ -confetty(1) --- Interactive confluent client +confetty(8) --- Interactive confluent client ================================================= ## SYNOPSIS -`confetty` +`confetty` +`confetty ` ## DESCRIPTION @@ -33,5 +34,3 @@ commands. Start a console session indicated by **ELEMENT** (e.g. /nodes/n1/console/session) * `rm` **ELEMENT** Request removal of an element. (e.g. rm events/hardware/log clears log from a node) - - diff --git a/confluent_client/doc/man/confluent.ronn b/confluent_client/doc/man/confluent.ronn new file mode 100644 index 00000000..633cef86 --- /dev/null +++ b/confluent_client/doc/man/confluent.ronn @@ -0,0 +1,14 @@ +confluent(8) -- Start the confluent server +========================================================= + +## SYNOPSIS + +`confluent` + +## DESCRIPTION + +**confluent** is the name of the server daemon. It is normally run +through the init subsystem rather than executed directly. All confluent +commands connect to confluent daemon. It provides the web interface, debug, +and unix socket connectivity. 
+ diff --git a/confluent_client/doc/man/confluentdbutil.ronn b/confluent_client/doc/man/confluentdbutil.ronn new file mode 100644 index 00000000..e7dc9d39 --- /dev/null +++ b/confluent_client/doc/man/confluentdbutil.ronn @@ -0,0 +1,26 @@ +confluentdbutil(8) -- Backup or restore confluent database +========================================================= + +## SYNOPSIS + + +`confluentdbutil [options] ` + +## DESCRIPTION + +**confluentdbutil** is a utility to export/import the confluent attributes +to/from json files. The path is a directory that holds the json version. +In order to perform restore, the confluent service must not be running. It +is required to indicate how the usernames/passwords are treated in +the json files (password protected, removed from the files, or unprotected). + +## OPTIONS + +* `-p`, `--password`: + If specified, information such as usernames and passwords will be encrypted + using the given password. +* `-r`, `--redact`: + Indicates to replace usernames and passwords with a dummy string rather + than being included. +* `-u`, `--unprotected`: + The keys.json file will include the encryption keys without any protection. \ No newline at end of file diff --git a/confluent_client/doc/man/nodeattrib.ronn b/confluent_client/doc/man/nodeattrib.ronn index dc330b0c..92f2f89b 100644 --- a/confluent_client/doc/man/nodeattrib.ronn +++ b/confluent_client/doc/man/nodeattrib.ronn @@ -1,75 +1,80 @@ -nodeattrib(1) -- List or change confluent nodes attributes +nodeattrib(8) -- List or change confluent nodes attributes ========================================================= ## SYNOPSIS -`nodeattrib` `noderange` [ current | all ] -`nodeattrib` `noderange` [-b] [...] -`nodeattrib` `noderange` [ ...] -`nodeattrib` `noderange` [-c] [ ...] +`nodeattrib [-b] [...]` +`nodeattrib [ ...]` +`nodeattrib -c ...` ## DESCRIPTION -**nodeattrib** queries the confluent server to get information about nodes. In +**nodeattrib** manages the attributes of confluent nodes. 
In the simplest form, it simply takes the given noderange(5) and lists the matching nodes, one line at a time. If a list of node attribute names are given, the value of those are also displayed. If `-b` is specified, it will also display information on -how inherited and expression based attributes are defined. There is more -information on node attributes in nodeattributes(5) man page. +how inherited and expression based attributes are defined. Attributes can be +straightforward values, or an expression as documented in nodeattribexpressions(5). +For a full list of attributes, run `nodeattrib all` against a node. If `-c` is specified, this will set the nodeattribute to a null value. This is different from setting the value to an empty string. +Note that `nodeattrib ` will likely not provide the expected behavior. +See nodegroupattrib(8) command on how to manage attributes on a group level. + ## OPTIONS * `-b`, `--blame`: Annotate inherited and expression based attributes to show their base value. 
* `-c`, `--clear`: - Clear given nodeattributes since '' is not the same as empty + Clear specified nodeattributes ## EXAMPLES * Listing matching nodes of a simple noderange: - `# nodeattrib n1-n2` - `n1`: console.method: ipmi - `n1`: hardwaremanagement.manager: 172.30.3.1 - `n2`: console.method: ipmi - `n2`: hardwaremanagement.manager: 172.30.3.2 + `# nodeattrib n1-n2` + `n1: console.method: ipmi` + `n1: hardwaremanagement.manager: 172.30.3.1` + `n2: console.method: ipmi` + `n2: hardwaremanagement.manager: 172.30.3.2` * Getting an attribute of nodes matching a noderange: - `# nodeattrib n1,n2 hardwaremanagement.manager` - `n1: hardwaremanagement.manager: 172.30.3.1` - `n2: hardwaremanagement.manager: 172.30.3.2` + `# nodeattrib n1,n2 hardwaremanagement.manager` + `n1: hardwaremanagement.manager: 172.30.3.1` + `n2: hardwaremanagement.manager: 172.30.3.2` * Getting a group of attributes while determining what group defines them: - `# nodeattrib n1,n2 hardwaremanagement --blame` - `n1: hardwaremanagement.manager: 172.30.3.1` - `n1: hardwaremanagement.method: ipmi (inherited from group everything)` - `n1: hardwaremanagement.switch: r8e1` - `n1: hardwaremanagement.switchport: 14` - `n2: hardwaremanagement.manager: 172.30.3.2` - `n2: hardwaremanagement.method: ipmi (inherited from group everything)` - `n2: hardwaremanagement.switch: r8e1` - `n2: hardwaremanagement.switchport: 2` + `# nodeattrib n1,n2 hardwaremanagement --blame` + `n1: hardwaremanagement.manager: 172.30.3.1` + `n1: hardwaremanagement.method: ipmi (inherited from group everything)` + `n1: hardwaremanagement.switch: r8e1` + `n1: hardwaremanagement.switchport: 14` + `n2: hardwaremanagement.manager: 172.30.3.2` + `n2: hardwaremanagement.method: ipmi (inherited from group everything)` + `n2: hardwaremanagement.switch: r8e1` + `n2: hardwaremanagement.switchport: 2` - * Listing matching nodes of a simple noderange that are set: - `# nodeattrib n1-n2 current` - `n1`: console.method: ipmi - `n1`: 
hardwaremanagement.manager: 172.30.3.1 - `n2`: console.method: ipmi - `n2`: hardwaremanagement.manager: 172.30.3.2 +* Listing matching nodes of a simple noderange that are set: + `# nodeattrib n1-n2 current` + `n1: console.method: ipmi` + `n1: hardwaremanagement.manager: 172.30.3.1` + `n2: console.method: ipmi` + `n2: hardwaremanagement.manager: 172.30.3.2` - * Change attribute on nodes of a simple noderange: - `# nodeattrib n1-n2 console.method=serial` - `n1`: console.method: serial - `n1`: hardwaremanagement.manager: 172.30.3.1 - `n2`: console.method: serial - `n2`: hardwaremanagement.manager: 172.30.3.2 - - * Clear attribute on nodes of a simple noderange, if you want to retain the variable set the attribute to "": - `# nodeattrib n1-n2 -c console.method` - `# nodeattrib n1-n2 console.method` - Error: console.logging not a valid attribute +* Change attribute on nodes of a simple noderange: + `# nodeattrib n1-n2 console.method=serial` + `n1: console.method: serial` + `n1: hardwaremanagement.manager: 172.30.3.1` + `n2: console.method: serial` + `n2: hardwaremanagement.manager: 172.30.3.2` +* Clear attribute on nodes of a simple noderange, if you want to retain the variable set the attribute to "": + `# nodeattrib n1-n2 -c console.method` + `# nodeattrib n1-n2 console.method` + `n1: console.method: ` + `n2: console.method: ` +## SEE ALSO +nodegroupattrib(8), nodeattribexpressions(5) diff --git a/confluent_client/doc/man/nodeattribexpressions.ronn b/confluent_client/doc/man/nodeattribexpressions.ronn new file mode 100644 index 00000000..61277bad --- /dev/null +++ b/confluent_client/doc/man/nodeattribexpressions.ronn @@ -0,0 +1,64 @@ +nodeattribexpressions(5) -- Confluent attribute expression syntax +================================================================= + +## DESCRIPTION + +In confluent, any attribute may either be a straightforward value, or an +expression to generate the value. + +An expression will contain some directives wrapped in `{}` characters. 
Within +`{}` are a number of potential substitute values and operations. + +The most common operation is to extract a number from the nodename. These +values are available as n1, n2, etc. So for example attributes for a node named +b1o2r3u4 would have {n1} as 1, {n2} as 2, {n3} as 3, and {n4} as 4. +Additionally, {n0} is special as representing the last number in a name, so in +the b1o2r3u4 example, {n0} would be 4. + +Frequently a value derives from a number in the node name, but must undergo a +transform to be useful. As an example, if we have a scheme where nodes are +numbered n1-n512, and they are arranged 1-42 in rack1, 43-84 in rack2, and so +forth, it is convenient to perform arithmetic on the extracted number. Here is +an example of codifying the above scheme, and setting the u to the remainder: + +`location.rack=rack{(n1-1)/42+1}` +`location.u={(n1-1)%42+1}` + +Note how text may be mixed into expressions, only data within {} will receive +special treatment. Here we also had to adjust by subtracting 1 and adding it +back to make the math work as expected. + +It is sometimes the case that the number must be formatted a different way, +either specifying 0 padding or converting to hexadecimal. This can be done by a +number of operators at the end to indicate formatting changes. 
+ +`{n1:02x} - Zero pad to two digits, and convert to hexadecimal, as might be used for generating MAC addresses` +`{n1:x} - Hexadecimal without padding, as may be used in a generated IPv6 address` +`{n1:X} - Uppercase hexadecimal` +`{n1:02d} - Zero pad a normal numeric representation of the number.` + +Another common element to pull into an expression is the node name in whole: + +`hardwaremanagement.manager={nodename}-imm` + +Additionally other attributes may be pulled in: + +`hardwaremanagement.switchport={location.u}` + +Multiple expressions are permissible within a single attribute: + +`hardwaremanagement.manager={nodename}-{hardwaremanagement.method}` + +A note to developers: in general the API layer will automatically recognize a +generic set attribute to string with expression syntax and import it as an +expression. For example, submitting the following JSON: + +`{ 'location.rack': '{n1}' }` + +Will auto-detect {n1} as an expression and assign it normally. If wanting to +set that value verbatim, it can either be escaped by doubling the {} or by +explicitly declaring it as a value: + +`{ 'location.rack': '{{n1}}' }` + +`{ 'location.rack': { 'value': '{n1}' } }` diff --git a/confluent_client/doc/man/nodeboot.ronn b/confluent_client/doc/man/nodeboot.ronn new file mode 100644 index 00000000..9b067556 --- /dev/null +++ b/confluent_client/doc/man/nodeboot.ronn @@ -0,0 +1,34 @@ +nodeboot(8) -- Reboot a confluent node to a specific device +========================================================= + +## SYNOPSIS + +`nodeboot ` +`nodeboot ` [net|setup] + +## DESCRIPTION + +**nodeboot** reboots nodes in a noderange. If an additional argument is given, +it sets the node to specifically boot to that as the next boot. 
+ +## EXAMPLES +* Booting n3 and n4 to the default boot behavior: + `# nodeboot n3-n4` + `n3: default` + `n4: default` + `n3: on->reset` + `n4: on->reset` + +* Booting n1 and n2 to setup menu: + `# nodeboot n1-n2 setup` + `n2: setup` + `n1: setup` + `n2: on->reset` + `n1: on->reset` + +* Booting n3 and n4 to network: + `# nodeboot n3-n4 net` + `n3: network` + `n4: network` + `n4: on->reset` + `n3: off->on` diff --git a/confluent_client/doc/man/nodeconsole.ronn b/confluent_client/doc/man/nodeconsole.ronn index c7e3eb4d..95ceeaa3 100644 --- a/confluent_client/doc/man/nodeconsole.ronn +++ b/confluent_client/doc/man/nodeconsole.ronn @@ -1,14 +1,14 @@ -nodeconsole(1) -- Open a console to a confluent node +nodeconsole(8) -- Open a console to a confluent node ===================================================== ## SYNOPSIS -`nodeconsole` `node` +`nodeconsole ` ## DESCRIPTION **nodeconsole** opens an interactive console session to a given node. This is the text or serial console of a system. Exiting is done by hitting `Ctrl-e`, then `c`, - then `.`. Note that console output by default is additionally logged to + then `.`. Note that console output by default is additionally logged to `/var/log/confluent/consoles/`**NODENAME**. ## ESCAPE SEQUENCE COMMANDS @@ -20,11 +20,29 @@ keystroke will be interpreted as a command. The following commands are availabl * `.`: Exit the session and return to the command prompt * `b`: + [send Break] Send a break to the remote console when possible (some console plugins may not support this) * `o`: + [reOpen] Request confluent to disconnect and reconnect to console. For example if there is suspicion that the console has gone inoperable, but would work if reconnected. +* `po`: + [Power Off] + Power off server immediately, without waiting for OS to shutdown +* `ps`: + [Power Shutdown] + Request OS shut down gracefully, and then power off +* `pb`: + [Power Boot] + Cause system to immediately boot, resetting or turning on as appropriate. 
+ Hitting enter is required to execute the reboot rather than another pb sequence +* `pbs`: + [Power Boot Setup] + Request immediate boot ultimately landing in interactive firmware setup +* `pbn`: + [Power Boot Network] + Request immediate boot to network * `?`: Get a list of supported commands -* ``: - Abandon entering an escape sequence command +* ``: + Hit enter to skip entering a command at the escape prompt. diff --git a/confluent_client/doc/man/nodeeventlog.ronn b/confluent_client/doc/man/nodeeventlog.ronn new file mode 100644 index 00000000..69e78480 --- /dev/null +++ b/confluent_client/doc/man/nodeeventlog.ronn @@ -0,0 +1,33 @@ +nodeeventlog(8) -- Pull eventlog from confluent nodes +============================================================ + +## SYNOPSIS + +`nodeeventlog ` +`nodeeventlog [clear]` + +## DESCRIPTION + +`nodeeventlog` pulls and optionally clears the event log from the requested +noderange. + +## EXAMPLES +* Pull the event log from n2 and n3: + `# nodeeventlog n2,n3` + `n2: 05/03/2017 11:44:25 Event Log Disabled - SEL Fullness - Log clear` + `n2: 05/03/2017 11:44:56 System Firmware - Progress - Unspecified` + `n3: 05/03/2017 11:44:39 Event Log Disabled - SEL Fullness - Log clear` + `n3: 05/03/2017 11:45:00 System Firmware - Progress - Unspecified` + `n3: 05/03/2017 11:47:22 System Firmware - Progress - Starting OS boot` + +* Pull and clear the event log from n2 and n3: +`# nodeeventlog n2,n3 clear` +`n2: 05/03/2017 11:44:25 Event Log Disabled - SEL Fullness - Log clear` +`n2: 05/03/2017 11:44:56 System Firmware - Progress - Unspecified` +`n2: 05/03/2017 11:48:29 System Firmware - Progress - Starting OS boot` +`n3: 05/03/2017 11:44:39 Event Log Disabled - SEL Fullness - Log clear` +`n3: 05/03/2017 11:45:00 System Firmware - Progress - Unspecified` +`n3: 05/03/2017 11:47:22 System Firmware - Progress - Starting OS boot` +`# nodeeventlog n2,n3` +`n2: 05/03/2017 11:48:48 Event Log Disabled - SEL Fullness - Log clear` +`n3: 05/03/2017 11:48:52 
Event Log Disabled - SEL Fullness - Log clear` diff --git a/confluent_client/doc/man/nodefirmware.ronn b/confluent_client/doc/man/nodefirmware.ronn new file mode 100644 index 00000000..4aa7bf83 --- /dev/null +++ b/confluent_client/doc/man/nodefirmware.ronn @@ -0,0 +1,30 @@ +nodefirmware(8) -- Report firmware information on confluent nodes +================================================================= + +## SYNOPSIS + +`nodefirmware ` + +## DESCRIPTION + +`nodefirmware` reports various pieces of firmware on confluent nodes. + +## EXAMPLES + +* Pull firmware from a node: +`# nodefirmware r1` +`r1: IMM: 3.70 (TCOO26H 2016-11-29T05:09:51)` +`r1: IMM Backup: 1.71 (TCOO10D 2015-04-17T00:00:00)` +`r1: IMM Trusted Image: TCOO26H` +`r1: UEFI: 2.31 (TCE128I 2016-12-13T00:00:00)` +`r1: UEFI Backup: 2.20 (TCE126O)` +`r1: FPGA: 3.2.0` +`r1: Broadcom NetXtreme Gigabit Ethernet Controller Bootcode: 1.38` +`r1: Broadcom NetXtreme Gigabit Ethernet Controller MBA: 16.8.0` +`r1: Broadcom NetXtreme Gigabit Ethernet Controller Firmware Package: 0.0.0a` +`r1: ServeRAID M1215 MegaRAID Controller Firmware: 24.12.0-0038 (2016-10-20T00:00:00)` +`r1: ServeRAID M1215 Disk 28 MBF2600RC: SB2C` +`r1: ServeRAID M1215 Disk 29 MBF2600RC: SB2C` +`r1: ServeRAID M5210 Disk 0 MBF2600RC: SB2C` +`r1: ServeRAID M5210 Disk 1 MBF2600RC: SB2C` +`r1: ServeRAID M5210 Disk 2 MBF2600RC: SB2C` diff --git a/confluent_client/doc/man/nodegroupattrib.ronn b/confluent_client/doc/man/nodegroupattrib.ronn new file mode 100644 index 00000000..57ada27c --- /dev/null +++ b/confluent_client/doc/man/nodegroupattrib.ronn @@ -0,0 +1,42 @@ +nodegroupattrib(8) -- List or change confluent nodegroup attributes +=================================================================== + +## SYNOPSIS + +`nodegroupattrib [ current | all ]` +`nodegroupattrib [...]` +`nodegroupattrib [ ...]` +`nodegroupattrib [-c] [ ...]` + +## DESCRIPTION + +`nodegroupattrib` queries the confluent server to get information about nodes. 
+In the simplest form, it simply takes the given group and lists the attributes of that group. + +Contrasted with nodeattrib(8), settings managed by nodegroupattrib will be added +and removed from a node as it is added or removed from a group. If an attribute +is set using nodeattrib(8) against a noderange(5) that happens to be a group name, +nodeattrib(8) individually sets attributes directly on each individual node that is +currently a member of that group. Removing group membership or adding a new +node after using the nodeattrib(8) command will not have attributes change automatically. +It's easiest to see by using the `nodeattrib -b` to understand how +the attributes are set on the node versus a group to which a node belongs. + +## OPTIONS + +* `-c`, `--clear`: + Clear specified nodeattributes. + +## EXAMPLES + +* Show attributes of a group called `demogrp`: + `# nodegroupattrib demogrp` + `demogrp: hardwaremanagement.manager: (will derive from expression 10.30.{n0/255}.{n0%255})` + `demogrp: nodes: n12,n13,n10,n11,n9,n1,n2,n3,n4` + +* Set location.u to be the remainder of first number in node name when divided by 42: + `# nodegroupattrib demogrp location.u={n1%42}` + +## SEE ALSO + +nodeattrib(8), nodeattribexpressions(5) diff --git a/confluent_client/doc/man/nodehealth.ronn b/confluent_client/doc/man/nodehealth.ronn new file mode 100644 index 00000000..b8baa722 --- /dev/null +++ b/confluent_client/doc/man/nodehealth.ronn @@ -0,0 +1,22 @@ +nodehealth(8) -- Show health summary of confluent nodes +======================================================== + +## SYNOPSIS + +`nodehealth ` + +## DESCRIPTION + +`nodehealth` reports the current health assessment of a confluent node. It +will report either `ok`, `warning`, `critical`, or `failed`, along with +a string explaining the reason for any result other than `ok`. 
+ +## EXAMPLES + +* Pull health summary of 5 nodes: + `# nodehealth n1-n4,r1` + `n1: critical (Mezz Exp 2 Fault:Critical)` + `n3: ok` + `n2: ok` + `r1: ok` + `n4: ok` diff --git a/confluent_client/doc/man/nodeidentify.ronn b/confluent_client/doc/man/nodeidentify.ronn new file mode 100644 index 00000000..3a9cbe69 --- /dev/null +++ b/confluent_client/doc/man/nodeidentify.ronn @@ -0,0 +1,31 @@ +nodeidentify(8) -- Control the identify LED of confluent nodes +========================================================= + +## SYNOPSIS + +`nodeidentify [on|off]` + +## DESCRIPTION + +`nodeidentify` allows you to turn on or off the location LED of confluent nodes, +making it easier to determine the physical location of the nodes. The following +options are supported: + +* `on`: Turn on the identify LED +* `off`: Turn off the identify LED + +## EXAMPLES: + +* Turn on the identify LED on nodes n1 through n4: + `# nodeidentify n1-n4 on` + `n1: on` + `n2: on` + `n3: on` + `n4: on` + +* Turn off the identify LED on nodes n1 through n4: + `# nodeidentify n1-n4 off` + `n1: off` + `n2: off` + `n4: off` + `n3: off` diff --git a/confluent_client/doc/man/nodeinventory.ronn b/confluent_client/doc/man/nodeinventory.ronn new file mode 100644 index 00000000..802b1c87 --- /dev/null +++ b/confluent_client/doc/man/nodeinventory.ronn @@ -0,0 +1,106 @@ +nodeinventory(8) -- Get hardware inventory of confluent node +=============================================================== + +## SYNOPSIS + +`nodeinventory ` + +## DESCRIPTION + +`nodeinventory` pulls information about hardware of a node. This includes +information such as adapters, serial numbers, processors, and memory modules, +as supported by the platform's hardware management implementation. 
+ +## EXAMPLES + +* Pulling inventory of a node named r1: + `# nodeinventory r1` + `r1: System MAC Address 1: 40:f2:e9:af:45:a0` + `r1: System MAC Address 2: 40:f2:e9:af:45:a1` + `r1: System MAC Address 3: 40:f2:e9:af:45:a2` + `r1: System MAC Address 4: 40:f2:e9:af:45:a3` + `r1: System Board manufacturer: IBM` + `r1: System Product name: System x3650 M5` + `r1: System Device ID: 32` + `r1: System Revision: 9` + `r1: System Product ID: 323` + `r1: System Board model: 00KG915` + `r1: System Device Revision: 0` + `r1: System Serial Number: E2K4831` + `r1: System Board manufacture date: 2014-10-20T12:00` + `r1: System Board serial number: Y010UF4AL0B5` + `r1: System Manufacturer: IBM` + `r1: System FRU Number: 00FK639` + `r1: System Board product name: System Board` + `r1: System Model: 5462AC1` + `r1: System UUID: 1B29CE46-765E-31A3-A3B9-B5FB934F15AB` + `r1: System Hardware Version: 0x0000` + `r1: System Manufacturer ID: 20301` + `r1: System Chassis serial number: E2K4831` + `r1: System Asset Number: ` + `r1: System Chassis type: Other` + `r1: Power Supply 1 Board model: 94Y8136` + `r1: Power Supply 1 Board manufacturer: EMER` + `r1: Power Supply 1 FRU Number: 94Y8137` + `r1: Power Supply 1 Board product name: IBM Designed Device` + `r1: Power Supply 1 Board manufacture date: 2014-11-08T00:00` + `r1: Power Supply 1 Board serial number: K13814B88ED` + `r1: Power Supply 1 Revision: 49` + `r1: Power Supply 2: Not Present` + `r1: DASD Backplane 1 Board model: 00JY139` + `r1: DASD Backplane 1 Board manufacturer: WIST` + `r1: DASD Backplane 1 FRU Number: 00FJ756` + `r1: DASD Backplane 1 Board product name: IBM Designed Device` + `r1: DASD Backplane 1 Board manufacture date: 2014-08-28T00:00` + `r1: DASD Backplane 1 Board serial number: Y011UF48W02U` + `r1: DASD Backplane 1 Revision: 0` + `r1: DASD Backplane 2: Not Present` + `r1: DASD Backplane 3: Not Present` + `r1: DASD Backplane 4: Not Present` + `r1: DASD Backplane 5 Board model: 00YJ530` + `r1: DASD Backplane 5 Board 
manufacturer: WIST` + `r1: DASD Backplane 5 FRU Number: 00AL953` + `r1: DASD Backplane 5 Board product name: IBM Designed Device` + `r1: DASD Backplane 5 Board manufacture date: 2016-02-04T00:00` + `r1: DASD Backplane 5 Board serial number: Y010UF624024` + `r1: DASD Backplane 5 Revision: 0` + `r1: DASD Backplane 6: Not Present` + `r1: CPU 1 Hardware Version: Intel(R) Xeon(R) CPU E5-2640 v3 @ 2.60GHz` + `r1: CPU 1 Asset Number: Unknown` + `r1: CPU 1 Manufacturer: Intel(R) Corporation` + `r1: CPU 2: Not Present` + `r1: ML2 Card: Not Present` + `r1: DIMM 1: Not Present` + `r1: DIMM 2: Not Present` + `r1: DIMM 3: Not Present` + `r1: DIMM 4: Not Present` + `r1: DIMM 5: Not Present` + `r1: DIMM 6: Not Present` + `r1: DIMM 7: Not Present` + `r1: DIMM 8: Not Present` + `r1: DIMM 9: Not Present` + `r1: DIMM 10: Not Present` + `r1: DIMM 11: Not Present` + `r1: DIMM 12: Not Present` + `r1: DIMM 13: Not Present` + `r1: DIMM 14: Not Present` + `r1: DIMM 15: Not Present` + `r1: DIMM 16: Not Present` + `r1: DIMM 17: Not Present` + `r1: DIMM 18: Not Present` + `r1: DIMM 19: Not Present` + `r1: DIMM 20: Not Present` + `r1: DIMM 21: Not Present` + `r1: DIMM 22: Not Present` + `r1: DIMM 23: Not Present` + `r1: DIMM 24: Not Present` + `r1: X8 PCI 1: Not Present` + `r1: X8 PCI 2: Not Present` + `r1: X8 PCI 6: Not Present` + `r1: X8 PCI 7: Not Present` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller MAC Address 1: 40:f2:e9:af:45:a0` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller MAC Address 2: 40:f2:e9:af:45:a1` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller MAC Address 3: 40:f2:e9:af:45:a2` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller MAC Address 4: 40:f2:e9:af:45:a3` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller PCI slot: 1b:00` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller location: Onboard` diff --git a/confluent_client/doc/man/nodelist.ronn b/confluent_client/doc/man/nodelist.ronn index b231cc54..e78c3713 100644 --- 
a/confluent_client/doc/man/nodelist.ronn +++ b/confluent_client/doc/man/nodelist.ronn @@ -1,10 +1,10 @@ -nodelist(1) -- List confluent nodes and their attributes +nodelist(8) -- List confluent nodes and their attributes ========================================================= ## SYNOPSIS -`nodelist` `noderange` -`nodelist` `noderange` [-b] [...] +`nodelist ` +`nodelist [-b] ...` ## DESCRIPTION @@ -45,4 +45,3 @@ information on node attributes in nodeattributes(5) man page. `n2: hardwaremanagement.method: ipmi (inherited from group everything)` `n2: hardwaremanagement.switch: r8e1` `n2: hardwaremanagement.switchport: 2` - diff --git a/confluent_client/doc/man/nodepower.ronn b/confluent_client/doc/man/nodepower.ronn new file mode 100644 index 00000000..8249c95b --- /dev/null +++ b/confluent_client/doc/man/nodepower.ronn @@ -0,0 +1,43 @@ +nodepower(8) -- Check or change power state of confluent nodes +========================================================= + +## SYNOPSIS + +`nodepower ` +`nodepower [on|off|boot|shutdown|reset|status]` + +## DESCRIPTION + +**nodepower** with only a noderange will retrieve current power state of nodes +through confluent. When given an additional argument, it will request a change +to the power state of the nodes. The following arguments are recognized: + +* `on`: Turn on the specified noderange. Nothing will happen to nodes of +the noderange that are already on. +* `off`: Immediately turn off the specified noderange, without waiting for OS +to shutdown. Nothing will happen to nodes of the noderange that are already on. +* `boot`: Immediately boot a system. This will power on nodes of the noderange +that are off, and reset nodes of the noderange that are on. The previous state +will be reflected in the output. +* `shutdown`: Request the OS gracefully shut down. Nothing will happen for +nodes that are off, and nodes will not shutdown if the OS fails to gracefully +respond. +* `reset`: Request immediate reset of nodes of the noderange. 
Nodes that are +off will not react to this request. +* `status`: Behave identically to having no argument passed at all. + +## EXAMPLES +* Get power state of nodes n1 through n4: + `# nodepower n1-n4` + `n1: on` + `n2: on` + `n3: on` + `n4: off` + + +* Forcing a reboot of nodes n1-n4: + `# nodepower n1-n4 boot` + `n3: on->reset` + `n1: on->reset` + `n2: on->reset` + `n4: off->on` diff --git a/confluent_client/doc/man/noderun.ronn b/confluent_client/doc/man/noderun.ronn new file mode 100644 index 00000000..626bb685 --- /dev/null +++ b/confluent_client/doc/man/noderun.ronn @@ -0,0 +1,53 @@ +noderun(8) -- Run arbitrary commands per node in a noderange +============================================================= + +## SYNOPSIS + +`noderun ` + +## DESCRIPTION + +`noderun` will take a given command and execute it in parallel once per node +in the specified noderange. Attribute expressions as documented in +nodeattribexpressions(5) are expanded prior to execution of the command. For +noderun, the commands are locally executed. To execute commands on the nodes +themselves, see nodeshell(8). 
+ +## EXAMPLES + +* Run ping against nodes n1 through n4: + `# noderun n1-n4 ping -c 1 {nodename}` + `n3: PING n3 (172.30.2.3) 56(84) bytes of data.` + `n3: 64 bytes from n3 (172.30.2.3): icmp_seq=1 ttl=64 time=0.387 ms` + `n3: ` + `n3: --- n3 ping statistics ---` + `n3: 1 packets transmitted, 1 received, 0% packet loss, time 0ms` + `n3: rtt min/avg/max/mdev = 0.387/0.387/0.387/0.000 ms` + `n4: PING n4 (172.30.2.4) 56(84) bytes of data.` + `n4: 64 bytes from n4 (172.30.2.4): icmp_seq=1 ttl=64 time=0.325 ms` + `n4: ` + `n4: --- n4 ping statistics ---` + `n4: 1 packets transmitted, 1 received, 0% packet loss, time 0ms` + `n4: rtt min/avg/max/mdev = 0.325/0.325/0.325/0.000 ms` + `n2: PING n2 (172.30.2.2) 56(84) bytes of data.` + `n2: From odin (172.30.0.6) icmp_seq=1 Destination Host Unreachable` + `n2: ` + `n2: --- n2 ping statistics ---` + `n2: 1 packets transmitted, 0 received, +1 errors, 100% packet loss, time 3000ms` + `n2: ` + `n1: PING n1 (172.30.2.1) 56(84) bytes of data.` + `n1: ` + `n1: --- n1 ping statistics ---` + `n1: 1 packets transmitted, 0 received, 100% packet loss, time 10000ms` + `n1: ` + +* Run an ipmitool raw command against the management controllers of n1 through n4: + `# noderun n1-n4 ipmitool -I lanplus -U USERID -E -H {hardwaremanagement.manager} raw 0 1` + `n3: 01 10 00` + `n1: 01 10 00` + `n4: 01 10 00` + `n2: 01 10 00` + +## SEE ALSO + +nodeshell(8) diff --git a/confluent_client/doc/man/nodesensors.ronn b/confluent_client/doc/man/nodesensors.ronn index 1f5a8467..44deb352 100644 --- a/confluent_client/doc/man/nodesensors.ronn +++ b/confluent_client/doc/man/nodesensors.ronn @@ -1,9 +1,9 @@ -nodesensors(1) --- Retrieve telemetry for sensors of confluent nodes +nodesensors(8) --- Retrieve telemetry for sensors of confluent nodes ==================================================================== ## SYNOPSIS -`nodesensors` `noderange` [-c] [-i ] [-n ] [...] 
+`nodesensors [-c] [-i ] [-n ] [...]` ## DESCRIPTION diff --git a/confluent_client/doc/man/nodesetboot.ronn b/confluent_client/doc/man/nodesetboot.ronn new file mode 100644 index 00000000..349a9a47 --- /dev/null +++ b/confluent_client/doc/man/nodesetboot.ronn @@ -0,0 +1,69 @@ +nodesetboot(8) -- Check or set next boot device for noderange +==================================================== + +## SYNOPSIS + +`nodesetboot ` +`nodesetboot [options] [default|cd|network|setup|hd]` + +## DESCRIPTION + +Requests that the next boot occur from the specified device. Unless otherwise +specified, this is a one time boot option, and does not change the normal boot +behavior of the system. This is useful for taking a system that normally boots +to the hard drive and starting a network install, or to go into the firmware +setup menu without having to hit a keystroke at the correct time on the console. + +Generally, it's a bit more convenient and direct to use the nodeboot(8) command, +which will follow up the boot device with an immediate power directive to take +effect. The `nodesetboot` command is still useful, particularly if you want +to use `nodesetboot setup` and then initiate a reboot from within +the operating system with ssh or similar rather than using the remote hardware +control. + +## OPTIONS + +* `-b`, `--bios`: + For a system that supports both BIOS and UEFI style boot, request BIOS style + boot if supported (some platforms will UEFI boot with this flag anyway). + +* `-p`, `--persist`: + For a system that supports it, mark the boot override to persist rather than + be a one time change. Many systems do not support this functionality. + +* `default`: + Request a normal default boot with no particular device override + +* `cd`: + Request boot from media. Note that this can include physical CD, + remote media mounted as CD/DVD, and detachable hard disk drives such as usb + key devices. 
+ +* `network`: + Request boot to network + +* `setup`: + Request to enter the firmware configuration menu (e.g. F1 setup) on next boot. + +* `hd`: + Boot straight to hard disk drive + +## EXAMPLES + +* Set next boot to setup for four nodes: + `# nodesetboot n1-n4 setup` + `n1: setup` + `n3: setup` + `n2: setup` + `n4: setup` + +* Check boot override settings on four nodes: + `# nodesetboot n1-n4` + `n1: setup` + `n2: setup` + `n3: setup` + `n4: setup` + +## SEE ALSO + +nodeboot(8) diff --git a/confluent_client/doc/man/nodeshell.ronn b/confluent_client/doc/man/nodeshell.ronn new file mode 100644 index 00000000..888e8f51 --- /dev/null +++ b/confluent_client/doc/man/nodeshell.ronn @@ -0,0 +1,29 @@ +nodeshell(8) -- Execute command on many nodes in a noderange through ssh +========================================================================= + +## SYNOPSIS + +`nodeshell ` + +## DESCRIPTION + +Allows execution of a command on many nodes in parallel. Like noderun(8), it +accepts and interpolates confluent attribute expressions as documented in +nodeattribexpressions(5). `nodeshell` provides stdout as stdout and stderr +as stderr, unlike psh which combines all stdout and stderr into stdout. 
+ +## EXAMPLES + +* Running `echo hi` on four nodes: + `# nodeshell n1-n4 echo hi` + `n1: hi` + `n2: hi` + `n3: hi` + `n4: hi` + +* Setting a new static ip address temporarily on secondary interface of four nodes: + `# nodeshell n1-n4 ifconfig eth1 172.30.93.{n1}` + +## SEE ALSO + +noderun(8) diff --git a/confluent_client/makeman b/confluent_client/makeman new file mode 100755 index 00000000..feb804a0 --- /dev/null +++ b/confluent_client/makeman @@ -0,0 +1,8 @@ +#!/bin/sh +cd `dirname $0`/doc/man +mkdir -p ../../man/man5 +mkdir -p ../../man/man8 +ronn -r *.ronn +mv *.5 ../../man/man5/ +mv *.8 ../../man/man8/ + diff --git a/confluent_client/setup.py.tmpl b/confluent_client/setup.py.tmpl index f8768bd5..59db1b33 100644 --- a/confluent_client/setup.py.tmpl +++ b/confluent_client/setup.py.tmpl @@ -1,7 +1,13 @@ from setuptools import setup import os -scriptlist = ['bin/{0}'.format(d) for d in os.listdir('bin/')] +data_files = [('/etc/profile.d', ['confluent_env.sh', 'confluent_env.csh'])] +try: + scriptlist = ['bin/{0}'.format(d) for d in os.listdir('bin/')] + data_files.append(('/opt/confluent/share/man/man5', ['man/man5/' + x for x in os.listdir('man/man5')])) + data_files.append(('/opt/confluent/share/man/man8', ['man/man8/' + x for x in os.listdir('man/man8')])) +except OSError: + pass setup( name='confluent_client', @@ -11,5 +17,5 @@ setup( url='http://xcat.sf.net/', packages=['confluent'], scripts=scriptlist, - data_files=[('/etc/profile.d', ['confluent_env.sh'])], + data_files=data_files, ) diff --git a/confluent_server/buildrpm b/confluent_server/buildrpm index 627f7dc7..14071382 100755 --- a/confluent_server/buildrpm +++ b/confluent_server/buildrpm @@ -1,5 +1,8 @@ #!/bin/sh cd `dirname $0` +if [ -x ./makeman ]; then + ./makeman +fi ./makesetup VERSION=`cat VERSION` PKGNAME=$(basename $(pwd)) diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index 468358b1..27a70c06 100644 --- 
a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -16,50 +16,69 @@ # limitations under the License. -#This defines the attributes of variou classes of things +#This defines the attributes of various classes of things # 'nic', meant to be a nested structure under node -nic = { - 'name': { - 'description': 'Name in ip/ifconfig as desired by administrator', - }, - 'port': { - 'description': 'Port that this nic connects to', - }, - 'switch': { - 'description': 'Switch that this nic connects to', - }, - 'customhardwareaddress': { - 'description': 'Mac address to push to nic', - }, - 'dnssuffix': { - 'description': ('String to place after nodename, but before' - 'Network.Domain to derive FQDN for this NIC'), - }, - 'hardwareaddress': { - 'description': 'Active mac address on this nic (factory or custom)' - }, - 'ipaddresses': { - 'description': 'Set of IPv4 and IPv6 addresses in CIDR format' - }, - 'pvid': { - 'description': 'PVID of port on switch this nic connects to', - }, - 'mtu': { - 'description': 'Requested MTU to configure on this interface', - }, - 'vlans': { - 'description': 'Tagged VLANs to apply to nic/switch', - }, - 'dhcpv4enabled': { - 'description': ('Whether DHCP should be attempted to acquire IPv4' - 'address on this interface'), - }, - 'dhcpv6enabled': { - 'description': ('Whether DHCP should be attempted to acquire IPv6' - 'address on this interface'), - }, -} +# changing mind on design, flattening to a single attribute, a *touch* less +# flexible at the top end, but much easier on the low end +# now net..attribute scheme +# similarly, leaning toward comma delimited ip addresses, since 99.99% of the +# time each nic will have one ip address +# vlan specification will need to be thought about a tad, each ip could be on +# a distinct vlan, but could have a vlan without an ip for sake of putting +# to a bridge. 
Current thought is +# vlans attribute would be comma delimited referring to the same index +# as addresses, with either 'native' or a number for vlan id +# the 'joinbridge' attribute would have some syntax like @ to indicate +# joining only a vlan of the nic to the bridge +# 'joinbond' attribute would not support vlans. + +#nic = { +# 'name': { +# 'description': 'Name in ip/ifconfig as desired by administrator', +# }, +# 'biosdevname': { +# 'description': '"biosdevname" scheme to identify the adapter. If not' +# 'mac address match is preferred, then biosdevname, then' +# 'name.', +# }, +# 'port': { +# 'description': 'Port that this nic connects to', +# }, +# 'switch': { +# 'description': 'Switch that this nic connects to', +# }, +# 'customhardwareaddress': { +# 'description': 'Mac address to push to nic', +# }, +# 'dnssuffix': { +# 'description': ('String to place after nodename, but before' +# 'Network.Domain to derive FQDN for this NIC'), +# }, +# 'hardwareaddress': { +# 'description': 'Active mac address on this nic (factory or custom)' +# }, +# 'ipaddresses': { +# 'description': 'Set of IPv4 and IPv6 addresses in CIDR format' +# }, +# 'pvid': { +# 'description': 'PVID of port on switch this nic connects to', +# }, +# 'mtu': { +# 'description': 'Requested MTU to configure on this interface', +# }, +# 'vlans': { +# 'description': 'Tagged VLANs to apply to nic/switch', +# }, +# 'dhcpv4enabled': { +# 'description': ('Whether DHCP should be attempted to acquire IPv4' +# 'address on this interface'), +# }, +# 'dhcpv6enabled': { +# 'description': ('Whether DHCP should be attempted to acquire IPv6' +# 'address on this interface'), +# }, +#} user = { 'password': { @@ -71,7 +90,6 @@ user = { node = { 'groups': { 'type': list, - 'default': 'all', 'description': ('List of static groups for which this node is ' 'considered a member'), }, @@ -81,6 +99,72 @@ node = { #'id': { # 'description': ('Numeric identifier for node') #}, + # autonode is the feature of generating nodes 
based on connectivity to + # current node. In recursive autonode, for now we just allow endpoint to + # either be a server directly *or* a server enclosure. This precludes + # for the moment a concept of nested arbitrarily deep, but for now do this. + # hypothetically, one could imagine supporting an array and 'popping' + # names until reaching end. Not worth implementing at this point. If + # a traditional switch is added, it needs some care and feeding anyway. + # If a more exciting scheme presents itself, well we won't have to +# # own discovering switches anyway. +# 'autonode.servername': { +# 'description': ('Template for creating nodenames for automatic ' +# 'creation of nodes detected as children of ' +# 'this node. For example, a node in a server ' +# 'enclosure bay or a server connected to a switch or ' +# 'an enclosure manager connected to a switch. Certain ' +# 'special template parameters are available and can ' +# 'be used alongside usual config template directives. ' +# '"discovered.nodenumber" will be replaced with the ' +# 'bay or port number where the child node is connected.' +# ), +# }, +# 'autonode.servergroups': { +# 'type': list, +# 'description': ('A list of groups to which discovered nodes will ' +# 'belong to. As in autonode.servername, "discovered." ' +# 'variable names will be substituted in special context') +# }, +# 'autonode.enclosurename': { +# 'description': ('Template for creating nodenames when the discovered ' +# 'node is an enclosure that will in turn generate nodes.' +# ) +# }, +# 'autonode.enclosuregroups': { +# 'type': list, +# 'description': ('A list of groups to which a discovered node will be' +# 'placed, presuming that node is an enclosure.') +# }, +#For now, we consider this eventuality if needed. For now emphasize paradigm +# of group membership and see how far that goes. +# 'autonode.copyattribs': { +# 'type': list, +# 'description': ('A list of attributes to copy from the node generator ' +# 'to the generated node. 
Expressions will be copied ' +# 'over without evaluation, so will be evaluated ' +# 'in the context of the generated node, rather than the' +# 'parent node. By default, an enclosure will copy over' +# 'autonode.servername, so that would not need to be ' +# 'copied ') +# }, + 'discovery.policy': { + 'description': 'Policy to use for auto-configuration of discovered ' + 'and identified nodes. Valid values are "manual", ' + '"permissive", or "open". "manual" means nodes are ' + 'detected, but not autoconfigured until a user ' + 'approves. "permissive" indicates to allow discovery, ' + 'so long as the node has no existing public key. ' + '"open" allows discovery even if a known public key ' + 'is already stored', + }, + 'info.note': { + 'description': 'A field used for administrators to make arbitrary ' + 'notations about nodes. This is meant entirely for ' + 'human use and not programmatic use, so it can be ' + 'freeform text data without concern for issues in how ' + 'the server will process it.', + }, 'location.room': { 'description': 'Room description for the node', }, @@ -195,17 +279,6 @@ node = { 'description': 'The method used to perform operations such as power ' 'control, get sensor data, get inventory, and so on. ' }, - 'hardwaremanagement.switch': { - 'description': 'The switch to which the hardware manager is connected.' - ' Only relevant if using switch based discovery via the' - ' hardware manager (Lenovo IMMs and CMMs). Not ' - 'applicable to Lenovo Flex nodes.' - }, - 'hardwaremanagement.switchport': { - 'description': 'The port of the switch that the hardware manager is ' - 'connected. See documentation of ' - 'hardwaremanagement.switch for more detail.' 
- }, 'enclosure.manager': { 'description': "The management device for this node's chassis", # 'appliesto': ['system'], @@ -223,9 +296,32 @@ node = { # 'id.serial': { # 'description': 'The manufacturer serial number of node', # }, -# 'id.uuid': { -# 'description': 'The UUID of the node as presented in DMI', -# }, + 'id.uuid': { + 'description': 'The UUID of the node as presented in DMI.', + }, + 'net.ipv4_gateway': { + 'description': 'The IPv4 gateway to use if applicable. As is the ' + 'case for other net attributes, net.eth0.ipv4_gateway ' + 'and similar is accepted.' + }, + # 'net.pxe': { 'description': 'Whether pxe will be used on this interface' + # TODO(jjohnson2): Above being 'true' will control whether mac addresses + # are stored in this nics attribute on pxe-client discovery, since + # pxe discovery is ambiguous for BMC and system on same subnet, + # or even both on the same port and same subnet + 'net.switch': { + 'description': 'An ethernet switch the node is connected to. Note ' + 'that net.* attributes may be indexed by interface. ' + 'For example instead of using net.switch, it is ' + 'possible to use net.eth0.switch and net.eth1.switch ' + 'or net.0.switch and net.1.switch to define multiple ' + 'sets of net connectivity associated with each other.' + }, + 'net.switchport': { + 'description': 'The port on the switch that corresponds to this node. ' + 'See information on net.switch for more on the ' + 'flexibility of net.* attributes.' 
+ }, # 'id.modelnumber': { # 'description': 'The manufacturer dictated model number for the node', # }, diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index d9600a3a..4ce49aba 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -72,6 +72,8 @@ import confluent.exceptions as exc import copy import cPickle import errno +import eventlet +import fnmatch import json import operator import os @@ -88,6 +90,8 @@ _masterintegritykey = None _dirtylock = threading.RLock() _config_areas = ('nodegroups', 'nodes', 'usergroups', 'users') tracelog = None +statelessmode = False +_cfgstore = None def _mkpath(pathname): try: @@ -151,6 +155,29 @@ def _format_key(key, password=None): return {"unencryptedvalue": key} +def _do_notifier(cfg, watcher, callback): + try: + callback(nodeattribs=watcher['nodeattrs'], configmanager=cfg) + except Exception: + logException() + + +def logException(): + global tracelog + if tracelog is None: + tracelog = confluent.log.Logger('trace') + tracelog.log(traceback.format_exc(), + ltype=confluent.log.DataTypes.event, + event=confluent.log.Events.stacktrace) + + +def _do_add_watcher(watcher, added, configmanager): + try: + watcher(added=added, deleting=[], configmanager=configmanager) + except Exception: + logException() + + def init_masterkey(password=None): global _masterkey global _masterintegritykey @@ -198,6 +225,26 @@ def decrypt_value(cryptvalue, return value[0:-padsize] +def attribute_is_invalid(attrname, attrval): + if attrname.startswith('custom.'): + # No type checking or name checking is provided for custom, + # it's not possible + return False + if attrname.startswith('net.'): + # For net.* attributes, split on the dots and put back together + # longer term we might want a generic approach, but + # right now it's just net. attributes + netattrparts = attrname.split('.') + attrname = netattrparts[0] + '.' 
+ netattrparts[-1] + if attrname not in allattributes.node: + # Otherwise, it must be in the allattributes key list + return True + if 'type' in allattributes.node[attrname]: + if not isinstance(attrval, allattributes.node[attrname]['type']): + # provide type checking for attributes with a specific type + return True + return False + def crypt_value(value, key=None, integritykey=None): @@ -214,7 +261,10 @@ def crypt_value(value, neededpad = 16 - (len(value) % 16) pad = chr(neededpad) * neededpad value += pad - cryptval = crypter.encrypt(value) + try: + cryptval = crypter.encrypt(value) + except TypeError: + cryptval = crypter.encrypt(value.encode('utf-8')) hmac = HMAC.new(integritykey, cryptval, SHA256).digest() return iv, cryptval, hmac @@ -252,6 +302,8 @@ def get_global(globalname): :param globalname: The global parameter name to read """ + if _cfgstore is None: + init() try: return _cfgstore['globals'][globalname] except KeyError: @@ -268,6 +320,8 @@ def set_global(globalname, value): :param globalname: The global parameter name to store :param value: The value to set the global parameter to. 
""" + if _cfgstore is None: + init() with _dirtylock: if 'dirtyglobals' not in _cfgstore: _cfgstore['dirtyglobals'] = set() @@ -372,8 +426,8 @@ class _ExpressionFormat(string.Formatter): if optype not in self._supported_ops: raise Exception("Unsupported operation") op = self._supported_ops[optype] - return op(self._handle_ast_node(node.left), - self._handle_ast_node(node.right)) + return op(int(self._handle_ast_node(node.left)), + int(self._handle_ast_node(node.right))) def _decode_attribute(attribute, nodeobj, formatter=None, decrypt=False): @@ -450,6 +504,8 @@ class ConfigManager(object): def __init__(self, tenant, decrypt=False, username=None): global _cfgstore + if _cfgstore is None: + init() self.decrypt = decrypt self.current_user = username if tenant is None: @@ -551,7 +607,9 @@ class ConfigManager(object): def watch_attributes(self, nodes, attributes, callback): """ - Watch a list of attributes for changes on a list of nodes + Watch a list of attributes for changes on a list of nodes. 
The + attributes may be literal, or a filename style wildcard like + 'net*.switch' :param nodes: An iterable of node names to be watching :param attributes: An iterable of attribute names to be notified about @@ -579,6 +637,10 @@ class ConfigManager(object): } else: attribwatchers[node][attribute][notifierid] = callback + if '*' in attribute: + currglobs = attribwatchers[node].get('_attrglobs', set([])) + currglobs.add(attribute) + attribwatchers[node]['_attrglobs'] = currglobs return notifierid def watch_nodecollection(self, callback): @@ -786,9 +848,11 @@ class ConfigManager(object): if decrypt is None: decrypt = self.decrypt retdict = {} - relattribs = attributes if isinstance(nodelist, str) or isinstance(nodelist, unicode): nodelist = [nodelist] + if isinstance(attributes, str) or isinstance(attributes, unicode): + attributes = [attributes] + relattribs = attributes for node in nodelist: if node not in self._cfgstore['nodes']: continue @@ -800,6 +864,10 @@ class ConfigManager(object): if attribute.startswith('_'): # skip private things continue + if '*' in attribute: + for attr in fnmatch.filter(list(cfgnodeobj), attribute): + nodeobj[attr] = _decode_attribute(attr, cfgnodeobj, + decrypt=decrypt) if attribute not in cfgnodeobj: continue # since the formatter is not passed in, the calculator is @@ -916,11 +984,8 @@ class ConfigManager(object): raise ValueError("{0} group does not exist".format(group)) for attr in attribmap[group].iterkeys(): if (attr not in ('nodes', 'noderange') and - (attr not in allattributes.node or - ('type' in allattributes.node[attr] and - not isinstance(attribmap[group][attr], - allattributes.node[attr]['type'])))): - raise ValueError("nodes attribute is invalid") + attribute_is_invalid(attr, attribmap[group][attr])): + raise ValueError("{0} attribute is invalid".format(attr)) if attr == 'nodes': if not isinstance(attribmap[group][attr], list): if type(attribmap[group][attr]) is unicode or type(attribmap[group][attr]) is str: @@ -1019,7 
+1084,7 @@ class ConfigManager(object): return notifdata = {} attribwatchers = self._attribwatchers[self.tenant] - for node in nodeattrs.iterkeys(): + for node in nodeattrs: if node not in attribwatchers: continue attribwatcher = attribwatchers[node] @@ -1032,10 +1097,21 @@ class ConfigManager(object): # to deletion, to make all watchers aware of the removed # node and take appropriate action checkattrs = attribwatcher + globattrs = {} + for attrglob in attribwatcher.get('_attrglobs', []): + for matched in fnmatch.filter(list(checkattrs), attrglob): + globattrs[matched] = attrglob for attrname in checkattrs: - if attrname not in attribwatcher: + if attrname == '_attrglobs': continue - for notifierid in attribwatcher[attrname].iterkeys(): + watchkey = attrname + # the attrib watcher could still have a glob + if attrname not in attribwatcher: + if attrname in globattrs: + watchkey = globattrs[attrname] + else: + continue + for notifierid in attribwatcher[watchkey]: if notifierid in notifdata: if node in notifdata[notifierid]['nodeattrs']: notifdata[notifierid]['nodeattrs'][node].append( @@ -1046,18 +1122,12 @@ class ConfigManager(object): else: notifdata[notifierid] = { 'nodeattrs': {node: [attrname]}, - 'callback': attribwatcher[attrname][notifierid] + 'callback': attribwatcher[watchkey][notifierid] } for watcher in notifdata.itervalues(): callback = watcher['callback'] - try: - callback(nodeattribs=watcher['nodeattrs'], configmanager=self) - except Exception: - global tracelog - if tracelog is None: - tracelog = confluent.log.Logger('trace') - tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event, - event=log.Events.stacktrace) + eventlet.spawn_n(_do_notifier, self, watcher, callback) + def del_nodes(self, nodes): if self.tenant in self._nodecollwatchers: @@ -1154,11 +1224,7 @@ class ConfigManager(object): if ('everything' in self._cfgstore['nodegroups'] and 'everything' not in attribmap[node]['groups']): attribmap[node]['groups'].append('everything') - 
elif (attrname not in allattributes.node or - ('type' in allattributes.node[attrname] and - not isinstance( - attrval, - allattributes.node[attrname]['type']))): + elif attribute_is_invalid(attrname, attrval): errstr = "{0} attribute on node {1} is invalid".format( attrname, node) raise ValueError(errstr) @@ -1206,7 +1272,7 @@ class ConfigManager(object): if self.tenant in self._nodecollwatchers: nodecollwatchers = self._nodecollwatchers[self.tenant] for watcher in nodecollwatchers.itervalues(): - watcher(added=newnodes, deleting=[], configmanager=self) + eventlet.spawn_n(_do_add_watcher, watcher, newnodes, self) self._bg_sync_to_file() #TODO: wait for synchronization to suceed/fail??) @@ -1353,6 +1419,8 @@ class ConfigManager(object): @classmethod def _bg_sync_to_file(cls): + if statelessmode: + return with cls._syncstate: if cls._syncrunning: cls._writepending = True @@ -1366,6 +1434,8 @@ class ConfigManager(object): @classmethod def _sync_to_file(cls): + if statelessmode: + return if 'dirtyglobals' in _cfgstore: with _dirtylock: dirtyglobals = copy.deepcopy(_cfgstore['dirtyglobals']) @@ -1511,11 +1581,15 @@ def dump_db_to_directory(location, password, redact=None): except OSError: pass - -try: - ConfigManager._read_from_path() -except IOError: - _cfgstore = {} +def init(stateless=False): + global _cfgstore + if stateless: + _cfgstore = {} + return + try: + ConfigManager._read_from_path() + except IOError: + _cfgstore = {} # some unit tests worth implementing: diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index d265837f..bcb39dd7 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -1,6 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation +# Copyright 2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -20,6 +21,7 @@ # we track nodes that are actively being logged, watched, or have attached # there should be no more than one handler per node +import codecs import collections import confluent.config.configmanager as configmodule import confluent.exceptions as exc @@ -29,6 +31,7 @@ import confluent.core as plugin import confluent.util as util import eventlet import eventlet.event +import pyte import random import time import traceback @@ -37,6 +40,100 @@ _handled_consoles = {} _tracelog = None +try: + range = xrange +except NameError: + pass + +pytecolors2ansi = { + 'black': 0, + 'red': 1, + 'green': 2, + 'brown': 3, + 'blue': 4, + 'magenta': 5, + 'cyan': 6, + 'white': 7, + 'default': 9, +} +# might be able to use IBMPC map from pyte charsets, +# in that case, would have to mask out certain things (like ESC) +# in the same way that Screen's draw method would do +# for now at least get some of the arrows in there (note ESC is one +# of those arrows... so skip it... +ansichars = dict(zip((0x18, 0x19), u'\u2191\u2193')) + + +def _utf8_normalize(data, shiftin, decoder): + # first we give the stateful decoder a crack at the byte stream, + # we may come up empty in the event of a partial multibyte + try: + data = decoder.decode(data) + except UnicodeDecodeError: + # first order of business is to reset the state of + # the decoder to a clean one, so we can switch back to utf-8 + # when things change, for example going from an F1 setup menu stuck + # in the old days to a modern platform using utf-8 + decoder.setstate(codecs.getincrementaldecoder('utf-8')().getstate()) + # Ok, so we have something that is not valid UTF-8, + # our next stop is to try CP437. 
We don't try incremental + # decode, since cp437 is single byte + # replace is silly here, since there does not exist invalid c437, + # but just in case + data = data.decode('cp437', 'replace') + # Finally, the low part of ascii is valid utf-8, but we are going to be + # more interested in the cp437 versions (since this is console *output* + # not input + if shiftin is None: + data = data.translate(ansichars) + return data.encode('utf-8') + + +def pytechars2line(chars, maxlen=None): + line = '\x1b[m' # start at default params + lb = False # last bold + li = False # last italic + lu = False # last underline + ls = False # last strikethrough + lr = False # last reverse + lfg = 'default' # last fg color + lbg = 'default' # last bg color + hasdata = False + len = 1 + for charidx in range(maxlen): + char = chars[charidx] + csi = [] + if char.fg != lfg: + csi.append(30 + pytecolors2ansi[char.fg]) + lfg = char.fg + if char.bg != lbg: + csi.append(40 + pytecolors2ansi[char.bg]) + lbg = char.bg + if char.bold != lb: + lb = char.bold + csi.append(1 if lb else 22) + if char.italics != li: + li = char.italics + csi.append(3 if li else 23) + if char.underscore != lu: + lu = char.underscore + csi.append(4 if lu else 24) + if char.strikethrough != ls: + ls = char.strikethrough + csi.append(9 if ls else 29) + if char.reverse != lr: + lr = char.reverse + csi.append(7 if lr else 27) + if csi: + line += b'\x1b[' + b';'.join(['{0}'.format(x) for x in csi]) + b'm' + if not hasdata and char.data.encode('utf-8').rstrip(): + hasdata = True + line += char.data.encode('utf-8') + if maxlen and len >= maxlen: + break + len += 1 + return line, hasdata + class ConsoleHandler(object): _plugin_path = '/nodes/{0}/_console/session' @@ -44,6 +141,7 @@ class ConsoleHandler(object): _genwatchattribs = frozenset(('console.method', 'console.logging')) def __init__(self, node, configmanager): + self.clearpending = False self._dologging = True self._isondemand = False self.error = None @@ -51,14 +149,15 
@@ class ConsoleHandler(object): self.node = node self.connectstate = 'unconnected' self._isalive = True - self.buffer = bytearray() + self.buffer = pyte.Screen(100, 31) + self.termstream = pyte.ByteStream() + self.termstream.attach(self.buffer) self.livesessions = set([]) + self.utf8decoder = codecs.getincrementaldecoder('utf-8')() if self._logtobuffer: self.logger = log.Logger(node, console=True, tenant=configmanager.tenant) - (text, termstate, timestamp) = self.logger.read_recent_text(8192) - else: - (text, termstate, timestamp) = ('', 0, False) + (text, termstate, timestamp) = (b'', 0, False) # when reading from log file, we will use wall clock # it should usually match walltime. self.lasttime = 0 @@ -70,7 +169,7 @@ class ConsoleHandler(object): # wall clock has gone backwards, use current time as best # guess self.lasttime = util.monotonic_time() - self.buffer += text + self.clearbuffer() self.appmodedetected = False self.shiftin = None self.reconnect = None @@ -91,6 +190,16 @@ class ConsoleHandler(object): self.connectstate = 'connecting' eventlet.spawn(self._connect) + def feedbuffer(self, data): + try: + self.termstream.feed(data) + except StopIteration: # corrupt parser state, start over + self.termstream = pyte.ByteStream() + self.termstream.attach(self.buffer) + except Exception: + _tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event, + event=log.Events.stacktrace) + def check_isondemand(self): self._dologging = True attrvalue = self.cfgmgr.get_node_attributes( @@ -99,11 +208,12 @@ class ConsoleHandler(object): self._isondemand = False elif 'console.logging' not in attrvalue[self.node]: self._isondemand = False - elif (attrvalue[self.node]['console.logging']['value'] not in ( - 'full', '')): - self._isondemand = True - elif (attrvalue[self.node]['console.logging']['value']) == 'none': - self._dologging = False + else: + if (attrvalue[self.node]['console.logging']['value'] not in ( + 'full', '', 'buffer')): + self._isondemand = True + if 
(attrvalue[self.node]['console.logging']['value']) in ('none', 'memory'): + self._dologging = False def get_buffer_age(self): """Return age of buffered data @@ -138,7 +248,7 @@ class ConsoleHandler(object): return else: self._ondemand() - if logvalue == 'none': + if logvalue in ('none', 'memory'): self._dologging = False if not self._isondemand or self.livesessions: eventlet.spawn(self._connect) @@ -157,10 +267,18 @@ class ConsoleHandler(object): else: self._console.ping() + def clearbuffer(self): + self.feedbuffer( + '\x1bc[no replay buffer due to console.logging attribute set to ' + 'none or interactive,\r\nconnection loss, or service restart]') + self.clearpending = True + def _disconnect(self): if self.connectionthread: self.connectionthread.kill() self.connectionthread = None + # clear the terminal buffer when disconnected + self.clearbuffer() if self._console: self.log( logdata='console disconnected', ltype=log.DataTypes.event, @@ -200,6 +318,7 @@ class ConsoleHandler(object): _tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event, event=log.Events.stacktrace) if not isinstance(self._console, conapi.Console): + self.clearbuffer() self.connectstate = 'unconnected' self.error = 'misconfigured' self._send_rcpts({'connectstate': self.connectstate, @@ -219,6 +338,7 @@ class ConsoleHandler(object): try: self._console.connect(self.get_console_output) except exc.TargetEndpointBadCredentials: + self.clearbuffer() self.error = 'badcredentials' self.connectstate = 'unconnected' self._send_rcpts({'connectstate': self.connectstate, @@ -228,6 +348,7 @@ class ConsoleHandler(object): self.reconnect = eventlet.spawn_after(retrytime, self._connect) return except exc.TargetEndpointUnreachable: + self.clearbuffer() self.error = 'unreachable' self.connectstate = 'unconnected' self._send_rcpts({'connectstate': self.connectstate, @@ -237,6 +358,7 @@ class ConsoleHandler(object): self.reconnect = eventlet.spawn_after(retrytime, self._connect) return except Exception: + 
self.clearbuffer() _tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event, event=log.Events.stacktrace) self.error = 'unknown' @@ -257,6 +379,7 @@ class ConsoleHandler(object): self._send_rcpts({'connectstate': self.connectstate}) def _got_disconnected(self): + self.clearbuffer() if self.connectstate != 'unconnected': self.connectstate = 'unconnected' self.log( @@ -278,12 +401,6 @@ class ConsoleHandler(object): self.connectionthread.kill() self.connectionthread = None - def flushbuffer(self): - # Logging is handled in a different stream - # this buffer is now just for having screen redraw on - # connect - self.buffer = bytearray(self.buffer[-8192:]) - def get_console_output(self, data): # Spawn as a greenthread, return control as soon as possible # to the console object @@ -354,19 +471,18 @@ class ConsoleHandler(object): eventdata |= 2 self.log(data, eventdata=eventdata) self.lasttime = util.monotonic_time() - if isinstance(data, bytearray) or isinstance(data, bytes): - self.buffer += data - else: - self.buffer += data.encode('utf-8') + self.feedbuffer(data) # TODO: analyze buffer for registered events, examples: # panics # certificate signing request - if len(self.buffer) > 16384: - self.flushbuffer() - self._send_rcpts(data) + if self.clearpending: + self.clearpending = False + self.feedbuffer(b'\x1bc') + self._send_rcpts(b'\x1bc') + self._send_rcpts(_utf8_normalize(data, self.shiftin, self.utf8decoder)) def _send_rcpts(self, data): - for rcpt in self.livesessions: + for rcpt in list(self.livesessions): try: rcpt.data_handler(data) except: # No matter the reason, advance to next recipient @@ -385,7 +501,26 @@ class ConsoleHandler(object): 'connectstate': self.connectstate, 'clientcount': len(self.livesessions), } - retdata = '' + retdata = b'\x1b[H\x1b[J' # clear screen + pendingbl = b'' # pending blank lines + maxlen = 0 + for line in self.buffer.display: + line = line.rstrip() + if len(line) > maxlen: + maxlen = len(line) + for line in 
range(self.buffer.lines): + nline, notblank = pytechars2line(self.buffer.buffer[line], maxlen) + if notblank: + if pendingbl: + retdata += pendingbl + pendingbl = b'' + retdata += nline + '\r\n' + else: + pendingbl += nline + '\r\n' + if len(retdata) > 6: + retdata = retdata[:-2] # remove the last \r\n + retdata += b'\x1b[{0};{1}H'.format(self.buffer.cursor.y + 1, + self.buffer.cursor.x + 1) if self.shiftin is not None: # detected that terminal requested a # shiftin character set, relay that to the terminal that cannected retdata += '\x1b)' + self.shiftin @@ -393,27 +528,16 @@ class ConsoleHandler(object): retdata += '\x1b[?1h' else: retdata += '\x1b[?1l' - # an alternative would be to emulate a VT100 to know what the - # whole screen would look like - # this is one scheme to clear screen, move cursor then clear - bufidx = self.buffer.rfind('\x1b[H\x1b[J') - if bufidx >= 0: - return retdata + str(self.buffer[bufidx:]), connstate - # another scheme is the 2J scheme - bufidx = self.buffer.rfind('\x1b[2J') - if bufidx >= 0: - # there was some sort of clear screen event - # somewhere in the buffer, replay from that point - # in hopes that it reproduces the screen - return retdata + str(self.buffer[bufidx:]), connstate - else: - # we have no indication of last erase, play back last kibibyte - # to give some sense of context anyway - return retdata + str(self.buffer[-1024:]), connstate + return retdata, connstate def write(self, data): if self.connectstate == 'connected': - self._console.write(data) + try: + self._console.write(data) + except Exception: + _tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event, + event=log.Events.stacktrace) + self._got_disconnected() def disconnect_node(node, configmanager): diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index 608e8206..e5003ef9 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -1,7 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 
# Copyright 2014 IBM Corporation -# Copyright 2015 Lenovo +# Copyright 2015-2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -33,11 +33,14 @@ # functions. Console is special and just get's passed through # see API.txt +import confluent import confluent.alerts as alerts import confluent.config.attributes as attrscheme +import confluent.discovery.core as disco import confluent.interface.console as console import confluent.exceptions as exc import confluent.messages as msg +import confluent.networking.macmap as macmap import confluent.noderange as noderange try: import confluent.shellmodule as shellmodule @@ -100,7 +103,8 @@ def load_plugins(): sys.path.pop(1) -rootcollections = ['noderange/', 'nodes/', 'nodegroups/', 'users/', 'events/'] +rootcollections = ['discovery/', 'events/', 'networking/', + 'noderange/', 'nodes/', 'nodegroups/', 'users/', 'version'] class PluginRoute(object): @@ -344,11 +348,14 @@ def delete_nodegroup_collection(collectionpath, configmanager): raise Exception("Not implemented") -def delete_node_collection(collectionpath, configmanager): +def delete_node_collection(collectionpath, configmanager, isnoderange): if len(collectionpath) == 2: # just node - node = collectionpath[-1] - configmanager.del_nodes([node]) - yield msg.DeletedResource(node) + nodes = [collectionpath[-1]] + if isnoderange: + nodes = noderange.NodeRange(nodes[0], configmanager).nodes + configmanager.del_nodes(nodes) + for node in nodes: + yield msg.DeletedResource(node) else: raise Exception("Not implemented") @@ -356,7 +363,8 @@ def delete_node_collection(collectionpath, configmanager): def enumerate_nodegroup_collection(collectionpath, configmanager): nodegroup = collectionpath[1] if not configmanager.is_nodegroup(nodegroup): - raise exc.NotFoundException("Invalid element requested") + raise exc.NotFoundException( + 'Invalid nodegroup: {0} not found'.format(nodegroup)) del 
collectionpath[0:2] collection = nested_lookup(nodegroupresources, collectionpath) return iterate_resources(collection) @@ -392,6 +400,7 @@ def create_group(inputdata, configmanager): configmanager.add_group_attributes(attribmap) except ValueError as e: raise exc.InvalidArgumentException(str(e)) + yield msg.CreatedResource(groupname) def create_node(inputdata, configmanager): @@ -405,6 +414,25 @@ def create_node(inputdata, configmanager): configmanager.add_node_attributes(attribmap) except ValueError as e: raise exc.InvalidArgumentException(str(e)) + yield msg.CreatedResource(nodename) + + +def create_noderange(inputdata, configmanager): + try: + noder = inputdata['name'] + del inputdata['name'] + attribmap = {} + for node in noderange.NodeRange(noder).nodes: + attribmap[node] = inputdata + except KeyError: + raise exc.InvalidArgumentException('name not specified') + try: + configmanager.add_node_attributes(attribmap) + except ValueError as e: + raise exc.InvalidArgumentException(str(e)) + for node in attribmap: + yield msg.CreatedResource(node) + def enumerate_collections(collections): @@ -419,7 +447,7 @@ def handle_nodegroup_request(configmanager, inputdata, if len(pathcomponents) < 2: if operation == "create": inputdata = msg.InputAttributes(pathcomponents, inputdata) - create_group(inputdata.attribs, configmanager) + return create_group(inputdata.attribs, configmanager) allgroups = list(configmanager.get_groups()) try: allgroups.sort(key=noderange.humanify_nodename) @@ -458,6 +486,16 @@ def handle_nodegroup_request(configmanager, inputdata, raise Exception("unknown case encountered") +class BadPlugin(object): + def __init__(self, node, plugin): + self.node = node + self.plugin = plugin + + def error(self, *args, **kwargs): + yield msg.ConfluentNodeError( + self.node, self.plugin + ' is not a supported plugin') + + def handle_node_request(configmanager, inputdata, operation, pathcomponents, autostrip=True): iscollection = False @@ -489,11 +527,14 @@ def 
handle_node_request(configmanager, inputdata, operation, # this is enumerating a list of nodes or just empty noderange if isnoderange and operation == "retrieve": return iterate_collections([]) + elif isnoderange and operation == "create": + inputdata = msg.InputAttributes(pathcomponents, inputdata) + return create_noderange(inputdata.attribs, configmanager) elif isnoderange or operation == "delete": raise exc.InvalidArgumentException() if operation == "create": inputdata = msg.InputAttributes(pathcomponents, inputdata) - create_node(inputdata.attribs, configmanager) + return create_node(inputdata.attribs, configmanager) allnodes = list(configmanager.list_nodes()) try: allnodes.sort(key=noderange.humanify_nodename) @@ -524,7 +565,8 @@ def handle_node_request(configmanager, inputdata, operation, raise exc.InvalidArgumentException('Custom interface required for resource') if iscollection: if operation == "delete": - return delete_node_collection(pathcomponents, configmanager) + return delete_node_collection(pathcomponents, configmanager, + isnoderange) elif operation == "retrieve": return enumerate_node_collection(pathcomponents, configmanager) else: @@ -561,7 +603,11 @@ def handle_node_request(configmanager, inputdata, operation, if attrname in nodeattr[node]: plugpath = nodeattr[node][attrname]['value'] if plugpath is not None: - hfunc = getattr(pluginmap[plugpath], operation) + try: + hfunc = getattr(pluginmap[plugpath], operation) + except KeyError: + nodesbyhandler[BadPlugin(node, plugpath).error] = [node] + continue if hfunc in nodesbyhandler: nodesbyhandler[hfunc].append(node) else: @@ -588,6 +634,14 @@ def handle_node_request(configmanager, inputdata, operation, # return stripnode(passvalues[0], nodes[0]) +def handle_discovery(pathcomponents, operation, configmanager, inputdata): + if pathcomponents[0] == 'detected': + pass + +def handle_discovery(pathcomponents, operation, configmanager, inputdata): + if pathcomponents[0] == 'detected': + pass + def 
handle_path(path, operation, configmanager, inputdata=None, autostrip=True): """Given a full path request, return an object. @@ -612,6 +666,14 @@ def handle_path(path, operation, configmanager, inputdata=None, autostrip=True): # single node request of some sort return handle_node_request(configmanager, inputdata, operation, pathcomponents, autostrip) + elif pathcomponents[0] == 'discovery': + return disco.handle_api_request( + configmanager, inputdata, operation, pathcomponents) + elif pathcomponents[0] == 'networking': + return macmap.handle_api_request( + configmanager, inputdata, operation, pathcomponents) + elif pathcomponents[0] == 'version': + return (msg.Attributes(kv={'version': confluent.__version__}),) elif pathcomponents[0] == 'users': # TODO: when non-administrator accounts exist, # they must only be allowed to see their own user @@ -646,5 +708,8 @@ def handle_path(path, operation, configmanager, inputdata=None, autostrip=True): raise exc.NotFoundException() if operation == 'update': return alerts.decode_alert(inputdata, configmanager) + elif pathcomponents[0] == 'discovery': + return handle_discovery(pathcomponents[1:], operation, configmanager, + inputdata) else: raise exc.NotFoundException() diff --git a/confluent_server/confluent/discovery/__init__.py b/confluent_server/confluent/discovery/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py new file mode 100644 index 00000000..4934f728 --- /dev/null +++ b/confluent_server/confluent/discovery/core.py @@ -0,0 +1,853 @@ +# Copyright 2016-2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This manages the detection and auto-configuration of nodes. +# Discovery sources may implement scans and may be passive or may provide +# both. + +# The phases and actions: +# - Detect - Notice the existance of a potentially supported target +# - Potentially apply a secure replacement for default credential +# (perhaps using some key identifier combined with some string +# denoting temporary use, and use confluent master integrity key +# to generate a password in a formulaic way?) +# - Do some universal reconfiguration if applicable (e.g. if something is +# part of an enclosure with an optionally enabled enclosure manager, +# check and request enclosure manager enablement +# - Throughout all of this, at this phase no sensitive data is divulged, +# only using credentials that are factory default or equivalent to +# factory default +# - Request transition to Locate +# - Locate - Use available cues to ascertain the physical location. This may +# be mac address lookup through switch or correlated by a server +# enclosure manager. If the location data suggests a node identity, +# then proceed to the 'verify' state +# - Verify - Given the current information and candidate upstream verifier, +# verify the authenticity of the servers claim in an automated way +# if possible. 
A few things may happen at this juncture +# - Verification outright fails (confirmed negative response) +# - Audit log entry created, element is not *allowed* to +# proceed +# - Verification not possible (neither good or bad) +# - If security policy is set to low, proceed to 'Manage' +# - Otherwise, log the detection event and stop (user +# would then manually bless the endpoint if applicable +# - Verification succeeds +# - If security policy is set to strict (or manual, whichever +# word works best, note the successfull verification, but +# do not manage +# - Otherwise, proceed to 'Manage' +# -Pre-configure - Given data up to this point, try to do some pre-config. +# For example, if located and X, then check for S, enable S +# This happens regardless of verify, as verify may depend on +# S +# - Manage +# - Create the node if autonode (Deferred) +# - If there is not a defined ip address, collect the current LLA and use +# that value. +# - If no username/password defined, generate a unique password, 20 bytes +# long, written to pass most complexity rules (15 random bytes, base64, +# retry until uppercase, lowercase, digit, and symbol all present) +# - Apply defined configuration to endpoint + +import confluent.config.configmanager as cfm +import confluent.discovery.protocols.pxe as pxe +#import confluent.discovery.protocols.ssdp as ssdp +import confluent.discovery.protocols.slp as slp +import confluent.discovery.handlers.imm as imm +import confluent.discovery.handlers.pxe as pxeh +import confluent.discovery.handlers.smm as smm +import confluent.discovery.handlers.xcc as xcc +import confluent.exceptions as exc +import confluent.log as log +import confluent.messages as msg +import confluent.networking.macmap as macmap +import confluent.noderange as noderange +import confluent.util as util +import traceback + +import eventlet +import eventlet.semaphore + +class nesteddict(dict): + + def __missing__(self, key): + v = self[key] = nesteddict() + return v + +nodehandlers = 
{ + 'service:lenovo-smm': smm, + 'service:management-hardware.Lenovo:lenovo-xclarity-controller': xcc, + 'service:management-hardware.IBM:integrated-management-module2': imm, + 'pxe-client': pxeh, +} + +servicenames = { + 'pxe-client': 'pxe-client', + 'service:lenovo-smm': 'lenovo-smm', + 'service:management-hardware.Lenovo:lenovo-xclarity-controller': 'lenovo-xcc', + 'service:management-hardware.IBM:integrated-management-module2': 'lenovo-imm2', +} + +servicebyname = { + 'pxe-client': 'pxe-client', + 'lenovo-smm': 'service:lenovo-smm', + 'lenovo-xcc': 'service:management-hardware.Lenovo:lenovo-xclarity-controller', + 'lenovo-imm2': 'service:management-hardware.IBM:integrated-management-module2', +} +# Passive-only auto-detection protocols: +# PXE + +# Both passive and active +# SLP (passive mode listens for SLP DA and unicast interrogation of the system) +# mDNS +# SSD + +# Also there are location providers +# Switch +# chassis +# chassis may in turn describe more chassis + +# We normalize discovered node data to the following pieces of information: +# * Detected node name (if available, from switch discovery or similar or +# auto generated node name. 
+# * Model number +# * Model name +# * Serial number +# * System UUID (in x86 space, specifically whichever UUID would be in DMI) +# * Network interfaces and addresses +# * Switch connectivity information +# * enclosure information +# * Management TLS fingerprint if validated (switch publication or enclosure) +# * System TLS fingerprint if validated (switch publication or system manager) + + +#TODO: by serial, by uuid, by node +known_info = {} +known_services = {} +known_serials = {} +known_nodes = nesteddict() +unknown_info = {} +pending_nodes = {} + + +def send_discovery_datum(info): + addresses = info.get('addresses', []) + yield msg.KeyValueData({'nodename': info.get('nodename', '')}) + yield msg.KeyValueData({'ipaddrs': [x[0] for x in addresses]}) + yield msg.KeyValueData({'serialnumber': info.get('serialnumber', '')}) + yield msg.KeyValueData({'modelnumber': info.get('modelnumber', '')}) + if 'enclosure.bay' in info: + yield msg.KeyValueData({'bay': int(info['enclosure.bay'])}) + yield msg.KeyValueData({'macs': [info.get('hwaddr', '')]}) + types = [] + for infotype in info.get('services', []): + if infotype in servicenames: + types.append(servicenames[infotype]) + yield msg.KeyValueData({'types': types}) + + +def _info_matches(info, criteria): + model = criteria.get('by-model', None) + devtype = criteria.get('by-type', None) + node = criteria.get('by-node', None) + serial = criteria.get('by-serial', None) + status = criteria.get('by-state', None) + if model and info.get('modelnumber', None) != model: + return False + if devtype and devtype not in info.get('services', []): + return False + if node and info.get('nodename', None) != node: + return False + if serial and info.get('serialnumber', None) != serial: + return False + if status and info.get('discostatus', None) != status: + return False + return True + + +def list_matching_nodes(criteria): + retnodes = [] + for node in known_nodes: + for mac in known_nodes[node]: + info = known_info[mac] + if 
_info_matches(info, criteria): + retnodes.append(node) + break + retnodes.sort(key=noderange.humanify_nodename) + return [msg.ChildCollection(node + '/') for node in retnodes] + + +def list_matching_serials(criteria): + for serial in sorted(list(known_serials)): + info = known_serials[serial] + if _info_matches(info, criteria): + yield msg.ChildCollection(serial + '/') + + +def list_matching_states(criteria): + return [msg.ChildCollection(x) for x in ('discovered/', 'identified/', + 'unidentified/')] + +def list_matching_macs(criteria): + for mac in sorted(list(known_info)): + info = known_info[mac] + if _info_matches(info, criteria): + yield msg.ChildCollection(mac.replace(':', '-')) + + +def list_matching_types(criteria): + rettypes = [] + for infotype in known_services: + typename = servicenames[infotype] + if ('by-model' not in criteria or + criteria['by-model'] in known_services[infotype]): + rettypes.append(typename) + return [msg.ChildCollection(typename + '/') + for typename in sorted(rettypes)] + + +def list_matching_models(criteria): + for model in sorted(list(detected_models())): + if ('by-type' not in criteria or + model in known_services[criteria['by-type']]): + yield msg.ChildCollection(model + '/') + + +def show_info(mac): + mac = mac.replace('-', ':') + if mac not in known_info: + raise exc.NotFoundException(mac + ' not a known mac address') + for i in send_discovery_datum(known_info[mac]): + yield i + + +list_info = { + 'by-node': list_matching_nodes, + 'by-serial': list_matching_serials, + 'by-type': list_matching_types, + 'by-model': list_matching_models, + 'by-mac': list_matching_macs, + 'by-state': list_matching_states, +} + +multi_selectors = set([ + 'by-type', + 'by-model', + 'by-state', +]) + + +node_selectors = set([ + 'by-node', + #'by-uuid', + 'by-serial', +]) + + +single_selectors = set([ + 'by-mac', +]) + + +def _parameterize_path(pathcomponents): + listrequested = False + childcoll = True + if len(pathcomponents) % 2 == 1: + 
listrequested = pathcomponents[-1] + pathcomponents = pathcomponents[:-1] + pathit = iter(pathcomponents) + keyparams = {} + validselectors = multi_selectors | node_selectors | single_selectors + for key, val in zip(pathit, pathit): + if key not in validselectors: + raise exc.NotFoundException('{0} is not valid here'.format(key)) + if key == 'by-type': + keyparams[key] = servicebyname.get(val, None) + else: + keyparams[key] = val + validselectors.discard(key) + if key in single_selectors: + childcoll = False + validselectors = set([]) + elif key in node_selectors: + validselectors = single_selectors | set([]) + return validselectors, keyparams, listrequested, childcoll + + +def handle_api_request(configmanager, inputdata, operation, pathcomponents): + if operation == 'retrieve': + return handle_read_api_request(pathcomponents) + elif (operation in ('update', 'create') and + pathcomponents == ['discovery', 'rescan']): + if inputdata != {'rescan': 'start'}: + raise exc.InvalidArgumentException() + rescan() + return (msg.KeyValueData({'rescan': 'started'}),) + elif (operation in ('update', 'create')): + if 'node' not in inputdata: + raise exc.InvalidArgumentException('Missing node name in input') + _, queryparms, _, _ = _parameterize_path(pathcomponents[1:]) + if 'by-mac' not in queryparms: + raise exc.InvalidArgumentException('Must target using "by-mac"') + mac = queryparms['by-mac'].replace('-', ':') + if mac not in known_info: + raise exc.NotFoundException('{0} not found'.format(mac)) + info = known_info[mac] + handler = info['handler'].NodeHandler(info, configmanager) + eval_node(configmanager, handler, info, inputdata['node'], + manual=True) + return [msg.AssignedResource(inputdata['node'])] + raise exc.NotImplementedException( + 'Unable to {0} to {1}'.format(operation, '/'.join(pathcomponents))) + + +def handle_read_api_request(pathcomponents): + # TODO(jjohnson2): This should be more generalized... 
+ # odd indexes into components are 'by-'*, even indexes + # starting at 2 are parameters to previous index + subcats, queryparms, indexof, coll = _parameterize_path(pathcomponents[1:]) + if len(pathcomponents) == 1: + dirlist = [msg.ChildCollection(x + '/') for x in sorted(list(subcats))] + dirlist.append(msg.ChildCollection('rescan')) + return dirlist + if not coll: + return show_info(queryparms['by-mac']) + if not indexof: + return [msg.ChildCollection(x + '/') for x in sorted(list(subcats))] + if indexof not in list_info: + raise exc.NotFoundException('{0} is not found'.format(indexof)) + return list_info[indexof](queryparms) + + +def detected_services(): + for srv in known_services: + yield servicenames[srv] + + +def detected_models(): + knownmodels = set([]) + for info in known_info: + info = known_info[info] + if 'modelnumber' in info and info['modelnumber'] not in knownmodels: + knownmodels.add(info['modelnumber']) + yield info['modelnumber'] + + +def _recheck_nodes(nodeattribs, configmanager): + global rechecker + _map_unique_ids(nodeattribs) + # for the nodes whose attributes have changed, consider them as potential + # strangers + for node in nodeattribs: + if node in known_nodes: + for somemac in known_nodes[node]: + unknown_info[somemac] = known_nodes[node][somemac] + unknown_info[somemac]['discostatus'] = 'unidentified' + # Now we go through ones we did not find earlier + for mac in list(unknown_info): + try: + _recheck_single_unknown(configmanager, mac) + except Exception: + traceback.print_exc() + continue + # now we go through ones that were identified, but could not pass + # policy or hadn't been able to verify key + for nodename in pending_nodes: + info = pending_nodes[nodename] + handler = info['handler'].NodeHandler(info, configmanager) + eventlet.spawn_n(eval_node, configmanager, handler, info, nodename) + + +def _recheck_single_unknown(configmanager, mac): + global rechecker + global rechecktime + info = unknown_info.get(mac, None) + if not 
info: + return + if info['handler'] != pxeh and not info.get('addresses', None): + log.log({'info': 'Missing address information in ' + repr(info)}) + return + handler = info['handler'].NodeHandler(info, configmanager) + if handler.https_supported and not handler.https_cert: + if handler.cert_fail_reason == 'unreachable': + log.log( + { + 'info': '{0} with hwaddr {1} is not reachable at {2}' + ''.format( + handler.devname, info['hwaddr'], handler.ipaddr + )}) + # addresses data is bad, clear it, to force repair next + # opportunity + info['addresses'] = [] + # TODO(jjohnson2): rescan due to bad peer addr data? + # not just wait around for the next announce + return + log.log( + { + 'info': '{0} with hwaddr {1} at address {2} is not yet running ' + 'https, will examine later'.format( + handler.devname, info['hwaddr'], handler.ipaddr + )}) + if rechecker is not None and rechecktime > util.monotonic_time() + 60: + rechecker.cancel() + # if cancel did not result in dead, then we are in progress + if rechecker is None or rechecker.dead: + rechecktime = util.monotonic_time() + 60 + rechecker = eventlet.spawn_after(60, _periodic_recheck, + configmanager) + return + nodename = get_nodename(configmanager, handler, info) + if nodename: + if handler.https_supported: + dp = configmanager.get_node_attributes([nodename], + ('pubkeys.tls_hardwaremanager',)) + lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager', + {}).get('value', None) + if util.cert_matches(lastfp, handler.https_cert): + info['nodename'] = nodename + known_nodes[nodename][info['hwaddr']] = info + info['discostatus'] = 'discovered' + return # already known, no need for more + eventlet.spawn_n(eval_node, configmanager, handler, info, nodename) + + +def safe_detected(info): + eventlet.spawn_n(eval_detected, info) + + +def eval_detected(info): + try: + return detected(info) + except Exception as e: + traceback.print_exc() + + +def detected(info): + global rechecker + global rechecktime + if 'hwaddr' not 
in info: + return # For now, require hwaddr field to proceed + # later, manual and CMM discovery may act on SN and/or UUID + for service in info['services']: + if nodehandlers.get(service, None): + if service not in known_services: + known_services[service] = set([]) + handler = nodehandlers[service] + info['handler'] = handler + break + else: # no nodehandler, ignore for now + return + try: + snum = info['attributes']['enclosure-serial-number'][0].strip() + if snum: + info['serialnumber'] = snum + known_serials[info['serialnumber']] = info + except (KeyError, IndexError): + pass + try: + info['modelnumber'] = info['attributes']['enclosure-machinetype-model'][0] + known_services[service].add(info['modelnumber']) + except (KeyError, IndexError): + pass + if info['hwaddr'] in known_info and 'addresses' in info: + # we should tee these up for parsing when an enclosure comes up + # also when switch config parameters change, should discard + # and there's also if wiring is fixed... + # of course could periodically revisit known_nodes + # replace potentially stale address info + #TODO(jjohnson2): remove this + # temporary workaround for XCC not doing SLP DA over dedicated port + # bz 93219, fix submitted, but not in builds yet + # strictly speaking, going ipv4 only legitimately is mistreated here, + # but that should be an edge case + oldaddr = known_info[info['hwaddr']].get('addresses', []) + for addr in info['addresses']: + if addr[0].startswith('fe80::'): + break + else: + for addr in oldaddr: + if addr[0].startswith('fe80::'): + info['addresses'].append(addr) + if known_info[info['hwaddr']].get( + 'addresses', []) == info['addresses']: + # if the ip addresses match, then assume no changes + # now something resetting to defaults could, in theory + # have the same address, but need to be reset + # in that case, however, a user can clear pubkeys to force a check + return + known_info[info['hwaddr']] = info + cfg = cfm.ConfigManager(None) + handler = 
handler.NodeHandler(info, cfg)
+    handler.scan()
+    if handler.https_supported and not handler.https_cert:
+        if handler.cert_fail_reason == 'unreachable':
+            log.log(
+                {
+                    'info': '{0} with hwaddr {1} is not reachable at {2}'
+                            ''.format(
+                        handler.devname, info['hwaddr'], handler.ipaddr
+                    )})
+            info['addresses'] = []
+            return
+        log.log(
+            {'info': '{0} with hwaddr {1} at address {2} is not yet running '
+                     'https, will examine later'.format(
+                handler.devname, info['hwaddr'], handler.ipaddr
+            )})
+        if rechecker is not None and rechecktime > util.monotonic_time() + 60:
+            rechecker.cancel()
+        if rechecker is None or rechecker.dead:
+            rechecktime = util.monotonic_time() + 60
+            rechecker = eventlet.spawn_after(60, _periodic_recheck, cfg)
+        unknown_info[info['hwaddr']] = info
+        info['discostatus'] = 'unidentified'
+        #TODO, eventlet spawn after to recheck sooner, or somehow else
+        # influence periodic recheck to shorten delay?
+        return
+    nodename = get_nodename(cfg, handler, info)
+    if nodename and handler.https_supported:
+        dp = cfg.get_node_attributes([nodename],
+                                     ('pubkeys.tls_hardwaremanager',))
+        lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
+                                          {}).get('value', None)
+        if util.cert_matches(lastfp, handler.https_cert):
+            info['nodename'] = nodename
+            known_nodes[nodename][info['hwaddr']] = info
+            info['discostatus'] = 'discovered'
+            return  # already known, no need for more
+    #TODO(jjohnson2): We might have to get UUID for certain searches...
+    #for now defer probe until inside eval_node. We might not have
+    #a nodename without probe in the future.
+ if nodename: + eval_node(cfg, handler, info, nodename) + else: + log.log( + {'info': 'Detected unknown {0} with hwaddr {1} at ' + 'address {2}'.format( + handler.devname, info['hwaddr'], handler.ipaddr + )}) + info['discostatus'] = 'unidentified' + unknown_info[info['hwaddr']] = info + + +def get_nodename(cfg, handler, info): + if not handler.https_supported: + curruuid = info['uuid'] + nodename = nodes_by_uuid.get(curruuid, None) + if nodename is None: + # TODO: if there are too many matches on port for a + # given type, error! Can't just arbitarily limit, + # shared nic with vms is possible and valid + nodename = macmap.find_node_by_mac(info['hwaddr'], cfg) + return nodename + currcert = handler.https_cert + if not currcert: + info['discofailure'] = 'nohttps' + return None + currprint = util.get_fingerprint(currcert) + nodename = nodes_by_fprint.get(currprint, None) + if not nodename: + nodename = macmap.find_node_by_mac(info['hwaddr'], cfg) + return nodename + + +def eval_node(cfg, handler, info, nodename, manual=False): + try: + handler.probe() # unicast interrogation as possible to get more data + # for now, we search switch only, ideally we search cmm, smm, and + # switch concurrently + # do some preconfig, for example, to bring a SMM online if applicable + handler.preconfig() + except Exception as e: + unknown_info[info['hwaddr']] = info + info['discostatus'] = 'unidentified' + errorstr = 'An error occured during discovery, check the ' \ + 'trace and stderr logs, mac was {0} and ip was {1}' \ + ', the node or the containing enclosure was {2}' \ + ''.format(info['hwaddr'], handler.ipaddr, nodename) + traceback.print_exc() + if manual: + raise exc.InvalidArgumentException(errorstr) + log.log({'error': errorstr}) + return + # first, if had a bay, it was in an enclosure. If it was discovered by + # switch, it is probably the enclosure manager and not + # the node directly. 
switch is ambiguous and we should leave it alone + if 'enclosure.bay' in info and handler.is_enclosure: + unknown_info[info['hwaddr']] = info + info['discostatus'] = 'unidentified' + log.log({'error': 'Something that is an enclosure reported a bay, ' + 'not possible'}) + if manual: + raise exc.InvalidArgumentException() + return + nl = list(cfg.filter_node_attributes('enclosure.manager=' + nodename)) + if not handler.is_enclosure and nl: + # The specified node is an enclosure (has nodes mapped to it), but + # what we are talking to is *not* an enclosure + if 'enclosure.bay' not in info: + unknown_info[info['hwaddr']] = info + info['discostatus'] = 'unidentified' + errorstr = '{2} with mac {0} is in {1}, but unable to ' \ + 'determine bay number'.format(info['hwaddr'], + nodename, + handler.ipaddr) + if manual: + raise exc.InvalidArgumentException(errorstr) + log.log({'error': errorstr}) + return + # search for nodes fitting our description using filters + # lead with the most specific to have a small second pass + nl = cfg.filter_node_attributes( + 'enclosure.bay={0}'.format(info['enclosure.bay']), nl) + nl = list(nl) + if len(nl) != 1: + info['discofailure'] = 'ambigconfig' + if len(nl): + errorstr = 'The following nodes have duplicate ' \ + 'enclosure attributes: ' + ','.join(nl) + + else: + errorstr = 'The {0} in enclosure {1} bay {2} does not ' \ + 'seem to be a defined node ({3})'.format( + handler.devname, nodename, + info['enclosure.bay'], + handler.ipaddr, + ) + if manual: + raise exc.InvalidArgumentException(errorstr) + log.log({'error': errorstr}) + unknown_info[info['hwaddr']] = info + info['discostatus'] = 'unidentified' + return + nodename = nl[0] + if not discover_node(cfg, handler, info, nodename, manual): + # store it as pending, assuming blocked on enclosure + # assurance... 
+ pending_nodes[nodename] = info + else: + # we can and did accurately discover by switch or in enclosure + if not discover_node(cfg, handler, info, nodename, manual): + pending_nodes[nodename] = info + + +def discover_node(cfg, handler, info, nodename, manual): + known_nodes[nodename][info['hwaddr']] = info + if info['hwaddr'] in unknown_info: + del unknown_info[info['hwaddr']] + info['discostatus'] = 'identified' + dp = cfg.get_node_attributes( + [nodename], ('discovery.policy', + 'pubkeys.tls_hardwaremanager')) + policy = dp.get(nodename, {}).get('discovery.policy', {}).get( + 'value', None) + lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager', + {}).get('value', None) + # TODO(jjohnson2): permissive requires we guarantee storage of + # the pubkeys, which is deferred for a little bit + # Also, 'secure', when we have the needed infrastructure done + # in some product or another. + if (policy == 'permissive' and handler.https_supported and lastfp and + not manual): + info['discofailure'] = 'fingerprint' + log.log({'info': 'Detected replacement of {0} with existing ' + 'fingerprint and permissive discovery policy, not ' + 'doing discovery unless discovery.policy=open or ' + 'pubkeys.tls_hardwaremanager attribute is cleared ' + 'first'.format(nodename)}) + return False # With a permissive policy, do not discover new + elif policy in ('open', 'permissive') or manual: + info['nodename'] = nodename + if not handler.https_supported: + # use uuid based scheme in lieu of tls cert, ideally only + # for stateless 'discovery' targets like pxe, where data does not + # change + if info['uuid'] in known_pxe_uuids: + return True + uuidinfo = cfg.get_node_attributes(nodename, 'id.uuid') + known_pxe_uuids[info['uuid']] = nodename + # TODO(jjohnson2): This is messing with the attrib database + # so it should only be possible if policy is 'open' + # + if manual or policy == 'open': + olduuid = uuidinfo.get(nodename, {}).get('id.uuid', None) + if 'uuid' in info and 
info['uuid'] != olduuid: + cfg.set_node_attributes( + {nodename: {'id.uuid': info['uuid']}}) + log.log({'info': 'Detected {0} ({1} with mac {2})'.format( + nodename, handler.devname, info['hwaddr'])}) + return True + elif manual or not util.cert_matches(lastfp, handler.https_cert): + # only 'discover' if it is not the same as last time + try: + handler.config(nodename) + except Exception as e: + info['discofailure'] = 'bug' + log.log( + {'error': + 'Error encountered trying to set up {0}, {1}'.format( + nodename, str(e))}) + traceback.print_exc() + return False + newnodeattribs = {} + if 'uuid' in info: + newnodeattribs['id.uuid'] = info['uuid'] + if handler.https_cert: + newnodeattribs['pubkeys.tls_hardwaremanager'] = \ + util.get_fingerprint(handler.https_cert) + if newnodeattribs: + cfg.set_node_attributes({nodename: newnodeattribs}) + log.log({'info': 'Discovered {0} ({1})'.format(nodename, + handler.devname)}) + info['discostatus'] = 'discovered' + return True + log.log({'info': 'Detected {0}, but discovery.policy is not set to a ' + 'value allowing discovery (open or permissive)'.format( + nodename)}) + info['discofailure'] = 'policy' + return False + + +attribwatcher = None +nodeaddhandler = None +needaddhandled = False + + +def _handle_nodelist_change(configmanager): + global needaddhandled + global nodeaddhandler + _recheck_nodes((), configmanager) + if needaddhandled: + needaddhandled = False + nodeaddhandler = eventlet.spawn(_handle_nodelist_change, configmanager) + else: + nodeaddhandler = None + + +def newnodes(added, deleting, configmanager): + global attribwatcher + global needaddhandled + global nodeaddhandler + configmanager.remove_watcher(attribwatcher) + allnodes = configmanager.list_nodes() + attribwatcher = configmanager.watch_attributes( + allnodes, ('discovery.policy', 'net*.switch', + 'hardwaremanagement.manager', 'net*.switchport', 'id.uuid', + 'pubkeys.tls_hardwaremanager'), _recheck_nodes) + if nodeaddhandler: + needaddhandled = True + 
else: + nodeaddhandler = eventlet.spawn(_handle_nodelist_change, configmanager) + + + +rechecker = None +rechecktime = None +rechecklock = eventlet.semaphore.Semaphore() + +def _periodic_recheck(configmanager): + global rechecker + global rechecktime + rechecker = None + # There shouldn't be anything causing this to double up, but just in case + # use a semaphore to absolutely guarantee this doesn't multiply + with rechecklock: + try: + _recheck_nodes((), configmanager) + except Exception: + traceback.print_exc() + log.log({'error': 'Unexpected error during discovery, check debug ' + 'logs'}) + # if rechecker is set, it means that an accelerated schedule + # for rechecker was requested in the course of recheck_nodes + if rechecker is None: + rechecktime = util.monotonic_time() + 900 + rechecker = eventlet.spawn_after(900, _periodic_recheck, + configmanager) + + +def rescan(): + _map_unique_ids() + eventlet.spawn_n(slp.active_scan, safe_detected) + + +def start_detection(): + global attribwatcher + global rechecker + _map_unique_ids() + cfg = cfm.ConfigManager(None) + allnodes = cfg.list_nodes() + attribwatcher = cfg.watch_attributes( + allnodes, ('discovery.policy', 'net*.switch', + 'hardwaremanagement.manager', 'net*.switchport', 'id.uuid', + 'pubkeys.tls_hardwaremanager'), _recheck_nodes) + cfg.watch_nodecollection(newnodes) + eventlet.spawn_n(slp.snoop, safe_detected) + eventlet.spawn_n(pxe.snoop, safe_detected) + if rechecker is None: + rechecktime = util.monotonic_time() + 900 + rechecker = eventlet.spawn_after(900, _periodic_recheck, cfg) + + # eventlet.spawn_n(ssdp.snoop, safe_detected) + + + +nodes_by_fprint = {} +nodes_by_uuid = {} +known_pxe_uuids = {} + +def _map_unique_ids(nodes=None): + global nodes_by_uuid + global nodes_by_fprint + nodes_by_uuid = {} + nodes_by_fprint = {} + # Map current known ids based on uuid and fingperprints for fast lookup + cfg = cfm.ConfigManager(None) + if nodes is None: + nodes = cfg.list_nodes() + bigmap = 
cfg.get_node_attributes(nodes, + ('id.uuid', + 'pubkeys.tls_hardwaremanager')) + uuid_by_nodes = {} + fprint_by_nodes = {} + for uuid in nodes_by_uuid: + node = nodes_by_uuid[uuid] + if node in bigmap: + uuid_by_nodes[node] = uuid + for fprint in nodes_by_fprint: + node = nodes_by_fprint[fprint] + if node in bigmap: + fprint_by_nodes[node] =fprint + for node in bigmap: + if node in uuid_by_nodes: + del nodes_by_uuid[uuid_by_nodes[node]] + if node in fprint_by_nodes: + del nodes_by_fprint[fprint_by_nodes[node]] + uuid = bigmap[node].get('id.uuid', {}).get('value', None) + if uuid: + nodes_by_uuid[uuid] = node + fprint = bigmap[node].get( + 'pubkeys.tls_hardwaremanager', {}).get('value', None) + if fprint: + nodes_by_fprint[fprint] = node + for uuid in known_pxe_uuids: + if uuid not in nodes_by_uuid: + nodes_by_uuid[uuid] = known_pxe_uuids[uuid] + + +if __name__ == '__main__': + start_detection() + while True: + eventlet.sleep(30) diff --git a/confluent_server/confluent/discovery/handlers/__init__.py b/confluent_server/confluent/discovery/handlers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/confluent_server/confluent/discovery/handlers/bmc.py b/confluent_server/confluent/discovery/handlers/bmc.py new file mode 100644 index 00000000..62e12e4b --- /dev/null +++ b/confluent_server/confluent/discovery/handlers/bmc.py @@ -0,0 +1,153 @@ +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import confluent.discovery.handlers.generic as generic +import confluent.exceptions as exc +import confluent.netutil as netutil +import eventlet.support.greendns + +# Provide foundation for general IPMI device configuration + +import pyghmi.exceptions as pygexc +ipmicommand = eventlet.import_patched('pyghmi.ipmi.command') +ipmicommand.session.select = eventlet.green.select +ipmicommand.session.threading = eventlet.green.threading +ipmicommand.session.socket.getaddrinfo = eventlet.support.greendns.getaddrinfo +getaddrinfo = eventlet.support.greendns.getaddrinfo + +DEFAULT_USER = 'USERID' +DEFAULT_PASS = 'PASSW0RD' + + +class NodeHandler(generic.NodeHandler): + + def _get_ipmicmd(self, user=DEFAULT_USER, password=DEFAULT_PASS): + return ipmicommand.Command(self.ipaddr, user, password) + + def __init__(self, info, configmanager): + super(NodeHandler, self).__init__(info, configmanager) + + def probe(self): + return + # TODO(jjohnson2): probe serial number and uuid + + def config(self, nodename, reset=False): + # TODO(jjohnson2): set ip parameters, user/pass, alert cfg maybe + # In general, try to use https automation, to make it consistent + # between hypothetical secure path and today. 
+ try: + ic = self._get_ipmicmd() + passwd = DEFAULT_PASS + except pygexc.IpmiException as pi: + creds = self.configmanager.get_node_attributes( + nodename, + ['secret.hardwaremanagementuser', + 'secret.hardwaremanagementpassword'], decrypt=True) + user = creds.get(nodename, {}).get( + 'secret.hardwaremanagementuser', {}).get('value', None) + havecustomcreds = False + if user is not None and user != DEFAULT_USER: + havecustomcreds = True + else: + user = DEFAULT_USER + passwd = creds.get(nodename, {}).get( + 'secret.hardwaremanagementpassword', {}).get('value', None) + if passwd is not None and passwd != DEFAULT_PASS: + havecustomcreds = True + else: + passwd = DEFAULT_PASS + if havecustomcreds: + ic = self._get_ipmicmd(user, passwd) + else: + raise + currusers = ic.get_users() + lanchan = ic.get_network_channel() + userdata = ic.xraw_command(netfn=6, command=0x44, data=(lanchan, + 1)) + userdata = bytearray(userdata['data']) + maxusers = userdata[0] & 0b111111 + enabledusers = userdata[1] & 0b111111 + lockedusers = userdata[2] & 0b111111 + cfg = self.configmanager + cd = cfg.get_node_attributes( + nodename, ['secret.hardwaremanagementuser', + 'secret.hardwaremanagementpassword', + 'hardwaremanagement.manager'], True) + cd = cd.get(nodename, {}) + if ('secret.hardwaremanagementuser' not in cd or + 'secret.hardwaremanagementpassword' not in cd): + raise exc.TargetEndpointBadCredentials( + 'Missing user and/or password') + if ('hardwaremanagement.manager' in cd and + cd['hardwaremanagement.manager']['value'] and + not cd['hardwaremanagement.manager']['value'].startswith( + 'fe80::')): + newip = cd['hardwaremanagement.manager']['value'] + newipinfo = getaddrinfo(newip, 0)[0] + # This getaddrinfo is repeated in get_nic_config, could be + # optimized, albeit with a more convoluted api.. 
+ newip = newipinfo[-1][0] + if ':' in newip: + raise exc.NotImplementedException('IPv6 remote config TODO') + netconfig = netutil.get_nic_config(cfg, nodename, ip=newip) + plen = netconfig['prefix'] + newip = '{0}/{1}'.format(newip, plen) + ic.set_net_configuration(ipv4_address=newip, + ipv4_configuration='static', + ipv4_gateway=netconfig['ipv4_gateway']) + elif self.ipaddr.startswith('fe80::'): + cfg.set_node_attributes( + {nodename: {'hardwaremanagement.manager': self.ipaddr}}) + else: + raise exc.TargetEndpointUnreachable( + 'hardwaremanagement.manager must be set to desired address') + newuser = cd['secret.hardwaremanagementuser']['value'] + newpass = cd['secret.hardwaremanagementpassword']['value'] + for uid in currusers: + if currusers[uid]['name'] == newuser: + # Use existing account that has been created + newuserslot = uid + break + else: + newuserslot = lockedusers + 1 + if newuserslot < 2: + newuserslot = 2 + ic.set_user_name(newuserslot, newuser) + ic.set_user_access(newuserslot, lanchan, + privilege_level='administrator') + if newpass != passwd: # don't mess with existing if no change + ic.set_user_password(newuserslot, password=newpass) + # Now to zap others + for uid in currusers: + if uid != newuserslot: + if uid <= lockedusers: # we cannot delete, settle for disable + ic.disable_user(uid, 'disable') + else: + # lead with the most critical thing, removing user access + ic.set_user_access(uid, channel=None, callback=False, + link_auth=False, ipmi_msg=False, + privilege_level='no_access') + # next, try to disable the password + ic.set_user_password(uid, mode='disable', password=None) + # ok, now we can be less paranoid + try: + ic.user_delete(uid) + except pygexc.IpmiException as ie: + if ie.ipmicode != 0xd5: # some response to the 0xff + # name... 
+ # the user will remain, but that is life + raise + if reset: + ic.reset_bmc() + return diff --git a/confluent_server/confluent/discovery/handlers/generic.py b/confluent_server/confluent/discovery/handlers/generic.py new file mode 100644 index 00000000..d13108d4 --- /dev/null +++ b/confluent_server/confluent/discovery/handlers/generic.py @@ -0,0 +1,91 @@ +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import errno +import eventlet +webclient = eventlet.import_patched('pyghmi.util.webclient') + +class NodeHandler(object): + https_supported = True + is_enclosure = False + + def __init__(self, info, configmanager): + self._certfailreason = None + self._fp = None + self.info = info + self.configmanager = configmanager + targsa = None + # first let us prefer LLA if possible, since that's most stable + for sa in info['addresses']: + if sa[0].startswith('fe80'): + targsa = sa + break + else: + targsa = info['addresses'][0] + self.ipaddr = targsa[0] + return + + def scan(self): + # Do completely passive things to enhance data. 
+ # Probe is permitted to for example attempt a login + # scan *only* does what it can without a login attempt + return + + def probe(self): + # Use appropriate direct strategy to gather data such as + # serial number and uuid to flesh out data as needed + return + + def preconfig(self): + return + + @property + def discoverable_by_switch(self): + return True + + def _savecert(self, certificate): + self._fp = certificate + return True + + @property + def cert_fail_reason(self): + if self._certfailreason == 1: + return 'refused' + elif self._certfailreason == 2: + return 'unreachable' + + @property + def https_cert(self): + if self._fp: + return self._fp + if ':' in self.ipaddr: + ip = '[{0}]'.format(self.ipaddr) + else: + ip = self.ipaddr + wc = webclient.SecureHTTPConnection(ip, verifycallback=self._savecert) + try: + wc.connect() + except IOError as ie: + if ie.errno == errno.ECONNREFUSED: + self._certfailreason = 1 + return None + elif ie.errno == errno.EHOSTUNREACH: + self._certfailreason = 2 + return None + self._certfailreason = 2 + return None + except Exception: + self._certfailreason = 2 + return None + return self._fp \ No newline at end of file diff --git a/confluent_server/confluent/discovery/handlers/imm.py b/confluent_server/confluent/discovery/handlers/imm.py new file mode 100644 index 00000000..66cc4c82 --- /dev/null +++ b/confluent_server/confluent/discovery/handlers/imm.py @@ -0,0 +1,85 @@ +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import confluent.discovery.handlers.bmc as bmchandler +import pyghmi.exceptions as pygexc +import pyghmi.ipmi.private.util as pygutil +import string +import struct + +class NodeHandler(bmchandler.NodeHandler): + devname = 'IMM' + + def scan(self): + slpattrs = self.info.get('attributes', {}) + self.isdense = False + try: + ff = slpattrs.get('enclosure-form-factor', [''])[0] + except IndexError: + return + if ff not in ('dense-computing', 'BC2'): + # do not probe unless it's a dense platform + return + self.isdense = True + wronguuid = slpattrs.get('node-uuid', [''])[0] + if wronguuid: + # we need to fix the first three portions of the uuid + uuidprefix = wronguuid.split('-')[:3] + uuidprefix = struct.pack( + '0-ChangePwdlogin.htmlok +# requires relogin +# https://172.30.254.160/index.html +# post to: +# https://172.30.254.160/data/login +# with body user=USERID&password=Passw0rd!4321 +# yields: +# ok 0 index.html +# note forwardUrl, if password change needed, will indicate something else \ No newline at end of file diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py new file mode 100644 index 00000000..b47ea69e --- /dev/null +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -0,0 +1,46 @@ +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import confluent.discovery.handlers.imm as immhandler +import pyghmi.exceptions as pygexc + + + +class NodeHandler(immhandler.NodeHandler): + devname = 'XCC' + + def preconfig(self): + ff = self.info.get('attributes', {}).get('enclosure-form-factor', '') + if ff != 'dense-computing': + return + # attempt to enable SMM + #it's normal to get a 'not supported' (193) for systems without an SMM + ipmicmd = None + try: + ipmicmd = self._get_ipmicmd() + ipmicmd.xraw_command(netfn=0x3a, command=0xf1, data=(1,)) + except pygexc.IpmiException as e: + if e.ipmicode != 193: + # raise an issue if anything other than to be expected + raise + #TODO: decide how to clean out if important + #as it stands, this can step on itself + #if ipmicmd: + # ipmicmd.ipmi_session.logout() + + +# TODO(jjohnson2): web based init config for future prevalidated cert scheme +# def config(self, nodename): +# return + diff --git a/confluent_server/confluent/discovery/protocols/__init__.py b/confluent_server/confluent/discovery/protocols/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/confluent_server/confluent/discovery/protocols/pxe.py b/confluent_server/confluent/discovery/protocols/pxe.py new file mode 100644 index 00000000..cbb1f87a --- /dev/null +++ b/confluent_server/confluent/discovery/protocols/pxe.py @@ -0,0 +1,118 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# We can listen to port 69 with SO_REUSEADDR to snoop port 69 *even* if dhcp
+# is running (because the other dhcp servers do it already)
+
+# Goal is to detect and act on a DHCPDISCOVER, without actually having to do
+# any offer
+
+# option 97 = UUID (wireformat)
+
+import eventlet.green.socket as socket
+import struct
+
+pxearchs = {
+    '\x00\x00': 'bios-x86',
+    '\x00\x07': 'uefi-x64',
+    '\x00\x09': 'uefi-x64',
+    '\x00\x0b': 'uefi-aarch64',
+}
+
+
+def decode_uuid(rawguid):
+    # SMBIOS UUIDs are mixed-endian: the first three fields are
+    # little-endian, the remainder big-endian
+    lebytes = struct.unpack_from('<IHH', buffer(rawguid[:8]))
+    bebytes = struct.unpack_from('>HHI', buffer(rawguid[8:]))
+    return '{0:08X}-{1:04X}-{2:04X}-{3:04X}-{4:04X}{5:08X}'.format(
+        lebytes[0], lebytes[1], lebytes[2], bebytes[0], bebytes[1], bebytes[2])
+
+
+def find_info_in_options(rq, optidx):
+    uuid = None
+    arch = None
+    try:
+        while uuid is None or arch is None:
+            if rq[optidx] == 53:  # DHCP message type
+                # we want only length 1 and only discover (type 1)
+                if rq[optidx + 1] != 1 or rq[optidx + 2] != 1:
+                    return uuid, arch
+                optidx += 3
+            elif rq[optidx] == 97:
+                if rq[optidx + 1] != 17:
+                    # 16 bytes of uuid and one reserved byte
+                    return uuid, arch
+                if rq[optidx + 2] != 0:  # the reserved byte should be zero,
+                    # anything else would be a new spec that we don't know yet
+                    return uuid, arch
+                uuid = decode_uuid(rq[optidx + 3:optidx + 19])
+                optidx += 19
+            elif rq[optidx] == 93:
+                if rq[optidx + 1] != 2:
+                    return uuid, arch
+                archraw = bytes(rq[optidx + 2:optidx + 4])
+                if archraw in pxearchs:
+                    arch = pxearchs[archraw]
+                optidx += 4
+            else:
+                optidx += rq[optidx + 1] + 2
+    except IndexError:
+        return uuid, arch
+    return uuid, arch
+
+def snoop(handler):
+    #TODO(jjohnson2): ipv6 socket and multicast for DHCPv6, should that be
+    #prominent
+    #TODO(jjohnson2): IP_PKTINFO, recvmsg to get the destination ip, per
+    #proxydhcp.c from xCAT
+    net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+    net4.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+    net4.bind(('', 67))
+    while True:
+        # Just need some delay, picked a prime number so that
overlap with other + # timers might be reduced, though it really is probably nothing + (rq, peer) = net4.recvfrom(9000) + # if we have a small packet, just skip, it can't possible hold enough + # data and avoids some downstream IndexErrors that would be messy + # with try/except + if len(rq) < 64: + continue + rq = bytearray(rq) + if rq[0] == 1: # Boot request + addrlen = rq[2] + if addrlen > 16: # max address size in bootp is 16 bytes + continue + netaddr = rq[28:28+addrlen] + netaddr = ':'.join(['{0:02x}'.format(x) for x in netaddr]) + optidx = 0 + try: + optidx = rq.index('\x63\x82\x53\x63') + 4 + except ValueError: + continue + uuid, arch = find_info_in_options(rq, optidx) + if uuid is None: + continue + # We will fill out service to have something to byte into, + # but the nature of the beast is that we do not have peers, + # so that will not be present for a pxe snoop + handler({'hwaddr': netaddr, 'uuid': uuid, 'architecture': arch, + 'services': ('pxe-client',)}) + +if __name__ == '__main__': + def testsnoop(info): + print(repr(info)) + snoop(testsnoop) + + diff --git a/confluent_server/confluent/discovery/protocols/slp.py b/confluent_server/confluent/discovery/protocols/slp.py new file mode 100644 index 00000000..b84b0e27 --- /dev/null +++ b/confluent_server/confluent/discovery/protocols/slp.py @@ -0,0 +1,536 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import confluent.neighutil as neighutil +import confluent.util as util +import os +import random +import eventlet.green.select as select +import eventlet.green.socket as socket +import struct +import subprocess + + +_slp_services = set([ + 'service:management-hardware.IBM:integrated-management-module2', + 'service:lenovo-smm', + 'service:management-hardware.Lenovo:lenovo-xclarity-controller', + 'service:management-hardware.IBM:chassis-management-module', + 'service:management-hardware.Lenovo:chassis-management-module', +]) + +# SLP has a lot of ambition that was unfulfilled in practice. +# So we have a static footer here to always use 'DEFAULT' scope, no LDAP +# predicates, and no authentication for service requests +srvreqfooter = b'\x00\x07DEFAULT\x00\x00\x00\x00' +# An empty instance of the attribute list extension +# which is defined in RFC 3059, used to indicate support for that capability +attrlistext = b'\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00' + +try: + IPPROTO_IPV6 = socket.IPPROTO_IPV6 +except AttributeError: + IPPROTO_IPV6 = 41 # Assume Windows value if socket is missing it + + + +def _parse_slp_header(packet): + packet = bytearray(packet) + if len(packet) < 16 or packet[0] != 2: + # discard packets that are obviously useless + return None + parsed = { + 'function': packet[1], + } + (offset, parsed['xid'], langlen) = struct.unpack('!IHH', + bytes(b'\x00' + packet[7:14])) + parsed['lang'] = packet[14:14 + langlen].decode('utf-8') + parsed['payload'] = packet[14 + langlen:] + if offset: + parsed['offset'] = 14 + langlen + parsed['extoffset'] = offset + return parsed + + +def _pop_url(payload): + urllen = struct.unpack('!H', bytes(payload[3:5]))[0] + url = bytes(payload[5:5+urllen]).decode('utf-8') + if payload[5+urllen] != 0: + raise Exception('Auth blocks unsupported') + payload = payload[5+urllen+1:] + return url, payload + + +def _parse_SrvRply(parsed): + """ Modify passed dictionary to have parsed data + + + :param parsed: + :return: + """ + 
payload = parsed['payload'] + ecode, ucount = struct.unpack('!HH', bytes(payload[0:4])) + if ecode: + parsed['errorcode'] = ecode + payload = payload[4:] + parsed['urls'] = [] + while ucount: + ucount -= 1 + url, payload = _pop_url(payload) + parsed['urls'].append(url) + + +def _parse_slp_packet(packet, peer, rsps, xidmap): + parsed = _parse_slp_header(packet) + if not parsed: + return + addr = peer[0] + if '%' in addr: + addr = addr[:addr.index('%')] + mac = None + if addr in neighutil.neightable: + identifier = neighutil.neightable[addr] + mac = identifier + else: + identifier = addr + if (identifier, parsed['xid']) in rsps: + # avoid obviously duplicate entries + parsed = rsps[(identifier, parsed['xid'])] + else: + rsps[(identifier, parsed['xid'])] = parsed + if mac and 'hwaddr' not in parsed: + parsed['hwaddr'] = mac + if parsed['xid'] in xidmap: + parsed['services'] = [xidmap[parsed['xid']]] + if 'addresses' in parsed: + if peer not in parsed['addresses']: + parsed['addresses'].append(peer) + else: + parsed['addresses'] = [peer] + if parsed['function'] == 2: # A service reply + _parse_SrvRply(parsed) + + +def _v6mcasthash(srvtype): + # The hash algorithm described by RFC 3111 + nums = bytearray(srvtype.encode('utf-8')) + hashval = 0 + for i in nums: + hashval *= 33 + hashval += i + hashval &= 0xffff # only need to track the lowest 16 bits + hashval &= 0x3ff + hashval |= 0x1000 + return '{0:x}'.format(hashval) + + +def _generate_slp_header(payload, multicast, functionid, xid, extoffset=0): + if multicast: + flags = 0x2000 + else: + flags = 0 + packetlen = len(payload) + 16 # we have a fixed 16 byte header supported + if extoffset: # if we have an offset, add 16 to account for this function + # generating a 16 byte header + extoffset += 16 + if packetlen > 1400: + # For now, we aren't intending to support large SLP transmits + # raise an exception to help identify if such a requirement emerges + raise Exception("TODO: Transmit overflow packets") + # We always do 
SLP v2, and only v2 + header = bytearray([2, functionid]) + # SLP uses 24 bit packed integers, so in such places we pack 32 then + # discard the high byte + header.extend(struct.pack('!IH', packetlen, flags)[1:]) + # '2' below refers to the length of the language tag + header.extend(struct.pack('!IHH', extoffset, xid, 2)[1:]) + # we only do english (in SLP world, it's not like non-english appears...) + header.extend(b'en') + return header + +def _generate_attr_request(service, xid): + service = service.encode('utf-8') + payload = bytearray(struct.pack('!HH', 0, len(service)) + service) + payload.extend(srvreqfooter) + header = _generate_slp_header(payload, False, functionid=6, xid=xid) + return header + payload + + + +def _generate_request_payload(srvtype, multicast, xid, prlist=''): + prlist = prlist.encode('utf-8') + payload = bytearray(struct.pack('!H', len(prlist)) + prlist) + srvtype = srvtype.encode('utf-8') + payload.extend(struct.pack('!H', len(srvtype)) + srvtype) + payload.extend(srvreqfooter) + extoffset = len(payload) + payload.extend(attrlistext) + header = _generate_slp_header(payload, multicast, functionid=1, xid=xid, + extoffset=extoffset) + return header + payload + + +def _find_srvtype(net, net4, srvtype, addresses, xid): + """Internal function to find a single service type + + Helper to do singleton requests to srvtype + + :param net: Socket active + :param srvtype: Service type to do now + :param addresses: Pass through of addresses argument from find_targets + :return: + """ + if addresses is None: + data = _generate_request_payload(srvtype, True, xid) + net4.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1) + v6addrs = [] + v6hash = _v6mcasthash(srvtype) + # do 'interface local' and 'link local' + # it shouldn't make sense, but some configurations work with interface + # local that do not work with link local + v6addrs.append(('ff01::1:' + v6hash, 427, 0, 0)) + v6addrs.append(('ff02::1:' + v6hash, 427, 0, 0)) + for idx in 
util.list_interface_indexes(): + # IPv6 multicast is by index, so lead with that + net.setsockopt(IPPROTO_IPV6, socket.IPV6_MULTICAST_IF, idx) + for sa in v6addrs: + try: + net.sendto(data, sa) + except socket.error: + # if we hit an interface without ipv6 multicast, + # this can cause an error, skip such an interface + # case in point, 'lo' + pass + for i4 in util.list_ips(): + if 'broadcast' not in i4: + continue + addr = i4['addr'] + bcast = i4['broadcast'] + net4.setsockopt(socket.IPPROTO_IP, socket.IP_MULTICAST_IF, + socket.inet_aton(addr)) + net4.sendto(data, ('239.255.255.253', 427)) + net4.sendto(data, (bcast, 427)) + + +def _grab_rsps(socks, rsps, interval, xidmap): + r, _, _ = select.select(socks, (), (), interval) + while r: + for s in r: + (rsp, peer) = s.recvfrom(9000) + neighutil.refresh_neigh() + _parse_slp_packet(rsp, peer, rsps, xidmap) + r, _, _ = select.select(socks, (), (), interval) + + + +def _parse_attrlist(attrstr): + attribs = {} + while attrstr: + if attrstr[0] == '(': + if ')' not in attrstr: + attribs['INCOMPLETE'] = True + return attribs + currattr = attrstr[1:attrstr.index(')')] + if '=' not in currattr: # Not allegedly kosher, but still.. 
+ currattr = currattr.decode('utf-8') + attribs[currattr] = None + else: + attrname, attrval = currattr.split('=') + attrname = attrname.decode('utf-8') + attribs[attrname] = [] + for val in attrval.split(','): + try: + val = val.decode('utf-8') + except UnicodeDecodeError: + val = '*DECODEERROR*' + if val[:3] == '\\FF': # we should make this bytes + finalval = bytearray([]) + for bnum in attrval[3:].split('\\'): + if bnum == '': + continue + finalval.append(int(bnum, 16)) + val = finalval + if 'uuid' in attrname and len(val) == 16: + lebytes = struct.unpack_from( + '<IHH', buffer(val[:8])) + bebytes = struct.unpack_from( + '>HHI', buffer(val[8:])) + val = '{0:08X}-{1:04X}-{2:04X}-{3:04X}-' \ + '{4:04X}{5:08X}'.format( + lebytes[0], lebytes[1], lebytes[2], bebytes[0], + bebytes[1], bebytes[2] + ) + attribs[attrname].append(val) + attrstr = attrstr[attrstr.index(')'):] + elif attrstr[0] == ',': + attrstr = attrstr[1:] + elif ',' in attrstr: + currattr = attrstr[:attrstr.index(',')] + attribs[currattr] = None + attrstr = attrstr[attrstr.index(','):] + else: + currattr = attrstr + attribs[currattr] = None + attrstr = None + return attribs + + +def _parse_attrs(data, parsed): + headinfo = _parse_slp_header(data) + if headinfo['function'] != 7 or headinfo['xid'] != parsed['xid']: + return + payload = headinfo['payload'] + if struct.unpack('!H', bytes(payload[:2]))[0] != 0: + return + length = struct.unpack('!H', bytes(payload[2:4]))[0] + attrstr = bytes(payload[4:4+length]) + parsed['attributes'] = _parse_attrlist(attrstr) + + +def _add_attributes(parsed): + attrq = _generate_attr_request(parsed['services'][0], parsed['xid']) + target = None + # prefer reaching out to an fe80 if present, to be highly robust + # in face of network changes + for addr in parsed['addresses']: + if addr[0].startswith('fe80'): + target = addr + # however if no fe80 seen, roll with the first available address + if not target: + target = parsed['addresses'][0] + if len(target) == 4: + net = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + else: +
net = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + net.connect(target) + except socket.error: + return + net.sendall(attrq) + rsp = net.recv(8192) + net.close() + _parse_attrs(rsp, parsed) + + +def query_srvtypes(target): + """Query the srvtypes advertised by the target + + :param target: A sockaddr tuple (if you get the peer info) + """ + payload = b'\x00\x00\xff\xff\x00\x07DEFAULT' + header = _generate_slp_header(payload, False, functionid=9, xid=1) + packet = header + payload + if len(target) == 2: + net = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + elif len(target) == 4: + net = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + else: + raise Exception('Unrecognized target {0}'.format(repr(target))) + tries = 3 + connected = False + while tries and not connected: + tries -= 1 + try: + net.connect(target) + connected = True + except socket.error: + pass + if not connected: + return [u''] + net.sendall(packet) + rs = net.recv(8192) + net.close() + parsed = _parse_slp_header(rs) + if parsed: + payload = parsed['payload'] + if payload[:2] != '\x00\x00': + return + stypelen = struct.unpack('!H', bytes(payload[2:4]))[0] + stypes = payload[4:4+stypelen].decode('utf-8') + return stypes.split(',') + +def rescan(handler): + known_peers = set([]) + for scanned in scan(): + for addr in scanned['addresses']: + ip = addr[0].partition('%')[0] # discard scope if present + if ip not in neighutil.neightable: + continue + if addr in known_peers: + break + known_peers.add(addr) + else: + handler(scanned) + + +def snoop(handler): + """Watch for SLP activity + + handler will be called with a dictionary of relevant attributes + + :param handler: + :return: + """ + active_scan(handler) + net = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + net.setsockopt(IPPROTO_IPV6, socket.IPV6_V6ONLY, 1) + slpg = socket.inet_pton(socket.AF_INET6, 'ff01::123') + slpg2 = socket.inet_pton(socket.AF_INET6, 'ff02::123') + for i6idx in util.list_interface_indexes(): + mreq = 
slpg + struct.pack('=I', i6idx) + net.setsockopt(IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, mreq) + mreq = slpg2 + struct.pack('=I', i6idx) + net.setsockopt(IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, mreq) + net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + net.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + net4.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + for i4 in util.list_ips(): + if 'broadcast' not in i4: + continue + slpmcast = socket.inet_aton('239.255.255.253') + \ + socket.inet_aton(i4['addr']) + try: + net4.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, + slpmcast) + except socket.error as e: + if e.errno != 98: + raise + # socket in use can occur when aliased ipv4 are encountered + net.bind(('', 427)) + net4.bind(('', 427)) + + while True: + newmacs = set([]) + r, _, _ = select.select((net, net4), (), (), 60) + # clear known_peers and peerbymacaddress + # to avoid stale info getting in... + # rely upon the select(0.2) to catch rapid fire and aggregate ip + # addresses that come close together + # calling code needs to understand deeper context, as snoop + # will now yield dupe info over time + known_peers = set([]) + peerbymacaddress = {} + neighutil.update_neigh() + while r: + for s in r: + (rsp, peer) = s.recvfrom(9000) + ip = peer[0].partition('%')[0] + if ip not in neighutil.neightable: + continue + if peer in known_peers: + continue + known_peers.add(peer) + mac = neighutil.neightable[ip] + if mac in peerbymacaddress: + peerbymacaddress[mac]['addresses'].append(peer) + else: + q = query_srvtypes(peer) + if not q or not q[0]: + # SLP might have started and not ready yet + # ignore for now + known_peers.discard(peer) + continue + # we want to prioritize the very well known services + svcs = [] + for svc in q: + if svc in _slp_services: + svcs.insert(0, svc) + else: + svcs.append(svc) + peerbymacaddress[mac] = { + 'services': svcs, + 'addresses': [peer], + } + newmacs.add(mac) + r, _, _ = select.select((net, net4), (), (), 
0.2) + for mac in newmacs: + peerbymacaddress[mac]['xid'] = 1 + _add_attributes(peerbymacaddress[mac]) + peerbymacaddress[mac]['hwaddr'] = mac + handler(peerbymacaddress[mac]) + + +def active_scan(handler): + known_peers = set([]) + for scanned in scan(): + for addr in scanned['addresses']: + ip = addr[0].partition('%')[0] # discard scope if present + if ip not in neighutil.neightable: + continue + if addr in known_peers: + break + known_peers.add(addr) + else: + handler(scanned) + + +def scan(srvtypes=_slp_services, addresses=None, localonly=False): + """Find targets providing matching requested srvtypes + + This is a generator that will iterate over respondants to the SrvType + requested. + + :param srvtypes: An iterable list of the service types to find + :param addresses: An iterable of addresses/ranges. Default is to scan + local network segment using multicast and broadcast. + Each address can be a single address, hyphen-delimited + range, or an IP/CIDR indication of a network. + :return: Iterable set of results + """ + net = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + # TODO: increase RCVBUF to max, mitigate chance of + # failure due to full buffer. + # SLP is very poor at scanning large counts and managing it, so we + # must make the best of it + # Some platforms/config default to IPV6ONLY, we are doing IPv4 + # too, so force it + #net.setsockopt(IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + # we are going to do broadcast, so allow that... 
+ initxid = random.randint(0, 32768) + xididx = 0 + xidmap = {} + # First we give fast repsonders of each srvtype individual chances to be + # processed, mitigating volume of response traffic + rsps = {} + for srvtype in srvtypes: + xididx += 1 + _find_srvtype(net, net4, srvtype, addresses, initxid + xididx) + xidmap[initxid + xididx] = srvtype + _grab_rsps((net, net4), rsps, 0.1, xidmap) + # now do a more slow check to work to get stragglers, + # but fortunately the above should have taken the brunt of volume, so + # reduced chance of many responses overwhelming receive buffer. + _grab_rsps((net, net4), rsps, 1, xidmap) + # now to analyze and flesh out the responses + for id in rsps: + if localonly: + for addr in rsps[id]['addresses']: + if 'fe80' in addr[0]: + break + else: + continue + _add_attributes(rsps[id]) + del rsps[id]['payload'] + del rsps[id]['function'] + del rsps[id]['xid'] + yield rsps[id] + + +if __name__ == '__main__': + def testsnoop(a): + print(repr(a)) + snoop(testsnoop) diff --git a/confluent_server/confluent/discovery/protocols/ssdp.py b/confluent_server/confluent/discovery/protocols/ssdp.py new file mode 100644 index 00000000..de543697 --- /dev/null +++ b/confluent_server/confluent/discovery/protocols/ssdp.py @@ -0,0 +1,232 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Documented somewhat at +# http://buildingskb.schneider-electric.com/view.php?AID=15197 + +# Here is the payload of an SSDP 'announce', sent to the multicast v4/v6 1900 +# NOTIFY * HTTP/1.1 +# HOST: 239.255.255.250:1900 +# CACHE-CONTROL: max-age=1800 +# AL: https://172.30.254.151:8080/redfish/v1 +# SERVER: Linux/3.14.28-ltsi Redfish/1.0 +# NT: urn:dmtf-org:service:redfish-rest:1 +# USN: uuid:00000000-0000-0000-0005-000000000001::urn:dmtf-org:service:redfish-rest:1 +# NTS: ssdp:alive + + +import confluent.neighutil as neighutil +import confluent.util as util +import eventlet.green.select as select +import eventlet.green.socket as socket +import struct + +mcastv4addr = '239.255.255.250' +mcastv6addr = 'ff02::c' + +ssdp6mcast = socket.inet_pton(socket.AF_INET6, mcastv6addr) +smsg = ('M-SEARCH * HTTP/1.1\r\n' + 'HOST: {0}:1900\r\n' + 'MAN: "ssdp:discover"\r\n' + 'ST: {1}\r\n' + 'MX: 3\r\n\r\n') + + +def scan(services, target=None): + for service in services: + for rply in _find_service(service, target): + yield rply + + +def snoop(handler, byehandler=None): + """Watch for SSDP notify messages + + The handler shall be called on any service coming online. + byehandler is called whenever a system advertises that it is departing. + If no byehandler is specified, byebye messages are ignored. The handler is + given (as possible), the mac address, a list of viable sockaddrs to reference + the peer, and the notification type (e.g. + 'urn:dmtf-org:service:redfish-rest:1' + + :param handler: A handler for online notifications from network + :param byehandler: Optional handler for devices going off the network + """ + # Normally, I like using v6/v4 agnostic socket. 
However, since we are + # dabbling in multicast wizardry here, such sockets can cause big problems, + # so we will have two distinct sockets + known_peers = set([]) + net6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + net6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1) + for ifidx in util.list_interface_indexes(): + v6grp = ssdp6mcast + struct.pack('=I', ifidx) + net6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, v6grp) + net6.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + for i4 in util.list_ips(): + ssdp4mcast = socket.inet_pton(socket.AF_INET, mcastv4addr) + \ + socket.inet_aton(i4['addr']) + net4.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, + ssdp4mcast) + net4.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + net4.bind(('', 1900)) + net6.bind(('', 1900)) + peerbymacaddress = {} + while True: + newmacs = set([]) + machandlers = {} + r, _, _ = select.select((net4, net6), (), (), 60) + neighutil.update_neigh() + while r: + for s in r: + (rsp, peer) = s.recvfrom(9000) + rsp = rsp.split('\r\n') + method, _, _ = rsp[0].split(' ', 2) + if method == 'NOTIFY': + ip = peer[0].partition('%')[0] + if ip not in neighutil.neightable: + continue + if peer in known_peers: + continue + mac = neighutil.neightable[ip] + known_peers.add(peer) + newmacs.add(mac) + if mac in peerbymacaddress: + peerbymacaddress[mac]['peers'].append(peer) + else: + peerbymacaddress[mac] = { + 'hwaddr': mac, + 'peers': [peer], + } + peerdata = peerbymacaddress[mac] + for headline in rsp[1:]: + if not headline: + continue + header, _, value = headline.partition(':') + header = header.strip() + value = value.strip() + if header == 'NT': + peerdata['service'] = value + elif header == 'NTS': + if value == 'ssdp:byebye': + machandlers[mac] = byehandler + elif value == 'ssdp:alive': + machandlers[mac] = handler + r, _, _ = select.select((net4, net6), (), (), 0.1) + for mac in newmacs: + thehandler 
= machandlers.get(mac, None) + if thehandler: + thehandler(peerbymacaddress[mac]) + + +def _find_service(service, target): + net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + net6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + net6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1) + if target: + addrs = socket.getaddrinfo(target, 1900, 0, socket.SOCK_DGRAM) + for addr in addrs: + host = addr[4][0] + if addr[0] == socket.AF_INET: + net4.sendto(smsg.format(host, service), addr[4]) + elif addr[0] == socket.AF_INET6: + host = '[{0}]'.format(host) + net6.sendto(smsg.format(host, service), addr[4]) + else: + net4.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1) + for idx in util.list_interface_indexes(): + net6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_MULTICAST_IF, + idx) + try: + net6.sendto(smsg.format('[{0}]'.format(mcastv6addr), service + ), (mcastv6addr, 1900, 0, 0)) + except socket.error: + # ignore interfaces without ipv6 multicast causing error + pass + for i4 in util.list_ips(): + if 'broadcast' not in i4: + continue + addr = i4['addr'] + bcast = i4['broadcast'] + net4.setsockopt(socket.IPPROTO_IP, socket.IP_MULTICAST_IF, + socket.inet_aton(addr)) + net4.sendto(smsg.format(mcastv4addr, service), + (mcastv4addr, 1900)) + net4.sendto(smsg.format(bcast, service), (bcast, 1900)) + # SSDP by spec encourages responses to spread out over a 3 second interval + # hence we must be a bit more patient + r, _, _ = select.select((net4, net6), (), (), 4) + peerdata = {} + while r: + for s in r: + (rsp, peer) = s.recvfrom(9000) + neighutil.refresh_neigh() + _parse_ssdp(peer, rsp, peerdata) + r, _, _ = select.select((net4, net6), (), (), 4) + for nid in peerdata: + yield peerdata[nid] + + +def _parse_ssdp(peer, rsp, peerdata): + ip = peer[0].partition('%')[0] + nid = ip + mac = None + if ip in neighutil.neightable: + nid = neighutil.neightable[ip] + mac = nid + headlines = rsp.split('\r\n') + try: + _, code, _ = headlines[0].split(' ', 2) + except 
ValueError: + return + myurl = None + if code == '200': + if nid in peerdata: + peerdatum = peerdata[nid] + else: + peerdatum = { + 'peers': [peer], + 'hwaddr': mac, + } + peerdata[nid] = peerdatum + for headline in headlines[1:]: + if not headline: + continue + header, _, value = headline.partition(':') + header = header.strip() + value = value.strip() + if header == 'AL' or header == 'LOCATION': + myurl = value + if 'urls' not in peerdatum: + peerdatum['urls'] = [value] + elif value not in peerdatum['urls']: + peerdatum['urls'].append(value) + elif header == 'ST': + if 'services' not in peerdatum: + peerdatum['services'] = [value] + elif value not in peerdatum['services']: + peerdatum['services'].append(value) + + + +if __name__ == '__main__': + + for rsp in scan(['urn:dmtf-org:service:redfish-rest:1']): + print(repr(rsp)) + def fun(a): + print(repr(a)) + def byefun(a): + print('bye' + repr(a)) + snoop(fun, byefun) diff --git a/confluent_server/confluent/exceptions.py b/confluent_server/confluent/exceptions.py index 5bbead51..47f9efdf 100644 --- a/confluent_server/confluent/exceptions.py +++ b/confluent_server/confluent/exceptions.py @@ -1,7 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation -# Copyright 2015 Lenovo +# Copyright 2015-2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -21,69 +21,81 @@ import json class ConfluentException(Exception): apierrorcode = 500 - apierrorstr = 'Unexpected Error' + _apierrorstr = 'Unexpected Error' def get_error_body(self): - errstr = ' - '.join((self.apierrorstr, str(self))) + errstr = ' - '.join((self._apierrorstr, str(self))) return json.dumps({'error': errstr }) + @property + def apierrorstr(self): + if str(self): + return self._apierrorstr + ' - ' + str(self) + return self._apierrorstr + class NotFoundException(ConfluentException): # Something that could be construed as a name was not found # basically, picture an http error code 404 apierrorcode = 404 - apierrorstr = 'Request path not recognized' + _apierrorstr = 'Target not found' class InvalidArgumentException(ConfluentException): # Something from the remote client wasn't correct # like http code 400 apierrorcode = 400 - apierrorstr = 'Bad Request' + _apierrorstr = 'Bad Request' class TargetEndpointUnreachable(ConfluentException): # A target system was unavailable. For example, a BMC # was unreachable. http code 504 apierrorcode = 504 - apierrorstr = 'Unreachable Target' + _apierrorstr = 'Unreachable Target' class TargetEndpointBadCredentials(ConfluentException): # target was reachable, but authentication/authorization # failed apierrorcode = 502 - apierrorstr = 'Bad Credentials' + _apierrorstr = 'Bad Credentials' class LockedCredentials(ConfluentException): # A request was performed that required a credential, but the credential # store is locked - apierrorstr = 'Credential store locked' + _apierrorstr = 'Credential store locked' class ForbiddenRequest(ConfluentException): # The client request is not allowed by authorization engine apierrorcode = 403 - apierrorstr = 'Forbidden' + _apierrorstr = 'Forbidden' class NotImplementedException(ConfluentException): # The current configuration/plugin is unable to perform # the requested task. 
http code 501 apierrorcode = 501 - apierrorstr = '501 - Not Implemented' + _apierrorstr = '501 - Not Implemented' class GlobalConfigError(ConfluentException): # The configuration in the global config file is not right - apierrorstr = 'Global configuration contains an error' + _apierrorstr = 'Global configuration contains an error' +class TargetResourceUnavailable(ConfluentException): + # This is meant for scenarios like asking to read a sensor that is + # currently unavailable. This may be a persistent or transient state + apierrocode = 503 + _apierrorstr = 'Target Resource Unavailable' + class PubkeyInvalid(ConfluentException): apierrorcode = 502 - apierrorstr = '502 - Invalid certificate or key on target' + _apierrorstr = '502 - Invalid certificate or key on target' def __init__(self, text, certificate, fingerprint, attribname, event): super(PubkeyInvalid, self).__init__(self, text) @@ -100,7 +112,7 @@ class PubkeyInvalid(ConfluentException): class LoggedOut(ConfluentException): apierrorcode = 401 - apierrorstr = '401 - Logged out' + _apierrorstr = '401 - Logged out' def get_error_body(self): return '{"loggedout": 1}' diff --git a/confluent_server/confluent/log.py b/confluent_server/confluent/log.py index c63ddf12..575d06bf 100644 --- a/confluent_server/confluent/log.py +++ b/confluent_server/confluent/log.py @@ -449,11 +449,11 @@ class TimedAndSizeRotatingFileHandler(BaseRotatingHandler): odtfn = dtfn append=1 while os.path.exists(dbfn): - dbfn = odbfn + '.{}'.format(append) + dbfn = odbfn + '.{0}'.format(append) append += 1 append=1 while os.path.exists(dtfn): - dtfn = odtfn + '.{}'.format(append) + dtfn = odtfn + '.{0}'.format(append) append += 1 if os.path.exists(self.binpath): os.rename(self.binpath, dbfn) @@ -540,6 +540,12 @@ class Logger(object): tstamp = entry[1] data = entry[2] evtdata = entry[3] + if len(data) > 65535: + # our max log entry is 65k, take only the first 65k and put + # rest back on as a continuation + entry[2] = data[65535:] + 
self.logentries.appendleft(entry) + data = data[:65535] textdate = '' if self.isconsole and ltype != 2: textdate = time.strftime( @@ -743,6 +749,7 @@ tracelog = None def log(logdata=None, ltype=None, event=0, eventdata=None): + global globaleventlog if globaleventlog is None: globaleventlog = Logger('events') globaleventlog.log(logdata, ltype, event, eventdata) diff --git a/confluent_server/confluent/main.py b/confluent_server/confluent/main.py index 7457dbdf..b8439b3f 100644 --- a/confluent_server/confluent/main.py +++ b/confluent_server/confluent/main.py @@ -39,6 +39,7 @@ except ImportError: #On platforms without pwd, give up on the sockapi in general and be http #only for now pass +import confluent.discovery.core as disco import eventlet dbgif = False if map(int, (eventlet.__version__.split('.'))) > [0, 18]: @@ -238,6 +239,7 @@ def run(): sock_bind_host, sock_bind_port = _get_connector_config('socket') webservice = httpapi.HttpApi(http_bind_host, http_bind_port) webservice.start() + disco.start_detection() try: sockservice = sockapi.SockApi(sock_bind_host, sock_bind_port) sockservice.start() diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index fc4a7df2..ac26d3da 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -1,7 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation -# Copyright 2015-2016 Lenovo +# Copyright 2015-2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -194,6 +194,17 @@ class ConfluentNodeError(object): raise Exception(self.error) +class ConfluentResourceUnavailable(ConfluentNodeError): + apicode = 503 + + def __init__(self, node, errstr='Unavailable'): + self.node = node + self.error = errstr + + def strip_node(self, node): + raise exc.TargetResourceUnavailable() + + class ConfluentTargetTimeout(ConfluentNodeError): apicode = 504 @@ -228,9 +239,19 @@ class ConfluentTargetInvalidCredentials(ConfluentNodeError): class DeletedResource(ConfluentMessage): + notnode = True def __init__(self, resource): - self.kvpairs = {} + self.kvpairs = {'deleted': resource} +class CreatedResource(ConfluentMessage): + notnode = True + def __init__(self, resource): + self.kvpairs = {'created': resource} + +class AssignedResource(ConfluentMessage): + notnode = True + def __init__(self, resource): + self.kvpairs = {'assigned': resource} class ConfluentChoiceMessage(ConfluentMessage): valid_values = set() @@ -325,9 +346,16 @@ class ChildCollection(LinkRelation): extension) +# TODO(jjohnson2): enhance the following to support expressions: +# InputNetworkConfiguration +# InputMCI +# InputDomainName +# InputNTPServer def get_input_message(path, operation, inputdata, nodes=None, multinode=False): if path[0] == 'power' and path[1] == 'state' and operation != 'retrieve': return InputPowerMessage(path, nodes, inputdata) + elif path == ['attributes', 'expression']: + return InputExpression(path, inputdata, nodes) elif path[0] in ('attributes', 'users') and operation != 'retrieve': return InputAttributes(path, inputdata, nodes) elif path == ['boot', 'nextdevice'] and operation != 'retrieve': @@ -387,7 +415,47 @@ class InputAlertData(ConfluentMessage): return self.alertparams +class InputExpression(ConfluentMessage): + # This is specifically designed to suppress the expansion of an expression + # so that it can make it intact to the pertinent configmanager function + def __init__(self, path, inputdata, nodes=None): + self.nodeattribs = {} + 
nestedmode = False + if not inputdata: + raise exc.InvalidArgumentException('no request data provided') + if nodes is None: + self.attribs = inputdata + return + for node in nodes: + if node in inputdata: + nestedmode = True + self.nodeattribs[node] = inputdata[node] + if nestedmode: + for key in inputdata: + if key not in nodes: + raise exc.InvalidArgumentException + else: + for node in nodes: + self.nodeattribs[node] = inputdata + + def get_attributes(self, node): + if node not in self.nodeattribs: + return {} + nodeattr = deepcopy(self.nodeattribs[node]) + return nodeattr + + class InputAttributes(ConfluentMessage): + # This is particularly designed for attributes, where a simple string + # should become either a string value or a dict with {'expression':} to + # preserve the client provided expression for posterity, rather than + # immediate consumption. + # for things like node configuration or similar, a different class is + # appropriate since it nedes to immediately expand an expression. 
+ # with that class, the 'InputExpression' and calling code in attributes.py + # might be deprecated in favor of the generic expression expander + # and a small function in attributes.py to reflect the expansion back + # to the client def __init__(self, path, inputdata, nodes=None): self.nodeattribs = {} nestedmode = False @@ -468,12 +536,13 @@ class InputCredential(ConfluentMessage): if len(path) == 4: inputdata['uid'] = path[-1] # if the operation is 'create' check if all fields are present - elif ('uid' not in inputdata or 'privilege_level' not in inputdata or - 'username' not in inputdata or 'password' not in inputdata): - raise exc.InvalidArgumentException('all fields are required') - - if 'uid' not in inputdata: - raise exc.InvalidArgumentException('uid is missing') + missingattrs = [] + for attrname in ('uid', 'privilege_level', 'username', 'password'): + if attrname not in inputdata: + missingattrs.append(attrname) + if missingattrs: + raise exc.InvalidArgumentException( + 'Required fields missing: {0}'.format(','.join(missingattrs))) if (isinstance(inputdata['uid'], str) and not inputdata['uid'].isdigit()): raise exc.InvalidArgumentException('uid must be a number') diff --git a/confluent_server/confluent/neighutil.py b/confluent_server/confluent/neighutil.py new file mode 100644 index 00000000..9da1d195 --- /dev/null +++ b/confluent_server/confluent/neighutil.py @@ -0,0 +1,64 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2016 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# A consolidated manage of neighbor table information management. +# Ultimately, this should use AF_NETLINK, but in the interest of time, +# use ip neigh for the moment + +import eventlet.green.subprocess as subprocess +import os + +neightable = {} +neightime = 0 + +import re + +_validmac = re.compile('..:..:..:..:..:..') + + +def update_neigh(): + global neightable + global neightime + neightable = {} + if os.name == 'nt': + return + ipn = subprocess.Popen(['ip', 'neigh'], stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + (neighdata, err) = ipn.communicate() + for entry in neighdata.split('\n'): + entry = entry.split(' ') + if len(entry) < 5 or not entry[4]: + continue + if entry[0] in ('192.168.0.100', '192.168.70.100', '192.168.70.125'): + # Note that these addresses are common static ip addresses + # that are hopelessly ambiguous if there are many + # so ignore such entries and move on + # ideally the system network steers clear of this landmine of + # a subnet, but just in case + continue + if not _validmac.match(entry[4]): + continue + neightable[entry[0]] = entry[4] + neightime = os.times()[4] + + +def refresh_neigh(): + global neightime + if os.name == 'nt': + return + if os.times()[4] > (neightime + 30): + update_neigh() diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py new file mode 100644 index 00000000..cbe9c2fe --- /dev/null +++ b/confluent_server/confluent/netutil.py @@ -0,0 +1,124 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# this will implement noderange grammar + + +import codecs +import struct +import eventlet.green.socket as socket +import eventlet.support.greendns +getaddrinfo = eventlet.support.greendns.getaddrinfo + + +def ip_on_same_subnet(first, second, prefix): + addrinf = socket.getaddrinfo(first, None, 0, socket.SOCK_STREAM)[0] + fam = addrinf[0] + ip = socket.inet_pton(fam, addrinf[-1][0]) + ip = int(codecs.encode(bytes(ip), 'hex'), 16) + addrinf = socket.getaddrinfo(second, None, 0, socket.SOCK_STREAM)[0] + if fam != addrinf[0]: + return False + oip = socket.inet_pton(fam, addrinf[-1][0]) + oip = int(codecs.encode(bytes(oip), 'hex'), 16) + if fam == socket.AF_INET: + addrlen = 32 + elif fam == socket.AF_INET6: + addrlen = 128 + else: + raise Exception("Unknown address family {0}".format(fam)) + mask = 2 ** prefix - 1 << (addrlen - prefix) + return ip & mask == oip & mask + + +# TODO(jjohnson2): have a method to arbitrate setting methods, to aid +# in correct matching of net.* based on parameters, mainly for pxe +# The scheme for pxe: +# For one: the candidate net.* should have pxe set to true, to help +# disambiguate from interfaces meant for bmc access +# bmc relies upon hardwaremanagement.manager, plus we don't collect +# that mac address +# the ip as reported by recvmsg to match the subnet of that net.* interface +# if switch and port available, that should match. 
+def get_nic_config(configmanager, node, ip=None, mac=None): + """Fetch network configuration parameters for a nic + + For a given node and interface, find and retrieve the pertinent network + configuration data. The desired configuration can be searched + either by ip or by mac. + + :param configmanager: The relevant confluent.config.ConfigManager + instance. + :param node: The name of the node + :param ip: An IP address on the intended subnet + :param mac: The mac address of the interface + + :returns: A dict of parameters, 'ipv4_gateway', .... + """ + # ip parameter *could* be the result of recvmsg with cmsg to tell + # pxe *our* ip address, or it could be the desired ip address + #TODO(jjohnson2): ip address, prefix length, mac address, + # join a bond/bridge, vlan configs, etc. + # also other nic criteria, physical location, driver and index... + nodenetattribs = configmanager.get_node_attributes( + node, 'net*.ipv4_gateway').get(node, {}) + cfgdata = { + 'ipv4_gateway': None, + 'prefix': None, + } + if ip is not None: + prefixlen = get_prefix_len_for_ip(ip) + cfgdata['prefix'] = prefixlen + for setting in nodenetattribs: + gw = nodenetattribs[setting].get('value', None) + if gw is None: + continue + if ip_on_same_subnet(ip, gw, prefixlen): + cfgdata['ipv4_gateway'] = gw + break + return cfgdata + + +def get_prefix_len_for_ip(ip): + # for now, we'll use the system route table + # later may provide for configuration lookup to override the route + # table + ip = getaddrinfo(ip, 0, socket.AF_INET)[0][-1][0] + try: + ipn = socket.inet_aton(ip) + except socket.error: # For now, assume 64 for ipv6 + return 64 + # It comes out big endian, regardless of host arch + ipn = struct.unpack('>I', ipn)[0] + rf = open('/proc/net/route') + ri = rf.read() + rf.close() + ri = ri.split('\n')[1:] + for rl in ri: + if not rl: + continue + rd = rl.split('\t') + if rd[1] == '00000000': # default gateway, not useful for this + continue + # don't have big endian to look at, assume that 
it is host endian + maskn = struct.unpack('I', struct.pack('>I', int(rd[7], 16)))[0] + netn = struct.unpack('I', struct.pack('>I', int(rd[1], 16)))[0] + if ipn & maskn == netn: + nbits = 0 + while maskn: + nbits += 1 + maskn = maskn << 1 & 0xffffffff + return nbits + raise exc.NotImplementedException("Non local addresses not supported") \ No newline at end of file diff --git a/confluent_server/confluent/networking/lldp.py b/confluent_server/confluent/networking/lldp.py new file mode 100644 index 00000000..3ff04ebf --- /dev/null +++ b/confluent_server/confluent/networking/lldp.py @@ -0,0 +1,131 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2016 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This provides the implementation of locating MAC addresses on ethernet +# switches. It is, essentially, a port of 'MacMap.pm' to confluent. +# However, there are enhancements. +# For one, each switch interrogation is handled in an eventlet 'thread' +# For another, MAC addresses are checked in the dictionary on every +# switch return, rather than waiting for all switches to check in +# (which makes it more responsive when there is a missing or bad switch) +# Also, we track the quantity, actual ifName value, and provide a mechanism +# to detect ambiguous result (e.g. if two matches are found, can log an error +# rather than doing the wrong one, complete with the detected ifName value). 
+# Further, the map shall be available to all facets of the codebase, not just +# the discovery process, so that the cached data maintenance will pay off +# for direct queries + +# Provides support for viewing and processing lldp data for switches + +import confluent.exceptions as exc +import confluent.log as log +import confluent.snmputil as snmp +from eventlet.greenpool import GreenPool +import re + +# The interesting OIDs are: +# 1.0.8802.1.1.2.1.3.7.1.4 - Lookup of LLDP index id to description +# Yet another fun fact, the LLDP port index frequent +# does *not* map to ifName, like a sane +# implementation would do. Assume ifName equality +# but provide a way for 1.3.6.1.2.1.1 indicated +# ids to provide custom functions +# (1.0.8802.1.1.2.1.3.7.1.2 - theoretically this process is only very useful +# if this is '5' meaning 'same as ifName per +# 802.1AB-2005, however at *least* 7 has +# been observed to produce same results +# For now we'll optimistically assume +# equality to ifName +# 1.0.8802.1.1.2.1.4.1.1 - The information about the remote systems attached +# indexed by time index, local port, and an +# incrementing value +# 1.0.8802.1.1.2.1.4.1.1.5 - chassis id - in theory might have been useful, in +# practice limited as the potential to correlate +# to other contexts is limited. As a result, +# our strategy will be to ignore this and focus +# instead on bridge-mib/qbridge-mib indicate data +# a potential exception would be pulling in things +# that are fundamentally network equipment, +# where significant ambiguity may exist. 
+# While in a 'host' scenario, there is ambiguity +# it is more controlled (virtual machines are given +# special treatment, and strategies exist for +# disambiguating shared management/data port, and +# other functions do not interact with our discovery +# framework +# # 1.0.8802.1.1.2.1.4.1.1.9 - SysName - could be handy hint in some scenarios +# # 1.0.8802.1.1.2.1.4.1.1.10 - SysDesc - good stuff + + +def lenovoname(idx, desc): + if desc.isdigit(): + return 'Ethernet' + str(idx) + return desc + +nameoverrides = [ + (re.compile('20301\..*'), lenovoname), +] + + +def _lldpdesc_to_ifname(switchid, idx, desc): + for tform in nameoverrides: + if tform[0].match(switchid): + desc = tform[1](idx, desc) + return desc + + +def _extract_neighbor_data_b(args): + """Build LLDP data about elements connected to switch + + args are carried as a tuple, because of eventlet convenience + """ + switch, password, user = args + conn = snmp.Session(switch, password, user) + sid = None + lldpdata = {} + for sysid in conn.walk('1.3.6.1.2.1.1.2'): + sid = str(sysid[1][6:]) + idxtoifname = {} + for oidindex in conn.walk('1.0.8802.1.1.2.1.3.7.1.4'): + idx = oidindex[0][-1] + idxtoifname[idx] = _lldpdesc_to_ifname(sid, idx, str(oidindex[1])) + for remotedesc in conn.walk('1.0.8802.1.1.2.1.4.1.1.10'): + iname = idxtoifname[remotedesc[0][-2]] + lldpdata[iname] = {'description': str(remotedesc[1])} + for remotename in conn.walk('1.0.8802.1.1.2.1.4.1.1.9'): + iname = idxtoifname[remotename[0][-2]] + if iname not in lldpdata: + lldpdata[iname] = {} + lldpdata[iname]['name'] = str(remotename[1]) + for remoteid in conn.walk('1.0.8802.1.1.2.1.4.1.1.5'): + iname = idxtoifname[remoteid[0][-2]] + if iname not in lldpdata: + lldpdata[iname] = {} + lldpdata[iname]['chassisid'] = str(remoteid[1]) + print(repr(lldpdata)) + + +def _extract_neighbor_data(args): + try: + _extract_neighbor_data_b(args) + except Exception: + log.logtrace() + +if __name__ == '__main__': + # a quick one-shot test, args are switch 
and snmpv1 string for now + # (should do three argument form for snmpv3 test + import sys + _extract_neighbor_data((sys.argv[1], sys.argv[2])) diff --git a/confluent_server/confluent/networking/macmap.py b/confluent_server/confluent/networking/macmap.py index b764929a..441b2d38 100644 --- a/confluent_server/confluent/networking/macmap.py +++ b/confluent_server/confluent/networking/macmap.py @@ -1,6 +1,6 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 -# Copyright 2016 Lenovo +# Copyright 2016-2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -31,16 +31,24 @@ # this module will provide mac to switch and full 'ifName' label # This functionality is restricted to the null tenant +if __name__ == '__main__': + import sys + import confluent.config.configmanager as cfm import confluent.exceptions as exc import confluent.log as log +import confluent.messages as msg import confluent.snmputil as snmp +import confluent.util as util from eventlet.greenpool import GreenPool +import eventlet +import eventlet.semaphore import re _macmap = {} _macsbyswitch = {} _nodesbymac = {} _switchportmap = {} +vintage = None _whitelistnames = ( @@ -90,7 +98,19 @@ def _namesmatch(switchdesc, userdesc): def _map_switch(args): try: return _map_switch_backend(args) + except UnicodeError: + log.log({'error': "Cannot resolve switch '{0}' to an address".format( + args[0])}) + except exc.TargetEndpointUnreachable: + log.log({'error': "Timeout or bad SNMPv1 community string trying to " + "reach switch '{0}'".format( + args[0])}) + except exc.TargetEndpointBadCredentials: + log.log({'error': "Bad SNMPv3 credentials for \'{0}\'".format( + args[0])}) except Exception as e: + log.log({'error': 'Unexpected condition trying to reach switch "{0}"' + ' check trace log for more'.format(args[0])}) log.logtrace() @@ -120,7 +140,13 @@ def _map_switch_backend(args): # fallback if ifName is empty # global _macmap - 
switch, password, user = args + if len(args) == 3: + switch, password, user = args + if not user: + user = None + else: + switch, password = args + user = None haveqbridge = False mactobridge = {} conn = snmp.Session(switch, password, user) @@ -135,12 +161,24 @@ def _map_switch_backend(args): ) mactobridge[macaddr] = int(bridgeport) if not haveqbridge: - raise exc.NotImplementedException('TODO: Bridge-MIB without QBRIDGE') + for vb in conn.walk('1.3.6.1.2.1.17.4.3.1.2'): + oid, bridgeport = vb + if not bridgeport: + continue + oid = str(oid).rsplit('.', 6) + macaddr = '{0:02x}:{1:02x}:{2:02x}:{3:02x}:{4:02x}:{5:02x}'.format( + *([int(x) for x in oid[-6:]]) + ) + mactobridge[macaddr] = int(bridgeport) bridgetoifmap = {} for vb in conn.walk('1.3.6.1.2.1.17.1.4.1.2'): bridgeport, ifidx = vb bridgeport = int(str(bridgeport).rsplit('.', 1)[1]) - bridgetoifmap[bridgeport] = int(ifidx) + try: + bridgetoifmap[bridgeport] = int(ifidx) + except ValueError: + # ifidx might be '', skip in such a case + continue ifnamemap = {} havenames = False for vb in conn.walk('1.3.6.1.2.1.31.1.1.1.1'): @@ -156,17 +194,41 @@ def _map_switch_backend(args): ifidx = int(str(ifidx).rsplit('.', 1)[1]) ifnamemap[ifidx] = str(ifname) maccounts = {} + bridgetoifvalid = False for mac in mactobridge: - ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]] + try: + ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]] + bridgetoifvalid = True + except KeyError: + continue if ifname not in maccounts: maccounts[ifname] = 1 else: maccounts[ifname] += 1 + if not bridgetoifvalid: + bridgetoifmap = {} + # Not a single mac address resolved to an interface index, chances are + # that the switch is broken, and the mactobridge is reporting ifidx + # instead of bridge port index + # try again, skipping the bridgetoifmap lookup + for mac in mactobridge: + try: + ifname = ifnamemap[mactobridge[mac]] + bridgetoifmap[mactobridge[mac]] = mactobridge[mac] + except KeyError: + continue + if ifname not in maccounts: + 
maccounts[ifname] = 1 + else: + maccounts[ifname] += 1 _macsbyswitch[switch] = {} for mac in mactobridge: # We want to merge it so that when a mac appears in multiple # places, it is captured. - ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]] + try: + ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]] + except KeyError: + continue if mac in _macmap: _macmap[mac].append((switch, ifname, maccounts[ifname])) else: @@ -178,14 +240,34 @@ def _map_switch_backend(args): nodename = _nodelookup(switch, ifname) if nodename is not None: if mac in _nodesbymac and _nodesbymac[mac] != nodename: - log.log({'warning': '{0} and {1} described by ambiguous' + # For example, listed on both a real edge port + # and by accident a trunk port + log.log({'error': '{0} and {1} described by ambiguous' ' switch topology values'.format(nodename, _nodesbymac[mac] )}) - _nodesbymac[mac] = nodename + _nodesbymac[mac] = None + else: + _nodesbymac[mac] = nodename -def update_macmap(configmanager): +def find_node_by_mac(mac, configmanager): + now = util.monotonic_time() + if vintage and (now - vintage) < 90 and mac in _nodesbymac: + return _nodesbymac[mac] + # do not actually sweep switches more than once every 30 seconds + # however, if there is an update in progress, wait on it + for _ in update_macmap(configmanager, vintage and (now - vintage) < 30): + if mac in _nodesbymac: + return _nodesbymac[mac] + # If update_mac bailed out, still check one last time + return _nodesbymac.get(mac, None) + + +mapupdating = eventlet.semaphore.Semaphore() + + +def update_macmap(configmanager, impatient=False): """Interrogate switches to build/update mac table Begin a rebuild process. This process is a generator that will yield @@ -193,57 +275,205 @@ def update_macmap(configmanager): recheck the cache as results become possible, rather than having to wait for the process to complete to interrogate. 
""" + if mapupdating.locked(): + while mapupdating.locked(): + eventlet.sleep(1) + yield None + return + if impatient: + return + completions = _full_updatemacmap(configmanager) + for completion in completions: + try: + yield completion + except GeneratorExit: + # the calling function has stopped caring, but we want to finish + # the sweep, background it + eventlet.spawn_n(_finish_update, completions) + raise + +def _finish_update(completions): + for _ in completions: + pass + +def _full_updatemacmap(configmanager): + global vintage global _macmap global _nodesbymac global _switchportmap - # Clear all existing entries - _macmap = {} - _nodesbymac = {} - _switchportmap = {} - if configmanager.tenant is not None: - raise exc.ForbiddenRequest('Network topology not available to tenants') - nodelocations = configmanager.get_node_attributes( - configmanager.list_nodes(), ('hardwaremanagement.switch', - 'hardwaremanagement.switchport')) - switches = set([]) - for node in nodelocations: - cfg = nodelocations[node] - if 'hardwaremanagement.switch' in cfg: - curswitch = cfg['hardwaremanagement.switch']['value'] - switches.add(curswitch) - if 'hardwaremanagement.switchport' in cfg: - portname = cfg['hardwaremanagement.switchport']['value'] - if curswitch not in _switchportmap: - _switchportmap[curswitch] = {} - if portname in _switchportmap[curswitch]: - log.log({'warning': 'Duplicate switch topology config for ' - '{0} and {1}'.format(node, - _switchportmap[ - curswitch][ - portname])}) - _switchportmap[curswitch][portname] = node - switchcfg = configmanager.get_node_attributes( - switches, ('secret.hardwaremanagementuser', - 'secret.hardwaremanagementpassword'), decrypt=True) - switchauth = [] - for switch in switches: - password = 'public' - user = None - if (switch in switchcfg and - 'secret.hardwaremanagementpassword' in switchcfg[switch]): - password = switchcfg[switch]['secret.hardwaremanagementpassword'][ - 'value'] - if 'secret.hardwaremanagementuser' in 
switchcfg[switch]: - user = switchcfg[switch]['secret.hardwaremanagementuser'][ - 'value'] - switchauth.append((switch, password, user)) - pool = GreenPool() - for res in pool.imap(_map_switch, switchauth): - yield res - print(repr(_macmap)) + global _macsbyswitch + with mapupdating: + vintage = util.monotonic_time() + # Clear all existing entries + _macmap = {} + _nodesbymac = {} + _switchportmap = {} + _macsbyswitch = {} + if configmanager.tenant is not None: + raise exc.ForbiddenRequest( + 'Network topology not available to tenants') + nodelocations = configmanager.get_node_attributes( + configmanager.list_nodes(), ('net*.switch', 'net*.switchport')) + switches = set([]) + for node in nodelocations: + cfg = nodelocations[node] + for attr in cfg: + if not attr.endswith('.switch') or 'value' not in cfg[attr]: + continue + curswitch = cfg[attr].get('value', None) + if not curswitch: + continue + switches.add(curswitch) + switchportattr = attr + 'port' + if switchportattr in cfg: + portname = cfg[switchportattr].get('value', '') + if not portname: + continue + if curswitch not in _switchportmap: + _switchportmap[curswitch] = {} + if portname in _switchportmap[curswitch]: + log.log({'error': 'Duplicate switch topology config ' + 'for {0} and {1}'.format( + node, + _switchportmap[curswitch][ + portname])}) + _switchportmap[curswitch][portname] = None + else: + _switchportmap[curswitch][portname] = node + switchcfg = configmanager.get_node_attributes( + switches, ('secret.hardwaremanagementuser', 'secret.snmpcommunity', + 'secret.hardwaremanagementpassword'), decrypt=True) + switchauth = [] + for switch in switches: + if not switch: + continue + switchparms = switchcfg.get(switch, {}) + user = None + password = switchparms.get( + 'secret.snmpcommunity', {}).get('value', None) + if not password: + password = switchparms.get( + 'secret.hardwaremanagementpassword', {}).get('value', + 'public') + user = switchparms.get( + 'secret.hardwaremanagementuser', {}).get('value', 
None) + switchauth.append((switch, password, user)) + pool = GreenPool() + for ans in pool.imap(_map_switch, switchauth): + vintage = util.monotonic_time() + yield ans + + +def _dump_locations(info, macaddr, nodename=None): + yield msg.KeyValueData({'possiblenode': nodename, 'mac': macaddr}) + retdata = {} + portinfo = [] + for location in info: + portinfo.append({'switch': location[0], + 'port': location[1], 'macsonport': location[2]}) + retdata['ports'] = sorted(portinfo, key=lambda x: x['macsonport'], + reverse=True) + yield msg.KeyValueData(retdata) + + +def handle_api_request(configmanager, inputdata, operation, pathcomponents): + if operation == 'retrieve': + return handle_read_api_request(pathcomponents) + if (operation in ('update', 'create') and + pathcomponents == ['networking', 'macs', 'rescan']): + if inputdata != {'rescan': 'start'}: + raise exc.InvalidArgumentException() + eventlet.spawn_n(rescan, configmanager) + return [msg.KeyValueData({'rescan': 'started'})] + raise exc.NotImplementedException( + 'Operation {0} on {1} not implemented'.format( + operation, '/'.join(pathcomponents))) + + +def handle_read_api_request(pathcomponents): + # TODO(jjohnson2): discovery core.py api handler design, apply it here + # to make this a less tangled mess as it gets extended + if len(pathcomponents) == 1: + return [msg.ChildCollection('macs/')] + elif len(pathcomponents) == 2: + return [msg.ChildCollection(x) for x in (# 'by-node/', + 'by-mac/', 'by-switch/', + 'rescan')] + if False and pathcomponents[2] == 'by-node': + # TODO: should be list of node names, and then under that 'by-mac' + if len(pathcomponents) == 3: + return [msg.ChildCollection(x.replace(':', '-')) + for x in sorted(list(_nodesbymac))] + elif len(pathcomponents) == 4: + macaddr = pathcomponents[-1].replace('-', ':') + return dump_macinfo(macaddr) + elif pathcomponents[2] == 'by-mac': + if len(pathcomponents) == 3: + return [msg.ChildCollection(x.replace(':', '-')) + for x in 
sorted(list(_macmap))] + elif len(pathcomponents) == 4: + return dump_macinfo(pathcomponents[-1]) + elif pathcomponents[2] == 'by-switch': + if len(pathcomponents) == 3: + return [msg.ChildCollection(x + '/') + for x in sorted(list(_macsbyswitch))] + + if len(pathcomponents) == 4: + return [msg.ChildCollection('by-port/')] + if len(pathcomponents) == 5: + switchname = pathcomponents[-2] + if switchname not in _macsbyswitch: + raise exc.NotFoundException( + 'No known macs for switch {0}'.format(switchname)) + return [msg.ChildCollection(x.replace('/', '-') + '/') + for x in sorted(list(_macsbyswitch[switchname]))] + if len(pathcomponents) == 6: + return [msg.ChildCollection('by-mac/')] + if len(pathcomponents) == 7: + switchname = pathcomponents[-4] + portname = pathcomponents[-2] + try: + if portname not in _macsbyswitch[switchname]: + portname = portname.replace('-', '/') + maclist = _macsbyswitch[switchname][portname] + except KeyError: + raise exc.NotFoundException('No known macs for switch {0} ' + 'port {1}'.format(switchname, + portname)) + return [msg.ChildCollection(x.replace(':', '-')) + for x in sorted(maclist)] + if len(pathcomponents) == 8: + return dump_macinfo(pathcomponents[-1]) + raise exc.NotFoundException('Unrecognized path {0}'.format( + '/'.join(pathcomponents))) + + +def dump_macinfo(macaddr): + macaddr = macaddr.replace('-', ':') + info = _macmap.get(macaddr, None) + if info is None: + raise exc.NotFoundException( + '{0} not found in mac table of ' + 'any known switches'.format(macaddr)) + return _dump_locations(info, macaddr, _nodesbymac.get(macaddr, None)) + + +def rescan(cfg): + for _ in update_macmap(cfg): + pass if __name__ == '__main__': - # invoke as switch community - import sys - _map_switch(sys.argv[1], sys.argv[2]) + cg = cfm.ConfigManager(None) + for res in update_macmap(cg): + print("map has updated") + if len(sys.argv) > 1: + print(repr(_macmap[sys.argv[1]])) + print(repr(_nodesbymac[sys.argv[1]])) + else: + print("Mac to Node 
lookup table: -------------------") + print(repr(_nodesbymac)) + print("Mac to location lookup table: -------------------") + print(repr(_macmap)) + print("switch to fdb lookup table: -------------------") + print(repr(_macsbyswitch)) \ No newline at end of file diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index 1899e6f2..79a77777 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -1,7 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation -# Copyright 2015 Lenovo +# Copyright 2015-2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -170,6 +170,17 @@ class NodeRange(object): def _expandstring(self, element, filternodes=None): prefix = '' + if element[0][0] in ('/', '~'): + element = ''.join(element) + nameexpression = element[1:] + if self.cfm is None: + raise Exception('Verification configmanager required') + return set(self.cfm.filter_nodenames(nameexpression, filternodes)) + elif '=' in element[0] or '!~' in element[0]: + element = ''.join(element) + if self.cfm is None: + raise Exception('Verification configmanager required') + return set(self.cfm.filter_node_attributes(element, filternodes)) for idx in xrange(len(element)): if element[idx][0] == '[': nodes = set([]) @@ -191,19 +202,10 @@ class NodeRange(object): nodes |= NodeRange( grpcfg['noderange']['value'], self.cfm).nodes return nodes - if '-' in element and ':' not in element: - return self.expandrange(element, '-') - elif ':' in element: # : range for less ambiguity + if ':' in element: # : range for less ambiguity return self.expandrange(element, ':') - elif '=' in element or '!~' in element: - if self.cfm is None: - raise Exception('Verification configmanager required') - return set(self.cfm.filter_node_attributes(element, filternodes)) - elif element[0] in ('/', '~'): - 
nameexpression = element[1:] - if self.cfm is None: - raise Exception('Verification configmanager required') - return set(self.cfm.filter_nodenames(nameexpression, filternodes)) + elif '-' in element: + return self.expandrange(element, '-') elif '+' in element: element, increment = element.split('+') try: diff --git a/confluent_server/confluent/plugins/configuration/attributes.py b/confluent_server/confluent/plugins/configuration/attributes.py index 10aa3307..1139f3f5 100644 --- a/confluent_server/confluent/plugins/configuration/attributes.py +++ b/confluent_server/confluent/plugins/configuration/attributes.py @@ -1,4 +1,5 @@ # Copyright 2014 IBM Corporation +# Copyright 2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,16 +27,24 @@ def retrieve(nodes, element, configmanager, inputdata): def retrieve_nodegroup(nodegroup, element, configmanager, inputdata): - grpcfg = configmanager.get_nodegroup_attributes(nodegroup) + try: + grpcfg = configmanager.get_nodegroup_attributes(nodegroup) + except KeyError: + if not configmanager.is_nodegroup(nodegroup): + raise exc.NotFoundException( + 'Invalid nodegroup: {0} not found'.format(nodegroup)) + raise if element == 'all': - nodes = [] - if 'nodes' in grpcfg: - nodes = list(grpcfg['nodes']) - yield msg.ListAttributes(kv={'nodes': nodes}, - desc="The nodes belonging to this group") - for attribute in sorted(allattributes.node.iterkeys()): + theattrs = set(allattributes.node).union(set(grpcfg)) + theattrs.add('nodes') + for attribute in sorted(theattrs): if attribute == 'groups': continue + if attribute == 'nodes': + yield msg.ListAttributes( + kv={'nodes': list(grpcfg.get('nodes', []))}, + desc="The nodes belonging to this group") + continue if attribute in grpcfg: val = grpcfg[attribute] else: @@ -45,13 +54,17 @@ def retrieve_nodegroup(nodegroup, element, configmanager, inputdata): kv={attribute: val}, 
desc=allattributes.node[attribute]['description']) elif isinstance(val, list): - raise Exception("TODO") + yield msg.ListAttributes( + kv={attribute: val}, + desc=allattributes.node.get( + attribute, {}).get('description', '')) else: yield msg.Attributes( kv={attribute: val}, - desc=allattributes.node[attribute]['description']) + desc=allattributes.node.get(attribute, {}).get( + 'description', '')) if element == 'current': - for attribute in sorted(grpcfg.iterkeys()): + for attribute in sorted(list(grpcfg)): currattr = grpcfg[attribute] if attribute == 'nodes': desc = 'The nodes belonging to this group' @@ -61,7 +74,7 @@ def retrieve_nodegroup(nodegroup, element, configmanager, inputdata): try: desc = allattributes.node[attribute]['description'] except KeyError: - desc = 'Unknown' + desc = '' if 'value' in currattr or 'expression' in currattr: yield msg.Attributes(kv={attribute: currattr}, desc=desc) elif 'cryptvalue' in currattr: @@ -86,7 +99,8 @@ def retrieve_nodes(nodes, element, configmanager, inputdata): attributes = configmanager.get_node_attributes(nodes) if element[-1] == 'all': for node in nodes: - for attribute in sorted(allattributes.node.iterkeys()): + theattrs = set(allattributes.node).union(set(attributes[node])) + for attribute in sorted(theattrs): if attribute in attributes[node]: # have a setting for it val = attributes[node][attribute] elif attribute == 'groups': # no setting, provide a blank @@ -96,23 +110,26 @@ def retrieve_nodes(nodes, element, configmanager, inputdata): if attribute.startswith('secret.'): yield msg.CryptedAttributes( node, {attribute: val}, - allattributes.node[attribute]['description']) + allattributes.node.get( + attribute, {}).get('description', '')) elif isinstance(val, list): yield msg.ListAttributes( node, {attribute: val}, - allattributes.node[attribute]['description']) + allattributes.node.get( + attribute, {}).get('description', '')) else: yield msg.Attributes( node, {attribute: val}, - 
allattributes.node[attribute]['description']) + allattributes.node.get( + attribute, {}).get('description', '')) elif element[-1] == 'current': - for node in attributes.iterkeys(): + for node in sorted(list(attributes)): for attribute in sorted(attributes[node].iterkeys()): currattr = attributes[node][attribute] try: desc = allattributes.node[attribute]['description'] except KeyError: - desc = 'Unknown' + desc = '' if 'value' in currattr or 'expression' in currattr: yield msg.Attributes(node, {attribute: currattr}, desc) elif 'cryptvalue' in currattr: diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index ff956a9a..c5c1d72d 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -36,7 +36,8 @@ console.session.socket.getaddrinfo = eventlet.support.greendns.getaddrinfo def exithandler(): - console.session.iothread.join() + if console.session.iothread is not None: + console.session.iothread.join() atexit.register(exithandler) @@ -52,6 +53,15 @@ sensor_categories = { } +class EmptySensor(object): + def __init__(self, name): + self.name = name + self.value = None + self.states = ['Unavailable'] + self.units = None + self.health = 'ok' + + def hex2bin(hexstring): hexvals = hexstring.split(':') if len(hexvals) < 2: @@ -300,7 +310,6 @@ def perform_requests(operator, nodes, element, cfg, inputdata): pass - def perform_request(operator, node, element, configdata, inputdata, cfg, results): try: @@ -361,7 +370,7 @@ class IpmiHandler(object): ipmisess.wait_for_rsp(180) if not (self.broken or self.loggedin): raise exc.TargetEndpointUnreachable( - "Login process to " + bmc + " died") + "Login process to " + connparams['bmc'] + " died") except socket.gaierror as ge: if ge[0] == -2: raise exc.TargetEndpointUnreachable(ge[1]) @@ -599,29 +608,31 @@ class IpmiHandler(object): 
self.sensormap[simplify_name(resourcename)] = resourcename def read_sensors(self, sensorname): - try: - if sensorname == 'all': - sensors = self.ipmicmd.get_sensor_descriptions() - readings = [] - for sensor in filter(self.match_sensor, sensors): - try: - reading = self.ipmicmd.get_sensor_reading( - sensor['name']) - except pygexc.IpmiException as ie: - if ie.ipmicode == 203: - continue - raise - if hasattr(reading, 'health'): - reading.health = _str_health(reading.health) - readings.append(reading) - self.output.put(msg.SensorReadings(readings, name=self.node)) - else: - self.make_sensor_map() - if sensorname not in self.sensormap: - self.output.put( - msg.ConfluentTargetNotFound(self.node, - 'Sensor not found')) - return + if sensorname == 'all': + sensors = self.ipmicmd.get_sensor_descriptions() + readings = [] + for sensor in filter(self.match_sensor, sensors): + try: + reading = self.ipmicmd.get_sensor_reading( + sensor['name']) + except pygexc.IpmiException as ie: + if ie.ipmicode == 203: + self.output.put(msg.SensorReadings([EmptySensor( + sensor['name'])], name=self.node)) + continue + raise + if hasattr(reading, 'health'): + reading.health = _str_health(reading.health) + readings.append(reading) + self.output.put(msg.SensorReadings(readings, name=self.node)) + else: + self.make_sensor_map() + if sensorname not in self.sensormap: + self.output.put( + msg.ConfluentTargetNotFound(self.node, + 'Sensor not found')) + return + try: reading = self.ipmicmd.get_sensor_reading( self.sensormap[sensorname]) if hasattr(reading, 'health'): @@ -629,8 +640,13 @@ class IpmiHandler(object): self.output.put( msg.SensorReadings([reading], name=self.node)) - except pygexc.IpmiException: - self.output.put(msg.ConfluentTargetTimeout(self.node)) + except pygexc.IpmiException as ie: + if ie.ipmicode == 203: + self.output.put(msg.ConfluentResourceUnavailable( + self.node, 'Unavailable' + )) + else: + self.output.put(msg.ConfluentTargetTimeout(self.node)) def list_inventory(self): 
try: diff --git a/confluent_server/confluent/snmputil.py b/confluent_server/confluent/snmputil.py index 9b5afd0d..ca467037 100644 --- a/confluent_server/confluent/snmputil.py +++ b/confluent_server/confluent/snmputil.py @@ -24,6 +24,7 @@ import confluent.exceptions as exc import eventlet from eventlet.support.greendns import getaddrinfo +import pysnmp.smi.error as snmperr import socket snmp = eventlet.import_patched('pysnmp.hlapi') @@ -85,14 +86,22 @@ class Session(object): walking = snmp.bulkCmd(self.eng, self.authdata, tp, ctx, 0, 10, obj, lexicographicMode=False) - for rsp in walking: - errstr, errnum, erridx, answers = rsp - if errstr: - raise exc.TargetEndpointUnreachable(str(errstr)) - elif errnum: - raise exc.ConfluentException(errnum.prettyPrint()) - for ans in answers: - yield ans + try: + for rsp in walking: + errstr, errnum, erridx, answers = rsp + if errstr: + errstr = str(errstr) + if errstr in ('unknownUserName', 'wrongDigest'): + raise exc.TargetEndpointBadCredentials(errstr) + # need to do bad credential versus timeout + raise exc.TargetEndpointUnreachable(errstr) + elif errnum: + raise exc.ConfluentException(errnum.prettyPrint()) + for ans in answers: + yield ans + except snmperr.WrongValueError: + raise exc.TargetEndpointBadCredentials('Invalid SNMPv3 password') + if __name__ == '__main__': diff --git a/confluent_server/confluent/sockapi.py b/confluent_server/confluent/sockapi.py index 679c6140..558c7c9b 100644 --- a/confluent_server/confluent/sockapi.py +++ b/confluent_server/confluent/sockapi.py @@ -234,8 +234,16 @@ def start_term(authname, cfm, connection, params, path, authdata, skipauth): consession.reopen() continue else: - process_request(connection, data, cfm, authdata, authname, - skipauth) + try: + process_request(connection, data, cfm, authdata, authname, + skipauth) + except Exception: + tracelog.log(traceback.format_exc(), + ltype=log.DataTypes.event, + event=log.Events.stacktrace) + send_data(connection, {'errorcode': 500, + 'error': 
'Unexpected error'}) + send_data(connection, {'_requestdone': 1}) continue if not data: consession.destroy() diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index 67d1e0d0..3e41bd86 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -1,7 +1,7 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2014 IBM Corporation -# Copyright 2015 Lenovo +# Copyright 2015-2017 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,10 +20,43 @@ import base64 import confluent.exceptions as cexc import confluent.log as log import hashlib +import netifaces import os import struct +def list_interface_indexes(): + # Getting the interface indexes in a portable manner + # would be better, but there's difficulty from a python perspective. + # For now be linux specific + try: + for iface in os.listdir('/sys/class/net/'): + ifile = open('/sys/class/net/{0}/ifindex'.format(iface), 'r') + intidx = int(ifile.read()) + ifile.close() + yield intidx + except (IOError, OSError): + # Probably situation is non-Linux, just do limited support for + # such platforms until other people come along + for iface in netifaces.interfaces(): + addrinfo = netifaces.ifaddresses(iface).get(netifaces.AF_INET6, []) + for addr in addrinfo: + v6addr = addr.get('addr', '').partition('%')[2] + if v6addr: + yield(int(v6addr)) + break + return + + +def list_ips(): + # Used for getting addresses to indicate the multicast address + # as well as getting all the broadcast addresses + for iface in netifaces.interfaces(): + addrs = netifaces.ifaddresses(iface) + if netifaces.AF_INET in addrs: + for addr in addrs[netifaces.AF_INET]: + yield addr + def randomstring(length=20): """Generate a random string of requested length @@ -61,6 +94,23 @@ def monotonic_time(): # for now, just support POSIX systems return os.times()[4] + +def get_fingerprint(certificate, 
algo='sha512'): + if algo != 'sha512': + raise Exception("TODO: Non-sha512") + return 'sha512$' + hashlib.sha512(certificate).hexdigest() + + +def cert_matches(fingerprint, certificate): + if not fingerprint or not certificate: + return False + algo, _, fp = fingerprint.partition('$') + newfp = None + if algo == 'sha512': + newfp = get_fingerprint(certificate) + return newfp and fingerprint == newfp + + class TLSCertVerifier(object): def __init__(self, configmanager, node, fieldname): self.cfm = configmanager @@ -68,11 +118,12 @@ class TLSCertVerifier(object): self.fieldname = fieldname def verify_cert(self, certificate): - fingerprint = 'sha512$' + hashlib.sha512(certificate).hexdigest() + fingerprint = get_fingerprint(certificate) storedprint = self.cfm.get_node_attributes(self.node, (self.fieldname,) ) - if self.fieldname not in storedprint[self.node]: # no stored value, check - # policy for next action + if (self.fieldname not in storedprint[self.node] or + storedprint[self.node][self.fieldname]['value'] == ''): + # no stored value, check policy for next action newpolicy = self.cfm.get_node_attributes(self.node, ('pubkeys.addpolicy',)) if ('pubkeys.addpolicy' in newpolicy[self.node] and diff --git a/confluent_server/confluent_server.spec.tmpl b/confluent_server/confluent_server.spec.tmpl index 6a8c7aec..37800cdc 100644 --- a/confluent_server/confluent_server.spec.tmpl +++ b/confluent_server/confluent_server.spec.tmpl @@ -12,7 +12,7 @@ Group: Development/Libraries BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot Prefix: %{_prefix} BuildArch: noarch -Requires: python-pyghmi, python-eventlet, python-greenlet, python-crypto >= 2.6.1, confluent_client, pyparsing, python-paramiko, python-dns +Requires: python-pyghmi, python-eventlet, python-greenlet, python-crypto >= 2.6.1, confluent_client, pyparsing, python-paramiko, python-dns, python-netifaces, python2-pyasn1, python-pysnmp, python-pyte Vendor: Jarrod Johnson Url: http://xcat.sf.net/ @@ -34,7 +34,8 
@@ grep -v confluent/__init__.py INSTALLED_FILES.bare > INSTALLED_FILES cat INSTALLED_FILES %post -if [ -x /usr/bin/systemctl ]; then /usr/bin/systemctl try-restart confluent; fi +if [ -x /usr/bin/systemctl ]; then /usr/bin/systemctl try-restart confluent >& /dev/null; fi +true %clean rm -rf $RPM_BUILD_ROOT diff --git a/confluent_server/confluentdbgcli.py b/confluent_server/confluentdbgcli.py new file mode 100644 index 00000000..4ed804b5 --- /dev/null +++ b/confluent_server/confluentdbgcli.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2014 IBM Corporation +# Copyright 2015-2016 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import readline +import socket + +connection = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) +connection.connect('/var/run/confluent/dbg.sock') + +readline.parse_and_bind("tab: complete") +readline.parse_and_bind("set bell-style none") + diff --git a/confluent_server/dbgtools/confluentdbgcli.py b/confluent_server/dbgtools/confluentdbgcli.py new file mode 100644 index 00000000..04ba8783 --- /dev/null +++ b/confluent_server/dbgtools/confluentdbgcli.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Note that this script has a high chance of breaking confluent, so +# do not be surprised if confluent crashes as you exit... + +import select +import socket +import readline +import sys +import threading + +readline.parse_and_bind('tab: complete') +conn = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) +conn.connect('/var/run/confluent/dbg.sock') + +pendingoutput = None + +class GetInput(threading.Thread): + def run(self): + global pendingoutput + while True: + try: + pendingoutput = raw_input('') + except EOFError: + pendingoutput = False + break + + +inputthread = GetInput() +inputthread.start() +while True: + r, _, _ = select.select((conn,), (), (), 0.1) + if conn in r: + sys.stdout.write(conn.recv(1)) + if pendingoutput is not None: + if pendingoutput is False: + conn.shutdown(socket.SHUT_WR) + sys.exit(1) + else: + conn.sendall(pendingoutput + '\n') + pendingoutput = None + sys.stdout.flush() diff --git a/confluent_server/dbgtools/processhangtraces.py b/confluent_server/dbgtools/processhangtraces.py new file mode 100644 index 00000000..9e93acfe --- /dev/null +++ b/confluent_server/dbgtools/processhangtraces.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +threadtraces = {} + +with open(sys.argv[1]) as tracefile: + traces = tracefile.read() + currtrace = None + for line in traces.split("\n"): + if line.startswith("Thread trace:"): + if currtrace is not None: + if currtrace not in threadtraces: + threadtraces[currtrace] = 0 + threadtraces[currtrace] += 1 + currtrace = line + elif currtrace is not None: + currtrace += line + '\n' +for trace in sorted(threadtraces, key=lambda x: threadtraces[x]): + print('Following stack seen {0} times'.format(threadtraces[trace])) + print(trace) diff --git a/confluent_server/makesetup b/confluent_server/makesetup index 2ee30143..26bb712a 100755 --- a/confluent_server/makesetup +++ b/confluent_server/makesetup @@ -6,3 +6,4 @@ if [ "$NUMCOMMITS" != "$VERSION" ]; then fi echo $VERSION > VERSION sed -e "s/#VERSION#/$VERSION/" setup.py.tmpl > setup.py +echo '__version__ = "'$VERSION'"' > confluent/__init__.py diff --git a/confluent_server/setup.py.tmpl b/confluent_server/setup.py.tmpl index fdbbee84..445e2d08 100644 --- a/confluent_server/setup.py.tmpl +++ b/confluent_server/setup.py.tmpl @@ -9,6 +9,10 @@ setup( url='http://xcat.sf.net/', description='confluent systems management server', packages=['confluent', 'confluent/config', 'confluent/interface', + 'confluent/discovery/', + 'confluent/discovery/protocols/', + 'confluent/discovery/handlers/', + 'confluent/networking/', 'confluent/plugins/hardwaremanagement/', 'confluent/plugins/shell/', 'confluent/plugins/configuration/'],