diff --git a/docs/source/guides/admin-guides/manage_clusters/common/deployment/enable_kdump.rst b/docs/source/guides/admin-guides/manage_clusters/common/deployment/enable_kdump.rst index a6c0765d2..8a6158914 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/deployment/enable_kdump.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/deployment/enable_kdump.rst @@ -108,7 +108,7 @@ For system x machine, on sles10 set the crashkernelsize attribute like this: :: chdef -t osimage crashkernelsize=M@16M -On sles11 and rhels6 set the crashkernelsize attribute like this: :: +On sles11, rhels6 and above, set the crashkernelsize attribute like this: :: chdef -t osimage crashkernelsize=M @@ -118,13 +118,21 @@ Where recommended value is 256. For more information about the size can r ``_. ``_. + + ``_. For system p machine, set the crashkernelsize attribute to this: :: chdef -t osimage crashkernelsize=@32M +For Power System AC922, set the crashkernelsize attribute to this: :: + + chdef -t osimage crashkernelsize=M + Where recommended value is 256, more information can refer the kdump document for the system x. +**Note**: The ``crashkernel=`` option can be defined in multiple ways. If it is hard to decide on a specific crashkernelsize, the ``auto`` value can be used (for example ``crashkernel=auto``); it enables automatic configuration of the reserved memory based on the total amount of memory in the system.
class EventlogInterface(object):
    """Interface for eventlog-related actions.

    Each method forwards to ``task.run`` with the name of the executor
    action to perform, followed by that action's argument (if any), and
    returns whatever ``task.run`` returns.
    """
    interface_type = 'eventlog'
    version = '1.0'

    def get_eventlog_info(self, task, number_of_records="all"):
        """Return the eventlog info of the task's nodes.

        :param task: a Task instance containing the nodes to act on.
        :param number_of_records: number of records to display
                                  (``"all"`` by default).
        :return: the result of ``task.run('get_ev_info', number_of_records)``
        """
        return task.run('get_ev_info', number_of_records)

    def clear_all_eventlog_records(self, task):
        """Clear all eventlog records.

        :param task: a Task instance containing the nodes to act on.
        :return: the result of ``task.run('clear_all_ev_records')``
        """
        return task.run('clear_all_ev_records')

    def resolve_eventlog_records(self, task, resolve_list="LED"):
        """Resolve eventlog records of the task's nodes.

        :param task: a Task instance containing the nodes to act on.
        :param resolve_list: list of eventlog ids to resolve or LED label
                             (``"LED"`` by default).
        :return: the result of ``task.run('resolve_ev_records', resolve_list)``
        """
        return task.run('resolve_ev_records', resolve_list)


class DefaultEventlogManager(EventlogInterface):
    """Default eventlog manager: inherits every action unchanged."""
    pass
import os

# Eventlog additions to openbmc_client.py; the three functions below are
# methods of OpenBMCRest (they rely on self.request / self.messager).
EVENTLOG_URL = "/logging/enumerate"
RAS_POLICY_TABLE = "/opt/ibm/ras/lib/policyTable.json"
RAS_POLICY_MSG = "Install the OpenBMC RAS package to obtain more details logging messages."
RAS_NOT_FOUND_MSG = " Not found in policy table: "


def get_eventlog_info(self):
    """Fetch all eventlog data and return it in parsed form.

    Issues a GET on ``EVENTLOG_URL`` through ``self.request`` and hands the
    response to ``parse_eventlog_data``.

    :return: dictionary of formatted eventlog lines keyed by event id
    """
    eventlog_data = self.request('GET', EVENTLOG_URL, cmd='get_eventlog_info')

    return self.parse_eventlog_data(eventlog_data)


def parse_eventlog_data(self, eventlog_data):
    """Parse eventlog data and build a dictionary with the event id as key.

    :param eventlog_data: mapping whose values are single eventlog entries
                          (each one is passed to parse_eventlog_data_record).
    :return: dict mapping ``str(id)`` to the formatted line of that record;
             records whose id is 0 are left out.
    :raise SelfServerException: when a KeyError occurs while the records
                                are being processed.
    """
    # Use the RAS policy table when it is installed; otherwise tell the user
    # how to obtain more detailed messages and carry on with raw messages.
    ras_event_mapping = {}
    policy_hash = None
    if os.path.isfile(RAS_POLICY_TABLE):
        # The with-statement closes the file; no explicit close() is needed.
        with open(RAS_POLICY_TABLE, "r") as data_file:
            policy_hash = json.load(data_file)
    if policy_hash:
        ras_event_mapping = policy_hash['events']
    else:
        self.messager.info(RAS_POLICY_MSG)

    try:
        eventlog_dict = {}
        for _, value in sorted(eventlog_data.items()):
            record_id, event_log_line = self.parse_eventlog_data_record(value, ras_event_mapping)
            # Entries without an 'Id' field keep the "0" placeholder id and
            # are not reported.
            if int(record_id) != 0:
                eventlog_dict[str(record_id)] = event_log_line
        return eventlog_dict
    except KeyError:
        error = 'Error: Received wrong format response: %s' % eventlog_data
        raise SelfServerException(error)


def parse_eventlog_data_record(self, event_log_entry, ras_event_mapping):
    """Parse a single eventlog entry and return it as a formatted string.

    :param event_log_entry: dict describing one eventlog entry; the keys
                            'AdditionalData', 'Timestamp', 'Id', 'Resolved'
                            and 'Message' are used, others are ignored.
    :param ras_event_mapping: dict keyed by message string whose values
                              provide 'EventType', 'Message', 'Severity'
                              and 'AffectedSubsystem'; may be empty.
    :return: tuple ``(id_str, formatted_line)``; ``id_str`` is ``"0"`` when
             the entry has no 'Id'.
    """
    LED_tag = " [LED]"
    callout_data = ""
    timestamp_str = ""
    message_str = ""
    pid_str = ""
    resolved_str = ""
    id_str = "0"
    callout = False

    for sub_key, v in event_log_entry.items():
        if sub_key == 'AdditionalData':
            for data_entry in v:
                # Split on the first '=' only, so values that themselves
                # contain '=' stay intact and entries without '=' do not
                # raise IndexError.
                name, _, data = data_entry.partition("=")
                if name == 'ESEL':
                    # Placeholder, not currently used
                    pass
                elif name == '_PID':
                    pid_str = "PID: " + str(data) + "),"
                elif 'CALLOUT_DEVICE_PATH' in name:
                    callout = True
                    callout_data = "I2C"
                elif 'CALLOUT_INVENTORY_PATH' in name:
                    callout = True
                    callout_data = data
                elif 'CALLOUT' in name:
                    callout = True
                elif 'GPU' in name:
                    callout_data = "/xyz/openbmc_project/inventory/system/chassis/motherboard/gpu"
                elif 'PROCEDURE' in name:
                    callout_data = '{:x}'.format(int(data))  # Convert to hex
        elif sub_key == 'Timestamp':
            # The value is divided by 1000 before being handed to localtime
            timestamp = time.localtime(v / 1000)
            # %H:%M:%S is the portable spelling of %T
            timestamp_str = time.strftime("%m/%d/%Y %H:%M:%S", timestamp)
        elif sub_key == 'Id':
            id_str = str(v)
        elif sub_key == 'Resolved':
            resolved_str = " Resolved: " + str(v)
        elif sub_key == 'Message':
            message_str = v

    # Append the callout once the whole entry has been read, so the result
    # does not depend on the order in which the entry's keys are iterated.
    if callout_data:
        message_str += "||" + callout_data

    prefix = timestamp_str + " [" + id_str + "]"
    # If event data mapping was read in from RAS policy table, display a more detailed message
    if ras_event_mapping:
        if message_str in ras_event_mapping:
            event = ras_event_mapping[message_str]
            event_type = event['EventType']
            event_message = event['Message']
            severity = event['Severity']
            affect = event['AffectedSubsystem']
            formatted_line = prefix + ": " + event_type + ", " + "(" + severity + ") " + event_message + " (AffectedSubsystem: " + affect + ", " + pid_str + resolved_str
        else:
            formatted_line = prefix + ":" + RAS_NOT_FOUND_MSG + message_str + " (" + pid_str + resolved_str
    else:
        formatted_line = prefix + ": " + message_str + " (" + pid_str + resolved_str

    # Any CALLOUT* key in AdditionalData tags the record with [LED]
    if callout:
        formatted_line += LED_tag
    return id_str, formatted_line
def reventlog(self, nodesinfo, args):
    """Handle the ``reventlog`` command for the given nodes.

    Parses ``args`` with docopt, checks the selected subcommand against
    ``EVENTLOG_OPTIONS``, builds an ``OpenBMCEventlogTask`` and dispatches
    to the matching ``DefaultEventlogManager`` method. Parse and validation
    failures are reported through ``self.messager.error`` and end the call.

    :param nodesinfo: node information handed to ``OpenBMCEventlogTask``.
    :param args: list of command arguments; replaced by ``['all']`` when empty.
    """

    # 1, parse args
    # NOTE(review): the usage text below lists no 'all' subcommand, so this
    # default presumably does not parse -- confirm against the real usage.
    if not args:
        args = ['all']

    # NOTE(review): in this view the 'resolved' and 'list' usage lines carry
    # no argument placeholder and the opts.pop('') lookups further down use
    # an empty key; the placeholder names appear to have been lost -- TODO
    # confirm against the original file before relying on these strings.
    reventlog_usage = """
    Usage:
        reventlog [-V|--verbose] resolved 
        reventlog [-V|--verbose] clear
        reventlog [-V|--verbose] list 

    Options:
        -V --verbose   eventlog verbose mode.
    """

    try:
        opts = docopt(reventlog_usage, argv=args)

        self.verbose = opts.pop('--verbose')
        # The first remaining key with a truthy value is taken as the action.
        # NOTE(review): a positional argument with a truthy value would pass
        # this filter too; which key comes first then depends on the dict's
        # ordering -- confirm.
        action = [k for k,v in opts.items() if v][0]
    except Exception as e:
        self.messager.error("Failed to parse arguments for reventlog: %s" % args)
        return

    # 2, validate the args
    if action not in EVENTLOG_OPTIONS:
        self.messager.error("Not supported subcommand for reventlog: %s" % action)
        return

    # 3, run the subcommands
    runner = OpenBMCEventlogTask(nodesinfo, callback=self.messager, debugmode=self.debugmode, verbose=self.verbose)
    self.messager.info('revetlog.py processing action=%s args=%s' % (action, args))
    if action == 'clear':
        DefaultEventlogManager().clear_all_eventlog_records(runner)
    elif action == 'resolved':
        eventlog_id_list = opts.pop('')
        DefaultEventlogManager().resolve_eventlog_records(runner, eventlog_id_list)
    elif action == 'list':
        eventlog_number_of_records = opts.pop('')
        DefaultEventlogManager().get_eventlog_info(runner, eventlog_number_of_records)
    else:
        # Not expected to run: action was already checked against
        # EVENTLOG_OPTIONS ('list', 'clear', 'resolved') above.
        DefaultEventlogManager().get_eventlog_info(runner, "all")
translate unicode string to normal string to avoid docopt error new_args=[] - for a in req['args']: - new_args.append(a.encode('utf-8')) + if req['args']: + for a in req['args']: + new_args.append(a.encode('utf-8')) # call the function in the specified manager func(req['nodeinfo'], new_args) # after the method returns, the request should be handled diff --git a/xCAT-server/lib/xcat/plugins/dhcp.pm b/xCAT-server/lib/xcat/plugins/dhcp.pm index c824641be..0eb224ba7 100644 --- a/xCAT-server/lib/xcat/plugins/dhcp.pm +++ b/xCAT-server/lib/xcat/plugins/dhcp.pm @@ -59,6 +59,7 @@ my $dhcpconffile = $^O eq 'aix' ? '/etc/dhcpsd.cnf' : '/etc/dhcpd.conf'; my %dynamicranges; #track dynamic ranges defined to see if a host that resolves is actually a dynamic address my %netcfgs; my $distro = xCAT::Utils->osver(); +my $checkdomain=0; # dhcp 4.x will use /etc/dhcp/dhcpd.conf as the config file my $dhcp6conffile; @@ -1931,6 +1932,14 @@ sub process_request addnet($line[0], $line[2]); } } + if ($checkdomain) + { + $callback->({ error => [ "above error fail to generate new dhcp configuration file, restore dhcp configuration file $dhcpconffile" ], errorcode => [1] }); + my $backupfile = $dhcpconffile.".xcatbak"; + rename("$backupfile", $dhcpconffile); + xCAT::MsgUtils->trace($verbose_on_off, "d", "dhcp: Restore dhcp configuration file to $dhcpconffile"); + exit 1; + } foreach (@nrn6) { #do the ipv6 networks addnet6($_); #already did all the filtering before putting into nrn6 } @@ -2440,10 +2449,12 @@ sub addnet } else { $callback->( { - warning => [ - "No $net specific entry for domain, and no domain defined in site table." - ] + error => [ + "No domain defined for $net entry in networks table, and no domain defined in site table." 
+ ], + errorcode => [1] }); + $checkdomain=1; } if ($ent and $ent->{nameservers}) diff --git a/xCAT-server/lib/xcat/plugins/openbmc2.pm b/xCAT-server/lib/xcat/plugins/openbmc2.pm index 975cafaaf..1bcf513a0 100644 --- a/xCAT-server/lib/xcat/plugins/openbmc2.pm +++ b/xCAT-server/lib/xcat/plugins/openbmc2.pm @@ -38,9 +38,14 @@ sub handled_commands { rsetboot => 'nodehm:mgt=openbmc', rvitals => 'nodehm:mgt=openbmc', rspconfig => 'nodehm:mgt=openbmc', + reventlog => 'nodehm:mgt=openbmc', }; } +# Common logging messages: +my $usage_errormsg = "Usage error."; +my $reventlog_no_id_resolved_errormsg = "Provide a comma separated list of IDs to be resolved. Example: 'resolved=x,y,z'"; + my %node_info = (); my $callback; @@ -255,6 +260,26 @@ sub parse_args { } } elsif ($command eq 'rspconfig') { xCAT_plugin::openbmc::parse_args('rspconfig', $extrargs, $noderange); + } elsif ($command eq "reventlog") { + $subcommand = "all" if (!defined($ARGV[0])); + if ($subcommand =~ /^resolved=(.*)/) { + my $value = $1; + if (not $value) { + return ([ 1, "$usage_errormsg $reventlog_no_id_resolved_errormsg" ]); + } + + my $nodes_num = @$noderange; + if (@$noderange > 1) { + return ([ 1, "Resolving faults over a xCAT noderange is not recommended." 
]); + } + + xCAT::SvrUtils::sendmsg("Attempting to resolve the following log entries: $value...", $callback); + } elsif ($subcommand !~ /^\d+$|^all$|^clear$/) { + if ($subcommand =~ "resolved") { + return ([ 1, "$usage_errormsg $reventlog_no_id_resolved_errormsg" ]); + } + return ([ 1, "Unsupported command: $command $subcommand" ]); + } } else { return ([ 1, "Unsupported command: $command" ]); } @@ -358,6 +383,24 @@ sub refactor_args { } } } + if ($command eq "reventlog") { + if (!defined($extrargs->[0])) { + # If no parameters are passed, default to list all records + $request->{arg} = ["list","all"]; + } + else { + $subcommand = $extrargs->[0]; + } + if ($subcommand =~ /^\d+$/) { + unshift @$extrargs, "list"; + } + elsif ($subcommand =~/^resolved=(.*)/) { + unshift @$extrargs, "resolved"; + } + elsif ($subcommand =~/^all$/) { + unshift @$extrargs, "list"; + } + } return 0; } diff --git a/xCAT-test/autotest/testcase/reventlog/cases0 b/xCAT-test/autotest/testcase/reventlog/cases0 index 7a1e7a1a1..82fa7cc30 100644 --- a/xCAT-test/autotest/testcase/reventlog/cases0 +++ b/xCAT-test/autotest/testcase/reventlog/cases0 @@ -7,7 +7,7 @@ end start:reventlog_all cmd:reventlog $$CN all check:rc==0 -check:output=~$$CN\s*:\s*.*\d\d/\d\d/\d\d\s*\S+ +check:output=~$$CN\s*:\s*.*\d\d/\d\d/\d\d\s*\S+|No attributes returned from the BMC end start:reventlog_clear @@ -19,7 +19,7 @@ end start:reventlog_numofentries cmd:reventlog $$CN 5 check:rc==0 -check:output=~$$CN\s*:\s*.*\d\d/\d\d/\d\d\s*\S+|$$CN: no SEL entries|Entry +check:output=~$$CN\s*:\s*.*\d\d/\d\d/\d\d\s*\S+|$$CN: no SEL entries|Entry|No attributes returned from the BMC end start:reventlog_s_openbmc diff --git a/xCAT-test/autotest/testcase/rspconfig/cases0 b/xCAT-test/autotest/testcase/rspconfig/cases0 index c5c0a8183..5dc0ee6a8 100644 --- a/xCAT-test/autotest/testcase/rspconfig/cases0 +++ b/xCAT-test/autotest/testcase/rspconfig/cases0 @@ -269,7 +269,9 @@ cmd:mkdir -p 
/tmp/rspconfig_set_hostname_equal_star_with_bmc_is_hostname check:rc == 0 cmd:lsdef $$CN -z > /tmp/rspconfig_set_hostname_equal_star_with_bmc_is_hostname/$$CN.stanza check:rc == 0 -cmd:chdef -t node -o bogus_bmc_hostname groups=bmc ip=10.6.17.100 +cmd:chdef -t node -o bogus_bmc_hostname groups=bmc ip=__GETNODEATTR($$CN,bmc)__ +check:rc == 0 +cmd:lsdef bogus_bmc_hostname check:rc == 0 cmd:makehosts bogus_bmc_hostname check:rc == 0