From 0caa9ec8d3075d17c439946824d1fc2cdb186e7b Mon Sep 17 00:00:00 2001 From: GONG Jie Date: Thu, 27 Aug 2015 12:38:09 +0800 Subject: [PATCH 01/64] Add new configuration files for rsyslog and logrotate --- xCAT/etc/logrotate.d/xcat | 7 +++++++ xCAT/etc/rsyslog.d/xcat-cluster.conf | 2 ++ xCAT/etc/rsyslog.d/xcat-compute.conf | 2 ++ 3 files changed, 11 insertions(+) create mode 100644 xCAT/etc/logrotate.d/xcat create mode 100644 xCAT/etc/rsyslog.d/xcat-cluster.conf create mode 100644 xCAT/etc/rsyslog.d/xcat-compute.conf diff --git a/xCAT/etc/logrotate.d/xcat b/xCAT/etc/logrotate.d/xcat new file mode 100644 index 000000000..65d65abfb --- /dev/null +++ b/xCAT/etc/logrotate.d/xcat @@ -0,0 +1,7 @@ +/var/log/xcat/*.log { + missingok + sharedscripts + postrotate + /bin/kill -HUP `cat /var/run/syslogd.pid 2> /dev/null` 2> /dev/null || true + endscript +} diff --git a/xCAT/etc/rsyslog.d/xcat-cluster.conf b/xCAT/etc/rsyslog.d/xcat-cluster.conf new file mode 100644 index 000000000..bda027146 --- /dev/null +++ b/xCAT/etc/rsyslog.d/xcat-cluster.conf @@ -0,0 +1,2 @@ +$template xCATTraditionalFormat0,"%timegenerated% %HOSTNAME% %syslogtag% %msg:::drop-last-lf%\n" +:syslogtag, isequal, "xcat" /var/log/xcat/cluster.log;xCATTraditionalFormat0 diff --git a/xCAT/etc/rsyslog.d/xcat-compute.conf b/xCAT/etc/rsyslog.d/xcat-compute.conf new file mode 100644 index 000000000..23ee24fd3 --- /dev/null +++ b/xCAT/etc/rsyslog.d/xcat-compute.conf @@ -0,0 +1,2 @@ +$template xCATTraditionalFormat9,"%timegenerated% %HOSTNAME% %syslogtag% %msg:::drop-last-lf%\n" +:hostname, [!]isequal, $myhostname /var/log/xcat/computes.log;xCATTraditionalFormat9 From 3f12208038ae45181806b213e668ea5c94fdec19 Mon Sep 17 00:00:00 2001 From: bybai Date: Fri, 28 Aug 2015 03:14:42 -0400 Subject: [PATCH 02/64] add doc for using_updatenode --- .../manage_clusters/common/updatenode.rst | 452 +++++++++++++++++- 1 file changed, 451 insertions(+), 1 deletion(-) diff --git 
a/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst b/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst index 39d885ca6..21a697b39 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst @@ -1,2 +1,452 @@ -Update Compute Node +Using Updatenode =================== + +Introduction +------------------ + +The xCAT platform-specific cookbooks explain how to initially deploy your nodes. After initial node deployment, you inevitably need to make changes/updates to your nodes. The updatenode command is for this purpose. It allows you to add or modify the following things on your nodes: + +#. Add additional software +#. Synchronize new/updated configuration files +#. Rerun postscripts +#. Update ssh keys and xCAT certificates + +Each of these will be explained in the document. The basic way to use updatenode is to set the definition of nodes on the management node the way you want it and then run updatenode to push those changes out to the actual nodes. Using options to the command, you can control which of the above categories updatenode pushes out to the nodes. + +Most of what is described in this document applies to **stateful** and **stateless** nodes. +In addition to the information in this document, check out the updatenode man page. + +Add Additional Software (Linux Only) +------------------------------------ + +The name of the rpms that will be installed on the node are stored in the packages list files. There are **two kinds of package list files**: + +#. The **package list file** contains the names of the rpms that comes from the os distro. They are stored in **.pkglist** file. +#. The **other package list file** contains the names of the rpms that do **NOT** come from the os distro. They are stored in **.otherpkgs.pkglist** file. + +The path to the package lists will be read from the osimage definition. 
Which osimage a node is using is specified by the provmethod attribute. To display this value for a node: :: + + lsdef node1 -i provmethod + Object name: dx360m3n03 + provmethod=rhels6.3-x86_64-netboot-compute + +You can display this details of this osimage by running the following command, supplying your osimage name: :: + + lsdef -t osimage rhels6.3-x86_64-netboot-compute + Object name: rhels6.3-x86_64-netboot-compute + exlist=/opt/xcat/share/xcat/netboot/rhels6.3/compute.exlist + imagetype=linux + osarch=x86_64 + osname=Linux + osvers=rhels6.3 + otherpkgdir=/install/post/otherpkgs/rhels6.3/x86_64 + otherpkglist=/install/custom/netboot/rh/compute.otherpkgs.pkglist + pkgdir=/install/rhels6/x86_64 + pkglist=/opt/xcat/share/xcat/netboot/rhels6/compute.pkglist + postinstall=/opt/xcat/share/xcat/netboot/rh/compute.rhels6.x86_64.postinstall + profile=compute + provmethod=netboot + rootimgdir=/install/netboot/rhels6.3/x86_64/compute + synclists=/install/custom/netboot/compute.synclist + +You can set the pkglist and otherpkglist using the following command: :: + + chdef -t osimage rhels6.3-x86_64-netboot-compute pkglist=/opt/xcat/share/xcat/netboot/rh/compute.pkglist\ + otherpkglist=/install/custom/netboot/rh/my.otherpkgs.pkglist + +Installing Additional OS Distro Packages +---------------------------------------- + +For rpms from the OS distro, add the new rpm names (without the version number) in the .pkglist file. For example, file /install/custom/netboot/sles/compute.pkglist will look like this after adding perl-DBI:: + + bash + nfs-utils + openssl + dhcpcd + kernel-smp + openssh + procps + psmisc + resmgr + wget + rsync + timezone + perl-DBI + +If you have newer updates to some of your operating system packages that you would like to apply to your OS image, you can place them in another directory, and add that directory to your osimage pkgdir attribute. 
For example, with the osimage defined above, if you have a new openssl package that you need to update for security fixes, you could place it in a directory, create repository data, and add that directory to your pkgdir: :: + + mkdir -p /install/osupdates/rhels6.3/x86_64 + cd /install/osupdates/rhels6.3/x86_64 + cp . + createrepo . + chdef -t osimage rhels6.3-x86_64-netboot-compute pkgdir=/install/rhels6/x86_64,/install/osupdates/rhels6.3/x86_64 + +Note:If the objective node is not installed by xCAT,please make sure the correct osimage pkgdir attribute so that you could get the correct repository data. + +Install Additional non-OS rpms +------------------------------ + +Installing Additional Packages Using an Otherpkgs Pkglist +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you have additional rpms (rpms not in the distro) that you also want installed, make a directory to hold them, create a list of the rpms you want installed, and add that information to the osimage definition: + +#. Create a directory to hold the additional rpms: :: + + mkdir -p /install/post/otherpkgs/rh/x86_64 + cd /install/post/otherpkgs/rh/x86_64 + cp /myrpms/* . + createrepo . + + NOTE: when the management node is rhels6.x, and the otherpkgs repository data is for rhels5.x, + we should run createrepo with "-s md5". Such as: :: + + createrepo -s md5 . + +#. Create a file that lists the additional rpms that should be installed. For example, in /install/custom/netboot/rh/compute.otherpkgs.pkglist put: :: + + myrpm1 + myrpm2 + myrpm3 + +#. Add both the directory and the file to the osimage definition: :: + + chdef -t osimage mycomputeimage otherpkgdir=/install/post/otherpkgs/rh/x86_64 \ + otherpkglist=/install/custom/netboot/rh/compute.otherpkgs.pkglist + + If you add more rpms at a later time, you must run createrepo again. The createrepo command is in the createrepo rpm, which for RHEL is in the 1st DVD, but for SLES is in the SDK DVD. 
+ + If you have **multiple sets** of rpms that you want to **keep separate** to keep them organized, you can put them in separate sub-directories in the otherpkgdir: + + 1. Run createrepo in each sub-directory. + + 2. In your otherpkgs.pkglist, list at least 1 file from each sub-directory. (During installation, + xCAT will define a yum or zypper repository for each directory you reference in your + otherpkgs.pkglist.) + + For example: :: + + xcat/xcat-core/xCATsn + xcat/xcat-dep/rh6/x86_64/conserver-xcat + + There are some examples of otherpkgs.pkglist in /opt/xcat/share/xcat/netboot//service.*.otherpkgs.pkglist that show the format. + + Note: the otherpkgs postbootscript should by default be associated with every node. Use lsdef to check: :: + + lsdef node1 -i postbootscripts + + If it is not, you need to add it. For example, add it for all of the nodes in the "compute" group: :: + + chdef -p -t group compute postbootscripts=otherpkgs + + For the format of the .otherpkg.pklist file, go to Appendix_A:File_Format_for.pkglist_File + + +Update Stateful Nodes +^^^^^^^^^^^^^^^^^^^^^ + +Run the updatenode command to push the new software to the nodes: :: + + updatenode -S + + +The -S flag updates the nodes with all the new or updated rpms specified in both .pkglist and .otherpkgs.pkglist. + +If you have a configuration script that is necessary to configure the new software, then instead run: :: + + cp myconfigscript /install/postscripts/ + chdef -p -t compute postbootscripts=myconfigscript + updatenode ospkgs,otherpkgs,myconfigscript + + +The next time you re-install these nodes, the additional software will be automatically installed. + +Update Stateless Nodes +^^^^^^^^^^^^^^^^^^^^^^ + +Run the updatenode command to push the new software to the nodes: :: + + updatenode -S + + +The -S flag updates the nodes with all the new or updated rpms specified in both .pkglist and .otherpkgs.pkglist. 
+ +If you have a configuration script that is necessary to configure the new software, then instead run: :: + + cp myconfigscript /install/postscripts/ + chdef -p -t compute postbootscripts=myconfigscript + updatenode ospkgs,otherpkgs,myconfigscript + +**You must also do this next step**, otherwise the next time you reboot the stateless nodes, the new software won't be on the nodes. Run genimage and packimage to install the extra rpms into the image: :: + + genimage + packimage + +Update the delta changes in Sysclone environment +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Updatenode can also be used in Sysclone environment to push delta changes to target node. After capturing the delta changes from the golden client to management node, just run below command to push delta changes to target nodes. See **TODO:Using_Clone_to_Deploy_Server#Update_Nodes_Later_On_** for more information. :: + + updatenode -S + +Rerun Postscripts or Run Additional Postcripts with the updatenode Command +-------------------------------------------------------------------------- + +You can use the updatenode command to perform the following functions after the nodes are up and running: + + * Rerun postscripts defined in the postscripts table. You might want to do this, for example, if you changed database attributes that affect the running of the postscripts. + * Run any additional postscript one time. (If you want it run every time the node is deployed, you should add it to the postscript or postbootscript attribute of the nodes or node group.) The reason you might want to run a postscript on the nodes once, instead of running a script via xdsh or psh, is that the former approach will make a lot of environment variables available to the postscript that contain the node database values. See [Postscripts_and_Prescripts] for more information. + +To rerun all the postscripts for the nodes. 
(In general, xCAT postscripts are structured such that it is not harmful to run them multiple times.) :: + + updatenode -P + + +To rerun just the syslog postscript for the nodes: :: + + updatenode -P syslog + +To run a list of your own postscripts, make sure the scripts are copied to /install/postscripts directory, then: :: + + updatenode -P "script1,script2" + +If you need to, you can also pass arguments to your scripts (this will work in xCAT 2.6.7 and greater): :: + + updatenode -P "script1 p1 p2,script2" + +mypostscript template for updatenode + +As of xCAT 2.8, you can customize what attributes you want made available to the post*script, using the shipped mypostscript.tmpl file. + +[[**TODO**:include ref=Template_of_mypostscript]] + +Update the ssh Keys and Credentials on the Nodes +------------------------------------------------ + +If after node deployment, the ssh keys or xCAT ssl credentials become corrupted, xCAT provides a way to quickly fix the keys and credentials on your Service and compute nodes: :: + + updatenode -K + +Note: this option can't be used with any of the other updatenode options. + +syncfiles to the nodes +---------------------- + +If after install, you would like to sync files to the nodes, use the instructions in the next section on "Setting up syncfile for updatenode" and then run: :: + + updatenode -F + + +**With the updatenode command the syncfiles postscript cannot be used to sync files to the nodes.** Therefore, if you run updatenode <noderange> -P syncfiles, nothing will be done. A messages will be logged that you must use updatenode <noderange> -F to sync files using updatenode. 
+ +Setting up syncfile for updatenode +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +[[**TODO**:include ref=The_location_of_synclist_file_for_updatenode_and_install_process]] + +Appendix A: File Format for otherpkgs.pkglist File +-------------------------------------------------- + +The otherpkgs.pklist file can contain the following types of entries: + + * rpm name without version numbers + * otherpkgs subdirectory plus rpm name + * blank lines + * comment lines starting with # + * #INCLUDE: # to include other pkglist files + * #NEW_INSTALL_LIST# to signify that the following rpms will be installed with a new rpm install command (zypper, yum, or rpm as determined by the function using this file) + * #ENV:# to specify environment variable(s) for a sperate rpm install command + * rpms to remove before installing marked with a "-" + * rpms to remove after installing marked with a "--" + +These are described in more details in the following sections. + +RPM Names +--------- + +A simple otherpkgs.pkglist file just contains the the name of the rpm file without the version numbers. + +For example, if you put the following three rpms under /install/post/otherpkgs/<os>/<arch>/ directory, :: + + rsct.core-2.5.3.1-09120.ppc.rpm + rsct.core.utils-2.5.3.1-09118.ppc.rpm + src-1.3.0.4-09118.ppc.rpm + +The otherpkgs.pkglist file will be like this: :: + + src + rsct.core + rsct.core.utils + +RPM Names with otherpkgs Subdirectories +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you create a subdirectory under /install/post/otherpkgs/<os>/<arch>/, say rsct, the otherpkgs.pkglist file will be like this: :: + + rsct/src + rsct/rsct.core + rsct/rsct.core.utils + +Include Other pkglist Files +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can group some rpms in a file and include that file in the otherpkgs.pkglist file using #INCLUDE:# format. 
:: + + rsct/src + rsct/rsct.core + rsct/rsct.core.utils + #INCLUDE:/install/post/otherpkgs/myotherlist# + +where /install/post/otherpkgs/myotherlist is another package list file that follows the same format. + +Note the trailing "#" character at the end of the line. It is important to specify this character for correct pkglist parsing. + +Multiple Install Lists +^^^^^^^^^^^^^^^^^^^^^^ + +The #NEW_INSTALL_LIST# statement is supported in xCAT 2.4 and later. + +You can specify that separate calls should be made to the rpm install program (zypper, yum, rpm) for groups of rpms by specifying the entry #NEW_INSTALL_LIST# on a line by itself as a separator in your pkglist file. All rpms listed up to this separator will be installed together. You can have as many separators as you wish in your pkglist file, and each sublist will be installed separately in the order they appear in the file. + +For example: :: + + compilers/vacpp.rte + compilers/vac.lib + compilers/vacpp.lib + compilers/vacpp.rte.lnk + #NEW_INSTALL_LIST# + pe/IBM_pe_license + +Environment Variable List +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The #ENV statement is supported on Redhat and SLES in xCAT 2.6.9 and later. + +You can specify environment variable(s) for each rpm install call by entry "#ENV:#". The environment variables also apply to rpm(s) remove call if there is rpm(s) needed to be removed in the sublist. + +For example: :: + + #ENV:INUCLIENTS=1 INUBOSTYPE=1# + rsct/rsct.core + rsct/rsct.core.utils + rsct/src + +Be same as, :: + + #ENV:INUCLIENTS=1# + #ENV:INUBOSTYPE=1# + rsct/rsct.core + rsct/rsct.core.utils + rsct/src + +Remove RPMs Before Installing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The "-" syntax is supported in xCAT 2.3 and later. + +You can also specify in this file that certain rpms to be removed before installing the new software. This is done by adding '-' before the rpm names you want to remove. 
For example: :: + + rsct/src + rsct/rsct.core + rsct/rsct.core.utils + #INCLUDE:/install/post/otherpkgs/myotherlist# + -perl-doc + + +If you have #NEW_INSTALL_LIST# separators in your pkglist file, the rpms will be removed before the install of the sublist that the "-" appears in. + +Remove RPMs After Installing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The "--" syntax is supported in xCAT 2.3 and later. + +You can also specify in this file that certain rpms to be removed after installing the new software. This is done by adding '--' before the rpm names you want to remove. For example: :: + + pe/IBM_pe_license + --ibm-java2-ppc64-jre + +If you have #NEW_INSTALL_LIST# separators in your pkglist file, the rpms will be removed after the install of the sublist that the "--" appears in. + +Appendix B: File Format for .pkglist File +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The .pklist file is used to specify the rpm and the group/pattern names from os distro that will be installed on the nodes. It can contain the following types of entries: :: + + * rpm name without version numbers + * group/pattern name marked with a '@' (for full install only) + * rpms to removed after the installation marked with a "-" (for full install only) + +These are described in more details in the following sections. + +RPM Names +^^^^^^^^^ + +A simple .pkglist file just contains the the name of the rpm file without the version numbers. + +For example, :: + + openssl + xntp + rsync + glibc-devel.i686 + +Include pkglist Files +^^^^^^^^^^^^^^^^^^^^^ + +The #INCLUDE statement is supported in the pkglist file. + +You can group some rpms in a file and include that file in the pkglist file using #INCLUDE:# format. :: + + openssl + xntp + rsync + glibc-devel.1686 + #INCLUDE:/install/post/custom/rh/myotherlist# + +where /install/post/custom/rh/myotherlist is another package list file that follows the same format. + +Note: the trailing "#" character at the end of the line. 
It is important to specify this character for correct pkglist parsing. + +Group/Pattern Names +^^^^^^^^^^^^^^^^^^^ + +It is only supported for statefull deployment. + +In Linux, a groups of rpms can be packaged together into one package. It is called a **group** on RedHat, CentOS, Fedora and Scientific Linux. To get the a list of available groups, run :: + + yum grouplist + +On SLES, it is called a **pattern**. To list all the available patterns, run :: + + zypper se -t pattern + +You can specify in this file the group/pattern names by adding a '@' and a space before the group/pattern names. For example: :: + + @ base + +Remove RPMs After Installing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It is only supported for statefull deployment. + +You can specify in this file that certain rpms to be removed after installing the new software. This is done by adding '-' before the rpm names you want to remove. For example: :: + + wget + +Appendix C: Debugging Tips +-------------------------- + +Internally updatenode command uses the xdsh in the following ways: + +Linux: xdsh -e /install/postscripts/xcatdsklspost -m + +AIX: xdsh -e /install/postscripts/xcataixspost -m -c 1 + +where is a comma separated postscript like ospkgs,otherpkgs etc. + + * wget is used in xcatdsklspost/xcataixpost to get all the postscripts from the to the node. You can check /tmp/wget.log file on the node to see if wget was successful or not. You need to make sure the /xcatpost directory has enough space to hold the postscripts. + * A file called /xcatpost/mypostscript (Linux) or /xcatpost/myxcatpost_ (AIX) is created on the node which contains the environmental variables and scripts to be run. Please make sure this file exists and it contains correct info. You can also run this file on the node manually to debug. + * For ospkgs/otherpkgs, if /install is not mounted on the , it will download all the rpms from the to the node using wget. 
Please make sure /tmp and /xcatpost have enough space to hold the rpms and please check /tmp/wget.log for errors. + * For ospkgs/otherpkgs, If zypper or yum is installed on the node, it will be used the command to install the rpms. Please make sure to run createrepo on the source direcory on the every time a rpm is added or removed. Otherwise, the rpm command will be used, in this case, please make sure all the necessary depended rpms are copied in the same source directory. + * You can append -x on the first line of ospkgs/otherpkgs to get more debug info. + From 4e6ce2860d8d4269370eb0717ae1a601d0f0f9a7 Mon Sep 17 00:00:00 2001 From: bybai Date: Fri, 28 Aug 2015 03:23:20 -0400 Subject: [PATCH 03/64] add TODO for :Postscripts_and_Prescripts --- .../guides/admin-guides/manage_clusters/common/updatenode.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst b/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst index 21a697b39..a65db63e5 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst @@ -195,7 +195,7 @@ Rerun Postscripts or Run Additional Postcripts with the updatenode Command You can use the updatenode command to perform the following functions after the nodes are up and running: * Rerun postscripts defined in the postscripts table. You might want to do this, for example, if you changed database attributes that affect the running of the postscripts. - * Run any additional postscript one time. (If you want it run every time the node is deployed, you should add it to the postscript or postbootscript attribute of the nodes or node group.) 
The reason you might want to run a postscript on the nodes once, instead of running a script via xdsh or psh, is that the former approach will make a lot of environment variables available to the postscript that contain the node database values. See [Postscripts_and_Prescripts] for more information. + * Run any additional postscript one time. (If you want it run every time the node is deployed, you should add it to the postscript or postbootscript attribute of the nodes or node group.) The reason you might want to run a postscript on the nodes once, instead of running a script via xdsh or psh, is that the former approach will make a lot of environment variables available to the postscript that contain the node database values. See [[**TODO** :Postscripts_and_Prescripts]] for more information. To rerun all the postscripts for the nodes. (In general, xCAT postscripts are structured such that it is not harmful to run them multiple times.) :: From 4a91e86eafc177dcf1cba01bb9d2510e539f119c Mon Sep 17 00:00:00 2001 From: bybai Date: Mon, 31 Aug 2015 02:18:28 -0400 Subject: [PATCH 04/64] add software and firmware inventory doc --- docs/source/advanced/firmware_inventory.rst | 41 +++++++++++++++++++++ docs/source/advanced/index.rst | 1 + 2 files changed, 42 insertions(+) create mode 100644 docs/source/advanced/firmware_inventory.rst diff --git a/docs/source/advanced/firmware_inventory.rst b/docs/source/advanced/firmware_inventory.rst new file mode 100644 index 000000000..cca394248 --- /dev/null +++ b/docs/source/advanced/firmware_inventory.rst @@ -0,0 +1,41 @@ +Software and Firmware Inventory +=============================== + +xCAT provides a command '**sinv'** that checks the software and firmware configuration in this cluster. + +The command creates an inventory of the input software/firmware check, comparing to other machines in the cluster and produces an output of node that are installed the same and those that are not. 
+ +This command uses the xdsh parallel command, so it is in itself a parallel command, and thus can be run on multiple cluster nodes at one time and is hierarchical. + +The sinv command is designed to check the configuration of the nodes in a cluster. The command takes as input command line flags, and one or more templates which will be compared against the output of the xdsh command, designated to be run on the nodes in the noderange. + +The nodes will then be grouped according to the template they match and a report returned to the administrator in the output file designated or to stdout. + +sinv supports checking the output from the rinv or xdsh command. + +For example, if you wanted to check the ssh level on all the nodes and make sure they were the same as on the service node, you would first generate a template from the "good" service node (sn1) by running the following: :: + + xdsh sn1 "rpm -qa | grep ssh " | xdshcoll > /tmp/sinv/sinv.template + +To execute sinv using the sinv.template generated above on the nodegroup, testnodes ,writing output report to /tmp/sinv.output, enter: :: + + sinv -c "xdsh testnodes rpm -qa | grep ssh" -p /tmp/sinv/sinv.template -o /tmp/sinv.output + +The report will look something like this, if every node matches: + + Command started with following input: :: + + xdsh cmd:xdsh testnodes rpm -qa | grep ssh. + Template path:/tmp/sinv/sinv.template. + Template cnt:0. + Remove template:NO. + Output file:/tmp/sinv/sinv.output. + Exactmatch:NO. + Ignorefirst:NO. + Seed node:None. + file:None. + The following nodes match /tmp/lissav/sinv.template: + testnodes + +There are many options for matching and reporting supported by the sinv command, including support to run rinv and generate reports on firmware inventory. 
+ diff --git a/docs/source/advanced/index.rst b/docs/source/advanced/index.rst index d9deb5afb..de3c54432 100644 --- a/docs/source/advanced/index.rst +++ b/docs/source/advanced/index.rst @@ -22,3 +22,4 @@ Advanced Topics vlan.rst zone.rst softlayer.rst + firmware_inventory.rst From ca9551d5cb9456d70d60c33b051d349f655604d7 Mon Sep 17 00:00:00 2001 From: bybai Date: Mon, 31 Aug 2015 02:19:18 -0400 Subject: [PATCH 05/64] add parammel commands doc --- .../manage_clusters/common/parallel_cmd.rst | 100 +++++++++++++++++- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/docs/source/guides/admin-guides/manage_clusters/common/parallel_cmd.rst b/docs/source/guides/admin-guides/manage_clusters/common/parallel_cmd.rst index 600bc1b2f..65ccdfa3f 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/parallel_cmd.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/parallel_cmd.rst @@ -1,2 +1,98 @@ -Using Parallel Command -====================== +Parallel Commands +================= + +xCAT delivers a set of commands that can be run remote commands (ssh,scp,rsh,rcp,rsync,ping,cons) in parallel on multiple nodes. In addition the command have the capability of formatting the output from the commands, so the results are easier to process. These commands will make it much easier to administer your large cluster. 
+ +For a list of the Parallel Commands and their man pages: + +TODO: doc link to [parallel-commands] + +Examples for xdsh +----------------- + +- To set up the SSH keys for root on node1, run as root: :: + + xdsh node1 -K + +- To run the ps -ef command on node targets node1 and node2, enter: :: + + xdsh node1,node2 "ps -ef" + +- To run the ps command on node targets node1 and run the remote command with the -v and -t flag, enter: :: + + xdsh node1,node2 -o"-v -t" ps =item * + +- To execute the commands contained in myfile in the XCAT context on several node targets, with a fanout of 1, enter: :: + + xdsh node1,node2 -f 1 -e myfile + +- To run the ps command on node1 and ignore all the dsh environment variable except the DSH_NODE_OPTS, enter: :: + + xdsh node1 -X `DSH_NODE_OPTS' ps + +- To run on Linux, the xdsh command "dpkg | grep vim" on the node ubuntu diskless image, enter: :: + + xdsh -i /install/netboot/ubuntu14.04.2/ppc64el/compute/rootimg "dpkg -l|grep vim" + +- To run xdsh with the non-root userid "user1" that has been setup as an xCAT userid and with sudo on node1 and node2 to run as root, do the following, see xCAT doc on Granting_Users_xCAT_privileges: :: + + xdsh node1,node2 --sudo -l user1 "cat /etc/passwd" + +Examples for xdcp +----------------- + +- To copy the /etc/hosts file from all nodes in the cluster to the /tmp/hosts.dir directory on the local host, enter: :: + + xdcp all -P /etc/hosts /tmp/hosts.dir + + A suffix specifying the name of the target is appended to each file name. 
The contents of the /tmp/hosts.dir directory are similar to: :: + + hosts._node1 hosts._node4 hosts._node7 + hosts._node2 hosts._node5 hosts._node8 + hosts._node3 hosts._node6 + +- To copy /localnode/smallfile and /tmp/bigfile to /tmp on node1 using rsync and input -t flag to rsync, enter: :: + + xdcp node1 -r /usr/bin/rsync -o "-t" /localnode/smallfile /tmp/bigfile /tmp + +- To copy the /etc/hosts file from the local host to all the nodes in the cluster, enter: :: + + xdcp all /etc/hosts /etc/hosts + +- To rsync the /etc/hosts file to your compute nodes: + + Create a rsync file /tmp/myrsync, with this line: :: + + /etc/hosts -> /etc/hosts + + or + + /etc/hosts -> /etc/ (last / is required) + + Run: :: + + xdcp compute -F /tmp/myrsync + +- To rsync the /etc/file1 and file2 to your compute nodes and rename to filex and filey: + + Create a rsync file /tmp/myrsync, with these line: :: + + /etc/file1 -> /etc/filex + + /etc/file2 -> /etc/filey + + Run: :: + + xdcp compute -F /tmp/myrsync to update the Compute Nodes + +- To rsync files in the Linux image at /install/netboot/ubuntu14.04.2/ppc64el/compute/rootimg on the MN: + + Create a rsync file /tmp/myrsync, with this line: :: + + /etc/hosts /etc/passwd -> /etc + + Run: :: + + xdcp -i /install/netboot/ubuntu14.04.2/ppc64el/compute/rootimg -F /tmp/myrsync + + From e51726823a0d7f39b37db7bbd89069bf1426a4b5 Mon Sep 17 00:00:00 2001 From: bybai Date: Mon, 31 Aug 2015 02:30:27 -0400 Subject: [PATCH 06/64] empty link for man page doc --- .../admin-guides/manage_clusters/common/parallel_cmd.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/source/guides/admin-guides/manage_clusters/common/parallel_cmd.rst b/docs/source/guides/admin-guides/manage_clusters/common/parallel_cmd.rst index 65ccdfa3f..dff4eacd5 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/parallel_cmd.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/parallel_cmd.rst @@ -3,9 +3,7 @@ Parallel Commands 
xCAT delivers a set of commands that can be run remote commands (ssh,scp,rsh,rcp,rsync,ping,cons) in parallel on multiple nodes. In addition the command have the capability of formatting the output from the commands, so the results are easier to process. These commands will make it much easier to administer your large cluster. -For a list of the Parallel Commands and their man pages: - -TODO: doc link to [parallel-commands] +For a list of the Parallel Commands and their man pages doc `parallel commands`_. Examples for xdsh ----------------- From 15865ebc91a51eb6bd281b5b7a47252416c18c2b Mon Sep 17 00:00:00 2001 From: bybai Date: Mon, 31 Aug 2015 03:18:14 -0400 Subject: [PATCH 07/64] considering integration with other doc, restruct updatenode doc. --- .../manage_clusters/common/updatenode.rst | 436 +++--------------- 1 file changed, 71 insertions(+), 365 deletions(-) diff --git a/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst b/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst index a65db63e5..7f6069cc9 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst @@ -1,14 +1,14 @@ -Using Updatenode -=================== +Update Compute Nodes +==================== Introduction ------------------ -The xCAT platform-specific cookbooks explain how to initially deploy your nodes. After initial node deployment, you inevitably need to make changes/updates to your nodes. The updatenode command is for this purpose. It allows you to add or modify the following things on your nodes: +After initial node deployment, you inevitably need to make changes/updates to your nodes. The updatenode command is for this purpose. It allows you to add or modify the followings on your nodes: #. Add additional software +#. Rerun postscripts or Run Additional Postcripts #. Synchronize new/updated configuration files -#. Rerun postscripts #. 
Update ssh keys and xCAT certificates Each of these will be explained in the document. The basic way to use updatenode is to set the definition of nodes on the management node the way you want it and then run updatenode to push those changes out to the actual nodes. Using options to the command, you can control which of the above categories updatenode pushes out to the nodes. @@ -16,171 +16,63 @@ Each of these will be explained in the document. The basic way to use updatenode Most of what is described in this document applies to **stateful** and **stateless** nodes. In addition to the information in this document, check out the updatenode man page. -Add Additional Software (Linux Only) ------------------------------------- +Add Additional Software +------------------------- -The name of the rpms that will be installed on the node are stored in the packages list files. There are **two kinds of package list files**: +The packages that will be installed on the node are stored in the packages list files. There are **two kinds of package list files**: -#. The **package list file** contains the names of the rpms that comes from the os distro. They are stored in **.pkglist** file. -#. The **other package list file** contains the names of the rpms that do **NOT** come from the os distro. They are stored in **.otherpkgs.pkglist** file. - -The path to the package lists will be read from the osimage definition. Which osimage a node is using is specified by the provmethod attribute. 
To display this value for a node: :: - - lsdef node1 -i provmethod - Object name: dx360m3n03 - provmethod=rhels6.3-x86_64-netboot-compute - -You can display this details of this osimage by running the following command, supplying your osimage name: :: - - lsdef -t osimage rhels6.3-x86_64-netboot-compute - Object name: rhels6.3-x86_64-netboot-compute - exlist=/opt/xcat/share/xcat/netboot/rhels6.3/compute.exlist - imagetype=linux - osarch=x86_64 - osname=Linux - osvers=rhels6.3 - otherpkgdir=/install/post/otherpkgs/rhels6.3/x86_64 - otherpkglist=/install/custom/netboot/rh/compute.otherpkgs.pkglist - pkgdir=/install/rhels6/x86_64 - pkglist=/opt/xcat/share/xcat/netboot/rhels6/compute.pkglist - postinstall=/opt/xcat/share/xcat/netboot/rh/compute.rhels6.x86_64.postinstall - profile=compute - provmethod=netboot - rootimgdir=/install/netboot/rhels6.3/x86_64/compute - synclists=/install/custom/netboot/compute.synclist - -You can set the pkglist and otherpkglist using the following command: :: - - chdef -t osimage rhels6.3-x86_64-netboot-compute pkglist=/opt/xcat/share/xcat/netboot/rh/compute.pkglist\ - otherpkglist=/install/custom/netboot/rh/my.otherpkgs.pkglist +#. The **package list file** contains the names of the packages that come from the os distro. They are stored in **.pkglist** file. +#. The **other package list file** contains the names of the packages that do **NOT** come from the os distro. They are stored in **.otherpkgs.pkglist** file. Installing Additional OS Distro Packages ----------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -For rpms from the OS distro, add the new rpm names (without the version number) in the .pkglist file. For example, file /install/custom/netboot/sles/compute.pkglist will look like this after adding perl-DBI:: +For packages from the OS distro, add the new package names (without the version number) in the .pkglist file. 
If you have newer updates to some of your operating system packages that you would like to apply to your OS image, you can place them in another directory, and add that directory to your osimage pkgdir attribute. How to add additional OS distro packages, go to `Install Additional OS Packages `_ - bash - nfs-utils - openssl - dhcpcd - kernel-smp - openssh - procps - psmisc - resmgr - wget - rsync - timezone - perl-DBI +Note:If the objective node is not installed by xCAT, please make sure the correct osimage pkgdir attribute so that you could get the correct repository data. -If you have newer updates to some of your operating system packages that you would like to apply to your OS image, you can place them in another directory, and add that directory to your osimage pkgdir attribute. For example, with the osimage defined above, if you have a new openssl package that you need to update for security fixes, you could place it in a directory, create repository data, and add that directory to your pkgdir: :: - - mkdir -p /install/osupdates/rhels6.3/x86_64 - cd /install/osupdates/rhels6.3/x86_64 - cp . - createrepo . - chdef -t osimage rhels6.3-x86_64-netboot-compute pkgdir=/install/rhels6/x86_64,/install/osupdates/rhels6.3/x86_64 - -Note:If the objective node is not installed by xCAT,please make sure the correct osimage pkgdir attribute so that you could get the correct repository data. - -Install Additional non-OS rpms ------------------------------- - -Installing Additional Packages Using an Otherpkgs Pkglist -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If you have additional rpms (rpms not in the distro) that you also want installed, make a directory to hold them, create a list of the rpms you want installed, and add that information to the osimage definition: - -#. Create a directory to hold the additional rpms: :: - - mkdir -p /install/post/otherpkgs/rh/x86_64 - cd /install/post/otherpkgs/rh/x86_64 - cp /myrpms/* . - createrepo . 
- - NOTE: when the management node is rhels6.x, and the otherpkgs repository data is for rhels5.x, - we should run createrepo with "-s md5". Such as: :: - - createrepo -s md5 . - -#. Create a file that lists the additional rpms that should be installed. For example, in /install/custom/netboot/rh/compute.otherpkgs.pkglist put: :: - - myrpm1 - myrpm2 - myrpm3 - -#. Add both the directory and the file to the osimage definition: :: - - chdef -t osimage mycomputeimage otherpkgdir=/install/post/otherpkgs/rh/x86_64 \ - otherpkglist=/install/custom/netboot/rh/compute.otherpkgs.pkglist - - If you add more rpms at a later time, you must run createrepo again. The createrepo command is in the createrepo rpm, which for RHEL is in the 1st DVD, but for SLES is in the SDK DVD. - - If you have **multiple sets** of rpms that you want to **keep separate** to keep them organized, you can put them in separate sub-directories in the otherpkgdir: - - 1. Run createrepo in each sub-directory. - - 2. In your otherpkgs.pkglist, list at least 1 file from each sub-directory. (During installation, - xCAT will define a yum or zypper repository for each directory you reference in your - otherpkgs.pkglist.) - - For example: :: - - xcat/xcat-core/xCATsn - xcat/xcat-dep/rh6/x86_64/conserver-xcat - - There are some examples of otherpkgs.pkglist in /opt/xcat/share/xcat/netboot//service.*.otherpkgs.pkglist that show the format. - - Note: the otherpkgs postbootscript should by default be associated with every node. Use lsdef to check: :: - - lsdef node1 -i postbootscripts - - If it is not, you need to add it. 
For example, add it for all of the nodes in the "compute" group: :: - - chdef -p -t group compute postbootscripts=otherpkgs - - For the format of the .otherpkg.pklist file, go to Appendix_A:File_Format_for.pkglist_File +Install Additional non-OS Packages +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +If you have additional packages (packages not in the distro) that you also want installed, make a directory to hold them, create a list of the packages you want installed, and add that information to the osimage definition. How to add Additional Other Packages, go to `Install Additional Other Packages `_ Update Stateful Nodes ^^^^^^^^^^^^^^^^^^^^^ Run the updatenode command to push the new software to the nodes: :: - - updatenode -S - -The -S flag updates the nodes with all the new or updated rpms specified in both .pkglist and .otherpkgs.pkglist. + updatenode -S + +The -S flag updates the nodes with all the new or updated packages specified in both .pkglist and .otherpkgs.pkglist. If you have a configuration script that is necessary to configure the new software, then instead run: :: - + cp myconfigscript /install/postscripts/ chdef -p -t compute postbootscripts=myconfigscript updatenode ospkgs,otherpkgs,myconfigscript - -The next time you re-install these nodes, the additional software will be automatically installed. +The next time you re-install these nodes, the additional software will be automatically installed. Update Stateless Nodes ^^^^^^^^^^^^^^^^^^^^^^ Run the updatenode command to push the new software to the nodes: :: - - updatenode -S - -The -S flag updates the nodes with all the new or updated rpms specified in both .pkglist and .otherpkgs.pkglist. + updatenode -S + + +The -S flag updates the nodes with all the new or updated rpms specified in both .pkglist and .otherpkgs.pkglist. 
If you have a configuration script that is necessary to configure the new software, then instead run: :: - + cp myconfigscript /install/postscripts/ chdef -p -t compute postbootscripts=myconfigscript - updatenode ospkgs,otherpkgs,myconfigscript + updatenode ospkgs,otherpkgs,myconfigscript **You must also do this next step**, otherwise the next time you reboot the stateless nodes, the new software won't be on the nodes. Run genimage and packimage to install the extra rpms into the image: :: - + genimage - packimage + packimage Update the delta changes in Sysclone environment ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -189,264 +81,78 @@ Updatenode can also be used in Sysclone environment to push delta changes to tar updatenode -S -Rerun Postscripts or Run Additional Postcripts with the updatenode Command +Rerun Postscripts or Run Additional Postcripts -------------------------------------------------------------------------- -You can use the updatenode command to perform the following functions after the nodes are up and running: +You can use the updatenode command to perform the following functions after the nodes are up and running: - * Rerun postscripts defined in the postscripts table. You might want to do this, for example, if you changed database attributes that affect the running of the postscripts. - * Run any additional postscript one time. (If you want it run every time the node is deployed, you should add it to the postscript or postbootscript attribute of the nodes or node group.) The reason you might want to run a postscript on the nodes once, instead of running a script via xdsh or psh, is that the former approach will make a lot of environment variables available to the postscript that contain the node database values. See [[**TODO** :Postscripts_and_Prescripts]] for more information. + * Rerun postscripts defined in the postscripts table. + * Run any additional postscript one time. 
+ +Go to `Using Postscript `_ to see how to configure postscript. + +Go to `Using Prescript `_ to see how to configure prepostscript. To rerun all the postscripts for the nodes. (In general, xCAT postscripts are structured such that it is not harmful to run them multiple times.) :: - + updatenode -P - To rerun just the syslog postscript for the nodes: :: - - updatenode -P syslog + + updatenode -P syslog To run a list of your own postscripts, make sure the scripts are copied to /install/postscripts directory, then: :: - + updatenode -P "script1,script2" If you need to, you can also pass arguments to your scripts (this will work in xCAT 2.6.7 and greater): :: - + updatenode -P "script1 p1 p2,script2" - + mypostscript template for updatenode -As of xCAT 2.8, you can customize what attributes you want made available to the post*script, using the shipped mypostscript.tmpl file. +As of xCAT 2.8, you can customize what attributes you want made available to the post*script, using the shipped mypostscript.tmpl file. -[[**TODO**:include ref=Template_of_mypostscript]] +[[**TODO**:include ref=Template_of_mypostscript]] + +Synchronize new/updated configuration files +------------------------------------------- + +Setting up syncfile +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use instuctions in `Sync Files to Compute Node `_ + +syncfiles to the nodes +^^^^^^^^^^^^^^^^^^^^^^^^ + +After compute node is installed, you would like to sync files to the nodes: :: + + updatenode -F + +**With the updatenode command the syncfiles postscript cannot be used to sync files to the nodes.** Therefore, if you run updatenode -P syncfiles, nothing will be done. A messages will be logged that you must use updatenode -F to sync files using updatenode. 
Update the ssh Keys and Credentials on the Nodes ------------------------------------------------ If after node deployment, the ssh keys or xCAT ssl credentials become corrupted, xCAT provides a way to quickly fix the keys and credentials on your Service and compute nodes: :: - - updatenode -K -Note: this option can't be used with any of the other updatenode options. + updatenode -K -syncfiles to the nodes ----------------------- +Note: this option can't be used with any of the other updatenode options. -If after install, you would like to sync files to the nodes, use the instructions in the next section on "Setting up syncfile for updatenode" and then run: :: - - updatenode -F - - -**With the updatenode command the syncfiles postscript cannot be used to sync files to the nodes.** Therefore, if you run updatenode <noderange> -P syncfiles, nothing will be done. A messages will be logged that you must use updatenode <noderange> -F to sync files using updatenode. - -Setting up syncfile for updatenode -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -[[**TODO**:include ref=The_location_of_synclist_file_for_updatenode_and_install_process]] - -Appendix A: File Format for otherpkgs.pkglist File --------------------------------------------------- - -The otherpkgs.pklist file can contain the following types of entries: - - * rpm name without version numbers - * otherpkgs subdirectory plus rpm name - * blank lines - * comment lines starting with # - * #INCLUDE: # to include other pkglist files - * #NEW_INSTALL_LIST# to signify that the following rpms will be installed with a new rpm install command (zypper, yum, or rpm as determined by the function using this file) - * #ENV:# to specify environment variable(s) for a sperate rpm install command - * rpms to remove before installing marked with a "-" - * rpms to remove after installing marked with a "--" - -These are described in more details in the following sections. 
- -RPM Names ---------- - -A simple otherpkgs.pkglist file just contains the the name of the rpm file without the version numbers. - -For example, if you put the following three rpms under /install/post/otherpkgs/<os>/<arch>/ directory, :: - - rsct.core-2.5.3.1-09120.ppc.rpm - rsct.core.utils-2.5.3.1-09118.ppc.rpm - src-1.3.0.4-09118.ppc.rpm - -The otherpkgs.pkglist file will be like this: :: - - src - rsct.core - rsct.core.utils - -RPM Names with otherpkgs Subdirectories -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If you create a subdirectory under /install/post/otherpkgs/<os>/<arch>/, say rsct, the otherpkgs.pkglist file will be like this: :: - - rsct/src - rsct/rsct.core - rsct/rsct.core.utils - -Include Other pkglist Files -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -You can group some rpms in a file and include that file in the otherpkgs.pkglist file using #INCLUDE:# format. :: - - rsct/src - rsct/rsct.core - rsct/rsct.core.utils - #INCLUDE:/install/post/otherpkgs/myotherlist# - -where /install/post/otherpkgs/myotherlist is another package list file that follows the same format. - -Note the trailing "#" character at the end of the line. It is important to specify this character for correct pkglist parsing. - -Multiple Install Lists -^^^^^^^^^^^^^^^^^^^^^^ - -The #NEW_INSTALL_LIST# statement is supported in xCAT 2.4 and later. - -You can specify that separate calls should be made to the rpm install program (zypper, yum, rpm) for groups of rpms by specifying the entry #NEW_INSTALL_LIST# on a line by itself as a separator in your pkglist file. All rpms listed up to this separator will be installed together. You can have as many separators as you wish in your pkglist file, and each sublist will be installed separately in the order they appear in the file. 
- -For example: :: - - compilers/vacpp.rte - compilers/vac.lib - compilers/vacpp.lib - compilers/vacpp.rte.lnk - #NEW_INSTALL_LIST# - pe/IBM_pe_license - -Environment Variable List -^^^^^^^^^^^^^^^^^^^^^^^^^ - -The #ENV statement is supported on Redhat and SLES in xCAT 2.6.9 and later. - -You can specify environment variable(s) for each rpm install call by entry "#ENV:#". The environment variables also apply to rpm(s) remove call if there is rpm(s) needed to be removed in the sublist. - -For example: :: - - #ENV:INUCLIENTS=1 INUBOSTYPE=1# - rsct/rsct.core - rsct/rsct.core.utils - rsct/src - -Be same as, :: - - #ENV:INUCLIENTS=1# - #ENV:INUBOSTYPE=1# - rsct/rsct.core - rsct/rsct.core.utils - rsct/src - -Remove RPMs Before Installing -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The "-" syntax is supported in xCAT 2.3 and later. - -You can also specify in this file that certain rpms to be removed before installing the new software. This is done by adding '-' before the rpm names you want to remove. For example: :: - - rsct/src - rsct/rsct.core - rsct/rsct.core.utils - #INCLUDE:/install/post/otherpkgs/myotherlist# - -perl-doc - - -If you have #NEW_INSTALL_LIST# separators in your pkglist file, the rpms will be removed before the install of the sublist that the "-" appears in. - -Remove RPMs After Installing -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The "--" syntax is supported in xCAT 2.3 and later. - -You can also specify in this file that certain rpms to be removed after installing the new software. This is done by adding '--' before the rpm names you want to remove. For example: :: - - pe/IBM_pe_license - --ibm-java2-ppc64-jre - -If you have #NEW_INSTALL_LIST# separators in your pkglist file, the rpms will be removed after the install of the sublist that the "--" appears in. - -Appendix B: File Format for .pkglist File -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The .pklist file is used to specify the rpm and the group/pattern names from os distro that will be installed on the nodes. 
It can contain the following types of entries: :: - - * rpm name without version numbers - * group/pattern name marked with a '@' (for full install only) - * rpms to removed after the installation marked with a "-" (for full install only) - -These are described in more details in the following sections. - -RPM Names -^^^^^^^^^ - -A simple .pkglist file just contains the the name of the rpm file without the version numbers. - -For example, :: - - openssl - xntp - rsync - glibc-devel.i686 - -Include pkglist Files -^^^^^^^^^^^^^^^^^^^^^ - -The #INCLUDE statement is supported in the pkglist file. - -You can group some rpms in a file and include that file in the pkglist file using #INCLUDE:# format. :: - - openssl - xntp - rsync - glibc-devel.1686 - #INCLUDE:/install/post/custom/rh/myotherlist# - -where /install/post/custom/rh/myotherlist is another package list file that follows the same format. - -Note: the trailing "#" character at the end of the line. It is important to specify this character for correct pkglist parsing. - -Group/Pattern Names -^^^^^^^^^^^^^^^^^^^ - -It is only supported for statefull deployment. - -In Linux, a groups of rpms can be packaged together into one package. It is called a **group** on RedHat, CentOS, Fedora and Scientific Linux. To get the a list of available groups, run :: - - yum grouplist - -On SLES, it is called a **pattern**. To list all the available patterns, run :: - - zypper se -t pattern - -You can specify in this file the group/pattern names by adding a '@' and a space before the group/pattern names. For example: :: - - @ base - -Remove RPMs After Installing -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -It is only supported for statefull deployment. - -You can specify in this file that certain rpms to be removed after installing the new software. This is done by adding '-' before the rpm names you want to remove. 
For example: :: - - wget - -Appendix C: Debugging Tips +Appendix : Debugging Tips -------------------------- -Internally updatenode command uses the xdsh in the following ways: +Internally updatenode command uses the xdsh in the following ways: Linux: xdsh -e /install/postscripts/xcatdsklspost -m -AIX: xdsh -e /install/postscripts/xcataixspost -m -c 1 +where is a comma separated postscript like ospkgs,otherpkgs etc. -where is a comma separated postscript like ospkgs,otherpkgs etc. - - * wget is used in xcatdsklspost/xcataixpost to get all the postscripts from the to the node. You can check /tmp/wget.log file on the node to see if wget was successful or not. You need to make sure the /xcatpost directory has enough space to hold the postscripts. - * A file called /xcatpost/mypostscript (Linux) or /xcatpost/myxcatpost_ (AIX) is created on the node which contains the environmental variables and scripts to be run. Please make sure this file exists and it contains correct info. You can also run this file on the node manually to debug. - * For ospkgs/otherpkgs, if /install is not mounted on the , it will download all the rpms from the to the node using wget. Please make sure /tmp and /xcatpost have enough space to hold the rpms and please check /tmp/wget.log for errors. - * For ospkgs/otherpkgs, If zypper or yum is installed on the node, it will be used the command to install the rpms. Please make sure to run createrepo on the source direcory on the every time a rpm is added or removed. Otherwise, the rpm command will be used, in this case, please make sure all the necessary depended rpms are copied in the same source directory. - * You can append -x on the first line of ospkgs/otherpkgs to get more debug info. + * wget is used in xcatdsklspost/xcataixpost to get all the postscripts from the to the node. You can check /tmp/wget.log file on the node to see if wget was successful or not. You need to make sure the /xcatpost directory has enough space to hold the postscripts. 
+ * A file called /xcatpost/mypostscript (Linux) is created on the node which contains the environmental variables and scripts to be run. Please make sure this file exists and it contains correct info. You can also run this file on the node manually to debug. + * For ospkgs/otherpkgs, if /install is not mounted on the , it will download all the rpms from the to the node using wget. Please make sure /tmp and /xcatpost have enough space to hold the rpms and please check /tmp/wget.log for errors. + * For ospkgs/otherpkgs, If zypper or yum is installed on the node, it will be used the command to install the rpms. Please make sure to run createrepo on the source direcory on the every time a rpm is added or removed. Otherwise, the rpm command will be used, in this case, please make sure all the necessary depended rpms are copied in the same source directory. + * You can append -x on the first line of ospkgs/otherpkgs to get more debug info. From 2dc39ed47e8c4fcc4bb0c13af17c98fbd2a3b094 Mon Sep 17 00:00:00 2001 From: bybai Date: Mon, 31 Aug 2015 16:58:45 +0800 Subject: [PATCH 08/64] combine common parts from statefull and stateless --- .../manage_clusters/common/updatenode.rst | 20 ++----------------- 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst b/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst index 7f6069cc9..9395c7424 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/updatenode.rst @@ -36,7 +36,7 @@ Install Additional non-OS Packages If you have additional packages (packages not in the distro) that you also want installed, make a directory to hold them, create a list of the packages you want installed, and add that information to the osimage definition. 
How to add Additional Other Packages, go to `Install Additional Other Packages `_ -Update Stateful Nodes +Update Nodes ^^^^^^^^^^^^^^^^^^^^^ Run the updatenode command to push the new software to the nodes: :: @@ -53,23 +53,7 @@ If you have a configuration script that is necessary to configure the new softwa The next time you re-install these nodes, the additional software will be automatically installed. -Update Stateless Nodes -^^^^^^^^^^^^^^^^^^^^^^ - -Run the updatenode command to push the new software to the nodes: :: - - updatenode -S - - -The -S flag updates the nodes with all the new or updated rpms specified in both .pkglist and .otherpkgs.pkglist. - -If you have a configuration script that is necessary to configure the new software, then instead run: :: - - cp myconfigscript /install/postscripts/ - chdef -p -t compute postbootscripts=myconfigscript - updatenode ospkgs,otherpkgs,myconfigscript - -**You must also do this next step**, otherwise the next time you reboot the stateless nodes, the new software won't be on the nodes. Run genimage and packimage to install the extra rpms into the image: :: +**If you update stateless nodes, you must also do this next step**, otherwise the next time you reboot the stateless nodes, the new software won't be on the nodes. 
Run genimage and packimage to install the extra rpms into the image: :: genimage packimage From 1356b06fb11b9af0bdeba548c3cd0fc31e34a66a Mon Sep 17 00:00:00 2001 From: bybai Date: Wed, 2 Sep 2015 04:53:35 -0400 Subject: [PATCH 09/64] add manage vm doc --- .../manage_clusters/common/kvm/manage_vm.rst | 318 +++++++++++++++++- 1 file changed, 316 insertions(+), 2 deletions(-) diff --git a/docs/source/guides/admin-guides/manage_clusters/common/kvm/manage_vm.rst b/docs/source/guides/admin-guides/manage_clusters/common/kvm/manage_vm.rst index ab5ec9ab7..f87abd3ad 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/kvm/manage_vm.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/kvm/manage_vm.rst @@ -4,5 +4,319 @@ Manage Virtual Machine Create Virtual Machine ---------------------- -Change Virtual Machine ----------------------- +Define Virtual node "vm1" +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Define virtual machine vm1, add it to xCAT under the vm group, its ip is x.x.x.x, use makehost to add hostname and ip into /etc/hosts file: :: + + mkdef vm1 groups=vm,all + chdef vm1 ip=x.x.x.x + makehosts vm1 + +Update DNS with this new node: :: + + makedns -n + makedns -a + +Define the attributes of virtual machine +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Run the chdef command to change the following attributes for the vm1: + +1. Define the virtual cpu number: :: + + chdef vm1 vmcpus=2 + +2. Define the kvm hypervisor of the virtual machine vm1, it should be set to node001: :: + + chdef vm1 vmhost=node001 + +3. Define the virtual memory size, the unit is Megabit, for example, define 1G memory to the vm1: :: + + chdef vm1 vmmemory=1024 + + Note: For diskless node, the vmmemory should be set larger than 2048, otherwise the node cannot be booted up. + +4. Define the hardware management module: :: + + chdef vm1 mgt=kvm + +5. Define the virtual network card, it should be set to the bridge br0/virb0/default which defined in hypervisor. 
If no bridge was set explicitly, no network device will be created for the node vm1: :: + + chdef vm1 vmnics=br0 + +6. The vmnicnicmodel attribute is used to set the type and corresponding driver for the nic. If not set, the default value is 'virtio'. + :: + + chdef vm1 vmnicnicmodel=virtio + +7. Define the storage for the vm1, three formats for the storage source are supported. + + A. Create storage on a nfs server. + The format is 'nfs:///dir', that means the kvm disk files will be created at 'nfs:///dir': :: + + chdef vm1 vmstorage=nfs:///install/vms/ + + B. Create storage on a device of hypervisor + + The format is 'phy:/dev/sdb1': :: + + chdef vm1 vmstorage=phy:/dev/sdb1 + + C. Create storage on a directory of hypervisor + + The format is 'dir:/install/vms': :: + + chdef vm1 vmstorage=dir:///install/vms + + Note: The attribute vmstorage is only necessary for diskfull node. You can ignore it for diskless node. + +8. Define the console attributes for the virtual machine: :: + + chdef vm1 serialport=0 serialspeed=115200 + +9. (optional)For monitor the installing process from kimchi, set vidpassword value: :: + + chtab node=vm1 vm.vidpassword=abc123 + +10. Set 'netboot' attribute + + * **[x86_64]** + + :: + + chdef vm1 netboot=xnba + + * **[PPC64LE]** + :: + + chdef vm1 netboot=grub2 + + Make sure the grub2 had been installed on your Management Node: :: + + rpm -aq | grep grub2 + grub2-xcat-1.0-1.noarch + + Note: If you are working with xCAT-dep oldder than 20141012, the modules for xCAT shipped grub2 can not support ubuntu LE smoothly. So the following steps needed to complete the grub2 setting. :: + + rm /tftpboot/boot/grub2/grub2.ppc + cp /tftpboot/boot/grub2/powerpc-ieee1275/core.elf /tftpboot/boot/grub2/grub2.ppc + /bin/cp -rf /tmp/iso/boot/grub/powerpc-ieee1275/elf.mod /tftpboot/boot/grub2/powerpc-ieee1275/ + +Create the virtual machine +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Create the virtual machine kvm1 with 20G hard disk. 
:: + + mkvm vm1 -s 20G + +If the vm1 was created successfully, a hard disk file named vm1.hda.qcow2 can be found in vmstorage location. And you can run the lsdef vm1 to see whether the mac attribute has been set automatically. + +Create osimage object +^^^^^^^^^^^^^^^^^^^^^ + +After you download the OS ISO, refer to :ref:`create_img` to create osimage objects. + +Configure password for root in xCAT MN +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + chtab key=system passwd.username=root passwd.password=xxxxxx + +Configure DHCP +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + makedhcp -n + makedhcp -a + +Set the boot state +^^^^^^^^^^^^^^^^^^^ +:: + + nodeset vm1 osimage= + +Power on the virtual machine to start OS installation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + rpower vm1 on + +If the vm1 was powered on successfully, you can get following information when running 'virsh list' on the kvm hypervisor node001. :: + + virsh list + Id Name State + -------------------------------- + 6 vm1 running + + +Use console to monitor the installing process +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can use console in xcat management node or kvm hypervisor to monitor the process. + +* On the kvm hypervisor you can use virsh to open text console: :: + + virsh console vm1 + +* Use rcons/wcons on the xCAT management node to open text console: :: + + makeconservercf vm1 + rcons vm1 + wcons vm1 + +* Connecting to the virtual machine's vnc console + + In order to connect to the virtual machine's console, you need to generate a new set of credentials. You can do it by running: :: + + xcatclient getrvidparms vm1 + vm1: method: kvm + vm1: textconsole: /dev/pts/0 + vm1: password: JOQTUtn0dUOBv9o3 + vm1: vidproto: vnc + vm1: server: kvmhost1 + vm1: vidport: 5900 + + Note: Now just pick your favorite vnc client and connect to the hypervisor, using the password generated by "getrvidparms". 
If the vnc client complains the password is not valid, it is possible that your hypervisor and headnode clocks are out of sync! You can sync them by running "ntpdate " on both the hypervisor and the headnode. + + +* Use wvid on the xCAT management node + + Make sure firewalld service had been stopped. :: + + chkconfig firewalld off + + Note: Forwarding request to systemctl will disable firewalld.service. :: + + rm /etc/systemd/system/basic.target.wants/firewalld.service + rm /etc/systemd/system/dbus-org.fedoraproject.FirewallD1.service + + Then, run wvid vm1 on MN:: + + wvid vm1 + +* For powerKVM, we can use kimchi to monitor the installing process + + Open "https://:8001" to open kimchi. There will be a “connect” button you can use below "Actions" button and input Password required:abc123 your have set before mkvm, then you could get the console. + + +Remove a virtual machine +------------------------ + +Remove the kvm1 even when it is in power on status. :: + + rmvm mv1 -f + +Remove the definition of kvm and related storage. :: + + rmvm vm1 -p + + +Clone a kvm node +---------------- + +Clone is a concept that create a new node from the old one by reuse most of data that has been installed on the old node. Before creating a new node, a vm (virtual machine) master must be created first. The new node will be created from the vm master. The new node can attach to the vm master or not. +The node can NOT be run without the vm master if choosing to make the node attach to the vm master. The advantage is that the less disk space is needed. + +**In attaching mode** + +In this mode, all the nodes will be attached to the vm master. Lesser disk space will be used than the general node. 
Create the vm master kvmm from a node (vm1) and make the original node vm1 attach to the newly created vm master: ::
+ +2, Virtual disk has problem + + **Issue**: When running command 'rpower kvm1 on', get the following error message: :: + + kvm1: Error: unable to set user and group to '0:0' + on '/var/lib/xcat/pools/27f1df4b-e6cb-5ed2-42f2-9ef7bdd5f00f/kvm1.hda.qcow2': Invalid argument: + + **Solution**: try to figure out the nfs:// server was exported correctly. The nfs client should have root authority. + +3, VNC client complains the credentials are not valid + + **Issue**: When connecting to the hypervisor using VNC to get a VM console, the vnc client complains with "Authentication failed". + + **Solution**: Check if the clocks on your hypervisor and headnode are in sync! + +4, rpower fails with "qemu: could not open disk image /var/lib/xcat/pools/2e66895a-e09a-53d5-74d3-eccdd9746eb5/vmXYZ.hda.qcow2: Permission denied" error message + + **Issue**: When running rpower on a kvm vm, rpower complains with the following error message: :: + + rpower vm1 on + vm1: Error: internal error Process exited while reading console log output: char device redirected to /dev/pts/1 + qemu: could not open disk image /var/lib/xcat/pools/2e66895a-e09a-53d5-74d3-eccdd9746eb5/vm1.hda.qcow2: Permission denied: internal error Process exited while reading console log output: char device redirected to /dev/pts/1 + qemu: could not open disk image /var/lib/xcat/pools/2e66895a-e09a-53d5-74d3-eccdd9746eb5/vm1.hda.qcow2: Permission denied + [root@xcat xCAT_plugin]# + + **Solution**: This might be caused by bad permissions in your NFS server / client (where clients will not mount the share with the correct permissions). Systems like CentOS 6 will have NFS v4 support activated by default. This might be causing the above mentioned problems so one solution is to simply disable NFS v4 support in your NFS server by uncommenting the following option in /etc/sysconfig/nfs: :: + + RPCNFSDARGS="-N 4" + + Finish by restarting your NFS services (i.e. service nfsd restart) and try powering on your VM again... 
+ Note: if you are running a stateless hypervisor, we advise you to purge the VM (rmvm -p vmXYZ), restart the hypervisor and "mkvm vmXYZ -s 4" to recreate the VM as soon as the hypervisor is up and running. + +5, Error: Cannot communicate via libvirt to + + **Issue**: This error mostly caused by the incorrect setting of the ssh tunnel between xCAT management node and . + + **Solution**: Check that xCAT MN could ssh to the without password. + +6, Cannot ping to the vm after the first boot of stateful install + + **Issue**: The new installed stateful vm node is not pingable after the first boot, you may see the following error message in the console when vm booting: :: + + ADDRCONF(NETDEV_UP): eth0 link is not ready. + + **Solutoin**: This issue may be caused by the incorrect driver for vm. You can try to change driver to 'virtio' by following steps: :: + + rmvm kvm1 + chdef kvm1 vmnicnicmodel=virtio + mkvm kvm1 + + From 1ef31f1418f3c4a2e7c02cfd6d987a17b6d9a179 Mon Sep 17 00:00:00 2001 From: bybai Date: Wed, 2 Sep 2015 04:54:53 -0400 Subject: [PATCH 10/64] add _diskful_installation label --- .../admin-guides/manage_clusters/ppc64le/diskful/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/guides/admin-guides/manage_clusters/ppc64le/diskful/index.rst b/docs/source/guides/admin-guides/manage_clusters/ppc64le/diskful/index.rst index 3e7c62d47..4b3907c02 100644 --- a/docs/source/guides/admin-guides/manage_clusters/ppc64le/diskful/index.rst +++ b/docs/source/guides/admin-guides/manage_clusters/ppc64le/diskful/index.rst @@ -1,3 +1,5 @@ +.. 
_diskful_installation: + Diskful Installation ==================== From 63101586e2697b426f94ab0115f00061f32cf2a7 Mon Sep 17 00:00:00 2001 From: bybai Date: Wed, 2 Sep 2015 04:55:21 -0400 Subject: [PATCH 11/64] add powerKVM doc --- .../ppc64le/virtual_machines/powerKVM.rst | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/source/guides/admin-guides/manage_clusters/ppc64le/virtual_machines/powerKVM.rst b/docs/source/guides/admin-guides/manage_clusters/ppc64le/virtual_machines/powerKVM.rst index 4c334720f..613070857 100644 --- a/docs/source/guides/admin-guides/manage_clusters/ppc64le/virtual_machines/powerKVM.rst +++ b/docs/source/guides/admin-guides/manage_clusters/ppc64le/virtual_machines/powerKVM.rst @@ -1,5 +1,28 @@ PowerKVM ======== -Install PoweKVM ---------------- +Install PowerKVM +---------------- + +The process to set up PowerKVM hypervisor with xCAT is the same with Diskfull installation. Prepare powerKVM iso, such as ibm-powerkvm-2.1.1.0-22.0-ppc64-gold-201410191558.iso, then refer to :ref:`diskful_installation` to install PowerKVM hypervisor. + +Check bridge setting after installation finished +------------------------------------------------ + +After PowerKVM hypervisor is installed successfully, you can get the bridge information: :: + + # brctl show + bridge name bridge id STP enabled interfaces + br0 8000.000000000000 no eth0 + +If the bridge show is not like above, it means that you may not run xCAT post install script. You can manually run following commands to create the bridge, for example: :: + + IPADDR=10.1.101.1/16 + brctl addbr br0 + brctl addif br0 eth0 + brctl setfd br0 0 + ip addr add dev br0 $IPADDR + ip link set br0 up + ip addr del dev eth0 $IPADDR + +Note: During ubuntu LE virtual machines installation, the virtual machines need to access Internet, so make sure the PowerKVM hypervisor is able to access Internet. 
From 2e43bd251e79d1674a410947e68d087d1d2fa00f Mon Sep 17 00:00:00 2001 From: litingt Date: Wed, 2 Sep 2015 06:19:23 -0400 Subject: [PATCH 12/64] add enable_kdump.rst,generate_img.rst,install_new_kernel.rst documentation --- .../common/deployment/enable_kdump.rst | 168 +++++++++++++++ .../common/deployment/generate_img.rst | 201 ++++++++++++++++++ .../common/deployment/install_new_kernel.rst | 82 +++++++ .../diskless/customize_image/index.rst | 2 +- .../customize_image/install_new_kernel.rst | 1 + 5 files changed, 453 insertions(+), 1 deletion(-) create mode 100644 docs/source/guides/admin-guides/manage_clusters/common/deployment/install_new_kernel.rst create mode 100644 docs/source/guides/admin-guides/manage_clusters/ppc64le/diskless/customize_image/install_new_kernel.rst diff --git a/docs/source/guides/admin-guides/manage_clusters/common/deployment/enable_kdump.rst b/docs/source/guides/admin-guides/manage_clusters/common/deployment/enable_kdump.rst index 4152a6cf9..eeafb39b3 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/deployment/enable_kdump.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/deployment/enable_kdump.rst @@ -1,2 +1,170 @@ Enable Kdump Over Ethernet ========================== + +Overview +-------- + +kdump is an advanced crash dumping mechanism. When enabled, the system is booted from the context of another kernel. This second kernel reserves a small amount of memory, and its only purpose is to capture the core dump image in case the system crashes. Since being able to analyze the core dump helps significantly to determine the exact cause of the system failure. 
+ + +xCAT Interface +-------------- + +The pkglist, exclude and postinstall files location and name can be obtained by running the following command: :: + + lsdef -t osimage + +The pkglist file +---------------- + +For RHEL6 and RHEL7, there are two rpm packages for kdump: :: + + kexec-tools + crash + +For SLES11, there are 3 rpm packages for kdump: :: + + kdump + kexec-tools + makedumpfile + +For SLES10, there are 4 rpm packages for kdump: :: + + kernel-kdump + kexec-tools + kdump + makedumpfile + +Setup pkglist +------------- + +Before setting up kdump,the approprite rpms should be added to the pkglist file as found by running: :: + + lsdef -t osimage + +The exclude file +---------------- + +The base diskless image excludes the /boot directory, but it is required for kdump. Update the exlist file and remove the entry for /boot. Then run the packimage or liteimg command to update your image with the changes. + +The postinstall file +-------------------- + +The kdump will create a new initrd which used in the dumping stage. The /tmp or /var/tmp directory will be used as the temporary directory. These 2 directory only are allocated 10M space by default. You need to enlarge it to 200M. + +For RHELS6 or SLES10, modify the postinstall file to increase /tmp space: :: + + tmpfs /var/tmp tmpfs defaults,size=200m 0 2 + +For SLES11, modify the postinstall file to increase /tmp/space: :: + + tmpfs /tmp tmpfs defaults,size=200m 0 2 + +The dump attribute +------------------ + +In order to support kdump, the dump attribute was added into linuximage table, which is used to define the remote path where the crash information should be dumped to. Use the chdef command to change the image's dump attribute using the URI format. :: + + chdef -t osimage dump=nfs:/// + +The can be excluded if the destination NFS server is the service or management node. 
:: + + chdef -t osimage dump=nfs:/// + +The crashkernelsize attribute +----------------------------- + +For system x machine, on sles10 set the crashkernelsize attribute like this: :: + + chdef -t osimage crashkernelsize=M@16M + +On sles11 and rhels6 set the crashkernelsize attribute like this: :: + + chdef -t osimage crashkernelsize=M + +Where recommended value is 256. For more information about the size can refer to the following information: + ``_. + + ``_. + + ``_. + + ``_. + +For system p machine, set the crashkernelsize attribute to this: :: + + chdef -t osimage crashkernelsize=@32M + +Where recommended value is 256, more information can refer the kdump document for the system x. + +When your node starts, and you get a kdump start error like this: :: + + Your running kernel is using more than 70% of the amount of space you reserved for kdump, you should consider increasing your crashkernel + +You should modify this attribute using this chdef command: :: + + chdef -t osimage crashkernelsize=512M@32M + +If 512M@32M is not large enough, you should change the crashkernelsize larger like 1024M until the error message disappear. + +The enablekdump postscript +-------------------------- + +This postscript enablekdump is used to start the kdump service when the node is booting up. Add it to your nodes list of postscripts by running this command: :: + + chdef -t node -p postscripts=enablekdump + + +Notes +----- + +Currently, only NFS is supported for the setup of kdump. + +If the dump attribute is not set, the kdump service will not be enabled. + +Please make sure the NFS remote path(nfs:///) is exported and it is read-writeable to the node where kdump service is enabled. + +How to trigger kernel panic on Linux +------------------------------------ + +Normally, kernel panic() will trigger booting into capture kernel. 
Once the kernel panic is triggered, the node will reboot into the capture kernel, and a kernel dump (vmcore) will be automatically saved to the directory on the specified NFS server (). + +#. For RHESL6 the directory is /var/crash/-