diff --git a/confluent_client/doc/man/buildindex.sh b/confluent_client/doc/man/buildindex.sh new file mode 100644 index 00000000..7476716a --- /dev/null +++ b/confluent_client/doc/man/buildindex.sh @@ -0,0 +1 @@ +for i in *.ronn; do echo -n `head -n 1 $i|awk '{print $1}'`; echo " $i"; done > index.txt diff --git a/confluent_client/doc/man/confluent.ronn b/confluent_client/doc/man/confluent.ronn new file mode 100644 index 00000000..633cef86 --- /dev/null +++ b/confluent_client/doc/man/confluent.ronn @@ -0,0 +1,14 @@ +confluent(8) -- Start the confluent server +========================================================= + +## SYNOPSIS + +`confluent` + +## DESCRIPTION + +**confluent** is the name of the server daemon. It is normally run +through the init subsystem rather than executed directly. All confluent +commands connect to the confluent daemon. It provides the web interface, debug, +and unix socket connectivity. + diff --git a/confluent_client/doc/man/confluentdbutil.ronn b/confluent_client/doc/man/confluentdbutil.ronn new file mode 100644 index 00000000..e7dc9d39 --- /dev/null +++ b/confluent_client/doc/man/confluentdbutil.ronn @@ -0,0 +1,26 @@ +confluentdbutil(8) -- Backup or restore confluent database +========================================================= + +## SYNOPSIS + + +`confluentdbutil [options] ` + +## DESCRIPTION + +**confluentdbutil** is a utility to export/import the confluent attributes +to/from json files. The path is a directory that holds the json version. +In order to perform restore, the confluent service must not be running. It +is required to indicate how the usernames/passwords are treated in +the json files (password protected, removed from the files, or unprotected). + +## OPTIONS + +* `-p`, `--password`: + If specified, information such as usernames and passwords will be encrypted + using the given password. +* `-r`, `--redact`: + Indicates to replace usernames and passwords with a dummy string rather + than including them. 
+* `-u`, `--unprotected`: + The keys.json file will include the encryption keys without any protection. \ No newline at end of file diff --git a/confluent_client/doc/man/nodeattribexpressions.ronn b/confluent_client/doc/man/nodeattribexpressions.ronn new file mode 100644 index 00000000..61277bad --- /dev/null +++ b/confluent_client/doc/man/nodeattribexpressions.ronn @@ -0,0 +1,64 @@ +nodeattribexpressions(5) -- Confluent attribute expression syntax +================================================================= + +## DESCRIPTION + +In confluent, any attribute may either be a straightforward value, or an +expression to generate the value. + +An expression will contain some directives wrapped in `{}` characters. Within +`{}` are a number of potential substitute values and operations. + +The most common operation is to extract a number from the nodename. These +values are available as n1, n2, etc. So for example attributes for a node named +b1o2r3u4 would have {n1} as 1, {n2} as 2, {n3} as 3, and {n4} as 4. +Additionally, {n0} is special as representing the last number in a name, so in +the b1o2r3u4 example, {n0} would be 4. + +Frequently a value derives from a number in the node name, but must undergo a +transform to be useful. As an example, if we have a scheme where nodes are +numbered n1-n512, and they are arranged 1-42 in rack1, 43-84 in rack2, and so +forth, it is convenient to perform arithmetic on the extracted number. Here is +an example of codifying the above scheme, and setting the u to the remainder: + +`location.rack=rack{(n1-1)/42+1}` +`location.u={(n1-1)%42+1}` + +Note how text may be mixed into expressions, only data within {} will receive +special treatment. Here we also had to adjust by subtracting 1 and adding it +back to make the math work as expected. + +It is sometimes the case that the number must be formatted a different way, +either specifying 0 padding or converting to hexadecimal. 
This can be done by a +number of operators at the end to indicate formatting changes. + +`{n1:02x} - Zero pad to two digits, and convert to hexadecimal, as might be used for generating MAC addresses` +`{n1:x} - Hexadecimal without padding, as may be used in a generated IPv6 address` +`{n1:X} - Uppercase hexadecimal` +`{n1:02d} - Zero pad a normal numeric representation of the number.` + +Another common element to pull into an expression is the node name in whole: + +`hardwaremanagement.manager={nodename}-imm` + +Additionally other attributes may be pulled in: + +`hardwaremanagement.switchport={location.u}` + +Multiple expressions are permissible within a single attribute: + +`hardwaremanagement.manager={nodename}-{hardwaremanagement.method}` + +A note to developers: in general the API layer will automatically recognize a +generic set attribute to string with expression syntax and import it as an +expression. For example, submitting the following JSON: + +`{ 'location.rack': '{n1}' }` + +Will auto-detect {n1} as an expression and assign it normally. If wanting to +set that value verbatim, it can either be escaped by doubling the {} or by +explicitly declaring it as a value: + +`{ 'location.rack': '{{n1}}' }` + +`{ 'location.rack': { 'value': '{n1}' } }` diff --git a/confluent_client/doc/man/nodeboot.ronn b/confluent_client/doc/man/nodeboot.ronn new file mode 100644 index 00000000..9b067556 --- /dev/null +++ b/confluent_client/doc/man/nodeboot.ronn @@ -0,0 +1,34 @@ +nodeboot(8) -- Reboot a confluent node to a specific device +========================================================= + +## SYNOPSIS + +`nodeboot ` +`nodeboot ` [net|setup] + +## DESCRIPTION + +**nodeboot** reboots nodes in a noderange. If an additional argument is given, +it sets the node to specifically boot to that as the next boot. 
+ +## EXAMPLES +* Booting n3 and n4 to the default boot behavior: + `# nodeboot n3-n4` + `n3: default` + `n4: default` + `n3: on->reset` + `n4: on->reset` + +* Booting n1 and n2 to setup menu: + `# nodeboot n1-n2 setup` + `n2: setup` + `n1: setup` + `n2: on->reset` + `n1: on->reset` + +* Booting n3 and n4 to network: + `# nodeboot n3-n4 net` + `n3: network` + `n4: network` + `n4: on->reset` + `n3: off->on` diff --git a/confluent_client/doc/man/nodeeventlog.ronn b/confluent_client/doc/man/nodeeventlog.ronn new file mode 100644 index 00000000..69e78480 --- /dev/null +++ b/confluent_client/doc/man/nodeeventlog.ronn @@ -0,0 +1,33 @@ +nodeeventlog(8) -- Pull eventlog from confluent nodes +============================================================ + +## SYNOPSIS + +`nodeeventlog ` +`nodeeventlog [clear]` + +## DESCRIPTION + +`nodeeventlog` pulls and optionally clears the event log from the requested +noderange. + +## EXAMPLES +* Pull the event log from n2 and n3: + `# nodeeventlog n2,n3` + `n2: 05/03/2017 11:44:25 Event Log Disabled - SEL Fullness - Log clear` + `n2: 05/03/2017 11:44:56 System Firmware - Progress - Unspecified` + `n3: 05/03/2017 11:44:39 Event Log Disabled - SEL Fullness - Log clear` + `n3: 05/03/2017 11:45:00 System Firmware - Progress - Unspecified` + `n3: 05/03/2017 11:47:22 System Firmware - Progress - Starting OS boot` + +* Pull and clear the event log from n2 and n3: +`# nodeeventlog n2,n3 clear` +`n2: 05/03/2017 11:44:25 Event Log Disabled - SEL Fullness - Log clear` +`n2: 05/03/2017 11:44:56 System Firmware - Progress - Unspecified` +`n2: 05/03/2017 11:48:29 System Firmware - Progress - Starting OS boot` +`n3: 05/03/2017 11:44:39 Event Log Disabled - SEL Fullness - Log clear` +`n3: 05/03/2017 11:45:00 System Firmware - Progress - Unspecified` +`n3: 05/03/2017 11:47:22 System Firmware - Progress - Starting OS boot` +`# nodeeventlog n2,n3` +`n2: 05/03/2017 11:48:48 Event Log Disabled - SEL Fullness - Log clear` +`n3: 05/03/2017 11:48:52 Event Log 
Disabled - SEL Fullness - Log clear` diff --git a/confluent_client/doc/man/nodefirmware.ronn b/confluent_client/doc/man/nodefirmware.ronn new file mode 100644 index 00000000..4aa7bf83 --- /dev/null +++ b/confluent_client/doc/man/nodefirmware.ronn @@ -0,0 +1,30 @@ +nodefirmware(8) -- Report firmware information on confluent nodes +================================================================= + +## SYNOPSIS + +`nodefirmware ` + +## DESCRIPTION + +`nodefirmware` reports the versions of various pieces of firmware on confluent nodes. + +## EXAMPLES + +* Pull firmware from a node: +`# nodefirmware r1` +`r1: IMM: 3.70 (TCOO26H 2016-11-29T05:09:51)` +`r1: IMM Backup: 1.71 (TCOO10D 2015-04-17T00:00:00)` +`r1: IMM Trusted Image: TCOO26H` +`r1: UEFI: 2.31 (TCE128I 2016-12-13T00:00:00)` +`r1: UEFI Backup: 2.20 (TCE126O)` +`r1: FPGA: 3.2.0` +`r1: Broadcom NetXtreme Gigabit Ethernet Controller Bootcode: 1.38` +`r1: Broadcom NetXtreme Gigabit Ethernet Controller MBA: 16.8.0` +`r1: Broadcom NetXtreme Gigabit Ethernet Controller Firmware Package: 0.0.0a` +`r1: ServeRAID M1215 MegaRAID Controller Firmware: 24.12.0-0038 (2016-10-20T00:00:00)` +`r1: ServeRAID M1215 Disk 28 MBF2600RC: SB2C` +`r1: ServeRAID M1215 Disk 29 MBF2600RC: SB2C` +`r1: ServeRAID M5210 Disk 0 MBF2600RC: SB2C` +`r1: ServeRAID M5210 Disk 1 MBF2600RC: SB2C` +`r1: ServeRAID M5210 Disk 2 MBF2600RC: SB2C` diff --git a/confluent_client/doc/man/nodegroupattrib.ronn b/confluent_client/doc/man/nodegroupattrib.ronn new file mode 100644 index 00000000..57ada27c --- /dev/null +++ b/confluent_client/doc/man/nodegroupattrib.ronn @@ -0,0 +1,42 @@ +nodegroupattrib(8) -- List or change confluent nodegroup attributes +=================================================================== + +## SYNOPSIS + +`nodegroupattrib [ current | all ]` +`nodegroupattrib [...]` +`nodegroupattrib [ ...]` +`nodegroupattrib [-c] [ ...]` + +## DESCRIPTION + +`nodegroupattrib` queries the confluent server to get information about node groups. 
+In the simplest form, it simply takes the given group and lists the attributes of that group. + +Contrasted with nodeattrib(8), settings managed by nodegroupattrib will be added +and removed from a node as it is added or removed from a group. If an attribute +is set using nodeattrib(8) against a noderange(5) that happens to be a group name, +nodeattrib(8) individually sets attributes directly on each individual node that is +currently a member of that group. Removing group membership or adding a new +node after using the nodeattrib(8) command will not have attributes change automatically. +It's easiest to see by using the `nodeattrib -b` to understand how +the attributes are set on the node versus a group to which a node belongs. + +## OPTIONS + +* `-c`, `--clear`: + Clear specified nodeattributes. + +## EXAMPLES + +* Show attributes of a group called `demogrp`: + `# nodegroupattrib demogrp` + `demogrp: hardwaremanagement.manager: (will derive from expression 10.30.{n0/255}.{n0%255})` + `demogrp: nodes: n12,n13,n10,n11,n9,n1,n2,n3,n4` + +* Set location.u to be the remainder of first number in node name when divided by 42: + `# nodegroupattrib demogrp location.u={n1%42}` + +## SEE ALSO + +nodeattrib(8), nodeattribexpressions(5) diff --git a/confluent_client/doc/man/nodehealth.ronn b/confluent_client/doc/man/nodehealth.ronn new file mode 100644 index 00000000..b8baa722 --- /dev/null +++ b/confluent_client/doc/man/nodehealth.ronn @@ -0,0 +1,22 @@ +nodehealth(8) -- Show health summary of confluent nodes +======================================================== + +## SYNOPSIS + +`nodehealth ` + +## DESCRIPTION + +`nodehealth` reports the current health assessment of a confluent node. It +will report either `ok`, `warning`, `critical`, or `failed`, along with +a string explaining the reason for any result other than `ok`. 
+ +## EXAMPLES + +* Pull health summary of 5 nodes: + `# nodehealth n1-n4,r1` + `n1: critical (Mezz Exp 2 Fault:Critical)` + `n3: ok` + `n2: ok` + `r1: ok` + `n4: ok` diff --git a/confluent_client/doc/man/nodeidentify.ronn b/confluent_client/doc/man/nodeidentify.ronn new file mode 100644 index 00000000..3a9cbe69 --- /dev/null +++ b/confluent_client/doc/man/nodeidentify.ronn @@ -0,0 +1,31 @@ +nodeidentify(8) -- Control the identify LED of confluent nodes +========================================================= + +## SYNOPSIS + +`nodeidentify [on|off]` + +## DESCRIPTION + +`nodeidentify` allows you to turn on or off the location LED of confluent nodes, +making it easier to determine the physical location of the nodes. The following +options are supported: + +* `on`: Turn on the identify LED +* `off`: Turn off the identify LED + +## EXAMPLES + +* Turn on the identify LED on nodes n1 through n4: + `# nodeidentify n1-n4 on` + `n1: on` + `n2: on` + `n3: on` + `n4: on` + +* Turn off the identify LED on nodes n1 through n4: + `# nodeidentify n1-n4 off` + `n1: off` + `n2: off` + `n4: off` + `n3: off` diff --git a/confluent_client/doc/man/nodeinventory.ronn b/confluent_client/doc/man/nodeinventory.ronn new file mode 100644 index 00000000..802b1c87 --- /dev/null +++ b/confluent_client/doc/man/nodeinventory.ronn @@ -0,0 +1,106 @@ +nodeinventory(8) -- Get hardware inventory of confluent node +=============================================================== + +## SYNOPSIS + +`nodeinventory ` + +## DESCRIPTION + +`nodeinventory` pulls information about hardware of a node. This includes +information such as adapters, serial numbers, processors, and memory modules, +as supported by the platform's hardware management implementation. 
+ +## EXAMPLES + +* Pulling inventory of a node named r1: + `# nodeinventory r1` + `r1: System MAC Address 1: 40:f2:e9:af:45:a0` + `r1: System MAC Address 2: 40:f2:e9:af:45:a1` + `r1: System MAC Address 3: 40:f2:e9:af:45:a2` + `r1: System MAC Address 4: 40:f2:e9:af:45:a3` + `r1: System Board manufacturer: IBM` + `r1: System Product name: System x3650 M5` + `r1: System Device ID: 32` + `r1: System Revision: 9` + `r1: System Product ID: 323` + `r1: System Board model: 00KG915` + `r1: System Device Revision: 0` + `r1: System Serial Number: E2K4831` + `r1: System Board manufacture date: 2014-10-20T12:00` + `r1: System Board serial number: Y010UF4AL0B5` + `r1: System Manufacturer: IBM` + `r1: System FRU Number: 00FK639` + `r1: System Board product name: System Board` + `r1: System Model: 5462AC1` + `r1: System UUID: 1B29CE46-765E-31A3-A3B9-B5FB934F15AB` + `r1: System Hardware Version: 0x0000` + `r1: System Manufacturer ID: 20301` + `r1: System Chassis serial number: E2K4831` + `r1: System Asset Number: ` + `r1: System Chassis type: Other` + `r1: Power Supply 1 Board model: 94Y8136` + `r1: Power Supply 1 Board manufacturer: EMER` + `r1: Power Supply 1 FRU Number: 94Y8137` + `r1: Power Supply 1 Board product name: IBM Designed Device` + `r1: Power Supply 1 Board manufacture date: 2014-11-08T00:00` + `r1: Power Supply 1 Board serial number: K13814B88ED` + `r1: Power Supply 1 Revision: 49` + `r1: Power Supply 2: Not Present` + `r1: DASD Backplane 1 Board model: 00JY139` + `r1: DASD Backplane 1 Board manufacturer: WIST` + `r1: DASD Backplane 1 FRU Number: 00FJ756` + `r1: DASD Backplane 1 Board product name: IBM Designed Device` + `r1: DASD Backplane 1 Board manufacture date: 2014-08-28T00:00` + `r1: DASD Backplane 1 Board serial number: Y011UF48W02U` + `r1: DASD Backplane 1 Revision: 0` + `r1: DASD Backplane 2: Not Present` + `r1: DASD Backplane 3: Not Present` + `r1: DASD Backplane 4: Not Present` + `r1: DASD Backplane 5 Board model: 00YJ530` + `r1: DASD Backplane 5 Board 
manufacturer: WIST` + `r1: DASD Backplane 5 FRU Number: 00AL953` + `r1: DASD Backplane 5 Board product name: IBM Designed Device` + `r1: DASD Backplane 5 Board manufacture date: 2016-02-04T00:00` + `r1: DASD Backplane 5 Board serial number: Y010UF624024` + `r1: DASD Backplane 5 Revision: 0` + `r1: DASD Backplane 6: Not Present` + `r1: CPU 1 Hardware Version: Intel(R) Xeon(R) CPU E5-2640 v3 @ 2.60GHz` + `r1: CPU 1 Asset Number: Unknown` + `r1: CPU 1 Manufacturer: Intel(R) Corporation` + `r1: CPU 2: Not Present` + `r1: ML2 Card: Not Present` + `r1: DIMM 1: Not Present` + `r1: DIMM 2: Not Present` + `r1: DIMM 3: Not Present` + `r1: DIMM 4: Not Present` + `r1: DIMM 5: Not Present` + `r1: DIMM 6: Not Present` + `r1: DIMM 7: Not Present` + `r1: DIMM 8: Not Present` + `r1: DIMM 9: Not Present` + `r1: DIMM 10: Not Present` + `r1: DIMM 11: Not Present` + `r1: DIMM 12: Not Present` + `r1: DIMM 13: Not Present` + `r1: DIMM 14: Not Present` + `r1: DIMM 15: Not Present` + `r1: DIMM 16: Not Present` + `r1: DIMM 17: Not Present` + `r1: DIMM 18: Not Present` + `r1: DIMM 19: Not Present` + `r1: DIMM 20: Not Present` + `r1: DIMM 21: Not Present` + `r1: DIMM 22: Not Present` + `r1: DIMM 23: Not Present` + `r1: DIMM 24: Not Present` + `r1: X8 PCI 1: Not Present` + `r1: X8 PCI 2: Not Present` + `r1: X8 PCI 6: Not Present` + `r1: X8 PCI 7: Not Present` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller MAC Address 1: 40:f2:e9:af:45:a0` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller MAC Address 2: 40:f2:e9:af:45:a1` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller MAC Address 3: 40:f2:e9:af:45:a2` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller MAC Address 4: 40:f2:e9:af:45:a3` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller PCI slot: 1b:00` + `r1: Broadcom NetXtreme Gigabit Ethernet Controller location: Onboard` diff --git a/confluent_client/doc/man/nodepower.ronn b/confluent_client/doc/man/nodepower.ronn new file mode 100644 index 00000000..8249c95b --- 
/dev/null +++ b/confluent_client/doc/man/nodepower.ronn @@ -0,0 +1,43 @@ +nodepower(8) -- Check or change power state of confluent nodes +========================================================= + +## SYNOPSIS + +`nodepower ` +`nodepower [on|off|boot|shutdown|reset|status]` + +## DESCRIPTION + +**nodepower** with only a noderange will retrieve current power state of nodes +through confluent. When given an additional argument, it will request a change +to the power state of the nodes. The following arguments are recognized: + +* `on`: Turn on the specified noderange. Nothing will happen to nodes of +the noderange that are already on. +* `off`: Immediately turn off the specified noderange, without waiting for the OS +to shut down. Nothing will happen to nodes of the noderange that are already off. +* `boot`: Immediately boot a system. This will power on nodes of the noderange +that are off, and reset nodes of the noderange that are on. The previous state +will be reflected in the output. +* `shutdown`: Request the OS gracefully shut down. Nothing will happen for +nodes that are off, and nodes will not shut down if the OS fails to gracefully +respond. +* `reset`: Request immediate reset of nodes of the noderange. Nodes that are +off will not react to this request. +* `status`: Behave identically to having no argument passed at all. 
+ +## EXAMPLES +* Get power state of nodes n1 through n4: + `# nodepower n1-n4` + `n1: on` + `n2: on` + `n3: on` + `n4: off` + + +* Forcing a reboot of nodes n1-n4: + `# nodepower n1-n4 boot` + `n3: on->reset` + `n1: on->reset` + `n2: on->reset` + `n4: off->on` diff --git a/confluent_client/doc/man/noderun.ronn b/confluent_client/doc/man/noderun.ronn new file mode 100644 index 00000000..626bb685 --- /dev/null +++ b/confluent_client/doc/man/noderun.ronn @@ -0,0 +1,53 @@ +noderun(8) -- Run arbitrary commands per node in a noderange +============================================================= + +## SYNOPSIS + +`noderun ` + +## DESCRIPTION + +`noderun` will take a given command and execute it in parallel once per node +in the specified noderange. Attribute expressions as documented in +nodeattribexpressions(5) are expanded prior to execution of the command. For +noderun, the commands are locally executed. To execute commands on the nodes +themselves, see nodeshell(8). + +## EXAMPLES + +* Run ping against nodes n1 through n4: + `# noderun n1-n4 ping -c 1 {nodename}` + `n3: PING n3 (172.30.2.3) 56(84) bytes of data.` + `n3: 64 bytes from n3 (172.30.2.3): icmp_seq=1 ttl=64 time=0.387 ms` + `n3: ` + `n3: --- n3 ping statistics ---` + `n3: 1 packets transmitted, 1 received, 0% packet loss, time 0ms` + `n3: rtt min/avg/max/mdev = 0.387/0.387/0.387/0.000 ms` + `n4: PING n4 (172.30.2.4) 56(84) bytes of data.` + `n4: 64 bytes from n4 (172.30.2.4): icmp_seq=1 ttl=64 time=0.325 ms` + `n4: ` + `n4: --- n4 ping statistics ---` + `n4: 1 packets transmitted, 1 received, 0% packet loss, time 0ms` + `n4: rtt min/avg/max/mdev = 0.325/0.325/0.325/0.000 ms` + `n2: PING n2 (172.30.2.2) 56(84) bytes of data.` + `n2: From odin (172.30.0.6) icmp_seq=1 Destination Host Unreachable` + `n2: ` + `n2: --- n2 ping statistics ---` + `n2: 1 packets transmitted, 0 received, +1 errors, 100% packet loss, time 3000ms` + `n2: ` + `n1: PING n1 (172.30.2.1) 56(84) bytes of data.` + `n1: ` + `n1: --- n1 
ping statistics ---` + `n1: 1 packets transmitted, 0 received, 100% packet loss, time 10000ms` + `n1: ` + +* Run an ipmitool raw command against the management controllers of n1 through n4: + `# noderun n1-n4 ipmitool -I lanplus -U USERID -E -H {hardwaremanagement.manager} raw 0 1` + `n3: 01 10 00` + `n1: 01 10 00` + `n4: 01 10 00` + `n2: 01 10 00` + +## SEE ALSO + +nodeshell(8) diff --git a/confluent_client/doc/man/nodesetboot.ronn b/confluent_client/doc/man/nodesetboot.ronn new file mode 100644 index 00000000..349a9a47 --- /dev/null +++ b/confluent_client/doc/man/nodesetboot.ronn @@ -0,0 +1,69 @@ +nodesetboot(8) -- Check or set next boot device for noderange +==================================================== + +## SYNOPSIS + +`nodesetboot ` +`nodesetboot [options] [default|cd|network|setup|hd]` + +## DESCRIPTION + +Requests that the next boot occur from the specified device. Unless otherwise +specified, this is a one time boot option, and does not change the normal boot +behavior of the system. This is useful for taking a system that normally boots +to the hard drive and starting a network install, or to go into the firmware +setup menu without having to hit a keystroke at the correct time on the console. + +Generally, it's a bit more convenient and direct to use the nodeboot(8) command, +which will follow up the boot device with an immediate power directive to take +effect. The `nodesetboot` command is still useful, particularly if you want +to use `nodesetboot setup` and then initiate a reboot from within +the operating system with ssh or similar rather than using the remote hardware +control. + +## OPTIONS + +* `-b`, `--bios`: + For a system that supports both BIOS and UEFI style boot, request BIOS style + boot if supported (some platforms will UEFI boot with this flag anyway). + +* `-p`, `--persist`: + For a system that supports it, mark the boot override to persist rather than + be a one time change. Many systems do not support this functionality. 
+ +* `default`: + Request a normal default boot with no particular device override + +* `cd`: + Request boot from media. Note that this can include physical CD, + remote media mounted as CD/DVD, and detachable hard disk drives such as USB + key devices. + +* `network`: + Request boot to network + +* `setup`: + Request to enter the firmware configuration menu (e.g. F1 setup) on next boot. + +* `hd`: + Boot straight to hard disk drive + +## EXAMPLES + +* Set next boot to setup for four nodes: + `# nodesetboot n1-n4 setup` + `n1: setup` + `n3: setup` + `n2: setup` + `n4: setup` + +* Check boot override settings on four nodes: + `# nodesetboot n1-n4` + `n1: setup` + `n2: setup` + `n3: setup` + `n4: setup` + +## SEE ALSO + +nodeboot(8) diff --git a/confluent_client/doc/man/nodeshell.ronn b/confluent_client/doc/man/nodeshell.ronn new file mode 100644 index 00000000..7dd8c5ed --- /dev/null +++ b/confluent_client/doc/man/nodeshell.ronn @@ -0,0 +1,28 @@ +nodeshell(8) -- Execute command on many nodes in a noderange through ssh +========================================================================= + +## SYNOPSIS + +`nodeshell ` + +## DESCRIPTION + +Allows execution of a command on many nodes in parallel. Like noderun(8), it +accepts and interpolates confluent attribute expressions as documented in +nodeattribexpressions(5). 
+ +## EXAMPLES + +* Running `echo hi` on four nodes: + `# nodeshell n1-n4 echo hi` + `n1: hi` + `n2: hi` + `n3: hi` + `n4: hi` + +* Setting a new static IP address temporarily on secondary interface of four nodes: + `# nodeshell n1-n4 ifconfig eth1 172.30.93.{n1}` + +## SEE ALSO + +noderun(8) diff --git a/confluent_client/makeman b/confluent_client/makeman new file mode 100644 index 00000000..feb804a0 --- /dev/null +++ b/confluent_client/makeman @@ -0,0 +1,8 @@ +#!/bin/sh +cd `dirname $0`/doc/man +mkdir -p ../../man/man5 +mkdir -p ../../man/man8 +ronn -r *.ronn +mv *.5 ../../man/man5/ +mv *.8 ../../man/man8/ + diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py new file mode 100644 index 00000000..11e25666 --- /dev/null +++ b/confluent_server/confluent/discovery/core.py @@ -0,0 +1,850 @@ +# Copyright 2016-2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This manages the detection and auto-configuration of nodes. +# Discovery sources may implement scans and may be passive or may provide +# both. + +# The phases and actions: +# - Detect - Notice the existence of a potentially supported target +# - Potentially apply a secure replacement for default credential +# (perhaps using some key identifier combined with some string +# denoting temporary use, and use confluent master integrity key +# to generate a password in a formulaic way?) +# - Do some universal reconfiguration if applicable (e.g. 
if something is +# part of an enclosure with an optionally enabled enclosure manager, +# check and request enclosure manager enablement) +# - Throughout all of this, at this phase no sensitive data is divulged, +# only using credentials that are factory default or equivalent to +# factory default +# - Request transition to Locate +# - Locate - Use available cues to ascertain the physical location. This may +# be mac address lookup through switch or correlated by a server +# enclosure manager. If the location data suggests a node identity, +# then proceed to the 'verify' state +# - Verify - Given the current information and candidate upstream verifier, +# verify the authenticity of the server's claim in an automated way +# if possible. A few things may happen at this juncture +# - Verification outright fails (confirmed negative response) +# - Audit log entry created, element is not *allowed* to +# proceed +# - Verification not possible (neither good nor bad) +# - If security policy is set to low, proceed to 'Manage' +# - Otherwise, log the detection event and stop (user +# would then manually bless the endpoint if applicable) +# - Verification succeeds +# - If security policy is set to strict (or manual, whichever +# word works best), note the successful verification, but +# do not manage +# - Otherwise, proceed to 'Manage' +# - Pre-configure - Given data up to this point, try to do some pre-config. +# For example, if located and X, then check for S, enable S +# This happens regardless of verify, as verify may depend on +# S +# - Manage +# - Create the node if autonode (Deferred) +# - If there is not a defined ip address, collect the current LLA and use +# that value. 
+# - If no username/password defined, generate a unique password, 20 bytes +# long, written to pass most complexity rules (15 random bytes, base64, +# retry until uppercase, lowercase, digit, and symbol all present) +# - Apply defined configuration to endpoint + +import confluent.config.configmanager as cfm +import confluent.discovery.protocols.pxe as pxe +#import confluent.discovery.protocols.ssdp as ssdp +import confluent.discovery.protocols.slp as slp +import confluent.discovery.handlers.imm as imm +import confluent.discovery.handlers.pxe as pxeh +import confluent.discovery.handlers.smm as smm +import confluent.discovery.handlers.xcc as xcc +import confluent.exceptions as exc +import confluent.log as log +import confluent.messages as msg +import confluent.networking.macmap as macmap +import confluent.noderange as noderange +import confluent.util as util +import traceback + +import eventlet +import eventlet.semaphore + +class nesteddict(dict): # dict that fills a missing key with a new, empty nesteddict on first lookup + + def __missing__(self, key): + v = self[key] = nesteddict() + return v + +nodehandlers = { # discovery service identifier -> handler module + 'service:lenovo-smm': smm, + 'service:management-hardware.Lenovo:lenovo-xclarity-controller': xcc, + 'service:management-hardware.IBM:integrated-management-module2': imm, + 'pxe-client': pxeh, +} + +servicenames = { # discovery service identifier -> short type name + 'pxe-client': 'pxe-client', + 'service:lenovo-smm': 'lenovo-smm', + 'service:management-hardware.Lenovo:lenovo-xclarity-controller': 'lenovo-xcc', + 'service:management-hardware.IBM:integrated-management-module2': 'lenovo-imm2', +} + +servicebyname = { # short type name -> discovery service identifier (inverse of servicenames) + 'pxe-client': 'pxe-client', + 'lenovo-smm': 'service:lenovo-smm', + 'lenovo-xcc': 'service:management-hardware.Lenovo:lenovo-xclarity-controller', + 'lenovo-imm2': 'service:management-hardware.IBM:integrated-management-module2', +} +# Passive-only auto-detection protocols: +# PXE + +# Both passive and active +# SLP (passive mode listens for SLP DA and unicast interrogation of the system) +# mDNS +# SSDP + +# Also there are location providers +# Switch +# chassis +# 
chassis may in turn describe more chassis + +# We normalize discovered node data to the following pieces of information: +# * Detected node name (if available, from switch discovery or similar or +# auto generated node name) +# * Model number +# * Model name +# * Serial number +# * System UUID (in x86 space, specifically whichever UUID would be in DMI) +# * Network interfaces and addresses +# * Switch connectivity information +# * enclosure information +# * Management TLS fingerprint if validated (switch publication or enclosure) +# * System TLS fingerprint if validated (switch publication or system manager) + + +#TODO: by serial, by uuid, by node +known_info = {} # mac address -> info dict +known_services = {} # service identifier -> container tested for model-number membership +known_serials = {} # serial number -> info dict +known_nodes = nesteddict() # node name -> {mac address: info dict} +unknown_info = {} # mac address -> info dict awaiting (re)identification +pending_nodes = {} # node name -> info dict +pending_nodes = {} + + +def send_discovery_datum(info): # generator: yields one msg.KeyValueData per reported field of info + addresses = info.get('addresses', []) + yield msg.KeyValueData({'nodename': info.get('nodename', '')}) + yield msg.KeyValueData({'ipaddrs': [x[0] for x in addresses]}) + yield msg.KeyValueData({'serialnumber': info.get('serialnumber', '')}) + yield msg.KeyValueData({'modelnumber': info.get('modelnumber', '')}) + yield msg.KeyValueData({'macs': [info.get('hwaddr', '')]}) + types = [] + for infotype in info.get('services', []): + if infotype in servicenames: + types.append(servicenames[infotype]) + yield msg.KeyValueData({'types': types}) + + +def _info_matches(info, criteria): # False as soon as a supplied (truthy) criterion disagrees with info, else True + model = criteria.get('by-model', None) + devtype = criteria.get('by-type', None) + node = criteria.get('by-node', None) + serial = criteria.get('by-serial', None) + status = criteria.get('by-state', None) + if model and info.get('modelnumber', None) != model: + return False + if devtype and devtype not in info.get('services', []): + return False + if node and info.get('nodename', None) != node: + return False + if serial and info.get('serialnumber', None) != serial: + return False + if status and info.get('discostatus', None) != status: + return False + return True + + +def 
list_matching_nodes(criteria): # nodes with at least one mac whose info matches, sorted by noderange.humanify_nodename + retnodes = [] + for node in known_nodes: + for mac in known_nodes[node]: + info = known_info[mac] + if _info_matches(info, criteria): + retnodes.append(node) + break + retnodes.sort(key=noderange.humanify_nodename) + return [msg.ChildCollection(node + '/') for node in retnodes] + + +def list_matching_serials(criteria): # generator: matching serial numbers in sorted order + for serial in sorted(list(known_serials)): + info = known_serials[serial] + if _info_matches(info, criteria): + yield msg.ChildCollection(serial + '/') + + +def list_matching_states(criteria): # criteria is not consulted; the three states are always listed + return [msg.ChildCollection(x) for x in ('discovered/', 'identified/', + 'unidentified/')] + +def list_matching_macs(criteria): # generator: matching macs in sorted order, rendered with '-' in place of ':' + for mac in sorted(list(known_info)): + info = known_info[mac] + if _info_matches(info, criteria): + yield msg.ChildCollection(mac.replace(':', '-')) + + +def list_matching_types(criteria): # short type names of known services, optionally limited by criteria['by-model'] + rettypes = [] + for infotype in known_services: + typename = servicenames[infotype] + if ('by-model' not in criteria or + criteria['by-model'] in known_services[infotype]): + rettypes.append(typename) + return [msg.ChildCollection(typename + '/') + for typename in sorted(rettypes)] + + +def list_matching_models(criteria): # NOTE(review): criteria['by-type'] indexes known_services unguarded, yet _parameterize_path stores None for an unrecognized type name; confirm + for model in sorted(list(detected_models())): + if ('by-type' not in criteria or + model in known_services[criteria['by-type']]): + yield msg.ChildCollection(model + '/') + + +def show_info(mac): # generator: accepts a '-' separated mac; exc.NotFoundException is raised on first iteration when the mac is unknown + mac = mac.replace('-', ':') + if mac not in known_info: + raise exc.NotFoundException(mac + ' not a known mac address') + for i in send_discovery_datum(known_info[mac]): + yield i + + +list_info = { # selector name -> function listing that selector's values + 'by-node': list_matching_nodes, + 'by-serial': list_matching_serials, + 'by-type': list_matching_types, + 'by-model': list_matching_models, + 'by-mac': list_matching_macs, + 'by-state': list_matching_states, +} + +multi_selectors = set([ + 'by-type', + 'by-model', + 'by-state', +]) + + +node_selectors = set([ + 'by-node', + #'by-uuid', + 'by-serial', +]) + + +single_selectors = set([ + 'by-mac', +]) 
def _parameterize_path(pathcomponents):
    """Parse alternating selector/value path components.

    An odd number of components means the final one names the listing
    being requested.  Returns a tuple of (selectors still valid to
    append, dict of selector -> value, requested listing or False,
    whether the path denotes a collection).

    Raises exc.NotFoundException when a selector is not valid at its
    position in the path.
    """
    listrequested = False
    childcoll = True
    if len(pathcomponents) % 2 == 1:
        listrequested = pathcomponents[-1]
        pathcomponents = pathcomponents[:-1]
    pathit = iter(pathcomponents)
    keyparams = {}
    validselectors = multi_selectors | node_selectors | single_selectors
    # zip over the same iterator twice to walk (selector, value) pairs
    for key, val in zip(pathit, pathit):
        if key not in validselectors:
            raise exc.NotFoundException('{0} is not valid here'.format(key))
        if key == 'by-type':
            keyparams[key] = servicebyname.get(val, None)
        else:
            keyparams[key] = val
        validselectors.discard(key)
        if key in single_selectors:
            # a single selector pins one entry, nothing may follow it
            childcoll = False
            validselectors = set([])
        elif key in node_selectors:
            validselectors = single_selectors | set([])
    return validselectors, keyparams, listrequested, childcoll


def handle_api_request(configmanager, inputdata, operation, pathcomponents):
    """Dispatch a discovery API request.

    'retrieve' is delegated to handle_read_api_request.  'update' or
    'create' on ['discovery', 'rescan'] starts a rescan; any other
    'update'/'create' manually assigns the by-mac target to the node named
    in inputdata['node'].  Anything else raises
    exc.NotImplementedException.
    """
    if operation == 'retrieve':
        return handle_read_api_request(pathcomponents)
    elif (operation in ('update', 'create') and
            pathcomponents == ['discovery', 'rescan']):
        if inputdata != {'rescan': 'start'}:
            raise exc.InvalidArgumentException()
        rescan()
        return (msg.KeyValueData({'rescan': 'started'}),)
    elif (operation in ('update', 'create')):
        if 'node' not in inputdata:
            raise exc.InvalidArgumentException('Missing node name in input')
        _, queryparms, _, _ = _parameterize_path(pathcomponents[1:])
        if 'by-mac' not in queryparms:
            raise exc.InvalidArgumentException('Must target using "by-mac"')
        mac = queryparms['by-mac'].replace('-', ':')
        if mac not in known_info:
            raise exc.NotFoundException('{0} not found'.format(mac))
        info = known_info[mac]
        handler = info['handler'].NodeHandler(info, configmanager)
        eval_node(configmanager, handler, info, inputdata['node'],
                  manual=True)
        return [msg.AssignedResource(inputdata['node'])]
    raise exc.NotImplementedException(
        'Unable to {0} to {1}'.format(operation, '/'.join(pathcomponents)))


def handle_read_api_request(pathcomponents):
    """Answer a 'retrieve' request with a listing or a single entry."""
    # TODO(jjohnson2): This should be more generalized...
    #  odd indexes into components are 'by-'*, even indexes
    #  starting at 2 are parameters to previous index
    subcats, queryparms, indexof, coll = _parameterize_path(pathcomponents[1:])
    if len(pathcomponents) == 1:
        dirlist = [msg.ChildCollection(x + '/') for x in sorted(list(subcats))]
        dirlist.append(msg.ChildCollection('rescan'))
        return dirlist
    if not coll:
        return show_info(queryparms['by-mac'])
    if not indexof:
        return [msg.ChildCollection(x + '/') for x in sorted(list(subcats))]
    if indexof not in list_info:
        raise exc.NotFoundException('{0} is not found'.format(indexof))
    return list_info[indexof](queryparms)


def detected_services():
    """Yield the service name of every service type seen so far."""
    for srv in known_services:
        yield servicenames[srv]


def detected_models():
    """Yield each distinct 'modelnumber' found in known_info, once."""
    knownmodels = set([])
    for info in known_info:
        info = known_info[info]
        if 'modelnumber' in info and info['modelnumber'] not in knownmodels:
            knownmodels.add(info['modelnumber'])
            yield info['modelnumber']


def _recheck_nodes(nodeattribs, configmanager):
    """Re-evaluate unknown and pending devices.

    nodeattribs is iterated as the set of nodes whose attributes changed
    (callers in this file also pass an empty tuple to mean 'no particular
    node').  Known devices of those nodes are put back into unknown_info
    before every unknown and pending entry is examined again.
    """
    global rechecker
    _map_unique_ids(nodeattribs)
    # for the nodes whose attributes have changed, consider them as potential
    # strangers
    for node in nodeattribs:
        if node in known_nodes:
            for somemac in known_nodes[node]:
                unknown_info[somemac] = known_nodes[node][somemac]
                unknown_info[somemac]['discostatus'] = 'unidentified'
    # Now we go through ones we did not find earlier
    for mac in list(unknown_info):
        try:
            _recheck_single_unknown(configmanager, mac)
        except Exception:
            traceback.print_exc()
            continue
    # now we go through ones that were identified, but could not pass
    # policy or hadn't been able to verify key
    for nodename in pending_nodes:
        info = pending_nodes[nodename]
        handler = info['handler'].NodeHandler(info, configmanager)
        eventlet.spawn_n(eval_node, configmanager, handler, info, nodename)


def _recheck_single_unknown(configmanager, mac):
    """Try again to identify the unknown device recorded under mac."""
    global rechecker
    global rechecktime
    info = unknown_info.get(mac, None)
    if not info:
        return
    if info['handler'] != pxeh and not info.get('addresses', None):
        log.log({'info': 'Missing address information in ' + repr(info)})
        return
    handler = info['handler'].NodeHandler(info, configmanager)
    if handler.https_supported and not handler.https_cert:
        if handler.cert_fail_reason == 'unreachable':
            log.log(
                {
                    'info': '{0} with hwaddr {1} is not reachable at {2}'
                            ''.format(
                                handler.devname, info['hwaddr'],
                                handler.ipaddr
                            )})
            # addresses data is bad, clear it, to force repair next
            # opportunity
            info['addresses'] = []
            # TODO(jjohnson2):  rescan due to bad peer addr data?
            # not just wait around for the next announce
            return
        log.log(
            {
                'info': '{0} with hwaddr {1} at address {2} is not yet running '
                        'https, will examine later'.format(
                            handler.devname, info['hwaddr'], handler.ipaddr
                        )})
        if rechecker is not None and rechecktime > util.monotonic_time() + 60:
            rechecker.cancel()
        # if cancel did not result in dead, then we are in progress
        if rechecker is None or rechecker.dead:
            rechecktime = util.monotonic_time() + 60
            rechecker = eventlet.spawn_after(60, _periodic_recheck,
                                             configmanager)
        return
    nodename = get_nodename(configmanager, handler, info)
    if nodename:
        if handler.https_supported:
            dp = configmanager.get_node_attributes(
                [nodename], ('pubkeys.tls_hardwaremanager',))
            lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
                                              {}).get('value', None)
            if util.cert_matches(lastfp, handler.https_cert):
                info['nodename'] = nodename
                known_nodes[nodename][info['hwaddr']] = info
                info['discostatus'] = 'discovered'
                return  # already known, no need for more
        eventlet.spawn_n(eval_node, configmanager, handler, info, nodename)


def safe_detected(info):
    """Run detection for info in its own greenthread."""
    eventlet.spawn_n(eval_detected, info)


def eval_detected(info):
    """Call detected(info), printing rather than propagating any error."""
    try:
        return detected(info)
    except Exception:
        traceback.print_exc()


def detected(info):
    """Process one device announcement.

    Picks the first service with a registered node handler, records serial
    and model data when present, and then either schedules a later recheck
    (https not up yet), recognizes an already discovered node, evaluates
    the device against a candidate node, or files it as unidentified.
    """
    global rechecker
    global rechecktime
    if 'hwaddr' not in info:
        return  # For now, require hwaddr field to proceed
        # later, manual and CMM discovery may act on SN and/or UUID
    for service in info['services']:
        if nodehandlers.get(service, None):
            if service not in known_services:
                known_services[service] = set([])
            handler = nodehandlers[service]
            info['handler'] = handler
            break
    else:  # no nodehandler, ignore for now
        return
    try:
        snum = info['attributes']['enclosure-serial-number'][0].rstrip()
        if snum:
            info['serialnumber'] = snum
            known_serials[info['serialnumber']] = info
    except (KeyError, IndexError):
        pass
    try:
        info['modelnumber'] = info['attributes']['enclosure-machinetype-model'][0]
        known_services[service].add(info['modelnumber'])
    except (KeyError, IndexError):
        pass
    if info['hwaddr'] in known_info and 'addresses' in info:
        # we should tee these up for parsing when an enclosure comes up
        # also when switch config parameters change, should discard
        # and there's also if wiring is fixed...
        # of course could periodically revisit known_nodes
        # replace potentially stale address info
        #TODO(jjohnson2): remove this
        # temporary workaround for XCC not doing SLP DA over dedicated port
        # bz 93219, fix submitted, but not in builds yet
        # strictly speaking, going ipv4 only legitimately is mistreated here,
        # but that should be an edge case
        oldaddr = known_info[info['hwaddr']].get('addresses', [])
        for addr in info['addresses']:
            if addr[0].startswith('fe80::'):
                break
        else:
            # no link-local address announced this time, carry over old ones
            for addr in oldaddr:
                if addr[0].startswith('fe80::'):
                    info['addresses'].append(addr)
        if known_info[info['hwaddr']].get(
                'addresses', []) == info['addresses']:
            # if the ip addresses match, then assume no changes
            # now something resetting to defaults could, in theory
            # have the same address, but need to be reset
            # in that case, however, a user can clear pubkeys to force a check
            return
    known_info[info['hwaddr']] = info
    cfg = cfm.ConfigManager(None)
    handler = handler.NodeHandler(info, cfg)
    if handler.https_supported and not handler.https_cert:
        if handler.cert_fail_reason == 'unreachable':
            log.log(
                {
                    'info': '{0} with hwaddr {1} is not reachable at {2}'
                            ''.format(
                                handler.devname, info['hwaddr'],
                                handler.ipaddr
                            )})
            info['addresses'] = []
            return
        log.log(
            {'info': '{0} with hwaddr {1} at address {2} is not yet running '
                     'https, will examine later'.format(
                         handler.devname, info['hwaddr'], handler.ipaddr
                     )})
        if rechecker is not None and rechecktime > util.monotonic_time() + 60:
            rechecker.cancel()
        if rechecker is None or rechecker.dead:
            rechecktime = util.monotonic_time() + 60
            rechecker = eventlet.spawn_after(60, _periodic_recheck, cfg)
        unknown_info[info['hwaddr']] = info
        # same spelling as every other place that sets this state
        info['discostatus'] = 'unidentified'
        #TODO, eventlet spawn after to recheck sooner, or somehow else
        # influence periodic recheck to shorten delay?
        return
    nodename = get_nodename(cfg, handler, info)
    if nodename and handler.https_supported:
        dp = cfg.get_node_attributes([nodename],
                                     ('pubkeys.tls_hardwaremanager',))
        lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
                                          {}).get('value', None)
        if util.cert_matches(lastfp, handler.https_cert):
            info['nodename'] = nodename
            known_nodes[nodename][info['hwaddr']] = info
            info['discostatus'] = 'discovered'
            return  # already known, no need for more
    #TODO(jjohnson2): We might have to get UUID for certain searches...
    #for now defer probe until inside eval_node.  We might not have
    #a nodename without probe in the future.
    if nodename:
        eval_node(cfg, handler, info, nodename)
    else:
        log.log(
            {'info': 'Detected unknown {0} with hwaddr {1} at '
                     'address {2}'.format(
                         handler.devname, info['hwaddr'], handler.ipaddr
                     )})
        info['discostatus'] = 'unidentified'
        unknown_info[info['hwaddr']] = info


def get_nodename(cfg, handler, info):
    """Return the node name this device maps to, or None.

    Handlers without https are matched by uuid, https capable ones by
    certificate fingerprint; both fall back to a lookup by mac address.
    Sets info['discofailure'] to 'nohttps' when no certificate is
    available from an https capable handler.
    """
    if not handler.https_supported:
        curruuid = info['uuid']
        nodename = nodes_by_uuid.get(curruuid, None)
        if nodename is None:
            # TODO: if there are too many matches on port for a
            # given type, error!  Can't just arbitarily limit,
            # shared nic with vms is possible and valid
            nodename = macmap.find_node_by_mac(info['hwaddr'], cfg)
        return nodename
    currcert = handler.https_cert
    if not currcert:
        info['discofailure'] = 'nohttps'
        return None
    currprint = util.get_fingerprint(currcert)
    nodename = nodes_by_fprint.get(currprint, None)
    if not nodename:
        nodename = macmap.find_node_by_mac(info['hwaddr'], cfg)
    return nodename


def eval_node(cfg, handler, info, nodename, manual=False):
    """Probe the device and decide which node, if any, it becomes.

    When nodename turns out to be an enclosure manager for other nodes and
    the device itself is not an enclosure, the device's bay is used to
    select the member node.  With manual=True problems are raised as
    exc.InvalidArgumentException; otherwise they are logged and the device
    is filed as unidentified.  Devices that discover_node declines are
    remembered in pending_nodes.
    """
    try:
        handler.probe()  # unicast interrogation as possible to get more data
        # for now, we search switch only, ideally we search cmm, smm, and
        # switch concurrently
        # do some preconfig, for example, to bring a SMM online if applicable
        handler.preconfig()
    except Exception:
        unknown_info[info['hwaddr']] = info
        info['discostatus'] = 'unidentified'
        errorstr = 'An error occured during discovery, check the ' \
                   'trace and stderr logs, mac was {0} and ip was {1}' \
                   ', the node or the containing enclosure was {2}' \
                   ''.format(info['hwaddr'], handler.ipaddr, nodename)
        traceback.print_exc()
        if manual:
            raise exc.InvalidArgumentException(errorstr)
        log.log({'error': errorstr})
        return
    # first, if had a bay, it was in an enclosure.  If it was discovered by
    # switch, it is probably the enclosure manager and not
    # the node directly.  switch is ambiguous and we should leave it alone
    if 'enclosure.bay' in info and handler.is_enclosure:
        unknown_info[info['hwaddr']] = info
        info['discostatus'] = 'unidentified'
        log.log({'error': 'Something that is an enclosure reported a bay, '
                          'not possible'})
        if manual:
            raise exc.InvalidArgumentException()
        return
    nl = list(cfg.filter_node_attributes('enclosure.manager=' + nodename))
    if not handler.is_enclosure and nl:
        # The specified node is an enclosure (has nodes mapped to it), but
        # what we are talking to is *not* an enclosure
        if 'enclosure.bay' not in info:
            unknown_info[info['hwaddr']] = info
            info['discostatus'] = 'unidentified'
            errorstr = '{2} with mac {0} is in {1}, but unable to ' \
                       'determine bay number'.format(info['hwaddr'],
                                                     nodename,
                                                     handler.ipaddr)
            if manual:
                raise exc.InvalidArgumentException(errorstr)
            log.log({'error': errorstr})
            return
        # search for nodes fitting our description using filters
        # lead with the most specific to have a small second pass
        nl = cfg.filter_node_attributes(
            'enclosure.bay=' + info['enclosure.bay'], nl)
        nl = list(nl)
        if len(nl) != 1:
            info['discofailure'] = 'ambigconfig'
            if len(nl):
                errorstr = 'The following nodes have duplicate ' \
                           'enclosure attributes: ' + ','.join(nl)

            else:
                errorstr = 'The {0} in enclosure {1} bay {2} does not ' \
                           'seem to be a defined node ({3})'.format(
                               handler.devname, nodename,
                               info['enclosure.bay'],
                               handler.ipaddr,
                           )
            if manual:
                raise exc.InvalidArgumentException(errorstr)
            log.log({'error': errorstr})
            unknown_info[info['hwaddr']] = info
            info['discostatus'] = 'unidentified'
            return
        nodename = nl[0]
        if not discover_node(cfg, handler, info, nodename, manual):
            # store it as pending, assuming blocked on enclosure
            # assurance...
            pending_nodes[nodename] = info
    else:
        # we can and did accurately discover by switch or in enclosure
        if not discover_node(cfg, handler, info, nodename, manual):
            pending_nodes[nodename] = info


def discover_node(cfg, handler, info, nodename, manual):
    """Apply discovery policy for nodename to an identified device.

    Returns True when the device is accepted as nodename and False when
    policy, an existing fingerprint, or a configuration error prevents it;
    in the False cases info['discofailure'] records the reason.
    """
    known_nodes[nodename][info['hwaddr']] = info
    if info['hwaddr'] in unknown_info:
        del unknown_info[info['hwaddr']]
    info['discostatus'] = 'identified'
    dp = cfg.get_node_attributes(
        [nodename], ('discovery.policy',
                     'pubkeys.tls_hardwaremanager'))
    policy = dp.get(nodename, {}).get('discovery.policy', {}).get(
        'value', None)
    lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
                                      {}).get('value', None)
    # TODO(jjohnson2): permissive requires we guarantee storage of
    # the pubkeys, which is deferred for a little bit
    # Also, 'secure', when we have the needed infrastructure done
    # in some product or another.
    if (policy == 'permissive' and handler.https_supported and lastfp and
            not manual):
        info['discofailure'] = 'fingerprint'
        log.log({'info': 'Detected replacement of {0} with existing '
                         'fingerprint and permissive discovery policy, not '
                         'doing discovery unless discovery.policy=open or '
                         'pubkeys.tls_hardwaremanager attribute is cleared '
                         'first'.format(nodename)})
        return False  # With a permissive policy, do not discover new
    elif policy in ('open', 'permissive') or manual:
        info['nodename'] = nodename
        if not handler.https_supported:
            # use uuid based scheme in lieu of tls cert, ideally only
            # for stateless 'discovery' targets like pxe, where data does not
            # change
            if info['uuid'] in known_pxe_uuids:
                return True
            uuidinfo = cfg.get_node_attributes(nodename, 'id.uuid')
            known_pxe_uuids[info['uuid']] = nodename
            # TODO(jjohnson2): This is messing with the attrib database
            # so it should only be possible if policy is 'open'
            #
            if manual or policy == 'open':
                # unwrap the stored value; comparing against the attribute
                # dict itself would always differ and rewrite id.uuid
                olduuid = uuidinfo.get(nodename, {}).get(
                    'id.uuid', {}).get('value', None)
                if 'uuid' in info and info['uuid'] != olduuid:
                    cfg.set_node_attributes(
                        {nodename: {'id.uuid': info['uuid']}})
            log.log({'info': 'Detected {0} ({1} with mac {2})'.format(
                nodename, handler.devname, info['hwaddr'])})
            return True
        elif manual or not util.cert_matches(lastfp, handler.https_cert):
            # only 'discover' if it is not the same as last time
            try:
                handler.config(nodename)
            except Exception as e:
                info['discofailure'] = 'bug'
                log.log(
                    {'error':
                         'Error encountered trying to set up {0}, {1}'.format(
                             nodename, str(e))})
                traceback.print_exc()
                return False
            newnodeattribs = {}
            if 'uuid' in info:
                newnodeattribs['id.uuid'] = info['uuid']
            if handler.https_cert:
                newnodeattribs['pubkeys.tls_hardwaremanager'] = \
                    util.get_fingerprint(handler.https_cert)
            if newnodeattribs:
                cfg.set_node_attributes({nodename: newnodeattribs})
            log.log({'info': 'Discovered {0} ({1})'.format(nodename,
                                                          handler.devname)})
        info['discostatus'] = 'discovered'
        return True
    log.log({'info': 'Detected {0}, but discovery.policy is not set to a '
                     'value allowing discovery (open or permissive)'.format(
                         nodename)})
    info['discofailure'] = 'policy'
    return False


attribwatcher = None
nodeaddhandler = None
needaddhandled = False


def _handle_nodelist_change(configmanager):
    """Recheck after a node list change, repeating if more changes came in."""
    global needaddhandled
    global nodeaddhandler
    _recheck_nodes((), configmanager)
    if needaddhandled:
        needaddhandled = False
        nodeaddhandler = eventlet.spawn(_handle_nodelist_change, configmanager)
    else:
        nodeaddhandler = None


def newnodes(added, deleting, configmanager):
    """Node collection callback: re-register the watcher, schedule recheck."""
    global attribwatcher
    global needaddhandled
    global nodeaddhandler
    configmanager.remove_watcher(attribwatcher)
    allnodes = configmanager.list_nodes()
    attribwatcher = configmanager.watch_attributes(
        allnodes, ('discovery.policy', 'net*.switch',
                   'hardwaremanagement.manager', 'net*.switchport', 'id.uuid',
                   'pubkeys.tls_hardwaremanager'), _recheck_nodes)
    if nodeaddhandler:
        # a recheck is already running, ask it to run once more when done
        needaddhandled = True
    else:
        nodeaddhandler = eventlet.spawn(_handle_nodelist_change, configmanager)


rechecker = None
rechecktime = None
rechecklock = eventlet.semaphore.Semaphore()


def _periodic_recheck(configmanager):
    """Run a recheck and schedule the next one 900 seconds out."""
    global rechecker
    global rechecktime
    rechecker = None
    # There shouldn't be anything causing this to double up, but just in case
    # use a semaphore to absolutely guarantee this doesn't multiply
    with rechecklock:
        try:
            _recheck_nodes((), configmanager)
        except Exception:
            traceback.print_exc()
            log.log({'error': 'Unexpected error during discovery, check debug '
                              'logs'})
        # if rechecker is set, it means that an accelerated schedule
        # for rechecker was requested in the course of recheck_nodes
        if rechecker is None:
            rechecktime = util.monotonic_time() + 900
            rechecker = eventlet.spawn_after(900, _periodic_recheck,
                                             configmanager)


def rescan():
    """Rebuild the id lookup maps and start an active SLP scan."""
    _map_unique_ids()
    eventlet.spawn_n(slp.active_scan, safe_detected)


def start_detection():
    """Register watchers, start the snoopers and the periodic recheck."""
    global attribwatcher
    global rechecker
    # rechecktime must be the module level value: detected() and
    # _recheck_single_unknown() compare it against the monotonic clock
    global rechecktime
    _map_unique_ids()
    cfg = cfm.ConfigManager(None)
    allnodes = cfg.list_nodes()
    attribwatcher = cfg.watch_attributes(
        allnodes, ('discovery.policy', 'net*.switch',
                   'hardwaremanagement.manager', 'net*.switchport', 'id.uuid',
                   'pubkeys.tls_hardwaremanager'), _recheck_nodes)
    cfg.watch_nodecollection(newnodes)
    eventlet.spawn_n(slp.snoop, safe_detected)
    eventlet.spawn_n(pxe.snoop, safe_detected)
    if rechecker is None:
        rechecktime = util.monotonic_time() + 900
        rechecker = eventlet.spawn_after(900, _periodic_recheck, cfg)

    # eventlet.spawn_n(ssdp.snoop, safe_detected)


nodes_by_fprint = {}
nodes_by_uuid = {}
known_pxe_uuids = {}


def _map_unique_ids(nodes=None):
    """Refresh the uuid and fingerprint to node lookup maps.

    With nodes=None both maps are rebuilt from every configured node.
    With an explicit iterable only those nodes are refreshed and entries
    belonging to other nodes are kept, so an empty iterable leaves the
    maps as they are.
    """
    global nodes_by_uuid
    global nodes_by_fprint
    # Map current known ids based on uuid and fingperprints for fast lookup
    cfg = cfm.ConfigManager(None)
    if nodes is None:
        # full remap requested, start from scratch
        nodes_by_uuid = {}
        nodes_by_fprint = {}
        nodes = cfg.list_nodes()
    bigmap = cfg.get_node_attributes(nodes,
                                     ('id.uuid',
                                      'pubkeys.tls_hardwaremanager'))
    # purge whatever was previously recorded for the refreshed nodes
    # before adding, so a value moving between nodes is not lost
    for uuid in [u for u in nodes_by_uuid if nodes_by_uuid[u] in bigmap]:
        del nodes_by_uuid[uuid]
    for fprint in [f for f in nodes_by_fprint
                   if nodes_by_fprint[f] in bigmap]:
        del nodes_by_fprint[fprint]
    for node in bigmap:
        uuid = bigmap[node].get('id.uuid', {}).get('value', None)
        if uuid:
            nodes_by_uuid[uuid] = node
        fprint = bigmap[node].get(
            'pubkeys.tls_hardwaremanager', {}).get('value', None)
        if fprint:
            nodes_by_fprint[fprint] = node
    for uuid in known_pxe_uuids:
        if uuid not in nodes_by_uuid:
            nodes_by_uuid[uuid] = known_pxe_uuids[uuid]


if __name__ == '__main__':
    start_detection()
    while True:
        eventlet.sleep(30)
# \ No newline at end of file
# diff --git a/confluent_server/confluent/discovery/handlers/bmc.py b/confluent_server/confluent/discovery/handlers/bmc.py
# new file mode 100644
# index 00000000..62e12e4b
# --- /dev/null
# +++ b/confluent_server/confluent/discovery/handlers/bmc.py
# @@ -0,0 +1,153 @@
# Copyright 2017 Lenovo
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import confluent.discovery.handlers.generic as generic
import confluent.exceptions as exc
import confluent.netutil as netutil
import eventlet.support.greendns

# Provide foundation for general IPMI device configuration

import pyghmi.exceptions as pygexc
ipmicommand = eventlet.import_patched('pyghmi.ipmi.command')
ipmicommand.session.select = eventlet.green.select
ipmicommand.session.threading = eventlet.green.threading
ipmicommand.session.socket.getaddrinfo = eventlet.support.greendns.getaddrinfo
getaddrinfo = eventlet.support.greendns.getaddrinfo

DEFAULT_USER = 'USERID'
DEFAULT_PASS = 'PASSW0RD'


class NodeHandler(generic.NodeHandler):
    """Discovery handler for devices configured over IPMI."""

    def _get_ipmicmd(self, user=DEFAULT_USER, password=DEFAULT_PASS):
        """Return an IPMI command object for self.ipaddr."""
        return ipmicommand.Command(self.ipaddr, user, password)

    def __init__(self, info, configmanager):
        super(NodeHandler, self).__init__(info, configmanager)

    def probe(self):
        return
        # TODO(jjohnson2): probe serial number and uuid

    def config(self, nodename, reset=False):
        """Apply the address and credentials configured for nodename.

        Logs in with the default credentials, falling back to the node's
        configured ones when those differ from the defaults.  Sets a static
        IPv4 address when hardwaremanagement.manager is a non link-local
        value, makes sure the configured user exists with administrator
        privilege and the configured password, then disables or deletes
        every other user.  Calls reset_bmc() afterwards when reset is true.

        Raises exc.TargetEndpointBadCredentials when the node lacks a user
        or password attribute, exc.NotImplementedException for an IPv6
        target address, and exc.TargetEndpointUnreachable when no address
        is configured and the device is not on a link-local address.
        """
        # TODO(jjohnson2): set ip parameters, user/pass, alert cfg maybe
        # In general, try to use https automation, to make it consistent
        # between hypothetical secure path and today.
        try:
            ic = self._get_ipmicmd()
            passwd = DEFAULT_PASS
        except pygexc.IpmiException as pi:
            # default login failed, retry only if the node has credentials
            # that differ from the defaults
            creds = self.configmanager.get_node_attributes(
                nodename,
                ['secret.hardwaremanagementuser',
                 'secret.hardwaremanagementpassword'], decrypt=True)
            user = creds.get(nodename, {}).get(
                'secret.hardwaremanagementuser', {}).get('value', None)
            havecustomcreds = False
            if user is not None and user != DEFAULT_USER:
                havecustomcreds = True
            else:
                user = DEFAULT_USER
            passwd = creds.get(nodename, {}).get(
                'secret.hardwaremanagementpassword', {}).get('value', None)
            if passwd is not None and passwd != DEFAULT_PASS:
                havecustomcreds = True
            else:
                passwd = DEFAULT_PASS
            if havecustomcreds:
                ic = self._get_ipmicmd(user, passwd)
            else:
                raise
        currusers = ic.get_users()
        lanchan = ic.get_network_channel()
        # NOTE(review): netfn 6 / command 0x44 is presumably the IPMI
        # "Get User Access" request, whose first three response bytes carry
        # user counts -- confirm against the IPMI specification
        userdata = ic.xraw_command(netfn=6, command=0x44, data=(lanchan,
                                                               1))
        userdata = bytearray(userdata['data'])
        # maxusers and enabledusers are computed but not used below
        maxusers = userdata[0] & 0b111111
        enabledusers = userdata[1] & 0b111111
        lockedusers = userdata[2] & 0b111111
        cfg = self.configmanager
        cd = cfg.get_node_attributes(
            nodename, ['secret.hardwaremanagementuser',
                       'secret.hardwaremanagementpassword',
                       'hardwaremanagement.manager'], True)
        cd = cd.get(nodename, {})
        if ('secret.hardwaremanagementuser' not in cd or
                'secret.hardwaremanagementpassword' not in cd):
            raise exc.TargetEndpointBadCredentials(
                'Missing user and/or password')
        if ('hardwaremanagement.manager' in cd and
                cd['hardwaremanagement.manager']['value'] and
                not cd['hardwaremanagement.manager']['value'].startswith(
                    'fe80::')):
            newip = cd['hardwaremanagement.manager']['value']
            newipinfo = getaddrinfo(newip, 0)[0]
            # This getaddrinfo is repeated in get_nic_config, could be
            # optimized, albeit with a more convoluted api..
            newip = newipinfo[-1][0]
            if ':' in newip:
                raise exc.NotImplementedException('IPv6 remote config TODO')
            netconfig = netutil.get_nic_config(cfg, nodename, ip=newip)
            plen = netconfig['prefix']
            newip = '{0}/{1}'.format(newip, plen)
            ic.set_net_configuration(ipv4_address=newip,
                                     ipv4_configuration='static',
                                     ipv4_gateway=netconfig['ipv4_gateway'])
        elif self.ipaddr.startswith('fe80::'):
            # no address configured, record the link-local one in use
            cfg.set_node_attributes(
                {nodename: {'hardwaremanagement.manager': self.ipaddr}})
        else:
            raise exc.TargetEndpointUnreachable(
                'hardwaremanagement.manager must be set to desired address')
        newuser = cd['secret.hardwaremanagementuser']['value']
        newpass = cd['secret.hardwaremanagementpassword']['value']
        for uid in currusers:
            if currusers[uid]['name'] == newuser:
                # Use existing account that has been created
                newuserslot = uid
                break
        else:
            # no existing account by that name: take the slot after the
            # locked ones, but never a slot below 2
            newuserslot = lockedusers + 1
            if newuserslot < 2:
                newuserslot = 2
            ic.set_user_name(newuserslot, newuser)
            ic.set_user_access(newuserslot, lanchan,
                               privilege_level='administrator')
        if newpass != passwd:  # don't mess with existing if no change
            ic.set_user_password(newuserslot, password=newpass)
        # Now to zap others
        for uid in currusers:
            if uid != newuserslot:
                if uid <= lockedusers:  # we cannot delete, settle for disable
                    ic.disable_user(uid, 'disable')
                else:
                    # lead with the most critical thing, removing user access
                    ic.set_user_access(uid, channel=None, callback=False,
                                       link_auth=False, ipmi_msg=False,
                                       privilege_level='no_access')
                    # next, try to disable the password
                    ic.set_user_password(uid, mode='disable', password=None)
                    # ok, now we can be less paranoid
                    try:
                        ic.user_delete(uid)
                    except pygexc.IpmiException as ie:
                        if ie.ipmicode != 0xd5:  # some response to the 0xff
                            # name...
                            # the user will remain, but that is life
                            raise
        if reset:
            ic.reset_bmc()
        return
# diff --git a/confluent_server/confluent/discovery/handlers/generic.py b/confluent_server/confluent/discovery/handlers/generic.py
# new file mode 100644
# index 00000000..be5a2a57
# --- /dev/null
# +++ b/confluent_server/confluent/discovery/handlers/generic.py
# @@ -0,0 +1,85 @@
# Copyright 2017 Lenovo
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import errno
import eventlet
webclient = eventlet.import_patched('pyghmi.util.webclient')

class NodeHandler(object):
    """Base discovery handler: holds the announcement and picks an address.

    https_supported and is_enclosure are class level defaults.
    """
    https_supported = True
    is_enclosure = False

    def __init__(self, info, configmanager):
        """Store info/configmanager and choose self.ipaddr.

        info['addresses'] must hold at least one entry; its element 0 is
        used as the address string.
        """
        self._certfailreason = None
        self._fp = None
        self.info = info
        self.configmanager = configmanager
        targsa = None
        # first let us prefer LLA if possible, since that's most stable
        for sa in info['addresses']:
            if sa[0].startswith('fe80'):
                targsa = sa
                break
        else:
            targsa = info['addresses'][0]
        self.ipaddr = targsa[0]
        return

    def probe(self):
        # Use appropriate direct strategy to gather data such as
        # serial number and uuid to flesh out data as needed
        return

    def preconfig(self):
        return

    @property
    def discoverable_by_switch(self):
        return True

    def _savecert(self, certificate):
        """Verify callback: remember the certificate and accept it."""
        self._fp = certificate
        return True

    @property
    def cert_fail_reason(self):
        """'refused', 'unreachable', or None when no failure is recorded."""
        if self._certfailreason == 1:
            return 'refused'
        elif self._certfailreason == 2:
            return 'unreachable'

    @property
    def https_cert(self):
        """Certificate seen when connecting to self.ipaddr, or None.

        The certificate is cached once obtained.  On failure the reason is
        recorded for cert_fail_reason: connection refused is 1, every other
        error is 2.
        """
        if self._fp:
            return self._fp
        if ':' in self.ipaddr:
            # a colon means an IPv6 literal, which is passed in brackets
            ip = '[{0}]'.format(self.ipaddr)
        else:
            ip = self.ipaddr
        wc = webclient.SecureHTTPConnection(ip, verifycallback=self._savecert)
        try:
            wc.connect()
        except IOError as ie:
            if ie.errno == errno.ECONNREFUSED:
                self._certfailreason = 1
                return None
            elif ie.errno == errno.EHOSTUNREACH:
                self._certfailreason = 2
                return None
            self._certfailreason = 2
            return None
        except Exception:
            self._certfailreason = 2
            return None
        return self._fp
# \ No newline at end of file
# diff --git a/confluent_server/confluent/discovery/handlers/imm.py b/confluent_server/confluent/discovery/handlers/imm.py
# new file mode 100644
# index 00000000..23feded7
# --- /dev/null
# +++ b/confluent_server/confluent/discovery/handlers/imm.py
# @@ -0,0 +1,46 @@
# Copyright 2017 Lenovo
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import confluent.discovery.handlers.bmc as bmchandler
import pyghmi.exceptions as pygexc
import pyghmi.ipmi.private.util as pygutil


class NodeHandler(bmchandler.NodeHandler):
    """Discovery handler for IMM devices."""
    devname = 'IMM'

    def probe(self):
        """Record the device uuid and, when reported, its enclosure bay.

        Any pyghmi IpmiException is printed and then re-raised.
        """
        try:
            session = self._get_ipmicmd()
            # raw request netfn 6 / command 8; the reply payload is decoded
            # as a wire-format uuid
            reply = session.xraw_command(netfn=6, command=8)
            self.info['uuid'] = pygutil.decode_wireformat_uuid(reply['data'])
            session.oem_init()
            bay = session._oem.immhandler.get_property('/v2/cmm/sp/7')
            if bay:
                # enclosure.bay only happens for Flex, nextscale doesn't do
                # it this way
                self.info['enclosure.bay'] = bay
        except pygexc.IpmiException as ipmierr:
            print(repr(ipmierr))
            raise


# TODO(jjohnson2): web based init config for future prevalidated cert scheme
#    def config(self, nodename):
#        return

# diff --git a/confluent_server/confluent/discovery/handlers/pxe.py b/confluent_server/confluent/discovery/handlers/pxe.py
# new file mode 100644
# index 00000000..2b43ffbd
# --- /dev/null
# +++ b/confluent_server/confluent/discovery/handlers/pxe.py
# @@ -0,0 +1,39 @@
# Copyright 2017 Lenovo
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This contains functionality for passive detection and, one day, active
# response to pxe


import confluent.discovery.handlers.generic as generic


class NodeHandler(generic.NodeHandler):
    """Discovery handler for PXE clients, which have no https endpoint."""
    https_supported = False
    is_enclosure = False
    devname = 'PXE'

    def __init__(self, info, configmanager):
        """Set up the handler without selecting a management address.

        generic.NodeHandler.__init__ is deliberately not called because it
        indexes info['addresses'], which this handler does not rely on.
        The attributes that initializer would create are still set here so
        inherited code finds them.
        """
        self._certfailreason = None
        self._fp = None
        self.info = info
        self.configmanager = configmanager
        self.ipaddr = ''
        # kept for existing users of the original attribute name
        self.cfm = configmanager

    @property
    def cert_fail_reason(self):
        """Always 'unsupported': there is no certificate to fetch."""
        return 'unsupported'

    @property
    def https_cert(self):
        """Always None."""
        return None

    def config(self, nodename):
        """Nothing to configure on a PXE client."""
        return
# diff --git a/confluent_server/confluent/discovery/handlers/smm.py b/confluent_server/confluent/discovery/handlers/smm.py
# new file mode 100644
# index 00000000..954c8e9d
# --- /dev/null
# +++ b/confluent_server/confluent/discovery/handlers/smm.py
# @@ -0,0 +1,38 @@
# Copyright 2017 Lenovo
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class NodeHandler(bmchandler.NodeHandler):
    """Discovery handler for the SMM, which is treated as an enclosure."""
    devname = 'SMM'
    is_enclosure = True

    def config(self, nodename):
        """Apply the generic BMC configuration to the SMM.

        :param nodename: Name of the node being configured
        """
        # SMM for now has to reset to assure configuration applies
        parent = super(NodeHandler, self)
        parent.config(nodename)

# notes for smm:
# POST to:
# https://172.30.254.160/data/changepwd
# oripwd=PASSW0RD&newpwd=Passw0rd!4321
# got response:
# 0-ChangePwdlogin.htmlok
# requires relogin
# https://172.30.254.160/index.html
# post to:
# https://172.30.254.160/data/login
# with body user=USERID&password=Passw0rd!4321
# yields:
# ok 0 index.html
# note forwardUrl, if password change needed, will indicate something else
class NodeHandler(bmchandler.NodeHandler):
    """Discovery handler for devices identified as 'XCC'."""
    devname = 'XCC'

    def probe(self):
        """Fill self.info with UUID, bay and enclosure data when available.

        Stops quietly as soon as the bay or the SMM uuid property is empty.
        Any pyghmi IpmiException is printed and then re-raised.
        """
        try:
            session = self._get_ipmicmd()
            guidreply = session.xraw_command(netfn=6, command=8)
            self.info['uuid'] = pygutil.decode_wireformat_uuid(
                guidreply['data'])
            session.oem_init()
            bay = session._oem.immhandler.get_property('/v2/cmm/sp/7')
            if not bay:
                return
            self.info['enclosure.bay'] = bay
            rawsmmid = session._oem.immhandler.get_property(
                '/v2/ibmc/smm/chassis/uuid')
            if not rawsmmid:
                return
            # normalize to lower case without spaces, then insert the dashes
            # of the usual 8-4-4-4-12 uuid layout
            compact = rawsmmid.lower().replace(' ', '')
            pieces = (compact[:8], compact[8:12], compact[12:16],
                      compact[16:20], compact[20:])
            self.info['enclosure.uuid'] = '{0}-{1}-{2}-{3}-{4}'.format(
                *pieces)
            self.info['enclosure.type'] = 'smm'
        except pygexc.IpmiException as ipmierr:
            print(repr(ipmierr))
            raise

    def preconfig(self):
        """Attempt to enable the SMM through a raw OEM IPMI request."""
        # it's normal to get a 'not supported' (193) for systems without
        # an SMM
        try:
            session = self._get_ipmicmd()
            session.xraw_command(netfn=0x3a, command=0xf1, data=(1,))
        except pygexc.IpmiException as ipmierr:
            if ipmierr.ipmicode == 193:
                return
            # raise an issue if anything other than to be expected
            raise
        # TODO: decide how to clean out the ipmi session if important;
        # as it stands, logging out here can step on itself


# TODO(jjohnson2): web based init config for future prevalidated cert scheme
#    def config(self, nodename):
#        return
# We can listen to port 69 with SO_REUSEADDR to snoop port 69 *even* if dhcp
# is running (because the other dhcp servers do it already)

# Goal is to detect and act on a DHCPDISCOVER, without actually having to do
# any offer

# option 97 = UUID (wireformat)

# Two byte client architecture value (DHCP option 93) to the architecture
# name handed to the discovery handler.  Keys are bytes so that lookups work
# on both python 2 and python 3.
pxearchs = {
    b'\x00\x00': 'bios-x86',
    b'\x00\x07': 'uefi-x64',
    b'\x00\x09': 'uefi-x64',
    b'\x00\x0b': 'uefi-aarch64',
}


def decode_uuid(rawguid):
    """Convert a 16 byte wire format UUID to its textual form.

    The first three fields are little endian on the wire, the remainder is
    big endian.

    :param rawguid: 16 bytes (bytes, bytearray, or iterable of integers)
    :return: Upper case string in 8-4-4-4-12 layout
    """
    rawguid = bytes(bytearray(rawguid))
    lebytes = struct.unpack_from('<IHH', rawguid[:8])
    bebytes = struct.unpack_from('>HHI', rawguid[8:])
    return '{0:08X}-{1:04X}-{2:04X}-{3:04X}-{4:04X}{5:08X}'.format(
        lebytes[0], lebytes[1], lebytes[2], bebytes[0], bebytes[1], bebytes[2])


def find_info_in_options(rq, optidx):
    """Walk DHCP options looking for the client UUID and architecture.

    :param rq: bytearray holding the request
    :param optidx: Index of the first option (just after the magic cookie)
    :return: Tuple of (uuid, arch); either may be None if it was not found,
             was malformed, or the request is not a DHCPDISCOVER
    """
    uuid = None
    arch = None
    try:
        while uuid is None or arch is None:
            option = rq[optidx]
            if option == 255:  # end of options, nothing more to find
                return uuid, arch
            if option == 0:  # pad is a lone byte without a length field
                optidx += 1
            elif option == 53:  # DHCP message type
                # we want only length 1 and only discover (type 1)
                if rq[optidx + 1] != 1 or rq[optidx + 2] != 1:
                    return uuid, arch
                optidx += 3
            elif option == 97:
                if rq[optidx + 1] != 17:
                    # 16 bytes of uuid and one reserved byte
                    return uuid, arch
                if rq[optidx + 2] != 0:  # the reserved byte should be zero,
                    # anything else would be a new spec that we don't know yet
                    return uuid, arch
                uuid = decode_uuid(rq[optidx + 3:optidx + 19])
                optidx += 19
            elif option == 93:
                if rq[optidx + 1] != 2:
                    return uuid, arch
                archraw = bytes(rq[optidx + 2:optidx + 4])
                if archraw in pxearchs:
                    arch = pxearchs[archraw]
                optidx += 4
            else:
                # uninteresting option, skip code, length and its data
                optidx += rq[optidx + 1] + 2
    except IndexError:
        # ran off the end of the packet, report whatever was found
        return uuid, arch
    return uuid, arch


def snoop(handler):
    """Watch for PXE DHCPDISCOVER requests, never returns

    handler is called with a dictionary holding 'hwaddr', 'uuid',
    'architecture' and 'services' for every boot request that carries a
    decodable UUID.

    :param handler: Callable accepting the dictionary described above
    """
    # TODO(jjohnson2): ipv6 socket and multicast for DHCPv6, should that be
    # prominent
    # TODO(jjohnson2): IP_PKTINFO, recvmsg to get the destination ip, per
    # proxydhcp.c from xCAT
    net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    net4.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    net4.bind(('', 67))
    while True:
        (rq, peer) = net4.recvfrom(9000)
        # if we have a small packet, just skip, it can't possible hold enough
        # data and avoids some downstream IndexErrors that would be messy
        # with try/except
        if len(rq) < 64:
            continue
        rq = bytearray(rq)
        if rq[0] == 1:  # Boot request
            addrlen = rq[2]
            if addrlen > 16:  # max address size in bootp is 16 bytes
                continue
            netaddr = rq[28:28+addrlen]
            netaddr = ':'.join(['{0:02x}'.format(x) for x in netaddr])
            try:
                # options start right after the DHCP magic cookie
                optidx = rq.index(b'\x63\x82\x53\x63') + 4
            except ValueError:
                continue
            uuid, arch = find_info_in_options(rq, optidx)
            if uuid is None:
                continue
            # We will fill out service to have something to byte into,
            # but the nature of the beast is that we do not have peers,
            # so that will not be present for a pxe snoop
            handler({'hwaddr': netaddr, 'uuid': uuid, 'architecture': arch,
                     'services': ('pxe-client',)})


if __name__ == '__main__':
    def testsnoop(info):
        print(repr(info))
    snoop(testsnoop)
_slp_services = set([
    'service:management-hardware.IBM:integrated-management-module2',
    'service:lenovo-smm',
    'service:management-hardware.Lenovo:lenovo-xclarity-controller',
])

# SLP has a lot of ambition that was unfulfilled in practice.
# So we have a static footer here to always use 'DEFAULT' scope, no LDAP
# predicates, and no authentication for service requests
srvreqfooter = b'\x00\x07DEFAULT\x00\x00\x00\x00'
# An empty instance of the attribute list extension
# which is defined in RFC 3059, used to indicate support for that capability
attrlistext = b'\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00'


def _parse_slp_header(packet):
    """Split an SLP v2 packet into header fields and payload

    :param packet: The raw packet data
    :return: None if the packet is too short or not version 2, otherwise a
             dict with 'function', 'xid', 'lang' and 'payload', plus 'offset'
             and 'extoffset' when the header carries an extension offset
    """
    packet = bytearray(packet)
    if len(packet) < 16 or packet[0] != 2:
        # discard packets that are obviously useless
        return None
    parsed = {
        'function': packet[1],
    }
    # the extension offset is a 24 bit value, pad it to 32 bits to unpack
    (offset, parsed['xid'], langlen) = struct.unpack(
        '!IHH', bytes(b'\x00' + packet[7:14]))
    parsed['lang'] = packet[14:14 + langlen].decode('utf-8')
    parsed['payload'] = packet[14 + langlen:]
    if offset:
        parsed['offset'] = 14 + langlen
        parsed['extoffset'] = offset
    return parsed


def _pop_url(payload):
    """Remove one URL entry from the front of a payload

    :param payload: bytearray starting at a URL entry
    :return: Tuple of the URL and the remaining payload
    :raises Exception: If the entry indicates authentication blocks
    """
    urllen = struct.unpack('!H', bytes(payload[3:5]))[0]
    urlend = 5 + urllen
    url = bytes(payload[5:urlend]).decode('utf-8')
    if payload[urlend] != 0:
        raise Exception('Auth blocks unsupported')
    return url, payload[urlend + 1:]


def _parse_SrvRply(parsed):
    """ Modify passed dictionary to have parsed data

    'urls' is set to the list of URLs in the reply, and 'errorcode' is set
    if the reply carries a non-zero error code.

    :param parsed: Dictionary from _parse_slp_header for a service reply
    :return: None, the dictionary is modified in place
    """
    payload = parsed['payload']
    ecode, ucount = struct.unpack('!HH', bytes(payload[0:4]))
    if ecode:
        parsed['errorcode'] = ecode
    payload = payload[4:]
    parsed['urls'] = []
    while ucount:
        ucount -= 1
        url, payload = _pop_url(payload)
        parsed['urls'].append(url)


def _parse_slp_packet(packet, peer, rsps, xidmap):
    """Parse a received packet and merge it into the responses seen so far

    :param packet: The raw packet data
    :param peer: The sockaddr the packet came from
    :param rsps: Dict of responses keyed by (identifier, xid), where the
                 identifier is the mac address if the neighbor table has
                 one for the peer, or else the address; updated in place
    :param xidmap: Dict of xid to the service type that was requested
    """
    parsed = _parse_slp_header(packet)
    if not parsed:
        return
    addr = peer[0]
    if '%' in addr:
        addr = addr[:addr.index('%')]
    mac = None
    if addr in neighutil.neightable:
        identifier = neighutil.neightable[addr]
        mac = identifier
    else:
        identifier = addr
    if (identifier, parsed['xid']) in rsps:
        # avoid obviously duplicate entries
        parsed = rsps[(identifier, parsed['xid'])]
    else:
        rsps[(identifier, parsed['xid'])] = parsed
    if mac and 'hwaddr' not in parsed:
        parsed['hwaddr'] = mac
    if parsed['xid'] in xidmap:
        parsed['services'] = [xidmap[parsed['xid']]]
    if 'addresses' in parsed:
        if peer not in parsed['addresses']:
            parsed['addresses'].append(peer)
    else:
        parsed['addresses'] = [peer]
    if parsed['function'] == 2:  # A service reply
        _parse_SrvRply(parsed)


def _v6mcasthash(srvtype):
    """Calculate the last group of the IPv6 multicast address for a type

    :param srvtype: The service type string
    :return: Lower case hex string without padding
    """
    # The hash algorithm described by RFC 3111
    nums = bytearray(srvtype.encode('utf-8'))
    hashval = 0
    for i in nums:
        hashval *= 33
        hashval += i
        hashval &= 0xffff  # only need to track the lowest 16 bits
    hashval &= 0x3ff
    hashval |= 0x1000
    return '{0:x}'.format(hashval)


def _generate_slp_header(payload, multicast, functionid, xid, extoffset=0):
    """Create the 16 byte SLP v2 header for a payload

    :param payload: The payload the header will precede
    :param multicast: Whether to set the multicast flag
    :param functionid: The SLP function id
    :param xid: The transaction id
    :param extoffset: Offset of an extension within payload, 0 if none
    :return: bytearray with the header
    :raises Exception: If the resulting packet would exceed 1400 bytes
    """
    if multicast:
        flags = 0x2000
    else:
        flags = 0
    packetlen = len(payload) + 16  # we have a fixed 16 byte header supported
    if extoffset:  # if we have an offset, add 16 to account for this function
        # generating a 16 byte header
        extoffset += 16
    if packetlen > 1400:
        # For now, we aren't intending to support large SLP transmits
        # raise an exception to help identify if such a requirement emerges
        raise Exception("TODO: Transmit overflow packets")
    # We always do SLP v2, and only v2
    header = bytearray([2, functionid])
    # SLP uses 24 bit packed integers, so in such places we pack 32 then
    # discard the high byte
    header.extend(struct.pack('!IH', packetlen, flags)[1:])
    # '2' below refers to the length of the language tag
    header.extend(struct.pack('!IHH', extoffset, xid, 2)[1:])
    # we only do english (in SLP world, it's not like non-english appears...)
    header.extend(b'en')
    return header


def _generate_attr_request(service, xid):
    """Create an attribute request (function 6) packet

    :param service: The service to request attributes of
    :param xid: The transaction id
    :return: bytearray with the complete packet
    """
    service = service.encode('utf-8')
    payload = bytearray(struct.pack('!HH', 0, len(service)) + service)
    payload.extend(srvreqfooter)
    header = _generate_slp_header(payload, False, functionid=6, xid=xid)
    return header + payload


def _generate_request_payload(srvtype, multicast, xid, prlist=''):
    """Create a service request (function 1) packet

    The packet ends with an empty attribute list extension.

    :param srvtype: The service type to request
    :param multicast: Whether to set the multicast flag
    :param xid: The transaction id
    :param prlist: The previous responder list as a string
    :return: bytearray with the complete packet
    """
    prlist = prlist.encode('utf-8')
    payload = bytearray(struct.pack('!H', len(prlist)) + prlist)
    srvtype = srvtype.encode('utf-8')
    payload.extend(struct.pack('!H', len(srvtype)) + srvtype)
    payload.extend(srvreqfooter)
    extoffset = len(payload)
    payload.extend(attrlistext)
    header = _generate_slp_header(payload, multicast, functionid=1, xid=xid,
                                  extoffset=extoffset)
    return header + payload


def _find_srvtype(net, net4, srvtype, addresses, xid):
    """Internal function to find a single service type

    Helper to do singleton requests to srvtype

    :param net: IPv6 socket to send multicast requests with
    :param net4: IPv4 socket to send multicast and broadcast requests with
    :param srvtype: Service type to do now
    :param addresses: Pass through of addresses argument from find_targets
    :param xid: The transaction id to use in the request
    :return:
    """
    if addresses is not None:
        # NOTE(review): only the multicast/broadcast scan is implemented,
        # nothing is sent when specific addresses are requested
        return
    data = _generate_request_payload(srvtype, True, xid)
    net4.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
    v6hash = _v6mcasthash(srvtype)
    # do 'interface local' and 'link local'
    # it shouldn't make sense, but some configurations work with interface
    # local that do not work with link local
    v6addrs = [
        ('ff01::1:' + v6hash, 427, 0, 0),
        ('ff02::1:' + v6hash, 427, 0, 0),
    ]
    for idx in util.list_interface_indexes():
        # IPv6 multicast is by index, so lead with that
        net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_MULTICAST_IF, idx)
        for sa in v6addrs:
            try:
                net.sendto(data, sa)
            except socket.error:
                # if we hit an interface without ipv6 multicast,
                # this can cause an error, skip such an interface
                # case in point, 'lo'
                pass
    for i4 in util.list_ips():
        if 'broadcast' not in i4:
            continue
        addr = i4['addr']
        bcast = i4['broadcast']
        net4.setsockopt(socket.IPPROTO_IP, socket.IP_MULTICAST_IF,
                        socket.inet_aton(addr))
        net4.sendto(data, ('239.255.255.253', 427))
        net4.sendto(data, (bcast, 427))


def _grab_rsps(socks, rsps, interval, xidmap):
    """Collect responses until the sockets stay quiet for interval seconds

    :param socks: The sockets to receive from
    :param rsps: Dict of responses, updated in place
    :param interval: Seconds to wait for more data before giving up
    :param xidmap: Dict of xid to the service type that was requested
    """
    r, _, _ = select.select(socks, (), (), interval)
    while r:
        for s in r:
            (rsp, peer) = s.recvfrom(9000)
            neighutil.refresh_neigh()
            _parse_slp_packet(rsp, peer, rsps, xidmap)
        r, _, _ = select.select(socks, (), (), interval)


def _decode_attr_value(attrname, rawval):
    """Decode one value of an attribute, helper of _parse_attrlist

    Values beginning with \\FF are opaque and become a bytearray, except
    that a 16 byte opaque value of an attribute with 'uuid' in its name
    becomes a UUID string.  Other values become text.
    """
    try:
        val = rawval.decode('utf-8')
    except UnicodeDecodeError:
        val = '*DECODEERROR*'
    if val[:3] != '\\FF':
        return val
    # opaque data, a sequence of backslash prefixed hexadecimal bytes
    opaque = bytearray([])
    for bnum in val[3:].split('\\'):
        if bnum == '':
            continue
        opaque.append(int(bnum, 16))
    if 'uuid' in attrname and len(opaque) == 16:
        lebytes = struct.unpack_from('<IHH', bytes(opaque[:8]))
        bebytes = struct.unpack_from('>HHI', bytes(opaque[8:]))
        return '{0:08X}-{1:04X}-{2:04X}-{3:04X}-' \
               '{4:04X}{5:08X}'.format(
                   lebytes[0], lebytes[1], lebytes[2], bebytes[0],
                   bebytes[1], bebytes[2])
    return opaque


def _parse_attrlist(attrstr):
    """Parse an SLP attribute list

    :param attrstr: The attribute list as received
    :return: Dict of attribute name to a list of values, or to None for
             keywords without a value.  'INCOMPLETE' is set to True if the
             list ends in the middle of a parenthesized attribute.
    """
    attribs = {}
    if attrstr and not isinstance(attrstr, (bytes, bytearray)):
        attrstr = attrstr.encode('utf-8')
    while attrstr:
        if attrstr[:1] == b'(':
            if b')' not in attrstr:
                attribs['INCOMPLETE'] = True
                return attribs
            closeidx = attrstr.index(b')')
            currattr = attrstr[1:closeidx]
            if b'=' not in currattr:  # Not allegedly kosher, but still..
                attribs[currattr.decode('utf-8')] = None
            else:
                # only split on the first '=', the value may contain more
                attrname, attrval = currattr.split(b'=', 1)
                attrname = attrname.decode('utf-8')
                attribs[attrname] = [
                    _decode_attr_value(attrname, val)
                    for val in attrval.split(b',')]
            # continue after the ')', else it would be taken as a keyword
            attrstr = attrstr[closeidx + 1:]
        elif attrstr[:1] == b',':
            attrstr = attrstr[1:]
        elif b',' in attrstr:
            commaidx = attrstr.index(b',')
            attribs[attrstr[:commaidx].decode('utf-8', 'replace')] = None
            attrstr = attrstr[commaidx:]
        else:
            attribs[attrstr.decode('utf-8', 'replace')] = None
            attrstr = None
    return attribs


def _parse_attrs(data, parsed):
    """Store the attributes of an attribute reply into parsed

    Data that is not a successful attribute reply (function 7) to the xid
    in parsed is ignored.

    :param data: The raw reply
    :param parsed: Dict with the 'xid' of the request, gains 'attributes'
    """
    headinfo = _parse_slp_header(data)
    if not headinfo:
        # short or non SLP data, e.g. peer closed without responding
        return
    if headinfo['function'] != 7 or headinfo['xid'] != parsed['xid']:
        return
    payload = headinfo['payload']
    if struct.unpack('!H', bytes(payload[:2]))[0] != 0:
        return
    length = struct.unpack('!H', bytes(payload[2:4]))[0]
    attrstr = bytes(payload[4:4+length])
    parsed['attributes'] = _parse_attrlist(attrstr)


def _add_attributes(parsed):
    """Request the attributes of the first service over TCP

    If the peer can not be connected to, parsed is left unchanged.

    :param parsed: Dict with 'services', 'xid' and 'addresses', gains
                   'attributes' if a usable reply is received
    """
    attrq = _generate_attr_request(parsed['services'][0], parsed['xid'])
    target = None
    # prefer reaching out to an fe80 if present, to be highly robust
    # in face of network changes
    for addr in parsed['addresses']:
        if addr[0].startswith('fe80'):
            target = addr
    # however if no fe80 seen, roll with the first available address
    if not target:
        target = parsed['addresses'][0]
    if len(target) == 4:  # IPv6 sockaddrs are 4-tuples
        net = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
    else:
        net = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        try:
            net.connect(target)
        except socket.error:
            return
        net.sendall(attrq)
        rsp = net.recv(8192)
    finally:
        net.close()
    _parse_attrs(rsp, parsed)


def query_srvtypes(target):
    """Query the srvtypes advertised by the target

    :param target: A sockaddr tuple (if you get the peer info)
    :return: List of service type strings, [u''] if the target could not be
             connected to, None if the reply was not usable
    """
    payload = b'\x00\x00\xff\xff\x00\x07DEFAULT'
    header = _generate_slp_header(payload, False, functionid=9, xid=1)
    packet = header + payload
    if len(target) == 2:
        net = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    elif len(target) == 4:
        net = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
    else:
        raise Exception('Unrecognized target {0}'.format(repr(target)))
    try:
        tries = 3
        connected = False
        while tries and not connected:
            tries -= 1
            try:
                net.connect(target)
                connected = True
            except socket.error:
                pass
        if not connected:
            return [u'']
        net.sendall(packet)
        rs = net.recv(8192)
    finally:
        net.close()
    parsed = _parse_slp_header(rs)
    if parsed:
        payload = parsed['payload']
        if payload[:2] != b'\x00\x00':  # non-zero error code
            return
        stypelen = struct.unpack('!H', bytes(payload[2:4]))[0]
        stypes = payload[4:4+stypelen].decode('utf-8')
        return stypes.split(',')


def rescan(handler):
    """Perform an active scan, see active_scan

    :param handler: Called with the dictionary of each respondant
    """
    active_scan(handler)


def snoop(handler):
    """Watch for SLP activity

    handler will be called with a dictionary of relevant attributes

    :param handler:
    :return:
    """
    active_scan(handler)
    net = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
    net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
    slpg = socket.inet_pton(socket.AF_INET6, 'ff01::123')
    slpg2 = socket.inet_pton(socket.AF_INET6, 'ff02::123')
    for i6idx in util.list_interface_indexes():
        mreq = slpg + struct.pack('=I', i6idx)
        net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, mreq)
        mreq = slpg2 + struct.pack('=I', i6idx)
        net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, mreq)
    net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    net.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    net4.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    for i4 in util.list_ips():
        if 'broadcast' not in i4:
            continue
        slpmcast = socket.inet_aton('239.255.255.253') + \
            socket.inet_aton(i4['addr'])
        try:
            net4.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP,
                            slpmcast)
        except socket.error as e:
            if e.errno != 98:
                raise
            # socket in use can occur when aliased ipv4 are encountered
    net.bind(('', 427))
    net4.bind(('', 427))

    while True:
        newmacs = set([])
        r, _, _ = select.select((net, net4), (), (), 60)
        # clear known_peers and peerbymacaddress
        # to avoid stale info getting in...
        # rely upon the short select below to catch rapid fire and
        # aggregate ip addresses that come close together
        # calling code needs to understand deeper context, as snoop
        # will now yield dupe info over time
        known_peers = set([])
        peerbymacaddress = {}
        neighutil.update_neigh()
        while r:
            for s in r:
                (rsp, peer) = s.recvfrom(9000)
                ip = peer[0].partition('%')[0]
                if ip not in neighutil.neightable:
                    continue
                if peer in known_peers:
                    continue
                known_peers.add(peer)
                mac = neighutil.neightable[ip]
                if mac in peerbymacaddress:
                    peerbymacaddress[mac]['addresses'].append(peer)
                else:
                    q = query_srvtypes(peer)
                    if not q or not q[0]:
                        # SLP might have started and not ready yet
                        # ignore for now
                        known_peers.discard(peer)
                        continue
                    peerbymacaddress[mac] = {
                        'services': q,
                        'addresses': [peer],
                    }
                    newmacs.add(mac)
            r, _, _ = select.select((net, net4), (), (), 0.2)
        for mac in newmacs:
            peerbymacaddress[mac]['xid'] = 1
            _add_attributes(peerbymacaddress[mac])
            peerbymacaddress[mac]['hwaddr'] = mac
            handler(peerbymacaddress[mac])


def active_scan(handler):
    """Scan and call handler for each respondant

    A respondant is skipped if one of its addresses that is in the neighbor
    table was already seen for an earlier respondant of this scan.

    :param handler: Called with the dictionary of each respondant
    """
    known_peers = set([])
    for scanned in scan():
        for addr in scanned['addresses']:
            ip = addr[0].partition('%')[0]  # discard scope if present
            if ip not in neighutil.neightable:
                continue
            if addr in known_peers:
                break
            known_peers.add(addr)
        else:
            handler(scanned)


def scan(srvtypes=_slp_services, addresses=None):
    """Find targets providing matching requested srvtypes

    This is a generator that will iterate over respondants to the SrvType
    requested.

    :param srvtypes: An iterable list of the service types to find
    :param addresses: An iterable of addresses/ranges.  Default is to scan
                      local network segment using multicast and broadcast.
                      Each address can be a single address, hyphen-delimited
                      range, or an IP/CIDR indication of a network.
    :return: Iterable set of results
    """
    net = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
    net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        # TODO: increase RCVBUF to max, mitigate chance of
        # failure due to full buffer.
        # SLP is very poor at scanning large counts and managing it, so we
        # must make the best of it
        # Some platforms/config default to IPV6ONLY, we are doing IPv4
        # too, so force it
        net.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
        initxid = random.randint(0, 32768)
        xididx = 0
        xidmap = {}
        # First we give fast repsonders of each srvtype individual chances
        # to be processed, mitigating volume of response traffic
        rsps = {}
        for srvtype in srvtypes:
            xididx += 1
            _find_srvtype(net, net4, srvtype, addresses, initxid + xididx)
            xidmap[initxid + xididx] = srvtype
            _grab_rsps((net, net4), rsps, 0.1, xidmap)
        # now do a more slow check to work to get stragglers,
        # but fortunately the above should have taken the brunt of volume, so
        # reduced chance of many responses overwhelming receive buffer.
        _grab_rsps((net, net4), rsps, 1, xidmap)
        # now to analyze and flesh out the responses
        for rspid in rsps:
            rsp = rsps[rspid]
            if 'services' not in rsp:
                # xid matches no request of this scan, so there is no
                # service to ask attributes about
                continue
            _add_attributes(rsp)
            del rsp['payload']
            del rsp['function']
            del rsp['xid']
            yield rsp
    finally:
        net.close()
        net4.close()


if __name__ == '__main__':
    def testsnoop(a):
        print(repr(a))
    snoop(testsnoop)
# Documented somewhat at
# http://buildingskb.schneider-electric.com/view.php?AID=15197

# Here is the payload of an SSDP 'announce', sent to the multicast v4/v6 1900
# NOTIFY * HTTP/1.1
# HOST: 239.255.255.250:1900
# CACHE-CONTROL: max-age=1800
# AL: https://172.30.254.151:8080/redfish/v1
# SERVER: Linux/3.14.28-ltsi Redfish/1.0
# NT: urn:dmtf-org:service:redfish-rest:1
# USN: uuid:00000000-0000-0000-0005-000000000001::urn:dmtf-org:service:redfish-rest:1
# NTS: ssdp:alive

mcastv4addr = '239.255.255.250'
mcastv6addr = 'ff02::c'

ssdp6mcast = socket.inet_pton(socket.AF_INET6, mcastv6addr)
smsg = ('M-SEARCH * HTTP/1.1\r\n'
        'HOST: {0}:1900\r\n'
        'MAN: "ssdp:discover"\r\n'
        'ST: {1}\r\n'
        'MX: 3\r\n\r\n')


def _as_text(data):
    """Return received data as str, decoding it if it is not one already"""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def _search_message(host, service):
    """Return the encoded M-SEARCH request for service addressed to host"""
    return smsg.format(host, service).encode('utf-8')


def scan(services, target=None):
    """Search for the given services

    :param services: Iterable of the service types to search for
    :param target: Optional host to query instead of using multicast and
                   broadcast
    :return: Generator of a dictionary per respondant of each service
    """
    for service in services:
        for rply in _find_service(service, target):
            yield rply


def snoop(handler, byehandler=None):
    """Watch for SSDP notify messages

    The handler shall be called on any service coming online.
    byehandler is called whenever a system advertises that it is departing.
    If no byehandler is specified, byebye messages are ignored.  The handler is
    given (as possible), the mac address, a list of viable sockaddrs to reference
    the peer, and the notification type (e.g.
    'urn:dmtf-org:service:redfish-rest:1'

    :param handler:  A handler for online notifications from network
    :param byehandler: Optional handler for devices going off the network
    """
    # Normally, I like using v6/v4 agnostic socket. However, since we are
    # dabbling in multicast wizardry here, such sockets can cause big problems,
    # so we will have two distinct sockets
    known_peers = set([])
    net6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
    net6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
    for ifidx in util.list_interface_indexes():
        v6grp = ssdp6mcast + struct.pack('=I', ifidx)
        net6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, v6grp)
    net6.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    for i4 in util.list_ips():
        ssdp4mcast = socket.inet_pton(socket.AF_INET, mcastv4addr) + \
            socket.inet_aton(i4['addr'])
        try:
            net4.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP,
                            ssdp4mcast)
        except socket.error as e:
            if e.errno != 98:
                raise
            # socket in use can occur when aliased ipv4 are encountered,
            # the SLP snoop tolerates the same condition
    net4.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    net4.bind(('', 1900))
    net6.bind(('', 1900))
    peerbymacaddress = {}
    while True:
        newmacs = set([])
        machandlers = {}
        r, _, _ = select.select((net4, net6), (), (), 60)
        neighutil.update_neigh()
        while r:
            for s in r:
                (rsp, peer) = s.recvfrom(9000)
                rsp = _as_text(rsp).split('\r\n')
                try:
                    method, _, _ = rsp[0].split(' ', 2)
                except ValueError:
                    # not a request line, ignore rather than let one bad
                    # datagram end the snoop
                    continue
                if method == 'NOTIFY':
                    ip = peer[0].partition('%')[0]
                    if ip not in neighutil.neightable:
                        continue
                    # NOTE(review): known_peers is never cleared, so only
                    # the first notify of a peer appears to be processed
                    if peer in known_peers:
                        continue
                    mac = neighutil.neightable[ip]
                    known_peers.add(peer)
                    newmacs.add(mac)
                    if mac in peerbymacaddress:
                        peerbymacaddress[mac]['peers'].append(peer)
                    else:
                        peerbymacaddress[mac] = {
                            'hwaddr': mac,
                            'peers': [peer],
                        }
                    peerdata = peerbymacaddress[mac]
                    for headline in rsp[1:]:
                        if not headline:
                            continue
                        header, _, value = headline.partition(':')
                        header = header.strip()
                        value = value.strip()
                        if header == 'NT':
                            peerdata['service'] = value
                        elif header == 'NTS':
                            if value == 'ssdp:byebye':
                                machandlers[mac] = byehandler
                            elif value == 'ssdp:alive':
                                machandlers[mac] = handler
            r, _, _ = select.select((net4, net6), (), (), 0.1)
        for mac in newmacs:
            # no entry, or a byebye without byehandler, means nothing to call
            thehandler = machandlers.get(mac, None)
            if thehandler:
                thehandler(peerbymacaddress[mac])


def _find_service(service, target):
    """Send a search for one service and collect the responses

    :param service: The service type to search for
    :param target: Host to query, or a false value to use multicast and
                   broadcast on the local interfaces
    :return: Generator of a dictionary per respondant
    """
    net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    net6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
    peerdata = {}
    try:
        net6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
        if target:
            addrs = socket.getaddrinfo(target, 1900, 0, socket.SOCK_DGRAM)
            for addr in addrs:
                host = addr[4][0]
                if addr[0] == socket.AF_INET:
                    net4.sendto(_search_message(host, service), addr[4])
                elif addr[0] == socket.AF_INET6:
                    host = '[{0}]'.format(host)
                    net6.sendto(_search_message(host, service), addr[4])
        else:
            net4.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
            for idx in util.list_interface_indexes():
                net6.setsockopt(socket.IPPROTO_IPV6,
                                socket.IPV6_MULTICAST_IF, idx)
                try:
                    net6.sendto(
                        _search_message('[{0}]'.format(mcastv6addr), service),
                        (mcastv6addr, 1900, 0, 0))
                except socket.error:
                    # ignore interfaces without ipv6 multicast causing error
                    pass
            for i4 in util.list_ips():
                if 'broadcast' not in i4:
                    continue
                addr = i4['addr']
                bcast = i4['broadcast']
                net4.setsockopt(socket.IPPROTO_IP, socket.IP_MULTICAST_IF,
                                socket.inet_aton(addr))
                net4.sendto(_search_message(mcastv4addr, service),
                            (mcastv4addr, 1900))
                net4.sendto(_search_message(bcast, service), (bcast, 1900))
        # SSDP by spec encourages responses to spread out over a 3 second
        # interval hence we must be a bit more patient
        r, _, _ = select.select((net4, net6), (), (), 4)
        while r:
            for s in r:
                (rsp, peer) = s.recvfrom(9000)
                neighutil.refresh_neigh()
                _parse_ssdp(peer, rsp, peerdata)
            r, _, _ = select.select((net4, net6), (), (), 4)
    finally:
        net4.close()
        net6.close()
    for nid in peerdata:
        yield peerdata[nid]


def _parse_ssdp(peer, rsp, peerdata):
    """Merge one search response into peerdata

    Only responses with status code 200 are considered.

    :param peer: The sockaddr the response came from
    :param rsp: The raw response
    :param peerdata: Dict of respondants, keyed by mac address if the
                     neighbor table has one for the peer, else by ip;
                     updated in place
    """
    ip = peer[0].partition('%')[0]
    nid = ip
    mac = None
    if ip in neighutil.neightable:
        nid = neighutil.neightable[ip]
        mac = nid
    headlines = _as_text(rsp).split('\r\n')
    try:
        _, code, _ = headlines[0].split(' ', 2)
    except ValueError:
        return
    if code == '200':
        if nid in peerdata:
            peerdatum = peerdata[nid]
            if peer not in peerdatum['peers']:
                # same device answering from another address
                peerdatum['peers'].append(peer)
        else:
            peerdatum = {
                'peers': [peer],
                'hwaddr': mac,
            }
            peerdata[nid] = peerdatum
        for headline in headlines[1:]:
            if not headline:
                continue
            header, _, value = headline.partition(':')
            header = header.strip()
            value = value.strip()
            if header == 'AL' or header == 'LOCATION':
                if 'urls' not in peerdatum:
                    peerdatum['urls'] = [value]
                elif value not in peerdatum['urls']:
                    peerdatum['urls'].append(value)
            elif header == 'ST':
                if 'services' not in peerdatum:
                    peerdatum['services'] = [value]
                elif value not in peerdatum['services']:
                    peerdatum['services'].append(value)


if __name__ == '__main__':

    for rsp in scan(['urn:dmtf-org:service:redfish-rest:1']):
        print(repr(rsp))

    def fun(a):
        print(repr(a))

    def byefun(a):
        print('bye' + repr(a))
    snoop(fun, byefun)
# Ultimately, this should use AF_NETLINK, but in the interest of time,
# use ip neigh for the moment

# Mapping of ip address to mac address, replaced by update_neigh
neightable = {}
# Value of os.times()[4] when neightable was last updated
neightime = 0

_validmac = re.compile('..:..:..:..:..:..')

# Note that these addresses are common static ip addresses
# that are hopelessly ambiguous if there are many
# so ignore such entries and move on
# ideally the system network steers clear of this landmine of
# a subnet, but just in case
_ambiguous_ips = ('192.168.0.100', '192.168.70.100', '192.168.70.125')


def _parse_neigh(neighdata):
    """Extract an ip to mac address dict from the output of 'ip neigh'

    :param neighdata: The output, as str or as undecoded bytes
    :return: Dict of ip address to mac address
    """
    if not isinstance(neighdata, str):
        neighdata = neighdata.decode('utf-8', 'replace')
    table = {}
    for entry in neighdata.split('\n'):
        entry = entry.split(' ')
        # the mac address is expected as the fifth field, entries without
        # one (e.g. failed lookups) are skipped
        if len(entry) < 5 or not entry[4]:
            continue
        if entry[0] in _ambiguous_ips:
            continue
        if not _validmac.match(entry[4]):
            continue
        table[entry[0]] = entry[4]
    return table


def update_neigh():
    """Replace neightable with the current output of 'ip neigh'

    On Windows (os.name of 'nt') the table is just emptied.
    """
    global neightable
    global neightime
    if os.name == 'nt':
        neightable = {}
        return
    ipn = subprocess.Popen(['ip', 'neigh'], stdin=subprocess.PIPE,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
    (neighdata, err) = ipn.communicate()
    # only swap the table in once it is complete, so that others consulting
    # it while the command runs do not find it empty
    neightable = _parse_neigh(neighdata)
    neightime = os.times()[4]


def refresh_neigh():
    """Update neightable if the last update is more than 30 seconds old"""
    global neightime
    if os.name == 'nt':
        return
    if os.times()[4] > (neightime + 30):
        update_neigh()
def ip_on_same_subnet(first, second, prefix):
    """Tell whether two addresses share their leading *prefix* bits.

    :param first: First address (anything ``socket.getaddrinfo`` accepts)
    :param second: Second address
    :param prefix: Prefix length in bits

    :returns: True if both resolve to the same address family and match in
              the first *prefix* bits, False otherwise.
    :raises Exception: if the resolved family is neither IPv4 nor IPv6.
    """
    firstinfo = socket.getaddrinfo(first, None, 0, socket.SOCK_STREAM)[0]
    fam = firstinfo[0]
    ip = socket.inet_pton(fam, firstinfo[-1][0])
    ip = int(codecs.encode(bytes(ip), 'hex'), 16)
    secondinfo = socket.getaddrinfo(second, None, 0, socket.SOCK_STREAM)[0]
    if fam != secondinfo[0]:
        return False
    oip = socket.inet_pton(fam, secondinfo[-1][0])
    oip = int(codecs.encode(bytes(oip), 'hex'), 16)
    if fam == socket.AF_INET:
        addrlen = 32
    elif fam == socket.AF_INET6:
        addrlen = 128
    else:
        raise Exception("Unknown address family {0}".format(fam))
    # *prefix* one-bits followed by zeroes, out to the full address width
    mask = ((1 << prefix) - 1) << (addrlen - prefix)
    return (ip & mask) == (oip & mask)


# TODO(jjohnson2): have a method to arbitrate setting methods, to aid
# in correct matching of net.* based on parameters, mainly for pxe
# The scheme for pxe:
# For one: the candidate net.* should have pxe set to true, to help
# disambiguate from interfaces meant for bmc access
# bmc relies upon hardwaremanagement.manager, plus we don't collect
# that mac address
# the ip as reported by recvmsg to match the subnet of that net.* interface
# if switch and port available, that should match.
def get_nic_config(configmanager, node, ip=None, mac=None):
    """Fetch network configuration parameters for a nic

    For a given node and interface, find and retrieve the pertinent network
    configuration data.  The desired configuration can be searched
    either by ip or by mac.

    :param configmanager: The relevant confluent.config.ConfigManager
        instance.
    :param node:  The name of the node
    :param ip:  An IP address on the intended subnet
    :param mac: The mac address of the interface (currently unused)

    :returns: A dict with 'ipv4_gateway' and 'prefix'; both stay None
              unless *ip* is given.
    """
    # ip parameter *could* be the result of recvmsg with cmsg to tell
    # pxe *our* ip address, or it could be the desired ip address
    # TODO(jjohnson2): ip address, prefix length, mac address,
    # join a bond/bridge, vlan configs, etc.
    # also other nic criteria, physical location, driver and index...
    nodenetattribs = configmanager.get_node_attributes(
        node, 'net*.ipv4_gateway').get(node, {})
    cfgdata = {
        'ipv4_gateway': None,
        'prefix': None,
    }
    if ip is not None:
        prefixlen = get_prefix_len_for_ip(ip)
        cfgdata['prefix'] = prefixlen
        # Take the first configured gateway on the same subnet as *ip*
        for setting in nodenetattribs:
            gw = nodenetattribs[setting].get('value', None)
            if gw is None:
                continue
            if ip_on_same_subnet(ip, gw, prefixlen):
                cfgdata['ipv4_gateway'] = gw
                break
    return cfgdata


def get_prefix_len_for_ip(ip):
    """Look up the prefix length of the local route covering *ip*.

    :param ip: Address or name, resolved as IPv4 through ``getaddrinfo``

    :returns: The prefix length of the first non-default entry in
              /proc/net/route that contains the address (64 if the resolved
              text cannot be parsed by ``inet_aton``).
    :raises NotImplementedException: if no local route contains the address.
    """
    # for now, we'll use the system route table
    # later may provide for configuration lookup to override the route
    # table
    ip = getaddrinfo(ip, 0, socket.AF_INET)[0][-1][0]
    try:
        ipn = socket.inet_aton(ip)
    except socket.error:  # For now, assume 64 for ipv6
        return 64
    # It comes out big endian, regardless of host arch
    ipn = struct.unpack('>I', ipn)[0]
    with open('/proc/net/route') as rf:
        routelines = rf.read().split('\n')[1:]  # first line is the header
    for rl in routelines:
        if not rl:
            continue
        rd = rl.split('\t')
        if rd[1] == '00000000':  # default gateway, not useful for this
            continue
        # don't have big endian to look at, assume that it is host endian
        maskn = struct.unpack('I', struct.pack('>I', int(rd[7], 16)))[0]
        netn = struct.unpack('I', struct.pack('>I', int(rd[1], 16)))[0]
        if ipn & maskn == netn:
            nbits = 0
            while maskn:
                nbits += 1
                maskn = maskn << 1 & 0xffffffff
            return nbits
    # This module never imported 'exc', so the raise below used to die with
    # NameError.  Imported locally to keep the fix inside this function.
    # NOTE(review): assumes confluent.exceptions provides
    # NotImplementedException, as the original reference implies - confirm.
    import confluent.exceptions as exc
    raise exc.NotImplementedException("Non local addresses not supported")
file diff --git a/confluent_server/confluent/networking/lldp.py b/confluent_server/confluent/networking/lldp.py new file mode 100644 index 00000000..3ff04ebf --- /dev/null +++ b/confluent_server/confluent/networking/lldp.py @@ -0,0 +1,131 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2016 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This provides the implementation of locating MAC addresses on ethernet +# switches. It is, essentially, a port of 'MacMap.pm' to confluent. +# However, there are enhancements. +# For one, each switch interrogation is handled in an eventlet 'thread' +# For another, MAC addresses are checked in the dictionary on every +# switch return, rather than waiting for all switches to check in +# (which makes it more responsive when there is a missing or bad switch) +# Also, we track the quantity, actual ifName value, and provide a mechanism +# to detect ambiguous result (e.g. if two matches are found, can log an error +# rather than doing the wrong one, complete with the detected ifName value). 
+# Further, the map shall be available to all facets of the codebase, not just +# the discovery process, so that the cached data maintenance will pay off +# for direct queries + +# Provides support for viewing and processing lldp data for switches + +import confluent.exceptions as exc +import confluent.log as log +import confluent.snmputil as snmp +from eventlet.greenpool import GreenPool +import re + +# The interesting OIDs are: +# 1.0.8802.1.1.2.1.3.7.1.4 - Lookup of LLDP index id to description +# Yet another fun fact, the LLDP port index frequently +# does *not* map to ifName, like a sane +# implementation would do. Assume ifName equality +# but provide a way for 1.3.6.1.2.1.1 indicated +# ids to provide custom functions +# (1.0.8802.1.1.2.1.3.7.1.2 - theoretically this process is only very useful +# if this is '5' meaning 'same as ifName' per +# 802.1AB-2005, however at *least* 7 has +# been observed to produce same results +# For now we'll optimistically assume +# equality to ifName) +# 1.0.8802.1.1.2.1.4.1.1 - The information about the remote systems attached +# indexed by time index, local port, and an +# incrementing value +# 1.0.8802.1.1.2.1.4.1.1.5 - chassis id - in theory might have been useful, in +# practice limited as the potential to correlate +# to other contexts is limited. As a result, +# our strategy will be to ignore this and focus +# instead on bridge-mib/qbridge-mib indicated data +# a potential exception would be pulling in things +# that are fundamentally network equipment, +# where significant ambiguity may exist.
#                            While in a 'host' scenario, there is ambiguity
#                            it is more controlled (virtual machines are given
#                            special treatment, and strategies exist for
#                            disambiguating shared management/data port, and
#                            other functions do not interact with our discovery
#                            framework
# # 1.0.8802.1.1.2.1.4.1.1.9 - SysName - could be handy hint in some scenarios
# # 1.0.8802.1.1.2.1.4.1.1.10 - SysDesc - good stuff


def lenovoname(idx, desc):
    """Return ``'Ethernet<idx>'`` when *desc* is purely digits, else *desc*."""
    if desc.isdigit():
        return 'Ethernet' + str(idx)
    return desc

# (pattern, function) pairs: when the pattern matches the switch id, the
# function is applied as function(idx, desc) to rewrite the description.
# Raw string so that '\.' reaches the regex engine as an escaped dot.
nameoverrides = [
    (re.compile(r'20301\..*'), lenovoname),
]


def _lldpdesc_to_ifname(switchid, idx, desc):
    """Apply every matching entry of ``nameoverrides`` to *desc*.

    :param switchid: Switch id string, or None when none was obtained
    :param idx: LLDP local port index
    :param desc: LLDP local port description

    :returns: The possibly rewritten description.
    """
    if switchid is None:
        # Nothing to match on; pattern.match(None) would raise TypeError.
        return desc
    for pattern, transform in nameoverrides:
        if pattern.match(switchid):
            desc = transform(idx, desc)
    return desc


def _extract_neighbor_data_b(args):
    """Build LLDP data about elements connected to switch

    args are carried as a tuple, because of eventlet convenience:
    ``(switch, password, user)``.  A two element ``(switch, password)``
    tuple is tolerated as well, which is what this file's ``__main__``
    driver passes.

    :returns: dict keyed by local interface name, each value a dict that may
              carry 'description', 'name' and 'chassisid'.  The dict is also
              printed, as before.
    """
    switch, password = args[0], args[1]
    # NOTE(review): assumes snmp.Session takes user=None for the
    # community-string case - confirm against snmputil.
    user = args[2] if len(args) > 2 else None
    conn = snmp.Session(switch, password, user)
    sid = None
    lldpdata = {}
    for sysid in conn.walk('1.3.6.1.2.1.1.2'):
        sid = str(sysid[1][6:])
    idxtoifname = {}
    for oidindex in conn.walk('1.0.8802.1.1.2.1.3.7.1.4'):
        idx = oidindex[0][-1]
        idxtoifname[idx] = _lldpdesc_to_ifname(sid, idx, str(oidindex[1]))
    # Remote-system columns to collect, in the original walk order
    remotecolumns = (
        ('1.0.8802.1.1.2.1.4.1.1.10', 'description'),
        ('1.0.8802.1.1.2.1.4.1.1.9', 'name'),
        ('1.0.8802.1.1.2.1.4.1.1.5', 'chassisid'),
    )
    for oid, field in remotecolumns:
        for remote in conn.walk(oid):
            # second to last OID element is used as the local port index
            iname = idxtoifname[remote[0][-2]]
            lldpdata.setdefault(iname, {})[field] = str(remote[1])
    print(repr(lldpdata))
    return lldpdata


def _extract_neighbor_data(args):
    """Run ``_extract_neighbor_data_b``, logging rather than raising.

    :returns: The collected dict, or None if an exception was logged.
    """
    try:
        return _extract_neighbor_data_b(args)
    except Exception:
        log.logtrace()
#!/usr/bin/env python
# vim: tabstop=4 shiftwidth=4 softtabstop=4

# Note that this script has a high chance of breaking confluent, so
# do not be surprised if confluent crashes as you exit...

import collections
import select
import socket
import readline
import sys
import threading

try:
    _readline = raw_input  # Python 2
except NameError:
    _readline = input  # Python 3

readline.parse_and_bind('tab: complete')
conn = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
conn.connect('/var/run/confluent/dbg.sock')

# Lines typed by the user and not yet sent.  A deque rather than a single
# slot so that lines arriving faster than the 0.1s poll are not overwritten.
# False is queued to mark end of input.
pendingoutput = collections.deque()


class GetInput(threading.Thread):
    """Read lines from stdin in the background and queue them for sending."""

    def run(self):
        while True:
            try:
                pendingoutput.append(_readline(''))
            except EOFError:
                pendingoutput.append(False)
                break


inputthread = GetInput()
# Daemon, so a reader blocked on stdin cannot keep the process alive once
# the main loop decides to exit.
inputthread.daemon = True
inputthread.start()
# recv() yields bytes; write them to the binary layer when there is one
# (Python 3) and to sys.stdout itself otherwise (Python 2).
rawout = getattr(sys.stdout, 'buffer', sys.stdout)
while True:
    r, _, _ = select.select((conn,), (), (), 0.1)
    if conn in r:
        data = conn.recv(4096)
        if not data:
            # Empty read means the server closed the connection.  Without
            # this check select() reports the socket readable forever and
            # the loop spins.
            sys.stdout.flush()
            sys.exit(0)
        rawout.write(data)
    while pendingoutput:
        line = pendingoutput.popleft()
        if line is False:
            conn.shutdown(socket.SHUT_WR)
            sys.exit(1)
        payload = line + '\n'
        if not isinstance(payload, bytes):
            payload = payload.encode('utf-8')
        conn.sendall(payload)
    sys.stdout.flush()