From 814f4208529720989842aba900304941c9f09aa9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 31 Oct 2023 11:47:28 -0400 Subject: [PATCH 001/122] Update genesis to deal with CS9 sshd --- .../genesis/initramfs/opt/confluent/bin/rungenesis | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis index b7035fe0..ebf0a380 100644 --- a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis +++ b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis @@ -174,6 +174,8 @@ dnsdomain=${dnsdomain#dnsdomain: } echo search $dnsdomain >> /etc/resolv.conf echo -n "Initializing ssh..." ssh-keygen -A +mkdir -p /usr/share/empty.sshd +rm /etc/ssh/ssh_host_dsa_key* for pubkey in /etc/ssh/ssh_host*key.pub; do certfile=${pubkey/.pub/-cert.pub} privfile=${pubkey%.pub} From 8a4ef0b1fe237fae9c579194d553f3fdebddfcf6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 1 Nov 2023 10:42:33 -0400 Subject: [PATCH 002/122] Make link type detection more specific If the ip command shows altnames, do not let the altnames interfere with locking on to linktype. Further, use show dev instead of grep to be more specific. 
--- .../usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 2 +- .../usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index b2881e0b..65abf8f6 100644 --- a/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -189,7 +189,7 @@ cat > /run/NetworkManager/system-connections/$ifname.nmconnection << EOC EOC echo id=${ifname} >> /run/NetworkManager/system-connections/$ifname.nmconnection echo uuid=$(uuidgen) >> /run/NetworkManager/system-connections/$ifname.nmconnection -linktype=$(ip link |grep -A2 ${ifname}|tail -n 1|awk '{print $1}') +linktype=$(ip link show dev ${ifname}|grep link/|awk '{print $1}') if [ "$linktype" = link/infiniband ]; then linktype="infiniband" else diff --git a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index 4fca92cf..a9eba388 100644 --- a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -154,7 +154,7 @@ cat > /run/NetworkManager/system-connections/$ifname.nmconnection << EOC EOC echo id=${ifname} >> /run/NetworkManager/system-connections/$ifname.nmconnection echo uuid=$(uuidgen) >> /run/NetworkManager/system-connections/$ifname.nmconnection -linktype=$(ip link |grep -A2 ${ifname}|tail -n 1|awk '{print $1}') +linktype=$(ip link show dev ${ifname}|grep link/|awk '{print $1}') if [ "$linktype" = link/infiniband ]; then linktype="infiniband" else From 
8f927d94e9b9f29d62c3228e43cd2533d8ae467e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 1 Nov 2023 11:17:04 -0400 Subject: [PATCH 003/122] Switch to bond from team Teaming is deprecated and EL went back to bond, follow that guidance. --- confluent_osdeploy/common/profile/scripts/confignet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 7e641205..4e9fe9b3 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -344,7 +344,7 @@ class NetworkManager(object): bondcfg[stg] = deats[stg] if member in self.uuidbyname: subprocess.check_call(['nmcli', 'c', 'del', self.uuidbyname[member]]) - subprocess.check_call(['nmcli', 'c', 'add', 'type', 'team-slave', 'master', team, 'con-name', member, 'connection.interface-name', member]) + subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond-slave', 'master', team, 'con-name', member, 'connection.interface-name', member]) if bondcfg: args = [] for parm in bondcfg: @@ -378,7 +378,7 @@ class NetworkManager(object): for arg in cmdargs: cargs.append(arg) cargs.append(cmdargs[arg]) - subprocess.check_call(['nmcli', 'c', 'add', 'type', 'team', 'con-name', cname, 'connection.interface-name', cname, 'team.runner', stgs['team_mode']] + cargs) + subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond', 'con-name', cname, 'connection.interface-name', cname, 'bond.options', 'mode={}'.format(stgs['team_mode'])] + cargs) for iface in cfg['interfaces']: self.add_team_member(cname, iface) subprocess.check_call(['nmcli', 'c', 'u', cname]) From e90f2829abade00ee0cd84bb780d7fac912ed383 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 8 Nov 2023 09:37:44 -0500 Subject: [PATCH 004/122] Filter bind mounts from imgutil capture If bind mounts are in use, it will foul the capture. 
Notably, one example is if you install the firefox snap in ubuntu, snapd creates a bind mount. This will ignore bind mounts, and rely upon the system to put it straight. --- imgutil/imgutil | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index de3a9025..959c4a17 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -61,13 +61,27 @@ FALLOC_FL_PUNCH_HOLE = 2 numregex = re.compile('([0-9]+)') def get_partition_info(): + with open('/proc/self/mountinfo') as procinfo: + mountinfo = procinfo.read() + capmounts = set([]) + for entry in mountinfo.split('\n'): + if not entry: + continue + firstinf, lastinf = entry.split(' - ') + root, mount = firstinf.split()[3:5] + filesystem = lastinf.split()[0] + if root != '/': + continue + if filesystem not in ('ext3', 'ext4', 'xfs', 'btrfs', 'vfat'): + continue + capmounts.add(mount) with open('/proc/mounts') as procmounts: mountinfo = procmounts.read() for entry in mountinfo.split('\n'): if not entry: continue dev, mount, fs, flags = entry.split()[:4] - if fs not in ('ext3', 'ext4', 'xfs', 'btrfs', 'vfat'): + if mount not in capmounts: continue fsinfo = os.statvfs(mount) partinfo = { From 2cd75ef4252f2ceb5b70ef38e9be19f60f3602db Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 8 Nov 2023 10:22:51 -0500 Subject: [PATCH 005/122] Fix diskless autocons message --- .../ubuntu20.04-diskless/initramfs/conf/conf.d/confluent | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/ubuntu20.04-diskless/initramfs/conf/conf.d/confluent b/confluent_osdeploy/ubuntu20.04-diskless/initramfs/conf/conf.d/confluent index 64a3713d..79787074 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/initramfs/conf/conf.d/confluent +++ b/confluent_osdeploy/ubuntu20.04-diskless/initramfs/conf/conf.d/confluent @@ -1,4 +1,5 @@ if ! 
grep console= /proc/cmdline > /dev/null; then + mkdir -p /custom-installation /opt/confluent/bin/autocons > /custom-installation/autocons.info cons=$(cat /custom-installation/autocons.info) if [ ! -z "$cons" ]; then From e03f010eac0f1afaf372453ed8bb1d2caaefb6a0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 9 Nov 2023 09:03:59 -0500 Subject: [PATCH 006/122] Fix Ubuntu confignet without IPv6 If confignet did not have ipv6 to work with, it would fail to work at all. Also handle when the configuration has a blank DNS server in it. --- confluent_osdeploy/common/profile/scripts/confignet | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 4e9fe9b3..eaaf2621 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -151,13 +151,14 @@ class NetplanManager(object): needcfgapply = False for devname in devnames: needcfgwrite = False - if stgs['ipv6_method'] == 'static': - curraddr = stgs['ipv6_address'] + # ipv6_method missing at uconn... 
+ if stgs.get('ipv6_method', None) == 'static': + curraddr = stgs'ipv6_address'] currips = self.getcfgarrpath([devname, 'addresses']) if curraddr not in currips: needcfgwrite = True currips.append(curraddr) - if stgs['ipv4_method'] == 'static': + if stgs.get('ipv4_method', None) == 'static': curraddr = stgs['ipv4_address'] currips = self.getcfgarrpath([devname, 'addresses']) if curraddr not in currips: @@ -180,7 +181,7 @@ class NetplanManager(object): if dnsips: currdnsips = self.getcfgarrpath([devname, 'nameservers', 'addresses']) for dnsip in dnsips: - if dnsip not in currdnsips: + if dnsip and dnsip not in currdnsips: needcfgwrite = True currdnsips.append(dnsip) if dnsdomain: From 6e092934e7ec4222a175e5bc473cb3a7ae8d4db8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 9 Nov 2023 17:15:17 -0500 Subject: [PATCH 007/122] Fix for ubuntu clone to nvme --- .../ubuntu20.04-diskless/profiles/default/scripts/image2disk.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py index 5d15e3d4..1d19ebad 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py @@ -206,6 +206,8 @@ def fixup(rootdir, vols): partnum = re.search('(\d+)$', targdev).group(1) targblock = re.search('(.*)\d+$', targdev).group(1) if targblock: + if targblock.endswith('p') and 'nvme' in targblock: + targblock = targblock[:-1] shimpath = subprocess.check_output(['find', os.path.join(rootdir, 'boot/efi'), '-name', 'shimx64.efi']).decode('utf8').strip() shimpath = shimpath.replace(rootdir, '/').replace('/boot/efi', '').replace('//', '/').replace('/', '\\') subprocess.check_call(['efibootmgr', '-c', '-d', targblock, '-l', shimpath, '--part', partnum]) From ec023831a5d322afb86ca73938a7a79c1fa54ddb Mon Sep 17 00:00:00 2001 
From: Jarrod Johnson Date: Thu, 9 Nov 2023 17:28:38 -0500 Subject: [PATCH 008/122] Fix syntax error in confignet --- confluent_osdeploy/common/profile/scripts/confignet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index eaaf2621..cb2569ce 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -153,7 +153,7 @@ class NetplanManager(object): needcfgwrite = False # ipv6_method missing at uconn... if stgs.get('ipv6_method', None) == 'static': - curraddr = stgs'ipv6_address'] + curraddr = stgs['ipv6_address'] currips = self.getcfgarrpath([devname, 'addresses']) if curraddr not in currips: needcfgwrite = True From f475d589559627f0222022b003dd03496028ea88 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 13 Nov 2023 15:43:11 -0500 Subject: [PATCH 009/122] Various permission fixes for osdeploy initialize Fix a few scenarios where certain ordering of initialize creates unworkable permissions. 
--- confluent_server/bin/osdeploy | 28 ++++++++++++++++--- confluent_server/confluent/certutil.py | 38 ++++++++++++++------------ confluent_server/confluent/sshutil.py | 13 +++++---- 3 files changed, 51 insertions(+), 28 deletions(-) diff --git a/confluent_server/bin/osdeploy b/confluent_server/bin/osdeploy index ed39e78c..ef6859e3 100644 --- a/confluent_server/bin/osdeploy +++ b/confluent_server/bin/osdeploy @@ -373,9 +373,14 @@ def initialize(cmdset): for rsp in c.read('/uuid'): uuid = rsp.get('uuid', {}).get('value', None) if uuid: - with open('confluent_uuid', 'w') as uuidout: - uuidout.write(uuid) - uuidout.write('\n') + oum = os.umask(0o11) + try: + with open('confluent_uuid', 'w') as uuidout: + uuidout.write(uuid) + uuidout.write('\n') + os.chmod('confluent_uuid', 0o644) + finally: + os.umask(oum) totar.append('confluent_uuid') topack.append('confluent_uuid') if os.path.exists('ssh'): @@ -403,7 +408,17 @@ def initialize(cmdset): if res: sys.stderr.write('Error occurred while packing site initramfs') sys.exit(1) - os.rename(tmpname, '/var/lib/confluent/public/site/initramfs.cpio') + oum = os.umask(0o22) + try: + os.rename(tmpname, '/var/lib/confluent/public/site/initramfs.cpio') + os.chown('/var/lib/confluent/public/site/initramfs.cpio', 0o644) + finally: + os.umask(oum) + oum = os.umask(0o22) + try: + os.chown('/var/lib/confluent/public/site/initramfs.cpio', 0o644) + finally: + os.umask(oum) if cmdset.g: updateboot('genesis-x86_64') if totar: @@ -411,6 +426,11 @@ def initialize(cmdset): tarcmd = ['tar', '-czf', tmptarname] + totar subprocess.check_call(tarcmd) os.rename(tmptarname, '/var/lib/confluent/public/site/initramfs.tgz') + oum = os.umask(0o22) + try: + os.chown('/var/lib/confluent/public/site/initramfs.tgz', 0o644) + finally: + os.umask(0o22) os.chdir(opath) print('Site initramfs content packed successfully') diff --git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index dffaf85e..2e788bad 100644 --- 
a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -95,27 +95,29 @@ def assure_tls_ca(): os.makedirs(os.path.dirname(fname)) except OSError as e: if e.errno != 17: + os.seteuid(ouid) raise + try: + shutil.copy2('/etc/confluent/tls/cacert.pem', fname) + hv, _ = util.run( + ['openssl', 'x509', '-in', '/etc/confluent/tls/cacert.pem', '-hash', '-noout']) + if not isinstance(hv, str): + hv = hv.decode('utf8') + hv = hv.strip() + hashname = '/var/lib/confluent/public/site/tls/{0}.0'.format(hv) + certname = '{0}.pem'.format(collective.get_myname()) + for currname in os.listdir('/var/lib/confluent/public/site/tls/'): + currname = os.path.join('/var/lib/confluent/public/site/tls/', currname) + if currname.endswith('.0'): + try: + realname = os.readlink(currname) + if realname == certname: + os.unlink(currname) + except OSError: + pass + os.symlink(certname, hashname) finally: os.seteuid(ouid) - shutil.copy2('/etc/confluent/tls/cacert.pem', fname) - hv, _ = util.run( - ['openssl', 'x509', '-in', '/etc/confluent/tls/cacert.pem', '-hash', '-noout']) - if not isinstance(hv, str): - hv = hv.decode('utf8') - hv = hv.strip() - hashname = '/var/lib/confluent/public/site/tls/{0}.0'.format(hv) - certname = '{0}.pem'.format(collective.get_myname()) - for currname in os.listdir('/var/lib/confluent/public/site/tls/'): - currname = os.path.join('/var/lib/confluent/public/site/tls/', currname) - if currname.endswith('.0'): - try: - realname = os.readlink(currname) - if realname == certname: - os.unlink(currname) - except OSError: - pass - os.symlink(certname, hashname) def substitute_cfg(setting, key, val, newval, cfgfile, line): if key.strip() == setting: diff --git a/confluent_server/confluent/sshutil.py b/confluent_server/confluent/sshutil.py index d097ade1..16e4db7e 100644 --- a/confluent_server/confluent/sshutil.py +++ b/confluent_server/confluent/sshutil.py @@ -98,14 +98,15 @@ def initialize_ca(): preexec_fn=normalize_uid) ouid = normalize_uid() 
try: - os.makedirs('/var/lib/confluent/public/site/ssh/', mode=0o755) - except OSError as e: - if e.errno != 17: - raise + try: + os.makedirs('/var/lib/confluent/public/site/ssh/', mode=0o755) + except OSError as e: + if e.errno != 17: + raise + cafilename = '/var/lib/confluent/public/site/ssh/{0}.ca'.format(myname) + shutil.copy('/etc/confluent/ssh/ca.pub', cafilename) finally: os.seteuid(ouid) - cafilename = '/var/lib/confluent/public/site/ssh/{0}.ca'.format(myname) - shutil.copy('/etc/confluent/ssh/ca.pub', cafilename) # newent = '@cert-authority * ' + capub.read() From cd07e0e212a8c526074a24ce0487e100e7dc1221 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 14 Nov 2023 15:14:54 -0500 Subject: [PATCH 010/122] Add missing disclaimer from tmt license --- confluent_vtbufferd/NOTICE | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/confluent_vtbufferd/NOTICE b/confluent_vtbufferd/NOTICE index 95b86a82..da174e81 100644 --- a/confluent_vtbufferd/NOTICE +++ b/confluent_vtbufferd/NOTICE @@ -22,3 +22,16 @@ modification, are permitted provided that the following conditions are met: * Neither the name of the copyright holder nor the names of contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS, +* COPYRIGHT HOLDERS, OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + From c9452e65e8f35916adab7cb7257ca02e537beda5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 15 Nov 2023 11:30:20 -0500 Subject: [PATCH 011/122] Fix some osdeploy ordering issues osdeploy initialization dependencies have been improved and marked if absolutely dependent. --- confluent_server/bin/osdeploy | 46 +++++++++++++++++---------- confluent_server/confluent/sshutil.py | 8 +++++ confluent_server/confluent/util.py | 4 +-- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/confluent_server/bin/osdeploy b/confluent_server/bin/osdeploy index ef6859e3..fff220be 100644 --- a/confluent_server/bin/osdeploy +++ b/confluent_server/bin/osdeploy @@ -72,6 +72,12 @@ def main(args): return rebase(cmdset.profile) ap.print_help() +def symlinkp(src, trg): + try: + os.symlink(src, trg) + except Exception as e: + if e.errno != 17: + raise def initialize_genesis(): if not os.path.exists('/opt/confluent/genesis/x86_64/boot/kernel'): @@ -89,30 +95,33 @@ def initialize_genesis(): return retval[1] retcode = 0 try: + util.mkdirp('/var/lib/confluent', 0o755) if hasconfluentuser: + os.chown('/var/lib/confluent', hasconfluentuser.pw_uid, -1) os.setgid(hasconfluentuser.pw_gid) os.setuid(hasconfluentuser.pw_uid) os.umask(0o22) - os.makedirs('/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot', 0o755) - os.makedirs('/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs', 0o755) - 
os.symlink('/opt/confluent/genesis/x86_64/boot/efi/boot/BOOTX64.EFI', + util.mkdirp('/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot', 0o755) + util.mkdirp('/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs', 0o755) + symlinkp('/opt/confluent/genesis/x86_64/boot/efi/boot/BOOTX64.EFI', '/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot/BOOTX64.EFI') - os.symlink('/opt/confluent/genesis/x86_64/boot/efi/boot/grubx64.efi', + symlinkp('/opt/confluent/genesis/x86_64/boot/efi/boot/grubx64.efi', '/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot/grubx64.efi') - os.symlink('/opt/confluent/genesis/x86_64/boot/initramfs/distribution', + symlinkp('/opt/confluent/genesis/x86_64/boot/initramfs/distribution', '/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/distribution') - os.symlink('/var/lib/confluent/public/site/initramfs.cpio', + symlinkp('/var/lib/confluent/public/site/initramfs.cpio', '/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/site.cpio') - os.symlink('/opt/confluent/lib/osdeploy/genesis/initramfs/addons.cpio', + symlinkp('/opt/confluent/lib/osdeploy/genesis/initramfs/addons.cpio', '/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/addons.cpio') - os.symlink('/opt/confluent/genesis/x86_64/boot/kernel', + symlinkp('/opt/confluent/genesis/x86_64/boot/kernel', '/var/lib/confluent/public/os/genesis-x86_64/boot/kernel') - shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/ansible/', - '/var/lib/confluent/public/os/genesis-x86_64/ansible/') - shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/scripts/', - '/var/lib/confluent/public/os/genesis-x86_64/scripts/') - shutil.copyfile('/opt/confluent/lib/osdeploy/genesis/profiles/default/profile.yaml', - '/var/lib/confluent/public/os/genesis-x86_64/profile.yaml') + if not os.path.exists('/var/lib/confluent/public/os/genesis-x86_64/ansible/'): + shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/ansible/', + 
'/var/lib/confluent/public/os/genesis-x86_64/ansible/') + shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/scripts/', + '/var/lib/confluent/public/os/genesis-x86_64/scripts/') + shutil.copyfile('/opt/confluent/lib/osdeploy/genesis/profiles/default/profile.yaml', + '/var/lib/confluent/public/os/genesis-x86_64/profile.yaml') except Exception as e: sys.stderr.write(str(e) + '\n') retcode = 1 @@ -411,12 +420,12 @@ def initialize(cmdset): oum = os.umask(0o22) try: os.rename(tmpname, '/var/lib/confluent/public/site/initramfs.cpio') - os.chown('/var/lib/confluent/public/site/initramfs.cpio', 0o644) + os.chmod('/var/lib/confluent/public/site/initramfs.cpio', 0o644) finally: os.umask(oum) oum = os.umask(0o22) try: - os.chown('/var/lib/confluent/public/site/initramfs.cpio', 0o644) + os.chmod('/var/lib/confluent/public/site/initramfs.cpio', 0o644) finally: os.umask(oum) if cmdset.g: @@ -428,7 +437,7 @@ def initialize(cmdset): os.rename(tmptarname, '/var/lib/confluent/public/site/initramfs.tgz') oum = os.umask(0o22) try: - os.chown('/var/lib/confluent/public/site/initramfs.tgz', 0o644) + os.chmod('/var/lib/confluent/public/site/initramfs.tgz', 0o644) finally: os.umask(0o22) os.chdir(opath) @@ -441,6 +450,9 @@ def initialize(cmdset): def updateboot(profilename): + if not os.path.exists('/var/lib/confluent/public/site/initramfs.cpio'): + emprint('Must generate site content first (TLS (-t) and/or SSH (-s))') + return 1 c = client.Command() for rsp in c.update('/deployment/profiles/{0}'.format(profilename), {'updateboot': 1}): diff --git a/confluent_server/confluent/sshutil.py b/confluent_server/confluent/sshutil.py index 16e4db7e..cf17f37a 100644 --- a/confluent_server/confluent/sshutil.py +++ b/confluent_server/confluent/sshutil.py @@ -186,6 +186,14 @@ def initialize_root_key(generate, automation=False): if os.path.exists('/etc/confluent/ssh/automation'): alreadyexist = True else: + ouid = normalize_uid() + try: + os.makedirs('/etc/confluent/ssh', 
mode=0o700) + except OSError as e: + if e.errno != 17: + raise + finally: + os.seteuid(ouid) subprocess.check_call( ['ssh-keygen', '-t', 'ed25519', '-f','/etc/confluent/ssh/automation', '-N', get_passphrase(), diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index 8cf9bbc9..96d2291b 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -29,9 +29,9 @@ import struct import eventlet.green.subprocess as subprocess -def mkdirp(path): +def mkdirp(path, mode=0o777): try: - os.makedirs(path) + os.makedirs(path, mode) except OSError as e: if e.errno != 17: raise From 9757cd1ae32e343f6eddcd47067f2440a1c070d4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 16 Nov 2023 10:17:55 -0500 Subject: [PATCH 012/122] Check the profile *before* rebooting systems This provides a much better experience when a typo or other mistake has a profile that is not actionable. --- confluent_client/bin/nodedeploy | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/confluent_client/bin/nodedeploy b/confluent_client/bin/nodedeploy index 2417f2c5..52e3a7d9 100755 --- a/confluent_client/bin/nodedeploy +++ b/confluent_client/bin/nodedeploy @@ -90,17 +90,6 @@ def main(args): if 'error' in rsp: sys.stderr.write(rsp['error'] + '\n') sys.exit(1) - if not args.clear and args.network and not args.prepareonly: - rc = c.simple_noderange_command(args.noderange, '/boot/nextdevice', 'network', - bootmode='uefi', - persistent=False, - errnodes=errnodes) - if errnodes: - sys.stderr.write( - 'Unable to set boot device for following nodes: {0}\n'.format( - ','.join(errnodes))) - return 1 - rc |= c.simple_noderange_command(args.noderange, '/power/state', 'boot') if args.clear: cleararm(args.noderange, c) clearpending(args.noderange, c) @@ -120,7 +109,7 @@ def main(args): for profname in profnames: sys.stderr.write(' ' + profname + '\n') else: - sys.stderr.write('No deployment profiles available, 
try osdeploy fiimport or imgutil capture\n') + sys.stderr.write('No deployment profiles available, try osdeploy import or imgutil capture\n') sys.exit(1) armonce(args.noderange, c) setpending(args.noderange, args.profile, c) @@ -166,6 +155,17 @@ def main(args): else: print('{0}: {1}{2}'.format(node, profile, armed)) sys.exit(0) + if not args.clear and args.network and not args.prepareonly: + rc = c.simple_noderange_command(args.noderange, '/boot/nextdevice', 'network', + bootmode='uefi', + persistent=False, + errnodes=errnodes) + if errnodes: + sys.stderr.write( + 'Unable to set boot device for following nodes: {0}\n'.format( + ','.join(errnodes))) + return 1 + rc |= c.simple_noderange_command(args.noderange, '/power/state', 'boot') if args.network and not args.prepareonly: return rc return 0 From 68ce3d039d7b26125222d2bd51e5d911b0ea14fe Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 27 Nov 2023 08:34:34 -0500 Subject: [PATCH 013/122] Filter out nvme 'c' devnames, that are used to refer to paths to nvme Some versions start manifesting nvme devnames with 'c', which are to be used to interact with multipath to have raw devices backing a traditional nvme device. 
--- .../el7-diskless/profiles/default/scripts/getinstalldisk | 2 ++ confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk | 2 ++ .../el8-diskless/profiles/default/scripts/getinstalldisk | 2 ++ confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk | 2 ++ .../el9-diskless/profiles/default/scripts/getinstalldisk | 2 ++ .../rhvh4/profiles/default/scripts/getinstalldisk | 2 ++ confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk | 2 ++ .../suse15/profiles/server/scripts/getinstalldisk | 2 ++ .../profiles/default/scripts/getinstalldisk | 2 ++ .../ubuntu20.04/profiles/default/scripts/getinstalldisk | 2 ++ .../ubuntu22.04/profiles/default/scripts/getinstalldisk | 2 ++ 11 files changed, 22 insertions(+) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk 
b/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def 
__init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk b/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk b/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk +++ b/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk 
b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None From 0b28d64c83f439412fcbf4c02b6aae84657b52c3 Mon Sep 17 00:00:00 2001 From: Christian Goll Date: Mon, 27 Nov 2023 15:00:27 +0100 Subject: [PATCH 014/122] python3-dbm is required for SUSE the python module `anydbm` is part of this python package Signed-off-by: Christian Goll --- confluent_server/confluent_server.spec.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent_server.spec.tmpl b/confluent_server/confluent_server.spec.tmpl index c7e2aa3a..51046a8f 100644 --- a/confluent_server/confluent_server.spec.tmpl +++ b/confluent_server/confluent_server.spec.tmpl @@ -22,7 +22,7 @@ Requires: python3-pyghmi >= 1.0.34, python3-eventlet, python3-greenlet, python3- %if "%{dist}" == ".el9" Requires: python3-pyghmi >= 1.0.34, python3-eventlet, python3-greenlet, python3-pycryptodomex >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dns, 
python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-yaml openssl iproute %else -Requires: python3-pyghmi >= 1.0.34, python3-eventlet, python3-greenlet, python3-pycryptodome >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dnspython, python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-PyYAML openssl iproute +Requires: python3-dbm,python3-pyghmi >= 1.0.34, python3-eventlet, python3-greenlet, python3-pycryptodome >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dnspython, python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-PyYAML openssl iproute %endif %endif %endif From 3730ba049f97b4d007b538b058055fd93e0aa8a4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 28 Nov 2023 13:11:25 -0500 Subject: [PATCH 015/122] Fix potential doubling up of IPv6 brackets There were scenarios where IPv6 URL brackets may double up. 
--- .../usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 4 ++-- .../usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 4 ++-- .../lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index 65abf8f6..cdcc12fd 100644 --- a/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -155,7 +155,7 @@ fi ready=0 while [ $ready = "0" ]; do get_remote_apikey - if [[ $confluent_mgr == *:* ]]; then + if [[ $confluent_mgr == *:* ]] && [[ $confluent_mgr != "["* ]]; then confluent_mgr="[$confluent_mgr]" fi tmperr=$(mktemp) @@ -324,7 +324,7 @@ fi echo '[proxy]' >> /run/NetworkManager/system-connections/$ifname.nmconnection chmod 600 /run/NetworkManager/system-connections/*.nmconnection confluent_websrv=$confluent_mgr -if [[ $confluent_websrv == *:* ]]; then +if [[ $confluent_websrv == *:* ]] && [[ $confluent_websrv != "["* ]]; then confluent_websrv="[$confluent_websrv]" fi echo -n "Initializing ssh..." 
diff --git a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index a9eba388..a4f10ee2 100644 --- a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -120,7 +120,7 @@ fi ready=0 while [ $ready = "0" ]; do get_remote_apikey - if [[ $confluent_mgr == *:* ]]; then + if [[ $confluent_mgr == *:* ]] && [[ $confluent_mgr != "["* ]]; then confluent_mgr="[$confluent_mgr]" fi tmperr=$(mktemp) @@ -281,7 +281,7 @@ fi echo '[proxy]' >> /run/NetworkManager/system-connections/$ifname.nmconnection chmod 600 /run/NetworkManager/system-connections/*.nmconnection confluent_websrv=$confluent_mgr -if [[ $confluent_websrv == *:* ]]; then +if [[ $confluent_websrv == *:* ]] && [[ $confluent_websrv != "["* ]]; then confluent_websrv="[$confluent_websrv]" fi echo -n "Initializing ssh..." 
diff --git a/confluent_osdeploy/suse15-diskless/initramfs/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/suse15-diskless/initramfs/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index 146c4797..5586978c 100644 --- a/confluent_osdeploy/suse15-diskless/initramfs/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/suse15-diskless/initramfs/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -116,7 +116,7 @@ fi ready=0 while [ $ready = "0" ]; do get_remote_apikey - if [[ $confluent_mgr == *:* ]]; then + if [[ $confluent_mgr == *:* ]] && [[ $confluent_mgr != "["* ]]; then confluent_mgr="[$confluent_mgr]" fi tmperr=$(mktemp) From 55e60d52fd0c693b831df0983938d85f4e0b6a33 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 28 Nov 2023 13:33:21 -0500 Subject: [PATCH 016/122] Avoid potential multiple brackets in imageboot.sh --- .../el8-diskless/profiles/default/scripts/imageboot.sh | 2 +- .../el9-diskless/profiles/default/scripts/imageboot.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh index ee2a8125..fe53bf38 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh @@ -1,6 +1,6 @@ . 
/lib/dracut-lib.sh confluent_whost=$confluent_mgr -if [[ "$confluent_whost" == *:* ]]; then +if [[ "$confluent_whost" == *:* ]] && [[ "$confluent_whost" != "["* ]]; then confluent_whost="[$confluent_mgr]" fi mkdir -p /mnt/remoteimg /mnt/remote /mnt/overlay diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh index ee2a8125..fe53bf38 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh @@ -1,6 +1,6 @@ . /lib/dracut-lib.sh confluent_whost=$confluent_mgr -if [[ "$confluent_whost" == *:* ]]; then +if [[ "$confluent_whost" == *:* ]] && [[ "$confluent_whost" != "["* ]]; then confluent_whost="[$confluent_mgr]" fi mkdir -p /mnt/remoteimg /mnt/remote /mnt/overlay From 63b737dc52551b43e2f000d53b5b6d940cb3264d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 28 Nov 2023 14:09:59 -0500 Subject: [PATCH 017/122] Correct bonding in confignet for NetworkManager --- confluent_osdeploy/common/profile/scripts/confignet | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index cb2569ce..f19e620c 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -379,6 +379,8 @@ class NetworkManager(object): for arg in cmdargs: cargs.append(arg) cargs.append(cmdargs[arg]) + if stgs['team_mode'] == 'lacp': + stgs['team_mode'] = '802.3ad' subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond', 'con-name', cname, 'connection.interface-name', cname, 'bond.options', 'mode={}'.format(stgs['team_mode'])] + cargs) for iface in cfg['interfaces']: self.add_team_member(cname, iface) From 6763c863879368371b6c134f549e52c6dafff998 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 28 Nov 2023 14:35:01 -0500 Subject: 
[PATCH 018/122] Add DNS to NetworkManager Similar to netplan, apply DNS to every interface. --- .../common/profile/scripts/confignet | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index f19e620c..44eb32ed 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -295,7 +295,8 @@ class WickedManager(object): class NetworkManager(object): - def __init__(self, devtypes): + def __init__(self, devtypes, deploycfg): + self.deploycfg = deploycfg self.connections = {} self.uuidbyname = {} self.uuidbydev = {} @@ -367,6 +368,20 @@ class NetworkManager(object): cmdargs['ipv4.gateway'] = stgs['ipv4_gateway'] if stgs.get('ipv6_gateway', None): cmdargs['ipv6.gateway'] = stgs['ipv6_gateway'] + dnsips = self.deploycfg.get('nameservers', []) + if not dnsips: + dnsips = [] + dns4 = [] + dns6 = [] + for dnsip in dnsips: + if '.' in dnsip: + dns4.append(dnsip) + elif ':' in dnsip: + dns6.append(dnsip) + if dns4: + cmdargs['ipv4.dns'] = dns4.join(',') + if dns6: + cmdargs['ipv6.dns'] = dns6.join(',') if len(cfg['interfaces']) > 1: # team time.. should be.. 
if not cfg['settings'].get('team_mode', None): sys.stderr.write("Warning, multiple interfaces ({0}) without a team_mode, skipping setup\n".format(','.join(cfg['interfaces']))) @@ -487,7 +502,7 @@ if __name__ == '__main__': if os.path.exists('/usr/sbin/netplan'): nm = NetplanManager(dc) if os.path.exists('/usr/bin/nmcli'): - nm = NetworkManager(devtypes) + nm = NetworkManager(devtypes, dc) elif os.path.exists('/usr/sbin/wicked'): nm = WickedManager() for netn in netname_to_interfaces: From 3aa91b61e5befc32e0ebe851b986c84d34634053 Mon Sep 17 00:00:00 2001 From: Christian Goll Date: Fri, 1 Dec 2023 10:57:31 +0100 Subject: [PATCH 019/122] disable online repos for openSUSE leap online repositories may not be accesible for the cluster nodes but were added from the content.xml. Editing this files with initprofile.sh is impossible as they are executed in parallel, so all repos starting with https?://download.opensuse.org are removed during post Signed-off-by: Christian Goll --- confluent_osdeploy/suse15/profiles/hpc/autoyast.leap | 6 ++++++ confluent_osdeploy/suse15/profiles/hpc/initprofile.sh | 3 +++ .../profiles/hpc/scripts/post.d/10-remove-online-repos.sh | 3 +++ confluent_osdeploy/suse15/profiles/server/autoyast.leap | 6 ++++++ confluent_osdeploy/suse15/profiles/server/initprofile.sh | 3 +++ .../server/scripts/post.d/10-remove-online-repos.sh | 3 +++ 6 files changed, 24 insertions(+) create mode 100644 confluent_osdeploy/suse15/profiles/hpc/scripts/post.d/10-remove-online-repos.sh create mode 100644 confluent_osdeploy/suse15/profiles/server/scripts/post.d/10-remove-online-repos.sh diff --git a/confluent_osdeploy/suse15/profiles/hpc/autoyast.leap b/confluent_osdeploy/suse15/profiles/hpc/autoyast.leap index 7f9d08f7..e92ec9fd 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/autoyast.leap +++ b/confluent_osdeploy/suse15/profiles/hpc/autoyast.leap @@ -10,6 +10,12 @@ dynamic behavior and replace with static configuration. 
UTC %%TIMEZONE%% + + false + + + false + false diff --git a/confluent_osdeploy/suse15/profiles/hpc/initprofile.sh b/confluent_osdeploy/suse15/profiles/hpc/initprofile.sh index 9c6c295e..62a2663e 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/initprofile.sh +++ b/confluent_osdeploy/suse15/profiles/hpc/initprofile.sh @@ -1,4 +1,7 @@ #!/bin/sh +# WARNING +# be careful when editing files here as this script is called +# in parallel to other copy operations, so changes to files can be lost discnum=$(basename $1) if [ "$discnum" != 1 ]; then exit 0; fi if [ -e $2/boot/kernel ]; then exit 0; fi diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/post.d/10-remove-online-repos.sh b/confluent_osdeploy/suse15/profiles/hpc/scripts/post.d/10-remove-online-repos.sh new file mode 100644 index 00000000..9ae8224e --- /dev/null +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/post.d/10-remove-online-repos.sh @@ -0,0 +1,3 @@ +#!/usr/bin/bash +# remove online repos +grep -lE "baseurl=https?://download.opensuse.org" /etc/zypp/repos.d/*repo | xargs rm -- diff --git a/confluent_osdeploy/suse15/profiles/server/autoyast.leap b/confluent_osdeploy/suse15/profiles/server/autoyast.leap index 7f9d08f7..e92ec9fd 100644 --- a/confluent_osdeploy/suse15/profiles/server/autoyast.leap +++ b/confluent_osdeploy/suse15/profiles/server/autoyast.leap @@ -10,6 +10,12 @@ dynamic behavior and replace with static configuration. 
UTC %%TIMEZONE%% + + false + + + false + false diff --git a/confluent_osdeploy/suse15/profiles/server/initprofile.sh b/confluent_osdeploy/suse15/profiles/server/initprofile.sh index 9c6c295e..62a2663e 100644 --- a/confluent_osdeploy/suse15/profiles/server/initprofile.sh +++ b/confluent_osdeploy/suse15/profiles/server/initprofile.sh @@ -1,4 +1,7 @@ #!/bin/sh +# WARNING +# be careful when editing files here as this script is called +# in parallel to other copy operations, so changes to files can be lost discnum=$(basename $1) if [ "$discnum" != 1 ]; then exit 0; fi if [ -e $2/boot/kernel ]; then exit 0; fi diff --git a/confluent_osdeploy/suse15/profiles/server/scripts/post.d/10-remove-online-repos.sh b/confluent_osdeploy/suse15/profiles/server/scripts/post.d/10-remove-online-repos.sh new file mode 100644 index 00000000..9ae8224e --- /dev/null +++ b/confluent_osdeploy/suse15/profiles/server/scripts/post.d/10-remove-online-repos.sh @@ -0,0 +1,3 @@ +#!/usr/bin/bash +# remove online repos +grep -lE "baseurl=https?://download.opensuse.org" /etc/zypp/repos.d/*repo | xargs rm -- From 7b89054b35e63728755c51035ef86338ded391a1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Dec 2023 15:55:17 -0500 Subject: [PATCH 020/122] Fix a few noderange abbreviations Also, add some test cases on abbreviation to help sanity check things in the future. 
--- confluent_server/confluent/noderange.py | 46 ++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index df4552b8..cf99dd72 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -96,6 +96,7 @@ class Bracketer(object): txtnums = getnumbers_nodename(nodename) nums = [int(x) for x in txtnums] for n in range(self.count): + # First pass to see if we have exactly one different number padto = len(txtnums[n]) needpad = (padto != len('{}'.format(nums[n]))) if self.sequences[n] is None: @@ -105,7 +106,24 @@ class Bracketer(object): elif self.sequences[n][2] == nums[n] and self.numlens[n][1] == padto: continue # new nodename has no new number, keep going else: # if self.sequences[n][2] != nums[n] or : - if self.diffn is not None and (n != self.diffn or + if self.diffn is not None and (n != self.diffn or + (padto < self.numlens[n][1]) or + (needpad and padto != self.numlens[n][1])): + self.flush_current() + self.sequences[n] = [[], nums[n], nums[n]] + self.numlens[n] = [padto, padto] + self.diffn = n + for n in range(self.count): + padto = len(txtnums[n]) + needpad = (padto != len('{}'.format(nums[n]))) + if self.sequences[n] is None: + # We initialize to text pieces, 'currstart', and 'prev' number + self.sequences[n] = [[], nums[n], nums[n]] + self.numlens[n] = [len(txtnums[n]), len(txtnums[n])] + elif self.sequences[n][2] == nums[n] and self.numlens[n][1] == padto: + continue # new nodename has no new number, keep going + else: # if self.sequences[n][2] != nums[n] or : + if self.diffn is not None and (n != self.diffn or (padto < self.numlens[n][1]) or (needpad and padto != self.numlens[n][1])): self.flush_current() @@ -449,3 +467,29 @@ class NodeRange(object): if self.cfm is None: return set([element]) raise Exception(element + ' not a recognized node, group, or alias') + +if __name__ == '__main__': + cases = [ 
+ (['r3u4', 'r5u6'], 'r3u4,r5u6'), # should not erroneously gather + (['r3u4s1', 'r5u6s3'], 'r3u4s1,r5u6s3'), # should not erroneously gather + (['r3u4s1', 'r3u4s2', 'r5u4s3'], 'r3u4s[1:2],r5u4s3'), # should not erroneously gather + (['r3u4', 'r3u5', 'r3u6', 'r3u9', 'r4u1'], 'r3u[4:6,9],r4u1'), + (['n01', 'n2', 'n03'], 'n01,n2,n03'), + (['n7', 'n8', 'n09', 'n10', 'n11', 'n12', 'n13', 'n14', 'n15', 'n16', + 'n17', 'n18', 'n19', 'n20'], 'n[7:8],n[09:20]') + ] + for case in cases: + gc = case[0] + bracketer = Bracketer(gc[0]) + for chnk in gc[1:]: + bracketer.extend(chnk) + br = bracketer.range + resnodes = NodeRange(br).nodes + if set(resnodes) != set(gc): + print('FAILED: ' + repr(sorted(gc))) + print('RESULT: ' + repr(sorted(resnodes))) + print('EXPECTED: ' + repr(case[1])) + print('ACTUAL: ' + br) + + + From 7aef012a42f6859df8e8bbece7f208dc568935d8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 5 Dec 2023 14:39:36 -0500 Subject: [PATCH 021/122] Correct string join syntax in confignet --- confluent_osdeploy/common/profile/scripts/confignet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 44eb32ed..8cda6c83 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -379,9 +379,9 @@ class NetworkManager(object): elif ':' in dnsip: dns6.append(dnsip) if dns4: - cmdargs['ipv4.dns'] = dns4.join(',') + cmdargs['ipv4.dns'] = ','.join(dns4) if dns6: - cmdargs['ipv6.dns'] = dns6.join(',') + cmdargs['ipv6.dns'] = ','.join(dns6) if len(cfg['interfaces']) > 1: # team time.. should be.. 
if not cfg['settings'].get('team_mode', None): sys.stderr.write("Warning, multiple interfaces ({0}) without a team_mode, skipping setup\n".format(','.join(cfg['interfaces']))) From 93269a05ebb66c4b97ab484402ce55808fda2101 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 6 Dec 2023 17:06:09 -0500 Subject: [PATCH 022/122] Fix cloning with ipv6 and EL9 --- .../profiles/default/scripts/firstboot.sh | 11 +++++++++-- .../el9-diskless/profiles/default/scripts/post.sh | 11 +++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh index 2bab4136..ed11d9e7 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh @@ -9,9 +9,16 @@ HOME=$(getent passwd $(whoami)|cut -d: -f 6) export HOME nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') confluent_apikey=$(cat /etc/confluent/confluent.apikey) -confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') +confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg|awk '{print $2}') +if [ -z "$confluent_mgr" ] || [ "$confluent_mgr" == "null" ] || ! ping -c 1 $confluent_mgr >& /dev/null; then + confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') +fi +confluent_websrv=$confluent_mgr +if [[ "$confluent_mgr" == *:* ]]; then + confluent_websrv="[$confluent_mgr]" +fi confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') -export nodename confluent_mgr confluent_profile +export nodename confluent_mgr confluent_profile confluent_websrv . 
/etc/confluent/functions ( exec >> /var/log/confluent/confluent-firstboot.log diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh index 3a52d128..3b20a946 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh @@ -5,9 +5,16 @@ nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') confluent_apikey=$(cat /etc/confluent/confluent.apikey) -confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') -export nodename confluent_mgr confluent_profile +confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg|awk '{print $2}') +if [ -z "$confluent_mgr" ] || [ "$confluent_mgr" == "null" ] || ! ping -c 1 $confluent_mgr >& /dev/null; then + confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') +fi +confluent_websrv=$confluent_mgr +if [[ "$confluent_mgr" == *:* ]]; then + confluent_websrv="[$confluent_mgr]" +fi +export nodename confluent_mgr confluent_profile confluent_websrv . /etc/confluent/functions mkdir -p /var/log/confluent chmod 700 /var/log/confluent From 85629dea64d202c02a5f99191b4535b7743a1e03 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 7 Dec 2023 14:44:42 -0500 Subject: [PATCH 023/122] Prevent unitiailized collective info When doing proxyconsole, don't land in a useless retach loop when managerinfo is None. 
--- confluent_server/confluent/consoleserver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index 37274792..fb607a27 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -622,6 +622,8 @@ def connect_node(node, configmanager, username=None, direct=True, width=80, myname = collective.get_myname() if myc and myc != collective.get_myname() and direct: minfo = configmodule.get_collective_member(myc) + if not minfo: + raise Exception('Unable to get collective member for {}'.format(node)) return ProxyConsole(node, minfo, myname, configmanager, username, width, height) consk = (node, configmanager.tenant) From b0e23121a84c9b8d980e5bb0cd4f3edc19f2b325 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 3 Jan 2024 15:03:49 -0500 Subject: [PATCH 024/122] Add stub resize handler For uninitialized console handlers, provide a stub to do nothing on resize. This avoids such a request crashing a shared websocket session. 
--- confluent_server/confluent/consoleserver.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index fb607a27..ebfd8c97 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -175,6 +175,9 @@ class ConsoleHandler(object): self.connectstate = 'connecting' eventlet.spawn(self._connect) + def resize(self, width, height): + return None + def _get_retry_time(self): clustsize = len(self.cfgmgr._cfgstore['nodes']) self._retrytime = self._retrytime * 2 + 1 From 39c00323b391d1ef0e83aad60fd5f543b68702bd Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 3 Jan 2024 15:58:24 -0500 Subject: [PATCH 025/122] Fix error where layout would bail if a partial error were encountered --- confluent_server/confluent/plugins/info/layout.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/confluent/plugins/info/layout.py b/confluent_server/confluent/plugins/info/layout.py index 8397af7f..76b07ac7 100644 --- a/confluent_server/confluent/plugins/info/layout.py +++ b/confluent_server/confluent/plugins/info/layout.py @@ -93,6 +93,9 @@ def retrieve(nodes, element, configmanager, inputdata): '/noderange/{0}/description'.format(needheight), 'retrieve', configmanager, inputdata=None): + if not hasattr(rsp, 'kvpairs'): + results['errors'].append((rsp.node, rsp.error)) + continue kvp = rsp.kvpairs for node in kvp: allnodedata[node]['height'] = kvp[node]['height'] From 4d639081645339ca9dcee1c5922eeb489172e9c1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 4 Jan 2024 11:17:02 -0500 Subject: [PATCH 026/122] Have a fallback height of 1 for any missing height --- confluent_server/confluent/plugins/info/layout.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/confluent/plugins/info/layout.py b/confluent_server/confluent/plugins/info/layout.py index 76b07ac7..ca7f120c 100644 --- 
a/confluent_server/confluent/plugins/info/layout.py +++ b/confluent_server/confluent/plugins/info/layout.py @@ -99,5 +99,8 @@ def retrieve(nodes, element, configmanager, inputdata): kvp = rsp.kvpairs for node in kvp: allnodedata[node]['height'] = kvp[node]['height'] + for node in allnodedata: + if 'height' not in allnodedata[node]: + allnodedata[node]['height'] = 1 yield msg.Generic(results) From 70f91d59b293b97b51d76025e0b939adf0d7186f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Jan 2024 13:32:54 -0500 Subject: [PATCH 027/122] Update license material in gathering genesis --- genesis/buildgenesis.sh | 4 ++++ genesis/getlicenses.py | 17 +++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/genesis/buildgenesis.sh b/genesis/buildgenesis.sh index 81c46790..680702ef 100644 --- a/genesis/buildgenesis.sh +++ b/genesis/buildgenesis.sh @@ -44,6 +44,10 @@ for lic in $(cat /tmp/tmpliclist); do cp $lic licenses/$dlo/$fname lo=$dlo/$fname echo %license /opt/confluent/genesis/%{arch}/licenses/$lo >> confluent-genesis-out.spec + if [ "$fname" == README ] && [ "$dlo" == "zlib" ]; then + cp $lic licenses/nss/$fname + echo %license /opt/confluent/genesis/%{arch}/licenses/nss/$fname >> confluent-genesis-out.spec + fi done mkdir -p licenses/ipmitool cp /usr/share/doc/ipmitool/COPYING licenses/ipmitool diff --git a/genesis/getlicenses.py b/genesis/getlicenses.py index 5a9b2ac2..92899850 100644 --- a/genesis/getlicenses.py +++ b/genesis/getlicenses.py @@ -29,12 +29,17 @@ with open(sys.argv[1]) as rpmlist: rpmlist = rpmlist.read().split('\n') licenses = set([]) licensesbyrpm = {} +lfirmlicenses = [ + 'WHENCE', + 'chelsio_firmware', + 'hfi1_firmware', + 'ice_enhaced', + 'rtlwifi_firmware.txt', +] for rpm in rpmlist: if not rpm: continue srpm = rpmtosrpm[rpm] - if srpm.startswith('linux-firmware'): - continue for relrpm in srpmtorpm[srpm]: if relrpm.startswith('libss-'): continue @@ -44,6 +49,12 @@ for rpm in rpmlist: continue if lic == '(contains 
no files)': continue + if srpm.startswith('linux-firmware'): + for desired in lfirmlicenses: + if desired in lic: + break + else: + continue licensesbyrpm[rpm] = lic licenses.add(lic) for lic in sorted(licenses): @@ -63,6 +74,8 @@ manualrpms = [ ] manuallicenses = [ '/usr/share/licenses/lz4/LICENSE.BSD', + '/usr/share/licenses/nss/LICENSE.APACHE', # http://www.apache.org/licenses/LICENSE-2.0 + '/usr/share/licenses/openssh/COPYING.blowfish, # from header of blowfish file in bsd-compat # cp /usr/share/doc/lz4-libs/LICENSE /usr/share/licenses/lz4/LICENSE.BSD #'lz4-1.8.3]# cp LICENSE /usr/share/licenses/lz4/LICENSE' # net-snmp has a bundled openssl, but the build does not avail itself of that copy From cfccb046bc4a39157993701774979f9ea953c660 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Jan 2024 13:36:55 -0500 Subject: [PATCH 028/122] Correct syntax error in draft attempt --- genesis/getlicenses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis/getlicenses.py b/genesis/getlicenses.py index 92899850..037699bc 100644 --- a/genesis/getlicenses.py +++ b/genesis/getlicenses.py @@ -75,7 +75,7 @@ manualrpms = [ manuallicenses = [ '/usr/share/licenses/lz4/LICENSE.BSD', '/usr/share/licenses/nss/LICENSE.APACHE', # http://www.apache.org/licenses/LICENSE-2.0 - '/usr/share/licenses/openssh/COPYING.blowfish, # from header of blowfish file in bsd-compat + '/usr/share/licenses/openssh/COPYING.blowfish', # from header of blowfish file in bsd-compat # cp /usr/share/doc/lz4-libs/LICENSE /usr/share/licenses/lz4/LICENSE.BSD #'lz4-1.8.3]# cp LICENSE /usr/share/licenses/lz4/LICENSE' # net-snmp has a bundled openssl, but the build does not avail itself of that copy From 0dfe66f1b28ebf3b3b59a935dbe882a5449ce91d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Jan 2024 16:08:49 -0500 Subject: [PATCH 029/122] Fix overzealous reaping of '-lib' folders --- genesis/buildgenesis.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/genesis/buildgenesis.sh b/genesis/buildgenesis.sh index 680702ef..8e0de608 100644 --- a/genesis/buildgenesis.sh +++ b/genesis/buildgenesis.sh @@ -29,7 +29,7 @@ for lic in $(cat /tmp/tmpliclist); do fname=$(basename $lo) dlo=$(dirname $lo) if [[ "$dlo" == *"-lib"* ]]; then - dlo=${dlo/-*} + dlo=${dlo/-lib*} elif [[ "$dlo" == "device-mapper-"* ]]; then dlo=${dlo/-*}-mapper elif [[ "$dlo" == "bind-"* ]]; then From 382feea68d619572ab4650c0dd5bd8aff69904b4 Mon Sep 17 00:00:00 2001 From: henglikuang1 Date: Thu, 11 Jan 2024 11:44:25 +0800 Subject: [PATCH 030/122] Add default time zone as UTC --- confluent_server/confluent/selfservice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/selfservice.py b/confluent_server/confluent/selfservice.py index cd4180c7..3d7feebb 100644 --- a/confluent_server/confluent/selfservice.py +++ b/confluent_server/confluent/selfservice.py @@ -30,7 +30,7 @@ import eventlet webclient = eventlet.import_patched('pyghmi.util.webclient') -currtz = None +currtz = 'UTC' keymap = 'us' currlocale = 'en_US.UTF-8' currtzvintage = None From ea88ccb0add2b661dcd7daa5fe6393f0e5cdeb40 Mon Sep 17 00:00:00 2001 From: henglikuang1 Date: Thu, 11 Jan 2024 14:31:45 +0800 Subject: [PATCH 031/122] Fix efivars handling of unexpected unmount --- confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh | 1 + confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh index 7b970285..16a624c3 100755 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh @@ -56,6 +56,7 @@ cp /custom-installation/confluent/bin/apiclient /target/opt/confluent/bin mount -o bind /dev /target/dev mount -o bind /proc /target/proc mount -o bind /sys /target/sys +mount -o bind 
/sys/firmware/efi/efivars /target/sys/firmware/efi/efivars if [ 1 = $updategrub ]; then chroot /target update-grub fi diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh index 773bf8ad..d9730889 100755 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh @@ -60,6 +60,7 @@ cp /custom-installation/confluent/bin/apiclient /target/opt/confluent/bin mount -o bind /dev /target/dev mount -o bind /proc /target/proc mount -o bind /sys /target/sys +mount -o bind /sys/firmware/efi/efivars /target/sys/firmware/efi/efivars if [ 1 = $updategrub ]; then chroot /target update-grub fi From 56b644ead9927b25f86d869aa6cf1b63233ebed6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 11 Jan 2024 16:30:45 -0500 Subject: [PATCH 032/122] The 3rd party monotonic is no longer needed --- confluent_server/confluent_server.spec.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent_server.spec.tmpl b/confluent_server/confluent_server.spec.tmpl index 51046a8f..bf81c969 100644 --- a/confluent_server/confluent_server.spec.tmpl +++ b/confluent_server/confluent_server.spec.tmpl @@ -17,7 +17,7 @@ Requires: confluent_vtbufferd Requires: python-pyghmi >= 1.0.34, python-eventlet, python-greenlet, python-pycryptodomex >= 3.4.7, confluent_client == %{version}, python-pyparsing, python-paramiko, python-dnspython, python-netifaces, python2-pyasn1 >= 0.2.3, python-pysnmp >= 4.3.4, python-lxml, python-eficompressor, python-setuptools, python-dateutil, python-websocket-client python2-msgpack python-libarchive-c python-yaml python-monotonic %else %if "%{dist}" == ".el8" -Requires: python3-pyghmi >= 1.0.34, python3-eventlet, python3-greenlet, python3-pycryptodomex >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dns, python3-netifaces, python3-pyasn1 
>= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-enum34, python3-asn1crypto, python3-cffi, python3-pyOpenSSL, python3-monotonic, python3-websocket-client python3-msgpack python3-libarchive-c python3-yaml openssl iproute +Requires: python3-pyghmi >= 1.0.34, python3-eventlet, python3-greenlet, python3-pycryptodomex >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dns, python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-enum34, python3-asn1crypto, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-yaml openssl iproute %else %if "%{dist}" == ".el9" Requires: python3-pyghmi >= 1.0.34, python3-eventlet, python3-greenlet, python3-pycryptodomex >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dns, python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-yaml openssl iproute From 07f91d792a5fc354750691094718892de0249eb9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 12 Jan 2024 10:52:49 -0500 Subject: [PATCH 033/122] Fix omission of info dir in plugins --- confluent_server/setup.py.tmpl | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_server/setup.py.tmpl b/confluent_server/setup.py.tmpl index e6bd08b2..871497e3 100644 --- a/confluent_server/setup.py.tmpl +++ b/confluent_server/setup.py.tmpl @@ -19,6 +19,7 @@ setup( 'confluent/plugins/hardwaremanagement/', 'confluent/plugins/deployment/', 'confluent/plugins/console/', + 'confluent/plugins/info/', 'confluent/plugins/shell/', 'confluent/collective/', 'confluent/plugins/configuration/'], From 
5fdd6973f12279fbfae31823cd2c02a4e4130b46 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 16 Jan 2024 11:11:53 -0500 Subject: [PATCH 034/122] Update with more license content --- genesis/getlicenses.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/genesis/getlicenses.py b/genesis/getlicenses.py index 037699bc..5306eb68 100644 --- a/genesis/getlicenses.py +++ b/genesis/getlicenses.py @@ -80,6 +80,12 @@ manuallicenses = [ #'lz4-1.8.3]# cp LICENSE /usr/share/licenses/lz4/LICENSE' # net-snmp has a bundled openssl, but the build does not avail itself of that copy '/usr/share/licenses/perl-libs/LICENSE', # ./dist/ExtUtils-CBuilder/LICENSE from perl srpm + '/usr/share/licenses/pam/COPYING.bison', # pam_conv_y + '/usr/share/licenses/pcre/LICENSE.BSD2', # stack-less just in time compiler, Zoltan Herzeg + '/usr/share/licenses/sqlite/LICENSE.md', # https://raw.githubusercontent.com/sqlite/sqlite/master/LICENSE.md + '/usr/share/licenses/pcre2/LICENSE.BSD2', + '/usr/share/licenses/perl/COPYING.regexec', # regexec.c + '/usr/share/doc/platform-python/README.rst', '/usr/share/licenses/lz4/LICENSE', '/usr/share/licenses/lm_sensors/COPYING', '/usr/share/doc/libunistring/README', @@ -93,7 +99,9 @@ manuallicenses = [ '/usr/share/doc/libnl3/COPYING', '/usr/share/licenses/xfsprogs/GPL-2.0', '/usr/share/licenses/xfsprogs/LGPL-2.1', - '/usr/share/licenses/tmux/NOTICE', + '/usr/share/licenses/tmux/NOTICE', # built by extracttmuxlicenses.py + '/usr/share/licenses/tmux/COPYING', # extracted from source + '/usr/share/licenses/tmux/README', # extracted from source '/usr/share/licenses/kernel-extra/exceptions/Linux-syscall-note', '/usr/share/licenses/kernel-extra/other/Apache-2.0', '/usr/share/licenses/kernel-extra/other/CC-BY-SA-4.0', From 9203ac32e979fa29681cd4bbea97e154c70adb24 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 17 Jan 2024 17:01:49 -0500 Subject: [PATCH 035/122] Start work on browserfs concept This will allow WebUI reactivity even 
with large files for import. --- confluent_server/confluent/core.py | 13 ++++- confluent_server/confluent/mountmanager.py | 58 ++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 confluent_server/confluent/mountmanager.py diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index 6ab6bd59..f21d36b5 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -44,6 +44,7 @@ import confluent.discovery.core as disco import confluent.interface.console as console import confluent.exceptions as exc import confluent.messages as msg +import confluent.mountmanager as mountmanager import confluent.networking.macmap as macmap import confluent.noderange as noderange import confluent.osimage as osimage @@ -159,7 +160,7 @@ def _merge_dict(original, custom): rootcollections = ['deployment/', 'discovery/', 'events/', 'networking/', - 'noderange/', 'nodes/', 'nodegroups/', 'usergroups/' , + 'noderange/', 'nodes/', 'nodegroups/', 'storage/', 'usergroups/' , 'users/', 'uuid', 'version'] @@ -169,6 +170,13 @@ class PluginRoute(object): +def handle_storage(configmanager, inputdata, pathcomponents, operation): + if len(pathcomponents) == 1: + yield msg.ChildCollection('remote/') + return + if pathcomponents[1] == 'remote': + for rsp in mountmanager.handle_request(configmanager, inputdata, pathcomponents[2:], operation): + yield rsp def handle_deployment(configmanager, inputdata, pathcomponents, operation): if len(pathcomponents) == 1: @@ -1245,6 +1253,9 @@ def handle_path(path, operation, configmanager, inputdata=None, autostrip=True): elif pathcomponents[0] == 'deployment': return handle_deployment(configmanager, inputdata, pathcomponents, operation) + elif pathcomponents[0] == 'storage': + return handle_storage(configmanager, inputdata, pathcomponents, + operation) elif pathcomponents[0] == 'nodegroups': return handle_nodegroup_request(configmanager, inputdata, pathcomponents, diff 
--git a/confluent_server/confluent/mountmanager.py b/confluent_server/confluent/mountmanager.py new file mode 100644 index 00000000..c73b87a2 --- /dev/null +++ b/confluent_server/confluent/mountmanager.py @@ -0,0 +1,58 @@ + +import confluent.messages as msg +import confluent.exceptions as exc +import struct +import eventlet.green.socket as socket +mountsbyuser = {} + +def handle_request(configmanager, inputdata, pathcomponents, operation): + curruser = configmanager.current_user + if len(pathcomponents) == 0: + mounts = mountsbyuser.get(curruser, []) + if operation == 'retrieve': + for mount in mounts: + yield msg.ChildCollection(mount['index']) + elif operation == 'create': + if 'name' not in inputdata: + raise exc.InvalidArgumentException('Required parameter "name" is missing') + usedidx = set([]) + for mount in mounts: + usedidx.add(mount['index']) + curridx = 1 + while curridx in usedidx: + curridx += 1 + currmount = requestmount(curruser, inputdata['name']) + currmount['index'] = curridx + if curruser not in mountsbyuser: + mountsbyuser[curruser] = [] + mountsbyuser[curruser].append(currmount) + yield msg.KeyValueData({ + 'path': currmount['path'], + 'authtoken': currmount['authtoken'] + }) + +def requestmount(subdir, filename): + a = socket.socket(socket.AF_UNIX) + a.connect('/var/run/confluent/browserfs/control') + subname = subdir.encode() + a.send(struct.pack('!II', 1, len(subname))) + a.send(subname) + fname = filename.encode() + a.send(struct.pack('!I', len(fname))) + a.send(fname) + rsp = a.recv(4) + retcode = struct.unpack('!I', rsp)[0] + if retcode != 0: + raise Exception("Bad return code") + rsp = a.recv(4) + nlen = struct.unpack('!I', rsp)[0] + idstr = a.recv(nlen).decode('utf8') + rsp = a.recv(4) + nlen = struct.unpack('!I', rsp)[0] + authtok = a.recv(nlen).decode('utf8') + thismount = { + 'id': idstr, + 'path': '{}/{}/{}'.format(idstr, subdir, filename), + 'authtoken': authtok + } + return thismount From bcc631f88d821c474010ca3ad2f12936cd832801 
Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 22 Jan 2024 10:39:17 -0500 Subject: [PATCH 036/122] Set static hostname in diskless boot --- .../el8-diskless/profiles/default/scripts/onboot.sh | 1 + .../el9-diskless/profiles/default/scripts/onboot.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh index 3c99ad12..b2c0d1b3 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/onboot.sh @@ -16,6 +16,7 @@ if [ -z "$confluent_mgr" ]; then fi confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') timedatectl set-timezone $(grep ^timezone: /etc/confluent/confluent.deploycfg|awk '{print $2}') +hostnamectl set-hostname $nodename export nodename confluent_mgr confluent_profile . /etc/confluent/functions mkdir -p /var/log/confluent diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh index 3c99ad12..b2c0d1b3 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/onboot.sh @@ -16,6 +16,7 @@ if [ -z "$confluent_mgr" ]; then fi confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') timedatectl set-timezone $(grep ^timezone: /etc/confluent/confluent.deploycfg|awk '{print $2}') +hostnamectl set-hostname $nodename export nodename confluent_mgr confluent_profile . /etc/confluent/functions mkdir -p /var/log/confluent From b90718982efb2bd1895a4c2b1fa2c587da217de7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 22 Jan 2024 17:22:47 -0500 Subject: [PATCH 037/122] Implement a number of OS deployment management enhancements. 
Add capability to fingerprint media without doing a full import (/deployment/fingerprinting/) Add fetching the profile info as json under the /deployment/ api. Prepare to support custom distribution name on import --- confluent_server/confluent/core.py | 36 +++++++++++++++++++--- confluent_server/confluent/mountmanager.py | 1 + confluent_server/confluent/osimage.py | 36 ++++++++++++++-------- 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index f21d36b5..a8a4412b 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -70,6 +70,7 @@ import os import eventlet.green.socket as socket import struct import sys +import yaml pluginmap = {} dispatch_plugins = (b'ipmi', u'ipmi', b'redfish', u'redfish', b'tsmsol', u'tsmsol', b'geist', u'geist', b'deltapdu', u'deltapdu', b'eatonpdu', u'eatonpdu', b'affluent', u'affluent', b'cnos', u'cnos') @@ -177,6 +178,7 @@ def handle_storage(configmanager, inputdata, pathcomponents, operation): if pathcomponents[1] == 'remote': for rsp in mountmanager.handle_request(configmanager, inputdata, pathcomponents[2:], operation): yield rsp + def handle_deployment(configmanager, inputdata, pathcomponents, operation): if len(pathcomponents) == 1: @@ -199,8 +201,19 @@ def handle_deployment(configmanager, inputdata, pathcomponents, for prof in osimage.list_profiles(): yield msg.ChildCollection(prof + '/') return - if len(pathcomponents) == 3: - profname = pathcomponents[-1] + if len(pathcomponents) >= 3: + profname = pathcomponents[2] + if len(pathcomponents) == 4: + if operation == 'retrieve': + if len(pathcomponents) == 4 and pathcomponents[-1] == 'info': + with open('/var/lib/confluent/public/os/{}/profile.yaml'.format(profname)) as profyaml: + profinfo = yaml.safe_load(profyaml) + profinfo['name'] = profname + yield msg.KeyValueData(profinfo) + return + elif len(pathcomponents) == 3: + if operation == 'retrieve': + yield 
msg.ChildCollection('info') if operation == 'update': if 'updateboot' in inputdata: osimage.update_boot(profname) @@ -216,6 +229,17 @@ def handle_deployment(configmanager, inputdata, pathcomponents, for cust in customized: yield msg.KeyValueData({'customized': cust}) return + if pathcomponents[1] == 'fingerprint': + if operation == 'create': + importer = osimage.MediaImporter(inputdata['filename'], configmanager, checkonly=True) + medinfo = { + 'targetpath': importer.targpath, + 'name': importer.osname, + 'oscategory': importer.oscategory, + 'errors': importer.errors, + } + yield msg.KeyValueData(medinfo) + return if pathcomponents[1] == 'importing': if len(pathcomponents) == 2 or not pathcomponents[-1]: if operation == 'retrieve': @@ -223,8 +247,12 @@ def handle_deployment(configmanager, inputdata, pathcomponents, yield imp return elif operation == 'create': - importer = osimage.MediaImporter(inputdata['filename'], - configmanager) + if inputdata.get('custname', None): + importer = osimage.MediaImporter(inputdata['filename'], + configmanager, inputdata['custname']) + else: + importer = osimage.MediaImporter(inputdata['filename'], + configmanager) yield msg.KeyValueData({'target': importer.targpath, 'name': importer.importkey}) return diff --git a/confluent_server/confluent/mountmanager.py b/confluent_server/confluent/mountmanager.py index c73b87a2..c6c10bc1 100644 --- a/confluent_server/confluent/mountmanager.py +++ b/confluent_server/confluent/mountmanager.py @@ -28,6 +28,7 @@ def handle_request(configmanager, inputdata, pathcomponents, operation): mountsbyuser[curruser].append(currmount) yield msg.KeyValueData({ 'path': currmount['path'], + 'fullpath': '/var/run/confluent/browserfs/mount/{}'.format(currmount['path']), 'authtoken': currmount['authtoken'] }) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 8884e0e9..969e05b4 100644 --- a/confluent_server/confluent/osimage.py +++ 
b/confluent_server/confluent/osimage.py @@ -747,9 +747,9 @@ def rebase_profile(dirname): # customization detected, skip # else # update required, manifest update - - - + + + def get_hashes(dirname): hashmap = {} for dname, _, fnames in os.walk(dirname): @@ -776,7 +776,7 @@ def generate_stock_profiles(defprofile, distpath, targpath, osname, continue oumask = os.umask(0o22) shutil.copytree(srcname, dirname) - hmap = get_hashes(dirname) + hmap = get_hashes(dirname) profdata = None try: os.makedirs('{0}/boot/initramfs'.format(dirname), 0o755) @@ -824,11 +824,12 @@ def generate_stock_profiles(defprofile, distpath, targpath, osname, class MediaImporter(object): - def __init__(self, media, cfm=None): + def __init__(self, media, cfm=None, customname=None, checkonly=False): self.worker = None if not os.path.exists('/var/lib/confluent/public'): raise Exception('`osdeploy initialize` must be executed before importing any media') self.profiles = [] + self.errors = [] medfile = None self.medfile = None if cfm and media in cfm.clientfiles: @@ -848,25 +849,34 @@ class MediaImporter(object): self.phase = 'copying' if not identity: raise Exception('Unrecognized OS Media') - if 'subname' in identity: + if customname: + importkey = customname + elif 'subname' in identity: importkey = '{0}-{1}'.format(identity['name'], identity['subname']) else: importkey = identity['name'] - if importkey in importing: + if importkey in importing and not checkonly: raise Exception('Media import already in progress for this media') self.importkey = importkey - importing[importkey] = self - self.importkey = importkey self.osname = identity['name'] self.oscategory = identity.get('category', None) - targpath = identity['name'] + if customname: + targpath = customname + else: + targpath = identity['name'] self.distpath = '/var/lib/confluent/distributions/' + targpath - if identity.get('subname', None): + if identity.get('subname', None): # subname is to indicate disk number in a media set targpath += '/' + 
identity['subname'] self.targpath = '/var/lib/confluent/distributions/' + targpath if os.path.exists(self.targpath): - del importing[importkey] - raise Exception('{0} already exists'.format(self.targpath)) + errstr = '{0} already exists'.format(self.targpath) + if checkonly: + self.errors = [errstr] + else: + raise Exception(errstr) + if checkonly: + return + importing[importkey] = self self.filename = os.path.abspath(media) self.error = '' self.importer = eventlet.spawn(self.importmedia) From e5736ecb51837bd9144114e6dc304cd1bbabae2c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 23 Jan 2024 14:18:25 -0500 Subject: [PATCH 038/122] Update license assets in genesis --- genesis/getlicenses.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/genesis/getlicenses.py b/genesis/getlicenses.py index 5306eb68..d5e9cd12 100644 --- a/genesis/getlicenses.py +++ b/genesis/getlicenses.py @@ -94,6 +94,8 @@ manuallicenses = [ '/usr/share/doc/zstd/README.md', '/usr/share/doc/hwdata/LICENSE', '/usr/share/doc/ipmitool/COPYING', + '/usr/share/licenses/linux-firmware/LICENSE.hfi1_firmware', # these two need to be extracted from srcrpm + '/usr/share/licenses/linux-firmware/LICENSE.ice_enhanced', # '/usr/share/doc/libaio/COPYING', '/usr/share/doc/net-snmp/COPYING', '/usr/share/doc/libnl3/COPYING', @@ -121,6 +123,8 @@ manuallicenses = [ '/usr/share/licenses/kmod/tools/COPYING', # GPL not LGPL, must extract from kmod srpm '/usr/share/licenses/krb5-libs/NOTICE', # copy it verbatim from LICENSE, exact same file '/usr/share/doc/less/README', + '/usr/share/almalinux-release/EULA', + '/usr/share/doc/almalinux-release/GPL', '/usr/share/licenses/libcap-ng-utils/COPYING', '/usr/share/licenses/libdb/copyright', # from libdb, db-5.3.28, lang/sql/odbc/debian/copyright '/usr/share/licenses/libgcrypt/LICENSES.ppc-aes-gcm', # libgcrypt license to carry forward From fa7cd2940e58a513f2ddb288a123fbcfa41fb7f7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 23 Jan 2024 14:39:32 -0500 
Subject: [PATCH 039/122] More license updates for genesis --- genesis/getlicenses.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/genesis/getlicenses.py b/genesis/getlicenses.py index d5e9cd12..edb47b90 100644 --- a/genesis/getlicenses.py +++ b/genesis/getlicenses.py @@ -76,6 +76,8 @@ manuallicenses = [ '/usr/share/licenses/lz4/LICENSE.BSD', '/usr/share/licenses/nss/LICENSE.APACHE', # http://www.apache.org/licenses/LICENSE-2.0 '/usr/share/licenses/openssh/COPYING.blowfish', # from header of blowfish file in bsd-compat + '/usr/share/licenses/bc/COPYING.GPLv2', + '/usr/share/licenses/bind-license/LICENSE', # MPLv2 from the source code # cp /usr/share/doc/lz4-libs/LICENSE /usr/share/licenses/lz4/LICENSE.BSD #'lz4-1.8.3]# cp LICENSE /usr/share/licenses/lz4/LICENSE' # net-snmp has a bundled openssl, but the build does not avail itself of that copy From e5051408e53c087c3c5a3cbaddff4a13ff2b249f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 23 Jan 2024 15:02:08 -0500 Subject: [PATCH 040/122] More license handling --- genesis/getlicenses.py | 1 + 1 file changed, 1 insertion(+) diff --git a/genesis/getlicenses.py b/genesis/getlicenses.py index edb47b90..5514a5cb 100644 --- a/genesis/getlicenses.py +++ b/genesis/getlicenses.py @@ -78,6 +78,7 @@ manuallicenses = [ '/usr/share/licenses/openssh/COPYING.blowfish', # from header of blowfish file in bsd-compat '/usr/share/licenses/bc/COPYING.GPLv2', '/usr/share/licenses/bind-license/LICENSE', # MPLv2 from the source code + '/usr/share/licenses/procps-ng/COPYING.LIBv2.1', # fetched internet # cp /usr/share/doc/lz4-libs/LICENSE /usr/share/licenses/lz4/LICENSE.BSD #'lz4-1.8.3]# cp LICENSE /usr/share/licenses/lz4/LICENSE' # net-snmp has a bundled openssl, but the build does not avail itself of that copy From a3d386dc39a119d375c80ffcb2e02e146d51f28d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 24 Jan 2024 14:47:23 -0500 Subject: [PATCH 041/122] Add NOTICE gathering for some genesis packages --- 
genesis/getlicenses.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/genesis/getlicenses.py b/genesis/getlicenses.py index 5514a5cb..3f79c319 100644 --- a/genesis/getlicenses.py +++ b/genesis/getlicenses.py @@ -87,6 +87,9 @@ manuallicenses = [ '/usr/share/licenses/pcre/LICENSE.BSD2', # stack-less just in time compiler, Zoltan Herzeg '/usr/share/licenses/sqlite/LICENSE.md', # https://raw.githubusercontent.com/sqlite/sqlite/master/LICENSE.md '/usr/share/licenses/pcre2/LICENSE.BSD2', + '/usr/share/licenses/dhcp/NOTICE', + '/usr/share/licenses/bash/NOTICE', + '/usr/share/licenses/libsepol/NOTICE', '/usr/share/licenses/perl/COPYING.regexec', # regexec.c '/usr/share/doc/platform-python/README.rst', '/usr/share/licenses/lz4/LICENSE', From 41675e528f5c74debacf69ec8947112c2839c7cd Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 24 Jan 2024 15:31:27 -0500 Subject: [PATCH 042/122] Amend dhcp license path --- genesis/getlicenses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis/getlicenses.py b/genesis/getlicenses.py index 3f79c319..f4df2fca 100644 --- a/genesis/getlicenses.py +++ b/genesis/getlicenses.py @@ -87,7 +87,7 @@ manuallicenses = [ '/usr/share/licenses/pcre/LICENSE.BSD2', # stack-less just in time compiler, Zoltan Herzeg '/usr/share/licenses/sqlite/LICENSE.md', # https://raw.githubusercontent.com/sqlite/sqlite/master/LICENSE.md '/usr/share/licenses/pcre2/LICENSE.BSD2', - '/usr/share/licenses/dhcp/NOTICE', + '/usr/share/licenses/dhcp-common/NOTICE', '/usr/share/licenses/bash/NOTICE', '/usr/share/licenses/libsepol/NOTICE', '/usr/share/licenses/perl/COPYING.regexec', # regexec.c From fa3e1202c47a450bad4c0a054daaef4266303050 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 26 Jan 2024 09:24:41 -0500 Subject: [PATCH 043/122] Relax systemd device policy to allow /dev/fuse access --- confluent_server/systemd/confluent.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/confluent_server/systemd/confluent.service b/confluent_server/systemd/confluent.service index da0fee7b..598c23c9 100644 --- a/confluent_server/systemd/confluent.service +++ b/confluent_server/systemd/confluent.service @@ -16,7 +16,7 @@ Restart=on-failure AmbientCapabilities=CAP_NET_BIND_SERVICE CAP_SETUID CAP_SETGID CAP_CHOWN CAP_NET_RAW User=confluent Group=confluent -DevicePolicy=closed +#DevicePolicy=closed # fuse filesystem requires us to interact with /dev/fuse ProtectControlGroups=true ProtectSystem=true From 87454c1ab1dfdbee68c37ca9de8437c2d48c0cf1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 26 Jan 2024 09:31:59 -0500 Subject: [PATCH 044/122] Start browserfs if not yet running --- confluent_server/confluent/mountmanager.py | 20 ++++++++++++++++++++ genesis/getlicenses.py | 1 + 2 files changed, 21 insertions(+) diff --git a/confluent_server/confluent/mountmanager.py b/confluent_server/confluent/mountmanager.py index c6c10bc1..36f654d2 100644 --- a/confluent_server/confluent/mountmanager.py +++ b/confluent_server/confluent/mountmanager.py @@ -1,9 +1,27 @@ +import eventlet import confluent.messages as msg import confluent.exceptions as exc import struct import eventlet.green.socket as socket +import eventlet.green.subprocess as subprocess +import os mountsbyuser = {} +_browserfsd = None + +def assure_browserfs(): + global _browserfsd + if _browserfsd is None: + os.makedirs('/var/run/confluent/browserfs/mount', exist_ok=True) + _browserfsd = subprocess.Popen( + ['/opt/confluent/bin/browserfs', + '-c', '/var/run/confluent/browserfs/control', + '-s', '127.0.0.1:4006', + # browserfs supports unix domain websocket, however apache reverse proxy is dicey that way in some versions + '-w', '/var/run/confluent/browserfs/mount']) + while not os.path.exists('/var/run/confluent/browserfs/control'): + eventlet.sleep(0.5) + def handle_request(configmanager, inputdata, pathcomponents, operation): curruser = configmanager.current_user @@ -33,6 +51,7 @@ def 
handle_request(configmanager, inputdata, pathcomponents, operation): }) def requestmount(subdir, filename): + assure_browserfs() a = socket.socket(socket.AF_UNIX) a.connect('/var/run/confluent/browserfs/control') subname = subdir.encode() @@ -57,3 +76,4 @@ def requestmount(subdir, filename): 'authtoken': authtok } return thismount + diff --git a/genesis/getlicenses.py b/genesis/getlicenses.py index f4df2fca..a0118c48 100644 --- a/genesis/getlicenses.py +++ b/genesis/getlicenses.py @@ -88,6 +88,7 @@ manuallicenses = [ '/usr/share/licenses/sqlite/LICENSE.md', # https://raw.githubusercontent.com/sqlite/sqlite/master/LICENSE.md '/usr/share/licenses/pcre2/LICENSE.BSD2', '/usr/share/licenses/dhcp-common/NOTICE', + '/usr/share/licenses/xz/COPYING.GPLv3', # manually extracted from xz source '/usr/share/licenses/bash/NOTICE', '/usr/share/licenses/libsepol/NOTICE', '/usr/share/licenses/perl/COPYING.regexec', # regexec.c From 16ad4e776feab656a5ce5066658882dcf410af63 Mon Sep 17 00:00:00 2001 From: tkucherera Date: Fri, 26 Jan 2024 12:39:25 -0500 Subject: [PATCH 045/122] opening web ui using default ip --- confluent_server/confluent/httpapi.py | 47 ++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index f36f2c73..4688ddd5 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -618,6 +618,31 @@ def resourcehandler(env, start_response): yield '500 - ' + str(e) return +def targ_ip_family(targip, first_pass=True): + # check ipv4 + try: + socket.inet_aton(targip) + return 'is_ipv4' + except socket.error: + pass + # check ipv6 + try: + check_ip = targip + if '%' in targip: + check_ip = targip.split('%')[0] + socket.inet_pton(socket.AF_INET6, check_ip) + return 'is_ipv6' + except socket.error: + # at this point we now know its not both ipv6 or ipv4 so we check if its hostname + if first_pass: + try: + ip_address = 
socket.gethostbyname(targip) + return targ_ip_family(ip_address, False) + except socket.gaierror: + return 'Cant figure that guy' + else: + return 'Cant figure it out' + def resourcehandler_backend(env, start_response): """Function to handle new wsgi requests @@ -728,7 +753,13 @@ def resourcehandler_backend(env, start_response): elif (env['PATH_INFO'].endswith('/forward/web') and env['PATH_INFO'].startswith('/nodes/')): prefix, _, _ = env['PATH_INFO'].partition('/forward/web') - _, _, nodename = prefix.rpartition('/') + #_, _, nodename = prefix.rpartition('/') + default = False + if 'default' in env['PATH_INFO']: + default = True + _,_,nodename,_ = prefix.split('/') + else: + _, _, nodename = prefix.rpartition('/') hm = cfgmgr.get_node_attributes(nodename, 'hardwaremanagement.manager') targip = hm.get(nodename, {}).get( 'hardwaremanagement.manager', {}).get('value', None) @@ -737,6 +768,20 @@ def resourcehandler_backend(env, start_response): yield 'No hardwaremanagement.manager defined for node' return targip = targip.split('/', 1)[0] + if default: + # understand targip + ip_family = targ_ip_family(targip) + if ip_family == 'is_ipv4': + url = 'https://{0}'.format(targip) + elif ip_family == 'is_ipv6': + url = 'https://[{0}]'.format(targip) + else: + start_response('404 Not Found', headers) + yield 'Cant figure out the hardwaremanagenent.manager attribute ip' + return + start_response('302', [('Location', url)]) + yield 'Our princess is in another castle!' 
+ return funport = forwarder.get_port(targip, env['HTTP_X_FORWARDED_FOR'], authorized['sessionid']) host = env['HTTP_X_FORWARDED_HOST'] From fcb3d917db33d08d512b156b3d5e5ac5b986d9a2 Mon Sep 17 00:00:00 2001 From: tkucherera Date: Fri, 26 Jan 2024 17:14:04 -0500 Subject: [PATCH 046/122] use socket.getaddrinfo --- confluent_server/confluent/httpapi.py | 51 +++++++++------------------ 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index 4688ddd5..e30df36d 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -618,32 +618,6 @@ def resourcehandler(env, start_response): yield '500 - ' + str(e) return -def targ_ip_family(targip, first_pass=True): - # check ipv4 - try: - socket.inet_aton(targip) - return 'is_ipv4' - except socket.error: - pass - # check ipv6 - try: - check_ip = targip - if '%' in targip: - check_ip = targip.split('%')[0] - socket.inet_pton(socket.AF_INET6, check_ip) - return 'is_ipv6' - except socket.error: - # at this point we now know its not both ipv6 or ipv4 so we check if its hostname - if first_pass: - try: - ip_address = socket.gethostbyname(targip) - return targ_ip_family(ip_address, False) - except socket.gaierror: - return 'Cant figure that guy' - else: - return 'Cant figure it out' - - def resourcehandler_backend(env, start_response): """Function to handle new wsgi requests """ @@ -769,15 +743,24 @@ def resourcehandler_backend(env, start_response): return targip = targip.split('/', 1)[0] if default: - # understand targip - ip_family = targ_ip_family(targip) - if ip_family == 'is_ipv4': - url = 'https://{0}'.format(targip) - elif ip_family == 'is_ipv6': - url = 'https://[{0}]'.format(targip) - else: + try: + ip_info = socket.getaddrinfo(targip, 0, 0, socket.SOCK_STREAM) + except socket.gaierror: start_response('404 Not Found', headers) - yield 'Cant figure out the hardwaremanagenent.manager attribute ip' + 
yield 'hardwaremanagement.manager definition could not be resolved' + return + # this is just to future proof just in case the indexes of the address family change in future + for i in range(len(ip_info)): + if ip_info[i][0] == socket.AF_INET: + url = 'https://{0}/'.format(ip_info[i][-1][0]) + start_response('302', [('Location', url)]) + yield 'Our princess is in another castle!' + return + elif ip_info[i][0] == socket.AF_INET6: + url = 'https://[{0}]/'.format(ip_info[i][-1][0]) + if url.startswith('https://[fe80'): + start_response('405 Method Not Allowed', headers) + yield 'link local ipv6 address cannot be used in browser' return start_response('302', [('Location', url)]) yield 'Our princess is in another castle!' From d0373977b35464d15374f6abd7d8d80b03fd4365 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 30 Jan 2024 09:08:28 -0500 Subject: [PATCH 047/122] Fix FFDC preflight checks The code was comparing two string constants, instead of a variable to a constant. Correct the problem to enable the preflight checks to work as intended. 
--- confluent_server/confluent/firmwaremanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/firmwaremanager.py b/confluent_server/confluent/firmwaremanager.py index a7713943..eb5d4c86 100644 --- a/confluent_server/confluent/firmwaremanager.py +++ b/confluent_server/confluent/firmwaremanager.py @@ -53,7 +53,7 @@ def execupdate(handler, filename, updateobj, type, owner, node, datfile): return if type == 'ffdc' and os.path.isdir(filename): filename += '/' + node - if 'type' == 'ffdc': + if type == 'ffdc': errstr = False if os.path.exists(filename): errstr = '{0} already exists on {1}, cannot overwrite'.format( From 72cace5a50f55836db19035d3da51068e3c5bac9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 30 Jan 2024 11:11:27 -0500 Subject: [PATCH 048/122] More thoroughly wire up custom name Have custom name go through to actual import and influence profile names --- confluent_server/confluent/osimage.py | 31 +++++++++++++++++---------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 969e05b4..2289a048 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -601,7 +601,7 @@ def fingerprint(archive): return imginfo, None, None -def import_image(filename, callback, backend=False, mfd=None): +def import_image(filename, callback, backend=False, mfd=None, custtargpath=None, custdistpath=None, custname=''): if mfd: archive = os.fdopen(int(mfd), 'rb') else: @@ -610,11 +610,16 @@ def import_image(filename, callback, backend=False, mfd=None): if not identity: return -1 identity, imginfo, funname = identity - targpath = identity['name'] - distpath = '/var/lib/confluent/distributions/' + targpath - if identity.get('subname', None): - targpath += '/' + identity['subname'] - targpath = '/var/lib/confluent/distributions/' + targpath + distpath = custdistpath + if not distpath: + targpath 
= identity['name'] + distpath = '/var/lib/confluent/distributions/' + targpath + if not custtargpath: + if identity.get('subname', None): + targpath += '/' + identity['subname'] + targpath = '/var/lib/confluent/distributions/' + targpath + else: + targpath = custtargpath try: os.makedirs(targpath, 0o755) except Exception as e: @@ -765,12 +770,15 @@ def get_hashes(dirname): def generate_stock_profiles(defprofile, distpath, targpath, osname, - profilelist): + profilelist, customname): osd, osversion, arch = osname.split('-') bootupdates = [] for prof in os.listdir('{0}/profiles'.format(defprofile)): srcname = '{0}/profiles/{1}'.format(defprofile, prof) - profname = '{0}-{1}'.format(osname, prof) + if customname: + profname = '{0}-{1}'.format(customname, prof) + else: + profname = '{0}-{1}'.format(osname, prof) dirname = '/var/lib/confluent/public/os/{0}'.format(profname) if os.path.exists(dirname): continue @@ -849,6 +857,7 @@ class MediaImporter(object): self.phase = 'copying' if not identity: raise Exception('Unrecognized OS Media') + self.customname = customname if customname else '' if customname: importkey = customname elif 'subname' in identity: @@ -894,7 +903,7 @@ class MediaImporter(object): os.environ['CONFLUENT_MEDIAFD'] = '{0}'.format(self.medfile.fileno()) with open(os.devnull, 'w') as devnull: self.worker = subprocess.Popen( - [sys.executable, __file__, self.filename, '-b'], + [sys.executable, __file__, self.filename, '-b', self.targpath, self.distpath, self.customname], stdin=devnull, stdout=subprocess.PIPE, close_fds=False) wkr = self.worker currline = b'' @@ -934,7 +943,7 @@ class MediaImporter(object): self.oscategory) try: generate_stock_profiles(defprofile, self.distpath, self.targpath, - self.osname, self.profiles) + self.osname, self.profiles, self.customname) except Exception as e: self.phase = 'error' self.error = str(e) @@ -961,7 +970,7 @@ if __name__ == '__main__': os.umask(0o022) if len(sys.argv) > 2: mfd = 
os.environ.get('CONFLUENT_MEDIAFD', None) - sys.exit(import_image(sys.argv[1], callback=printit, backend=True, mfd=mfd)) + sys.exit(import_image(sys.argv[1], callback=printit, backend=True, mfd=mfd, custtargpath=sys.argv[3], custdistpath=sys.argv[4], custname=sys.argv[5])) else: sys.exit(import_image(sys.argv[1], callback=printit)) From 170e585e57c5d77689576317e9efed3d53ced43e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 30 Jan 2024 13:53:59 -0500 Subject: [PATCH 049/122] Add preliminary ubuntu 24.04 support --- confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl | 5 +++-- confluent_osdeploy/confluent_osdeploy.spec.tmpl | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl index f1b7c804..add86e6c 100644 --- a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl @@ -26,7 +26,8 @@ mkdir -p opt/confluent/bin mkdir -p stateless-bin cp -a el8bin/* . 
ln -s el8 el9 -for os in rhvh4 el7 genesis el8 suse15 ubuntu20.04 ubuntu22.04 coreos el9; do +ln -s ubuntu22.04 ubuntu24.04 +for os in rhvh4 el7 genesis el8 suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9; do mkdir ${os}out cd ${os}out if [ -d ../${os}bin ]; then @@ -76,7 +77,7 @@ cp -a esxi7 esxi8 %install mkdir -p %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ #cp LICENSE %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ -for os in rhvh4 el7 el8 el9 genesis suse15 ubuntu20.04 ubuntu22.04 esxi6 esxi7 esxi8 coreos; do +for os in rhvh4 el7 el8 el9 genesis suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 coreos; do mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs/aarch64/ cp ${os}out/addons.* %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs/aarch64/ if [ -d ${os}disklessout ]; then diff --git a/confluent_osdeploy/confluent_osdeploy.spec.tmpl b/confluent_osdeploy/confluent_osdeploy.spec.tmpl index d939a0c3..07506bbf 100644 --- a/confluent_osdeploy/confluent_osdeploy.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy.spec.tmpl @@ -28,7 +28,8 @@ This contains support utilities for enabling deployment of x86_64 architecture s #cp start_root urlmount ../stateless-bin/ #cd .. ln -s el8 el9 -for os in rhvh4 el7 genesis el8 suse15 ubuntu18.04 ubuntu20.04 ubuntu22.04 coreos el9; do +ln -s ubuntu22.04 ubuntu24.04 +for os in rhvh4 el7 genesis el8 suse15 ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9; do mkdir ${os}out cd ${os}out if [ -d ../${os}bin ]; then @@ -42,7 +43,7 @@ for os in rhvh4 el7 genesis el8 suse15 ubuntu18.04 ubuntu20.04 ubuntu22.04 coreo mv ../addons.cpio . cd .. 
done -for os in el7 el8 suse15 el9 ubuntu20.04 ubuntu22.04; do +for os in el7 el8 suse15 el9 ubuntu20.04 ubuntu22.04 ubuntu24.04; do mkdir ${os}disklessout cd ${os}disklessout if [ -d ../${os}bin ]; then From 7618fa8b634ca65d2ca0107c27e1624e41007fd8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 30 Jan 2024 14:21:25 -0500 Subject: [PATCH 050/122] Add diskless links --- confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl | 1 + confluent_osdeploy/confluent_osdeploy.spec.tmpl | 1 + 2 files changed, 2 insertions(+) diff --git a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl index add86e6c..db2df9f0 100644 --- a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl @@ -27,6 +27,7 @@ mkdir -p stateless-bin cp -a el8bin/* . ln -s el8 el9 ln -s ubuntu22.04 ubuntu24.04 +ln -s ubuntu22.04-diskless ubuntu24.04-diskless for os in rhvh4 el7 genesis el8 suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9; do mkdir ${os}out cd ${os}out diff --git a/confluent_osdeploy/confluent_osdeploy.spec.tmpl b/confluent_osdeploy/confluent_osdeploy.spec.tmpl index 07506bbf..b8cdca12 100644 --- a/confluent_osdeploy/confluent_osdeploy.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy.spec.tmpl @@ -29,6 +29,7 @@ This contains support utilities for enabling deployment of x86_64 architecture s #cd .. 
ln -s el8 el9 ln -s ubuntu22.04 ubuntu24.04 +ln -s ubuntu22.04-diskless ubuntu24.04-diskless for os in rhvh4 el7 genesis el8 suse15 ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9; do mkdir ${os}out cd ${os}out From 9ad9912ef1cad623c4e66e9175311b8dfb22449c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 30 Jan 2024 14:28:11 -0500 Subject: [PATCH 051/122] Change to pre-made links for ubuntu24.04 --- confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl | 2 -- confluent_osdeploy/confluent_osdeploy.spec.tmpl | 2 -- confluent_osdeploy/ubuntu24.04 | 1 + confluent_osdeploy/ubuntu24.04-diskless | 1 + 4 files changed, 2 insertions(+), 4 deletions(-) create mode 120000 confluent_osdeploy/ubuntu24.04 create mode 120000 confluent_osdeploy/ubuntu24.04-diskless diff --git a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl index db2df9f0..fb6f6ddc 100644 --- a/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy-aarch64.spec.tmpl @@ -26,8 +26,6 @@ mkdir -p opt/confluent/bin mkdir -p stateless-bin cp -a el8bin/* . ln -s el8 el9 -ln -s ubuntu22.04 ubuntu24.04 -ln -s ubuntu22.04-diskless ubuntu24.04-diskless for os in rhvh4 el7 genesis el8 suse15 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9; do mkdir ${os}out cd ${os}out diff --git a/confluent_osdeploy/confluent_osdeploy.spec.tmpl b/confluent_osdeploy/confluent_osdeploy.spec.tmpl index b8cdca12..46648790 100644 --- a/confluent_osdeploy/confluent_osdeploy.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy.spec.tmpl @@ -28,8 +28,6 @@ This contains support utilities for enabling deployment of x86_64 architecture s #cp start_root urlmount ../stateless-bin/ #cd .. 
ln -s el8 el9 -ln -s ubuntu22.04 ubuntu24.04 -ln -s ubuntu22.04-diskless ubuntu24.04-diskless for os in rhvh4 el7 genesis el8 suse15 ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 coreos el9; do mkdir ${os}out cd ${os}out diff --git a/confluent_osdeploy/ubuntu24.04 b/confluent_osdeploy/ubuntu24.04 new file mode 120000 index 00000000..13759564 --- /dev/null +++ b/confluent_osdeploy/ubuntu24.04 @@ -0,0 +1 @@ +ubuntu22.04 \ No newline at end of file diff --git a/confluent_osdeploy/ubuntu24.04-diskless b/confluent_osdeploy/ubuntu24.04-diskless new file mode 120000 index 00000000..00822b05 --- /dev/null +++ b/confluent_osdeploy/ubuntu24.04-diskless @@ -0,0 +1 @@ +ubuntu20.04-diskless \ No newline at end of file From fc82021f2bd80e918a7cb89d79b7c8cbbda36794 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 30 Jan 2024 15:04:04 -0500 Subject: [PATCH 052/122] Add missing ubuntu24.04 to packaging --- confluent_osdeploy/confluent_osdeploy.spec.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/confluent_osdeploy.spec.tmpl b/confluent_osdeploy/confluent_osdeploy.spec.tmpl index 46648790..5faab31f 100644 --- a/confluent_osdeploy/confluent_osdeploy.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy.spec.tmpl @@ -78,7 +78,7 @@ cp -a esxi7 esxi8 %install mkdir -p %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ cp LICENSE %{buildroot}/opt/confluent/share/licenses/confluent_osdeploy/ -for os in rhvh4 el7 el8 el9 genesis suse15 ubuntu20.04 ubuntu18.04 ubuntu22.04 esxi6 esxi7 esxi8 coreos; do +for os in rhvh4 el7 el8 el9 genesis suse15 ubuntu20.04 ubuntu18.04 ubuntu22.04 ubuntu24.04 esxi6 esxi7 esxi8 coreos; do mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs mkdir -p %{buildroot}/opt/confluent/lib/osdeploy/$os/profiles cp ${os}out/addons.* %{buildroot}/opt/confluent/lib/osdeploy/$os/initramfs From 7377c44e0fadc79b91cfe0daee164d6cbd7a2759 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 1 Feb 2024 
08:50:44 -0500 Subject: [PATCH 053/122] Fix problem where one multicast/broadcast attempt could tank other interfaces Carrying over change from ssdp, ignore failures on transmit, particularly if firewall --- confluent_server/confluent/discovery/protocols/slp.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/confluent_server/confluent/discovery/protocols/slp.py b/confluent_server/confluent/discovery/protocols/slp.py index e42c1577..ac332def 100644 --- a/confluent_server/confluent/discovery/protocols/slp.py +++ b/confluent_server/confluent/discovery/protocols/slp.py @@ -246,11 +246,11 @@ def _find_srvtype(net, net4, srvtype, addresses, xid): try: net4.sendto(data, ('239.255.255.253', 427)) except socket.error as se: - # On occasion, multicasting may be disabled - # tolerate this scenario and move on - if se.errno != 101: - raise - net4.sendto(data, (bcast, 427)) + pass + try: + net4.sendto(data, (bcast, 427)) + except socket.error as se: + pass def _grab_rsps(socks, rsps, interval, xidmap, deferrals): From a17695ad0653e7ee7425a371179b6f6d223ce783 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 1 Feb 2024 16:38:04 -0500 Subject: [PATCH 054/122] Extend confluent PXE support For relay agent options, preserve and echo back the option, needed for certain environments. Also, it turns out that for whatever reason on some platforms, iPXE's proxyDHCP logic can't seem to get a reply. In this scenario, provide the filename in the DHCP offer without waiting for proxyDHCP. This change may be worth evaluating more broadly, but may carry risk of not working right with unmanaged DHCP servers. 
--- .../confluent/discovery/protocols/pxe.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/confluent_server/confluent/discovery/protocols/pxe.py b/confluent_server/confluent/discovery/protocols/pxe.py index a9a07963..6dd34efa 100644 --- a/confluent_server/confluent/discovery/protocols/pxe.py +++ b/confluent_server/confluent/discovery/protocols/pxe.py @@ -771,6 +771,14 @@ def reply_dhcp4(node, info, packet, cfg, reqview, httpboot, cfd, profile): node, profile, len(bootfile) - 127)}) return repview[108:108 + len(bootfile)] = bootfile + elif info['architecture'] == 'uefi-aarch64' and packet.get(77, None) == b'iPXE': + if not profile: + profile = get_deployment_profile(node, cfg) + if not profile: + log.log({'info': 'No pending profile for {0}, skipping proxyDHCP eply'.format(node)}) + return + bootfile = 'http://{0}/confluent-public/os/{1}/boot.ipxe'.format(myipn, profile).encode('utf8') + repview[108:108 + len(bootfile)] = bootfile myip = myipn myipn = socket.inet_aton(myipn) orepview[12:16] = myipn @@ -812,6 +820,13 @@ def reply_dhcp4(node, info, packet, cfg, reqview, httpboot, cfd, profile): repview[replen - 1:replen + 1] = b'\x03\x04' repview[replen + 1:replen + 5] = gateway replen += 6 + if 82 in packet: + reloptionslen = len(packet[82]) + reloptionshdr = struct.pack('BB', 82, reloptionslen) + repview[replen - 1:replen + 1] = reloptionshdr + repview[replen + 1:replen + reloptionslen + 1] = packet[82] + replen += 2 + reloptionslen + repview[replen - 1:replen] = b'\xff' # end of options, should always be last byte repview = memoryview(reply) pktlen = struct.pack('!H', replen + 28) # ip+udp = 28 From 59a31d38a25f1c44bad3dcfc2daad2a0520c5501 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 2 Feb 2024 08:51:12 -0500 Subject: [PATCH 055/122] Make reseat concurrent Spawn reseat activity concurrently between chassis. This should reduce time to nodes per chassis rather than total nodes. 
--- .../plugins/hardwaremanagement/enclosure.py | 49 +++++++++++++++---- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py b/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py index 933a852b..4658e2a0 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py @@ -15,10 +15,32 @@ import confluent.core as core import confluent.messages as msg import pyghmi.exceptions as pygexc import confluent.exceptions as exc +import eventlet +import eventlet.queue as queue +import eventlet.greenpool as greenpool + + +def reseat_bays(encmgr, bays, configmanager, rspq): + try: + for encbay in bays: + node = bays[encbay] + try: + for rsp in core.handle_path( + '/nodes/{0}/_enclosure/reseat_bay'.format(encmgr), + 'update', configmanager, + inputdata={'reseat': int(encbay)}): + rspq.put(rsp) + except pygexc.UnsupportedFunctionality as uf: + rspq.put(msg.ConfluentNodeError(node, str(uf))) + except exc.TargetEndpointUnreachable as uf: + rspq.put(msg.ConfluentNodeError(node, str(uf))) + finally: + rspq.put(None) def update(nodes, element, configmanager, inputdata): emebs = configmanager.get_node_attributes( nodes, (u'enclosure.manager', u'enclosure.bay')) + baysbyencmgr = {} for node in nodes: try: em = emebs[node]['enclosure.manager']['value'] @@ -30,13 +52,20 @@ def update(nodes, element, configmanager, inputdata): em = node if not eb: eb = -1 - try: - for rsp in core.handle_path( - '/nodes/{0}/_enclosure/reseat_bay'.format(em), - 'update', configmanager, - inputdata={'reseat': int(eb)}): - yield rsp - except pygexc.UnsupportedFunctionality as uf: - yield msg.ConfluentNodeError(node, str(uf)) - except exc.TargetEndpointUnreachable as uf: - yield msg.ConfluentNodeError(node, str(uf)) + if em not in baysbyencmgr: + baysbyencmgr[em] = {} + baysbyencmgr[em][eb] = node + rspq = queue.Queue() + gp = 
greenpool.GreenPool(64) + for encmgr in baysbyencmgr: + gp.spawn_n(reseat_bays, encmgr, baysbyencmgr[encmgr], configmanager, rspq) + while gp.running(): + nrsp = rspq.get() + if nrsp is not None: + yield nrsp + while not rspq.empty(): + nrsp = rspq.get() + if nrsp is not None: + yield nrsp + + From 3a0172ccccd9a14ef63794a3bd646187b13cff3f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 2 Feb 2024 10:35:47 -0500 Subject: [PATCH 056/122] Make indirect PDU operations concurrent Similar to the enclosure reseat work, have indirect PDU operations be made concurrent across PDUs, though still serial within a PDU. --- .../plugins/hardwaremanagement/enclosure.py | 1 - .../plugins/hardwaremanagement/pdu.py | 68 ++++++++++++++++--- 2 files changed, 60 insertions(+), 9 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py b/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py index 4658e2a0..a59422c0 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/enclosure.py @@ -15,7 +15,6 @@ import confluent.core as core import confluent.messages as msg import pyghmi.exceptions as pygexc import confluent.exceptions as exc -import eventlet import eventlet.queue as queue import eventlet.greenpool as greenpool diff --git a/confluent_server/confluent/plugins/hardwaremanagement/pdu.py b/confluent_server/confluent/plugins/hardwaremanagement/pdu.py index b19c9b22..3db21636 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/pdu.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/pdu.py @@ -15,10 +15,16 @@ import confluent.core as core import confluent.messages as msg import pyghmi.exceptions as pygexc import confluent.exceptions as exc +import eventlet.greenpool as greenpool +import eventlet.queue as queue + +class TaskDone: + pass def retrieve(nodes, element, configmanager, inputdata): emebs = 
configmanager.get_node_attributes( nodes, (u'power.*pdu', u'power.*outlet')) + relpdus = {} if element == ['power', 'inlets']: outletnames = set([]) for node in nodes: @@ -39,13 +45,36 @@ def retrieve(nodes, element, configmanager, inputdata): for pgroup in outlets[node]: pdu = outlets[node][pgroup]['pdu'] outlet = outlets[node][pgroup]['outlet'] - try: - for rsp in core.handle_path( - '/nodes/{0}/power/outlets/{1}'.format(pdu, outlet), - 'retrieve', configmanager): - yield msg.KeyValueData({pgroup: rsp.kvpairs['state']['value']}, node) - except exc.TargetEndpointBadCredentials: - yield msg.ConfluentTargetInvalidCredentials(pdu) + if pdu not in relpdus: + relpdus[pdu] = {} + relpdus[pdu][outlet] = (node, pgroup) + rspq = queue.Queue() + gp = greenpool.GreenPool(64) + for pdu in relpdus: + gp.spawn(readpdu, pdu, relpdus[pdu], configmanager, rspq) + while gp.running(): + nrsp = rspq.get() + if not isinstance(nrsp, TaskDone): + yield nrsp + while not rspq.empty(): + nrsp = rspq.get() + if not isinstance(nrsp, TaskDone): + yield nrsp + +def readpdu(pdu, outletmap, configmanager, rspq): + try: + for outlet in outletmap: + node, pgroup = outletmap[outlet] + try: + for rsp in core.handle_path( + '/nodes/{0}/power/outlets/{1}'.format(pdu, outlet), + 'retrieve', configmanager): + rspq.put(msg.KeyValueData({pgroup: rsp.kvpairs['state']['value']}, node)) + except exc.TargetEndpointBadCredentials: + rspq.put(msg.ConfluentTargetInvalidCredentials(pdu)) + finally: # ensure thhat at least one thing triggers the get + rspq.put(TaskDone()) + def get_outlets(nodes, emebs, inletname): outlets = {} @@ -72,11 +101,34 @@ def update(nodes, element, configmanager, inputdata): emebs = configmanager.get_node_attributes( nodes, (u'power.*pdu', u'power.*outlet')) inletname = element[-1] + relpdus = {} + rspq = queue.Queue() + gp = greenpool.GreenPool(64) outlets = get_outlets(nodes, emebs, inletname) for node in outlets: for pgroup in outlets[node]: pdu = outlets[node][pgroup]['pdu'] outlet = 
outlets[node][pgroup]['outlet'] + if pdu not in relpdus: + relpdus[pdu] = {} + relpdus[pdu][outlet] = (node, pgroup) + for pdu in relpdus: + gp.spawn(updatepdu, pdu, relpdus[pdu], configmanager, inputdata, rspq) + while gp.running(): + nrsp = rspq.get() + if not isinstance(nrsp, TaskDone): + yield nrsp + while not rspq.empty(): + nrsp = rspq.get() + if not isinstance(nrsp, TaskDone): + yield nrsp + +def updatepdu(pdu, outletmap, configmanager, inputdata, rspq): + try: + for outlet in outletmap: + node, pgroup = outletmap[outlet] for rsp in core.handle_path('/nodes/{0}/power/outlets/{1}'.format(pdu, outlet), 'update', configmanager, inputdata={'state': inputdata.powerstate(node)}): - yield msg.KeyValueData({pgroup: rsp.kvpairs['state']['value']}, node) + rspq.put(msg.KeyValueData({pgroup: rsp.kvpairs['state']['value']}, node)) + finally: + rspq.put(TaskDone()) From d07e6f86c0ede225e0d314f5cd7e1cdf90566b11 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 13 Feb 2024 15:58:08 -0500 Subject: [PATCH 057/122] Provide more useful error messages on mistakes within [] --- confluent_server/confluent/noderange.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index cf99dd72..4a5cb808 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -402,12 +402,16 @@ class NodeRange(object): def _expandstring(self, element, filternodes=None): prefix = '' if element[0][0] in ('/', '~'): + if self.purenumeric: + raise Exception('Regular expression not supported within "[]"') element = ''.join(element) nameexpression = element[1:] if self.cfm is None: raise Exception('Verification configmanager required') return set(self.cfm.filter_nodenames(nameexpression, filternodes)) elif '=' in element[0] or '!~' in element[0]: + if self.purenumeric: + raise Exception('The "=" character is invalid within "[]"') element = ''.join(element) if self.cfm is None: 
raise Exception('Verification configmanager required') From 21f691cbd8fd984582b969d28a83d7be4f119ab5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 13 Feb 2024 16:00:50 -0500 Subject: [PATCH 058/122] Correct the equality message in better messagesw --- confluent_server/confluent/noderange.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index 4a5cb808..7657292c 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -411,7 +411,7 @@ class NodeRange(object): return set(self.cfm.filter_nodenames(nameexpression, filternodes)) elif '=' in element[0] or '!~' in element[0]: if self.purenumeric: - raise Exception('The "=" character is invalid within "[]"') + raise Exception('Equality/Inequality operators (=, !=, =~, !~) are invalid within "[]"') element = ''.join(element) if self.cfm is None: raise Exception('Verification configmanager required') From 72e26caf360e84d81ac057ad4998c41c119e8fa7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 22 Feb 2024 15:05:56 -0500 Subject: [PATCH 059/122] Change to unix domain for vtbuffer communication The semaphore arbitrated single channel sharing was proving to be too slow. Make the communication lockless by having dedicated sockets per request. 
--- confluent_server/confluent/consoleserver.py | 56 +++--- confluent_vtbufferd/vtbufferd.c | 179 +++++++++++++++----- 2 files changed, 161 insertions(+), 74 deletions(-) diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index ebfd8c97..19509eb5 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -62,39 +62,38 @@ def chunk_output(output, n): yield output[i:i + n] def get_buffer_output(nodename): - out = _bufferdaemon.stdin - instream = _bufferdaemon.stdout + out = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + out.setsockopt(socket.SOL_SOCKET, socket.SO_PASSCRED, 1) + out.connect("\x00confluent-vtbuffer") if not isinstance(nodename, bytes): nodename = nodename.encode('utf8') outdata = bytearray() - with _bufferlock: - out.write(struct.pack('I', len(nodename))) - out.write(nodename) - out.flush() - select.select((instream,), (), (), 30) - while not outdata or outdata[-1]: - try: - chunk = os.read(instream.fileno(), 128) - except IOError: - chunk = None - if chunk: - outdata.extend(chunk) - else: - select.select((instream,), (), (), 0) - return bytes(outdata[:-1]) + out.send(struct.pack('I', len(nodename))) + out.send(nodename) + select.select((out,), (), (), 30) + while not outdata or outdata[-1]: + try: + chunk = os.read(out.fileno(), 128) + except IOError: + chunk = None + if chunk: + outdata.extend(chunk) + else: + select.select((out,), (), (), 0) + return bytes(outdata[:-1]) def send_output(nodename, output): if not isinstance(nodename, bytes): nodename = nodename.encode('utf8') - with _bufferlock: - _bufferdaemon.stdin.write(struct.pack('I', len(nodename) | (1 << 29))) - _bufferdaemon.stdin.write(nodename) - _bufferdaemon.stdin.flush() - for chunk in chunk_output(output, 8192): - _bufferdaemon.stdin.write(struct.pack('I', len(chunk) | (2 << 29))) - _bufferdaemon.stdin.write(chunk) - _bufferdaemon.stdin.flush() + out = socket.socket(socket.AF_UNIX, 
socket.SOCK_STREAM) + out.setsockopt(socket.SOL_SOCKET, socket.SO_PASSCRED, 1) + out.connect("\x00confluent-vtbuffer") + out.send(struct.pack('I', len(nodename) | (1 << 29))) + out.send(nodename) + for chunk in chunk_output(output, 8192): + out.send(struct.pack('I', len(chunk) | (2 << 29))) + out.send(chunk) def _utf8_normalize(data, decoder): # first we give the stateful decoder a crack at the byte stream, @@ -607,11 +606,8 @@ def initialize(): _bufferlock = semaphore.Semaphore() _tracelog = log.Logger('trace') _bufferdaemon = subprocess.Popen( - ['/opt/confluent/bin/vtbufferd'], bufsize=0, stdin=subprocess.PIPE, - stdout=subprocess.PIPE) - fl = fcntl.fcntl(_bufferdaemon.stdout.fileno(), fcntl.F_GETFL) - fcntl.fcntl(_bufferdaemon.stdout.fileno(), - fcntl.F_SETFL, fl | os.O_NONBLOCK) + ['/opt/confluent/bin/vtbufferd', 'confluent-vtbuffer'], bufsize=0, stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL) def start_console_sessions(): configmodule.hook_new_configmanagers(_start_tenant_sessions) diff --git a/confluent_vtbufferd/vtbufferd.c b/confluent_vtbufferd/vtbufferd.c index e89269b4..055a5263 100644 --- a/confluent_vtbufferd/vtbufferd.c +++ b/confluent_vtbufferd/vtbufferd.c @@ -1,8 +1,14 @@ +#include +#define _GNU_SOURCE #include #include #include #include #include +#include +#include +#include +#include #include "tmt.h" #define HASHSIZE 2053 #define MAXNAMELEN 256 @@ -10,13 +16,17 @@ struct terment { struct terment *next; char *name; + int fd; TMT *vt; }; #define SETNODE 1 #define WRITE 2 #define READBUFF 0 +#define CLOSECONN 3 +#define MAXEVTS 16 static struct terment *buffers[HASHSIZE]; +static char* nodenames[HASHSIZE]; unsigned long hash(char *str) /* djb2a */ @@ -37,10 +47,13 @@ TMT *get_termentbyname(char *name) { return NULL; } -TMT *set_termentbyname(char *name) { +TMT *set_termentbyname(char *name, int fd) { struct terment *ret; int idx; + if (nodenames[fd] == NULL) { + nodenames[fd] = strdup(name); + } idx = hash(name); for (ret = buffers[idx]; ret 
!= NULL; ret = ret->next) if (strcmp(name, ret->name) == 0) @@ -48,12 +61,13 @@ TMT *set_termentbyname(char *name) { ret = (struct terment *)malloc(sizeof(*ret)); ret->next = buffers[idx]; ret->name = strdup(name); + ret->fd = fd; ret->vt = tmt_open(31, 100, NULL, NULL, L"→←↑↓■◆▒°±▒┘┐┌└┼⎺───⎽├┤┴┬│≤≥π≠£•"); buffers[idx] = ret; return ret->vt; } -void dump_vt(TMT* outvt) { +void dump_vt(TMT* outvt, int outfd) { const TMTSCREEN *out = tmt_screen(outvt); const TMTPOINT *curs = tmt_cursor(outvt); int line, idx, maxcol, maxrow; @@ -67,9 +81,10 @@ void dump_vt(TMT* outvt) { tmt_color_t fg = TMT_COLOR_DEFAULT; tmt_color_t bg = TMT_COLOR_DEFAULT; wchar_t sgrline[30]; + char strbuffer[128]; size_t srgidx = 0; char colorcode = 0; - wprintf(L"\033c"); + write(outfd, "\033c", 2); maxcol = 0; maxrow = 0; for (line = out->nline - 1; line >= 0; --line) { @@ -148,60 +163,136 @@ void dump_vt(TMT* outvt) { } if (sgrline[0] != 0) { sgrline[wcslen(sgrline) - 1] = 0; // Trim last ; - wprintf(L"\033[%lsm", sgrline); + + snprintf(strbuffer, sizeof(strbuffer), "\033[%lsm", sgrline); + write(outfd, strbuffer, strlen(strbuffer)); + write(outfd, "\033[]", 3); } - wprintf(L"%lc", out->lines[line]->chars[idx].c); + snprintf(strbuffer, sizeof(strbuffer), "%lc", out->lines[line]->chars[idx].c); + write(outfd, strbuffer, strlen(strbuffer)); } if (line < maxrow) - wprintf(L"\r\n"); + write(outfd, "\r\n", 2); } - fflush(stdout); - wprintf(L"\x1b[%ld;%ldH", curs->r + 1, curs->c + 1); - fflush(stdout); + //fflush(stdout); + snprintf(strbuffer, sizeof(strbuffer), "\x1b[%ld;%ldH", curs->r + 1, curs->c + 1); + write(outfd, strbuffer, strlen(strbuffer)); + //fflush(stdout); +} + +int handle_traffic(int fd) { + int cmd, length; + char currnode[MAXNAMELEN]; + char cmdbuf[MAXDATALEN]; + char *nodename; + TMT *currvt = NULL; + TMT *outvt = NULL; + length = read(fd, &cmd, 4); + if (length <= 0) { + return 0; + } + length = cmd & 536870911; + cmd = cmd >> 29; + if (cmd == SETNODE) { + cmd = read(fd, currnode, 
length); + currnode[length] = 0; + if (cmd < 0) + return 0; + currvt = set_termentbyname(currnode, fd); + } else if (cmd == WRITE) { + if (currvt == NULL) { + nodename = nodenames[fd]; + currvt = set_termentbyname(nodename, fd); + } + cmd = read(fd, cmdbuf, length); + cmdbuf[length] = 0; + if (cmd < 0) + return 0; + tmt_write(currvt, cmdbuf, length); + } else if (cmd == READBUFF) { + cmd = read(fd, cmdbuf, length); + cmdbuf[length] = 0; + if (cmd < 0) + return 0; + outvt = get_termentbyname(cmdbuf); + if (outvt != NULL) + dump_vt(outvt, fd); + length = write(fd, "\x00", 1); + if (length < 0) + return 0; + } else if (cmd == CLOSECONN) { + return 0; + } + return 1; } int main(int argc, char* argv[]) { - int cmd, length; setlocale(LC_ALL, ""); - char cmdbuf[MAXDATALEN]; - char currnode[MAXNAMELEN]; - TMT *currvt = NULL; - TMT *outvt = NULL; + struct sockaddr_un addr; + int numevts; + int status; + int poller; + int n; + socklen_t len; + int ctlsock, currsock; + socklen_t addrlen; + struct ucred ucr; + + struct epoll_event epvt, evts[MAXEVTS]; stdin = freopen(NULL, "rb", stdin); if (stdin == NULL) { exit(1); } + memset(&addr, 0, sizeof(struct sockaddr_un)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path + 1, argv[1], sizeof(addr.sun_path) - 2); // abstract namespace socket + ctlsock = socket(AF_UNIX, SOCK_STREAM, 0); + status = bind(ctlsock, (const struct sockaddr*)&addr, sizeof(sa_family_t) + strlen(argv[1]) + 1); //sizeof(struct sockaddr_un)); + if (status < 0) { + perror("Unable to open unix socket - "); + exit(1); + } + listen(ctlsock, 128); + poller = epoll_create(1); + memset(&epvt, 0, sizeof(struct epoll_event)); + epvt.events = EPOLLIN; + epvt.data.fd = ctlsock; + if (epoll_ctl(poller, EPOLL_CTL_ADD, ctlsock, &epvt) < 0) { + perror("Unable to poll the socket"); + exit(1); + } + // create a unix domain socket for accepting, each connection is only allowed to either read or write, not both while (1) { - length = fread(&cmd, 4, 1, stdin); - if (length < 0) - 
continue; - length = cmd & 536870911; - cmd = cmd >> 29; - if (cmd == SETNODE) { - cmd = fread(currnode, 1, length, stdin); - currnode[length] = 0; - if (cmd < 0) - continue; - currvt = set_termentbyname(currnode); - } else if (cmd == WRITE) { - if (currvt == NULL) - currvt = set_termentbyname(""); - cmd = fread(cmdbuf, 1, length, stdin); - cmdbuf[length] = 0; - if (cmd < 0) - continue; - tmt_write(currvt, cmdbuf, length); - } else if (cmd == READBUFF) { - cmd = fread(cmdbuf, 1, length, stdin); - cmdbuf[length] = 0; - if (cmd < 0) - continue; - outvt = get_termentbyname(cmdbuf); - if (outvt != NULL) - dump_vt(outvt); - length = write(1, "\x00", 1); - if (length < 0) - continue; + numevts = epoll_wait(poller, evts, MAXEVTS, -1); + if (numevts < 0) { + perror("Failed wait"); + exit(1); + } + for (n = 0; n < numevts; ++n) { + if (evts[n].data.fd == ctlsock) { + currsock = accept(ctlsock, (struct sockaddr *) &addr, &addrlen); + len = sizeof(ucr); + getsockopt(currsock, SOL_SOCKET, SO_PEERCRED, &ucr, &len); + if (ucr.uid != getuid()) { // block access for other users + close(currsock); + continue; + } + memset(&epvt, 0, sizeof(struct epoll_event)); + epvt.events = EPOLLIN; + epvt.data.fd = currsock; + epoll_ctl(poller, EPOLL_CTL_ADD, currsock, &epvt); + } else { + if (!handle_traffic(evts[n].data.fd)) { + epoll_ctl(poller, EPOLL_CTL_DEL, evts[n].data.fd, NULL); + close(evts[n].data.fd); + if (nodenames[evts[n].data.fd] != NULL) { + free(nodenames[evts[n].data.fd]); + nodenames[evts[n].data.fd] = NULL; + } + } + } } } } + + From fa5b1c671ef54e55f9f04b57a894a06dd2f23123 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 22 Feb 2024 15:07:12 -0500 Subject: [PATCH 060/122] Remove disused bufferlock We no longer use a lock on buffer communication, eliminate the stale variable. 
--- confluent_server/confluent/consoleserver.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index 19509eb5..aa05b9b7 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -49,7 +49,6 @@ _handled_consoles = {} _tracelog = None _bufferdaemon = None -_bufferlock = None try: range = xrange @@ -602,8 +601,6 @@ def _start_tenant_sessions(cfm): def initialize(): global _tracelog global _bufferdaemon - global _bufferlock - _bufferlock = semaphore.Semaphore() _tracelog = log.Logger('trace') _bufferdaemon = subprocess.Popen( ['/opt/confluent/bin/vtbufferd', 'confluent-vtbuffer'], bufsize=0, stdin=subprocess.DEVNULL, From 75db6da621632db72e40d1a208c812f327c0b6f1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 4 Mar 2024 08:06:01 -0500 Subject: [PATCH 061/122] Opportunisticlly use sshd_config.d when detected --- .../el8/profiles/default/scripts/setupssh.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/el8/profiles/default/scripts/setupssh.sh b/confluent_osdeploy/el8/profiles/default/scripts/setupssh.sh index f06c4d61..bc74faf5 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/setupssh.sh +++ b/confluent_osdeploy/el8/profiles/default/scripts/setupssh.sh @@ -1,8 +1,12 @@ #!/bin/sh -grep HostCert /etc/ssh/sshd_config.anaconda >> /mnt/sysimage/etc/ssh/sshd_config -echo HostbasedAuthentication yes >> /mnt/sysimage/etc/ssh/sshd_config -echo HostbasedUsesNameFromPacketOnly yes >> /mnt/sysimage/etc/ssh/sshd_config -echo IgnoreRhosts no >> /mnt/sysimage/etc/ssh/sshd_config +targssh=/mnt/sysimage/etc/ssh/sshd_config +if [ -d /mnt/sysimage/etc/ssh/sshd_config.d/ ]; then + targssh=/mnt/sysimage/etc/ssh/sshd_config.d/90-confluent.conf +fi +grep HostCert /etc/ssh/sshd_config.anaconda >> $targssh +echo HostbasedAuthentication yes >> $targssh +echo 
HostbasedUsesNameFromPacketOnly yes >> $targssh +echo IgnoreRhosts no >> $targssh sshconf=/mnt/sysimage/etc/ssh/ssh_config if [ -d /mnt/sysimage/etc/ssh/ssh_config.d/ ]; then sshconf=/mnt/sysimage/etc/ssh/ssh_config.d/01-confluent.conf From 2f8dfac9bce1e127d742f7cade64c3a3522369e1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 6 Mar 2024 08:45:23 -0500 Subject: [PATCH 062/122] Dump stderr to client if ansible had an utterly disastrous condition --- confluent_server/confluent/runansible.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/confluent/runansible.py b/confluent_server/confluent/runansible.py index cbbecc58..8e5d1a3d 100644 --- a/confluent_server/confluent/runansible.py +++ b/confluent_server/confluent/runansible.py @@ -63,6 +63,9 @@ class PlayRunner(object): else: textout += result['state'] + '\n' textout += '\n' + if self.stderr: + textout += "ERRORS **********************************\n" + textout += self.stderr return textout def dump_json(self): From 5ae3f4c62aa8b9df37f4cff335d089a1a3717363 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 6 Mar 2024 09:27:53 -0500 Subject: [PATCH 063/122] Properly address runansible error relay --- confluent_server/confluent/runansible.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/runansible.py b/confluent_server/confluent/runansible.py index 8e5d1a3d..50696742 100644 --- a/confluent_server/confluent/runansible.py +++ b/confluent_server/confluent/runansible.py @@ -32,6 +32,7 @@ anspypath = None running_status = {} class PlayRunner(object): def __init__(self, playfiles, nodes): + self.stderr = '' self.playfiles = playfiles self.nodes = nodes self.worker = None @@ -96,7 +97,8 @@ class PlayRunner(object): [mypath, __file__, targnodes, playfilename], stdin=devnull, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, self.stderr = worker.communicate() + stdout, stder = worker.communicate() + self.stderr += stder.decode('utf8') 
current = memoryview(stdout) while len(current): sz = struct.unpack('=q', current[:8])[0] From 3ffeef5cf306d4c6040b898a0e1b7ad34a4e8a22 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 6 Mar 2024 16:27:09 -0500 Subject: [PATCH 064/122] Fix stray blank line at end of nodelist Wrong indentation level for nodelist resulting in spurious line. --- confluent_client/bin/nodelist | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_client/bin/nodelist b/confluent_client/bin/nodelist index 462ed922..c1b9c436 100755 --- a/confluent_client/bin/nodelist +++ b/confluent_client/bin/nodelist @@ -68,7 +68,7 @@ def main(): else: elem=(res['item']['href'].replace('/', '')) list.append(elem) - print(options.delim.join(list)) + print(options.delim.join(list)) sys.exit(exitcode) From cdefb400f9b1eaf94142d350fdc8f7c1006fac41 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 11 Mar 2024 13:32:45 -0400 Subject: [PATCH 065/122] Expose fingerprinting and better error handling to osdeploy This allows custom name and pre-import checking. 
--- confluent_client/confluent_env.sh | 4 ++-- confluent_server/bin/osdeploy | 35 +++++++++++++++++++++++++++---- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/confluent_client/confluent_env.sh b/confluent_client/confluent_env.sh index 81a70198..925a873d 100644 --- a/confluent_client/confluent_env.sh +++ b/confluent_client/confluent_env.sh @@ -153,11 +153,11 @@ _confluent_osimage_completion() { _confluent_get_args if [ $NUMARGS == 2 ]; then - COMPREPLY=($(compgen -W "initialize import updateboot rebase" -- ${COMP_WORDS[COMP_CWORD]})) + COMPREPLY=($(compgen -W "initialize import importcheck updateboot rebase" -- ${COMP_WORDS[COMP_CWORD]})) return elif [ ${CMPARGS[1]} == 'initialize' ]; then COMPREPLY=($(compgen -W "-h -u -s -t -i" -- ${COMP_WORDS[COMP_CWORD]})) - elif [ ${CMPARGS[1]} == 'import' ]; then + elif [ ${CMPARGS[1]} == 'import' ] || [ ${CMPARGS[1]} == 'importcheck' ]; then compopt -o default COMPREPLY=() return diff --git a/confluent_server/bin/osdeploy b/confluent_server/bin/osdeploy index fff220be..47ebc4a8 100644 --- a/confluent_server/bin/osdeploy +++ b/confluent_server/bin/osdeploy @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python3 __author__ = 'jjohnson2,bfinley' @@ -49,8 +49,11 @@ def main(args): wiz.add_argument('-p', help='Copy in TFTP contents required for PXE support', action='store_true') wiz.add_argument('-i', help='Interactively prompt for behaviors', action='store_true') wiz.add_argument('-l', help='Set up local management node to allow login from managed nodes', action='store_true') + osip = sp.add_parser('importcheck', help='Check import of an OS image from an ISO image') + osip.add_argument('imagefile', help='File to use for source of importing') osip = sp.add_parser('import', help='Import an OS image from an ISO image') osip.add_argument('imagefile', help='File to use for source of importing') + osip.add_argument('-n', help='Specific a custom distribution name') upb = sp.add_parser( 'updateboot', help='Push 
profile.yaml of the named profile data into boot assets as appropriate') @@ -63,7 +66,9 @@ def main(args): if cmdset.command == 'list': return oslist() if cmdset.command == 'import': - return osimport(cmdset.imagefile) + return osimport(cmdset.imagefile, custname=cmdset.n) + if cmdset.command == 'importcheck': + return osimport(cmdset.imagefile, checkonly=True) if cmdset.command == 'initialize': return initialize(cmdset) if cmdset.command == 'updateboot': @@ -496,7 +501,7 @@ def oslist(): print("") -def osimport(imagefile): +def osimport(imagefile, checkonly=False, custname=None): c = client.Command() imagefile = os.path.abspath(imagefile) if c.unixdomain: @@ -507,11 +512,33 @@ def osimport(imagefile): pass importing = False shortname = None - for rsp in c.create('/deployment/importing/', {'filename': imagefile}): + apipath = '/deployment/importing/' + if checkonly: + apipath = '/deployment/fingerprint/' + apiargs = {'filename': imagefile} + if custname: + apiargs['custname'] = custname + for rsp in c.create(apipath, apiargs): if 'target' in rsp: importing = True shortname = rsp['name'] print('Importing from {0} to {1}'.format(imagefile, rsp['target'])) + elif 'targetpath' in rsp: + tpath = rsp.get('targetpath', None) + tname = rsp.get('name', None) + oscat = rsp.get('oscategory', None) + if tpath: + print('Detected target directory: ' + tpath) + if tname: + print('Detected distribution name: ' + tname) + if oscat: + print('Detected OS category: ' + oscat) + for err in rsp.get('errors', []): + sys.stderr.write('Error: ' + err + '\n') + + elif 'error' in rsp: + sys.stderr.write(rsp['error'] + '\n') + sys.exit(rsp.get('errorcode', 1)) else: print(repr(rsp)) try: From 49e614eb32fe1cf3f2887932cf5b5d4b71092220 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 11 Mar 2024 17:10:33 -0400 Subject: [PATCH 066/122] Have image2disk delay exit on error Debugging cloning is difficult when system immediately reboots on error. 
--- .../el8-diskless/profiles/default/scripts/image2disk.py | 8 +++++++- .../el9-diskless/profiles/default/scripts/image2disk.py | 8 +++++++- .../profiles/default/scripts/image2disk.py | 8 +++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py index aaaca9d4..655aaedc 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py @@ -10,6 +10,7 @@ import stat import struct import sys import subprocess +import traceback bootuuid = None @@ -426,4 +427,9 @@ def install_to_disk(imgpath): if __name__ == '__main__': - install_to_disk(os.environ['mountsrc']) + try: + install_to_disk(os.environ['mountsrc']) + except Exception: + traceback.print_exc() + time.sleep(86400) + raise diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index 7b312a93..48a15767 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -10,6 +10,7 @@ import stat import struct import sys import subprocess +import traceback bootuuid = None @@ -426,4 +427,9 @@ def install_to_disk(imgpath): if __name__ == '__main__': - install_to_disk(os.environ['mountsrc']) + try: + install_to_disk(os.environ['mountsrc']) + except Exception: + traceback.print_exc() + time.sleep(86400) + raise diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py index 1d19ebad..91afc5cb 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py +++ 
b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py @@ -10,6 +10,7 @@ import stat import struct import sys import subprocess +import traceback bootuuid = None @@ -424,5 +425,10 @@ def install_to_disk(imgpath): if __name__ == '__main__': - install_to_disk(os.environ['mountsrc']) + try: + install_to_disk(os.environ['mountsrc']) + except Exception: + traceback.print_exc() + time.sleep(86400) + raise From 0d720baf2539a188d5b80cf4721bdcf5bcab66e8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 12 Mar 2024 09:36:40 -0400 Subject: [PATCH 067/122] Fix lldp when peername is null Some neighbors result in a null name, handle that. --- confluent_server/confluent/networking/lldp.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/networking/lldp.py b/confluent_server/confluent/networking/lldp.py index e1fd8d4e..e181d46f 100644 --- a/confluent_server/confluent/networking/lldp.py +++ b/confluent_server/confluent/networking/lldp.py @@ -381,9 +381,10 @@ def list_info(parms, requestedparameter): break else: candidate = info[requestedparameter] - candidate = candidate.strip() - if candidate != '': - results.add(_api_sanitize_string(candidate)) + if candidate: + candidate = candidate.strip() + if candidate != '': + results.add(_api_sanitize_string(candidate)) return [msg.ChildCollection(x + suffix) for x in util.natural_sort(results)] def _handle_neighbor_query(pathcomponents, configmanager): From 17af9c74b81927601e84845965de421db9deb022 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 12 Mar 2024 15:32:44 -0400 Subject: [PATCH 068/122] Fix nodeapply redoing a single node multiple times --- confluent_client/bin/nodeapply | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_client/bin/nodeapply b/confluent_client/bin/nodeapply index e39447bc..2e798742 100755 --- a/confluent_client/bin/nodeapply +++ b/confluent_client/bin/nodeapply @@ -102,9 +102,9 @@ def run(): 
cmdv = ['ssh', sshnode] + cmdvbase + cmdstorun[0] if currprocs < concurrentprocs: currprocs += 1 - run_cmdv(node, cmdv, all, pipedesc) + run_cmdv(sshnode, cmdv, all, pipedesc) else: - pendingexecs.append((node, cmdv)) + pendingexecs.append((sshnode, cmdv)) if not all or exitcode: sys.exit(exitcode) rdy, _, _ = select.select(all, [], [], 10) From 58d9bc1816101ac814beaa32d4da237e35aea9bc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 14 Mar 2024 10:50:01 -0400 Subject: [PATCH 069/122] Updates to confluent_selfcheck Reap ssh-agent to avoid stale agents lying around. Remove nuisance warnings about virbr0 when present. Do a full runthrough as the confluent user to ssh to a node when user requests with '-a', marking known_hosts and automation key issues. --- confluent_server/bin/confluent_selfcheck | 34 ++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/confluent_server/bin/confluent_selfcheck b/confluent_server/bin/confluent_selfcheck index cc1409cf..1539434f 100755 --- a/confluent_server/bin/confluent_selfcheck +++ b/confluent_server/bin/confluent_selfcheck @@ -22,6 +22,8 @@ import shutil import eventlet.green.socket as socket import eventlet import greenlet +import pwd +import signal def fprint(txt): sys.stdout.write(txt) @@ -109,6 +111,8 @@ def nics_missing_ipv6(): iname, state = comps[:2] if iname == b'lo': continue + if iname == b'virbr0': + continue addrs = comps[2:] hasv6 = False hasv4 = False @@ -157,6 +161,7 @@ def lookup_node(node): if __name__ == '__main__': ap = argparse.ArgumentParser(description='Run configuration checks for a system running confluent service') ap.add_argument('-n', '--node', help='A node name to run node specific checks against') + ap.add_argument('-a', '--automation', help='Do checks against a deployed node for automation and syncfiles function', action='store_true') args, extra = ap.parse_known_args(sys.argv) if len(extra) > 1: ap.print_help() @@ -217,6 +222,7 @@ if __name__ == '__main__': print('OK') 
except subprocess.CalledProcessError: emprint('Failed to load confluent automation key, syncfiles and profile ansible plays will not work (Example resolution: osdeploy initialize -a)') + os.kill(int(sshutil.agent_pid), signal.SIGTERM) fprint('Checking for blocked insecure boot: ') if insecure_boot_attempts(): emprint('Some nodes are attempting network boot using PXE or HTTP boot, but the node is not configured to allow this (Example resolution: nodegroupattrib everything deployment.useinsecureprotocols=firmware)') @@ -311,6 +317,34 @@ if __name__ == '__main__': emprint('Name resolution failed for node, it is normally a good idea for the node name to resolve to an IP') if result: print("OK") + if args.automation: + print(f'Checking confluent automation access to {args.node}...') + child = os.fork() + if child > 0: + pid, extcode = os.waitpid(child, 0) + else: + sshutil.ready_keys = {} + sshutil.agent_pid = None + cuser = pwd.getpwnam('confluent') + os.setgid(cuser.pw_gid) + os.setuid(cuser.pw_uid) + sshutil.prep_ssh_key('/etc/confluent/ssh/automation') + srun = subprocess.run( + ['ssh', '-Tn', '-o', 'BatchMode=yes', '-l', 'root', + '-o', 'StrictHostKeyChecking=yes', args.node, 'true'], + stdin=subprocess.DEVNULL, stderr=subprocess.PIPE) + os.kill(int(sshutil.agent_pid), signal.SIGTERM) + if srun.returncode == 0: + print(f'Confluent automation access to {args.node} seems OK') + else: + if b'Host key verification failed' in srun.stderr: + emprint('Confluent ssh unable to verify host key, check /etc/ssh/ssh_known_hosts. 
(Example resolution: osdeploy initialize -k)') + elif b'ermission denied' in srun.stderr: + emprint('Confluent user unable to ssh in, check /root/.ssh/authorized_keys on the target system versus /etc/confluent/ssh/automation.pub (Example resolution: osdeploy initialize -a)') + else: + emprint('Unknown error attempting confluent automation ssh:') + sys.stderr.buffer.write(srun.stderr) + os.kill(int(sshutil.agent_pid), signal.SIGTERM) else: print("Skipping node checks, no node specified (Example: confluent_selfcheck -n n1)") # possible checks: From 876b59c1f0a2998ad58888f33c4fb099da5f7319 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 14 Mar 2024 10:52:52 -0400 Subject: [PATCH 070/122] Remove redundant kill on the agent pid Extraneous kill on the agent pid is removed. --- confluent_server/bin/confluent_selfcheck | 1 - 1 file changed, 1 deletion(-) diff --git a/confluent_server/bin/confluent_selfcheck b/confluent_server/bin/confluent_selfcheck index 1539434f..f558cf46 100755 --- a/confluent_server/bin/confluent_selfcheck +++ b/confluent_server/bin/confluent_selfcheck @@ -344,7 +344,6 @@ if __name__ == '__main__': else: emprint('Unknown error attempting confluent automation ssh:') sys.stderr.buffer.write(srun.stderr) - os.kill(int(sshutil.agent_pid), signal.SIGTERM) else: print("Skipping node checks, no node specified (Example: confluent_selfcheck -n n1)") # possible checks: From 1d4505ff3ca1916e1a4eeed5a7b3d886477c9c25 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 14 Mar 2024 11:20:36 -0400 Subject: [PATCH 071/122] SSH test by IP, to reflect actual usage and catch issues One issue is modified ssh_known_hosts wildcard customization failing to cover IP address. 
--- confluent_server/bin/confluent_selfcheck | 33 ++++++++++++++---------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/confluent_server/bin/confluent_selfcheck b/confluent_server/bin/confluent_selfcheck index f558cf46..b9651d17 100755 --- a/confluent_server/bin/confluent_selfcheck +++ b/confluent_server/bin/confluent_selfcheck @@ -280,13 +280,17 @@ if __name__ == '__main__': cfg = configmanager.ConfigManager(None) bootablev4nics = [] bootablev6nics = [] + targsships = [] for nic in glob.glob("/sys/class/net/*/ifindex"): idx = int(open(nic, "r").read()) nicname = nic.split('/')[-2] ncfg = netutil.get_nic_config(cfg, args.node, ifidx=idx) + if ncfg['ipv4_address']: + targsships.append(ncfg['ipv4_address']) if ncfg['ipv4_address'] or ncfg['ipv4_method'] == 'dhcp': bootablev4nics.append(nicname) if ncfg['ipv6_address']: + targsships.append(ncfg['ipv6_address']) bootablev6nics.append(nicname) if bootablev4nics: print("{} appears to have network configuration suitable for IPv4 deployment via: {}".format(args.node, ",".join(bootablev4nics))) @@ -329,21 +333,22 @@ if __name__ == '__main__': os.setgid(cuser.pw_gid) os.setuid(cuser.pw_uid) sshutil.prep_ssh_key('/etc/confluent/ssh/automation') - srun = subprocess.run( - ['ssh', '-Tn', '-o', 'BatchMode=yes', '-l', 'root', - '-o', 'StrictHostKeyChecking=yes', args.node, 'true'], - stdin=subprocess.DEVNULL, stderr=subprocess.PIPE) - os.kill(int(sshutil.agent_pid), signal.SIGTERM) - if srun.returncode == 0: - print(f'Confluent automation access to {args.node} seems OK') - else: - if b'Host key verification failed' in srun.stderr: - emprint('Confluent ssh unable to verify host key, check /etc/ssh/ssh_known_hosts. 
(Example resolution: osdeploy initialize -k)') - elif b'ermission denied' in srun.stderr: - emprint('Confluent user unable to ssh in, check /root/.ssh/authorized_keys on the target system versus /etc/confluent/ssh/automation.pub (Example resolution: osdeploy initialize -a)') + for targ in targsships: + srun = subprocess.run( + ['ssh', '-Tn', '-o', 'BatchMode=yes', '-l', 'root', + '-o', 'StrictHostKeyChecking=yes', targ, 'true'], + stdin=subprocess.DEVNULL, stderr=subprocess.PIPE) + if srun.returncode == 0: + print(f'Confluent automation access to {targ} seems OK') else: - emprint('Unknown error attempting confluent automation ssh:') - sys.stderr.buffer.write(srun.stderr) + if b'Host key verification failed' in srun.stderr: + emprint(f'Confluent ssh unable to verify host key for {targ}, check /etc/ssh/ssh_known_hosts. (Example resolution: osdeploy initialize -k)') + elif b'ermission denied' in srun.stderr: + emprint(f'Confluent user unable to ssh in to {targ}, check /root/.ssh/authorized_keys on the target system versus /etc/confluent/ssh/automation.pub (Example resolution: osdeploy initialize -a)') + else: + emprint('Unknown error attempting confluent automation ssh:') + sys.stderr.buffer.write(srun.stderr) + os.kill(int(sshutil.agent_pid), signal.SIGTERM) else: print("Skipping node checks, no node specified (Example: confluent_selfcheck -n n1)") # possible checks: From 789376029dc56c9a6bceb18c1cf0476f37bf1df5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Mar 2024 09:57:23 -0400 Subject: [PATCH 072/122] Numerous fixes to the EL9 cloning Fix various callbacks when using IPv6 based deployment. Do not attempt to restore erroneously cloned zram partitions. Convert LVM names to new LVM names consistent with source naming scheme. Push new kernel command line into /boot/loader and /etc/kernel/cmdline. 
--- .../profiles/default/scripts/firstboot.sh | 2 +- .../profiles/default/scripts/image2disk.py | 99 ++++++++++++++++--- .../profiles/default/scripts/post.sh | 6 +- 3 files changed, 91 insertions(+), 16 deletions(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh index ed11d9e7..fabb9385 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh @@ -41,7 +41,7 @@ if [ ! -f /etc/confluent/firstboot.ran ]; then run_remote_config firstboot.d fi -curl -X POST -d 'status: complete' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_mgr/confluent-api/self/updatestatus +curl -X POST -d 'status: complete' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_websrv/confluent-api/self/updatestatus systemctl disable firstboot rm /etc/systemd/system/firstboot.service rm /etc/confluent/firstboot.ran diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index 48a15767..83cffc6b 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -13,6 +13,13 @@ import subprocess import traceback bootuuid = None +vgname = 'localstorage' +oldvgname = None + +def convert_lv(oldlvname): + if oldvgname is None: + return None + return oldlvname.replace(oldvgname, vgname) def get_partname(devname, idx): if devname[-1] in '0123456789': @@ -54,6 +61,8 @@ def get_image_metadata(imgpath): header = img.read(16) if header == b'\x63\x7b\x9d\x26\xb7\xfd\x48\x30\x89\xf9\x11\xcf\x18\xfd\xff\xa1': for md in get_multipart_image_meta(img): + if md.get('device', '').startswith('/dev/zram'): + continue yield md else: 
raise Exception('Installation from single part image not supported') @@ -87,14 +96,14 @@ def fixup(rootdir, vols): if tab.startswith('#ORIGFSTAB#'): if entry[1] in devbymount: targetdev = devbymount[entry[1]] - if targetdev.startswith('/dev/localstorage/'): + if targetdev.startswith('/dev/{}/'.format(vgname)): entry[0] = targetdev else: uuid = subprocess.check_output(['blkid', '-s', 'UUID', '-o', 'value', targetdev]).decode('utf8') uuid = uuid.strip() entry[0] = 'UUID={}'.format(uuid) elif entry[2] == 'swap': - entry[0] = '/dev/mapper/localstorage-swap' + entry[0] = '/dev/mapper/{}-swap'.format(vgname) entry[0] = entry[0].ljust(42) entry[1] = entry[1].ljust(16) entry[3] = entry[3].ljust(28) @@ -142,6 +151,46 @@ def fixup(rootdir, vols): grubsyscfg = os.path.join(rootdir, 'etc/sysconfig/grub') if not os.path.exists(grubsyscfg): grubsyscfg = os.path.join(rootdir, 'etc/default/grub') + kcmdline = os.path.join(rootdir, 'etc/kernel/cmdline') + if os.path.exists(kcmdline): + with open(kcmdline) as kcmdlinein: + kcmdlinecontent = kcmdlinein.read() + newkcmdlineent = [] + for ent in kcmdlinecontent.split(): + if ent.startswith('resume='): + newkcmdlineent.append('resume={}'.format(newswapdev)) + elif ent.startswith('root='): + newkcmdlineent.append('root={}'.format(newrootdev)) + elif ent.startswith('rd.lvm.lv='): + ent = convert_lv(ent) + if ent: + newkcmdlineent.append(ent) + else: + newkcmdlineent.append(ent) + with open(kcmdline, 'w') as kcmdlineout: + kcmdlineout.write(' '.join(newkcmdlineent) + '\n') + for loadent in glob.glob(os.path.join(rootdir, 'boot/loader/entries/*.conf')): + with open(loadent) as loadentin: + currentry = loadentin.read().split('\n') + with open(loadent, 'w') as loadentout: + for cfgline in currentry: + cfgparts = cfgline.split() + if not cfgparts or cfgparts[0] != 'options': + loadentout.write(cfgline + '\n') + continue + newcfgparts = [cfgparts[0]] + for cfgpart in cfgparts[1:]: + if cfgpart.startswith('root='): + 
newcfgparts.append('root={}'.format(newrootdev)) + elif cfgpart.startswith('resume='): + newcfgparts.append('resume={}'.format(newswapdev)) + elif cfgpart.startswith('rd.lvm.lv='): + cfgpart = convert_lv(cfgpart) + if cfgpart: + newcfgparts.append(cfgpart) + else: + newcfgparts.append(cfgpart) + loadentout.write(' '.join(newcfgparts) + '\n') with open(grubsyscfg) as defgrubin: defgrub = defgrubin.read().split('\n') with open(grubsyscfg, 'w') as defgrubout: @@ -149,9 +198,16 @@ def fixup(rootdir, vols): gline = gline.split() newline = [] for ent in gline: - if ent.startswith('resume=') or ent.startswith('rd.lvm.lv'): - continue - newline.append(ent) + if ent.startswith('resume='): + newline.append('resume={}'.format(newswapdev)) + elif ent.startswith('root='): + newline.append('root={}'.format(newrootdev)) + elif ent.startswith('rd.lvm.lv='): + ent = convert_lv(ent) + if ent: + newline.append(ent) + else: + newline.append(ent) defgrubout.write(' '.join(newline) + '\n') grubcfg = subprocess.check_output(['find', os.path.join(rootdir, 'boot'), '-name', 'grub.cfg']).decode('utf8').strip().replace(rootdir, '/').replace('//', '/') grubcfg = grubcfg.split('\n') @@ -228,8 +284,14 @@ def had_swap(): return True return False +newrootdev = None +newswapdev = None def install_to_disk(imgpath): global bootuuid + global newrootdev + global newswapdev + global vgname + global oldvgname lvmvols = {} deftotsize = 0 mintotsize = 0 @@ -261,6 +323,12 @@ def install_to_disk(imgpath): biggestfs = fs biggestsize = fs['initsize'] if fs['device'].startswith('/dev/mapper'): + oldvgname = fs['device'].rsplit('/', 1)[-1] + if '_' in oldvgname and '-' in oldvgname.split('_')[-1]: + oldvgname = oldvgname.rsplit('-', 1)[0] + osname = oldvgname.split('_')[0] + nodename = socket.gethostname().split('.')[0] + vgname = '{}_{}'.format(osname, nodename) lvmvols[fs['device'].replace('/dev/mapper/', '')] = fs deflvmsize += fs['initsize'] minlvmsize += fs['minsize'] @@ -305,6 +373,8 @@ def 
install_to_disk(imgpath): end = sectors parted.run('mkpart primary {}s {}s'.format(curroffset, end)) vol['targetdisk'] = get_partname(instdisk, volidx) + if vol['mount'] == '/': + newrootdev = vol['targetdisk'] curroffset += size + 1 if not lvmvols: if swapsize: @@ -314,13 +384,14 @@ def install_to_disk(imgpath): if end > sectors: end = sectors parted.run('mkpart swap {}s {}s'.format(curroffset, end)) - subprocess.check_call(['mkswap', get_partname(instdisk, volidx + 1)]) + newswapdev = get_partname(instdisk, volidx + 1) + subprocess.check_call(['mkswap', newswapdev]) else: parted.run('mkpart lvm {}s 100%'.format(curroffset)) lvmpart = get_partname(instdisk, volidx + 1) subprocess.check_call(['pvcreate', '-ff', '-y', lvmpart]) - subprocess.check_call(['vgcreate', 'localstorage', lvmpart]) - vginfo = subprocess.check_output(['vgdisplay', 'localstorage', '--units', 'b']).decode('utf8') + subprocess.check_call(['vgcreate', vgname, lvmpart]) + vginfo = subprocess.check_output(['vgdisplay', vgname, '--units', 'b']).decode('utf8') vginfo = vginfo.split('\n') pesize = 0 pes = 0 @@ -347,13 +418,17 @@ def install_to_disk(imgpath): extents += 1 if vol['mount'] == '/': lvname = 'root' + else: lvname = vol['mount'].replace('/', '_') - subprocess.check_call(['lvcreate', '-l', '{}'.format(extents), '-y', '-n', lvname, 'localstorage']) - vol['targetdisk'] = '/dev/localstorage/{}'.format(lvname) + subprocess.check_call(['lvcreate', '-l', '{}'.format(extents), '-y', '-n', lvname, vgname]) + vol['targetdisk'] = '/dev/{}/{}'.format(vgname, lvname) + if vol['mount'] == '/': + newrootdev = vol['targetdisk'] if swapsize: - subprocess.check_call(['lvcreate', '-y', '-l', '{}'.format(swapsize // pesize), '-n', 'swap', 'localstorage']) - subprocess.check_call(['mkswap', '/dev/localstorage/swap']) + subprocess.check_call(['lvcreate', '-y', '-l', '{}'.format(swapsize // pesize), '-n', 'swap', vgname]) + subprocess.check_call(['mkswap', '/dev/{}/swap'.format(vgname)]) + newswapdev = 
'/dev/{}/swap'.format(vgname) os.makedirs('/run/imginst/targ') for vol in allvols: with open(vol['targetdisk'], 'wb') as partition: diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh index 3b20a946..7a7ac01e 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh @@ -23,9 +23,9 @@ exec 2>> /var/log/confluent/confluent-post.log chmod 600 /var/log/confluent/confluent-post.log tail -f /var/log/confluent/confluent-post.log > /dev/console & logshowpid=$! -curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/firstboot.service > /etc/systemd/system/firstboot.service +curl -f https://$confluent_websrv/confluent-public/os/$confluent_profile/scripts/firstboot.service > /etc/systemd/system/firstboot.service mkdir -p /opt/confluent/bin -curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/firstboot.sh > /opt/confluent/bin/firstboot.sh +curl -f https://$confluent_websrv/confluent-public/os/$confluent_profile/scripts/firstboot.sh > /opt/confluent/bin/firstboot.sh chmod +x /opt/confluent/bin/firstboot.sh systemctl enable firstboot selinuxpolicy=$(grep ^SELINUXTYPE /etc/selinux/config |awk -F= '{print $2}') @@ -40,7 +40,7 @@ run_remote_parts post.d # Induce execution of remote configuration, e.g. 
ansible plays in ansible/post.d/ run_remote_config post.d -curl -sf -X POST -d 'status: staged' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_mgr/confluent-api/self/updatestatus +curl -sf -X POST -d 'status: staged' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_websrv/confluent-api/self/updatestatus kill $logshowpid From bd2288ccb79b60fc4b12fbdc6975453c976091cc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Mar 2024 12:29:37 -0400 Subject: [PATCH 073/122] Ensure preservation of " if rename fails If ent would swallow a ", make sure to put it back. --- .../el9-diskless/profiles/default/scripts/image2disk.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index 83cffc6b..79d0008e 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -206,6 +206,8 @@ def fixup(rootdir, vols): ent = convert_lv(ent) if ent: newline.append(ent) + elif '""' in ent: + newline.append('""') else: newline.append(ent) defgrubout.write(' '.join(newline) + '\n') From 60fe306890a08a8fb128145d83de3a342e9cf77b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Mar 2024 13:03:46 -0400 Subject: [PATCH 074/122] Numerous fixes Normalize cloning by wipefs prior to image2disk Have imgutil filter out zram mounts. Fix syncfiles error handling. 
--- .../el9-diskless/profiles/default/scripts/installimage | 1 + confluent_server/confluent/syncfiles.py | 5 +++-- imgutil/imgutil | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage b/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage index 2e791ce6..56597086 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/installimage @@ -30,6 +30,7 @@ if [ ! -f /sysroot/tmp/installdisk ]; then done fi lvm vgchange -a n +/sysroot/usr/sbin/wipefs -a /dev/$(cat /sysroot/tmp/installdisk) udevadm control -e if [ -f /sysroot/etc/lvm/devices/system.devices ]; then rm /sysroot/etc/lvm/devices/system.devices diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index 6c11d072..70e5bdaf 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -193,6 +193,7 @@ def sync_list_to_node(sl, node, suffixes, peerip=None): targip = node if peerip: targip = peerip + #BOOO, need stderr!!! output = util.run( ['rsync', '-rvLD', targdir + '/', 'root@[{}]:/'.format(targip)])[0] except Exception as e: @@ -212,7 +213,7 @@ def sync_list_to_node(sl, node, suffixes, peerip=None): unreadablefiles.append(filename.replace(targdir, '')) if unreadablefiles: raise Exception("Syncing failed due to unreadable files: " + ','.join(unreadablefiles)) - elif b'Permission denied, please try again.' in e.stderr: + elif hasattr(e, 'stderr') and e.stderr and b'Permission denied, please try again.' 
in e.stderr: raise Exception('Syncing failed due to authentication error, is the confluent automation key not set up (osdeploy initialize -a) or is there some process replacing authorized_keys on the host?') else: raise @@ -231,7 +232,7 @@ def stage_ent(currmap, ent, targdir, appendexist=False): everyfent = [] allfents = ent.split() for tmpent in allfents: - fents = glob.glob(tmpent) + fents = glob.glob(tmpent) # TODO: recursive globbing? if not fents: raise Exception('No matching files for "{}"'.format(tmpent)) everyfent.extend(fents) diff --git a/imgutil/imgutil b/imgutil/imgutil index 959c4a17..022279cc 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -83,6 +83,8 @@ def get_partition_info(): dev, mount, fs, flags = entry.split()[:4] if mount not in capmounts: continue + if '/dev/zram' in dev: + continue fsinfo = os.statvfs(mount) partinfo = { 'mount': mount, From b157e55f000c401d6c0d9f5a3874a75073d1d265 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Mar 2024 15:50:58 -0400 Subject: [PATCH 075/122] Fallback to unverified noderange on candidate manager check in PXE When doing pxe and the noderange of the candidate managers fails, try again without validation in case the user omitted collective members from nodelist, but still used ',' to enumerate them. 
--- confluent_server/confluent/discovery/protocols/pxe.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/protocols/pxe.py b/confluent_server/confluent/discovery/protocols/pxe.py index 6dd34efa..4a39654f 100644 --- a/confluent_server/confluent/discovery/protocols/pxe.py +++ b/confluent_server/confluent/discovery/protocols/pxe.py @@ -587,7 +587,10 @@ def get_deployment_profile(node, cfg, cfd=None): return None candmgrs = cfd.get(node, {}).get('collective.managercandidates', {}).get('value', None) if candmgrs: - candmgrs = noderange.NodeRange(candmgrs, cfg).nodes + try: + candmgrs = noderange.NodeRange(candmgrs, cfg).nodes + except Exception: # fallback to unverified noderange + candmgrs = noderange.NodeRange(candmgrs).nodes if collective.get_myname() not in candmgrs: return None return profile From a595d31e946b1d9fee8ae3420465314148bb66a0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Mar 2024 08:56:06 -0400 Subject: [PATCH 076/122] Explicitly invoke bash for ubuntu post Ubuntu really tries to use non-bash, explicitly use bash when we need it. --- .../ubuntu20.04/initramfs/custom-installation/post.sh | 3 ++- .../ubuntu22.04/initramfs/custom-installation/post.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/post.sh b/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/post.sh index 5bd43bc6..d9dc27b2 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/post.sh +++ b/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/post.sh @@ -4,4 +4,5 @@ confluent_mgr=$(grep ^deploy_server $deploycfg|awk '{print $2}') confluent_profile=$(grep ^profile: $deploycfg|awk '{print $2}') export deploycfg confluent_mgr confluent_profile curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/post.sh > /tmp/post.sh -. 
/tmp/post.sh +bash /tmp/post.sh +true diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/custom-installation/post.sh b/confluent_osdeploy/ubuntu22.04/initramfs/custom-installation/post.sh index 5bd43bc6..d9dc27b2 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/custom-installation/post.sh +++ b/confluent_osdeploy/ubuntu22.04/initramfs/custom-installation/post.sh @@ -4,4 +4,5 @@ confluent_mgr=$(grep ^deploy_server $deploycfg|awk '{print $2}') confluent_profile=$(grep ^profile: $deploycfg|awk '{print $2}') export deploycfg confluent_mgr confluent_profile curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/post.sh > /tmp/post.sh -. /tmp/post.sh +bash /tmp/post.sh +true From 3dd09b95e47ae238f24b3b437f466cc2f71623cd Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Mar 2024 09:13:53 -0400 Subject: [PATCH 077/122] Fix Ubuntu 20 pre script to match 22 --- .../profiles/default/scripts/pre.sh | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/pre.sh b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/pre.sh index ddfe598b..5db222a7 100755 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/pre.sh +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/pre.sh @@ -1,5 +1,16 @@ #!/bin/bash deploycfg=/custom-installation/confluent/confluent.deploycfg +mkdir -p /var/log/confluent +mkdir -p /opt/confluent/bin +mkdir -p /etc/confluent +cp /custom-installation/confluent/confluent.info /custom-installation/confluent/confluent.apikey /etc/confluent/ +cat /custom-installation/tls/*.pem >> /etc/confluent/ca.pem +cp /custom-installation/confluent/bin/apiclient /opt/confluent/bin +cp $deploycfg /etc/confluent/ +( +exec >> /var/log/confluent/confluent-pre.log +exec 2>> /var/log/confluent/confluent-pre.log +chmod 600 /var/log/confluent/confluent-pre.log cryptboot=$(grep encryptboot: $deploycfg|sed -e 's/^encryptboot: //') if [ 
"$cryptboot" != "" ] && [ "$cryptboot" != "none" ] && [ "$cryptboot" != "null" ]; then @@ -23,7 +34,17 @@ echo HostbasedAuthentication yes >> /etc/ssh/sshd_config.d/confluent.conf echo HostbasedUsesNameFromPacketOnly yes >> /etc/ssh/sshd_config.d/confluent.conf echo IgnoreRhosts no >> /etc/ssh/sshd_config.d/confluent.conf systemctl restart sshd +mkdir -p /etc/confluent +export nodename confluent_profile confluent_mgr +curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/functions > /etc/confluent/functions +. /etc/confluent/functions +run_remote_parts pre.d curl -f -X POST -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $apikey" https://$confluent_mgr/confluent-api/self/nodelist > /tmp/allnodes -curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/getinstalldisk > /custom-installation/getinstalldisk -python3 /custom-installation/getinstalldisk +if [ ! -e /tmp/installdisk ]; then + curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/getinstalldisk > /custom-installation/getinstalldisk + python3 /custom-installation/getinstalldisk +fi sed -i s!%%INSTALLDISK%%!/dev/$(cat /tmp/installdisk)! /autoinstall.yaml +) & +tail --pid $! 
-n 0 -F /var/log/confluent/confluent-pre.log > /dev/console + From 6502573d905eae76b31b67369c8af71f69bdbaa3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Mar 2024 09:15:11 -0400 Subject: [PATCH 078/122] Bring ubuntu 22 versions of firstboot and post to 20 --- .../profiles/default/scripts/firstboot.sh | 10 ++++++++-- .../ubuntu20.04/profiles/default/scripts/post.sh | 16 +++++++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/firstboot.sh index d14269cf..c0ba44ab 100755 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/firstboot.sh +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/firstboot.sh @@ -2,7 +2,10 @@ echo "Confluent first boot is running" HOME=$(getent passwd $(whoami)|cut -d: -f 6) export HOME -seems a potentially relevant thing to put i... by Jarrod Johnson +( +exec >> /target/var/log/confluent/confluent-firstboot.log +exec 2>> /target/var/log/confluent/confluent-firstboot.log +chmod 600 /target/var/log/confluent/confluent-firstboot.log cp -a /etc/confluent/ssh/* /etc/ssh/ systemctl restart sshd rootpw=$(grep ^rootpassword: /etc/confluent/confluent.deploycfg |awk '{print $2}') @@ -18,7 +21,10 @@ done hostnamectl set-hostname $(grep ^NODENAME: /etc/confluent/confluent.info | awk '{print $2}') touch /etc/cloud/cloud-init.disabled source /etc/confluent/functions - +confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') +export confluent_mgr confluent_profile run_remote_parts firstboot.d run_remote_config firstboot.d curl --capath /etc/confluent/tls -f -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" -X POST -d "status: complete" https://$confluent_mgr/confluent-api/self/updatestatus +) & +tail --pid $! 
-n 0 -F /target/var/log/confluent/confluent-post.log > /dev/console diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh index 16a624c3..d9730889 100755 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh @@ -8,7 +8,6 @@ chmod go-rwx /etc/confluent/* for i in /custom-installation/ssh/*.ca; do echo '@cert-authority *' $(cat $i) >> /target/etc/ssh/ssh_known_hosts done - cp -a /etc/ssh/ssh_host* /target/etc/confluent/ssh/ cp -a /etc/ssh/sshd_config.d/confluent.conf /target/etc/confluent/ssh/sshd_config.d/ sshconf=/target/etc/ssh/ssh_config @@ -19,10 +18,15 @@ echo 'Host *' >> $sshconf echo ' HostbasedAuthentication yes' >> $sshconf echo ' EnableSSHKeysign yes' >> $sshconf echo ' HostbasedKeyTypes *ed25519*' >> $sshconf - +cp /etc/confluent/functions /target/etc/confluent/functions +source /etc/confluent/functions +mkdir -p /target/var/log/confluent +cp /var/log/confluent/* /target/var/log/confluent/ +( +exec >> /target/var/log/confluent/confluent-post.log +exec 2>> /target/var/log/confluent/confluent-post.log +chmod 600 /target/var/log/confluent/confluent-post.log curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/firstboot.sh > /target/etc/confluent/firstboot.sh -curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/functions > /target/etc/confluent/functions -source /target/etc/confluent/functions chmod +x /target/etc/confluent/firstboot.sh cp /tmp/allnodes /target/root/.shosts cp /tmp/allnodes /target/etc/ssh/shosts.equiv @@ -84,6 +88,8 @@ chroot /target bash -c "source /etc/confluent/functions; run_remote_parts post.d source /target/etc/confluent/functions run_remote_config post +python3 /opt/confluent/bin/apiclient /confluent-api/self/updatestatus -d 'status: staged' umount /target/sys /target/dev /target/proc - +) & +tail --pid $! 
-n 0 -F /target/var/log/confluent/confluent-post.log > /dev/console From 7a6b03097b53e60aeb0d9845075d283a61904219 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Mar 2024 12:24:25 -0400 Subject: [PATCH 079/122] Fixup Ubuntu 22 ARM support --- .../ubuntu22.04/profiles/default/initprofile.sh | 9 ++++++++- confluent_server/confluent/osimage.py | 4 +--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh index 20e12471..28d7e74c 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh @@ -3,5 +3,12 @@ sed -i 's/label: ubuntu/label: Ubuntu/' $2/profile.yaml && \ ln -s $1/casper/vmlinuz $2/boot/kernel && \ ln -s $1/casper/initrd $2/boot/initramfs/distribution && \ mkdir -p $2/boot/efi/boot && \ -ln -s $1/EFI/boot/* $2/boot/efi/boot +if [ -d $1/EFI/boot/ ]; then + ln -s $1/EFI/boot/* $2/boot/efi/boot +elif [ -d $1/efi/boot/ ]; then + ln -s $1/efi/boot/* $2/boot/efi/boot +else + echo "Unrecogrized boot contents in media" > &2 + exit 1 +fi diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index 2289a048..e0c1a8cb 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -411,9 +411,7 @@ def check_ubuntu(isoinfo): ] return {'name': 'ubuntu-{0}-{1}'.format(ver, arch), 'method': EXTRACT|COPY, - 'extractlist': ['casper/vmlinuz', 'casper/initrd', - 'efi/boot/bootx64.efi', 'efi/boot/grubx64.efi' - ], + 'extractlist': exlist, 'copyto': 'install.iso', 'category': 'ubuntu{0}'.format(major)} From 5f801e6683481f32c82c483f60bf8d3bf2097088 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Mar 2024 12:45:25 -0400 Subject: [PATCH 080/122] Correct syntax error in ubuntu arm profile init --- confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh index 28d7e74c..cebcd41d 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/initprofile.sh @@ -8,7 +8,7 @@ if [ -d $1/EFI/boot/ ]; then elif [ -d $1/efi/boot/ ]; then ln -s $1/efi/boot/* $2/boot/efi/boot else - echo "Unrecogrized boot contents in media" > &2 + echo "Unrecogrized boot contents in media" >&2 exit 1 fi From 559e88b14459e8655180c11628acc59b0a472b85 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 19 Mar 2024 09:41:20 -0400 Subject: [PATCH 081/122] Correct vgname for hyphenated node names --- .../el9-diskless/profiles/default/scripts/image2disk.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index 79d0008e..425e5177 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -326,8 +326,9 @@ def install_to_disk(imgpath): biggestsize = fs['initsize'] if fs['device'].startswith('/dev/mapper'): oldvgname = fs['device'].rsplit('/', 1)[-1] + # if node has - then /dev/mapper will double up the hypen if '_' in oldvgname and '-' in oldvgname.split('_')[-1]: - oldvgname = oldvgname.rsplit('-', 1)[0] + oldvgname = oldvgname.rsplit('-', 1)[0].replace('--', '-') osname = oldvgname.split('_')[0] nodename = socket.gethostname().split('.')[0] vgname = '{}_{}'.format(osname, nodename) From 13fc5d9f37deabb6b38ea1595a3944ff58e90b32 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 20 Mar 2024 09:49:25 -0400 Subject: [PATCH 082/122] Capture better error data on failed syncfiles syncfiles can often hang up in unexpected ways, provide a 
catch-all. --- confluent_server/confluent/syncfiles.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index 70e5bdaf..94b74eea 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -24,6 +24,7 @@ import confluent.noderange as noderange import eventlet import pwd import grp +import sys def mkdirp(path): try: @@ -193,9 +194,8 @@ def sync_list_to_node(sl, node, suffixes, peerip=None): targip = node if peerip: targip = peerip - #BOOO, need stderr!!! - output = util.run( - ['rsync', '-rvLD', targdir + '/', 'root@[{}]:/'.format(targip)])[0] + output, stderr = util.run( + ['rsync', '-rvLD', targdir + '/', 'root@[{}]:/'.format(targip)]) except Exception as e: if 'CalledProcessError' not in repr(e): # https://github.com/eventlet/eventlet/issues/413 @@ -215,6 +215,9 @@ def sync_list_to_node(sl, node, suffixes, peerip=None): raise Exception("Syncing failed due to unreadable files: " + ','.join(unreadablefiles)) elif hasattr(e, 'stderr') and e.stderr and b'Permission denied, please try again.' in e.stderr: raise Exception('Syncing failed due to authentication error, is the confluent automation key not set up (osdeploy initialize -a) or is there some process replacing authorized_keys on the host?') + elif hasattr(e, 'stderr') and e.stderr: + sys.stderr.write(e.stderr.decode('utf8')) + raise else: raise finally: From 5a7d98c6b81618db5f4e76a813e565550923bd62 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 21 Mar 2024 16:09:37 -0400 Subject: [PATCH 083/122] Enhance error reporting For one, when using confluent expressions, induce {} to be an error to trigger an error for someone trying to xargs something. Another is to add warnings when clear does something deliberately, but is something that might surprise a user, steering them toward what they possibly might want to do instead. 
--- confluent_client/bin/nodeattrib | 3 ++- confluent_client/confluent/client.py | 3 +++ .../confluent/config/configmanager.py | 20 ++++++++++++++++--- .../plugins/configuration/attributes.py | 18 ++++++++++------- 4 files changed, 33 insertions(+), 11 deletions(-) diff --git a/confluent_client/bin/nodeattrib b/confluent_client/bin/nodeattrib index f4b0331f..265fe917 100755 --- a/confluent_client/bin/nodeattrib +++ b/confluent_client/bin/nodeattrib @@ -126,13 +126,14 @@ elif options.set: argset = argset.strip() if argset: arglist += shlex.split(argset) - argset = argfile.readline() + argset = argfile.readline() session.stop_if_noderange_over(noderange, options.maxnodes) exitcode=client.updateattrib(session,arglist,nodetype, noderange, options, None) if exitcode != 0: sys.exit(exitcode) # Lists all attributes + if len(args) > 0: # setting output to all so it can search since if we do have something to search, we want to show all outputs even if it is blank. if requestargs is None: diff --git a/confluent_client/confluent/client.py b/confluent_client/confluent/client.py index ad29ff02..a7c13cd3 100644 --- a/confluent_client/confluent/client.py +++ b/confluent_client/confluent/client.py @@ -668,6 +668,9 @@ def updateattrib(session, updateargs, nodetype, noderange, options, dictassign=N for attrib in updateargs[1:]: keydata[attrib] = None for res in session.update(targpath, keydata): + for node in res.get('databynode', {}): + for warnmsg in res['databynode'][node].get('_warnings', []): + sys.stderr.write('Warning: ' + warnmsg + '\n') if 'error' in res: if 'errorcode' in res: exitcode = res['errorcode'] diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 9419e7fe..dce692fd 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -1089,6 +1089,11 @@ class _ExpressionFormat(string.Formatter): self._nodename = nodename self._numbers = 
None + def _vformat(self, format_string, args, kwargs, used_args, recursion_depth, + auto_arg_index=False): + super()._vformat(format_string, args, kwargs, used_args, + recursion_depth, auto_arg_index) + def get_field(self, field_name, args, kwargs): return field_name, field_name @@ -2197,16 +2202,16 @@ class ConfigManager(object): self._notif_attribwatchers(changeset) self._bg_sync_to_file() - def clear_node_attributes(self, nodes, attributes): + def clear_node_attributes(self, nodes, attributes, warnings=None): if cfgleader: return exec_on_leader('_rpc_master_clear_node_attributes', self.tenant, nodes, attributes) if cfgstreams: exec_on_followers('_rpc_clear_node_attributes', self.tenant, nodes, attributes) - self._true_clear_node_attributes(nodes, attributes) + self._true_clear_node_attributes(nodes, attributes, warnings) - def _true_clear_node_attributes(self, nodes, attributes): + def _true_clear_node_attributes(self, nodes, attributes, warnings): # accumulate all changes into a changeset and push in one go changeset = {} realattributes = [] @@ -2229,8 +2234,17 @@ class ConfigManager(object): # delete it and check for inheritence to backfil data del nodek[attrib] self._do_inheritance(nodek, attrib, node, changeset) + if not warnings is None: + if attrib in nodek: + warnings.append('The attribute "{}" was defined specifically for the node and clearing now has a value inherited from the group "{}"'.format(attrib, nodek[attrib]['inheritedfrom'])) _addchange(changeset, node, attrib) _mark_dirtykey('nodes', node, self.tenant) + elif attrib in nodek: + if not warnings is None: + warnings.append('The attribute "{0}" is inherited from group "{1}", leaving the inherited value alone (use "{0}=" with no value to explicitly blank the value if desired)'.format(attrib, nodek[attrib]['inheritedfrom'])) + else: + if not warnings is None: + warnings.append('Attribute "{}" is either already cleared, or does not match a defined attribute (if referencing an attribute group, try a 
wildcard)'.format(attrib)) if ('_expressionkeys' in nodek and attrib in nodek['_expressionkeys']): recalcexpressions = True diff --git a/confluent_server/confluent/plugins/configuration/attributes.py b/confluent_server/confluent/plugins/configuration/attributes.py index c2ea83d9..a56a1aee 100644 --- a/confluent_server/confluent/plugins/configuration/attributes.py +++ b/confluent_server/confluent/plugins/configuration/attributes.py @@ -21,16 +21,16 @@ import confluent.util as util from fnmatch import fnmatch -def retrieve(nodes, element, configmanager, inputdata): +def retrieve(nodes, element, configmanager, inputdata, clearwarnbynode=None): configmanager.check_quorum() if nodes is not None: - return retrieve_nodes(nodes, element, configmanager, inputdata) + return retrieve_nodes(nodes, element, configmanager, inputdata, clearwarnbynode) elif element[0] == 'nodegroups': return retrieve_nodegroup( - element[1], element[3], configmanager, inputdata) + element[1], element[3], configmanager, inputdata, clearwarnbynode) -def retrieve_nodegroup(nodegroup, element, configmanager, inputdata): +def retrieve_nodegroup(nodegroup, element, configmanager, inputdata, clearwarnbynode): try: grpcfg = configmanager.get_nodegroup_attributes(nodegroup) except KeyError: @@ -106,10 +106,12 @@ def retrieve_nodegroup(nodegroup, element, configmanager, inputdata): raise Exception("BUGGY ATTRIBUTE FOR NODEGROUP") -def retrieve_nodes(nodes, element, configmanager, inputdata): +def retrieve_nodes(nodes, element, configmanager, inputdata, clearwarnbynode): attributes = configmanager.get_node_attributes(nodes) if element[-1] == 'all': for node in util.natural_sort(nodes): + if clearwarnbynode and node in clearwarnbynode: + yield msg.Attributes(node, {'_warnings': clearwarnbynode[node]}) theattrs = set(allattributes.node).union(set(attributes[node])) for attribute in sorted(theattrs): if attribute in attributes[node]: # have a setting for it @@ -266,6 +268,7 @@ def update_nodes(nodes, element, 
configmanager, inputdata): namemap[node] = rename['rename'] configmanager.rename_nodes(namemap) return yield_rename_resources(namemap, isnode=True) + clearwarnbynode = {} for node in nodes: updatenode = inputdata.get_attributes(node, allattributes.node) clearattribs = [] @@ -299,10 +302,11 @@ def update_nodes(nodes, element, configmanager, inputdata): markup = (e.text[:e.offset-1] + '-->' + e.text[e.offset-1] + '<--' + e.text[e.offset:]).strip() raise exc.InvalidArgumentException('Syntax error in attribute name: "{0}"'.format(markup)) if len(clearattribs) > 0: - configmanager.clear_node_attributes([node], clearattribs) + clearwarnbynode[node] = [] + configmanager.clear_node_attributes([node], clearattribs, warnings=clearwarnbynode[node]) updatedict[node] = updatenode try: configmanager.set_node_attributes(updatedict) except ValueError as e: raise exc.InvalidArgumentException(str(e)) - return retrieve(nodes, element, configmanager, inputdata) + return retrieve(nodes, element, configmanager, inputdata, clearwarnbynode) From 6ad0e773de4bafa489da4587e6829dc3cf67b413 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 21 Mar 2024 16:28:49 -0400 Subject: [PATCH 084/122] Actually have the vformat override return Performing the super() is hardly helpful if it doesn't actually copy the return behavior. 
--- confluent_server/confluent/config/configmanager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index dce692fd..9e7818b5 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -1091,8 +1091,8 @@ class _ExpressionFormat(string.Formatter): def _vformat(self, format_string, args, kwargs, used_args, recursion_depth, auto_arg_index=False): - super()._vformat(format_string, args, kwargs, used_args, - recursion_depth, auto_arg_index) + return super()._vformat(format_string, args, kwargs, used_args, + recursion_depth, auto_arg_index) def get_field(self, field_name, args, kwargs): return field_name, field_name From 838c0920eb8e159a293e4ba3983d5a85c9fa042e Mon Sep 17 00:00:00 2001 From: tkucherera Date: Fri, 22 Mar 2024 11:37:12 -0400 Subject: [PATCH 085/122] l2traceroute --- confluent_client/doc/man/l2traceroute.ronn | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 confluent_client/doc/man/l2traceroute.ronn diff --git a/confluent_client/doc/man/l2traceroute.ronn b/confluent_client/doc/man/l2traceroute.ronn new file mode 100644 index 00000000..7e660094 --- /dev/null +++ b/confluent_client/doc/man/l2traceroute.ronn @@ -0,0 +1,34 @@ +l2traceroute(8) -- returns the layer 2 route through an Ethernet network managed by confluent given 2 end points. +============================== +## SYNOPSIS +`l2traceroute [options] ` + +## DESCRIPTION +**l2traceroute** is a command that returns the layer 2 route for the configered interfaces in nodeattrib. +It can also be used with the -i and -e options to check against specific interfaces on the endpoints. 
+ +Note the net..switch attributes have to be set on the end points + + +## OPTIONS +* ` -e` EFACE, --eface=INTERFACE + interface to check against for the second end point +* ` -i` INTERFACE, --interface=INTERFACE + interface to check against for the first end point +* `-h`, `--help`: + Show help message and exit + + +## EXAMPLES + * Checking route between two nodes: + `# l2traceroute_client n244 n1851` + `n244 to n1851: ['switch114']` + +* Checking route from one node to multiple nodes: + `# l2traceroute_client n244 n1833,n1851` + `n244 to n1833: ['switch114', 'switch7', 'switch32', 'switch253', 'switch85', 'switch72', 'switch21', 'switch2', 'switch96', 'switch103', 'switch115'] + n244 to n1851: ['switch114']` + + + + From 466ed7496123c4808c767a272af37fd5a8814ac6 Mon Sep 17 00:00:00 2001 From: tkucherera Date: Fri, 22 Mar 2024 11:37:51 -0400 Subject: [PATCH 086/122] l2traceroute --- confluent_client/bin/l2traceroute | 154 ++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100755 confluent_client/bin/l2traceroute diff --git a/confluent_client/bin/l2traceroute b/confluent_client/bin/l2traceroute new file mode 100755 index 00000000..7b9ad4ac --- /dev/null +++ b/confluent_client/bin/l2traceroute @@ -0,0 +1,154 @@ +#!/usr/libexec/platform-python +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2017 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__author__ = 'tkucherera' + +import optparse +import os +import signal +import sys +import subprocess + +try: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) +except AttributeError: + pass +path = os.path.dirname(os.path.realpath(__file__)) +path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) +if path.startswith('/opt'): + sys.path.append(path) + +import confluent.client as client + +argparser = optparse.OptionParser( + usage="Usage: %prog -i -e ", +) +argparser.add_option('-i', '--interface', type='str', + help='interface to check path against for the start node') +argparser.add_option('-e', '--eface', type='str', + help='interface to check path against for the end node') + +(options, args) = argparser.parse_args() + + +session = client.Command() + +def get_neighbors(switch): + switch_neigbors = [] + url = 'networking/neighbors/by-switch/{0}/by-peername/'.format(switch) + for neighbor in session.read(url): + if neighbor['item']['href'].startswith('switch'): + switch = neighbor['item']['href'].strip('/') + switch_neigbors.append(switch) + return switch_neigbors + + + +def find_path(start, end, path=[]): + path = path + [start] + if start == end: + return path # If start and end are the same, return the path + + for node in get_neighbors(start): + if node not in path and node.startswith('switch'): + new_path = find_path(node, end, path) + if new_path: + return new_path # If a path is found, return it + + return None # If no path is found, return None + + + +def is_cumulus(switch): + try: + read_attrib = subprocess.check_output(['nodeattrib', switch, 'hardwaremanagement.method']) + except subprocess.CalledProcessError: + return False + for attribs in read_attrib.decode('utf-8').split('\n'): + if len(attribs.split(':')) > 1: + attrib = attribs.split(':') + if attrib[2].strip() == 'affluent': + return True + else: + return False + else: + return False + + +def host_to_switch(node, interface=None): + # first check the the node config to see what 
switches are connected + # if host is in rhel can use nmstate package + cummulus_switches = [] + netarg = 'net.*.switch' + if interface: + netarg = 'net.{0}.switch'.format(interface) + read_attrib = subprocess.check_output(['nodeattrib', node, netarg]) + for attribs in read_attrib.decode('utf-8').split('\n'): + attrib = attribs.split(':') + try: + if ' net.mgt.switch' in attrib or attrib[2] == '': + continue + except IndexError: + continue + switch = attrib[2].strip() + if is_cumulus(switch): + cummulus_switches.append(switch) + return cummulus_switches + +try: + start_node = args[0] + end_node = args[1] + interface = options.interface + eface = options.eface +except IndexError: + argparser.print_help() + sys.exit(1) + +def path_between_nodes(start_switches, end_switches): + for start_switch in start_switches: + for end_switch in end_switches: + if start_switch == end_switch: + return [start_switch] + else: + path = find_path(start_switch, end_switch) + if path: + return path + else: + return 'No path found' + +end_nodeslist = [] +nodelist = '/noderange/{0}/nodes/'.format(end_node) +for res in session.read(nodelist): + if 'error' in res: + sys.stderr.write(res['error'] + '\n') + exitcode = 1 + else: + elem=(res['item']['href'].replace('/', '')) + end_nodeslist.append(elem) + +start_switches = host_to_switch(start_node, interface) +for end_node in end_nodeslist: + if end_node: + end_switches = host_to_switch(end_node, eface) + path = path_between_nodes(start_switches, end_switches) + print(f'{start_node} to {end_node}: {path}') + + + + + + From c60bf68cbc34c6f389b53ef0a8c69086f7b555dd Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 22 Mar 2024 12:56:09 -0400 Subject: [PATCH 087/122] Logout prior to renaming user Some firmware cannot tolerate a web session being active during a rename. Make sure logout has been done, and give a retry if needed to let the session close out after logging out. 
--- .../confluent/discovery/handlers/xcc.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index 23f31fb0..ff7ca042 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -247,6 +247,10 @@ class NodeHandler(immhandler.NodeHandler): if rsp.status == 200: pwdchanged = True password = newpassword + wc.set_header('Authorization', 'Bearer ' + rspdata['access_token']) + if '_csrf_token' in wc.cookies: + wc.set_header('X-XSRF-TOKEN', wc.cookies['_csrf_token']) + wc.grab_json_response_with_status('/api/providers/logout') else: if rspdata.get('locktime', 0) > 0: raise LockedUserException( @@ -280,6 +284,7 @@ class NodeHandler(immhandler.NodeHandler): rsp.read() if rsp.status != 200: return (None, None) + wc.grab_json_response_with_status('/api/providers/logout') self._currcreds = (username, newpassword) wc.set_basic_credentials(username, newpassword) pwdchanged = True @@ -434,6 +439,7 @@ class NodeHandler(immhandler.NodeHandler): '/api/function', {'USER_UserModify': '{0},{1},,1,4,0,0,0,0,,8,,,'.format(uid, username)}) if status == 200 and rsp.get('return', 0) == 13: + wc.grab_json_response('/api/providers/logout') wc.set_basic_credentials(self._currcreds[0], self._currcreds[1]) status = 503 while status != 200: @@ -442,10 +448,13 @@ class NodeHandler(immhandler.NodeHandler): {'UserName': username}, method='PATCH') if status != 200: rsp = json.loads(rsp) - if rsp.get('error', {}).get('code', 'Unknown') in ('Base.1.8.GeneralError', 'Base.1.12.GeneralError'): - eventlet.sleep(10) + if rsp.get('error', {}).get('code', 'Unknown') in ('Base.1.8.GeneralError', 'Base.1.12.GeneralError', 'Base.1.14.GeneralError'): + eventlet.sleep(4) else: break + self.tmppasswd = None + self._currcreds = (username, passwd) + return self.tmppasswd = None 
wc.grab_json_response('/api/providers/logout') self._currcreds = (username, passwd) @@ -632,3 +641,4 @@ def remote_nodecfg(nodename, cfm): info = {'addresses': [ipaddr]} nh = NodeHandler(info, cfm) nh.config(nodename) + From 296a0e88b4962f5e93e43f954414fb3c5532b6d8 Mon Sep 17 00:00:00 2001 From: tkucherera Date: Sun, 24 Mar 2024 11:41:23 -0400 Subject: [PATCH 088/122] making the use cases more generic --- confluent_client/bin/l2traceroute | 65 ++++++++++++++-------- confluent_client/doc/man/l2traceroute.ronn | 6 +- 2 files changed, 47 insertions(+), 24 deletions(-) diff --git a/confluent_client/bin/l2traceroute b/confluent_client/bin/l2traceroute index 7b9ad4ac..e8f9705e 100755 --- a/confluent_client/bin/l2traceroute +++ b/confluent_client/bin/l2traceroute @@ -41,38 +41,44 @@ argparser.add_option('-i', '--interface', type='str', help='interface to check path against for the start node') argparser.add_option('-e', '--eface', type='str', help='interface to check path against for the end node') +argparser.add_option('-c', '--cumulus', action="store_true", dest="cumulus", + help='return layer 2 route through cumulus switches only') (options, args) = argparser.parse_args() +try: + start_node = args[0] + end_node = args[1] + interface = options.interface + eface = options.eface +except IndexError: + argparser.print_help() + sys.exit(1) session = client.Command() def get_neighbors(switch): switch_neigbors = [] - url = 'networking/neighbors/by-switch/{0}/by-peername/'.format(switch) + url = '/networking/neighbors/by-switch/{0}/by-peername/'.format(switch) for neighbor in session.read(url): - if neighbor['item']['href'].startswith('switch'): - switch = neighbor['item']['href'].strip('/') + switch = neighbor['item']['href'].strip('/') + if switch in all_switches: switch_neigbors.append(switch) return switch_neigbors - - def find_path(start, end, path=[]): path = path + [start] if start == end: return path # If start and end are the same, return the path for node in 
get_neighbors(start): - if node not in path and node.startswith('switch'): + if node not in path: new_path = find_path(node, end, path) if new_path: return new_path # If a path is found, return it return None # If no path is found, return None - - def is_cumulus(switch): try: read_attrib = subprocess.check_output(['nodeattrib', switch, 'hardwaremanagement.method']) @@ -92,11 +98,16 @@ def is_cumulus(switch): def host_to_switch(node, interface=None): # first check the the node config to see what switches are connected # if host is in rhel can use nmstate package - cummulus_switches = [] + if node in all_switches: + return [node] + switches = [] netarg = 'net.*.switch' if interface: netarg = 'net.{0}.switch'.format(interface) - read_attrib = subprocess.check_output(['nodeattrib', node, netarg]) + try: + read_attrib = subprocess.check_output(['nodeattrib', node, netarg]) + except subprocess.CalledProcessError: + return False for attribs in read_attrib.decode('utf-8').split('\n'): attrib = attribs.split(':') try: @@ -105,18 +116,11 @@ def host_to_switch(node, interface=None): except IndexError: continue switch = attrib[2].strip() - if is_cumulus(switch): - cummulus_switches.append(switch) - return cummulus_switches - -try: - start_node = args[0] - end_node = args[1] - interface = options.interface - eface = options.eface -except IndexError: - argparser.print_help() - sys.exit(1) + if is_cumulus(switch) and options.cumulus: + switches.append(switch) + else: + switches.append(switch) + return switches def path_between_nodes(start_switches, end_switches): for start_switch in start_switches: @@ -129,7 +133,17 @@ def path_between_nodes(start_switches, end_switches): return path else: return 'No path found' - + + +all_switches = [] +for res in session.read('/networking/neighbors/by-switch/'): + if 'error' in res: + sys.stderr.write(res['error'] + '\n') + exitcode = 1 + else: + switch = (res['item']['href'].replace('/', '')) + all_switches.append(switch) + end_nodeslist = [] 
nodelist = '/noderange/{0}/nodes/'.format(end_node) for res in session.read(nodelist): @@ -144,9 +158,14 @@ start_switches = host_to_switch(start_node, interface) for end_node in end_nodeslist: if end_node: end_switches = host_to_switch(end_node, eface) + if not end_switches: + print('Error: net.{0}.switch attribute is not valid') + continue path = path_between_nodes(start_switches, end_switches) print(f'{start_node} to {end_node}: {path}') +# TODO dont put switches that are connected through management interfaces. + diff --git a/confluent_client/doc/man/l2traceroute.ronn b/confluent_client/doc/man/l2traceroute.ronn index 7e660094..16318567 100644 --- a/confluent_client/doc/man/l2traceroute.ronn +++ b/confluent_client/doc/man/l2traceroute.ronn @@ -7,7 +7,9 @@ l2traceroute(8) -- returns the layer 2 route through an Ethernet network managed **l2traceroute** is a command that returns the layer 2 route for the configered interfaces in nodeattrib. It can also be used with the -i and -e options to check against specific interfaces on the endpoints. 
-Note the net..switch attributes have to be set on the end points + +## PREREQUISITES +**l2traceroute** the net..switch attributes have to be set on the end points if endpoint is not a switch ## OPTIONS @@ -15,6 +17,8 @@ Note the net..switch attributes have to be set on the end points interface to check against for the second end point * ` -i` INTERFACE, --interface=INTERFACE interface to check against for the first end point +* ` -c` CUMULUS, --cumulus=CUMULUS + return layer 2 route through cumulus switches only * `-h`, `--help`: Show help message and exit From f7a2e51f9c284ab9c42d7c28b62b76df4ed0e711 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 26 Mar 2024 10:31:29 -0400 Subject: [PATCH 089/122] fstab fixup for hyphenated lvm vg names --- .../el9-diskless/profiles/default/scripts/image2disk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index 425e5177..6a924964 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -103,7 +103,7 @@ def fixup(rootdir, vols): uuid = uuid.strip() entry[0] = 'UUID={}'.format(uuid) elif entry[2] == 'swap': - entry[0] = '/dev/mapper/{}-swap'.format(vgname) + entry[0] = '/dev/mapper/{}-swap'.format(vgname.replace('-', '--')) entry[0] = entry[0].ljust(42) entry[1] = entry[1].ljust(16) entry[3] = entry[3].ljust(28) From f1d3e47439f901075f2d02728f478705d88feac7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 22 Mar 2024 12:56:09 -0400 Subject: [PATCH 090/122] Logout prior to renaming user Some firmware cannot tolerate a web session being active during a rename. Make sure logout has been done, and give a retry if needed to let the session close out after logging out. 
--- .../confluent/discovery/handlers/xcc.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/discovery/handlers/xcc.py b/confluent_server/confluent/discovery/handlers/xcc.py index 23f31fb0..ff7ca042 100644 --- a/confluent_server/confluent/discovery/handlers/xcc.py +++ b/confluent_server/confluent/discovery/handlers/xcc.py @@ -247,6 +247,10 @@ class NodeHandler(immhandler.NodeHandler): if rsp.status == 200: pwdchanged = True password = newpassword + wc.set_header('Authorization', 'Bearer ' + rspdata['access_token']) + if '_csrf_token' in wc.cookies: + wc.set_header('X-XSRF-TOKEN', wc.cookies['_csrf_token']) + wc.grab_json_response_with_status('/api/providers/logout') else: if rspdata.get('locktime', 0) > 0: raise LockedUserException( @@ -280,6 +284,7 @@ class NodeHandler(immhandler.NodeHandler): rsp.read() if rsp.status != 200: return (None, None) + wc.grab_json_response_with_status('/api/providers/logout') self._currcreds = (username, newpassword) wc.set_basic_credentials(username, newpassword) pwdchanged = True @@ -434,6 +439,7 @@ class NodeHandler(immhandler.NodeHandler): '/api/function', {'USER_UserModify': '{0},{1},,1,4,0,0,0,0,,8,,,'.format(uid, username)}) if status == 200 and rsp.get('return', 0) == 13: + wc.grab_json_response('/api/providers/logout') wc.set_basic_credentials(self._currcreds[0], self._currcreds[1]) status = 503 while status != 200: @@ -442,10 +448,13 @@ class NodeHandler(immhandler.NodeHandler): {'UserName': username}, method='PATCH') if status != 200: rsp = json.loads(rsp) - if rsp.get('error', {}).get('code', 'Unknown') in ('Base.1.8.GeneralError', 'Base.1.12.GeneralError'): - eventlet.sleep(10) + if rsp.get('error', {}).get('code', 'Unknown') in ('Base.1.8.GeneralError', 'Base.1.12.GeneralError', 'Base.1.14.GeneralError'): + eventlet.sleep(4) else: break + self.tmppasswd = None + self._currcreds = (username, passwd) + return self.tmppasswd = None 
wc.grab_json_response('/api/providers/logout') self._currcreds = (username, passwd) @@ -632,3 +641,4 @@ def remote_nodecfg(nodename, cfm): info = {'addresses': [ipaddr]} nh = NodeHandler(info, cfm) nh.config(nodename) + From e38cd5d3e56ef0621f5449501719c810975ff096 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Mar 2024 15:50:58 -0400 Subject: [PATCH 091/122] Fallback to unverified noderange on candidate manager check in PXE When doing pxe and the noderange of the candidate managers fails, try again without validation in case the user omitted collective members from nodelist, but still used ',' to enumerate them. --- confluent_server/confluent/discovery/protocols/pxe.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/discovery/protocols/pxe.py b/confluent_server/confluent/discovery/protocols/pxe.py index a9a07963..ff473ebc 100644 --- a/confluent_server/confluent/discovery/protocols/pxe.py +++ b/confluent_server/confluent/discovery/protocols/pxe.py @@ -587,7 +587,10 @@ def get_deployment_profile(node, cfg, cfd=None): return None candmgrs = cfd.get(node, {}).get('collective.managercandidates', {}).get('value', None) if candmgrs: - candmgrs = noderange.NodeRange(candmgrs, cfg).nodes + try: + candmgrs = noderange.NodeRange(candmgrs, cfg).nodes + except Exception: # fallback to unverified noderange + candmgrs = noderange.NodeRange(candmgrs).nodes if collective.get_myname() not in candmgrs: return None return profile From 19e9c6910d609f856cbc0422271e108cea5839e0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 12 Mar 2024 15:32:44 -0400 Subject: [PATCH 092/122] Fix nodeapply redoing a single node multiple times --- confluent_client/bin/nodeapply | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_client/bin/nodeapply b/confluent_client/bin/nodeapply index e39447bc..2e798742 100755 --- a/confluent_client/bin/nodeapply +++ b/confluent_client/bin/nodeapply @@ -102,9 +102,9 @@ 
def run(): cmdv = ['ssh', sshnode] + cmdvbase + cmdstorun[0] if currprocs < concurrentprocs: currprocs += 1 - run_cmdv(node, cmdv, all, pipedesc) + run_cmdv(sshnode, cmdv, all, pipedesc) else: - pendingexecs.append((node, cmdv)) + pendingexecs.append((sshnode, cmdv)) if not all or exitcode: sys.exit(exitcode) rdy, _, _ = select.select(all, [], [], 10) From ac1f7c57b606674a1e05788d382e470349ef53b7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 12 Mar 2024 09:36:40 -0400 Subject: [PATCH 093/122] Fix lldp when peername is null Some neighbors result in a null name, handle that. --- confluent_server/confluent/networking/lldp.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/networking/lldp.py b/confluent_server/confluent/networking/lldp.py index e1fd8d4e..e181d46f 100644 --- a/confluent_server/confluent/networking/lldp.py +++ b/confluent_server/confluent/networking/lldp.py @@ -381,9 +381,10 @@ def list_info(parms, requestedparameter): break else: candidate = info[requestedparameter] - candidate = candidate.strip() - if candidate != '': - results.add(_api_sanitize_string(candidate)) + if candidate: + candidate = candidate.strip() + if candidate != '': + results.add(_api_sanitize_string(candidate)) return [msg.ChildCollection(x + suffix) for x in util.natural_sort(results)] def _handle_neighbor_query(pathcomponents, configmanager): From c1afc144cb32d900fe4b9962e7581bd3612174de Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 22 Feb 2024 15:05:56 -0500 Subject: [PATCH 094/122] Change to unix domain for vtbuffer communication The semaphore arbitrated single channel sharing was proving to be too slow. Make the communication lockless by having dedicated sockets per request. 
--- confluent_server/confluent/consoleserver.py | 56 +++--- confluent_vtbufferd/vtbufferd.c | 179 +++++++++++++++----- 2 files changed, 161 insertions(+), 74 deletions(-) diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index 37274792..783d77de 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -62,39 +62,38 @@ def chunk_output(output, n): yield output[i:i + n] def get_buffer_output(nodename): - out = _bufferdaemon.stdin - instream = _bufferdaemon.stdout + out = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + out.setsockopt(socket.SOL_SOCKET, socket.SO_PASSCRED, 1) + out.connect("\x00confluent-vtbuffer") if not isinstance(nodename, bytes): nodename = nodename.encode('utf8') outdata = bytearray() - with _bufferlock: - out.write(struct.pack('I', len(nodename))) - out.write(nodename) - out.flush() - select.select((instream,), (), (), 30) - while not outdata or outdata[-1]: - try: - chunk = os.read(instream.fileno(), 128) - except IOError: - chunk = None - if chunk: - outdata.extend(chunk) - else: - select.select((instream,), (), (), 0) - return bytes(outdata[:-1]) + out.send(struct.pack('I', len(nodename))) + out.send(nodename) + select.select((out,), (), (), 30) + while not outdata or outdata[-1]: + try: + chunk = os.read(out.fileno(), 128) + except IOError: + chunk = None + if chunk: + outdata.extend(chunk) + else: + select.select((out,), (), (), 0) + return bytes(outdata[:-1]) def send_output(nodename, output): if not isinstance(nodename, bytes): nodename = nodename.encode('utf8') - with _bufferlock: - _bufferdaemon.stdin.write(struct.pack('I', len(nodename) | (1 << 29))) - _bufferdaemon.stdin.write(nodename) - _bufferdaemon.stdin.flush() - for chunk in chunk_output(output, 8192): - _bufferdaemon.stdin.write(struct.pack('I', len(chunk) | (2 << 29))) - _bufferdaemon.stdin.write(chunk) - _bufferdaemon.stdin.flush() + out = socket.socket(socket.AF_UNIX, 
socket.SOCK_STREAM) + out.setsockopt(socket.SOL_SOCKET, socket.SO_PASSCRED, 1) + out.connect("\x00confluent-vtbuffer") + out.send(struct.pack('I', len(nodename) | (1 << 29))) + out.send(nodename) + for chunk in chunk_output(output, 8192): + out.send(struct.pack('I', len(chunk) | (2 << 29))) + out.send(chunk) def _utf8_normalize(data, decoder): # first we give the stateful decoder a crack at the byte stream, @@ -604,11 +603,8 @@ def initialize(): _bufferlock = semaphore.Semaphore() _tracelog = log.Logger('trace') _bufferdaemon = subprocess.Popen( - ['/opt/confluent/bin/vtbufferd'], bufsize=0, stdin=subprocess.PIPE, - stdout=subprocess.PIPE) - fl = fcntl.fcntl(_bufferdaemon.stdout.fileno(), fcntl.F_GETFL) - fcntl.fcntl(_bufferdaemon.stdout.fileno(), - fcntl.F_SETFL, fl | os.O_NONBLOCK) + ['/opt/confluent/bin/vtbufferd', 'confluent-vtbuffer'], bufsize=0, stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL) def start_console_sessions(): configmodule.hook_new_configmanagers(_start_tenant_sessions) diff --git a/confluent_vtbufferd/vtbufferd.c b/confluent_vtbufferd/vtbufferd.c index e89269b4..055a5263 100644 --- a/confluent_vtbufferd/vtbufferd.c +++ b/confluent_vtbufferd/vtbufferd.c @@ -1,8 +1,14 @@ +#include +#define _GNU_SOURCE #include #include #include #include #include +#include +#include +#include +#include #include "tmt.h" #define HASHSIZE 2053 #define MAXNAMELEN 256 @@ -10,13 +16,17 @@ struct terment { struct terment *next; char *name; + int fd; TMT *vt; }; #define SETNODE 1 #define WRITE 2 #define READBUFF 0 +#define CLOSECONN 3 +#define MAXEVTS 16 static struct terment *buffers[HASHSIZE]; +static char* nodenames[HASHSIZE]; unsigned long hash(char *str) /* djb2a */ @@ -37,10 +47,13 @@ TMT *get_termentbyname(char *name) { return NULL; } -TMT *set_termentbyname(char *name) { +TMT *set_termentbyname(char *name, int fd) { struct terment *ret; int idx; + if (nodenames[fd] == NULL) { + nodenames[fd] = strdup(name); + } idx = hash(name); for (ret = buffers[idx]; ret 
!= NULL; ret = ret->next) if (strcmp(name, ret->name) == 0) @@ -48,12 +61,13 @@ TMT *set_termentbyname(char *name) { ret = (struct terment *)malloc(sizeof(*ret)); ret->next = buffers[idx]; ret->name = strdup(name); + ret->fd = fd; ret->vt = tmt_open(31, 100, NULL, NULL, L"→←↑↓■◆▒°±▒┘┐┌└┼⎺───⎽├┤┴┬│≤≥π≠£•"); buffers[idx] = ret; return ret->vt; } -void dump_vt(TMT* outvt) { +void dump_vt(TMT* outvt, int outfd) { const TMTSCREEN *out = tmt_screen(outvt); const TMTPOINT *curs = tmt_cursor(outvt); int line, idx, maxcol, maxrow; @@ -67,9 +81,10 @@ void dump_vt(TMT* outvt) { tmt_color_t fg = TMT_COLOR_DEFAULT; tmt_color_t bg = TMT_COLOR_DEFAULT; wchar_t sgrline[30]; + char strbuffer[128]; size_t srgidx = 0; char colorcode = 0; - wprintf(L"\033c"); + write(outfd, "\033c", 2); maxcol = 0; maxrow = 0; for (line = out->nline - 1; line >= 0; --line) { @@ -148,60 +163,136 @@ void dump_vt(TMT* outvt) { } if (sgrline[0] != 0) { sgrline[wcslen(sgrline) - 1] = 0; // Trim last ; - wprintf(L"\033[%lsm", sgrline); + + snprintf(strbuffer, sizeof(strbuffer), "\033[%lsm", sgrline); + write(outfd, strbuffer, strlen(strbuffer)); + write(outfd, "\033[]", 3); } - wprintf(L"%lc", out->lines[line]->chars[idx].c); + snprintf(strbuffer, sizeof(strbuffer), "%lc", out->lines[line]->chars[idx].c); + write(outfd, strbuffer, strlen(strbuffer)); } if (line < maxrow) - wprintf(L"\r\n"); + write(outfd, "\r\n", 2); } - fflush(stdout); - wprintf(L"\x1b[%ld;%ldH", curs->r + 1, curs->c + 1); - fflush(stdout); + //fflush(stdout); + snprintf(strbuffer, sizeof(strbuffer), "\x1b[%ld;%ldH", curs->r + 1, curs->c + 1); + write(outfd, strbuffer, strlen(strbuffer)); + //fflush(stdout); +} + +int handle_traffic(int fd) { + int cmd, length; + char currnode[MAXNAMELEN]; + char cmdbuf[MAXDATALEN]; + char *nodename; + TMT *currvt = NULL; + TMT *outvt = NULL; + length = read(fd, &cmd, 4); + if (length <= 0) { + return 0; + } + length = cmd & 536870911; + cmd = cmd >> 29; + if (cmd == SETNODE) { + cmd = read(fd, currnode, 
length); + currnode[length] = 0; + if (cmd < 0) + return 0; + currvt = set_termentbyname(currnode, fd); + } else if (cmd == WRITE) { + if (currvt == NULL) { + nodename = nodenames[fd]; + currvt = set_termentbyname(nodename, fd); + } + cmd = read(fd, cmdbuf, length); + cmdbuf[length] = 0; + if (cmd < 0) + return 0; + tmt_write(currvt, cmdbuf, length); + } else if (cmd == READBUFF) { + cmd = read(fd, cmdbuf, length); + cmdbuf[length] = 0; + if (cmd < 0) + return 0; + outvt = get_termentbyname(cmdbuf); + if (outvt != NULL) + dump_vt(outvt, fd); + length = write(fd, "\x00", 1); + if (length < 0) + return 0; + } else if (cmd == CLOSECONN) { + return 0; + } + return 1; } int main(int argc, char* argv[]) { - int cmd, length; setlocale(LC_ALL, ""); - char cmdbuf[MAXDATALEN]; - char currnode[MAXNAMELEN]; - TMT *currvt = NULL; - TMT *outvt = NULL; + struct sockaddr_un addr; + int numevts; + int status; + int poller; + int n; + socklen_t len; + int ctlsock, currsock; + socklen_t addrlen; + struct ucred ucr; + + struct epoll_event epvt, evts[MAXEVTS]; stdin = freopen(NULL, "rb", stdin); if (stdin == NULL) { exit(1); } + memset(&addr, 0, sizeof(struct sockaddr_un)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path + 1, argv[1], sizeof(addr.sun_path) - 2); // abstract namespace socket + ctlsock = socket(AF_UNIX, SOCK_STREAM, 0); + status = bind(ctlsock, (const struct sockaddr*)&addr, sizeof(sa_family_t) + strlen(argv[1]) + 1); //sizeof(struct sockaddr_un)); + if (status < 0) { + perror("Unable to open unix socket - "); + exit(1); + } + listen(ctlsock, 128); + poller = epoll_create(1); + memset(&epvt, 0, sizeof(struct epoll_event)); + epvt.events = EPOLLIN; + epvt.data.fd = ctlsock; + if (epoll_ctl(poller, EPOLL_CTL_ADD, ctlsock, &epvt) < 0) { + perror("Unable to poll the socket"); + exit(1); + } + // create a unix domain socket for accepting, each connection is only allowed to either read or write, not both while (1) { - length = fread(&cmd, 4, 1, stdin); - if (length < 0) - 
continue; - length = cmd & 536870911; - cmd = cmd >> 29; - if (cmd == SETNODE) { - cmd = fread(currnode, 1, length, stdin); - currnode[length] = 0; - if (cmd < 0) - continue; - currvt = set_termentbyname(currnode); - } else if (cmd == WRITE) { - if (currvt == NULL) - currvt = set_termentbyname(""); - cmd = fread(cmdbuf, 1, length, stdin); - cmdbuf[length] = 0; - if (cmd < 0) - continue; - tmt_write(currvt, cmdbuf, length); - } else if (cmd == READBUFF) { - cmd = fread(cmdbuf, 1, length, stdin); - cmdbuf[length] = 0; - if (cmd < 0) - continue; - outvt = get_termentbyname(cmdbuf); - if (outvt != NULL) - dump_vt(outvt); - length = write(1, "\x00", 1); - if (length < 0) - continue; + numevts = epoll_wait(poller, evts, MAXEVTS, -1); + if (numevts < 0) { + perror("Failed wait"); + exit(1); + } + for (n = 0; n < numevts; ++n) { + if (evts[n].data.fd == ctlsock) { + currsock = accept(ctlsock, (struct sockaddr *) &addr, &addrlen); + len = sizeof(ucr); + getsockopt(currsock, SOL_SOCKET, SO_PEERCRED, &ucr, &len); + if (ucr.uid != getuid()) { // block access for other users + close(currsock); + continue; + } + memset(&epvt, 0, sizeof(struct epoll_event)); + epvt.events = EPOLLIN; + epvt.data.fd = currsock; + epoll_ctl(poller, EPOLL_CTL_ADD, currsock, &epvt); + } else { + if (!handle_traffic(evts[n].data.fd)) { + epoll_ctl(poller, EPOLL_CTL_DEL, evts[n].data.fd, NULL); + close(evts[n].data.fd); + if (nodenames[evts[n].data.fd] != NULL) { + free(nodenames[evts[n].data.fd]); + nodenames[evts[n].data.fd] = NULL; + } + } + } } } } + + From b3b3627bf926c3f18e0578fd3f8033463af4855a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 22 Feb 2024 15:07:12 -0500 Subject: [PATCH 095/122] Remove disused bufferlock We no longer use a lock on buffer communication, eliminate the stale variable. 
--- confluent_server/confluent/consoleserver.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index 783d77de..7b9530f2 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -49,7 +49,6 @@ _handled_consoles = {} _tracelog = None _bufferdaemon = None -_bufferlock = None try: range = xrange @@ -599,8 +598,6 @@ def _start_tenant_sessions(cfm): def initialize(): global _tracelog global _bufferdaemon - global _bufferlock - _bufferlock = semaphore.Semaphore() _tracelog = log.Logger('trace') _bufferdaemon = subprocess.Popen( ['/opt/confluent/bin/vtbufferd', 'confluent-vtbuffer'], bufsize=0, stdin=subprocess.DEVNULL, From d183a3f99cb452c2a8e0a38dcc9a2c30bb834db4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 1 Feb 2024 08:50:44 -0500 Subject: [PATCH 096/122] Fix problem where one multicast/broadcast attempt could tank other interfaces Carrying over change from ssdp, ignore failures on transmit, particularly if firewall --- confluent_server/confluent/discovery/protocols/slp.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/confluent_server/confluent/discovery/protocols/slp.py b/confluent_server/confluent/discovery/protocols/slp.py index e42c1577..ac332def 100644 --- a/confluent_server/confluent/discovery/protocols/slp.py +++ b/confluent_server/confluent/discovery/protocols/slp.py @@ -246,11 +246,11 @@ def _find_srvtype(net, net4, srvtype, addresses, xid): try: net4.sendto(data, ('239.255.255.253', 427)) except socket.error as se: - # On occasion, multicasting may be disabled - # tolerate this scenario and move on - if se.errno != 101: - raise - net4.sendto(data, (bcast, 427)) + pass + try: + net4.sendto(data, (bcast, 427)) + except socket.error as se: + pass def _grab_rsps(socks, rsps, interval, xidmap, deferrals): From 7b3129a1a2a736d8d3672f2f983eb2ccd0062400 Mon Sep 17 00:00:00 2001 
From: Jarrod Johnson Date: Tue, 30 Jan 2024 09:08:28 -0500 Subject: [PATCH 097/122] Fix FFDC preflight checks The code was comparing two string constants, instead of a variable to a constant. Correct the problem to enable the preflight checks to work as intended. --- confluent_server/confluent/firmwaremanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/firmwaremanager.py b/confluent_server/confluent/firmwaremanager.py index a7713943..eb5d4c86 100644 --- a/confluent_server/confluent/firmwaremanager.py +++ b/confluent_server/confluent/firmwaremanager.py @@ -53,7 +53,7 @@ def execupdate(handler, filename, updateobj, type, owner, node, datfile): return if type == 'ffdc' and os.path.isdir(filename): filename += '/' + node - if 'type' == 'ffdc': + if type == 'ffdc': errstr = False if os.path.exists(filename): errstr = '{0} already exists on {1}, cannot overwrite'.format( From 17fff4997baa5b344013b9f4003bbe166bd94db3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 12 Jan 2024 10:52:49 -0500 Subject: [PATCH 098/122] Fix omission of info dir in plugins --- confluent_server/setup.py.tmpl | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_server/setup.py.tmpl b/confluent_server/setup.py.tmpl index e6bd08b2..871497e3 100644 --- a/confluent_server/setup.py.tmpl +++ b/confluent_server/setup.py.tmpl @@ -19,6 +19,7 @@ setup( 'confluent/plugins/hardwaremanagement/', 'confluent/plugins/deployment/', 'confluent/plugins/console/', + 'confluent/plugins/info/', 'confluent/plugins/shell/', 'confluent/collective/', 'confluent/plugins/configuration/'], From ddb8c4cce44ac8d0d30385cea7d466ea862d3c73 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Dec 2023 15:55:17 -0500 Subject: [PATCH 099/122] Fix a few noderange abbreviations Also, add some test cases on abbreviation to help sanity check things in the future. 
--- confluent_server/confluent/noderange.py | 46 ++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index df4552b8..cf99dd72 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -96,6 +96,7 @@ class Bracketer(object): txtnums = getnumbers_nodename(nodename) nums = [int(x) for x in txtnums] for n in range(self.count): + # First pass to see if we have exactly one different number padto = len(txtnums[n]) needpad = (padto != len('{}'.format(nums[n]))) if self.sequences[n] is None: @@ -105,7 +106,24 @@ class Bracketer(object): elif self.sequences[n][2] == nums[n] and self.numlens[n][1] == padto: continue # new nodename has no new number, keep going else: # if self.sequences[n][2] != nums[n] or : - if self.diffn is not None and (n != self.diffn or + if self.diffn is not None and (n != self.diffn or + (padto < self.numlens[n][1]) or + (needpad and padto != self.numlens[n][1])): + self.flush_current() + self.sequences[n] = [[], nums[n], nums[n]] + self.numlens[n] = [padto, padto] + self.diffn = n + for n in range(self.count): + padto = len(txtnums[n]) + needpad = (padto != len('{}'.format(nums[n]))) + if self.sequences[n] is None: + # We initialize to text pieces, 'currstart', and 'prev' number + self.sequences[n] = [[], nums[n], nums[n]] + self.numlens[n] = [len(txtnums[n]), len(txtnums[n])] + elif self.sequences[n][2] == nums[n] and self.numlens[n][1] == padto: + continue # new nodename has no new number, keep going + else: # if self.sequences[n][2] != nums[n] or : + if self.diffn is not None and (n != self.diffn or (padto < self.numlens[n][1]) or (needpad and padto != self.numlens[n][1])): self.flush_current() @@ -449,3 +467,29 @@ class NodeRange(object): if self.cfm is None: return set([element]) raise Exception(element + ' not a recognized node, group, or alias') + +if __name__ == '__main__': + cases = [ 
+ (['r3u4', 'r5u6'], 'r3u4,r5u6'), # should not erroneously gather + (['r3u4s1', 'r5u6s3'], 'r3u4s1,r5u6s3'), # should not erroneously gather + (['r3u4s1', 'r3u4s2', 'r5u4s3'], 'r3u4s[1:2],r5u4s3'), # should not erroneously gather + (['r3u4', 'r3u5', 'r3u6', 'r3u9', 'r4u1'], 'r3u[4:6,9],r4u1'), + (['n01', 'n2', 'n03'], 'n01,n2,n03'), + (['n7', 'n8', 'n09', 'n10', 'n11', 'n12', 'n13', 'n14', 'n15', 'n16', + 'n17', 'n18', 'n19', 'n20'], 'n[7:8],n[09:20]') + ] + for case in cases: + gc = case[0] + bracketer = Bracketer(gc[0]) + for chnk in gc[1:]: + bracketer.extend(chnk) + br = bracketer.range + resnodes = NodeRange(br).nodes + if set(resnodes) != set(gc): + print('FAILED: ' + repr(sorted(gc))) + print('RESULT: ' + repr(sorted(resnodes))) + print('EXPECTED: ' + repr(case[1])) + print('ACTUAL: ' + br) + + + From 661b2ae81542deffa9c53b7df844389515470eef Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 27 Nov 2023 08:34:34 -0500 Subject: [PATCH 100/122] Filter out nvme 'c' devnames, that are used to refer to paths to nvme Some versions start manifesting nvme devnames with 'c', which are to be used to interact with multipath to have raw devices backing a traditional nvme device. 
--- .../el7-diskless/profiles/default/scripts/getinstalldisk | 2 ++ confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk | 2 ++ .../el8-diskless/profiles/default/scripts/getinstalldisk | 2 ++ confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk | 2 ++ .../el9-diskless/profiles/default/scripts/getinstalldisk | 2 ++ .../rhvh4/profiles/default/scripts/getinstalldisk | 2 ++ confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk | 2 ++ .../suse15/profiles/server/scripts/getinstalldisk | 2 ++ .../profiles/default/scripts/getinstalldisk | 2 ++ .../ubuntu20.04/profiles/default/scripts/getinstalldisk | 2 ++ .../ubuntu22.04/profiles/default/scripts/getinstalldisk | 2 ++ 11 files changed, 22 insertions(+) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk 
b/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def 
__init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk b/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk b/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk +++ b/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk 
b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None From 03bdbfc8ed43f64e25a9b103fa1874e774dbe362 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 13 Feb 2024 15:58:08 -0500 Subject: [PATCH 101/122] Provide more useful error messages on mistakes within [] --- confluent_server/confluent/noderange.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index cf99dd72..4a5cb808 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -402,12 +402,16 @@ class NodeRange(object): def _expandstring(self, element, filternodes=None): prefix = '' if element[0][0] in ('/', '~'): + if self.purenumeric: + raise Exception('Regular expression not supported within "[]"') element = ''.join(element) nameexpression = element[1:] if self.cfm is None: raise Exception('Verification configmanager required') return set(self.cfm.filter_nodenames(nameexpression, 
filternodes)) elif '=' in element[0] or '!~' in element[0]: + if self.purenumeric: + raise Exception('The "=" character is invalid within "[]"') element = ''.join(element) if self.cfm is None: raise Exception('Verification configmanager required') From 12bb5d583a7d58d2326521d455426ea887e1449e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 13 Feb 2024 16:00:50 -0500 Subject: [PATCH 102/122] Correct the equality message in better messages --- confluent_server/confluent/noderange.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index 4a5cb808..7657292c 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -411,7 +411,7 @@ class NodeRange(object): return set(self.cfm.filter_nodenames(nameexpression, filternodes)) elif '=' in element[0] or '!~' in element[0]: if self.purenumeric: - raise Exception('The "=" character is invalid within "[]"') + raise Exception('Equality/Inequality operators (=, !=, =~, !~) are invalid within "[]"') element = ''.join(element) if self.cfm is None: raise Exception('Verification configmanager required') From 34804b2d5f70e3f1835ab5368da589053f188ca7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 1 Apr 2024 12:13:21 -0400 Subject: [PATCH 103/122] Provide components for cert management with modern XCC Refresh getcsr and installcert to handle latest firmware. Also add ability to have pre-existing CSR, and trust the SAN on the way through. If this becomes more properly a feature, then would likely impose a SAN on certs, similar to the SSH principals, rather than deferring to the CSR to get it right.
--- confluent_server/confluent/certutil.py | 76 +++++++++++++++++++------- misc/getcsr.py | 54 ++++++++++++++---- misc/installcert.py | 15 +++++ 3 files changed, 112 insertions(+), 33 deletions(-) diff --git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index 2e788bad..9a478787 100644 --- a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -206,7 +206,7 @@ def create_simple_ca(keyout, certout): finally: os.remove(tmpconfig) -def create_certificate(keyout=None, certout=None): +def create_certificate(keyout=None, certout=None, csrout=None): if not keyout: keyout, certout = get_certificate_paths() if not keyout: @@ -214,9 +214,10 @@ def create_certificate(keyout=None, certout=None): assure_tls_ca() shortname = socket.gethostname().split('.')[0] longname = shortname # socket.getfqdn() - subprocess.check_call( - ['openssl', 'ecparam', '-name', 'secp384r1', '-genkey', '-out', - keyout]) + if not csrout: + subprocess.check_call( + ['openssl', 'ecparam', '-name', 'secp384r1', '-genkey', '-out', + keyout]) san = ['IP:{0}'.format(x) for x in get_ip_addresses()] # It is incorrect to put IP addresses as DNS type. 
However # there exists non-compliant clients that fail with them as IP @@ -229,21 +230,34 @@ def create_certificate(keyout=None, certout=None): os.close(tmphdl) tmphdl, extconfig = tempfile.mkstemp() os.close(tmphdl) - tmphdl, csrout = tempfile.mkstemp() - os.close(tmphdl) + needcsr = False + if csrout is None: + needcsr = True + tmphdl, csrout = tempfile.mkstemp() + os.close(tmphdl) shutil.copy2(sslcfg, tmpconfig) - serialnum = '0x' + ''.join(['{:02x}'.format(x) for x in bytearray(os.urandom(20))]) try: - with open(tmpconfig, 'a') as cfgfile: - cfgfile.write('\n[SAN]\nsubjectAltName={0}'.format(san)) - with open(extconfig, 'a') as cfgfile: - cfgfile.write('\nbasicConstraints=CA:false\nsubjectAltName={0}'.format(san)) - subprocess.check_call([ - 'openssl', 'req', '-new', '-key', keyout, '-out', csrout, '-subj', - '/CN={0}'.format(longname), - '-extensions', 'SAN', '-config', tmpconfig - ]) + if needcsr: + with open(tmpconfig, 'a') as cfgfile: + cfgfile.write('\n[SAN]\nsubjectAltName={0}'.format(san)) + with open(extconfig, 'a') as cfgfile: + cfgfile.write('\nbasicConstraints=CA:false\nsubjectAltName={0}'.format(san)) + subprocess.check_call([ + 'openssl', 'req', '-new', '-key', keyout, '-out', csrout, '-subj', + '/CN={0}'.format(longname), + '-extensions', 'SAN', '-config', tmpconfig + ]) + else: + # when used manually, allow the csr SAN to stand + # may add explicit subj/SAN argument, in which case we would skip copy + with open(tmpconfig, 'a') as cfgfile: + cfgfile.write('\ncopy_extensions=copy\n') + with open(extconfig, 'a') as cfgfile: + cfgfile.write('\nbasicConstraints=CA:false\n') if os.path.exists('/etc/confluent/tls/cakey.pem'): + # simple style CA in effect, make a random serial number and + # hope for the best, and accept inability to backdate the cert + serialnum = '0x' + ''.join(['{:02x}'.format(x) for x in bytearray(os.urandom(20))]) subprocess.check_call([ 'openssl', 'x509', '-req', '-in', csrout, '-CA', '/etc/confluent/tls/cacert.pem', @@ -252,20 
+266,40 @@ def create_certificate(keyout=None, certout=None): '-extfile', extconfig ]) else: + # we moved to a 'proper' CA, mainly for access to backdating + # start of certs for finicky system clocks + # this also provides a harder guarantee of serial uniqueness, but + # not of practical consequence (160 bit random value is as good as + # guaranteed unique) + # downside is certificate generation is serialized + cacfgfile = '/etc/confluent/tls/ca/openssl.cfg' + if needcsr: + tmphdl, tmpcafile = tempfile.mkstemp() + shutil.copy2(cacfgfile, tmpcafile) + os.close(tmphdl) + cacfgfile = tmpcafile + # with realcalock: # if we put it in server, we must lock it subprocess.check_call([ - 'openssl', 'ca', '-config', '/etc/confluent/tls/ca/openssl.cfg', + 'openssl', 'ca', '-config', cacfgfile, '-in', csrout, '-out', certout, '-batch', '-notext', '-startdate', '19700101010101Z', '-enddate', '21000101010101Z', '-extfile', extconfig ]) finally: os.remove(tmpconfig) - os.remove(csrout) - os.remove(extconfig) + if needcsr: + os.remove(csrout) + print(extconfig) # os.remove(extconfig) if __name__ == '__main__': + import sys outdir = os.getcwd() keyout = os.path.join(outdir, 'key.pem') - certout = os.path.join(outdir, 'cert.pem') - create_certificate(keyout, certout) + certout = os.path.join(outdir, sys.argv[2] + 'cert.pem') + csrout = None + try: + csrout = sys.argv[1] + except IndexError: + csrout = None + create_certificate(keyout, certout, csrout) diff --git a/misc/getcsr.py b/misc/getcsr.py index 253bfcd8..6f956b2d 100644 --- a/misc/getcsr.py +++ b/misc/getcsr.py @@ -12,11 +12,40 @@ ap.add_argument('--state', help='State or Province') ap.add_argument('--city', help='City or Locality') ap.add_argument('--org', help='Organization name') ap.add_argument('--name', help='Common/Host Name') +ap.add_argument('outcsr', help='CSR filename to save') args = ap.parse_args() c = cmd.Command(args.xcc, os.environ['XCCUSER'], os.environ['XCCPASS'], verifycallback=lambda x: True) -params = [ + 
+overview = c._do_web_request('/redfish/v1/') +cs = overview.get('CertificateService', {}).get('@odata.id', None) +if cs: + csinfo = c._do_web_request(cs) + gcsr = csinfo.get('Actions', {}).get('#CertificateService.GenerateCSR', {}).get('target', None) + if gcsr: + #https://n241-bmc/redfish/v1/Managers/1/NetworkProtocol HTTPS + #/redfish/v1/Managers/1/NetworkProtocol/HTTPS/Certificates + #/redfish/v1/CertificateService/CertificateLocations + csrargs = { + 'City': args.city, + 'State': args.state, + 'Organization': args.org, + 'Country': args.country, + 'CommonName': args.name, + 'KeyPairAlgorithm': 'TPM_ALG_ECDH', + 'KeyCurveId': 'TPM_ECC_NIST_P384', + 'CertificateCollection': { '@odata.id': '/redfish/v1/Managers/1/NetworkProtocol/HTTPS/Certificates'} + } + + csrinfo = c._do_web_request(gcsr, csrargs) + if 'CSRString' in csrinfo: + with open(args.outcsr, 'w') as csrout: + csrout.write(csrinfo['CSRString']) + sys.exit(0) + +else: + params = [ '0', # 'serviceType' args.country, args.state, @@ -32,15 +61,16 @@ params = [ '', '', '', -] -wc = c.oem.wc -rsp, status = wc.grab_json_response_with_status('/api/function', {'Sec_GenKeyAndCSR': ','.join(params)}) -rsp, status = wc.grab_json_response_with_status('/api/dataset', {'CSR_Format': '1'}) -rsp, status = wc.grab_json_response_with_status('/api/function', {'Sec_DownloadCSRANDCert': '0,4,0'}) -wc.request('GET', '/download/{0}'.format(rsp['FileName'])) -rsp = wc.getresponse() -csr = rsp.read() -if rsp.getheader('Content-Encoding', None) == 'gzip': - csr = gzip.GzipFile(fileobj=io.BytesIO(csr)).read() -print(csr) + ] + + wc = c.oem.wc + rsp, status = wc.grab_json_response_with_status('/api/function', {'Sec_GenKeyAndCSR': ','.join(params)}) + rsp, status = wc.grab_json_response_with_status('/api/dataset', {'CSR_Format': '1'}) + rsp, status = wc.grab_json_response_with_status('/api/function', {'Sec_DownloadCSRANDCert': '0,4,0'}) + wc.request('GET', '/download/{0}'.format(rsp['FileName'])) + rsp = wc.getresponse() + csr = 
rsp.read() + if rsp.getheader('Content-Encoding', None) == 'gzip': + csr = gzip.GzipFile(fileobj=io.BytesIO(csr)).read() + print(csr) diff --git a/misc/installcert.py b/misc/installcert.py index 9654bf54..2d53e800 100644 --- a/misc/installcert.py +++ b/misc/installcert.py @@ -8,8 +8,23 @@ ap.add_argument('xcc', help='XCC address') ap.add_argument('cert', help='Certificate in PEM format') args = ap.parse_args() +cert = open(args.cert, 'r').read() c = cmd.Command(args.xcc, os.environ['XCCUSER'], os.environ['XCCPASS'], verifycallback=lambda x: True) +overview = c._do_web_request('/redfish/v1/') +cs = overview.get('CertificateService', {}).get('@odata.id', None) +if cs: + csinfo = c._do_web_request(cs) + gcsr = csinfo.get('Actions', {}).get('#CertificateService.ReplaceCertificate', {}).get('target', None) + if gcsr: + repcertargs = { + 'CertificateUri': { '@odata.id': '/redfish/v1/Managers/1/NetworkProtocol/HTTPS/Certificates/1' }, + 'CertificateType': 'PEM', + 'CertificateString': cert } + print(repr(c._do_web_request(gcsr, repcertargs))) + sys.exit(0) + + #CertificateService.ReplaceCertificate wc = c.oem.wc cert = open(args.cert, 'rb').read() res = wc.grab_json_response_with_status('/api/function', {'Sec_ImportCert': '0,1,0,0,,{0}'.format(cert)}) From a4e152c17d226cb613a7b684bb4ff215a7e2e131 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Apr 2024 10:31:46 -0400 Subject: [PATCH 104/122] Defer disarm until after successful client notification It is theoretically possible for a client to get disconnected right in the middle. In such a scenario, err on the side of letting the mechanism stay armed for the sake of a retry being possible. 
--- confluent_server/confluent/credserver.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/confluent_server/confluent/credserver.py b/confluent_server/confluent/credserver.py index c569bc4d..390179f8 100644 --- a/confluent_server/confluent/credserver.py +++ b/confluent_server/confluent/credserver.py @@ -127,14 +127,15 @@ class CredServer(object): if hmacval != hmac.new(hmackey, etok, hashlib.sha256).digest(): client.close() return - cfgupdate = {nodename: {'crypted.selfapikey': {'hashvalue': echotoken}, 'deployment.sealedapikey': '', 'deployment.apiarmed': ''}} - if hmackey and apiarmed != 'continuous': - self.cfm.clear_node_attributes([nodename], ['secret.selfapiarmtoken']) - if apiarmed == 'continuous': - del cfgupdate[nodename]['deployment.apiarmed'] + cfgupdate = {nodename: {'crypted.selfapikey': {'hashvalue': echotoken}}} self.cfm.set_node_attributes(cfgupdate) client.recv(2) # drain end of message client.send(b'\x05\x00') # report success + if hmackey and apiarmed != 'continuous': + self.cfm.clear_node_attributes([nodename], ['secret.selfapiarmtoken']) + if apiarmed != 'continuous': + tokclear = {nodename: {'deployment.sealedapikey': '', 'deployment.apiarmed': ''}} + self.cfm.set_node_attributes(tokclear) finally: client.close() From f68f9f46939ed115b501b125212359777cf045c6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Apr 2024 11:07:11 -0400 Subject: [PATCH 105/122] Make syncfile step robust or pause If syncfiles fails, keep it retrying. Also, slow down sync checking to avoid hammering the system. Further, randomized delay to spread highly synchronized requestors. Block attempts to do multiple concurrent syncfile runs. 
--- .../profiles/default/scripts/syncfileclient | 2 ++ .../profiles/default/scripts/syncfileclient | 2 ++ .../profiles/default/scripts/syncfileclient | 18 +++++++++++++++++- .../profiles/default/scripts/syncfileclient | 18 +++++++++++++++++- .../profiles/default/scripts/syncfileclient | 18 +++++++++++++++++- .../profiles/default/scripts/syncfileclient | 18 +++++++++++++++++- .../profiles/default/scripts/syncfileclient | 18 +++++++++++++++++- .../suse15/profiles/hpc/scripts/syncfileclient | 18 +++++++++++++++++- .../profiles/server/scripts/syncfileclient | 18 +++++++++++++++++- .../profiles/default/scripts/syncfileclient | 18 +++++++++++++++++- .../profiles/default/scripts/syncfileclient | 18 +++++++++++++++++- .../profiles/default/scripts/syncfileclient | 18 +++++++++++++++++- confluent_server/confluent/syncfiles.py | 2 ++ 13 files changed, 176 insertions(+), 10 deletions(-) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el7-diskless/profiles/default/scripts/syncfileclient index 8d37d43a..cca0f57d 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/syncfileclient @@ -1,4 +1,5 @@ #!/usr/bin/python +import time import importlib import tempfile import json @@ -223,6 +224,7 @@ def synchronize(): if status == 202: lastrsp = '' while status != 204: + time.sleep(2) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') diff --git a/confluent_osdeploy/el7/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el7/profiles/default/scripts/syncfileclient index 8d37d43a..02dbcc4d 100644 --- a/confluent_osdeploy/el7/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el7/profiles/default/scripts/syncfileclient @@ -5,6 +5,7 @@ import json import os import shutil import pwd +import time import grp try: from importlib.machinery import 
SourceFileLoader @@ -223,6 +224,7 @@ def synchronize(): if status == 202: lastrsp = '' while status != 204: + time.sleep(2) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient index f7d4c0b4..088fa9f7 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient @@ -1,4 +1,6 @@ #!/usr/bin/python3 +import random +import time import subprocess import importlib import tempfile @@ -227,9 +229,14 @@ def synchronize(): myips.append(addr) data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) + if status >= 300: + sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) + sys.stderr.write(repr(rsp)) + return status if status == 202: lastrsp = '' while status != 204: + time.sleep(1+(2*random.random(a))) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') @@ -277,10 +284,19 @@ def synchronize(): os.chmod(fname, int(opts[fname][opt], 8)) if uid != -1 or gid != -1: os.chown(fname, uid, gid) + return status finally: shutil.rmtree(tmpdir) shutil.rmtree(appendoncedir) if __name__ == '__main__': - synchronize() + status = 202 + while status not in (204, 200): + try: + status = synchronize() + except Exception as e: + sys.stderr.write(str(e)) + status = 300 + if status not in (204, 200): + time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient index f7d4c0b4..088fa9f7 100644 --- 
a/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient @@ -1,4 +1,6 @@ #!/usr/bin/python3 +import random +import time import subprocess import importlib import tempfile @@ -227,9 +229,14 @@ def synchronize(): myips.append(addr) data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) + if status >= 300: + sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) + sys.stderr.write(repr(rsp)) + return status if status == 202: lastrsp = '' while status != 204: + time.sleep(1+(2*random.random(a))) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') @@ -277,10 +284,19 @@ def synchronize(): os.chmod(fname, int(opts[fname][opt], 8)) if uid != -1 or gid != -1: os.chown(fname, uid, gid) + return status finally: shutil.rmtree(tmpdir) shutil.rmtree(appendoncedir) if __name__ == '__main__': - synchronize() + status = 202 + while status not in (204, 200): + try: + status = synchronize() + except Exception as e: + sys.stderr.write(str(e)) + status = 300 + if status not in (204, 200): + time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient index f7d4c0b4..088fa9f7 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient @@ -1,4 +1,6 @@ #!/usr/bin/python3 +import random +import time import subprocess import importlib import tempfile @@ -227,9 +229,14 @@ def synchronize(): myips.append(addr) data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) + if status >= 
300: + sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) + sys.stderr.write(repr(rsp)) + return status if status == 202: lastrsp = '' while status != 204: + time.sleep(1+(2*random.random(a))) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') @@ -277,10 +284,19 @@ def synchronize(): os.chmod(fname, int(opts[fname][opt], 8)) if uid != -1 or gid != -1: os.chown(fname, uid, gid) + return status finally: shutil.rmtree(tmpdir) shutil.rmtree(appendoncedir) if __name__ == '__main__': - synchronize() + status = 202 + while status not in (204, 200): + try: + status = synchronize() + except Exception as e: + sys.stderr.write(str(e)) + status = 300 + if status not in (204, 200): + time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient b/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient index f7d4c0b4..088fa9f7 100644 --- a/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient @@ -1,4 +1,6 @@ #!/usr/bin/python3 +import random +import time import subprocess import importlib import tempfile @@ -227,9 +229,14 @@ def synchronize(): myips.append(addr) data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) + if status >= 300: + sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) + sys.stderr.write(repr(rsp)) + return status if status == 202: lastrsp = '' while status != 204: + time.sleep(1+(2*random.random(a))) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') @@ -277,10 +284,19 @@ def synchronize(): os.chmod(fname, int(opts[fname][opt], 8)) if uid != -1 or gid != -1: os.chown(fname, uid, gid) + return status finally: 
shutil.rmtree(tmpdir) shutil.rmtree(appendoncedir) if __name__ == '__main__': - synchronize() + status = 202 + while status not in (204, 200): + try: + status = synchronize() + except Exception as e: + sys.stderr.write(str(e)) + status = 300 + if status not in (204, 200): + time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient index f7d4c0b4..088fa9f7 100644 --- a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient @@ -1,4 +1,6 @@ #!/usr/bin/python3 +import random +import time import subprocess import importlib import tempfile @@ -227,9 +229,14 @@ def synchronize(): myips.append(addr) data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) + if status >= 300: + sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) + sys.stderr.write(repr(rsp)) + return status if status == 202: lastrsp = '' while status != 204: + time.sleep(1+(2*random.random(a))) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') @@ -277,10 +284,19 @@ def synchronize(): os.chmod(fname, int(opts[fname][opt], 8)) if uid != -1 or gid != -1: os.chown(fname, uid, gid) + return status finally: shutil.rmtree(tmpdir) shutil.rmtree(appendoncedir) if __name__ == '__main__': - synchronize() + status = 202 + while status not in (204, 200): + try: + status = synchronize() + except Exception as e: + sys.stderr.write(str(e)) + status = 300 + if status not in (204, 200): + time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient b/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient index f7d4c0b4..088fa9f7 
100644 --- a/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient @@ -1,4 +1,6 @@ #!/usr/bin/python3 +import random +import time import subprocess import importlib import tempfile @@ -227,9 +229,14 @@ def synchronize(): myips.append(addr) data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) + if status >= 300: + sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) + sys.stderr.write(repr(rsp)) + return status if status == 202: lastrsp = '' while status != 204: + time.sleep(1+(2*random.random(a))) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') @@ -277,10 +284,19 @@ def synchronize(): os.chmod(fname, int(opts[fname][opt], 8)) if uid != -1 or gid != -1: os.chown(fname, uid, gid) + return status finally: shutil.rmtree(tmpdir) shutil.rmtree(appendoncedir) if __name__ == '__main__': - synchronize() + status = 202 + while status not in (204, 200): + try: + status = synchronize() + except Exception as e: + sys.stderr.write(str(e)) + status = 300 + if status not in (204, 200): + time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient b/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient index f7d4c0b4..088fa9f7 100644 --- a/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient +++ b/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient @@ -1,4 +1,6 @@ #!/usr/bin/python3 +import random +import time import subprocess import importlib import tempfile @@ -227,9 +229,14 @@ def synchronize(): myips.append(addr) data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) + if status >= 300: + 
sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) + sys.stderr.write(repr(rsp)) + return status if status == 202: lastrsp = '' while status != 204: + time.sleep(1+(2*random.random(a))) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') @@ -277,10 +284,19 @@ def synchronize(): os.chmod(fname, int(opts[fname][opt], 8)) if uid != -1 or gid != -1: os.chown(fname, uid, gid) + return status finally: shutil.rmtree(tmpdir) shutil.rmtree(appendoncedir) if __name__ == '__main__': - synchronize() + status = 202 + while status not in (204, 200): + try: + status = synchronize() + except Exception as e: + sys.stderr.write(str(e)) + status = 300 + if status not in (204, 200): + time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient index f7d4c0b4..088fa9f7 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient @@ -1,4 +1,6 @@ #!/usr/bin/python3 +import random +import time import subprocess import importlib import tempfile @@ -227,9 +229,14 @@ def synchronize(): myips.append(addr) data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) + if status >= 300: + sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) + sys.stderr.write(repr(rsp)) + return status if status == 202: lastrsp = '' while status != 204: + time.sleep(1+(2*random.random(a))) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') @@ -277,10 +284,19 @@ def synchronize(): os.chmod(fname, int(opts[fname][opt], 8)) if uid != -1 or gid != -1: 
os.chown(fname, uid, gid) + return status finally: shutil.rmtree(tmpdir) shutil.rmtree(appendoncedir) if __name__ == '__main__': - synchronize() + status = 202 + while status not in (204, 200): + try: + status = synchronize() + except Exception as e: + sys.stderr.write(str(e)) + status = 300 + if status not in (204, 200): + time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient index f7d4c0b4..088fa9f7 100644 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient @@ -1,4 +1,6 @@ #!/usr/bin/python3 +import random +import time import subprocess import importlib import tempfile @@ -227,9 +229,14 @@ def synchronize(): myips.append(addr) data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) + if status >= 300: + sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) + sys.stderr.write(repr(rsp)) + return status if status == 202: lastrsp = '' while status != 204: + time.sleep(1+(2*random.random(a))) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') @@ -277,10 +284,19 @@ def synchronize(): os.chmod(fname, int(opts[fname][opt], 8)) if uid != -1 or gid != -1: os.chown(fname, uid, gid) + return status finally: shutil.rmtree(tmpdir) shutil.rmtree(appendoncedir) if __name__ == '__main__': - synchronize() + status = 202 + while status not in (204, 200): + try: + status = synchronize() + except Exception as e: + sys.stderr.write(str(e)) + status = 300 + if status not in (204, 200): + time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient 
b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient index f7d4c0b4..088fa9f7 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient @@ -1,4 +1,6 @@ #!/usr/bin/python3 +import random +import time import subprocess import importlib import tempfile @@ -227,9 +229,14 @@ def synchronize(): myips.append(addr) data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) + if status >= 300: + sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) + sys.stderr.write(repr(rsp)) + return status if status == 202: lastrsp = '' while status != 204: + time.sleep(1+(2*random.random(a))) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') @@ -277,10 +284,19 @@ def synchronize(): os.chmod(fname, int(opts[fname][opt], 8)) if uid != -1 or gid != -1: os.chown(fname, uid, gid) + return status finally: shutil.rmtree(tmpdir) shutil.rmtree(appendoncedir) if __name__ == '__main__': - synchronize() + status = 202 + while status not in (204, 200): + try: + status = synchronize() + except Exception as e: + sys.stderr.write(str(e)) + status = 300 + if status not in (204, 200): + time.sleep((random.random()*3)+2) diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index 94b74eea..df5574e3 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -289,6 +289,8 @@ syncrunners = {} def start_syncfiles(nodename, cfg, suffixes, principals=[]): peerip = None + if nodename in syncrunners: + return '503 Synchronization already in progress ' if 'myips' in suffixes: targips = suffixes['myips'] del suffixes['myips'] From 33271451d711d3bc2a26a10cf22381d948e8aed2 Mon Sep 17 00:00:00 2001 From: Jarrod 
Johnson Date: Tue, 9 Apr 2024 13:17:19 -0400 Subject: [PATCH 106/122] Support SHA384 if used as fingerprint --- confluent_server/confluent/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index 96d2291b..462ec930 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -168,7 +168,7 @@ def cert_matches(fingerprint, certificate): return algo(certificate).digest() == fingerprint algo, _, fp = fingerprint.partition('$') newfp = None - if algo in ('sha512', 'sha256'): + if algo in ('sha512', 'sha256', 'sha384'): newfp = get_fingerprint(certificate, algo) return newfp and fingerprint == newfp From 02f301b5d08eaa20a9e02a4c1bd39be019f073a4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Apr 2024 13:41:27 -0400 Subject: [PATCH 107/122] Fix mistakes in syncfileclient change --- .../el8-diskless/profiles/default/scripts/syncfileclient | 3 ++- confluent_osdeploy/el8/profiles/default/scripts/syncfileclient | 3 ++- .../el9-diskless/profiles/default/scripts/syncfileclient | 3 ++- .../genesis/profiles/default/scripts/syncfileclient | 3 ++- .../suse15-diskless/profiles/default/scripts/syncfileclient | 3 ++- confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient | 3 ++- .../suse15/profiles/server/scripts/syncfileclient | 3 ++- .../profiles/default/scripts/syncfileclient | 3 ++- .../ubuntu20.04/profiles/default/scripts/syncfileclient | 3 ++- .../ubuntu22.04/profiles/default/scripts/syncfileclient | 3 ++- 10 files changed, 20 insertions(+), 10 deletions(-) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient index 088fa9f7..ac60f5f7 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient @@ -9,6 +9,7 @@ import os import shutil 
import pwd import grp +import sys from importlib.machinery import SourceFileLoader try: apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() @@ -236,7 +237,7 @@ def synchronize(): if status == 202: lastrsp = '' while status != 204: - time.sleep(1+(2*random.random(a))) + time.sleep(1+(2*random.random())) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') diff --git a/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient index 088fa9f7..ac60f5f7 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient @@ -9,6 +9,7 @@ import os import shutil import pwd import grp +import sys from importlib.machinery import SourceFileLoader try: apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() @@ -236,7 +237,7 @@ def synchronize(): if status == 202: lastrsp = '' while status != 204: - time.sleep(1+(2*random.random(a))) + time.sleep(1+(2*random.random())) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient index 088fa9f7..ac60f5f7 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient @@ -9,6 +9,7 @@ import os import shutil import pwd import grp +import sys from importlib.machinery import SourceFileLoader try: apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() @@ -236,7 +237,7 @@ def synchronize(): if status == 202: lastrsp = '' while status != 204: - time.sleep(1+(2*random.random(a))) + 
time.sleep(1+(2*random.random())) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') diff --git a/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient b/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient index 088fa9f7..ac60f5f7 100644 --- a/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient @@ -9,6 +9,7 @@ import os import shutil import pwd import grp +import sys from importlib.machinery import SourceFileLoader try: apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() @@ -236,7 +237,7 @@ def synchronize(): if status == 202: lastrsp = '' while status != 204: - time.sleep(1+(2*random.random(a))) + time.sleep(1+(2*random.random())) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') diff --git a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient index 088fa9f7..ac60f5f7 100644 --- a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient @@ -9,6 +9,7 @@ import os import shutil import pwd import grp +import sys from importlib.machinery import SourceFileLoader try: apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() @@ -236,7 +237,7 @@ def synchronize(): if status == 202: lastrsp = '' while status != 204: - time.sleep(1+(2*random.random(a))) + time.sleep(1+(2*random.random())) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient 
b/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient index 088fa9f7..ac60f5f7 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient @@ -9,6 +9,7 @@ import os import shutil import pwd import grp +import sys from importlib.machinery import SourceFileLoader try: apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() @@ -236,7 +237,7 @@ def synchronize(): if status == 202: lastrsp = '' while status != 204: - time.sleep(1+(2*random.random(a))) + time.sleep(1+(2*random.random())) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') diff --git a/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient b/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient index 088fa9f7..ac60f5f7 100644 --- a/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient +++ b/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient @@ -9,6 +9,7 @@ import os import shutil import pwd import grp +import sys from importlib.machinery import SourceFileLoader try: apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() @@ -236,7 +237,7 @@ def synchronize(): if status == 202: lastrsp = '' while status != 204: - time.sleep(1+(2*random.random(a))) + time.sleep(1+(2*random.random())) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient index 088fa9f7..ac60f5f7 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient @@ -9,6 +9,7 @@ import os import shutil import 
pwd import grp +import sys from importlib.machinery import SourceFileLoader try: apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() @@ -236,7 +237,7 @@ def synchronize(): if status == 202: lastrsp = '' while status != 204: - time.sleep(1+(2*random.random(a))) + time.sleep(1+(2*random.random())) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient index 088fa9f7..ac60f5f7 100644 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient @@ -9,6 +9,7 @@ import os import shutil import pwd import grp +import sys from importlib.machinery import SourceFileLoader try: apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() @@ -236,7 +237,7 @@ def synchronize(): if status == 202: lastrsp = '' while status != 204: - time.sleep(1+(2*random.random(a))) + time.sleep(1+(2*random.random())) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient index 088fa9f7..ac60f5f7 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient @@ -9,6 +9,7 @@ import os import shutil import pwd import grp +import sys from importlib.machinery import SourceFileLoader try: apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() @@ -236,7 +237,7 @@ def synchronize(): if status == 202: lastrsp = '' while status != 204: - 
time.sleep(1+(2*random.random(a))) + time.sleep(1+(2*random.random())) status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') if not isinstance(rsp, str): rsp = rsp.decode('utf8') From 8ca9a44476de8da95894d7a7ed3324232f12bc05 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Apr 2024 14:27:00 -0400 Subject: [PATCH 108/122] Provide more interesting response body to syncfileclient --- confluent_server/confluent/selfservice.py | 4 ++-- confluent_server/confluent/syncfiles.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/confluent_server/confluent/selfservice.py b/confluent_server/confluent/selfservice.py index 3d7feebb..a166e0fb 100644 --- a/confluent_server/confluent/selfservice.py +++ b/confluent_server/confluent/selfservice.py @@ -517,8 +517,8 @@ def handle_request(env, start_response): pals = get_extra_names(nodename, cfg, myip) result = syncfiles.start_syncfiles( nodename, cfg, json.loads(reqbody), pals) - start_response(result, ()) - yield '' + start_response(result[0], ()) + yield result[1] return if 'GET' == operation: status, output = syncfiles.get_syncresult(nodename) diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index df5574e3..ed99fedf 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -290,7 +290,7 @@ syncrunners = {} def start_syncfiles(nodename, cfg, suffixes, principals=[]): peerip = None if nodename in syncrunners: - return '503 Synchronization already in progress ' + return '503 Synchronization already in progress', 'Synchronization already in progress for {}'.format(nodename) if 'myips' in suffixes: targips = suffixes['myips'] del suffixes['myips'] @@ -313,13 +313,13 @@ def start_syncfiles(nodename, cfg, suffixes, principals=[]): raise Exception('Cannot perform syncfiles without profile assigned') synclist = '/var/lib/confluent/public/os/{}/syncfiles'.format(profile) if not 
os.path.exists(synclist): - return '200 OK' # not running + return '200 OK', 'No synclist' # not running sl = SyncList(synclist, nodename, cfg) if not (sl.appendmap or sl.mergemap or sl.replacemap or sl.appendoncemap): - return '200 OK' # the synclist has no actual entries + return '200 OK', 'Empty synclist' # the synclist has no actual entries syncrunners[nodename] = eventlet.spawn( sync_list_to_node, sl, nodename, suffixes, peerip) - return '202 Queued' # backgrounded + return '202 Queued', 'Background synchronization initiated' # backgrounded def get_syncresult(nodename): if nodename not in syncrunners: From 67b3c48dc9b322817f2a556e7afdf463be4c1ee8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Apr 2024 14:58:38 -0400 Subject: [PATCH 109/122] Clean up error output on syncfileclient execution --- .../el8-diskless/profiles/default/scripts/syncfileclient | 2 ++ confluent_osdeploy/el8/profiles/default/scripts/syncfileclient | 2 ++ .../el9-diskless/profiles/default/scripts/syncfileclient | 2 ++ .../genesis/profiles/default/scripts/syncfileclient | 2 ++ .../suse15-diskless/profiles/default/scripts/syncfileclient | 2 ++ confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient | 2 ++ .../suse15/profiles/server/scripts/syncfileclient | 2 ++ .../profiles/default/scripts/syncfileclient | 2 ++ .../ubuntu20.04/profiles/default/scripts/syncfileclient | 2 ++ .../ubuntu22.04/profiles/default/scripts/syncfileclient | 2 ++ 10 files changed, 20 insertions(+) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient index ac60f5f7..237c443d 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient @@ -298,6 +298,8 @@ if __name__ == '__main__': status = synchronize() except Exception as e: sys.stderr.write(str(e)) + sys.stderr.write('\n') + sys.stderr.flush() 
status = 300 if status not in (204, 200): time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient index ac60f5f7..237c443d 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient @@ -298,6 +298,8 @@ if __name__ == '__main__': status = synchronize() except Exception as e: sys.stderr.write(str(e)) + sys.stderr.write('\n') + sys.stderr.flush() status = 300 if status not in (204, 200): time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient index ac60f5f7..237c443d 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient @@ -298,6 +298,8 @@ if __name__ == '__main__': status = synchronize() except Exception as e: sys.stderr.write(str(e)) + sys.stderr.write('\n') + sys.stderr.flush() status = 300 if status not in (204, 200): time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient b/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient index ac60f5f7..237c443d 100644 --- a/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient @@ -298,6 +298,8 @@ if __name__ == '__main__': status = synchronize() except Exception as e: sys.stderr.write(str(e)) + sys.stderr.write('\n') + sys.stderr.flush() status = 300 if status not in (204, 200): time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient index ac60f5f7..237c443d 100644 --- 
a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient @@ -298,6 +298,8 @@ if __name__ == '__main__': status = synchronize() except Exception as e: sys.stderr.write(str(e)) + sys.stderr.write('\n') + sys.stderr.flush() status = 300 if status not in (204, 200): time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient b/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient index ac60f5f7..237c443d 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient @@ -298,6 +298,8 @@ if __name__ == '__main__': status = synchronize() except Exception as e: sys.stderr.write(str(e)) + sys.stderr.write('\n') + sys.stderr.flush() status = 300 if status not in (204, 200): time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient b/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient index ac60f5f7..237c443d 100644 --- a/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient +++ b/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient @@ -298,6 +298,8 @@ if __name__ == '__main__': status = synchronize() except Exception as e: sys.stderr.write(str(e)) + sys.stderr.write('\n') + sys.stderr.flush() status = 300 if status not in (204, 200): time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient index ac60f5f7..237c443d 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient @@ -298,6 +298,8 @@ if __name__ == '__main__': status = synchronize() except Exception as e: 
sys.stderr.write(str(e)) + sys.stderr.write('\n') + sys.stderr.flush() status = 300 if status not in (204, 200): time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient index ac60f5f7..237c443d 100644 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient @@ -298,6 +298,8 @@ if __name__ == '__main__': status = synchronize() except Exception as e: sys.stderr.write(str(e)) + sys.stderr.write('\n') + sys.stderr.flush() status = 300 if status not in (204, 200): time.sleep((random.random()*3)+2) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient index ac60f5f7..237c443d 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient @@ -298,6 +298,8 @@ if __name__ == '__main__': status = synchronize() except Exception as e: sys.stderr.write(str(e)) + sys.stderr.write('\n') + sys.stderr.flush() status = 300 if status not in (204, 200): time.sleep((random.random()*3)+2) From 1da27083cc8a64510009331b8853fcbc8a804c0f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Apr 2024 15:08:56 -0400 Subject: [PATCH 110/122] Another cleanup of syncfileclient output --- .../el8-diskless/profiles/default/scripts/syncfileclient | 4 +++- .../el8/profiles/default/scripts/syncfileclient | 4 +++- .../el9-diskless/profiles/default/scripts/syncfileclient | 4 +++- .../genesis/profiles/default/scripts/syncfileclient | 4 +++- .../suse15-diskless/profiles/default/scripts/syncfileclient | 4 +++- confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient | 4 +++- .../suse15/profiles/server/scripts/syncfileclient | 4 +++- 
.../profiles/default/scripts/syncfileclient | 4 +++- .../ubuntu20.04/profiles/default/scripts/syncfileclient | 4 +++- .../ubuntu22.04/profiles/default/scripts/syncfileclient | 4 +++- 10 files changed, 30 insertions(+), 10 deletions(-) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient index 237c443d..5f2efc5e 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/syncfileclient @@ -232,7 +232,9 @@ def synchronize(): status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) if status >= 300: sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) - sys.stderr.write(repr(rsp)) + sys.stderr.write(rsp.decode('utf8')) + sys.stderr.write('\n') + sys.stderr.flush() return status if status == 202: lastrsp = '' diff --git a/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient index 237c443d..5f2efc5e 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el8/profiles/default/scripts/syncfileclient @@ -232,7 +232,9 @@ def synchronize(): status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) if status >= 300: sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) - sys.stderr.write(repr(rsp)) + sys.stderr.write(rsp.decode('utf8')) + sys.stderr.write('\n') + sys.stderr.flush() return status if status == 202: lastrsp = '' diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient index 237c443d..5f2efc5e 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/syncfileclient @@ -232,7 +232,9 @@ def 
synchronize(): status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) if status >= 300: sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) - sys.stderr.write(repr(rsp)) + sys.stderr.write(rsp.decode('utf8')) + sys.stderr.write('\n') + sys.stderr.flush() return status if status == 202: lastrsp = '' diff --git a/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient b/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient index 237c443d..5f2efc5e 100644 --- a/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/genesis/profiles/default/scripts/syncfileclient @@ -232,7 +232,9 @@ def synchronize(): status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) if status >= 300: sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) - sys.stderr.write(repr(rsp)) + sys.stderr.write(rsp.decode('utf8')) + sys.stderr.write('\n') + sys.stderr.flush() return status if status == 202: lastrsp = '' diff --git a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient index 237c443d..5f2efc5e 100644 --- a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/syncfileclient @@ -232,7 +232,9 @@ def synchronize(): status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) if status >= 300: sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) - sys.stderr.write(repr(rsp)) + sys.stderr.write(rsp.decode('utf8')) + sys.stderr.write('\n') + sys.stderr.flush() return status if status == 202: lastrsp = '' diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient b/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient index 237c443d..5f2efc5e 100644 --- 
a/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/syncfileclient @@ -232,7 +232,9 @@ def synchronize(): status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) if status >= 300: sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) - sys.stderr.write(repr(rsp)) + sys.stderr.write(rsp.decode('utf8')) + sys.stderr.write('\n') + sys.stderr.flush() return status if status == 202: lastrsp = '' diff --git a/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient b/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient index 237c443d..5f2efc5e 100644 --- a/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient +++ b/confluent_osdeploy/suse15/profiles/server/scripts/syncfileclient @@ -232,7 +232,9 @@ def synchronize(): status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) if status >= 300: sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) - sys.stderr.write(repr(rsp)) + sys.stderr.write(rsp.decode('utf8')) + sys.stderr.write('\n') + sys.stderr.flush() return status if status == 202: lastrsp = '' diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient index 237c443d..5f2efc5e 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient @@ -232,7 +232,9 @@ def synchronize(): status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) if status >= 300: sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) - sys.stderr.write(repr(rsp)) + sys.stderr.write(rsp.decode('utf8')) + sys.stderr.write('\n') + sys.stderr.flush() return status if status == 202: lastrsp = '' diff --git 
a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient index 237c443d..5f2efc5e 100644 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/syncfileclient @@ -232,7 +232,9 @@ def synchronize(): status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) if status >= 300: sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) - sys.stderr.write(repr(rsp)) + sys.stderr.write(rsp.decode('utf8')) + sys.stderr.write('\n') + sys.stderr.flush() return status if status == 202: lastrsp = '' diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient index 237c443d..5f2efc5e 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/syncfileclient @@ -232,7 +232,9 @@ def synchronize(): status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) if status >= 300: sys.stderr.write("Error starting syncfiles - {}:\n".format(status)) - sys.stderr.write(repr(rsp)) + sys.stderr.write(rsp.decode('utf8')) + sys.stderr.write('\n') + sys.stderr.flush() return status if status == 202: lastrsp = '' From eaffb342b2af2878c1a8aaad00c79b7873d23f74 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Apr 2024 15:19:38 -0400 Subject: [PATCH 111/122] Reap stale sync runners after a minute dead If the client never claims the result, delete the sync task. 
--- confluent_server/confluent/syncfiles.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index ed99fedf..1f739ba1 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -285,9 +285,10 @@ def mkpathorlink(source, destination, appendexist=False): syncrunners = {} - +cleaner = None def start_syncfiles(nodename, cfg, suffixes, principals=[]): + global cleaner peerip = None if nodename in syncrunners: return '503 Synchronization already in progress', 'Synchronization already in progress for {}'.format(nodename) @@ -319,7 +320,26 @@ def start_syncfiles(nodename, cfg, suffixes, principals=[]): return '200 OK', 'Empty synclist' # the synclist has no actual entries syncrunners[nodename] = eventlet.spawn( sync_list_to_node, sl, nodename, suffixes, peerip) - return '202 Queued', 'Background synchronization initiated' # backgrounded + if not cleaner: + cleaner = eventlet.spawn(cleanit) + return '202 Queued', 'Background synchronization initiated' # backgrounded + + +def cleanit(): + toreap = {} + while True: + for nn in list(syncrunners): + if syncrunners[nn].dead: + if nn in toreap: + syncrunners[nn].wait() + del syncrunners[nn] + del toreap[nn] + else: + toreap[nn] = 1 + elif nn is in toreap: + del toreap[nn] + eventlet.sleep(30) + def get_syncresult(nodename): if nodename not in syncrunners: From 8fb889ba736609b6ee70be835836d8691e91f4ff Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Apr 2024 15:27:20 -0400 Subject: [PATCH 112/122] Correct syntax error --- confluent_server/confluent/syncfiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index 1f739ba1..068d1ae4 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -336,7 +336,7 @@ def 
cleanit(): del toreap[nn] else: toreap[nn] = 1 - elif nn is in toreap: + elif nn in toreap: del toreap[nn] eventlet.sleep(30) From 01722c18c4e1e99d56e4bd465eb8005a69a1ae58 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Apr 2024 15:40:40 -0400 Subject: [PATCH 113/122] Fix location of idle sleep in syncfiles cleaner --- confluent_server/confluent/syncfiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index 068d1ae4..9c96e533 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -338,7 +338,7 @@ def cleanit(): toreap[nn] = 1 elif nn in toreap: del toreap[nn] - eventlet.sleep(30) + eventlet.sleep(30) def get_syncresult(nodename): From ceaf641c1a31cd03da1ecab66557bfb6d244d73f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 9 Apr 2024 16:18:24 -0400 Subject: [PATCH 114/122] Keep reap loop going on error --- confluent_server/confluent/syncfiles.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index 9c96e533..f1e638f8 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -331,7 +331,11 @@ def cleanit(): for nn in list(syncrunners): if syncrunners[nn].dead: if nn in toreap: - syncrunners[nn].wait() + try: + syncrunners[nn].wait() + except Exception as e: + print(repr(e)) + pass del syncrunners[nn] del toreap[nn] else: From 8e5ee6c9d8490d0275d9e84a118048d1e783744b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 10 Apr 2024 13:54:06 -0400 Subject: [PATCH 115/122] Make orphaned sync runner retire on new sync request --- confluent_server/confluent/syncfiles.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index f1e638f8..16cf4c49 100644 
--- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -290,8 +290,6 @@ cleaner = None def start_syncfiles(nodename, cfg, suffixes, principals=[]): global cleaner peerip = None - if nodename in syncrunners: - return '503 Synchronization already in progress', 'Synchronization already in progress for {}'.format(nodename) if 'myips' in suffixes: targips = suffixes['myips'] del suffixes['myips'] @@ -318,6 +316,11 @@ def start_syncfiles(nodename, cfg, suffixes, principals=[]): sl = SyncList(synclist, nodename, cfg) if not (sl.appendmap or sl.mergemap or sl.replacemap or sl.appendoncemap): return '200 OK', 'Empty synclist' # the synclist has no actual entries + if nodename in syncrunners: + if syncrunners[nodename].dead: + syncrunners[nodename].wait() + else: + return '503 Synchronization already in progress', 'Synchronization already in progress for {}'.format(nodename) syncrunners[nodename] = eventlet.spawn( sync_list_to_node, sl, nodename, suffixes, peerip) if not cleaner: From a6a2f2f2de333b863c3d3ed926f2942d7b92e7b3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 12 Apr 2024 11:46:11 -0400 Subject: [PATCH 116/122] Fixes for attribute clear warning behavior Correct collective behavior for failing to clear on followers. Also, connect the warnings from the leader to the member issuing the RPC. 
--- .../confluent/config/configmanager.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 9e7818b5..6c7ebd71 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -252,10 +252,12 @@ def _rpc_master_rename_nodegroups(tenant, renamemap): def _rpc_master_clear_node_attributes(tenant, nodes, attributes): - ConfigManager(tenant).clear_node_attributes(nodes, attributes) + warnings = [] + ConfigManager(tenant).clear_node_attributes(nodes, attributes, warnings) + return warnings -def _rpc_clear_node_attributes(tenant, nodes, attributes): +def _rpc_clear_node_attributes(tenant, nodes, attributes): # master has to do the warnings ConfigManager(tenant)._true_clear_node_attributes(nodes, attributes) @@ -348,9 +350,9 @@ def exec_on_leader(function, *args): rpclen = len(rpcpayload) cfgleader.sendall(struct.pack('!Q', rpclen)) cfgleader.sendall(rpcpayload) - _pendingchangesets[xid].wait() + retv = _pendingchangesets[xid].wait() del _pendingchangesets[xid] - return + return retv def exec_on_followers(fnname, *args): @@ -714,8 +716,9 @@ def relay_slaved_requests(name, listener): exc = None if not (rpc['function'].startswith('_rpc_') or rpc['function'].endswith('_collective_member')): raise Exception('Unsupported function {0} called'.format(rpc['function'])) + retv = None try: - globals()[rpc['function']](*rpc['args']) + retv = globals()[rpc['function']](*rpc['args']) except ValueError as ve: exc = ['ValueError', str(ve)] except Exception as e: @@ -723,7 +726,7 @@ def relay_slaved_requests(name, listener): exc = ['Exception', str(e)] if 'xid' in rpc: res = _push_rpc(listener, msgpack.packb({'xid': rpc['xid'], - 'exc': exc}, use_bin_type=False)) + 'exc': exc, 'ret': retv}, use_bin_type=False)) if not res: break try: @@ -929,7 +932,7 @@ def follow_channel(channel): exc 
= Exception(excstr) _pendingchangesets[rpc['xid']].send_exception(exc) else: - _pendingchangesets[rpc['xid']].send() + _pendingchangesets[rpc['xid']].send(rpc.get('ret', None)) if 'quorum' in rpc: _hasquorum = rpc['quorum'] res = _push_rpc(channel, b'') # use null as ACK @@ -2204,14 +2207,17 @@ class ConfigManager(object): def clear_node_attributes(self, nodes, attributes, warnings=None): if cfgleader: - return exec_on_leader('_rpc_master_clear_node_attributes', + mywarnings = exec_on_leader('_rpc_master_clear_node_attributes', self.tenant, nodes, attributes) + if warnings is not None: + warnings.extend(mywarnings) + return if cfgstreams: exec_on_followers('_rpc_clear_node_attributes', self.tenant, nodes, attributes) self._true_clear_node_attributes(nodes, attributes, warnings) - def _true_clear_node_attributes(self, nodes, attributes, warnings): + def _true_clear_node_attributes(self, nodes, attributes, warnings=None): # accumulate all changes into a changeset and push in one go changeset = {} realattributes = [] @@ -2234,16 +2240,16 @@ class ConfigManager(object): # delete it and check for inheritence to backfil data del nodek[attrib] self._do_inheritance(nodek, attrib, node, changeset) - if not warnings is None: + if warnings is not None: if attrib in nodek: warnings.append('The attribute "{}" was defined specifically for the node and clearing now has a value inherited from the group "{}"'.format(attrib, nodek[attrib]['inheritedfrom'])) _addchange(changeset, node, attrib) _mark_dirtykey('nodes', node, self.tenant) elif attrib in nodek: - if not warnings is None: + if warnings is not None: warnings.append('The attribute "{0}" is inherited from group "{1}", leaving the inherited value alone (use "{0}=" with no value to explicitly blank the value if desired)'.format(attrib, nodek[attrib]['inheritedfrom'])) else: - if not warnings is None: + if warnings is not None: warnings.append('Attribute "{}" is either already cleared, or does not match a defined attribute (if 
referencing an attribute group, try a wildcard)'.format(attrib)) if ('_expressionkeys' in nodek and attrib in nodek['_expressionkeys']): From 3ba3394a542422433cbb0b726103f4742e4ba363 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 12 Apr 2024 17:32:13 -0400 Subject: [PATCH 117/122] Fix None return by exec_on_leader with warnings --- confluent_server/confluent/config/configmanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 6c7ebd71..528924e8 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -2209,7 +2209,7 @@ class ConfigManager(object): if cfgleader: mywarnings = exec_on_leader('_rpc_master_clear_node_attributes', self.tenant, nodes, attributes) - if warnings is not None: + if mywarnings and warnings is not None: warnings.extend(mywarnings) return if cfgstreams: From 3b55f500cea756f37837dbdc14a399af9624778d Mon Sep 17 00:00:00 2001 From: tkucherera Date: Tue, 16 Apr 2024 03:16:15 -0400 Subject: [PATCH 118/122] sample post scripts directory --- .../profile/scripts/sample/consoleredirect | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 confluent_osdeploy/common/profile/scripts/sample/consoleredirect diff --git a/confluent_osdeploy/common/profile/scripts/sample/consoleredirect b/confluent_osdeploy/common/profile/scripts/sample/consoleredirect new file mode 100644 index 00000000..4ebc3a8f --- /dev/null +++ b/confluent_osdeploy/common/profile/scripts/sample/consoleredirect @@ -0,0 +1,49 @@ +is_suse=false +is_rhel=false + +if test -f /boot/efi/EFI/redhat/grub.cfg; then + grubcfg="/boot/efi/EFI/redhat/grub.cfg" + grub2-mkconfig -o $grubcfg + is_rhel=true +elif test -f /boot/efi/EFI/sle_hpc/grub.cfg; then + grubcfg="/boot/efi/EFI/sle_hpc/grub.cfg" + grub2-mkconfig -o $grubcfg + is_suse=true +else + echo "Expected File missing: 
Check if os sle_hpc or redhat" + exit +fi + +# working on SUSE +if $is_suse; then + start=false + num_line=0 + lines_to_edit=() + while read line; do + ((num_line++)) + if [[ $line == *"grub_platform"* ]]; then + start=true + fi + if $start; then + if [[ $line != "#"* ]];then + lines_to_edit+=($num_line) + fi + fi + if [[ ${#line} -eq 2 && $line == *"fi" ]]; then + if $start; then + start=false + fi + fi + done < grub_cnf.cfg + + for line_num in "${lines_to_edit[@]}"; do + line_num+="s" + sed -i "${line_num},^,#," $grubcfg + done + sed -i 's,^terminal,#terminal,' $grubcfg +fi + +# Working on Redhat +if $is_rhel; then + sed -i 's,^serial,#serial, ; s,^terminal,#terminal,' $grubcfg +fi \ No newline at end of file From 10f0fabb8cfe81b71fb4933c078f58cea729e096 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 17 Apr 2024 15:18:45 -0400 Subject: [PATCH 119/122] Fix nodegroup retrieval nodegroup information was broken by clear warning support. --- confluent_server/confluent/plugins/configuration/attributes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/plugins/configuration/attributes.py b/confluent_server/confluent/plugins/configuration/attributes.py index a56a1aee..2c9a6ac9 100644 --- a/confluent_server/confluent/plugins/configuration/attributes.py +++ b/confluent_server/confluent/plugins/configuration/attributes.py @@ -30,7 +30,7 @@ def retrieve(nodes, element, configmanager, inputdata, clearwarnbynode=None): element[1], element[3], configmanager, inputdata, clearwarnbynode) -def retrieve_nodegroup(nodegroup, element, configmanager, inputdata, clearwarnbynode): +def retrieve_nodegroup(nodegroup, element, configmanager, inputdata, clearwarnbynode=None): try: grpcfg = configmanager.get_nodegroup_attributes(nodegroup) except KeyError: From b6068823271c96b4ab344dde03813aae8b1a72c3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 19 Apr 2024 08:22:32 -0400 Subject: [PATCH 120/122] Have collate preserve 
relative whitespace The change to tolerate either a space or no space ended up greedily consuming whitespace. Do best possible in two cases: For log, use the first line as a clue, and consistently pad or not pad according to first line. It won't catch different pad strategies, or handle first line being indented but other lines not being indented. For the textgroup variant, allow subsequent lines to revise the pad downward, and accept any whitespace, not just space. --- confluent_client/bin/collate | 11 +++++++++-- confluent_client/confluent/textgroup.py | 11 +++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/confluent_client/bin/collate b/confluent_client/bin/collate index 07095901..2a086303 100755 --- a/confluent_client/bin/collate +++ b/confluent_client/bin/collate @@ -21,6 +21,7 @@ import optparse import os +import re import select import sys @@ -84,6 +85,7 @@ fullline = sys.stdin.readline() printpending = True clearpending = False holdoff = 0 +padded = None while fullline: for line in fullline.split('\n'): if not line: @@ -92,13 +94,18 @@ while fullline: line = 'UNKNOWN: ' + line if options.log: node, output = line.split(':', 1) - output = output.lstrip() + if padded is None: + if output.startswith(' '): + padded = True + else: + padded = False + if padded: + output = re.sub(r'^ ', '', output) currlog = options.log.format(node=node, nodename=node) with open(currlog, mode='a') as log: log.write(output + '\n') continue node, output = line.split(':', 1) - output = output.lstrip() grouped.add_line(node, output) if options.watch: if not holdoff: diff --git a/confluent_client/confluent/textgroup.py b/confluent_client/confluent/textgroup.py index cd35b6fa..e2f0dc7f 100644 --- a/confluent_client/confluent/textgroup.py +++ b/confluent_client/confluent/textgroup.py @@ -98,17 +98,24 @@ class GroupedData(object): self.byoutput = {} self.header = {} self.client = confluentconnection + self.detectedpad = None def generate_byoutput(self): 
self.byoutput = {} + thepad = self.detectedpad if self.detectedpad else '' for n in self.bynode: - output = '\n'.join(self.bynode[n]) + output = '' + for ln in self.bynode[n]: + output += ln.replace(thepad, '', 1) + '\n' if output not in self.byoutput: self.byoutput[output] = set([n]) else: self.byoutput[output].add(n) def add_line(self, node, line): + wspc = re.search(r'^\s*', line).group() + if self.detectedpad is None or len(wspc) < len(self.detectedpad): + self.detectedpad = wspc if node not in self.bynode: self.bynode[node] = [line] else: @@ -219,4 +226,4 @@ if __name__ == '__main__': if not line: continue groupoutput.add_line(*line.split(': ', 1)) - groupoutput.print_deviants() \ No newline at end of file + groupoutput.print_deviants() From 6f2be355efa738c08e617d49fceb5aeafc240931 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 22 Apr 2024 11:32:27 -0400 Subject: [PATCH 121/122] Source from "local" media if present Some environments may want to load the bulk of the media via USB rather than over the network. This prefers that source if that scheme is detected. --- .../usr/lib/dracut/hooks/pre-trigger/01-confluent.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh index 6db95276..355a5ad7 100644 --- a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh +++ b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh @@ -227,7 +227,13 @@ if [ "$textconsole" = "true" ] && ! grep console= /proc/cmdline > /dev/null; the fi fi -echo inst.repo=$proto://$mgr/confluent-public/os/$profilename/distribution >> /etc/cmdline.d/01-confluent.conf +. 
/etc/os-release +ISOSRC=$(blkid -t TYPE=iso9660|grep -Ei ' LABEL="'$ID-$VERSION_ID|sed -e s/:.*//) +if [ -z "$ISOSRC" ]; then + echo inst.repo=$proto://$mgr/confluent-public/os/$profilename/distribution >> /etc/cmdline.d/01-confluent.conf +else + echo inst.repo=cdrom:$ISOSRC >> /etc/cmdline.d/01-confluent.conf +fi echo inst.ks=$proto://$mgr/confluent-public/os/$profilename/kickstart >> /etc/cmdline.d/01-confluent.conf kickstart=$proto://$mgr/confluent-public/os/$profilename/kickstart root=anaconda-net:$proto://$mgr/confluent-public/os/$profilename/distribution From 86e612b4bf0887a748c9a2c67a2f0e095507862c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 22 Apr 2024 12:47:52 -0400 Subject: [PATCH 122/122] Move anaconda netroot to not be specified in media sourced When sourcing from media, do not trigger anaconda netroot behavior. --- .../usr/lib/dracut/hooks/pre-trigger/01-confluent.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh index 355a5ad7..a1778e08 100644 --- a/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh +++ b/confluent_osdeploy/el8/initramfs/usr/lib/dracut/hooks/pre-trigger/01-confluent.sh @@ -231,14 +231,14 @@ fi ISOSRC=$(blkid -t TYPE=iso9660|grep -Ei ' LABEL="'$ID-$VERSION_ID|sed -e s/:.*//) if [ -z "$ISOSRC" ]; then echo inst.repo=$proto://$mgr/confluent-public/os/$profilename/distribution >> /etc/cmdline.d/01-confluent.conf + root=anaconda-net:$proto://$mgr/confluent-public/os/$profilename/distribution + export root else echo inst.repo=cdrom:$ISOSRC >> /etc/cmdline.d/01-confluent.conf fi echo inst.ks=$proto://$mgr/confluent-public/os/$profilename/kickstart >> /etc/cmdline.d/01-confluent.conf kickstart=$proto://$mgr/confluent-public/os/$profilename/kickstart 
-root=anaconda-net:$proto://$mgr/confluent-public/os/$profilename/distribution export kickstart -export root autoconfigmethod=$(grep ipv4_method /etc/confluent/confluent.deploycfg) autoconfigmethod=${autoconfigmethod#ipv4_method: } if [ "$autoconfigmethod" = "dhcp" ]; then