From a9d15de1564a90a46efc54dd84aba676c33f393e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 2 Oct 2025 10:55:43 -0400 Subject: [PATCH] Rework Ubuntu identity image DHCP bringup The stock Ubuntu approach was inadequate: it would DHCP out every nic and take the fastest result, with no going back. Now the CDC nic can frequently win that race. First, rmmod cdc_ether, since that scenario is completely out of the question. Beyond that, let Ubuntu have one shot at multi-nic bringup, and maintain a list of all link-up devices. If the connectivity check should fail, then start trying one nic at a time, cycling through them. Also, the openssl s_client timeout is painfully slow, so use a subshell and kill to speed things up. --- .../initramfs/scripts/init-premount/confluent | 35 ++++++++++++++++--- .../initramfs/scripts/init-premount/confluent | 32 +++++++++++++---- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent index 964869d7..82ff8aec 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent @@ -52,13 +52,38 @@ while ! grep NODENAME /custom-installation/confluent/confluent.info; do ipconfig -d $MYIP::$MYGW:$MYNM::$NIC echo $NIC > /tmp/autodetectnic else - configure_networking - for dsrv in $deploysrvs; do - if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then - deploysrvs=$dsrv - break + rmmod cdc_ether 2> /dev/null + while [ ! 
-f /run/confirmednic ]; do + ALLNETDEVS=$(ip a|grep LOWER_UP|grep MULTICAST|awk '{print $2}'|sed -e s/://) + rm -rf /run/net* /run/dhcpcd /var/lib/dhcpcd + for dev in $(ip a|grep MULTICAST|awk '{print $2}'|sed -e s/://); do + ip a flush $dev + echo 1 > /proc/sys/net/ipv6/conf/$dev/addr_gen_mode + echo 0 > /proc/sys/net/ipv6/conf/$dev/addr_gen_mode + done + unset DEVICE DEVICE6 IP IP6 dev + [ -z "$1" ] || DEVICE=$1 + shift + configure_networking + for dsrv in $deploysrvs; do + (if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then + echo $dsrv > /run/confirmednic + break + fi) & + chkpid=$! + ( sleep 10 && kill $chkpid ) & + timeoutpid=$! + wait $chkpid + kill $timeoutpid 2> /dev/null + unset chkpid timeoutpid + done + if [ ! -f /run/confirmednic ]; then + echo "No connectivity to deployment servers, retrying..." + [ -z "$1" ] && set -- $ALLNETDEVS fi done + deploysrvs=$(cat /run/confirmednic) + rm /run/confirmednic fi MGR=$deploysrvs NODENAME=$(grep ^nodename: /tmp/idntmnt/cnflnt.yml | awk '{print $2}') diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent index 0eeadd55..98c9129c 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent @@ -53,19 +53,39 @@ while ! grep NODENAME /custom-installation/confluent/confluent.info; do ipconfig -d $MYIP::$MYGW:$MYNM::$NIC echo $NIC > /tmp/autodetectnic else - while [ -z "$NIC" ]; do + rmmod cdc_ether 2> /dev/null + while [ ! 
-f /run/confirmednic ]; do + ALLNETDEVS=$(ip a|grep LOWER_UP|grep MULTICAST|awk '{print $2}'|sed -e s/://) + + rm -rf /run/net* /run/dhcpcd /var/lib/dhcpcd + for dev in $(ip a|grep MULTICAST|awk '{print $2}'|sed -e s/://); do + ip a flush $dev + echo 1 > /proc/sys/net/ipv6/conf/$dev/addr_gen_mode + echo 0 > /proc/sys/net/ipv6/conf/$dev/addr_gen_mode + done + unset DEVICE DEVICE6 IP IP6 dev + [ -z "$1" ] || DEVICE=$1 + shift configure_networking for dsrv in $deploysrvs; do - if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then - deploysrvs=$dsrv - NIC=1 + (if openssl s_client -connect $dsrv:443 > /dev/null 2>&1; then + echo $dsrv > /run/confirmednic break - fi + fi) & + chkpid=$! + ( sleep 10 && kill $chkpid ) & + timeoutpid=$! + wait $chkpid + kill $timeoutpid 2> /dev/null + unset chkpid timeoutpid done - if [ -z "$NIC" ]; then + if [ ! -f /run/confirmednic ]; then echo "No connectivity to deployment servers, retrying..." + [ -z "$1" ] && set -- $ALLNETDEVS fi done + deploysrvs=$(cat /run/confirmednic) + rm /run/confirmednic fi MGR=$deploysrvs NODENAME=$(grep ^nodename: /tmp/idntmnt/cnflnt.yml | awk '{print $2}')