From a1c9100063811e7b653eb2a709d100974e5d578d Mon Sep 17 00:00:00 2001 From: saksarav-nokia Date: Thu, 18 Apr 2024 01:19:49 -0400 Subject: [PATCH] Wait till CHASSIS_APP_DB PING is successful, host_name and asic_name are valid in CONFIG_DB before starting chassis-db-cleanup (#17962) This PR fixes the issue reported in Issue #17945. We noticed that chassis db clean up is sometimes skipped when the CHASSIS_APP_DB PING fails. Also, if host_name and asic_name are not yet written to CONFIG_DB, empty strings could be passed to the CHASSIS_APP_DB EVAL commands. The service hostname-config.service is restarted whenever config-reload or load-minigraph is done, and this service renames the file /etc/hosts to update it with the new file. This interferes with swss@.service: when the swss.sh script accesses CHASSIS_APP_DB while the /etc/hosts file is being renamed, the error "Unable to connect to redis: Cannot assign requested address" is seen and the CHASSIS_APP_DB EVAL command fails. This causes the chassis db entries to not get cleaned up and causes an orchagent crash in remote LCs. --------- Signed-off-by: saksarav --- files/image_config/hostname/hostname-config.sh | 8 +++++++- files/scripts/swss.sh | 17 ++++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/files/image_config/hostname/hostname-config.sh b/files/image_config/hostname/hostname-config.sh index c2a4f1e546ce..5f4bfede2097 100755 --- a/files/image_config/hostname/hostname-config.sh +++ b/files/image_config/hostname/hostname-config.sh @@ -11,10 +11,16 @@ fi echo $HOSTNAME > /etc/hostname hostname -F /etc/hostname +#Don't update the /etc/hosts if hostname is not changed +#This is to prevent intermittent redis_chassis.server reachability issue +if [ $CURRENT_HOSTNAME == $HOSTNAME ] ; then + exit 0 +fi + # Remove the old hostname entry from hosts file. # But, 'localhost' entry is used by multiple applications. Don't remove it altogether. 
# Edit contents of /etc/hosts and put in /etc/hosts.new -if [ $CURRENT_HOSTNAME != "localhost" ] || [ $CURRENT_HOSTNAME == $HOSTNAME ] ; then +if [ $CURRENT_HOSTNAME != "localhost" ] ; then sed "/\s$CURRENT_HOSTNAME$/d" /etc/hosts > /etc/hosts.new else cp -f /etc/hosts /etc/hosts.new diff --git a/files/scripts/swss.sh b/files/scripts/swss.sh index 912461ba9ad9..16849cdffa74 100755 --- a/files/scripts/swss.sh +++ b/files/scripts/swss.sh @@ -132,12 +132,23 @@ function clean_up_chassis_db_tables() return fi - if [[ !($($SONIC_DB_CLI CHASSIS_APP_DB PING | grep -c True) -gt 0) ]]; then - return - fi + until [[ $($SONIC_DB_CLI CHASSIS_APP_DB PING | grep -c True) -gt 0 ]]; do + sleep 1 + done lc=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' 'hostname'` + until [[ -n "${lc}" ]]; do + lc=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' 'hostname'` + sleep 1 + done + debug "Chassis db clean up for ${SERVICE}$DEV. hostname=$lc" + asic=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' 'asic_name'` + until [[ -n "${asic}" ]]; do + asic=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' 'asic_name'` + sleep 1 + done + debug "Chassis db clean up for ${SERVICE}$DEV. asic=$asic" # First, delete SYSTEM_NEIGH entries num_neigh=`$SONIC_DB_CLI CHASSIS_APP_DB EVAL "