From 388457aaf846b46576738d03a480a4015a139856 Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Fri, 5 Jan 2024 00:35:07 +0800 Subject: [PATCH] [arp_update]: Flush neighbors with incorrect MAC info (#17238) (#17677) --- files/scripts/arp_update | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/files/scripts/arp_update b/files/scripts/arp_update index 14a82ebe4da3..f402d53dced3 100755 --- a/files/scripts/arp_update +++ b/files/scripts/arp_update @@ -9,6 +9,11 @@ ARP_UPDATE_VARS_FILE="/usr/share/sonic/templates/arp_update_vars.j2" +# Overload `logger` command to include arp_update tag +logger () { + command logger -t "arp_update" "$@" +} + while /bin/true; do # find L3 interfaces which are UP, send ipv6 multicast pings ARP_UPDATE_VARS=$(sonic-cfggen -d -t ${ARP_UPDATE_VARS_FILE}) @@ -19,7 +24,7 @@ while /bin/true; do STATIC_ROUTE_IFNAMES=($(echo $ARP_UPDATE_VARS | jq -r '.static_route_ifnames')) # on supervisor/rp exit the script gracefully if [[ -z "$STATIC_ROUTE_NEXTHOPS" ]] || [[ -z "$STATIC_ROUTE_IFNAMES" ]]; then - logger "arp_update: exiting as no static route in packet based chassis" + logger "exiting as no static route in packet based chassis" exit 0 fi for i in ${!STATIC_ROUTE_NEXTHOPS[@]}; do @@ -38,7 +43,7 @@ while /bin/true; do interface="${STATIC_ROUTE_IFNAMES[i]}" if [[ -z "$interface" ]]; then # should never be here, handling just in case - logger "ERR: arp_update: missing interface entry for static route $nexthop" + logger -p error "missing interface entry for static route $nexthop" continue fi intf_up=$(ip link show $interface | grep "state UP") @@ -47,7 +52,7 @@ while /bin/true; do eval $pingcmd # STALE entries may appear more often, not logging to prevent periodic syslogs if [[ -z $(echo ${neigh_state} | grep 'STALE') ]]; then - logger "arp_update: static route nexthop not resolved ($neigh_state), pinging $nexthop on $interface" + logger "static route nexthop not resolved ($neigh_state), pinging $nexthop on $interface" fi fi fi @@ -70,6 +75,30 @@ while /bin/true; do fi done + # find neighbor entries with aged MAC and flush/relearn them + STALE_NEIGHS=$(ip neigh show | grep -v "fe80" | grep "STALE" | awk '{print $1 "," $5}' | tr [:lower:] [:upper:]) + for neigh in $STALE_NEIGHS; do + ip="$( cut -d ',' -f 1 <<< "$neigh" )" + mac="$( cut -d ',' -f 2 <<< "$neigh" )" + if [[ -z $(sonic-db-cli ASIC_DB keys "ASIC_STATE:SAI_OBJECT_TYPE_FDB_ENTRY*${mac}*") ]]; then + timeout 0.2 ping -c1 -w1 $ip > /dev/null + fi + done + + # Flush neighbor entries with MAC mismatch between kernel and APPL_DB + KERNEL_NEIGH=$(ip neigh show | grep -v "fe80" | grep -v "FAILED\|INCOMPLETE" | cut -d ' ' -f 1,3,5 --output-delimiter=',' | tr -d ' ') + for neigh in $KERNEL_NEIGH; do + ip="$( cut -d ',' -f 1 <<< "$neigh" )" + intf="$( cut -d ',' -f 2 <<< "$neigh" )" + kernel_mac="$( cut -d ',' -f 3 <<< "$neigh" )" + appl_db_mac="$(sonic-db-cli APPL_DB hget NEIGH_TABLE:$intf:$ip neigh)" + if [[ $kernel_mac != $appl_db_mac ]]; then + logger -p warning "MAC mismatch for ${ip} on ${intf} - kernel: ${kernel_mac}, APPL_DB: ${appl_db_mac}" + ip neigh flush $ip + timeout 0.2 ping -c1 -w1 $ip > /dev/null + fi + done + VLAN=$(echo $ARP_UPDATE_VARS | jq -r '.vlan') SUBTYPE=$(sonic-db-cli CONFIG_DB hget 'DEVICE_METADATA|localhost' 'subtype' | tr '[:upper:]' '[:lower:]') for vlan in $VLAN; do @@ -158,11 +187,11 @@ while /bin/true; do if [[ $ip == *"."* ]] && [[ ! $KERNEIGH4 =~ "${ip},${intf}" ]]; then pingcmd="timeout 0.2 ping -I $intf -n -q -i 0 -c 1 -W 1 $ip >/dev/null" eval $pingcmd - logger "arp_update: mismatch arp entry, pinging ${ip} on ${intf}" + logger "mismatch arp entry, pinging ${ip} on ${intf}" elif [[ $ip == *":"* ]] && [[ ! $KERNEIGH6 =~ "${ip},${intf}" ]]; then ping6cmd="timeout 0.2 ping6 -I $intf -n -q -i 0 -c 1 -W 1 $ip >/dev/null" eval $ping6cmd - logger "arp_update: mismatch v6 nbr entry, pinging ${ip} on ${intf}" + logger "mismatch v6 nbr entry, pinging ${ip} on ${intf}" fi fi done