From 2f9bc000166e30a5ffbb66eaba6b76def6103a73 Mon Sep 17 00:00:00 2001 From: Sudharsan Dhamal Gopalarathnam Date: Thu, 18 Apr 2024 02:29:23 -0700 Subject: [PATCH] [FRR]Adding fix for memory leak seen with BGP community (#18606) Porting PR #18272 to 202311. Why I did it Porting fix for FRRouting/frr#15459 Adding patch for PR FRRouting/frr#15466 Work item tracking Microsoft ADO (number only): How I did it Ported the fix FRRouting/frr#15466 to 8.5.1 How to verify it Running the test_stress_route and ensure no memory leak --- .../0037-bgp-community-memory-leak-fix.patch | 303 ++++++++++++++++++ src/sonic-frr/patch/series | 1 + 2 files changed, 304 insertions(+) create mode 100644 src/sonic-frr/patch/0037-bgp-community-memory-leak-fix.patch diff --git a/src/sonic-frr/patch/0037-bgp-community-memory-leak-fix.patch b/src/sonic-frr/patch/0037-bgp-community-memory-leak-fix.patch new file mode 100644 index 000000000000..8d1ff02d46bf --- /dev/null +++ b/src/sonic-frr/patch/0037-bgp-community-memory-leak-fix.patch @@ -0,0 +1,303 @@ +From 2b8e4e4b93a78e5884e2e5b97050b4ea3843e2e0 Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Thu, 7 Mar 2024 22:18:18 -0500 +Subject: [PATCH 1/2] bgpd: Combined patch to clean up filter leaks + +Customer has this valgrind trace: + +Direct leak of 2829120 byte(s) in 70728 object(s) allocated from: + 0 in community_new ../bgpd/bgp_community.c:39 + 1 in community_uniq_sort ../bgpd/bgp_community.c:170 + 2 in route_set_community ../bgpd/bgp_routemap.c:2342 + 3 in route_map_apply_ext ../lib/routemap.c:2673 + 4 in subgroup_announce_check ../bgpd/bgp_route.c:2367 + 5 in subgroup_process_announce_selected ../bgpd/bgp_route.c:2914 + 6 in group_announce_route_walkcb ../bgpd/bgp_updgrp_adv.c:199 + 7 in hash_walk ../lib/hash.c:285 + 8 in update_group_af_walk ../bgpd/bgp_updgrp.c:2061 + 9 in group_announce_route ../bgpd/bgp_updgrp_adv.c:1059 + 10 in bgp_process_main_one ../bgpd/bgp_route.c:3221 + 11 in bgp_process_wq ../bgpd/bgp_route.c:3221 + 12 in work_queue_run ../lib/workqueue.c:282 + +The above leak detected by valgrind was from a screenshot so I copied it +by hand. Any mistakes in line numbers are purely from my transcription. +Additionally this is against a slightly modified 8.5.1 version of FRR. +Code inspection of 8.5.1 -vs- latest master shows the same problem +exists. Code should be able to be followed from there to here. + +What is happening: + +There is a route-map being applied that modifes the outgoing community +to a peer. This is saved in the attr copy created in +subgroup_process_announce_selected. This community pointer is not +interned. So the community->refcount is still 0. Normally when +a prefix is announced, the attr and the prefix are placed on a +adjency out structure where the attribute is interned. This will +cause the community to be saved in the community hash list as well. +In a non-normal operation when the decision to send is aborted after +the route-map application, the attribute is just dropped and the +pointer to the community is just dropped too, leading to situations +where the memory is leaked. The usage of bgp suppress-fib would +would be a case where the community is caused to be leaked. +Additionally the previous commit where an unsuppress-map is used +to modify the outgoing attribute but since unsuppress-map was +not considered part of outgoing policy the attribute would be dropped as +well. This pointer drop also extends to any dynamically allocated +memory saved by the attribute pointer that was not interned yet as well. + +So let's modify the return case where the decision is made to +not send the prefix to the peer to always just flush the attribute +to ensure memory is not leaked. + +Fixes: #15459 +Signed-off-by: Donald Sharp +--- + bgpd/bgp_conditional_adv.c | 5 +++-- + bgpd/bgp_route.c | 19 +++++++++++------ + bgpd/bgp_updgrp.h | 2 +- + bgpd/bgp_updgrp_adv.c | 53 ++++++++++++++++++++++++++-------------------- + 4 files changed, 47 insertions(+), 32 deletions(-) + +diff --git a/bgpd/bgp_conditional_adv.c b/bgpd/bgp_conditional_adv.c +index 4ad00ed121..89ea85ff46 100644 +--- a/bgpd/bgp_conditional_adv.c ++++ b/bgpd/bgp_conditional_adv.c +@@ -134,8 +134,9 @@ static void bgp_conditional_adv_routes(struct peer *peer, afi_t afi, + if (update_type == UPDATE_TYPE_ADVERTISE && + subgroup_announce_check(dest, pi, subgrp, dest_p, + &attr, &advmap_attr)) { +- bgp_adj_out_set_subgroup(dest, subgrp, &attr, +- pi); ++ if (!bgp_adj_out_set_subgroup(dest, subgrp, ++ &attr, pi)) ++ bgp_attr_flush(&attr); + } else { + /* If default originate is enabled for + * the peer, do not send explicit +diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c +index a7a5c9849a..157bfa8a2b 100644 +--- a/bgpd/bgp_route.c ++++ b/bgpd/bgp_route.c +@@ -2917,16 +2917,23 @@ void subgroup_process_announce_selected(struct update_subgroup *subgrp, + * in FIB, then it is advertised + */ + if (advertise) { +- if (!bgp_check_withdrawal(bgp, dest)) +- bgp_adj_out_set_subgroup( +- dest, subgrp, &attr, selected); +- else ++ if (!bgp_check_withdrawal(bgp, dest)) { ++ if (!bgp_adj_out_set_subgroup( ++ dest, subgrp, &attr, ++ selected)) ++ bgp_attr_flush(&attr); ++ } else { + bgp_adj_out_unset_subgroup( + dest, subgrp, 1, addpath_tx_id); +- } +- } else ++ bgp_attr_flush(&attr); ++ } ++ } else ++ bgp_attr_flush(&attr); ++ } else { + bgp_adj_out_unset_subgroup(dest, subgrp, 1, + addpath_tx_id); ++ bgp_attr_flush(&attr); ++ } + } + + /* If selected is NULL we must withdraw the path using addpath_tx_id */ +diff --git a/bgpd/bgp_updgrp.h b/bgpd/bgp_updgrp.h +index e27c1e7b67..b7b6aa07e9 100644 +--- a/bgpd/bgp_updgrp.h ++++ b/bgpd/bgp_updgrp.h +@@ -458,7 +458,7 @@ extern struct bgp_adj_out *bgp_adj_out_alloc(struct update_subgroup *subgrp, + extern void bgp_adj_out_remove_subgroup(struct bgp_dest *dest, + struct bgp_adj_out *adj, + struct update_subgroup *subgrp); +-extern void bgp_adj_out_set_subgroup(struct bgp_dest *dest, ++extern bool bgp_adj_out_set_subgroup(struct bgp_dest *dest, + struct update_subgroup *subgrp, + struct attr *attr, + struct bgp_path_info *path); +diff --git a/bgpd/bgp_updgrp_adv.c b/bgpd/bgp_updgrp_adv.c +index af8ef751da..301a8b267e 100644 +--- a/bgpd/bgp_updgrp_adv.c ++++ b/bgpd/bgp_updgrp_adv.c +@@ -454,7 +454,7 @@ bgp_advertise_clean_subgroup(struct update_subgroup *subgrp, + return next; + } + +-void bgp_adj_out_set_subgroup(struct bgp_dest *dest, ++bool bgp_adj_out_set_subgroup(struct bgp_dest *dest, + struct update_subgroup *subgrp, struct attr *attr, + struct bgp_path_info *path) + { +@@ -474,7 +474,7 @@ void bgp_adj_out_set_subgroup(struct bgp_dest *dest, + bgp = SUBGRP_INST(subgrp); + + if (DISABLE_BGP_ANNOUNCE) +- return; ++ return false; + + /* Look for adjacency information. */ + adj = adj_lookup( +@@ -490,7 +490,7 @@ void bgp_adj_out_set_subgroup(struct bgp_dest *dest, + bgp_addpath_id_for_peer(peer, afi, safi, + &path->tx_addpath)); + if (!adj) +- return; ++ return false; + + subgrp->pscount++; + } +@@ -529,7 +529,7 @@ void bgp_adj_out_set_subgroup(struct bgp_dest *dest, + * will never be able to coalesce the 3rd peer down + */ + subgrp->version = MAX(subgrp->version, dest->version); +- return; ++ return false; + } + + if (adj->adv) +@@ -576,6 +576,8 @@ void bgp_adj_out_set_subgroup(struct bgp_dest *dest, + bgp_adv_fifo_add_tail(&subgrp->sync->update, adv); + + subgrp->version = MAX(subgrp->version, dest->version); ++ ++ return true; + } + + /* The only time 'withdraw' will be false is if we are sending +@@ -668,7 +670,7 @@ void subgroup_announce_table(struct update_subgroup *subgrp, + { + struct bgp_dest *dest; + struct bgp_path_info *ri; +- struct attr attr; ++ struct attr attr = {0}; + struct peer *peer; + afi_t afi; + safi_t safi; +@@ -715,19 +717,24 @@ void subgroup_announce_table(struct update_subgroup *subgrp, + &attr, NULL)) { + /* Check if route can be advertised */ + if (advertise) { +- if (!bgp_check_withdrawal(bgp, dest)) +- bgp_adj_out_set_subgroup( +- dest, subgrp, &attr, +- ri); +- else ++ if (!bgp_check_withdrawal(bgp, dest)) { ++ if (!bgp_adj_out_set_subgroup( ++ dest, subgrp, &attr, ++ ri)) ++ bgp_attr_flush(&attr); ++ } else { ++ bgp_attr_flush(&attr); + bgp_adj_out_unset_subgroup( + dest, subgrp, 1, + bgp_addpath_id_for_peer( + peer, afi, + safi_rib, + &ri->tx_addpath)); +- } ++ } ++ } else ++ bgp_attr_flush(&attr); + } else { ++ bgp_attr_flush(&attr); + /* If default originate is enabled for + * the peer, do not send explicit + * withdraw. This will prevent deletion +@@ -947,18 +954,18 @@ void subgroup_default_originate(struct update_subgroup *subgrp, int withdraw) + if (dest) { + for (pi = bgp_dest_get_bgp_path_info(dest); pi; + pi = pi->next) { +- if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) +- if (subgroup_announce_check( +- dest, pi, subgrp, +- bgp_dest_get_prefix(dest), +- &attr, NULL)) { +- struct attr *default_attr = +- bgp_attr_intern(&attr); +- +- bgp_adj_out_set_subgroup( +- dest, subgrp, +- default_attr, pi); +- } ++ if (!CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) ++ continue; ++ ++ if (subgroup_announce_check( ++ dest, pi, subgrp, ++ bgp_dest_get_prefix(dest), &attr, ++ NULL)) { ++ if (!bgp_adj_out_set_subgroup( ++ dest, subgrp, &attr, pi)) ++ bgp_attr_flush(&attr); ++ } else ++ bgp_attr_flush(&attr); + } + bgp_dest_unlock_node(dest); + } +-- +2.14.1 + + +From 761907075520aa3fae70a8d18fa717a24d3cbd65 Mon Sep 17 00:00:00 2001 +From: Donald Sharp +Date: Wed, 13 Mar 2024 10:26:58 -0400 +Subject: [PATCH 2/2] bgpd: Ensure that the correct aspath is free'd + +Currently in subgroup_default_originate the attr.aspath +is set in bgp_attr_default_set, which hashs the aspath +and creates a refcount for it. If this is a withdraw +the subgroup_announce_check and bgp_adj_out_set_subgroup +is called which will intern the attribute. This will +cause the the attr.aspath to be set to a new value +finally at the bottom of the function it intentionally +uninterns the aspath which is not the one that was +created for this function. This reduces the other +aspath's refcount by 1 and if a clear bgp * is issued +fast enough the aspath for that will be removed +and the system will crash. + +Signed-off-by: Donald Sharp +--- + bgpd/bgp_updgrp_adv.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/bgpd/bgp_updgrp_adv.c b/bgpd/bgp_updgrp_adv.c +index 301a8b267e..75e377f9a1 100644 +--- a/bgpd/bgp_updgrp_adv.c ++++ b/bgpd/bgp_updgrp_adv.c +@@ -817,6 +817,7 @@ void subgroup_default_originate(struct update_subgroup *subgrp, int withdraw) + struct bgp *bgp; + struct attr attr; + struct attr *new_attr = &attr; ++ struct aspath *aspath; + struct prefix p; + struct peer *from; + struct bgp_dest *dest; +@@ -854,6 +855,7 @@ void subgroup_default_originate(struct update_subgroup *subgrp, int withdraw) + /* make coverity happy */ + assert(attr.aspath); + ++ aspath = attr.aspath; + attr.med = 0; + attr.flag |= ATTR_FLAG_BIT(BGP_ATTR_MULTI_EXIT_DISC); + +@@ -1009,7 +1011,7 @@ void subgroup_default_originate(struct update_subgroup *subgrp, int withdraw) + } + } + +- aspath_unintern(&attr.aspath); ++ aspath_unintern(&aspath); + } + + /* +-- +2.14.1 + diff --git a/src/sonic-frr/patch/series b/src/sonic-frr/patch/series index 3c9c6e6acf25..fcc15bac8ffc 100644 --- a/src/sonic-frr/patch/series +++ b/src/sonic-frr/patch/series @@ -33,3 +33,4 @@ cross-compile-changes.patch 0034-fpm-Use-vrf_id-for-vrf-not-tabled_id.patch 0035-fpm-ignore-route-from-default-table.patch 0036-Add-support-of-bgp-l3vni-evpn.patch +0037-bgp-community-memory-leak-fix.patch