Skip to content

Commit

Permalink
bgpd:'bgpd' core generated on Leaf device with system-test config
Browse files Browse the repository at this point in the history
Description:
aggregate member route was enqueued for recalculation
    while bgp instance was deleted.
    As part of aggregate member route deletion, the aggregate route is
    reinstalled with self-peer as source, but self-peer is already removed.
    Assert() for null peer pointer is path attribute aborts bgp.
Problem Description/Summary :
BGP crashed while cleaning up aggregate route as part of bgp instance deletion.
-----------------------
Leaf-4(config)#
Leaf-4(config)# no router bgp 65179 vrf Vrf-red
Leaf-4(config)# no router bgp 65179
Leaf-4(config)#
Leaf-4(config)#
Leaf-4(config)# root@Leaf-4:~#

Sep 26 15:38:21.257554 System is not ready - Core services are down
------------
router bgp 65179
bgp router-id 100.2.0.3
no bgp default ipv4-unicast
bgp network import-check
neighbor LeafToHostv4 peer-group
neighbor LeafToHostv4 remote-as 65003
neighbor LeafToHostv6 peer-group
neighbor LeafToHostv6 remote-as 65003
neighbor LeafToSpinev4 peer-group
neighbor LeafToSpinev4 remote-as 65134
neighbor LeafToSpinev4 bfd
neighbor LeafToSpinev6 peer-group
neighbor LeafToSpinev6 remote-as 65134
neighbor LeafToSpinev6 bfd
neighbor WindowsServer peer-group
neighbor WindowsServer remote-as 65201
neighbor 155.1.0.4 peer-group LeafToSpinev4
neighbor 155.2.0.4 peer-group LeafToSpinev4
neighbor 2000:155:1::4 peer-group LeafToSpinev6
neighbor 2000:155:2::4 peer-group LeafToSpinev6
neighbor 172.16.11.2 peer-group WindowsServer
neighbor 172.16.1.2 remote-as 65101
neighbor 2000:172:16:1::2 remote-as 65101
bgp listen limit 400
bgp listen range 133.3.0.0/16 peer-group LeafToHostv4
bgp listen range 2000:133:3::/48 peer-group LeafToHostv6
!
address-family ipv4 unicast
aggregate-address 133.1.0.0/16 as-set
aggregate-address 133.2.0.0/16 as-set
aggregate-address 133.3.0.0/16 as-set
aggregate-address 133.4.0.0/16 as-set
redistribute connected
neighbor LeafToHostv4 activate
neighbor LeafToSpinev4 activate
neighbor LeafToSpinev4 allowas-in 1
neighbor LeafToSpinev4 route-map spine_v4_export out
neighbor WindowsServer activate
neighbor 172.16.1.2 activate
exit-address-family
!
address-family ipv6 unicast
aggregate-address 2000:133:1::/48 as-set
aggregate-address 2000:133:2::/48 as-set
aggregate-address 2000:133:3::/48 as-set
aggregate-address 2000:133:4::/48 as-set
redistribute connected
..
------------
(gdb) bt
name=0x55607dd49090 <_FUNCTION_.23915> "bgp_path_info_add")
at bgpd/bgpd.c:1159
name=name@entry=0x55607dd49090 <_FUNCTION_.23915> "bgp_path_info_add",
peer=<optimized out>) at bgpd/bgpd.c:1158
pi=<optimized out>) at bgpd/bgp_route.c:313
afi=afi@entry=AFI_IP, safi=safi@entry=SAFI_UNICAST,
p=p@entry=0x55607f1c4e10, origin=<optimized out>, aspath=0x55607f4bc8a0,
community=<optimized out>, ecommunity=<optimized out>,
lcommunity=<optimized out>, atomic_aggregate=0 '\000',
aggregate=0x55607f1c4ee0) at bgpd/bgp_route.c:5926
aggr_p=<optimized out>, aggregate=<optimized out>, pi=0x55607f41f9f0,
safi=SAFI_UNICAST, afi=AFI_IP, bgp=0x55607eeba5d0) at bgpd/bgp_route.c:6385
del=del@entry=0x55607f41f9f0, afi=afi@entry=AFI_IP,
--Type <return> to continue, or q <return> to quit--
safi=safi@entry=SAFI_UNICAST) at bgpd/bgp_route.c:6446
pi=0x55607f41f9f0, peer=0x55607ef22c10, afi=AFI_IP, safi=SAFI_UNICAST)
at bgpd/bgp_route.c:2885
data=<optimized out>) at bgpd/bgp_route.c:4125
at lib/workqueue.c:291
at lib/thread.c:1540
at bgpd/bgp_main.c:498
(gdb) fr 5
name=name@entry=0x55607dd49090 <_FUNCTION_.23915> "bgp_path_info_add",
peer=<optimized out>) at bgpd/bgpd.c:1158
1158 bgpd/bgpd.c: No such file or directory.
(gdb) fr 10
pi=0x55607f41f9f0, peer=0x55607ef22c10, afi=AFI_IP, safi=SAFI_UNICAST)
at bgpd/bgp_route.c:2885
2885 bgpd/bgp_route.c: No such file or directory.
(gdb) p peer->lock
$2 = 210
(gdb) p peer->status
$3 = 8
(gdb)
(gdb) p bgp
$11 = (struct bgp *) 0x56121ba315d0
(gdb) p bgp->peer_self
$12 = (struct peer *) 0x0
(gdb) p bgp->name
$13 = 0x0
(gdb) p bgp->name_pretty
$14 = 0x56121bb046a0 "VRF default"
(gdb) p bgp->inst_type
$15 = BGP_INSTANCE_TYPE_DEFAULT
(gdb)

bgp_aggregate_install():
5920
5921 new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_AGGREGATE, 0,
5922 bgp->peer_self, attr, rn);
5923
5924 SET_FLAG(new->flags, BGP_PATH_VALID);
5925
5926 bgp_path_info_add(rn, new);
5927 bgp_process(bgp, rn, afi, safi);

299 void bgp_path_info_add(struct bgp_node *rn, struct bgp_path_info *pi):
...
310
311 bgp_path_info_lock(pi);
312 bgp_lock_node(rn);
313 peer_lock(pi->peer); /* bgp_path_info peer reference */ <<< This points to bgp->peer_self = NULL
314 }

1573 #define peer_lock(B) peer_lock_with_caller(_FUNCTION_, (B))

1156 /* increase reference count on a struct peer */
1157 struct peer *peer_lock_with_caller(const char *name, struct peer *peer)
1158 {
1159 assert(peer && (peer->lock >= 0)); <<< asserted here
1160

Similar issue was fixed in community and we already have the fix:
#4816
root@sr407497_lxc2:/home/ubuntu/frr_repo/frr/bgpd# git diff dfb6fd1~ dfb6fd1
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index abad1db..a372568 100644
— a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -5332,6 +5332,13 @@ static void bgp_purge_af_static_redist_routes(struct bgp *bgp, afi_t afi,
struct bgp_node *rn;
struct bgp_path_info *pi;

+ /* Do not install the aggregate route if BGP is in the
+ * process of termination.
+ */
+ if (bgp_flag_check(bgp, BGP_FLAG_DELETE_IN_PROGRESS) ||
+ (bgp->peer_self == NULL))
+ return;
+
table = bgp->rib[afi][safi];
for (rn = bgp_table_top(table); rn; rn = bgp_route_next(rn)) {
for (pi = bgp_node_get_bgp_path_info(rn); pi; pi = pi->next) {

But looks like similar handling is required at other places as well:

Expected Behavior :
BGP daemon should not crash

Signed-off-by: sudhanshukumar22 <sudhanshu.kumar@broadcom.com>
  • Loading branch information
sudhanshukumar22 committed Jan 18, 2021
1 parent 849909e commit dfb19e9
Showing 1 changed file with 24 additions and 0 deletions.
24 changes: 24 additions & 0 deletions bgpd/bgp_route.c
Original file line number Diff line number Diff line change
Expand Up @@ -6657,6 +6657,9 @@ static void bgp_aggregate_install(

if (!attr) {
bgp_aggregate_delete(bgp, p, afi, safi, aggregate);
if (BGP_DEBUG(update_groups, UPDATE_GROUPS))
zlog_debug("%s(%pFX): null attribute", __func__,
p);
return;
}

Expand Down Expand Up @@ -7175,6 +7178,13 @@ static void bgp_add_route_to_aggregate(struct bgp *bgp,
struct ecommunity *ecommunity = NULL;
struct lcommunity *lcommunity = NULL;

/* If the bgp instance is being deleted or self peer is deleted
* then do not create aggregate route
*/
if (CHECK_FLAG(bgp->flags, BGP_FLAG_DELETE_IN_PROGRESS)
|| (bgp->peer_self == NULL))
return;

/* ORIGIN attribute: If at least one route among routes that are
* aggregated has ORIGIN with the value INCOMPLETE, then the
* aggregated route must have the ORIGIN attribute with the value
Expand Down Expand Up @@ -7291,6 +7301,13 @@ static void bgp_remove_route_from_aggregate(struct bgp *bgp, afi_t afi,
struct lcommunity *lcommunity = NULL;
unsigned long match = 0;

/* If the bgp instance is being deleted or self peer is deleted
* then do not create aggregate route
*/
if (CHECK_FLAG(bgp->flags, BGP_FLAG_DELETE_IN_PROGRESS)
|| (bgp->peer_self == NULL))
return;

if (BGP_PATH_HOLDDOWN(pi))
return;

Expand Down Expand Up @@ -7487,6 +7504,13 @@ int bgp_aggregate_unset(struct bgp *bgp, struct prefix *prefix, afi_t afi,
struct bgp_dest *dest;
struct bgp_aggregate *aggregate;

/* If the bgp instance is being deleted or self peer is deleted
* then do not create aggregate route
*/
if (CHECK_FLAG(bgp->flags, BGP_FLAG_DELETE_IN_PROGRESS)
|| (bgp->peer_self == NULL))
return 0;

apply_mask(prefix);
/* Old configuration check. */
dest = bgp_node_lookup(bgp->aggregate[afi][safi], prefix);
Expand Down

0 comments on commit dfb19e9

Please sign in to comment.