From 5a589217f873efaa7684a994c0c2b4ffebe28409 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 6 Sep 2024 14:29:45 -0400 Subject: [PATCH 1/5] tests: When finding nexthops ensure that they are active Do not accept a nexthop as valid unless it is marked as being active. Signed-off-by: Donald Sharp --- .../bgp_default_originate/test_default_orginate_vrf.py | 8 +------- tests/topotests/lib/common_config.py | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/topotests/bgp_default_originate/test_default_orginate_vrf.py b/tests/topotests/bgp_default_originate/test_default_orginate_vrf.py index 1506b02e5d..905c3e2b66 100644 --- a/tests/topotests/bgp_default_originate/test_default_orginate_vrf.py +++ b/tests/topotests/bgp_default_originate/test_default_orginate_vrf.py @@ -546,13 +546,7 @@ def test_verify_default_originate_route_with_non_default_VRF_p1(request): tc_name, result ) - result = verify_rib( - tgen, - addr_type, - "r2", - static_routes_input, - next_hop=DEFAULT_ROUTE_NXT_HOP_R1[addr_type], - ) + result = verify_rib(tgen, addr_type, "r2", static_routes_input) assert result is True, "Testcase {} : Failed \n Error: {}".format( tc_name, result ) diff --git a/tests/topotests/lib/common_config.py b/tests/topotests/lib/common_config.py index 540a627c65..2cee5fdaed 100644 --- a/tests/topotests/lib/common_config.py +++ b/tests/topotests/lib/common_config.py @@ -3371,7 +3371,7 @@ def verify_rib( found_hops = [ rib_r["ip"] for rib_r in rib_routes_json[st_rt][0]["nexthops"] - if "ip" in rib_r + if "ip" in rib_r and "active" in rib_r ] # If somehow key "ip" is not found in nexthops JSON From 1bbbcf043b1896b21cc303c37c99732858b6e1ed Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 7 Feb 2024 14:56:15 -0500 Subject: [PATCH 2/5] zebra: Properly note that a nhg's nexthop has gone down Current code when a link is set down is to just mark the nexthop group as not properly setup. Leaving situations where when an interface goes down and show output is entered we see incorrect state. This is true for anything that would be checking those flags at that point in time. Modify the interface down nexthop group code to notice the nexthops appropriately ( and I mean set the appropriate flags ) and to allow a `show ip route` command to actually display what is going on with the nexthops. eva# show ip route 1.0.0.0 Routing entry for 1.0.0.0/32 Known via "sharp", distance 150, metric 0, best Last update 00:00:06 ago * 192.168.44.33, via dummy1, weight 1 * 192.168.45.33, via dummy2, weight 1 sharpd@eva:~/frr1$ sudo ip link set dummy2 down eva# show ip route 1.0.0.0 Routing entry for 1.0.0.0/32 Known via "sharp", distance 150, metric 0, best Last update 00:00:12 ago * 192.168.44.33, via dummy1, weight 1 192.168.45.33, via dummy2 inactive, weight 1 Notice now that the 1.0.0.0/32 route now correctly displays the route for the nexthop group entry. Signed-off-by: Donald Sharp --- zebra/zebra_nhg.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index 637eabde8d..3dc4f24374 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -1101,11 +1101,15 @@ void zebra_nhg_check_valid(struct nhg_hash_entry *nhe) bool valid = false; /* - * If I have other nhe's depending on me, then this is a - * singleton nhe so set this nexthops flag as appropriate. + * If I have other nhe's depending on me, or I have nothing + * I am depending on then this is a + * singleton nhe so set this nexthops flag as appropriate. */ - if (nhg_connected_tree_count(&nhe->nhg_depends)) + if (nhg_connected_tree_count(&nhe->nhg_depends) || + nhg_connected_tree_count(&nhe->nhg_dependents) == 0) { + UNSET_FLAG(nhe->nhg.nexthop->flags, NEXTHOP_FLAG_FIB); UNSET_FLAG(nhe->nhg.nexthop->flags, NEXTHOP_FLAG_ACTIVE); + } /* If anthing else in the group is valid, the group is valid */ frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { From ce166ca78928d31200461ec841ac597ac97c52e9 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 11 Sep 2024 13:58:32 -0400 Subject: [PATCH 3/5] zebra: Expose _route_entry_dump_nh so it can be used. Expose this helper function so it can be used in zebra_nhg.c Signed-off-by: Donald Sharp --- zebra/rib.h | 4 ++++ zebra/zebra_rib.c | 9 ++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/zebra/rib.h b/zebra/rib.h index 4293b5f240..071cc7b3de 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -637,6 +637,10 @@ extern pid_t pid; extern uint32_t rt_table_main_id; +void route_entry_dump_nh(const struct route_entry *re, const char *straddr, + const struct vrf *re_vrf, + const struct nexthop *nexthop); + /* Name of hook calls */ #define ZEBRA_ON_RIB_PROCESS_HOOK_CALL "on_rib_process_dplane_results" diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 402a3104b9..cba626490e 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -4118,9 +4118,8 @@ void rib_delnode(struct route_node *rn, struct route_entry *re) /* * Helper that debugs a single nexthop within a route-entry */ -static void _route_entry_dump_nh(const struct route_entry *re, - const char *straddr, const struct vrf *re_vrf, - const struct nexthop *nexthop) +void route_entry_dump_nh(const struct route_entry *re, const char *straddr, + const struct vrf *re_vrf, const struct nexthop *nexthop) { char nhname[PREFIX_STRLEN]; char backup_str[50]; @@ -4243,7 +4242,7 @@ void _route_entry_dump(const char *func, union prefixconstptr pp, /* Dump nexthops */ for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) - _route_entry_dump_nh(re, straddr, vrf, nexthop); + route_entry_dump_nh(re, straddr, vrf, nexthop); if (zebra_nhg_get_backup_nhg(re->nhe)) { zlog_debug("%s(%s): backup nexthops:", straddr, @@ -4251,7 +4250,7 @@ void _route_entry_dump(const char *func, union prefixconstptr pp, nhg = zebra_nhg_get_backup_nhg(re->nhe); for (ALL_NEXTHOPS_PTR(nhg, nexthop)) - _route_entry_dump_nh(re, straddr, vrf, nexthop); + route_entry_dump_nh(re, straddr, vrf, nexthop); } zlog_debug("%s(%s): dump complete", straddr, VRF_LOGNAME(vrf)); From 3be8b48e6bab845a57b9bee83158f5b5bdf10555 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 11 Sep 2024 14:19:51 -0400 Subject: [PATCH 4/5] zebra: Reinstall nexthop when interface comes back up If a interface down event caused a nexthop group to remove one of the entries in the kernel, have it be reinstalled when the interface comes back up. Mark the nexthop as usable. new behavior: eva# show nexthop-group rib 181818168 ID: 181818168 (sharp) RefCnt: 1 Uptime: 00:00:23 VRF: default(bad-value) Valid, Installed Depends: (35) (38) (44) (51) via 192.168.99.33, dummy1 (vrf default), weight 1 via 192.168.100.33, dummy2 (vrf default), weight 1 via 192.168.101.33, dummy3 (vrf default), weight 1 via 192.168.102.33, dummy4 (vrf default), weight 1 eva# conf eva(config)# int dummy3 eva(config-if)# shut eva(config-if)# do show nexthop-group rib 181818168 ID: 181818168 (sharp) RefCnt: 1 Uptime: 00:00:44 VRF: default(bad-value) Depends: (35) (38) (44) (51) via 192.168.99.33, dummy1 (vrf default), weight 1 via 192.168.100.33, dummy2 (vrf default), weight 1 via 192.168.101.33, dummy3 (vrf default) inactive, weight 1 via 192.168.102.33, dummy4 (vrf default), weight 1 eva(config-if)# no shut eva(config-if)# do show nexthop-group rib 181818168 ID: 181818168 (sharp) RefCnt: 1 Uptime: 00:00:53 VRF: default(bad-value) Valid, Installed Depends: (35) (38) (44) (51) via 192.168.99.33, dummy1 (vrf default), weight 1 via 192.168.100.33, dummy2 (vrf default), weight 1 via 192.168.101.33, dummy3 (vrf default), weight 1 via 192.168.102.33, dummy4 (vrf default), weight 1 eva(config-if)# exit eva(config)# exit eva# exit sharpd@eva ~/frr1 (master) [255]> ip nexthop show id 181818168 id 181818168 group 35/38/44/51 proto 194 sharpd@eva ~/frr1 (master)> Signed-off-by: Donald Sharp --- zebra/zebra_nhg.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index 3dc4f24374..6493ca6aff 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -3782,6 +3782,17 @@ void zebra_interface_nhg_reinstall(struct interface *ifp) frr_each_safe (nhg_connected_tree, &rb_node_dep->nhe->nhg_dependents, rb_node_dependent) { + struct nexthop *nhop_dependent = + rb_node_dependent->nhe->nhg.nexthop; + + while (nhop_dependent && + !nexthop_same(nhop_dependent, nh)) + nhop_dependent = nhop_dependent->next; + + if (nhop_dependent) + SET_FLAG(nhop_dependent->flags, + NEXTHOP_FLAG_ACTIVE); + if (IS_ZEBRA_DEBUG_NHG) zlog_debug("%s dependent nhe %pNG Setting Reinstall flag", __func__, From f02d76f0fd1085c522bc89f5c3aae8d03230ab6e Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 11 Sep 2024 14:24:27 -0400 Subject: [PATCH 5/5] zebra: Attempt to reuse NHG after interface up and route reinstall The previous commit modified zebra to reinstall the singleton nexthops for a nexthop group when a interface event comes up. Now let's modify zebra to attempt to reuse the nexthop group when this happens and the upper level protocol resends the route down with that. Only match if the protocol is the same as well as the instance and the nexthop groups would match. Here is the new behavior: eva(config)# do show ip route 9.9.9.9/32 Routing entry for 9.9.9.9/32 Known via "static", distance 1, metric 0, best Last update 00:00:08 ago * 192.168.99.33, via dummy1, weight 1 * 192.168.100.33, via dummy2, weight 1 * 192.168.101.33, via dummy3, weight 1 * 192.168.102.33, via dummy4, weight 1 eva(config)# do show ip route nexthop-group 9.9.9.9/32 % Unknown command: do show ip route nexthop-group 9.9.9.9/32 eva(config)# do show ip route 9.9.9.9/32 nexthop-group Routing entry for 9.9.9.9/32 Known via "static", distance 1, metric 0, best Last update 00:00:54 ago Nexthop Group ID: 57 * 192.168.99.33, via dummy1, weight 1 * 192.168.100.33, via dummy2, weight 1 * 192.168.101.33, via dummy3, weight 1 * 192.168.102.33, via dummy4, weight 1 eva(config)# exit eva# conf eva(config)# int dummy3 eva(config-if)# shut eva(config-if)# no shut eva(config-if)# do show ip route 9.9.9.9/32 nexthop-group Routing entry for 9.9.9.9/32 Known via "static", distance 1, metric 0, best Last update 00:00:08 ago Nexthop Group ID: 57 * 192.168.99.33, via dummy1, weight 1 * 192.168.100.33, via dummy2, weight 1 * 192.168.101.33, via dummy3, weight 1 * 192.168.102.33, via dummy4, weight 1 eva(config-if)# exit eva(config)# exit eva# exit sharpd@eva ~/frr1 (master) [255]> ip nexthop show id 57 id 57 group 37/43/50/58 proto zebra sharpd@eva ~/frr1 (master)> ip route show 9.9.9.9/32 9.9.9.9 nhid 57 proto 196 metric 20 nexthop via 192.168.99.33 dev dummy1 weight 1 nexthop via 192.168.100.33 dev dummy2 weight 1 nexthop via 192.168.101.33 dev dummy3 weight 1 nexthop via 192.168.102.33 dev dummy4 weight 1 sharpd@eva ~/frr1 (master)> Notice that we now no longer are creating a bunch of new nexthop groups. Signed-off-by: Donald Sharp --- zebra/zebra_nhg.c | 158 +++++++++++++++++++++++++++++++++++++++++++++- zebra/zebra_nhg.h | 3 +- zebra/zebra_rib.c | 2 +- 3 files changed, 159 insertions(+), 4 deletions(-) diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index 6493ca6aff..b5b8e22552 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -1103,7 +1103,7 @@ void zebra_nhg_check_valid(struct nhg_hash_entry *nhe) /* * If I have other nhe's depending on me, or I have nothing * I am depending on then this is a - * singleton nhe so set this nexthops flag as appropriate. + * singleton nhe so set this nexthops flag as appropriate. */ if (nhg_connected_tree_count(&nhe->nhg_depends) || nhg_connected_tree_count(&nhe->nhg_dependents) == 0) { @@ -2924,6 +2924,154 @@ static uint32_t proto_nhg_nexthop_active_update(struct nexthop_group *nhg) return curr_active; } +/* + * This function takes the start of two comparable nexthops from two different + * nexthop groups and walks them to see if they can be considered the same + * or not. This is being used to determine if zebra should reuse a nhg + * from the old_re to the new_re, when an interface goes down and the + * new nhg sent down from the upper level protocol would resolve to it + */ +static bool zebra_nhg_nexthop_compare(const struct nexthop *nhop, + const struct nexthop *old_nhop, + const struct route_node *rn) +{ + bool same = true; + + while (nhop && old_nhop) { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: %pRN Comparing %pNHvv(%u) to old: %pNHvv(%u)", + __func__, rn, nhop, nhop->flags, old_nhop, + old_nhop->flags); + if (!CHECK_FLAG(old_nhop->flags, NEXTHOP_FLAG_ACTIVE)) { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: %pRN Old is not active going to the next one", + __func__, rn); + old_nhop = old_nhop->next; + continue; + } + + if (nexthop_same(nhop, old_nhop)) { + struct nexthop *new_recursive, *old_recursive; + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: %pRN New and old are same, continuing search", + __func__, rn); + + new_recursive = nhop->resolved; + old_recursive = old_nhop->resolved; + + while (new_recursive && old_recursive) { + if (!nexthop_same(new_recursive, old_recursive)) { + same = false; + break; + } + + new_recursive = new_recursive->next; + old_recursive = old_recursive->next; + } + + if (new_recursive) + same = false; + else if (old_recursive) { + while (old_recursive) { + if (CHECK_FLAG(old_recursive->flags, + NEXTHOP_FLAG_ACTIVE)) + break; + old_recursive = old_recursive->next; + } + + if (old_recursive) + same = false; + } + + if (!same) + break; + + nhop = nhop->next; + old_nhop = old_nhop->next; + continue; + } else { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s:%pRN They are not the same, stopping using new nexthop entry", + __func__, rn); + same = false; + break; + } + } + + if (nhop) + same = false; + else if (old_nhop) { + while (old_nhop) { + if (CHECK_FLAG(old_nhop->flags, NEXTHOP_FLAG_ACTIVE)) + break; + old_nhop = old_nhop->next; + } + + if (old_nhop) + same = false; + } + + return same; +} + +static struct nhg_hash_entry *zebra_nhg_rib_compare_old_nhe( + const struct route_node *rn, const struct route_entry *re, + struct nhg_hash_entry *new_nhe, struct nhg_hash_entry *old_nhe) +{ + struct nexthop *nhop, *old_nhop; + bool same = true; + struct vrf *vrf = vrf_lookup_by_id(re->vrf_id); + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) { + char straddr[PREFIX_STRLEN]; + + prefix2str(&rn->p, straddr, sizeof(straddr)); + zlog_debug("%s: %pRN new id: %u old id: %u", __func__, rn, + new_nhe->id, old_nhe->id); + zlog_debug("%s: %pRN NEW", __func__, rn); + for (ALL_NEXTHOPS(new_nhe->nhg, nhop)) + route_entry_dump_nh(re, straddr, vrf, nhop); + + zlog_debug("%s: %pRN OLD", __func__, rn); + for (ALL_NEXTHOPS(old_nhe->nhg, nhop)) + route_entry_dump_nh(re, straddr, vrf, nhop); + } + + nhop = new_nhe->nhg.nexthop; + old_nhop = old_nhe->nhg.nexthop; + + same = zebra_nhg_nexthop_compare(nhop, old_nhop, rn); + + if (same) { + struct nexthop_group *bnhg, *old_bnhg; + + bnhg = zebra_nhg_get_backup_nhg(new_nhe); + old_bnhg = zebra_nhg_get_backup_nhg(old_nhe); + + if (bnhg || old_bnhg) { + if (bnhg && !old_bnhg) + same = false; + else if (!bnhg && old_bnhg) + same = false; + else + same = zebra_nhg_nexthop_compare(bnhg->nexthop, + old_bnhg->nexthop, + rn); + } + } + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s:%pRN They are %sthe same, using the %s nhg entry", + __func__, rn, same ? "" : "not ", + same ? "old" : "new"); + + if (same) + return old_nhe; + else + return new_nhe; +} + /* * Iterate over all nexthops of the given RIB entry and refresh their * ACTIVE flag. If any nexthop is found to toggle the ACTIVE flag, @@ -2931,7 +3079,8 @@ static uint32_t proto_nhg_nexthop_active_update(struct nexthop_group *nhg) * * Return value is the new number of active nexthops. */ -int nexthop_active_update(struct route_node *rn, struct route_entry *re) +int nexthop_active_update(struct route_node *rn, struct route_entry *re, + struct route_entry *old_re) { struct nhg_hash_entry *curr_nhe; uint32_t curr_active = 0, backup_active = 0; @@ -2987,6 +3136,11 @@ backups_done: new_nhe = zebra_nhg_rib_find_nhe(curr_nhe, rt_afi); + if (old_re && old_re->type == re->type && + old_re->instance == re->instance) + new_nhe = zebra_nhg_rib_compare_old_nhe(rn, re, new_nhe, + old_re->nhe); + if (IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug( "%s: re %p CHANGED: nhe %p (%pNG) => new_nhe %p (%pNG)", diff --git a/zebra/zebra_nhg.h b/zebra/zebra_nhg.h index 712c1057a1..435ccb0d01 100644 --- a/zebra/zebra_nhg.h +++ b/zebra/zebra_nhg.h @@ -404,7 +404,8 @@ extern void zebra_nhg_mark_keep(void); /* Nexthop resolution processing */ struct route_entry; /* Forward ref to avoid circular includes */ -extern int nexthop_active_update(struct route_node *rn, struct route_entry *re); +extern int nexthop_active_update(struct route_node *rn, struct route_entry *re, + struct route_entry *old_re); #ifdef _FRR_ATTRIBUTE_PRINTFRR #pragma FRR printfrr_ext "%pNG" (const struct nhg_hash_entry *) diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index cba626490e..721eca70a4 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -1313,7 +1313,7 @@ static void rib_process(struct route_node *rn) */ if (CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED)) { proto_re_changed = re; - if (!nexthop_active_update(rn, re)) { + if (!nexthop_active_update(rn, re, old_fib)) { const struct prefix *p; struct rib_table_info *info;