From 82beaf6ae520f1c969755ae0ee82d2af2f150927 Mon Sep 17 00:00:00 2001 From: Philippe Guibert Date: Thu, 23 Nov 2023 14:19:04 +0100 Subject: [PATCH 1/6] sharpd: fix deleting nhid when suppressing nexthop from nh group When removing the last nexthop from a nexthop-group, the nexthop group remains in the zebra contexts: > ubuntu2204(config)# nexthop-group gdgd > 2023/11/23 14:06:36 SHARP: [Q5NBA-GN1BG] NHG ID assigned: 179687502 > ubuntu2204(config-nh-group)# nexthop 192.0.2.7 loop1 > ubuntu2204(config-nh-group)# 2023/11/23 14:06:38 ZEBRA: [VNMVB-91G3G] _netlink_nexthop_build_group: ID (179687502): group 338 > 2023/11/23 14:06:38 ZEBRA: [R43C6-KYHWT] netlink_nexthop_msg_encode: RTM_NEWNEXTHOP, id=179687502 > 2023/11/23 14:06:38 ZEBRA: [HYEHE-CQZ9G] nl_batch_send: netlink-dp (NS 0), batch size=44, msg cnt=1 > 2023/11/23 14:06:38 SHARP: [JWRCN-N9K90] Installed nhg 179687502 > > ubuntu2204(config-nh-group)# no nexthop 192.0.2.7 loop1 > 2023/11/23 14:06:47 SHARP: [Y2G2F-ZTW6M] nhg_add: nhg 179687502 not sent: no valid nexthops > ubuntu2204(config-nh-group)# do show nexthop-group rib 179687502 > ID: 179687502 (sharp) > RefCnt: 1 > Uptime: 00:00:23 > VRF: default > Valid, Installed > Depends: (338) > via 192.0.2.7, loop1 (vrf default), weight 1 Fix this by sending an NHG_DEL message when no nexthops are attached, and when the id was already installed. Fixes: 5a9c0931aa95 ("sharpd: don't send invalid nexthop-groups to zebra") Signed-off-by: Philippe Guibert --- sharpd/sharp_zebra.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sharpd/sharp_zebra.c b/sharpd/sharp_zebra.c index aa720bacf2..9ff6ba99b6 100644 --- a/sharpd/sharp_zebra.c +++ b/sharpd/sharp_zebra.c @@ -563,9 +563,15 @@ void nhg_add(uint32_t id, const struct nexthop_group *nhg, } if (api_nhg.nexthop_num == 0) { - zlog_debug("%s: nhg %u not sent: no valid nexthops", __func__, - id); - is_valid = false; + if (sharp_nhgroup_id_is_installed(id)) { + zlog_debug("%s: nhg %u: no nexthops, deleting nexthop group", __func__, + id); + zclient_nhg_send(zclient, ZEBRA_NHG_DEL, &api_nhg); + } else { + zlog_debug("%s: nhg %u not sent: no valid nexthops", __func__, + id); + is_valid = false; + } goto done; } From 427d3f81f4a8c040b52ecb2e1d7ba4224b2d069c Mon Sep 17 00:00:00 2001 From: Philippe Guibert Date: Mon, 4 Sep 2023 15:43:37 +0200 Subject: [PATCH 2/6] zebra: clarify error when calling zebra_nhg_rib_find_nhe() Display a specific log message when the rt_nhe parameter is not set at all. Signed-off-by: Philippe Guibert --- zebra/zebra_nhg.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index c38203df0f..c172f18cf1 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -1526,7 +1526,13 @@ zebra_nhg_rib_find_nhe(struct nhg_hash_entry *rt_nhe, afi_t rt_afi) { struct nhg_hash_entry *nhe = NULL; - if (!(rt_nhe && rt_nhe->nhg.nexthop)) { + if (!rt_nhe) { + flog_err(EC_ZEBRA_TABLE_LOOKUP_FAILED, + "No nhg_hash_entry passed to %s", __func__); + return NULL; + } + + if (!rt_nhe->nhg.nexthop) { flog_err(EC_ZEBRA_TABLE_LOOKUP_FAILED, "No nexthop passed to %s", __func__); return NULL; From d41b425ab3384f80b338a8ec3b407a277a8646e0 Mon Sep 17 00:00:00 2001 From: Philippe Guibert Date: Thu, 23 Nov 2023 17:18:23 +0100 Subject: [PATCH 3/6] zebra: add client counter for nhg operations Add three counters that account for the nhg operations that are using the zebra API with the NHG_ADD and NHG_DEL commands. > # show zebra client > [..] > Type Add Update Del > ================================================== > IPv4 100 0 0 > IPv6 0 0 0 > Redist:v4 0 0 0 > Redist:v6 0 0 0 > NHG 1 1 1 > VRF 3 0 0 > [..] Signed-off-by: Philippe Guibert --- zebra/zapi_msg.c | 10 +++++++++- zebra/zserv.c | 2 ++ zebra/zserv.h | 3 +++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index 98268dafa6..f7293d8f0f 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -1973,6 +1973,8 @@ static void zread_nhg_del(ZAPI_HANDLER_ARGS) zsend_nhg_notify(api_nhg.proto, client->instance, client->session_id, api_nhg.id, ZAPI_NHG_REMOVE_FAIL); + /* Stats */ + client->nhg_del_cnt++; } static void zread_nhg_add(ZAPI_HANDLER_ARGS) @@ -1981,7 +1983,7 @@ static void zread_nhg_add(ZAPI_HANDLER_ARGS) struct zapi_nhg api_nhg = {}; struct nexthop_group *nhg = NULL; struct nhg_backup_info *bnhg = NULL; - struct nhg_hash_entry *nhe; + struct nhg_hash_entry *nhe, *nhe_tmp; s = msg; if (zapi_nhg_decode(s, hdr->command, &api_nhg) < 0) { @@ -2039,6 +2041,12 @@ static void zread_nhg_add(ZAPI_HANDLER_ARGS) nexthop_group_delete(&nhg); zebra_nhg_backup_free(&bnhg); + /* Stats */ + nhe_tmp = zebra_nhg_lookup_id(api_nhg.id); + if (nhe_tmp) + client->nhg_upd8_cnt++; + else + client->nhg_add_cnt++; } static void zread_route_add(ZAPI_HANDLER_ARGS) diff --git a/zebra/zserv.c b/zebra/zserv.c index 2db228b158..1d3989dc73 100644 --- a/zebra/zserv.c +++ b/zebra/zserv.c @@ -1061,6 +1061,8 @@ static void zebra_show_client_detail(struct vty *vty, struct zserv *client) 0, client->redist_v4_del_cnt); vty_out(vty, "Redist:v6 %-12u%-12u%-12u\n", client->redist_v6_add_cnt, 0, client->redist_v6_del_cnt); + vty_out(vty, "NHG %-12u%-12u%-12u\n", client->nhg_add_cnt, + client->nhg_upd8_cnt, client->nhg_del_cnt); vty_out(vty, "VRF %-12u%-12u%-12u\n", client->vrfadd_cnt, 0, client->vrfdel_cnt); vty_out(vty, "Connected %-12u%-12u%-12u\n", client->ifadd_cnt, 0, diff --git a/zebra/zserv.h b/zebra/zserv.h index 90aa4d53f4..631145e4fb 100644 --- a/zebra/zserv.h +++ b/zebra/zserv.h @@ -185,6 +185,9 @@ struct zserv { uint32_t local_es_evi_add_cnt; uint32_t local_es_evi_del_cnt; uint32_t error_cnt; + uint32_t nhg_add_cnt; + uint32_t nhg_upd8_cnt; + uint32_t nhg_del_cnt; time_t nh_reg_time; time_t nh_dereg_time; From e91ef7727f7c8062f0fc78fe3f6bf5a654abd0b7 Mon Sep 17 00:00:00 2001 From: Philippe Guibert Date: Fri, 24 Nov 2023 16:38:31 +0100 Subject: [PATCH 4/6] zebra: fix wrong nexthop id debug message When allocating big protocol level identifiers, the number range is big, and when pushing to netlink messages, the first nexthop group is truncated, whereas the nexthop has been installed on the kernel. > ubuntu2204(config)# nexthop-group A > ubuntu2204(config-nh-group)# group 1 > ubuntu2204(config-nh-group)# group 2 > ubuntu2204(config-nh-group)# exi > ubuntu2204(config)# nexthop-group 1 > ubuntu2204(config-nh-group)# nexthop 192.0.2.130 loop1 enable-proto-nhg-control > ubuntu2204(config-nh-group)# exi > ubuntu2204(config)# nexthop-group 2 > ubuntu2204(config-nh-group)# nexthop 192.0.2.131 loop1 enable-proto-nhg-control > [..] > 2023/11/24 16:47:40 ZEBRA: [VNMVB-91G3G] _netlink_nexthop_build_group: ID (179687500): group 17968/179687502 > # ip nexthop ls > id 179687500 group 179687501/179687502 proto 194 Fix this by increasing the buffer size when appending the first number. Fixes: 8d03bc501b5f ("zebra: Handle nhg_hash_entry encaps/more debugging") Signed-off-by: Philippe Guibert --- zebra/rt_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index 477119d1ed..68b89454a5 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -2624,7 +2624,7 @@ static bool _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size, if (IS_ZEBRA_DEBUG_KERNEL) { if (i == 0) - snprintf(buf, sizeof(buf1), "group %u", + snprintf(buf, sizeof(buf), "group %u", grp[i].id); else { snprintf(buf1, sizeof(buf1), "/%u", From 2ff41dd690eab66d7ca12d28cecaab1d5f7b4dc9 Mon Sep 17 00:00:00 2001 From: Philippe Guibert Date: Mon, 27 Nov 2023 12:28:52 +0100 Subject: [PATCH 5/6] lib: remove useless nexthop_group_active_nexthop_num_no_recurse() The nexthop_group_active_nexthop_num_no_recurse() function is no more used. Let us remove the function call. Fixes: 148813c22a8d ("zebra: zebra_nhg check each nexthop for active, not just number") Signed-off-by: Philippe Guibert --- lib/nexthop_group.c | 14 -------------- lib/nexthop_group.h | 2 -- 2 files changed, 16 deletions(-) diff --git a/lib/nexthop_group.c b/lib/nexthop_group.c index c75ff7b4cd..e5c2258191 100644 --- a/lib/nexthop_group.c +++ b/lib/nexthop_group.c @@ -105,20 +105,6 @@ uint8_t nexthop_group_active_nexthop_num(const struct nexthop_group *nhg) return num; } -uint8_t -nexthop_group_active_nexthop_num_no_recurse(const struct nexthop_group *nhg) -{ - struct nexthop *nhop; - uint8_t num = 0; - - for (nhop = nhg->nexthop; nhop; nhop = nhop->next) { - if (CHECK_FLAG(nhop->flags, NEXTHOP_FLAG_ACTIVE)) - num++; - } - - return num; -} - bool nexthop_group_has_label(const struct nexthop_group *nhg) { struct nexthop *nhop; diff --git a/lib/nexthop_group.h b/lib/nexthop_group.h index 78237e464f..88f9c62ccb 100644 --- a/lib/nexthop_group.h +++ b/lib/nexthop_group.h @@ -154,8 +154,6 @@ extern uint8_t nexthop_group_nexthop_num_no_recurse(const struct nexthop_group *nhg); extern uint8_t nexthop_group_active_nexthop_num(const struct nexthop_group *nhg); -extern uint8_t -nexthop_group_active_nexthop_num_no_recurse(const struct nexthop_group *nhg); extern bool nexthop_group_has_label(const struct nexthop_group *nhg); From 0c34fa2cc6354633dfb64275d1e37c273f3b95d0 Mon Sep 17 00:00:00 2001 From: Philippe Guibert Date: Thu, 30 Nov 2023 13:57:57 +0100 Subject: [PATCH 6/6] lib: fix nexthop node entry from nhg_list When stopping a VRF, the linked list entries must be removed too. Fixes: 98cbbaea91f6 ("lib: Handle if up/down and vrf enable/disable events") Signed-off-by: Philippe Guibert --- lib/nexthop_group.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/nexthop_group.c b/lib/nexthop_group.c index e5c2258191..9abd3139fd 100644 --- a/lib/nexthop_group.c +++ b/lib/nexthop_group.c @@ -1234,9 +1234,9 @@ void nexthop_group_disable_vrf(struct vrf *vrf) struct nexthop_hold *nhh; RB_FOREACH (nhgc, nhgc_entry_head, &nhgc_entries) { - struct listnode *node; + struct listnode *node, *nnode; - for (ALL_LIST_ELEMENTS_RO(nhgc->nhg_list, node, nhh)) { + for (ALL_LIST_ELEMENTS(nhgc->nhg_list, node, nnode, nhh)) { struct nexthop nhop; struct nexthop *nh; @@ -1257,6 +1257,8 @@ void nexthop_group_disable_vrf(struct vrf *vrf) nhg_hooks.del_nexthop(nhgc, nh); nexthop_free(nh); + + list_delete_node(nhgc->nhg_list, node); } } }