From f663c5819c7ce9b3b5b37255c9bb7c752eafb60e Mon Sep 17 00:00:00 2001 From: Renato Westphal Date: Thu, 9 Apr 2020 15:56:11 -0300 Subject: [PATCH 1/4] bgpd: convert NHT code to use rb-trees instead of routing tables Fist, routing tables aren't the most appropriate data structure to store nexthops and imported routes since we don't need to do longest prefix matches with that information. Second, by converting the NHT code to use rb-trees, we can index the nexthops using additional information, not only the destination address. This will be useful later to index bgpd's nexthops by both destination and SR-TE color. Co-authored-by: Sebastien Merle Signed-off-by: Renato Westphal --- bgpd/bgp_nexthop.c | 101 +++++++++++++++---------------- bgpd/bgp_nexthop.h | 29 ++++++++- bgpd/bgp_nht.c | 144 ++++++++++----------------------------------- bgpd/bgpd.h | 9 +-- 4 files changed, 110 insertions(+), 173 deletions(-) diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c index 5cc0d60529..9fbd5063e5 100644 --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -35,7 +35,6 @@ #include "filter.h" #include "bgpd/bgpd.h" -#include "bgpd/bgp_table.h" #include "bgpd/bgp_route.h" #include "bgpd/bgp_attr.h" #include "bgpd/bgp_nexthop.h" @@ -48,10 +47,15 @@ DEFINE_MTYPE_STATIC(BGPD, MARTIAN_STRING, "BGP Martian Address Intf String"); -char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size) +int bgp_nexthop_cache_compare(const struct bgp_nexthop_cache *a, + const struct bgp_nexthop_cache *b) { - prefix2str(bgp_dest_get_prefix(bnc->dest), buf, size); - return buf; + return prefix_cmp(&a->prefix, &b->prefix); +} + +const char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size) +{ + return prefix2str(&bnc->prefix, buf, size); } void bnc_nexthop_free(struct bgp_nexthop_cache *bnc) @@ -59,32 +63,47 @@ void bnc_nexthop_free(struct bgp_nexthop_cache *bnc) nexthops_free(bnc->nexthop); } -struct bgp_nexthop_cache *bnc_new(void) +struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree, + struct prefix *prefix) { struct bgp_nexthop_cache *bnc; bnc = XCALLOC(MTYPE_BGP_NEXTHOP_CACHE, sizeof(struct bgp_nexthop_cache)); + bnc->prefix = *prefix; + bnc->tree = tree; LIST_INIT(&(bnc->paths)); + bgp_nexthop_cache_add(tree, bnc); + return bnc; } void bnc_free(struct bgp_nexthop_cache *bnc) { bnc_nexthop_free(bnc); + bgp_nexthop_cache_del(bnc->tree, bnc); XFREE(MTYPE_BGP_NEXTHOP_CACHE, bnc); } -/* Reset and free all BGP nexthop cache. */ -static void bgp_nexthop_cache_reset(struct bgp_table *table) +struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, + struct prefix *prefix) +{ + struct bgp_nexthop_cache bnc = {}; + + if (!tree) + return NULL; + + bnc.prefix = *prefix; + return bgp_nexthop_cache_find(tree, &bnc); +} + +/* Reset and free all BGP nexthop cache. */ +static void bgp_nexthop_cache_reset(struct bgp_nexthop_cache_head *tree) { - struct bgp_dest *dest; struct bgp_nexthop_cache *bnc; - for (dest = bgp_table_top(table); dest; dest = bgp_route_next(dest)) { - bnc = bgp_dest_get_bgp_nexthop_info(dest); - if (!bnc) - continue; + while (bgp_nexthop_cache_count(tree) > 0) { + bnc = bgp_nexthop_cache_first(tree); while (!LIST_EMPTY(&(bnc->paths))) { struct bgp_path_info *path = LIST_FIRST(&(bnc->paths)); @@ -93,8 +112,6 @@ static void bgp_nexthop_cache_reset(struct bgp_table *table) } bnc_free(bnc); - bgp_dest_set_bgp_nexthop_info(dest, NULL); - bgp_dest_unlock_node(dest); } } @@ -773,20 +790,19 @@ static void bgp_show_nexthops_detail(struct vty *vty, struct bgp *bgp, } static void bgp_show_nexthop(struct vty *vty, struct bgp *bgp, - struct bgp_dest *dest, struct bgp_nexthop_cache *bnc, bool specific) { char buf[PREFIX2STR_BUFFER]; time_t tbuf; struct peer *peer; - const struct prefix *p = bgp_dest_get_prefix(dest); peer = (struct peer *)bnc->nht_info; if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)) { vty_out(vty, " %s valid [IGP metric %d], #paths %d", - inet_ntop(p->family, &p->u.prefix, buf, sizeof(buf)), + inet_ntop(bnc->prefix.family, &bnc->prefix.u.prefix, + buf, sizeof(buf)), bnc->metric, bnc->path_count); if (peer) vty_out(vty, ", peer %s", peer->host); @@ -794,7 +810,8 @@ static void bgp_show_nexthop(struct vty *vty, struct bgp *bgp, bgp_show_nexthops_detail(vty, bgp, bnc); } else { vty_out(vty, " %s invalid, #paths %d", - inet_ntop(p->family, &p->u.prefix, buf, sizeof(buf)), + inet_ntop(bnc->prefix.family, &bnc->prefix.u.prefix, + buf, sizeof(buf)), bnc->path_count); if (peer) vty_out(vty, ", peer %s", peer->host); @@ -816,29 +833,21 @@ static void bgp_show_nexthop(struct vty *vty, struct bgp *bgp, static void bgp_show_nexthops(struct vty *vty, struct bgp *bgp, bool import_table) { - struct bgp_dest *dest; struct bgp_nexthop_cache *bnc; afi_t afi; - struct bgp_table **table; + struct bgp_nexthop_cache_head(*tree)[AFI_MAX]; if (import_table) vty_out(vty, "Current BGP import check cache:\n"); else vty_out(vty, "Current BGP nexthop cache:\n"); if (import_table) - table = bgp->import_check_table; + tree = &bgp->import_check_table; else - table = bgp->nexthop_cache_table; + tree = &bgp->nexthop_cache_table; for (afi = AFI_IP; afi < AFI_MAX; afi++) { - if (!table || !table[afi]) - continue; - for (dest = bgp_table_top(table[afi]); dest; - dest = bgp_route_next(dest)) { - bnc = bgp_dest_get_bgp_nexthop_info(dest); - if (!bnc) - continue; - bgp_show_nexthop(vty, bgp, dest, bnc, false); - } + frr_each (bgp_nexthop_cache, &(*tree)[afi], bnc) + bgp_show_nexthop(vty, bgp, bnc, false); } } @@ -859,27 +868,21 @@ static int show_ip_bgp_nexthop_table(struct vty *vty, const char *name, if (nhopip_str) { struct prefix nhop; - struct bgp_table **table; - struct bgp_dest *dest; + struct bgp_nexthop_cache_head (*tree)[AFI_MAX]; struct bgp_nexthop_cache *bnc; if (!str2prefix(nhopip_str, &nhop)) { vty_out(vty, "nexthop address is malformed\n"); return CMD_WARNING; } - table = import_table ? \ - bgp->import_check_table : bgp->nexthop_cache_table; - dest = bgp_node_lookup(table[family2afi(nhop.family)], &nhop); - if (!dest) { - vty_out(vty, "specified nexthop is not found\n"); - return CMD_SUCCESS; - } - bnc = bgp_dest_get_bgp_nexthop_info(dest); + tree = import_table ? &bgp->import_check_table + : &bgp->nexthop_cache_table; + bnc = bnc_find(tree[family2afi(nhop.family)], &nhop, 0); if (!bnc) { vty_out(vty, "specified nexthop does not have entry\n"); return CMD_SUCCESS; } - bgp_show_nexthop(vty, bgp, dest, bnc, true); + bgp_show_nexthop(vty, bgp, bnc, true); } else bgp_show_nexthops(vty, bgp, import_table); @@ -966,12 +969,10 @@ void bgp_scan_init(struct bgp *bgp) afi_t afi; for (afi = AFI_IP; afi < AFI_MAX; afi++) { - bgp->nexthop_cache_table[afi] = - bgp_table_init(bgp, afi, SAFI_UNICAST); + bgp_nexthop_cache_init(&bgp->nexthop_cache_table[afi]); + bgp_nexthop_cache_init(&bgp->import_check_table[afi]); bgp->connected_table[afi] = bgp_table_init(bgp, afi, SAFI_UNICAST); - bgp->import_check_table[afi] = - bgp_table_init(bgp, afi, SAFI_UNICAST); } } @@ -988,16 +989,12 @@ void bgp_scan_finish(struct bgp *bgp) for (afi = AFI_IP; afi < AFI_MAX; afi++) { /* Only the current one needs to be reset. */ - bgp_nexthop_cache_reset(bgp->nexthop_cache_table[afi]); - bgp_table_unlock(bgp->nexthop_cache_table[afi]); - bgp->nexthop_cache_table[afi] = NULL; + bgp_nexthop_cache_reset(&bgp->nexthop_cache_table[afi]); + bgp_nexthop_cache_reset(&bgp->import_check_table[afi]); bgp->connected_table[afi]->route_table->cleanup = bgp_connected_cleanup; bgp_table_unlock(bgp->connected_table[afi]); bgp->connected_table[afi] = NULL; - - bgp_table_unlock(bgp->import_check_table[afi]); - bgp->import_check_table[afi] = NULL; } } diff --git a/bgpd/bgp_nexthop.h b/bgpd/bgp_nexthop.h index 416ab2a739..e235fe4727 100644 --- a/bgpd/bgp_nexthop.h +++ b/bgpd/bgp_nexthop.h @@ -24,6 +24,7 @@ #include "if.h" #include "queue.h" #include "prefix.h" +#include "bgp_table.h" #define NEXTHOP_FAMILY(nexthop_len) \ (((nexthop_len) == 4 || (nexthop_len) == 12 \ @@ -36,8 +37,13 @@ #define BGP_MP_NEXTHOP_FAMILY NEXTHOP_FAMILY +PREDECL_RBTREE_UNIQ(bgp_nexthop_cache); + /* BGP nexthop cache value structure. */ struct bgp_nexthop_cache { + /* RB-tree entry. */ + struct bgp_nexthop_cache_item entry; + /* IGP route's metric. */ uint32_t metric; @@ -61,13 +67,21 @@ struct bgp_nexthop_cache { #define BGP_NEXTHOP_METRIC_CHANGED (1 << 1) #define BGP_NEXTHOP_CONNECTED_CHANGED (1 << 2) - struct bgp_dest *dest; + /* Back pointer to the cache tree this entry belongs to. */ + struct bgp_nexthop_cache_head *tree; + + struct prefix prefix; void *nht_info; /* In BGP, peer session */ LIST_HEAD(path_list, bgp_path_info) paths; unsigned int path_count; struct bgp *bgp; }; +extern int bgp_nexthop_cache_compare(const struct bgp_nexthop_cache *a, + const struct bgp_nexthop_cache *b); +DECLARE_RBTREE_UNIQ(bgp_nexthop_cache, struct bgp_nexthop_cache, entry, + bgp_nexthop_cache_compare); + /* Own tunnel-ip address structure */ struct tip_addr { struct in_addr addr; @@ -79,6 +93,12 @@ struct bgp_addrv6 { struct list *ifp_name_list; }; +/* Forward declaration(s). */ +struct peer; +struct update_subgroup; +struct bgp_dest; +struct attr; + extern void bgp_connected_add(struct bgp *bgp, struct connected *c); extern void bgp_connected_delete(struct bgp *bgp, struct connected *c); extern bool bgp_subgrp_multiaccess_check_v4(struct in_addr nexthop, @@ -94,10 +114,13 @@ extern int bgp_config_write_scan_time(struct vty *); extern bool bgp_nexthop_self(struct bgp *bgp, afi_t afi, uint8_t type, uint8_t sub_type, struct attr *attr, struct bgp_dest *dest); -extern struct bgp_nexthop_cache *bnc_new(void); +extern struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree, + struct prefix *prefix); extern void bnc_free(struct bgp_nexthop_cache *bnc); +extern struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, + struct prefix *prefix); extern void bnc_nexthop_free(struct bgp_nexthop_cache *bnc); -extern char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size); +extern const char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size); extern void bgp_scan_init(struct bgp *bgp); extern void bgp_scan_finish(struct bgp *bgp); extern void bgp_scan_vty_init(void); diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c index a780fb7347..2365522bec 100644 --- a/bgpd/bgp_nht.c +++ b/bgpd/bgp_nht.c @@ -78,9 +78,6 @@ static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc) } unregister_zebra_rnh(bnc, CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE)); - bgp_dest_set_bgp_nexthop_info(bnc->dest, NULL); - bgp_dest_unlock_node(bnc->dest); - bnc->dest = NULL; bnc_free(bnc); } } @@ -100,16 +97,13 @@ void bgp_unlink_nexthop(struct bgp_path_info *path) void bgp_unlink_nexthop_by_peer(struct peer *peer) { struct prefix p; - struct bgp_dest *dest; struct bgp_nexthop_cache *bnc; afi_t afi = family2afi(peer->su.sa.sa_family); if (!sockunion2hostprefix(&peer->su, &p)) return; - dest = bgp_node_get(peer->bgp->nexthop_cache_table[afi], &p); - - bnc = bgp_dest_get_bgp_nexthop_info(dest); + bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p); if (!bnc) return; @@ -127,11 +121,10 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, afi_t afi, struct bgp_path_info *pi, struct peer *peer, int connected) { - struct bgp_dest *dest; + struct bgp_nexthop_cache_head *tree = NULL; struct bgp_nexthop_cache *bnc; struct prefix p; int is_bgp_static_route = 0; - const struct prefix *bnc_p; if (pi) { is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP) @@ -168,17 +161,14 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, return 0; if (is_bgp_static_route) - dest = bgp_node_get(bgp_nexthop->import_check_table[afi], &p); + tree = &bgp_nexthop->import_check_table[afi]; else - dest = bgp_node_get(bgp_nexthop->nexthop_cache_table[afi], &p); + tree = &bgp_nexthop->nexthop_cache_table[afi]; - bnc = bgp_dest_get_bgp_nexthop_info(dest); + bnc = bnc_find(tree, &p); if (!bnc) { - bnc = bnc_new(); - bgp_dest_set_bgp_nexthop_info(dest, bnc); - bnc->dest = dest; + bnc = bnc_new(tree, &p); bnc->bgp = bgp_nexthop; - bgp_dest_lock_node(dest); if (BGP_DEBUG(nht, NHT)) { char buf[PREFIX2STR_BUFFER]; @@ -188,9 +178,6 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, } } - bnc_p = bgp_dest_get_prefix(bnc->dest); - - bgp_dest_unlock_node(dest); if (is_bgp_static_route) { SET_FLAG(bnc->flags, BGP_STATIC_ROUTE); @@ -236,7 +223,7 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED); SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID); } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED) - && !is_default_host_route(bnc_p)) + && !is_default_host_route(&bnc->prefix)) register_zebra_rnh(bnc, is_bgp_static_route); if (pi && pi->nexthop != bnc) { @@ -269,7 +256,6 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) { - struct bgp_dest *dest; struct bgp_nexthop_cache *bnc; struct prefix p; @@ -279,9 +265,9 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) if (!sockunion2hostprefix(&peer->su, &p)) return; - dest = bgp_node_lookup( - peer->bgp->nexthop_cache_table[family2afi(p.family)], &p); - if (!dest) { + bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)], + &p); + if (!bnc) { if (BGP_DEBUG(nht, NHT)) zlog_debug( "Cannot find connected NHT node for peer %s(%s)", @@ -289,17 +275,6 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) return; } - bnc = bgp_dest_get_bgp_nexthop_info(dest); - if (!bnc) { - if (BGP_DEBUG(nht, NHT)) - zlog_debug( - "Cannot find connected NHT node for peer %s(%s) on route_node as expected", - peer->host, peer->bgp->name_pretty); - bgp_dest_unlock_node(dest); - return; - } - bgp_dest_unlock_node(dest); - if (bnc->nht_info != peer) { if (BGP_DEBUG(nht, NHT)) zlog_debug( @@ -317,15 +292,13 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) "Freeing connected NHT node %p for peer %s(%s)", bnc, peer->host, bnc->bgp->name_pretty); unregister_zebra_rnh(bnc, 0); - bgp_dest_set_bgp_nexthop_info(bnc->dest, NULL); - bgp_dest_unlock_node(bnc->dest); bnc_free(bnc); } } void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) { - struct bgp_dest *dest = NULL; + struct bgp_nexthop_cache_head *tree = NULL; struct bgp_nexthop_cache *bnc; struct nexthop *nexthop; struct nexthop *oldnh; @@ -352,39 +325,23 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) } if (command == ZEBRA_NEXTHOP_UPDATE) - dest = bgp_node_lookup( - bgp->nexthop_cache_table[family2afi(nhr.prefix.family)], - &nhr.prefix); + tree = &bgp->nexthop_cache_table[family2afi(nhr.prefix.family)]; else if (command == ZEBRA_IMPORT_CHECK_UPDATE) - dest = bgp_node_lookup( - bgp->import_check_table[family2afi(nhr.prefix.family)], - &nhr.prefix); + tree = &bgp->import_check_table[family2afi(nhr.prefix.family)]; - if (!dest) { - if (BGP_DEBUG(nht, NHT)) { - char buf[PREFIX2STR_BUFFER]; - prefix2str(&nhr.prefix, buf, sizeof(buf)); - zlog_debug("parse nexthop update(%s(%s)): rn not found", - buf, bgp->name_pretty); - } - return; - } - - bnc = bgp_dest_get_bgp_nexthop_info(dest); + bnc = bnc_find(tree, &nhr.prefix); if (!bnc) { if (BGP_DEBUG(nht, NHT)) { char buf[PREFIX2STR_BUFFER]; prefix2str(&nhr.prefix, buf, sizeof(buf)); zlog_debug( - "parse nexthop update(%s(%s)): bnc node info not found", + "parse nexthop update(%s(%s)): bnc info not found", buf, bgp->name_pretty); } - bgp_dest_unlock_node(dest); return; } - bgp_dest_unlock_node(dest); bnc->last_update = bgp_clock(); bnc->change_flags = 0; @@ -503,20 +460,11 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) */ void bgp_cleanup_nexthops(struct bgp *bgp) { - afi_t afi; - struct bgp_dest *dest; - struct bgp_nexthop_cache *bnc; - - for (afi = AFI_IP; afi < AFI_MAX; afi++) { - if (!bgp->nexthop_cache_table[afi]) - continue; - - for (dest = bgp_table_top(bgp->nexthop_cache_table[afi]); dest; - dest = bgp_route_next(dest)) { - bnc = bgp_dest_get_bgp_nexthop_info(dest); - if (!bnc) - continue; + for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) { + struct bgp_nexthop_cache *bnc; + frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi], + bnc) { /* Clear relevant flags. */ UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID); UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED); @@ -609,7 +557,6 @@ static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p) */ static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command) { - const struct prefix *p; bool exact_match = false; int ret; @@ -631,23 +578,18 @@ static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command) "%s: We have not connected yet, cannot send nexthops", __func__); } - p = bgp_dest_get_prefix(bnc->dest); if ((command == ZEBRA_NEXTHOP_REGISTER || command == ZEBRA_IMPORT_ROUTE_REGISTER) && (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED) || CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))) exact_match = true; - if (BGP_DEBUG(zebra, ZEBRA)) { - char buf[PREFIX2STR_BUFFER]; + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__, + zserv_command_string(command), &bnc->prefix, + bnc->bgp->name_pretty); - prefix2str(p, buf, PREFIX2STR_BUFFER); - zlog_debug("%s: sending cmd %s for %s (vrf %s)", - __func__, zserv_command_string(command), buf, - bnc->bgp->name_pretty); - } - - ret = zclient_send_rnh(zclient, command, p, exact_match, + ret = zclient_send_rnh(zclient, command, &bnc->prefix, exact_match, bnc->bgp->vrf_id); /* TBD: handle the failure */ if (ret < 0) @@ -901,21 +843,11 @@ void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc, */ void bgp_nht_register_nexthops(struct bgp *bgp) { - struct bgp_dest *dest; - struct bgp_nexthop_cache *bnc; - afi_t afi; - - for (afi = AFI_IP; afi < AFI_MAX; afi++) { - if (!bgp->nexthop_cache_table[afi]) - continue; - - for (dest = bgp_table_top(bgp->nexthop_cache_table[afi]); dest; - dest = bgp_route_next(dest)) { - bnc = bgp_dest_get_bgp_nexthop_info(dest); - - if (!bnc) - continue; + for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) { + struct bgp_nexthop_cache *bnc; + frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi], + bnc) { register_zebra_rnh(bnc, 0); } } @@ -924,7 +856,6 @@ void bgp_nht_register_nexthops(struct bgp *bgp) void bgp_nht_reg_enhe_cap_intfs(struct peer *peer) { struct bgp *bgp; - struct bgp_dest *dest; struct bgp_nexthop_cache *bnc; struct nexthop *nhop; struct interface *ifp; @@ -934,10 +865,6 @@ void bgp_nht_reg_enhe_cap_intfs(struct peer *peer) return; bgp = peer->bgp; - - if (!bgp->nexthop_cache_table[AFI_IP6]) - return; - if (!sockunion2hostprefix(&peer->su, &p)) { zlog_warn("%s: Unable to convert sockunion to prefix for %s", __func__, peer->host); @@ -946,11 +873,8 @@ void bgp_nht_reg_enhe_cap_intfs(struct peer *peer) if (p.family != AF_INET6) return; - dest = bgp_node_lookup(bgp->nexthop_cache_table[AFI_IP6], &p); - if (!dest) - return; - bnc = bgp_dest_get_bgp_nexthop_info(dest); + bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p); if (!bnc) return; @@ -973,7 +897,6 @@ void bgp_nht_reg_enhe_cap_intfs(struct peer *peer) void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer) { struct bgp *bgp; - struct bgp_dest *dest; struct bgp_nexthop_cache *bnc; struct nexthop *nhop; struct interface *ifp; @@ -984,9 +907,6 @@ void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer) bgp = peer->bgp; - if (!bgp->nexthop_cache_table[AFI_IP6]) - return; - if (!sockunion2hostprefix(&peer->su, &p)) { zlog_warn("%s: Unable to convert sockunion to prefix for %s", __func__, peer->host); @@ -996,11 +916,7 @@ void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer) if (p.family != AF_INET6) return; - dest = bgp_node_lookup(bgp->nexthop_cache_table[AFI_IP6], &p); - if (!dest) - return; - - bnc = bgp_dest_get_bgp_nexthop_info(dest); + bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p); if (!bnc) return; diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 6fc3f08836..20a41987ca 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -42,6 +42,7 @@ #include "vxlan.h" #include "bgp_labelpool.h" #include "bgp_addpath_types.h" +#include "bgp_nexthop.h" #define BGP_MAX_HOSTNAME 64 /* Linux max, is larger than most other sys */ #define BGP_PEER_MAX_HASH_SIZE 16384 @@ -482,11 +483,11 @@ struct bgp { /* BGP per AF peer count */ uint32_t af_peer_count[AFI_MAX][SAFI_MAX]; - /* Route table for next-hop lookup cache. */ - struct bgp_table *nexthop_cache_table[AFI_MAX]; + /* Tree for next-hop lookup cache. */ + struct bgp_nexthop_cache_head nexthop_cache_table[AFI_MAX]; - /* Route table for import-check */ - struct bgp_table *import_check_table[AFI_MAX]; + /* Tree for import-check */ + struct bgp_nexthop_cache_head import_check_table[AFI_MAX]; struct bgp_table *connected_table[AFI_MAX]; From ef3e0d04766a11d1b976807c42da8207e887174e Mon Sep 17 00:00:00 2001 From: Sebastien Merle Date: Tue, 28 Jan 2020 11:59:57 +0000 Subject: [PATCH 2/4] bgpd: Add support for SR-TE Policies in route-maps Example configuration: route-map SET_SR_POLICY permit 10 set sr-te color 1 ! router bgp 1 bgp router-id 1.1.1.1 neighbor 2.2.2.2 remote-as 1 neighbor 2.2.2.2 update-source lo address-family ipv4 unicast neighbor 2.2.2.2 next-hop-self neighbor 2.2.2.2 route-map SET_SR_POLICY in exit-address-family ! ! Learned BGP routes from 2.2.2.2 are mapped to the SR-TE Policy which is uniquely determined by the BGP nexthop (2.2.2.2 in this case) and the SR-TE color in the route-map. Co-authored-by: Renato Westphal Co-authored-by: GalaxyGorilla Co-authored-by: Sebastien Merle Signed-off-by: Sebastien Merle --- bgpd/bgp_attr.c | 3 ++- bgpd/bgp_attr.h | 4 ++++ bgpd/bgp_routemap.c | 43 +++++++++++++++++++++++++++++++++++++++++++ bgpd/bgp_zebra.c | 8 ++++++++ bgpd/bgpd.h | 1 + doc/user/routemap.rst | 6 ++++++ 6 files changed, 64 insertions(+), 1 deletion(-) diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c index cac3ab1ca7..d8eaabfdf8 100644 --- a/bgpd/bgp_attr.c +++ b/bgpd/bgp_attr.c @@ -729,7 +729,8 @@ bool attrhash_cmp(const void *p1, const void *p2) && attr1->nh_lla_ifindex == attr2->nh_lla_ifindex && attr1->distance == attr2->distance && srv6_l3vpn_same(attr1->srv6_l3vpn, attr2->srv6_l3vpn) - && srv6_vpn_same(attr1->srv6_vpn, attr2->srv6_vpn)) + && srv6_vpn_same(attr1->srv6_vpn, attr2->srv6_vpn) + && attr1->srte_color == attr2->srte_color) return true; } diff --git a/bgpd/bgp_attr.h b/bgpd/bgp_attr.h index c57cf81007..e6e953364b 100644 --- a/bgpd/bgp_attr.h +++ b/bgpd/bgp_attr.h @@ -24,6 +24,7 @@ #include "mpls.h" #include "bgp_attr_evpn.h" #include "bgpd/bgp_encap_types.h" +#include "srte.h" /* Simple bit mapping. */ #define BITMAP_NBBY 8 @@ -290,6 +291,9 @@ struct attr { /* EVPN ES */ esi_t esi; + + /* SR-TE Color */ + uint32_t srte_color; }; /* rmap_change_flags definition */ diff --git a/bgpd/bgp_routemap.c b/bgpd/bgp_routemap.c index 6d81cfaab4..09cc775d47 100644 --- a/bgpd/bgp_routemap.c +++ b/bgpd/bgp_routemap.c @@ -1668,6 +1668,45 @@ static const struct route_map_rule_cmd route_match_tag_cmd = { route_map_rule_tag_free, }; +static enum route_map_cmd_result_t +route_set_srte_color(void *rule, const struct prefix *prefix, + route_map_object_t type, void *object) +{ + uint32_t *srte_color = rule; + struct bgp_path_info *path; + + if (type != RMAP_BGP) + return RMAP_OKAY; + + path = object; + + path->attr->srte_color = *srte_color; + path->attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_SRTE_COLOR); + + return RMAP_OKAY; +} + +/* Route map `sr-te color' compile function */ +static void *route_set_srte_color_compile(const char *arg) +{ + uint32_t *color; + + color = XMALLOC(MTYPE_ROUTE_MAP_COMPILED, sizeof(uint32_t)); + *color = atoi(arg); + + return color; +} + +/* Free route map's compiled `sr-te color' value. */ +static void route_set_srte_color_free(void *rule) +{ + XFREE(MTYPE_ROUTE_MAP_COMPILED, rule); +} + +/* Route map commands for sr-te color set. */ +struct route_map_rule_cmd route_set_srte_color_cmd = { + "sr-te color", route_set_srte_color, route_set_srte_color_compile, + route_set_srte_color_free}; /* Set nexthop to object. ojbect must be pointer to struct attr. */ struct rmap_ip_nexthop_set { @@ -5686,6 +5725,9 @@ void bgp_route_map_init(void) route_map_match_tag_hook(generic_match_add); route_map_no_match_tag_hook(generic_match_delete); + route_map_set_srte_color_hook(generic_set_add); + route_map_no_set_srte_color_hook(generic_set_delete); + route_map_set_ip_nexthop_hook(generic_set_add); route_map_no_set_ip_nexthop_hook(generic_set_delete); @@ -5728,6 +5770,7 @@ void bgp_route_map_init(void) route_map_install_match(&route_match_vrl_source_vrf_cmd); route_map_install_set(&route_set_table_id_cmd); + route_map_install_set(&route_set_srte_color_cmd); route_map_install_set(&route_set_ip_nexthop_cmd); route_map_install_set(&route_set_local_pref_cmd); route_map_install_set(&route_set_weight_cmd); diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index dba114f86a..15bd6d33b8 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -1265,6 +1265,9 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p, api.tableid = info->attr->rmap_table_id; } + if (CHECK_FLAG(info->attr->flag, ATTR_FLAG_BIT(BGP_ATTR_SRTE_COLOR))) + SET_FLAG(api.message, ZAPI_MESSAGE_SRTE); + /* Metric is currently based on the best-path only */ metric = info->attr->med; @@ -1303,6 +1306,11 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p, continue; } api_nh = &api.nexthops[valid_nh_count]; + + if (CHECK_FLAG(info->attr->flag, + ATTR_FLAG_BIT(BGP_ATTR_SRTE_COLOR))) + api_nh->srte_color = info->attr->srte_color; + if (nh_family == AF_INET) { if (bgp_debug_zebra(&api.prefix)) { if (mpinfo->extra) { diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 20a41987ca..6145d9305f 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -1538,6 +1538,7 @@ struct bgp_nlri { #define BGP_ATTR_IPV6_EXT_COMMUNITIES 25 #define BGP_ATTR_LARGE_COMMUNITIES 32 #define BGP_ATTR_PREFIX_SID 40 +#define BGP_ATTR_SRTE_COLOR 51 #ifdef ENABLE_BGP_VNC_ATTR #define BGP_ATTR_VNC 255 #endif diff --git a/doc/user/routemap.rst b/doc/user/routemap.rst index f557cbe022..fa5fc248a8 100644 --- a/doc/user/routemap.rst +++ b/doc/user/routemap.rst @@ -329,6 +329,12 @@ Route Map Set Command Set the BGP table to a given table identifier +.. index:: set sr-te color (1-4294967295) +.. clicmd:: set sr-te color (1-4294967295) + + Set the color of a SR-TE Policy to be applied to a learned route. The SR-TE + Policy is uniquely determined by the color and the BGP nexthop. + .. _route-map-call-command: Route Map Call Command From 545aeef1d13ec29c14713450deea24d6f8810af2 Mon Sep 17 00:00:00 2001 From: Renato Westphal Date: Wed, 26 Aug 2020 14:39:33 -0300 Subject: [PATCH 3/4] bgpd: extend the NHT code to understand SR-TE colors Extend the NHT code so that only the affected BGP routes are affected whenever an SR-policy is updated on zebra. Signed-off-by: Renato Westphal --- bgpd/bgp_nexthop.c | 13 +++- bgpd/bgp_nexthop.h | 7 +- bgpd/bgp_nht.c | 171 ++++++++++++++++++++++++++++----------------- 3 files changed, 121 insertions(+), 70 deletions(-) diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c index 9fbd5063e5..6f36fd1394 100644 --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -50,6 +50,11 @@ DEFINE_MTYPE_STATIC(BGPD, MARTIAN_STRING, "BGP Martian Address Intf String"); int bgp_nexthop_cache_compare(const struct bgp_nexthop_cache *a, const struct bgp_nexthop_cache *b) { + if (a->srte_color < b->srte_color) + return -1; + if (a->srte_color > b->srte_color) + return 1; + return prefix_cmp(&a->prefix, &b->prefix); } @@ -64,13 +69,14 @@ void bnc_nexthop_free(struct bgp_nexthop_cache *bnc) } struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree, - struct prefix *prefix) + struct prefix *prefix, uint32_t srte_color) { struct bgp_nexthop_cache *bnc; bnc = XCALLOC(MTYPE_BGP_NEXTHOP_CACHE, sizeof(struct bgp_nexthop_cache)); bnc->prefix = *prefix; + bnc->srte_color = srte_color; bnc->tree = tree; LIST_INIT(&(bnc->paths)); bgp_nexthop_cache_add(tree, bnc); @@ -86,7 +92,7 @@ void bnc_free(struct bgp_nexthop_cache *bnc) } struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, - struct prefix *prefix) + struct prefix *prefix, uint32_t srte_color) { struct bgp_nexthop_cache bnc = {}; @@ -94,6 +100,7 @@ struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, return NULL; bnc.prefix = *prefix; + bnc.srte_color = srte_color; return bgp_nexthop_cache_find(tree, &bnc); } @@ -799,6 +806,8 @@ static void bgp_show_nexthop(struct vty *vty, struct bgp *bgp, peer = (struct peer *)bnc->nht_info; + if (bnc->srte_color) + vty_out(vty, " SR-TE color %u -", bnc->srte_color); if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)) { vty_out(vty, " %s valid [IGP metric %d], #paths %d", inet_ntop(bnc->prefix.family, &bnc->prefix.u.prefix, diff --git a/bgpd/bgp_nexthop.h b/bgpd/bgp_nexthop.h index e235fe4727..356af54002 100644 --- a/bgpd/bgp_nexthop.h +++ b/bgpd/bgp_nexthop.h @@ -70,6 +70,7 @@ struct bgp_nexthop_cache { /* Back pointer to the cache tree this entry belongs to. */ struct bgp_nexthop_cache_head *tree; + uint32_t srte_color; struct prefix prefix; void *nht_info; /* In BGP, peer session */ LIST_HEAD(path_list, bgp_path_info) paths; @@ -115,10 +116,12 @@ extern bool bgp_nexthop_self(struct bgp *bgp, afi_t afi, uint8_t type, uint8_t sub_type, struct attr *attr, struct bgp_dest *dest); extern struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree, - struct prefix *prefix); + struct prefix *prefix, + uint32_t srte_color); extern void bnc_free(struct bgp_nexthop_cache *bnc); extern struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, - struct prefix *prefix); + struct prefix *prefix, + uint32_t srte_color); extern void bnc_nexthop_free(struct bgp_nexthop_cache *bnc); extern const char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size); extern void bgp_scan_init(struct bgp *bgp); diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c index 2365522bec..80ee5f5349 100644 --- a/bgpd/bgp_nht.c +++ b/bgpd/bgp_nht.c @@ -72,9 +72,9 @@ static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc) if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) { if (BGP_DEBUG(nht, NHT)) { char buf[PREFIX2STR_BUFFER]; - zlog_debug("bgp_unlink_nexthop: freeing bnc %s(%s)", + zlog_debug("bgp_unlink_nexthop: freeing bnc %s(%u)(%s)", bnc_str(bnc, buf, PREFIX2STR_BUFFER), - bnc->bgp->name_pretty); + bnc->srte_color, bnc->bgp->name_pretty); } unregister_zebra_rnh(bnc, CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE)); @@ -103,7 +103,7 @@ void bgp_unlink_nexthop_by_peer(struct peer *peer) if (!sockunion2hostprefix(&peer->su, &p)) return; - bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p); + bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0); if (!bnc) return; @@ -124,6 +124,7 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, struct bgp_nexthop_cache_head *tree = NULL; struct bgp_nexthop_cache *bnc; struct prefix p; + uint32_t srte_color = 0; int is_bgp_static_route = 0; if (pi) { @@ -148,6 +149,8 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, * addr */ if (make_prefix(afi, pi, &p) < 0) return 1; + + srte_color = pi->attr->srte_color; } else if (peer) { if (!sockunion2hostprefix(&peer->su, &p)) { if (BGP_DEBUG(nht, NHT)) { @@ -165,16 +168,17 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, else tree = &bgp_nexthop->nexthop_cache_table[afi]; - bnc = bnc_find(tree, &p); + bnc = bnc_find(tree, &p, srte_color); if (!bnc) { - bnc = bnc_new(tree, &p); + bnc = bnc_new(tree, &p, srte_color); bnc->bgp = bgp_nexthop; if (BGP_DEBUG(nht, NHT)) { char buf[PREFIX2STR_BUFFER]; - zlog_debug("Allocated bnc %s(%s) peer %p", + zlog_debug("Allocated bnc %s(%u)(%s) peer %p", bnc_str(bnc, buf, PREFIX2STR_BUFFER), - bnc->bgp->name_pretty, peer); + bnc->srte_color, bnc->bgp->name_pretty, + peer); } } @@ -266,7 +270,7 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) return; bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)], - &p); + &p, 0); if (!bnc) { if (BGP_DEBUG(nht, NHT)) zlog_debug( @@ -296,51 +300,14 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) } } -void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) +static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc, + struct zapi_route *nhr) { - struct bgp_nexthop_cache_head *tree = NULL; - struct bgp_nexthop_cache *bnc; struct nexthop *nexthop; struct nexthop *oldnh; struct nexthop *nhlist_head = NULL; struct nexthop *nhlist_tail = NULL; int i; - struct bgp *bgp; - struct zapi_route nhr; - - bgp = bgp_lookup_by_vrf_id(vrf_id); - if (!bgp) { - flog_err( - EC_BGP_NH_UPD, - "parse nexthop update: instance not found for vrf_id %u", - vrf_id); - return; - } - - if (!zapi_nexthop_update_decode(zclient->ibuf, &nhr)) { - if (BGP_DEBUG(nht, NHT)) - zlog_debug("%s[%s]: Failure to decode nexthop update", - __func__, bgp->name_pretty); - return; - } - - if (command == ZEBRA_NEXTHOP_UPDATE) - tree = &bgp->nexthop_cache_table[family2afi(nhr.prefix.family)]; - else if (command == ZEBRA_IMPORT_CHECK_UPDATE) - tree = &bgp->import_check_table[family2afi(nhr.prefix.family)]; - - bnc = bnc_find(tree, &nhr.prefix); - if (!bnc) { - if (BGP_DEBUG(nht, NHT)) { - char buf[PREFIX2STR_BUFFER]; - - prefix2str(&nhr.prefix, buf, sizeof(buf)); - zlog_debug( - "parse nexthop update(%s(%s)): bnc info not found", - buf, bgp->name_pretty); - } - return; - } bnc->last_update = bgp_clock(); bnc->change_flags = 0; @@ -348,21 +315,21 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) /* debug print the input */ if (BGP_DEBUG(nht, NHT)) { char buf[PREFIX2STR_BUFFER]; - prefix2str(&nhr.prefix, buf, sizeof(buf)); + prefix2str(&nhr->prefix, buf, sizeof(buf)); zlog_debug( - "%s(%u): Rcvd NH update %s - metric %d/%d #nhops %d/%d flags 0x%x", - bnc->bgp->name_pretty, vrf_id, buf, nhr.metric, - bnc->metric, nhr.nexthop_num, bnc->nexthop_num, - bnc->flags); + "%s(%u): Rcvd NH update %s(%u) - metric %d/%d #nhops %d/%d flags 0x%x", + bnc->bgp->name_pretty, bnc->bgp->vrf_id, buf, + bnc->srte_color, nhr->metric, bnc->metric, + nhr->nexthop_num, bnc->nexthop_num, bnc->flags); } - if (nhr.metric != bnc->metric) + if (nhr->metric != bnc->metric) bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED; - if (nhr.nexthop_num != bnc->nexthop_num) + if (nhr->nexthop_num != bnc->nexthop_num) bnc->change_flags |= BGP_NEXTHOP_CHANGED; - if (nhr.nexthop_num) { + if (nhr->nexthop_num) { struct peer *peer = bnc->nht_info; /* notify bgp fsm if nbr ip goes from invalid->valid */ @@ -370,15 +337,15 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED); bnc->flags |= BGP_NEXTHOP_VALID; - bnc->metric = nhr.metric; - bnc->nexthop_num = nhr.nexthop_num; + bnc->metric = nhr->metric; + bnc->nexthop_num = nhr->nexthop_num; bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */ - for (i = 0; i < nhr.nexthop_num; i++) { + for (i = 0; i < nhr->nexthop_num; i++) { int num_labels = 0; - nexthop = nexthop_from_zapi_nexthop(&nhr.nexthops[i]); + nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]); /* * Turn on RA for the v6 nexthops @@ -388,7 +355,7 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) if (peer && !peer->ifp && CHECK_FLAG(peer->flags, PEER_FLAG_CAPABILITY_ENHE) - && nhr.prefix.family == AF_INET6 + && nhr->prefix.family == AF_INET6 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) { struct interface *ifp; @@ -442,7 +409,7 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) bnc->nexthop = nhlist_head; } else { bnc->flags &= ~BGP_NEXTHOP_VALID; - bnc->nexthop_num = nhr.nexthop_num; + bnc->nexthop_num = nhr->nexthop_num; /* notify bgp fsm if nbr ip goes from valid->invalid */ UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED); @@ -454,6 +421,77 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) evaluate_paths(bnc); } +void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) +{ + struct bgp_nexthop_cache_head *tree = NULL; + struct bgp_nexthop_cache *bnc; + struct bgp *bgp; + struct zapi_route nhr; + afi_t afi; + + bgp = bgp_lookup_by_vrf_id(vrf_id); + if (!bgp) { + flog_err( + EC_BGP_NH_UPD, + "parse nexthop update: instance not found for vrf_id %u", + vrf_id); + return; + } + + if (!zapi_nexthop_update_decode(zclient->ibuf, &nhr)) { + if (BGP_DEBUG(nht, NHT)) + zlog_debug("%s[%s]: Failure to decode nexthop update", + __PRETTY_FUNCTION__, bgp->name_pretty); + return; + } + + afi = family2afi(nhr.prefix.family); + if (command == ZEBRA_NEXTHOP_UPDATE) + tree = &bgp->nexthop_cache_table[afi]; + else if (command == ZEBRA_IMPORT_CHECK_UPDATE) + tree = &bgp->import_check_table[afi]; + + bnc = bnc_find(tree, &nhr.prefix, nhr.srte_color); + if (!bnc) { + if (BGP_DEBUG(nht, NHT)) { + char buf[PREFIX2STR_BUFFER]; + + prefix2str(&nhr.prefix, buf, sizeof(buf)); + zlog_debug( + "parse nexthop update(%s(%u)(%s)): bnc info not found", + buf, nhr.srte_color, bgp->name_pretty); + } + return; + } + + bgp_process_nexthop_update(bnc, &nhr); + + /* + * HACK: if any BGP route is dependant on an SR-policy that doesn't + * exist, zebra will never send NH updates relative to that policy. In + * that case, whenever we receive an update about a colorless NH, update + * the corresponding colorful NHs that share the same endpoint but that + * are inactive. This ugly hack should work around the problem at the + * cost of a performance pernalty. Long term, what should be done is to + * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is), + * which should provide a better infrastructure to solve this issue in + * a more efficient and elegant way. + */ + if (nhr.srte_color == 0) { + struct bgp_nexthop_cache *bnc_iter; + + frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi], + bnc_iter) { + if (!prefix_same(&bnc->prefix, &bnc_iter->prefix) + || bnc_iter->srte_color == 0 + || CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID)) + continue; + + bgp_process_nexthop_update(bnc_iter, &nhr); + } + } +} + /* * Cleanup nexthop registration and status information for BGP nexthops * pertaining to this VRF. This is invoked upon VRF deletion. @@ -667,8 +705,8 @@ static void evaluate_paths(struct bgp_nexthop_cache *bnc) char buf[PREFIX2STR_BUFFER]; bnc_str(bnc, buf, PREFIX2STR_BUFFER); zlog_debug( - "NH update for %s %s flags 0x%x chgflags 0x%x - evaluate paths", - buf, bnc->bgp->name_pretty, bnc->flags, + "NH update for %s(%u)(%s) - flags 0x%x chgflags 0x%x - evaluate paths", + buf, bnc->srte_color, bnc->bgp->name_pretty, bnc->flags, bnc->change_flags); } @@ -756,7 +794,8 @@ static void evaluate_paths(struct bgp_nexthop_cache *bnc) path->extra->igpmetric = 0; if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED) - || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)) + || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED) + || path->attr->srte_color != 0) SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED); path_valid = !!CHECK_FLAG(path->flags, BGP_PATH_VALID); @@ -874,7 +913,7 @@ void bgp_nht_reg_enhe_cap_intfs(struct peer *peer) if (p.family != AF_INET6) return; - bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p); + bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0); if (!bnc) return; @@ -916,7 +955,7 @@ void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer) if (p.family != AF_INET6) return; - bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p); + bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0); if (!bnc) return; From e37e1e27e4f26d370946bedb9ae0767344fe339d Mon Sep 17 00:00:00 2001 From: Pat Ruddy Date: Fri, 26 Jun 2020 17:37:30 +0100 Subject: [PATCH 4/4] bgpd: do not unregister for prefix nexthop updates if nh exists since the addition of srte_color to the comparison for bgp nexthops it is possible to have several nexthops per prefix but since zebra only sores a per prefix registration we should not unregister for nh notifications for a prefix unti all the nexthops for that prefix have been deleted. Otherwise we can get into a deadlock situation where BGP thinks we have registered but we have unregistered from zebra. Signed-off-by: Pat Ruddy --- bgpd/bgp_nexthop.c | 13 +++++++++++++ bgpd/bgp_nexthop.h | 1 + bgpd/bgp_nht.c | 6 ++++-- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c index 6f36fd1394..ed026a2fff 100644 --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -84,6 +84,19 @@ struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree, return bnc; } +bool bnc_existing_for_prefix(struct bgp_nexthop_cache *bnc) +{ + struct bgp_nexthop_cache *bnc_tmp; + + frr_each (bgp_nexthop_cache, bnc->tree, bnc_tmp) { + if (bnc_tmp == bnc) + continue; + if (prefix_cmp(&bnc->prefix, &bnc_tmp->prefix) == 0) + return true; + } + return false; +} + void bnc_free(struct bgp_nexthop_cache *bnc) { bnc_nexthop_free(bnc); diff --git a/bgpd/bgp_nexthop.h b/bgpd/bgp_nexthop.h index 356af54002..c4b913faf4 100644 --- a/bgpd/bgp_nexthop.h +++ b/bgpd/bgp_nexthop.h @@ -118,6 +118,7 @@ extern bool bgp_nexthop_self(struct bgp *bgp, afi_t afi, uint8_t type, extern struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree, struct prefix *prefix, uint32_t srte_color); +extern bool bnc_existing_for_prefix(struct bgp_nexthop_cache *bnc); extern void bnc_free(struct bgp_nexthop_cache *bnc); extern struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, struct prefix *prefix, diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c index 80ee5f5349..9573d118e5 100644 --- a/bgpd/bgp_nht.c +++ b/bgpd/bgp_nht.c @@ -76,8 +76,10 @@ static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc) bnc_str(bnc, buf, PREFIX2STR_BUFFER), bnc->srte_color, bnc->bgp->name_pretty); } - unregister_zebra_rnh(bnc, - CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE)); + /* only unregister if this is the last nh for this prefix*/ + if (!bnc_existing_for_prefix(bnc)) + unregister_zebra_rnh( + bnc, CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE)); bnc_free(bnc); } }