diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c index cac3ab1ca7..d8eaabfdf8 100644 --- a/bgpd/bgp_attr.c +++ b/bgpd/bgp_attr.c @@ -729,7 +729,8 @@ bool attrhash_cmp(const void *p1, const void *p2) && attr1->nh_lla_ifindex == attr2->nh_lla_ifindex && attr1->distance == attr2->distance && srv6_l3vpn_same(attr1->srv6_l3vpn, attr2->srv6_l3vpn) - && srv6_vpn_same(attr1->srv6_vpn, attr2->srv6_vpn)) + && srv6_vpn_same(attr1->srv6_vpn, attr2->srv6_vpn) + && attr1->srte_color == attr2->srte_color) return true; } diff --git a/bgpd/bgp_attr.h b/bgpd/bgp_attr.h index c57cf81007..e6e953364b 100644 --- a/bgpd/bgp_attr.h +++ b/bgpd/bgp_attr.h @@ -24,6 +24,7 @@ #include "mpls.h" #include "bgp_attr_evpn.h" #include "bgpd/bgp_encap_types.h" +#include "srte.h" /* Simple bit mapping. */ #define BITMAP_NBBY 8 @@ -290,6 +291,9 @@ struct attr { /* EVPN ES */ esi_t esi; + + /* SR-TE Color */ + uint32_t srte_color; }; /* rmap_change_flags definition */ diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c index 5cc0d60529..ed026a2fff 100644 --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -35,7 +35,6 @@ #include "filter.h" #include "bgpd/bgpd.h" -#include "bgpd/bgp_table.h" #include "bgpd/bgp_route.h" #include "bgpd/bgp_attr.h" #include "bgpd/bgp_nexthop.h" @@ -48,10 +47,20 @@ DEFINE_MTYPE_STATIC(BGPD, MARTIAN_STRING, "BGP Martian Address Intf String"); -char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size) +int bgp_nexthop_cache_compare(const struct bgp_nexthop_cache *a, + const struct bgp_nexthop_cache *b) { - prefix2str(bgp_dest_get_prefix(bnc->dest), buf, size); - return buf; + if (a->srte_color < b->srte_color) + return -1; + if (a->srte_color > b->srte_color) + return 1; + + return prefix_cmp(&a->prefix, &b->prefix); +} + +const char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size) +{ + return prefix2str(&bnc->prefix, buf, size); } void bnc_nexthop_free(struct bgp_nexthop_cache *bnc) @@ -59,32 +68,62 @@ void bnc_nexthop_free(struct bgp_nexthop_cache *bnc) nexthops_free(bnc->nexthop); } -struct bgp_nexthop_cache *bnc_new(void) +struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree, + struct prefix *prefix, uint32_t srte_color) { struct bgp_nexthop_cache *bnc; bnc = XCALLOC(MTYPE_BGP_NEXTHOP_CACHE, sizeof(struct bgp_nexthop_cache)); + bnc->prefix = *prefix; + bnc->srte_color = srte_color; + bnc->tree = tree; LIST_INIT(&(bnc->paths)); + bgp_nexthop_cache_add(tree, bnc); + return bnc; } +bool bnc_existing_for_prefix(struct bgp_nexthop_cache *bnc) +{ + struct bgp_nexthop_cache *bnc_tmp; + + frr_each (bgp_nexthop_cache, bnc->tree, bnc_tmp) { + if (bnc_tmp == bnc) + continue; + if (prefix_cmp(&bnc->prefix, &bnc_tmp->prefix) == 0) + return true; + } + return false; +} + void bnc_free(struct bgp_nexthop_cache *bnc) { bnc_nexthop_free(bnc); + bgp_nexthop_cache_del(bnc->tree, bnc); XFREE(MTYPE_BGP_NEXTHOP_CACHE, bnc); } -/* Reset and free all BGP nexthop cache. */ -static void bgp_nexthop_cache_reset(struct bgp_table *table) +struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, + struct prefix *prefix, uint32_t srte_color) +{ + struct bgp_nexthop_cache bnc = {}; + + if (!tree) + return NULL; + + bnc.prefix = *prefix; + bnc.srte_color = srte_color; + return bgp_nexthop_cache_find(tree, &bnc); +} + +/* Reset and free all BGP nexthop cache. */ +static void bgp_nexthop_cache_reset(struct bgp_nexthop_cache_head *tree) { - struct bgp_dest *dest; struct bgp_nexthop_cache *bnc; - for (dest = bgp_table_top(table); dest; dest = bgp_route_next(dest)) { - bnc = bgp_dest_get_bgp_nexthop_info(dest); - if (!bnc) - continue; + while (bgp_nexthop_cache_count(tree) > 0) { + bnc = bgp_nexthop_cache_first(tree); while (!LIST_EMPTY(&(bnc->paths))) { struct bgp_path_info *path = LIST_FIRST(&(bnc->paths)); @@ -93,8 +132,6 @@ static void bgp_nexthop_cache_reset(struct bgp_table *table) } bnc_free(bnc); - bgp_dest_set_bgp_nexthop_info(dest, NULL); - bgp_dest_unlock_node(dest); } } @@ -773,20 +810,21 @@ static void bgp_show_nexthops_detail(struct vty *vty, struct bgp *bgp, } static void bgp_show_nexthop(struct vty *vty, struct bgp *bgp, - struct bgp_dest *dest, struct bgp_nexthop_cache *bnc, bool specific) { char buf[PREFIX2STR_BUFFER]; time_t tbuf; struct peer *peer; - const struct prefix *p = bgp_dest_get_prefix(dest); peer = (struct peer *)bnc->nht_info; + if (bnc->srte_color) + vty_out(vty, " SR-TE color %u -", bnc->srte_color); if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)) { vty_out(vty, " %s valid [IGP metric %d], #paths %d", - inet_ntop(p->family, &p->u.prefix, buf, sizeof(buf)), + inet_ntop(bnc->prefix.family, &bnc->prefix.u.prefix, + buf, sizeof(buf)), bnc->metric, bnc->path_count); if (peer) vty_out(vty, ", peer %s", peer->host); @@ -794,7 +832,8 @@ static void bgp_show_nexthop(struct vty *vty, struct bgp *bgp, bgp_show_nexthops_detail(vty, bgp, bnc); } else { vty_out(vty, " %s invalid, #paths %d", - inet_ntop(p->family, &p->u.prefix, buf, sizeof(buf)), + inet_ntop(bnc->prefix.family, &bnc->prefix.u.prefix, + buf, sizeof(buf)), bnc->path_count); if (peer) vty_out(vty, ", peer %s", peer->host); @@ -816,29 +855,21 @@ static void bgp_show_nexthop(struct vty *vty, struct bgp *bgp, static void bgp_show_nexthops(struct vty *vty, struct bgp *bgp, bool import_table) { - struct bgp_dest *dest; struct bgp_nexthop_cache *bnc; afi_t afi; - struct bgp_table **table; + struct bgp_nexthop_cache_head(*tree)[AFI_MAX]; if (import_table) vty_out(vty, "Current BGP import check cache:\n"); else vty_out(vty, "Current BGP nexthop cache:\n"); if (import_table) - table = bgp->import_check_table; + tree = &bgp->import_check_table; else - table = bgp->nexthop_cache_table; + tree = &bgp->nexthop_cache_table; for (afi = AFI_IP; afi < AFI_MAX; afi++) { - if (!table || !table[afi]) - continue; - for (dest = bgp_table_top(table[afi]); dest; - dest = bgp_route_next(dest)) { - bnc = bgp_dest_get_bgp_nexthop_info(dest); - if (!bnc) - continue; - bgp_show_nexthop(vty, bgp, dest, bnc, false); - } + frr_each (bgp_nexthop_cache, &(*tree)[afi], bnc) + bgp_show_nexthop(vty, bgp, bnc, false); } } @@ -859,27 +890,21 @@ static int show_ip_bgp_nexthop_table(struct vty *vty, const char *name, if (nhopip_str) { struct prefix nhop; - struct bgp_table **table; - struct bgp_dest *dest; + struct bgp_nexthop_cache_head (*tree)[AFI_MAX]; struct bgp_nexthop_cache *bnc; if (!str2prefix(nhopip_str, &nhop)) { vty_out(vty, "nexthop address is malformed\n"); return CMD_WARNING; } - table = import_table ? \ - bgp->import_check_table : bgp->nexthop_cache_table; - dest = bgp_node_lookup(table[family2afi(nhop.family)], &nhop); - if (!dest) { - vty_out(vty, "specified nexthop is not found\n"); - return CMD_SUCCESS; - } - bnc = bgp_dest_get_bgp_nexthop_info(dest); + tree = import_table ? &bgp->import_check_table + : &bgp->nexthop_cache_table; + bnc = bnc_find(tree[family2afi(nhop.family)], &nhop, 0); if (!bnc) { vty_out(vty, "specified nexthop does not have entry\n"); return CMD_SUCCESS; } - bgp_show_nexthop(vty, bgp, dest, bnc, true); + bgp_show_nexthop(vty, bgp, bnc, true); } else bgp_show_nexthops(vty, bgp, import_table); @@ -966,12 +991,10 @@ void bgp_scan_init(struct bgp *bgp) afi_t afi; for (afi = AFI_IP; afi < AFI_MAX; afi++) { - bgp->nexthop_cache_table[afi] = - bgp_table_init(bgp, afi, SAFI_UNICAST); + bgp_nexthop_cache_init(&bgp->nexthop_cache_table[afi]); + bgp_nexthop_cache_init(&bgp->import_check_table[afi]); bgp->connected_table[afi] = bgp_table_init(bgp, afi, SAFI_UNICAST); - bgp->import_check_table[afi] = - bgp_table_init(bgp, afi, SAFI_UNICAST); } } @@ -988,16 +1011,12 @@ void bgp_scan_finish(struct bgp *bgp) for (afi = AFI_IP; afi < AFI_MAX; afi++) { /* Only the current one needs to be reset. */ - bgp_nexthop_cache_reset(bgp->nexthop_cache_table[afi]); - bgp_table_unlock(bgp->nexthop_cache_table[afi]); - bgp->nexthop_cache_table[afi] = NULL; + bgp_nexthop_cache_reset(&bgp->nexthop_cache_table[afi]); + bgp_nexthop_cache_reset(&bgp->import_check_table[afi]); bgp->connected_table[afi]->route_table->cleanup = bgp_connected_cleanup; bgp_table_unlock(bgp->connected_table[afi]); bgp->connected_table[afi] = NULL; - - bgp_table_unlock(bgp->import_check_table[afi]); - bgp->import_check_table[afi] = NULL; } } diff --git a/bgpd/bgp_nexthop.h b/bgpd/bgp_nexthop.h index 416ab2a739..c4b913faf4 100644 --- a/bgpd/bgp_nexthop.h +++ b/bgpd/bgp_nexthop.h @@ -24,6 +24,7 @@ #include "if.h" #include "queue.h" #include "prefix.h" +#include "bgp_table.h" #define NEXTHOP_FAMILY(nexthop_len) \ (((nexthop_len) == 4 || (nexthop_len) == 12 \ @@ -36,8 +37,13 @@ #define BGP_MP_NEXTHOP_FAMILY NEXTHOP_FAMILY +PREDECL_RBTREE_UNIQ(bgp_nexthop_cache); + /* BGP nexthop cache value structure. */ struct bgp_nexthop_cache { + /* RB-tree entry. */ + struct bgp_nexthop_cache_item entry; + /* IGP route's metric. */ uint32_t metric; @@ -61,13 +67,22 @@ struct bgp_nexthop_cache { #define BGP_NEXTHOP_METRIC_CHANGED (1 << 1) #define BGP_NEXTHOP_CONNECTED_CHANGED (1 << 2) - struct bgp_dest *dest; + /* Back pointer to the cache tree this entry belongs to. */ + struct bgp_nexthop_cache_head *tree; + + uint32_t srte_color; + struct prefix prefix; void *nht_info; /* In BGP, peer session */ LIST_HEAD(path_list, bgp_path_info) paths; unsigned int path_count; struct bgp *bgp; }; +extern int bgp_nexthop_cache_compare(const struct bgp_nexthop_cache *a, + const struct bgp_nexthop_cache *b); +DECLARE_RBTREE_UNIQ(bgp_nexthop_cache, struct bgp_nexthop_cache, entry, + bgp_nexthop_cache_compare); + /* Own tunnel-ip address structure */ struct tip_addr { struct in_addr addr; @@ -79,6 +94,12 @@ struct bgp_addrv6 { struct list *ifp_name_list; }; +/* Forward declaration(s). */ +struct peer; +struct update_subgroup; +struct bgp_dest; +struct attr; + extern void bgp_connected_add(struct bgp *bgp, struct connected *c); extern void bgp_connected_delete(struct bgp *bgp, struct connected *c); extern bool bgp_subgrp_multiaccess_check_v4(struct in_addr nexthop, @@ -94,10 +115,16 @@ extern int bgp_config_write_scan_time(struct vty *); extern bool bgp_nexthop_self(struct bgp *bgp, afi_t afi, uint8_t type, uint8_t sub_type, struct attr *attr, struct bgp_dest *dest); -extern struct bgp_nexthop_cache *bnc_new(void); +extern struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree, + struct prefix *prefix, + uint32_t srte_color); +extern bool bnc_existing_for_prefix(struct bgp_nexthop_cache *bnc); extern void bnc_free(struct bgp_nexthop_cache *bnc); +extern struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, + struct prefix *prefix, + uint32_t srte_color); extern void bnc_nexthop_free(struct bgp_nexthop_cache *bnc); -extern char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size); +extern const char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size); extern void bgp_scan_init(struct bgp *bgp); extern void bgp_scan_finish(struct bgp *bgp); extern void bgp_scan_vty_init(void); diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c index a780fb7347..9573d118e5 100644 --- a/bgpd/bgp_nht.c +++ b/bgpd/bgp_nht.c @@ -72,15 +72,14 @@ static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc) if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) { if (BGP_DEBUG(nht, NHT)) { char buf[PREFIX2STR_BUFFER]; - zlog_debug("bgp_unlink_nexthop: freeing bnc %s(%s)", + zlog_debug("bgp_unlink_nexthop: freeing bnc %s(%u)(%s)", bnc_str(bnc, buf, PREFIX2STR_BUFFER), - bnc->bgp->name_pretty); + bnc->srte_color, bnc->bgp->name_pretty); } - unregister_zebra_rnh(bnc, - CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE)); - bgp_dest_set_bgp_nexthop_info(bnc->dest, NULL); - bgp_dest_unlock_node(bnc->dest); - bnc->dest = NULL; + /* only unregister if this is the last nh for this prefix*/ + if (!bnc_existing_for_prefix(bnc)) + unregister_zebra_rnh( + bnc, CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE)); bnc_free(bnc); } } @@ -100,16 +99,13 @@ void bgp_unlink_nexthop(struct bgp_path_info *path) void bgp_unlink_nexthop_by_peer(struct peer *peer) { struct prefix p; - struct bgp_dest *dest; struct bgp_nexthop_cache *bnc; afi_t afi = family2afi(peer->su.sa.sa_family); if (!sockunion2hostprefix(&peer->su, &p)) return; - dest = bgp_node_get(peer->bgp->nexthop_cache_table[afi], &p); - - bnc = bgp_dest_get_bgp_nexthop_info(dest); + bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0); if (!bnc) return; @@ -127,11 +123,11 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, afi_t afi, struct bgp_path_info *pi, struct peer *peer, int connected) { - struct bgp_dest *dest; + struct bgp_nexthop_cache_head *tree = NULL; struct bgp_nexthop_cache *bnc; struct prefix p; + uint32_t srte_color = 0; int is_bgp_static_route = 0; - const struct prefix *bnc_p; if (pi) { is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP) @@ -155,6 +151,8 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, * addr */ if (make_prefix(afi, pi, &p) < 0) return 1; + + srte_color = pi->attr->srte_color; } else if (peer) { if (!sockunion2hostprefix(&peer->su, &p)) { if (BGP_DEBUG(nht, NHT)) { @@ -168,29 +166,24 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, return 0; if (is_bgp_static_route) - dest = bgp_node_get(bgp_nexthop->import_check_table[afi], &p); + tree = &bgp_nexthop->import_check_table[afi]; else - dest = bgp_node_get(bgp_nexthop->nexthop_cache_table[afi], &p); + tree = &bgp_nexthop->nexthop_cache_table[afi]; - bnc = bgp_dest_get_bgp_nexthop_info(dest); + bnc = bnc_find(tree, &p, srte_color); if (!bnc) { - bnc = bnc_new(); - bgp_dest_set_bgp_nexthop_info(dest, bnc); - bnc->dest = dest; + bnc = bnc_new(tree, &p, srte_color); bnc->bgp = bgp_nexthop; - bgp_dest_lock_node(dest); if (BGP_DEBUG(nht, NHT)) { char buf[PREFIX2STR_BUFFER]; - zlog_debug("Allocated bnc %s(%s) peer %p", + zlog_debug("Allocated bnc %s(%u)(%s) peer %p", bnc_str(bnc, buf, PREFIX2STR_BUFFER), - bnc->bgp->name_pretty, peer); + bnc->srte_color, bnc->bgp->name_pretty, + peer); } } - bnc_p = bgp_dest_get_prefix(bnc->dest); - - bgp_dest_unlock_node(dest); if (is_bgp_static_route) { SET_FLAG(bnc->flags, BGP_STATIC_ROUTE); @@ -236,7 +229,7 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED); SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID); } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED) - && !is_default_host_route(bnc_p)) + && !is_default_host_route(&bnc->prefix)) register_zebra_rnh(bnc, is_bgp_static_route); if (pi && pi->nexthop != bnc) { @@ -269,7 +262,6 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) { - struct bgp_dest *dest; struct bgp_nexthop_cache *bnc; struct prefix p; @@ -279,9 +271,9 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) if (!sockunion2hostprefix(&peer->su, &p)) return; - dest = bgp_node_lookup( - peer->bgp->nexthop_cache_table[family2afi(p.family)], &p); - if (!dest) { + bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)], + &p, 0); + if (!bnc) { if (BGP_DEBUG(nht, NHT)) zlog_debug( "Cannot find connected NHT node for peer %s(%s)", @@ -289,17 +281,6 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) return; } - bnc = bgp_dest_get_bgp_nexthop_info(dest); - if (!bnc) { - if (BGP_DEBUG(nht, NHT)) - zlog_debug( - "Cannot find connected NHT node for peer %s(%s) on route_node as expected", - peer->host, peer->bgp->name_pretty); - bgp_dest_unlock_node(dest); - return; - } - bgp_dest_unlock_node(dest); - if (bnc->nht_info != peer) { if (BGP_DEBUG(nht, NHT)) zlog_debug( @@ -317,95 +298,40 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) "Freeing connected NHT node %p for peer %s(%s)", bnc, peer->host, bnc->bgp->name_pretty); unregister_zebra_rnh(bnc, 0); - bgp_dest_set_bgp_nexthop_info(bnc->dest, NULL); - bgp_dest_unlock_node(bnc->dest); bnc_free(bnc); } } -void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) +static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc, + struct zapi_route *nhr) { - struct bgp_dest *dest = NULL; - struct bgp_nexthop_cache *bnc; struct nexthop *nexthop; struct nexthop *oldnh; struct nexthop *nhlist_head = NULL; struct nexthop *nhlist_tail = NULL; int i; - struct bgp *bgp; - struct zapi_route nhr; - bgp = bgp_lookup_by_vrf_id(vrf_id); - if (!bgp) { - flog_err( - EC_BGP_NH_UPD, - "parse nexthop update: instance not found for vrf_id %u", - vrf_id); - return; - } - - if (!zapi_nexthop_update_decode(zclient->ibuf, &nhr)) { - if (BGP_DEBUG(nht, NHT)) - zlog_debug("%s[%s]: Failure to decode nexthop update", - __func__, bgp->name_pretty); - return; - } - - if (command == ZEBRA_NEXTHOP_UPDATE) - dest = bgp_node_lookup( - bgp->nexthop_cache_table[family2afi(nhr.prefix.family)], - &nhr.prefix); - else if (command == ZEBRA_IMPORT_CHECK_UPDATE) - dest = bgp_node_lookup( - bgp->import_check_table[family2afi(nhr.prefix.family)], - &nhr.prefix); - - if (!dest) { - if (BGP_DEBUG(nht, NHT)) { - char buf[PREFIX2STR_BUFFER]; - prefix2str(&nhr.prefix, buf, sizeof(buf)); - zlog_debug("parse nexthop update(%s(%s)): rn not found", - buf, bgp->name_pretty); - } - return; - } - - bnc = bgp_dest_get_bgp_nexthop_info(dest); - if (!bnc) { - if (BGP_DEBUG(nht, NHT)) { - char buf[PREFIX2STR_BUFFER]; - - prefix2str(&nhr.prefix, buf, sizeof(buf)); - zlog_debug( - "parse nexthop update(%s(%s)): bnc node info not found", - buf, bgp->name_pretty); - } - bgp_dest_unlock_node(dest); - return; - } - - bgp_dest_unlock_node(dest); bnc->last_update = bgp_clock(); bnc->change_flags = 0; /* debug print the input */ if (BGP_DEBUG(nht, NHT)) { char buf[PREFIX2STR_BUFFER]; - prefix2str(&nhr.prefix, buf, sizeof(buf)); + prefix2str(&nhr->prefix, buf, sizeof(buf)); zlog_debug( - "%s(%u): Rcvd NH update %s - metric %d/%d #nhops %d/%d flags 0x%x", - bnc->bgp->name_pretty, vrf_id, buf, nhr.metric, - bnc->metric, nhr.nexthop_num, bnc->nexthop_num, - bnc->flags); + "%s(%u): Rcvd NH update %s(%u) - metric %d/%d #nhops %d/%d flags 0x%x", + bnc->bgp->name_pretty, bnc->bgp->vrf_id, buf, + bnc->srte_color, nhr->metric, bnc->metric, + nhr->nexthop_num, bnc->nexthop_num, bnc->flags); } - if (nhr.metric != bnc->metric) + if (nhr->metric != bnc->metric) bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED; - if (nhr.nexthop_num != bnc->nexthop_num) + if (nhr->nexthop_num != bnc->nexthop_num) bnc->change_flags |= BGP_NEXTHOP_CHANGED; - if (nhr.nexthop_num) { + if (nhr->nexthop_num) { struct peer *peer = bnc->nht_info; /* notify bgp fsm if nbr ip goes from invalid->valid */ @@ -413,15 +339,15 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED); bnc->flags |= BGP_NEXTHOP_VALID; - bnc->metric = nhr.metric; - bnc->nexthop_num = nhr.nexthop_num; + bnc->metric = nhr->metric; + bnc->nexthop_num = nhr->nexthop_num; bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */ - for (i = 0; i < nhr.nexthop_num; i++) { + for (i = 0; i < nhr->nexthop_num; i++) { int num_labels = 0; - nexthop = nexthop_from_zapi_nexthop(&nhr.nexthops[i]); + nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]); /* * Turn on RA for the v6 nexthops @@ -431,7 +357,7 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) if (peer && !peer->ifp && CHECK_FLAG(peer->flags, PEER_FLAG_CAPABILITY_ENHE) - && nhr.prefix.family == AF_INET6 + && nhr->prefix.family == AF_INET6 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) { struct interface *ifp; @@ -485,7 +411,7 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) bnc->nexthop = nhlist_head; } else { bnc->flags &= ~BGP_NEXTHOP_VALID; - bnc->nexthop_num = nhr.nexthop_num; + bnc->nexthop_num = nhr->nexthop_num; /* notify bgp fsm if nbr ip goes from valid->invalid */ UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED); @@ -497,26 +423,88 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) evaluate_paths(bnc); } +void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) +{ + struct bgp_nexthop_cache_head *tree = NULL; + struct bgp_nexthop_cache *bnc; + struct bgp *bgp; + struct zapi_route nhr; + afi_t afi; + + bgp = bgp_lookup_by_vrf_id(vrf_id); + if (!bgp) { + flog_err( + EC_BGP_NH_UPD, + "parse nexthop update: instance not found for vrf_id %u", + vrf_id); + return; + } + + if (!zapi_nexthop_update_decode(zclient->ibuf, &nhr)) { + if (BGP_DEBUG(nht, NHT)) + zlog_debug("%s[%s]: Failure to decode nexthop update", + __PRETTY_FUNCTION__, bgp->name_pretty); + return; + } + + afi = family2afi(nhr.prefix.family); + if (command == ZEBRA_NEXTHOP_UPDATE) + tree = &bgp->nexthop_cache_table[afi]; + else if (command == ZEBRA_IMPORT_CHECK_UPDATE) + tree = &bgp->import_check_table[afi]; + + bnc = bnc_find(tree, &nhr.prefix, nhr.srte_color); + if (!bnc) { + if (BGP_DEBUG(nht, NHT)) { + char buf[PREFIX2STR_BUFFER]; + + prefix2str(&nhr.prefix, buf, sizeof(buf)); + zlog_debug( + "parse nexthop update(%s(%u)(%s)): bnc info not found", + buf, nhr.srte_color, bgp->name_pretty); + } + return; + } + + bgp_process_nexthop_update(bnc, &nhr); + + /* + * HACK: if any BGP route is dependant on an SR-policy that doesn't + * exist, zebra will never send NH updates relative to that policy. In + * that case, whenever we receive an update about a colorless NH, update + * the corresponding colorful NHs that share the same endpoint but that + * are inactive. This ugly hack should work around the problem at the + * cost of a performance pernalty. Long term, what should be done is to + * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is), + * which should provide a better infrastructure to solve this issue in + * a more efficient and elegant way. + */ + if (nhr.srte_color == 0) { + struct bgp_nexthop_cache *bnc_iter; + + frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi], + bnc_iter) { + if (!prefix_same(&bnc->prefix, &bnc_iter->prefix) + || bnc_iter->srte_color == 0 + || CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID)) + continue; + + bgp_process_nexthop_update(bnc_iter, &nhr); + } + } +} + /* * Cleanup nexthop registration and status information for BGP nexthops * pertaining to this VRF. This is invoked upon VRF deletion. */ void bgp_cleanup_nexthops(struct bgp *bgp) { - afi_t afi; - struct bgp_dest *dest; - struct bgp_nexthop_cache *bnc; - - for (afi = AFI_IP; afi < AFI_MAX; afi++) { - if (!bgp->nexthop_cache_table[afi]) - continue; - - for (dest = bgp_table_top(bgp->nexthop_cache_table[afi]); dest; - dest = bgp_route_next(dest)) { - bnc = bgp_dest_get_bgp_nexthop_info(dest); - if (!bnc) - continue; + for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) { + struct bgp_nexthop_cache *bnc; + frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi], + bnc) { /* Clear relevant flags. */ UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID); UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED); @@ -609,7 +597,6 @@ static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p) */ static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command) { - const struct prefix *p; bool exact_match = false; int ret; @@ -631,23 +618,18 @@ static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command) "%s: We have not connected yet, cannot send nexthops", __func__); } - p = bgp_dest_get_prefix(bnc->dest); if ((command == ZEBRA_NEXTHOP_REGISTER || command == ZEBRA_IMPORT_ROUTE_REGISTER) && (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED) || CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))) exact_match = true; - if (BGP_DEBUG(zebra, ZEBRA)) { - char buf[PREFIX2STR_BUFFER]; + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__, + zserv_command_string(command), &bnc->prefix, + bnc->bgp->name_pretty); - prefix2str(p, buf, PREFIX2STR_BUFFER); - zlog_debug("%s: sending cmd %s for %s (vrf %s)", - __func__, zserv_command_string(command), buf, - bnc->bgp->name_pretty); - } - - ret = zclient_send_rnh(zclient, command, p, exact_match, + ret = zclient_send_rnh(zclient, command, &bnc->prefix, exact_match, bnc->bgp->vrf_id); /* TBD: handle the failure */ if (ret < 0) @@ -725,8 +707,8 @@ static void evaluate_paths(struct bgp_nexthop_cache *bnc) char buf[PREFIX2STR_BUFFER]; bnc_str(bnc, buf, PREFIX2STR_BUFFER); zlog_debug( - "NH update for %s %s flags 0x%x chgflags 0x%x - evaluate paths", - buf, bnc->bgp->name_pretty, bnc->flags, + "NH update for %s(%u)(%s) - flags 0x%x chgflags 0x%x - evaluate paths", + buf, bnc->srte_color, bnc->bgp->name_pretty, bnc->flags, bnc->change_flags); } @@ -814,7 +796,8 @@ static void evaluate_paths(struct bgp_nexthop_cache *bnc) path->extra->igpmetric = 0; if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED) - || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)) + || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED) + || path->attr->srte_color != 0) SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED); path_valid = !!CHECK_FLAG(path->flags, BGP_PATH_VALID); @@ -901,21 +884,11 @@ void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc, */ void bgp_nht_register_nexthops(struct bgp *bgp) { - struct bgp_dest *dest; - struct bgp_nexthop_cache *bnc; - afi_t afi; - - for (afi = AFI_IP; afi < AFI_MAX; afi++) { - if (!bgp->nexthop_cache_table[afi]) - continue; - - for (dest = bgp_table_top(bgp->nexthop_cache_table[afi]); dest; - dest = bgp_route_next(dest)) { - bnc = bgp_dest_get_bgp_nexthop_info(dest); - - if (!bnc) - continue; + for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) { + struct bgp_nexthop_cache *bnc; + frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi], + bnc) { register_zebra_rnh(bnc, 0); } } @@ -924,7 +897,6 @@ void bgp_nht_register_nexthops(struct bgp *bgp) void bgp_nht_reg_enhe_cap_intfs(struct peer *peer) { struct bgp *bgp; - struct bgp_dest *dest; struct bgp_nexthop_cache *bnc; struct nexthop *nhop; struct interface *ifp; @@ -934,10 +906,6 @@ void bgp_nht_reg_enhe_cap_intfs(struct peer *peer) return; bgp = peer->bgp; - - if (!bgp->nexthop_cache_table[AFI_IP6]) - return; - if (!sockunion2hostprefix(&peer->su, &p)) { zlog_warn("%s: Unable to convert sockunion to prefix for %s", __func__, peer->host); @@ -946,11 +914,8 @@ void bgp_nht_reg_enhe_cap_intfs(struct peer *peer) if (p.family != AF_INET6) return; - dest = bgp_node_lookup(bgp->nexthop_cache_table[AFI_IP6], &p); - if (!dest) - return; - bnc = bgp_dest_get_bgp_nexthop_info(dest); + bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0); if (!bnc) return; @@ -973,7 +938,6 @@ void bgp_nht_reg_enhe_cap_intfs(struct peer *peer) void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer) { struct bgp *bgp; - struct bgp_dest *dest; struct bgp_nexthop_cache *bnc; struct nexthop *nhop; struct interface *ifp; @@ -984,9 +948,6 @@ void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer) bgp = peer->bgp; - if (!bgp->nexthop_cache_table[AFI_IP6]) - return; - if (!sockunion2hostprefix(&peer->su, &p)) { zlog_warn("%s: Unable to convert sockunion to prefix for %s", __func__, peer->host); @@ -996,11 +957,7 @@ void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer) if (p.family != AF_INET6) return; - dest = bgp_node_lookup(bgp->nexthop_cache_table[AFI_IP6], &p); - if (!dest) - return; - - bnc = bgp_dest_get_bgp_nexthop_info(dest); + bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0); if (!bnc) return; diff --git a/bgpd/bgp_routemap.c b/bgpd/bgp_routemap.c index 6d81cfaab4..09cc775d47 100644 --- a/bgpd/bgp_routemap.c +++ b/bgpd/bgp_routemap.c @@ -1668,6 +1668,45 @@ static const struct route_map_rule_cmd route_match_tag_cmd = { route_map_rule_tag_free, }; +static enum route_map_cmd_result_t +route_set_srte_color(void *rule, const struct prefix *prefix, + route_map_object_t type, void *object) +{ + uint32_t *srte_color = rule; + struct bgp_path_info *path; + + if (type != RMAP_BGP) + return RMAP_OKAY; + + path = object; + + path->attr->srte_color = *srte_color; + path->attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_SRTE_COLOR); + + return RMAP_OKAY; +} + +/* Route map `sr-te color' compile function */ +static void *route_set_srte_color_compile(const char *arg) +{ + uint32_t *color; + + color = XMALLOC(MTYPE_ROUTE_MAP_COMPILED, sizeof(uint32_t)); + *color = atoi(arg); + + return color; +} + +/* Free route map's compiled `sr-te color' value. */ +static void route_set_srte_color_free(void *rule) +{ + XFREE(MTYPE_ROUTE_MAP_COMPILED, rule); +} + +/* Route map commands for sr-te color set. */ +struct route_map_rule_cmd route_set_srte_color_cmd = { + "sr-te color", route_set_srte_color, route_set_srte_color_compile, + route_set_srte_color_free}; /* Set nexthop to object. ojbect must be pointer to struct attr. */ struct rmap_ip_nexthop_set { @@ -5686,6 +5725,9 @@ void bgp_route_map_init(void) route_map_match_tag_hook(generic_match_add); route_map_no_match_tag_hook(generic_match_delete); + route_map_set_srte_color_hook(generic_set_add); + route_map_no_set_srte_color_hook(generic_set_delete); + route_map_set_ip_nexthop_hook(generic_set_add); route_map_no_set_ip_nexthop_hook(generic_set_delete); @@ -5728,6 +5770,7 @@ void bgp_route_map_init(void) route_map_install_match(&route_match_vrl_source_vrf_cmd); route_map_install_set(&route_set_table_id_cmd); + route_map_install_set(&route_set_srte_color_cmd); route_map_install_set(&route_set_ip_nexthop_cmd); route_map_install_set(&route_set_local_pref_cmd); route_map_install_set(&route_set_weight_cmd); diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index dba114f86a..15bd6d33b8 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -1265,6 +1265,9 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p, api.tableid = info->attr->rmap_table_id; } + if (CHECK_FLAG(info->attr->flag, ATTR_FLAG_BIT(BGP_ATTR_SRTE_COLOR))) + SET_FLAG(api.message, ZAPI_MESSAGE_SRTE); + /* Metric is currently based on the best-path only */ metric = info->attr->med; @@ -1303,6 +1306,11 @@ void bgp_zebra_announce(struct bgp_dest *dest, const struct prefix *p, continue; } api_nh = &api.nexthops[valid_nh_count]; + + if (CHECK_FLAG(info->attr->flag, + ATTR_FLAG_BIT(BGP_ATTR_SRTE_COLOR))) + api_nh->srte_color = info->attr->srte_color; + if (nh_family == AF_INET) { if (bgp_debug_zebra(&api.prefix)) { if (mpinfo->extra) { diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 6fc3f08836..6145d9305f 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -42,6 +42,7 @@ #include "vxlan.h" #include "bgp_labelpool.h" #include "bgp_addpath_types.h" +#include "bgp_nexthop.h" #define BGP_MAX_HOSTNAME 64 /* Linux max, is larger than most other sys */ #define BGP_PEER_MAX_HASH_SIZE 16384 @@ -482,11 +483,11 @@ struct bgp { /* BGP per AF peer count */ uint32_t af_peer_count[AFI_MAX][SAFI_MAX]; - /* Route table for next-hop lookup cache. */ - struct bgp_table *nexthop_cache_table[AFI_MAX]; + /* Tree for next-hop lookup cache. */ + struct bgp_nexthop_cache_head nexthop_cache_table[AFI_MAX]; - /* Route table for import-check */ - struct bgp_table *import_check_table[AFI_MAX]; + /* Tree for import-check */ + struct bgp_nexthop_cache_head import_check_table[AFI_MAX]; struct bgp_table *connected_table[AFI_MAX]; @@ -1537,6 +1538,7 @@ struct bgp_nlri { #define BGP_ATTR_IPV6_EXT_COMMUNITIES 25 #define BGP_ATTR_LARGE_COMMUNITIES 32 #define BGP_ATTR_PREFIX_SID 40 +#define BGP_ATTR_SRTE_COLOR 51 #ifdef ENABLE_BGP_VNC_ATTR #define BGP_ATTR_VNC 255 #endif diff --git a/doc/user/routemap.rst b/doc/user/routemap.rst index f557cbe022..fa5fc248a8 100644 --- a/doc/user/routemap.rst +++ b/doc/user/routemap.rst @@ -329,6 +329,12 @@ Route Map Set Command Set the BGP table to a given table identifier +.. index:: set sr-te color (1-4294967295) +.. clicmd:: set sr-te color (1-4294967295) + + Set the color of a SR-TE Policy to be applied to a learned route. The SR-TE + Policy is uniquely determined by the color and the BGP nexthop. + .. _route-map-call-command: Route Map Call Command