diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c index 9fbd5063e5..6f36fd1394 100644 --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -50,6 +50,11 @@ DEFINE_MTYPE_STATIC(BGPD, MARTIAN_STRING, "BGP Martian Address Intf String"); int bgp_nexthop_cache_compare(const struct bgp_nexthop_cache *a, const struct bgp_nexthop_cache *b) { + if (a->srte_color < b->srte_color) + return -1; + if (a->srte_color > b->srte_color) + return 1; + return prefix_cmp(&a->prefix, &b->prefix); } @@ -64,13 +69,14 @@ void bnc_nexthop_free(struct bgp_nexthop_cache *bnc) } struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree, - struct prefix *prefix) + struct prefix *prefix, uint32_t srte_color) { struct bgp_nexthop_cache *bnc; bnc = XCALLOC(MTYPE_BGP_NEXTHOP_CACHE, sizeof(struct bgp_nexthop_cache)); bnc->prefix = *prefix; + bnc->srte_color = srte_color; bnc->tree = tree; LIST_INIT(&(bnc->paths)); bgp_nexthop_cache_add(tree, bnc); @@ -86,7 +92,7 @@ void bnc_free(struct bgp_nexthop_cache *bnc) } struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, - struct prefix *prefix) + struct prefix *prefix, uint32_t srte_color) { struct bgp_nexthop_cache bnc = {}; @@ -94,6 +100,7 @@ struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, return NULL; bnc.prefix = *prefix; + bnc.srte_color = srte_color; return bgp_nexthop_cache_find(tree, &bnc); } @@ -799,6 +806,8 @@ static void bgp_show_nexthop(struct vty *vty, struct bgp *bgp, peer = (struct peer *)bnc->nht_info; + if (bnc->srte_color) + vty_out(vty, " SR-TE color %u -", bnc->srte_color); if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)) { vty_out(vty, " %s valid [IGP metric %d], #paths %d", inet_ntop(bnc->prefix.family, &bnc->prefix.u.prefix, diff --git a/bgpd/bgp_nexthop.h b/bgpd/bgp_nexthop.h index e235fe4727..356af54002 100644 --- a/bgpd/bgp_nexthop.h +++ b/bgpd/bgp_nexthop.h @@ -70,6 +70,7 @@ struct bgp_nexthop_cache { /* Back pointer to the cache tree this entry belongs to. 
*/ struct bgp_nexthop_cache_head *tree; + uint32_t srte_color; struct prefix prefix; void *nht_info; /* In BGP, peer session */ LIST_HEAD(path_list, bgp_path_info) paths; @@ -115,10 +116,12 @@ extern bool bgp_nexthop_self(struct bgp *bgp, afi_t afi, uint8_t type, uint8_t sub_type, struct attr *attr, struct bgp_dest *dest); extern struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree, - struct prefix *prefix); + struct prefix *prefix, + uint32_t srte_color); extern void bnc_free(struct bgp_nexthop_cache *bnc); extern struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, - struct prefix *prefix); + struct prefix *prefix, + uint32_t srte_color); extern void bnc_nexthop_free(struct bgp_nexthop_cache *bnc); extern const char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size); extern void bgp_scan_init(struct bgp *bgp); diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c index 2365522bec..80ee5f5349 100644 --- a/bgpd/bgp_nht.c +++ b/bgpd/bgp_nht.c @@ -72,9 +72,9 @@ static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc) if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) { if (BGP_DEBUG(nht, NHT)) { char buf[PREFIX2STR_BUFFER]; - zlog_debug("bgp_unlink_nexthop: freeing bnc %s(%s)", + zlog_debug("bgp_unlink_nexthop: freeing bnc %s(%u)(%s)", bnc_str(bnc, buf, PREFIX2STR_BUFFER), - bnc->bgp->name_pretty); + bnc->srte_color, bnc->bgp->name_pretty); } unregister_zebra_rnh(bnc, CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE)); @@ -103,7 +103,7 @@ void bgp_unlink_nexthop_by_peer(struct peer *peer) if (!sockunion2hostprefix(&peer->su, &p)) return; - bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p); + bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0); if (!bnc) return; @@ -124,6 +124,7 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, struct bgp_nexthop_cache_head *tree = NULL; struct bgp_nexthop_cache *bnc; struct prefix p; + uint32_t srte_color = 0; int is_bgp_static_route = 0; if (pi) { @@ 
-148,6 +149,8 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, * addr */ if (make_prefix(afi, pi, &p) < 0) return 1; + + srte_color = pi->attr->srte_color; } else if (peer) { if (!sockunion2hostprefix(&peer->su, &p)) { if (BGP_DEBUG(nht, NHT)) { @@ -165,16 +168,17 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop, else tree = &bgp_nexthop->nexthop_cache_table[afi]; - bnc = bnc_find(tree, &p); + bnc = bnc_find(tree, &p, srte_color); if (!bnc) { - bnc = bnc_new(tree, &p); + bnc = bnc_new(tree, &p, srte_color); bnc->bgp = bgp_nexthop; if (BGP_DEBUG(nht, NHT)) { char buf[PREFIX2STR_BUFFER]; - zlog_debug("Allocated bnc %s(%s) peer %p", + zlog_debug("Allocated bnc %s(%u)(%s) peer %p", bnc_str(bnc, buf, PREFIX2STR_BUFFER), - bnc->bgp->name_pretty, peer); + bnc->srte_color, bnc->bgp->name_pretty, + peer); } } @@ -266,7 +270,7 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) return; bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)], - &p); + &p, 0); if (!bnc) { if (BGP_DEBUG(nht, NHT)) zlog_debug( @@ -296,51 +300,14 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) } } -void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) +static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc, + struct zapi_route *nhr) { - struct bgp_nexthop_cache_head *tree = NULL; - struct bgp_nexthop_cache *bnc; struct nexthop *nexthop; struct nexthop *oldnh; struct nexthop *nhlist_head = NULL; struct nexthop *nhlist_tail = NULL; int i; - struct bgp *bgp; - struct zapi_route nhr; - - bgp = bgp_lookup_by_vrf_id(vrf_id); - if (!bgp) { - flog_err( - EC_BGP_NH_UPD, - "parse nexthop update: instance not found for vrf_id %u", - vrf_id); - return; - } - - if (!zapi_nexthop_update_decode(zclient->ibuf, &nhr)) { - if (BGP_DEBUG(nht, NHT)) - zlog_debug("%s[%s]: Failure to decode nexthop update", - __func__, bgp->name_pretty); - return; - } - - if (command == ZEBRA_NEXTHOP_UPDATE) - 
tree = &bgp->nexthop_cache_table[family2afi(nhr.prefix.family)]; - else if (command == ZEBRA_IMPORT_CHECK_UPDATE) - tree = &bgp->import_check_table[family2afi(nhr.prefix.family)]; - - bnc = bnc_find(tree, &nhr.prefix); - if (!bnc) { - if (BGP_DEBUG(nht, NHT)) { - char buf[PREFIX2STR_BUFFER]; - - prefix2str(&nhr.prefix, buf, sizeof(buf)); - zlog_debug( - "parse nexthop update(%s(%s)): bnc info not found", - buf, bgp->name_pretty); - } - return; - } bnc->last_update = bgp_clock(); bnc->change_flags = 0; @@ -348,21 +315,21 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) /* debug print the input */ if (BGP_DEBUG(nht, NHT)) { char buf[PREFIX2STR_BUFFER]; - prefix2str(&nhr.prefix, buf, sizeof(buf)); + prefix2str(&nhr->prefix, buf, sizeof(buf)); zlog_debug( - "%s(%u): Rcvd NH update %s - metric %d/%d #nhops %d/%d flags 0x%x", - bnc->bgp->name_pretty, vrf_id, buf, nhr.metric, - bnc->metric, nhr.nexthop_num, bnc->nexthop_num, - bnc->flags); + "%s(%u): Rcvd NH update %s(%u) - metric %d/%d #nhops %d/%d flags 0x%x", + bnc->bgp->name_pretty, bnc->bgp->vrf_id, buf, + bnc->srte_color, nhr->metric, bnc->metric, + nhr->nexthop_num, bnc->nexthop_num, bnc->flags); } - if (nhr.metric != bnc->metric) + if (nhr->metric != bnc->metric) bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED; - if (nhr.nexthop_num != bnc->nexthop_num) + if (nhr->nexthop_num != bnc->nexthop_num) bnc->change_flags |= BGP_NEXTHOP_CHANGED; - if (nhr.nexthop_num) { + if (nhr->nexthop_num) { struct peer *peer = bnc->nht_info; /* notify bgp fsm if nbr ip goes from invalid->valid */ @@ -370,15 +337,15 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED); bnc->flags |= BGP_NEXTHOP_VALID; - bnc->metric = nhr.metric; - bnc->nexthop_num = nhr.nexthop_num; + bnc->metric = nhr->metric; + bnc->nexthop_num = nhr->nexthop_num; bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */ - for (i = 0; i < nhr.nexthop_num; i++) { + for (i = 0; i < 
nhr->nexthop_num; i++) { int num_labels = 0; - nexthop = nexthop_from_zapi_nexthop(&nhr.nexthops[i]); + nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]); /* * Turn on RA for the v6 nexthops @@ -388,7 +355,7 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) if (peer && !peer->ifp && CHECK_FLAG(peer->flags, PEER_FLAG_CAPABILITY_ENHE) - && nhr.prefix.family == AF_INET6 + && nhr->prefix.family == AF_INET6 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) { struct interface *ifp; @@ -442,7 +409,7 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) bnc->nexthop = nhlist_head; } else { bnc->flags &= ~BGP_NEXTHOP_VALID; - bnc->nexthop_num = nhr.nexthop_num; + bnc->nexthop_num = nhr->nexthop_num; /* notify bgp fsm if nbr ip goes from valid->invalid */ UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED); @@ -454,6 +421,77 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) evaluate_paths(bnc); } +void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) +{ + struct bgp_nexthop_cache_head *tree = NULL; + struct bgp_nexthop_cache *bnc; + struct bgp *bgp; + struct zapi_route nhr; + afi_t afi; + + bgp = bgp_lookup_by_vrf_id(vrf_id); + if (!bgp) { + flog_err( + EC_BGP_NH_UPD, + "parse nexthop update: instance not found for vrf_id %u", + vrf_id); + return; + } + + if (!zapi_nexthop_update_decode(zclient->ibuf, &nhr)) { + if (BGP_DEBUG(nht, NHT)) + zlog_debug("%s[%s]: Failure to decode nexthop update", + __PRETTY_FUNCTION__, bgp->name_pretty); + return; + } + + afi = family2afi(nhr.prefix.family); + if (command == ZEBRA_NEXTHOP_UPDATE) + tree = &bgp->nexthop_cache_table[afi]; + else if (command == ZEBRA_IMPORT_CHECK_UPDATE) + tree = &bgp->import_check_table[afi]; + + bnc = bnc_find(tree, &nhr.prefix, nhr.srte_color); + if (!bnc) { + if (BGP_DEBUG(nht, NHT)) { + char buf[PREFIX2STR_BUFFER]; + + prefix2str(&nhr.prefix, buf, sizeof(buf)); + zlog_debug( + "parse nexthop update(%s(%u)(%s)): bnc info not found", + buf, nhr.srte_color, bgp->name_pretty); 
+ } + return; + } + + bgp_process_nexthop_update(bnc, &nhr); + + /* + * HACK: if any BGP route is dependent on an SR-policy that doesn't + * exist, zebra will never send NH updates relative to that policy. In + * that case, whenever we receive an update about a colorless NH, update + * the corresponding colorful NHs that share the same endpoint but that + * are inactive. Only nexthop-tracking updates qualify: import-check + * results must not leak into nexthop_cache_table. This costs a linear + * scan per colorless update. Long term, zebra's RNH subsystem should be + * made aware of SR-TE colors (like bgpd is), which would solve this in + * a more efficient and elegant way. + */ + if (command == ZEBRA_NEXTHOP_UPDATE && nhr.srte_color == 0) { + struct bgp_nexthop_cache *bnc_iter; + + frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi], + bnc_iter) { + if (!prefix_same(&bnc->prefix, &bnc_iter->prefix) + || bnc_iter->srte_color == 0 + || CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID)) + continue; + + bgp_process_nexthop_update(bnc_iter, &nhr); + } + } +} + /* * Cleanup nexthop registration and status information for BGP nexthops * pertaining to this VRF. This is invoked upon VRF deletion.
@@ -667,8 +705,8 @@ static void evaluate_paths(struct bgp_nexthop_cache *bnc) char buf[PREFIX2STR_BUFFER]; bnc_str(bnc, buf, PREFIX2STR_BUFFER); zlog_debug( - "NH update for %s %s flags 0x%x chgflags 0x%x - evaluate paths", - buf, bnc->bgp->name_pretty, bnc->flags, + "NH update for %s(%u)(%s) - flags 0x%x chgflags 0x%x - evaluate paths", + buf, bnc->srte_color, bnc->bgp->name_pretty, bnc->flags, bnc->change_flags); } @@ -756,7 +794,8 @@ static void evaluate_paths(struct bgp_nexthop_cache *bnc) path->extra->igpmetric = 0; if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED) - || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)) + || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED) + || path->attr->srte_color != 0) SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED); path_valid = !!CHECK_FLAG(path->flags, BGP_PATH_VALID); @@ -874,7 +913,7 @@ void bgp_nht_reg_enhe_cap_intfs(struct peer *peer) if (p.family != AF_INET6) return; - bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p); + bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0); if (!bnc) return; @@ -916,7 +955,7 @@ void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer) if (p.family != AF_INET6) return; - bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p); + bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0); if (!bnc) return;