diff --git a/bgpd/bgp_mplsvpn.c b/bgpd/bgp_mplsvpn.c index 6c68fe0b44..e9391acaf1 100644 --- a/bgpd/bgp_mplsvpn.c +++ b/bgpd/bgp_mplsvpn.c @@ -1336,6 +1336,248 @@ leak_update(struct bgp *to_bgp, struct bgp_dest *bn, return new; } +void bgp_mplsvpn_path_nh_label_unlink(struct bgp_path_info *pi) +{ /* Detach pi from its per-nexthop label cache entry, if any. */ + struct bgp_label_per_nexthop_cache *blnc; + + if (!pi) + return; + + blnc = pi->label_nexthop_cache; + + if (!blnc) + return; + + LIST_REMOVE(pi, label_nh_thread); /* leave the entry's path list */ + pi->label_nexthop_cache->path_count--; + pi->label_nexthop_cache = NULL; + + if (LIST_EMPTY(&(blnc->paths))) + bgp_label_per_nexthop_free(blnc); /* no path left on the entry */ +} + +/* Called upon reception of a ZAPI message from zebra about a label for the + * cache entry passed as 'context'; registered through bgp_lp_get(). + */ +static int bgp_mplsvpn_get_label_per_nexthop_cb(mpls_label_t label, + void *context, bool allocated) +{ + struct bgp_label_per_nexthop_cache *blnc = context; + mpls_label_t old_label; + int debug = BGP_DEBUG(vpn, VPN_LEAK_LABEL); + struct bgp_path_info *pi; + struct bgp_table *table; + + old_label = blnc->label; + + if (debug) + zlog_debug("%s: label=%u, allocated=%d, nexthop=%pFX", __func__, + label, allocated, &blnc->nexthop); + if (allocated) + /* update the entry with the new label */ + blnc->label = label; + else + /* + * previously-allocated label is now invalid + * e.g.: zebra deallocated the labels and notifies it + */ + blnc->label = MPLS_INVALID_LABEL; + + if (old_label == blnc->label) + return 0; /* no change */ + + /* send a valid new label to zebra (ZEBRA_MPLS_LABELS_ADD), then update the linked paths */ + if (blnc->label != MPLS_INVALID_LABEL) + bgp_zebra_send_nexthop_label(ZEBRA_MPLS_LABELS_ADD, blnc->label, + ZEBRA_LSP_BGP, &blnc->nexthop); + + LIST_FOREACH (pi, &(blnc->paths), label_nh_thread) { /* NOTE(review): vpn_leak_from_vrf_update() can end in bgp_mplsvpn_path_nh_label_unlink(pi), which edits this list (and may free blnc) during the walk - confirm this is safe */ + if (!pi->net) + continue; + table = bgp_dest_table(pi->net); + if (!table) + continue; + vpn_leak_from_vrf_update(blnc->to_bgp, table->bgp, pi); + } + + return 0; +} + +/* Get the per-nexthop label of a path: + * - find the nexthop's entry in the per-nexthop label cache and return its label + * - else allocate a new per-nexthop label cache entry and request a + * 
label from zebra; the entry's current label is returned (the original note says MPLS_INVALID_LABEL - TODO confirm in bgp_label_per_nexthop_new()) + */ +static mpls_label_t _vpn_leak_from_vrf_get_per_nexthop_label( + struct bgp_path_info *pi, struct bgp *to_bgp, struct bgp *from_bgp, + afi_t afi, safi_t safi) +{ + struct bgp_nexthop_cache *bnc = pi->nexthop; + struct bgp_label_per_nexthop_cache *blnc; + struct bgp_label_per_nexthop_cache_head *tree; + struct prefix *nh_pfx = NULL; + struct prefix nh_gate = {0}; + + /* build the cache key (nh_pfx) from the first nexthop of the BNC nexthop cache */ + switch (bnc->nexthop->type) { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + /* the nexthop is recursive: key on its IPv4 gateway as a host prefix */ + nh_gate.family = AF_INET; + nh_gate.prefixlen = IPV4_MAX_BITLEN; + IPV4_ADDR_COPY(&nh_gate.u.prefix4, &bnc->nexthop->gate.ipv4); + nh_pfx = &nh_gate; + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + /* the nexthop is recursive: key on its IPv6 gateway as a host prefix */ + nh_gate.family = AF_INET6; + nh_gate.prefixlen = IPV6_MAX_BITLEN; + IPV6_ADDR_COPY(&nh_gate.u.prefix6, &bnc->nexthop->gate.ipv6); + nh_pfx = &nh_gate; + break; + case NEXTHOP_TYPE_IFINDEX: + /* the nexthop is directly connected: key on the BNC prefix itself */ + nh_pfx = &bnc->prefix; + break; + case NEXTHOP_TYPE_BLACKHOLE: + assert(!"Blackhole nexthop. Already checked by the caller."); + } /* NOTE(review): no default case; nh_pfx is dereferenced below, so every reachable case must set it */ + + /* find or allocate a nexthop label cache entry */ + tree = &from_bgp->mpls_labels_per_nexthop[family2afi(nh_pfx->family)]; + blnc = bgp_label_per_nexthop_find(tree, nh_pfx); + if (!blnc) { + blnc = bgp_label_per_nexthop_new(tree, nh_pfx); + blnc->to_bgp = to_bgp; + /* request a label from zebra for this nexthop; + * the response from zebra will trigger the callback + */ + bgp_lp_get(LP_TYPE_NEXTHOP, blnc, + bgp_mplsvpn_get_label_per_nexthop_cb); + } + + if (pi->label_nexthop_cache == blnc) + /* pi is already linked to this entry: no change */ + return blnc->label; + + /* Unlink from any previously linked nexthop label cache entry; that entry is freed once no path uses it. 
+ */ + bgp_mplsvpn_path_nh_label_unlink(pi); + if (blnc) { /* NOTE(review): blnc is dereferenced unconditionally in the return below, so it is expected to be non-NULL here */ + /* link pi into the entry's path list and count it */ + LIST_INSERT_HEAD(&(blnc->paths), pi, label_nh_thread); + pi->label_nexthop_cache = blnc; + pi->label_nexthop_cache->path_count++; + } + return blnc->label; +} + +/* Filter out all the cases where a per-nexthop label is not possible: + * - return an invalid label when the nexthop is invalid + * - return the per-VRF label when the per-nexthop label is not supported + * Otherwise, find or request a per-nexthop label. + */ +static mpls_label_t vpn_leak_from_vrf_get_per_nexthop_label( + afi_t afi, safi_t safi, struct bgp_path_info *pi, struct bgp *from_bgp, + struct bgp *to_bgp) +{ + struct bgp_path_info *bpi_ultimate = bgp_get_imported_bpi_ultimate(pi); + struct bgp *bgp_nexthop = NULL; + bool nh_valid; + afi_t nh_afi; + bool is_bgp_static_route; + + is_bgp_static_route = bpi_ultimate->sub_type == BGP_ROUTE_STATIC && + bpi_ultimate->type == ZEBRA_ROUTE_BGP; + + if (is_bgp_static_route == false && afi == AFI_IP && + CHECK_FLAG(pi->attr->flag, ATTR_FLAG_BIT(BGP_ATTR_NEXT_HOP)) && + (pi->attr->nexthop.s_addr == INADDR_ANY || + !ipv4_unicast_valid(&pi->attr->nexthop))) { + /* IPv4 nexthop in standard BGP encoding format. + * The address is not usable (unspecified, or not a valid unicast address). + * Fallback to the per-VRF label. + */ + bgp_mplsvpn_path_nh_label_unlink(pi); + return from_bgp->vpn_policy[afi].tovpn_label; + } + + if (is_bgp_static_route == false && afi == AFI_IP && + pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV4 && + (pi->attr->mp_nexthop_global_in.s_addr == INADDR_ANY || + !ipv4_unicast_valid(&pi->attr->mp_nexthop_global_in))) { + /* IPv4 nexthop is in MP-BGP encoding format. + * The address is not usable (unspecified, or not a valid unicast address). + * Fallback to the per-VRF label. 
+ */ + bgp_mplsvpn_path_nh_label_unlink(pi); + return from_bgp->vpn_policy[afi].tovpn_label; + } + + if (is_bgp_static_route == false && afi == AFI_IP6 && + (pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV6_GLOBAL || + pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) && + (IN6_IS_ADDR_UNSPECIFIED(&pi->attr->mp_nexthop_global) || + IN6_IS_ADDR_LOOPBACK(&pi->attr->mp_nexthop_global) || + IN6_IS_ADDR_MULTICAST(&pi->attr->mp_nexthop_global))) { + /* IPv6 nexthop is in MP-BGP encoding format. + * The address is not usable (unspecified, loopback or multicast). + * Fallback to the per-VRF label. + */ + bgp_mplsvpn_path_nh_label_unlink(pi); + return from_bgp->vpn_policy[afi].tovpn_label; + } + + /* Check the next-hop reachability. + * Use the bgp instance where the bgp_path_info originates (pi->extra->bgp_orig) when set, else from_bgp. + */ + if (pi->extra && pi->extra->bgp_orig) + bgp_nexthop = pi->extra->bgp_orig; + else + bgp_nexthop = from_bgp; + + nh_afi = BGP_ATTR_NH_AFI(afi, pi->attr); + nh_valid = bgp_find_or_add_nexthop(from_bgp, bgp_nexthop, nh_afi, safi, + pi, NULL, 0, NULL); + + if (!nh_valid && is_bgp_static_route && + !CHECK_FLAG(from_bgp->flags, BGP_FLAG_IMPORT_CHECK)) { + /* "network" prefixes not routable, but since 'no bgp network + * import-check' is configured, they are always valid in the BGP + * table. Fallback to the per-VRF label. + */ + bgp_mplsvpn_path_nh_label_unlink(pi); + return from_bgp->vpn_policy[afi].tovpn_label; + } + + if (!nh_valid || !pi->nexthop || pi->nexthop->nexthop_num == 0 || + !pi->nexthop->nexthop) { + /* invalid next-hop: + * do not send the per-VRF label; + * otherwise, when the next-hop becomes valid, + * we will have 2 BGP updates: + * - one with the per-VRF label + * - the second with the per-nexthop label + */ + bgp_mplsvpn_path_nh_label_unlink(pi); + return MPLS_INVALID_LABEL; + } + + if (pi->nexthop->nexthop_num > 1 || + pi->nexthop->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) { + /* Blackhole or ECMP routes + * are not compatible with per-nexthop labels. + * Fallback to the per-VRF label. 
+ */ + bgp_mplsvpn_path_nh_label_unlink(pi); + return from_bgp->vpn_policy[afi].tovpn_label; + } + + return _vpn_leak_from_vrf_get_per_nexthop_label(pi, to_bgp, from_bgp, + afi, safi); +} + /* cf vnc_import_bgp_add_route_mode_nvegroup() and add_vnc_route() */ void vpn_leak_from_vrf_update(struct bgp *to_bgp, /* to */ struct bgp *from_bgp, /* from */ @@ -1528,7 +1770,28 @@ void vpn_leak_from_vrf_update(struct bgp *to_bgp, /* to */ nexthop_self_flag = 1; } - label_val = from_bgp->vpn_policy[afi].tovpn_label; + if (CHECK_FLAG(from_bgp->vpn_policy[afi].flags, + BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP)) + /* per-nexthop label mode */ + label_val = vpn_leak_from_vrf_get_per_nexthop_label( + afi, safi, path_vrf, from_bgp, to_bgp); + else + /* per-VRF label mode */ + label_val = from_bgp->vpn_policy[afi].tovpn_label; + + if (label_val == MPLS_INVALID_LABEL && + CHECK_FLAG(from_bgp->vpn_policy[afi].flags, + BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP)) { + /* no valid label for the moment: skip this update. + * When the 'bgp_mplsvpn_get_label_per_nexthop_cb' callback gets + * a valid label value, it will call the current function again. 
+ */ + if (debug) + zlog_debug( + "%s: %s skipping: waiting for a valid per-label nexthop.", + __func__, from_bgp->name_pretty); + return; + } if (label_val == MPLS_LABEL_NONE) encode_label(MPLS_LABEL_IMPLICIT_NULL, &label); else @@ -1769,6 +2032,8 @@ void vpn_leak_from_vrf_withdraw_all(struct bgp *to_bgp, struct bgp *from_bgp, bpi, afi, safi); bgp_path_info_delete(bn, bpi); bgp_process(to_bgp, bn, afi, safi); + bgp_mplsvpn_path_nh_label_unlink( + bpi->extra->parent); /* NOTE(review): assumes bpi->extra is non-NULL here - confirm against the enclosing check */ } } } diff --git a/bgpd/bgp_mplsvpn.h b/bgpd/bgp_mplsvpn.h index c832b4abd4..75758edcc2 100644 --- a/bgpd/bgp_mplsvpn.h +++ b/bgpd/bgp_mplsvpn.h @@ -31,6 +31,7 @@ #define BGP_PREFIX_SID_SRV6_MAX_FUNCTION_LENGTH 20 extern void bgp_mplsvpn_init(void); +extern void bgp_mplsvpn_path_nh_label_unlink(struct bgp_path_info *pi); /* detach pi from its per-nexthop label cache entry; NULL pi is accepted */ extern int bgp_nlri_parse_vpn(struct peer *, struct attr *, struct bgp_nlri *); extern uint32_t decode_label(mpls_label_t *); extern void encode_label(mpls_label_t, mpls_label_t *); diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c index 1c79d7d03b..c878512389 100644 --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -31,6 +31,7 @@ #include "bgpd/bgp_fsm.h" #include "bgpd/bgp_vty.h" #include "bgpd/bgp_rd.h" +#include "bgpd/bgp_mplsvpn.h" DEFINE_MTYPE_STATIC(BGPD, MARTIAN_STRING, "BGP Martian Addr Intf String"); @@ -119,6 +120,8 @@ static void bgp_nexthop_cache_reset(struct bgp_nexthop_cache_head *tree) while (!LIST_EMPTY(&(bnc->paths))) { struct bgp_path_info *path = LIST_FIRST(&(bnc->paths)); + bgp_mplsvpn_path_nh_label_unlink(path); /* also drop the path's per-nexthop label link */ + path_nh_map(path, bnc, false); } diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c index a294ebcc63..39aff8d500 100644 --- a/bgpd/bgp_nht.c +++ b/bgpd/bgp_nht.c @@ -31,6 +31,7 @@ #include "bgpd/bgp_flowspec_util.h" #include "bgpd/bgp_evpn.h" #include "bgpd/bgp_rd.h" +#include "bgpd/bgp_mplsvpn.h" extern struct zclient *zclient; @@ -149,6 +150,8 @@ void bgp_unlink_nexthop(struct bgp_path_info *path) { struct bgp_nexthop_cache *bnc = path->nexthop; + 
bgp_mplsvpn_path_nh_label_unlink(path); /* done before the !bnc early return below */ + if (!bnc) return; @@ -1230,7 +1233,16 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc) SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED); path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID); - if (path_valid != bnc_is_valid_nexthop) { + if (path->type == ZEBRA_ROUTE_BGP && + path->sub_type == BGP_ROUTE_STATIC && + !CHECK_FLAG(bgp_path->flags, BGP_FLAG_IMPORT_CHECK)) + /* static routes with 'no bgp network import-check' are + * always valid. If nht is called with static routes, + * the vpn export needs to be triggered. + */ + vpn_leak_from_vrf_update(bgp_get_default(), bgp_path, + path); + else if (path_valid != bnc_is_valid_nexthop) { if (path_valid) { /* No longer valid, clear flag; also for EVPN * routes, unimport from VRFs if needed. @@ -1243,6 +1255,12 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc) bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest))) bgp_evpn_unimport_route(bgp_path, afi, safi, bgp_dest_get_prefix(dest), path); + if (safi == SAFI_UNICAST && + (bgp_path->inst_type != + BGP_INSTANCE_TYPE_VIEW)) + vpn_leak_from_vrf_withdraw( + bgp_get_default(), bgp_path, + path); } else { /* Path becomes valid, set flag; also for EVPN * routes, import from VRFs if needed. 
@@ -1255,6 +1273,12 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc) bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest))) bgp_evpn_import_route(bgp_path, afi, safi, bgp_dest_get_prefix(dest), path); + if (safi == SAFI_UNICAST && + (bgp_path->inst_type != + BGP_INSTANCE_TYPE_VIEW)) + vpn_leak_from_vrf_update( + bgp_get_default(), bgp_path, + path); } } diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index a64144b625..fbdd5fae7d 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -319,6 +319,12 @@ struct bgp_path_info { /* Addpath identifiers */ uint32_t addpath_rx_id; struct bgp_addpath_info_data tx_addpath; + + /* Linkage of this path in the path list of a per-nexthop label cache entry */ + LIST_ENTRY(bgp_path_info) label_nh_thread; + + /* Back pointer to the per-nexthop label cache entry this path is linked to (NULL when unlinked) */ + struct bgp_label_per_nexthop_cache *label_nexthop_cache; }; /* Structure used in BGP path selection */ diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 96b1f3e00f..f0724f4eb1 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -3911,3 +3911,26 @@ int bgp_zebra_srv6_manager_release_locator_chunk(const char *name) { return srv6_manager_release_locator_chunk(zclient, name); } + +void bgp_zebra_send_nexthop_label(int cmd, mpls_label_t label, + enum lsp_types_t ltype, struct prefix *p) +{ /* Build a single-nexthop zapi_labels request for 'label' with gateway p, and pass it to zebra_send_mpls_labels() with 'cmd'. */ + struct zapi_labels zl = {}; + struct zapi_nexthop *znh; + + zl.type = ltype; + zl.local_label = label; + zl.nexthop_num = 1; + znh = &zl.nexthops[0]; + if (p->family == AF_INET) + IPV4_ADDR_COPY(&znh->gate.ipv4, &p->u.prefix4); + else /* any family other than AF_INET is handled as IPv6 */ + IPV6_ADDR_COPY(&znh->gate.ipv6, &p->u.prefix6); + znh->type = + (p->family == AF_INET) ? 
NEXTHOP_TYPE_IPV4 : NEXTHOP_TYPE_IPV6; + znh->ifindex = 0; + znh->label_num = 0; + + /* vrf_id is DEFAULT_VRF: no VRF is set on the request here */ + zebra_send_mpls_labels(zclient, cmd, &zl); +} diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h index b09be890e5..6a266e1a67 100644 --- a/bgpd/bgp_zebra.h +++ b/bgpd/bgp_zebra.h @@ -118,4 +118,7 @@ extern int bgp_zebra_update(struct bgp *bgp, afi_t afi, safi_t safi, extern int bgp_zebra_stale_timer_update(struct bgp *bgp); extern int bgp_zebra_srv6_manager_get_locator_chunk(const char *name); extern int bgp_zebra_srv6_manager_release_locator_chunk(const char *name); +extern void bgp_zebra_send_nexthop_label(int cmd, mpls_label_t label, + enum lsp_types_t ltype, + struct prefix *p); #endif /* _QUAGGA_BGP_ZEBRA_H */