diff --git a/bgpd/bgp_attr.h b/bgpd/bgp_attr.h index 1b176f8716..a583581030 100644 --- a/bgpd/bgp_attr.h +++ b/bgpd/bgp_attr.h @@ -243,6 +243,15 @@ struct attr { */ #define ATTR_ES_PEER_ROUTER (1 << 4) + /* These two flags are only set on L3 routes installed in a + * VRF as a result of EVPN MAC-IP route + * XXX - while splitting up per-family attrs these need to be + * classified as non-EVPN + */ +#define ATTR_ES_L3_NHG_USE (1 << 5) +#define ATTR_ES_L3_NHG_ACTIVE (1 << 6) +#define ATTR_ES_L3_NHG (ATTR_ES_L3_NHG_USE | ATTR_ES_L3_NHG_ACTIVE) + /* route tag */ route_tag_t tag; diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index cebc1c4d22..2d4fea413a 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -65,11 +65,6 @@ DEFINE_QOBJ_TYPE(bgp_evpn_es); * Static function declarations */ static int delete_all_vni_routes(struct bgp *bgp, struct bgpevpn *vpn); -static void bgp_evpn_update_type2_route_entry(struct bgp *bgp, - struct bgpevpn *vpn, - struct bgp_dest *dest, - struct bgp_path_info *local_pi, - const char *caller); static struct in_addr zero_vtep_ip; /* @@ -1602,8 +1597,8 @@ static int update_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, } } - /* MAC-IP routes in the VNI route table are linked to the - * destination ES + /* local MAC-IP routes in the VNI table are linked to + * the destination ES */ if (route_change && vpn_rt && (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE)) @@ -1669,6 +1664,18 @@ static void evpn_cleanup_local_non_best_route(struct bgp *bgp, evpn_zebra_reinstall_best_route(bgp, vpn, dest); } +static inline bool bgp_evpn_route_add_l3_ecomm_ok(struct bgpevpn *vpn, + const struct prefix_evpn *p, + esi_t *esi) +{ + return p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE + && (is_evpn_prefix_ipaddr_v4(p) + || !IN6_IS_ADDR_LINKLOCAL( + &p->prefix.macip_addr.ip.ipaddr_v6)) + && CHECK_FLAG(vpn->flags, VNI_FLAG_USE_TWO_LABELS) + && bgpevpn_get_l3vni(vpn) && bgp_evpn_es_add_l3_ecomm_ok(esi); +} + /* * Create or update EVPN route (of type based 
on prefix) for specified VNI * and schedule for processing. @@ -1738,12 +1745,8 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, * IPv4 or IPv6 global addresses and we're advertising L3VNI with * these routes. */ - if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE && - (is_evpn_prefix_ipaddr_v4(p) || - !IN6_IS_ADDR_LINKLOCAL(&p->prefix.macip_addr.ip.ipaddr_v6)) && - CHECK_FLAG(vpn->flags, VNI_FLAG_USE_TWO_LABELS) && - bgpevpn_get_l3vni(vpn)) - add_l3_ecomm = 1; + add_l3_ecomm = bgp_evpn_route_add_l3_ecomm_ok( + vpn, p, (attr.es_flags & ATTR_ES_IS_LOCAL) ? &attr.esi : NULL); /* Set up extended community. */ build_evpn_route_extcomm(vpn, &attr, add_l3_ecomm); @@ -1930,11 +1933,10 @@ static int delete_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, return 0; } -static void bgp_evpn_update_type2_route_entry(struct bgp *bgp, - struct bgpevpn *vpn, - struct bgp_dest *dest, - struct bgp_path_info *local_pi, - const char *caller) +void bgp_evpn_update_type2_route_entry(struct bgp *bgp, struct bgpevpn *vpn, + struct bgp_dest *dest, + struct bgp_path_info *local_pi, + const char *caller) { afi_t afi = AFI_L2VPN; safi_t safi = SAFI_EVPN; @@ -1977,12 +1979,9 @@ static void bgp_evpn_update_type2_route_entry(struct bgp *bgp, /* Add L3 VNI RTs and RMAC for non IPv6 link-local if * using L3 VNI for type-2 routes also. */ - if ((is_evpn_prefix_ipaddr_v4(evp) || - !IN6_IS_ADDR_LINKLOCAL( - &evp->prefix.macip_addr.ip.ipaddr_v6)) && - CHECK_FLAG(vpn->flags, VNI_FLAG_USE_TWO_LABELS) && - bgpevpn_get_l3vni(vpn)) - add_l3_ecomm = 1; + add_l3_ecomm = bgp_evpn_route_add_l3_ecomm_ok( + vpn, evp, + (attr.es_flags & ATTR_ES_IS_LOCAL) ? &attr.esi : NULL); /* Set up extended community. 
*/ build_evpn_route_extcomm(vpn, &attr, add_l3_ecomm); @@ -2379,6 +2378,8 @@ static int install_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, afi_t afi = 0; safi_t safi = 0; bool new_pi = false; + bool use_l3nhg = false; + bool is_l3nhg_active = false; memset(pp, 0, sizeof(struct prefix)); ip_prefix_from_evpn_prefix(evp, pp); @@ -2414,6 +2415,13 @@ static int install_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, else attr.flag |= ATTR_FLAG_BIT(BGP_ATTR_NEXT_HOP); + bgp_evpn_es_vrf_use_nhg(bgp_vrf, &parent_pi->attr->esi, &use_l3nhg, + &is_l3nhg_active, NULL); + if (use_l3nhg) + attr.es_flags |= ATTR_ES_L3_NHG_USE; + if (is_l3nhg_active) + attr.es_flags |= ATTR_ES_L3_NHG_ACTIVE; + /* Check if route entry is already present. */ for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) if (pi->extra @@ -2454,6 +2462,9 @@ static int install_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, /* as it is an importation, change nexthop */ bgp_path_info_set_flag(dest, pi, BGP_PATH_ANNC_NH_SELF); + /* Link path to evpn nexthop */ + bgp_evpn_path_nh_add(bgp_vrf, pi); + bgp_aggregate_increment(bgp_vrf, bgp_dest_get_prefix(dest), pi, afi, safi); @@ -2487,6 +2498,8 @@ static int install_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, struct attr *attr_new; int ret; struct prefix_evpn ad_evp; + bool old_local_es = false; + bool new_local_es; /* EAD prefix in the global table doesn't include the VTEP-IP so * we need to create a different copy for the VNI @@ -2509,6 +2522,7 @@ static int install_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, /* Create an info */ pi = bgp_create_evpn_bgp_path_info(parent_pi, dest, parent_pi->attr); + new_local_es = bgp_evpn_attr_is_local_es(pi->attr); } else { if (attrhash_cmp(pi->attr, parent_pi->attr) && !CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) { @@ -2527,17 +2541,29 @@ static int install_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, if (!IPV4_ADDR_SAME(&pi->attr->nexthop, &attr_new->nexthop)) SET_FLAG(pi->flags, 
BGP_PATH_IGP_CHANGED); + old_local_es = bgp_evpn_attr_is_local_es(pi->attr); + new_local_es = bgp_evpn_attr_is_local_es(attr_new); + /* If ESI is different or if its type has changed we + * need to reinstall the path in zebra + */ + if ((old_local_es != new_local_es) + || memcmp(&pi->attr->esi, &attr_new->esi, + sizeof(attr_new->esi))) { + + if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) + zlog_debug("VNI %d path %pFX chg to %s es", + vpn->vni, &pi->net->p, + new_local_es ? "local" + : "non-local"); + bgp_path_info_set_flag(dest, pi, BGP_PATH_ATTR_CHANGED); + } + /* Unintern existing, set to new. */ bgp_attr_unintern(&pi->attr); pi->attr = attr_new; pi->uptime = bgp_clock(); } - /* MAC-IP routes in the VNI table are linked to the destination ES */ - if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) - bgp_evpn_path_es_link(pi, vpn->vni, - bgp_evpn_attr_get_esi(pi->attr)); - /* Perform route selection and update zebra, if required. */ ret = evpn_route_select_install(bgp, vpn, dest); @@ -2547,10 +2573,9 @@ static int install_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, * from sync-path to remote-path) */ local_pi = bgp_evpn_route_get_local_path(bgp, dest); - if (local_pi && bgp_evpn_attr_is_local_es(local_pi->attr)) + if (local_pi && (old_local_es || new_local_es)) bgp_evpn_update_type2_route_entry(bgp, vpn, dest, local_pi, - __func__); - + __func__); bgp_dest_unlock_node(dest); return ret; @@ -2619,6 +2644,9 @@ static int uninstall_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, /* Mark entry for deletion */ bgp_path_info_delete(dest, pi); + /* Unlink path to evpn nexthop */ + bgp_evpn_path_nh_del(bgp_vrf, pi); + /* Perform route selection and update zebra, if required. 
*/ bgp_process(bgp_vrf, dest, afi, safi); @@ -2853,11 +2881,11 @@ static int bgp_evpn_route_rmac_self_check(struct bgp *bgp_vrf, /* don't import hosts that are locally attached */ static inline bool -bgp_evpn_skip_vrf_import_of_local_es(const struct prefix_evpn *evp, +bgp_evpn_skip_vrf_import_of_local_es(struct bgp *bgp_vrf, + const struct prefix_evpn *evp, struct bgp_path_info *pi, int install) { esi_t *esi; - struct in_addr nh; if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) { esi = bgp_evpn_attr_get_esi(pi->attr); @@ -2875,31 +2903,53 @@ bgp_evpn_skip_vrf_import_of_local_es(const struct prefix_evpn *evp, } return true; } - - /* Don't import routes with ES as destination if the nexthop - * has not been advertised via the EAD-ES - */ - if (pi->attr) - nh = pi->attr->nexthop; - else - nh.s_addr = INADDR_ANY; - if (install && !bgp_evpn_es_is_vtep_active(esi, nh)) { - if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) { - char esi_buf[ESI_STR_LEN]; - - zlog_debug( - "vrf %s of evpn prefix %pFX skipped, nh %pI4 inactive in es %s", - install ? "import" : "unimport", evp, - &nh, - esi_to_str(esi, esi_buf, - sizeof(esi_buf))); - } - return true; - } } return false; } +/* + * Install or uninstall a mac-ip route in the provided vrf if + * there is a rt match + */ +int bgp_evpn_route_entry_install_if_vrf_match(struct bgp *bgp_vrf, + struct bgp_path_info *pi, + int install) +{ + int ret = 0; + const struct prefix_evpn *evp = + (const struct prefix_evpn *)bgp_dest_get_prefix(pi->net); + + /* Consider "valid" remote routes applicable for + * this VRF. 
+ */ + if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) + && pi->type == ZEBRA_ROUTE_BGP + && pi->sub_type == BGP_ROUTE_NORMAL)) + return 0; + + if (is_route_matching_for_vrf(bgp_vrf, pi)) { + if (bgp_evpn_route_rmac_self_check(bgp_vrf, evp, pi)) + return 0; + + /* don't import hosts that are locally attached */ + if (install + && !bgp_evpn_skip_vrf_import_of_local_es(bgp_vrf, evp, pi, + install)) + ret = install_evpn_route_entry_in_vrf(bgp_vrf, evp, pi); + else + ret = uninstall_evpn_route_entry_in_vrf(bgp_vrf, evp, + pi); + + if (ret) + flog_err(EC_BGP_EVPN_FAIL, + "Failed to %s EVPN %pFX route in VRF %s", + install ? "install" : "uninstall", evp, + vrf_id_to_name(bgp_vrf->vrf_id)); + } + + return ret; +} + /* * Install or uninstall mac-ip routes are appropriate for this * particular VRF. @@ -2949,46 +2999,14 @@ static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, int install) for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { - /* Consider "valid" remote routes applicable for - * this VRF. - */ - if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) - && pi->type == ZEBRA_ROUTE_BGP - && pi->sub_type == BGP_ROUTE_NORMAL)) - continue; - - /* don't import hosts that are locally attached - */ - if (bgp_evpn_skip_vrf_import_of_local_es( - evp, pi, install)) - continue; - - if (is_route_matching_for_vrf(bgp_vrf, pi)) { - if (bgp_evpn_route_rmac_self_check( - bgp_vrf, evp, pi)) - continue; - - if (install) - ret = install_evpn_route_entry_in_vrf( - bgp_vrf, evp, pi); - else - ret = uninstall_evpn_route_entry_in_vrf( - bgp_vrf, evp, pi); - - if (ret) { - flog_err( - EC_BGP_EVPN_FAIL, - "Failed to %s EVPN %pFX route in VRF %s", - install ? 
"install" - : "uninstall", - evp, - vrf_id_to_name( - bgp_vrf->vrf_id)); - bgp_dest_unlock_node(rd_dest); - bgp_dest_unlock_node(dest); - return ret; - } - } + ret = bgp_evpn_route_entry_install_if_vrf_match( + bgp_vrf, pi, install); + if (ret) { + /* unlock both nodes on early exit */ + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + return ret; + } } } } @@ -3169,14 +3183,13 @@ static int install_uninstall_route_in_vrfs(struct bgp *bgp_def, afi_t afi, || is_evpn_prefix_ipaddr_v6(evp))) return 0; - /* don't import hosts that are locally attached */ - if (bgp_evpn_skip_vrf_import_of_local_es(evp, pi, install)) - return 0; - for (ALL_LIST_ELEMENTS(vrfs, node, nnode, bgp_vrf)) { int ret; - if (install) + /* don't import hosts that are locally attached */ + if (install + && !bgp_evpn_skip_vrf_import_of_local_es(bgp_vrf, evp, pi, + install)) ret = install_evpn_route_entry_in_vrf(bgp_vrf, evp, pi); else ret = uninstall_evpn_route_entry_in_vrf(bgp_vrf, evp, @@ -3291,6 +3304,13 @@ static int bgp_evpn_install_uninstall_table(struct bgp *bgp, afi_t afi, if (sub_type != ECOMMUNITY_ROUTE_TARGET) continue; + /* non-local MAC-IP routes in the global route table are linked + * to the destination ES + */ + if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) + bgp_evpn_path_es_link(pi, 0, + bgp_evpn_attr_get_esi(pi->attr)); + /* * macip routes (type-2) are imported into VNI and VRF tables. * IMET route is imported into VNI table. 
@@ -3370,6 +3390,18 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, true, true); } +void bgp_evpn_import_type2_route(struct bgp_path_info *pi, int import) +{ + struct bgp *bgp_evpn; + + bgp_evpn = bgp_get_evpn(); + if (!bgp_evpn) + return; + + install_uninstall_evpn_route(bgp_evpn, AFI_L2VPN, SAFI_EVPN, + &pi->net->p, pi, import); +} + /* Import the pi into vrf routing tables */ void bgp_evpn_import_route_in_vrfs(struct bgp_path_info *pi, int import) { @@ -3723,7 +3755,7 @@ static int process_type2_route(struct peer *peer, afi_t afi, safi_t safi, if (attr) { STREAM_GET(&attr->esi, pkt, sizeof(esi_t)); - if (bgp_evpn_is_esi_local(&attr->esi)) + if (bgp_evpn_is_esi_local_and_non_bypass(&attr->esi)) attr->es_flags |= ATTR_ES_IS_LOCAL; else attr->es_flags &= ~ATTR_ES_IS_LOCAL; @@ -5776,11 +5808,14 @@ void bgp_evpn_init(struct bgp *bgp) /* Default BUM handling is to do head-end replication. */ bgp->vxlan_flood_ctrl = VXLAN_FLOOD_HEAD_END_REPL; + + bgp_evpn_nh_init(bgp); } void bgp_evpn_vrf_delete(struct bgp *bgp_vrf) { bgp_evpn_unmap_vrf_from_its_rts(bgp_vrf); + bgp_evpn_nh_finish(bgp_vrf); } /* diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h index 29d3d2c62f..83a6dd84c8 100644 --- a/bgpd/bgp_evpn.h +++ b/bgpd/bgp_evpn.h @@ -206,5 +206,4 @@ extern void bgp_evpn_init(struct bgp *bgp); extern int bgp_evpn_get_type5_prefixlen(const struct prefix *pfx); extern bool bgp_evpn_is_prefix_nht_supported(const struct prefix *pfx); extern void update_advertise_vrf_routes(struct bgp *bgp_vrf); - #endif /* _QUAGGA_BGP_EVPN_H */ diff --git a/bgpd/bgp_evpn_mh.c b/bgpd/bgp_evpn_mh.c index 826de21b9d..868238ebdd 100644 --- a/bgpd/bgp_evpn_mh.c +++ b/bgpd/bgp_evpn_mh.c @@ -64,17 +64,20 @@ static void bgp_evpn_es_vtep_del(struct bgp *bgp, struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr); static void bgp_evpn_es_cons_checks_pend_add(struct bgp_evpn_es *es); static void bgp_evpn_es_cons_checks_pend_del(struct bgp_evpn_es *es); -static void 
bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi); +static struct bgp_evpn_es_evi * +bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi); static uint32_t bgp_evpn_es_get_active_vtep_cnt(struct bgp_evpn_es *es); static void bgp_evpn_l3nhg_update_on_vtep_chg(struct bgp_evpn_es *es); static struct bgp_evpn_es *bgp_evpn_es_new(struct bgp *bgp, const esi_t *esi); static void bgp_evpn_es_free(struct bgp_evpn_es *es, const char *caller); -static void -bgp_evpn_es_path_update_on_vtep_chg(struct bgp_evpn_es_vtep *es_vtep, - bool active); +static void bgp_evpn_path_es_unlink(struct bgp_path_es_info *es_info); +static void bgp_evpn_mac_update_on_es_local_chg(struct bgp_evpn_es *es, + bool is_local); esi_t zero_esi_buf, *zero_esi = &zero_esi_buf; static int bgp_evpn_run_consistency_checks(struct thread *t); +static void bgp_evpn_path_nh_info_free(struct bgp_path_evpn_nh_info *nh_info); +static void bgp_evpn_path_nh_unlink(struct bgp_path_evpn_nh_info *nh_info); /****************************************************************************** * per-ES (Ethernet Segment) routing table @@ -1291,8 +1294,6 @@ static void bgp_evpn_es_vtep_re_eval_active(struct bgp *bgp, * removed. 
*/ bgp_evpn_l3nhg_update_on_vtep_chg(es_vtep->es); - bgp_evpn_es_path_update_on_vtep_chg(es_vtep, new_active); - /* queue up the es for background consistency checks */ bgp_evpn_es_cons_checks_pend_add(es_vtep->es); } @@ -1368,59 +1369,60 @@ static void bgp_evpn_es_vtep_del(struct bgp *bgp, bgp_evpn_es_vtep_do_del(bgp, es_vtep, esr); } -bool bgp_evpn_es_is_vtep_active(esi_t *esi, struct in_addr nh) -{ - struct bgp_evpn_es *es; - struct bgp_evpn_es_vtep *es_vtep; - struct listnode *node = NULL; - bool rc = false; - - if (!memcmp(esi, zero_esi, sizeof(*esi)) || !nh.s_addr) - return true; - - es = bgp_evpn_es_find(esi); - if (!es) - return false; - - for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { - if (es_vtep->vtep_ip.s_addr == nh.s_addr) { - if (CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE)) - rc = true; - break; - } - } - return rc; -} - /********************** ES MAC-IP paths ************************************* - * MAC-IP routes in the VNI routing table are linked to the destination - * ES for efficient updates on ES changes (such as VTEP add/del). + * 1. Local MAC-IP routes in the VNI routing table are linked to the + * destination ES (macip_evi_path_list) for efficient updates on ES oper + * state changes. + * 2. Non-local MAC-IP routes in the global routing table are linked to + * the destination ES (macip_global_path_list) for efficient updates on - + * a. VTEP add/del - this results in an L3NHG update. + * b. ES-VRF add/del - this may result in the host route being migrated to + * L3NHG or vice versa (flat multipath list). 
****************************************************************************/ -void bgp_evpn_path_es_info_free(struct bgp_path_es_info *es_info) +static void bgp_evpn_path_es_info_free(struct bgp_path_es_info *es_info) { bgp_evpn_path_es_unlink(es_info); XFREE(MTYPE_BGP_EVPN_PATH_ES_INFO, es_info); } +void bgp_evpn_path_mh_info_free(struct bgp_path_mh_info *mh_info) +{ + if (mh_info->es_info) + bgp_evpn_path_es_info_free(mh_info->es_info); + if (mh_info->nh_info) + bgp_evpn_path_nh_info_free(mh_info->nh_info); + XFREE(MTYPE_BGP_EVPN_PATH_MH_INFO, mh_info); +} + static struct bgp_path_es_info * bgp_evpn_path_es_info_new(struct bgp_path_info *pi, vni_t vni) { struct bgp_path_info_extra *e; + struct bgp_path_mh_info *mh_info; + struct bgp_path_es_info *es_info; e = bgp_path_info_extra_get(pi); + /* If mh_info doesn't exist allocate it */ + mh_info = e->mh_info; + if (!mh_info) + e->mh_info = mh_info = XCALLOC(MTYPE_BGP_EVPN_PATH_MH_INFO, + sizeof(struct bgp_path_mh_info)); + /* If es_info doesn't exist allocate it */ - if (!e->es_info) { - e->es_info = XCALLOC(MTYPE_BGP_EVPN_PATH_ES_INFO, - sizeof(struct bgp_path_es_info)); - e->es_info->pi = pi; - e->es_info->vni = vni; + es_info = mh_info->es_info; + if (!es_info) { + mh_info->es_info = es_info = + XCALLOC(MTYPE_BGP_EVPN_PATH_ES_INFO, + sizeof(struct bgp_path_es_info)); + es_info->vni = vni; + es_info->pi = pi; } - return e->es_info; + return es_info; } -void bgp_evpn_path_es_unlink(struct bgp_path_es_info *es_info) +static void bgp_evpn_path_es_unlink(struct bgp_path_es_info *es_info) { struct bgp_evpn_es *es = es_info->es; struct bgp_path_info *pi; @@ -1433,7 +1435,13 @@ void bgp_evpn_path_es_unlink(struct bgp_path_es_info *es_info) zlog_debug("vni %u path %pFX unlinked from es %s", es_info->vni, &pi->net->p, es->esi_str); - list_delete_node(es->macip_path_list, &es_info->es_listnode); + if (es_info->vni) + list_delete_node(es->macip_evi_path_list, + &es_info->es_listnode); + else + 
list_delete_node(es->macip_global_path_list, + &es_info->es_listnode); + es_info->es = NULL; /* if there are no other references against the ES it @@ -1450,9 +1458,11 @@ void bgp_evpn_path_es_link(struct bgp_path_info *pi, vni_t vni, esi_t *esi) { struct bgp_path_es_info *es_info; struct bgp_evpn_es *es; - struct bgp *bgp_evpn = bgp_get_evpn(); + struct bgp *bgp_evpn; - es_info = pi->extra ? pi->extra->es_info : NULL; + es_info = (pi->extra && pi->extra->mh_info) + ? pi->extra->mh_info->es_info + : NULL; /* if the esi is zero just unlink the path from the old es */ if (!esi || !memcmp(esi, zero_esi, sizeof(*esi))) { if (es_info) @@ -1460,6 +1470,7 @@ void bgp_evpn_path_es_link(struct bgp_path_info *pi, vni_t vni, esi_t *esi) return; } + bgp_evpn = bgp_get_evpn(); if (!bgp_evpn) return; @@ -1486,43 +1497,59 @@ void bgp_evpn_path_es_link(struct bgp_path_info *pi, vni_t vni, esi_t *esi) /* link mac-ip path to the new destination ES */ es_info->es = es; listnode_init(&es_info->es_listnode, es_info); - listnode_add(es->macip_path_list, &es_info->es_listnode); + if (es_info->vni) + listnode_add(es->macip_evi_path_list, &es_info->es_listnode); + else + listnode_add(es->macip_global_path_list, &es_info->es_listnode); } +static bool bgp_evpn_is_macip_path(struct bgp_path_info *pi) +{ + struct prefix_evpn *evp; + + /* Only MAC-IP routes need to be linked (MAC-only routes can be + * skipped) as these lists are maintained for managing + * host routes in the tenant VRF + */ + evp = (struct prefix_evpn *)&pi->net->p; + return is_evpn_prefix_ipaddr_v4(evp) || is_evpn_prefix_ipaddr_v6(evp); +} + +/* When a remote ES is added to a VRF, routes using that as + * a destination need to be migrated to an L3NHG or vice versa. + * This is done indirectly by re-attempting an install of the + * route in the associated VRFs. As a part of the VRF install use + * of l3 NHG is evaluated and this results in the + * attr.es_flags ATTR_ES_L3_NHG_USE being set or cleared. 
+ */ static void -bgp_evpn_es_path_update_on_vtep_chg(struct bgp_evpn_es_vtep *es_vtep, - bool active) +bgp_evpn_es_path_update_on_es_vrf_chg(struct bgp_evpn_es_vrf *es_vrf, + const char *reason) { struct listnode *node; struct bgp_path_es_info *es_info; struct bgp_path_info *pi; - struct bgp_path_info *parent_pi; - struct bgp_evpn_es *es = es_vtep->es; + struct bgp_evpn_es *es = es_vrf->es; + + if (!bgp_mh_info->host_routes_use_l3nhg) + return; if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) - zlog_debug("update paths linked to es %s on vtep chg", - es->esi_str); + zlog_debug("update paths linked to es %s on es-vrf %s %s", + es->esi_str, es_vrf->bgp_vrf->name, reason); - for (ALL_LIST_ELEMENTS_RO(es->macip_path_list, node, es_info)) { + for (ALL_LIST_ELEMENTS_RO(es->macip_global_path_list, node, es_info)) { pi = es_info->pi; - if (!CHECK_FLAG(pi->flags, BGP_PATH_VALID)) - continue; - if (pi->sub_type != BGP_ROUTE_IMPORTED) - continue; - - parent_pi = pi->extra ? pi->extra->parent : NULL; - if (!parent_pi || !parent_pi->attr) - continue; - - if (es_vtep->vtep_ip.s_addr != parent_pi->attr->nexthop.s_addr) + if (!bgp_evpn_is_macip_path(pi)) continue; if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) zlog_debug( - "update path %pFX linked to es %s on vtep chg", - &parent_pi->net->p, es->esi_str); - bgp_evpn_import_route_in_vrfs(parent_pi, active ? 
1 : 0); + "update path %pFX linked to es %s on vrf chg", + &pi->net->p, es->esi_str); + bgp_evpn_route_entry_install_if_vrf_match(es_vrf->bgp_vrf, pi, + 1); } } @@ -1579,8 +1606,10 @@ static struct bgp_evpn_es *bgp_evpn_es_new(struct bgp *bgp, const esi_t *esi) listset_app_node_mem(es->es_vrf_list); /* Initialise the route list used for efficient event handling */ - es->macip_path_list = list_new(); - listset_app_node_mem(es->macip_path_list); + es->macip_evi_path_list = list_new(); + listset_app_node_mem(es->macip_evi_path_list); + es->macip_global_path_list = list_new(); + listset_app_node_mem(es->macip_global_path_list); QOBJ_REG(es, bgp_evpn_es); @@ -1594,7 +1623,8 @@ static struct bgp_evpn_es *bgp_evpn_es_new(struct bgp *bgp, const esi_t *esi) static void bgp_evpn_es_free(struct bgp_evpn_es *es, const char *caller) { if ((es->flags & (BGP_EVPNES_LOCAL | BGP_EVPNES_REMOTE)) - || listcount(es->macip_path_list)) + || listcount(es->macip_evi_path_list) + || listcount(es->macip_global_path_list)) return; if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) @@ -1604,7 +1634,8 @@ static void bgp_evpn_es_free(struct bgp_evpn_es *es, const char *caller) list_delete(&es->es_evi_list); list_delete(&es->es_vrf_list); list_delete(&es->es_vtep_list); - list_delete(&es->macip_path_list); + list_delete(&es->macip_evi_path_list); + list_delete(&es->macip_global_path_list); bgp_table_unlock(es->route_table); /* remove the entry from various databases */ @@ -1615,15 +1646,25 @@ static void bgp_evpn_es_free(struct bgp_evpn_es *es, const char *caller) XFREE(MTYPE_BGP_EVPN_ES, es); } +static inline bool bgp_evpn_is_es_local_and_non_bypass(struct bgp_evpn_es *es) +{ + return (es->flags & BGP_EVPNES_LOCAL) + && !(es->flags & BGP_EVPNES_BYPASS); +} + /* init local info associated with the ES */ static void bgp_evpn_es_local_info_set(struct bgp *bgp, struct bgp_evpn_es *es) { char buf[BGP_EVPN_PREFIX_RD_LEN]; + bool old_is_local; + bool is_local; if (CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL)) return; + 
old_is_local = bgp_evpn_is_es_local_and_non_bypass(es); SET_FLAG(es->flags, BGP_EVPNES_LOCAL); + listnode_init(&es->es_listnode, es); listnode_add(bgp_mh_info->local_es_list, &es->es_listnode); @@ -1633,16 +1674,28 @@ static void bgp_evpn_es_local_info_set(struct bgp *bgp, struct bgp_evpn_es *es) es->prd.prefixlen = 64; snprintfrr(buf, sizeof(buf), "%pI4:%hu", &bgp->router_id, es->rd_id); (void)str2prefix_rd(buf, &es->prd); + + is_local = bgp_evpn_is_es_local_and_non_bypass(es); + if (old_is_local != is_local) + bgp_evpn_mac_update_on_es_local_chg(es, is_local); } /* clear any local info associated with the ES */ -static void bgp_evpn_es_local_info_clear(struct bgp_evpn_es *es) +static void bgp_evpn_es_local_info_clear(struct bgp_evpn_es *es, bool finish) { + bool old_is_local; + bool is_local; + if (!CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL)) return; + old_is_local = bgp_evpn_is_es_local_and_non_bypass(es); UNSET_FLAG(es->flags, BGP_EVPNES_LOCAL); + is_local = bgp_evpn_is_es_local_and_non_bypass(es); + if (!finish && (old_is_local != is_local)) + bgp_evpn_mac_update_on_es_local_chg(es, is_local); + /* remove from the ES local list */ list_delete_node(bgp_mh_info->local_es_list, &es->es_listnode); @@ -1664,10 +1717,127 @@ static void bgp_evpn_es_remote_info_re_eval(struct bgp_evpn_es *es) } } -static inline bool bgp_evpn_local_es_is_active(struct bgp_evpn_es *es) +/* If ES is present and local it needs to be active/oper-up for + * including L3 EC + */ +bool bgp_evpn_es_add_l3_ecomm_ok(esi_t *esi) { - return (es->flags & BGP_EVPNES_OPER_UP) - && !(es->flags & BGP_EVPNES_BYPASS); + struct bgp_evpn_es *es; + + if (!esi || !bgp_mh_info->suppress_l3_ecomm_on_inactive_es) + return true; + + es = bgp_evpn_es_find(esi); + + return (!es || !(es->flags & BGP_EVPNES_LOCAL) + || bgp_evpn_local_es_is_active(es)); +} + +static bool bgp_evpn_is_valid_local_path(struct bgp_path_info *pi) +{ + return (CHECK_FLAG(pi->flags, BGP_PATH_VALID) + && pi->type == ZEBRA_ROUTE_BGP + && 
pi->sub_type == BGP_ROUTE_STATIC); +} + +/* Update all local MAC-IP routes in the VNI routing table associated + * with the ES. When the ES is down the routes are advertised without + * the L3 extcomm + */ +static void bgp_evpn_mac_update_on_es_oper_chg(struct bgp_evpn_es *es) +{ + struct listnode *node; + struct bgp_path_es_info *es_info; + struct bgp_path_info *pi; + struct bgp *bgp; + struct bgpevpn *vpn; + + if (!bgp_mh_info->suppress_l3_ecomm_on_inactive_es) + return; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("update paths linked to es %s on oper chg", + es->esi_str); + + bgp = bgp_get_evpn(); + for (ALL_LIST_ELEMENTS_RO(es->macip_evi_path_list, node, es_info)) { + pi = es_info->pi; + + if (!bgp_evpn_is_valid_local_path(pi)) + continue; + + if (!bgp_evpn_is_macip_path(pi)) + continue; + + vpn = bgp_evpn_lookup_vni(bgp, es_info->vni); + if (!vpn) + continue; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) + zlog_debug( + "update path %d %pFX linked to es %s on oper chg", + es_info->vni, &pi->net->p, es->esi_str); + + bgp_evpn_update_type2_route_entry(bgp, vpn, pi->net, pi, + __func__); + } +} + +static bool bgp_evpn_is_valid_bgp_path(struct bgp_path_info *pi) +{ + return (CHECK_FLAG(pi->flags, BGP_PATH_VALID) + && pi->type == ZEBRA_ROUTE_BGP + && pi->sub_type == BGP_ROUTE_NORMAL); +} + +/* If an ES is no longer local (or becomes local) we need to re-install + * paths using that ES as destination. This is needed as the criteria + * for best path selection has changed. + */ +static void bgp_evpn_mac_update_on_es_local_chg(struct bgp_evpn_es *es, + bool is_local) +{ + struct listnode *node; + struct bgp_path_es_info *es_info; + struct bgp_path_info *pi; + bool tmp_local; + struct attr *attr_new; + struct attr attr_tmp; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("update paths linked to es %s on chg to %s", + es->esi_str, is_local ? 
"local" : "non-local"); + + for (ALL_LIST_ELEMENTS_RO(es->macip_global_path_list, node, es_info)) { + pi = es_info->pi; + + /* Consider "valid" remote routes */ + if (!bgp_evpn_is_valid_bgp_path(pi)) + continue; + + if (!pi->attr) + continue; + + tmp_local = !!(pi->attr->es_flags & ATTR_ES_IS_LOCAL); + if (tmp_local == is_local) + continue; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) + zlog_debug( + "update path %pFX linked to es %s on chg to %s", + &pi->net->p, es->esi_str, + is_local ? "local" : "non-local"); + + attr_tmp = *pi->attr; + if (is_local) + attr_tmp.es_flags |= ATTR_ES_IS_LOCAL; + else + attr_tmp.es_flags &= ~ATTR_ES_IS_LOCAL; + attr_new = bgp_attr_intern(&attr_tmp); + bgp_attr_unintern(&pi->attr); + pi->attr = attr_new; + bgp_evpn_import_type2_route(pi, 1); + } } static void bgp_evpn_local_es_deactivate(struct bgp *bgp, @@ -1699,6 +1869,8 @@ static void bgp_evpn_local_es_deactivate(struct bgp *bgp, "%u failed to delete type-1 route for ESI %s", bgp->vrf_id, es->esi_str); } + + bgp_evpn_mac_update_on_es_oper_chg(es); } /* Process ES link oper-down by withdrawing ES-EAD and ESR */ @@ -1746,6 +1918,8 @@ static void bgp_evpn_local_es_activate(struct bgp *bgp, struct bgp_evpn_es *es, es->originator_ip); (void)bgp_evpn_type1_route_update(bgp, es, NULL, &p); } + + bgp_evpn_mac_update_on_es_oper_chg(es); } /* Process ES link oper-up by generating ES-EAD and ESR */ @@ -1780,11 +1954,14 @@ static void bgp_evpn_local_es_bypass_update(struct bgp *bgp, bool old_bypass = !!(es->flags & BGP_EVPNES_BYPASS); bool old_active; bool new_active; + bool old_is_local; + bool is_local; if (bypass == old_bypass) return; old_active = bgp_evpn_local_es_is_active(es); + old_is_local = bgp_evpn_is_es_local_and_non_bypass(es); if (bypass) SET_FLAG(es->flags, BGP_EVPNES_BYPASS); else @@ -1801,6 +1978,10 @@ static void bgp_evpn_local_es_bypass_update(struct bgp *bgp, else bgp_evpn_local_es_deactivate(bgp, es); } + + is_local = bgp_evpn_is_es_local_and_non_bypass(es); + if (old_is_local 
!= is_local) + bgp_evpn_mac_update_on_es_local_chg(es, is_local); } static void bgp_evpn_local_es_do_del(struct bgp *bgp, struct bgp_evpn_es *es) @@ -1825,16 +2006,17 @@ static void bgp_evpn_local_es_do_del(struct bgp *bgp, struct bgp_evpn_es *es) /* Clear local info associated with the ES and free it up if there is * no remote reference */ - bgp_evpn_es_local_info_clear(es); + bgp_evpn_es_local_info_clear(es, false); } -bool bgp_evpn_is_esi_local(esi_t *esi) +bool bgp_evpn_is_esi_local_and_non_bypass(esi_t *esi) { struct bgp_evpn_es *es = NULL; /* Lookup ESI hash - should exist. */ es = bgp_evpn_es_find(esi); - return es ? !!(es->flags & BGP_EVPNES_LOCAL) : false; + + return es && bgp_evpn_is_es_local_and_non_bypass(es); } int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi) @@ -2114,7 +2296,9 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty, json_object_int_add(json, "vrfCount", listcount(es->es_vrf_list)); json_object_int_add(json, "macipPathCount", - listcount(es->macip_path_list)); + listcount(es->macip_evi_path_list)); + json_object_int_add(json, "macipGlobalPathCount", + listcount(es->macip_global_path_list)); json_object_int_add(json, "inconsistentVniVtepCount", es->incons_evi_vtep_cnt); if (listcount(es->es_vtep_list)) { @@ -2162,8 +2346,10 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty, vty_out(vty, " Remote VNI Count: %d\n", es->remote_es_evi_cnt); vty_out(vty, " VRF Count: %d\n", listcount(es->es_vrf_list)); - vty_out(vty, " MACIP Path Count: %d\n", - listcount(es->macip_path_list)); + vty_out(vty, " MACIP EVI Path Count: %d\n", + listcount(es->macip_evi_path_list)); + vty_out(vty, " MACIP Global Path Count: %d\n", + listcount(es->macip_global_path_list)); vty_out(vty, " Inconsistent VNI VTEP Count: %d\n", es->incons_evi_vtep_cnt); if (es->inconsistencies) { @@ -2394,6 +2580,8 @@ static void bgp_evpn_l3nhg_deactivate(struct bgp_evpn_es_vrf *es_vrf) es_vrf->nhg_id); bgp_evpn_l3nhg_zebra_del(es_vrf); es_vrf->flags &= 
~BGP_EVPNES_VRF_NHG_ACTIVE; + /* L3NHG is inactive; re-evaluate MAC-IP paths on this ES-VRF */ + bgp_evpn_es_path_update_on_es_vrf_chg(es_vrf, "l3nhg-deactivate"); } static void bgp_evpn_l3nhg_activate(struct bgp_evpn_es_vrf *es_vrf, bool update) @@ -2412,6 +2600,8 @@ static void bgp_evpn_l3nhg_activate(struct bgp_evpn_es_vrf *es_vrf, bool update) es_vrf->es->esi_str, es_vrf->bgp_vrf->vrf_id, es_vrf->nhg_id); es_vrf->flags |= BGP_EVPNES_VRF_NHG_ACTIVE; + /* MAC-IPs can now be installed via the L3NHG */ + bgp_evpn_es_path_update_on_es_vrf_chg(es_vrf, "l3nhg-activate"); } bgp_evpn_l3nhg_zebra_add(es_vrf); @@ -2488,6 +2678,11 @@ static struct bgp_evpn_es_vrf *bgp_evpn_es_vrf_create(struct bgp_evpn_es *es, bgp_vrf->vrf_id, es_vrf->nhg_id, es_vrf->v6_nhg_id); bgp_evpn_l3nhg_activate(es_vrf, false /* update */); + /* update paths in the VRF that may already be associated with + * this destination ES + */ + bgp_evpn_es_path_update_on_es_vrf_chg(es_vrf, "es-vrf-create"); + return es_vrf; } @@ -2516,6 +2711,11 @@ static void bgp_evpn_es_vrf_delete(struct bgp_evpn_es_vrf *es_vrf) /* remove from the VRF-ESI rb tree */ RB_REMOVE(bgp_es_vrf_rb_head, &bgp_vrf->es_vrf_rb_tree, es_vrf); + /* update paths in the VRF that may already be associated with + * this destination ES + */ + bgp_evpn_es_path_update_on_es_vrf_chg(es_vrf, "es-vrf-delete"); + XFREE(MTYPE_BGP_EVPN_ES_VRF, es_vrf); } @@ -2598,22 +2798,56 @@ void bgp_evpn_es_evi_vrf_ref(struct bgpevpn *vpn) bgp_evpn_es_vrf_ref(es_evi, vpn->bgp_vrf); } +/* 1. If ES-VRF is not present install the host route with the exploded/flat + * multi-path list. + * 2. If ES-VRF is present - + * - if L3NHG has not been activated for the ES-VRF (this could be because + * all the PEs attached to the VRF are down) do not install the route + * in zebra. 
+ * - if L3NHG has been activated install the route via that L3NHG + */ +void bgp_evpn_es_vrf_use_nhg(struct bgp *bgp_vrf, esi_t *esi, bool *use_l3nhg, + bool *is_l3nhg_active, + struct bgp_evpn_es_vrf **es_vrf_p) +{ + struct bgp_evpn_es *es; + struct bgp_evpn_es_vrf *es_vrf; + + if (!bgp_mh_info->host_routes_use_l3nhg) + return; + + es = bgp_evpn_es_find(esi); + if (!es) + return; + + es_vrf = bgp_evpn_es_vrf_find(es, bgp_vrf); + if (!es_vrf) + return; + + *use_l3nhg = true; + if (es_vrf->flags & BGP_EVPNES_VRF_NHG_ACTIVE) + *is_l3nhg_active = true; + if (es_vrf_p) + *es_vrf_p = es_vrf; +} + /* returns false if legacy-exploded mp needs to be used for route install */ bool bgp_evpn_path_es_use_nhg(struct bgp *bgp_vrf, struct bgp_path_info *pi, uint32_t *nhg_p) { esi_t *esi; - struct bgp_evpn_es *es; - struct bgp_evpn_es_vrf *es_vrf; + struct bgp_evpn_es_vrf *es_vrf = NULL; struct bgp_path_info *parent_pi; struct bgp_node *rn; struct prefix_evpn *evp; struct bgp_path_info *mpinfo; + bool use_l3nhg = false; + bool is_l3nhg_active = false; *nhg_p = 0; - /* L3NHG support is disabled, use legacy-exploded multipath */ - if (!bgp_mh_info->host_routes_use_l3nhg) + /* we don't support NHG for routes leaked from another VRF yet */ + if (pi->extra && pi->extra->bgp_orig) return false; parent_pi = get_route_parent_evpn(pi); @@ -2633,15 +2867,17 @@ bool bgp_evpn_path_es_use_nhg(struct bgp *bgp_vrf, struct bgp_path_info *pi, if (!memcmp(esi, zero_esi, sizeof(*esi))) return false; - /* if the ES-VRF is not setup or if the NHG has not been installed - * we cannot install the route yet, return a 0-NHG to indicate - * that + bgp_evpn_es_vrf_use_nhg(bgp_vrf, esi, &use_l3nhg, &is_l3nhg_active, + &es_vrf); + + /* L3NHG support is disabled, use legacy-exploded multipath */ + if (!use_l3nhg) + return false; + + /* if the NHG has not been installed we cannot install the route yet, + * return a 0-NHG to indicate that */ - es = bgp_evpn_es_find(esi); - if (!es) - return true; - es_vrf = 
bgp_evpn_es_vrf_find(es, bgp_vrf); - if (!es_vrf || !(es_vrf->flags & BGP_EVPNES_VRF_NHG_ACTIVE)) + if (!is_l3nhg_active) return true; /* this needs to be set the v6NHG if v6route */ @@ -2652,7 +2888,7 @@ bool bgp_evpn_path_es_use_nhg(struct bgp *bgp_vrf, struct bgp_path_info *pi, for (mpinfo = bgp_path_info_mpath_next(pi); mpinfo; mpinfo = bgp_path_info_mpath_next(mpinfo)) { - /* if any of the paths of have a different ESI we can't use + /* if any of the paths have a different ESI we can't use * the NHG associated with the ES. fallback to legacy-exploded * multipath */ @@ -2988,7 +3224,8 @@ static struct bgp_evpn_es_evi *bgp_evpn_es_evi_new(struct bgp_evpn_es *es, /* remove the ES-EVI from the per-L2-VNI and per-ES tables and free * up the memory. */ -static void bgp_evpn_es_evi_free(struct bgp_evpn_es_evi *es_evi) +static struct bgp_evpn_es_evi * +bgp_evpn_es_evi_free(struct bgp_evpn_es_evi *es_evi) { struct bgp_evpn_es *es = es_evi->es; struct bgpevpn *vpn = es_evi->vpn; @@ -2997,7 +3234,7 @@ static void bgp_evpn_es_evi_free(struct bgp_evpn_es_evi *es_evi) * reference */ if (es_evi->flags & (BGP_EVPNES_EVI_LOCAL | BGP_EVPNES_EVI_REMOTE)) - return; + return es_evi; bgp_evpn_es_vrf_deref(es_evi); @@ -3012,6 +3249,8 @@ static void bgp_evpn_es_evi_free(struct bgp_evpn_es_evi *es_evi) /* remove from the VNI-ESI rb tree */ XFREE(MTYPE_BGP_EVPN_ES_EVI, es_evi); + + return NULL; } /* init local info associated with the ES-EVI */ @@ -3028,17 +3267,18 @@ static void bgp_evpn_es_evi_local_info_set(struct bgp_evpn_es_evi *es_evi) } /* clear any local info associated with the ES-EVI */ -static void bgp_evpn_es_evi_local_info_clear(struct bgp_evpn_es_evi *es_evi) +static struct bgp_evpn_es_evi * +bgp_evpn_es_evi_local_info_clear(struct bgp_evpn_es_evi *es_evi) { struct bgpevpn *vpn = es_evi->vpn; if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) - return; + return es_evi; UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL); list_delete_node(vpn->local_es_evi_list, 
&es_evi->l2vni_listnode); - bgp_evpn_es_evi_free(es_evi); + return bgp_evpn_es_evi_free(es_evi); } /* eval remote info associated with the ES */ @@ -3068,14 +3308,15 @@ static void bgp_evpn_es_evi_remote_info_re_eval(struct bgp_evpn_es_evi *es_evi) } } -static void bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi) +static struct bgp_evpn_es_evi * +bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi) { struct prefix_evpn p; struct bgp_evpn_es *es = es_evi->es; struct bgp *bgp; if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) - return; + return es_evi; if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) zlog_debug("del local es %s evi %u", @@ -3109,8 +3350,7 @@ static void bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi) } } - bgp_evpn_es_evi_local_info_clear(es_evi); - + return bgp_evpn_es_evi_local_info_clear(es_evi); } int bgp_evpn_local_es_evi_del(struct bgp *bgp, esi_t *esi, vni_t vni) @@ -3326,6 +3566,30 @@ int bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn, return 0; } +/* If a VNI is being deleted we need to force del all remote VTEPs */ +static void bgp_evpn_remote_es_evi_flush(struct bgp_evpn_es_evi *es_evi) +{ + struct listnode *node = NULL; + struct listnode *nnode = NULL; + struct bgp_evpn_es_evi_vtep *evi_vtep; + struct bgp *bgp; + + bgp = bgp_get_evpn(); + if (!bgp) + return; + + /* delete all VTEPs */ + for (ALL_LIST_ELEMENTS(es_evi->es_evi_vtep_list, node, nnode, + evi_vtep)) { + evi_vtep->flags &= ~(BGP_EVPN_EVI_VTEP_EAD_PER_ES + | BGP_EVPN_EVI_VTEP_EAD_PER_EVI); + bgp_evpn_es_evi_vtep_re_eval_active(bgp, evi_vtep); + bgp_evpn_es_evi_vtep_free(evi_vtep); + } + /* delete the EVI */ + bgp_evpn_es_evi_remote_info_re_eval(es_evi); +} + /* Initialize the ES tables maintained per-L2_VNI */ void bgp_evpn_vni_es_init(struct bgpevpn *vpn) { @@ -3345,7 +3609,9 @@ void bgp_evpn_vni_es_cleanup(struct bgpevpn *vpn) RB_FOREACH_SAFE(es_evi, bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, es_evi_next) { - 
bgp_evpn_local_es_evi_do_del(es_evi); + es_evi = bgp_evpn_local_es_evi_do_del(es_evi); + if (es_evi) + bgp_evpn_remote_es_evi_flush(es_evi); } list_delete(&vpn->local_es_evi_list); @@ -3844,6 +4110,507 @@ static int bgp_evpn_run_consistency_checks(struct thread *t) return 0; } +/***************************************************************************** + * EVPN-Nexthop and RMAC management: nexthops associated with Type-2 routes + * that have an ES as destination are consolidated by BGP into a per-VRF + * nh->rmac mapping which is sent to zebra. Zebra installs the nexthop + * as a remote neigh/fdb entry with a dummy (type-1) prefix referencing it. + * + * This handling is needed because Type-2 routes with ES as dest use NHG + * that is setup using EAD routes (i.e. such NHGs do not include the + * RMAC info). + ****************************************************************************/ +static void bgp_evpn_nh_zebra_update_send(struct bgp_evpn_nh *nh, bool add) +{ + struct stream *s; + struct bgp *bgp_vrf = nh->bgp_vrf; + + /* Check socket. */ + if (!zclient || zclient->sock < 0) + return; + + /* Don't try to register if Zebra doesn't know of this instance. */ + if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp_vrf)) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("No zebra instance, not %s remote nh %s", + add ? "adding" : "deleting", nh->nh_str); + return; + } + + s = zclient->obuf; + stream_reset(s); + + zclient_create_header( + s, add ? 
ZEBRA_EVPN_REMOTE_NH_ADD : ZEBRA_EVPN_REMOTE_NH_DEL, + bgp_vrf->vrf_id); + stream_putl(s, bgp_vrf->vrf_id); + stream_put(s, &nh->ip, sizeof(nh->ip)); + if (add) + stream_put(s, &nh->rmac, sizeof(nh->rmac)); + + stream_putw_at(s, 0, stream_get_endp(s)); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) { + if (add) + zlog_debug("evpn vrf %s nh %s rmac %pEA add to zebra", + nh->bgp_vrf->name, nh->nh_str, &nh->rmac); + else if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("evpn vrf %s nh %s del to zebra", + nh->bgp_vrf->name, nh->nh_str); + } + + zclient_send_message(zclient); +} + +static void bgp_evpn_nh_zebra_update(struct bgp_evpn_nh *nh, bool add) +{ + if (add && !is_zero_mac(&nh->rmac)) { + nh->flags |= BGP_EVPN_NH_READY_FOR_ZEBRA; + bgp_evpn_nh_zebra_update_send(nh, true); + } else { + if (!(nh->flags & BGP_EVPN_NH_READY_FOR_ZEBRA)) + return; + nh->flags &= ~BGP_EVPN_NH_READY_FOR_ZEBRA; + bgp_evpn_nh_zebra_update_send(nh, false); + } +} + +static void *bgp_evpn_nh_alloc(void *p) +{ + struct bgp_evpn_nh *tmp_n = p; + struct bgp_evpn_nh *n; + + n = XCALLOC(MTYPE_BGP_EVPN_NH, sizeof(struct bgp_evpn_nh)); + *n = *tmp_n; + + return ((void *)n); +} + +static struct bgp_evpn_nh *bgp_evpn_nh_find(struct bgp *bgp_vrf, + struct ipaddr *ip) +{ + struct bgp_evpn_nh tmp; + struct bgp_evpn_nh *n; + + memset(&tmp, 0, sizeof(tmp)); + memcpy(&tmp.ip, ip, sizeof(struct ipaddr)); + n = hash_lookup(bgp_vrf->evpn_nh_table, &tmp); + + return n; +} + +/* Add nexthop entry - implicitly created on first path reference */ +static struct bgp_evpn_nh *bgp_evpn_nh_add(struct bgp *bgp_vrf, + struct ipaddr *ip, + struct bgp_path_info *pi) +{ + struct bgp_evpn_nh tmp_n; + struct bgp_evpn_nh *n = NULL; + + memset(&tmp_n, 0, sizeof(struct bgp_evpn_nh)); + memcpy(&tmp_n.ip, ip, sizeof(struct ipaddr)); + n = hash_get(bgp_vrf->evpn_nh_table, &tmp_n, bgp_evpn_nh_alloc); + ipaddr2str(ip, n->nh_str, sizeof(n->nh_str)); + n->bgp_vrf = bgp_vrf; + + n->pi_list = list_new(); + listset_app_node_mem(n->pi_list); + + 
/* Setup ref_pi when the nh is created */ + if (CHECK_FLAG(pi->flags, BGP_PATH_VALID) && pi->attr) { + n->ref_pi = pi; + memcpy(&n->rmac, &pi->attr->rmac, ETH_ALEN); + } + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("evpn vrf %s nh %s rmac %pEA add", n->bgp_vrf->name, + n->nh_str, &n->rmac); + bgp_evpn_nh_zebra_update(n, true); + return n; +} + +/* Delete nexthop entry if there are no paths referencing it */ +static void bgp_evpn_nh_del(struct bgp_evpn_nh *n) +{ + struct bgp_evpn_nh *tmp_n; + struct bgp *bgp_vrf = n->bgp_vrf; + + if (listcount(n->pi_list)) + return; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("evpn vrf %s nh %s del to zebra", bgp_vrf->name, + n->nh_str); + + bgp_evpn_nh_zebra_update(n, false); + list_delete(&n->pi_list); + tmp_n = hash_release(bgp_vrf->evpn_nh_table, n); + XFREE(MTYPE_BGP_EVPN_NH, tmp_n); +} + +static unsigned int bgp_evpn_nh_hash_keymake(const void *p) +{ + const struct bgp_evpn_nh *n = p; + const struct ipaddr *ip = &n->ip; + + if (IS_IPADDR_V4(ip)) + return jhash_1word(ip->ipaddr_v4.s_addr, 0); + + return jhash2(ip->ipaddr_v6.s6_addr32, + array_size(ip->ipaddr_v6.s6_addr32), 0); +} + +static bool bgp_evpn_nh_cmp(const void *p1, const void *p2) +{ + const struct bgp_evpn_nh *n1 = p1; + const struct bgp_evpn_nh *n2 = p2; + + if (n1 == NULL && n2 == NULL) + return true; + + if (n1 == NULL || n2 == NULL) + return false; + + return (memcmp(&n1->ip, &n2->ip, sizeof(struct ipaddr)) == 0); +} + +void bgp_evpn_nh_init(struct bgp *bgp_vrf) +{ + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("evpn vrf %s nh init", bgp_vrf->name); + bgp_vrf->evpn_nh_table = hash_create( + bgp_evpn_nh_hash_keymake, bgp_evpn_nh_cmp, "BGP EVPN NH table"); +} + +static void bgp_evpn_nh_flush_entry(struct bgp_evpn_nh *nh) +{ + struct listnode *node; + struct listnode *nnode; + struct bgp_path_evpn_nh_info *nh_info; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("evpn vrf %s nh %s flush", nh->bgp_vrf->name, + nh->nh_str); + + /* force 
flush paths */ + for (ALL_LIST_ELEMENTS(nh->pi_list, node, nnode, nh_info)) + bgp_evpn_path_nh_del(nh->bgp_vrf, nh_info->pi); +} + +static void bgp_evpn_nh_flush_cb(struct hash_bucket *bucket, void *ctxt) +{ + struct bgp_evpn_nh *nh = (struct bgp_evpn_nh *)bucket->data; + + bgp_evpn_nh_flush_entry(nh); +} + +void bgp_evpn_nh_finish(struct bgp *bgp_vrf) +{ + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("evpn vrf %s nh finish", bgp_vrf->name); + hash_iterate( + bgp_vrf->evpn_nh_table, + (void (*)(struct hash_bucket *, void *))bgp_evpn_nh_flush_cb, + NULL); + hash_free(bgp_vrf->evpn_nh_table); + bgp_vrf->evpn_nh_table = NULL; +} + +static void bgp_evpn_nh_update_ref_pi(struct bgp_evpn_nh *nh) +{ + struct listnode *node; + struct bgp_path_info *pi; + struct bgp_path_evpn_nh_info *nh_info; + + if (nh->ref_pi) + return; + + for (ALL_LIST_ELEMENTS_RO(nh->pi_list, node, nh_info)) { + pi = nh_info->pi; + if (!CHECK_FLAG(pi->flags, BGP_PATH_VALID) || !pi->attr) + continue; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("evpn vrf %s nh %s ref_pi update", + nh->bgp_vrf->name, nh->nh_str); + nh->ref_pi = pi; + /* If we have a new pi copy rmac from it and update + * zebra if the new rmac is different + */ + if (memcmp(&nh->rmac, &nh->ref_pi->attr->rmac, ETH_ALEN)) { + memcpy(&nh->rmac, &nh->ref_pi->attr->rmac, ETH_ALEN); + bgp_evpn_nh_zebra_update(nh, true); + } + break; + } +} + +static void bgp_evpn_nh_clear_ref_pi(struct bgp_evpn_nh *nh, + struct bgp_path_info *pi) +{ + if (nh->ref_pi != pi) + return; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("evpn vrf %s nh %s ref_pi clear", nh->bgp_vrf->name, + nh->nh_str); + nh->ref_pi = NULL; + /* try to find another ref_pi */ + bgp_evpn_nh_update_ref_pi(nh); + /* couldn't find one - clear the old rmac and notify zebra */ + if (!nh->ref_pi) { + memset(&nh->rmac, 0, ETH_ALEN); + bgp_evpn_nh_zebra_update(nh, true); + } +} + +static void bgp_evpn_path_nh_info_free(struct bgp_path_evpn_nh_info *nh_info) +{ + 
bgp_evpn_path_nh_unlink(nh_info); + XFREE(MTYPE_BGP_EVPN_PATH_NH_INFO, nh_info); +} + +static struct bgp_path_evpn_nh_info * +bgp_evpn_path_nh_info_new(struct bgp_path_info *pi) +{ + struct bgp_path_info_extra *e; + struct bgp_path_mh_info *mh_info; + struct bgp_path_evpn_nh_info *nh_info; + + e = bgp_path_info_extra_get(pi); + + /* If mh_info doesn't exist allocate it */ + mh_info = e->mh_info; + if (!mh_info) + e->mh_info = mh_info = XCALLOC(MTYPE_BGP_EVPN_PATH_MH_INFO, + sizeof(struct bgp_path_mh_info)); + + /* If nh_info doesn't exist allocate it */ + nh_info = mh_info->nh_info; + if (!nh_info) { + mh_info->nh_info = nh_info = + XCALLOC(MTYPE_BGP_EVPN_PATH_NH_INFO, + sizeof(struct bgp_path_evpn_nh_info)); + nh_info->pi = pi; + } + + return nh_info; +} + +static void bgp_evpn_path_nh_unlink(struct bgp_path_evpn_nh_info *nh_info) +{ + struct bgp_evpn_nh *nh = nh_info->nh; + struct bgp_path_info *pi; + char prefix_buf[PREFIX_STRLEN]; + + if (!nh) + return; + + pi = nh_info->pi; + if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) + zlog_debug("path %s unlinked from nh %s %s", + pi->net ? prefix2str(&pi->net->p, prefix_buf, + sizeof(prefix_buf)) + : "", + nh->bgp_vrf->name, nh->nh_str); + + list_delete_node(nh->pi_list, &nh_info->nh_listnode); + + nh_info->nh = NULL; + + /* check if the ref_pi need to be updated */ + bgp_evpn_nh_clear_ref_pi(nh, pi); + + /* if there are no other references against the nh it + * needs to be freed + */ + bgp_evpn_nh_del(nh); + + /* Note we don't free the path nh_info on unlink; it will be freed up + * along with the path. 
+ */ +} + +static void bgp_evpn_path_nh_link(struct bgp *bgp_vrf, struct bgp_path_info *pi) +{ + struct bgp_path_evpn_nh_info *nh_info; + struct bgp_evpn_nh *nh; + struct ipaddr ip; + + /* EVPN nexthop setup in bgp has been turned off */ + if (!bgp_mh_info->bgp_evpn_nh_setup) + return; + + if (!bgp_vrf->evpn_nh_table) { + if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) + zlog_debug("path %pFX linked to vrf %s failed", + &pi->net->p, bgp_vrf->name); + return; + } + + nh_info = (pi->extra && pi->extra->mh_info) + ? pi->extra->mh_info->nh_info + : NULL; + + /* if NHG is not being used for this path we don't need to manage the + * nexthops in bgp (they are managed by zebra instead) + */ + if (!(pi->attr->es_flags & ATTR_ES_L3_NHG_USE)) { + if (nh_info) + bgp_evpn_path_nh_unlink(nh_info); + return; + } + + /* setup nh_info against the path if it doesn't already exist */ + if (!nh_info) + nh_info = bgp_evpn_path_nh_info_new(pi); + + /* find-create nh */ + memset(&ip, 0, sizeof(ip)); + if (pi->net->p.family == AF_INET6) { + SET_IPADDR_V6(&ip); + memcpy(&ip.ipaddr_v6, &pi->attr->mp_nexthop_global, + sizeof(ip.ipaddr_v6)); + } else { + SET_IPADDR_V4(&ip); + memcpy(&ip.ipaddr_v4, &pi->attr->nexthop, sizeof(ip.ipaddr_v4)); + } + + nh = bgp_evpn_nh_find(bgp_vrf, &ip); + if (!nh) + nh = bgp_evpn_nh_add(bgp_vrf, &ip, pi); + + /* dup check */ + if (nh_info->nh == nh) { + /* Check if any of the paths are now valid */ + bgp_evpn_nh_update_ref_pi(nh); + return; + } + + /* unlink old nh if any */ + bgp_evpn_path_nh_unlink(nh_info); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) + zlog_debug("path %pFX linked to nh %s %s", &pi->net->p, + nh->bgp_vrf->name, nh->nh_str); + + /* link mac-ip path to the new nh */ + nh_info->nh = nh; + listnode_init(&nh_info->nh_listnode, nh_info); + listnode_add(nh->pi_list, &nh_info->nh_listnode); + /* If a new valid path got linked to the nh see if we can get the rmac + * from it + */ + bgp_evpn_nh_update_ref_pi(nh); + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) { + if (!nh->ref_pi)
+ zlog_debug( + "path %pFX linked to nh %s %s with no valid pi", + &pi->net->p, nh->bgp_vrf->name, nh->nh_str); + } +} + +void bgp_evpn_path_nh_del(struct bgp *bgp_vrf, struct bgp_path_info *pi) +{ + struct bgp_path_evpn_nh_info *nh_info; + + nh_info = (pi->extra && pi->extra->mh_info) + ? pi->extra->mh_info->nh_info + : NULL; + + if (!nh_info) + return; + + bgp_evpn_path_nh_unlink(nh_info); +} + +void bgp_evpn_path_nh_add(struct bgp *bgp_vrf, struct bgp_path_info *pi) +{ + bgp_evpn_path_nh_link(bgp_vrf, pi); +} + +static void bgp_evpn_nh_show_entry(struct bgp_evpn_nh *nh, struct vty *vty, + json_object *json_array) +{ + json_object *json = NULL; + char mac_buf[ETHER_ADDR_STRLEN]; + char prefix_buf[PREFIX_STRLEN]; + + if (json_array) + /* create a separate json object for each nexthop */ + json = json_object_new_object(); + + prefix_mac2str(&nh->rmac, mac_buf, sizeof(mac_buf)); + if (nh->ref_pi && nh->ref_pi->net) + prefix2str(&nh->ref_pi->net->p, prefix_buf, sizeof(prefix_buf)); + else + prefix_buf[0] = '\0'; + if (json) { + json_object_string_add(json, "vrf", nh->bgp_vrf->name); + json_object_string_add(json, "ip", nh->nh_str); + json_object_string_add(json, "rmac", mac_buf); + json_object_string_add(json, "basePath", prefix_buf); + json_object_int_add(json, "pathCount", listcount(nh->pi_list)); + } else { + vty_out(vty, "%-15s %-15s %-17s %-10d %s\n", nh->bgp_vrf->name, + nh->nh_str, mac_buf, listcount(nh->pi_list), + prefix_buf); + } + + /* add the nexthop to the json array */ + if (json_array) + json_object_array_add(json_array, json); +} + +struct nh_show_ctx { + struct vty *vty; + json_object *json; +}; + +static void bgp_evpn_nh_show_hash_cb(struct hash_bucket *bucket, void *ctxt) +{ + struct bgp_evpn_nh *nh = (struct bgp_evpn_nh *)bucket->data; + struct nh_show_ctx *wctx = (struct nh_show_ctx *)ctxt; + + bgp_evpn_nh_show_entry(nh, wctx->vty, wctx->json); +} + +/* Display all evpn nexthops */ +void bgp_evpn_nh_show(struct vty *vty, bool uj) +{ + json_object *json_array =
NULL; + struct bgp *bgp_vrf; + struct listnode *node; + struct nh_show_ctx wctx; + + if (uj) { + /* create an array of nexthops */ + json_array = json_object_new_array(); + } else { + vty_out(vty, "%-15s %-15s %-17s %-10s %s\n", "VRF", "IP", + "RMAC", "#Paths", "Base Path"); + } + + wctx.vty = vty; + wctx.json = json_array; + + /* walk through all vrfs */ + for (ALL_LIST_ELEMENTS_RO(bm->bgp, node, bgp_vrf)) { + hash_iterate(bgp_vrf->evpn_nh_table, + (void (*)(struct hash_bucket *, + void *))bgp_evpn_nh_show_hash_cb, + &wctx); + } + + /* print the array of json nexthops */ + if (uj) { + vty_out(vty, "%s\n", + json_object_to_json_string_ext( + json_array, JSON_C_TO_STRING_PRETTY)); + json_object_free(json_array); + } +} + /*****************************************************************************/ void bgp_evpn_mh_init(void) { @@ -3866,6 +4633,8 @@ void bgp_evpn_mh_init(void) bgp_mh_info->consistency_checking = true; bgp_mh_info->install_l3nhg = false; bgp_mh_info->host_routes_use_l3nhg = BGP_EVPN_MH_USE_ES_L3NHG_DEF; + bgp_mh_info->suppress_l3_ecomm_on_inactive_es = true; + bgp_mh_info->bgp_evpn_nh_setup = true; memset(&zero_esi_buf, 0, sizeof(esi_t)); } @@ -3880,7 +4649,7 @@ void bgp_evpn_mh_finish(void) RB_FOREACH_SAFE (es, bgp_es_rb_head, &bgp_mh_info->es_rb_tree, es_next) { - bgp_evpn_es_local_info_clear(es); + bgp_evpn_es_local_info_clear(es, true); } if (bgp_mh_info->t_cons_check) thread_cancel(&bgp_mh_info->t_cons_check); diff --git a/bgpd/bgp_evpn_mh.h b/bgpd/bgp_evpn_mh.h index 8c66e391b6..c96de86871 100644 --- a/bgpd/bgp_evpn_mh.h +++ b/bgpd/bgp_evpn_mh.h @@ -105,8 +105,17 @@ struct bgp_evpn_es { /* List of MAC-IP VNI paths using this ES as destination - * element is bgp_path_info_extra->es_info + * Note: Only local/zebra-added MACIP paths in the VNI + * routing table are linked to this list */ - struct list *macip_path_list; + struct list *macip_evi_path_list; + + /* List of MAC-IP paths in the global routing table using this + * ES as destination - data is
bgp_path_info_extra->es_info + * Note: Only non-local/imported MACIP paths in the global + * routing table are linked to this list + */ + struct list *macip_global_path_list; /* Number of remote VNIs referencing this ES */ uint32_t remote_es_evi_cnt; @@ -241,6 +250,26 @@ struct bgp_evpn_es_evi_vtep { struct bgp_evpn_es_vtep *es_vtep; }; +/* A nexthop is created when a path (imported from an EVPN type-2 route) + * is added to the VRF route table using that nexthop. + * It is added on first pi reference and removed on last pi deref. + */ +struct bgp_evpn_nh { + /* backpointer to the VRF */ + struct bgp *bgp_vrf; + /* nexthop/VTEP IP */ + struct ipaddr ip; + /* description for easy logging */ + char nh_str[INET6_ADDRSTRLEN]; + struct ethaddr rmac; + /* pi from which we are pulling the nh RMAC */ + struct bgp_path_info *ref_pi; + /* List of VRF paths using this nexthop */ + struct list *pi_list; + uint8_t flags; +#define BGP_EVPN_NH_READY_FOR_ZEBRA (1 << 0) +}; + /* multihoming information stored in bgp_master */ #define bgp_mh_info (bm->mh_info) struct bgp_evpn_mh_info { @@ -273,6 +302,12 @@ struct bgp_evpn_mh_info { /* Skip EAD-EVI advertisements by turning off this knob */ bool ead_evi_tx; #define BGP_EVPN_MH_EAD_EVI_TX_DEF true + /* If the Local ES is inactive we advertise the MAC-IP without the + * L3 ecomm + */ + bool suppress_l3_ecomm_on_inactive_es; + /* Setup EVPN PE nexthops and their RMAC in bgpd */ + bool bgp_evpn_nh_setup; }; /****************************************************************************/ @@ -330,6 +365,12 @@ static inline uint32_t bgp_evpn_attr_get_df_pref(struct attr *attr) return (attr) ? 
attr->df_pref : 0; } +static inline bool bgp_evpn_local_es_is_active(struct bgp_evpn_es *es) +{ + return (es->flags & BGP_EVPNES_OPER_UP) + && !(es->flags & BGP_EVPNES_BYPASS); +} + /****************************************************************************/ extern int bgp_evpn_es_route_install_uninstall(struct bgp *bgp, struct bgp_evpn_es *es, afi_t afi, safi_t safi, @@ -362,21 +403,28 @@ void bgp_evpn_es_evi_show_vni(struct vty *vty, vni_t vni, bool uj, bool detail); void bgp_evpn_es_evi_show(struct vty *vty, bool uj, bool detail); struct bgp_evpn_es *bgp_evpn_es_find(const esi_t *esi); -extern bool bgp_evpn_is_esi_local(esi_t *esi); extern void bgp_evpn_vrf_es_init(struct bgp *bgp_vrf); +extern bool bgp_evpn_is_esi_local_and_non_bypass(esi_t *esi); extern void bgp_evpn_es_vrf_deref(struct bgp_evpn_es_evi *es_evi); extern void bgp_evpn_es_vrf_ref(struct bgp_evpn_es_evi *es_evi, struct bgp *bgp_vrf); -extern void bgp_evpn_path_es_info_free(struct bgp_path_es_info *es_info); -extern void bgp_evpn_path_es_unlink(struct bgp_path_es_info *es_info); +extern void bgp_evpn_path_mh_info_free(struct bgp_path_mh_info *mh_info); extern void bgp_evpn_path_es_link(struct bgp_path_info *pi, vni_t vni, esi_t *esi); -extern bool bgp_evpn_es_is_vtep_active(esi_t *esi, struct in_addr nh); extern bool bgp_evpn_path_es_use_nhg(struct bgp *bgp_vrf, struct bgp_path_info *pi, uint32_t *nhg_p); extern void bgp_evpn_es_vrf_show(struct vty *vty, bool uj, struct bgp_evpn_es *es); extern void bgp_evpn_es_vrf_show_esi(struct vty *vty, esi_t *esi, bool uj); extern void bgp_evpn_switch_ead_evi_rx(void); +extern bool bgp_evpn_es_add_l3_ecomm_ok(esi_t *esi); +extern void bgp_evpn_es_vrf_use_nhg(struct bgp *bgp_vrf, esi_t *esi, + bool *use_l3nhg, bool *is_l3nhg_active, + struct bgp_evpn_es_vrf **es_vrf_p); +extern void bgp_evpn_nh_init(struct bgp *bgp_vrf); +extern void bgp_evpn_nh_finish(struct bgp *bgp_vrf); +extern void bgp_evpn_nh_show(struct vty *vty, bool uj); +extern void 
bgp_evpn_path_nh_add(struct bgp *bgp_vrf, struct bgp_path_info *pi); +extern void bgp_evpn_path_nh_del(struct bgp *bgp_vrf, struct bgp_path_info *pi); #endif /* _FRR_BGP_EVPN_MH_H */ diff --git a/bgpd/bgp_evpn_private.h b/bgpd/bgp_evpn_private.h index ff4970af41..debed9f68b 100644 --- a/bgpd/bgp_evpn_private.h +++ b/bgpd/bgp_evpn_private.h @@ -631,4 +631,13 @@ bgp_global_evpn_node_lookup(struct bgp_table *table, afi_t afi, safi_t safi, const struct prefix_evpn *evp, struct prefix_rd *prd); extern void bgp_evpn_import_route_in_vrfs(struct bgp_path_info *pi, int import); +extern void bgp_evpn_update_type2_route_entry(struct bgp *bgp, + struct bgpevpn *vpn, + struct bgp_node *rn, + struct bgp_path_info *local_pi, + const char *caller); +extern int bgp_evpn_route_entry_install_if_vrf_match(struct bgp *bgp_vrf, + struct bgp_path_info *pi, + int install); +extern void bgp_evpn_import_type2_route(struct bgp_path_info *pi, int import); #endif /* _BGP_EVPN_PRIVATE_H */ diff --git a/bgpd/bgp_evpn_vty.c b/bgpd/bgp_evpn_vty.c index b101589a79..0ae3eb33e1 100644 --- a/bgpd/bgp_evpn_vty.c +++ b/bgpd/bgp_evpn_vty.c @@ -688,7 +688,8 @@ static void show_esi_routes(struct bgp *bgp, /* Display all MAC-IP VNI routes linked to an ES */ static void bgp_evpn_show_routes_mac_ip_es(struct vty *vty, esi_t *esi, - json_object *json, int detail) + json_object *json, int detail, + bool global_table) { struct bgp_node *rn; struct bgp_path_info *pi; @@ -709,11 +710,17 @@ static void bgp_evpn_show_routes_mac_ip_es(struct vty *vty, esi_t *esi, json_paths = json_object_new_array(); RB_FOREACH (es, bgp_es_rb_head, &bgp_mh_info->es_rb_tree) { + struct list *es_list; if (esi && memcmp(esi, &es->esi, sizeof(*esi))) continue; - for (ALL_LIST_ELEMENTS_RO(es->macip_path_list, node, es_info)) { + if (global_table) + es_list = es->macip_global_path_list; + else + es_list = es->macip_evi_path_list; + + for (ALL_LIST_ELEMENTS_RO(es_list, node, es_info)) { json_object *json_path = NULL; pi = es_info->pi; @@ 
-758,6 +765,18 @@ static void bgp_evpn_show_routes_mac_ip_es(struct vty *vty, esi_t *esi, } } +static void bgp_evpn_show_routes_mac_ip_evi_es(struct vty *vty, esi_t *esi, + json_object *json, int detail) +{ + return bgp_evpn_show_routes_mac_ip_es(vty, esi, json, detail, false); +} + +static void bgp_evpn_show_routes_mac_ip_global_es(struct vty *vty, esi_t *esi, + json_object *json, int detail) +{ + return bgp_evpn_show_routes_mac_ip_es(vty, esi, json, detail, true); +} + static void show_vni_routes(struct bgp *bgp, struct bgpevpn *vpn, int type, struct vty *vty, struct in_addr vtep_ip, json_object *json, int detail) @@ -4100,6 +4119,21 @@ DEFPY(show_bgp_l2vpn_evpn_es_vrf, show_bgp_l2vpn_evpn_es_vrf_cmd, return CMD_SUCCESS; } +DEFPY(show_bgp_l2vpn_evpn_nh, + show_bgp_l2vpn_evpn_nh_cmd, + "show bgp l2vpn evpn next-hops [json$uj]", + SHOW_STR + BGP_STR + L2VPN_HELP_STR + EVPN_HELP_STR + "Nexthops\n" + JSON_STR) +{ + bgp_evpn_nh_show(vty, uj); + + return CMD_SUCCESS; +} + /* * Display EVPN neighbor summary. 
*/ @@ -4658,12 +4692,12 @@ DEFUN(show_bgp_l2vpn_evpn_route_vni_all, } DEFPY_HIDDEN( - show_bgp_l2vpn_evpn_route_mac_ip_es, - show_bgp_l2vpn_evpn_route_mac_ip_es_cmd, - "show bgp l2vpn evpn route mac-ip-es [NAME$esi_str|detail$detail] [json$uj]", + show_bgp_l2vpn_evpn_route_mac_ip_evi_es, + show_bgp_l2vpn_evpn_route_mac_ip_evi_es_cmd, + "show bgp l2vpn evpn route mac-ip-evi-es [NAME$esi_str|detail$detail] [json$uj]", SHOW_STR BGP_STR L2VPN_HELP_STR EVPN_HELP_STR "EVPN route information\n" - "MAC IP routes linked to the ES\n" + "MAC IP routes in the EVI tables linked to the ES\n" "ES ID\n" "Detailed information\n" JSON_STR) { @@ -4683,7 +4717,44 @@ DEFPY_HIDDEN( if (uj) json = json_object_new_object(); - bgp_evpn_show_routes_mac_ip_es(vty, esi_p, json, !!detail); + bgp_evpn_show_routes_mac_ip_evi_es(vty, esi_p, json, !!detail); + if (uj) { + vty_out(vty, "%s\n", + json_object_to_json_string_ext( + json, JSON_C_TO_STRING_PRETTY)); + json_object_free(json); + } + + return CMD_SUCCESS; +} + +DEFPY_HIDDEN( + show_bgp_l2vpn_evpn_route_mac_ip_global_es, + show_bgp_l2vpn_evpn_route_mac_ip_global_es_cmd, + "show bgp l2vpn evpn route mac-ip-global-es [NAME$esi_str|detail$detail] [json$uj]", + SHOW_STR BGP_STR L2VPN_HELP_STR EVPN_HELP_STR + "EVPN route information\n" + "MAC IP routes in the global table linked to the ES\n" + "ES ID\n" + "Detailed information\n" JSON_STR) +{ + esi_t esi; + esi_t *esi_p; + json_object *json = NULL; + + if (esi_str) { + if (!str_to_esi(esi_str, &esi)) { + vty_out(vty, "%%Malformed ESI\n"); + return CMD_WARNING; + } + esi_p = &esi; + } else { + esi_p = NULL; + } + + if (uj) + json = json_object_new_object(); + bgp_evpn_show_routes_mac_ip_global_es(vty, esi_p, json, !!detail); if (uj) { vty_out(vty, "%s\n", json_object_to_json_string_ext( @@ -5957,6 +6028,7 @@ void bgp_ethernetvpn_init(void) install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_es_cmd); install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_es_evi_cmd); install_element(VIEW_NODE, 
&show_bgp_l2vpn_evpn_es_vrf_cmd); + install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_nh_cmd); install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_vni_cmd); install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_summary_cmd); install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_route_cmd); @@ -5968,7 +6040,10 @@ void bgp_ethernetvpn_init(void) &show_bgp_l2vpn_evpn_route_vni_multicast_cmd); install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_route_vni_macip_cmd); install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_route_vni_all_cmd); - install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_route_mac_ip_es_cmd); + install_element(VIEW_NODE, + &show_bgp_l2vpn_evpn_route_mac_ip_evi_es_cmd); + install_element(VIEW_NODE, + &show_bgp_l2vpn_evpn_route_mac_ip_global_es_cmd); install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_import_rt_cmd); install_element(VIEW_NODE, &show_bgp_l2vpn_evpn_vrf_import_rt_cmd); diff --git a/bgpd/bgp_memory.c b/bgpd/bgp_memory.c index 9a4eb5d3bd..36bdc05eb7 100644 --- a/bgpd/bgp_memory.c +++ b/bgpd/bgp_memory.c @@ -119,7 +119,10 @@ DEFINE_MTYPE(BGPD, LCOMMUNITY_VAL, "Large Community value"); DEFINE_MTYPE(BGPD, BGP_EVPN, "BGP EVPN Information"); DEFINE_MTYPE(BGPD, BGP_EVPN_MH_INFO, "BGP EVPN MH Information"); DEFINE_MTYPE(BGPD, BGP_EVPN_ES_VTEP, "BGP EVPN ES VTEP"); +DEFINE_MTYPE(BGPD, BGP_EVPN_PATH_MH_INFO, "BGP EVPN PATH MH Information"); DEFINE_MTYPE(BGPD, BGP_EVPN_PATH_ES_INFO, "BGP EVPN PATH ES Information"); +DEFINE_MTYPE(BGPD, BGP_EVPN_PATH_NH_INFO, "BGP EVPN PATH NH Information"); +DEFINE_MTYPE(BGPD, BGP_EVPN_NH, "BGP EVPN Nexthop"); DEFINE_MTYPE(BGPD, BGP_EVPN_ES_EVI_VTEP, "BGP EVPN ES-EVI VTEP"); DEFINE_MTYPE(BGPD, BGP_EVPN_ES, "BGP EVPN ESI Information"); DEFINE_MTYPE(BGPD, BGP_EVPN_ES_EVI, "BGP EVPN ES-per-EVI Information"); diff --git a/bgpd/bgp_memory.h b/bgpd/bgp_memory.h index 7b839f1d4c..29923424e3 100644 --- a/bgpd/bgp_memory.h +++ b/bgpd/bgp_memory.h @@ -118,6 +118,9 @@ DECLARE_MTYPE(BGP_EVPN_ES_EVI); DECLARE_MTYPE(BGP_EVPN_ES_VRF); 
DECLARE_MTYPE(BGP_EVPN_ES_VTEP); DECLARE_MTYPE(BGP_EVPN_PATH_ES_INFO); +DECLARE_MTYPE(BGP_EVPN_PATH_MH_INFO); +DECLARE_MTYPE(BGP_EVPN_PATH_NH_INFO); +DECLARE_MTYPE(BGP_EVPN_NH); DECLARE_MTYPE(BGP_EVPN_ES_EVI_VTEP); DECLARE_MTYPE(BGP_EVPN); diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 124a477248..b73c83f190 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -250,8 +250,8 @@ void bgp_path_info_extra_free(struct bgp_path_info_extra **extra) if (e->aggr_suppressors) list_delete(&e->aggr_suppressors); - if (e->es_info) - bgp_evpn_path_es_info_free(e->es_info); + if (e->mh_info) + bgp_evpn_path_mh_info_free(e->mh_info); if ((*extra)->bgp_fs_iprule) list_delete(&((*extra)->bgp_fs_iprule)); @@ -8856,15 +8856,17 @@ void route_vty_out(struct vty *vty, const struct prefix *p, if (safi == SAFI_EVPN) { struct bgp_path_es_info *path_es_info = NULL; - if (path->extra) - path_es_info = path->extra->es_info; - if (bgp_evpn_is_esi_valid(&attr->esi)) { /* XXX - add these params to the json out */ vty_out(vty, "%*s", 20, " "); vty_out(vty, "ESI:%s", esi_to_str(&attr->esi, esi_buf, sizeof(esi_buf))); + + if (path->extra && path->extra->mh_info) + path_es_info = + path->extra->mh_info->es_info; + if (path_es_info && path_es_info->es) vty_out(vty, " VNI: %u", path_es_info->vni); @@ -9626,12 +9628,20 @@ void route_vty_out_detail(struct vty *vty, struct bgp *bgp, buf1, sizeof(buf1)); if (is_pi_family_evpn(parent_ri)) { vty_out(vty, - " Imported from %s:%pFX, VNI %s\n", + " Imported from %s:%pFX, VNI %s", buf1, (struct prefix_evpn *) bgp_dest_get_prefix( dest), tag_buf); + if (attr->es_flags & ATTR_ES_L3_NHG) + vty_out(vty, ", L3NHG %s", + (attr->es_flags + & ATTR_ES_L3_NHG_ACTIVE) + ? 
"active" + : "inactive"); + vty_out(vty, "\n"); + } else vty_out(vty, " Imported from %s:%pFX\n", diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index 1dec99f085..b6aa53070b 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -102,7 +102,9 @@ enum bgp_show_adj_route_type { #define BGP_NLRI_PARSE_ERROR_EVPN_TYPE1_SIZE -15 #define BGP_NLRI_PARSE_ERROR -32 -/* MAC-IP/type-2 path_info in the VNI routing table is linked to the +/* 1. local MAC-IP/type-2 paths in the VNI routing table are linked to the + * destination ES + * 2. remote MAC-IP paths in the global routing table are linked to the * destination ES */ struct bgp_path_es_info { @@ -113,6 +115,27 @@ struct bgp_path_es_info { struct bgp_evpn_es *es; /* memory used for linking the path to the destination ES */ struct listnode es_listnode; + uint8_t flags; +/* Path is linked to the VNI list */ +#define BGP_EVPN_PATH_ES_INFO_VNI_LIST (1 << 0) +/* Path is linked to the global list */ +#define BGP_EVPN_PATH_ES_INFO_GLOBAL_LIST (1 << 1) +}; + +/* IP paths imported into the VRF from an EVPN route source + * are linked to the nexthop/VTEP IP + */ +struct bgp_path_evpn_nh_info { + /* back pointer to the route */ + struct bgp_path_info *pi; + struct bgp_evpn_nh *nh; /* nexthop/VTEP entry the path is linked to */ + /* memory used for linking the path to the nexthop */ + struct listnode nh_listnode; +}; + +struct bgp_path_mh_info { + struct bgp_path_es_info *es_info; /* destination ES link */ + struct bgp_path_evpn_nh_info *nh_info; /* nexthop/VTEP link */ }; /* Ancillary information to struct bgp_path_info, @@ -202,7 +225,7 @@ struct bgp_path_info_extra { /* presence of FS pbr iprule based entry */ struct list *bgp_fs_iprule; /* Destination Ethernet Segment links for EVPN MH */ - struct bgp_path_es_info *es_info; + struct bgp_path_mh_info *mh_info; }; struct bgp_path_info { diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 6270542178..43d0a3b2d2 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -664,6 +664,9 @@ struct bgp { /* RB tree of ES-VRFs */ struct bgp_es_vrf_rb_head es_vrf_rb_tree; + /* Hash table of EVPN nexthops
maintained per-tenant-VRF */ + struct hash *evpn_nh_table; + /* vrf flags */ uint32_t vrf_flags; #define BGP_VRF_AUTO (1 << 0) diff --git a/lib/log.c b/lib/log.c index b86d3022b4..e078d8e2a7 100644 --- a/lib/log.c +++ b/lib/log.c @@ -462,7 +462,9 @@ static const struct zebra_desc_table command_types[] = { DESC_ENTRY(ZEBRA_NHG_DEL), DESC_ENTRY(ZEBRA_NHG_NOTIFY_OWNER), DESC_ENTRY(ZEBRA_ROUTE_NOTIFY_REQUEST), - DESC_ENTRY(ZEBRA_CLIENT_CLOSE_NOTIFY)}; + DESC_ENTRY(ZEBRA_CLIENT_CLOSE_NOTIFY), + DESC_ENTRY(ZEBRA_EVPN_REMOTE_NH_ADD), + DESC_ENTRY(ZEBRA_EVPN_REMOTE_NH_DEL)}; #undef DESC_ENTRY static const struct zebra_desc_table unknown = {0, "unknown", '?'}; diff --git a/lib/zclient.h b/lib/zclient.h index 43197534a8..5b2298c42d 100644 --- a/lib/zclient.h +++ b/lib/zclient.h @@ -213,6 +213,8 @@ typedef enum { ZEBRA_NHG_ADD, ZEBRA_NHG_DEL, ZEBRA_NHG_NOTIFY_OWNER, + ZEBRA_EVPN_REMOTE_NH_ADD, + ZEBRA_EVPN_REMOTE_NH_DEL, ZEBRA_ERROR, ZEBRA_CLIENT_CAPABILITIES, ZEBRA_OPAQUE_MESSAGE, diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index 63ba6cd8d9..b482914418 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -3350,6 +3350,8 @@ void (*const zserv_handlers[])(ZAPI_HANDLER_ARGS) = { [ZEBRA_NHG_ADD] = zread_nhg_add, [ZEBRA_NHG_DEL] = zread_nhg_del, [ZEBRA_ROUTE_NOTIFY_REQUEST] = zread_route_notify_request, + [ZEBRA_EVPN_REMOTE_NH_ADD] = zebra_evpn_proc_remote_nh, + [ZEBRA_EVPN_REMOTE_NH_DEL] = zebra_evpn_proc_remote_nh, }; /* diff --git a/zebra/zebra_evpn_mh.c b/zebra/zebra_evpn_mh.c index 1c258a04f7..cabba707a0 100644 --- a/zebra/zebra_evpn_mh.c +++ b/zebra/zebra_evpn_mh.c @@ -3867,6 +3867,58 @@ static void zebra_evpn_mh_startup_delay_timer_start(const char *rc) } } +/***************************************************************************** + * Nexthop management: nexthops associated with Type-2 routes that have + * an ES as destination are consolidated by BGP into a per-VRF nh->rmac + * mapping which is then installed as a remote neigh/fdb entry with a + * dummy
(type-1) prefix referencing it. + * This handling is needed because Type-2 routes with ES as dest use NHG + * that are setup using EAD routes (i.e. such NHGs do not include the + * RMAC info). + * + * Message payload as read below: vrf_id (4 bytes), nexthop (struct ipaddr) + * and, for ZEBRA_EVPN_REMOTE_NH_ADD only, the RMAC (struct ethaddr). + ****************************************************************************/ +void zebra_evpn_proc_remote_nh(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + vrf_id_t vrf_id; + struct ipaddr nh; + struct ethaddr rmac; + struct prefix_evpn dummy_prefix; + + s = msg; + /* Use the bounds-checked STREAM_GET*() readers so that a short + * message from a client bails out via stream_failure instead of + * relying on the unchecked stream_getl()/stream_get() calls. + */ + STREAM_GETL(s, vrf_id); + STREAM_GET(&nh, s, sizeof(nh)); + + memset(&dummy_prefix, 0, sizeof(dummy_prefix)); + dummy_prefix.family = AF_EVPN; + dummy_prefix.prefixlen = (sizeof(struct evpn_addr) * 8); + dummy_prefix.prefix.route_type = 1; /* XXX - fixup to type-1 def */ + + if (hdr->command == ZEBRA_EVPN_REMOTE_NH_ADD) { + STREAM_GET(&rmac, s, sizeof(rmac)); + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("evpn remote nh %d %pIA rmac %pEA add", + vrf_id, &nh, &rmac); + zebra_vxlan_evpn_vrf_route_add(vrf_id, &rmac, &nh, + (struct prefix *)&dummy_prefix); + } else { + if (IS_ZEBRA_DEBUG_EVPN_MH_ES) + zlog_debug("evpn remote nh %d %pIA del", vrf_id, &nh); + zebra_vxlan_evpn_vrf_route_del(vrf_id, &nh, + (struct prefix *)&dummy_prefix); + } + return; + +stream_failure: + zlog_warn("%s: truncated evpn remote nh message", __func__); +} + /*****************************************************************************/ void zebra_evpn_mh_config_write(struct vty *vty) { diff --git a/zebra/zebra_evpn_mh.h b/zebra/zebra_evpn_mh.h index 2361a70bff..8861e80cee 100644 --- a/zebra/zebra_evpn_mh.h +++ b/zebra/zebra_evpn_mh.h @@ -382,5 +382,6 @@ extern void zebra_evpn_acc_bd_svi_set(struct zebra_if *vlan_zif, extern void zebra_evpn_acc_bd_svi_mac_add(struct interface *vlan_if); extern void zebra_evpn_es_bypass_update(struct zebra_evpn_es *es, struct interface *ifp, bool bypass); +extern void zebra_evpn_proc_remote_nh(ZAPI_HANDLER_ARGS); #endif /* _ZEBRA_EVPN_MH_H */ diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c index bc2eac7a0b..4cd3b60a0f 100644 --- a/zebra/zebra_vxlan.c +++ b/zebra/zebra_vxlan.c @@ -149,6 +149,11
@@ static int host_rb_entry_compare(const struct host_rb_entry *hle1, } else if (hle1->p.family == AF_INET6) { return memcmp(&hle1->p.u.prefix6, &hle2->p.u.prefix6, IPV6_MAX_BYTELEN); + } else if (hle1->p.family == AF_EVPN) { + /* a single dummy prefix of route_type BGP_EVPN_AD_ROUTE is + * used for all nexthops associated with a non-zero ESI + */ + return 0; } else { zlog_debug("%s: Unexpected family type: %d", __func__, hle1->p.family);