diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index 945094a671..961e3b84de 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -1685,6 +1685,65 @@ static int update_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, return route_change; } +/* + * If the local route was not selected evict it and tell zebra to re-add + * the best remote dest. + * + * Typically a local path added by zebra is expected to be selected as + * best. In which case when a remote path wins as best (later) + * evpn_route_select_install itself evicts the older-local-best path. + * + * However if bgp's add and zebra's add cross paths (race condition) it + * is possible that the local path is no longer the "older" best path. + * It is a path that was never designated as best and hence requires + * additional handling to prevent bgp from injecting and holding on to a + * non-best local path. + */ +static void evpn_cleanup_local_non_best_route(struct bgp *bgp, + struct bgpevpn *vpn, + struct bgp_node *rn, + struct bgp_path_info *local_pi, + int *route_change) +{ + struct bgp_path_info *tmp_pi; + struct bgp_path_info *curr_select = NULL; + uint8_t flags = 0; + char buf[PREFIX_STRLEN]; + + if (CHECK_FLAG(local_pi->flags, BGP_PATH_SELECTED)) { + /* local path is the winner; no additional cleanup needed */ + return; + } + + /* local path was not picked as the winner; kick it out */ + if (bgp_debug_zebra(NULL)) { + zlog_debug("evicting local evpn prefix %s as remote won", + prefix2str(&rn->p, buf, sizeof(buf))); + } + *route_change = 0; + evpn_delete_old_local_route(bgp, vpn, rn, local_pi); + bgp_path_info_reap(rn, local_pi); + + /* tell zebra to re-add the best remote path */ + for (tmp_pi = rn->info; tmp_pi; tmp_pi = tmp_pi->next) { + if (CHECK_FLAG(tmp_pi->flags, BGP_PATH_SELECTED)) { + curr_select = tmp_pi; + break; + } + } + if (curr_select && + curr_select->type == ZEBRA_ROUTE_BGP + && curr_select->sub_type == BGP_ROUTE_IMPORTED) { + if (curr_select->attr->sticky) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_STICKY); + if (curr_select->attr->default_gw) + SET_FLAG(flags, ZEBRA_MACIP_TYPE_GW); + evpn_zebra_install(bgp, vpn, (struct prefix_evpn *)&rn->p, + curr_select->attr->nexthop, flags, + mac_mobility_seqnum(curr_select->attr)); + } +} + /* * Create or update EVPN route (of type based on prefix) for specified VNI * and schedule for processing. @@ -1747,10 +1806,19 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, assert(pi); attr_new = pi->attr; + /* lock ri to prevent freeing in evpn_route_select_install */ + bgp_path_info_lock(pi); /* Perform route selection; this is just to set the flags correctly * as local route in the VNI always wins. */ evpn_route_select_install(bgp, vpn, rn); + /* + * if the new local route was not selected evict it and tell zebra + * to add the best remote dest + */ + evpn_cleanup_local_non_best_route(bgp, vpn, rn, pi, &route_change); + bgp_path_info_unlock(pi); + bgp_unlock_node(rn); /* If this is a new route or some attribute has changed, export the diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index c20b404f19..4c408ff9dd 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -453,6 +453,7 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new, char exist_buf[PATH_ADDPATH_STR_BUFFER]; uint32_t new_mm_seq; uint32_t exist_mm_seq; + int nh_cmp; *paths_eq = 0; @@ -545,6 +546,28 @@ static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new, exist_mm_seq); return 0; } + + /* + * if sequence numbers are the same path with the lowest IP + * wins + */ + nh_cmp = bgp_path_info_nexthop_cmp(new, exist); + if (nh_cmp < 0) { + if (debug) + zlog_debug( + "%s: %s wins over %s due to same MM seq %u and lower IP %s", + pfx_buf, new_buf, exist_buf, new_mm_seq, + inet_ntoa(new->attr->nexthop)); + return 1; + } + if (nh_cmp > 0) { + if (debug) + zlog_debug( + "%s: %s loses to %s due to same MM seq %u and higher IP %s", + pfx_buf, new_buf, exist_buf, new_mm_seq, + inet_ntoa(new->attr->nexthop)); + return 0; + } } /* 1. Weight check. */