zebra: Fix ships in the night issue

When using wait for install there exists situations where
zebra will issue several route change operations to the kernel
but end up in a state where we shouldn't be at the end
due to extra data being received.  Example:

a) zebra receives from bgp a route change, installs sends the
route to the kernel.
b) zebra receives a route deletion from bgp, removes the
struct route entry and then sends to the kernel a deletion.
c) zebra receives an asynchronous notification that (a) succeeded
but we treat this as a new route.

This is the ships in the night problem.  In this case if we receive
notification from the kernel about a route that we know nothing
about and we are not in startup and we are doing asic offload
then we can ignore this update.

Ticket: #2563300
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
This commit is contained in:
Donald Sharp 2021-06-24 12:23:33 -04:00
parent 16cdf5ddbc
commit c6eee91f66
7 changed files with 44 additions and 15 deletions

View File

@ -290,10 +290,12 @@ void connected_up(struct interface *ifp, struct connected *ifc)
} }
rib_add(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0, rib_add(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0,
flags, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0); flags, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0,
false);
rib_add(afi, SAFI_MULTICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0, rib_add(afi, SAFI_MULTICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0,
flags, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0); flags, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0,
false);
/* Schedule LSP forwarding entries for processing, if appropriate. */ /* Schedule LSP forwarding entries for processing, if appropriate. */
if (zvrf->vrf->vrf_id == VRF_DEFAULT) { if (zvrf->vrf->vrf_id == VRF_DEFAULT) {

View File

@ -1113,7 +1113,7 @@ void rtm_read(struct rt_msghdr *rtm)
|| rtm->rtm_type == RTM_CHANGE) || rtm->rtm_type == RTM_CHANGE)
rib_add(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, 0, rib_add(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, 0,
zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN, zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN,
0, 0, 0, 0); 0, 0, 0, 0, false);
else else
rib_delete(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, rib_delete(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL,
0, zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN, 0, 0, zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN, 0,

View File

@ -698,7 +698,7 @@ int zebra_add_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn,
ng = nexthop_group_new(); ng = nexthop_group_new();
copy_nexthops(&ng->nexthop, re->nhe->nhg.nexthop, NULL); copy_nexthops(&ng->nexthop, re->nhe->nhg.nexthop, NULL);
rib_add_multipath(afi, SAFI_UNICAST, &p, NULL, newre, ng); rib_add_multipath(afi, SAFI_UNICAST, &p, NULL, newre, ng, false);
return 0; return 0;
} }

View File

@ -394,13 +394,14 @@ extern int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
unsigned short instance, uint32_t flags, struct prefix *p, unsigned short instance, uint32_t flags, struct prefix *p,
struct prefix_ipv6 *src_p, const struct nexthop *nh, struct prefix_ipv6 *src_p, const struct nexthop *nh,
uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint32_t nhe_id, uint32_t table_id, uint32_t metric,
uint32_t mtu, uint8_t distance, route_tag_t tag); uint32_t mtu, uint8_t distance, route_tag_t tag,
bool startup);
/* /*
* Multipath route apis. * Multipath route apis.
*/ */
extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p, struct route_entry *re, struct prefix_ipv6 *src_p, struct route_entry *re,
struct nexthop_group *ng); struct nexthop_group *ng, bool startup);
/* /*
* -1 -> some sort of error * -1 -> some sort of error
* 0 -> an add * 0 -> an add
@ -409,7 +410,7 @@ extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
extern int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p, extern int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p, struct prefix_ipv6 *src_p,
struct route_entry *re, struct route_entry *re,
struct nhg_hash_entry *nhe); struct nhg_hash_entry *nhe, bool startup);
extern void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, extern void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
unsigned short instance, uint32_t flags, unsigned short instance, uint32_t flags,

View File

@ -920,7 +920,7 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
} }
rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p, rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p,
&src_p, &nh, nhe_id, table, metric, mtu, &src_p, &nh, nhe_id, table, metric, mtu,
distance, tag); distance, tag, startup);
} else { } else {
/* This is a multipath route */ /* This is a multipath route */
struct route_entry *re; struct route_entry *re;
@ -964,7 +964,7 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
if (nhe_id || ng) if (nhe_id || ng)
rib_add_multipath(afi, SAFI_UNICAST, &p, rib_add_multipath(afi, SAFI_UNICAST, &p,
&src_p, re, ng); &src_p, re, ng, startup);
else else
XFREE(MTYPE_RE, re); XFREE(MTYPE_RE, re);
} }

View File

@ -2127,7 +2127,7 @@ static void zread_route_add(ZAPI_HANDLER_ARGS)
nhe.backup_info = bnhg; nhe.backup_info = bnhg;
} }
ret = rib_add_multipath_nhe(afi, api.safi, &api.prefix, src_p, ret = rib_add_multipath_nhe(afi, api.safi, &api.prefix, src_p,
re, &nhe); re, &nhe, false);
/* /*
* rib_add_multipath_nhe only fails in a couple spots * rib_add_multipath_nhe only fails in a couple spots

View File

@ -3346,7 +3346,7 @@ void _route_entry_dump(const char *func, union prefixconstptr pp,
*/ */
int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p, int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p, struct route_entry *re, struct prefix_ipv6 *src_p, struct route_entry *re,
struct nhg_hash_entry *re_nhe) struct nhg_hash_entry *re_nhe, bool startup)
{ {
struct nhg_hash_entry *nhe = NULL; struct nhg_hash_entry *nhe = NULL;
struct route_table *table; struct route_table *table;
@ -3438,6 +3438,32 @@ int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p,
same = first_same; same = first_same;
if (!startup &&
(re->flags & ZEBRA_FLAG_SELFROUTE) && zrouter.asic_offloaded) {
if (!same) {
if (IS_ZEBRA_DEBUG_RIB)
zlog_debug("prefix: %pRN is a self route where we do not have an entry for it. Dropping this update, it's useless", rn);
/*
* We are not on startup, this is a self route
* and we have asic offload. Which means
* we are getting a callback for a entry
* that was already deleted to the kernel
* but an earlier response was just handed
* back. Drop it on the floor
*/
if (re->nhe && re->nhe_id) {
assert(re->nhe->id == re->nhe_id);
route_entry_update_nhe(re, NULL);
} else if (re->nhe && re->nhe->nhg.nexthop)
nexthops_free(re->nhe->nhg.nexthop);
nexthops_free(re->fib_ng.nexthop);
XFREE(MTYPE_RE, re);
return ret;
}
}
/* If this route is kernel/connected route, notify the dataplane. */ /* If this route is kernel/connected route, notify the dataplane. */
if (RIB_SYSTEM_ROUTE(re)) { if (RIB_SYSTEM_ROUTE(re)) {
/* Notify dataplane */ /* Notify dataplane */
@ -3491,7 +3517,7 @@ int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p,
*/ */
int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p, struct route_entry *re, struct prefix_ipv6 *src_p, struct route_entry *re,
struct nexthop_group *ng) struct nexthop_group *ng, bool startup)
{ {
int ret; int ret;
struct nhg_hash_entry nhe; struct nhg_hash_entry nhe;
@ -3512,7 +3538,7 @@ int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
else if (re->nhe_id > 0) else if (re->nhe_id > 0)
nhe.id = re->nhe_id; nhe.id = re->nhe_id;
ret = rib_add_multipath_nhe(afi, safi, p, src_p, re, &nhe); ret = rib_add_multipath_nhe(afi, safi, p, src_p, re, &nhe, startup);
/* In this path, the callers expect memory to be freed. */ /* In this path, the callers expect memory to be freed. */
nexthop_group_delete(&ng); nexthop_group_delete(&ng);
@ -3743,7 +3769,7 @@ int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
unsigned short instance, uint32_t flags, struct prefix *p, unsigned short instance, uint32_t flags, struct prefix *p,
struct prefix_ipv6 *src_p, const struct nexthop *nh, struct prefix_ipv6 *src_p, const struct nexthop *nh,
uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint32_t mtu, uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint32_t mtu,
uint8_t distance, route_tag_t tag) uint8_t distance, route_tag_t tag, bool startup)
{ {
struct route_entry *re = NULL; struct route_entry *re = NULL;
struct nexthop *nexthop = NULL; struct nexthop *nexthop = NULL;
@ -3775,7 +3801,7 @@ int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
nexthop_group_add_sorted(ng, nexthop); nexthop_group_add_sorted(ng, nexthop);
} }
return rib_add_multipath(afi, safi, p, src_p, re, ng); return rib_add_multipath(afi, safi, p, src_p, re, ng, startup);
} }
static const char *rib_update_event2str(enum rib_update_event event) static const char *rib_update_event2str(enum rib_update_event event)