zebra: Fix ships in the night issue

When using wait for install there exists situations where
zebra will issue several route change operations to the kernel
but end up in a state where we shouldn't be at the end
due to extra data being received.  Example:

a) zebra receives from bgp a route change, installs sends the
route to the kernel.
b) zebra receives a route deletion from bgp, removes the
struct route entry and then sends to the kernel a deletion.
c) zebra receives an asynchronous notification that (a) succeeded
but we treat this as a new route.

This is the ships in the night problem.  In this case if we receive
notification from the kernel about a route that we know nothing
about and we are not in startup and we are doing asic offload
then we can ignore this update.

Ticket: #2563300
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
This commit is contained in:
Donald Sharp 2021-06-24 12:23:33 -04:00
parent 16cdf5ddbc
commit c6eee91f66
7 changed files with 44 additions and 15 deletions

View File

@ -290,10 +290,12 @@ void connected_up(struct interface *ifp, struct connected *ifc)
}
rib_add(afi, SAFI_UNICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0,
flags, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0);
flags, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0,
false);
rib_add(afi, SAFI_MULTICAST, zvrf->vrf->vrf_id, ZEBRA_ROUTE_CONNECT, 0,
flags, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0);
flags, &p, NULL, &nh, 0, zvrf->table_id, metric, 0, 0, 0,
false);
/* Schedule LSP forwarding entries for processing, if appropriate. */
if (zvrf->vrf->vrf_id == VRF_DEFAULT) {

View File

@ -1113,7 +1113,7 @@ void rtm_read(struct rt_msghdr *rtm)
|| rtm->rtm_type == RTM_CHANGE)
rib_add(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL, 0,
zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN,
0, 0, 0, 0);
0, 0, 0, 0, false);
else
rib_delete(afi, SAFI_UNICAST, VRF_DEFAULT, ZEBRA_ROUTE_KERNEL,
0, zebra_flags, &p, NULL, &nh, 0, RT_TABLE_MAIN, 0,

View File

@ -698,7 +698,7 @@ int zebra_add_import_table_entry(struct zebra_vrf *zvrf, struct route_node *rn,
ng = nexthop_group_new();
copy_nexthops(&ng->nexthop, re->nhe->nhg.nexthop, NULL);
rib_add_multipath(afi, SAFI_UNICAST, &p, NULL, newre, ng);
rib_add_multipath(afi, SAFI_UNICAST, &p, NULL, newre, ng, false);
return 0;
}

View File

@ -394,13 +394,14 @@ extern int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
unsigned short instance, uint32_t flags, struct prefix *p,
struct prefix_ipv6 *src_p, const struct nexthop *nh,
uint32_t nhe_id, uint32_t table_id, uint32_t metric,
uint32_t mtu, uint8_t distance, route_tag_t tag);
uint32_t mtu, uint8_t distance, route_tag_t tag,
bool startup);
/*
* Multipath route apis.
*/
extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p, struct route_entry *re,
struct nexthop_group *ng);
struct nexthop_group *ng, bool startup);
/*
* -1 -> some sort of error
* 0 -> an add
@ -409,7 +410,7 @@ extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
extern int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p,
struct route_entry *re,
struct nhg_hash_entry *nhe);
struct nhg_hash_entry *nhe, bool startup);
extern void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
unsigned short instance, uint32_t flags,

View File

@ -920,7 +920,7 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
}
rib_add(afi, SAFI_UNICAST, vrf_id, proto, 0, flags, &p,
&src_p, &nh, nhe_id, table, metric, mtu,
distance, tag);
distance, tag, startup);
} else {
/* This is a multipath route */
struct route_entry *re;
@ -964,7 +964,7 @@ static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
if (nhe_id || ng)
rib_add_multipath(afi, SAFI_UNICAST, &p,
&src_p, re, ng);
&src_p, re, ng, startup);
else
XFREE(MTYPE_RE, re);
}

View File

@ -2127,7 +2127,7 @@ static void zread_route_add(ZAPI_HANDLER_ARGS)
nhe.backup_info = bnhg;
}
ret = rib_add_multipath_nhe(afi, api.safi, &api.prefix, src_p,
re, &nhe);
re, &nhe, false);
/*
* rib_add_multipath_nhe only fails in a couple spots

View File

@ -3346,7 +3346,7 @@ void _route_entry_dump(const char *func, union prefixconstptr pp,
*/
int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p, struct route_entry *re,
struct nhg_hash_entry *re_nhe)
struct nhg_hash_entry *re_nhe, bool startup)
{
struct nhg_hash_entry *nhe = NULL;
struct route_table *table;
@ -3438,6 +3438,32 @@ int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p,
same = first_same;
if (!startup &&
(re->flags & ZEBRA_FLAG_SELFROUTE) && zrouter.asic_offloaded) {
if (!same) {
if (IS_ZEBRA_DEBUG_RIB)
zlog_debug("prefix: %pRN is a self route where we do not have an entry for it. Dropping this update, it's useless", rn);
/*
* We are not on startup, this is a self route
* and we have asic offload. Which means
* we are getting a callback for a entry
* that was already deleted to the kernel
* but an earlier response was just handed
* back. Drop it on the floor
*/
if (re->nhe && re->nhe_id) {
assert(re->nhe->id == re->nhe_id);
route_entry_update_nhe(re, NULL);
} else if (re->nhe && re->nhe->nhg.nexthop)
nexthops_free(re->nhe->nhg.nexthop);
nexthops_free(re->fib_ng.nexthop);
XFREE(MTYPE_RE, re);
return ret;
}
}
/* If this route is kernel/connected route, notify the dataplane. */
if (RIB_SYSTEM_ROUTE(re)) {
/* Notify dataplane */
@ -3491,7 +3517,7 @@ int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p,
*/
int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p, struct route_entry *re,
struct nexthop_group *ng)
struct nexthop_group *ng, bool startup)
{
int ret;
struct nhg_hash_entry nhe;
@ -3512,7 +3538,7 @@ int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
else if (re->nhe_id > 0)
nhe.id = re->nhe_id;
ret = rib_add_multipath_nhe(afi, safi, p, src_p, re, &nhe);
ret = rib_add_multipath_nhe(afi, safi, p, src_p, re, &nhe, startup);
/* In this path, the callers expect memory to be freed. */
nexthop_group_delete(&ng);
@ -3743,7 +3769,7 @@ int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
unsigned short instance, uint32_t flags, struct prefix *p,
struct prefix_ipv6 *src_p, const struct nexthop *nh,
uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint32_t mtu,
uint8_t distance, route_tag_t tag)
uint8_t distance, route_tag_t tag, bool startup)
{
struct route_entry *re = NULL;
struct nexthop *nexthop = NULL;
@ -3775,7 +3801,7 @@ int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
nexthop_group_add_sorted(ng, nexthop);
}
return rib_add_multipath(afi, safi, p, src_p, re, ng);
return rib_add_multipath(afi, safi, p, src_p, re, ng, startup);
}
static const char *rib_update_event2str(enum rib_update_event event)