Merge pull request #12169 from donaldsharp/zebra_meta_q_ordering

zebra: Fix handling of recursive routes when processing closely in time
This commit is contained in:
Donatas Abraitis 2022-11-02 20:15:32 +02:00 committed by GitHub
commit 87bc89bc87
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 47 additions and 7 deletions

View File

@ -2797,7 +2797,7 @@ def verify_best_path_as_per_admin_distance(
if route in rib_routes_json:
st_found = True
# Verify next_hop in rib_routes_json
if rib_routes_json[route][0]["nexthops"][0]["ip"] == _next_hop:
if [nh for nh in rib_routes_json[route][0]["nexthops"] if nh['ip'] == _next_hop]:
nh_found = True
else:
errormsg = (

View File

@ -158,6 +158,13 @@ struct route_entry {
* differs from the rib/normal set of nexthops.
*/
#define ROUTE_ENTRY_USE_FIB_NHG 0x40
/*
* Route entries that are going to the dplane for a Route Replace
* let's note the fact that this is happening. This will
* be useful when zebra is determing if a route can be
* used for nexthops
*/
#define ROUTE_ENTRY_ROUTE_REPLACING 0x80
/* Sequence value incremented for each dataplane operation */
uint32_t dplane_sequence;

View File

@ -2369,11 +2369,33 @@ static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
resolved = 0;
/* Only useful if installed */
if (!CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED)) {
/*
* Only useful if installed or being Route Replacing
* Why Being Route Replaced as well?
* Imagine a route A and route B( that depends on A )
* for recursive resolution and A already exists in the
* zebra rib. If zebra receives the routes
* for resolution at aproximately the same time in the [
* B, A ] order on the workQ. If this happens then
* normal route resolution will happen and B will be
* resolved successfully and then A will be resolved
* successfully. Now imagine the reversed order [A, B].
* A will be resolved and then scheduled for installed
* (Thus not having the ROUTE_ENTRY_INSTALLED flag ). B
* will then get resolved and fail to be installed
* because the original below test. Let's `loosen` this
* up a tiny bit and allow the
* ROUTE_ENTRY_ROUTE_REPLACING flag ( that is set when a
* Route Replace operation is being initiated on A now )
* to now satisfy this situation. This will allow
* either order in the workQ to work properly.
*/
if (!CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED) &&
!CHECK_FLAG(match->status,
ROUTE_ENTRY_ROUTE_REPLACING)) {
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
zlog_debug(
"%s: match %p (%pNG) not installed",
"%s: match %p (%pNG) not installed or being Route Replaced",
__func__, match, match->nhe);
goto done_with_match;

View File

@ -292,13 +292,16 @@ static char *_dump_re_status(const struct route_entry *re, char *buf,
}
snprintfrr(
buf, len, "%s%s%s%s%s%s%s",
buf, len, "%s%s%s%s%s%s%s%s",
CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED) ? "Removed " : "",
CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED) ? "Changed " : "",
CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED)
? "Label Changed "
: "",
CHECK_FLAG(re->status, ROUTE_ENTRY_QUEUED) ? "Queued " : "",
CHECK_FLAG(re->status, ROUTE_ENTRY_ROUTE_REPLACING)
? "Replacing"
: "",
CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED) ? "Installed "
: "",
CHECK_FLAG(re->status, ROUTE_ENTRY_FAILED) ? "Failed " : "",
@ -713,6 +716,7 @@ void rib_install_kernel(struct route_node *rn, struct route_entry *re,
if (old) {
SET_FLAG(old->status, ROUTE_ENTRY_QUEUED);
SET_FLAG(re->status, ROUTE_ENTRY_ROUTE_REPLACING);
/* Free old FIB nexthop group */
UNSET_FLAG(old->status, ROUTE_ENTRY_USE_FIB_NHG);
@ -1538,6 +1542,7 @@ static void zebra_rib_fixup_system(struct route_node *rn)
SET_FLAG(re->status, ROUTE_ENTRY_INSTALLED);
UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED);
UNSET_FLAG(re->status, ROUTE_ENTRY_ROUTE_REPLACING);
for (ALL_NEXTHOPS(re->nhe->nhg, nhop)) {
if (CHECK_FLAG(nhop->flags, NEXTHOP_FLAG_RECURSIVE))
@ -1995,8 +2000,12 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx)
} else {
if (!zrouter.asic_offloaded ||
(CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOADED) ||
CHECK_FLAG(re->flags, ZEBRA_FLAG_OFFLOAD_FAILED)))
CHECK_FLAG(re->flags,
ZEBRA_FLAG_OFFLOAD_FAILED))) {
UNSET_FLAG(re->status,
ROUTE_ENTRY_ROUTE_REPLACING);
UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED);
}
}
}
@ -2252,8 +2261,10 @@ static void rib_process_dplane_notify(struct zebra_dplane_ctx *ctx)
}
/* Ensure we clear the QUEUED flag */
if (!zrouter.asic_offloaded)
if (!zrouter.asic_offloaded) {
UNSET_FLAG(re->status, ROUTE_ENTRY_QUEUED);
UNSET_FLAG(re->status, ROUTE_ENTRY_ROUTE_REPLACING);
}
/* Is this a notification that ... matters? We mostly care about
* the route that is currently selected for installation; we may also