Merge pull request #17962 from donaldsharp/fpm_problems

Fpm problems
This commit is contained in:
Russ White 2025-02-04 15:09:05 -05:00 committed by GitHub
commit 1cbb4b9e3d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 66 additions and 42 deletions

View File

@ -3,21 +3,21 @@
{
"fib":1,
"rib":1,
"fibOffLoaded":0,
"fibOffLoaded":1,
"fibTrapped":0,
"type":"connected"
},
{
"fib":1,
"rib":1,
"fibOffLoaded":0,
"fibOffLoaded":1,
"fibTrapped":0,
"type":"local"
},
{
"fib":10000,
"rib":10000,
"fibOffLoaded":0,
"fibOffLoaded":10000,
"fibTrapped":0,
"type":"sharp"
}

View File

@ -3,14 +3,14 @@
{
"fib":1,
"rib":1,
"fibOffLoaded":0,
"fibOffLoaded":1,
"fibTrapped":0,
"type":"connected"
},
{
"fib":1,
"rib":1,
"fibOffLoaded":0,
"fibOffLoaded":1,
"fibTrapped":0,
"type":"local"
}

View File

@ -57,7 +57,7 @@ def setup_module(module):
router.load_config(
TopoRouter.RD_ZEBRA,
os.path.join(CWD, "{}/zebra.conf".format(rname)),
"-M dplane_fpm_nl",
"-M dplane_fpm_nl --asic-offload=notify_on_offload",
)
router.load_config(
TopoRouter.RD_SHARP, os.path.join(CWD, "{}/sharpd.conf".format(rname))
@ -65,6 +65,7 @@ def setup_module(module):
router.load_config(
TopoRouter.RD_FPM_LISTENER,
os.path.join(CWD, "{}/fpm_stub.conf".format(rname)),
"-r",
)
tgen.start_router()
@ -111,7 +112,7 @@ def test_fpm_install_routes():
topotest.router_json_cmp, router, "show ip route summ json", expected
)
success, result = topotest.run_and_expect(test_func, None, 60, 1)
success, result = topotest.run_and_expect(test_func, None, 120, 1)
assert success, "Unable to successfully install 10000 routes: {}".format(result)
# Let's remove 10000 routes
@ -124,7 +125,7 @@ def test_fpm_install_routes():
topotest.router_json_cmp, router, "show ip route summ json", expected
)
success, result = topotest.run_and_expect(test_func, None, 60, 1)
success, result = topotest.run_and_expect(test_func, None, 120, 1)
assert success, "Unable to remove 10000 routes: {}".format(result)

View File

@ -587,7 +587,6 @@ static void fpm_read(struct event *t)
struct zebra_dplane_ctx *ctx;
size_t available_bytes;
size_t hdr_available_bytes;
int ival;
/* Let's ignore the input at the moment. */
rv = stream_read_try(fnc->ibuf, fnc->socket,
@ -724,12 +723,18 @@ static void fpm_read(struct event *t)
NULL);
if (netlink_route_notify_read_ctx(hdr, 0, ctx) >= 0) {
/* In the FPM encoding, the vrfid is present */
ival = dplane_ctx_get_table(ctx);
dplane_ctx_set_vrf(ctx, ival);
dplane_ctx_set_table(ctx,
ZEBRA_ROUTE_TABLE_UNKNOWN);
/*
* Receiving back a netlink message from
* the fpm. Currently the netlink messages
* do not have a way to specify the vrf
* so it must be unknown. I'm looking
* at you sonic. If you are reading this
* and wondering why it's not working
* you must extend your patch to translate
* the tableid to the vrfid and set the
* tableid to 0 in order for this to work.
*/
dplane_ctx_set_vrf(ctx, VRF_UNKNOWN);
dplane_provider_enqueue_to_zebra(ctx);
} else {
/*
@ -946,8 +951,6 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx)
nl_buf_len = 0;
frr_mutex_lock_autounlock(&fnc->obuf_mutex);
/*
* If route replace is enabled then directly encode the install which
* is going to use `NLM_F_REPLACE` (instead of delete/add operations).
@ -1100,6 +1103,8 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx)
/* We must know if someday a message goes beyond 65KiB. */
assert((nl_buf_len + FPM_HEADER_SIZE) <= UINT16_MAX);
frr_mutex_lock_autounlock(&fnc->obuf_mutex);
/* Check if we have enough buffer space. */
if (STREAM_WRITEABLE(fnc->obuf) < (nl_buf_len + FPM_HEADER_SIZE)) {
atomic_fetch_add_explicit(&fnc->counters.buffer_full, 1,

View File

@ -756,8 +756,10 @@ static void fpm_serve(void)
while (1) {
hdr = read_fpm_msg(buf, sizeof(buf));
if (!hdr)
if (!hdr) {
close(glob->sock);
return;
}
process_fpm_msg(hdr);
}
@ -769,6 +771,8 @@ int main(int argc, char **argv)
int r;
bool fork_daemon = false;
setbuf(stdout, NULL);
memset(glob, 0, sizeof(*glob));
while ((r = getopt(argc, argv, "rdv")) != -1) {

View File

@ -1891,20 +1891,18 @@ struct route_node *rib_find_rn_from_ctx(const struct zebra_dplane_ctx *ctx)
struct route_table *table = NULL;
struct route_node *rn = NULL;
const struct prefix *dest_pfx, *src_pfx;
uint32_t tableid = dplane_ctx_get_table(ctx);
vrf_id_t vrf_id = dplane_ctx_get_vrf(ctx);
/* Locate rn and re(s) from ctx */
table = zebra_vrf_lookup_table_with_table_id(dplane_ctx_get_afi(ctx),
dplane_ctx_get_safi(ctx), vrf_id, tableid);
table = zebra_vrf_lookup_table_with_table_id(
dplane_ctx_get_afi(ctx), dplane_ctx_get_safi(ctx),
dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx));
if (table == NULL) {
if (IS_ZEBRA_DEBUG_DPLANE) {
zlog_debug(
"Failed to find route for ctx: no table for afi %d, safi %d, vrf %s(%u)",
dplane_ctx_get_afi(ctx),
dplane_ctx_get_safi(ctx),
vrf_id_to_name(dplane_ctx_get_vrf(ctx)),
dplane_ctx_get_vrf(ctx));
zlog_debug("Failed to find route for ctx: no table for afi %d, safi %d, vrf %s(%u) table %u",
dplane_ctx_get_afi(ctx), dplane_ctx_get_safi(ctx),
vrf_id_to_name(vrf_id), vrf_id, tableid);
}
goto done;
}
@ -2214,26 +2212,13 @@ static void rib_process_dplane_notify(struct zebra_dplane_ctx *ctx)
{
struct route_node *rn = NULL;
struct route_entry *re = NULL;
struct vrf *vrf;
struct vrf *vrf = vrf_lookup_by_id(dplane_ctx_get_vrf(ctx));
struct nexthop *nexthop;
rib_dest_t *dest;
bool fib_changed = false;
bool debug_p = IS_ZEBRA_DEBUG_DPLANE | IS_ZEBRA_DEBUG_RIB;
int start_count, end_count;
vrf_id_t vrf_id;
int tableid;
/* Locate vrf and route table - we must have one or the other */
tableid = dplane_ctx_get_table(ctx);
vrf_id = dplane_ctx_get_vrf(ctx);
if (vrf_id == VRF_UNKNOWN)
vrf_id = zebra_vrf_lookup_by_table(tableid,
dplane_ctx_get_ns_id(ctx));
else if (tableid == ZEBRA_ROUTE_TABLE_UNKNOWN)
tableid = zebra_vrf_lookup_tableid(vrf_id,
dplane_ctx_get_ns_id(ctx));
vrf = vrf_lookup_by_id(vrf_id);
uint32_t tableid = dplane_ctx_get_table(ctx);
/* Locate rn and re(s) from ctx */
rn = rib_find_rn_from_ctx(ctx);
@ -4862,6 +4847,33 @@ void rib_close_table(struct route_table *table)
}
}
/*
* The context sent up from the dplane may be a context
* that has been generated by the zebra master pthread
* or it may be a context generated from a event in
* either the kernel dplane code or the fpm dplane
* code. In which case the tableid and vrfid may
* not be fully known and we have to figure it out
* when the context hits the master pthread.
* since this is the *starter* spot for that let
* us do a bit of work on each one to see if any
* massaging is needed
*/
static inline void zebra_rib_translate_ctx_from_dplane(struct zebra_dplane_ctx *ctx)
{
uint32_t tableid = dplane_ctx_get_table(ctx);
vrf_id_t vrfid = dplane_ctx_get_vrf(ctx);
uint32_t nsid = dplane_ctx_get_ns_id(ctx);
enum dplane_op_e op = dplane_ctx_get_op(ctx);
if (vrfid == VRF_UNKNOWN)
dplane_ctx_set_vrf(ctx, zebra_vrf_lookup_by_table(tableid, nsid));
else if ((op == DPLANE_OP_ROUTE_INSTALL || op == DPLANE_OP_ROUTE_UPDATE ||
op == DPLANE_OP_ROUTE_DELETE) &&
tableid == ZEBRA_ROUTE_TABLE_UNKNOWN)
dplane_ctx_set_table(ctx, zebra_vrf_lookup_tableid(vrfid, nsid));
}
/*
* Handle results from the dataplane system. Dequeue update context
* structs, dispatch to appropriate internal handlers.
@ -4921,6 +4933,8 @@ static void rib_process_dplane_results(struct event *thread)
}
while (ctx) {
zebra_rib_translate_ctx_from_dplane(ctx);
#ifdef HAVE_SCRIPTING
if (ret == 0)
frrscript_call(fs,