From b2fc167978189238e045d719e11202ab303d2f59 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Thu, 30 Jan 2025 08:57:57 -0500 Subject: [PATCH 1/6] zebra: Fix pass back of data from dplane through fpm pipe A recent code change 29122bc9b8d5317f6f486f9fe61a92a854948cc5 changed the passing of data up the fpm from passing the tableid and vrf to the sonic expected tableid contains the vrfid. This violates the assumptions in the code that the netlink message passes up the tableid as the tableid. Additionally this code change did not modify the rib_find_rn_from_ctx to actually properly decode what could be passed up. Let's just fix this and let Sonic carry the patch as appropriate for themselves since they are not the only users of dplane_fpm_nl.c Signed-off-by: Donald Sharp --- zebra/dplane_fpm_nl.c | 19 ++++++++----- zebra/zebra_rib.c | 62 ++++++++++++++++++++++++++----------------- 2 files changed, 50 insertions(+), 31 deletions(-) diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c index b8dbabb60d..a65bae95c4 100644 --- a/zebra/dplane_fpm_nl.c +++ b/zebra/dplane_fpm_nl.c @@ -587,7 +587,6 @@ static void fpm_read(struct event *t) struct zebra_dplane_ctx *ctx; size_t available_bytes; size_t hdr_available_bytes; - int ival; /* Let's ignore the input at the moment. */ rv = stream_read_try(fnc->ibuf, fnc->socket, @@ -724,12 +723,18 @@ static void fpm_read(struct event *t) NULL); if (netlink_route_notify_read_ctx(hdr, 0, ctx) >= 0) { - /* In the FPM encoding, the vrfid is present */ - ival = dplane_ctx_get_table(ctx); - dplane_ctx_set_vrf(ctx, ival); - dplane_ctx_set_table(ctx, - ZEBRA_ROUTE_TABLE_UNKNOWN); - + /* + * Receiving back a netlink message from + * the fpm. Currently the netlink messages + * do not have a way to specify the vrf + * so it must be unknown. I'm looking + * at you sonic. If you are reading this + * and wondering why it's not working + * you must extend your patch to translate + * the tableid to the vrfid and set the + * tableid to 0 in order for this to work. + */ + dplane_ctx_set_vrf(ctx, VRF_UNKNOWN); dplane_provider_enqueue_to_zebra(ctx); } else { /* diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 2881192eb7..a1c8cd3059 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -1891,20 +1891,18 @@ struct route_node *rib_find_rn_from_ctx(const struct zebra_dplane_ctx *ctx) struct route_table *table = NULL; struct route_node *rn = NULL; const struct prefix *dest_pfx, *src_pfx; + uint32_t tableid = dplane_ctx_get_table(ctx); + vrf_id_t vrf_id = dplane_ctx_get_vrf(ctx); /* Locate rn and re(s) from ctx */ + table = zebra_vrf_lookup_table_with_table_id(dplane_ctx_get_afi(ctx), + dplane_ctx_get_safi(ctx), vrf_id, tableid); - table = zebra_vrf_lookup_table_with_table_id( - dplane_ctx_get_afi(ctx), dplane_ctx_get_safi(ctx), - dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx)); if (table == NULL) { if (IS_ZEBRA_DEBUG_DPLANE) { - zlog_debug( - "Failed to find route for ctx: no table for afi %d, safi %d, vrf %s(%u)", - dplane_ctx_get_afi(ctx), - dplane_ctx_get_safi(ctx), - vrf_id_to_name(dplane_ctx_get_vrf(ctx)), - dplane_ctx_get_vrf(ctx)); + zlog_debug("Failed to find route for ctx: no table for afi %d, safi %d, vrf %s(%u) table %u", + dplane_ctx_get_afi(ctx), dplane_ctx_get_safi(ctx), + vrf_id_to_name(vrf_id), vrf_id, tableid); } goto done; } @@ -2214,26 +2212,13 @@ static void rib_process_dplane_notify(struct zebra_dplane_ctx *ctx) { struct route_node *rn = NULL; struct route_entry *re = NULL; - struct vrf *vrf; + struct vrf *vrf = vrf_lookup_by_id(dplane_ctx_get_vrf(ctx)); struct nexthop *nexthop; rib_dest_t *dest; bool fib_changed = false; bool debug_p = IS_ZEBRA_DEBUG_DPLANE | IS_ZEBRA_DEBUG_RIB; int start_count, end_count; - vrf_id_t vrf_id; - int tableid; - - /* Locate vrf and route table - we must have one or the other */ - tableid = dplane_ctx_get_table(ctx); - vrf_id = dplane_ctx_get_vrf(ctx); - if (vrf_id == VRF_UNKNOWN) - vrf_id = zebra_vrf_lookup_by_table(tableid, - dplane_ctx_get_ns_id(ctx)); - else if (tableid == ZEBRA_ROUTE_TABLE_UNKNOWN) - tableid = zebra_vrf_lookup_tableid(vrf_id, - dplane_ctx_get_ns_id(ctx)); - - vrf = vrf_lookup_by_id(vrf_id); + uint32_t tableid = dplane_ctx_get_table(ctx); /* Locate rn and re(s) from ctx */ rn = rib_find_rn_from_ctx(ctx); @@ -4862,6 +4847,33 @@ void rib_close_table(struct route_table *table) } } +/* + * The context sent up from the dplane may be a context + * that has been generated by the zebra master pthread + * or it may be a context generated from a event in + * either the kernel dplane code or the fpm dplane + * code. In which case the tableid and vrfid may + * not be fully known and we have to figure it out + * when the context hits the master pthread. + * since this is the *starter* spot for that let + * us do a bit of work on each one to see if any + * massaging is needed + */ +static inline void zebra_rib_translate_ctx_from_dplane(struct zebra_dplane_ctx *ctx) +{ + uint32_t tableid = dplane_ctx_get_table(ctx); + vrf_id_t vrfid = dplane_ctx_get_vrf(ctx); + uint32_t nsid = dplane_ctx_get_ns_id(ctx); + enum dplane_op_e op = dplane_ctx_get_op(ctx); + + if (vrfid == VRF_UNKNOWN) + dplane_ctx_set_vrf(ctx, zebra_vrf_lookup_by_table(tableid, nsid)); + else if ((op == DPLANE_OP_ROUTE_INSTALL || op == DPLANE_OP_ROUTE_UPDATE || + op == DPLANE_OP_ROUTE_DELETE) && + tableid == ZEBRA_ROUTE_TABLE_UNKNOWN) + dplane_ctx_set_table(ctx, zebra_vrf_lookup_tableid(vrfid, nsid)); +} + /* * Handle results from the dataplane system. Dequeue update context * structs, dispatch to appropriate internal handlers. @@ -4921,6 +4933,8 @@ static void rib_process_dplane_results(struct event *thread) } while (ctx) { + zebra_rib_translate_ctx_from_dplane(ctx); + #ifdef HAVE_SCRIPTING if (ret == 0) frrscript_call(fs, From e71d29983aadea858ffa7c1fb6812c164c1e05f7 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Thu, 30 Jan 2025 09:02:44 -0500 Subject: [PATCH 2/6] zebra: fpm_listener allow continued operation In fpm_listener, when a error is detected it would stop listening and not recover. Modify the code to close the socket and allow the connection to recover. Signed-off-by: Donald Sharp --- zebra/fpm_listener.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/zebra/fpm_listener.c b/zebra/fpm_listener.c index 7d84c706d4..9181257db3 100644 --- a/zebra/fpm_listener.c +++ b/zebra/fpm_listener.c @@ -756,8 +756,10 @@ static void fpm_serve(void) while (1) { hdr = read_fpm_msg(buf, sizeof(buf)); - if (!hdr) + if (!hdr) { + close(glob->sock); return; + } process_fpm_msg(hdr); } From 22c7151c2322b165f87ad3b06e93a1c48ae72c6c Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Thu, 30 Jan 2025 09:53:09 -0500 Subject: [PATCH 3/6] tests: Show that asic offload works in the fpm testing The fpm_testing_topo1 didn't turn on the fpm_listener sending the routes back to zebra to set the asic offload. Modify the test to tell the fpm_listener to set the offloaded flag and reflect the route back to the dplane_fpm_nl.c code. Also modify zebra to expect a response to the underlying fpm listener. Signed-off-by: Donald Sharp --- tests/topotests/fpm_testing_topo1/r1/routes_summ.json | 6 +++--- .../fpm_testing_topo1/r1/routes_summ_removed.json | 4 ++-- tests/topotests/fpm_testing_topo1/test_fpm_topo1.py | 7 ++++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/topotests/fpm_testing_topo1/r1/routes_summ.json b/tests/topotests/fpm_testing_topo1/r1/routes_summ.json index e9157bc664..12fe32cab3 100644 --- a/tests/topotests/fpm_testing_topo1/r1/routes_summ.json +++ b/tests/topotests/fpm_testing_topo1/r1/routes_summ.json @@ -3,21 +3,21 @@ { "fib":1, "rib":1, - "fibOffLoaded":0, + "fibOffLoaded":1, "fibTrapped":0, "type":"connected" }, { "fib":1, "rib":1, - "fibOffLoaded":0, + "fibOffLoaded":1, "fibTrapped":0, "type":"local" }, { "fib":10000, "rib":10000, - "fibOffLoaded":0, + "fibOffLoaded":10000, "fibTrapped":0, "type":"sharp" } diff --git a/tests/topotests/fpm_testing_topo1/r1/routes_summ_removed.json b/tests/topotests/fpm_testing_topo1/r1/routes_summ_removed.json index 8585b2bb6b..15d3f71077 100644 --- a/tests/topotests/fpm_testing_topo1/r1/routes_summ_removed.json +++ b/tests/topotests/fpm_testing_topo1/r1/routes_summ_removed.json @@ -3,14 +3,14 @@ { "fib":1, "rib":1, - "fibOffLoaded":0, + "fibOffLoaded":1, "fibTrapped":0, "type":"connected" }, { "fib":1, "rib":1, - "fibOffLoaded":0, + "fibOffLoaded":1, "fibTrapped":0, "type":"local" } diff --git a/tests/topotests/fpm_testing_topo1/test_fpm_topo1.py b/tests/topotests/fpm_testing_topo1/test_fpm_topo1.py index 66cefcc2a0..b3c375549a 100644 --- a/tests/topotests/fpm_testing_topo1/test_fpm_topo1.py +++ b/tests/topotests/fpm_testing_topo1/test_fpm_topo1.py @@ -57,7 +57,7 @@ def setup_module(module): router.load_config( TopoRouter.RD_ZEBRA, os.path.join(CWD, "{}/zebra.conf".format(rname)), - "-M dplane_fpm_nl", + "-M dplane_fpm_nl --asic-offload=notify_on_offload", ) router.load_config( TopoRouter.RD_SHARP, os.path.join(CWD, "{}/sharpd.conf".format(rname)) @@ -65,6 +65,7 @@ def setup_module(module): router.load_config( TopoRouter.RD_FPM_LISTENER, os.path.join(CWD, "{}/fpm_stub.conf".format(rname)), + "-r", ) tgen.start_router() @@ -111,7 +112,7 @@ def test_fpm_install_routes(): topotest.router_json_cmp, router, "show ip route summ json", expected ) - success, result = topotest.run_and_expect(test_func, None, 60, 1) + success, result = topotest.run_and_expect(test_func, None, 120, 1) assert success, "Unable to successfully install 10000 routes: {}".format(result) # Let's remove 10000 routes @@ -124,7 +125,7 @@ def test_fpm_install_routes(): topotest.router_json_cmp, router, "show ip route summ json", expected ) - success, result = topotest.run_and_expect(test_func, None, 60, 1) + success, result = topotest.run_and_expect(test_func, None, 120, 1) assert success, "Unable to remove 10000 routes: {}".format(result) From c58da10d2a700164e329352c5c22a924af3fa45c Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 31 Jan 2025 12:12:17 -0500 Subject: [PATCH 4/6] zebra: Limit mutex for obuf to when we access obuf The mutex that wraps access to the output buffer is being held for the entire time the data is being generated to send down the pipe. Since the generation has absolutely nothing to do with the obuf, let's limit the mutex holding some. Signed-off-by: Donald Sharp --- zebra/dplane_fpm_nl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c index a65bae95c4..9f26852d1f 100644 --- a/zebra/dplane_fpm_nl.c +++ b/zebra/dplane_fpm_nl.c @@ -951,8 +951,6 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx) nl_buf_len = 0; - frr_mutex_lock_autounlock(&fnc->obuf_mutex); - /* * If route replace is enabled then directly encode the install which * is going to use `NLM_F_REPLACE` (instead of delete/add operations). @@ -1105,6 +1103,8 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx) /* We must know if someday a message goes beyond 65KiB. */ assert((nl_buf_len + FPM_HEADER_SIZE) <= UINT16_MAX); + frr_mutex_lock_autounlock(&fnc->obuf_mutex); + /* Check if we have enough buffer space. */ if (STREAM_WRITEABLE(fnc->obuf) < (nl_buf_len + FPM_HEADER_SIZE)) { atomic_fetch_add_explicit(&fnc->counters.buffer_full, 1, From 07a803a7b30ad3386e491c4efe9eef7f70029d53 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 31 Jan 2025 12:14:36 -0500 Subject: [PATCH 5/6] zebra: Stop buffering output from fpm_listener Signed-off-by: Donald Sharp --- zebra/fpm_listener.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zebra/fpm_listener.c b/zebra/fpm_listener.c index 9181257db3..ed0842a3b1 100644 --- a/zebra/fpm_listener.c +++ b/zebra/fpm_listener.c @@ -771,6 +771,8 @@ int main(int argc, char **argv) int r; bool fork_daemon = false; + setbuf(stdout, NULL); + memset(glob, 0, sizeof(*glob)); while ((r = getopt(argc, argv, "rdv")) != -1) { From 64709ec2a9c4aa1143f64311a9dbc98468a2052c Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 31 Jan 2025 12:38:20 -0500 Subject: [PATCH 6/6] zebra: Ensure dplane does not send work back to master at wrong time When looping through the dplane providers, the worklist was being populated with items from the last provider and then the event system was checked to see if we should stop processing. If the event system says `yes` then the dplane code would stop and send the worklist to the master zebra pthread for collection. This obviously skipped the next dplane provider on the list which is double plus not good. Signed-off-by: Donald Sharp --- zebra/zebra_dplane.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index b57c930154..9acdb4b2f8 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -7528,6 +7528,16 @@ static void dplane_thread_loop(struct event *event) if (!zdplane_info.dg_run) break; + /* + * The yield should only happen after a bit of work has been + * done but before we pull any new work off any provider + * queue to continue looping. This is a safe spot to + * do so. + */ + if (event_should_yield(event)) { + reschedule = true; + break; + } /* Locate next provider */ next_prov = dplane_prov_list_next(&zdplane_info.dg_providers, prov); @@ -7592,11 +7602,6 @@ static void dplane_thread_loop(struct event *event) zlog_debug("dplane dequeues %d completed work from provider %s", counter, dplane_provider_get_name(prov)); - if (event_should_yield(event)) { - reschedule = true; - break; - } - /* Locate next provider */ prov = next_prov; }