From d8c16a951998b354ef3be124cc97eafef20c5ff0 Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Thu, 30 Aug 2018 13:48:05 -0400 Subject: [PATCH 01/10] zebra: introduce dedicated dataplane pthread Signed-off-by: Mark Stapp --- zebra/main.c | 6 +++--- zebra/zebra_dplane.c | 29 +++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/zebra/main.c b/zebra/main.c index 5628b5e022..20a2185b1b 100644 --- a/zebra/main.c +++ b/zebra/main.c @@ -390,6 +390,9 @@ int main(int argc, char **argv) vty_config_lockless(); zebrad.master = frr_init(); + /* Initialize pthread library */ + frr_pthread_init(); + /* Zebra related initialize. */ zebra_router_init(); zserv_init(); @@ -445,9 +448,6 @@ int main(int argc, char **argv) /* Needed for BSD routing socket. */ pid = getpid(); - /* Intialize pthread library */ - frr_pthread_init(); - /* Start Zebra API server */ zserv_start(zserv_path); diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 3e61418b64..98db3ca7e5 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -176,6 +176,9 @@ static struct zebra_dplane_globals { _Atomic uint32_t dg_routes_queued_max; _Atomic uint32_t dg_route_errors; + /* Dataplane pthread */ + struct frr_pthread *dg_pthread; + /* Event-delivery context 'master' for the dplane */ struct thread_master *dg_master; @@ -1004,13 +1007,23 @@ static void zebra_dplane_init_internal(struct zebra_t *zebra) zdplane_info.dg_max_queued_updates = DPLANE_DEFAULT_MAX_QUEUED; /* TODO -- register default kernel 'provider' during init */ + zdplane_info.dg_run = true; + + /* Start dataplane pthread */ zdplane_info.dg_run = true; - /* TODO -- start dataplane pthread. We're using the zebra - * core/main thread temporarily - */ - zdplane_info.dg_master = zebra->master; + struct frr_pthread_attr pattr = { + .start = frr_pthread_attr_default.start, + .stop = frr_pthread_attr_default.stop + }; + + zdplane_info.dg_pthread = frr_pthread_new(&pattr, "Zebra dplane thread", + "Zebra dplane"); + + zdplane_info.dg_master = zdplane_info.dg_pthread->master; + + frr_pthread_run(zdplane_info.dg_pthread, NULL); } /* Indicates zebra shutdown/exit is in progress. Some operations may be @@ -1122,8 +1135,12 @@ void zebra_dplane_shutdown(void) THREAD_OFF(zdplane_info.dg_t_update); - /* TODO */ - /* frr_pthread_stop(...) */ + frr_pthread_stop(zdplane_info.dg_pthread, NULL); + + /* Destroy pthread */ + frr_pthread_destroy(zdplane_info.dg_pthread); + zdplane_info.dg_pthread = NULL; + zdplane_info.dg_master = NULL; /* Notify provider(s) of final shutdown */ From 675440ad5a969daa6831b974a1e6a04079ac715a Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Wed, 14 Nov 2018 10:06:05 -0500 Subject: [PATCH 02/10] zebra: fix get_old_instance api One of the dplane context accessors was returning the wrong value; correct it. Signed-off-by: Mark Stapp --- zebra/zebra_dplane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 98db3ca7e5..6cd52f7bb8 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -444,7 +444,7 @@ uint16_t dplane_ctx_get_old_instance(const struct zebra_dplane_ctx *ctx) { DPLANE_CTX_VALID(ctx); - return ctx->zd_instance; + return ctx->zd_old_instance; } uint32_t dplane_ctx_get_metric(const struct zebra_dplane_ctx *ctx) From c831033fff8775579998463e372d7653e053e6d8 Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Fri, 31 Aug 2018 13:46:50 -0400 Subject: [PATCH 03/10] zebra: dataplane provider enhancements Limit the number of updates processed from the incoming queue; add more stats. Fill out apis for dataplane providers; convert route update processing to provider model; move dataplane status enum Signed-off-by: Mark Stapp --- zebra/zebra_dplane.c | 732 ++++++++++++++++++++++++++++++++++++------- zebra/zebra_dplane.h | 105 +++++-- zebra/zebra_rib.c | 5 +- 3 files changed, 700 insertions(+), 142 deletions(-) diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 6cd52f7bb8..c859bdaf6e 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -42,6 +42,9 @@ DEFINE_MTYPE(ZEBRA, DP_PROV, "Zebra DPlane Provider") const uint32_t DPLANE_DEFAULT_MAX_QUEUED = 200; +/* Default value for new work per cycle */ +const uint32_t DPLANE_DEFAULT_NEW_WORK = 100; + /* Validation check macro for context blocks */ /* #define DPLANE_DEBUG 1 */ @@ -69,6 +72,12 @@ struct zebra_dplane_ctx { /* Status on return */ enum zebra_dplane_result zd_status; + /* Dplane provider id */ + uint32_t zd_provider; + + /* Flags - used by providers, e.g. */ + int zd_flags; + /* TODO -- internal/sub-operation status? */ enum zebra_dplane_result zd_remote_status; enum zebra_dplane_result zd_kernel_status; @@ -118,6 +127,12 @@ struct zebra_dplane_ctx { TAILQ_ENTRY(zebra_dplane_ctx) zd_q_entries; }; +/* Flag that can be set by a pre-kernel provider as a signal that an update + * should bypass the kernel. + */ +#define DPLANE_CTX_FLAG_NO_KERNEL 0x01 + + /* * Registration block for one dataplane provider. */ @@ -131,16 +146,35 @@ struct zebra_dplane_provider { /* Id value */ uint32_t dp_id; + /* Mutex */ + pthread_mutex_t dp_mutex; + + /* Plugin-provided extra data */ + void *dp_data; + + /* Flags */ + int dp_flags; + dplane_provider_process_fp dp_fp; dplane_provider_fini_fp dp_fini; _Atomic uint32_t dp_in_counter; + _Atomic uint32_t dp_in_max; + _Atomic uint32_t dp_out_counter; + _Atomic uint32_t dp_out_max; _Atomic uint32_t dp_error_counter; - /* Embedded list linkage */ - TAILQ_ENTRY(zebra_dplane_provider) dp_q_providers; + /* Queue of contexts inbound to the provider */ + struct dplane_ctx_q dp_ctx_in_q; + /* Queue of completed contexts outbound from the provider back + * towards the dataplane module. + */ + struct dplane_ctx_q dp_ctx_out_q; + + /* Embedded list linkage for provider objects */ + TAILQ_ENTRY(zebra_dplane_provider) dp_prov_link; }; /* @@ -171,10 +205,16 @@ static struct zebra_dplane_globals { /* Limit number of pending, unprocessed updates */ _Atomic uint32_t dg_max_queued_updates; + /* Limit number of new updates dequeued at once, to pace an + * incoming burst. + */ + uint32_t dg_updates_per_cycle; + _Atomic uint32_t dg_routes_in; _Atomic uint32_t dg_routes_queued; _Atomic uint32_t dg_routes_queued_max; _Atomic uint32_t dg_route_errors; + _Atomic uint32_t dg_update_yields; /* Dataplane pthread */ struct frr_pthread *dg_pthread; @@ -191,14 +231,20 @@ static struct zebra_dplane_globals { } zdplane_info; /* - * Lock and unlock for interactions with the zebra 'core' + * Lock and unlock for interactions with the zebra 'core' pthread */ #define DPLANE_LOCK() pthread_mutex_lock(&zdplane_info.dg_mutex) - #define DPLANE_UNLOCK() pthread_mutex_unlock(&zdplane_info.dg_mutex) + +/* + * Lock and unlock for individual providers + */ +#define DPLANE_PROV_LOCK(p) pthread_mutex_lock(&((p)->dp_mutex)) +#define DPLANE_PROV_UNLOCK(p) pthread_mutex_unlock(&((p)->dp_mutex)) + /* Prototypes */ -static int dplane_route_process(struct thread *event); +static int dplane_thread_loop(struct thread *event); /* * Public APIs @@ -285,6 +331,38 @@ enum zebra_dplane_result dplane_ctx_get_status( return ctx->zd_status; } +void dplane_ctx_set_status(struct zebra_dplane_ctx *ctx, + enum zebra_dplane_result status) +{ + DPLANE_CTX_VALID(ctx); + + ctx->zd_status = status; +} + +/* Retrieve last/current provider id */ +uint32_t dplane_ctx_get_provider(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->zd_provider; +} + +/* Providers run before the kernel can control whether a kernel + * update should be done. + */ +void dplane_ctx_set_skip_kernel(struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + SET_FLAG(ctx->zd_flags, DPLANE_CTX_FLAG_NO_KERNEL); +} + +bool dplane_ctx_is_skip_kernel(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return CHECK_FLAG(ctx->zd_flags, DPLANE_CTX_FLAG_NO_KERNEL); +} + enum dplane_op_e dplane_ctx_get_op(const struct zebra_dplane_ctx *ctx) { DPLANE_CTX_VALID(ctx); @@ -517,6 +595,7 @@ const struct zebra_dplane_info *dplane_ctx_get_ns( * End of dplane context accessors */ + /* * Retrieve the limit on the number of pending, unprocessed updates. */ @@ -678,34 +757,11 @@ static int dplane_route_enqueue(struct zebra_dplane_ctx *ctx) } /* Ensure that an event for the dataplane thread is active */ - thread_add_event(zdplane_info.dg_master, dplane_route_process, NULL, 0, - &zdplane_info.dg_t_update); - - ret = AOK; + ret = dplane_provider_work_ready(); return ret; } -/* - * Attempt to dequeue a route-update block - */ -static struct zebra_dplane_ctx *dplane_route_dequeue(void) -{ - struct zebra_dplane_ctx *ctx = NULL; - - DPLANE_LOCK(); - { - ctx = TAILQ_FIRST(&zdplane_info.dg_route_ctx_q); - if (ctx) { - TAILQ_REMOVE(&zdplane_info.dg_route_ctx_q, - ctx, zd_q_entries); - } - } - DPLANE_UNLOCK(); - - return ctx; -} - /* * Utility that prepares a route update and enqueues it for processing */ @@ -828,68 +884,15 @@ done: return ret; } -/* - * Event handler function for routing updates - */ -static int dplane_route_process(struct thread *event) -{ - enum zebra_dplane_result res; - struct zebra_dplane_ctx *ctx; - - while (1) { - /* Check for shutdown */ - if (!zdplane_info.dg_run) - break; - - /* TODO -- limit number of updates per cycle? */ - ctx = dplane_route_dequeue(); - if (ctx == NULL) - break; - - /* Update counter */ - atomic_fetch_sub_explicit(&zdplane_info.dg_routes_queued, 1, - memory_order_relaxed); - - if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { - char dest_str[PREFIX_STRLEN]; - - prefix2str(dplane_ctx_get_dest(ctx), - dest_str, sizeof(dest_str)); - - zlog_debug("%u:%s Dplane route update ctx %p op %s", - dplane_ctx_get_vrf(ctx), dest_str, - ctx, dplane_op2str(dplane_ctx_get_op(ctx))); - } - - /* TODO -- support series of providers */ - - /* Initially, just doing kernel-facing update here */ - res = kernel_route_update(ctx); - - if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) - atomic_fetch_add_explicit(&zdplane_info.dg_route_errors, - 1, memory_order_relaxed); - - ctx->zd_status = res; - - /* Enqueue result to zebra main context */ - zdplane_info.dg_results_cb(ctx); - - ctx = NULL; - } - - return 0; -} - /* * Handler for 'show dplane' */ int dplane_show_helper(struct vty *vty, bool detailed) { - uint64_t queued, limit, queue_max, errs, incoming; + uint64_t queued, queue_max, limit, errs, incoming, yields; /* Using atomics because counters are being changed in different - * contexts. + * pthread contexts. */ incoming = atomic_load_explicit(&zdplane_info.dg_routes_in, memory_order_relaxed); @@ -901,12 +904,16 @@ int dplane_show_helper(struct vty *vty, bool detailed) memory_order_relaxed); errs = atomic_load_explicit(&zdplane_info.dg_route_errors, memory_order_relaxed); + yields = atomic_load_explicit(&zdplane_info.dg_update_yields, + memory_order_relaxed); - vty_out(vty, "Route updates: %"PRIu64"\n", incoming); + vty_out(vty, "Zebra dataplane:\nRoute updates: %"PRIu64"\n", + incoming); vty_out(vty, "Route update errors: %"PRIu64"\n", errs); vty_out(vty, "Route update queue limit: %"PRIu64"\n", limit); vty_out(vty, "Route update queue depth: %"PRIu64"\n", queued); vty_out(vty, "Route update queue max: %"PRIu64"\n", queue_max); + vty_out(vty, "Route update yields: %"PRIu64"\n", yields); return CMD_SUCCESS; } @@ -916,8 +923,35 @@ int dplane_show_helper(struct vty *vty, bool detailed) */ int dplane_show_provs_helper(struct vty *vty, bool detailed) { - vty_out(vty, "Zebra dataplane providers:%s\n", - (detailed ? " (detailed)" : "")); + struct zebra_dplane_provider *prov; + uint64_t in, in_max, out, out_max; + + vty_out(vty, "Zebra dataplane providers:\n"); + + DPLANE_LOCK(); + prov = TAILQ_FIRST(&zdplane_info.dg_providers_q); + DPLANE_UNLOCK(); + + /* Show counters, useful info from each registered provider */ + while (prov) { + + in = atomic_load_explicit(&prov->dp_in_counter, + memory_order_relaxed); + in_max = atomic_load_explicit(&prov->dp_in_max, + memory_order_relaxed); + out = atomic_load_explicit(&prov->dp_out_counter, + memory_order_relaxed); + out_max = atomic_load_explicit(&prov->dp_out_max, + memory_order_relaxed); + + vty_out(vty, "%s (%u): in: %"PRIu64", max: %"PRIu64", " + "out: %"PRIu64", max: %"PRIu64"\n", + prov->dp_name, prov->dp_id, in, in_max, out, out_max); + + DPLANE_LOCK(); + prov = TAILQ_NEXT(prov, dp_prov_link); + DPLANE_UNLOCK(); + } return CMD_SUCCESS; } @@ -926,9 +960,11 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed) * Provider registration */ int dplane_provider_register(const char *name, - enum dplane_provider_prio_e prio, + enum dplane_provider_prio prio, + int flags, dplane_provider_process_fp fp, - dplane_provider_fini_fp fini_fp) + dplane_provider_fini_fp fini_fp, + void *data) { int ret = 0; struct zebra_dplane_provider *p, *last; @@ -952,37 +988,160 @@ int dplane_provider_register(const char *name, goto done; } - strncpy(p->dp_name, name, DPLANE_PROVIDER_NAMELEN); - p->dp_name[DPLANE_PROVIDER_NAMELEN] = '\0'; /* Belt-and-suspenders */ + pthread_mutex_init(&(p->dp_mutex), NULL); + TAILQ_INIT(&(p->dp_ctx_in_q)); + TAILQ_INIT(&(p->dp_ctx_out_q)); p->dp_priority = prio; p->dp_fp = fp; p->dp_fini = fini_fp; + p->dp_data = data; - /* Lock the lock - the dplane pthread may be running */ + /* Lock - the dplane pthread may be running */ DPLANE_LOCK(); p->dp_id = ++zdplane_info.dg_provider_id; + if (name) + strlcpy(p->dp_name, name, DPLANE_PROVIDER_NAMELEN); + else + snprintf(p->dp_name, DPLANE_PROVIDER_NAMELEN, + "provider-%u", p->dp_id); + /* Insert into list ordered by priority */ - TAILQ_FOREACH(last, &zdplane_info.dg_providers_q, dp_q_providers) { + TAILQ_FOREACH(last, &zdplane_info.dg_providers_q, dp_prov_link) { if (last->dp_priority > p->dp_priority) break; } if (last) - TAILQ_INSERT_BEFORE(last, p, dp_q_providers); + TAILQ_INSERT_BEFORE(last, p, dp_prov_link); else TAILQ_INSERT_TAIL(&zdplane_info.dg_providers_q, p, - dp_q_providers); + dp_prov_link); /* And unlock */ DPLANE_UNLOCK(); + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("dplane: registered new provider '%s' (%u), prio %d", + p->dp_name, p->dp_id, p->dp_priority); + done: return ret; } +/* Accessors for provider attributes */ +const char *dplane_provider_get_name(const struct zebra_dplane_provider *prov) +{ + return prov->dp_name; +} + +uint32_t dplane_provider_get_id(const struct zebra_dplane_provider *prov) +{ + return prov->dp_id; +} + +void *dplane_provider_get_data(const struct zebra_dplane_provider *prov) +{ + return prov->dp_data; +} + +int dplane_provider_get_work_limit(const struct zebra_dplane_provider *prov) +{ + return zdplane_info.dg_updates_per_cycle; +} + +/* + * Dequeue and maintain associated counter + */ +struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx( + struct zebra_dplane_provider *prov) +{ + struct zebra_dplane_ctx *ctx = NULL; + + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_LOCK(prov); + + ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q)); + if (ctx) { + TAILQ_REMOVE(&(prov->dp_ctx_in_q), ctx, zd_q_entries); + } + + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_UNLOCK(prov); + + return ctx; +} + +/* + * Dequeue work to a list, return count + */ +int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov, + struct dplane_ctx_q *listp) +{ + int limit, ret; + struct zebra_dplane_ctx *ctx; + + limit = zdplane_info.dg_updates_per_cycle; + + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_LOCK(prov); + + for (ret = 0; ret < limit; ret++) { + ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q)); + if (ctx) { + TAILQ_REMOVE(&(prov->dp_ctx_in_q), ctx, zd_q_entries); + + TAILQ_INSERT_TAIL(listp, ctx, zd_q_entries); + } else { + break; + } + } + + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_UNLOCK(prov); + + return ret; +} + +/* + * Enqueue and maintain associated counter + */ +void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov, + struct zebra_dplane_ctx *ctx) +{ + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_LOCK(prov); + + TAILQ_INSERT_TAIL(&(prov->dp_ctx_out_q), ctx, + zd_q_entries); + + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_UNLOCK(prov); + + atomic_fetch_add_explicit(&(prov->dp_out_counter), 1, + memory_order_relaxed); +} + +bool dplane_provider_is_threaded(const struct zebra_dplane_provider *prov) +{ + return (prov->dp_flags & DPLANE_PROV_FLAG_THREADED); +} + +/* + * Provider api to signal that work/events are available + * for the dataplane pthread. + */ +int dplane_provider_work_ready(void) +{ + thread_add_event(zdplane_info.dg_master, + dplane_thread_loop, NULL, 0, + &zdplane_info.dg_t_update); + + return AOK; +} + /* * Zebra registers a results callback with the dataplane system */ @@ -993,37 +1152,154 @@ int dplane_results_register(dplane_results_fp fp) } /* - * Initialize the dataplane module during startup, internal/private version + * Kernel dataplane provider */ -static void zebra_dplane_init_internal(struct zebra_t *zebra) + +/* + * Kernel provider callback + */ +static int kernel_dplane_process_func(struct zebra_dplane_provider *prov) { - memset(&zdplane_info, 0, sizeof(zdplane_info)); + enum zebra_dplane_result res; + struct zebra_dplane_ctx *ctx; + int counter, limit; - pthread_mutex_init(&zdplane_info.dg_mutex, NULL); + limit = dplane_provider_get_work_limit(prov); - TAILQ_INIT(&zdplane_info.dg_route_ctx_q); - TAILQ_INIT(&zdplane_info.dg_providers_q); + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane provider '%s': processing", + dplane_provider_get_name(prov)); - zdplane_info.dg_max_queued_updates = DPLANE_DEFAULT_MAX_QUEUED; + for (counter = 0; counter < limit; counter++) { - /* TODO -- register default kernel 'provider' during init */ - zdplane_info.dg_run = true; + ctx = dplane_provider_dequeue_in_ctx(prov); + if (ctx == NULL) + break; - /* Start dataplane pthread */ + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + char dest_str[PREFIX_STRLEN]; - zdplane_info.dg_run = true; + prefix2str(dplane_ctx_get_dest(ctx), + dest_str, sizeof(dest_str)); - struct frr_pthread_attr pattr = { - .start = frr_pthread_attr_default.start, - .stop = frr_pthread_attr_default.stop - }; + zlog_debug("%u:%s Dplane route update ctx %p op %s", + dplane_ctx_get_vrf(ctx), dest_str, + ctx, dplane_op2str(dplane_ctx_get_op(ctx))); + } - zdplane_info.dg_pthread = frr_pthread_new(&pattr, "Zebra dplane thread", - "Zebra dplane"); + /* Call into the synchronous kernel-facing code here */ + res = kernel_route_update(ctx); - zdplane_info.dg_master = zdplane_info.dg_pthread->master; + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit( + &zdplane_info.dg_route_errors, 1, + memory_order_relaxed); - frr_pthread_run(zdplane_info.dg_pthread, NULL); + dplane_ctx_set_status(ctx, res); + + dplane_provider_enqueue_out_ctx(prov, ctx); + } + + /* Ensure that we'll run the work loop again if there's still + * more work to do. + */ + if (counter >= limit) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane provider '%s' reached max updates %d", + dplane_provider_get_name(prov), counter); + + atomic_fetch_add_explicit(&zdplane_info.dg_update_yields, + 1, memory_order_relaxed); + + dplane_provider_work_ready(); + } + + return 0; +} + +/* + * Test dataplane provider plugin + */ + +/* + * Test provider process callback + */ +static int test_dplane_process_func(struct zebra_dplane_provider *prov) +{ + struct zebra_dplane_ctx *ctx; + int counter, limit; + + /* Just moving from 'in' queue to 'out' queue */ + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane provider '%s': processing", + dplane_provider_get_name(prov)); + + limit = dplane_provider_get_work_limit(prov); + + for (counter = 0; counter < limit; counter++) { + + ctx = dplane_provider_dequeue_in_ctx(prov); + if (ctx == NULL) + break; + + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS); + + dplane_provider_enqueue_out_ctx(prov, ctx); + } + + /* Ensure that we'll run the work loop again if there's still + * more work to do. + */ + if (counter >= limit) + dplane_provider_work_ready(); + + return 0; +} + +/* + * Test provider shutdown/fini callback + */ +static int test_dplane_shutdown_func(struct zebra_dplane_provider *prov, + bool early) +{ + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("dplane provider '%s': %sshutdown", + dplane_provider_get_name(prov), + early ? "early " : ""); + + return 0; +} + +/* + * Register default kernel provider + */ +static void dplane_provider_init(void) +{ + int ret; + + ret = dplane_provider_register("Kernel", + DPLANE_PRIO_KERNEL, + DPLANE_PROV_FLAGS_DEFAULT, + kernel_dplane_process_func, + NULL, + NULL); + + if (ret != AOK) + zlog_err("Unable to register kernel dplane provider: %d", + ret); + + /* TODO -- make the test provider optional... */ + ret = dplane_provider_register("Test", + DPLANE_PRIO_PRE_KERNEL, + DPLANE_PROV_FLAGS_DEFAULT, + test_dplane_process_func, + test_dplane_shutdown_func, + NULL /* data */); + + if (ret != AOK) + zlog_err("Unable to register test dplane provider: %d", + ret); } /* Indicates zebra shutdown/exit is in progress. Some operations may be @@ -1059,7 +1335,9 @@ static bool dplane_work_pending(void) { struct zebra_dplane_ctx *ctx; - /* TODO -- just checking incoming/pending work for now */ + /* TODO -- just checking incoming/pending work for now, must check + * providers + */ DPLANE_LOCK(); { ctx = TAILQ_FIRST(&zdplane_info.dg_route_ctx_q); @@ -1120,6 +1398,184 @@ void zebra_dplane_finish(void) &zdplane_info.dg_t_shutdown_check); } +/* + * Main dataplane pthread event loop. The thread takes new incoming work + * and offers it to the first provider. It then iterates through the + * providers, taking complete work from each one and offering it + * to the next in order. At each step, a limited number of updates are + * processed during a cycle in order to provide some fairness. + */ +static int dplane_thread_loop(struct thread *event) +{ + struct dplane_ctx_q work_list; + struct dplane_ctx_q error_list; + struct zebra_dplane_provider *prov; + struct zebra_dplane_ctx *ctx, *tctx; + int limit, counter, error_counter; + uint64_t lval; + + /* Capture work limit per cycle */ + limit = zdplane_info.dg_updates_per_cycle; + + TAILQ_INIT(&work_list); + + /* Check for zebra shutdown */ + if (!zdplane_info.dg_run) + goto done; + + /* Dequeue some incoming work from zebra (if any) onto the temporary + * working list. + */ + DPLANE_LOCK(); + + /* Locate initial registered provider */ + prov = TAILQ_FIRST(&zdplane_info.dg_providers_q); + + for (counter = 0; counter < limit; counter++) { + ctx = TAILQ_FIRST(&zdplane_info.dg_route_ctx_q); + if (ctx) { + TAILQ_REMOVE(&zdplane_info.dg_route_ctx_q, ctx, + zd_q_entries); + + atomic_fetch_sub_explicit( + &zdplane_info.dg_routes_queued, 1, + memory_order_relaxed); + + ctx->zd_provider = prov->dp_id; + + TAILQ_INSERT_TAIL(&work_list, ctx, zd_q_entries); + } else { + break; + } + } + + DPLANE_UNLOCK(); + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane: incoming new work counter: %d", counter); + + /* Iterate through the registered providers, offering new incoming + * work. If the provider has outgoing work in its queue, take that + * work for the next provider + */ + while (prov) { + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane enqueues %d new work to provider '%s'", + counter, dplane_provider_get_name(prov)); + + /* Capture current provider id in each context; check for + * error status. + */ + TAILQ_FOREACH_SAFE(ctx, &work_list, zd_q_entries, tctx) { + if (dplane_ctx_get_status(ctx) == + ZEBRA_DPLANE_REQUEST_SUCCESS) { + ctx->zd_provider = prov->dp_id; + } else { + /* + * TODO -- improve error-handling: recirc + * errors backwards so that providers can + * 'undo' their work (if they want to) + */ + + /* Move to error list; will be returned + * zebra main. + */ + TAILQ_REMOVE(&work_list, ctx, zd_q_entries); + TAILQ_INSERT_TAIL(&error_list, + ctx, zd_q_entries); + error_counter++; + } + } + + /* Enqueue new work to the provider */ + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_LOCK(prov); + + if (TAILQ_FIRST(&work_list)) + TAILQ_CONCAT(&(prov->dp_ctx_in_q), &work_list, + zd_q_entries); + + lval = atomic_add_fetch_explicit(&prov->dp_in_counter, counter, + memory_order_relaxed); + if (lval > prov->dp_in_max) + atomic_store_explicit(&prov->dp_in_max, lval, + memory_order_relaxed); + + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_UNLOCK(prov); + + /* Reset the temp list */ + TAILQ_INIT(&work_list); + counter = 0; + + /* Call into the provider code */ + (*prov->dp_fp)(prov); + + /* Check for zebra shutdown */ + if (!zdplane_info.dg_run) + break; + + /* Dequeue completed work from the provider */ + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_LOCK(prov); + + while (counter < limit) { + ctx = TAILQ_FIRST(&(prov->dp_ctx_out_q)); + if (ctx) { + TAILQ_REMOVE(&(prov->dp_ctx_out_q), ctx, + zd_q_entries); + + TAILQ_INSERT_TAIL(&work_list, + ctx, zd_q_entries); + counter++; + } else + break; + } + + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_UNLOCK(prov); + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane dequeues %d completed work from provider %s", + counter, dplane_provider_get_name(prov)); + + /* Locate next provider */ + DPLANE_LOCK(); + prov = TAILQ_NEXT(prov, dp_prov_link); + DPLANE_UNLOCK(); + + if (prov) { + TAILQ_FOREACH(ctx, &work_list, zd_q_entries) + ctx->zd_provider = prov->dp_id; + } + } + + /* After all providers have been serviced, enqueue any completed + * work back to zebra so it can process the results. + */ + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane has %d completed work for zebra main", + counter); + + /* + * TODO -- I'd rather hand lists through the api to zebra main, + * to reduce the number of lock/unlock cycles + */ + for (ctx = TAILQ_FIRST(&work_list); ctx; ) { + TAILQ_REMOVE(&work_list, ctx, zd_q_entries); + + /* Call through to zebra main */ + (*zdplane_info.dg_results_cb)(ctx); + + ctx = TAILQ_FIRST(&work_list); + } + +done: + return 0; +} + /* * Final phase of shutdown, after all work enqueued to dplane has been * processed. This is called from the zebra main pthread context. @@ -1142,11 +1598,51 @@ void zebra_dplane_shutdown(void) zdplane_info.dg_pthread = NULL; zdplane_info.dg_master = NULL; - /* Notify provider(s) of final shutdown */ + /* TODO -- Notify provider(s) of final shutdown */ - /* Clean-up provider objects */ + /* TODO -- Clean-up provider objects */ - /* Clean queue(s) */ + /* TODO -- Clean queue(s), free memory */ +} + +/* + * Initialize the dataplane module during startup, internal/private version + */ +static void zebra_dplane_init_internal(struct zebra_t *zebra) +{ + memset(&zdplane_info, 0, sizeof(zdplane_info)); + + pthread_mutex_init(&zdplane_info.dg_mutex, NULL); + + TAILQ_INIT(&zdplane_info.dg_route_ctx_q); + TAILQ_INIT(&zdplane_info.dg_providers_q); + + zdplane_info.dg_updates_per_cycle = DPLANE_DEFAULT_NEW_WORK; + + zdplane_info.dg_max_queued_updates = DPLANE_DEFAULT_MAX_QUEUED; + + /* Register default kernel 'provider' during init */ + dplane_provider_init(); + + /* Start dataplane pthread */ + + zdplane_info.dg_run = true; + + struct frr_pthread_attr pattr = { + .start = frr_pthread_attr_default.start, + .stop = frr_pthread_attr_default.stop + }; + + zdplane_info.dg_pthread = frr_pthread_new(&pattr, "Zebra dplane thread", + "Zebra dplane"); + + zdplane_info.dg_master = zdplane_info.dg_pthread->master; + + /* Enqueue an initial event for the dataplane pthread */ + thread_add_event(zdplane_info.dg_master, dplane_thread_loop, NULL, 0, + &zdplane_info.dg_t_update); + + frr_pthread_run(zdplane_info.dg_pthread, NULL); } /* diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index 999e0f39e4..844fd59600 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -29,7 +29,6 @@ #include "zebra/rib.h" #include "zebra/zserv.h" - /* Key netlink info from zebra ns */ struct zebra_dplane_info { ns_id_t ns_id; @@ -127,6 +126,12 @@ void dplane_ctx_fini(struct zebra_dplane_ctx **pctx); void dplane_ctx_enqueue_tail(struct dplane_ctx_q *q, const struct zebra_dplane_ctx *ctx); +/* Append a list of context blocks to another list - again, just keeping + * the context struct opaque. + */ +void dplane_ctx_list_append(struct dplane_ctx_q *to_list, + struct dplane_ctx_q *from_list); + /* Dequeue a context block from the head of caller's tailq */ void dplane_ctx_dequeue(struct dplane_ctx_q *q, struct zebra_dplane_ctx **ctxp); @@ -135,6 +140,8 @@ void dplane_ctx_dequeue(struct dplane_ctx_q *q, struct zebra_dplane_ctx **ctxp); */ enum zebra_dplane_result dplane_ctx_get_status( const struct zebra_dplane_ctx *ctx); +void dplane_ctx_set_status(struct zebra_dplane_ctx *ctx, + enum zebra_dplane_result status); const char *dplane_res2str(enum zebra_dplane_result res); enum dplane_op_e dplane_ctx_get_op(const struct zebra_dplane_ctx *ctx); @@ -142,6 +149,15 @@ const char *dplane_op2str(enum dplane_op_e op); const struct prefix *dplane_ctx_get_dest(const struct zebra_dplane_ctx *ctx); +/* Retrieve last/current provider id */ +uint32_t dplane_ctx_get_provider(const struct zebra_dplane_ctx *ctx); + +/* Providers running before the kernel can control whether a kernel + * update should be done. + */ +void dplane_ctx_set_skip_kernel(struct zebra_dplane_ctx *ctx); +bool dplane_ctx_is_skip_kernel(const struct zebra_dplane_ctx *ctx); + /* Source prefix is a little special - use convention to return NULL * to mean "no src prefix" */ @@ -212,9 +228,11 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed); /* - * Dataplane providers: modules that consume dataplane events. + * Dataplane providers: modules that process or consume dataplane events. */ +struct zebra_dplane_provider; + /* Support string name for a dataplane provider */ #define DPLANE_PROVIDER_NAMELEN 64 @@ -223,7 +241,7 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed); * followed by the kernel, followed by some post-processing step (such as * the fpm output stream.) */ -enum dplane_provider_prio_e { +enum dplane_provider_prio { DPLANE_PRIO_NONE = 0, DPLANE_PRIO_PREPROCESS, DPLANE_PRIO_PRE_KERNEL, @@ -232,28 +250,72 @@ enum dplane_provider_prio_e { DPLANE_PRIO_LAST }; -/* Provider's entry-point to process a context block */ -typedef int (*dplane_provider_process_fp)(struct zebra_dplane_ctx *ctx); - -/* Provider's entry-point for shutdown and cleanup */ -typedef int (*dplane_provider_fini_fp)(void); - -/* Provider registration */ -int dplane_provider_register(const char *name, - enum dplane_provider_prio_e prio, - dplane_provider_process_fp fp, - dplane_provider_fini_fp fini_fp); - -/* - * Results are returned to zebra core via a callback +/* Provider's entry-point for incoming work, called in the context of the + * dataplane pthread. The dataplane pthread enqueues any new work to the + * provider's 'inbound' queue, then calls the callback. The dataplane + * then checks the provider's outbound queue. */ -typedef int (*dplane_results_fp)(const struct zebra_dplane_ctx *ctx); +typedef int (*dplane_provider_process_fp)(struct zebra_dplane_provider *prov); + +/* Provider's entry-point for shutdown and cleanup. Called with 'early' + * during shutdown, to indicate that the dataplane subsystem is allowing + * work to move through the providers and finish. When called without 'early', + * the provider should release all resources (if it has any allocated). + */ +typedef int (*dplane_provider_fini_fp)(struct zebra_dplane_provider *prov, + bool early); + +/* Flags values used during provider registration. */ +#define DPLANE_PROV_FLAGS_DEFAULT 0x0 + +/* Provider will be spawning its own worker thread */ +#define DPLANE_PROV_FLAG_THREADED 0x1 + + +/* Provider registration: ordering or priority value, callbacks, and optional + * opaque data value. + */ +int dplane_provider_register(const char *name, + enum dplane_provider_prio prio, + int flags, + dplane_provider_process_fp fp, + dplane_provider_fini_fp fini_fp, + void *data); + +/* Accessors for provider attributes */ +const char *dplane_provider_get_name(const struct zebra_dplane_provider *prov); +uint32_t dplane_provider_get_id(const struct zebra_dplane_provider *prov); +void *dplane_provider_get_data(const struct zebra_dplane_provider *prov); +bool dplane_provider_is_threaded(const struct zebra_dplane_provider *prov); + +/* Providers should limit number of updates per work cycle */ +int dplane_provider_get_work_limit(const struct zebra_dplane_provider *prov); + +/* Provider api to signal that work/events are available + * for the dataplane pthread. + */ +int dplane_provider_work_ready(void); + +/* Dequeue, maintain associated counter and locking */ +struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx( + struct zebra_dplane_provider *prov); + +/* Dequeue work to a list, maintain counter and locking, return count */ +int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov, + struct dplane_ctx_q *listp); + +/* Enqueue, maintain associated counter and locking */ +void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov, + struct zebra_dplane_ctx *ctx); /* * Zebra registers a results callback with the dataplane. The callback is - * called in the dataplane thread context, so the expectation is that the - * context is queued (or that processing is very limited). + * called in the dataplane pthread context, so the expectation is that the + * context is queued for the zebra main pthread or that processing + * is very limited. */ +typedef int (*dplane_results_fp)(struct zebra_dplane_ctx *ctx); + int dplane_results_register(dplane_results_fp fp); /* @@ -264,7 +326,8 @@ void zebra_dplane_init(void); /* Finalize/cleanup apis, one called early as shutdown is starting, * one called late at the end of zebra shutdown, and then one called - * from the zebra main thread to stop the dplane thread free all resources. + * from the zebra main pthread to stop the dplane pthread and + * free all resources. * * Zebra expects to try to clean up all vrfs and all routes during * shutdown, so the dplane must be available until very late. diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 8285392527..f161d5875b 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -1932,11 +1932,10 @@ static void rib_process_after(struct zebra_dplane_ctx *ctx) op = dplane_ctx_get_op(ctx); status = dplane_ctx_get_status(ctx); - if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) zlog_debug("%u:%s Processing dplane ctx %p, op %s result %s", dplane_ctx_get_vrf(ctx), dest_str, ctx, dplane_op2str(op), dplane_res2str(status)); - } if (op == DPLANE_OP_ROUTE_DELETE) { /* @@ -3289,7 +3288,7 @@ static int rib_process_dplane_results(struct thread *thread) * the dataplane pthread. We enqueue the results here for processing by * the main thread later. */ -static int rib_dplane_results(const struct zebra_dplane_ctx *ctx) +static int rib_dplane_results(struct zebra_dplane_ctx *ctx) { /* Take lock controlling queue of results */ pthread_mutex_lock(&dplane_mutex); From e5a60d8259d4b969826e622133dff5a6d8330903 Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Fri, 7 Sep 2018 15:46:13 -0400 Subject: [PATCH 04/10] zebra: reorg dataplane pthread start Move dataplane pthread start later in the zebra startup; make the 'test' dplane provider conditional Signed-off-by: Mark Stapp --- zebra/main.c | 3 +++ zebra/zebra_dplane.c | 35 ++++++++++++++++++++++++++++------- zebra/zebra_dplane.h | 6 ++++++ 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/zebra/main.c b/zebra/main.c index 20a2185b1b..e5160c1a51 100644 --- a/zebra/main.c +++ b/zebra/main.c @@ -448,6 +448,9 @@ int main(int argc, char **argv) /* Needed for BSD routing socket. */ pid = getpid(); + /* Start dataplane system */ + zebra_dplane_start(); + /* Start Zebra API server */ zserv_start(zserv_path); diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index c859bdaf6e..34de6a956e 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -38,10 +38,12 @@ DEFINE_MTYPE(ZEBRA, DP_PROV, "Zebra DPlane Provider") # define AOK 0 #endif +/* Enable test dataplane provider */ +/*#define DPLANE_TEST_PROVIDER 1 */ + /* Default value for max queued incoming updates */ const uint32_t DPLANE_DEFAULT_MAX_QUEUED = 200; - /* Default value for new work per cycle */ const uint32_t DPLANE_DEFAULT_NEW_WORK = 100; @@ -1135,9 +1137,16 @@ bool dplane_provider_is_threaded(const struct zebra_dplane_provider *prov) */ int dplane_provider_work_ready(void) { - thread_add_event(zdplane_info.dg_master, - dplane_thread_loop, NULL, 0, - &zdplane_info.dg_t_update); + /* Note that during zebra startup, we may be offered work before + * the dataplane pthread (and thread-master) are ready. We want to + * enqueue the work, but the event-scheduling machinery may not be + * available. + */ + if (zdplane_info.dg_run) { + thread_add_event(zdplane_info.dg_master, + dplane_thread_loop, NULL, 0, + &zdplane_info.dg_t_update); + } return AOK; } @@ -1217,6 +1226,8 @@ static int kernel_dplane_process_func(struct zebra_dplane_provider *prov) return 0; } +#if DPLANE_TEST_PROVIDER + /* * Test dataplane provider plugin */ @@ -1270,6 +1281,7 @@ static int test_dplane_shutdown_func(struct zebra_dplane_provider *prov, return 0; } +#endif /* DPLANE_TEST_PROVIDER */ /* * Register default kernel provider @@ -1289,7 +1301,8 @@ static void dplane_provider_init(void) zlog_err("Unable to register kernel dplane provider: %d", ret); - /* TODO -- make the test provider optional... */ +#if DPLANE_TEST_PROVIDER + /* Optional test provider ... */ ret = dplane_provider_register("Test", DPLANE_PRIO_PRE_KERNEL, DPLANE_PROV_FLAGS_DEFAULT, @@ -1300,6 +1313,7 @@ static void dplane_provider_init(void) if (ret != AOK) zlog_err("Unable to register test dplane provider: %d", ret); +#endif /* DPLANE_TEST_PROVIDER */ } /* Indicates zebra shutdown/exit is in progress. Some operations may be @@ -1623,11 +1637,16 @@ static void zebra_dplane_init_internal(struct zebra_t *zebra) /* Register default kernel 'provider' during init */ dplane_provider_init(); +} +/* + * Start the dataplane pthread. This step needs to be run later than the + * 'init' step, in case zebra has fork-ed. + */ +void zebra_dplane_start(void) +{ /* Start dataplane pthread */ - zdplane_info.dg_run = true; - struct frr_pthread_attr pattr = { .start = frr_pthread_attr_default.start, .stop = frr_pthread_attr_default.stop @@ -1638,6 +1657,8 @@ static void zebra_dplane_init_internal(struct zebra_t *zebra) zdplane_info.dg_master = zdplane_info.dg_pthread->master; + zdplane_info.dg_run = true; + /* Enqueue an initial event for the dataplane pthread */ thread_add_event(zdplane_info.dg_master, dplane_thread_loop, NULL, 0, &zdplane_info.dg_t_update); diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index 844fd59600..6f471e65a1 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -324,6 +324,12 @@ int dplane_results_register(dplane_results_fp fp); */ void zebra_dplane_init(void); +/* + * Start the dataplane pthread. This step needs to be run later than the + * 'init' step, in case zebra has fork-ed. + */ +void zebra_dplane_start(void); + /* Finalize/cleanup apis, one called early as shutdown is starting, * one called late at the end of zebra shutdown, and then one called * from the zebra main pthread to stop the dplane pthread and From 14b0bc8e88555caa394e0414154312f1e6036757 Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Mon, 10 Sep 2018 15:17:19 -0400 Subject: [PATCH 05/10] zebra: add initial error handling to dplane loop Capture error work during dataplane provider processing. Signed-off-by: Mark Stapp --- zebra/zebra_dplane.c | 67 +++++++++++++++++++++++++++++++++----------- zebra/zebra_dplane.h | 2 +- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 34de6a956e..a387241385 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -300,7 +300,7 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) */ void dplane_ctx_fini(struct zebra_dplane_ctx **pctx) { - /* TODO -- enqueue for next provider; for now, just free */ + /* TODO -- maintain pool; for now, just free */ dplane_ctx_free(pctx); } @@ -311,6 +311,18 @@ void dplane_ctx_enqueue_tail(struct dplane_ctx_q *q, TAILQ_INSERT_TAIL(q, (struct zebra_dplane_ctx *)ctx, zd_q_entries); } +/* Append a list of context blocks to another list */ +void dplane_ctx_list_append(struct dplane_ctx_q *to_list, + struct dplane_ctx_q *from_list) +{ + if (TAILQ_FIRST(from_list)) { + TAILQ_CONCAT(to_list, from_list, zd_q_entries); + + /* And clear 'from' list */ + TAILQ_INIT(from_list); + } +} + /* Dequeue a context block from the head of a list */ void dplane_ctx_dequeue(struct dplane_ctx_q *q, struct zebra_dplane_ctx **ctxp) { @@ -649,6 +661,7 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, goto done; ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; ctx->zd_type = re->type; ctx->zd_old_type = re->type; @@ -702,7 +715,7 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, /* TODO -- maybe use array of nexthops to avoid allocs? */ - /* Ensure that the dplane's nexthop flag is clear. */ + /* Ensure that the dplane's nexthops flags are clear. */ for (ALL_NEXTHOPS(ctx->zd_ng, nexthop)) UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); @@ -1418,6 +1431,10 @@ void zebra_dplane_finish(void) * providers, taking complete work from each one and offering it * to the next in order. At each step, a limited number of updates are * processed during a cycle in order to provide some fairness. + * + * This loop through the providers is only run once, so that the dataplane + * pthread can look for other pending work - such as i/o work on behalf of + * providers. */ static int dplane_thread_loop(struct thread *event) { @@ -1431,7 +1448,10 @@ static int dplane_thread_loop(struct thread *event) /* Capture work limit per cycle */ limit = zdplane_info.dg_updates_per_cycle; + /* Init temporary lists used to move contexts among providers */ TAILQ_INIT(&work_list); + TAILQ_INIT(&error_list); + error_counter = 0; /* Check for zebra shutdown */ if (!zdplane_info.dg_run) @@ -1445,16 +1465,13 @@ static int dplane_thread_loop(struct thread *event) /* Locate initial registered provider */ prov = TAILQ_FIRST(&zdplane_info.dg_providers_q); + /* Move new work from incoming list to temp list */ for (counter = 0; counter < limit; counter++) { ctx = TAILQ_FIRST(&zdplane_info.dg_route_ctx_q); if (ctx) { TAILQ_REMOVE(&zdplane_info.dg_route_ctx_q, ctx, zd_q_entries); - atomic_fetch_sub_explicit( - &zdplane_info.dg_routes_queued, 1, - memory_order_relaxed); - ctx->zd_provider = prov->dp_id; TAILQ_INSERT_TAIL(&work_list, ctx, zd_q_entries); @@ -1465,6 +1482,9 @@ static int dplane_thread_loop(struct thread *event) DPLANE_UNLOCK(); + atomic_fetch_sub_explicit(&zdplane_info.dg_routes_queued, counter, + memory_order_relaxed); + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) zlog_debug("dplane: incoming new work counter: %d", counter); @@ -1474,6 +1494,10 @@ static int dplane_thread_loop(struct thread *event) */ while (prov) { + /* At each iteration, the temporary work list has 'counter' + * items. + */ + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) zlog_debug("dplane enqueues %d new work to provider '%s'", counter, dplane_provider_get_name(prov)); @@ -1519,11 +1543,16 @@ static int dplane_thread_loop(struct thread *event) if (dplane_provider_is_threaded(prov)) DPLANE_PROV_UNLOCK(prov); - /* Reset the temp list */ + /* Reset the temp list (though the 'concat' may have done this + * already), and the counter + */ TAILQ_INIT(&work_list); counter = 0; - /* Call into the provider code */ + /* Call into the provider code. Note that this is + * unconditional: we offer to do work even if we don't enqueue + * any _new_ work. + */ (*prov->dp_fp)(prov); /* Check for zebra shutdown */ @@ -1558,25 +1587,29 @@ static int dplane_thread_loop(struct thread *event) DPLANE_LOCK(); prov = TAILQ_NEXT(prov, dp_prov_link); DPLANE_UNLOCK(); - - if (prov) { - TAILQ_FOREACH(ctx, &work_list, zd_q_entries) - ctx->zd_provider = prov->dp_id; - } } /* After all providers have been serviced, enqueue any completed - * work back to zebra so it can process the results. + * work and any errors back to zebra so it can process the results. */ - if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) - zlog_debug("dplane has %d completed work for zebra main", - counter); + zlog_debug("dplane has %d completed, %d errors, for zebra main", + counter, error_counter); /* * TODO -- I'd rather hand lists through the api to zebra main, * to reduce the number of lock/unlock cycles */ + for (ctx = TAILQ_FIRST(&error_list); ctx; ) { + TAILQ_REMOVE(&error_list, ctx, zd_q_entries); + + /* Call through to zebra main */ + (*zdplane_info.dg_results_cb)(ctx); + + ctx = TAILQ_FIRST(&error_list); + } + + for (ctx = TAILQ_FIRST(&work_list); ctx; ) { TAILQ_REMOVE(&work_list, ctx, zd_q_entries); diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index 6f471e65a1..60dfb07fed 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -120,7 +120,7 @@ TAILQ_HEAD(dplane_ctx_q, zebra_dplane_ctx); */ void dplane_ctx_fini(struct zebra_dplane_ctx **pctx); -/* Enqueue a context block to caller's tailq. This just exists so that the +/* Enqueue a context block to caller's tailq. This exists so that the * context struct can remain opaque. */ void dplane_ctx_enqueue_tail(struct dplane_ctx_q *q, From 68b375e05956cdd1ce935fd54939312b75e3a546 Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Mon, 10 Sep 2018 15:36:30 -0400 Subject: [PATCH 06/10] zebra: revise dplane dequeue api Change the dataplane context dequeue api used by zebra to make the purpose a bit clearer. Signed-off-by: Mark Stapp --- zebra/zebra_dplane.c | 4 ++-- zebra/zebra_dplane.h | 2 +- zebra/zebra_rib.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index a387241385..24975697ea 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -324,14 +324,14 @@ void dplane_ctx_list_append(struct dplane_ctx_q *to_list, } /* Dequeue a context block from the head of a list */ -void dplane_ctx_dequeue(struct dplane_ctx_q *q, struct zebra_dplane_ctx **ctxp) +struct zebra_dplane_ctx *dplane_ctx_dequeue(struct dplane_ctx_q *q) { struct zebra_dplane_ctx *ctx = TAILQ_FIRST(q); if (ctx) TAILQ_REMOVE(q, ctx, zd_q_entries); - *ctxp = ctx; + return ctx; } /* diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index 60dfb07fed..2e64540d6b 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -133,7 +133,7 @@ void dplane_ctx_list_append(struct dplane_ctx_q *to_list, struct dplane_ctx_q *from_list); /* Dequeue a context block from the head of caller's tailq */ -void dplane_ctx_dequeue(struct dplane_ctx_q *q, struct zebra_dplane_ctx **ctxp); +struct zebra_dplane_ctx *dplane_ctx_dequeue(struct dplane_ctx_q *q); /* * Accessors for information from the context object diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index f161d5875b..f2d07310ee 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -3266,7 +3266,7 @@ static int rib_process_dplane_results(struct thread *thread) pthread_mutex_lock(&dplane_mutex); { /* Dequeue context block */ - dplane_ctx_dequeue(&rib_dplane_q, &ctx); + ctx = dplane_ctx_dequeue(&rib_dplane_q); } pthread_mutex_unlock(&dplane_mutex); From c9d17fe85ea55f011381dc98ba927bd107fd9d19 Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Tue, 11 Sep 2018 16:42:36 -0400 Subject: [PATCH 07/10] zebra: improve dataplane shutdown checks Update the dataplane shutdown checks to include the providers. Also revise the typedef for provider structs to make const work. Signed-off-by: Mark Stapp --- zebra/zebra_dplane.c | 71 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 61 insertions(+), 10 deletions(-) diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 24975697ea..5fd27dc830 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -162,8 +162,10 @@ struct zebra_dplane_provider { dplane_provider_fini_fp dp_fini; _Atomic uint32_t dp_in_counter; + _Atomic uint32_t dp_in_queued; _Atomic uint32_t dp_in_max; _Atomic uint32_t dp_out_counter; + _Atomic uint32_t dp_out_queued; _Atomic uint32_t dp_out_max; _Atomic uint32_t dp_error_counter; @@ -959,8 +961,8 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed) out_max = atomic_load_explicit(&prov->dp_out_max, memory_order_relaxed); - vty_out(vty, "%s (%u): in: %"PRIu64", max: %"PRIu64", " - "out: %"PRIu64", max: %"PRIu64"\n", + vty_out(vty, "%s (%u): in: %"PRIu64", q_max: %"PRIu64", " + "out: %"PRIu64", q_max: %"PRIu64"\n", prov->dp_name, prov->dp_id, in, in_max, out, out_max); DPLANE_LOCK(); @@ -1081,6 +1083,9 @@ struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx( ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q)); if (ctx) { TAILQ_REMOVE(&(prov->dp_ctx_in_q), ctx, zd_q_entries); + + atomic_fetch_sub_explicit(&prov->dp_in_queued, 1, + memory_order_relaxed); } if (dplane_provider_is_threaded(prov)) @@ -1114,6 +1119,10 @@ int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov, } } + if (ret > 0) + atomic_fetch_sub_explicit(&prov->dp_in_queued, ret, + memory_order_relaxed); + if (dplane_provider_is_threaded(prov)) DPLANE_PROV_UNLOCK(prov); @@ -1272,6 +1281,10 @@ static int test_dplane_process_func(struct zebra_dplane_provider *prov) dplane_provider_enqueue_out_ctx(prov, ctx); } + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) + zlog_debug("dplane provider '%s': processed %d", + dplane_provider_get_name(prov), counter); + /* Ensure that we'll run the work loop again if there's still * more work to do. */ @@ -1342,7 +1355,7 @@ bool dplane_is_in_shutdown(void) * early during zebra shutdown, as a signal to stop new work and prepare * for updates generated by shutdown/cleanup activity, as zebra tries to * remove everything it's responsible for. - * NB: This runs in the main zebra thread context. + * NB: This runs in the main zebra pthread context. */ void zebra_dplane_pre_finish(void) { @@ -1351,7 +1364,7 @@ void zebra_dplane_pre_finish(void) zdplane_info.dg_is_shutdown = true; - /* Notify provider(s) of pending shutdown */ + /* TODO -- Notify provider(s) of pending shutdown */ } /* @@ -1360,7 +1373,9 @@ void zebra_dplane_pre_finish(void) */ static bool dplane_work_pending(void) { + bool ret = false; struct zebra_dplane_ctx *ctx; + struct zebra_dplane_provider *prov; /* TODO -- just checking incoming/pending work for now, must check * providers @@ -1368,10 +1383,40 @@ static bool dplane_work_pending(void) DPLANE_LOCK(); { ctx = TAILQ_FIRST(&zdplane_info.dg_route_ctx_q); + prov = TAILQ_FIRST(&zdplane_info.dg_providers_q); } DPLANE_UNLOCK(); - return (ctx != NULL); + if (ctx != NULL) { + ret = true; + goto done; + } + + while (prov) { + + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_LOCK(prov); + + ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q)); + if (ctx == NULL) + ctx = TAILQ_FIRST(&(prov->dp_ctx_out_q)); + + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_UNLOCK(prov); + + if (ctx != NULL) + break; + + DPLANE_LOCK(); + prov = TAILQ_NEXT(prov, dp_prov_link); + DPLANE_UNLOCK(); + } + + if (ctx != NULL) + ret = true; + +done: + return ret; } /* @@ -1443,7 +1488,7 @@ static int dplane_thread_loop(struct thread *event) struct zebra_dplane_provider *prov; struct zebra_dplane_ctx *ctx, *tctx; int limit, counter, error_counter; - uint64_t lval; + uint64_t curr, high; /* Capture work limit per cycle */ limit = zdplane_info.dg_updates_per_cycle; @@ -1534,10 +1579,16 @@ static int dplane_thread_loop(struct thread *event) TAILQ_CONCAT(&(prov->dp_ctx_in_q), &work_list, zd_q_entries); - lval = atomic_add_fetch_explicit(&prov->dp_in_counter, counter, - memory_order_relaxed); - if (lval > prov->dp_in_max) - atomic_store_explicit(&prov->dp_in_max, lval, + atomic_fetch_add_explicit(&prov->dp_in_counter, counter, + memory_order_relaxed); + atomic_fetch_add_explicit(&prov->dp_in_queued, counter, + memory_order_relaxed); + curr = atomic_load_explicit(&prov->dp_in_queued, + memory_order_relaxed); + high = atomic_load_explicit(&prov->dp_in_max, + memory_order_relaxed); + if (curr > high) + atomic_store_explicit(&prov->dp_in_max, curr, memory_order_relaxed); if (dplane_provider_is_threaded(prov)) From ad6aad4d0bc06f7711d05e1d05576ea25aac04c5 Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Thu, 27 Sep 2018 15:10:54 -0400 Subject: [PATCH 08/10] zebra: dplane lock and thread_master apis Improve, simplify dataplane provider locking apis. Add accessor for dataplane pthread's thread_master, for use by providers who need to use the thread/event apis. Signed-off-by: Mark Stapp --- zebra/zebra_dplane.c | 58 +++++++++++++++++++++++++------------------- zebra/zebra_dplane.h | 11 ++++++++- 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 5fd27dc830..95df243829 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -254,6 +254,12 @@ static int dplane_thread_loop(struct thread *event); * Public APIs */ +/* Obtain thread_master for dataplane thread */ +struct thread_master *dplane_get_thread_master(void) +{ + return zdplane_info.dg_master; +} + /* * Allocate a dataplane update context */ @@ -1069,6 +1075,21 @@ int dplane_provider_get_work_limit(const struct zebra_dplane_provider *prov) return zdplane_info.dg_updates_per_cycle; } +/* Lock/unlock a provider's mutex - iff the provider was registered with + * the THREADED flag. + */ +void dplane_provider_lock(struct zebra_dplane_provider *prov) +{ + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_LOCK(prov); +} + +void dplane_provider_unlock(struct zebra_dplane_provider *prov) +{ + if (dplane_provider_is_threaded(prov)) + DPLANE_PROV_UNLOCK(prov); +} + /* * Dequeue and maintain associated counter */ @@ -1077,8 +1098,7 @@ struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx( { struct zebra_dplane_ctx *ctx = NULL; - if (dplane_provider_is_threaded(prov)) - DPLANE_PROV_LOCK(prov); + dplane_provider_lock(prov); ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q)); if (ctx) { @@ -1088,8 +1108,7 @@ struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx( memory_order_relaxed); } - if (dplane_provider_is_threaded(prov)) - DPLANE_PROV_UNLOCK(prov); + dplane_provider_unlock(prov); return ctx; } @@ -1105,8 +1124,7 @@ int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov, limit = zdplane_info.dg_updates_per_cycle; - if (dplane_provider_is_threaded(prov)) - DPLANE_PROV_LOCK(prov); + dplane_provider_lock(prov); for (ret = 0; ret < limit; ret++) { ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q)); @@ -1123,8 +1141,7 @@ int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov, atomic_fetch_sub_explicit(&prov->dp_in_queued, ret, memory_order_relaxed); - if (dplane_provider_is_threaded(prov)) - DPLANE_PROV_UNLOCK(prov); + dplane_provider_unlock(prov); return ret; } @@ -1135,14 +1152,12 @@ int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov, void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov, struct zebra_dplane_ctx *ctx) { - if (dplane_provider_is_threaded(prov)) - DPLANE_PROV_LOCK(prov); + dplane_provider_lock(prov); TAILQ_INSERT_TAIL(&(prov->dp_ctx_out_q), ctx, zd_q_entries); - if (dplane_provider_is_threaded(prov)) - DPLANE_PROV_UNLOCK(prov); + dplane_provider_unlock(prov); atomic_fetch_add_explicit(&(prov->dp_out_counter), 1, memory_order_relaxed); @@ -1394,15 +1409,13 @@ static bool dplane_work_pending(void) while (prov) { - if (dplane_provider_is_threaded(prov)) - DPLANE_PROV_LOCK(prov); + dplane_provider_lock(prov); ctx = TAILQ_FIRST(&(prov->dp_ctx_in_q)); if (ctx == NULL) ctx = TAILQ_FIRST(&(prov->dp_ctx_out_q)); - if (dplane_provider_is_threaded(prov)) - DPLANE_PROV_UNLOCK(prov); + dplane_provider_unlock(prov); if (ctx != NULL) break; @@ -1542,7 +1555,6 @@ static int dplane_thread_loop(struct thread *event) /* At each iteration, the temporary work list has 'counter' * items. */ - if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) zlog_debug("dplane enqueues %d new work to provider '%s'", counter, dplane_provider_get_name(prov)); @@ -1572,8 +1584,7 @@ static int dplane_thread_loop(struct thread *event) } /* Enqueue new work to the provider */ - if (dplane_provider_is_threaded(prov)) - DPLANE_PROV_LOCK(prov); + dplane_provider_lock(prov); if (TAILQ_FIRST(&work_list)) TAILQ_CONCAT(&(prov->dp_ctx_in_q), &work_list, @@ -1591,8 +1602,7 @@ static int dplane_thread_loop(struct thread *event) atomic_store_explicit(&prov->dp_in_max, curr, memory_order_relaxed); - if (dplane_provider_is_threaded(prov)) - DPLANE_PROV_UNLOCK(prov); + dplane_provider_unlock(prov); /* Reset the temp list (though the 'concat' may have done this * already), and the counter @@ -1611,8 +1621,7 @@ static int dplane_thread_loop(struct thread *event) break; /* Dequeue completed work from the provider */ - if (dplane_provider_is_threaded(prov)) - DPLANE_PROV_LOCK(prov); + dplane_provider_lock(prov); while (counter < limit) { ctx = TAILQ_FIRST(&(prov->dp_ctx_out_q)); @@ -1627,8 +1636,7 @@ static int dplane_thread_loop(struct thread *event) break; } - if (dplane_provider_is_threaded(prov)) - DPLANE_PROV_UNLOCK(prov); + dplane_provider_unlock(prov); if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) zlog_debug("dplane dequeues %d completed work from provider %s", diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index 2e64540d6b..b6b2e64600 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -288,7 +288,16 @@ uint32_t dplane_provider_get_id(const struct zebra_dplane_provider *prov); void *dplane_provider_get_data(const struct zebra_dplane_provider *prov); bool dplane_provider_is_threaded(const struct zebra_dplane_provider *prov); -/* Providers should limit number of updates per work cycle */ +/* Lock/unlock a provider's mutex - iff the provider was registered with + * the THREADED flag. + */ +void dplane_provider_lock(struct zebra_dplane_provider *prov); +void dplane_provider_unlock(struct zebra_dplane_provider *prov); + +/* Obtain thread_master for dataplane thread */ +struct thread_master *dplane_get_thread_master(void); + +/* Providers should (generally) limit number of updates per work cycle */ int dplane_provider_get_work_limit(const struct zebra_dplane_provider *prov); /* Provider api to signal that work/events are available From 62b8bb7a1776ea98e5874a4a057617e335a7f77c Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Mon, 12 Nov 2018 15:57:03 -0500 Subject: [PATCH 09/10] zebra: separate netlink socket for dataplane Use a separate netlink socket for the dataplane's updates, to avoid races between the dataplane pthread and the zebra main pthread. Revise zebra shutdown so that the dataplane netlink socket is cleaned-up later, after all shutdown-time dataplane work has been done. Signed-off-by: Mark Stapp --- zebra/kernel_netlink.c | 65 +++++++++++++++++++++++++++++++++--------- zebra/kernel_socket.c | 2 +- zebra/main.c | 5 +++- zebra/rt.h | 2 +- zebra/zebra_dplane.c | 27 ++++++++++++++++-- zebra/zebra_ns.c | 39 +++++++++++++++++++++---- zebra/zebra_ns.h | 8 ++++-- 7 files changed, 120 insertions(+), 28 deletions(-) diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c index 0772c59b92..afc3985854 100644 --- a/zebra/kernel_netlink.c +++ b/zebra/kernel_netlink.c @@ -396,7 +396,7 @@ static int kernel_read(struct thread *thread) /* * Filter out messages from self that occur on listener socket, - * caused by our actions on the command socket + * caused by our actions on the command socket(s) * * When we add new Netlink message types we probably * do not need to add them here as that we are filtering @@ -407,7 +407,7 @@ static int kernel_read(struct thread *thread) * so that we only had to write one way to handle incoming * address add/delete changes. */ -static void netlink_install_filter(int sock, __u32 pid) +static void netlink_install_filter(int sock, __u32 pid, __u32 dplane_pid) { /* * BPF_JUMP instructions and where you jump to are based upon @@ -418,7 +418,8 @@ static void netlink_install_filter(int sock, __u32 pid) struct sock_filter filter[] = { /* * Logic: - * if (nlmsg_pid == pid) { + * if (nlmsg_pid == pid || + * nlmsg_pid == dplane_pid) { * if (the incoming nlmsg_type == * RTM_NEWADDR | RTM_DELADDR) * keep this message @@ -435,26 +436,30 @@ static void netlink_install_filter(int sock, __u32 pid) /* * 1: Compare to pid */ - BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 0, 4), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 1, 0), /* - * 2: Load the nlmsg_type into BPF register + * 2: Compare to dplane pid + */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(dplane_pid), 0, 4), + /* + * 3: Load the nlmsg_type into BPF register */ BPF_STMT(BPF_LD | BPF_ABS | BPF_H, offsetof(struct nlmsghdr, nlmsg_type)), /* - * 3: Compare to RTM_NEWADDR + * 4: Compare to RTM_NEWADDR */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWADDR), 2, 0), /* - * 4: Compare to RTM_DELADDR + * 5: Compare to RTM_DELADDR */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELADDR), 1, 0), /* - * 5: This is the end state of we want to skip the + * 6: This is the end state of we want to skip the * message */ BPF_STMT(BPF_RET | BPF_K, 0), - /* 6: This is the end state of we want to keep + /* 7: This is the end state of we want to keep * the message */ BPF_STMT(BPF_RET | BPF_K, 0xffff), @@ -1102,6 +1107,15 @@ void kernel_init(struct zebra_ns *zns) exit(-1); } + snprintf(zns->netlink_dplane.name, sizeof(zns->netlink_dplane.name), + "netlink-dp (NS %u)", zns->ns_id); + zns->netlink_dplane.sock = -1; + if (netlink_socket(&zns->netlink_dplane, 0, zns->ns_id) < 0) { + zlog_err("Failure to create %s socket", + zns->netlink_dplane.name); + exit(-1); + } + /* * SOL_NETLINK is not available on all platforms yet * apparently. It's in bits/socket.h which I am not @@ -1110,14 +1124,22 @@ void kernel_init(struct zebra_ns *zns) #if defined SOL_NETLINK /* * Let's tell the kernel that we want to receive extended - * ACKS over our command socket + * ACKS over our command socket(s) */ one = 1; ret = setsockopt(zns->netlink_cmd.sock, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one)); if (ret < 0) - zlog_notice("Registration for extended ACK failed : %d %s", + zlog_notice("Registration for extended cmd ACK failed : %d %s", + errno, safe_strerror(errno)); + + one = 1; + ret = setsockopt(zns->netlink_dplane.sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)); + + if (ret < 0) + zlog_notice("Registration for extended dp ACK failed : %d %s", errno, safe_strerror(errno)); #endif @@ -1130,12 +1152,18 @@ void kernel_init(struct zebra_ns *zns) zlog_err("Can't set %s socket error: %s(%d)", zns->netlink_cmd.name, safe_strerror(errno), errno); + if (fcntl(zns->netlink_dplane.sock, F_SETFL, O_NONBLOCK) < 0) + zlog_err("Can't set %s socket error: %s(%d)", + zns->netlink_dplane.name, safe_strerror(errno), errno); + /* Set receive buffer size if it's set from command line */ if (nl_rcvbufsize) netlink_recvbuf(&zns->netlink, nl_rcvbufsize); netlink_install_filter(zns->netlink.sock, - zns->netlink_cmd.snl.nl_pid); + zns->netlink_cmd.snl.nl_pid, + zns->netlink_dplane.snl.nl_pid); + zns->t_netlink = NULL; thread_add_read(zebrad.master, kernel_read, zns, @@ -1144,7 +1172,7 @@ void kernel_init(struct zebra_ns *zns) rt_netlink_init(); } -void kernel_terminate(struct zebra_ns *zns) +void kernel_terminate(struct zebra_ns *zns, bool complete) { THREAD_READ_OFF(zns->t_netlink); @@ -1157,6 +1185,15 @@ void kernel_terminate(struct zebra_ns *zns) close(zns->netlink_cmd.sock); zns->netlink_cmd.sock = -1; } -} + /* During zebra shutdown, we need to leave the dataplane socket + * around until all work is done. + */ + if (complete) { + if (zns->netlink_dplane.sock >= 0) { + close(zns->netlink_dplane.sock); + zns->netlink_dplane.sock = -1; + } + } +} #endif /* HAVE_NETLINK */ diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c index 7af3083fd2..ff739ae79b 100644 --- a/zebra/kernel_socket.c +++ b/zebra/kernel_socket.c @@ -1415,7 +1415,7 @@ void kernel_init(struct zebra_ns *zns) routing_socket(zns); } -void kernel_terminate(struct zebra_ns *zns) +void kernel_terminate(struct zebra_ns *zns, bool complete) { return; } diff --git a/zebra/main.c b/zebra/main.c index e5160c1a51..5b5ee8259a 100644 --- a/zebra/main.c +++ b/zebra/main.c @@ -172,7 +172,7 @@ static void sigint(void) work_queue_free_and_null(&zebrad.lsp_process_q); vrf_terminate(); - ns_walk_func(zebra_ns_disabled); + ns_walk_func(zebra_ns_early_shutdown); zebra_ns_notify_close(); access_list_reset(); @@ -196,6 +196,9 @@ int zebra_finalize(struct thread *dummy) { zlog_info("Zebra final shutdown"); + /* Final shutdown of ns resources */ + ns_walk_func(zebra_ns_final_shutdown); + /* Stop dplane thread and finish any cleanup */ zebra_dplane_shutdown(); diff --git a/zebra/rt.h b/zebra/rt.h index 70ac6f635c..0317dc85ba 100644 --- a/zebra/rt.h +++ b/zebra/rt.h @@ -86,7 +86,7 @@ extern int kernel_del_neigh(struct interface *ifp, struct ipaddr *ip); */ extern void interface_list(struct zebra_ns *zns); extern void kernel_init(struct zebra_ns *zns); -extern void kernel_terminate(struct zebra_ns *zns); +extern void kernel_terminate(struct zebra_ns *zns, bool complete); extern void macfdb_read(struct zebra_ns *zns); extern void macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp, struct interface *br_if); diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 95df243829..ba0f1b41aa 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -249,6 +249,8 @@ static struct zebra_dplane_globals { /* Prototypes */ static int dplane_thread_loop(struct thread *event); +static void dplane_info_from_zns(struct zebra_dplane_info *ns_info, + struct zebra_ns *zns); /* * Public APIs @@ -706,16 +708,17 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, zvrf = vrf_info_lookup(re->vrf_id); zns = zvrf->zns; - zebra_dplane_info_from_zns(&(ctx->zd_ns_info), zns, true /*is_cmd*/); + /* Internal copy helper */ + dplane_info_from_zns(&(ctx->zd_ns_info), zns); #if defined(HAVE_NETLINK) /* Increment message counter after copying to context struct - may need * two messages in some 'update' cases. */ if (op == DPLANE_OP_ROUTE_UPDATE) - zns->netlink_cmd.seq += 2; + zns->netlink_dplane.seq += 2; else - zns->netlink_cmd.seq++; + zns->netlink_dplane.seq++; #endif /* NETLINK*/ /* Copy nexthops; recursive info is included too */ @@ -1163,11 +1166,29 @@ void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov, memory_order_relaxed); } +/* + * Accessor for provider object + */ bool dplane_provider_is_threaded(const struct zebra_dplane_provider *prov) { return (prov->dp_flags & DPLANE_PROV_FLAG_THREADED); } +/* + * Internal helper that copies information from a zebra ns object; this is + * called in the zebra main pthread context as part of dplane ctx init. + */ +static void dplane_info_from_zns(struct zebra_dplane_info *ns_info, + struct zebra_ns *zns) +{ + ns_info->ns_id = zns->ns_id; + +#if defined(HAVE_NETLINK) + ns_info->is_cmd = true; + ns_info->nls = zns->netlink_dplane; +#endif /* NETLINK */ +} + /* * Provider api to signal that work/events are available * for the dataplane pthread. diff --git a/zebra/zebra_ns.c b/zebra/zebra_ns.c index e65f23dc8a..965c8c206c 100644 --- a/zebra/zebra_ns.c +++ b/zebra/zebra_ns.c @@ -47,6 +47,7 @@ DEFINE_MTYPE(ZEBRA, ZEBRA_NS, "Zebra Name Space") static struct zebra_ns *dzns; static int logicalrouter_config_write(struct vty *vty); +static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete); struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id) { @@ -111,7 +112,7 @@ int zebra_ns_disabled(struct ns *ns) zlog_info("ZNS %s with id %u (disabled)", ns->name, ns->ns_id); if (!zns) return 0; - return zebra_ns_disable(ns->ns_id, (void **)&zns); + return zebra_ns_disable_internal(zns, true); } /* Do global enable actions - open sockets, read kernel config etc. */ @@ -135,17 +136,18 @@ int zebra_ns_enable(ns_id_t ns_id, void **info) return 0; } -int zebra_ns_disable(ns_id_t ns_id, void **info) +/* Common handler for ns disable - this can be called during ns config, + * or during zebra shutdown. + */ +static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete) { - struct zebra_ns *zns = (struct zebra_ns *)(*info); - route_table_finish(zns->if_table); zebra_vxlan_ns_disable(zns); #if defined(HAVE_RTADV) rtadv_terminate(zns); #endif - kernel_terminate(zns); + kernel_terminate(zns, complete); table_manager_disable(zns->ns_id); @@ -154,6 +156,33 @@ int zebra_ns_disable(ns_id_t ns_id, void **info) return 0; } +/* During zebra shutdown, do partial cleanup while the async dataplane + * is still running. + */ +int zebra_ns_early_shutdown(struct ns *ns) +{ + struct zebra_ns *zns = ns->info; + + if (zns == NULL) + return 0; + + return zebra_ns_disable_internal(zns, false); +} + +/* During zebra shutdown, do final cleanup + * after all dataplane work is complete. + */ +int zebra_ns_final_shutdown(struct ns *ns) +{ + struct zebra_ns *zns = ns->info; + + if (zns == NULL) + return 0; + + kernel_terminate(zns, true); + + return 0; +} int zebra_ns_init(void) { diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h index c1a9b41b8d..d3592f8f30 100644 --- a/zebra/zebra_ns.h +++ b/zebra/zebra_ns.h @@ -46,8 +46,9 @@ struct zebra_ns { ns_id_t ns_id; #ifdef HAVE_NETLINK - struct nlsock netlink; /* kernel messages */ - struct nlsock netlink_cmd; /* command channel */ + struct nlsock netlink; /* kernel messages */ + struct nlsock netlink_cmd; /* command channel */ + struct nlsock netlink_dplane; /* dataplane channel */ struct thread *t_netlink; #endif @@ -62,7 +63,8 @@ struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id); int zebra_ns_init(void); int zebra_ns_enable(ns_id_t ns_id, void **info); int zebra_ns_disabled(struct ns *ns); -int zebra_ns_disable(ns_id_t ns_id, void **info); +int zebra_ns_early_shutdown(struct ns *ns); +int zebra_ns_final_shutdown(struct ns *ns); int zebra_ns_config_write(struct vty *vty, struct ns *ns); From 80776aec8147d643ca8aaa99f7320c7f22a70677 Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Wed, 14 Nov 2018 14:45:30 -0500 Subject: [PATCH 10/10] zebra: add dataplane routing socket To avoid conflicts between the zebra main pthread and the dataplane pthread, use a separate routing socket (on non-netlink platforms) for dataplane route updates to the OS. Signed-off-by: Mark Stapp --- zebra/kernel_socket.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c index ff739ae79b..dcc22d2162 100644 --- a/zebra/kernel_socket.c +++ b/zebra/kernel_socket.c @@ -278,6 +278,11 @@ static const struct message rtm_flag_str[] = {{RTF_UP, "UP"}, /* Kernel routing update socket. */ int routing_sock = -1; +/* Kernel dataplane routing update socket, used in the dataplane pthread + * context. + */ +int dplane_routing_sock = -1; + /* Yes I'm checking ugly routing socket behavior. */ /* #define DEBUG */ @@ -1136,7 +1141,7 @@ int rtm_write(int message, union sockunion *dest, union sockunion *mask, char buf[512]; } msg; - if (routing_sock < 0) + if (dplane_routing_sock < 0) return ZEBRA_ERR_EPERM; /* Clear and set rt_msghdr values */ @@ -1243,7 +1248,7 @@ int rtm_write(int message, union sockunion *dest, union sockunion *mask, msg.rtm.rtm_msglen = pnt - (caddr_t)&msg; - ret = write(routing_sock, &msg, msg.rtm.rtm_msglen); + ret = write(dplane_routing_sock, &msg, msg.rtm.rtm_msglen); if (ret != msg.rtm.rtm_msglen) { if (errno == EEXIST) @@ -1390,6 +1395,9 @@ static void routing_socket(struct zebra_ns *zns) { frr_elevate_privs(&zserv_privs) { routing_sock = ns_socket(AF_ROUTE, SOCK_RAW, 0, zns->ns_id); + + dplane_routing_sock = + ns_socket(AF_ROUTE, SOCK_RAW, 0, zns->ns_id); } if (routing_sock < 0) { @@ -1397,6 +1405,12 @@ static void routing_socket(struct zebra_ns *zns) return; } + if (dplane_routing_sock < 0) { + flog_err_sys(EC_LIB_SOCKET, + "Can't init kernel dataplane routing socket"); + return; + } + /* XXX: Socket should be NONBLOCK, however as we currently * discard failed writes, this will lead to inconsistencies. * For now, socket must be blocking.