From 5418f988cf2e31be9fd79489319ff0f9e05091be Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Tue, 11 Dec 2018 14:56:08 -0500 Subject: [PATCH 1/2] libs: support timeout for workqueue retries Support an optional timeout/delay for use when a workqueue determines that it is blocked, instead of retrying immediately. Also, schedule as an 'event' instead of a 'timer' when using a zero timeout value. Signed-off-by: Mark Stapp --- lib/workqueue.c | 29 ++++++++++++++++++++++------- lib/workqueue.h | 5 +++++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/lib/workqueue.c b/lib/workqueue.c index c927d5d714..24ef24c774 100644 --- a/lib/workqueue.c +++ b/lib/workqueue.c @@ -91,9 +91,10 @@ struct work_queue *work_queue_new(struct thread_master *m, new->cycles.granularity = WORK_QUEUE_MIN_GRANULARITY; - /* Default values, can be overriden by caller */ + /* Default values, can be overridden by caller */ new->spec.hold = WORK_QUEUE_DEFAULT_HOLD; new->spec.yield = THREAD_YIELD_TIME_SLOT; + new->spec.retry = WORK_QUEUE_DEFAULT_RETRY; return new; } @@ -133,8 +134,17 @@ static int work_queue_schedule(struct work_queue *wq, unsigned int delay) if (CHECK_FLAG(wq->flags, WQ_UNPLUGGED) && (wq->thread == NULL) && !work_queue_empty(wq)) { wq->thread = NULL; - thread_add_timer_msec(wq->master, work_queue_run, wq, delay, - &wq->thread); + + /* Schedule timer if there's a delay, otherwise just schedule + * as an 'event' + */ + if (delay > 0) + thread_add_timer_msec(wq->master, work_queue_run, wq, + delay, &wq->thread); + else + thread_add_event(wq->master, work_queue_run, wq, 0, + &wq->thread); + /* set thread yield time, if needed */ if (wq->thread && wq->spec.yield != THREAD_YIELD_TIME_SLOT) thread_set_yield_time(wq->thread, wq->spec.yield); @@ -234,7 +244,7 @@ int work_queue_run(struct thread *thread) { struct work_queue *wq; struct work_queue_item *item, *titem; - wq_item_status ret; + wq_item_status ret = WQ_SUCCESS; unsigned int cycles = 0; char yielded = 0; @@ -376,9 
+386,14 @@ stats: #endif /* Is the queue done yet? If it is, call the completion callback. */ - if (!work_queue_empty(wq)) - work_queue_schedule(wq, 0); - else if (wq->spec.completion_func) + if (!work_queue_empty(wq)) { + if (ret == WQ_RETRY_LATER || + ret == WQ_QUEUE_BLOCKED) + work_queue_schedule(wq, wq->spec.retry); + else + work_queue_schedule(wq, 0); + + } else if (wq->spec.completion_func) wq->spec.completion_func(wq); return 0; diff --git a/lib/workqueue.h b/lib/workqueue.h index fe1700f8de..7c84655063 100644 --- a/lib/workqueue.h +++ b/lib/workqueue.h @@ -30,6 +30,9 @@ DECLARE_MTYPE(WORK_QUEUE) /* Hold time for the initial schedule of a queue run, in millisec */ #define WORK_QUEUE_DEFAULT_HOLD 50 +/* Retry timeout, in millisec, for queue that is 'blocked' or 'retry later' */ +#define WORK_QUEUE_DEFAULT_RETRY 0 + /* action value, for use by item processor and item error handlers */ typedef enum { WQ_SUCCESS = 0, @@ -90,6 +93,8 @@ struct work_queue { unsigned long yield; /* yield time in us for associated thread */ + + uint32_t retry; /* Optional retry timeout, in millisec, if queue is blocked */ } spec; /* remaining fields should be opaque to users */ From 6dd7b8489409f40dc6c91ae6de2da27fee6b093c Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Thu, 13 Dec 2018 14:15:27 -0500 Subject: [PATCH 2/2] zebra: use a small retry timeout for the rib workqueue In the zebra rib processing workqueue, set a small timeout so that we will wait a short time if the queue into the async dataplane is full. This helps avoid a situation where the zebra main pthread constantly retries rib work without giving the dataplane pthread a chance to make progress. 
Signed-off-by: Mark Stapp --- zebra/zebra_rib.c | 1 + zebra/zserv.h | 1 + 2 files changed, 2 insertions(+) diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index f2d07310ee..9f1374af57 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -2362,6 +2362,7 @@ static void rib_queue_init(struct zebra_t *zebra) /* XXX: TODO: These should be runtime configurable via vty */ zebra->ribq->spec.max_retries = 3; zebra->ribq->spec.hold = ZEBRA_RIB_PROCESS_HOLD_TIME; + zebra->ribq->spec.retry = ZEBRA_RIB_PROCESS_RETRY_TIME; if (!(zebra->mq = meta_queue_new())) { flog_err(EC_ZEBRA_WQ_NONEXISTENT, diff --git a/zebra/zserv.h b/zebra/zserv.h index f7967f54f0..fe1dbdbfe4 100644 --- a/zebra/zserv.h +++ b/zebra/zserv.h @@ -184,6 +184,7 @@ struct zebra_t { /* rib work queue */ #define ZEBRA_RIB_PROCESS_HOLD_TIME 10 +#define ZEBRA_RIB_PROCESS_RETRY_TIME 5 struct work_queue *ribq; struct meta_queue *mq;