Merge pull request #6770 from opensourcerouting/fpm-race

zebra: FPM fixes
Quentin Young 2020-08-04 11:04:22 -04:00 committed by GitHub
commit 35b82b081d
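
The diff below replaces direct fpm_reconnect() calls in the read/write handlers with the FPM_RECONNECT() macro, which queues an FNE_INTERNAL_RECONNECT event on the FPM module's own pthread so the connection teardown always runs in that thread's context instead of racing with it. A minimal, self-contained sketch of that hand-off pattern (plain pthreads and hypothetical names, not FRR's thread API):

/*
 * Hypothetical sketch, not FRR code: instead of tearing the connection
 * down from whatever thread noticed the failure, post a "reconnect"
 * event and let the owning I/O thread act on it from its own loop.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t ev_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t ev_cv = PTHREAD_COND_INITIALIZER;
static bool reconnect_requested;

/* Any thread may call this; it only posts the event. */
static void request_reconnect(void)
{
	pthread_mutex_lock(&ev_mtx);
	reconnect_requested = true;
	pthread_cond_signal(&ev_cv);
	pthread_mutex_unlock(&ev_mtx);
}

/* Only the I/O thread runs this, so the teardown is never concurrent. */
static void *io_thread(void *arg)
{
	(void)arg;

	pthread_mutex_lock(&ev_mtx);
	while (!reconnect_requested)
		pthread_cond_wait(&ev_cv, &ev_mtx);
	reconnect_requested = false;
	pthread_mutex_unlock(&ev_mtx);

	printf("io thread: reconnecting from its own context\n");
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, io_thread, NULL);
	request_reconnect();	/* e.g. called from another thread's callback */
	pthread_join(tid, NULL);
	return 0;
}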


@@ -72,6 +72,7 @@ struct fpm_nl_ctx {
 	int socket;
 	bool disabled;
 	bool connecting;
+	bool nhg_complete;
 	bool rib_complete;
 	bool rmac_complete;
 	bool use_nhg;
@@ -149,8 +150,25 @@ enum fpm_nl_events {
 	FNE_RESET_COUNTERS,
 	/* Toggle next hop group feature. */
 	FNE_TOGGLE_NHG,
+	/* Reconnect request by our own code to avoid races. */
+	FNE_INTERNAL_RECONNECT,
+	/* Next hop groups walk finished. */
+	FNE_NHG_FINISHED,
+	/* RIB walk finished. */
+	FNE_RIB_FINISHED,
+	/* RMAC walk finished. */
+	FNE_RMAC_FINISHED,
 };
 
+#define FPM_RECONNECT(fnc)                                                   \
+	thread_add_event((fnc)->fthread->master, fpm_process_event, (fnc),  \
+			 FNE_INTERNAL_RECONNECT, &(fnc)->t_event)
+
+#define WALK_FINISH(fnc, ev)                                                 \
+	thread_add_event((fnc)->fthread->master, fpm_process_event, (fnc),  \
+			 (ev), NULL)
+
 /*
  * Prototypes.
  */
@@ -428,7 +446,18 @@ static int fpm_connect(struct thread *t);
 static void fpm_reconnect(struct fpm_nl_ctx *fnc)
 {
-	/* Grab the lock to empty the stream and stop the zebra thread. */
+	/* Cancel all zebra threads first. */
+	thread_cancel_async(zrouter.master, &fnc->t_nhgreset, NULL);
+	thread_cancel_async(zrouter.master, &fnc->t_nhgwalk, NULL);
+	thread_cancel_async(zrouter.master, &fnc->t_ribreset, NULL);
+	thread_cancel_async(zrouter.master, &fnc->t_ribwalk, NULL);
+	thread_cancel_async(zrouter.master, &fnc->t_rmacreset, NULL);
+	thread_cancel_async(zrouter.master, &fnc->t_rmacwalk, NULL);
+
+	/*
+	 * Grab the lock to empty the streams (data plane might try to
+	 * enqueue updates while we are closing).
+	 */
 	frr_mutex_lock_autounlock(&fnc->obuf_mutex);
 
 	/* Avoid calling close on `-1`. */
@@ -442,13 +471,6 @@ static void fpm_reconnect(struct fpm_nl_ctx *fnc)
 	THREAD_OFF(fnc->t_read);
 	THREAD_OFF(fnc->t_write);
 
-	thread_cancel_async(zrouter.master, &fnc->t_nhgreset, NULL);
-	thread_cancel_async(zrouter.master, &fnc->t_nhgwalk, NULL);
-	thread_cancel_async(zrouter.master, &fnc->t_ribreset, NULL);
-	thread_cancel_async(zrouter.master, &fnc->t_ribwalk, NULL);
-	thread_cancel_async(zrouter.master, &fnc->t_rmacreset, NULL);
-	thread_cancel_async(zrouter.master, &fnc->t_rmacwalk, NULL);
-
 	/* FPM is disabled, don't attempt to connect. */
 	if (fnc->disabled)
 		return;
@@ -465,6 +487,13 @@ static int fpm_read(struct thread *t)
 	/* Let's ignore the input at the moment. */
 	rv = stream_read_try(fnc->ibuf, fnc->socket,
 			     STREAM_WRITEABLE(fnc->ibuf));
+	/* We've got an interruption. */
+	if (rv == -2) {
+		/* Schedule next read. */
+		thread_add_read(fnc->fthread->master, fpm_read, fnc,
+				fnc->socket, &fnc->t_read);
+		return 0;
+	}
 	if (rv == 0) {
 		atomic_fetch_add_explicit(&fnc->counters.connection_closes, 1,
 					  memory_order_relaxed);
@@ -472,19 +501,15 @@ static int fpm_read(struct thread *t)
 		if (IS_ZEBRA_DEBUG_FPM)
 			zlog_debug("%s: connection closed", __func__);
 
-		fpm_reconnect(fnc);
+		FPM_RECONNECT(fnc);
 		return 0;
 	}
 	if (rv == -1) {
-		if (errno == EAGAIN || errno == EWOULDBLOCK
-		    || errno == EINTR)
-			return 0;
-
 		atomic_fetch_add_explicit(&fnc->counters.connection_errors, 1,
 					  memory_order_relaxed);
 		zlog_warn("%s: connection failure: %s", __func__,
 			  strerror(errno));
-		fpm_reconnect(fnc);
+		FPM_RECONNECT(fnc);
 		return 0;
 	}
 	stream_reset(fnc->ibuf);
@@ -525,33 +550,15 @@ static int fpm_write(struct thread *t)
 				&fnc->counters.connection_errors, 1,
 				memory_order_relaxed);
-			fpm_reconnect(fnc);
+			FPM_RECONNECT(fnc);
 			return 0;
 		}
 
 		fnc->connecting = false;
 
-		/*
-		 * Walk the route tables to send old information before starting
-		 * to send updated information.
-		 *
-		 * NOTE 1:
-		 * RIB table walk is called after the next group table walk
-		 * ends.
-		 *
-		 * NOTE 2:
-		 * Don't attempt to go through next hop group table if we were
-		 * explictly told to not use it.
-		 */
-		if (fnc->use_nhg)
-			thread_add_timer(zrouter.master, fpm_nhg_send, fnc, 0,
-					 &fnc->t_nhgwalk);
-		else
-			thread_add_timer(zrouter.master, fpm_rib_send, fnc, 0,
-					 &fnc->t_ribwalk);
-
-		thread_add_timer(zrouter.master, fpm_rmac_send, fnc, 0,
-				 &fnc->t_rmacwalk);
+		/* Permit receiving messages now. */
+		thread_add_read(fnc->fthread->master, fpm_read, fnc,
+				fnc->socket, &fnc->t_read);
 	}
 
 	frr_mutex_lock_autounlock(&fnc->obuf_mutex);
@@ -589,8 +596,9 @@ static int fpm_write(struct thread *t)
 				memory_order_relaxed);
 			zlog_warn("%s: connection failure: %s", __func__,
 				  strerror(errno));
-			fpm_reconnect(fnc);
-			break;
+
+			FPM_RECONNECT(fnc);
+			return 0;
 		}
 
 		/* Account all bytes sent. */
@@ -661,18 +669,19 @@ static int fpm_connect(struct thread *t)
 	fnc->connecting = (errno == EINPROGRESS);
 	fnc->socket = sock;
-	thread_add_read(fnc->fthread->master, fpm_read, fnc, sock,
-			&fnc->t_read);
+	if (!fnc->connecting)
+		thread_add_read(fnc->fthread->master, fpm_read, fnc, sock,
+				&fnc->t_read);
 	thread_add_write(fnc->fthread->master, fpm_write, fnc, sock,
 			 &fnc->t_write);
 
 	/* Mark all routes as unsent. */
-	thread_add_timer(zrouter.master, fpm_nhg_reset, fnc, 0,
-			 &fnc->t_nhgreset);
-	thread_add_timer(zrouter.master, fpm_rib_reset, fnc, 0,
-			 &fnc->t_ribreset);
-	thread_add_timer(zrouter.master, fpm_rmac_reset, fnc, 0,
-			 &fnc->t_rmacreset);
+	if (fnc->use_nhg)
+		thread_add_timer(zrouter.master, fpm_nhg_reset, fnc, 0,
+				 &fnc->t_nhgreset);
+	else
+		thread_add_timer(zrouter.master, fpm_rib_reset, fnc, 0,
+				 &fnc->t_ribreset);
 
 	return 0;
 }
@@ -904,10 +913,11 @@ static int fpm_nhg_send(struct thread *t)
 	dplane_ctx_fini(&fna.ctx);
 
 	/* We are done sending next hops, lets install the routes now. */
-	if (fna.complete)
-		thread_add_timer(zrouter.master, fpm_rib_send, fnc, 0,
-				 &fnc->t_ribwalk);
-	else /* Otherwise reschedule next hop group again. */
+	if (fna.complete) {
+		WALK_FINISH(fnc, FNE_NHG_FINISHED);
+		thread_add_timer(zrouter.master, fpm_rib_reset, fnc, 0,
+				 &fnc->t_ribreset);
+	} else /* Otherwise reschedule next hop group again. */
 		thread_add_timer(zrouter.master, fpm_nhg_send, fnc, 0,
 				 &fnc->t_nhgwalk);
@@ -963,7 +973,11 @@ static int fpm_rib_send(struct thread *t)
 	dplane_ctx_fini(&ctx);
 
 	/* All RIB routes sent! */
-	fnc->rib_complete = true;
+	WALK_FINISH(fnc, FNE_RIB_FINISHED);
+
+	/* Schedule next event: RMAC reset. */
+	thread_add_event(zrouter.master, fpm_rmac_reset, fnc, 0,
+			 &fnc->t_rmacreset);
 
 	return 0;
 }
@@ -975,6 +989,7 @@ struct fpm_rmac_arg {
 	struct zebra_dplane_ctx *ctx;
 	struct fpm_nl_ctx *fnc;
 	zebra_l3vni_t *zl3vni;
+	bool complete;
 };
 
 static void fpm_enqueue_rmac_table(struct hash_bucket *backet, void *arg)
@@ -988,7 +1003,7 @@ static void fpm_enqueue_rmac_table(struct hash_bucket *backet, void *arg)
 	bool sticky;
 
 	/* Entry already sent. */
-	if (CHECK_FLAG(zrmac->flags, ZEBRA_MAC_FPM_SENT))
+	if (CHECK_FLAG(zrmac->flags, ZEBRA_MAC_FPM_SENT) || !fra->complete)
 		return;
 
 	sticky = !!CHECK_FLAG(zrmac->flags,
@@ -1004,6 +1019,7 @@ static void fpm_enqueue_rmac_table(struct hash_bucket *backet, void *arg)
 	if (fpm_nl_enqueue(fra->fnc, fra->ctx) == -1) {
 		thread_add_timer(zrouter.master, fpm_rmac_send,
 				 fra->fnc, 1, &fra->fnc->t_rmacwalk);
+		fra->complete = false;
 	}
 }
@@ -1022,9 +1038,14 @@ static int fpm_rmac_send(struct thread *t)
 	fra.fnc = THREAD_ARG(t);
 	fra.ctx = dplane_ctx_alloc();
+	fra.complete = true;
 	hash_iterate(zrouter.l3vni_table, fpm_enqueue_l3vni_table, &fra);
 	dplane_ctx_fini(&fra.ctx);
 
+	/* RMAC walk completed. */
+	if (fra.complete)
+		WALK_FINISH(fra.fnc, FNE_RMAC_FINISHED);
+
 	return 0;
 }
@@ -1041,7 +1062,14 @@ static void fpm_nhg_reset_cb(struct hash_bucket *bucket, void *arg)
 static int fpm_nhg_reset(struct thread *t)
 {
+	struct fpm_nl_ctx *fnc = THREAD_ARG(t);
+
+	fnc->nhg_complete = false;
 	hash_iterate(zrouter.nhgs_id, fpm_nhg_reset_cb, NULL);
+
+	/* Schedule next step: send next hop groups. */
+	thread_add_event(zrouter.master, fpm_nhg_send, fnc, 0, &fnc->t_nhgwalk);
+
 	return 0;
 }
@@ -1070,6 +1098,9 @@ static int fpm_rib_reset(struct thread *t)
 		}
 	}
 
+	/* Schedule next step: send RIB routes. */
+	thread_add_event(zrouter.master, fpm_rib_send, fnc, 0, &fnc->t_ribwalk);
+
 	return 0;
 }
@@ -1092,8 +1123,15 @@ static void fpm_unset_l3vni_table(struct hash_bucket *backet, void *arg)
 static int fpm_rmac_reset(struct thread *t)
 {
+	struct fpm_nl_ctx *fnc = THREAD_ARG(t);
+
+	fnc->rmac_complete = false;
 	hash_iterate(zrouter.l3vni_table, fpm_unset_l3vni_table, NULL);
+
+	/* Schedule next event: send RMAC entries. */
+	thread_add_event(zrouter.master, fpm_rmac_send, fnc, 0,
+			 &fnc->t_rmacwalk);
+
 	return 0;
 }
@@ -1174,6 +1212,30 @@ static int fpm_process_event(struct thread *t)
 		fpm_reconnect(fnc);
 		break;
 
+	case FNE_INTERNAL_RECONNECT:
+		fpm_reconnect(fnc);
+		break;
+
+	case FNE_NHG_FINISHED:
+		if (IS_ZEBRA_DEBUG_FPM)
+			zlog_debug("%s: next hop groups walk finished",
+				   __func__);
+
+		fnc->nhg_complete = true;
+		break;
+	case FNE_RIB_FINISHED:
+		if (IS_ZEBRA_DEBUG_FPM)
+			zlog_debug("%s: RIB walk finished", __func__);
+
+		fnc->rib_complete = true;
+		break;
+	case FNE_RMAC_FINISHED:
+		if (IS_ZEBRA_DEBUG_FPM)
+			zlog_debug("%s: RMAC walk finished", __func__);
+
+		fnc->rmac_complete = true;
+		break;
+
 	default:
 		if (IS_ZEBRA_DEBUG_FPM)
 			zlog_debug("%s: unhandled event %d", __func__, event);