Merge pull request #3267 from mjstapp/dplane_3

zebra async dataplane: phase 2
This commit is contained in:
Russ White 2018-11-27 13:56:30 -05:00 committed by GitHub
commit 4fedcc6947
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 992 additions and 180 deletions

View File

@ -396,7 +396,7 @@ static int kernel_read(struct thread *thread)
/*
* Filter out messages from self that occur on listener socket,
* caused by our actions on the command socket
* caused by our actions on the command socket(s)
*
* When we add new Netlink message types we probably
* do not need to add them here, since we are filtering
@ -407,7 +407,7 @@ static int kernel_read(struct thread *thread)
* so that we only had to write one way to handle incoming
* address add/delete changes.
*/
static void netlink_install_filter(int sock, __u32 pid)
static void netlink_install_filter(int sock, __u32 pid, __u32 dplane_pid)
{
/*
* BPF_JUMP instructions and where you jump to are based upon
@ -418,7 +418,8 @@ static void netlink_install_filter(int sock, __u32 pid)
struct sock_filter filter[] = {
/*
* Logic:
* if (nlmsg_pid == pid) {
* if (nlmsg_pid == pid ||
* nlmsg_pid == dplane_pid) {
* if (the incoming nlmsg_type ==
* RTM_NEWADDR | RTM_DELADDR)
* keep this message
@ -435,26 +436,30 @@ static void netlink_install_filter(int sock, __u32 pid)
/*
* 1: Compare to pid
*/
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 0, 4),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 1, 0),
/*
* 2: Load the nlmsg_type into BPF register
* 2: Compare to dplane pid
*/
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(dplane_pid), 0, 4),
/*
* 3: Load the nlmsg_type into BPF register
*/
BPF_STMT(BPF_LD | BPF_ABS | BPF_H,
offsetof(struct nlmsghdr, nlmsg_type)),
/*
* 3: Compare to RTM_NEWADDR
* 4: Compare to RTM_NEWADDR
*/
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWADDR), 2, 0),
/*
* 4: Compare to RTM_DELADDR
* 5: Compare to RTM_DELADDR
*/
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELADDR), 1, 0),
/*
* 5: This is the end state of we want to skip the
* 6: This is the end state if we want to skip the
* message
*/
BPF_STMT(BPF_RET | BPF_K, 0),
/* 6: This is the end state of we want to keep
/* 7: This is the end state if we want to keep
* the message
*/
BPF_STMT(BPF_RET | BPF_K, 0xffff),
@ -1102,6 +1107,15 @@ void kernel_init(struct zebra_ns *zns)
exit(-1);
}
snprintf(zns->netlink_dplane.name, sizeof(zns->netlink_dplane.name),
"netlink-dp (NS %u)", zns->ns_id);
zns->netlink_dplane.sock = -1;
if (netlink_socket(&zns->netlink_dplane, 0, zns->ns_id) < 0) {
zlog_err("Failure to create %s socket",
zns->netlink_dplane.name);
exit(-1);
}
/*
* SOL_NETLINK is not available on all platforms yet
* apparently. It's in bits/socket.h which I am not
@ -1110,14 +1124,22 @@ void kernel_init(struct zebra_ns *zns)
#if defined SOL_NETLINK
/*
* Let's tell the kernel that we want to receive extended
* ACKS over our command socket
* ACKS over our command socket(s)
*/
one = 1;
ret = setsockopt(zns->netlink_cmd.sock, SOL_NETLINK, NETLINK_EXT_ACK,
&one, sizeof(one));
if (ret < 0)
zlog_notice("Registration for extended ACK failed : %d %s",
zlog_notice("Registration for extended cmd ACK failed : %d %s",
errno, safe_strerror(errno));
one = 1;
ret = setsockopt(zns->netlink_dplane.sock, SOL_NETLINK, NETLINK_EXT_ACK,
&one, sizeof(one));
if (ret < 0)
zlog_notice("Registration for extended dp ACK failed : %d %s",
errno, safe_strerror(errno));
#endif
@ -1130,12 +1152,18 @@ void kernel_init(struct zebra_ns *zns)
zlog_err("Can't set %s socket error: %s(%d)",
zns->netlink_cmd.name, safe_strerror(errno), errno);
if (fcntl(zns->netlink_dplane.sock, F_SETFL, O_NONBLOCK) < 0)
zlog_err("Can't set %s socket error: %s(%d)",
zns->netlink_dplane.name, safe_strerror(errno), errno);
/* Set receive buffer size if it's set from command line */
if (nl_rcvbufsize)
netlink_recvbuf(&zns->netlink, nl_rcvbufsize);
netlink_install_filter(zns->netlink.sock,
zns->netlink_cmd.snl.nl_pid);
zns->netlink_cmd.snl.nl_pid,
zns->netlink_dplane.snl.nl_pid);
zns->t_netlink = NULL;
thread_add_read(zebrad.master, kernel_read, zns,
@ -1144,7 +1172,7 @@ void kernel_init(struct zebra_ns *zns)
rt_netlink_init();
}
void kernel_terminate(struct zebra_ns *zns)
void kernel_terminate(struct zebra_ns *zns, bool complete)
{
THREAD_READ_OFF(zns->t_netlink);
@ -1157,6 +1185,15 @@ void kernel_terminate(struct zebra_ns *zns)
close(zns->netlink_cmd.sock);
zns->netlink_cmd.sock = -1;
}
}
/* During zebra shutdown, we need to leave the dataplane socket
* around until all work is done.
*/
if (complete) {
if (zns->netlink_dplane.sock >= 0) {
close(zns->netlink_dplane.sock);
zns->netlink_dplane.sock = -1;
}
}
}
#endif /* HAVE_NETLINK */

View File

@ -278,6 +278,11 @@ static const struct message rtm_flag_str[] = {{RTF_UP, "UP"},
/* Kernel routing update socket. */
int routing_sock = -1;
/* Kernel dataplane routing update socket, used in the dataplane pthread
* context.
*/
int dplane_routing_sock = -1;
/* Yes I'm checking ugly routing socket behavior. */
/* #define DEBUG */
@ -1136,7 +1141,7 @@ int rtm_write(int message, union sockunion *dest, union sockunion *mask,
char buf[512];
} msg;
if (routing_sock < 0)
if (dplane_routing_sock < 0)
return ZEBRA_ERR_EPERM;
/* Clear and set rt_msghdr values */
@ -1243,7 +1248,7 @@ int rtm_write(int message, union sockunion *dest, union sockunion *mask,
msg.rtm.rtm_msglen = pnt - (caddr_t)&msg;
ret = write(routing_sock, &msg, msg.rtm.rtm_msglen);
ret = write(dplane_routing_sock, &msg, msg.rtm.rtm_msglen);
if (ret != msg.rtm.rtm_msglen) {
if (errno == EEXIST)
@ -1390,6 +1395,9 @@ static void routing_socket(struct zebra_ns *zns)
{
frr_elevate_privs(&zserv_privs) {
routing_sock = ns_socket(AF_ROUTE, SOCK_RAW, 0, zns->ns_id);
dplane_routing_sock =
ns_socket(AF_ROUTE, SOCK_RAW, 0, zns->ns_id);
}
if (routing_sock < 0) {
@ -1397,6 +1405,12 @@ static void routing_socket(struct zebra_ns *zns)
return;
}
if (dplane_routing_sock < 0) {
flog_err_sys(EC_LIB_SOCKET,
"Can't init kernel dataplane routing socket");
return;
}
/* XXX: Socket should be NONBLOCK, however as we currently
* discard failed writes, this will lead to inconsistencies.
* For now, socket must be blocking.
@ -1415,7 +1429,7 @@ void kernel_init(struct zebra_ns *zns)
routing_socket(zns);
}
void kernel_terminate(struct zebra_ns *zns)
void kernel_terminate(struct zebra_ns *zns, bool complete)
{
return;
}

View File

@ -172,7 +172,7 @@ static void sigint(void)
work_queue_free_and_null(&zebrad.lsp_process_q);
vrf_terminate();
ns_walk_func(zebra_ns_disabled);
ns_walk_func(zebra_ns_early_shutdown);
zebra_ns_notify_close();
access_list_reset();
@ -196,6 +196,9 @@ int zebra_finalize(struct thread *dummy)
{
zlog_info("Zebra final shutdown");
/* Final shutdown of ns resources */
ns_walk_func(zebra_ns_final_shutdown);
/* Stop dplane thread and finish any cleanup */
zebra_dplane_shutdown();
@ -390,6 +393,9 @@ int main(int argc, char **argv)
vty_config_lockless();
zebrad.master = frr_init();
/* Initialize pthread library */
frr_pthread_init();
/* Zebra related initialize. */
zebra_router_init();
zserv_init();
@ -445,8 +451,8 @@ int main(int argc, char **argv)
/* Needed for BSD routing socket. */
pid = getpid();
/* Intialize pthread library */
frr_pthread_init();
/* Start dataplane system */
zebra_dplane_start();
/* Start Zebra API server */
zserv_start(zserv_path);

View File

@ -86,7 +86,7 @@ extern int kernel_del_neigh(struct interface *ifp, struct ipaddr *ip);
*/
extern void interface_list(struct zebra_ns *zns);
extern void kernel_init(struct zebra_ns *zns);
extern void kernel_terminate(struct zebra_ns *zns);
extern void kernel_terminate(struct zebra_ns *zns, bool complete);
extern void macfdb_read(struct zebra_ns *zns);
extern void macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
struct interface *br_if);

File diff suppressed because it is too large Load Diff

View File

@ -29,7 +29,6 @@
#include "zebra/rib.h"
#include "zebra/zserv.h"
/* Key netlink info from zebra ns */
struct zebra_dplane_info {
ns_id_t ns_id;
@ -121,20 +120,28 @@ TAILQ_HEAD(dplane_ctx_q, zebra_dplane_ctx);
*/
void dplane_ctx_fini(struct zebra_dplane_ctx **pctx);
/* Enqueue a context block to caller's tailq. This just exists so that the
/* Enqueue a context block to caller's tailq. This exists so that the
* context struct can remain opaque.
*/
void dplane_ctx_enqueue_tail(struct dplane_ctx_q *q,
const struct zebra_dplane_ctx *ctx);
/* Append a list of context blocks to another list - again, just keeping
* the context struct opaque.
*/
void dplane_ctx_list_append(struct dplane_ctx_q *to_list,
struct dplane_ctx_q *from_list);
/* Dequeue a context block from the head of caller's tailq */
void dplane_ctx_dequeue(struct dplane_ctx_q *q, struct zebra_dplane_ctx **ctxp);
struct zebra_dplane_ctx *dplane_ctx_dequeue(struct dplane_ctx_q *q);
/*
* Accessors for information from the context object
*/
enum zebra_dplane_result dplane_ctx_get_status(
const struct zebra_dplane_ctx *ctx);
void dplane_ctx_set_status(struct zebra_dplane_ctx *ctx,
enum zebra_dplane_result status);
const char *dplane_res2str(enum zebra_dplane_result res);
enum dplane_op_e dplane_ctx_get_op(const struct zebra_dplane_ctx *ctx);
@ -142,6 +149,15 @@ const char *dplane_op2str(enum dplane_op_e op);
const struct prefix *dplane_ctx_get_dest(const struct zebra_dplane_ctx *ctx);
/* Retrieve last/current provider id */
uint32_t dplane_ctx_get_provider(const struct zebra_dplane_ctx *ctx);
/* Providers running before the kernel can control whether a kernel
* update should be done.
*/
void dplane_ctx_set_skip_kernel(struct zebra_dplane_ctx *ctx);
bool dplane_ctx_is_skip_kernel(const struct zebra_dplane_ctx *ctx);
/* Source prefix is a little special - use convention to return NULL
* to mean "no src prefix"
*/
@ -212,9 +228,11 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed);
/*
* Dataplane providers: modules that consume dataplane events.
* Dataplane providers: modules that process or consume dataplane events.
*/
struct zebra_dplane_provider;
/* Support string name for a dataplane provider */
#define DPLANE_PROVIDER_NAMELEN 64
@ -223,7 +241,7 @@ int dplane_show_provs_helper(struct vty *vty, bool detailed);
* followed by the kernel, followed by some post-processing step (such as
* the fpm output stream.)
*/
enum dplane_provider_prio_e {
enum dplane_provider_prio {
DPLANE_PRIO_NONE = 0,
DPLANE_PRIO_PREPROCESS,
DPLANE_PRIO_PRE_KERNEL,
@ -232,28 +250,81 @@ enum dplane_provider_prio_e {
DPLANE_PRIO_LAST
};
/* Provider's entry-point to process a context block */
typedef int (*dplane_provider_process_fp)(struct zebra_dplane_ctx *ctx);
/* Provider's entry-point for shutdown and cleanup */
typedef int (*dplane_provider_fini_fp)(void);
/* Provider registration */
int dplane_provider_register(const char *name,
enum dplane_provider_prio_e prio,
dplane_provider_process_fp fp,
dplane_provider_fini_fp fini_fp);
/*
* Results are returned to zebra core via a callback
/* Provider's entry-point for incoming work, called in the context of the
* dataplane pthread. The dataplane pthread enqueues any new work to the
* provider's 'inbound' queue, then calls the callback. The dataplane
* then checks the provider's outbound queue.
*/
typedef int (*dplane_results_fp)(const struct zebra_dplane_ctx *ctx);
typedef int (*dplane_provider_process_fp)(struct zebra_dplane_provider *prov);
/* Provider's entry-point for shutdown and cleanup. Called with 'early'
* during shutdown, to indicate that the dataplane subsystem is allowing
* work to move through the providers and finish. When called without 'early',
* the provider should release all resources (if it has any allocated).
*/
typedef int (*dplane_provider_fini_fp)(struct zebra_dplane_provider *prov,
bool early);
/* Flags values used during provider registration. */
#define DPLANE_PROV_FLAGS_DEFAULT 0x0
/* Provider will be spawning its own worker thread */
#define DPLANE_PROV_FLAG_THREADED 0x1
/* Provider registration: ordering or priority value, callbacks, and optional
* opaque data value.
*/
int dplane_provider_register(const char *name,
enum dplane_provider_prio prio,
int flags,
dplane_provider_process_fp fp,
dplane_provider_fini_fp fini_fp,
void *data);
/* Accessors for provider attributes */
const char *dplane_provider_get_name(const struct zebra_dplane_provider *prov);
uint32_t dplane_provider_get_id(const struct zebra_dplane_provider *prov);
void *dplane_provider_get_data(const struct zebra_dplane_provider *prov);
bool dplane_provider_is_threaded(const struct zebra_dplane_provider *prov);
/* Lock/unlock a provider's mutex - iff the provider was registered with
* the THREADED flag.
*/
void dplane_provider_lock(struct zebra_dplane_provider *prov);
void dplane_provider_unlock(struct zebra_dplane_provider *prov);
/* Obtain thread_master for dataplane thread */
struct thread_master *dplane_get_thread_master(void);
/* Providers should (generally) limit number of updates per work cycle */
int dplane_provider_get_work_limit(const struct zebra_dplane_provider *prov);
/* Provider api to signal that work/events are available
* for the dataplane pthread.
*/
int dplane_provider_work_ready(void);
/* Dequeue, maintain associated counter and locking */
struct zebra_dplane_ctx *dplane_provider_dequeue_in_ctx(
struct zebra_dplane_provider *prov);
/* Dequeue work to a list, maintain counter and locking, return count */
int dplane_provider_dequeue_in_list(struct zebra_dplane_provider *prov,
struct dplane_ctx_q *listp);
/* Enqueue, maintain associated counter and locking */
void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov,
struct zebra_dplane_ctx *ctx);
/*
* Zebra registers a results callback with the dataplane. The callback is
* called in the dataplane thread context, so the expectation is that the
* context is queued (or that processing is very limited).
* called in the dataplane pthread context, so the expectation is that the
* context is queued for the zebra main pthread or that processing
* is very limited.
*/
typedef int (*dplane_results_fp)(struct zebra_dplane_ctx *ctx);
int dplane_results_register(dplane_results_fp fp);
/*
@ -262,9 +333,16 @@ int dplane_results_register(dplane_results_fp fp);
*/
void zebra_dplane_init(void);
/*
* Start the dataplane pthread. This step needs to be run later than the
* 'init' step, in case zebra has fork-ed.
*/
void zebra_dplane_start(void);
/* Finalize/cleanup apis, one called early as shutdown is starting,
* one called late at the end of zebra shutdown, and then one called
* from the zebra main thread to stop the dplane thread free all resources.
* from the zebra main pthread to stop the dplane pthread and
* free all resources.
*
* Zebra expects to try to clean up all vrfs and all routes during
* shutdown, so the dplane must be available until very late.

View File

@ -47,6 +47,7 @@ DEFINE_MTYPE(ZEBRA, ZEBRA_NS, "Zebra Name Space")
static struct zebra_ns *dzns;
static int logicalrouter_config_write(struct vty *vty);
static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete);
struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id)
{
@ -111,7 +112,7 @@ int zebra_ns_disabled(struct ns *ns)
zlog_info("ZNS %s with id %u (disabled)", ns->name, ns->ns_id);
if (!zns)
return 0;
return zebra_ns_disable(ns->ns_id, (void **)&zns);
return zebra_ns_disable_internal(zns, true);
}
/* Do global enable actions - open sockets, read kernel config etc. */
@ -135,17 +136,18 @@ int zebra_ns_enable(ns_id_t ns_id, void **info)
return 0;
}
int zebra_ns_disable(ns_id_t ns_id, void **info)
/* Common handler for ns disable - this can be called during ns config,
* or during zebra shutdown.
*/
static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete)
{
struct zebra_ns *zns = (struct zebra_ns *)(*info);
route_table_finish(zns->if_table);
zebra_vxlan_ns_disable(zns);
#if defined(HAVE_RTADV)
rtadv_terminate(zns);
#endif
kernel_terminate(zns);
kernel_terminate(zns, complete);
table_manager_disable(zns->ns_id);
@ -154,6 +156,33 @@ int zebra_ns_disable(ns_id_t ns_id, void **info)
return 0;
}
/* Early phase of zebra shutdown: perform a partial cleanup of the
 * namespace while the async dataplane is still running.
 *
 * Passes 'complete' == false to the common disable handler. Returns 0
 * when the ns has no zebra_ns attached, otherwise the handler's result.
 */
int zebra_ns_early_shutdown(struct ns *ns)
{
	struct zebra_ns *zns;

	zns = ns->info;
	if (!zns)
		return 0;

	return zebra_ns_disable_internal(zns, false);
}
/* Final phase of zebra shutdown, run after all dataplane work is
 * complete: call kernel_terminate() with 'complete' set to true.
 *
 * An ns with no zebra_ns attached is skipped. Always returns 0.
 */
int zebra_ns_final_shutdown(struct ns *ns)
{
	struct zebra_ns *zns = ns->info;

	if (zns != NULL)
		kernel_terminate(zns, true);

	return 0;
}
int zebra_ns_init(void)
{

View File

@ -46,8 +46,9 @@ struct zebra_ns {
ns_id_t ns_id;
#ifdef HAVE_NETLINK
struct nlsock netlink; /* kernel messages */
struct nlsock netlink_cmd; /* command channel */
struct nlsock netlink; /* kernel messages */
struct nlsock netlink_cmd; /* command channel */
struct nlsock netlink_dplane; /* dataplane channel */
struct thread *t_netlink;
#endif
@ -62,7 +63,8 @@ struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id);
int zebra_ns_init(void);
int zebra_ns_enable(ns_id_t ns_id, void **info);
int zebra_ns_disabled(struct ns *ns);
int zebra_ns_disable(ns_id_t ns_id, void **info);
int zebra_ns_early_shutdown(struct ns *ns);
int zebra_ns_final_shutdown(struct ns *ns);
int zebra_ns_config_write(struct vty *vty, struct ns *ns);

View File

@ -1932,11 +1932,10 @@ static void rib_process_after(struct zebra_dplane_ctx *ctx)
op = dplane_ctx_get_op(ctx);
status = dplane_ctx_get_status(ctx);
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) {
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
zlog_debug("%u:%s Processing dplane ctx %p, op %s result %s",
dplane_ctx_get_vrf(ctx), dest_str, ctx,
dplane_op2str(op), dplane_res2str(status));
}
if (op == DPLANE_OP_ROUTE_DELETE) {
/*
@ -3267,7 +3266,7 @@ static int rib_process_dplane_results(struct thread *thread)
pthread_mutex_lock(&dplane_mutex);
{
/* Dequeue context block */
dplane_ctx_dequeue(&rib_dplane_q, &ctx);
ctx = dplane_ctx_dequeue(&rib_dplane_q);
}
pthread_mutex_unlock(&dplane_mutex);
@ -3289,7 +3288,7 @@ static int rib_process_dplane_results(struct thread *thread)
* the dataplane pthread. We enqueue the results here for processing by
* the main thread later.
*/
static int rib_dplane_results(const struct zebra_dplane_ctx *ctx)
static int rib_dplane_results(struct zebra_dplane_ctx *ctx)
{
/* Take lock controlling queue of results */
pthread_mutex_lock(&dplane_mutex);