diff --git a/zebra/debug.c b/zebra/debug.c index 19582bb090..0eb06d7f25 100644 --- a/zebra/debug.c +++ b/zebra/debug.c @@ -275,6 +275,7 @@ DEFUN (debug_zebra_dplane, "Detailed debug information\n") { int idx = 0; + SET_FLAG(zebra_debug_dplane, ZEBRA_DEBUG_DPLANE); if (argv_find(argv, argc, "detailed", &idx)) diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c index e6610f21be..0772c59b92 100644 --- a/zebra/kernel_netlink.c +++ b/zebra/kernel_netlink.c @@ -136,6 +136,7 @@ extern uint32_t nl_rcvbufsize; extern struct zebra_privs_t zserv_privs; + int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns_id, int startup) { /* @@ -313,11 +314,17 @@ bool netlink_read; */ void netlink_read_init(const char *fname) { + struct zebra_dplane_info dp_info; + snprintf(netlink_fuzz_file, MAXPATHLEN, "%s", fname); /* Creating this fake socket for testing purposes */ struct zebra_ns *zns = zebra_ns_lookup(NS_DEFAULT); - netlink_parse_info(netlink_information_fetch, &zns->netlink, zns, 1, 0); + /* Capture key info from zns struct */ + zebra_dplane_info_from_zns(&dp_info, zns, false); + + netlink_parse_info(netlink_information_fetch, &zns->netlink, + &dp_info, 1, 0); } /** @@ -678,7 +685,8 @@ static void netlink_parse_extended_ack(struct nlmsghdr *h) * the filter. */ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int), - struct nlsock *nl, struct zebra_dplane_info *zns, + const struct nlsock *nl, + const struct zebra_dplane_info *zns, int count, int startup) { int status; @@ -919,28 +927,27 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int), } /* - * netlink_talk + * netlink_talk_info * * sendmsg() to netlink socket then recvmsg(). 
* Calls netlink_parse_info to parse returned data * * filter -> The filter to read final results from kernel * nlmsghdr -> The data to send to the kernel - * nl -> The netlink socket information - * zns -> The zebra namespace information + * dp_info -> The dataplane and netlink socket information * startup -> Are we reading in under startup conditions * This is passed through eventually to filter. */ -int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup), - struct nlmsghdr *n, struct nlsock *nl, struct zebra_ns *zns, - int startup) +int netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup), + struct nlmsghdr *n, + const struct zebra_dplane_info *dp_info, int startup) { int status = 0; struct sockaddr_nl snl; struct iovec iov; struct msghdr msg; int save_errno = 0; - struct zebra_dplane_info dp_info; + const struct nlsock *nl; memset(&snl, 0, sizeof snl); memset(&iov, 0, sizeof iov); @@ -955,7 +962,8 @@ int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup), snl.nl_family = AF_NETLINK; - n->nlmsg_seq = ++nl->seq; + nl = &(dp_info->nls); + n->nlmsg_seq = nl->seq; n->nlmsg_pid = nl->snl.nl_pid; if (IS_ZEBRA_DEBUG_KERNEL) @@ -982,13 +990,32 @@ int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup), return -1; } - /* * Get reply from netlink socket. * The reply should either be an acknowlegement or an error. */ + return netlink_parse_info(filter, nl, dp_info, 0, startup); +} + +/* + * Synchronous version of netlink_talk_info. Converts args to suit the + * common version, which is suitable for both sync and async use. + */ +int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup), + struct nlmsghdr *n, struct nlsock *nl, struct zebra_ns *zns, + int startup) +{ + struct zebra_dplane_info dp_info; + + /* Increment sequence number before capturing snapshot of ns socket + * info. 
+ */ + nl->seq++; + + /* Capture info in intermediate info struct */ zebra_dplane_info_from_zns(&dp_info, zns, (nl == &(zns->netlink_cmd))); - return netlink_parse_info(filter, nl, &dp_info, 0, startup); + + return netlink_talk_info(filter, n, &dp_info, startup); } /* Issue request message to kernel via netlink socket. GET messages diff --git a/zebra/kernel_netlink.h b/zebra/kernel_netlink.h index d78958d72e..9918729eb6 100644 --- a/zebra/kernel_netlink.h +++ b/zebra/kernel_netlink.h @@ -52,12 +52,18 @@ extern bool netlink_read; extern void netlink_read_init(const char *fname); #endif /* HANDLE_NETLINK_FUZZING */ extern int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int), - struct nlsock *nl, struct zebra_dplane_info *zns, + const struct nlsock *nl, + const struct zebra_dplane_info *dp_info, int count, int startup); extern int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns, int startup); extern int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup), struct nlmsghdr *n, struct nlsock *nl, struct zebra_ns *zns, int startup); +/* Version with 'info' struct only */ +int netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup), + struct nlmsghdr *n, + const struct zebra_dplane_info *dp_info, int startup); + extern int netlink_request(struct nlsock *nl, struct nlmsghdr *n); #endif /* HAVE_NETLINK */ diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c index 9c3d9adefb..bbae6061da 100644 --- a/zebra/kernel_socket.c +++ b/zebra/kernel_socket.c @@ -48,6 +48,7 @@ #include "zebra/kernel_socket.h" #include "zebra/rib.h" #include "zebra/zebra_errors.h" +#include "zebra/zebra_ptm.h" extern struct zebra_privs_t zserv_privs; diff --git a/zebra/main.c b/zebra/main.c index d8952a7b28..ce18cf849c 100644 --- a/zebra/main.c +++ b/zebra/main.c @@ -143,11 +143,19 @@ static void sigint(void) struct zebra_vrf *zvrf; struct listnode *ln, *nn; struct zserv *client; + static bool sigint_done; + + if (sigint_done) + return; + + 
sigint_done = true; zlog_notice("Terminating on signal"); frr_early_fini(); + zebra_dplane_pre_finish(); + for (ALL_LIST_ELEMENTS(zebrad.client_list, ln, nn, client)) zserv_close_client(client); @@ -172,6 +180,25 @@ static void sigint(void) route_map_finish(); list_delete(&zebrad.client_list); + + /* Indicate that all new dplane work has been enqueued. When that + * work is complete, the dataplane will enqueue an event + * with the 'finalize' function. + */ + zebra_dplane_finish(); +} + +/* + * Final shutdown step for the zebra main thread. This is run after all + * async update processing has completed. + */ +int zebra_finalize(struct thread *dummy) +{ + zlog_info("Zebra final shutdown"); + + /* Stop dplane thread and finish any cleanup */ + zebra_dplane_shutdown(); + work_queue_free_and_null(&zebrad.ribq); meta_queue_free(zebrad.mq); diff --git a/zebra/redistribute.h b/zebra/redistribute.h index f67480da9c..f0dc79574c 100644 --- a/zebra/redistribute.h +++ b/zebra/redistribute.h @@ -38,10 +38,11 @@ extern void zebra_redistribute_default_delete(ZAPI_HANDLER_ARGS); extern void redistribute_update(const struct prefix *p, const struct prefix *src_p, - struct route_entry *, struct route_entry *); + struct route_entry *re, + struct route_entry *prev_re); extern void redistribute_delete(const struct prefix *p, const struct prefix *src_p, - struct route_entry *); + struct route_entry *re); extern void zebra_interface_up_update(struct interface *); extern void zebra_interface_down_update(struct interface *); diff --git a/zebra/rib.h b/zebra/rib.h index f3aead32d8..97eae79f03 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -91,6 +91,9 @@ struct route_entry { /* Nexthop information. 
*/ uint8_t nexthop_num; uint8_t nexthop_active_num; + + /* Sequence value incremented for each dataplane operation */ + uint32_t dplane_sequence; }; /* meta-queue structure: diff --git a/zebra/rt.h b/zebra/rt.h index dbea298584..70ac6f635c 100644 --- a/zebra/rt.h +++ b/zebra/rt.h @@ -32,31 +32,11 @@ #include "zebra/zebra_dplane.h" /* - * Install/delete the specified prefix p from the kernel - * - * old = NULL, new = pointer - Install new - * old = pointer, new = pointer - Route replace Old w/ New - * old = pointer, new = NULL, - Route Delete - * - * Please note not all kernels support route replace - * semantics so we will end up with a delete than - * a re-add. + * Update or delete a prefix from the kernel, + * using info from a dataplane context. */ -extern enum zebra_dplane_result kernel_route_rib(struct route_node *rn, - const struct prefix *p, - const struct prefix *src_p, - struct route_entry *old, - struct route_entry *new); - -/* - * So route install/failure may not be immediately known - * so let's separate it out and allow the result to - * be passed back up. - */ -extern void kernel_route_rib_pass_fail(struct route_node *rn, - const struct prefix *p, - struct route_entry *re, - enum zebra_dplane_status res); +extern enum zebra_dplane_result kernel_route_update( + struct zebra_dplane_ctx *ctx); extern int kernel_address_add_ipv4(struct interface *, struct connected *); extern int kernel_address_delete_ipv4(struct interface *, struct connected *); diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index 795ee2703a..a65d477f03 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -1443,21 +1443,21 @@ static int netlink_neigh_update(int cmd, int ifindex, uint32_t addr, char *lla, 0); } -/* Routing table change via netlink interface. */ -/* Update flag indicates whether this is a "replace" or not. 
*/ -static int netlink_route_multipath(int cmd, const struct prefix *p, - const struct prefix *src_p, - struct route_entry *re, - int update) +/* + * Routing table change via netlink interface, using a dataplane context object + */ +static int netlink_route_multipath(int cmd, struct zebra_dplane_ctx *ctx) { int bytelen; struct sockaddr_nl snl; struct nexthop *nexthop = NULL; unsigned int nexthop_num; - int family = PREFIX_FAMILY(p); + int family; const char *routedesc; int setsrc = 0; union g_addr src; + const struct prefix *p, *src_p; + uint32_t table_id; struct { struct nlmsghdr n; @@ -1465,27 +1465,37 @@ static int netlink_route_multipath(int cmd, const struct prefix *p, char buf[NL_PKT_BUF_SIZE]; } req; - struct zebra_ns *zns; - struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id); + p = dplane_ctx_get_dest(ctx); + src_p = dplane_ctx_get_src(ctx); - zns = zvrf->zns; - memset(&req, 0, sizeof req - NL_PKT_BUF_SIZE); + family = PREFIX_FAMILY(p); + + memset(&req, 0, sizeof(req) - NL_PKT_BUF_SIZE); bytelen = (family == AF_INET ? 4 : 16); req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; - if ((cmd == RTM_NEWROUTE) && update) - req.n.nlmsg_flags |= NLM_F_REPLACE; + + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) { + if ((p->family == AF_INET) || v6_rr_semantics) + req.n.nlmsg_flags |= NLM_F_REPLACE; + } + req.n.nlmsg_type = cmd; - req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid; + + req.n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid; req.r.rtm_family = family; req.r.rtm_dst_len = p->prefixlen; req.r.rtm_src_len = src_p ? 
src_p->prefixlen : 0; - req.r.rtm_protocol = zebra2proto(re->type); req.r.rtm_scope = RT_SCOPE_UNIVERSE; + if (cmd == RTM_DELROUTE) + req.r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx)); + else + req.r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx)); + /* * blackhole routes are not RTN_UNICAST, they are * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT @@ -1497,9 +1507,9 @@ static int netlink_route_multipath(int cmd, const struct prefix *p, if (cmd != RTM_DELROUTE) req.r.rtm_type = RTN_UNICAST; - addattr_l(&req.n, sizeof req, RTA_DST, &p->u.prefix, bytelen); + addattr_l(&req.n, sizeof(req), RTA_DST, &p->u.prefix, bytelen); if (src_p) - addattr_l(&req.n, sizeof req, RTA_SRC, &src_p->u.prefix, + addattr_l(&req.n, sizeof(req), RTA_SRC, &src_p->u.prefix, bytelen); /* Metric. */ @@ -1509,20 +1519,31 @@ static int netlink_route_multipath(int cmd, const struct prefix *p, * path(s) * by the routing protocol and for communicating with protocol peers. */ - addattr32(&req.n, sizeof req, RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC); + addattr32(&req.n, sizeof(req), RTA_PRIORITY, NL_DEFAULT_ROUTE_METRIC); + #if defined(SUPPORT_REALMS) - if (re->tag > 0 && re->tag <= 255) - addattr32(&req.n, sizeof req, RTA_FLOW, re->tag); + { + route_tag_t tag; + + if (cmd == RTM_DELROUTE) + tag = dplane_ctx_get_old_tag(ctx); + else + tag = dplane_ctx_get_tag(ctx); + + if (tag > 0 && tag <= 255) + addattr32(&req.n, sizeof(req), RTA_FLOW, tag); + } #endif /* Table corresponding to this route. 
*/ - if (re->table < 256) - req.r.rtm_table = re->table; + table_id = dplane_ctx_get_table(ctx); + if (table_id < 256) + req.r.rtm_table = table_id; else { req.r.rtm_table = RT_TABLE_UNSPEC; - addattr32(&req.n, sizeof req, RTA_TABLE, re->table); + addattr32(&req.n, sizeof(req), RTA_TABLE, table_id); } - _netlink_route_debug(cmd, p, family, zvrf_id(zvrf), re->table); + _netlink_route_debug(cmd, p, family, dplane_ctx_get_vrf(ctx), table_id); /* * If we are not updating the route and we have received @@ -1530,33 +1551,34 @@ static int netlink_route_multipath(int cmd, const struct prefix *p, * prefix information to tell the kernel to schwack * it. */ - if (!update && cmd == RTM_DELROUTE) + if (cmd == RTM_DELROUTE) goto skip; - if (re->mtu || re->nexthop_mtu) { + if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) { char buf[NL_PKT_BUF_SIZE]; struct rtattr *rta = (void *)buf; - uint32_t mtu = re->mtu; - if (!mtu || (re->nexthop_mtu && re->nexthop_mtu < mtu)) - mtu = re->nexthop_mtu; + uint32_t mtu = dplane_ctx_get_mtu(ctx); + uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx); + + if (!mtu || (nexthop_mtu && nexthop_mtu < mtu)) + mtu = nexthop_mtu; rta->rta_type = RTA_METRICS; rta->rta_len = RTA_LENGTH(0); - rta_addattr_l(rta, NL_PKT_BUF_SIZE, RTAX_MTU, &mtu, sizeof mtu); + rta_addattr_l(rta, NL_PKT_BUF_SIZE, + RTAX_MTU, &mtu, sizeof(mtu)); addattr_l(&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA(rta), RTA_PAYLOAD(rta)); } /* Count overall nexthops so we can decide whether to use singlepath - * or multipath case. */ + * or multipath case. 
+ */ nexthop_num = 0; - for (ALL_NEXTHOPS(re->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) continue; if (cmd == RTM_NEWROUTE && !NEXTHOP_IS_ACTIVE(nexthop->flags)) continue; - if (cmd == RTM_DELROUTE - && !CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB)) - continue; nexthop_num++; } @@ -1564,7 +1586,7 @@ static int netlink_route_multipath(int cmd, const struct prefix *p, /* Singlepath case. */ if (nexthop_num == 1 || multipath_num == 1) { nexthop_num = 0; - for (ALL_NEXTHOPS(re->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { /* * So we want to cover 2 types of blackhole * routes here: @@ -1588,70 +1610,61 @@ static int netlink_route_multipath(int cmd, const struct prefix *p, } if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) { - if (!setsrc) { - if (family == AF_INET) { - if (nexthop->rmap_src.ipv4 - .s_addr - != 0) { - src.ipv4 = - nexthop->rmap_src - .ipv4; - setsrc = 1; - } else if (nexthop->src.ipv4 - .s_addr - != 0) { - src.ipv4 = - nexthop->src - .ipv4; - setsrc = 1; - } - } else if (family == AF_INET6) { - if (!IN6_IS_ADDR_UNSPECIFIED( - &nexthop->rmap_src - .ipv6)) { - src.ipv6 = - nexthop->rmap_src - .ipv6; - setsrc = 1; - } else if ( - !IN6_IS_ADDR_UNSPECIFIED( - &nexthop->src - .ipv6)) { - src.ipv6 = - nexthop->src - .ipv6; - setsrc = 1; - } + + if (setsrc) + continue; + + if (family == AF_INET) { + if (nexthop->rmap_src.ipv4.s_addr + != 0) { + src.ipv4 = + nexthop->rmap_src.ipv4; + setsrc = 1; + } else if (nexthop->src.ipv4.s_addr + != 0) { + src.ipv4 = + nexthop->src.ipv4; + setsrc = 1; + } + } else if (family == AF_INET6) { + if (!IN6_IS_ADDR_UNSPECIFIED( + &nexthop->rmap_src.ipv6)) { + src.ipv6 = + nexthop->rmap_src.ipv6; + setsrc = 1; + } else if ( + !IN6_IS_ADDR_UNSPECIFIED( + &nexthop->src.ipv6)) { + src.ipv6 = + nexthop->src.ipv6; + setsrc = 1; } } continue; } if ((cmd == RTM_NEWROUTE - && NEXTHOP_IS_ACTIVE(nexthop->flags)) - || 
(cmd == RTM_DELROUTE - && CHECK_FLAG(nexthop->flags, - NEXTHOP_FLAG_FIB))) { + && NEXTHOP_IS_ACTIVE(nexthop->flags))) { routedesc = nexthop->rparent ? "recursive, single-path" : "single-path"; _netlink_route_build_singlepath( routedesc, bytelen, nexthop, &req.n, - &req.r, sizeof req, cmd); + &req.r, sizeof(req), cmd); nexthop_num++; break; } } if (setsrc && (cmd == RTM_NEWROUTE)) { if (family == AF_INET) - addattr_l(&req.n, sizeof req, RTA_PREFSRC, + addattr_l(&req.n, sizeof(req), RTA_PREFSRC, &src.ipv4, bytelen); else if (family == AF_INET6) - addattr_l(&req.n, sizeof req, RTA_PREFSRC, + addattr_l(&req.n, sizeof(req), RTA_PREFSRC, &src.ipv6, bytelen); } - } else { + } else { /* Multipath case */ char buf[NL_PKT_BUF_SIZE]; struct rtattr *rta = (void *)buf; struct rtnexthop *rtnh; @@ -1662,57 +1675,46 @@ static int netlink_route_multipath(int cmd, const struct prefix *p, rtnh = RTA_DATA(rta); nexthop_num = 0; - for (ALL_NEXTHOPS(re->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { if (nexthop_num >= multipath_num) break; if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) { /* This only works for IPv4 now */ - if (!setsrc) { - if (family == AF_INET) { - if (nexthop->rmap_src.ipv4 - .s_addr - != 0) { - src.ipv4 = - nexthop->rmap_src - .ipv4; - setsrc = 1; - } else if (nexthop->src.ipv4 - .s_addr - != 0) { - src.ipv4 = - nexthop->src - .ipv4; - setsrc = 1; - } - } else if (family == AF_INET6) { - if (!IN6_IS_ADDR_UNSPECIFIED( - &nexthop->rmap_src - .ipv6)) { - src.ipv6 = - nexthop->rmap_src - .ipv6; - setsrc = 1; - } else if ( - !IN6_IS_ADDR_UNSPECIFIED( - &nexthop->src - .ipv6)) { - src.ipv6 = - nexthop->src - .ipv6; - setsrc = 1; - } + if (setsrc) + continue; + + if (family == AF_INET) { + if (nexthop->rmap_src.ipv4.s_addr + != 0) { + src.ipv4 = + nexthop->rmap_src.ipv4; + setsrc = 1; + } else if (nexthop->src.ipv4.s_addr + != 0) { + src.ipv4 = + nexthop->src.ipv4; + setsrc = 1; + } + } else if (family == AF_INET6) { + if 
(!IN6_IS_ADDR_UNSPECIFIED( + &nexthop->rmap_src.ipv6)) { + src.ipv6 = + nexthop->rmap_src.ipv6; + setsrc = 1; + } else if ( + !IN6_IS_ADDR_UNSPECIFIED( + &nexthop->src.ipv6)) { + src.ipv6 = + nexthop->src.ipv6; + setsrc = 1; } } - continue; } if ((cmd == RTM_NEWROUTE - && NEXTHOP_IS_ACTIVE(nexthop->flags)) - || (cmd == RTM_DELROUTE - && CHECK_FLAG(nexthop->flags, - NEXTHOP_FLAG_FIB))) { + && NEXTHOP_IS_ACTIVE(nexthop->flags))) { routedesc = nexthop->rparent ? "recursive, multipath" : "multipath"; @@ -1735,10 +1737,10 @@ static int netlink_route_multipath(int cmd, const struct prefix *p, } if (setsrc && (cmd == RTM_NEWROUTE)) { if (family == AF_INET) - addattr_l(&req.n, sizeof req, RTA_PREFSRC, + addattr_l(&req.n, sizeof(req), RTA_PREFSRC, &src.ipv4, bytelen); else if (family == AF_INET6) - addattr_l(&req.n, sizeof req, RTA_PREFSRC, + addattr_l(&req.n, sizeof(req), RTA_PREFSRC, &src.ipv6, bytelen); if (IS_ZEBRA_DEBUG_KERNEL) zlog_debug("Setting source"); @@ -1760,12 +1762,12 @@ static int netlink_route_multipath(int cmd, const struct prefix *p, skip: /* Destination netlink address. */ - memset(&snl, 0, sizeof snl); + memset(&snl, 0, sizeof(snl)); snl.nl_family = AF_NETLINK; /* Talk to netlink socket. */ - return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns, - 0); + return netlink_talk_info(netlink_talk_filter, &req.n, + dplane_ctx_get_ns(ctx), 0); } int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in) @@ -1821,25 +1823,30 @@ int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in) return suc; } -enum zebra_dplane_result kernel_route_rib(struct route_node *rn, - const struct prefix *p, - const struct prefix *src_p, - struct route_entry *old, - struct route_entry *new) +/* + * Update or delete a prefix from the kernel, + * using info from a dataplane context. 
+ */ +enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) { - int ret = 0; + int cmd, ret; + const struct prefix *p = dplane_ctx_get_dest(ctx); + struct nexthop *nexthop; - assert(old || new); + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) { + cmd = RTM_DELROUTE; + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) { + cmd = RTM_NEWROUTE; + } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) { - if (new) { - if (p->family == AF_INET || v6_rr_semantics) - ret = netlink_route_multipath(RTM_NEWROUTE, p, src_p, - new, (old) ? 1 : 0); - else { + if (p->family == AF_INET || v6_rr_semantics) { + /* Single 'replace' operation */ + cmd = RTM_NEWROUTE; + } else { /* * So v6 route replace semantics are not in * the kernel at this point as I understand it. - * So let's do a delete than an add. + * so let's do a delete then an add. * In the future once v6 route replace semantics * are in we can figure out what to do here to * allow working with old and new kernels. @@ -1848,27 +1855,37 @@ enum zebra_dplane_result kernel_route_rib(struct route_node *rn, * of the route delete. If that happens yeah we're * screwed. */ - if (old) - netlink_route_multipath(RTM_DELROUTE, p, src_p, - old, 0); - ret = netlink_route_multipath(RTM_NEWROUTE, p, src_p, - new, 0); + (void)netlink_route_multipath(RTM_DELROUTE, ctx); + cmd = RTM_NEWROUTE; } - kernel_route_rib_pass_fail(rn, p, new, - (!ret) ? ZEBRA_DPLANE_INSTALL_SUCCESS - : ZEBRA_DPLANE_INSTALL_FAILURE); - return ZEBRA_DPLANE_REQUEST_SUCCESS; + + } else { + return ZEBRA_DPLANE_REQUEST_FAILURE; } - if (old) { - ret = netlink_route_multipath(RTM_DELROUTE, p, src_p, old, 0); + ret = netlink_route_multipath(cmd, ctx); + if ((cmd == RTM_NEWROUTE) && (ret == 0)) { + /* Update installed nexthops to signal which have been + * installed. 
+ */ + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) { + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; - kernel_route_rib_pass_fail(rn, p, old, - (!ret) ? ZEBRA_DPLANE_DELETE_SUCCESS - : ZEBRA_DPLANE_DELETE_FAILURE); + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) { + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + + /* If we're only allowed a single nh, don't + * continue. + */ + if (multipath_num == 1) + break; + } + } } - return ZEBRA_DPLANE_REQUEST_SUCCESS; + return (ret == 0 ? + ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE); } int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla, diff --git a/zebra/rt_socket.c b/zebra/rt_socket.c index c49dc7bab2..99a7ca5d55 100644 --- a/zebra/rt_socket.c +++ b/zebra/rt_socket.c @@ -91,7 +91,7 @@ static int kernel_rtm_add_labels(struct mpls_label_stack *nh_label, /* Interface between zebra message and rtm message. */ static int kernel_rtm_ipv4(int cmd, const struct prefix *p, - struct route_entry *re) + const struct nexthop_group *ng, uint32_t metric) { struct sockaddr_in *mask = NULL; @@ -126,7 +126,7 @@ static int kernel_rtm_ipv4(int cmd, const struct prefix *p, #endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */ /* Make gateway. */ - for (ALL_NEXTHOPS(re->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(ng, nexthop)) { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) continue; @@ -139,8 +139,7 @@ static int kernel_rtm_ipv4(int cmd, const struct prefix *p, * other than ADD and DELETE? */ if ((cmd == RTM_ADD && NEXTHOP_IS_ACTIVE(nexthop->flags)) - || (cmd == RTM_DELETE - && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))) { + || (cmd == RTM_DELETE)) { if (nexthop->type == NEXTHOP_TYPE_IPV4 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) { sin_gate.sin_addr = nexthop->gate.ipv4; @@ -181,14 +180,13 @@ static int kernel_rtm_ipv4(int cmd, const struct prefix *p, (union sockunion *)mask, gate ? 
(union sockunion *)&sin_gate : NULL, - smplsp, ifindex, bh_type, re->metric); + smplsp, ifindex, bh_type, metric); - if (IS_ZEBRA_DEBUG_RIB) { + if (IS_ZEBRA_DEBUG_KERNEL) { if (!gate) { zlog_debug( - "%s: %s: attention! gate not found for re %p", - __func__, prefix_buf, re); - route_entry_dump(p, NULL, re); + "%s: %s: attention! gate not found for re", + __func__, prefix_buf); } else inet_ntop(AF_INET, &sin_gate.sin_addr, gate_buf, INET_ADDRSTRLEN); @@ -199,10 +197,15 @@ static int kernel_rtm_ipv4(int cmd, const struct prefix *p, * did its work. */ case ZEBRA_ERR_NOERROR: nexthop_num++; - if (IS_ZEBRA_DEBUG_RIB) + if (IS_ZEBRA_DEBUG_KERNEL) zlog_debug( "%s: %s: successfully did NH %s", __func__, prefix_buf, gate_buf); + + if (cmd == RTM_ADD) + SET_FLAG(nexthop->flags, + NEXTHOP_FLAG_FIB); + break; /* The only valid case for this error is kernel's @@ -218,14 +221,8 @@ static int kernel_rtm_ipv4(int cmd, const struct prefix *p, "%s: rtm_write() returned %d for command %d", __func__, error, cmd); continue; - break; - /* Given that our NEXTHOP_FLAG_FIB matches real kernel - * FIB, it isn't - * normal to get any other messages in ANY case. - */ - case ZEBRA_ERR_RTNOEXIST: - case ZEBRA_ERR_RTUNREACH: + /* Note any unexpected status returns */ default: flog_err( EC_LIB_SYSTEM_CALL, @@ -238,7 +235,7 @@ static int kernel_rtm_ipv4(int cmd, const struct prefix *p, break; } } /* if (cmd and flags make sense) */ - else if (IS_ZEBRA_DEBUG_RIB) + else if (IS_ZEBRA_DEBUG_KERNEL) zlog_debug("%s: odd command %s for flags %d", __func__, lookup_msg(rtm_type_str, cmd, NULL), nexthop->flags); @@ -247,8 +244,9 @@ static int kernel_rtm_ipv4(int cmd, const struct prefix *p, /* If there was no useful nexthop, then complain. 
*/ if (nexthop_num == 0) { if (IS_ZEBRA_DEBUG_KERNEL) - zlog_debug("%s: No useful nexthops were found in RIB entry %p", - __func__, re); + zlog_debug("%s: No useful nexthops were found in RIB prefix %s", + __func__, prefix2str(p, prefix_buf, + sizeof(prefix_buf))); return 1; } @@ -281,7 +279,7 @@ static int sin6_masklen(struct in6_addr mask) /* Interface between zebra message and rtm message. */ static int kernel_rtm_ipv6(int cmd, const struct prefix *p, - struct route_entry *re) + const struct nexthop_group *ng, uint32_t metric) { struct sockaddr_in6 *mask; struct sockaddr_in6 sin_dest, sin_mask, sin_gate; @@ -312,7 +310,7 @@ static int kernel_rtm_ipv6(int cmd, const struct prefix *p, #endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */ /* Make gateway. */ - for (ALL_NEXTHOPS(re->ng, nexthop)) { + for (ALL_NEXTHOPS_PTR(ng, nexthop)) { if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) continue; @@ -367,8 +365,11 @@ static int kernel_rtm_ipv6(int cmd, const struct prefix *p, error = rtm_write(cmd, (union sockunion *)&sin_dest, (union sockunion *)mask, gate ? 
(union sockunion *)&sin_gate : NULL, - smplsp, ifindex, bh_type, re->metric); - (void)error; + smplsp, ifindex, bh_type, metric); + + /* Update installed nexthop info on success */ + if ((cmd == RTM_ADD) && (error == ZEBRA_ERR_NOERROR)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); nexthop_num++; } @@ -383,54 +384,64 @@ static int kernel_rtm_ipv6(int cmd, const struct prefix *p, return 0; /*XXX*/ } -static int kernel_rtm(int cmd, const struct prefix *p, struct route_entry *re) +static int kernel_rtm(int cmd, const struct prefix *p, + const struct nexthop_group *ng, uint32_t metric) { switch (PREFIX_FAMILY(p)) { case AF_INET: - return kernel_rtm_ipv4(cmd, p, re); + return kernel_rtm_ipv4(cmd, p, ng, metric); case AF_INET6: - return kernel_rtm_ipv6(cmd, p, re); + return kernel_rtm_ipv6(cmd, p, ng, metric); } return 0; } -enum zebra_dplane_result kernel_route_rib(struct route_node *rn, - const struct prefix *p, - const struct prefix *src_p, - struct route_entry *old, - struct route_entry *new) +/* + * Update or delete a prefix from the kernel, + * using info from a dataplane context struct. 
+ */ +enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx) { - int route = 0; + enum zebra_dplane_result res = ZEBRA_DPLANE_REQUEST_SUCCESS; - if (src_p && src_p->prefixlen) { - flog_warn(EC_ZEBRA_UNSUPPORTED_V6_SRCDEST, - "%s: IPv6 sourcedest routes unsupported!", __func__); - return ZEBRA_DPLANE_REQUEST_FAILURE; + if (dplane_ctx_get_src(ctx) != NULL) { + zlog_err("route add: IPv6 sourcedest routes unsupported!"); + res = ZEBRA_DPLANE_REQUEST_FAILURE; + goto done; } - frr_elevate_privs(&zserv_privs) { + frr_elevate_privs(ZPRIVS_RAISE) { - if (old) - route |= kernel_rtm(RTM_DELETE, p, old); + if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) + kernel_rtm(RTM_DELETE, dplane_ctx_get_dest(ctx), + dplane_ctx_get_ng(ctx), + dplane_ctx_get_metric(ctx)); + else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) + kernel_rtm(RTM_ADD, dplane_ctx_get_dest(ctx), + dplane_ctx_get_ng(ctx), + dplane_ctx_get_metric(ctx)); + else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) { + /* Must do delete and add separately - + * no update available + */ + kernel_rtm(RTM_DELETE, dplane_ctx_get_dest(ctx), + dplane_ctx_get_old_ng(ctx), + dplane_ctx_get_old_metric(ctx)); - if (new) - route |= kernel_rtm(RTM_ADD, p, new); + kernel_rtm(RTM_ADD, dplane_ctx_get_dest(ctx), + dplane_ctx_get_ng(ctx), + dplane_ctx_get_metric(ctx)); + } else { + zlog_err("Invalid routing socket update op %s (%u)", + dplane_op2str(dplane_ctx_get_op(ctx)), + dplane_ctx_get_op(ctx)); + res = ZEBRA_DPLANE_REQUEST_FAILURE; + } + } /* Elevated privs */ - } +done: - if (new) { - kernel_route_rib_pass_fail( - rn, p, new, - (!route) ? ZEBRA_DPLANE_INSTALL_SUCCESS - : ZEBRA_DPLANE_INSTALL_FAILURE); - } else { - kernel_route_rib_pass_fail(rn, p, old, - (!route) - ? 
ZEBRA_DPLANE_DELETE_SUCCESS - : ZEBRA_DPLANE_DELETE_FAILURE); - } - - return ZEBRA_DPLANE_REQUEST_SUCCESS; + return res; } int kernel_neigh_update(int add, int ifindex, uint32_t addr, char *lla, diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index 276b825b34..7ea0a4d47d 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -740,6 +740,20 @@ int zsend_route_notify_owner(struct route_entry *re, const struct prefix *p, re->table, note)); } +/* + * Route-owner notification using info from dataplane update context. + */ +int zsend_route_notify_owner_ctx(const struct zebra_dplane_ctx *ctx, + enum zapi_route_notify_owner note) +{ + return (route_notify_internal(dplane_ctx_get_dest(ctx), + dplane_ctx_get_type(ctx), + dplane_ctx_get_instance(ctx), + dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table(ctx), + note)); +} + void zsend_rule_notify_owner(struct zebra_pbr_rule *rule, enum zapi_rule_notify_owner note) { diff --git a/zebra/zapi_msg.h b/zebra/zapi_msg.h index 29fe59babf..11b469e144 100644 --- a/zebra/zapi_msg.h +++ b/zebra/zapi_msg.h @@ -70,6 +70,8 @@ extern int zsend_pw_update(struct zserv *client, struct zebra_pw *pw); extern int zsend_route_notify_owner(struct route_entry *re, const struct prefix *p, enum zapi_route_notify_owner note); +extern int zsend_route_notify_owner_ctx(const struct zebra_dplane_ctx *ctx, + enum zapi_route_notify_owner note); extern void zsend_rule_notify_owner(struct zebra_pbr_rule *rule, enum zapi_rule_notify_owner note); diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index c0e4939860..0581edfd6d 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -17,5 +17,1117 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include -#include "zebra_dplane.h" +#include "lib/libfrr.h" +#include "lib/debug.h" +#include "lib/frratomic.h" +#include "lib/frr_pthread.h" +#include "lib/memory.h" +#include "lib/queue.h" +#include "lib/zebra.h" +#include "zebra/zebra_memory.h" +#include "zebra/zserv.h" 
+#include "zebra/zebra_dplane.h" +#include "zebra/rt.h" +#include "zebra/debug.h" + +/* Memory type for context blocks */ +DEFINE_MTYPE(ZEBRA, DP_CTX, "Zebra DPlane Ctx") +DEFINE_MTYPE(ZEBRA, DP_PROV, "Zebra DPlane Provider") + +#ifndef AOK +# define AOK 0 +#endif + +/* Default value for max queued incoming updates */ +const uint32_t DPLANE_DEFAULT_MAX_QUEUED = 200; + + +/* Validation check macro for context blocks */ +/* #define DPLANE_DEBUG 1 */ + +#ifdef DPLANE_DEBUG + +# define DPLANE_CTX_VALID(p) \ + assert((p) != NULL) + +#else + +# define DPLANE_CTX_VALID(p) + +#endif /* DPLANE_DEBUG */ + +/* + * The context block used to exchange info about route updates across + * the boundary between the zebra main context (and pthread) and the + * dataplane layer (and pthread). + */ +struct zebra_dplane_ctx { + + /* Operation code */ + enum dplane_op_e zd_op; + + /* Status on return */ + enum zebra_dplane_result zd_status; + + /* TODO -- internal/sub-operation status? */ + enum zebra_dplane_result zd_remote_status; + enum zebra_dplane_result zd_kernel_status; + + /* Dest and (optional) source prefixes */ + struct prefix zd_dest; + struct prefix zd_src; + + bool zd_is_update; + + uint32_t zd_seq; + uint32_t zd_old_seq; + vrf_id_t zd_vrf_id; + uint32_t zd_table_id; + + int zd_type; + int zd_old_type; + + afi_t zd_afi; + safi_t zd_safi; + + route_tag_t zd_tag; + route_tag_t zd_old_tag; + uint32_t zd_metric; + uint32_t zd_old_metric; + uint16_t zd_instance; + uint16_t zd_old_instance; + + uint8_t zd_distance; + uint8_t zd_old_distance; + + uint32_t zd_mtu; + uint32_t zd_nexthop_mtu; + + /* Namespace info */ + struct zebra_dplane_info zd_ns_info; + + /* Nexthops */ + struct nexthop_group zd_ng; + + /* "Previous" nexthops, used only in route updates without netlink */ + struct nexthop_group zd_old_ng; + + /* TODO -- use fixed array of nexthops, to avoid mallocs? 
*/ + + /* Embedded list linkage */ + TAILQ_ENTRY(zebra_dplane_ctx) zd_q_entries; +}; + +/* + * Registration block for one dataplane provider. + */ +struct zebra_dplane_provider { + /* Name */ + char dp_name[DPLANE_PROVIDER_NAMELEN + 1]; + + /* Priority, for ordering among providers */ + uint8_t dp_priority; + + /* Id value */ + uint32_t dp_id; + + dplane_provider_process_fp dp_fp; + + dplane_provider_fini_fp dp_fini; + + _Atomic uint64_t dp_in_counter; + _Atomic uint64_t dp_error_counter; + + /* Embedded list linkage */ + TAILQ_ENTRY(zebra_dplane_provider) dp_q_providers; + +}; + +/* + * Globals + */ +static struct zebra_dplane_globals { + /* Mutex to control access to dataplane components */ + pthread_mutex_t dg_mutex; + + /* Results callback registered by zebra 'core' */ + dplane_results_fp dg_results_cb; + + /* Sentinel for beginning of shutdown */ + volatile bool dg_is_shutdown; + + /* Sentinel for end of shutdown */ + volatile bool dg_run; + + /* Route-update context queue inbound to the dataplane */ + TAILQ_HEAD(zdg_ctx_q, zebra_dplane_ctx) dg_route_ctx_q; + + /* Ordered list of providers */ + TAILQ_HEAD(zdg_prov_q, zebra_dplane_provider) dg_providers_q; + + /* Counter used to assign internal ids to providers */ + uint32_t dg_provider_id; + + /* Limit number of pending, unprocessed updates */ + _Atomic uint32_t dg_max_queued_updates; + + _Atomic uint64_t dg_routes_in; + _Atomic uint32_t dg_routes_queued; + _Atomic uint32_t dg_routes_queued_max; + _Atomic uint64_t dg_route_errors; + + /* Event-delivery context 'master' for the dplane */ + struct thread_master *dg_master; + + /* Event/'thread' pointer for queued updates */ + struct thread *dg_t_update; + + /* Event pointer for pending shutdown check loop */ + struct thread *dg_t_shutdown_check; + +} zdplane_info; + +/* + * Lock and unlock for interactions with the zebra 'core' + */ +#define DPLANE_LOCK() pthread_mutex_lock(&zdplane_info.dg_mutex) + +#define DPLANE_UNLOCK() 
pthread_mutex_unlock(&zdplane_info.dg_mutex)
+
+/* Prototypes */
+static int dplane_route_process(struct thread *event);
+
+/*
+ * Public APIs
+ */
+
+/*
+ * Allocate a dataplane update context
+ */
+static struct zebra_dplane_ctx *dplane_ctx_alloc(void)
+{
+	struct zebra_dplane_ctx *p;
+
+	/* TODO -- just alloc'ing memory, but would like to maintain
+	 * a pool
+	 */
+	p = XCALLOC(MTYPE_DP_CTX, sizeof(struct zebra_dplane_ctx));
+
+	return p;
+}
+
+/*
+ * Free a dataplane results context.
+ *
+ * Releases the nexthop lists embedded in the context (current and 'old'),
+ * frees the context block itself, and clears the caller's pointer.
+ * A NULL 'pctx', or a 'pctx' that points at a NULL context, is a no-op
+ * (in DPLANE_DEBUG builds the latter still trips the validity assert).
+ */
+static void dplane_ctx_free(struct zebra_dplane_ctx **pctx)
+{
+	if (pctx == NULL)
+		return;
+
+	DPLANE_CTX_VALID(*pctx);
+
+	/* The validity check compiles to nothing unless DPLANE_DEBUG is
+	 * defined, so guard the dereferences below explicitly.
+	 */
+	if (*pctx == NULL)
+		return;
+
+	/* TODO -- just freeing memory, but would like to maintain
+	 * a pool
+	 */
+
+	/* Free embedded nexthops */
+	if ((*pctx)->zd_ng.nexthop) {
+		/* This deals with recursive nexthops too */
+		nexthops_free((*pctx)->zd_ng.nexthop);
+	}
+
+	if ((*pctx)->zd_old_ng.nexthop) {
+		/* This deals with recursive nexthops too */
+		nexthops_free((*pctx)->zd_old_ng.nexthop);
+	}
+
+	XFREE(MTYPE_DP_CTX, *pctx);
+	*pctx = NULL;
+}
+
+/*
+ * Return a context block to the dplane module after processing
+ */
+void dplane_ctx_fini(struct zebra_dplane_ctx **pctx)
+{
+	/* TODO -- enqueue for next provider; for now, just free */
+	dplane_ctx_free(pctx);
+}
+
+/* Enqueue a context block */
+void dplane_ctx_enqueue_tail(struct dplane_ctx_q *q,
+			     const struct zebra_dplane_ctx *ctx)
+{
+	TAILQ_INSERT_TAIL(q, (struct zebra_dplane_ctx *)ctx, zd_q_entries);
+}
+
+/* Dequeue a context block from the head of a list */
+void dplane_ctx_dequeue(struct dplane_ctx_q *q, struct zebra_dplane_ctx **ctxp)
+{
+	struct zebra_dplane_ctx *ctx = TAILQ_FIRST(q);
+
+	if (ctx)
+		TAILQ_REMOVE(q, ctx, zd_q_entries);
+
+	*ctxp = ctx;
+}
+
+/*
+ * Accessors for information from the context object
+ */
+enum zebra_dplane_result dplane_ctx_get_status(
+	const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_status;
+}
+
+enum dplane_op_e dplane_ctx_get_op(const struct zebra_dplane_ctx *ctx)
+{
+ DPLANE_CTX_VALID(ctx); + + return ctx->zd_op; +} + +const char *dplane_op2str(enum dplane_op_e op) +{ + const char *ret = "UNKNOWN"; + + switch (op) { + case DPLANE_OP_NONE: + ret = "NONE"; + break; + + /* Route update */ + case DPLANE_OP_ROUTE_INSTALL: + ret = "ROUTE_INSTALL"; + break; + case DPLANE_OP_ROUTE_UPDATE: + ret = "ROUTE_UPDATE"; + break; + case DPLANE_OP_ROUTE_DELETE: + ret = "ROUTE_DELETE"; + break; + + }; + + return ret; +} + +const char *dplane_res2str(enum zebra_dplane_result res) +{ + const char *ret = ""; + + switch (res) { + case ZEBRA_DPLANE_REQUEST_FAILURE: + ret = "FAILURE"; + break; + case ZEBRA_DPLANE_REQUEST_QUEUED: + ret = "QUEUED"; + break; + case ZEBRA_DPLANE_REQUEST_SUCCESS: + ret = "SUCCESS"; + break; + }; + + return ret; +} + +const struct prefix *dplane_ctx_get_dest(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->zd_dest); +} + +/* Source prefix is a little special - return NULL for "no src prefix" */ +const struct prefix *dplane_ctx_get_src(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + if (ctx->zd_src.prefixlen == 0 && + IN6_IS_ADDR_UNSPECIFIED(&(ctx->zd_src.u.prefix6))) { + return NULL; + } else { + return &(ctx->zd_src); + } +} + +bool dplane_ctx_is_update(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_is_update; +} + +uint32_t dplane_ctx_get_seq(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_seq; +} + +uint32_t dplane_ctx_get_old_seq(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_old_seq; +} + +vrf_id_t dplane_ctx_get_vrf(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_vrf_id; +} + +int dplane_ctx_get_type(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_type; +} + +int dplane_ctx_get_old_type(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->zd_old_type; +} + +afi_t 
dplane_ctx_get_afi(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_afi;
+}
+
+safi_t dplane_ctx_get_safi(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_safi;
+}
+
+uint32_t dplane_ctx_get_table(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_table_id;
+}
+
+route_tag_t dplane_ctx_get_tag(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_tag;
+}
+
+route_tag_t dplane_ctx_get_old_tag(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_old_tag;
+}
+
+uint16_t dplane_ctx_get_instance(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_instance;
+}
+
+/*
+ * Instance value recorded for the 'old' route of an update. The context
+ * keeps it in zd_old_instance, separate from the current route's
+ * zd_instance, so this accessor must read the 'old' field.
+ */
+uint16_t dplane_ctx_get_old_instance(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_old_instance;
+}
+
+uint32_t dplane_ctx_get_metric(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_metric;
+}
+
+uint32_t dplane_ctx_get_old_metric(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_old_metric;
+}
+
+uint32_t dplane_ctx_get_mtu(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_mtu;
+}
+
+uint32_t dplane_ctx_get_nh_mtu(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_nexthop_mtu;
+}
+
+uint8_t dplane_ctx_get_distance(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_distance;
+}
+
+uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return ctx->zd_old_distance;
+}
+
+const struct nexthop_group *dplane_ctx_get_ng(
+	const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return &(ctx->zd_ng);
+}
+
+const struct nexthop_group *dplane_ctx_get_old_ng(
+	const struct zebra_dplane_ctx *ctx)
+{
+	DPLANE_CTX_VALID(ctx);
+
+	return &(ctx->zd_old_ng);
+}
+
+const struct
zebra_dplane_info *dplane_ctx_get_ns( + const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->zd_ns_info); +} + +/* + * End of dplane context accessors + */ + +/* + * Retrieve the limit on the number of pending, unprocessed updates. + */ +uint32_t dplane_get_in_queue_limit(void) +{ + return atomic_load_explicit(&zdplane_info.dg_max_queued_updates, + memory_order_relaxed); +} + +/* + * Configure limit on the number of pending, queued updates. + */ +void dplane_set_in_queue_limit(uint32_t limit, bool set) +{ + /* Reset to default on 'unset' */ + if (!set) + limit = DPLANE_DEFAULT_MAX_QUEUED; + + atomic_store_explicit(&zdplane_info.dg_max_queued_updates, limit, + memory_order_relaxed); +} + +/* + * Retrieve the current queue depth of incoming, unprocessed updates + */ +uint32_t dplane_get_in_queue_len(void) +{ + return atomic_load_explicit(&zdplane_info.dg_routes_queued, + memory_order_seq_cst); +} + +/* + * Initialize a context block for a route update from zebra data structs. 
+ */ +static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, + enum dplane_op_e op, + struct route_node *rn, + struct route_entry *re) +{ + int ret = EINVAL; + const struct route_table *table = NULL; + const rib_table_info_t *info; + const struct prefix *p, *src_p; + struct zebra_ns *zns; + struct zebra_vrf *zvrf; + struct nexthop *nexthop; + + if (!ctx || !rn || !re) + goto done; + + ctx->zd_op = op; + + ctx->zd_type = re->type; + ctx->zd_old_type = re->type; + + /* Prefixes: dest, and optional source */ + srcdest_rnode_prefixes(rn, &p, &src_p); + + prefix_copy(&(ctx->zd_dest), p); + + if (src_p) + prefix_copy(&(ctx->zd_src), src_p); + else + memset(&(ctx->zd_src), 0, sizeof(ctx->zd_src)); + + ctx->zd_table_id = re->table; + + ctx->zd_metric = re->metric; + ctx->zd_old_metric = re->metric; + ctx->zd_vrf_id = re->vrf_id; + ctx->zd_mtu = re->mtu; + ctx->zd_nexthop_mtu = re->nexthop_mtu; + ctx->zd_instance = re->instance; + ctx->zd_tag = re->tag; + ctx->zd_old_tag = re->tag; + ctx->zd_distance = re->distance; + + table = srcdest_rnode_table(rn); + info = table->info; + + ctx->zd_afi = info->afi; + ctx->zd_safi = info->safi; + + /* Extract ns info - can't use pointers to 'core' structs */ + zvrf = vrf_info_lookup(re->vrf_id); + zns = zvrf->zns; + + zebra_dplane_info_from_zns(&(ctx->zd_ns_info), zns, true /*is_cmd*/); + +#if defined(HAVE_NETLINK) + /* Increment message counter after copying to context struct - may need + * two messages in some 'update' cases. + */ + if (op == DPLANE_OP_ROUTE_UPDATE) + zns->netlink_cmd.seq += 2; + else + zns->netlink_cmd.seq++; +#endif /* NETLINK*/ + + /* Copy nexthops; recursive info is included too */ + copy_nexthops(&(ctx->zd_ng.nexthop), re->ng.nexthop, NULL); + + /* TODO -- maybe use array of nexthops to avoid allocs? */ + + /* Ensure that the dplane's nexthop flag is clear. 
*/ + for (ALL_NEXTHOPS(ctx->zd_ng, nexthop)) + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + + /* Trying out the sequence number idea, so we can try to detect + * when a result is stale. + */ + re->dplane_sequence++; + ctx->zd_seq = re->dplane_sequence; + + ret = AOK; + +done: + return ret; +} + +/* + * Enqueue a new route update, + * and ensure an event is active for the dataplane thread. + */ +static int dplane_route_enqueue(struct zebra_dplane_ctx *ctx) +{ + int ret = EINVAL; + uint32_t high, curr; + + /* Enqueue for processing by the dataplane thread */ + DPLANE_LOCK(); + { + TAILQ_INSERT_TAIL(&zdplane_info.dg_route_ctx_q, ctx, + zd_q_entries); + } + DPLANE_UNLOCK(); + + curr = atomic_add_fetch_explicit(&zdplane_info.dg_routes_queued, + 1, memory_order_seq_cst); + + /* Maybe update high-water counter also */ + high = atomic_load_explicit(&zdplane_info.dg_routes_queued_max, + memory_order_seq_cst); + while (high < curr) { + if (atomic_compare_exchange_weak_explicit( + &zdplane_info.dg_routes_queued_max, + &high, curr, + memory_order_seq_cst, + memory_order_seq_cst)) + break; + } + + /* Ensure that an event for the dataplane thread is active */ + thread_add_event(zdplane_info.dg_master, dplane_route_process, NULL, 0, + &zdplane_info.dg_t_update); + + ret = AOK; + + return ret; +} + +/* + * Attempt to dequeue a route-update block + */ +static struct zebra_dplane_ctx *dplane_route_dequeue(void) +{ + struct zebra_dplane_ctx *ctx = NULL; + + DPLANE_LOCK(); + { + ctx = TAILQ_FIRST(&zdplane_info.dg_route_ctx_q); + if (ctx) { + TAILQ_REMOVE(&zdplane_info.dg_route_ctx_q, + ctx, zd_q_entries); + } + } + DPLANE_UNLOCK(); + + return ctx; +} + +/* + * Utility that prepares a route update and enqueues it for processing + */ +static enum zebra_dplane_result +dplane_route_update_internal(struct route_node *rn, + struct route_entry *re, + struct route_entry *old_re, + enum dplane_op_e op) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret = EINVAL; 
+ struct zebra_dplane_ctx *ctx = NULL; + + /* Obtain context block */ + ctx = dplane_ctx_alloc(); + if (ctx == NULL) { + ret = ENOMEM; + goto done; + } + + /* Init context with info from zebra data structs */ + ret = dplane_ctx_route_init(ctx, op, rn, re); + if (ret == AOK) { + /* Capture some extra info for update case + * where there's a different 'old' route. + */ + if ((op == DPLANE_OP_ROUTE_UPDATE) && + old_re && (old_re != re)) { + ctx->zd_is_update = true; + + old_re->dplane_sequence++; + ctx->zd_old_seq = old_re->dplane_sequence; + + ctx->zd_old_tag = old_re->tag; + ctx->zd_old_type = old_re->type; + ctx->zd_old_instance = old_re->instance; + ctx->zd_old_distance = old_re->distance; + ctx->zd_old_metric = old_re->metric; + +#ifndef HAVE_NETLINK + /* For bsd, capture previous re's nexthops too, sigh. + * We'll need these to do per-nexthop deletes. + */ + copy_nexthops(&(ctx->zd_old_ng.nexthop), + old_re->ng.nexthop, NULL); +#endif /* !HAVE_NETLINK */ + } + + /* Enqueue context for processing */ + ret = dplane_route_enqueue(ctx); + } + +done: + /* Update counter */ + atomic_fetch_add_explicit(&zdplane_info.dg_routes_in, 1, + memory_order_relaxed); + + if (ret == AOK) + result = ZEBRA_DPLANE_REQUEST_QUEUED; + else if (ctx) { + atomic_fetch_add_explicit(&zdplane_info.dg_route_errors, 1, + memory_order_relaxed); + dplane_ctx_free(&ctx); + } + + return result; +} + +/* + * Enqueue a route 'add' for the dataplane. + */ +enum zebra_dplane_result dplane_route_add(struct route_node *rn, + struct route_entry *re) +{ + enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE; + + if (rn == NULL || re == NULL) + goto done; + + ret = dplane_route_update_internal(rn, re, NULL, + DPLANE_OP_ROUTE_INSTALL); + +done: + return ret; +} + +/* + * Enqueue a route update for the dataplane. 
+ */
+enum zebra_dplane_result dplane_route_update(struct route_node *rn,
+					     struct route_entry *re,
+					     struct route_entry *old_re)
+{
+	enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE;
+
+	if (rn == NULL || re == NULL)
+		goto done;
+
+	ret = dplane_route_update_internal(rn, re, old_re,
+					   DPLANE_OP_ROUTE_UPDATE);
+done:
+	return ret;
+}
+
+/*
+ * Enqueue a route removal for the dataplane.
+ */
+enum zebra_dplane_result dplane_route_delete(struct route_node *rn,
+					     struct route_entry *re)
+{
+	enum zebra_dplane_result ret = ZEBRA_DPLANE_REQUEST_FAILURE;
+
+	if (rn == NULL || re == NULL)
+		goto done;
+
+	ret = dplane_route_update_internal(rn, re, NULL,
+					   DPLANE_OP_ROUTE_DELETE);
+
+done:
+	return ret;
+}
+
+/*
+ * Event handler function for routing updates
+ *
+ * Drains the inbound route-update queue: each dequeued context is passed
+ * to kernel_route_update(), its status is recorded in the context, and
+ * the context is handed to the registered results callback. The loop
+ * ends when the queue is empty or when dg_run has been cleared.
+ * The 'event' argument is not used. Always returns 0.
+ */
+static int dplane_route_process(struct thread *event)
+{
+	enum zebra_dplane_result res;
+	struct zebra_dplane_ctx *ctx;
+
+	while (1) {
+		/* Check for shutdown */
+		if (!zdplane_info.dg_run)
+			break;
+
+		/* TODO -- limit number of updates per cycle? */
+		ctx = dplane_route_dequeue();
+		if (ctx == NULL)
+			break;
+
+		/* Update counter */
+		atomic_fetch_sub_explicit(&zdplane_info.dg_routes_queued, 1,
+					  memory_order_relaxed);
+
+		if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) {
+			char dest_str[PREFIX_STRLEN];
+
+			prefix2str(dplane_ctx_get_dest(ctx),
+				   dest_str, sizeof(dest_str));
+
+			zlog_debug("%u:%s Dplane route update ctx %p op %s",
+				   dplane_ctx_get_vrf(ctx), dest_str,
+				   ctx, dplane_op2str(dplane_ctx_get_op(ctx)));
+		}
+
+		/* TODO -- support series of providers */
+
+		/* Initially, just doing kernel-facing update here */
+		res = kernel_route_update(ctx);
+
+		if (res != ZEBRA_DPLANE_REQUEST_SUCCESS)
+			atomic_fetch_add_explicit(&zdplane_info.dg_route_errors,
+						  1, memory_order_relaxed);
+
+		ctx->zd_status = res;
+
+		/* Enqueue result to zebra main context. This loop drops its
+		 * pointer right after the hand-off, so if no results callback
+		 * has been registered the context must be released here:
+		 * calling through a NULL pointer would crash, and skipping
+		 * the call would leak the block.
+		 */
+		if (zdplane_info.dg_results_cb)
+			zdplane_info.dg_results_cb(ctx);
+		else
+			dplane_ctx_fini(&ctx);
+
+		ctx = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler for 'show dplane'
+ */
+int dplane_show_helper(struct vty *vty, bool detailed)
+{
+	uint64_t queued, limit, queue_max, errs, incoming;
+
+	/* Using atomics because counters are being changed in different
+	 * contexts.
+	 */
+	incoming = atomic_load_explicit(&zdplane_info.dg_routes_in,
+					memory_order_relaxed);
+	limit = atomic_load_explicit(&zdplane_info.dg_max_queued_updates,
+				     memory_order_relaxed);
+	queued = atomic_load_explicit(&zdplane_info.dg_routes_queued,
+				      memory_order_relaxed);
+	queue_max = atomic_load_explicit(&zdplane_info.dg_routes_queued_max,
+					 memory_order_relaxed);
+	errs = atomic_load_explicit(&zdplane_info.dg_route_errors,
+				    memory_order_relaxed);
+
+	vty_out(vty, "Route updates: %"PRIu64"\n", incoming);
+	vty_out(vty, "Route update errors: %"PRIu64"\n", errs);
+	vty_out(vty, "Route update queue limit: %"PRIu64"\n", limit);
+	vty_out(vty, "Route update queue depth: %"PRIu64"\n", queued);
+	vty_out(vty, "Route update queue max: %"PRIu64"\n", queue_max);
+
+	return CMD_SUCCESS;
+}
+
+/*
+ * Handler for 'show dplane providers'
+ */
+int dplane_show_provs_helper(struct vty *vty, bool detailed)
+{
+	vty_out(vty, "Zebra dataplane providers:%s\n",
+		(detailed ? " (detailed)" : ""));
+
+	return CMD_SUCCESS;
+}
+
+/*
+ * Provider registration
+ *
+ * Allocates a provider block, copies in the caller's name (truncated to
+ * DPLANE_PROVIDER_NAMELEN characters), assigns the next internal id, and
+ * inserts the block into the global provider list in ascending priority
+ * order; a new provider goes after existing ones of equal priority.
+ *
+ * Returns 0 on success, EINVAL if 'name' or 'fp' is NULL or 'prio' is out
+ * of range, ENOMEM if the provider block cannot be allocated.
+ */
+int dplane_provider_register(const char *name,
+			     enum dplane_provider_prio_e prio,
+			     dplane_provider_process_fp fp,
+			     dplane_provider_fini_fp fini_fp)
+{
+	int ret = 0;
+	struct zebra_dplane_provider *p, *last;
+
+	/* Validate; 'name' is copied with strncpy() below, so it must not
+	 * be NULL.
+	 */
+	if (name == NULL || fp == NULL) {
+		ret = EINVAL;
+		goto done;
+	}
+
+	if (prio <= DPLANE_PRIO_NONE ||
+	    prio > DPLANE_PRIO_LAST) {
+		ret = EINVAL;
+		goto done;
+	}
+
+	/* Allocate and init new provider struct */
+	p = XCALLOC(MTYPE_DP_PROV, sizeof(struct zebra_dplane_provider));
+	if (p == NULL) {
+		ret = ENOMEM;
+		goto done;
+	}
+
+	strncpy(p->dp_name, name, DPLANE_PROVIDER_NAMELEN);
+	p->dp_name[DPLANE_PROVIDER_NAMELEN] = '\0'; /* Belt-and-suspenders */
+
+	p->dp_priority = prio;
+	p->dp_fp = fp;
+	p->dp_fini = fini_fp;
+
+	/* Lock the lock - the dplane pthread may be running */
+	DPLANE_LOCK();
+
+	p->dp_id = ++zdplane_info.dg_provider_id;
+
+	/* Insert into list ordered by priority */
+	TAILQ_FOREACH(last, &zdplane_info.dg_providers_q, dp_q_providers) {
+		if (last->dp_priority > p->dp_priority)
+			break;
+	}
+
+	if (last)
+		TAILQ_INSERT_BEFORE(last, p, dp_q_providers);
+	else
+		TAILQ_INSERT_TAIL(&zdplane_info.dg_providers_q, p,
+				  dp_q_providers);
+
+	/* And unlock */
+	DPLANE_UNLOCK();
+
+done:
+	return ret;
+}
+
+/*
+ * Zebra registers a results callback with the dataplane system
+ */
+int dplane_results_register(dplane_results_fp fp)
+{
+	zdplane_info.dg_results_cb = fp;
+	return AOK;
+}
+
+/*
+ * Initialize the dataplane module during startup, internal/private version
+ */
+static void zebra_dplane_init_internal(struct zebra_t *zebra)
+{
+	memset(&zdplane_info, 0, sizeof(zdplane_info));
+
+	pthread_mutex_init(&zdplane_info.dg_mutex, NULL);
+
+	TAILQ_INIT(&zdplane_info.dg_route_ctx_q);
+	TAILQ_INIT(&zdplane_info.dg_providers_q);
+
+	zdplane_info.dg_max_queued_updates = DPLANE_DEFAULT_MAX_QUEUED;
+
+	/* TODO -- register default kernel 'provider' during init */
+
+	zdplane_info.dg_run = true;
+
+	/* TODO -- start dataplane pthread. We're using the zebra
+	 * core/main thread temporarily
+	 */
+	zdplane_info.dg_master = zebra->master;
+}
+
+/* Indicates zebra shutdown/exit is in progress. Some operations may be
+ * simplified or skipped during shutdown processing.
+ */
+bool dplane_is_in_shutdown(void)
+{
+	return zdplane_info.dg_is_shutdown;
+}
+
+/*
+ * Early or pre-shutdown, de-init notification api. This runs pretty
+ * early during zebra shutdown, as a signal to stop new work and prepare
+ * for updates generated by shutdown/cleanup activity, as zebra tries to
+ * remove everything it's responsible for.
+ * NB: This runs in the main zebra thread context.
+ */ +void zebra_dplane_pre_finish(void) +{ + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Zebra dataplane pre-fini called"); + + zdplane_info.dg_is_shutdown = true; + + /* Notify provider(s) of pending shutdown */ +} + +/* + * Utility to determine whether work remains enqueued within the dplane; + * used during system shutdown processing. + */ +static bool dplane_work_pending(void) +{ + struct zebra_dplane_ctx *ctx; + + /* TODO -- just checking incoming/pending work for now */ + DPLANE_LOCK(); + { + ctx = TAILQ_FIRST(&zdplane_info.dg_route_ctx_q); + } + DPLANE_UNLOCK(); + + return (ctx != NULL); +} + +/* + * Shutdown-time intermediate callback, used to determine when all pending + * in-flight updates are done. If there's still work to do, reschedules itself. + * If all work is done, schedules an event to the main zebra thread for + * final zebra shutdown. + * This runs in the dplane pthread context. + */ +static int dplane_check_shutdown_status(struct thread *event) +{ + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Zebra dataplane shutdown status check called"); + + if (dplane_work_pending()) { + /* Reschedule dplane check on a short timer */ + thread_add_timer_msec(zdplane_info.dg_master, + dplane_check_shutdown_status, + NULL, 100, + &zdplane_info.dg_t_shutdown_check); + + /* TODO - give up and stop waiting after a short time? */ + + } else { + /* We appear to be done - schedule a final callback event + * for the zebra main pthread. + */ + thread_add_event(zebrad.master, zebra_finalize, NULL, 0, NULL); + } + + return 0; +} + +/* + * Shutdown, de-init api. This runs pretty late during shutdown, + * after zebra has tried to free/remove/uninstall all routes during shutdown. + * At this point, dplane work may still remain to be done, so we can't just + * blindly terminate. If there's still work to do, we'll periodically check + * and when done, we'll enqueue a task to the zebra main thread for final + * termination processing. 
+ * + * NB: This runs in the main zebra thread context. + */ +void zebra_dplane_finish(void) +{ + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Zebra dataplane fini called"); + + thread_add_event(zdplane_info.dg_master, + dplane_check_shutdown_status, NULL, 0, + &zdplane_info.dg_t_shutdown_check); +} + +/* + * Final phase of shutdown, after all work enqueued to dplane has been + * processed. This is called from the zebra main pthread context. + */ +void zebra_dplane_shutdown(void) +{ + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug("Zebra dataplane shutdown called"); + + /* Stop dplane thread, if it's running */ + + zdplane_info.dg_run = false; + + THREAD_OFF(zdplane_info.dg_t_update); + + /* TODO */ + /* frr_pthread_stop(...) */ + + /* Notify provider(s) of final shutdown */ + + /* Clean-up provider objects */ + + /* Clean queue(s) */ +} + +/* + * Initialize the dataplane module at startup; called by zebra rib_init() + */ +void zebra_dplane_init(void) +{ + zebra_dplane_init_internal(&zebrad); +} diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index 7cbef7453c..999e0f39e4 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -20,24 +20,22 @@ #ifndef _ZEBRA_DPLANE_H #define _ZEBRA_DPLANE_H 1 -#include "zebra.h" -#include "zserv.h" -#include "prefix.h" -#include "nexthop.h" -#include "nexthop_group.h" +#include "lib/zebra.h" +#include "lib/prefix.h" +#include "lib/nexthop.h" +#include "lib/nexthop_group.h" +#include "lib/openbsd-queue.h" +#include "zebra/zebra_ns.h" +#include "zebra/rib.h" +#include "zebra/zserv.h" -/* - * API between the zebra dataplane system and the main zebra processing - * context. 
- */ - /* Key netlink info from zebra ns */ struct zebra_dplane_info { ns_id_t ns_id; #if defined(HAVE_NETLINK) - uint32_t nl_pid; + struct nlsock nls; bool is_cmd; #endif }; @@ -52,21 +50,13 @@ zebra_dplane_info_from_zns(struct zebra_dplane_info *zns_info, #if defined(HAVE_NETLINK) zns_info->is_cmd = is_cmd; if (is_cmd) { - zns_info->nl_pid = zns->netlink_cmd.snl.nl_pid; + zns_info->nls = zns->netlink_cmd; } else { - zns_info->nl_pid = zns->netlink.snl.nl_pid; + zns_info->nls = zns->netlink; } #endif /* NETLINK */ } -/* - * Enqueue a route install or update for the dataplane. - */ - -/* - * Enqueue a route removal for the dataplane. - */ - /* * Result codes used when returning status back to the main zebra context. */ @@ -96,4 +86,191 @@ enum zebra_dplane_result { ZEBRA_DPLANE_REQUEST_FAILURE, }; +/* + * API between the zebra dataplane system and the main zebra processing + * context. + */ + +/* + * Enqueue a route install or update for the dataplane. + */ +enum dplane_op_e { + DPLANE_OP_NONE = 0, + + /* Route update */ + DPLANE_OP_ROUTE_INSTALL, + DPLANE_OP_ROUTE_UPDATE, + DPLANE_OP_ROUTE_DELETE, + +}; + +/* + * The dataplane context struct is used to exchange info between the main zebra + * context and the dataplane module(s). If these are two independent pthreads, + * they cannot share existing global data structures safely. + */ + +/* Define a tailq list type for context blocks. The list is exposed/public, + * but the internal linkage in the context struct is private, so there + * are accessor apis that support enqueue and dequeue. + */ +TAILQ_HEAD(dplane_ctx_q, zebra_dplane_ctx); + +/* Return a dataplane results context block after use; the caller's pointer will + * be cleared. + */ +void dplane_ctx_fini(struct zebra_dplane_ctx **pctx); + +/* Enqueue a context block to caller's tailq. This just exists so that the + * context struct can remain opaque. 
+ */ +void dplane_ctx_enqueue_tail(struct dplane_ctx_q *q, + const struct zebra_dplane_ctx *ctx); + +/* Dequeue a context block from the head of caller's tailq */ +void dplane_ctx_dequeue(struct dplane_ctx_q *q, struct zebra_dplane_ctx **ctxp); + +/* + * Accessors for information from the context object + */ +enum zebra_dplane_result dplane_ctx_get_status( + const struct zebra_dplane_ctx *ctx); +const char *dplane_res2str(enum zebra_dplane_result res); + +enum dplane_op_e dplane_ctx_get_op(const struct zebra_dplane_ctx *ctx); +const char *dplane_op2str(enum dplane_op_e op); + +const struct prefix *dplane_ctx_get_dest(const struct zebra_dplane_ctx *ctx); + +/* Source prefix is a little special - use convention to return NULL + * to mean "no src prefix" + */ +const struct prefix *dplane_ctx_get_src(const struct zebra_dplane_ctx *ctx); + +bool dplane_ctx_is_update(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_seq(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_old_seq(const struct zebra_dplane_ctx *ctx); +vrf_id_t dplane_ctx_get_vrf(const struct zebra_dplane_ctx *ctx); +int dplane_ctx_get_type(const struct zebra_dplane_ctx *ctx); +int dplane_ctx_get_old_type(const struct zebra_dplane_ctx *ctx); +afi_t dplane_ctx_get_afi(const struct zebra_dplane_ctx *ctx); +safi_t dplane_ctx_get_safi(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_table(const struct zebra_dplane_ctx *ctx); +route_tag_t dplane_ctx_get_tag(const struct zebra_dplane_ctx *ctx); +route_tag_t dplane_ctx_get_old_tag(const struct zebra_dplane_ctx *ctx); +uint16_t dplane_ctx_get_instance(const struct zebra_dplane_ctx *ctx); +uint16_t dplane_ctx_get_old_instance(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_metric(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_old_metric(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_mtu(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_nh_mtu(const struct zebra_dplane_ctx 
*ctx); +uint8_t dplane_ctx_get_distance(const struct zebra_dplane_ctx *ctx); +uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx); + +const struct nexthop_group *dplane_ctx_get_ng( + const struct zebra_dplane_ctx *ctx); +const struct nexthop_group *dplane_ctx_get_old_ng( + const struct zebra_dplane_ctx *ctx); + +const struct zebra_dplane_info *dplane_ctx_get_ns( + const struct zebra_dplane_ctx *ctx); + +/* Indicates zebra shutdown/exit is in progress. Some operations may be + * simplified or skipped during shutdown processing. + */ +bool dplane_is_in_shutdown(void); + +/* + * Enqueue route change operations for the dataplane. + */ +enum zebra_dplane_result dplane_route_add(struct route_node *rn, + struct route_entry *re); + +enum zebra_dplane_result dplane_route_update(struct route_node *rn, + struct route_entry *re, + struct route_entry *old_re); + +enum zebra_dplane_result dplane_route_delete(struct route_node *rn, + struct route_entry *re); + +/* Retrieve the limit on the number of pending, unprocessed updates. */ +uint32_t dplane_get_in_queue_limit(void); + +/* Configure limit on the number of pending, queued updates. If 'unset', reset + * to default value. + */ +void dplane_set_in_queue_limit(uint32_t limit, bool set); + +/* Retrieve the current queue depth of incoming, unprocessed updates */ +uint32_t dplane_get_in_queue_len(void); + +/* + * Vty/cli apis + */ +int dplane_show_helper(struct vty *vty, bool detailed); +int dplane_show_provs_helper(struct vty *vty, bool detailed); + + +/* + * Dataplane providers: modules that consume dataplane events. + */ + +/* Support string name for a dataplane provider */ +#define DPLANE_PROVIDER_NAMELEN 64 + +/* Priority or ordering values for providers. The idea is that there may be + * some pre-processing, followed by an external or remote dataplane, + * followed by the kernel, followed by some post-processing step (such as + * the fpm output stream.) 
+ */ +enum dplane_provider_prio_e { + DPLANE_PRIO_NONE = 0, + DPLANE_PRIO_PREPROCESS, + DPLANE_PRIO_PRE_KERNEL, + DPLANE_PRIO_KERNEL, + DPLANE_PRIO_POSTPROCESS, + DPLANE_PRIO_LAST +}; + +/* Provider's entry-point to process a context block */ +typedef int (*dplane_provider_process_fp)(struct zebra_dplane_ctx *ctx); + +/* Provider's entry-point for shutdown and cleanup */ +typedef int (*dplane_provider_fini_fp)(void); + +/* Provider registration */ +int dplane_provider_register(const char *name, + enum dplane_provider_prio_e prio, + dplane_provider_process_fp fp, + dplane_provider_fini_fp fini_fp); + +/* + * Results are returned to zebra core via a callback + */ +typedef int (*dplane_results_fp)(const struct zebra_dplane_ctx *ctx); + +/* + * Zebra registers a results callback with the dataplane. The callback is + * called in the dataplane thread context, so the expectation is that the + * context is queued (or that processing is very limited). + */ +int dplane_results_register(dplane_results_fp fp); + +/* + * Initialize the dataplane modules at zebra startup. This is currently called + * by the rib module. + */ +void zebra_dplane_init(void); + +/* Finalize/cleanup apis, one called early as shutdown is starting, + * one called late at the end of zebra shutdown, and then one called + * from the zebra main thread to stop the dplane thread free all resources. + * + * Zebra expects to try to clean up all vrfs and all routes during + * shutdown, so the dplane must be available until very late. 
+ */ +void zebra_dplane_pre_finish(void); +void zebra_dplane_finish(void); +void zebra_dplane_shutdown(void); + #endif /* _ZEBRA_DPLANE_H */ diff --git a/zebra/zebra_memory.h b/zebra/zebra_memory.h index e3439d5f64..de55478de2 100644 --- a/zebra/zebra_memory.h +++ b/zebra/zebra_memory.h @@ -34,5 +34,7 @@ DECLARE_MTYPE(STATIC_ROUTE) DECLARE_MTYPE(RIB_DEST) DECLARE_MTYPE(RIB_TABLE_INFO) DECLARE_MTYPE(RNH) +DECLARE_MTYPE(DP_CTX) +DECLARE_MTYPE(DP_PROV) #endif /* _QUAGGA_ZEBRA_MEMORY_H */ diff --git a/zebra/zebra_netns_notify.c b/zebra/zebra_netns_notify.c index 2608ffd7a1..4d2aefa236 100644 --- a/zebra/zebra_netns_notify.c +++ b/zebra/zebra_netns_notify.c @@ -353,8 +353,11 @@ void zebra_ns_notify_close(void) if (zebra_netns_notify_current->u.fd > 0) fd = zebra_netns_notify_current->u.fd; - thread_cancel(zebra_netns_notify_current); - /* auto-removal of inotify items */ + + if (zebra_netns_notify_current->master != NULL) + thread_cancel(zebra_netns_notify_current); + + /* auto-removal of notify items */ if (fd > 0) close(fd); } diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 89cbdaf373..b5d7093525 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -53,6 +53,15 @@ #include "zebra/zebra_routemap.h" #include "zebra/zebra_vrf.h" #include "zebra/zebra_vxlan.h" +#include "zebra/zapi_msg.h" +#include "zebra/zebra_dplane.h" + +/* + * Event, list, and mutex for delivery of dataplane results + */ +static pthread_mutex_t dplane_mutex; +static struct thread *t_dplane; +static struct dplane_ctx_q rib_dplane_q; DEFINE_HOOK(rib_update, (struct route_node * rn, const char *reason), (rn, reason)) @@ -1084,75 +1093,6 @@ int zebra_rib_labeled_unicast(struct route_entry *re) return 1; } -void kernel_route_rib_pass_fail(struct route_node *rn, const struct prefix *p, - struct route_entry *re, - enum zebra_dplane_status res) -{ - struct nexthop *nexthop; - char buf[PREFIX_STRLEN]; - rib_dest_t *dest; - - dest = rib_dest_from_rnode(rn); - - switch (res) { - case 
ZEBRA_DPLANE_INSTALL_SUCCESS: - dest->selected_fib = re; - for (ALL_NEXTHOPS(re->ng, nexthop)) { - if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) - continue; - - if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); - else - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); - } - zsend_route_notify_owner(re, p, ZAPI_ROUTE_INSTALLED); - break; - case ZEBRA_DPLANE_INSTALL_FAILURE: - /* - * I am not sure this is the right thing to do here - * but the code always set selected_fib before - * this assignment was moved here. - */ - dest->selected_fib = re; - - zsend_route_notify_owner(re, p, ZAPI_ROUTE_FAIL_INSTALL); - flog_err(EC_ZEBRA_DP_INSTALL_FAIL, - "%u:%s: Route install failed", re->vrf_id, - prefix2str(p, buf, sizeof(buf))); - break; - case ZEBRA_DPLANE_DELETE_SUCCESS: - /* - * The case where selected_fib is not re is - * when we have received a system route - * that is overriding our installed route - * as such we should leave the selected_fib - * pointer alone - */ - if (dest->selected_fib == re) - dest->selected_fib = NULL; - for (ALL_NEXTHOPS(re->ng, nexthop)) - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); - - zsend_route_notify_owner(re, p, ZAPI_ROUTE_REMOVED); - break; - case ZEBRA_DPLANE_DELETE_FAILURE: - /* - * Should we set this to NULL if the - * delete fails? - */ - dest->selected_fib = NULL; - flog_err(EC_ZEBRA_DP_DELETE_FAIL, - "%u:%s: Route Deletion failure", re->vrf_id, - prefix2str(p, buf, sizeof(buf))); - - zsend_route_notify_owner(re, p, ZAPI_ROUTE_REMOVE_FAIL); - break; - case ZEBRA_DPLANE_STATUS_NONE: - break; - } -} - /* Update flag indicates whether this is a "replace" or not. Currently, this * is only used for IPv4. 
*/ @@ -1161,8 +1101,11 @@ void rib_install_kernel(struct route_node *rn, struct route_entry *re, { struct nexthop *nexthop; rib_table_info_t *info = srcdest_rnode_table_info(rn); - const struct prefix *p, *src_p; struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id); + const struct prefix *p, *src_p; + enum zebra_dplane_result ret; + + rib_dest_t *dest = rib_dest_from_rnode(rn); srcdest_rnode_prefixes(rn, &p, &src_p); @@ -1194,24 +1137,39 @@ void rib_install_kernel(struct route_node *rn, struct route_entry *re, if (old && (old != re) && (old->type != re->type)) zsend_route_notify_owner(old, p, ZAPI_ROUTE_BETTER_ADMIN_WON); + /* Update fib selection */ + dest->selected_fib = re; + /* * Make sure we update the FPM any time we send new information to * the kernel. */ hook_call(rib_update, rn, "installing in kernel"); - switch (kernel_route_rib(rn, p, src_p, old, re)) { + + /* Send add or update */ + if (old && (old != re)) + ret = dplane_route_update(rn, re, old); + else + ret = dplane_route_add(rn, re); + + switch (ret) { case ZEBRA_DPLANE_REQUEST_QUEUED: - flog_err( - EC_ZEBRA_DP_INVALID_RC, - "No current known DataPlane interfaces can return this, please fix"); + if (zvrf) + zvrf->installs_queued++; break; case ZEBRA_DPLANE_REQUEST_FAILURE: - flog_err( - EC_ZEBRA_DP_INSTALL_FAIL, - "No current known Rib Install Failure cases, please fix"); + { + char str[SRCDEST2STR_BUFFER]; + + srcdest_rnode2str(rn, str, sizeof(str)); + flog_err(EC_ZEBRA_DP_INSTALL_FAIL, + "%u:%s: Failed to enqueue dataplane install", + re->vrf_id, str); break; + } case ZEBRA_DPLANE_REQUEST_SUCCESS: - zvrf->installs++; + if (zvrf) + zvrf->installs++; break; } @@ -1223,11 +1181,8 @@ void rib_uninstall_kernel(struct route_node *rn, struct route_entry *re) { struct nexthop *nexthop; rib_table_info_t *info = srcdest_rnode_table_info(rn); - const struct prefix *p, *src_p; struct zebra_vrf *zvrf = vrf_info_lookup(re->vrf_id); - srcdest_rnode_prefixes(rn, &p, &src_p); - if (info->safi != SAFI_UNICAST) { 
for (ALL_NEXTHOPS(re->ng, nexthop)) UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); @@ -1236,20 +1191,25 @@ void rib_uninstall_kernel(struct route_node *rn, struct route_entry *re) /* * Make sure we update the FPM any time we send new information to - * the kernel. + * the dataplane. */ hook_call(rib_update, rn, "uninstalling from kernel"); - switch (kernel_route_rib(rn, p, src_p, re, NULL)) { + + switch (dplane_route_delete(rn, re)) { case ZEBRA_DPLANE_REQUEST_QUEUED: - flog_err( - EC_ZEBRA_DP_INVALID_RC, - "No current known DataPlane interfaces can return this, please fix"); + if (zvrf) + zvrf->removals_queued++; break; case ZEBRA_DPLANE_REQUEST_FAILURE: - flog_err( - EC_ZEBRA_DP_INSTALL_FAIL, - "No current known RIB Install Failure cases, please fix"); + { + char str[SRCDEST2STR_BUFFER]; + + srcdest_rnode2str(rn, str, sizeof(str)); + flog_err(EC_ZEBRA_DP_INSTALL_FAIL, + "%u:%s: Failed to enqueue dataplane uninstall", + re->vrf_id, str); break; + } case ZEBRA_DPLANE_REQUEST_SUCCESS: if (zvrf) zvrf->removals++; @@ -1264,17 +1224,23 @@ static void rib_uninstall(struct route_node *rn, struct route_entry *re) { rib_table_info_t *info = srcdest_rnode_table_info(rn); rib_dest_t *dest = rib_dest_from_rnode(rn); + struct nexthop *nexthop; if (dest && dest->selected_fib == re) { if (info->safi == SAFI_UNICAST) hook_call(rib_update, rn, "rib_uninstall"); - if (!RIB_SYSTEM_ROUTE(re)) - rib_uninstall_kernel(rn, re); - /* If labeled-unicast route, uninstall transit LSP. 
*/ if (zebra_rib_labeled_unicast(re)) zebra_mpls_lsp_uninstall(info->zvrf, rn, re); + + if (!RIB_SYSTEM_ROUTE(re)) + rib_uninstall_kernel(rn, re); + + dest->selected_fib = NULL; + + for (ALL_NEXTHOPS(re->ng, nexthop)) + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); } if (CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED)) { @@ -1803,28 +1769,26 @@ static void rib_process(struct route_node *rn) else if (old_fib) rib_process_del_fib(zvrf, rn, old_fib); - /* Redistribute SELECTED entry */ + /* Update SELECTED entry */ if (old_selected != new_selected || selected_changed) { - struct nexthop *nexthop = NULL; - - /* Check if we have a FIB route for the destination, otherwise, - * don't redistribute it */ - if (new_fib) { - for (ALL_NEXTHOPS(new_fib->ng, nexthop)) { - if (CHECK_FLAG(nexthop->flags, - NEXTHOP_FLAG_FIB)) { - break; - } - } - } - if (!nexthop) - new_selected = NULL; if (new_selected && new_selected != new_fib) { nexthop_active_update(rn, new_selected, 1); UNSET_FLAG(new_selected->status, ROUTE_ENTRY_CHANGED); } + if (new_selected) { + SET_FLAG(new_selected->flags, ZEBRA_FLAG_SELECTED); + + /* Special case: new route is system route, so + * dataplane update will not be done - ensure we + * redistribute the route. 
+ */ + if (RIB_SYSTEM_ROUTE(new_selected)) + redistribute_update(p, src_p, new_selected, + old_selected); + } + if (old_selected) { if (!new_selected) redistribute_delete(p, src_p, old_selected); @@ -1832,14 +1796,6 @@ static void rib_process(struct route_node *rn) UNSET_FLAG(old_selected->flags, ZEBRA_FLAG_SELECTED); } - - if (new_selected) { - /* Install new or replace existing redistributed entry - */ - SET_FLAG(new_selected->flags, ZEBRA_FLAG_SELECTED); - redistribute_update(p, src_p, new_selected, - old_selected); - } } /* Remove all RE entries queued for removal */ @@ -1859,6 +1815,271 @@ static void rib_process(struct route_node *rn) rib_gc_dest(rn); } +/* + * Utility to match route with dplane context data + */ +static bool rib_route_match_ctx(const struct route_entry *re, + const struct zebra_dplane_ctx *ctx, + bool is_update) +{ + bool result = false; + + if (is_update) { + /* + * In 'update' case, we test info about the 'previous' or + * 'old' route + */ + if ((re->type == dplane_ctx_get_old_type(ctx)) && + (re->instance == dplane_ctx_get_old_instance(ctx))) { + result = true; + + /* TODO -- we're using this extra test, but it's not + * exactly clear why. + */ + if (re->type == ZEBRA_ROUTE_STATIC && + (re->distance != dplane_ctx_get_old_distance(ctx) || + re->tag != dplane_ctx_get_old_tag(ctx))) { + result = false; + } + } + + } else { + /* + * Ordinary, single-route case using primary context info + */ + if ((dplane_ctx_get_op(ctx) != DPLANE_OP_ROUTE_DELETE) && + CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED)) { + /* Skip route that's been deleted */ + goto done; + } + + if ((re->type == dplane_ctx_get_type(ctx)) && + (re->instance == dplane_ctx_get_instance(ctx))) { + result = true; + + /* TODO -- we're using this extra test, but it's not + * exactly clear why. 
+ */ + if (re->type == ZEBRA_ROUTE_STATIC && + (re->distance != dplane_ctx_get_distance(ctx) || + re->tag != dplane_ctx_get_tag(ctx))) { + result = false; + } + } + } + +done: + + return (result); +} + +/* + * Route-update results processing after async dataplane update. + */ +static void rib_process_after(struct zebra_dplane_ctx *ctx) +{ + struct route_table *table = NULL; + struct zebra_vrf *zvrf = NULL; + struct route_node *rn = NULL; + struct route_entry *re = NULL, *old_re = NULL, *rib; + bool is_update = false; + struct nexthop *nexthop, *ctx_nexthop; + char dest_str[PREFIX_STRLEN] = ""; + enum dplane_op_e op; + enum zebra_dplane_result status; + const struct prefix *dest_pfx, *src_pfx; + + /* Locate rn and re(s) from ctx */ + + table = zebra_vrf_table_with_table_id(dplane_ctx_get_afi(ctx), + dplane_ctx_get_safi(ctx), + dplane_ctx_get_vrf(ctx), + dplane_ctx_get_table(ctx)); + if (table == NULL) { + if (IS_ZEBRA_DEBUG_DPLANE) { + zlog_debug("Failed to process dplane results: no table for afi %d, safi %d, vrf %u", + dplane_ctx_get_afi(ctx), + dplane_ctx_get_safi(ctx), + dplane_ctx_get_vrf(ctx)); + } + goto done; + } + + zvrf = vrf_info_lookup(dplane_ctx_get_vrf(ctx)); + + dest_pfx = dplane_ctx_get_dest(ctx); + + /* Note well: only capturing the prefix string if debug is enabled here; + * unconditional log messages will have to generate the string. + */ + if (IS_ZEBRA_DEBUG_DPLANE) + prefix2str(dest_pfx, dest_str, sizeof(dest_str)); + + src_pfx = dplane_ctx_get_src(ctx); + rn = srcdest_rnode_get(table, dplane_ctx_get_dest(ctx), + src_pfx ? 
(struct prefix_ipv6 *)src_pfx : NULL); + if (rn == NULL) { + if (IS_ZEBRA_DEBUG_DPLANE) { + zlog_debug("Failed to process dplane results: no route for %u:%s", + dplane_ctx_get_vrf(ctx), dest_str); + } + goto done; + } + + srcdest_rnode_prefixes(rn, &dest_pfx, &src_pfx); + + op = dplane_ctx_get_op(ctx); + status = dplane_ctx_get_status(ctx); + + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + zlog_debug("%u:%s Processing dplane ctx %p, op %s result %s", + dplane_ctx_get_vrf(ctx), dest_str, ctx, + dplane_op2str(op), dplane_res2str(status)); + } + + if (op == DPLANE_OP_ROUTE_DELETE) { + /* + * In the delete case, the zebra core datastructs were + * updated (or removed) at the time the delete was issued, + * so we're just notifying the route owner of the removal result. + */ + if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) { + zsend_route_notify_owner_ctx(ctx, ZAPI_ROUTE_REMOVED); + + if (zvrf) + zvrf->removals++; + } else { + zsend_route_notify_owner_ctx(ctx, + ZAPI_ROUTE_REMOVE_FAIL); + + zlog_warn("%u:%s: Route Deletion failure", + dplane_ctx_get_vrf(ctx), + prefix2str(dest_pfx, + dest_str, sizeof(dest_str))); + } + + /* Nothing more to do in delete case */ + goto done; + } + + /* + * Update is a bit of a special case, where we may have both old and new + * routes to post-process. + */ + is_update = dplane_ctx_is_update(ctx); + + /* + * Take a pass through the routes, look for matches with the context + * info. + */ + RNODE_FOREACH_RE(rn, rib) { + + if (re == NULL) { + if (rib_route_match_ctx(rib, ctx, false)) + re = rib; + } + + /* Check for old route match */ + if (is_update && (old_re == NULL)) { + if (rib_route_match_ctx(rib, ctx, true /*is_update*/)) + old_re = rib; + } + + /* Have we found the routes we need to work on? 
*/ + if (re && ((!is_update || old_re))) + break; + } + + /* + * Check sequence number(s) to detect stale results before continuing + */ + if (re && (re->dplane_sequence != dplane_ctx_get_seq(ctx))) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + zlog_debug("%u:%s Stale dplane result for re %p", + dplane_ctx_get_vrf(ctx), dest_str, re); + } + re = NULL; + } + + if (old_re && + (old_re->dplane_sequence != dplane_ctx_get_old_seq(ctx))) { + if (IS_ZEBRA_DEBUG_DPLANE_DETAIL) { + zlog_debug("%u:%s Stale dplane result for old_re %p", + dplane_ctx_get_vrf(ctx), dest_str, old_re); + } + old_re = NULL; + } + + /* + * Here's sort of a tough one: the route update result is stale. + * Is it better to use the context block info to generate + * redist and owner notification, or is it better to wait + * for the up-to-date result to arrive? + */ + if (re == NULL) { + /* TODO -- for now, only expose up-to-date results */ + goto done; + } + + if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) { + /* Update zebra nexthop FIB flag for each + * nexthop that was installed. + */ + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), ctx_nexthop)) { + + for (ALL_NEXTHOPS(re->ng, nexthop)) { + if (nexthop_same(ctx_nexthop, nexthop)) + break; + } + + if (nexthop == NULL) + continue; + + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + continue; + + if (CHECK_FLAG(ctx_nexthop->flags, + NEXTHOP_FLAG_FIB)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + else + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB); + } + + if (zvrf) { + zvrf->installs++; + /* Set flag for nexthop tracking processing */ + zvrf->flags |= ZEBRA_VRF_RIB_SCHEDULED; + } + + /* Redistribute */ + /* TODO -- still calling the redist api using the route_entries, + * and there's a corner-case here: if there's no client + * for the 'new' route, a redist deleting the 'old' route + * will be sent. But if the 'old' context info was stale, + * 'old_re' will be NULL here and that delete will not be sent. 
+ */ + redistribute_update(dest_pfx, src_pfx, re, old_re); + + /* Notify route owner */ + zsend_route_notify_owner(re, + dest_pfx, ZAPI_ROUTE_INSTALLED); + + } else { + zsend_route_notify_owner(re, dest_pfx, + ZAPI_ROUTE_FAIL_INSTALL); + + zlog_warn("%u:%s: Route install failed", + dplane_ctx_get_vrf(ctx), + prefix2str(dest_pfx, + dest_str, sizeof(dest_str))); + } + +done: + + /* Return context to dataplane module */ + dplane_ctx_fini(&ctx); +} + /* Take a list of route_node structs and return 1, if there was a record * picked from it and processed by rib_process(). Don't process more, * than one RN record; operate only in the specified sub-queue. @@ -1905,9 +2126,9 @@ static unsigned int process_subq(struct list *subq, uint8_t qindex) } /* - * All meta queues have been processed. Trigger next-hop evaluation. + * Perform next-hop tracking processing after RIB updates. */ -static void meta_queue_process_complete(struct work_queue *dummy) +static void do_nht_processing(void) { struct vrf *vrf; struct zebra_vrf *zvrf; @@ -1922,6 +2143,10 @@ static void meta_queue_process_complete(struct work_queue *dummy) if (zvrf == NULL || !(zvrf->flags & ZEBRA_VRF_RIB_SCHEDULED)) continue; + if (IS_ZEBRA_DEBUG_RIB_DETAILED || IS_ZEBRA_DEBUG_NHT) + zlog_debug("NHT processing check for zvrf %s", + zvrf_name(zvrf)); + zvrf->flags &= ~ZEBRA_VRF_RIB_SCHEDULED; zebra_evaluate_rnh(zvrf, AF_INET, 0, RNH_NEXTHOP_TYPE, NULL); zebra_evaluate_rnh(zvrf, AF_INET, 0, RNH_IMPORT_CHECK_TYPE, @@ -1943,6 +2168,14 @@ static void meta_queue_process_complete(struct work_queue *dummy) } } +/* + * All meta queues have been processed. Trigger next-hop evaluation. + */ +static void meta_queue_process_complete(struct work_queue *dummy) +{ + do_nht_processing(); +} + /* Dispatch the meta queue by picking, processing and unlocking the next RN from * a non-empty sub-queue with lowest priority. 
wq is equal to zebra->ribq and * data @@ -1952,6 +2185,22 @@ static wq_item_status meta_queue_process(struct work_queue *dummy, void *data) { struct meta_queue *mq = data; unsigned i; + uint32_t queue_len, queue_limit; + + /* Ensure there's room for more dataplane updates */ + queue_limit = dplane_get_in_queue_limit(); + queue_len = dplane_get_in_queue_len(); + if (queue_len > queue_limit) { + if (IS_ZEBRA_DEBUG_RIB_DETAILED) + zlog_debug("rib queue: dplane queue len %u, limit %u, retrying", + queue_len, queue_limit); + + /* Ensure that the meta-queue is actually enqueued */ + if (work_queue_empty(zebrad.ribq)) + work_queue_add(zebrad.ribq, zebrad.mq); + + return WQ_QUEUE_BLOCKED; + } for (i = 0; i < MQ_SIZE; i++) if (process_subq(mq->subq[i], i)) { @@ -3003,10 +3252,67 @@ void rib_close_table(struct route_table *table) } } +/* + * Event handler: dequeue and process each queued dataplane result. + */ +static int rib_process_dplane_results(struct thread *thread) +{ + struct zebra_dplane_ctx *ctx; + + do { + /* Take lock controlling queue of results */ + pthread_mutex_lock(&dplane_mutex); + { + /* Dequeue context block */ + dplane_ctx_dequeue(&rib_dplane_q, &ctx); + } + pthread_mutex_unlock(&dplane_mutex); + + if (ctx) + rib_process_after(ctx); + else + break; + + } while (1); + + /* Check for nexthop tracking processing after finishing with results */ + do_nht_processing(); + + return 0; +} + +/* + * Results are returned from the dataplane subsystem, in the context of + * the dataplane pthread. We enqueue the results here for processing by + * the main thread later. 
+ */ +static int rib_dplane_results(const struct zebra_dplane_ctx *ctx) +{ + /* Take lock controlling queue of results */ + pthread_mutex_lock(&dplane_mutex); + { + /* Enqueue context block */ + dplane_ctx_enqueue_tail(&rib_dplane_q, ctx); + } + pthread_mutex_unlock(&dplane_mutex); + + /* Ensure event is signalled to zebra main thread */ + thread_add_event(zebrad.master, rib_process_dplane_results, NULL, 0, + &t_dplane); + + return 0; +} + /* Routing information base initialize. */ void rib_init(void) { rib_queue_init(&zebrad); + + /* Init dataplane, and register for results */ + pthread_mutex_init(&dplane_mutex, NULL); + TAILQ_INIT(&rib_dplane_q); + zebra_dplane_init(); + dplane_results_register(rib_dplane_results); } /* diff --git a/zebra/zebra_rnh.c b/zebra/zebra_rnh.c index df4435b619..b9d25a9d9b 100644 --- a/zebra/zebra_rnh.c +++ b/zebra/zebra_rnh.c @@ -517,6 +517,18 @@ static void zebra_rnh_process_pbr_tables(int family, } } +/* + * Utility to determine whether a candidate nexthop is useable. We make this + * check in a couple of places, so this is a single home for the logic we + * use. + */ +static bool rnh_nexthop_valid(const struct nexthop *nh) +{ + return ((CHECK_FLAG(nh->flags, NEXTHOP_FLAG_FIB) + || CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) + && CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE)); +} + /* * Determine appropriate route (route entry) resolving a tracked * nexthop. @@ -529,6 +541,7 @@ zebra_rnh_resolve_nexthop_entry(struct zebra_vrf *zvrf, int family, struct route_table *route_table; struct route_node *rn; struct route_entry *re; + struct nexthop *nexthop; *prn = NULL; @@ -561,12 +574,23 @@ zebra_rnh_resolve_nexthop_entry(struct zebra_vrf *zvrf, int family, if (!CHECK_FLAG(re->flags, ZEBRA_FLAG_SELECTED)) continue; + /* Just being SELECTED isn't quite enough - must + * have an installed nexthop to be useful. 
+ */ + for (nexthop = re->ng.nexthop; nexthop; + nexthop = nexthop->next) { + if (rnh_nexthop_valid(nexthop)) + break; + } + + if (nexthop == NULL) + continue; + if (CHECK_FLAG(rnh->flags, ZEBRA_NHT_CONNECTED)) { if ((re->type == ZEBRA_ROUTE_CONNECT) || (re->type == ZEBRA_ROUTE_STATIC)) break; if (re->type == ZEBRA_ROUTE_NHRP) { - struct nexthop *nexthop; for (nexthop = re->ng.nexthop; nexthop; nexthop = nexthop->next) @@ -888,9 +912,7 @@ static int send_client(struct rnh *rnh, struct zserv *client, rnh_type_t type, nump = stream_get_endp(s); stream_putc(s, 0); for (nh = re->ng.nexthop; nh; nh = nh->next) - if ((CHECK_FLAG(nh->flags, NEXTHOP_FLAG_FIB) - || CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) - && CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE)) { + if (rnh_nexthop_valid(nh)) { stream_putc(s, nh->type); switch (nh->type) { case NEXTHOP_TYPE_IPV4: diff --git a/zebra/zebra_vrf.h b/zebra/zebra_vrf.h index a39d74b08b..ef02ca63e5 100644 --- a/zebra/zebra_vrf.h +++ b/zebra/zebra_vrf.h @@ -1,7 +1,7 @@ /* * Zebra Vrf Header * Copyright (C) 2016 Cumulus Networks - * Donald Sahrp + * Donald Sharp * * This file is part of Quagga. 
* @@ -133,6 +133,8 @@ struct zebra_vrf { /* Route Installs */ uint64_t installs; uint64_t removals; + uint64_t installs_queued; + uint64_t removals_queued; uint64_t neigh_updates; uint64_t lsp_installs; uint64_t lsp_removals; diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c index dd0e270149..17609a03fe 100644 --- a/zebra/zebra_vty.c +++ b/zebra/zebra_vty.c @@ -2568,6 +2568,76 @@ DEFUN (no_ipv6_forwarding, return CMD_SUCCESS; } +/* Display dataplane info */ +DEFUN (show_dataplane, + show_dataplane_cmd, + "show zebra dplane [detailed]", + SHOW_STR + ZEBRA_STR + "Zebra dataplane information\n" + "Detailed output\n") +{ + int idx = 0; + bool detailed = false; + + if (argv_find(argv, argc, "detailed", &idx)) + detailed = true; + + return dplane_show_helper(vty, detailed); +} + +/* Display dataplane providers info */ +DEFUN (show_dataplane_providers, + show_dataplane_providers_cmd, + "show zebra dplane providers [detailed]", + SHOW_STR + ZEBRA_STR + "Zebra dataplane information\n" + "Zebra dataplane provider information\n" + "Detailed output\n") +{ + int idx = 0; + bool detailed = false; + + if (argv_find(argv, argc, "detailed", &idx)) + detailed = true; + + return dplane_show_provs_helper(vty, detailed); +} + +/* Configure dataplane incoming queue limit */ +DEFUN (zebra_dplane_queue_limit, + zebra_dplane_queue_limit_cmd, + "zebra dplane limit (0-10000)", + ZEBRA_STR + "Zebra dataplane\n" + "Limit incoming queued updates\n" + "Number of queued updates\n") +{ + uint32_t limit = 0; + + limit = strtoul(argv[3]->arg, NULL, 10); + + dplane_set_in_queue_limit(limit, true); + + return CMD_SUCCESS; +} + +/* Reset dataplane queue limit to default value */ +DEFUN (no_zebra_dplane_queue_limit, + no_zebra_dplane_queue_limit_cmd, + "no zebra dplane limit [(0-10000)]", + NO_STR + ZEBRA_STR + "Zebra dataplane\n" + "Limit incoming queued updates\n" + "Number of queued updates\n") +{ + dplane_set_in_queue_limit(0, false); + + return CMD_SUCCESS; +} + /* Table configuration write 
function. */ static int config_write_table(struct vty *vty) { @@ -2698,5 +2768,8 @@ void zebra_vty_init(void) install_element(VRF_NODE, &vrf_vni_mapping_cmd); install_element(VRF_NODE, &no_vrf_vni_mapping_cmd); - + install_element(VIEW_NODE, &show_dataplane_cmd); + install_element(VIEW_NODE, &show_dataplane_providers_cmd); + install_element(CONFIG_NODE, &zebra_dplane_queue_limit_cmd); + install_element(CONFIG_NODE, &no_zebra_dplane_queue_limit_cmd); } diff --git a/zebra/zserv.h b/zebra/zserv.h index aaefd78eea..987c67635d 100644 --- a/zebra/zserv.h +++ b/zebra/zserv.h @@ -251,4 +251,7 @@ extern void zserv_close_client(struct zserv *client); extern void zserv_read_file(char *input); #endif +/* TODO */ +int zebra_finalize(struct thread *event); + #endif /* _ZEBRA_ZEBRA_H */