add frr zebra buffering patches

https://github.com/FRRouting/frr/pull/10482 This fix bugs for 2 proxmox users, when vlan-aware bridges exists with a lot of vlans, netlink message are too big. Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2025-04-28 16:27:54 +00:00 · 2022-04-27 17:04:55 +02:00 · 2022-04-27 17:04:55 +02:00 · 11f22179fb
commit 11f22179fb
parent d4ac04fd89
4 changed files with 799 additions and 1 deletions
--- a/debian/patches/frr/0001-zebra-buffering.patch
+++ b/debian/patches/frr/0001-zebra-buffering.patch
@ -0,0 +1,92 @@
 From 3670f5047cb00865c15b2f3ebdfcfe634443db61 Mon Sep 17 00:00:00 2001
 From: Donald Sharp <sharpd@nvidia.com>
 Date: Tue, 8 Feb 2022 09:47:24 -0500
 Subject: [PATCH] zebra: Store the sequence number to use as part of the
 dp_info
 Store and use the sequence number instead of using what is in
 the `struct nlsock`.  Future commits are going away from storing
 the `struct nlsock` and the copy of the nlsock was guaranteeing
 unique sequence numbers per message.  So let's store the
 sequence number to use instead.
 Signed-off-by: Donald Sharp <sharpd@nvidia.com>
 ---
 zebra/kernel_netlink.c | 14 +++++++-------
 zebra/zebra_dplane.h   |  3 +++
 2 files changed, 10 insertions(+), 7 deletions(-)
 diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
 index e3b2f9cb665..59911945481 100644
 --- a/zebra/kernel_netlink.c
 +++ b/zebra/kernel_netlink.c
@@ -1038,7 +1038,7 @@ netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
 	const struct nlsock *nl;
 	nl = &(dp_info->nls);
 -	n->nlmsg_seq = nl->seq;
 +	n->nlmsg_seq = dp_info->seq;
 	n->nlmsg_pid = nl->snl.nl_pid;
 	if (IS_ZEBRA_DEBUG_KERNEL)
@@ -1172,8 +1172,8 @@ static int nl_batch_read_resp(struct nl_batch *bth)
 			 * 'update' context objects take two consecutive
 			 * sequence numbers.
 			 */
 -			if (dplane_ctx_is_update(ctx)
 -			    && dplane_ctx_get_ns(ctx)->nls.seq + 1 == seq) {
 +			if (dplane_ctx_is_update(ctx) &&
 +			    dplane_ctx_get_ns(ctx)->seq + 1 == seq) {
 				/*
 				 * This is the situation where we get a response
 				 * to a message that should be ignored.
@@ -1186,14 +1186,14 @@ static int nl_batch_read_resp(struct nl_batch *bth)
 			dplane_ctx_enqueue_tail(bth->ctx_out_q, ctx);
 			/* We have found corresponding context object. */
 -			if (dplane_ctx_get_ns(ctx)->nls.seq == seq)
 +			if (dplane_ctx_get_ns(ctx)->seq == seq)
 				break;
 -			if (dplane_ctx_get_ns(ctx)->nls.seq > seq)
 +			if (dplane_ctx_get_ns(ctx)->seq > seq)
 				zlog_warn(
 					"%s:WARNING Recieved %u is less than any context on the queue ctx->seq %u",
 					__func__, seq,
 -					dplane_ctx_get_ns(ctx)->nls.seq);
 +					dplane_ctx_get_ns(ctx)->seq);
 		}
 		if (ignore_msg) {
@@ -1360,7 +1360,7 @@ enum netlink_msg_status netlink_batch_add_msg(
 			return FRR_NETLINK_ERROR;
 	}
 -	seq = dplane_ctx_get_ns(ctx)->nls.seq;
 +	seq = dplane_ctx_get_ns(ctx)->seq;
 	if (ignore_res)
 		seq++;
 diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h
 index 1d55181388e..69ea9c7fd95 100644
 --- a/zebra/zebra_dplane.h
 +++ b/zebra/zebra_dplane.h
@@ -42,6 +42,7 @@ struct zebra_dplane_info {
 #if defined(HAVE_NETLINK)
 	struct nlsock nls;
 +	int seq;
 	bool is_cmd;
 #endif
 };
@@ -57,8 +58,10 @@ zebra_dplane_info_from_zns(struct zebra_dplane_info *zns_info,
 	zns_info->is_cmd = is_cmd;
 	if (is_cmd) {
 		zns_info->nls = zns->netlink_cmd;
 +		zns_info->seq = zns->netlink_cmd.seq;
 	} else {
 		zns_info->nls = zns->netlink;
 +		zns_info->seq = zns->netlink.seq;
 	}
 #endif /* NETLINK */
 }
--- a/debian/patches/frr/0002-zebra-buffering.patch
+++ b/debian/patches/frr/0002-zebra-buffering.patch
@ -0,0 +1,420 @@
 From d4000d7ba3242c0e6adfa56544a34312b0f566a9 Mon Sep 17 00:00:00 2001
 From: Donald Sharp <sharpd@nvidia.com>
 Date: Wed, 2 Feb 2022 13:21:52 -0500
 Subject: [PATCH] zebra: Remove `struct nlsock` from dataplane information and
 use `int fd`
 Store the fd that corresponds to the appropriate `struct nlsock` and pass
 that around in the dplane context instead of the pointer to the nlsock.
 Modify the kernel_netlink.c code to store in a hash the `struct nlsock`
 with the socket fd as the key.
 Why do this?  The dataplane context is used to pass around the `struct nlsock`
 but the zebra code has a bug where the received buffer for kernel netlink
 messages from the kernel is not big enough.  So we need to dynamically
 grow the receive buffer per socket, instead of having a non-dynamic buffer
 that we read into.  By passing around the fd we can look up the `struct nlsock`
 that will soon have the associated buffer and not have to worry about `const`
 issues that will arise.
 Signed-off-by: Donald Sharp <sharpd@nvidia.com>
 ---
 zebra/kernel_netlink.c | 72 ++++++++++++++++++++++++++++++++++++------
 zebra/kernel_netlink.h |  1 +
 zebra/rt_netlink.c     | 13 ++++++--
 zebra/zebra_dplane.c   | 19 +++++++----
 zebra/zebra_dplane.h   |  9 +++---
 5 files changed, 91 insertions(+), 23 deletions(-)
 diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
 index 59911945481..3650d87e0fb 100644
 --- a/zebra/kernel_netlink.c
 +++ b/zebra/kernel_netlink.c
@@ -36,6 +36,7 @@
 #include "vrf.h"
 #include "mpls.h"
 #include "lib_errors.h"
 +#include "hash.h"
 //#include "zebra/zserv.h"
 #include "zebra/zebra_router.h"
@@ -160,6 +161,7 @@ extern struct zebra_privs_t zserv_privs;
 DEFINE_MTYPE_STATIC(ZEBRA, NL_BUF, "Zebra Netlink buffers");
 +struct hash *nlsock_hash;
 size_t nl_batch_tx_bufsize;
 char *nl_batch_tx_buf;
@@ -429,8 +431,10 @@ static int kernel_read(struct thread *thread)
  */
 int kernel_dplane_read(struct zebra_dplane_info *info)
 {
 -	netlink_parse_info(dplane_netlink_information_fetch, &info->nls, info,
 -			   5, false);
 +	struct nlsock *nl = kernel_netlink_nlsock_lookup(info->sock);
 +
 +	netlink_parse_info(dplane_netlink_information_fetch, nl, info, 5,
 +			   false);
 	return 0;
 }
@@ -1035,9 +1039,9 @@ netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
 		  struct nlmsghdr *n, const struct zebra_dplane_info *dp_info,
 		  bool startup)
 {
 -	const struct nlsock *nl;
 +	struct nlsock *nl;
 -	nl = &(dp_info->nls);
 +	nl = kernel_netlink_nlsock_lookup(dp_info->sock);
 	n->nlmsg_seq = dp_info->seq;
 	n->nlmsg_pid = nl->snl.nl_pid;
@@ -1109,11 +1113,11 @@ static int nl_batch_read_resp(struct nl_batch *bth)
 	struct sockaddr_nl snl;
 	struct msghdr msg = {};
 	int status, seq;
 -	const struct nlsock *nl;
 +	struct nlsock *nl;
 	struct zebra_dplane_ctx *ctx;
 	bool ignore_msg;
 -	nl = &(bth->zns->nls);
 +	nl = kernel_netlink_nlsock_lookup(bth->zns->sock);
 	msg.msg_name = (void *)&snl;
 	msg.msg_namelen = sizeof(snl);
@@ -1295,13 +1299,15 @@ static void nl_batch_send(struct nl_batch *bth)
 	bool err = false;
 	if (bth->curlen != 0 && bth->zns != NULL) {
 +		struct nlsock *nl =
 +			kernel_netlink_nlsock_lookup(bth->zns->sock);
 +
 		if (IS_ZEBRA_DEBUG_KERNEL)
 			zlog_debug("%s: %s, batch size=%zu, msg cnt=%zu",
 -				   __func__, bth->zns->nls.name, bth->curlen,
 +				   __func__, nl->name, bth->curlen,
 				   bth->msgcnt);
 -		if (netlink_send_msg(&(bth->zns->nls), bth->buf, bth->curlen)
 -		    == -1)
 +		if (netlink_send_msg(nl, bth->buf, bth->curlen) == -1)
 			err = true;
 		if (!err) {
@@ -1334,6 +1340,7 @@ enum netlink_msg_status netlink_batch_add_msg(
 	int seq;
 	ssize_t size;
 	struct nlmsghdr *msgh;
 +	struct nlsock *nl;
 	size = (*msg_encoder)(ctx, bth->buf_head, bth->bufsiz - bth->curlen);
@@ -1361,12 +1368,14 @@ enum netlink_msg_status netlink_batch_add_msg(
 	}
 	seq = dplane_ctx_get_ns(ctx)->seq;
 +	nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
 +
 	if (ignore_res)
 		seq++;
 	msgh = (struct nlmsghdr *)bth->buf_head;
 	msgh->nlmsg_seq = seq;
 -	msgh->nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
 +	msgh->nlmsg_pid = nl->snl.nl_pid;
 	bth->zns = dplane_ctx_get_ns(ctx);
 	bth->buf_head = ((char *)bth->buf_head) + size;
@@ -1496,6 +1505,33 @@ void kernel_update_multi(struct dplane_ctx_q *ctx_list)
 	dplane_ctx_list_append(ctx_list, &handled_list);
 }
 +struct nlsock *kernel_netlink_nlsock_lookup(int sock)
 +{
 +	struct nlsock lookup;
 +
 +	lookup.sock = sock;
 +
 +	return hash_lookup(nlsock_hash, &lookup);
 +}
 +
 +static uint32_t kernel_netlink_nlsock_key(const void *arg)
 +{
 +	const struct nlsock *nl = arg;
 +
 +	return nl->sock;
 +}
 +
 +static bool kernel_netlink_nlsock_hash_equal(const void *arg1, const void *arg2)
 +{
 +	const struct nlsock *nl1 = arg1;
 +	const struct nlsock *nl2 = arg2;
 +
 +	if (nl1->sock == nl2->sock)
 +		return true;
 +
 +	return false;
 +}
 +
 /* Exported interface function.  This function simply calls
    netlink_socket (). */
 void kernel_init(struct zebra_ns *zns)
@@ -1505,6 +1541,11 @@ void kernel_init(struct zebra_ns *zns)
 	int one, ret;
 #endif
 +	if (!nlsock_hash)
 +		nlsock_hash = hash_create_size(8, kernel_netlink_nlsock_key,
 +					       kernel_netlink_nlsock_hash_equal,
 +					       "Netlink Socket Hash");
 +
 	/*
 	 * Initialize netlink sockets
 	 *
@@ -1537,6 +1578,7 @@ void kernel_init(struct zebra_ns *zns)
 			 zns->netlink.name);
 		exit(-1);
 	}
 +	(void)hash_get(nlsock_hash, &zns->netlink, hash_alloc_intern);
 	snprintf(zns->netlink_cmd.name, sizeof(zns->netlink_cmd.name),
 		 "netlink-cmd (NS %u)", zns->ns_id);
@@ -1546,6 +1588,7 @@ void kernel_init(struct zebra_ns *zns)
 			 zns->netlink_cmd.name);
 		exit(-1);
 	}
 +	(void)hash_get(nlsock_hash, &zns->netlink_cmd, hash_alloc_intern);
 	/* Outbound socket for dplane programming of the host OS. */
 	snprintf(zns->netlink_dplane_out.name,
@@ -1557,6 +1600,8 @@ void kernel_init(struct zebra_ns *zns)
 			 zns->netlink_dplane_out.name);
 		exit(-1);
 	}
 +	(void)hash_get(nlsock_hash, &zns->netlink_dplane_out,
 +		       hash_alloc_intern);
 	/* Inbound socket for OS events coming to the dplane. */
 	snprintf(zns->netlink_dplane_in.name,
@@ -1569,6 +1614,7 @@ void kernel_init(struct zebra_ns *zns)
 			 zns->netlink_dplane_in.name);
 		exit(-1);
 	}
 +	(void)hash_get(nlsock_hash, &zns->netlink_dplane_in, hash_alloc_intern);
 	/*
 	 * SOL_NETLINK is not available on all platforms yet
@@ -1659,16 +1705,19 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
 	thread_cancel(&zns->t_netlink);
 	if (zns->netlink.sock >= 0) {
 +		hash_release(nlsock_hash, &zns->netlink);
 		close(zns->netlink.sock);
 		zns->netlink.sock = -1;
 	}
 	if (zns->netlink_cmd.sock >= 0) {
 +		hash_release(nlsock_hash, &zns->netlink_cmd);
 		close(zns->netlink_cmd.sock);
 		zns->netlink_cmd.sock = -1;
 	}
 	if (zns->netlink_dplane_in.sock >= 0) {
 +		hash_release(nlsock_hash, &zns->netlink_dplane_in);
 		close(zns->netlink_dplane_in.sock);
 		zns->netlink_dplane_in.sock = -1;
 	}
@@ -1678,9 +1727,12 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
 	 */
 	if (complete) {
 		if (zns->netlink_dplane_out.sock >= 0) {
 +			hash_release(nlsock_hash, &zns->netlink_dplane_out);
 			close(zns->netlink_dplane_out.sock);
 			zns->netlink_dplane_out.sock = -1;
 		}
 +
 +		hash_free(nlsock_hash);
 	}
 }
 #endif /* HAVE_NETLINK */
 diff --git a/zebra/kernel_netlink.h b/zebra/kernel_netlink.h
 index cf8b8c785e6..ae88f3372b1 100644
 --- a/zebra/kernel_netlink.h
 +++ b/zebra/kernel_netlink.h
@@ -146,6 +146,7 @@ extern int netlink_config_write_helper(struct vty *vty);
 extern void netlink_set_batch_buffer_size(uint32_t size, uint32_t threshold,
 					  bool set);
 +extern struct nlsock *kernel_netlink_nlsock_lookup(int sock);
 #endif /* HAVE_NETLINK */
 #ifdef __cplusplus
 diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c
 index 9ead1a768e7..c6423dce99c 100644
 --- a/zebra/rt_netlink.c
 +++ b/zebra/rt_netlink.c
@@ -1898,6 +1898,7 @@ ssize_t netlink_route_multipath_msg_encode(int cmd,
 	union g_addr src;
 	const struct prefix *p, *src_p;
 	uint32_t table_id;
 +	struct nlsock *nl;
 	struct {
 		struct nlmsghdr n;
@@ -1911,6 +1912,8 @@ ssize_t netlink_route_multipath_msg_encode(int cmd,
 	if (datalen < sizeof(*req))
 		return 0;
 +	nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
 +
 	memset(req, 0, sizeof(*req));
 	bytelen = (p->family == AF_INET ? 4 : 16);
@@ -1924,7 +1927,7 @@ ssize_t netlink_route_multipath_msg_encode(int cmd,
 	req->n.nlmsg_type = cmd;
 -	req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
 +	req->n.nlmsg_pid = nl->snl.nl_pid;
 	req->r.rtm_family = p->family;
 	req->r.rtm_dst_len = p->prefixlen;
@@ -2360,6 +2363,8 @@ ssize_t netlink_nexthop_msg_encode(uint16_t cmd,
 	int type = dplane_ctx_get_nhe_type(ctx);
 	struct rtattr *nest;
 	uint16_t encap;
 +	struct nlsock *nl =
 +		kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
 	if (!id) {
 		flog_err(
@@ -2402,7 +2407,7 @@ ssize_t netlink_nexthop_msg_encode(uint16_t cmd,
 		req->n.nlmsg_flags |= NLM_F_REPLACE;
 	req->n.nlmsg_type = cmd;
 -	req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
 +	req->n.nlmsg_pid = nl->snl.nl_pid;
 	req->nhm.nh_family = AF_UNSPEC;
 	/* TODO: Scope? */
@@ -4283,6 +4288,8 @@ ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
 	const char *routedesc;
 	int route_type;
 	struct prefix p = {0};
 +	struct nlsock *nl =
 +		kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
 	struct {
 		struct nlmsghdr n;
@@ -4325,7 +4332,7 @@ ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
 	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
 	req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
 	req->n.nlmsg_type = cmd;
 -	req->n.nlmsg_pid = dplane_ctx_get_ns(ctx)->nls.snl.nl_pid;
 +	req->n.nlmsg_pid = nl->snl.nl_pid;
 	req->r.rtm_family = AF_MPLS;
 	req->r.rtm_table = RT_TABLE_MAIN;
 diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
 index 656ebcf3b7f..05297e143b5 100644
 --- a/zebra/zebra_dplane.c
 +++ b/zebra/zebra_dplane.c
@@ -1465,6 +1465,13 @@ const struct zebra_dplane_info *dplane_ctx_get_ns(
 	return &(ctx->zd_ns_info);
 }
 +int dplane_ctx_get_ns_sock(const struct zebra_dplane_ctx *ctx)
 +{
 +	DPLANE_CTX_VALID(ctx);
 +
 +	return ctx->zd_ns_info.sock;
 +}
 +
 /* Accessors for nexthop information */
 uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx)
 {
@@ -4844,7 +4851,7 @@ static void dplane_info_from_zns(struct zebra_dplane_info *ns_info,
 #if defined(HAVE_NETLINK)
 	ns_info->is_cmd = true;
 -	ns_info->nls = zns->netlink_dplane_out;
 +	ns_info->sock = zns->netlink_dplane_out.sock;
 #endif /* NETLINK */
 }
@@ -4861,7 +4868,7 @@ static int dplane_incoming_read(struct thread *event)
 	/* Re-start read task */
 	thread_add_read(zdplane_info.dg_master, dplane_incoming_read, zi,
 -			zi->info.nls.sock, &zi->t_read);
 +			zi->info.sock, &zi->t_read);
 	return 0;
 }
@@ -4906,13 +4913,13 @@ void zebra_dplane_ns_enable(struct zebra_ns *zns, bool enabled)
 		/* Make sure we're up-to-date with the zns object */
 #if defined(HAVE_NETLINK)
 		zi->info.is_cmd = false;
 -		zi->info.nls = zns->netlink_dplane_in;
 +		zi->info.sock = zns->netlink_dplane_in.sock;
 		/* Start read task for the dplane pthread. */
 		if (zdplane_info.dg_master)
 			thread_add_read(zdplane_info.dg_master,
 -					dplane_incoming_read, zi,
 -					zi->info.nls.sock, &zi->t_read);
 +					dplane_incoming_read, zi, zi->info.sock,
 +					&zi->t_read);
 #endif
 	} else if (zi) {
 		if (IS_ZEBRA_DEBUG_DPLANE)
@@ -5935,7 +5942,7 @@ void zebra_dplane_start(void)
 	frr_each (zns_info_list, &zdplane_info.dg_zns_list, zi) {
 #if defined(HAVE_NETLINK)
 		thread_add_read(zdplane_info.dg_master, dplane_incoming_read,
 -				zi, zi->info.nls.sock, &zi->t_read);
 +				zi, zi->info.sock, &zi->t_read);
 #endif
 	}
 diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h
 index 69ea9c7fd95..a7a5f99e451 100644
 --- a/zebra/zebra_dplane.h
 +++ b/zebra/zebra_dplane.h
@@ -41,7 +41,7 @@ struct zebra_dplane_info {
 	ns_id_t ns_id;
 #if defined(HAVE_NETLINK)
 -	struct nlsock nls;
 +	int sock;
 	int seq;
 	bool is_cmd;
 #endif
@@ -57,10 +57,10 @@ zebra_dplane_info_from_zns(struct zebra_dplane_info *zns_info,
 #if defined(HAVE_NETLINK)
 	zns_info->is_cmd = is_cmd;
 	if (is_cmd) {
 -		zns_info->nls = zns->netlink_cmd;
 +		zns_info->sock = zns->netlink_cmd.sock;
 		zns_info->seq = zns->netlink_cmd.seq;
 	} else {
 -		zns_info->nls = zns->netlink;
 +		zns_info->sock = zns->netlink.sock;
 		zns_info->seq = zns->netlink.seq;
 	}
 #endif /* NETLINK */
@@ -564,9 +564,10 @@ dplane_ctx_gre_get_mtu(const struct zebra_dplane_ctx *ctx);
 const struct zebra_l2info_gre *
 dplane_ctx_gre_get_info(const struct zebra_dplane_ctx *ctx);
 -/* Namespace info - esp. for netlink communication */
 +/* Namespace fd info - esp. for netlink communication */
 const struct zebra_dplane_info *dplane_ctx_get_ns(
 	const struct zebra_dplane_ctx *ctx);
 +int dplane_ctx_get_ns_sock(const struct zebra_dplane_ctx *ctx);
 /* Indicates zebra shutdown/exit is in progress. Some operations may be
  * simplified or skipped during shutdown processing.
--- a/debian/patches/frr/0003-zebra-buffering.patch
+++ b/debian/patches/frr/0003-zebra-buffering.patch
@ -0,0 +1,283 @@
 From 2cf7651f0b1b0123dc5568ebad00ac84a9b3c348 Mon Sep 17 00:00:00 2001
 From: Donald Sharp <sharpd@nvidia.com>
 Date: Wed, 2 Feb 2022 13:28:42 -0500
 Subject: [PATCH] zebra: Make netlink buffer reads resizeable when needed
 Currently when the kernel sends netlink messages to FRR
 the buffers to receive this data is of fixed length.
 The kernel, with certain configurations, will send
 netlink messages that are larger than this fixed length.
 This leads to situations where, on startup, zebra gets
 really confused about the state of the kernel.  Effectively
 the current algorithm is this:
 read up to buffer in size
 while (data to parse)
     get netlink message header, look at size
        parse if you can
 The problem is that there is a 32k buffer we read.
 We get the first message that is say 1k in size,
 subtract that 1k to 31k left to parse.  We then
 get the next header and notice that the length
 of the message is 33k.  Which is obviously larger
 than what we read in.  FRR has no recover mechanism
 nor is there a way to know, a priori, what the maximum
 size the kernel will send us.
 Modify FRR to look at the kernel message and see if the
 buffer is large enough, if not, make it large enough to
 read in the message.
 This code has to be per netlink socket because of the usage
 of pthreads.  So add to `struct nlsock` the buffer and current
 buffer length.  Growing it as necessary.
 Fixes: #10404
 Signed-off-by: Donald Sharp <sharpd@nvidia.com>
 ---
 zebra/kernel_netlink.c | 68 +++++++++++++++++++++++++-----------------
 zebra/kernel_netlink.h |  2 +-
 zebra/zebra_dplane.c   |  4 +++
 zebra/zebra_ns.h       |  3 ++
 4 files changed, 49 insertions(+), 28 deletions(-)
 diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
 index 3650d87e0fb..d0c86a6bb0e 100644
 --- a/zebra/kernel_netlink.c
 +++ b/zebra/kernel_netlink.c
@@ -90,8 +90,6 @@
  */
 #define NL_DEFAULT_BATCH_SEND_THRESHOLD (15 * NL_PKT_BUF_SIZE)
 -#define NL_BATCH_RX_BUFSIZE NL_RCV_PKT_BUF_SIZE
 -
 static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
 					   {RTM_DELROUTE, "RTM_DELROUTE"},
 					   {RTM_GETROUTE, "RTM_GETROUTE"},
@@ -165,8 +163,6 @@ struct hash *nlsock_hash;
 size_t nl_batch_tx_bufsize;
 char *nl_batch_tx_buf;
 -char nl_batch_rx_buf[NL_BATCH_RX_BUFSIZE];
 -
 _Atomic uint32_t nl_batch_bufsize = NL_DEFAULT_BATCH_BUFSIZE;
 _Atomic uint32_t nl_batch_send_threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
@@ -320,6 +316,9 @@ static int netlink_socket(struct nlsock *nl, unsigned long groups,
 	nl->snl = snl;
 	nl->sock = sock;
 +	nl->buflen = NL_RCV_PKT_BUF_SIZE;
 +	nl->buf = XMALLOC(MTYPE_NL_BUF, nl->buflen);
 +
 	return ret;
 }
@@ -785,19 +784,29 @@ static ssize_t netlink_send_msg(const struct nlsock *nl, void *buf,
  *
  * Returns -1 on error, 0 if read would block or the number of bytes received.
  */
 -static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg,
 -			    void *buf, size_t buflen)
 +static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
 {
 	struct iovec iov;
 	int status;
 -	iov.iov_base = buf;
 -	iov.iov_len = buflen;
 -	msg.msg_iov = &iov;
 -	msg.msg_iovlen = 1;
 +	iov.iov_base = nl->buf;
 +	iov.iov_len = nl->buflen;
 +	msg->msg_iov = &iov;
 +	msg->msg_iovlen = 1;
 	do {
 -		status = recvmsg(nl->sock, &msg, 0);
 +		int bytes;
 +
 +		bytes = recv(nl->sock, NULL, 0, MSG_PEEK | MSG_TRUNC);
 +
 +		if (bytes >= 0 && (size_t)bytes > nl->buflen) {
 +			nl->buf = XREALLOC(MTYPE_NL_BUF, nl->buf, bytes);
 +			nl->buflen = bytes;
 +			iov.iov_base = nl->buf;
 +			iov.iov_len = nl->buflen;
 +		}
 +
 +		status = recvmsg(nl->sock, msg, 0);
 	} while (status == -1 && errno == EINTR);
 	if (status == -1) {
@@ -817,19 +826,19 @@ static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg,
 		return -1;
 	}
 -	if (msg.msg_namelen != sizeof(struct sockaddr_nl)) {
 +	if (msg->msg_namelen != sizeof(struct sockaddr_nl)) {
 		flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
 			 "%s sender address length error: length %d", nl->name,
 -			 msg.msg_namelen);
 +			 msg->msg_namelen);
 		return -1;
 	}
 	if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) {
 		zlog_debug("%s: << netlink message dump [recv]", __func__);
 #ifdef NETLINK_DEBUG
 -		nl_dump(buf, status);
 +		nl_dump(nl->buf, status);
 #else
 -		zlog_hexdump(buf, status);
 +		zlog_hexdump(nl->buf, status);
 #endif /* NETLINK_DEBUG */
 	}
@@ -932,8 +941,7 @@ static int netlink_parse_error(const struct nlsock *nl, struct nlmsghdr *h,
  *            the filter.
  */
 int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
 -		       const struct nlsock *nl,
 -		       const struct zebra_dplane_info *zns,
 +		       struct nlsock *nl, const struct zebra_dplane_info *zns,
 		       int count, bool startup)
 {
 	int status;
@@ -942,7 +950,6 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
 	int read_in = 0;
 	while (1) {
 -		char buf[NL_RCV_PKT_BUF_SIZE];
 		struct sockaddr_nl snl;
 		struct msghdr msg = {.msg_name = (void *)&snl,
 				     .msg_namelen = sizeof(snl)};
@@ -951,14 +958,14 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
 		if (count && read_in >= count)
 			return 0;
 -		status = netlink_recv_msg(nl, msg, buf, sizeof(buf));
 +		status = netlink_recv_msg(nl, &msg);
 		if (status == -1)
 			return -1;
 		else if (status == 0)
 			break;
 		read_in++;
 -		for (h = (struct nlmsghdr *)buf;
 +		for (h = (struct nlmsghdr *)nl->buf;
 		     (status >= 0 && NLMSG_OK(h, (unsigned int)status));
 		     h = NLMSG_NEXT(h, status)) {
 			/* Finish of reading. */
@@ -1034,10 +1041,10 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
  * startup  -> Are we reading in under startup conditions
  *             This is passed through eventually to filter.
  */
 -static int
 -netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
 -		  struct nlmsghdr *n, const struct zebra_dplane_info *dp_info,
 -		  bool startup)
 +static int netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t,
 +					   int startup),
 +			     struct nlmsghdr *n,
 +			     struct zebra_dplane_info *dp_info, bool startup)
 {
 	struct nlsock *nl;
@@ -1127,8 +1134,7 @@ static int nl_batch_read_resp(struct nl_batch *bth)
 	 * message at a time.
 	 */
 	while (true) {
 -		status = netlink_recv_msg(nl, msg, nl_batch_rx_buf,
 -					  sizeof(nl_batch_rx_buf));
 +		status = netlink_recv_msg(nl, &msg);
 		/*
 		 * status == -1 is a full on failure somewhere
 		 * since we don't know where the problem happened
@@ -1149,7 +1155,7 @@ static int nl_batch_read_resp(struct nl_batch *bth)
 			return status;
 		}
 -		h = (struct nlmsghdr *)nl_batch_rx_buf;
 +		h = (struct nlmsghdr *)nl->buf;
 		ignore_msg = false;
 		seq = h->nlmsg_seq;
 		/*
@@ -1708,18 +1714,24 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
 		hash_release(nlsock_hash, &zns->netlink);
 		close(zns->netlink.sock);
 		zns->netlink.sock = -1;
 +		XFREE(MTYPE_NL_BUF, zns->netlink.buf);
 +		zns->netlink.buflen = 0;
 	}
 	if (zns->netlink_cmd.sock >= 0) {
 		hash_release(nlsock_hash, &zns->netlink_cmd);
 		close(zns->netlink_cmd.sock);
 		zns->netlink_cmd.sock = -1;
 +		XFREE(MTYPE_NL_BUF, zns->netlink_cmd.buf);
 +		zns->netlink_cmd.buflen = 0;
 	}
 	if (zns->netlink_dplane_in.sock >= 0) {
 		hash_release(nlsock_hash, &zns->netlink_dplane_in);
 		close(zns->netlink_dplane_in.sock);
 		zns->netlink_dplane_in.sock = -1;
 +		XFREE(MTYPE_NL_BUF, zns->netlink_dplane_in.buf);
 +		zns->netlink_dplane_in.buflen = 0;
 	}
 	/* During zebra shutdown, we need to leave the dataplane socket
@@ -1730,6 +1742,8 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
 			hash_release(nlsock_hash, &zns->netlink_dplane_out);
 			close(zns->netlink_dplane_out.sock);
 			zns->netlink_dplane_out.sock = -1;
 +			XFREE(MTYPE_NL_BUF, zns->netlink_dplane_out.buf);
 +			zns->netlink_dplane_out.buflen = 0;
 		}
 		hash_free(nlsock_hash);
 diff --git a/zebra/kernel_netlink.h b/zebra/kernel_netlink.h
 index ae88f3372b1..9421ea1c611 100644
 --- a/zebra/kernel_netlink.h
 +++ b/zebra/kernel_netlink.h
@@ -96,7 +96,7 @@ extern const char *nl_family_to_str(uint8_t family);
 extern const char *nl_rttype_to_str(uint8_t rttype);
 extern int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
 -			      const struct nlsock *nl,
 +			      struct nlsock *nl,
 			      const struct zebra_dplane_info *dp_info,
 			      int count, bool startup);
 extern int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns, int startup);
 diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
 index 05297e143b5..4d32e54d1fb 100644
 --- a/zebra/zebra_dplane.c
 +++ b/zebra/zebra_dplane.c
@@ -1469,7 +1469,11 @@ int dplane_ctx_get_ns_sock(const struct zebra_dplane_ctx *ctx)
 {
 	DPLANE_CTX_VALID(ctx);
 +#ifdef HAVE_NETLINK
 	return ctx->zd_ns_info.sock;
 +#else
 +	return -1;
 +#endif
 }
 /* Accessors for nexthop information */
 diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h
 index 0519e1d5b33..7a0ffbc1ee6 100644
 --- a/zebra/zebra_ns.h
 +++ b/zebra/zebra_ns.h
@@ -39,6 +39,9 @@ struct nlsock {
 	int seq;
 	struct sockaddr_nl snl;
 	char name[64];
 +
 +	uint8_t *buf;
 +	size_t buflen;
 };
 #endif
--- a/debian/patches/series
+++ b/debian/patches/series
@ -1,3 +1,6 @@
 pve/0001-enable-bgp-daemon.patch
 pve/0002-bgpd-add-an-option-for-RT-auto-derivation-to-force-A.patch
-upstream/7573cb86a259d3c9ef6eae9dd5d529f8080922cd.patch
+upstream/7573cb86a259d3c9ef6eae9dd5d529f8080922cd.patch
 frr/0001-zebra-buffering.patch
 frr/0002-zebra-buffering.patch
 frr/0003-zebra-buffering.patch