From fe2e3bae6a2207f97437c84f8a7525a7b31f38d6 Mon Sep 17 00:00:00 2001 From: Quentin Young Date: Mon, 26 Apr 2021 18:42:12 -0400 Subject: [PATCH 1/3] Revert "bgpd: improve socket read performance" This reverts commit 97a16e648115919aab3784a6511807e35c20ee20. --- bgpd/bgp_io.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bgpd/bgp_io.c b/bgpd/bgp_io.c index c2d8cae580..fec96700da 100644 --- a/bgpd/bgp_io.c +++ b/bgpd/bgp_io.c @@ -462,10 +462,13 @@ done : { */ static uint16_t bgp_read(struct peer *peer, int *code_p) { + size_t readsize; // how many bytes we want to read ssize_t nbytes; // how many bytes we actually read uint16_t status = 0; + uint8_t ibw[peer->max_packet_size * BGP_READ_PACKET_MAX]; - nbytes = ringbuf_read(peer->ibuf_work, peer->fd); + readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw)); + nbytes = read(peer->fd, ibw, readsize); /* EAGAIN or EWOULDBLOCK; come back later */ if (nbytes < 0 && ERRNO_IO_RETRY(errno)) { @@ -493,6 +496,9 @@ static uint16_t bgp_read(struct peer *peer, int *code_p) *code_p = TCP_connection_closed; SET_FLAG(status, BGP_IO_FATAL_ERR); + } else { + assert(ringbuf_put(peer->ibuf_work, ibw, nbytes) + == (size_t)nbytes); } return status; From 6c55ee964e600cba385afabb8438b9d09eb509f2 Mon Sep 17 00:00:00 2001 From: Quentin Young Date: Mon, 26 Apr 2021 18:42:19 -0400 Subject: [PATCH 2/3] Revert "lib: add ringbuf socket read function" This reverts commit d9d7af1a52d77ed0074ebb72f87678308296e74e. --- lib/ringbuf.c | 35 ----------------------------------- lib/ringbuf.h | 11 ----------- 2 files changed, 46 deletions(-) diff --git a/lib/ringbuf.c b/lib/ringbuf.c index 49221e7cb3..6efa8077c2 100644 --- a/lib/ringbuf.c +++ b/lib/ringbuf.c @@ -131,38 +131,3 @@ void ringbuf_wipe(struct ringbuf *buf) memset(buf->data, 0x00, buf->size); ringbuf_reset(buf); } - -ssize_t ringbuf_read(struct ringbuf *buf, int sock) -{ - size_t to_read = ringbuf_space(buf); - size_t bytes_to_end = buf->size - buf->end; - ssize_t bytes_read; - struct iovec iov[2] = {}; - - /* Calculate amount of read blocks. */ - if (to_read > bytes_to_end) { - iov[0].iov_base = buf->data + buf->end; - iov[0].iov_len = bytes_to_end; - iov[1].iov_base = buf->data; - iov[1].iov_len = to_read - bytes_to_end; - } else { - iov[0].iov_base = buf->data + buf->end; - iov[0].iov_len = to_read; - } - - /* Do the system call. */ - bytes_read = readv(sock, iov, 2); - if (bytes_read <= 0) - return bytes_read; - - /* Calculate the new end. */ - if ((size_t)bytes_read > bytes_to_end) - buf->end = bytes_read - bytes_to_end; - else - buf->end += bytes_read; - - /* Set emptiness state. */ - buf->empty = (buf->start == buf->end) && (buf->empty && !bytes_read); - - return bytes_read; -} diff --git a/lib/ringbuf.h b/lib/ringbuf.h index 209687512b..b8f4d9798d 100644 --- a/lib/ringbuf.h +++ b/lib/ringbuf.h @@ -126,17 +126,6 @@ void ringbuf_reset(struct ringbuf *buf); */ void ringbuf_wipe(struct ringbuf *buf); -/** - * Perform a socket/file `read()` in to the ring buffer. - * - * \param buf the ring buffer pointer. - * \param sock the file descriptor. - * \returns the number of bytes read, `0` on connection close or `-1` with - * `errno` pointing the error (see `readv()` man page for more - * information.) - */ -ssize_t ringbuf_read(struct ringbuf *buf, int sock); - #ifdef __cplusplus } #endif From 338f4a78cc0078e5f59780fe883084c26842157b Mon Sep 17 00:00:00 2001 From: Quentin Young Date: Mon, 26 Apr 2021 18:59:48 -0400 Subject: [PATCH 3/3] bgpd: avoid allocating very large stack buffer As pointed out on code review of BGP extended messages, increasing the maximum BGP message size has the consequence of growing the dynamically sized stack buffer up to 650K. While unlikely to exceed modern stack sizes it is still unreasonably large. Remedy this with a heap buffer. Signed-off-by: Quentin Young --- bgpd/bgp_io.c | 8 ++++---- bgpd/bgpd.h | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/bgpd/bgp_io.c b/bgpd/bgp_io.c index fec96700da..99d0344c9f 100644 --- a/bgpd/bgp_io.c +++ b/bgpd/bgp_io.c @@ -465,10 +465,10 @@ static uint16_t bgp_read(struct peer *peer, int *code_p) size_t readsize; // how many bytes we want to read ssize_t nbytes; // how many bytes we actually read uint16_t status = 0; - uint8_t ibw[peer->max_packet_size * BGP_READ_PACKET_MAX]; - readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw)); - nbytes = read(peer->fd, ibw, readsize); + readsize = + MIN(ringbuf_space(peer->ibuf_work), sizeof(peer->ibuf_scratch)); + nbytes = read(peer->fd, peer->ibuf_scratch, readsize); /* EAGAIN or EWOULDBLOCK; come back later */ if (nbytes < 0 && ERRNO_IO_RETRY(errno)) { @@ -497,7 +497,7 @@ static uint16_t bgp_read(struct peer *peer, int *code_p) SET_FLAG(status, BGP_IO_FATAL_ERR); } else { - assert(ringbuf_put(peer->ibuf_work, ibw, nbytes) + assert(ringbuf_put(peer->ibuf_work, peer->ibuf_scratch, nbytes) == (size_t)nbytes); } diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 51134dc8c5..88588952ba 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -44,6 +44,7 @@ #include "bgp_addpath_types.h" #include "bgp_nexthop.h" #include "bgp_damp.h" +#include "bgp_io.h" #include "lib/bfd.h" @@ -1047,6 +1048,9 @@ struct peer { struct stream_fifo *ibuf; // packets waiting to be processed struct stream_fifo *obuf; // packets waiting to be written + /* used as a block to deposit raw wire data to */ + uint8_t ibuf_scratch[BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE + * BGP_READ_PACKET_MAX]; struct ringbuf *ibuf_work; // WiP buffer used by bgp_read() only struct stream *obuf_work; // WiP buffer used to construct packets