Merge pull request #5150 from qlyoung/bgp-vector-io-4

BGP vector I/O - Redux
Mark Stapp 2019-10-17 10:57:47 -04:00 committed by GitHub
commit 10e75ceb6a
4 changed files with 126 additions and 74 deletions

bgpd/bgp_io.c

@@ -22,6 +22,7 @@
 /* clang-format off */
 #include <zebra.h>
 #include <pthread.h>		// for pthread_mutex_unlock, pthread_mutex_lock
+#include <sys/uio.h>		// for writev
 
 #include "frr_pthread.h"
 #include "linklist.h"		// for list_delete, list_delete_all_node, lis...
@@ -275,21 +276,45 @@ static uint16_t bgp_write(struct peer *peer)
 {
 	uint8_t type;
 	struct stream *s;
-	int num;
 	int update_last_write = 0;
-	unsigned int count = 0;
+	unsigned int count;
 	uint32_t uo = 0;
 	uint16_t status = 0;
 	uint32_t wpkt_quanta_old;
 
+	int writenum = 0;
+	int num;
+	unsigned int iovsz;
+	unsigned int strmsz;
+	unsigned int total_written;
+
 	wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
 					       memory_order_relaxed);
+	struct stream *ostreams[wpkt_quanta_old];
+	struct stream **streams = ostreams;
+	struct iovec iov[wpkt_quanta_old];
+
+	s = stream_fifo_head(peer->obuf);
+
+	if (!s)
+		goto done;
+
+	count = iovsz = 0;
+	while (count < wpkt_quanta_old && iovsz < array_size(iov) && s) {
+		ostreams[iovsz] = s;
+		iov[iovsz].iov_base = stream_pnt(s);
+		iov[iovsz].iov_len = STREAM_READABLE(s);
+		writenum += STREAM_READABLE(s);
+		s = s->next;
+		++iovsz;
+		++count;
+	}
+
+	strmsz = iovsz;
+	total_written = 0;
 
-	while (count < wpkt_quanta_old && (s = stream_fifo_head(peer->obuf))) {
-		int writenum;
 	do {
-		writenum = stream_get_endp(s) - stream_get_getp(s);
-		num = write(peer->fd, stream_pnt(s), writenum);
+		num = writev(peer->fd, iov, iovsz);
 
 		if (num < 0) {
 			if (!ERRNO_IO_RETRY(errno)) {
@@ -299,12 +324,49 @@ static uint16_t bgp_write(struct peer *peer)
 				SET_FLAG(status, BGP_IO_TRANS_ERR);
 			}
 
-			goto done;
-		} else if (num != writenum)
-			stream_forward_getp(s, num);
+			break;
+		} else if (num != writenum) {
+			unsigned int msg_written = 0;
+			unsigned int ic = iovsz;
+
+			for (unsigned int i = 0; i < ic; i++) {
+				size_t ss = iov[i].iov_len;
+
+				if (ss > (unsigned int) num)
+					break;
+
+				msg_written++;
+				iovsz--;
+
+				writenum -= ss;
+				num -= ss;
+			}
+
+			total_written += msg_written;
+
+			assert(total_written < count);
+
+			memmove(&iov, &iov[msg_written],
+				sizeof(iov[0]) * iovsz);
+			streams = &streams[msg_written];
+			stream_forward_getp(streams[0], num);
+			iov[0].iov_base = stream_pnt(streams[0]);
+			iov[0].iov_len = STREAM_READABLE(streams[0]);
+
+			writenum -= num;
+			num = 0;
+			assert(writenum > 0);
+		} else {
+			total_written = strmsz;
+		}
 
 	} while (num != writenum);
 
+	/* Handle statistics */
+	for (unsigned int i = 0; i < total_written; i++) {
+		s = stream_fifo_pop(peer->obuf);
+
+		assert(s == ostreams[i]);
+
 		/* Retrieve BGP packet type. */
 		stream_set_getp(s, BGP_MARKER_SIZE + 2);
 		type = stream_getc(s);
@@ -351,9 +413,8 @@ static uint16_t bgp_write(struct peer *peer)
 			break;
 		}
 
-		count++;
-
-		stream_free(stream_fifo_pop(peer->obuf));
+		stream_free(s);
+		ostreams[i] = NULL;
 		update_last_write = 1;
 	}
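Editor's note, not part of the diff: the core trick above is the short-write bookkeeping after writev(). Below is a simplified standalone sketch of that same idea (plain POSIX C, not FRR code); the helper name advance_iov and the sample buffers are invented for illustration. After a partial writev(), the entries that were written in full are dropped from the iovec array and the first partially written entry is advanced, so a retry picks up exactly where the kernel stopped.

/*
 * Standalone sketch of partial-writev bookkeeping.
 * advance_iov() is a hypothetical helper, not an FRR function.
 */
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

/* Drop fully-written iovec entries and advance the first partial one.
 * Returns how many entries were consumed entirely; iov[] and *iovcnt
 * are updated in place so the caller can retry writev() with the rest. */
static size_t advance_iov(struct iovec *iov, int *iovcnt, size_t written)
{
	size_t consumed = 0;

	while (*iovcnt > 0 && written >= iov[0].iov_len) {
		written -= iov[0].iov_len;
		memmove(&iov[0], &iov[1], sizeof(iov[0]) * (*iovcnt - 1));
		(*iovcnt)--;
		consumed++;
	}

	if (*iovcnt > 0 && written > 0) {
		/* Partially written entry: skip the bytes already sent. */
		iov[0].iov_base = (char *)iov[0].iov_base + written;
		iov[0].iov_len -= written;
	}

	return consumed;
}

int main(void)
{
	char a[] = "first ", b[] = "second ", c[] = "third\n";
	struct iovec iov[3] = {
		{ a, sizeof(a) - 1 }, { b, sizeof(b) - 1 }, { c, sizeof(c) - 1 },
	};
	int iovcnt = 3;

	/* Pretend writev() reported a short write of 9 bytes. */
	size_t done = advance_iov(iov, &iovcnt, 9);

	printf("consumed %zu entries, %d left, next chunk: %.*s",
	       done, iovcnt, (int)iov[0].iov_len, (char *)iov[0].iov_base);
	return 0;
}

The patch keeps the same invariant with its msg_written/memmove pass, and additionally records how many whole BGP messages went out so the statistics loop only pops streams that were fully written.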

bgpd/bgp_io.h

@@ -22,7 +22,7 @@
 #ifndef _FRR_BGP_IO_H
 #define _FRR_BGP_IO_H
 
-#define BGP_WRITE_PACKET_MAX 10U
+#define BGP_WRITE_PACKET_MAX 64U
 #define BGP_READ_PACKET_MAX 10U
 
 #include "bgpd/bgpd.h"

bgpd/bgp_vty.c

@@ -1586,36 +1586,24 @@ DEFUN (no_bgp_update_delay,
 }
 
-static int bgp_wpkt_quanta_config_vty(struct vty *vty, const char *num,
-				      char set)
+static int bgp_wpkt_quanta_config_vty(struct vty *vty, uint32_t quanta,
+				      bool set)
 {
 	VTY_DECLVAR_CONTEXT(bgp, bgp);
 
-	if (set) {
-		uint32_t quanta = strtoul(num, NULL, 10);
-		atomic_store_explicit(&bgp->wpkt_quanta, quanta,
-				      memory_order_relaxed);
-	} else {
-		atomic_store_explicit(&bgp->wpkt_quanta, BGP_WRITE_PACKET_MAX,
-				      memory_order_relaxed);
-	}
+	quanta = set ? quanta : BGP_WRITE_PACKET_MAX;
+	atomic_store_explicit(&bgp->wpkt_quanta, quanta, memory_order_relaxed);
 
 	return CMD_SUCCESS;
 }
 
-static int bgp_rpkt_quanta_config_vty(struct vty *vty, const char *num,
-				      char set)
+static int bgp_rpkt_quanta_config_vty(struct vty *vty, uint32_t quanta,
+				      bool set)
 {
 	VTY_DECLVAR_CONTEXT(bgp, bgp);
 
-	if (set) {
-		uint32_t quanta = strtoul(num, NULL, 10);
-		atomic_store_explicit(&bgp->rpkt_quanta, quanta,
-				      memory_order_relaxed);
-	} else {
-		atomic_store_explicit(&bgp->rpkt_quanta, BGP_READ_PACKET_MAX,
-				      memory_order_relaxed);
-	}
+	quanta = set ? quanta : BGP_READ_PACKET_MAX;
+	atomic_store_explicit(&bgp->rpkt_quanta, quanta, memory_order_relaxed);
 
 	return CMD_SUCCESS;
 }
@@ -1636,47 +1624,32 @@ void bgp_config_write_rpkt_quanta(struct vty *vty, struct bgp *bgp)
 	vty_out(vty, " read-quanta %d\n", quanta);
 }
 
-/* Packet quanta configuration */
-DEFUN (bgp_wpkt_quanta,
+/* Packet quanta configuration
+ *
+ * XXX: The value set here controls the size of a stack buffer in the IO
+ * thread. When changing these limits be careful to prevent stack overflow.
+ *
+ * Furthermore, the maximums used here should correspond to
+ * BGP_WRITE_PACKET_MAX and BGP_READ_PACKET_MAX.
+ */
+DEFPY (bgp_wpkt_quanta,
        bgp_wpkt_quanta_cmd,
-       "write-quanta (1-10)",
+       "[no] write-quanta (1-64)$quanta",
+       NO_STR
        "How many packets to write to peer socket per run\n"
        "Number of packets\n")
 {
-	int idx_number = 1;
-	return bgp_wpkt_quanta_config_vty(vty, argv[idx_number]->arg, 1);
+	return bgp_wpkt_quanta_config_vty(vty, quanta, !no);
 }
 
-DEFUN (no_bgp_wpkt_quanta,
-       no_bgp_wpkt_quanta_cmd,
-       "no write-quanta (1-10)",
-       NO_STR
-       "How many packets to write to peer socket per I/O cycle\n"
-       "Number of packets\n")
-{
-	int idx_number = 2;
-	return bgp_wpkt_quanta_config_vty(vty, argv[idx_number]->arg, 0);
-}
-
-DEFUN (bgp_rpkt_quanta,
+DEFPY (bgp_rpkt_quanta,
        bgp_rpkt_quanta_cmd,
-       "read-quanta (1-10)",
-       "How many packets to read from peer socket per I/O cycle\n"
-       "Number of packets\n")
-{
-	int idx_number = 1;
-	return bgp_rpkt_quanta_config_vty(vty, argv[idx_number]->arg, 1);
-}
-
-DEFUN (no_bgp_rpkt_quanta,
-       no_bgp_rpkt_quanta_cmd,
-       "no read-quanta (1-10)",
+       "[no] read-quanta (1-10)$quanta",
        NO_STR
        "How many packets to read from peer socket per I/O cycle\n"
        "Number of packets\n")
 {
-	int idx_number = 2;
-	return bgp_rpkt_quanta_config_vty(vty, argv[idx_number]->arg, 0);
+	return bgp_rpkt_quanta_config_vty(vty, quanta, !no);
 }
void bgp_config_write_coalesce_time(struct vty *vty, struct bgp *bgp) void bgp_config_write_coalesce_time(struct vty *vty, struct bgp *bgp)
@@ -13072,9 +13045,7 @@ void bgp_vty_init(void)
 	install_element(BGP_NODE, &bgp_update_delay_establish_wait_cmd);
 
 	install_element(BGP_NODE, &bgp_wpkt_quanta_cmd);
-	install_element(BGP_NODE, &no_bgp_wpkt_quanta_cmd);
 	install_element(BGP_NODE, &bgp_rpkt_quanta_cmd);
-	install_element(BGP_NODE, &no_bgp_rpkt_quanta_cmd);
 
 	install_element(BGP_NODE, &bgp_coalesce_time_cmd);
 	install_element(BGP_NODE, &no_bgp_coalesce_time_cmd);
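Editor's note, not part of the diff: the reworked handlers collapse the set/unset branches into a ternary and still publish the value with atomic_store_explicit(), because the quanta are written from the CLI thread but read in the I/O thread. A minimal standalone sketch of that "set, or fall back to the compile-time default, then publish atomically" pattern follows; the names (struct cfg, set_write_quanta) are invented, and it assumes C11 <stdatomic.h> rather than FRR's wrappers.

/* Standalone sketch of the atomic quanta-config pattern (not FRR code). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define WRITE_QUANTA_DEFAULT 64U

struct cfg {
	_Atomic uint32_t wpkt_quanta; /* read concurrently by the I/O thread */
};

static void set_write_quanta(struct cfg *c, uint32_t quanta, bool set)
{
	/* The "no" form simply restores the default. */
	quanta = set ? quanta : WRITE_QUANTA_DEFAULT;
	atomic_store_explicit(&c->wpkt_quanta, quanta, memory_order_relaxed);
}

int main(void)
{
	struct cfg c = { .wpkt_quanta = WRITE_QUANTA_DEFAULT };

	set_write_quanta(&c, 32, true);   /* e.g. "write-quanta 32" */
	printf("%u\n", atomic_load_explicit(&c.wpkt_quanta,
					    memory_order_relaxed));

	set_write_quanta(&c, 0, false);   /* e.g. the "no" form */
	printf("%u\n", atomic_load_explicit(&c.wpkt_quanta,
					    memory_order_relaxed));
	return 0;
}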

doc/user/bgp.rst

@@ -2167,6 +2167,8 @@ Dumping Messages and Routing Tables
 Other BGP Commands
 ------------------
 
+The following are available in the top level *enable* mode:
+
 .. index:: clear bgp \*
 .. clicmd:: clear bgp \*
@@ -2202,6 +2204,24 @@ Other BGP Commands
    Clear peer using soft reconfiguration in this address-family and sub-address-family.
 
+The following are available in the ``router bgp`` mode:
+
+.. index:: write-quanta (1-64)
+.. clicmd:: write-quanta (1-64)
+
+   BGP message Tx I/O is vectored. This means that multiple packets are written
+   to the peer socket at the same time in each I/O cycle, in order to minimize
+   system call overhead. This value controls how many are written at a time.
+   Under certain load conditions, reducing this value could make peer traffic
+   less 'bursty'. In practice, leave this setting at its default (64) unless
+   you truly know what you are doing.
+
+.. index:: read-quanta (1-10)
+.. clicmd:: read-quanta (1-10)
+
+   Unlike Tx, BGP Rx traffic is not vectored. Packets are read off the wire one
+   at a time in a loop. This setting controls how many iterations the loop runs
+   for. As with write-quanta, it is best to leave this setting at its default.
+
 .. _bgp-displaying-bgp-information:
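Editor's illustration, not part of the patch: with the new syntax the quanta knobs are ordinary ``router bgp`` sub-commands, and their ``no`` forms fall back to the compiled-in defaults (64 writes, 10 reads per I/O cycle). The ASN and values below are arbitrary examples.

router bgp 65001
 write-quanta 32
 read-quanta 5
!
! later, restore the defaults
router bgp 65001
 no write-quanta 32
 no read-quanta 5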