mirror of
https://git.proxmox.com/git/mirror_frr
synced 2025-08-02 23:38:55 +00:00
Merge pull request #5150 from qlyoung/bgp-vector-io-4
BGP vector I/O - Redux
This commit is contained in:
commit
10e75ceb6a
103
bgpd/bgp_io.c
103
bgpd/bgp_io.c
@ -22,6 +22,7 @@
|
||||
/* clang-format off */
|
||||
#include <zebra.h>
|
||||
#include <pthread.h> // for pthread_mutex_unlock, pthread_mutex_lock
|
||||
#include <sys/uio.h> // for writev
|
||||
|
||||
#include "frr_pthread.h"
|
||||
#include "linklist.h" // for list_delete, list_delete_all_node, lis...
|
||||
@ -275,35 +276,96 @@ static uint16_t bgp_write(struct peer *peer)
|
||||
{
|
||||
uint8_t type;
|
||||
struct stream *s;
|
||||
int num;
|
||||
int update_last_write = 0;
|
||||
unsigned int count = 0;
|
||||
unsigned int count;
|
||||
uint32_t uo = 0;
|
||||
uint16_t status = 0;
|
||||
uint32_t wpkt_quanta_old;
|
||||
|
||||
int writenum = 0;
|
||||
int num;
|
||||
unsigned int iovsz;
|
||||
unsigned int strmsz;
|
||||
unsigned int total_written;
|
||||
|
||||
wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
|
||||
memory_order_relaxed);
|
||||
struct stream *ostreams[wpkt_quanta_old];
|
||||
struct stream **streams = ostreams;
|
||||
struct iovec iov[wpkt_quanta_old];
|
||||
|
||||
while (count < wpkt_quanta_old && (s = stream_fifo_head(peer->obuf))) {
|
||||
int writenum;
|
||||
do {
|
||||
writenum = stream_get_endp(s) - stream_get_getp(s);
|
||||
num = write(peer->fd, stream_pnt(s), writenum);
|
||||
s = stream_fifo_head(peer->obuf);
|
||||
|
||||
if (num < 0) {
|
||||
if (!ERRNO_IO_RETRY(errno)) {
|
||||
BGP_EVENT_ADD(peer, TCP_fatal_error);
|
||||
SET_FLAG(status, BGP_IO_FATAL_ERR);
|
||||
} else {
|
||||
SET_FLAG(status, BGP_IO_TRANS_ERR);
|
||||
}
|
||||
if (!s)
|
||||
goto done;
|
||||
|
||||
goto done;
|
||||
} else if (num != writenum)
|
||||
stream_forward_getp(s, num);
|
||||
count = iovsz = 0;
|
||||
while (count < wpkt_quanta_old && iovsz < array_size(iov) && s) {
|
||||
ostreams[iovsz] = s;
|
||||
iov[iovsz].iov_base = stream_pnt(s);
|
||||
iov[iovsz].iov_len = STREAM_READABLE(s);
|
||||
writenum += STREAM_READABLE(s);
|
||||
s = s->next;
|
||||
++iovsz;
|
||||
++count;
|
||||
}
|
||||
|
||||
} while (num != writenum);
|
||||
strmsz = iovsz;
|
||||
total_written = 0;
|
||||
|
||||
do {
|
||||
num = writev(peer->fd, iov, iovsz);
|
||||
|
||||
if (num < 0) {
|
||||
if (!ERRNO_IO_RETRY(errno)) {
|
||||
BGP_EVENT_ADD(peer, TCP_fatal_error);
|
||||
SET_FLAG(status, BGP_IO_FATAL_ERR);
|
||||
} else {
|
||||
SET_FLAG(status, BGP_IO_TRANS_ERR);
|
||||
}
|
||||
|
||||
break;
|
||||
} else if (num != writenum) {
|
||||
unsigned int msg_written = 0;
|
||||
unsigned int ic = iovsz;
|
||||
|
||||
for (unsigned int i = 0; i < ic; i++) {
|
||||
size_t ss = iov[i].iov_len;
|
||||
|
||||
if (ss > (unsigned int) num)
|
||||
break;
|
||||
|
||||
msg_written++;
|
||||
iovsz--;
|
||||
writenum -= ss;
|
||||
num -= ss;
|
||||
}
|
||||
|
||||
total_written += msg_written;
|
||||
|
||||
assert(total_written < count);
|
||||
|
||||
memmove(&iov, &iov[msg_written],
|
||||
sizeof(iov[0]) * iovsz);
|
||||
streams = &streams[msg_written];
|
||||
stream_forward_getp(streams[0], num);
|
||||
iov[0].iov_base = stream_pnt(streams[0]);
|
||||
iov[0].iov_len = STREAM_READABLE(streams[0]);
|
||||
|
||||
writenum -= num;
|
||||
num = 0;
|
||||
assert(writenum > 0);
|
||||
} else {
|
||||
total_written = strmsz;
|
||||
}
|
||||
|
||||
} while (num != writenum);
|
||||
|
||||
/* Handle statistics */
|
||||
for (unsigned int i = 0; i < total_written; i++) {
|
||||
s = stream_fifo_pop(peer->obuf);
|
||||
|
||||
assert(s == ostreams[i]);
|
||||
|
||||
/* Retrieve BGP packet type. */
|
||||
stream_set_getp(s, BGP_MARKER_SIZE + 2);
|
||||
@ -351,9 +413,8 @@ static uint16_t bgp_write(struct peer *peer)
|
||||
break;
|
||||
}
|
||||
|
||||
count++;
|
||||
|
||||
stream_free(stream_fifo_pop(peer->obuf));
|
||||
stream_free(s);
|
||||
ostreams[i] = NULL;
|
||||
update_last_write = 1;
|
||||
}
|
||||
|
||||
|
@ -22,7 +22,7 @@
|
||||
#ifndef _FRR_BGP_IO_H
|
||||
#define _FRR_BGP_IO_H
|
||||
|
||||
#define BGP_WRITE_PACKET_MAX 10U
|
||||
#define BGP_WRITE_PACKET_MAX 64U
|
||||
#define BGP_READ_PACKET_MAX 10U
|
||||
|
||||
#include "bgpd/bgpd.h"
|
||||
|
@ -1586,36 +1586,24 @@ DEFUN (no_bgp_update_delay,
|
||||
}
|
||||
|
||||
|
||||
static int bgp_wpkt_quanta_config_vty(struct vty *vty, const char *num,
|
||||
char set)
|
||||
static int bgp_wpkt_quanta_config_vty(struct vty *vty, uint32_t quanta,
|
||||
bool set)
|
||||
{
|
||||
VTY_DECLVAR_CONTEXT(bgp, bgp);
|
||||
|
||||
if (set) {
|
||||
uint32_t quanta = strtoul(num, NULL, 10);
|
||||
atomic_store_explicit(&bgp->wpkt_quanta, quanta,
|
||||
memory_order_relaxed);
|
||||
} else {
|
||||
atomic_store_explicit(&bgp->wpkt_quanta, BGP_WRITE_PACKET_MAX,
|
||||
memory_order_relaxed);
|
||||
}
|
||||
quanta = set ? quanta : BGP_WRITE_PACKET_MAX;
|
||||
atomic_store_explicit(&bgp->wpkt_quanta, quanta, memory_order_relaxed);
|
||||
|
||||
return CMD_SUCCESS;
|
||||
}
|
||||
|
||||
static int bgp_rpkt_quanta_config_vty(struct vty *vty, const char *num,
|
||||
char set)
|
||||
static int bgp_rpkt_quanta_config_vty(struct vty *vty, uint32_t quanta,
|
||||
bool set)
|
||||
{
|
||||
VTY_DECLVAR_CONTEXT(bgp, bgp);
|
||||
|
||||
if (set) {
|
||||
uint32_t quanta = strtoul(num, NULL, 10);
|
||||
atomic_store_explicit(&bgp->rpkt_quanta, quanta,
|
||||
memory_order_relaxed);
|
||||
} else {
|
||||
atomic_store_explicit(&bgp->rpkt_quanta, BGP_READ_PACKET_MAX,
|
||||
memory_order_relaxed);
|
||||
}
|
||||
quanta = set ? quanta : BGP_READ_PACKET_MAX;
|
||||
atomic_store_explicit(&bgp->rpkt_quanta, quanta, memory_order_relaxed);
|
||||
|
||||
return CMD_SUCCESS;
|
||||
}
|
||||
@ -1636,47 +1624,32 @@ void bgp_config_write_rpkt_quanta(struct vty *vty, struct bgp *bgp)
|
||||
vty_out(vty, " read-quanta %d\n", quanta);
|
||||
}
|
||||
|
||||
/* Packet quanta configuration */
|
||||
DEFUN (bgp_wpkt_quanta,
|
||||
/* Packet quanta configuration
|
||||
*
|
||||
* XXX: The value set here controls the size of a stack buffer in the IO
|
||||
* thread. When changing these limits be careful to prevent stack overflow.
|
||||
*
|
||||
* Furthermore, the maximums used here should correspond to
|
||||
* BGP_WRITE_PACKET_MAX and BGP_READ_PACKET_MAX.
|
||||
*/
|
||||
DEFPY (bgp_wpkt_quanta,
|
||||
bgp_wpkt_quanta_cmd,
|
||||
"write-quanta (1-10)",
|
||||
"[no] write-quanta (1-64)$quanta",
|
||||
NO_STR
|
||||
"How many packets to write to peer socket per run\n"
|
||||
"Number of packets\n")
|
||||
{
|
||||
int idx_number = 1;
|
||||
return bgp_wpkt_quanta_config_vty(vty, argv[idx_number]->arg, 1);
|
||||
return bgp_wpkt_quanta_config_vty(vty, quanta, !no);
|
||||
}
|
||||
|
||||
DEFUN (no_bgp_wpkt_quanta,
|
||||
no_bgp_wpkt_quanta_cmd,
|
||||
"no write-quanta (1-10)",
|
||||
NO_STR
|
||||
"How many packets to write to peer socket per I/O cycle\n"
|
||||
"Number of packets\n")
|
||||
{
|
||||
int idx_number = 2;
|
||||
return bgp_wpkt_quanta_config_vty(vty, argv[idx_number]->arg, 0);
|
||||
}
|
||||
|
||||
DEFUN (bgp_rpkt_quanta,
|
||||
DEFPY (bgp_rpkt_quanta,
|
||||
bgp_rpkt_quanta_cmd,
|
||||
"read-quanta (1-10)",
|
||||
"How many packets to read from peer socket per I/O cycle\n"
|
||||
"Number of packets\n")
|
||||
{
|
||||
int idx_number = 1;
|
||||
return bgp_rpkt_quanta_config_vty(vty, argv[idx_number]->arg, 1);
|
||||
}
|
||||
|
||||
DEFUN (no_bgp_rpkt_quanta,
|
||||
no_bgp_rpkt_quanta_cmd,
|
||||
"no read-quanta (1-10)",
|
||||
"[no] read-quanta (1-10)$quanta",
|
||||
NO_STR
|
||||
"How many packets to read from peer socket per I/O cycle\n"
|
||||
"Number of packets\n")
|
||||
{
|
||||
int idx_number = 2;
|
||||
return bgp_rpkt_quanta_config_vty(vty, argv[idx_number]->arg, 0);
|
||||
return bgp_rpkt_quanta_config_vty(vty, quanta, !no);
|
||||
}
|
||||
|
||||
void bgp_config_write_coalesce_time(struct vty *vty, struct bgp *bgp)
|
||||
@ -13072,9 +13045,7 @@ void bgp_vty_init(void)
|
||||
install_element(BGP_NODE, &bgp_update_delay_establish_wait_cmd);
|
||||
|
||||
install_element(BGP_NODE, &bgp_wpkt_quanta_cmd);
|
||||
install_element(BGP_NODE, &no_bgp_wpkt_quanta_cmd);
|
||||
install_element(BGP_NODE, &bgp_rpkt_quanta_cmd);
|
||||
install_element(BGP_NODE, &no_bgp_rpkt_quanta_cmd);
|
||||
|
||||
install_element(BGP_NODE, &bgp_coalesce_time_cmd);
|
||||
install_element(BGP_NODE, &no_bgp_coalesce_time_cmd);
|
||||
|
@ -2167,6 +2167,8 @@ Dumping Messages and Routing Tables
|
||||
Other BGP Commands
|
||||
------------------
|
||||
|
||||
The following are available in the top level *enable* mode:
|
||||
|
||||
.. index:: clear bgp \*
|
||||
.. clicmd:: clear bgp \*
|
||||
|
||||
@ -2202,6 +2204,24 @@ Other BGP Commands
|
||||
|
||||
Clear peer using soft reconfiguration in this address-family and sub-address-family.
|
||||
|
||||
The following are available in the ``router bgp`` mode:
|
||||
|
||||
.. index:: write-quanta (1-64)
|
||||
.. clicmd:: write-quanta (1-64)
|
||||
|
||||
BGP message Tx I/O is vectored. This means that multiple packets are written
|
||||
to the peer socket at the same time each I/O cycle, in order to minimize
|
||||
system call overhead. This value controls how many are written at a time.
|
||||
Under certain load conditions, reducing this value could make peer traffic
|
||||
less 'bursty'. In practice, leave this settings on the default (64) unless
|
||||
you truly know what you are doing.
|
||||
|
||||
.. index:: read-quanta (1-10)
|
||||
.. index:: read-quanta (1-10)
|
||||
|
||||
Unlike Tx, BGP Rx traffic is not vectored. Packets are read off the wire one
|
||||
at a time in a loop. This setting controls how many iterations the loop runs
|
||||
for. As with write-quanta, it is best to leave this setting on the default.
|
||||
|
||||
.. _bgp-displaying-bgp-information:
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user