From 772aae8b1dca9b14117aa3716286d763b6f3da88 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Mon, 7 Oct 2019 11:31:33 -0400 Subject: [PATCH 1/3] lib: Add getsockopt_so_recvbuf Add a new function getsockopt_so_recvbuf which tells you the operating systems receive buffer size. Signed-off-by: Donald Sharp --- lib/sockopt.c | 15 +++++++++++++++ lib/sockopt.h | 1 + 2 files changed, 16 insertions(+) diff --git a/lib/sockopt.c b/lib/sockopt.c index 8e38a29278..7726d74ff7 100644 --- a/lib/sockopt.c +++ b/lib/sockopt.c @@ -72,6 +72,21 @@ int getsockopt_so_sendbuf(const int sock) return optval; } +int getsockopt_so_recvbuf(const int sock) +{ + uint32_t optval; + socklen_t optlen = sizeof(optval); + int ret = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, (char *)&optval, + &optlen); + if (ret < 0) { + flog_err_sys(EC_LIB_SYSTEM_CALL, + "fd %d: can't getsockopt SO_RCVBUF: %d (%s)", sock, + errno, safe_strerror(errno)); + return ret; + } + return optval; +} + static void *getsockopt_cmsg_data(struct msghdr *msgh, int level, int type) { struct cmsghdr *cmsg; diff --git a/lib/sockopt.h b/lib/sockopt.h index 732fec92aa..f6b57b8e07 100644 --- a/lib/sockopt.h +++ b/lib/sockopt.h @@ -30,6 +30,7 @@ extern "C" { extern void setsockopt_so_recvbuf(int sock, int size); extern void setsockopt_so_sendbuf(const int sock, int size); extern int getsockopt_so_sendbuf(const int sock); +extern int getsockopt_so_recvbuf(const int sock); extern int setsockopt_ipv6_pktinfo(int, int); extern int setsockopt_ipv6_checksum(int, int); From c2d020ad713d5606156b9502fde6f7dc8bda1b90 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 4 Oct 2019 14:33:01 -0400 Subject: [PATCH 2/3] bgpd: Add ability to set tcp socket buffer size Add -s X or --socket_size X to the bgp cli to allow the end user to specify the outgoing bgp tcp kernel socket buffer size. It is recommended that this option is only used on large scale operations. 
Signed-off-by: Donald Sharp --- bgpd/bgp_main.c | 12 +++++++++--- bgpd/bgp_network.c | 14 ++++++++++---- bgpd/bgpd.c | 3 ++- bgpd/bgpd.h | 6 +++++- tests/bgpd/test_aspath.c | 2 +- tests/bgpd/test_capability.c | 2 +- tests/bgpd/test_mp_attr.c | 3 ++- tests/bgpd/test_mpath.c | 3 ++- tests/bgpd/test_packet.c | 3 ++- tests/bgpd/test_peer_attr.c | 3 ++- 10 files changed, 36 insertions(+), 15 deletions(-) diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index ef73b47ffb..6b91a2cf1f 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -75,6 +75,7 @@ static const struct option longopts[] = { {"ecmp", required_argument, NULL, 'e'}, {"int_num", required_argument, NULL, 'I'}, {"no_zebra", no_argument, NULL, 'Z'}, + {"socket_size", required_argument, NULL, 's'}, {0}}; /* signal definitions */ @@ -386,17 +387,19 @@ int main(int argc, char **argv) int no_zebra_flag = 0; int skip_runas = 0; int instance = 0; + int buffer_size = BGP_SOCKET_SNDBUF_SIZE; frr_preinit(&bgpd_di, argc, argv); frr_opt_add( - "p:l:SnZe:I:" DEPRECATED_OPTIONS, longopts, + "p:l:SnZe:I:s:" DEPRECATED_OPTIONS, longopts, " -p, --bgp_port Set BGP listen port number (0 means do not listen).\n" " -l, --listenon Listen on specified address (implies -n)\n" " -n, --no_kernel Do not install route to kernel.\n" " -Z, --no_zebra Do not communicate with Zebra.\n" " -S, --skip_runas Skip capabilities checks, and changing user and group IDs.\n" " -e, --ecmp Specify ECMP to use.\n" - " -I, --int_num Set instance number (label-manager)\n"); + " -I, --int_num Set instance number (label-manager)\n" + " -s, --socket_size Set BGP peer socket send buffer size\n"); /* Command line argument treatment. 
*/ while (1) { @@ -452,6 +455,9 @@ int main(int argc, char **argv) zlog_err("Instance %i out of range (0..%u)", instance, (unsigned short)-1); break; + case 's': + buffer_size = atoi(optarg); + break; default: frr_help_exit(1); break; @@ -461,7 +467,7 @@ int main(int argc, char **argv) memset(&bgpd_privs, 0, sizeof(bgpd_privs)); /* BGP master init. */ - bgp_master_init(frr_init()); + bgp_master_init(frr_init(), buffer_size); bm->port = bgp_port; if (bgp_port == 0) bgp_option_set(BGP_OPT_NO_LISTEN); diff --git a/bgpd/bgp_network.c b/bgpd/bgp_network.c index 1394c60b25..4031d2dfde 100644 --- a/bgpd/bgp_network.c +++ b/bgpd/bgp_network.c @@ -320,6 +320,14 @@ static int bgp_get_instance_for_inc_conn(int sock, struct bgp **bgp_inst) #endif } +static void bgp_socket_set_buffer_size(const int fd) +{ + if (getsockopt_so_sendbuf(fd) < (int)bm->socket_buffer) + setsockopt_so_sendbuf(fd, bm->socket_buffer); + if (getsockopt_so_recvbuf(fd) < (int)bm->socket_buffer) + setsockopt_so_recvbuf(fd, bm->socket_buffer); +} + /* Accept bgp connection. 
*/ static int bgp_accept(struct thread *thread) { @@ -371,8 +379,7 @@ static int bgp_accept(struct thread *thread) return -1; } - /* Set socket send buffer size */ - setsockopt_so_sendbuf(bgp_sock, BGP_SOCKET_SNDBUF_SIZE); + bgp_socket_set_buffer_size(bgp_sock); /* Check remote IP address */ peer1 = peer_lookup(bgp, &su); @@ -621,8 +628,7 @@ int bgp_connect(struct peer *peer) set_nonblocking(peer->fd); - /* Set socket send buffer size */ - setsockopt_so_sendbuf(peer->fd, BGP_SOCKET_SNDBUF_SIZE); + bgp_socket_set_buffer_size(peer->fd); if (bgp_set_socket_ttl(peer, peer->fd) < 0) return -1; diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 9c0e25bf25..49f7337d76 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -7823,7 +7823,7 @@ int bgp_config_write(struct vty *vty) return 0; } -void bgp_master_init(struct thread_master *master) +void bgp_master_init(struct thread_master *master, const int buffer_size) { qobj_init(); @@ -7838,6 +7838,7 @@ void bgp_master_init(struct thread_master *master) bm->t_rmap_update = NULL; bm->rmap_update_timer = RMAP_DEFAULT_UPDATE_TIMER; bm->terminating = false; + bm->socket_buffer = buffer_size; bgp_process_queue_init(); diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 9b0346da12..87d94500f7 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -156,6 +156,9 @@ struct bgp_master { /* BGP-EVPN VRF ID. 
Defaults to default VRF (if any) */ struct bgp* bgp_evpn; + /* How big should we set the socket buffer size */ + uint32_t socket_buffer; + bool terminating; /* global flag that sigint terminate seen */ QOBJ_FIELDS }; @@ -1575,7 +1578,8 @@ extern char *peer_uptime(time_t uptime2, char *buf, size_t len, bool use_json, extern int bgp_config_write(struct vty *); -extern void bgp_master_init(struct thread_master *master); +extern void bgp_master_init(struct thread_master *master, + const int buffer_size); extern void bgp_init(unsigned short instance); extern void bgp_pthreads_run(void); diff --git a/tests/bgpd/test_aspath.c b/tests/bgpd/test_aspath.c index b5db36703a..925d3112d3 100644 --- a/tests/bgpd/test_aspath.c +++ b/tests/bgpd/test_aspath.c @@ -1339,7 +1339,7 @@ int main(void) { int i = 0; qobj_init(); - bgp_master_init(thread_master_create(NULL)); + bgp_master_init(thread_master_create(NULL), BGP_SOCKET_SNDBUF_SIZE); master = bm->master; bgp_option_set(BGP_OPT_NO_LISTEN); bgp_attr_init(); diff --git a/tests/bgpd/test_capability.c b/tests/bgpd/test_capability.c index db1cf0611d..96e398512b 100644 --- a/tests/bgpd/test_capability.c +++ b/tests/bgpd/test_capability.c @@ -912,7 +912,7 @@ int main(void) qobj_init(); master = thread_master_create(NULL); - bgp_master_init(master); + bgp_master_init(master, BGP_SOCKET_SNDBUF_SIZE); vrf_init(NULL, NULL, NULL, NULL, NULL); bgp_option_set(BGP_OPT_NO_LISTEN); diff --git a/tests/bgpd/test_mp_attr.c b/tests/bgpd/test_mp_attr.c index 603b678cf1..fbf2a9fed2 100644 --- a/tests/bgpd/test_mp_attr.c +++ b/tests/bgpd/test_mp_attr.c @@ -37,6 +37,7 @@ #include "bgpd/bgp_mplsvpn.h" #include "bgpd/bgp_nexthop.h" #include "bgpd/bgp_vty.h" +#include "bgpd/bgp_network.h" #define VT100_RESET "\x1b[0m" #define VT100_RED "\x1b[31m" @@ -1078,7 +1079,7 @@ int main(void) cmd_init(0); bgp_vty_init(); master = thread_master_create("test mp attr"); - bgp_master_init(master); + bgp_master_init(master, BGP_SOCKET_SNDBUF_SIZE); vrf_init(NULL, NULL, 
NULL, NULL, NULL); bgp_option_set(BGP_OPT_NO_LISTEN); bgp_attr_init(); diff --git a/tests/bgpd/test_mpath.c b/tests/bgpd/test_mpath.c index 0ecd0fdfec..21f4b38773 100644 --- a/tests/bgpd/test_mpath.c +++ b/tests/bgpd/test_mpath.c @@ -38,6 +38,7 @@ #include "bgpd/bgp_nexthop.h" #include "bgpd/bgp_mpath.h" #include "bgpd/bgp_evpn.h" +#include "bgpd/bgp_network.h" #define VT100_RESET "\x1b[0m" #define VT100_RED "\x1b[31m" @@ -379,7 +380,7 @@ static int global_test_init(void) qobj_init(); master = thread_master_create(NULL); zclient = zclient_new(master, &zclient_options_default); - bgp_master_init(master); + bgp_master_init(master, BGP_SOCKET_SNDBUF_SIZE); vrf_init(NULL, NULL, NULL, NULL, NULL); bgp_option_set(BGP_OPT_NO_LISTEN); diff --git a/tests/bgpd/test_packet.c b/tests/bgpd/test_packet.c index 9719aceec9..7a038fb02e 100644 --- a/tests/bgpd/test_packet.c +++ b/tests/bgpd/test_packet.c @@ -34,6 +34,7 @@ #include "bgpd/bgp_debug.h" #include "bgpd/bgp_packet.h" #include "bgpd/bgp_aspath.h" +#include "bgpd/bgp_network.h" /* need these to link in libbgp */ struct zebra_privs_t *bgpd_privs = NULL; @@ -58,7 +59,7 @@ int main(int argc, char *argv[]) qobj_init(); bgp_attr_init(); master = thread_master_create(NULL); - bgp_master_init(master); + bgp_master_init(master, BGP_SOCKET_SNDBUF_SIZE); vrf_init(NULL, NULL, NULL, NULL, NULL); bgp_option_set(BGP_OPT_NO_LISTEN); diff --git a/tests/bgpd/test_peer_attr.c b/tests/bgpd/test_peer_attr.c index e5d3030ed1..422d397479 100644 --- a/tests/bgpd/test_peer_attr.c +++ b/tests/bgpd/test_peer_attr.c @@ -29,6 +29,7 @@ #include "bgpd/bgp_route.h" #include "bgpd/bgp_vty.h" #include "bgpd/bgp_zebra.h" +#include "bgpd/bgp_network.h" #ifdef ENABLE_BGP_VNC #include "bgpd/rfapi/rfapi_backend.h" @@ -1388,7 +1389,7 @@ static void bgp_startup(void) master = thread_master_create(NULL); yang_init(); nb_init(master, NULL, 0); - bgp_master_init(master); + bgp_master_init(master, BGP_SOCKET_SNDBUF_SIZE); bgp_option_set(BGP_OPT_NO_LISTEN); 
vrf_init(NULL, NULL, NULL, NULL, NULL); frr_pthread_init(); From 11a9a23664c776cc4cd3794f25f378dc1489e949 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 4 Oct 2019 14:43:58 -0400 Subject: [PATCH 3/3] doc: Add cli options for bgp to documentation Document the bgp cli options. Signed-off-by: Donald Sharp --- doc/manpages/bgpd.rst | 42 ++++++++++++++++++++++++++++++++++++++++++ doc/user/bgp.rst | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/doc/manpages/bgpd.rst b/doc/manpages/bgpd.rst index f1736ffd0b..079aad8c48 100644 --- a/doc/manpages/bgpd.rst +++ b/doc/manpages/bgpd.rst @@ -21,6 +21,48 @@ OPTIONS available for the |DAEMON| command: .. include:: common-options.rst +.. option:: -p, --bgp_port + + Set the bgp protocol's port number. When the port number is 0, bgpd does not + listen on the bgp port. + +.. option:: -l, --listenon + + Specify a specific IP address for bgpd to listen on, rather than its default + of ``0.0.0.0`` / ``::``. This can be useful to constrain bgpd to an internal + address, or to run multiple bgpd processes on one host. + +.. option:: -n, --no_kernel + + Do not install learned routes into the Linux kernel. This option is useful + for a route-reflector environment or if you are running multiple bgp + processes in the same namespace. This option is different than the --no_zebra + option in that a ZAPI connection is made. + +.. option:: -S, --skip_runas + + Skip the normal process of checking capabilities and changing user and group + information. + +.. option:: -e, --ecmp + + Run BGP with a limited ecmp capability, that is different than what BGP + was compiled with. The value specified must be greater than 0 and less + than or equal to the MULTIPATH_NUM specified on compilation. + +.. option:: -Z, --no_zebra + + Do not communicate with zebra at all. This is different than the --no_kernel + option in that we do not even open a ZAPI connection to the zebra process. + +..
option:: -s, --socket_size + + When opening tcp connections to our peers, set the socket send and receive + buffer sizes that the kernel will use for the peer's socket. This option + is only really useful at a very large scale. Experimentation should + be done to see if this is helping or not at the scale you are running + at. + LABEL MANAGER ------------- diff --git a/doc/user/bgp.rst b/doc/user/bgp.rst index c99a5c49a3..c81a19c03e 100644 --- a/doc/user/bgp.rst +++ b/doc/user/bgp.rst @@ -35,6 +35,44 @@ be specified (:ref:`common-invocation-options`). of ``0.0.0.0`` / ``::``. This can be useful to constrain bgpd to an internal address, or to run multiple bgpd processes on one host. +.. option:: -n, --no_kernel + + Do not install learned routes into the Linux kernel. This option is useful + for a route-reflector environment or if you are running multiple bgp + processes in the same namespace. This option is different than the --no_zebra + option in that a ZAPI connection is made. + +.. option:: -S, --skip_runas + + Skip the normal process of checking capabilities and changing user and group + information. + +.. option:: -e, --ecmp + + Run BGP with a limited ecmp capability, that is different than what BGP + was compiled with. The value specified must be greater than 0 and less + than or equal to the MULTIPATH_NUM specified on compilation. + +.. option:: -Z, --no_zebra + + Do not communicate with zebra at all. This is different than the --no_kernel + option in that we do not even open a ZAPI connection to the zebra process. + +.. option:: -s, --socket_size + + When opening tcp connections to our peers, set the socket send and receive + buffer sizes that the kernel will use for the peer's socket. This option + is only really useful at a very large scale. Experimentation should + be done to see if this is helping or not at the scale you are running + at. + +LABEL MANAGER +------------- + +.. option:: -I, --int_num + + Set zclient id. This is required when using Zebra label manager in proxy mode.
+ .. _bgp-basic-concepts: Basic Concepts