diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c
index ef73b47ffb..6b91a2cf1f 100644
--- a/bgpd/bgp_main.c
+++ b/bgpd/bgp_main.c
@@ -75,6 +75,7 @@ static const struct option longopts[] = {
 	{"ecmp", required_argument, NULL, 'e'},
 	{"int_num", required_argument, NULL, 'I'},
 	{"no_zebra", no_argument, NULL, 'Z'},
+	{"socket_size", required_argument, NULL, 's'},
 	{0}};
 
 /* signal definitions */
@@ -386,17 +387,19 @@ int main(int argc, char **argv)
 	int no_zebra_flag = 0;
 	int skip_runas = 0;
 	int instance = 0;
+	int buffer_size = BGP_SOCKET_SNDBUF_SIZE;
 
 	frr_preinit(&bgpd_di, argc, argv);
 	frr_opt_add(
-		"p:l:SnZe:I:" DEPRECATED_OPTIONS, longopts,
+		"p:l:SnZe:I:s:" DEPRECATED_OPTIONS, longopts,
 		" -p, --bgp_port Set BGP listen port number (0 means do not listen).\n"
 		" -l, --listenon Listen on specified address (implies -n)\n"
 		" -n, --no_kernel Do not install route to kernel.\n"
 		" -Z, --no_zebra Do not communicate with Zebra.\n"
 		" -S, --skip_runas Skip capabilities checks, and changing user and group IDs.\n"
 		" -e, --ecmp Specify ECMP to use.\n"
-		" -I, --int_num Set instance number (label-manager)\n");
+		" -I, --int_num Set instance number (label-manager)\n"
+		" -s, --socket_size Set BGP peer socket send and receive buffer size\n");
 
 	/* Command line argument treatment. */
 	while (1) {
@@ -452,6 +455,15 @@ int main(int argc, char **argv)
 				zlog_err("Instance %i out of range (0..%u)",
 					 instance, (unsigned short)-1);
 			break;
+		case 's':
+			buffer_size = atoi(optarg);
+			/* Reject garbage (atoi() == 0) and negative sizes */
+			if (buffer_size <= 0) {
+				zlog_err("Invalid socket buffer size: %s",
+					 optarg);
+				buffer_size = BGP_SOCKET_SNDBUF_SIZE;
+			}
+			break;
 		default:
 			frr_help_exit(1);
 			break;
@@ -461,7 +473,7 @@ int main(int argc, char **argv)
 		memset(&bgpd_privs, 0, sizeof(bgpd_privs));
 
 	/* BGP master init. */
-	bgp_master_init(frr_init());
+	bgp_master_init(frr_init(), buffer_size);
 	bm->port = bgp_port;
 	if (bgp_port == 0)
 		bgp_option_set(BGP_OPT_NO_LISTEN);
diff --git a/bgpd/bgp_network.c b/bgpd/bgp_network.c
index 1394c60b25..4031d2dfde 100644
--- a/bgpd/bgp_network.c
+++ b/bgpd/bgp_network.c
@@ -320,6 +320,18 @@ static int bgp_get_instance_for_inc_conn(int sock, struct bgp **bgp_inst)
 #endif
 }
 
+/*
+ * Grow the send and receive buffers of a peer socket to at least
+ * bm->socket_buffer; buffers that are already larger are left alone.
+ */
+static void bgp_socket_set_buffer_size(const int fd)
+{
+	if (getsockopt_so_sendbuf(fd) < (int)bm->socket_buffer)
+		setsockopt_so_sendbuf(fd, bm->socket_buffer);
+	if (getsockopt_so_recvbuf(fd) < (int)bm->socket_buffer)
+		setsockopt_so_recvbuf(fd, bm->socket_buffer);
+}
+
 /* Accept bgp connection. */
 static int bgp_accept(struct thread *thread)
 {
@@ -371,8 +383,7 @@ static int bgp_accept(struct thread *thread)
 		return -1;
 	}
 
-	/* Set socket send buffer size */
-	setsockopt_so_sendbuf(bgp_sock, BGP_SOCKET_SNDBUF_SIZE);
+	bgp_socket_set_buffer_size(bgp_sock);
 
 	/* Check remote IP address */
 	peer1 = peer_lookup(bgp, &su);
@@ -621,8 +632,7 @@ int bgp_connect(struct peer *peer)
 
 	set_nonblocking(peer->fd);
 
-	/* Set socket send buffer size */
-	setsockopt_so_sendbuf(peer->fd, BGP_SOCKET_SNDBUF_SIZE);
+	bgp_socket_set_buffer_size(peer->fd);
 
 	if (bgp_set_socket_ttl(peer, peer->fd) < 0)
 		return -1;
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index 9c0e25bf25..49f7337d76 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -7823,7 +7823,7 @@ int bgp_config_write(struct vty *vty)
 	return 0;
 }
 
-void bgp_master_init(struct thread_master *master)
+void bgp_master_init(struct thread_master *master, const int buffer_size)
 {
 	qobj_init();
 
@@ -7838,6 +7838,7 @@ void bgp_master_init(struct thread_master *master)
 	bm->t_rmap_update = NULL;
 	bm->rmap_update_timer = RMAP_DEFAULT_UPDATE_TIMER;
 	bm->terminating = false;
+	bm->socket_buffer = buffer_size;
 
 	bgp_process_queue_init();
 
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 9b0346da12..87d94500f7 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -156,6 +156,9 @@ struct bgp_master {
 	/* BGP-EVPN VRF ID. Defaults to default VRF (if any) */
 	struct bgp* bgp_evpn;
 
+	/* Minimum send/receive buffer size to request on peer sockets */
+	uint32_t socket_buffer;
+
 	bool terminating; /* global flag that sigint terminate seen */
 	QOBJ_FIELDS
 };
@@ -1575,7 +1578,8 @@ extern char *peer_uptime(time_t uptime2, char *buf, size_t len, bool use_json,
 
 extern int bgp_config_write(struct vty *);
 
-extern void bgp_master_init(struct thread_master *master);
+extern void bgp_master_init(struct thread_master *master,
+			    const int buffer_size);
 
 extern void bgp_init(unsigned short instance);
 extern void bgp_pthreads_run(void);
diff --git a/doc/manpages/bgpd.rst b/doc/manpages/bgpd.rst
index f1736ffd0b..079aad8c48 100644
--- a/doc/manpages/bgpd.rst
+++ b/doc/manpages/bgpd.rst
@@ -21,6 +21,48 @@ OPTIONS available for the |DAEMON| command:
 
 .. include:: common-options.rst
 
+.. option:: -p, --bgp_port
+
+   Set the BGP protocol's port number. A port number of 0 means bgpd does
+   not listen for incoming connections.
+
+.. option:: -l, --listenon
+
+   Specify a specific IP address for bgpd to listen on, rather than its default
+   of ``0.0.0.0`` / ``::``. This can be useful to constrain bgpd to an internal
+   address, or to run multiple bgpd processes on one host.
+
+.. option:: -n, --no_kernel
+
+   Do not install learned routes into the linux kernel. This option is useful
+   for a route-reflector environment or if you are running multiple bgp
+   processes in the same namespace. This option is different than the --no_zebra
+   option in that a ZAPI connection is made.
+
+.. option:: -S, --skip_runas
+
+   Skip the normal process of checking capabilities and changing user and group
+   information.
+
+.. option:: -e, --ecmp
+
+   Run BGP with a limited ecmp capability, that is different than what BGP
+   was compiled with. The value specified must be greater than 0 and less
+   than or equal to the MULTIPATH_NUM specified on compilation.
+
+.. option:: -Z, --no_zebra
+
+   Do not communicate with zebra at all. This is different than the --no_kernel
+   option in that we do not even open a ZAPI connection to the zebra process.
+
+.. option:: -s, --socket_size
+
+   When opening tcp connections to our peers, raise the socket send and
+   receive buffers that the kernel will use for the peer's socket to at
+   least this size. This option is only really useful at a very large
+   scale. Experimentation should be done to see if this is helping or
+   not at the scale you are running at.
+
 LABEL MANAGER
 -------------
 
diff --git a/doc/user/bgp.rst b/doc/user/bgp.rst
index b8c216ff82..b916fcf413 100644
--- a/doc/user/bgp.rst
+++ b/doc/user/bgp.rst
@@ -35,6 +35,44 @@ be specified (:ref:`common-invocation-options`).
    of ``0.0.0.0`` / ``::``. This can be useful to constrain bgpd to an internal
    address, or to run multiple bgpd processes on one host.
 
+.. option:: -n, --no_kernel
+
+   Do not install learned routes into the linux kernel. This option is useful
+   for a route-reflector environment or if you are running multiple bgp
+   processes in the same namespace. This option is different than the --no_zebra
+   option in that a ZAPI connection is made.
+
+.. option:: -S, --skip_runas
+
+   Skip the normal process of checking capabilities and changing user and group
+   information.
+
+.. option:: -e, --ecmp
+
+   Run BGP with a limited ecmp capability, that is different than what BGP
+   was compiled with. The value specified must be greater than 0 and less
+   than or equal to the MULTIPATH_NUM specified on compilation.
+
+.. option:: -Z, --no_zebra
+
+   Do not communicate with zebra at all. This is different than the --no_kernel
+   option in that we do not even open a ZAPI connection to the zebra process.
+
+.. option:: -s, --socket_size
+
+   When opening tcp connections to our peers, raise the socket send and
+   receive buffers that the kernel will use for the peer's socket to at
+   least this size. This option is only really useful at a very large
+   scale. Experimentation should be done to see if this is helping or
+   not at the scale you are running at.
+
+LABEL MANAGER
+-------------
+
+.. option:: -I, --int_num
+
+   Set zclient id. This is required when using Zebra label manager in proxy mode.
+
 .. _bgp-basic-concepts:
 
 Basic Concepts
diff --git a/lib/sockopt.c b/lib/sockopt.c
index 8e38a29278..7726d74ff7 100644
--- a/lib/sockopt.c
+++ b/lib/sockopt.c
@@ -72,6 +72,22 @@ int getsockopt_so_sendbuf(const int sock)
 	return optval;
 }
 
+/* Return the socket's SO_RCVBUF value, or a negative value on failure. */
+int getsockopt_so_recvbuf(const int sock)
+{
+	int optval;
+	socklen_t optlen = sizeof(optval);
+	int ret = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, (char *)&optval,
+			     &optlen);
+	if (ret < 0) {
+		flog_err_sys(EC_LIB_SYSTEM_CALL,
+			     "fd %d: can't getsockopt SO_RCVBUF: %d (%s)", sock,
+			     errno, safe_strerror(errno));
+		return ret;
+	}
+	return optval;
+}
+
 static void *getsockopt_cmsg_data(struct msghdr *msgh, int level, int type)
 {
 	struct cmsghdr *cmsg;
diff --git a/lib/sockopt.h b/lib/sockopt.h
index 732fec92aa..f6b57b8e07 100644
--- a/lib/sockopt.h
+++ b/lib/sockopt.h
@@ -30,6 +30,7 @@ extern "C" {
 extern void setsockopt_so_recvbuf(int sock, int size);
 extern void setsockopt_so_sendbuf(const int sock, int size);
 extern int getsockopt_so_sendbuf(const int sock);
+extern int getsockopt_so_recvbuf(const int sock);
 
 extern int setsockopt_ipv6_pktinfo(int, int);
 extern int setsockopt_ipv6_checksum(int, int);
diff --git a/tests/bgpd/test_aspath.c b/tests/bgpd/test_aspath.c
index b5db36703a..925d3112d3 100644
--- a/tests/bgpd/test_aspath.c
+++ b/tests/bgpd/test_aspath.c
@@ -1339,7 +1339,7 @@ int main(void)
 {
 	int i = 0;
 	qobj_init();
-	bgp_master_init(thread_master_create(NULL));
+	bgp_master_init(thread_master_create(NULL), BGP_SOCKET_SNDBUF_SIZE);
 	master = bm->master;
 	bgp_option_set(BGP_OPT_NO_LISTEN);
 	bgp_attr_init();
diff --git a/tests/bgpd/test_capability.c b/tests/bgpd/test_capability.c
index db1cf0611d..96e398512b 100644
--- a/tests/bgpd/test_capability.c
+++ b/tests/bgpd/test_capability.c
@@ -912,7 +912,7 @@ int main(void)
 
 	qobj_init();
 	master = thread_master_create(NULL);
-	bgp_master_init(master);
+	bgp_master_init(master, BGP_SOCKET_SNDBUF_SIZE);
 	vrf_init(NULL, NULL, NULL, NULL, NULL);
 	bgp_option_set(BGP_OPT_NO_LISTEN);
 
diff --git a/tests/bgpd/test_mp_attr.c b/tests/bgpd/test_mp_attr.c
index 603b678cf1..fbf2a9fed2 100644
--- a/tests/bgpd/test_mp_attr.c
+++ b/tests/bgpd/test_mp_attr.c
@@ -37,6 +37,7 @@
 #include "bgpd/bgp_mplsvpn.h"
 #include "bgpd/bgp_nexthop.h"
 #include "bgpd/bgp_vty.h"
+#include "bgpd/bgp_network.h"
 
 #define VT100_RESET "\x1b[0m"
 #define VT100_RED "\x1b[31m"
@@ -1078,7 +1079,7 @@ int main(void)
 	cmd_init(0);
 	bgp_vty_init();
 	master = thread_master_create("test mp attr");
-	bgp_master_init(master);
+	bgp_master_init(master, BGP_SOCKET_SNDBUF_SIZE);
 	vrf_init(NULL, NULL, NULL, NULL, NULL);
 	bgp_option_set(BGP_OPT_NO_LISTEN);
 	bgp_attr_init();
diff --git a/tests/bgpd/test_mpath.c b/tests/bgpd/test_mpath.c
index 0ecd0fdfec..21f4b38773 100644
--- a/tests/bgpd/test_mpath.c
+++ b/tests/bgpd/test_mpath.c
@@ -38,6 +38,7 @@
 #include "bgpd/bgp_nexthop.h"
 #include "bgpd/bgp_mpath.h"
 #include "bgpd/bgp_evpn.h"
+#include "bgpd/bgp_network.h"
 
 #define VT100_RESET "\x1b[0m"
 #define VT100_RED "\x1b[31m"
@@ -379,7 +380,7 @@ static int global_test_init(void)
 	qobj_init();
 	master = thread_master_create(NULL);
 	zclient = zclient_new(master, &zclient_options_default);
-	bgp_master_init(master);
+	bgp_master_init(master, BGP_SOCKET_SNDBUF_SIZE);
 	vrf_init(NULL, NULL, NULL, NULL, NULL);
 	bgp_option_set(BGP_OPT_NO_LISTEN);
 
diff --git a/tests/bgpd/test_packet.c b/tests/bgpd/test_packet.c
index 9719aceec9..7a038fb02e 100644
--- a/tests/bgpd/test_packet.c
+++ b/tests/bgpd/test_packet.c
@@ -34,6 +34,7 @@
 #include "bgpd/bgp_debug.h"
 #include "bgpd/bgp_packet.h"
 #include "bgpd/bgp_aspath.h"
+#include "bgpd/bgp_network.h"
 
 /* need these to link in libbgp */
 struct zebra_privs_t *bgpd_privs = NULL;
@@ -58,7 +59,7 @@ int main(int argc, char *argv[])
 	qobj_init();
 	bgp_attr_init();
 	master = thread_master_create(NULL);
-	bgp_master_init(master);
+	bgp_master_init(master, BGP_SOCKET_SNDBUF_SIZE);
 	vrf_init(NULL, NULL, NULL, NULL, NULL);
 	bgp_option_set(BGP_OPT_NO_LISTEN);
 
diff --git a/tests/bgpd/test_peer_attr.c b/tests/bgpd/test_peer_attr.c
index e5d3030ed1..422d397479 100644
--- a/tests/bgpd/test_peer_attr.c
+++ b/tests/bgpd/test_peer_attr.c
@@ -29,6 +29,7 @@
 #include "bgpd/bgp_route.h"
 #include "bgpd/bgp_vty.h"
 #include "bgpd/bgp_zebra.h"
+#include "bgpd/bgp_network.h"
 
 #ifdef ENABLE_BGP_VNC
 #include "bgpd/rfapi/rfapi_backend.h"
@@ -1388,7 +1389,7 @@ static void bgp_startup(void)
 	master = thread_master_create(NULL);
 	yang_init();
 	nb_init(master, NULL, 0);
-	bgp_master_init(master);
+	bgp_master_init(master, BGP_SOCKET_SNDBUF_SIZE);
 	bgp_option_set(BGP_OPT_NO_LISTEN);
 	vrf_init(NULL, NULL, NULL, NULL, NULL);
 	frr_pthread_init();