bgpd: Add neighbor <neigh> shutdown rtt command

This is useful in setups with lots of peers: a peer is shut down
automatically if its RTT goes above the specified limit.

A host with 512 or more IPv6 addresses has a higher latency due to
ipv6_addr_label(). This method tries to pick the best candidate address
for an outgoing connection and literally increases processing latency.

```
Samples: 28  of event 'cycles', Event count (approx.): 22131542
  Children      Self  Command  Shared Object      Symbol
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] entry_SYSCALL_64_fastpath
  +  100.00%     0.00%  ping6    [unknown]          [.] 0x0df0ad0b8047022a
  +  100.00%     0.00%  ping6    libc-2.17.so       [.] __sendto_nocancel
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] sys_sendto
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] SYSC_sendto
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] sock_sendmsg
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] inet_sendmsg
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] rawv6_sendmsg
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] ip6_dst_lookup_flow
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] ip6_dst_lookup_tail
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] ip6_route_get_saddr
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] ipv6_dev_get_saddr
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] __ipv6_dev_get_saddr
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] ipv6_get_saddr_eval
  +  100.00%     0.00%  ping6    [kernel.kallsyms]  [k] ipv6_addr_label
  +  100.00%   100.00%  ping6    [kernel.kallsyms]  [k] __ipv6_addr_label
  +    0.00%     0.00%  ping6    [kernel.kallsyms]  [k] schedule
```

This is how it works:

```
~# vtysh -c 'show bgp neigh 192.168.0.2 json' | jq '."192.168.0.2".estimatedRttInMsecs'
9
~# tc qdisc add dev eth1 root netem delay 120ms
~# vtysh -c 'show bgp neigh 192.168.0.2 json' | jq '."192.168.0.2".estimatedRttInMsecs'
89
~# vtysh -c 'show bgp neigh 192.168.0.2 json' | jq '."192.168.0.2".estimatedRttInMsecs'
null
~# vtysh -c 'show bgp neigh 192.168.0.2 json' | jq '."192.168.0.2".lastResetDueTo'
"Admin. shutdown"
```

Warning message:
bgpd[14807]: 192.168.0.2 shutdown due to high round-trip-time (200ms > 150ms)

Signed-off-by: Donatas Abraitis <donatas.abraitis@gmail.com>
This commit is contained in:
Donatas Abraitis 2020-07-25 09:10:56 +03:00
parent e410d56307
commit 8336c896fd
5 changed files with 95 additions and 0 deletions

View File

@ -1105,6 +1105,9 @@ void bgp_fsm_change_status(struct peer *peer, int status)
peer->ostatus = peer->status;
peer->status = status;
/* Reset received keepalives counter on every FSM change */
peer->rtt_keepalive_rcv = 0;
/* Fire backward transition hook if that's the case */
if (peer->ostatus > peer->status)
hook_call(peer_backward_transition, peer);

View File

@ -1434,6 +1434,25 @@ static int bgp_keepalive_receive(struct peer *peer, bgp_size_t size)
peer->rtt = sockopt_tcp_rtt(peer->fd);
/* If the peer's RTT is higher than expected, shutdown
* the peer automatically.
*/
if (CHECK_FLAG(peer->flags, PEER_FLAG_RTT_SHUTDOWN)
&& peer->rtt > peer->rtt_expected) {
peer->rtt_keepalive_rcv++;
if (peer->rtt_keepalive_rcv > peer->rtt_keepalive_conf) {
zlog_warn(
"%s shutdown due to high round-trip-time (%dms > %dms)",
peer->host, peer->rtt, peer->rtt_expected);
peer_flag_set(peer, PEER_FLAG_SHUTDOWN);
}
} else {
if (peer->rtt_keepalive_rcv)
peer->rtt_keepalive_rcv--;
}
return Receive_KEEPALIVE_message;
}

View File

@ -4480,6 +4480,64 @@ ALIAS(no_neighbor_shutdown_msg, no_neighbor_shutdown_cmd,
NO_STR NEIGHBOR_STR NEIGHBOR_ADDR_STR2
"Administratively shut down this neighbor\n")
/*
 * "neighbor <peer> shutdown rtt <msec> [count <n>]"
 *
 * Stores the RTT threshold (and, when given, the keepalive count) on the
 * looked-up peer or peer-group, then raises PEER_FLAG_RTT_SHUTDOWN through
 * peer_flag_set_vty().
 *
 * Returns CMD_WARNING_CONFIG_FAILED when the neighbor cannot be resolved,
 * otherwise whatever peer_flag_set_vty() returns.
 *
 * Note: when the optional "count" keyword is omitted, the value already
 * held in rtt_keepalive_conf is left as it is.
 */
DEFUN(neighbor_shutdown_rtt,
      neighbor_shutdown_rtt_cmd,
      "neighbor <A.B.C.D|X:X::X:X|WORD> shutdown rtt (1-65535) [count (1-255)]",
      NEIGHBOR_STR
      NEIGHBOR_ADDR_STR2
      "Administratively shut down this neighbor\n"
      "Shutdown if round-trip-time is higher than expected\n"
      "Round-trip-time in milliseconds\n"
      "Specify the number of keepalives before shutdown\n"
      "The number of keepalives with higher RTT to shutdown\n")
{
	/* Fixed token positions within the command line. */
	const int neighbor_pos = 1;
	const int threshold_pos = 4;
	/* Filled in by argv_find() when the "count" keyword is present. */
	int count_kw_pos = 0;
	const char *neighbor = argv[neighbor_pos]->arg;
	struct peer *target = peer_and_group_lookup_vty(vty, neighbor);

	if (target == NULL)
		return CMD_WARNING_CONFIG_FAILED;

	target->rtt_expected = strtol(argv[threshold_pos]->arg, NULL, 10);

	/* The count value is the token right after the "count" keyword. */
	if (argv_find(argv, argc, "count", &count_kw_pos)) {
		const char *count_arg = argv[count_kw_pos + 1]->arg;

		target->rtt_keepalive_conf = strtol(count_arg, NULL, 10);
	}

	return peer_flag_set_vty(vty, neighbor, PEER_FLAG_RTT_SHUTDOWN);
}
/*
 * "no neighbor <peer> shutdown rtt [<msec> [count <n>]]"
 *
 * Clears the RTT threshold, puts the keepalive count back to 1 and drops
 * PEER_FLAG_RTT_SHUTDOWN through peer_flag_unset_vty(). The optional
 * trailing arguments are accepted by the grammar but not read here.
 *
 * Returns CMD_WARNING_CONFIG_FAILED when the neighbor cannot be resolved,
 * otherwise whatever peer_flag_unset_vty() returns.
 */
DEFUN(no_neighbor_shutdown_rtt,
      no_neighbor_shutdown_rtt_cmd,
      "no neighbor <A.B.C.D|X:X::X:X|WORD> shutdown rtt [(1-65535) [count (1-255)]]",
      NO_STR
      NEIGHBOR_STR
      NEIGHBOR_ADDR_STR2
      "Administratively shut down this neighbor\n"
      "Shutdown if round-trip-time is higher than expected\n"
      "Round-trip-time in milliseconds\n"
      "Specify the number of keepalives before shutdown\n"
      "The number of keepalives with higher RTT to shutdown\n")
{
	/* The leading "no" shifts the neighbor token to position 2. */
	const int neighbor_pos = 2;
	const char *neighbor = argv[neighbor_pos]->arg;
	struct peer *target = peer_and_group_lookup_vty(vty, neighbor);

	if (target == NULL)
		return CMD_WARNING_CONFIG_FAILED;

	/* Reset both RTT-shutdown settings before clearing the flag. */
	target->rtt_keepalive_conf = 1;
	target->rtt_expected = 0;

	return peer_flag_unset_vty(vty, neighbor, PEER_FLAG_RTT_SHUTDOWN);
}
/* neighbor capability dynamic. */
DEFUN (neighbor_capability_dynamic,
neighbor_capability_dynamic_cmd,
@ -14829,6 +14887,10 @@ static void bgp_config_write_peer_global(struct vty *vty, struct bgp *bgp,
vty_out(vty, " neighbor %s shutdown\n", addr);
}
if (peergroup_flag_check(peer, PEER_FLAG_RTT_SHUTDOWN))
vty_out(vty, " neighbor %s shutdown rtt %u count %u\n", addr,
peer->rtt_expected, peer->rtt_keepalive_conf);
/* bfd */
if (peer->bfd_info) {
if (!peer_group_active(peer) || !g_peer->bfd_info) {
@ -16628,6 +16690,8 @@ void bgp_vty_init(void)
install_element(BGP_NODE, &no_neighbor_shutdown_cmd);
install_element(BGP_NODE, &neighbor_shutdown_msg_cmd);
install_element(BGP_NODE, &no_neighbor_shutdown_msg_cmd);
install_element(BGP_NODE, &neighbor_shutdown_rtt_cmd);
install_element(BGP_NODE, &no_neighbor_shutdown_rtt_cmd);
/* "neighbor capability extended-nexthop" commands.*/
install_element(BGP_NODE, &neighbor_capability_enhe_cmd);

View File

@ -1604,6 +1604,9 @@ struct peer *peer_create(union sockunion *su, const char *conf_if,
/* Default TTL set. */
peer->ttl = (peer->sort == BGP_PEER_IBGP) ? MAXTTL : BGP_DEFAULT_TTL;
/* Default configured keepalives count for shutdown rtt command */
peer->rtt_keepalive_conf = 1;
SET_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE);
if (afi && safi) {
@ -3865,6 +3868,7 @@ struct peer_flag_action {
static const struct peer_flag_action peer_flag_action_list[] = {
{PEER_FLAG_PASSIVE, 0, peer_change_reset},
{PEER_FLAG_SHUTDOWN, 0, peer_change_reset},
{PEER_FLAG_RTT_SHUTDOWN, 0, peer_change_none},
{PEER_FLAG_DONT_CAPABILITY, 0, peer_change_none},
{PEER_FLAG_OVERRIDE_CAPABILITY, 0, peer_change_none},
{PEER_FLAG_STRICT_CAP_MATCH, 0, peer_change_none},
@ -3967,6 +3971,7 @@ static void peer_flag_modify_action(struct peer *peer, uint32_t flag)
peer_nsf_stop(peer);
UNSET_FLAG(peer->sflags, PEER_STATUS_PREFIX_OVERFLOW);
if (peer->t_pmax_restart) {
BGP_TIMER_OFF(peer->t_pmax_restart);
if (bgp_debug_neighbor_events(peer))

View File

@ -968,6 +968,9 @@ struct peer {
int fd; /* File descriptor */
int ttl; /* TTL of TCP connection to the peer. */
int rtt; /* Estimated round-trip-time from TCP_INFO */
int rtt_expected; /* Expected round-trip-time for a peer */
uint8_t rtt_keepalive_rcv; /* Received count for RTT shutdown */
uint8_t rtt_keepalive_conf; /* Configured count for RTT shutdown */
int gtsm_hops; /* minimum hopcount to peer */
char *desc; /* Description of the peer. */
unsigned short port; /* Destination port for peer */
@ -1118,6 +1121,7 @@ struct peer {
#define PEER_FLAG_GRACEFUL_RESTART_HELPER (1U << 23) /* Helper */
#define PEER_FLAG_GRACEFUL_RESTART (1U << 24) /* Graceful Restart */
#define PEER_FLAG_GRACEFUL_RESTART_GLOBAL_INHERIT (1U << 25) /* Global-Inherit */
#define PEER_FLAG_RTT_SHUTDOWN (1U << 26) /* shutdown rtt */
/*
*GR-Disabled mode means unset PEER_FLAG_GRACEFUL_RESTART