bgpd: Show the reason when the session is killed due to RTT

Simulated latency with:

```
tc qdisc add dev eth3 root netem delay 100ms
```

```
donatas-laptop# sh ip bgp summary failed

IPv4 Unicast Summary (VRF default):
BGP router identifier 192.0.2.252, local AS number 65000 vrf-id 0
BGP table version 28
RIB entries 0, using 0 bytes of memory
Peers 1, using 724 KiB of memory

Neighbor        EstdCnt DropCnt ResetTime Reason
192.168.10.65         2       2  00:00:17 Admin. shutdown (RTT)

Displayed neighbors 1
Total number of neighbors 1
donatas-laptop#
```

The other end received:

```
%NOTIFICATION: received from neighbor 192.168.10.17 6/2 (Cease/Administrative Shutdown) "shutdown due to high round-trip-time (104ms > 5ms, hit 21 times)"
```

Signed-off-by: Donatas Abraitis <donatas@opensourcerouting.org>
This commit is contained in:
Donatas Abraitis 2022-11-04 15:40:52 +02:00
parent 9f4fa17629
commit 5597214ccb
4 changed files with 30 additions and 11 deletions

View File

@ -634,7 +634,8 @@ const char *const peer_down_str[] = {"",
"AS Set config change",
"Waiting for peer OPEN",
"Reached received prefix count",
"Socket Error"};
"Socket Error",
"Admin. shutdown (RTT)"};
static void bgp_graceful_restart_timer_off(struct peer *peer)
{
@ -1832,7 +1833,9 @@ int bgp_start(struct peer *peer)
flog_err(EC_BGP_FSM,
"%s [FSM] Trying to start suppressed peer - this is never supposed to happen!",
peer->host);
if (CHECK_FLAG(peer->flags, PEER_FLAG_SHUTDOWN))
if (CHECK_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN))
peer->last_reset = PEER_DOWN_RTT_SHUTDOWN;
else if (CHECK_FLAG(peer->flags, PEER_FLAG_SHUTDOWN))
peer->last_reset = PEER_DOWN_USER_SHUTDOWN;
else if (CHECK_FLAG(peer->bgp->flags, BGP_FLAG_SHUTDOWN))
peer->last_reset = PEER_DOWN_USER_SHUTDOWN;

View File

@ -1011,9 +1011,12 @@ static void bgp_notify_send_internal(struct peer *peer, uint8_t code,
if (code == BGP_NOTIFY_CEASE) {
if (sub_code == BGP_NOTIFY_CEASE_ADMIN_RESET)
peer->last_reset = PEER_DOWN_USER_RESET;
else if (sub_code == BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN)
peer->last_reset = PEER_DOWN_USER_SHUTDOWN;
else
else if (sub_code == BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN) {
if (CHECK_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN))
peer->last_reset = PEER_DOWN_RTT_SHUTDOWN;
else
peer->last_reset = PEER_DOWN_USER_SHUTDOWN;
} else
peer->last_reset = PEER_DOWN_NOTIFY_SEND;
} else
peer->last_reset = PEER_DOWN_NOTIFY_SEND;
@ -1749,15 +1752,24 @@ static int bgp_keepalive_receive(struct peer *peer, bgp_size_t size)
/* If the peer's RTT is higher than expected, shutdown
* the peer automatically.
*/
if (CHECK_FLAG(peer->flags, PEER_FLAG_RTT_SHUTDOWN)
&& peer->rtt > peer->rtt_expected) {
if (!CHECK_FLAG(peer->flags, PEER_FLAG_RTT_SHUTDOWN))
return Receive_KEEPALIVE_message;
if (peer->rtt > peer->rtt_expected) {
peer->rtt_keepalive_rcv++;
if (peer->rtt_keepalive_rcv > peer->rtt_keepalive_conf) {
zlog_warn(
"%s shutdown due to high round-trip-time (%dms > %dms)",
peer->host, peer->rtt, peer->rtt_expected);
char rtt_shutdown_reason[BUFSIZ] = {};
snprintfrr(
rtt_shutdown_reason,
sizeof(rtt_shutdown_reason),
"shutdown due to high round-trip-time (%dms > %dms, hit %u times)",
peer->rtt, peer->rtt_expected,
peer->rtt_keepalive_rcv);
zlog_warn("%s %s", peer->host, rtt_shutdown_reason);
SET_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN);
peer_tx_shutdown_message_set(peer, rtt_shutdown_reason);
peer_flag_set(peer, PEER_FLAG_SHUTDOWN);
}
} else {

View File

@ -5272,8 +5272,10 @@ static int peer_flag_modify_vty(struct vty *vty, const char *ip_str,
return CMD_WARNING_CONFIG_FAILED;
}
if (!set && flag == PEER_FLAG_SHUTDOWN)
if (!set && flag == PEER_FLAG_SHUTDOWN) {
peer_tx_shutdown_message_unset(peer);
UNSET_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN);
}
if (set)
ret = peer_flag_set(peer, flag);

View File

@ -1522,6 +1522,7 @@ struct peer {
/* LLGR aware peer */
#define PEER_STATUS_LLGR_WAIT (1U << 11)
#define PEER_STATUS_REFRESH_PENDING (1U << 12) /* refresh request from peer */
#define PEER_STATUS_RTT_SHUTDOWN (1U << 13) /* In shutdown state due to RTT */
/* Configured timer values. */
_Atomic uint32_t holdtime;
@ -1734,6 +1735,7 @@ struct peer {
#define PEER_DOWN_WAITING_OPEN 32U /* Waiting for open to succeed */
#define PEER_DOWN_PFX_COUNT 33U /* Reached received prefix count */
#define PEER_DOWN_SOCKET_ERROR 34U /* Some socket error happened */
#define PEER_DOWN_RTT_SHUTDOWN 35U /* Automatically shutdown due to RTT */
/*
* Remember to update peer_down_str in bgp_fsm.c when you add
* a new value to the last_reset reason