diff --git a/bgpd/Makefile.am b/bgpd/Makefile.am index fa1dcbb762..c8791687ea 100644 --- a/bgpd/Makefile.am +++ b/bgpd/Makefile.am @@ -85,7 +85,8 @@ libbgp_a_SOURCES = \ bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c bgp_mpath.c \ bgp_nht.c bgp_updgrp.c bgp_updgrp_packet.c bgp_updgrp_adv.c bgp_bfd.c \ bgp_encap_tlv.c $(BGP_VNC_RFAPI_SRC) bgp_attr_evpn.c \ - bgp_evpn.c bgp_evpn_vty.c bgp_vpn.c bgp_label.c bgp_rd.c + bgp_evpn.c bgp_evpn_vty.c bgp_vpn.c bgp_label.c bgp_rd.c \ + bgp_keepalives.c noinst_HEADERS = \ bgp_memory.h \ @@ -97,7 +98,7 @@ noinst_HEADERS = \ bgp_advertise.h bgp_vty.h bgp_mpath.h bgp_nht.h \ bgp_updgrp.h bgp_bfd.h bgp_encap_tlv.h bgp_encap_types.h \ $(BGP_VNC_RFAPI_HD) bgp_attr_evpn.h bgp_evpn.h bgp_evpn_vty.h \ - bgp_vpn.h bgp_label.h bgp_rd.h bgp_evpn_private.h + bgp_vpn.h bgp_label.h bgp_rd.h bgp_evpn_private.h bgp_keepalives.h \ bgpd_SOURCES = bgp_main.c bgpd_LDADD = libbgp.a $(BGP_VNC_RFP_LIB) ../lib/libfrr.la @LIBCAP@ @LIBM@ diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 176089ebd0..aa883f7364 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -49,6 +49,7 @@ #include "bgpd/bgp_nht.h" #include "bgpd/bgp_bfd.h" #include "bgpd/bgp_memory.h" +#include "bgpd/bgp_keepalives.h" DEFINE_HOOK(peer_backward_transition, (struct peer * peer), (peer)) DEFINE_HOOK(peer_established, (struct peer * peer), (peer)) @@ -86,7 +87,6 @@ int bgp_event(struct thread *); static int bgp_start_timer(struct thread *); static int bgp_connect_timer(struct thread *); static int bgp_holdtime_timer(struct thread *); -static int bgp_keepalive_timer(struct thread *); /* BGP FSM functions. */ static int bgp_start(struct peer *); @@ -253,7 +253,7 @@ void bgp_timer_set(struct peer *peer) } BGP_TIMER_OFF(peer->t_connect); BGP_TIMER_OFF(peer->t_holdtime); - BGP_TIMER_OFF(peer->t_keepalive); + peer_keepalives_off(peer); BGP_TIMER_OFF(peer->t_routeadv); break; @@ -265,7 +265,7 @@ void bgp_timer_set(struct peer *peer) BGP_TIMER_ON(peer->t_connect, bgp_connect_timer, peer->v_connect); BGP_TIMER_OFF(peer->t_holdtime); - BGP_TIMER_OFF(peer->t_keepalive); + peer_keepalives_off(peer); BGP_TIMER_OFF(peer->t_routeadv); break; @@ -282,7 +282,7 @@ void bgp_timer_set(struct peer *peer) peer->v_connect); } BGP_TIMER_OFF(peer->t_holdtime); - BGP_TIMER_OFF(peer->t_keepalive); + peer_keepalives_off(peer); BGP_TIMER_OFF(peer->t_routeadv); break; @@ -296,7 +296,7 @@ void bgp_timer_set(struct peer *peer) } else { BGP_TIMER_OFF(peer->t_holdtime); } - BGP_TIMER_OFF(peer->t_keepalive); + peer_keepalives_off(peer); BGP_TIMER_OFF(peer->t_routeadv); break; @@ -309,12 +309,11 @@ void bgp_timer_set(struct peer *peer) timer and KeepAlive timers are not started. */ if (peer->v_holdtime == 0) { BGP_TIMER_OFF(peer->t_holdtime); - BGP_TIMER_OFF(peer->t_keepalive); + peer_keepalives_off(peer); } else { BGP_TIMER_ON(peer->t_holdtime, bgp_holdtime_timer, peer->v_holdtime); - BGP_TIMER_ON(peer->t_keepalive, bgp_keepalive_timer, - peer->v_keepalive); + peer_keepalives_on(peer); } BGP_TIMER_OFF(peer->t_routeadv); break; @@ -329,12 +328,11 @@ void bgp_timer_set(struct peer *peer) and keepalive must be turned off. */ if (peer->v_holdtime == 0) { BGP_TIMER_OFF(peer->t_holdtime); - BGP_TIMER_OFF(peer->t_keepalive); + peer_keepalives_off(peer); } else { BGP_TIMER_ON(peer->t_holdtime, bgp_holdtime_timer, peer->v_holdtime); - BGP_TIMER_ON(peer->t_keepalive, bgp_keepalive_timer, - peer->v_keepalive); + peer_keepalives_on(peer); } break; case Deleted: @@ -346,7 +344,7 @@ void bgp_timer_set(struct peer *peer) BGP_TIMER_OFF(peer->t_start); BGP_TIMER_OFF(peer->t_connect); BGP_TIMER_OFF(peer->t_holdtime); - BGP_TIMER_OFF(peer->t_keepalive); + peer_keepalives_off(peer); BGP_TIMER_OFF(peer->t_routeadv); break; } @@ -412,24 +410,6 @@ static int bgp_holdtime_timer(struct thread *thread) return 0; } -/* BGP keepalive fire ! */ -static int bgp_keepalive_timer(struct thread *thread) -{ - struct peer *peer; - - peer = THREAD_ARG(thread); - peer->t_keepalive = NULL; - - if (bgp_debug_neighbor_events(peer)) - zlog_debug("%s [FSM] Timer (keepalive timer expire)", - peer->host); - - THREAD_VAL(thread) = KeepAlive_timer_expired; - bgp_event(thread); /* bgp_event unlocks peer */ - - return 0; -} - int bgp_routeadv_timer(struct thread *thread) { struct peer *peer; @@ -963,9 +943,6 @@ int bgp_stop(struct peer *peer) char orf_name[BUFSIZ]; int ret = 0; - // immediately remove from threads - peer_writes_off(peer); - if (peer_dynamic_neighbor(peer) && !(CHECK_FLAG(peer->flags, PEER_FLAG_DELETE))) { if (bgp_debug_neighbor_events(peer)) @@ -1053,7 +1030,7 @@ int bgp_stop(struct peer *peer) BGP_TIMER_OFF(peer->t_start); BGP_TIMER_OFF(peer->t_connect); BGP_TIMER_OFF(peer->t_holdtime); - BGP_TIMER_OFF(peer->t_keepalive); + peer_keepalives_off(peer); BGP_TIMER_OFF(peer->t_routeadv); /* Stream reset. */ @@ -1401,13 +1378,6 @@ static int bgp_fsm_open(struct peer *peer) return 0; } -/* Keepalive send to peer. */ -static int bgp_fsm_keepalive_expire(struct peer *peer) -{ - bgp_keepalive_send(peer); - return 0; -} - /* FSM error, unexpected event. This is error of BGP connection. So cut the peer and change to Idle status. */ static int bgp_fsm_event_error(struct peer *peer) @@ -1761,9 +1731,8 @@ static const struct { {bgp_stop, Clearing}, /* TCP_fatal_error */ {bgp_stop, Clearing}, /* ConnectRetry_timer_expired */ {bgp_fsm_holdtime_expire, Clearing}, /* Hold_Timer_expired */ - {bgp_fsm_keepalive_expire, - Established}, /* KeepAlive_timer_expired */ - {bgp_stop, Clearing}, /* Receive_OPEN_message */ + {bgp_ignore, Established}, /* KeepAlive_timer_expired */ + {bgp_stop, Clearing}, /* Receive_OPEN_message */ {bgp_fsm_keepalive, Established}, /* Receive_KEEPALIVE_message */ {bgp_fsm_update, Established}, /* Receive_UPDATE_message */ diff --git a/bgpd/bgp_keepalives.c b/bgpd/bgp_keepalives.c new file mode 100644 index 0000000000..d142e31af1 --- /dev/null +++ b/bgpd/bgp_keepalives.c @@ -0,0 +1,247 @@ +/* BGP Keepalives. + * + * Implemented server-style in a pthread. + * -------------------------------------- + * Copyright (C) 2017 Cumulus Networks, Inc. + * + * This file is part of Free Range Routing. + * + * Free Range Routing is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any later + * version. + * + * Free Range Routing is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GN5U General Public License along + * with Free Range Routing; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ +#include +#include +#include + +#include "thread.h" +#include "log.h" +#include "vty.h" +#include "monotime.h" + +#include "bgpd/bgpd.h" +#include "bgpd/bgp_keepalives.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_attr.h" +#include "bgpd/bgp_packet.h" + +/** + * Peer KeepAlive Timer. + * Associates a peer with the time of its last keepalive. + */ +struct pkat { + // the peer to send keepalives to + struct peer *peer; + // absolute time of last keepalive sent + struct timeval last; +}; + +/* List of peers we are sending keepalives for, and associated mutex. */ +static pthread_mutex_t peerlist_mtx = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t peerlist_cond; +static struct list *peerlist; + +/* Thread control flag. */ +bool bgp_keepalives_thread_run; + +static struct pkat *pkat_new(struct peer *peer) +{ + struct pkat *pkat = XMALLOC(MTYPE_TMP, sizeof(struct pkat)); + pkat->peer = peer; + monotime(&pkat->last); + return pkat; +} + +static void pkat_del(void *pkat) +{ + XFREE(MTYPE_TMP, pkat); +} +/** + * Cleanup thread resources at termination. + * + * @param arg not used + */ +static void cleanup_handler(void *arg) +{ + if (peerlist) + list_delete(peerlist); + + peerlist = NULL; + + pthread_mutex_unlock(&peerlist_mtx); + pthread_cond_destroy(&peerlist_cond); + memset(&peerlist_cond, 0, sizeof(peerlist_cond)); +} + +/* + * Walks the list of peers, sending keepalives to those that are due for them. + * + * For any given peer, if the elapsed time since its last keepalive exceeds its + * configured keepalive timer, a keepalive is sent to the peer and its + * last-sent time is reset. Additionally, If the elapsed time does not exceed + * the configured keepalive timer, but the time until the next keepalive is due + * is within a hardcoded tolerance, a keepalive is sent as if the configured + * timer was exceeded. Doing this helps alleviate nanosecond sleeps between + * ticks by grouping together peers who are due for keepalives at roughly the + * same time. This tolerance value is arbitrarily chosen to be 100ms. + * + * In addition, this function calculates the maximum amount of time that the + * keepalive thread can sleep before another tick needs to take place. This is + * equivalent to shortest time until a keepalive is due for any one peer. + * + * @return maximum time to wait until next update (0 if infinity) + */ +static struct timeval update() +{ + struct listnode *ln; + struct pkat *pkat; + + int update_set = 0; // whether next_update has been set + struct timeval next_update; // max sleep until next tick + static struct timeval elapsed; // elapsed time since keepalive + static struct timeval ka = {0}; // peer->v_keepalive as a timeval + static struct timeval diff; // ka - elapsed + + // see function comment + static struct timeval tolerance = {0, 100000}; + + for (ALL_LIST_ELEMENTS_RO(peerlist, ln, pkat)) { + // calculate elapsed time since last keepalive + monotime_since(&pkat->last, &elapsed); + + // calculate difference between elapsed time and configured time + ka.tv_sec = pkat->peer->v_keepalive; + timersub(&ka, &elapsed, &diff); + + int send_keepalive = elapsed.tv_sec >= ka.tv_sec + || timercmp(&diff, &tolerance, <); + + if (send_keepalive) { + if (bgp_debug_neighbor_events(pkat->peer)) + zlog_debug( + "%s [FSM] Timer (keepalive timer expire)", + pkat->peer->host); + + bgp_keepalive_send(pkat->peer); + monotime(&pkat->last); + memset(&elapsed, 0x00, sizeof(struct timeval)); + diff = ka; // time until next keepalive == peer + // keepalive time + } + + // if calculated next update for this peer < current delay, use + // it + if (!update_set || timercmp(&diff, &next_update, <)) { + next_update = diff; + update_set = 1; + } + } + + return next_update; +} + +void *peer_keepalives_start(void *arg) +{ + struct timeval currtime = {0, 0}; + struct timeval next_update = {0, 0}; + struct timespec next_update_ts = {0, 0}; + + // initialize synchronization primitives + pthread_mutex_lock(&peerlist_mtx); + + // use monotonic clock with condition variable + pthread_condattr_t attrs; + pthread_condattr_init(&attrs); + pthread_condattr_setclock(&attrs, CLOCK_MONOTONIC); + pthread_cond_init(&peerlist_cond, &attrs); + + // initialize peerlist + peerlist = list_new(); + peerlist->del = pkat_del; + + // register cleanup handlers + pthread_cleanup_push(&cleanup_handler, NULL); + + bgp_keepalives_thread_run = true; + + while (bgp_keepalives_thread_run) { + if (peerlist->count > 0) + pthread_cond_timedwait(&peerlist_cond, &peerlist_mtx, + &next_update_ts); + else + while (peerlist->count == 0 + && bgp_keepalives_thread_run) + pthread_cond_wait(&peerlist_cond, + &peerlist_mtx); + + monotime(&currtime); + next_update = update(); + timeradd(&currtime, &next_update, &next_update); + TIMEVAL_TO_TIMESPEC(&next_update, &next_update_ts); + } + + // clean up + pthread_cleanup_pop(1); + + return NULL; +} + +/* --- thread external functions ------------------------------------------- */ + +void peer_keepalives_on(struct peer *peer) +{ + pthread_mutex_lock(&peerlist_mtx); + { + struct listnode *ln, *nn; + struct pkat *pkat; + + for (ALL_LIST_ELEMENTS(peerlist, ln, nn, pkat)) + if (pkat->peer == peer) { + pthread_mutex_unlock(&peerlist_mtx); + return; + } + + pkat = pkat_new(peer); + listnode_add(peerlist, pkat); + peer_lock(peer); + } + pthread_mutex_unlock(&peerlist_mtx); + peer_keepalives_wake(); +} + +void peer_keepalives_off(struct peer *peer) +{ + pthread_mutex_lock(&peerlist_mtx); + { + struct listnode *ln, *nn; + struct pkat *pkat; + + for (ALL_LIST_ELEMENTS(peerlist, ln, nn, pkat)) + if (pkat->peer == peer) { + XFREE(MTYPE_TMP, pkat); + list_delete_node(peerlist, ln); + peer_unlock(peer); + } + } + pthread_mutex_unlock(&peerlist_mtx); +} + +void peer_keepalives_wake() +{ + pthread_mutex_lock(&peerlist_mtx); + { + pthread_cond_signal(&peerlist_cond); + } + pthread_mutex_unlock(&peerlist_mtx); +} diff --git a/bgpd/bgp_keepalives.h b/bgpd/bgp_keepalives.h new file mode 100644 index 0000000000..281c32f173 --- /dev/null +++ b/bgpd/bgp_keepalives.h @@ -0,0 +1,83 @@ +/* BGP Keepalives. + * + * Implemented server-style in a pthread. + * -------------------------------------- + * Copyright (C) 2017 Cumulus Networks, Inc. + * + * This file is part of Free Range Routing. + * + * Free Range Routing is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any later + * version. + * + * Free Range Routing is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GN5U General Public License along + * with Free Range Routing; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ +#ifndef _BGP_KEEPALIVES_H_ +#define _BGP_KEEPALIVES_H_ + +#include "bgpd.h" + +/* Thread control flag. + * + * Setting this flag to 'false' while the thread is running will result in + * thread termination. + */ +extern bool bgp_keepalives_thread_run; + +/* Turns on keepalives for a peer. + * + * This function adds the peer to an internal list of peers to generate + * keepalives for. + * + * At set intervals, a BGP KEEPALIVE packet is generated and placed on + * peer->obuf. This operation is thread-safe with respect to peer->obuf. + * + * peer->v_keepalive determines the interval. Changing this value before + * unregistering this peer with peer_keepalives_off() results in undefined + * behavior. + * + * If the peer is already registered for keepalives via this function, nothing + * happens. + */ +extern void peer_keepalives_on(struct peer *); + +/* Turns off keepalives for a peer. + * + * Removes the peer from the internal list of peers to generate keepalives for. + * + * If the peer is already unregistered for keepalives, nothing happens. + */ +extern void peer_keepalives_off(struct peer *); + +/* Entry function for keepalives pthread. + * + * This function loops over an internal list of peers, generating keepalives at + * regular intervals as determined by each peer's keepalive timer. + * + * See peer_keepalives_on() for additional details. + * + * @param arg pthread arg, not used + */ +extern void *peer_keepalives_start(void *arg); + +/* Poking function for keepalives pthread. + * + * Under normal circumstances the pthread will automatically wake itself + * whenever it is necessary to do work. This function may be used to force the + * thread to wake up and see if there is any work to do, or if it is time to + * die. + * + * It is not necessary to call this after peer_keepalives_on(). + */ +extern void peer_keepalives_wake(void); + +#endif /* _BGP_KEEPALIVES_H */ diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index b4ea45b3e0..d8cd7d5173 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -20,6 +20,7 @@ #include +#include #include "vector.h" #include "command.h" #include "getopt.h" @@ -55,6 +56,7 @@ #include "bgpd/bgp_filter.h" #include "bgpd/bgp_zebra.h" #include "bgpd/bgp_packet.h" +#include "bgpd/bgp_keepalives.h" #ifdef ENABLE_BGP_VNC #include "bgpd/rfapi/rfapi_backend.h" @@ -393,8 +395,9 @@ int main(int argc, char **argv) snprintf(bgpd_di.startinfo, sizeof(bgpd_di.startinfo), ", bgp@%s:%d", (bm->address ? bm->address : ""), bm->port); - pthread_t packet_writes; + pthread_t packet_writes, keepalives; pthread_create(&packet_writes, NULL, &peer_writes_start, NULL); + pthread_create(&keepalives, NULL, &keepalives_start, NULL); frr_config_fork(); frr_run(bm->master);