mirror of
https://git.proxmox.com/git/mirror_frr
synced 2025-08-12 14:27:36 +00:00
bgpd: Fix crash during shutdown due to race condition
[New LWP 2524] [New LWP 2539] [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". Core was generated by `/opt/avi/bin/bgpd -f /run/frr/avi_ns3_bgpd.config -i /opt/avi/etc/avi_ns3_bgpd.'. Program terminated with signal SIGABRT, Aborted. [Current thread is 1 (Thread 0x7f92ac8f1740 (LWP 2524))] 0 0x00007f92acb3800b in raise () from /lib/x86_64-linux-gnu/libc.so.6 [Current thread is 1 (Thread 0x7f92ac8f1740 (LWP 2524))] 0 0x00007f92acb3800b in raise () from /lib/x86_64-linux-gnu/libc.so.6 1 0x00007f92acb17859 in abort () from /lib/x86_64-linux-gnu/libc.so.6 2 0x00007f92acb17729 in ?? () from /lib/x86_64-linux-gnu/libc.so.6 3 0x00007f92acb28fd6 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6 4 0x00007f92accf2164 in pthread_mutex_lock () from /lib/x86_64-linux-gnu/libpthread.so.0 5 0x000055b46be1ef63 in bgp_keepalives_wake () at bgpd/bgp_keepalives.c:311 6 0x000055b46be1f111 in bgp_keepalives_stop (fpt=0x55b46cfacf20, result=<optimized out>) at bgpd/bgp_keepalives.c:323 7 0x00007f92acea9521 in frr_pthread_stop (fpt=0x55b46cfacf20, result=result@entry=0x0) at lib/frr_pthread.c:176 8 0x00007f92acea9586 in frr_pthread_stop_all () at lib/frr_pthread.c:188 9 0x000055b46bdde54a in bgp_pthreads_finish () at bgpd/bgpd.c:8150 10 0x000055b46bd696ca in bgp_exit (status=0) at bgpd/bgp_main.c:210 11 sigint () at bgpd/bgp_main.c:154 12 0x00007f92acecc1e9 in quagga_sigevent_process () at lib/sigevent.c:105 13 0x00007f92aced689a in thread_fetch (m=m@entry=0x55b46cf23540, fetch=fetch@entry=0x7fff95379238) at lib/thread.c:1487 14 0x00007f92aceb2681 in frr_run (master=0x55b46cf23540) at lib/libfrr.c:1010 15 0x000055b46bd676f4 in main (argc=11, argv=0x7fff953795a8) at bgpd/bgp_main.c:482 Root cause: This is due to race condition between main thread & keepalive thread during clean-up. This happens when the keepalive thread is processing a wake signal owning the mutex, when meanwhile the main thread tries to stop the keepalives thread. In main thread, the keepalive thread’s running bit (fpt->running) is set to false, without taking the mutex & then it blocks on mutex. Meanwhile, keepalive thread which owns the mutex sees that the running bit is false & executes bgp_keepalives_finish() which also frees up mutex. Main thread that is waiting on mutex with pthread_mutex_lock() will cause core while trying to access mutex. Fix: Take the lock in main thread while setting the fpt->running to false. Signed-off-by: Samanvitha B Bhargav <bsamanvitha@vmware.com>
This commit is contained in:
parent
ea59d5e08d
commit
8c9d306c8d
@ -269,8 +269,9 @@ void bgp_keepalives_on(struct peer *peer)
|
||||
peer_lock(peer);
|
||||
}
|
||||
SET_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON);
|
||||
/* Force the keepalive thread to wake up */
|
||||
pthread_cond_signal(peerhash_cond);
|
||||
}
|
||||
bgp_keepalives_wake();
|
||||
}
|
||||
|
||||
void bgp_keepalives_off(struct peer *peer)
|
||||
@ -300,19 +301,15 @@ void bgp_keepalives_off(struct peer *peer)
|
||||
}
|
||||
}
|
||||
|
||||
void bgp_keepalives_wake(void)
|
||||
{
|
||||
frr_with_mutex (peerhash_mtx) {
|
||||
pthread_cond_signal(peerhash_cond);
|
||||
}
|
||||
}
|
||||
|
||||
int bgp_keepalives_stop(struct frr_pthread *fpt, void **result)
|
||||
{
|
||||
assert(fpt->running);
|
||||
|
||||
atomic_store_explicit(&fpt->running, false, memory_order_relaxed);
|
||||
bgp_keepalives_wake();
|
||||
frr_with_mutex (peerhash_mtx) {
|
||||
atomic_store_explicit(&fpt->running, false,
|
||||
memory_order_relaxed);
|
||||
pthread_cond_signal(peerhash_cond);
|
||||
}
|
||||
|
||||
pthread_join(fpt->thread, result);
|
||||
return 0;
|
||||
|
@ -73,18 +73,6 @@ extern void bgp_keepalives_init(void);
|
||||
*/
|
||||
extern void *bgp_keepalives_start(void *arg);
|
||||
|
||||
/**
|
||||
* Poking function for keepalives pthread.
|
||||
*
|
||||
* Under normal circumstances the pthread will automatically wake itself
|
||||
* whenever it is necessary to do work. This function may be used to force the
|
||||
* thread to wake up and see if there is any work to do, or if it is time to
|
||||
* die.
|
||||
*
|
||||
* It is not necessary to call this after bgp_keepalives_on().
|
||||
*/
|
||||
extern void bgp_keepalives_wake(void);
|
||||
|
||||
/**
|
||||
* Stops the thread and blocks until it terminates.
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user