From 1649931691a4dcba1b88e50749d37314a1a6b2ab Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Thu, 14 Jan 2010 08:39:06 +0000 Subject: [PATCH] Fix freeze of IPC library connection on sem_wait This patch fixes the library blocking indefinitely in sem_wait. It does not solve the other problems that can prevent corosync from exiting (malloc race, global lock deadlock, ...) RHBZ#547511 git-svn-id: http://svn.fedorahosted.org/svn/corosync/trunk@2643 fd59a12c-fef9-0310-b244-a6a79926bd2f --- lib/coroipcc.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/lib/coroipcc.c b/lib/coroipcc.c index aa9546c0..f8841710 100644 --- a/lib/coroipcc.c +++ b/lib/coroipcc.c @@ -71,6 +71,11 @@ #include "util.h" +/* + * sem_wait timeout in seconds (the effective timeout lies in the interval (n-1, n)) + */ +#define IPC_SEMWAIT_TIMEOUT 2 + struct ipc_instance { int fd; #if _POSIX_THREAD_PROCESS_SHARED < 1 @@ -456,13 +461,31 @@ reply_receive ( { #if _POSIX_THREAD_PROCESS_SHARED < 1 struct sembuf sop; +#else + struct timespec timeout; + struct pollfd pfd; #endif coroipc_response_header_t *response_header; int res; #if _POSIX_THREAD_PROCESS_SHARED > 0 retry_semwait: - res = sem_wait (&ipc_instance->control_buffer->sem1); + timeout.tv_sec = time(NULL) + IPC_SEMWAIT_TIMEOUT; + timeout.tv_nsec = 0; + + res = sem_timedwait (&ipc_instance->control_buffer->sem1, &timeout); + if (res == -1 && errno == ETIMEDOUT) { + pfd.fd = ipc_instance->fd; + pfd.events = 0; + + poll (&pfd, 1, 0); + if (pfd.revents == POLLERR || pfd.revents == POLLHUP) { + return (CS_ERR_LIBRARY); + } + + goto retry_semwait; + } + if (res == -1 && errno == EINTR) { goto retry_semwait; } @@ -505,12 +528,30 @@ reply_receive_in_buf ( { #if _POSIX_THREAD_PROCESS_SHARED < 1 struct sembuf sop; +#else + struct timespec timeout; + struct pollfd pfd; #endif int res; #if _POSIX_THREAD_PROCESS_SHARED > 0 retry_semwait: - res = sem_wait (&ipc_instance->control_buffer->sem1); + timeout.tv_sec = time(NULL) + 
IPC_SEMWAIT_TIMEOUT; + timeout.tv_nsec = 0; + + res = sem_timedwait (&ipc_instance->control_buffer->sem1, &timeout); + if (res == -1 && errno == ETIMEDOUT) { + pfd.fd = ipc_instance->fd; + pfd.events = 0; + + poll (&pfd, 1, 0); + if (pfd.revents == POLLERR || pfd.revents == POLLHUP) { + return (CS_ERR_LIBRARY); + } + + goto retry_semwait; + } + if (res == -1 && errno == EINTR) { goto retry_semwait; }