diff --git a/qdevices/qnetd-client-net.c b/qdevices/qnetd-client-net.c index 391bb87..3d2b546 100644 --- a/qdevices/qnetd-client-net.c +++ b/qdevices/qnetd-client-net.c @@ -61,18 +61,18 @@ static int qnetd_client_net_socket_poll_loop_set_events_cb(PRFileDesc *prfd, short *events, void *user_data1, void *user_data2) { - struct qnetd_instance *instance = (struct qnetd_instance *)user_data1; struct qnetd_client *client = (struct qnetd_client *)user_data2; if (client->schedule_disconnect) { - qnetd_instance_client_disconnect(instance, client, 0); - - if (pr_poll_loop_del_prfd(&instance->main_poll_loop, prfd) == -1) { - log(LOG_ERR, "pr_poll_loop_del_prfd for client socket failed"); - - return (-2); - } - + /* + * Disconnect logic used to be here but it was moved to + * qnetd-instance.c (see qnetd_instance_poll_loop_pre_poll_cb + * function for reasoning). + * + * This condition (= set_events_cb and client scheduled for disconnect) + * shouldn't really happen, but if it does just don't add the client to + * the pr loop and wait for the next pre_poll_cb. + */ return (-1); } diff --git a/qdevices/qnetd-instance.c b/qdevices/qnetd-instance.c index e8d9d14..d1f846a 100644 --- a/qdevices/qnetd-instance.c +++ b/qdevices/qnetd-instance.c @@ -35,6 +35,7 @@ #include #include +#include "log.h" #include "qnetd-instance.h" #include "qnetd-client.h" #include "qnetd-client-dpd-timer.h" @@ -42,6 +43,45 @@ #include "qnetd-log-debug.h" #include "qnetd-client-algo-timer.h" +static int +qnetd_instance_poll_loop_pre_poll_cb(void *user_data1, void *user_data2) +{ + struct qnetd_instance *instance = (struct qnetd_instance *)user_data1; + struct qnetd_client *client; + struct qnetd_client *client_next; + + /* + * This functionality used to be per client fd in + * the qnetd_client_net_socket_poll_loop_set_events_cb.
The problem is that + * disconnect calls the algorithm, which may send a message to another client + * whose fd was already processed in the pr-poll-loop, so POLLOUT is + * not set until a new loop exec is called (and that usually happens + * because the old one times out). To reproduce this problem use + * ffsplit and make qnetd disconnect one of the clients - ffsplit needs to + * send ack/nack votes, but it doesn't send them during the first iteration + * and waits for the dpd timeout. + */ + client = TAILQ_FIRST(&instance->clients); + while (client != NULL) { + client_next = TAILQ_NEXT(client, entries); + + if (client->schedule_disconnect) { + if (pr_poll_loop_del_prfd(&instance->main_poll_loop, + client->socket) == -1) { + log(LOG_ERR, "pr_poll_loop_del_prfd for client socket failed"); + + return (-1); + } + + qnetd_instance_client_disconnect(instance, client, 0); + } + + client = client_next; + } + + return (0); +} + int qnetd_instance_init(struct qnetd_instance *instance, enum tlv_tls_supported tls_supported, int tls_client_cert_required, size_t max_clients, @@ -62,6 +102,14 @@ qnetd_instance_init(struct qnetd_instance *instance, pr_poll_loop_init(&instance->main_poll_loop); + if (pr_poll_loop_add_pre_poll_cb(&instance->main_poll_loop, + qnetd_instance_poll_loop_pre_poll_cb, + instance, NULL) == -1) { + log(LOG_ERR, "Can't add instance pre poll loop cb"); + + return (-1); + } + return (0); } @@ -83,6 +131,11 @@ qnetd_instance_destroy(struct qnetd_instance *instance) qnetd_cluster_list_free(&instance->clusters); qnetd_client_list_free(&instance->clients); + if (pr_poll_loop_del_pre_poll_cb(&instance->main_poll_loop, + qnetd_instance_poll_loop_pre_poll_cb) == -1) { + log(LOG_WARNING, "Can't delete instance pre poll loop cb"); + } + pr_poll_loop_destroy(&instance->main_poll_loop); return (0);