mirror of
https://git.proxmox.com/git/mirror_corosync-qdevice
synced 2025-04-29 06:57:55 +00:00

Heuristics is designed to be a component of its own, which doesn't depend on qdevice_instance. Removing the qdevice_instance pointer was easy once the exec notifier got two user data pointers. Signed-off-by: Jan Friesse <jfriesse@redhat.com>
453 lines
13 KiB
C
453 lines
13 KiB
C
/*
|
|
* Copyright (c) 2015-2020 Red Hat, Inc.
|
|
*
|
|
* All rights reserved.
|
|
*
|
|
* Author: Jan Friesse (jfriesse@redhat.com)
|
|
*
|
|
* This software licensed under BSD license, the text of which follows:
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* - Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* - Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
* - Neither the name of the Red Hat, Inc. nor the names of its
|
|
* contributors may be used to endorse or promote products derived from this
|
|
* software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
|
* THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
|
|
#include "log.h"
|
|
#include "qdevice-config.h"
|
|
#include "qdevice-instance.h"
|
|
#include "qdevice-heuristics-exec-list.h"
|
|
/* TODO: Remove these 3 include lines when porting to pr-poll-loop */
|
|
#include "qdevice-heuristics.h"
|
|
#include "qdevice-heuristics-cmd.h"
|
|
#include "qdevice-votequorum.h"
|
|
#include "qdevice-model.h"
|
|
#include "utils.h"
|
|
|
|
int
|
|
qdevice_instance_init(struct qdevice_instance *instance,
|
|
const struct qdevice_advanced_settings *advanced_settings)
|
|
{
|
|
|
|
memset(instance, 0, sizeof(*instance));
|
|
|
|
node_list_init(&instance->config_node_list);
|
|
|
|
instance->vq_last_poll = ((time_t) -1);
|
|
instance->advanced_settings = advanced_settings;
|
|
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
qdevice_instance_destroy(struct qdevice_instance *instance)
|
|
{
|
|
|
|
node_list_free(&instance->config_node_list);
|
|
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
qdevice_instance_configure_from_cmap_heuristics(struct qdevice_instance *instance)
|
|
{
|
|
char *str;
|
|
long long int lli;
|
|
int i;
|
|
int res;
|
|
cs_error_t cs_err;
|
|
cmap_iter_handle_t iter_handle;
|
|
char key_name[CMAP_KEYNAME_MAXLEN + 1];
|
|
size_t value_len;
|
|
cmap_value_types_t type;
|
|
struct qdevice_heuristics_exec_list tmp_exec_list;
|
|
struct qdevice_heuristics_exec_list *exec_list;
|
|
char *command;
|
|
char exec_name[CMAP_KEYNAME_MAXLEN + 1];
|
|
char tmp_key[CMAP_KEYNAME_MAXLEN + 1];
|
|
size_t no_execs;
|
|
int send_exec_list;
|
|
|
|
instance->heuristics_instance.timeout = instance->heartbeat_interval / 2;
|
|
if (cmap_get_string(instance->cmap_handle,
|
|
"quorum.device.heuristics.timeout", &str) == CS_OK) {
|
|
if (utils_strtonum(str, instance->advanced_settings->heuristics_min_timeout,
|
|
instance->advanced_settings->heuristics_max_timeout, &lli) == -1) {
|
|
log(LOG_ERR, "heuristics.timeout must be valid number in "
|
|
"range <%"PRIu32",%"PRIu32">",
|
|
instance->advanced_settings->heuristics_min_timeout,
|
|
instance->advanced_settings->heuristics_max_timeout);
|
|
|
|
free(str);
|
|
return (-1);
|
|
} else {
|
|
instance->heuristics_instance.timeout = lli;
|
|
}
|
|
|
|
free(str);
|
|
}
|
|
|
|
instance->heuristics_instance.sync_timeout = instance->sync_heartbeat_interval / 2;
|
|
if (cmap_get_string(instance->cmap_handle,
|
|
"quorum.device.heuristics.sync_timeout", &str) == CS_OK) {
|
|
if (utils_strtonum(str, instance->advanced_settings->heuristics_min_timeout,
|
|
instance->advanced_settings->heuristics_max_timeout, &lli) == -1) {
|
|
log(LOG_ERR, "heuristics.sync_timeout must be valid number in "
|
|
"range <%"PRIu32",%"PRIu32">",
|
|
instance->advanced_settings->heuristics_min_timeout,
|
|
instance->advanced_settings->heuristics_max_timeout);
|
|
|
|
free(str);
|
|
return (-1);
|
|
} else {
|
|
instance->heuristics_instance.sync_timeout = lli;
|
|
}
|
|
|
|
free(str);
|
|
}
|
|
|
|
instance->heuristics_instance.interval = instance->heartbeat_interval * 3;
|
|
if (cmap_get_string(instance->cmap_handle,
|
|
"quorum.device.heuristics.interval", &str) == CS_OK) {
|
|
if (utils_strtonum(str, instance->advanced_settings->heuristics_min_interval,
|
|
instance->advanced_settings->heuristics_max_interval, &lli) == -1) {
|
|
log(LOG_ERR, "heuristics.interval must be valid number in "
|
|
"range <%"PRIu32",%"PRIu32">",
|
|
instance->advanced_settings->heuristics_min_interval,
|
|
instance->advanced_settings->heuristics_max_interval);
|
|
|
|
free(str);
|
|
return (-1);
|
|
} else {
|
|
instance->heuristics_instance.interval = lli;
|
|
}
|
|
|
|
free(str);
|
|
}
|
|
|
|
instance->heuristics_instance.mode = QDEVICE_DEFAULT_HEURISTICS_MODE;
|
|
|
|
if (cmap_get_string(instance->cmap_handle, "quorum.device.heuristics.mode", &str) == CS_OK) {
|
|
if ((i = utils_parse_bool_str(str)) == -1) {
|
|
if (strcasecmp(str, "sync") != 0) {
|
|
log(LOG_ERR, "quorum.device.heuristics.mode value is not valid.");
|
|
|
|
free(str);
|
|
return (-1);
|
|
} else {
|
|
instance->heuristics_instance.mode = QDEVICE_HEURISTICS_MODE_SYNC;
|
|
}
|
|
} else {
|
|
if (i == 1) {
|
|
instance->heuristics_instance.mode = QDEVICE_HEURISTICS_MODE_ENABLED;
|
|
} else {
|
|
instance->heuristics_instance.mode = QDEVICE_HEURISTICS_MODE_DISABLED;
|
|
}
|
|
}
|
|
|
|
free(str);
|
|
}
|
|
|
|
send_exec_list = 0;
|
|
exec_list = NULL;
|
|
qdevice_heuristics_exec_list_init(&tmp_exec_list);
|
|
|
|
if (instance->heuristics_instance.mode == QDEVICE_HEURISTICS_MODE_DISABLED) {
|
|
exec_list = NULL;
|
|
send_exec_list = 1;
|
|
} else if (instance->heuristics_instance.mode == QDEVICE_HEURISTICS_MODE_ENABLED ||
|
|
instance->heuristics_instance.mode == QDEVICE_HEURISTICS_MODE_SYNC) {
|
|
/*
|
|
* Walk thru list of commands to exec
|
|
*/
|
|
cs_err = cmap_iter_init(instance->cmap_handle, "quorum.device.heuristics.exec_", &iter_handle);
|
|
if (cs_err != CS_OK) {
|
|
log(LOG_ERR, "Can't iterate quorum.device.heuristics.exec_ keys. "
|
|
"Error %s", cs_strerror(cs_err));
|
|
|
|
return (-1);
|
|
}
|
|
|
|
while ((cs_err = cmap_iter_next(instance->cmap_handle, iter_handle, key_name,
|
|
&value_len, &type)) == CS_OK) {
|
|
if (type != CMAP_VALUETYPE_STRING) {
|
|
log(LOG_WARNING, "%s key is not of string type. Ignoring", key_name);
|
|
continue ;
|
|
}
|
|
|
|
res = sscanf(key_name, "quorum.device.heuristics.exec_%[^.]%s", exec_name, tmp_key);
|
|
if (res != 1) {
|
|
log(LOG_WARNING, "%s key is not correct heuristics exec name. Ignoring", key_name);
|
|
continue ;
|
|
}
|
|
|
|
cs_err = cmap_get_string(instance->cmap_handle, key_name, &command);
|
|
if (cs_err != CS_OK) {
|
|
log(LOG_WARNING, "Can't get value of %s key. Ignoring", key_name);
|
|
continue ;
|
|
}
|
|
|
|
if (qdevice_heuristics_exec_list_add(&tmp_exec_list, exec_name, command) == NULL) {
|
|
log(LOG_WARNING, "Can't store value of %s key into list. Ignoring", key_name);
|
|
}
|
|
|
|
free(command);
|
|
}
|
|
|
|
no_execs = qdevice_heuristics_exec_list_size(&tmp_exec_list);
|
|
|
|
if (no_execs == 0) {
|
|
log(LOG_INFO, "No valid heuristics execs defined. Disabling heuristics.");
|
|
instance->heuristics_instance.mode = QDEVICE_HEURISTICS_MODE_DISABLED;
|
|
exec_list = NULL;
|
|
send_exec_list = 1;
|
|
} else if (no_execs > instance->advanced_settings->heuristics_max_execs) {
|
|
log(LOG_ERR, "Too much (%zu) heuristics execs defined (max is %zu)."
|
|
" Disabling heuristics.", no_execs,
|
|
instance->advanced_settings->heuristics_max_execs);
|
|
instance->heuristics_instance.mode = QDEVICE_HEURISTICS_MODE_DISABLED;
|
|
exec_list = NULL;
|
|
send_exec_list = 1;
|
|
} else if (qdevice_heuristics_exec_list_eq(&tmp_exec_list,
|
|
&instance->heuristics_instance.exec_list) == 1) {
|
|
log(LOG_DEBUG, "Heuristics list is unchanged");
|
|
send_exec_list = 0;
|
|
} else {
|
|
log(LOG_DEBUG, "Heuristics list changed");
|
|
exec_list = &tmp_exec_list;
|
|
send_exec_list = 1;
|
|
}
|
|
|
|
} else {
|
|
log(LOG_CRIT, "Undefined heuristics mode");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
if (send_exec_list) {
|
|
if (qdevice_heuristics_change_exec_list(&instance->heuristics_instance,
|
|
exec_list, instance->sync_in_progress) != 0) {
|
|
return (-1);
|
|
}
|
|
}
|
|
|
|
qdevice_heuristics_exec_list_free(&tmp_exec_list);
|
|
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
qdevice_instance_configure_from_cmap(struct qdevice_instance *instance)
|
|
{
|
|
char *str;
|
|
|
|
if (cmap_get_string(instance->cmap_handle, "quorum.device.model", &str) != CS_OK) {
|
|
log(LOG_ERR, "Can't read quorum.device.model cmap key.");
|
|
|
|
return (-1);
|
|
}
|
|
|
|
if (qdevice_model_str_to_type(str, &instance->model_type) != 0) {
|
|
log(LOG_ERR, "Configured device model %s is not supported.", str);
|
|
free(str);
|
|
|
|
return (-1);
|
|
}
|
|
free(str);
|
|
|
|
if (cmap_get_uint32(instance->cmap_handle, "runtime.votequorum.this_node_id",
|
|
&instance->node_id) != CS_OK) {
|
|
log(LOG_ERR, "Unable to retrieve this node nodeid.");
|
|
|
|
return (-1);
|
|
}
|
|
|
|
if (cmap_get_uint32(instance->cmap_handle, "quorum.device.timeout", &instance->heartbeat_interval) != CS_OK) {
|
|
instance->heartbeat_interval = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
|
|
}
|
|
|
|
if (cmap_get_uint32(instance->cmap_handle, "quorum.device.sync_timeout",
|
|
&instance->sync_heartbeat_interval) != CS_OK) {
|
|
instance->sync_heartbeat_interval = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
|
|
}
|
|
|
|
if (qdevice_instance_configure_from_cmap_heuristics(instance) != 0) {
|
|
return (-1);
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
#define QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS 5
|
|
|
|
int
|
|
qdevice_instance_wait_for_initial_heuristics_exec_result(struct qdevice_instance *instance)
|
|
{
|
|
struct pollfd pfds[QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS];
|
|
int no_pfds;
|
|
int poll_res;
|
|
int timeout;
|
|
int i;
|
|
int case_processed;
|
|
int res;
|
|
|
|
while (!instance->vq_node_list_initial_heuristics_finished) {
|
|
no_pfds = 0;
|
|
|
|
assert(no_pfds < QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS);
|
|
pfds[no_pfds].fd = instance->heuristics_instance.pipe_log_recv;
|
|
pfds[no_pfds].events = POLLIN;
|
|
pfds[no_pfds].revents = 0;
|
|
no_pfds++;
|
|
|
|
assert(no_pfds < QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS);
|
|
pfds[no_pfds].fd = instance->heuristics_instance.pipe_cmd_recv;
|
|
pfds[no_pfds].events = POLLIN;
|
|
pfds[no_pfds].revents = 0;
|
|
no_pfds++;
|
|
|
|
assert(no_pfds < QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS);
|
|
pfds[no_pfds].fd = instance->votequorum_poll_fd;
|
|
pfds[no_pfds].events = POLLIN;
|
|
pfds[no_pfds].revents = 0;
|
|
no_pfds++;
|
|
|
|
if (!send_buffer_list_empty(&instance->heuristics_instance.cmd_out_buffer_list)) {
|
|
assert(no_pfds < QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS);
|
|
pfds[no_pfds].fd = instance->heuristics_instance.pipe_cmd_send;
|
|
pfds[no_pfds].events = POLLOUT;
|
|
pfds[no_pfds].revents = 0;
|
|
no_pfds++;
|
|
}
|
|
|
|
/*
|
|
* We know this is never larger than QDEVICE_DEFAULT_HEURISTICS_MAX_TIMEOUT * 2
|
|
*/
|
|
timeout = (int)instance->heuristics_instance.sync_timeout * 2;
|
|
|
|
poll_res = poll(pfds, no_pfds, timeout);
|
|
if (poll_res > 0) {
|
|
for (i = 0; i < no_pfds; i++) {
|
|
if (pfds[i].revents & POLLIN) {
|
|
case_processed = 0;
|
|
switch (i) {
|
|
case 0:
|
|
case_processed = 1;
|
|
|
|
res = qdevice_heuristics_log_read_from_pipe(&instance->heuristics_instance);
|
|
if (res == -1) {
|
|
return (-1);
|
|
}
|
|
break;
|
|
case 1:
|
|
case_processed = 1;
|
|
res = qdevice_heuristics_cmd_read_from_pipe(&instance->heuristics_instance);
|
|
if (res == -1) {
|
|
return (-1);
|
|
}
|
|
break;
|
|
case 2:
|
|
case_processed = 1;
|
|
res = qdevice_votequorum_dispatch(instance);
|
|
if (res == -1) {
|
|
return (-1);
|
|
}
|
|
case 3:
|
|
/*
|
|
* Read on heuristics cmd send fs shouldn't happen
|
|
*/
|
|
break;
|
|
}
|
|
|
|
if (!case_processed) {
|
|
log(LOG_CRIT, "Unhandled read on poll descriptor %u", i);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
if (pfds[i].revents & POLLOUT) {
|
|
case_processed = 0;
|
|
switch (i) {
|
|
case 0:
|
|
case 1:
|
|
case 2:
|
|
/*
|
|
* Write on heuristics log, cmd recv or vq shouldn't happen
|
|
*/
|
|
break;
|
|
case 3:
|
|
case_processed = 1;
|
|
res = qdevice_heuristics_cmd_write(&instance->heuristics_instance);
|
|
if (res == -1) {
|
|
return (-1);
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (!case_processed) {
|
|
log(LOG_CRIT, "Unhandled write on poll descriptor %u", i);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
if ((pfds[i].revents & (POLLERR|POLLHUP|POLLNVAL)) &&
|
|
!(pfds[i].revents & (POLLIN|POLLOUT))) {
|
|
switch (i) {
|
|
case 0:
|
|
case 1:
|
|
case 3:
|
|
/*
|
|
* Closed pipe doesn't mean return of POLLIN. To display
|
|
* better log message, we call read log as if POLLIN would
|
|
* be set.
|
|
*/
|
|
res = qdevice_heuristics_log_read_from_pipe(&instance->heuristics_instance);
|
|
if (res == -1) {
|
|
return (-1);
|
|
}
|
|
|
|
log(LOG_ERR, "POLLERR (%u) on heuristics pipe. Exiting",
|
|
pfds[i].revents);
|
|
return (-1);
|
|
break;
|
|
case 2:
|
|
log(LOG_ERR, "POLLERR (%u) on corosync socket. Exiting",
|
|
pfds[i].revents);
|
|
return (-1);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
} else if (poll_res == 0) {
|
|
log(LOG_ERR, "Timeout waiting for initial heuristics exec result");
|
|
return (-1);
|
|
} else {
|
|
log_err(LOG_ERR, "Initial heuristics exec result poll failed");
|
|
return (-1);
|
|
}
|
|
}
|
|
|
|
return (0);
|
|
}
|