mirror of
https://git.proxmox.com/git/mirror_corosync
synced 2026-01-24 15:26:45 +00:00
votequorum: add last_man_standing support (default: off)
This flag (0|1) can be configured via quorum.last_man_standing and, when enabled, allows expected_votes to be dynamically recalculated. Assume an 8-node cluster in which every node has 1 vote (a mandatory requirement for this feature). In the first event, 3 nodes are lost. The remaining partition of 5 is barely quorate. After a configurable timeout (quorum.last_man_standing_window, default 10 sec) the quorate partition is allowed to recalculate expected_votes based on the remaining nodes. This operation brings expected_votes to 5 and quorum to 3. Repeating the above loop, in the next event 2 more nodes are allowed to die, and so on. Reviewed-by: Steven Dake <sdake@redhat.com> Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
This commit is contained in:
parent
9589611dc4
commit
10098dba27
@ -360,6 +360,7 @@ static int main_config_parser_cb(const char *path,
|
||||
if ((strcmp(path, "quorum.expected_votes") == 0) ||
|
||||
(strcmp(path, "quorum.votes") == 0) ||
|
||||
(strcmp(path, "quorum.quorumdev_poll") == 0) ||
|
||||
(strcmp(path, "quorum.last_man_standing_window") == 0) ||
|
||||
(strcmp(path, "quorum.leaving_timeout") == 0)) {
|
||||
i = atoi(value);
|
||||
icmap_set_uint32(path, i);
|
||||
@ -369,6 +370,7 @@ static int main_config_parser_cb(const char *path,
|
||||
if ((strcmp(path, "quorum.two_node") == 0) ||
|
||||
(strcmp(path, "quorum.wait_for_all") == 0) ||
|
||||
(strcmp(path, "quorum.auto_tie_breaker") == 0) ||
|
||||
(strcmp(path, "quorum.last_man_standing") == 0) ||
|
||||
(strcmp(path, "quorum.quorate") == 0)) {
|
||||
i = atoi(value);
|
||||
icmap_set_uint8(path, i);
|
||||
|
||||
@ -79,6 +79,7 @@
|
||||
/*
 * Compile-time defaults. The three timing values below are used as the
 * initialisers of quorumdev_poll, leaving_timeout and
 * last_man_standing_window respectively.
 */
#define DEFAULT_EXPECTED 1024
|
||||
#define DEFAULT_QDEV_POLL 10000
|
||||
#define DEFAULT_LEAVE_TMO 10000
|
||||
/*
 * Default for quorum.last_man_standing_window. The commit description
 * gives the default as 10 sec, so this is presumably in milliseconds
 * (TODO confirm against the timer API).
 */
#define DEFAULT_LMS_WIN 10000
|
||||
|
||||
LOGSYS_DECLARE_SUBSYS ("VOTEQ");
|
||||
|
||||
@ -121,9 +122,14 @@ static int cluster_is_quorate;
|
||||
static int first_trans = 1;
|
||||
static unsigned int quorumdev_poll = DEFAULT_QDEV_POLL;
|
||||
static unsigned int leaving_timeout = DEFAULT_LEAVE_TMO;
|
||||
|
||||
static uint8_t wait_for_all = 0;
|
||||
static uint8_t auto_tie_breaker = 0;
|
||||
static int lowest_node_id = -1;
|
||||
/*
 * Last Man Standing (LMS) state.
 *
 * last_man_standing: feature switch, loaded from the icmap key
 * "quorum.last_man_standing" in votequorum_init(). It is forced back to 0
 * by the nodeinfo message handler when a node reports more than one vote.
 */
static uint8_t last_man_standing = 0;
|
||||
/*
 * Delay before the LMS timer fires, loaded from
 * "quorum.last_man_standing_window". It is multiplied by 1000000 before
 * being handed to timer_add_duration(); presumably milliseconds converted
 * to nanoseconds (TODO confirm).
 */
static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
|
||||
/*
 * Non-zero while last_man_standing_timer is armed. Set after
 * timer_add_duration(), cleared by lms_timer_fn() and after every
 * timer_delete().
 */
static int last_man_standing_timer_set = 0;
|
||||
/* Handle filled in by timer_add_duration() and passed to timer_delete(). */
static corosync_timer_handle_t last_man_standing_timer;
|
||||
|
||||
static struct cluster_node *us;
|
||||
static struct cluster_node *quorum_device = NULL;
|
||||
@ -375,6 +381,8 @@ static void votequorum_init(struct corosync_api_v1 *api,
|
||||
|
||||
icmap_get_uint8("quorum.wait_for_all", &wait_for_all);
|
||||
icmap_get_uint8("quorum.auto_tie_breaker", &auto_tie_breaker);
|
||||
icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
|
||||
icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
|
||||
|
||||
/*
|
||||
* TODO: we need to know the lowest node-id in the cluster
|
||||
@ -875,6 +883,16 @@ static int quorum_exec_send_reconfigure(int param, int nodeid, int value)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Timer callback for the Last Man Standing window.
 *
 * Registered from quorum_confchg_fn() via timer_add_duration() with a NULL
 * data pointer, so arg is not used here. Marks the timer as no longer
 * armed and, if the cluster is still quorate, triggers a quorum
 * recalculation.
 */
static void lms_timer_fn(void *arg)
|
||||
{
|
||||
ENTER();
|
||||
/* The timer has fired, so there is nothing left to delete. */
last_man_standing_timer_set = 0;
|
||||
if (cluster_is_quorate) {
|
||||
/*
 * NOTE(review): the meaning of the two arguments is not visible in
 * this view; per the commit description this is where expected_votes
 * is recalculated from the remaining nodes - confirm against
 * recalculate_quorum().
 */
recalculate_quorum(1,1);
|
||||
}
|
||||
LEAVE();
|
||||
}
|
||||
|
||||
static void quorum_confchg_fn (
|
||||
enum totem_configuration_type configuration_type,
|
||||
const unsigned int *member_list, size_t member_list_entries,
|
||||
@ -902,6 +920,17 @@ static void quorum_confchg_fn (
|
||||
}
|
||||
}
|
||||
|
||||
if (last_man_standing) {
|
||||
if ((member_list_entries >= quorum) && (left_list_entries)) {
|
||||
if (last_man_standing_timer_set) {
|
||||
corosync_api->timer_delete(last_man_standing_timer);
|
||||
last_man_standing_timer_set = 0;
|
||||
}
|
||||
corosync_api->timer_add_duration((unsigned long long)last_man_standing_window*1000000, NULL, lms_timer_fn, &last_man_standing_timer);
|
||||
last_man_standing_timer_set = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (member_list_entries) {
|
||||
memcpy(quorum_members, member_list, sizeof(unsigned int) * member_list_entries);
|
||||
quorum_members_entries = member_list_entries;
|
||||
@ -975,6 +1004,16 @@ static void message_handler_req_exec_votequorum_nodeinfo (
|
||||
|
||||
log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message: votes: %d, expected: %d wfa: %d\n", req_exec_quorum_nodeinfo->votes, req_exec_quorum_nodeinfo->expected_votes, req_exec_quorum_nodeinfo->wait_for_all);
|
||||
|
||||
if ((last_man_standing) && (req_exec_quorum_nodeinfo->votes > 1)) {
|
||||
log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all"
|
||||
"cluster nodes votes are set to 1. Disabling LMS.");
|
||||
last_man_standing = 0;
|
||||
if (last_man_standing_timer_set) {
|
||||
corosync_api->timer_delete(last_man_standing_timer);
|
||||
last_man_standing_timer_set = 0;
|
||||
}
|
||||
}
|
||||
|
||||
node->flags &= ~NODE_FLAGS_BEENDOWN;
|
||||
|
||||
if (new_node || req_exec_quorum_nodeinfo->first_trans ||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user