mirror of
https://git.proxmox.com/git/mirror_corosync
synced 2025-10-17 17:13:14 +00:00
cpg: Process join list after downlists
let's say following situation will happen: - we have 3 nodes - on wire messages looks like D1,J1,D2,J2,D3,J3 (D is downlist, J is joinlist) - let's say, D1 and D3 contains node 2 - it means that J2 is applied, but right after that, D1 (or D3) is applied what means, node 2 is again considered down It's solved by collecting joinlists and apply them after downlist, so order is: - apply best matching downlist - apply all joinlists Signed-off-by: Jan Friesse <jfriesse@redhat.com> Reviewed-by: Steven Dake <sdake@redhat.com>
This commit is contained in:
parent
816d7687b0
commit
35446d6bcc
83
exec/cpg.c
83
exec/cpg.c
@ -143,6 +143,7 @@ enum cpg_downlist_state_e {
|
||||
};
|
||||
static enum cpg_downlist_state_e downlist_state;
|
||||
static struct list_head downlist_messages_head;
|
||||
static struct list_head joinlist_messages_head;
|
||||
|
||||
struct cpg_pd {
|
||||
void *conn;
|
||||
@ -285,6 +286,10 @@ static void downlist_messages_delete (void);
|
||||
|
||||
static void downlist_master_choose_and_send (void);
|
||||
|
||||
static void joinlist_inform_clients (void);
|
||||
|
||||
static void joinlist_messages_delete (void);
|
||||
|
||||
static void cpg_sync_init (
|
||||
const unsigned int *trans_list,
|
||||
size_t trans_list_entries,
|
||||
@ -298,6 +303,12 @@ static void cpg_sync_activate (void);
|
||||
|
||||
static void cpg_sync_abort (void);
|
||||
|
||||
static void do_proc_join(
|
||||
const mar_cpg_name_t *name,
|
||||
uint32_t pid,
|
||||
unsigned int nodeid,
|
||||
int reason);
|
||||
|
||||
static int notify_lib_totem_membership (
|
||||
void *conn,
|
||||
int member_list_entries,
|
||||
@ -456,6 +467,13 @@ struct downlist_msg {
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
struct joinlist_msg {
|
||||
mar_uint32_t sender_nodeid;
|
||||
uint32_t pid;
|
||||
mar_cpg_name_t group_name;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
static struct req_exec_cpg_downlist g_req_exec_cpg_downlist;
|
||||
|
||||
static void cpg_sync_init (
|
||||
@ -527,8 +545,11 @@ static void cpg_sync_activate (void)
|
||||
downlist_master_choose_and_send ();
|
||||
}
|
||||
|
||||
joinlist_inform_clients ();
|
||||
|
||||
downlist_messages_delete ();
|
||||
downlist_state = CPG_DOWNLIST_NONE;
|
||||
joinlist_messages_delete ();
|
||||
|
||||
notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list);
|
||||
}
|
||||
@ -537,6 +558,7 @@ static void cpg_sync_abort (void)
|
||||
{
|
||||
downlist_state = CPG_DOWNLIST_NONE;
|
||||
downlist_messages_delete ();
|
||||
joinlist_messages_delete ();
|
||||
}
|
||||
|
||||
static int notify_lib_totem_membership (
|
||||
@ -865,6 +887,34 @@ static void downlist_master_choose_and_send (void)
|
||||
qb_map_destroy(group_map);
|
||||
}
|
||||
|
||||
static void joinlist_inform_clients (void)
|
||||
{
|
||||
struct joinlist_msg *stored_msg;
|
||||
struct list_head *iter;
|
||||
unsigned int i;
|
||||
|
||||
i = 0;
|
||||
for (iter = joinlist_messages_head.next;
|
||||
iter != &joinlist_messages_head;
|
||||
iter = iter->next) {
|
||||
|
||||
stored_msg = list_entry(iter, struct joinlist_msg, list);
|
||||
|
||||
log_printf (LOG_DEBUG, "joinlist_messages[%u] group:%d, ip:%s, pid:%d",
|
||||
i++, stored_msg->group_name.value,
|
||||
(char*)api->totem_ifaces_print(stored_msg->sender_nodeid),
|
||||
stored_msg->pid);
|
||||
|
||||
/* Ignore our own messages */
|
||||
if (stored_msg->sender_nodeid == api->totem_nodeid_get()) {
|
||||
continue ;
|
||||
}
|
||||
|
||||
do_proc_join (&stored_msg->group_name, stored_msg->pid, stored_msg->sender_nodeid,
|
||||
CONFCHG_CPG_REASON_NODEUP);
|
||||
}
|
||||
}
|
||||
|
||||
static void downlist_messages_delete (void)
|
||||
{
|
||||
struct downlist_msg *stored_msg;
|
||||
@ -882,6 +932,23 @@ static void downlist_messages_delete (void)
|
||||
}
|
||||
}
|
||||
|
||||
static void joinlist_messages_delete (void)
|
||||
{
|
||||
struct joinlist_msg *stored_msg;
|
||||
struct list_head *iter, *iter_next;
|
||||
|
||||
for (iter = joinlist_messages_head.next;
|
||||
iter != &joinlist_messages_head;
|
||||
iter = iter_next) {
|
||||
|
||||
iter_next = iter->next;
|
||||
|
||||
stored_msg = list_entry(iter, struct joinlist_msg, list);
|
||||
list_del (&stored_msg->list);
|
||||
free (stored_msg);
|
||||
}
|
||||
list_init (&joinlist_messages_head);
|
||||
}
|
||||
|
||||
static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api)
|
||||
{
|
||||
@ -889,6 +956,7 @@ static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api)
|
||||
logsys_subsys_init();
|
||||
#endif
|
||||
list_init (&downlist_messages_head);
|
||||
list_init (&joinlist_messages_head);
|
||||
api = corosync_api;
|
||||
return (NULL);
|
||||
}
|
||||
@ -1199,18 +1267,19 @@ static void message_handler_req_exec_cpg_joinlist (
|
||||
const char *message = message_v;
|
||||
const struct qb_ipc_response_header *res = (const struct qb_ipc_response_header *)message;
|
||||
const struct join_list_entry *jle = (const struct join_list_entry *)(message + sizeof(struct qb_ipc_response_header));
|
||||
struct joinlist_msg *stored_msg;
|
||||
|
||||
log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node %x",
|
||||
nodeid);
|
||||
|
||||
/* Ignore our own messages */
|
||||
if (nodeid == api->totem_nodeid_get()) {
|
||||
return;
|
||||
}
|
||||
|
||||
while ((const char*)jle < message + res->size) {
|
||||
do_proc_join (&jle->group_name, jle->pid, nodeid,
|
||||
CONFCHG_CPG_REASON_NODEUP);
|
||||
stored_msg = malloc (sizeof (struct joinlist_msg));
|
||||
memset(stored_msg, 0, sizeof (struct joinlist_msg));
|
||||
stored_msg->sender_nodeid = nodeid;
|
||||
stored_msg->pid = jle->pid;
|
||||
memcpy(&stored_msg->group_name, &jle->group_name, sizeof(mar_cpg_name_t));
|
||||
list_init (&stored_msg->list);
|
||||
list_add (&stored_msg->list, &joinlist_messages_head);
|
||||
jle++;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user