From b9df4424b1ea91b98c98c208c295f4323be3204d Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Thu, 2 Dec 2010 14:35:00 +0100 Subject: [PATCH] Display warning when not possible to form cluster This typically happens when the local firewall is enabled. The patch adds a new statistics item called continuous_gather, which holds the number of times the gather state has been entered consecutively. If this number is greater than MAX_NO_CONT_GATHER, a warning message is displayed. The counter is also checked on exit, so stopping corosync is now possible even with the firewall enabled. Signed-off-by: Jan Friesse Reviewed-by: Steven Dake --- exec/main.c | 15 +++++++++++++++ exec/totemsrp.c | 15 +++++++++++++++ include/corosync/totem/totem.h | 6 ++++++ 3 files changed, 36 insertions(+) diff --git a/exec/main.c b/exec/main.c index cd6cb839..b04f503b 100644 --- a/exec/main.c +++ b/exec/main.c @@ -198,8 +198,17 @@ void corosync_shutdown_request (void) static void *corosync_exit_thread_handler (void *arg) { + totempg_stats_t * stats; + sem_wait (&corosync_exit_sem); + stats = api->totem_get_stats(); + if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER || + stats->mrp->srp->operational_entered == 0) { + unlink_all_completed (); + /* NOTREACHED */ + } + corosync_service_unlink_all (api, unlink_all_completed); return arg; @@ -626,6 +635,9 @@ static void corosync_totem_stats_updater (void *data) objdb->object_key_replace (stats->mrp->srp->hdr.handle, "rx_msg_dropped", strlen("rx_msg_dropped"), &stats->mrp->srp->rx_msg_dropped, sizeof (stats->mrp->srp->rx_msg_dropped)); + objdb->object_key_replace (stats->mrp->srp->hdr.handle, + "continuous_gather", strlen("continuous_gather"), + &stats->mrp->srp->continuous_gather, sizeof (stats->mrp->srp->continuous_gather)); total_mtt_rx_token = 0; total_token_holdtime = 0; @@ -784,6 +796,9 @@ static void corosync_totem_stats_init (void) objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "rx_msg_dropped", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); + 
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, + "continuous_gather", &zero_32, + sizeof (zero_32), OBJDB_VALUETYPE_UINT32); } /* start stats timer */ diff --git a/exec/totemsrp.c b/exec/totemsrp.c index f7a66383..c9ad3917 100644 --- a/exec/totemsrp.c +++ b/exec/totemsrp.c @@ -502,6 +502,7 @@ struct totemsrp_instance { struct memb_commit_token *commit_token; totemsrp_stats_t stats; + void * token_recv_event_handle; void * token_sent_event_handle; char commit_token_storage[9000]; @@ -1789,6 +1790,8 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) instance->memb_state = MEMB_STATE_OPERATIONAL; instance->stats.operational_entered++; + instance->stats.continuous_gather = 0; + instance->my_received_flg = 1; reset_pause_timeout (instance); @@ -1853,6 +1856,15 @@ static void memb_state_gather_enter ( instance->memb_state = MEMB_STATE_GATHER; instance->stats.gather_entered++; + instance->stats.continuous_gather++; + + if (instance->stats.continuous_gather > MAX_NO_CONT_GATHER) { + log_printf (instance->totemsrp_log_level_warning, + "Totem is unable to form a cluster because of an " + "operating system or network fault. 
The most common " + "cause of this message is that the local firewall is " + "configured improperly.\n"); + } return; } @@ -1897,6 +1909,7 @@ static void memb_state_commit_enter ( reset_token_timeout (instance); // REVIEWED instance->stats.commit_entered++; + instance->stats.continuous_gather = 0; /* * reset all flow control variables since we are starting a new ring @@ -2093,6 +2106,8 @@ originated: instance->memb_state = MEMB_STATE_RECOVERY; instance->stats.recovery_entered++; + instance->stats.continuous_gather = 0; + return; } diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h index 4e2e475c..cf78e4c0 100644 --- a/include/corosync/totem/totem.h +++ b/include/corosync/totem/totem.h @@ -52,6 +52,11 @@ #define SEND_THREADS_MAX 16 #define INTERFACE_MAX 2 +/* + * Maximum number of continuous gather states + */ +#define MAX_NO_CONT_GATHER 3 + struct totem_interface { struct totem_ip_address bindnet; struct totem_ip_address boundto; @@ -250,6 +255,7 @@ typedef struct { uint64_t recovery_token_lost; uint64_t consensus_timeouts; uint64_t rx_msg_dropped; + uint32_t continuous_gather; int earliest_token; int latest_token;