Display a warning when it is not possible to form a cluster

This typically happens when a local firewall is enabled. The patch adds a
new statistics item called continuous_gather, which holds the number of
consecutive times the gather state has been entered. If this number is
bigger than MAX_NO_CONT_GATHER, a warning message is displayed. The counter
is also checked on exit, so stopping corosync is now possible even with the
firewall enabled.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Steven Dake <sdake@redhat.com>
This commit is contained in:
Jan Friesse 2010-12-02 14:35:00 +01:00
parent bafb69bf75
commit b9df4424b1
3 changed files with 36 additions and 0 deletions

View File

@ -198,8 +198,17 @@ void corosync_shutdown_request (void)
static void *corosync_exit_thread_handler (void *arg)
{
totempg_stats_t * stats;
sem_wait (&corosync_exit_sem);
stats = api->totem_get_stats();
if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ||
stats->mrp->srp->operational_entered == 0) {
unlink_all_completed ();
/* NOTREACHED */
}
corosync_service_unlink_all (api, unlink_all_completed);
return arg;
@ -626,6 +635,9 @@ static void corosync_totem_stats_updater (void *data)
objdb->object_key_replace (stats->mrp->srp->hdr.handle,
"rx_msg_dropped", strlen("rx_msg_dropped"),
&stats->mrp->srp->rx_msg_dropped, sizeof (stats->mrp->srp->rx_msg_dropped));
objdb->object_key_replace (stats->mrp->srp->hdr.handle,
"continuous_gather", strlen("continuous_gather"),
&stats->mrp->srp->continuous_gather, sizeof (stats->mrp->srp->continuous_gather));
total_mtt_rx_token = 0;
total_token_holdtime = 0;
@ -784,6 +796,9 @@ static void corosync_totem_stats_init (void)
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"rx_msg_dropped", &zero_64,
sizeof (zero_64), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"continuous_gather", &zero_32,
sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
}
/* start stats timer */

View File

@ -502,6 +502,7 @@ struct totemsrp_instance {
struct memb_commit_token *commit_token;
totemsrp_stats_t stats;
void * token_recv_event_handle;
void * token_sent_event_handle;
char commit_token_storage[9000];
@ -1789,6 +1790,8 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance)
instance->memb_state = MEMB_STATE_OPERATIONAL;
instance->stats.operational_entered++;
instance->stats.continuous_gather = 0;
instance->my_received_flg = 1;
reset_pause_timeout (instance);
@ -1853,6 +1856,15 @@ static void memb_state_gather_enter (
instance->memb_state = MEMB_STATE_GATHER;
instance->stats.gather_entered++;
instance->stats.continuous_gather++;
if (instance->stats.continuous_gather > MAX_NO_CONT_GATHER) {
log_printf (instance->totemsrp_log_level_warning,
"Totem is unable to form a cluster because of an "
"operating system or network fault. The most common "
"cause of this message is that the local firewall is "
"configured improperly.\n");
}
return;
}
@ -1897,6 +1909,7 @@ static void memb_state_commit_enter (
reset_token_timeout (instance); // REVIEWED
instance->stats.commit_entered++;
instance->stats.continuous_gather = 0;
/*
* reset all flow control variables since we are starting a new ring
@ -2093,6 +2106,8 @@ originated:
instance->memb_state = MEMB_STATE_RECOVERY;
instance->stats.recovery_entered++;
instance->stats.continuous_gather = 0;
return;
}

View File

@ -52,6 +52,11 @@
#define SEND_THREADS_MAX 16
#define INTERFACE_MAX 2
/*
 * Maximum number of consecutive entries into the gather state, as counted
 * by the continuous_gather statistic. The counter is reset to zero whenever
 * the operational, commit or recovery state is entered. Once it exceeds
 * this limit, totemsrp logs a warning that a cluster cannot be formed, and
 * the exit thread calls unlink_all_completed() directly instead of going
 * through corosync_service_unlink_all().
 */
#define MAX_NO_CONT_GATHER 3
struct totem_interface {
struct totem_ip_address bindnet;
struct totem_ip_address boundto;
@ -250,6 +255,7 @@ typedef struct {
uint64_t recovery_token_lost;
uint64_t consensus_timeouts;
uint64_t rx_msg_dropped;
uint32_t continuous_gather;
int earliest_token;
int latest_token;