mirror of
https://git.proxmox.com/git/mirror_corosync
synced 2025-07-15 06:17:01 +00:00

this change breaks onwire compatibility. cpg is the only user of sync_* interface and it's the only service that will require extra testing. Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com> Reviewed-by: Steven Dake <sdake@redhat.com> Reviewed-by: Jan Friesse <jfriesse@redhat.com>
708 lines
18 KiB
C
708 lines
18 KiB
C
/*
|
|
* Copyright (c) 2010-2012 Red Hat, Inc.
|
|
*
|
|
* All rights reserved.
|
|
*
|
|
* Author: Angus Salkeld <asalkeld@redhat.com>
|
|
*
|
|
* This software licensed under BSD license, the text of which follows:
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* - Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* - Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
* - Neither the name of the MontaVista Software, Inc. nor the names of its
|
|
* contributors may be used to endorse or promote products derived from this
|
|
* software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
|
* THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <config.h>
|
|
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <sys/ioctl.h>
|
|
#include <linux/types.h>
|
|
#include <linux/watchdog.h>
|
|
#include <sys/reboot.h>
|
|
|
|
#include <corosync/corotypes.h>
|
|
#include <corosync/corodefs.h>
|
|
#include <corosync/coroapi.h>
|
|
#include <corosync/list.h>
|
|
#include <corosync/logsys.h>
|
|
#include <corosync/icmap.h>
|
|
#include "../exec/fsm.h"
|
|
|
|
#include "service.h"
|
|
|
|
typedef enum {
|
|
WD_RESOURCE_GOOD,
|
|
WD_RESOURCE_FAILED,
|
|
WD_RESOURCE_STATE_UNKNOWN,
|
|
WD_RESOURCE_NOT_MONITORED
|
|
} wd_resource_state_t;
|
|
|
|
struct resource {
|
|
char res_path[ICMAP_KEYNAME_MAXLEN];
|
|
char *recovery;
|
|
char name[CS_MAX_NAME_LENGTH];
|
|
time_t last_updated;
|
|
struct cs_fsm fsm;
|
|
|
|
corosync_timer_handle_t check_timer;
|
|
uint64_t check_timeout;
|
|
icmap_track_t icmap_track;
|
|
};
|
|
|
|
LOGSYS_DECLARE_SUBSYS("WD");
|
|
|
|
/*
|
|
* Service Interfaces required by service_message_handler struct
|
|
*/
|
|
static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api);
|
|
static int wd_exec_exit_fn (void);
|
|
static void wd_resource_check_fn (void* resource_ref);
|
|
|
|
static struct corosync_api_v1 *api;
|
|
#define WD_DEFAULT_TIMEOUT_SEC 6
|
|
#define WD_DEFAULT_TIMEOUT_MS (WD_DEFAULT_TIMEOUT_SEC * CS_TIME_MS_IN_SEC)
|
|
#define WD_MIN_TIMEOUT_MS 500
|
|
#define WD_MAX_TIMEOUT_MS (120 * CS_TIME_MS_IN_SEC)
|
|
static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT_SEC;
|
|
static uint64_t tickle_timeout = (WD_DEFAULT_TIMEOUT_MS / 2);
|
|
static int dog = -1;
|
|
static corosync_timer_handle_t wd_timer;
|
|
static int watchdog_ok = 1;
|
|
|
|
struct corosync_service_engine wd_service_engine = {
|
|
.name = "corosync watchdog service",
|
|
.id = WD_SERVICE,
|
|
.priority = 1,
|
|
.private_data_size = 0,
|
|
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
|
|
.lib_init_fn = NULL,
|
|
.lib_exit_fn = NULL,
|
|
.lib_engine = NULL,
|
|
.lib_engine_count = 0,
|
|
.exec_engine = NULL,
|
|
.exec_engine_count = 0,
|
|
.confchg_fn = NULL,
|
|
.exec_init_fn = wd_exec_init_fn,
|
|
.exec_exit_fn = wd_exec_exit_fn,
|
|
.exec_dump_fn = NULL
|
|
};
|
|
|
|
static DECLARE_LIST_INIT (confchg_notify);
|
|
|
|
/*
|
|
* F S M
|
|
*/
|
|
static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
|
|
static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
|
|
|
|
enum wd_resource_state {
|
|
WD_S_RUNNING,
|
|
WD_S_FAILED,
|
|
WD_S_STOPPED
|
|
};
|
|
|
|
enum wd_resource_event {
|
|
WD_E_FAILURE,
|
|
WD_E_CONFIG_CHANGED
|
|
};
|
|
|
|
const char * wd_running_str = "running";
|
|
const char * wd_failed_str = "failed";
|
|
const char * wd_failure_str = "failure";
|
|
const char * wd_stopped_str = "stopped";
|
|
const char * wd_config_changed_str = "config_changed";
|
|
|
|
struct cs_fsm_entry wd_fsm_table[] = {
|
|
{ WD_S_STOPPED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_STOPPED, WD_S_RUNNING, -1} },
|
|
{ WD_S_STOPPED, WD_E_FAILURE, NULL, {-1} },
|
|
{ WD_S_RUNNING, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} },
|
|
{ WD_S_RUNNING, WD_E_FAILURE, wd_resource_failed, {WD_S_FAILED, -1} },
|
|
{ WD_S_FAILED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} },
|
|
{ WD_S_FAILED, WD_E_FAILURE, NULL, {-1} },
|
|
};
|
|
|
|
struct corosync_service_engine *wd_get_service_engine_ver0 (void)
|
|
{
|
|
return (&wd_service_engine);
|
|
}
|
|
|
|
static const char * wd_res_state_to_str(struct cs_fsm* fsm,
|
|
int32_t state)
|
|
{
|
|
switch (state) {
|
|
case WD_S_STOPPED:
|
|
return wd_stopped_str;
|
|
break;
|
|
case WD_S_RUNNING:
|
|
return wd_running_str;
|
|
break;
|
|
case WD_S_FAILED:
|
|
return wd_failed_str;
|
|
break;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static const char * wd_res_event_to_str(struct cs_fsm* fsm,
|
|
int32_t event)
|
|
{
|
|
switch (event) {
|
|
case WD_E_CONFIG_CHANGED:
|
|
return wd_config_changed_str;
|
|
break;
|
|
case WD_E_FAILURE:
|
|
return wd_failure_str;
|
|
break;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* returns (CS_TRUE == OK, CS_FALSE == failed)
|
|
*/
|
|
static int32_t wd_resource_state_is_ok (struct resource *ref)
|
|
{
|
|
char* state;
|
|
uint64_t last_updated;
|
|
uint64_t my_time;
|
|
uint64_t allowed_period;
|
|
char key_name[ICMAP_KEYNAME_MAXLEN];
|
|
|
|
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "last_updated");
|
|
if (icmap_get_uint64(key_name, &last_updated) != CS_OK) {
|
|
/* key does not exist.
|
|
*/
|
|
return CS_FALSE;
|
|
}
|
|
|
|
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state");
|
|
if (icmap_get_string(key_name, &state) != CS_OK || strcmp(state, "disabled") == 0) {
|
|
/* key does not exist.
|
|
*/
|
|
return CS_FALSE;
|
|
}
|
|
free (state);
|
|
|
|
if (last_updated == 0) {
|
|
/* initial value */
|
|
free(state);
|
|
return CS_TRUE;
|
|
}
|
|
|
|
my_time = cs_timestamp_get();
|
|
|
|
/*
|
|
* Here we check that the monitor has written a timestamp within the poll_period
|
|
* plus a grace factor of (0.5 * poll_period).
|
|
*/
|
|
allowed_period = (ref->check_timeout * MILLI_2_NANO_SECONDS * 3) / 2;
|
|
if ((last_updated + allowed_period) < my_time) {
|
|
log_printf (LOGSYS_LEVEL_ERROR,
|
|
"last_updated %"PRIu64" ms too late, period:%"PRIu64".",
|
|
(uint64_t)(my_time/MILLI_2_NANO_SECONDS - ((last_updated + allowed_period) / MILLI_2_NANO_SECONDS)),
|
|
ref->check_timeout);
|
|
free(state);
|
|
return CS_FALSE;
|
|
}
|
|
|
|
if (strcmp (state, wd_failed_str) == 0) {
|
|
free(state);
|
|
return CS_FALSE;
|
|
}
|
|
|
|
free(state);
|
|
return CS_TRUE;
|
|
}
|
|
|
|
static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
|
|
{
|
|
char *state;
|
|
uint64_t tmp_value;
|
|
uint64_t next_timeout;
|
|
struct resource *ref = (struct resource*)data;
|
|
char key_name[ICMAP_KEYNAME_MAXLEN];
|
|
|
|
next_timeout = ref->check_timeout;
|
|
|
|
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "poll_period");
|
|
if (icmap_get_uint64(ref->res_path, &tmp_value) == CS_OK) {
|
|
if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
|
|
log_printf (LOGSYS_LEVEL_DEBUG,
|
|
"poll_period changing from:%"PRIu64" to %"PRIu64".",
|
|
ref->check_timeout, tmp_value);
|
|
/*
|
|
* To easy in the transition between poll_period's we are going
|
|
* to make the first timeout the bigger of the new and old value.
|
|
* This is to give the monitoring system time to adjust.
|
|
*/
|
|
next_timeout = CS_MAX(tmp_value, ref->check_timeout);
|
|
ref->check_timeout = tmp_value;
|
|
} else {
|
|
log_printf (LOGSYS_LEVEL_WARNING,
|
|
"Could NOT use poll_period:%"PRIu64" ms for resource %s",
|
|
tmp_value, ref->name);
|
|
}
|
|
}
|
|
|
|
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "recovery");
|
|
if (icmap_get_string(key_name, &ref->recovery) != CS_OK) {
|
|
/* key does not exist.
|
|
*/
|
|
log_printf (LOGSYS_LEVEL_WARNING,
|
|
"resource %s missing a recovery key.", ref->name);
|
|
cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref);
|
|
return;
|
|
}
|
|
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state");
|
|
if (icmap_get_string(key_name, &state) != CS_OK) {
|
|
/* key does not exist.
|
|
*/
|
|
log_printf (LOGSYS_LEVEL_WARNING,
|
|
"resource %s missing a state key.", ref->name);
|
|
cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref);
|
|
return;
|
|
}
|
|
if (ref->check_timer) {
|
|
api->timer_delete(ref->check_timer);
|
|
ref->check_timer = 0;
|
|
}
|
|
|
|
if (strcmp(wd_stopped_str, state) == 0) {
|
|
cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref);
|
|
} else {
|
|
api->timer_add_duration(next_timeout * MILLI_2_NANO_SECONDS,
|
|
ref, wd_resource_check_fn, &ref->check_timer);
|
|
cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref);
|
|
}
|
|
free(state);
|
|
}
|
|
|
|
static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
|
|
{
|
|
struct resource* ref = (struct resource*)data;
|
|
|
|
if (ref->check_timer) {
|
|
api->timer_delete(ref->check_timer);
|
|
ref->check_timer = 0;
|
|
}
|
|
|
|
log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!",
|
|
ref->recovery, (char*)ref->name);
|
|
if (strcmp (ref->recovery, "watchdog") == 0 ||
|
|
strcmp (ref->recovery, "quit") == 0) {
|
|
watchdog_ok = 0;
|
|
}
|
|
else if (strcmp (ref->recovery, "reboot") == 0) {
|
|
reboot(RB_AUTOBOOT);
|
|
}
|
|
else if (strcmp (ref->recovery, "shutdown") == 0) {
|
|
reboot(RB_POWER_OFF);
|
|
}
|
|
cs_fsm_state_set(fsm, WD_S_FAILED, data);
|
|
}
|
|
|
|
static void wd_key_changed(
|
|
int32_t event,
|
|
const char *key_name,
|
|
struct icmap_notify_value new_val,
|
|
struct icmap_notify_value old_val,
|
|
void *user_data)
|
|
{
|
|
struct resource* ref = (struct resource*)user_data;
|
|
char *last_key_part;
|
|
|
|
if (ref == NULL) {
|
|
return ;
|
|
}
|
|
|
|
last_key_part = strrchr(key_name, '.');
|
|
if (last_key_part == NULL) {
|
|
return ;
|
|
}
|
|
last_key_part++;
|
|
|
|
if (event == ICMAP_TRACK_ADD || event == ICMAP_TRACK_MODIFY) {
|
|
if (strcmp(last_key_part, "last_updated") == 0 ||
|
|
strcmp(last_key_part, "current") == 0) {
|
|
return;
|
|
}
|
|
|
|
cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref);
|
|
}
|
|
|
|
if (event == ICMAP_TRACK_DELETE && ref != NULL) {
|
|
if (strcmp(last_key_part, "state") != 0) {
|
|
return ;
|
|
}
|
|
|
|
log_printf (LOGSYS_LEVEL_WARNING,
|
|
"resource \"%s\" deleted from cmap!",
|
|
ref->name);
|
|
|
|
api->timer_delete(ref->check_timer);
|
|
ref->check_timer = 0;
|
|
icmap_track_delete(ref->icmap_track);
|
|
|
|
free(ref);
|
|
}
|
|
}
|
|
|
|
static void wd_resource_check_fn (void* resource_ref)
|
|
{
|
|
struct resource* ref = (struct resource*)resource_ref;
|
|
|
|
if (wd_resource_state_is_ok (ref) == CS_FALSE) {
|
|
cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref);
|
|
return;
|
|
}
|
|
api->timer_add_duration(ref->check_timeout*MILLI_2_NANO_SECONDS,
|
|
ref, wd_resource_check_fn, &ref->check_timer);
|
|
}
|
|
|
|
/*
|
|
* return 0 - fully configured
|
|
* return -1 - partially configured
|
|
*/
|
|
static int32_t wd_resource_create (char *res_path, char *res_name)
|
|
{
|
|
char *state;
|
|
uint64_t tmp_value;
|
|
struct resource *ref = calloc (1, sizeof (struct resource));
|
|
char key_name[ICMAP_KEYNAME_MAXLEN];
|
|
|
|
strcpy(ref->res_path, res_path);
|
|
ref->check_timeout = WD_DEFAULT_TIMEOUT_MS;
|
|
ref->check_timer = 0;
|
|
|
|
strcpy(ref->name, res_name);
|
|
ref->fsm.name = ref->name;
|
|
ref->fsm.table = wd_fsm_table;
|
|
ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry);
|
|
ref->fsm.curr_entry = 0;
|
|
ref->fsm.curr_state = WD_S_STOPPED;
|
|
ref->fsm.state_to_str = wd_res_state_to_str;
|
|
ref->fsm.event_to_str = wd_res_event_to_str;
|
|
|
|
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "poll_period");
|
|
if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
|
|
icmap_set_uint64(key_name, ref->check_timeout);
|
|
} else {
|
|
if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
|
|
ref->check_timeout = tmp_value;
|
|
} else {
|
|
log_printf (LOGSYS_LEVEL_WARNING,
|
|
"Could NOT use poll_period:%"PRIu64" ms for resource %s",
|
|
tmp_value, ref->name);
|
|
}
|
|
}
|
|
|
|
icmap_track_add(res_path,
|
|
ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY | ICMAP_TRACK_DELETE | ICMAP_TRACK_PREFIX,
|
|
wd_key_changed,
|
|
ref, &ref->icmap_track);
|
|
|
|
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "recovery");
|
|
if (icmap_get_string(key_name, &ref->recovery) != CS_OK) {
|
|
/* key does not exist.
|
|
*/
|
|
log_printf (LOGSYS_LEVEL_WARNING,
|
|
"resource %s missing a recovery key.", ref->name);
|
|
return -1;
|
|
}
|
|
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "state");
|
|
if (icmap_get_string(key_name, &state) != CS_OK) {
|
|
/* key does not exist.
|
|
*/
|
|
log_printf (LOGSYS_LEVEL_WARNING,
|
|
"resource %s missing a state key.", ref->name);
|
|
return -1;
|
|
}
|
|
|
|
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "last_updated");
|
|
if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
|
|
/* key does not exist.
|
|
*/
|
|
ref->last_updated = 0;
|
|
} else {
|
|
ref->last_updated = tmp_value;
|
|
}
|
|
|
|
/*
|
|
* delay the first check to give the monitor time to start working.
|
|
*/
|
|
tmp_value = CS_MAX(ref->check_timeout * 2, WD_DEFAULT_TIMEOUT_MS);
|
|
api->timer_add_duration(tmp_value * MILLI_2_NANO_SECONDS,
|
|
ref,
|
|
wd_resource_check_fn, &ref->check_timer);
|
|
|
|
cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref);
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void wd_tickle_fn (void* arg)
|
|
{
|
|
ENTER();
|
|
|
|
if (watchdog_ok) {
|
|
if (dog > 0) {
|
|
ioctl(dog, WDIOC_KEEPALIVE, &watchdog_ok);
|
|
}
|
|
api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
|
|
wd_tickle_fn, &wd_timer);
|
|
}
|
|
else {
|
|
log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!");
|
|
}
|
|
|
|
}
|
|
|
|
static void wd_resource_created_cb(
|
|
int32_t event,
|
|
const char *key_name,
|
|
struct icmap_notify_value new_val,
|
|
struct icmap_notify_value old_val,
|
|
void *user_data)
|
|
{
|
|
char res_name[ICMAP_KEYNAME_MAXLEN];
|
|
char res_type[ICMAP_KEYNAME_MAXLEN];
|
|
char tmp_key[ICMAP_KEYNAME_MAXLEN];
|
|
int res;
|
|
|
|
if (event != ICMAP_TRACK_ADD) {
|
|
return ;
|
|
}
|
|
|
|
res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key);
|
|
if (res != 3) {
|
|
return ;
|
|
}
|
|
|
|
if (strcmp(tmp_key, "state") != 0) {
|
|
return ;
|
|
}
|
|
|
|
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name);
|
|
wd_resource_create (tmp_key, res_name);
|
|
}
|
|
|
|
static void wd_scan_resources (void)
|
|
{
|
|
int res_count = 0;
|
|
icmap_track_t icmap_track = NULL;
|
|
icmap_iter_t iter;
|
|
const char *key_name;
|
|
int res;
|
|
char res_name[ICMAP_KEYNAME_MAXLEN];
|
|
char res_type[ICMAP_KEYNAME_MAXLEN];
|
|
char tmp_key[ICMAP_KEYNAME_MAXLEN];
|
|
|
|
ENTER();
|
|
|
|
iter = icmap_iter_init("resources.");
|
|
while ((key_name = icmap_iter_next(iter, NULL, NULL)) != NULL) {
|
|
res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key);
|
|
if (res != 3) {
|
|
continue ;
|
|
}
|
|
|
|
if (strcmp(tmp_key, "state") != 0) {
|
|
continue ;
|
|
}
|
|
|
|
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name);
|
|
if (wd_resource_create (tmp_key, res_name) == 0) {
|
|
res_count++;
|
|
}
|
|
}
|
|
icmap_iter_finalize(iter);
|
|
|
|
icmap_track_add("resources.process.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
|
|
wd_resource_created_cb, NULL, &icmap_track);
|
|
icmap_track_add("resources.system.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
|
|
wd_resource_created_cb, NULL, &icmap_track);
|
|
|
|
if (res_count == 0) {
|
|
log_printf (LOGSYS_LEVEL_INFO, "no resources configured.");
|
|
}
|
|
}
|
|
|
|
|
|
static void watchdog_timeout_apply (uint32_t new)
|
|
{
|
|
struct watchdog_info ident;
|
|
uint32_t original_timeout = watchdog_timeout;
|
|
|
|
if (new == original_timeout) {
|
|
return;
|
|
}
|
|
|
|
watchdog_timeout = new;
|
|
|
|
if (dog > 0) {
|
|
ioctl(dog, WDIOC_GETSUPPORT, &ident);
|
|
if (ident.options & WDIOF_SETTIMEOUT) {
|
|
/* yay! the dog is trained.
|
|
*/
|
|
ioctl(dog, WDIOC_SETTIMEOUT, &watchdog_timeout);
|
|
}
|
|
ioctl(dog, WDIOC_GETTIMEOUT, &watchdog_timeout);
|
|
}
|
|
|
|
if (watchdog_timeout == new) {
|
|
tickle_timeout = (watchdog_timeout * CS_TIME_MS_IN_SEC)/ 2;
|
|
|
|
/* reset the tickle timer in case it was reduced.
|
|
*/
|
|
api->timer_delete (wd_timer);
|
|
api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
|
|
wd_tickle_fn, &wd_timer);
|
|
|
|
log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d seconds", watchdog_timeout);
|
|
log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %"PRIu64" ms", tickle_timeout);
|
|
} else {
|
|
log_printf (LOGSYS_LEVEL_WARNING,
|
|
"Could not change the Watchdog timeout from %d to %d seconds",
|
|
original_timeout, new);
|
|
}
|
|
|
|
}
|
|
|
|
static int setup_watchdog(void)
|
|
{
|
|
struct watchdog_info ident;
|
|
|
|
ENTER();
|
|
if (access ("/dev/watchdog", W_OK) != 0) {
|
|
log_printf (LOGSYS_LEVEL_WARNING, "No Watchdog, try modprobe <a watchdog>");
|
|
dog = -1;
|
|
return -1;
|
|
}
|
|
|
|
/* here goes, lets hope they have "Magic Close"
|
|
*/
|
|
dog = open("/dev/watchdog", O_WRONLY);
|
|
|
|
if (dog == -1) {
|
|
log_printf (LOGSYS_LEVEL_WARNING, "Watchdog exists but couldn't be opened.");
|
|
dog = -1;
|
|
return -1;
|
|
}
|
|
|
|
/* Right we have the dog.
|
|
* Lets see what breed it is.
|
|
*/
|
|
|
|
ioctl(dog, WDIOC_GETSUPPORT, &ident);
|
|
log_printf (LOGSYS_LEVEL_INFO, "Watchdog is now been tickled by corosync.");
|
|
log_printf (LOGSYS_LEVEL_DEBUG, "%s", ident.identity);
|
|
|
|
watchdog_timeout_apply (watchdog_timeout);
|
|
|
|
ioctl(dog, WDIOC_SETOPTIONS, WDIOS_ENABLECARD);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void wd_top_level_key_changed(
|
|
int32_t event,
|
|
const char *key_name,
|
|
struct icmap_notify_value new_val,
|
|
struct icmap_notify_value old_val,
|
|
void *user_data)
|
|
{
|
|
uint32_t tmp_value_32;
|
|
|
|
ENTER();
|
|
|
|
if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) != CS_OK) {
|
|
if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
|
|
watchdog_timeout_apply (tmp_value_32);
|
|
}
|
|
}
|
|
else {
|
|
watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
|
|
}
|
|
}
|
|
|
|
static void watchdog_timeout_get_initial (void)
|
|
{
|
|
uint32_t tmp_value_32;
|
|
icmap_track_t icmap_track = NULL;
|
|
|
|
ENTER();
|
|
|
|
if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) != CS_OK) {
|
|
watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
|
|
|
|
icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout);
|
|
}
|
|
else {
|
|
if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
|
|
watchdog_timeout_apply (tmp_value_32);
|
|
} else {
|
|
watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
|
|
}
|
|
}
|
|
|
|
icmap_track_add("resources.watchdog_timeout", ICMAP_TRACK_MODIFY,
|
|
wd_top_level_key_changed, NULL, &icmap_track);
|
|
|
|
}
|
|
|
|
static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api)
|
|
{
|
|
|
|
ENTER();
|
|
#ifdef COROSYNC_SOLARIS
|
|
logsys_subsys_init();
|
|
#endif
|
|
api = corosync_api;
|
|
|
|
watchdog_timeout_get_initial();
|
|
|
|
setup_watchdog();
|
|
|
|
wd_scan_resources();
|
|
|
|
api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
|
|
wd_tickle_fn, &wd_timer);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static int wd_exec_exit_fn (void)
|
|
{
|
|
char magic = 'V';
|
|
ENTER();
|
|
|
|
if (dog > 0) {
|
|
log_printf (LOGSYS_LEVEL_INFO, "magically closing the watchdog.");
|
|
write (dog, &magic, 1);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|