SAM Confdb integration

This patch adds support for confdb integration to SAM. It is now possible to
use SAM_RECOVERY_POLICY_CONFDB as a flag in combination with the previous policies.

A new function, sam_mark_failed, is also added so that the RESTART recovery
policy can be used together with confdb and still give the expected results
(especially when integrated with the corosync watchdog).

The patch also makes SAM thread safe.


git-svn-id: http://svn.fedorahosted.org/svn/corosync/trunk@3050 fd59a12c-fef9-0310-b244-a6a79926bd2f
This commit is contained in:
Jan Friesse 2010-09-27 07:34:21 +00:00
parent 04dcca855b
commit 1a32fc4a6c
10 changed files with 1215 additions and 76 deletions

View File

@ -66,7 +66,7 @@ confdb_test_agent_LDADD = -lconfdb -lcoroipcc ../../exec/coropoll.o
confdb_test_agent_LDFLAGS = -L../../lib
sam_test_agent_SOURCES = sam_test_agent.c common_test_agent.c
sam_test_agent_LDADD = -lsam -lquorum -lcoroipcc ../../exec/coropoll.o
sam_test_agent_LDADD = -lsam -lquorum -lcoroipcc -lconfdb ../../exec/coropoll.o
sam_test_agent_LDFLAGS = -L../../lib
votequorum_test_agent_SOURCES = votequorum_test_agent.c common_test_agent.c

View File

@ -46,6 +46,7 @@ typedef enum {
SAM_RECOVERY_POLICY_QUORUM = 0x08,
SAM_RECOVERY_POLICY_QUORUM_QUIT = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_QUIT,
SAM_RECOVERY_POLICY_QUORUM_RESTART = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_RESTART,
SAM_RECOVERY_POLICY_CONFDB = 0x10,
} sam_recovery_policy_t;
/*
@ -205,6 +206,18 @@ cs_error_t sam_data_store (
const void *data,
size_t size);
/*
* Marks child as failed. This can be called only when the SAM_RECOVERY_POLICY_CONFDB flag is set
* and makes sense only for SAM_RECOVERY_POLICY_RESTART. The child is killed without being sent
* the warn signal, and the confdb state key is set to failed.
*
* - CS_OK in case no problem appeared
* - CS_ERR_BAD_HANDLE library was not initialized or was already finalized
* - CS_ERR_INVALID_PARAM recovery policy does not have the SAM_RECOVERY_POLICY_CONFDB flag set
* - CS_ERR_LIBRARY if some internal error appeared (communication with parent
* process)
*/
cs_error_t sam_mark_failed (void);
#ifdef __cplusplus
}

View File

@ -62,7 +62,7 @@ libvotequorum_a_SOURCES = votequorum.c
libconfdb_a_SOURCES = confdb.c sa-confdb.c
libconfdb_a_LIBADD = ../lcr/lcr_ifact.o
CONFDB_LINKER_ADD = $(OS_DYFLAGS) $(OS_LDL)
SAM_LINKER_ADD = -L. -lquorum
SAM_LINKER_ADD = -L. -lquorum -lconfdb
libcoroipcc_a_SOURCES = coroipcc.c
libsam_a_SOURCES = sam.c

View File

@ -1 +1 @@
4.3.0
4.4.0

514
lib/sam.c
View File

@ -42,6 +42,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>
@ -50,6 +51,7 @@
#include <corosync/coroipc_types.h>
#include <corosync/coroipcc.h>
#include <corosync/corodefs.h>
#include <corosync/confdb.h>
#include <corosync/hdb.h>
#include <corosync/quorum.h>
@ -61,6 +63,15 @@
#include <sys/wait.h>
#include <signal.h>
/*
 * Values written to the "state" key of the per-process confdb object.
 */
#define SAM_CONFDB_S_FAILED "failed"
#define SAM_CONFDB_S_REGISTERED "registered"
#define SAM_CONFDB_S_STARTED "started"
#define SAM_CONFDB_S_Q_WAIT "waiting for quorum"

/*
 * Strip flag bits from a recovery policy value:
 *  SAM_RP_MASK_Q - clears SAM_RECOVERY_POLICY_QUORUM
 *  SAM_RP_MASK_C - clears SAM_RECOVERY_POLICY_CONFDB
 *  SAM_RP_MASK   - clears both flags
 *
 * The argument is parenthesized so that a compound expression
 * (for example "a | b") is masked as a whole instead of being split
 * by operator precedence.
 */
#define SAM_RP_MASK_Q(pol) ((pol) & (~SAM_RECOVERY_POLICY_QUORUM))
#define SAM_RP_MASK_C(pol) ((pol) & (~SAM_RECOVERY_POLICY_CONFDB))
#define SAM_RP_MASK(pol) ((pol) & (~(SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_CONFDB)))
enum sam_internal_status_t {
SAM_INTERNAL_STATUS_NOT_INITIALIZED = 0,
SAM_INTERNAL_STATUS_INITIALIZED,
@ -75,6 +86,7 @@ enum sam_command_t {
SAM_COMMAND_HB,
SAM_COMMAND_DATA_STORE,
SAM_COMMAND_WARN_SIGNAL_SET,
SAM_COMMAND_MARK_FAILED,
};
enum sam_reply_t {
@ -89,6 +101,13 @@ enum sam_parent_action_t {
SAM_PARENT_ACTION_CONTINUE
};
enum sam_confdb_key_t {
SAM_CONFDB_KEY_RECOVERY,
SAM_CONFDB_KEY_HC_PERIOD,
SAM_CONFDB_KEY_LAST_HC,
SAM_CONFDB_KEY_STATE,
};
static struct {
int time_interval;
sam_recovery_policy_t recovery_policy;
@ -109,11 +128,156 @@ static struct {
size_t user_data_size;
size_t user_data_allocated;
pthread_mutex_t lock;
quorum_handle_t quorum_handle;
uint32_t quorate;
int quorum_fd;
confdb_handle_t confdb_handle;
hdb_handle_t confdb_pid_handle;
} sam_internal_data;
extern const char *__progname;
static cs_error_t sam_confdb_update_key (enum sam_confdb_key_t key, const char *value)
{
cs_error_t err;
const char *svalue;
uint64_t hc_period, last_hc;
struct timeval tv;
const char *ssvalue[] = { [SAM_RECOVERY_POLICY_QUIT] = "quit", [SAM_RECOVERY_POLICY_RESTART] = "restart" };
switch (key) {
case SAM_CONFDB_KEY_RECOVERY:
svalue = ssvalue[SAM_RP_MASK (sam_internal_data.recovery_policy)];
if ((err = confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle,
"recovery", svalue, strlen ((const char *)svalue), CONFDB_VALUETYPE_STRING)) != CS_OK) {
goto exit_error;
}
break;
case SAM_CONFDB_KEY_HC_PERIOD:
hc_period = sam_internal_data.time_interval;
if ((err = confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle,
"hc_period", &hc_period, sizeof (uint64_t), CONFDB_VALUETYPE_UINT64)) != CS_OK) {
goto exit_error;
}
break;
case SAM_CONFDB_KEY_LAST_HC:
if (gettimeofday (&tv, NULL) == -1) {
last_hc = 0;
} else {
last_hc = ((uint64_t)tv.tv_sec * 1000) + ((uint64_t)tv.tv_usec / 1000);
}
if ((err = confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle,
"hc_last", &last_hc, sizeof (uint64_t), CONFDB_VALUETYPE_UINT64)) != CS_OK) {
goto exit_error;
}
break;
case SAM_CONFDB_KEY_STATE:
svalue = value;
if ((err = confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle,
"state", svalue, strlen ((const char *)svalue), CONFDB_VALUETYPE_STRING)) != CS_OK) {
goto exit_error;
}
break;
}
return (CS_OK);
exit_error:
return (err);
}
/*
 * Destroy the per-process confdb object referenced by
 * sam_internal_data.confdb_pid_handle.
 *
 * Returns whatever confdb_object_destroy returns.
 */
static cs_error_t sam_confdb_destroy_pid_obj (void)
{
	cs_error_t rc;

	rc = confdb_object_destroy (sam_internal_data.confdb_handle,
	    sam_internal_data.confdb_pid_handle);

	return (rc);
}
/*
 * Look up the child object called name under parent; if the lookup fails
 * with CONFDB_ERR_ACCESS, create the object instead. The resulting handle is
 * stored in *child.
 *
 * Returns CS_OK on success or the first confdb error encountered.
 */
static cs_error_t sam_confdb_find_or_create (
	confdb_handle_t handle,
	hdb_handle_t parent,
	const char *name,
	hdb_handle_t *child)
{
	cs_error_t rc;

	rc = confdb_object_find_start (handle, parent);
	if (rc != CS_OK) {
		return (rc);
	}

	rc = confdb_object_find (handle, parent, name, strlen (name), child);
	if (rc == CS_OK) {
		/*
		 * Found: the find iteration is released only on this path.
		 */
		return (confdb_object_find_destroy (handle, parent));
	}

	if (rc != CONFDB_ERR_ACCESS) {
		return (rc);
	}

	/*
	 * Try to create object
	 */
	return (confdb_object_create (handle, parent, name, strlen (name), child));
}

/*
 * Open a confdb connection and make sure the object chain
 * resources -> process -> "<progname>:<pid>" exists, then publish the
 * "recovery" and "hc_period" keys on the pid object.
 *
 * On success the confdb handle and the pid object handle are kept in
 * sam_internal_data. If writing the keys fails the pid object is destroyed
 * again; on any failure after initialization the confdb connection is
 * finalized.
 */
static cs_error_t sam_confdb_register (void)
{
	confdb_handle_t cdb;
	hdb_handle_t resources_h, process_h, pid_h;
	char pid_obj_name[PATH_MAX];
	cs_error_t rc;

	rc = confdb_initialize (&cdb, NULL);
	if (rc != CS_OK) {
		return (rc);
	}

	rc = sam_confdb_find_or_create (cdb, OBJECT_PARENT_HANDLE, "resources", &resources_h);
	if (rc != CS_OK) {
		goto finalize_error;
	}

	rc = sam_confdb_find_or_create (cdb, resources_h, "process", &process_h);
	if (rc != CS_OK) {
		goto finalize_error;
	}

	/*
	 * Object name is "<progname>:<pid>"; fall back to the bare pid when
	 * that does not fit into the buffer.
	 */
	if (snprintf (pid_obj_name, sizeof (pid_obj_name), "%s:%d", __progname, getpid ()) >= sizeof (pid_obj_name)) {
		snprintf (pid_obj_name, sizeof (pid_obj_name), "%d", getpid ());
	}

	rc = sam_confdb_find_or_create (cdb, process_h, pid_obj_name, &pid_h);
	if (rc != CS_OK) {
		goto finalize_error;
	}

	sam_internal_data.confdb_pid_handle = pid_h;
	sam_internal_data.confdb_handle = cdb;

	rc = sam_confdb_update_key (SAM_CONFDB_KEY_RECOVERY, NULL);
	if (rc != CS_OK) {
		goto destroy_finalize_error;
	}

	rc = sam_confdb_update_key (SAM_CONFDB_KEY_HC_PERIOD, NULL);
	if (rc != CS_OK) {
		goto destroy_finalize_error;
	}

	return (CS_OK);

destroy_finalize_error:
	sam_confdb_destroy_pid_obj ();

finalize_error:
	confdb_finalize (cdb);

	return (rc);
}
static void quorum_notification_fn (
quorum_handle_t handle,
uint32_t quorate,
@ -135,8 +299,8 @@ cs_error_t sam_initialize (
return (CS_ERR_BAD_HANDLE);
}
if (recovery_policy != SAM_RECOVERY_POLICY_QUIT && recovery_policy != SAM_RECOVERY_POLICY_RESTART &&
recovery_policy != SAM_RECOVERY_POLICY_QUORUM_QUIT && recovery_policy != SAM_RECOVERY_POLICY_QUORUM_RESTART) {
if (SAM_RP_MASK (recovery_policy) != SAM_RECOVERY_POLICY_QUIT &&
SAM_RP_MASK (recovery_policy) != SAM_RECOVERY_POLICY_RESTART) {
return (CS_ERR_INVALID_PARAM);
}
@ -178,6 +342,8 @@ cs_error_t sam_initialize (
sam_internal_data.user_data_size = 0;
sam_internal_data.user_data_allocated = 0;
pthread_mutex_init (&sam_internal_data.lock, NULL);
return (CS_OK);
exit_error_quorum:
@ -290,8 +456,12 @@ cs_error_t sam_data_getsize (size_t *size)
return (CS_ERR_BAD_HANDLE);
}
pthread_mutex_lock (&sam_internal_data.lock);
*size = sam_internal_data.user_data_size;
pthread_mutex_unlock (&sam_internal_data.lock);
return (CS_OK);
}
@ -299,6 +469,10 @@ cs_error_t sam_data_restore (
void *data,
size_t size)
{
cs_error_t err;
err = CS_OK;
if (data == NULL) {
return (CS_ERR_INVALID_PARAM);
}
@ -310,17 +484,30 @@ cs_error_t sam_data_restore (
return (CS_ERR_BAD_HANDLE);
}
pthread_mutex_lock (&sam_internal_data.lock);
if (sam_internal_data.user_data_size == 0) {
return (CS_OK);
err = CS_OK;
goto error_unlock;
}
if (size < sam_internal_data.user_data_size) {
return (CS_ERR_INVALID_PARAM);
err = CS_ERR_INVALID_PARAM;
goto error_unlock;
}
memcpy (data, sam_internal_data.user_data, sam_internal_data.user_data_size);
pthread_mutex_unlock (&sam_internal_data.lock);
return (CS_OK);
error_unlock:
pthread_mutex_unlock (&sam_internal_data.lock);
return (err);
}
cs_error_t sam_data_store (
@ -343,28 +530,36 @@ cs_error_t sam_data_store (
size = 0;
}
pthread_mutex_lock (&sam_internal_data.lock);
if (sam_internal_data.am_i_child) {
/*
* We are child so we must send data to parent
*/
command = SAM_COMMAND_DATA_STORE;
if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) {
return (CS_ERR_LIBRARY);
err = CS_ERR_LIBRARY;
goto error_unlock;
}
if (sam_safe_write (sam_internal_data.child_fd_out, &size, sizeof (size)) != sizeof (size)) {
return (CS_ERR_LIBRARY);
err = CS_ERR_LIBRARY;
goto error_unlock;
}
if (data != NULL && sam_safe_write (sam_internal_data.child_fd_out, data, size) != size) {
return (CS_ERR_LIBRARY);
err = CS_ERR_LIBRARY;
goto error_unlock;
}
/*
* And wait for reply
*/
if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) {
return (err);
goto error_unlock;
}
}
@ -379,7 +574,9 @@ cs_error_t sam_data_store (
} else {
if (sam_internal_data.user_data_allocated < size) {
if ((new_data = realloc (sam_internal_data.user_data, size)) == NULL) {
return (CS_ERR_NO_MEMORY);
err = CS_ERR_NO_MEMORY;
goto error_unlock;
}
sam_internal_data.user_data_allocated = size;
@ -392,30 +589,53 @@ cs_error_t sam_data_store (
memcpy (sam_internal_data.user_data, data, size);
}
pthread_mutex_unlock (&sam_internal_data.lock);
return (CS_OK);
error_unlock:
pthread_mutex_unlock (&sam_internal_data.lock);
return (err);
}
cs_error_t sam_start (void)
{
char command;
cs_error_t err;
sam_recovery_policy_t recpol;
if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED) {
return (CS_ERR_BAD_HANDLE);
}
recpol = sam_internal_data.recovery_policy;
if (recpol & SAM_RECOVERY_POLICY_QUORUM || recpol & SAM_RECOVERY_POLICY_CONFDB) {
pthread_mutex_lock (&sam_internal_data.lock);
}
command = SAM_COMMAND_START;
if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command))
return (CS_ERR_LIBRARY);
if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) {
if (recpol & SAM_RECOVERY_POLICY_QUORUM || recpol & SAM_RECOVERY_POLICY_CONFDB) {
pthread_mutex_unlock (&sam_internal_data.lock);
}
if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) {
return (CS_ERR_LIBRARY);
}
if (recpol & SAM_RECOVERY_POLICY_QUORUM || recpol & SAM_RECOVERY_POLICY_CONFDB) {
/*
* Wait for parent reply
*/
if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) {
pthread_mutex_unlock (&sam_internal_data.lock);
return (err);
}
pthread_mutex_unlock (&sam_internal_data.lock);
}
if (sam_internal_data.hc_callback)
@ -430,6 +650,7 @@ cs_error_t sam_start (void)
cs_error_t sam_stop (void)
{
char command;
cs_error_t err;
if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) {
return (CS_ERR_BAD_HANDLE);
@ -437,8 +658,30 @@ cs_error_t sam_stop (void)
command = SAM_COMMAND_STOP;
if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command))
if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
pthread_mutex_lock (&sam_internal_data.lock);
}
if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) {
if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
pthread_mutex_unlock (&sam_internal_data.lock);
}
return (CS_ERR_LIBRARY);
}
if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
/*
* Wait for parent reply
*/
if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) {
pthread_mutex_unlock (&sam_internal_data.lock);
return (err);
}
pthread_mutex_unlock (&sam_internal_data.lock);
}
if (sam_internal_data.hc_callback)
if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, sizeof (command)) != sizeof (command))
@ -489,6 +732,26 @@ exit_error:
return (CS_OK);
}
/*
 * Ask the parent process to mark this child as failed by sending it
 * SAM_COMMAND_MARK_FAILED.
 *
 * Returns:
 *  CS_OK                 command was written to the parent
 *  CS_ERR_BAD_HANDLE     library is neither in registered nor started state
 *  CS_ERR_INVALID_PARAM  recovery policy lacks SAM_RECOVERY_POLICY_CONFDB
 *  CS_ERR_LIBRARY        writing the command to the parent failed
 *
 * NOTE(review): unlike sam_data_store and sam_warn_signal_set, the write
 * here is not serialized by sam_internal_data.lock - confirm this is
 * intentional.
 */
cs_error_t sam_mark_failed (void)
{
	char cmd = SAM_COMMAND_MARK_FAILED;

	switch (sam_internal_data.internal_status) {
	case SAM_INTERNAL_STATUS_STARTED:
	case SAM_INTERNAL_STATUS_REGISTERED:
		break;
	default:
		return (CS_ERR_BAD_HANDLE);
	}

	if ((sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) == 0) {
		return (CS_ERR_INVALID_PARAM);
	}

	if (sam_safe_write (sam_internal_data.child_fd_out, &cmd, sizeof (cmd)) != sizeof (cmd)) {
		return (CS_ERR_LIBRARY);
	}

	return (CS_OK);
}
cs_error_t sam_warn_signal_set (int warn_signal)
{
@ -501,25 +764,31 @@ cs_error_t sam_warn_signal_set (int warn_signal)
return (CS_ERR_BAD_HANDLE);
}
pthread_mutex_lock (&sam_internal_data.lock);
if (sam_internal_data.am_i_child) {
/*
* We are child so we must send data to parent
*/
command = SAM_COMMAND_WARN_SIGNAL_SET;
if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) {
return (CS_ERR_LIBRARY);
err = CS_ERR_LIBRARY;
goto error_unlock;
}
if (sam_safe_write (sam_internal_data.child_fd_out, &warn_signal, sizeof (warn_signal)) !=
sizeof (warn_signal)) {
return (CS_ERR_LIBRARY);
err = CS_ERR_LIBRARY;
goto error_unlock;
}
/*
* And wait for reply
*/
if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) {
return (err);
goto error_unlock;
}
}
@ -528,14 +797,51 @@ cs_error_t sam_warn_signal_set (int warn_signal)
*/
sam_internal_data.warn_signal = warn_signal;
pthread_mutex_unlock (&sam_internal_data.lock);
return (CS_OK);
error_unlock:
pthread_mutex_unlock (&sam_internal_data.lock);
return (err);
}
/*
 * Send the result of a command back over parent_fd_out.
 *
 * For err == CS_OK a single SAM_REPLY_OK byte is written. Otherwise (or when
 * writing the OK byte fails) SAM_REPLY_ERROR is written, followed by the
 * error code itself.
 *
 * parent_fd_in is accepted but not used.
 *
 * Returns CS_OK if the OK reply was delivered, CS_ERR_LIBRARY if the error
 * reply could not be written, otherwise the error code that was sent.
 */
static cs_error_t sam_parent_reply_send (
	cs_error_t err,
	int parent_fd_in,
	int parent_fd_out)
{
	char reply_code;

	if (err == CS_OK) {
		reply_code = SAM_REPLY_OK;

		if (sam_safe_write (parent_fd_out, &reply_code, sizeof (reply_code)) == sizeof (reply_code)) {
			return (CS_OK);
		}

		/*
		 * OK reply could not be delivered; fall through and try to
		 * report that as a library error.
		 */
		err = CS_ERR_LIBRARY;
	}

	reply_code = SAM_REPLY_ERROR;

	if (sam_safe_write (parent_fd_out, &reply_code, sizeof (reply_code)) != sizeof (reply_code) ||
	    sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) {
		return (CS_ERR_LIBRARY);
	}

	return (err);
}
static cs_error_t sam_parent_warn_signal_set (
int parent_fd_in,
int parent_fd_out)
{
char reply;
char *user_data;
int warn_signal;
cs_error_t err;
@ -553,35 +859,27 @@ static cs_error_t sam_parent_warn_signal_set (
goto error_reply;
}
reply = SAM_REPLY_OK;
if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
err = CS_ERR_LIBRARY;
goto error_reply;
}
return (CS_OK);
return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out));
error_reply:
reply = SAM_REPLY_ERROR;
if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
return (CS_ERR_LIBRARY);
}
if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) {
return (CS_ERR_LIBRARY);
}
return (err);
return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
}
static cs_error_t sam_parent_wait_for_quorum (
int parent_fd_in,
int parent_fd_out)
{
char reply;
cs_error_t err;
struct pollfd pfds[2];
int poll_err;
if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_Q_WAIT)) != CS_OK) {
goto error_reply;
}
}
/*
* Update current quorum
*/
@ -630,24 +928,44 @@ static cs_error_t sam_parent_wait_for_quorum (
}
}
reply = SAM_REPLY_OK;
if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
err = CS_ERR_LIBRARY;
if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_STARTED)) != CS_OK) {
goto error_reply;
}
}
return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out));
error_reply:
if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_REGISTERED);
}
return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
}
static cs_error_t sam_parent_confdb_state_set (
int parent_fd_in,
int parent_fd_out,
int state)
{
cs_error_t err;
const char *state_s;
if (state == 1) {
state_s = SAM_CONFDB_S_STARTED;
} else {
state_s = SAM_CONFDB_S_REGISTERED;
}
if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, state_s)) != CS_OK) {
goto error_reply;
}
return (CS_OK);
return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out));
error_reply:
reply = SAM_REPLY_ERROR;
if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
return (CS_ERR_LIBRARY);
}
if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) {
return (CS_ERR_LIBRARY);
}
return (err);
return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
}
static cs_error_t sam_parent_kill_child (
@ -675,12 +993,26 @@ static cs_error_t sam_parent_kill_child (
return (CS_OK);
}
/*
 * Handle a request to mark the child as failed: switch the recovery policy
 * to QUIT and kill the child.
 *
 * term_send is set to 1 before sam_parent_kill_child is called.
 *
 * Only the CONFDB and QUORUM flag bits that were present in the previous
 * policy are carried over. Testing SAM_RP_MASK_C/SAM_RP_MASK_Q of the policy
 * (the policy with a flag removed) is non-zero for every valid policy and
 * would set both flags unconditionally.
 *
 * action    - passed through to sam_parent_kill_child
 * child_pid - pid of the child to kill
 *
 * Returns the result of sam_parent_kill_child.
 */
static cs_error_t sam_parent_mark_child_failed (
	int *action,
	pid_t child_pid)
{
	sam_recovery_policy_t recpol;

	recpol = sam_internal_data.recovery_policy;

	sam_internal_data.term_send = 1;
	sam_internal_data.recovery_policy = SAM_RECOVERY_POLICY_QUIT |
	    (recpol & (SAM_RECOVERY_POLICY_CONFDB | SAM_RECOVERY_POLICY_QUORUM));

	return (sam_parent_kill_child (action, child_pid));
}
static cs_error_t sam_parent_data_store (
int parent_fd_in,
int parent_fd_out)
{
char reply;
char *user_data;
ssize_t size;
cs_error_t err;
@ -711,28 +1043,14 @@ static cs_error_t sam_parent_data_store (
goto free_error_reply;
}
reply = SAM_REPLY_OK;
if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
err = CS_ERR_LIBRARY;
goto free_error_reply;
}
free (user_data);
return (CS_OK);
return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out));
free_error_reply:
free (user_data);
error_reply:
reply = SAM_REPLY_ERROR;
if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
return (CS_ERR_LIBRARY);
}
if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) {
return (CS_ERR_LIBRARY);
}
return (err);
return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
}
static enum sam_parent_action_t sam_parent_handler (
@ -749,10 +1067,12 @@ static enum sam_parent_action_t sam_parent_handler (
struct pollfd pfds[2];
nfds_t nfds;
cs_error_t err;
sam_recovery_policy_t recpol;
status = 0;
action = SAM_PARENT_ACTION_CONTINUE;
recpol = sam_internal_data.recovery_policy;
while (action == SAM_PARENT_ACTION_CONTINUE) {
pfds[0].fd = parent_fd_in;
@ -766,7 +1086,7 @@ static enum sam_parent_action_t sam_parent_handler (
time_interval = -1;
}
if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) {
if (recpol & SAM_RECOVERY_POLICY_QUORUM) {
pfds[nfds].fd = sam_internal_data.quorum_fd;
pfds[nfds].events = POLLIN;
pfds[nfds].revents = 0;
@ -820,6 +1140,10 @@ static enum sam_parent_action_t sam_parent_handler (
goto action_exit;
}
if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
sam_confdb_update_key (SAM_CONFDB_KEY_LAST_HC, NULL);
}
/*
* We have read command
*/
@ -829,13 +1153,20 @@ static enum sam_parent_action_t sam_parent_handler (
/*
* Not started yet
*/
if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) {
if (recpol & SAM_RECOVERY_POLICY_QUORUM) {
if (sam_parent_wait_for_quorum (parent_fd_in,
parent_fd_out) != CS_OK) {
continue;
}
}
if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
if (sam_parent_confdb_state_set (parent_fd_in,
parent_fd_out, 1) != CS_OK) {
continue;
}
}
status = 1;
}
break;
@ -844,6 +1175,13 @@ static enum sam_parent_action_t sam_parent_handler (
/*
* Started
*/
if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
if (sam_parent_confdb_state_set (parent_fd_in,
parent_fd_out, 0) != CS_OK) {
continue;
}
}
status = 0;
}
break;
@ -853,6 +1191,10 @@ static enum sam_parent_action_t sam_parent_handler (
case SAM_COMMAND_WARN_SIGNAL_SET:
sam_parent_warn_signal_set (parent_fd_in, parent_fd_out);
break;
case SAM_COMMAND_MARK_FAILED:
status = 1;
sam_parent_mark_child_failed (&action, child_pid);
break;
}
} /* if (pfds[0].revents != 0) */
@ -882,13 +1224,25 @@ cs_error_t sam_register (
pid_t pid;
int pipe_error;
int pipe_fd_out[2], pipe_fd_in[2];
enum sam_parent_action_t action;
enum sam_parent_action_t action, old_action;
int child_status;
sam_recovery_policy_t recpol;
if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_INITIALIZED) {
return (CS_ERR_BAD_HANDLE);
}
recpol = sam_internal_data.recovery_policy;
if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
/*
* Register to objdb
*/
if ((error = sam_confdb_register ()) != CS_OK) {
goto error_exit;
}
}
error = CS_OK;
while (1) {
@ -905,6 +1259,12 @@ cs_error_t sam_register (
goto error_exit;
}
if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
if ((error = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_REGISTERED)) != CS_OK) {
goto error_exit;
}
}
sam_internal_data.instance_id++;
sam_internal_data.term_send = 0;
@ -937,6 +1297,8 @@ cs_error_t sam_register (
sam_internal_data.am_i_child = 1;
sam_internal_data.internal_status = SAM_INTERNAL_STATUS_REGISTERED;
pthread_mutex_init (&sam_internal_data.lock, NULL);
goto error_exit;
} else {
/*
@ -961,20 +1323,34 @@ cs_error_t sam_register (
while (waitpid (pid, &child_status, 0) == -1 && errno == EINTR)
;
old_action = action;
if (action == SAM_PARENT_ACTION_RECOVERY) {
if (sam_internal_data.recovery_policy == SAM_RECOVERY_POLICY_QUIT ||
sam_internal_data.recovery_policy == SAM_RECOVERY_POLICY_QUORUM_QUIT)
if (SAM_RP_MASK (sam_internal_data.recovery_policy) == SAM_RECOVERY_POLICY_QUIT)
action = SAM_PARENT_ACTION_QUIT;
}
if (action == SAM_PARENT_ACTION_QUIT) {
if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) {
if (recpol & SAM_RECOVERY_POLICY_QUORUM) {
quorum_finalize (sam_internal_data.quorum_handle);
}
if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
if (old_action == SAM_PARENT_ACTION_RECOVERY) {
/*
* Mark as failed
*/
sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_FAILED);
} else {
sam_confdb_destroy_pid_obj ();
}
}
exit (WEXITSTATUS (child_status));
}
}
}

View File

@ -116,6 +116,7 @@ dist_man_MANS = \
sam_hc_callback_register.3 \
sam_hc_send.3 \
sam_initialize.3 \
sam_mark_failed.3 \
sam_overview.8 \
sam_register.3 \
sam_start.3 \

View File

@ -31,7 +31,7 @@
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
.\" * THE POSSIBILITY OF SUCH DAMAGE.
.\" */
.TH "SAM_INITIALIZE" 3 "30/04/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
.TH "SAM_INITIALIZE" 3 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
.SH NAME
.P
@ -71,6 +71,7 @@ The \fIrecovery_policy\fR is defined as type:
SAM_RECOVERY_POLICY_QUORUM = 0x08,
SAM_RECOVERY_POLICY_QUORUM_QUIT = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_QUIT,
SAM_RECOVERY_POLICY_QUORUM_RESTART = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_RESTART,
SAM_RECOVERY_POLICY_CONFDB = 0x10,
} sam_recovery_policy_t;
.fi
@ -94,6 +95,9 @@ quorate and process will be terminated if quorum is lost.
SAM_RECOVERY_POLICY_QUORUM_RESTART
same as \fISAM_RECOVERY_POLICY_RESTART\fR but \fBsam_start (3)\fR will block until corosync becomes
quorate and process will be restarted if quorum is lost.
.TP
SAM_RECOVERY_POLICY_CONFDB
is not a policy by itself. It is a flag that enables confdb integration and can be combined with any of the previous policies.
.P
To perform event driven healthchecking, \fBsam_register(3)\fR and

73
man/sam_mark_failed.3 Normal file
View File

@ -0,0 +1,73 @@
.\"/*
.\" * Copyright (c) 2010 Red Hat, Inc.
.\" *
.\" * All rights reserved.
.\" *
.\" * Author: Jan Friesse (jfriesse@redhat.com)
.\" *
.\" * This software licensed under BSD license, the text of which follows:
.\" *
.\" * Redistribution and use in source and binary forms, with or without
.\" * modification, are permitted provided that the following conditions are met:
.\" *
.\" * - Redistributions of source code must retain the above copyright notice,
.\" * this list of conditions and the following disclaimer.
.\" * - Redistributions in binary form must reproduce the above copyright notice,
.\" * this list of conditions and the following disclaimer in the documentation
.\" * and/or other materials provided with the distribution.
.\" * - Neither the name of the Red Hat, Inc. nor the names of its
.\" * contributors may be used to endorse or promote products derived from this
.\" * software without specific prior written permission.
.\" *
.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
.\" * THE POSSIBILITY OF SUCH DAMAGE.
.\" */
.TH "SAM_MARK_FAILED" 3 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
.SH NAME
.P
sam_mark_failed \- Mark process failed
.SH SYNOPSIS
.P
\fB#include <corosync/sam.h>\fR
.P
\fBcs_error_t sam_mark_failed (void);\fR
.SH DESCRIPTION
.P
The \fBsam_mark_failed\fR function is used with SAM_RECOVERY_POLICY_CONFDB, mostly
together with SAM_RECOVERY_POLICY_RESTART, to mark the process as failed. A process marked
as failed is killed without being sent the warn signal, and the control process exits
as it would with the SAM_RECOVERY_POLICY_QUIT policy. The confdb key state is set to failed so
that the corosync watchdog can take the required action.
.SH RETURN VALUE
.P
This call returns CS_OK if successful, otherwise an error is returned.
.SH ERRORS
.TP
CS_ERR_BAD_HANDLE
library was not initialized by calling \fBsam_initialize(3)\fR or was already finalized
.TP
CS_ERR_INVALID_PARAM
recovery policy does not have the SAM_RECOVERY_POLICY_CONFDB flag set
.TP
CS_ERR_LIBRARY
some internal error appeared (communication with parent process)
.SH "SEE ALSO"
.BR sam_initialize (3)

View File

@ -32,7 +32,7 @@
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
.\" * THE POSSIBILITY OF SUCH DAMAGE.
.\" */
.TH "SAM_OVERVIEW" 8 "30/04/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
.TH "SAM_OVERVIEW" 8 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
.SH NAME
.P
@ -133,6 +133,38 @@ One can in such case use files, databases, ... or much simpler in memory solutio
presented by \fBsam_data_store(3)\fR, \fBsam_data_restore(3)\fR and \fBsam_data_getsize(3)\fR
functions.
.SH Confdb integration
.P
SAM has a policy flag used for confdb system integration (\fISAM_RECOVERY_POLICY_CONFDB\fR).
If a process is registered with this flag, a new confdb object PROCESS_NAME:PID is created with the following
keys:
.RS
.IP \(bu 3
\fIrecovery\fR - will be quit or restart depending on policy
.IP \(bu 3
\fIhc_period\fR - period of health checking in milliseconds
.IP \(bu 3
\fIhc_last\fR - last known GMT time in milliseconds when health check was received
.IP \(bu 3
\fIstate\fR - state of process (can be one of registered, started, failed, waiting for quorum)
.RE
.P
Object is automatically deleted if process exits with stopped health checking.
.P
Confdb integration with the corosync watchdog can be used in an implicit or an explicit way.
.P
The implicit way is achieved by setting the recovery policy to QUIT and letting the process exit while health checking is started.
If this happens, the object is not deleted and the corosync watchdog will take the required action.
.P
The explicit way is useful for situations where the developer can deal with some non-fatal failure of the application.
This mode is achieved by setting the policy to RESTART and using SAM the same way as without confdb integration.
If a real failure is needed (for example after too many restarts in total or per second), it is possible to use \fBsam_mark_failed(3)\fR
and let the corosync watchdog take the required action.
.SH BUGS
.SH "SEE ALSO"
.BR sam_initialize (3),
@ -140,6 +172,7 @@ functions.
.BR sam_data_restore (3),
.BR sam_data_store (3),
.BR sam_finalize (3),
.BR sam_mark_failed (3),
.BR sam_start (3),
.BR sam_stop (3),
.BR sam_register (3),

View File

@ -38,6 +38,7 @@
#include <config.h>
#include <limits.h>
#include <sys/types.h>
#include <stdio.h>
#include <stdint.h>
@ -50,6 +51,8 @@
#include <string.h>
#include <sys/wait.h>
extern const char *__progname;
static int test2_sig_delivered = 0;
static int test5_hc_cb_count = 0;
static int test6_sig_delivered = 0;
@ -864,9 +867,551 @@ static int test7 (void) {
return (2);
}
/*
 * Helpers shared by the confdb integration tests (test8 and test9).
 *
 * They only print diagnostics that do not contain the calling function's
 * name, so the output of the tests is unchanged by using them.
 */

/*
 * Build the name of the confdb object the tests look up for process pid:
 * "<progname>:<pid>", or just "<pid>" when the long form does not fit
 * into obj_name.
 */
static void test_cdb_format_obj_name (char *obj_name, size_t obj_name_size, pid_t pid)
{
	int written;

	written = snprintf (obj_name, obj_name_size, "%s:%d", __progname, (int)pid);
	if (written < 0 || (size_t)written >= obj_name_size) {
		snprintf (obj_name, obj_name_size, "%d", (int)pid);
	}
}

/*
 * Walk resources -> process -> <object of pid> in confdb.
 *
 * Returns -1 (after printing a diagnostic) when one of the steps before the
 * final lookup fails. Otherwise returns 0 and stores the result of the final
 * confdb_object_find in *find_res. Interpreting *find_res is left to the
 * caller, because one test expects the object to be missing.
 * obj_name receives the object name that was searched for.
 */
static int test_cdb_find_pid_object (
	confdb_handle_t cdb_handle,
	pid_t pid,
	char *obj_name,
	size_t obj_name_size,
	hdb_handle_t *pid_handle,
	cs_error_t *find_res)
{
	hdb_handle_t res_handle, proc_handle;
	cs_error_t err;

	err = confdb_object_find_start (cdb_handle, OBJECT_PARENT_HANDLE);
	if (err != CS_OK) {
		printf ("Could not start object_find %d.\n", err);
		return (-1);
	}

	err = confdb_object_find (cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen ("resources"), &res_handle);
	if (err != CS_OK) {
		printf ("Could not object_find \"resources\": %d.\n", err);
		return (-1);
	}

	err = confdb_object_find_start (cdb_handle, res_handle);
	if (err != CS_OK) {
		printf ("Could not start object_find %d.\n", err);
		return (-1);
	}

	err = confdb_object_find (cdb_handle, res_handle, "process", strlen ("process"), &proc_handle);
	if (err != CS_OK) {
		printf ("Could not object_find \"process\": %d.\n", err);
		return (-1);
	}

	test_cdb_format_obj_name (obj_name, obj_name_size, pid);

	err = confdb_object_find_start (cdb_handle, proc_handle);
	if (err != CS_OK) {
		printf ("Could not start object_find %d.\n", err);
		return (-1);
	}

	*find_res = confdb_object_find (cdb_handle, proc_handle, obj_name, strlen (obj_name), pid_handle);

	return (0);
}

/*
 * Same lookup as test_cdb_find_pid_object, but the object of pid has to
 * exist. Returns 0 and fills *pid_handle on success, -1 (after printing
 * a diagnostic) otherwise.
 */
static int test_cdb_require_pid_object (confdb_handle_t cdb_handle, pid_t pid, hdb_handle_t *pid_handle)
{
	char obj_name[PATH_MAX];
	cs_error_t find_res;

	if (test_cdb_find_pid_object (cdb_handle, pid, obj_name, sizeof (obj_name), pid_handle, &find_res) != 0) {
		return (-1);
	}

	if (find_res != CS_OK) {
		printf ("Could not object_find \"%s\": %d.\n", obj_name, find_res);
		return (-1);
	}

	return (0);
}

/*
 * Check that key "state" of the object pid_handle holds exactly expected.
 * Returns 0 on match, -1 (after printing a diagnostic) otherwise.
 */
static int test_cdb_expect_state (confdb_handle_t cdb_handle, hdb_handle_t pid_handle, const char *expected)
{
	char key_value[256];
	size_t value_len;
	cs_error_t err;

	err = confdb_key_get (cdb_handle, pid_handle, "state", strlen ("state"), key_value, &value_len);
	if (err != CS_OK) {
		printf ("Could not get \"state\" key: %d.\n", err);
		return (-1);
	}

	if (value_len != strlen (expected) || memcmp (key_value, expected, value_len) != 0) {
		printf ("State key is not \"%s\".\n", expected);
		return (-1);
	}

	return (0);
}

/*
 * Check that key "recovery" of the object pid_handle holds exactly expected.
 * Returns 0 on match, -1 (after printing a diagnostic) otherwise.
 */
static int test_cdb_expect_recovery (confdb_handle_t cdb_handle, hdb_handle_t pid_handle, const char *expected)
{
	char key_value[256];
	size_t value_len;
	cs_error_t err;

	err = confdb_key_get (cdb_handle, pid_handle, "recovery", strlen ("recovery"), key_value, &value_len);
	if (err != CS_OK) {
		printf ("Could not get \"recovery\" key: %d.\n", err);
		return (-1);
	}

	if (value_len != strlen (expected) || memcmp (key_value, expected, value_len) != 0) {
		/*
		 * The value is compared by length above, so it is not assumed
		 * to be NUL terminated. Print it with an explicit precision.
		 */
		printf ("Recovery key \"%.*s\" is not \"%s\".\n", (int)value_len, key_value, expected);
		return (-1);
	}

	return (0);
}

/*
 * Check that the confdb object of process pid exists and that its state
 * is "failed". Returns 0 if so, 2 otherwise.
 */
static int test_cdb_check_failed (confdb_handle_t cdb_handle, pid_t pid)
{
	hdb_handle_t pid_handle;

	if (test_cdb_require_pid_object (cdb_handle, pid, &pid_handle) != 0) {
		return (2);
	}

	if (test_cdb_expect_state (cdb_handle, pid_handle, "failed") != 0) {
		return (2);
	}

	return (0);
}

/*
 * Test confdb integration + quit policy
 *
 * pid     - pid whose confdb object is inspected
 * old_pid - not used
 * test_n  - 1: register/start/stop/health check cycle, then regular exit
 *           2: object of pid must not exist yet, then same cycle as 1 but
 *              ends sleeping with health checking started (should be killed)
 *           3: object of pid must exist with state "failed"
 *
 * Returns 0 if the test passed, 1 if it was skipped (confdb could not be
 * initialized) and 2 if it failed.
 *
 * The confdb handle is not finalized here; main runs this function in
 * a forked child which returns right after it.
 */
static int test8 (pid_t pid, pid_t old_pid, int test_n) {
	confdb_handle_t cdb_handle;
	cs_error_t err;
	cs_error_t find_res;
	hdb_handle_t pid_handle;
	size_t value_len;
	uint64_t tstamp1, tstamp2;
	unsigned int instance_id;
	char tmp_obj[PATH_MAX];
	confdb_value_types_t cdbtype;

	(void)old_pid;

	err = confdb_initialize (&cdb_handle, NULL);
	if (err != CS_OK) {
		printf ("Could not initialize Cluster Configuration Database API instance error %d. Test skipped\n", err);
		return (1);
	}

	printf ("%s test %d\n", __FUNCTION__, test_n);

	if (test_n == 2) {
		/*
		 * Object should not exist
		 */
		printf ("%s Testing if object exists (it shouldn't)\n", __FUNCTION__);

		if (test_cdb_find_pid_object (cdb_handle, pid, tmp_obj, sizeof (tmp_obj), &pid_handle, &find_res) != 0) {
			return (2);
		}

		if (find_res == CS_OK) {
			printf ("Could find object \"%s\": %d.\n", tmp_obj, find_res);
			return (2);
		}
	}

	if (test_n == 1 || test_n == 2) {
		printf ("%s: initialize\n", __FUNCTION__);
		err = sam_initialize (2000, SAM_RECOVERY_POLICY_QUIT | SAM_RECOVERY_POLICY_CONFDB);
		if (err != CS_OK) {
			fprintf (stderr, "Can't initialize SAM API. Error %d\n", err);
			return (2);
		}

		printf ("%s: register\n", __FUNCTION__);
		err = sam_register (&instance_id);
		if (err != CS_OK) {
			fprintf (stderr, "Can't register. Error %d\n", err);
			return (2);
		}

		if (test_cdb_require_pid_object (cdb_handle, pid, &pid_handle) != 0) {
			return (2);
		}

		if (test_cdb_expect_recovery (cdb_handle, pid_handle, "quit") != 0) {
			return (2);
		}

		if (test_cdb_expect_state (cdb_handle, pid_handle, "registered") != 0) {
			return (2);
		}

		printf ("%s iid %d: start\n", __FUNCTION__, instance_id);
		err = sam_start ();
		if (err != CS_OK) {
			fprintf (stderr, "Can't start hc. Error %d\n", err);
			return (2);
		}

		if (test_cdb_expect_state (cdb_handle, pid_handle, "started") != 0) {
			return (2);
		}

		printf ("%s iid %d: stop\n", __FUNCTION__, instance_id);
		err = sam_stop ();
		if (err != CS_OK) {
			fprintf (stderr, "Can't stop hc. Error %d\n", err);
			return (2);
		}

		if (test_cdb_expect_state (cdb_handle, pid_handle, "registered") != 0) {
			return (2);
		}

		/*
		 * Health checking is stopped, so state has to stay "registered"
		 * even after a period longer than the 2000 ms passed to
		 * sam_initialize.
		 */
		printf ("%s iid %d: sleeping 5\n", __FUNCTION__, instance_id);
		sleep (5);

		if (test_cdb_expect_state (cdb_handle, pid_handle, "registered") != 0) {
			return (2);
		}

		printf ("%s iid %d: start 2\n", __FUNCTION__, instance_id);
		err = sam_start ();
		if (err != CS_OK) {
			fprintf (stderr, "Can't start hc. Error %d\n", err);
			return (2);
		}

		if (test_cdb_expect_state (cdb_handle, pid_handle, "started") != 0) {
			return (2);
		}

		if (test_n == 2) {
			/*
			 * No health check is sent. Surviving the sleep is a failure.
			 */
			printf ("%s iid %d: sleeping 5. Should be killed\n", __FUNCTION__, instance_id);
			sleep (5);

			return (2);
		}

		printf ("%s iid %d: Test HC\n", __FUNCTION__, instance_id);
		err = sam_hc_send ();
		if (err != CS_OK) {
			fprintf (stderr, "Can't send hc. Error %d\n", err);
			return (2);
		}

		err = confdb_key_get_typed (cdb_handle, pid_handle, "hc_last", &tstamp1, &value_len, &cdbtype);
		if (err != CS_OK) {
			printf ("Could not get \"hc_last\" key: %d.\n", err);
			return (2);
		}

		printf ("%s iid %d: Sleep 1\n", __FUNCTION__, instance_id);
		sleep (1);
		err = sam_hc_send ();
		if (err != CS_OK) {
			fprintf (stderr, "Can't send hc. Error %d\n", err);
			return (2);
		}

		sleep (1);
		err = confdb_key_get_typed (cdb_handle, pid_handle, "hc_last", &tstamp2, &value_len, &cdbtype);
		if (err != CS_OK) {
			printf ("Could not get \"hc_last\" key: %d.\n", err);
			return (2);
		}

		/*
		 * The two health checks were sent about one second apart
		 */
		if (tstamp2 - tstamp1 < 500 || tstamp2 - tstamp1 > 2000) {
			printf ("Difference %d is not within <500, 2000> interval.\n", (int)(tstamp2 - tstamp1));
			return (2);
		}

		printf ("%s iid %d: stop 2\n", __FUNCTION__, instance_id);
		err = sam_stop ();
		if (err != CS_OK) {
			fprintf (stderr, "Can't stop hc. Error %d\n", err);
			return (2);
		}

		if (test_cdb_expect_state (cdb_handle, pid_handle, "registered") != 0) {
			return (2);
		}

		printf ("%s iid %d: exiting\n", __FUNCTION__, instance_id);
		return (0);
	}

	if (test_n == 3) {
		printf ("%s Testing if status is failed\n", __FUNCTION__);

		/*
		 * Previous should be FAILED
		 */
		return (test_cdb_check_failed (cdb_handle, pid));
	}

	return (2);
}

/*
 * Test confdb integration + restart policy
 *
 * pid     - pid whose confdb object is inspected
 * old_pid - not used
 * test_n  - 1: register; instances with id below 3 start health checking
 *              and wait to be killed, instance 3 calls sam_mark_failed
 *           2: object of pid must exist with state "failed"
 *
 * Returns 0 if the test passed, 1 if it was skipped (confdb could not be
 * initialized) and 2 if it failed.
 */
static int test9 (pid_t pid, pid_t old_pid, int test_n) {
	confdb_handle_t cdb_handle;
	cs_error_t err;
	hdb_handle_t pid_handle;
	unsigned int instance_id;

	(void)old_pid;

	err = confdb_initialize (&cdb_handle, NULL);
	if (err != CS_OK) {
		printf ("Could not initialize Cluster Configuration Database API instance error %d. Test skipped\n", err);
		return (1);
	}

	printf ("%s test %d\n", __FUNCTION__, test_n);

	if (test_n == 1) {
		printf ("%s: initialize\n", __FUNCTION__);
		err = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART | SAM_RECOVERY_POLICY_CONFDB);
		if (err != CS_OK) {
			fprintf (stderr, "Can't initialize SAM API. Error %d\n", err);
			return (2);
		}

		printf ("%s: register\n", __FUNCTION__);
		err = sam_register (&instance_id);
		if (err != CS_OK) {
			fprintf (stderr, "Can't register. Error %d\n", err);
			return (2);
		}
		printf ("%s: iid %d\n", __FUNCTION__, instance_id);

		if (instance_id < 3) {
			if (test_cdb_require_pid_object (cdb_handle, pid, &pid_handle) != 0) {
				return (2);
			}

			if (test_cdb_expect_recovery (cdb_handle, pid_handle, "restart") != 0) {
				return (2);
			}

			if (test_cdb_expect_state (cdb_handle, pid_handle, "registered") != 0) {
				return (2);
			}

			printf ("%s iid %d: start\n", __FUNCTION__, instance_id);
			err = sam_start ();
			if (err != CS_OK) {
				fprintf (stderr, "Can't start hc. Error %d\n", err);
				return (2);
			}

			if (test_cdb_expect_state (cdb_handle, pid_handle, "started") != 0) {
				return (2);
			}

			/*
			 * No health check is sent. Surviving the sleep is a failure.
			 */
			printf ("%s iid %d: waiting for kill\n", __FUNCTION__, instance_id);
			sleep (10);

			return (2);
		}

		if (instance_id == 3) {
			printf ("%s iid %d: mark failed\n", __FUNCTION__, instance_id);

			err = sam_mark_failed ();
			if (err != CS_OK) {
				fprintf (stderr, "Can't mark failed. Error %d\n", err);
				return (2);
			}

			/*
			 * Surviving the sleep after sam_mark_failed is a failure.
			 */
			sleep (10);

			return (2);
		}

		return (2);
	}

	if (test_n == 2) {
		printf ("%s Testing if status is failed\n", __FUNCTION__);

		/*
		 * Previous should be FAILED
		 */
		return (test_cdb_check_failed (cdb_handle, pid));
	}

	return (2);
}
int main(int argc, char *argv[])
{
pid_t pid;
pid_t pid, old_pid;
int err;
int stat;
int all_passed = 1;
@ -990,7 +1535,7 @@ int main(int argc, char *argv[])
if (pid == -1) {
fprintf (stderr, "Can't fork\n");
return 1;
return 2;
}
if (pid == 0) {
@ -1001,6 +1546,100 @@ int main(int argc, char *argv[])
waitpid (pid, &stat, 0);
fprintf (stderr, "test7 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : (WEXITSTATUS (stat) == 1 ? "skipped" : "failed")));
if (WEXITSTATUS (stat) == 1)
no_skipped++;
if (WEXITSTATUS (stat) > 1)
all_passed = 0;
pid = fork ();
if (pid == -1) {
fprintf (stderr, "Can't fork\n");
return 2;
}
if (pid == 0) {
err = test8 (getpid (), 0, 1);
sam_finalize ();
return (err);
}
waitpid (pid, &stat, 0);
old_pid = pid;
if (WEXITSTATUS (stat) == 0) {
pid = fork ();
if (pid == -1) {
fprintf (stderr, "Can't fork\n");
return 2;
}
if (pid == 0) {
err = test8 (getpid (), old_pid, 2);
sam_finalize ();
return (err);
}
waitpid (pid, &stat, 0);
old_pid = pid;
if (WEXITSTATUS (stat) == 0) {
pid = fork ();
if (pid == -1) {
fprintf (stderr, "Can't fork\n");
return 2;
}
if (pid == 0) {
err = test8 (old_pid, 0, 3);
sam_finalize ();
return (err);
}
waitpid (pid, &stat, 0);
}
}
if (WEXITSTATUS (stat) == 1)
no_skipped++;
if (WEXITSTATUS (stat) > 1)
all_passed = 0;
pid = fork ();
if (pid == -1) {
fprintf (stderr, "Can't fork\n");
return 2;
}
if (pid == 0) {
err = test9 (getpid (), 0, 1);
sam_finalize ();
return (err);
}
waitpid (pid, &stat, 0);
old_pid = pid;
if (WEXITSTATUS (stat) == 0) {
pid = fork ();
if (pid == -1) {
fprintf (stderr, "Can't fork\n");
return 2;
}
if (pid == 0) {
err = test9 (old_pid, 0, 2);
sam_finalize ();
return (err);
}
waitpid (pid, &stat, 0);
}
fprintf (stderr, "test9 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : (WEXITSTATUS (stat) == 1 ? "skipped" : "failed")));
if (WEXITSTATUS (stat) == 1)
no_skipped++;