From f983d37a2f98ccae9b1baf24f7cd5cc2f2288c1e Mon Sep 17 00:00:00 2001 From: Lon Hohberger Date: Fri, 13 Oct 2006 10:18:04 +0000 Subject: [PATCH] Patch contains: A mechanism to defer and recall simultaneous events in the state machines for amf_cluster, amf_application and amf_sg. The implication of this defer and recall mechanism is that it's now possible to recover from e.g. several simultaneous SU failures in an ordered serialized manner. The events that could be deferred/recalled so far are SG_FAILOVER_NODE_EV, SG_START_EV, SG_FAILOVER_SU_EV, CLUSTER_SYNC_READY_EV, APPLICATION_START_EV and APPLICATION_ASSIGN_WORKLOAD_EV. Files involved: Index: exec/amfnode.c Index: exec/amfsg.c Index: exec/amfutil.c Index: exec/amfapp.c Index: exec/amfcomp.c Index: exec/amfcluster.c Index: exec/amf.h git-svn-id: http://svn.fedorahosted.org/svn/corosync/trunk@1266 fd59a12c-fef9-0310-b244-a6a79926bd2f --- exec/amf.h | 53 +++-- exec/amfapp.c | 227 ++++++++++++------- exec/amfcluster.c | 165 ++++++++------ exec/amfcomp.c | 31 +-- exec/amfnode.c | 56 ++--- exec/amfsg.c | 538 ++++++++++++++++++++++++++++++---------------- exec/amfsu.c | 1 - exec/amfutil.c | 61 +++++- 8 files changed, 718 insertions(+), 414 deletions(-) diff --git a/exec/amf.h b/exec/amf.h index e96d90d0..4bb7f7f5 100644 --- a/exec/amf.h +++ b/exec/amf.h @@ -110,10 +110,16 @@ typedef enum { } sg_avail_control_state_t; -typedef enum { - SG_RT_FailoverSU = 1, - SG_RT_FailoverNode -} sg_recovery_type_t; +typedef enum amf_sg_event_type { + SG_UNKNOWN_EV, + SG_FAILOVER_SU_EV, + SG_FAILOVER_NODE_EV, + SG_FAILOVER_COMP_EV, + SG_SWITCH_OVER_NODE_EV, + SG_START_EV, + SG_AUTO_ADJUST_EV, + SG_ASSIGN_SI_EV +} amf_sg_event_type_t; typedef enum { SU_RC_ESCALATION_LEVEL_0 = 0, @@ -169,21 +175,25 @@ typedef enum { CLUSTER_AC_QUISING } cluster_avail_control_state_t; -typedef enum amf_cluster_event { - CLUSTER_SYNC_READY_EV = 1 -} amf_cluster_event_t; +typedef enum amf_cluster_event_type { + CLUSTER_SYNC_READY_EV, + 
CLUSTER_EVENT_TYPE_CNT +} amf_cluster_event_type_t; + +typedef enum amf_application_event_type { + APPLICATION_ASSIGN_WORKLOAD_EV, + APPLICATION_START_EV, + APPLICATION_EVENT_TYPE_CNT +} amf_application_event_type_t; -typedef struct amf_deferred { - struct amf_deferred *next; -} amf_deferred_t; - -typedef struct cluster_deferred { - amf_deferred_t defered_list; - struct amf_node *node; - amf_cluster_event_t event; -} cluster_deferredt_t; +typedef struct amf_fifo { + int entry_type; + struct amf_fifo *next; + int size_of_data; + uint8_t data[]; +} amf_fifo_t; typedef struct amf_cluster { /* Configuration Attributes */ @@ -201,7 +211,7 @@ typedef struct amf_cluster { /* Implementation */ poll_timer_handle timeout_handle; cluster_avail_control_state_t acsm_state; - cluster_deferredt_t *deferred_events_head; + amf_fifo_t *deferred_events; } amf_cluster_t; typedef struct amf_node { @@ -246,10 +256,11 @@ typedef struct amf_application { struct amf_application *next; struct amf_node *node_to_start; app_avail_control_state_t acsm_state; + amf_fifo_t *deferred_events; } amf_application_t; struct sg_recovery_scope { - sg_recovery_type_t recovery_type; + amf_sg_event_type_t event_type; struct amf_si **sis; struct amf_su **sus; struct amf_comp *comp; @@ -292,6 +303,7 @@ typedef struct amf_sg { sg_avail_control_state_t avail_state; struct sg_recovery_scope recovery_scope; struct amf_node *node_to_start; + amf_fifo_t *deferred_events; } amf_sg_t; typedef struct amf_su { @@ -605,6 +617,11 @@ extern char *amf_deserialize_SaUint64T (char *buf, SaUint64T *num); extern char *amf_deserialize_opaque (char *buf, void *dst, int *cnt); extern int amf_msg_mcast (int msg_id, void *buf, size_t len); extern void amf_util_init (void); +extern void amf_fifo_put (int entry_type, amf_fifo_t **root, + int size_of_data, void *data); +extern int amf_fifo_get (amf_fifo_t **root, void *data); +typedef void (*async_func_t)(void *param); +extern void amf_call_function_asynchronous (async_func_t 
async_func, void *param); /*===========================================================================*/ /* amfnode.c */ diff --git a/exec/amfapp.c b/exec/amfapp.c index cd8bcf6d..044f0e87 100644 --- a/exec/amfapp.c +++ b/exec/amfapp.c @@ -110,7 +110,7 @@ * A6 - save value of received node parameter * A7 - defer the event * A8 - [node == NULL] cluster_application_started else node_application_started -* A9 - recall defered events +* A9 - recall deferred events * A10 - [node == NULL] cluster_application_assigned else * node_application_assigned * @@ -131,7 +131,59 @@ * Internal (static) utility functions *****************************************************************************/ -int no_su_is_instantiating (struct amf_application *app) +typedef struct application_event { + amf_application_event_type_t event_type; + amf_application_t *app; + amf_node_t *node; +} application_event_t; + +static void application_defer_event ( + amf_application_event_type_t event_type, amf_application_t *app, + amf_node_t *node) +{ + application_event_t app_event = {event_type, app, node}; + amf_fifo_put (event_type, &app->deferred_events, + sizeof (application_event_t), &app_event); +} + + +static void application_recall_deferred_events (amf_application_t *app) +{ + application_event_t application_event; + + if (amf_fifo_get (&app->deferred_events, &application_event)) { + switch (application_event.event_type) { + case APPLICATION_ASSIGN_WORKLOAD_EV: { + log_printf (LOG_NOTICE, + "Recall APPLICATION_ASSIGN_WORKLOAD_EV"); + amf_application_assign_workload ( + application_event.app, + application_event.node); + break; + } + case APPLICATION_START_EV: { + + log_printf (LOG_NOTICE, + "Recall APPLICATION_START_EV"); + amf_application_start (application_event.app, + application_event.node); + break; + } + default: + assert (0); + break; + } + } +} +static void timer_function_application_recall_deferred_events (void *data) +{ + amf_application_t *app = (amf_application_t*)data; + + ENTER 
(""); + application_recall_deferred_events (app); +} + +static int no_su_is_instantiating (struct amf_application *app) { struct amf_sg *sg; struct amf_su *su; @@ -149,22 +201,6 @@ int no_su_is_instantiating (struct amf_application *app) } -#ifdef COMPILE_OUT -static int all_sg_started (struct amf_application *app) -{ - struct amf_sg *sg; - int all_su_instantiated = 1; - - for (sg = app->sg_head; sg != NULL; sg = sg->next) { - if (sg->avail_state == SG_AC_InstantiatingServiceUnits) { - all_su_instantiated = 0; - break; - } - } - return all_su_instantiated; -} -#endif - static int all_sg_assigned (struct amf_application *app) { struct amf_sg *sg; @@ -179,9 +215,58 @@ static int all_sg_assigned (struct amf_application *app) return all_sg_assigned; } +static void application_enter_starting_sgs (struct amf_application *app, + struct amf_node *node) +{ + amf_sg_t *sg = 0; + app->node_to_start = node; + app->acsm_state = APP_AC_STARTING_SGS; + + for (sg = app->sg_head; sg != NULL; sg = sg->next) { + amf_sg_start (sg, node); + } +} + +static void application_enter_assigning_workload (amf_application_t *app) +{ + amf_sg_t *sg = 0; + int posible_to_assign_si = 0; + app->acsm_state = APP_AC_ASSIGNING_WORKLOAD; + for (sg = app->sg_head; sg != NULL; sg = sg->next) { + if (amf_sg_assign_si_req (sg, 0)) { + posible_to_assign_si = 1; + } + } + if (posible_to_assign_si == 0) { + app->acsm_state = APP_AC_WORKLOAD_ASSIGNED; + } + +} + +static void application_enter_workload_assigned (amf_application_t *app) +{ + if (all_sg_assigned (app)){ + app->acsm_state = APP_AC_WORKLOAD_ASSIGNED; + if (app->node_to_start == NULL){ + amf_cluster_application_workload_assigned ( + app->cluster, app); + } else { + amf_node_application_workload_assigned( + app->node_to_start, app); + } + + amf_call_function_asynchronous ( + timer_function_application_recall_deferred_events, app); + } + + +} + /****************************************************************************** * Event methods 
*****************************************************************************/ + + void amf_application_start ( struct amf_application *app, struct amf_node *node) { @@ -191,12 +276,7 @@ void amf_application_start ( assert (app != NULL); switch (app->acsm_state) { case APP_AC_UNINSTANTIATED: - app->node_to_start = node; - app->acsm_state = APP_AC_STARTING_SGS; - - for (sg = app->sg_head; sg != NULL; sg = sg->next) { - amf_sg_start (sg, node); - } + application_enter_starting_sgs (app, node); break; case APP_AC_STARTING_SGS: if (app->node_to_start == node) { @@ -211,7 +291,7 @@ void amf_application_start ( } break; case APP_AC_STARTED: - /* TODO: Recall defered events */ + /* TODO: Recall deferred events */ app->node_to_start = node; app->acsm_state = APP_AC_STARTING_SGS; for (sg = app->sg_head; sg != NULL; sg = sg->next) { @@ -219,19 +299,13 @@ void amf_application_start ( } break; case APP_AC_ASSIGNING_WORKLOAD: - /* TODO: Save the start request until state == APP_AC_STARTED */ log_printf (LOG_LEVEL_ERROR, "Request to start application" - " =%s in state = %d (should be defered)", - app->name.value, app->acsm_state); - openais_exit_error (AIS_DONE_FATAL_ERR); + " =%s in state APP_AC_ASSIGNING_WORKLOAD(should be deferred)", + app->name.value); + application_defer_event (APPLICATION_START_EV, app , node); break; case APP_AC_WORKLOAD_ASSIGNED: - app->node_to_start = node; - app->acsm_state = APP_AC_STARTING_SGS; - - for (sg = app->sg_head; sg != NULL; sg = sg->next) { - amf_sg_start (sg, node); - } + application_enter_starting_sgs (app, node); break; default: assert (0); @@ -239,54 +313,44 @@ void amf_application_start ( } } -void amf_application_assign_workload ( - struct amf_application *app, struct amf_node *node) -{ - struct amf_sg *sg; - /* - * TODO: dependency level ignored - * Each dependency level should be looped and amf_sg_assign_si - * called several times. 
- */ +void amf_application_assign_workload (struct amf_application *app, + struct amf_node *node) +{ + /* + * TODO: dependency level ignored. Each dependency level should + * be looped and amf_sg_assign_si called several times. + */ + assert (app != NULL); app->node_to_start = node; switch (app->acsm_state) { case APP_AC_WORKLOAD_ASSIGNED: - TRACE1 ("APP_AC_WORKLOAD_ASSIGNED"); - /* Fall through */ - case APP_AC_STARTED: { - int posible_to_assign_si = 0; - app->acsm_state = APP_AC_ASSIGNING_WORKLOAD; - for (sg = app->sg_head; sg != NULL; sg = sg->next) { - if (amf_sg_assign_si_req (sg, 0)) { - posible_to_assign_si = 1; - } - } - if (posible_to_assign_si == 0) { - app->acsm_state = APP_AC_WORKLOAD_ASSIGNED; - } - break; - } + application_enter_assigning_workload (app); + break; + case APP_AC_STARTED: + application_enter_assigning_workload (app); + break; case APP_AC_ASSIGNING_WORKLOAD: if (app->node_to_start == node) { - /*Calling object has violated the contract !*/ + /* + * Calling object has violated the contract ! + */ assert (0); } else { - /* - * TODO: Save the request to assign workload until state == - * WORKLOAD_ASSIGNED - */ - log_printf (LOG_LEVEL_ERROR, "Request to assign workload to" - " application =%s in state = %d (should be defered)", - app->name.value, app->acsm_state); - openais_exit_error (AIS_DONE_FATAL_ERR); + " application =%s in state APP_AC_ASSIGNING_WORKLOAD " + "(should be deferred)", app->name.value); + + application_defer_event (APPLICATION_ASSIGN_WORKLOAD_EV, app, + node); } break; default: - /*Calling object has violated the contract !*/ + /* + * Calling object has violated the contract ! 
+ */ assert (0); break; } @@ -295,8 +359,8 @@ void amf_application_assign_workload ( /****************************************************************************** * Event response methods *****************************************************************************/ -void amf_application_sg_started ( - struct amf_application *app, struct amf_sg *sg, struct amf_node *node) +void amf_application_sg_started (struct amf_application *app, struct amf_sg *sg, + struct amf_node *node) { ENTER ("'%s'", app->name.value); @@ -329,17 +393,11 @@ void amf_application_sg_assigned ( switch (app->acsm_state) { case APP_AC_ASSIGNING_WORKLOAD: - if (all_sg_assigned (app)){ - app->acsm_state = APP_AC_WORKLOAD_ASSIGNED; - if (app->node_to_start == NULL){ - amf_cluster_application_workload_assigned (app->cluster, app); - } else { - amf_node_application_workload_assigned (app->node_to_start, app); - } - } + application_enter_workload_assigned (app); break; default: - log_printf (LOG_LEVEL_ERROR, "amf_application_sg_assigned()" + log_printf (LOG_LEVEL_ERROR, + "amf_application_sg_assigned()" " called in state = %d", app->acsm_state); openais_exit_error (AIS_DONE_FATAL_ERR); break; @@ -355,7 +413,8 @@ void amf_application_init (void) } struct amf_application *amf_application_new (struct amf_cluster *cluster) { - struct amf_application *app = amf_calloc (1, sizeof (struct amf_application)); + struct amf_application *app = amf_calloc (1, + sizeof (struct amf_application)); app->cluster = cluster; app->next = cluster->application_head; @@ -408,7 +467,8 @@ void *amf_application_serialize ( } struct amf_application *amf_application_deserialize ( - struct amf_cluster *cluster, char *buf) { + struct amf_cluster *cluster, char *buf) +{ char *tmp = buf; struct amf_application *app = amf_application_new (cluster); @@ -430,7 +490,8 @@ struct amf_application *amf_application_find ( for (app = cluster->application_head; app != NULL; app = app->next) { if (app->name.length == strlen(name) && - strncmp 
(name, (char*)app->name.value, app->name.length) == 0) { + strncmp (name, (char*)app->name.value, app->name.length) + == 0) { break; } } diff --git a/exec/amfcluster.c b/exec/amfcluster.c index 8e57aebc..991d21e2 100644 --- a/exec/amfcluster.c +++ b/exec/amfcluster.c @@ -1,3 +1,4 @@ + /** @file amfcluster.c * * Copyright (c) 2006 Ericsson AB. @@ -138,6 +139,53 @@ #include "main.h" #include "service.h" +/****************************************************************************** + * Internal (static) utility functions + *****************************************************************************/ + + +typedef struct cluster_event { + amf_cluster_event_type_t event_type; + amf_cluster_t *cluster; + amf_node_t *node; +} cluster_event_t; + + +static void cluster_defer_event (amf_cluster_event_type_t event_type, + struct amf_cluster *cluster, struct amf_node * node) +{ + cluster_event_t sync_ready_event = {event_type, cluster, node}; + amf_fifo_put (event_type, &cluster->deferred_events, + sizeof (cluster_event_t), + &sync_ready_event); +} + +static void cluster_recall_deferred_events (amf_cluster_t *cluster) +{ + cluster_event_t cluster_event; + + if (amf_fifo_get (&cluster->deferred_events, &cluster_event)) { + switch (cluster_event.event_type) { + case CLUSTER_SYNC_READY_EV: + log_printf (LOG_NOTICE, + "Recall CLUSTER_SYNC_READY_EV"); + + amf_node_sync_ready (cluster_event.node); + break; + default: + assert (0); + break; + } + } +} + +static void timer_function_cluster_recall_deferred_events (void *data) +{ + amf_cluster_t *cluster = (amf_cluster_t*)data; + + ENTER (""); + cluster_recall_deferred_events (cluster); +} /** * Determine if all applications are started so that all @@ -222,7 +270,7 @@ static void start_cluster_startup_timer (struct amf_cluster *cluster) &cluster->timeout_handle); } -static inline void amf_cluster_enter_starting_applications ( +static inline void cluster_enter_starting_applications ( struct amf_cluster *cluster) { ENTER (""); @@ 
-231,48 +279,21 @@ static inline void amf_cluster_enter_starting_applications ( amf_cluster_start_applications (cluster); } -static void add_assign_workload_deferred_list (struct amf_cluster *cluster, - struct amf_node *node, amf_cluster_event_t event) +static void acsm_cluster_enter_started (amf_cluster_t *cluster) { - cluster_deferredt_t *tmp_deferred_list = - calloc (1, sizeof (cluster_deferredt_t)); - - tmp_deferred_list->defered_list.next = - (amf_deferred_t*) cluster->deferred_events_head; - cluster->deferred_events_head = tmp_deferred_list; -} - - -static void defer_assigning_worload_to_node (struct amf_node *node, - amf_cluster_event_t event) -{ - - add_assign_workload_deferred_list(amf_cluster, node, event); -} - -static amf_deferred_t *recall_defered_cluster_events ( - struct amf_cluster *cluster) -{ - return (amf_deferred_t*) cluster->deferred_events_head; -} - - -static void acsm_cluster_enter_started (struct amf_cluster *cluster) -{ - - amf_deferred_t *deferred_events; - + ENTER (""); amf_cluster->acsm_state = CLUSTER_AC_STARTED; - - for (deferred_events = recall_defered_cluster_events (cluster); - deferred_events != NULL; - deferred_events = deferred_events->next){ - - amf_node_sync_ready (((cluster_deferredt_t*)deferred_events)->node); - } + amf_call_function_asynchronous ( + timer_function_cluster_recall_deferred_events, cluster); } -int amf_cluster_applications_started_with_no_starting_sgs (struct amf_cluster *cluster) +/****************************************************************************** + * Event methods + *****************************************************************************/ + + +int amf_cluster_applications_started_with_no_starting_sgs ( +struct amf_cluster *cluster) { return !cluster_applications_are_starting_sgs (cluster); } @@ -287,24 +308,27 @@ void amf_cluster_start_tmo_event (int is_sync_masterm, switch (cluster->acsm_state) { case CLUSTER_AC_STARTING_APPLICATIONS: if (cluster_applications_are_starting_sgs (cluster)) { - 
dprintf ("Cluster startup timeout, start waiting over time"); - amf_cluster->acsm_state = CLUSTER_AC_WAITING_OVER_TIME; - } else { - dprintf ("Cluster startup timeout, assigning workload"); + dprintf ("Cluster startup timeout," + "start waiting over time"); + amf_cluster->acsm_state = + CLUSTER_AC_WAITING_OVER_TIME; + } else { + dprintf ("Cluster startup timeout," + " assigning workload"); acsm_cluster_enter_assigning_workload (cluster); } break; case CLUSTER_AC_ASSIGNING_WORKLOAD: - /* ignore cluster startup timer expiration */ + /* ignore cluster startup timer expiration */ case CLUSTER_AC_STARTED: - /* ignore cluster startup timer expiration */ + /* ignore cluster startup timer expiration */ case CLUSTER_AC_WAITING_OVER_TIME: - /* ignore cluster startup timer expiration */ + /* ignore cluster startup timer expiration */ break; - default: - log_printf(LOG_LEVEL_ERROR, "Cluster timout expired in wrong cluster" - " state = %d", cluster->acsm_state); + log_printf(LOG_LEVEL_ERROR, "Cluster timout expired" + " in wrong cluster" + " state = %d", cluster->acsm_state); assert(0); break; } @@ -329,33 +353,40 @@ void amf_cluster_start_applications(struct amf_cluster *cluster) void amf_cluster_sync_ready (struct amf_cluster *cluster, struct amf_node *node) { - log_printf(LOG_NOTICE, "Cluster: starting applications."); switch (amf_cluster->acsm_state) { case CLUSTER_AC_UNINSTANTIATED: - if (amf_cluster->saAmfClusterAdminState == SA_AMF_ADMIN_UNLOCKED) { - amf_cluster_enter_starting_applications (cluster); + if (amf_cluster->saAmfClusterAdminState == + SA_AMF_ADMIN_UNLOCKED) { + cluster_enter_starting_applications (cluster); } break; case CLUSTER_AC_STARTING_APPLICATIONS: - amf_cluster_enter_starting_applications(cluster); + cluster_enter_starting_applications(cluster); break; case CLUSTER_AC_ASSIGNING_WORKLOAD: - defer_assigning_worload_to_node (node, CLUSTER_SYNC_READY_EV); - log_printf (LOG_LEVEL_ERROR, "Sync ready not implemented in " + /* + * Defer assigning workload to 
those syncronized nodes to + * CLUSTER_AC_STARTED state. + */ + cluster_defer_event (CLUSTER_SYNC_READY_EV, cluster, + node); + log_printf (LOG_LEVEL_ERROR, + "Sync ready not implemented in " "cluster state: %u\n", amf_cluster->acsm_state); assert (0); break; case CLUSTER_AC_WAITING_OVER_TIME: - /* TODO: Defer the implementation of assigning - * workload to those syncronized nodes to CLUSTER_AC_STARTED - * state. - */ - defer_assigning_worload_to_node (node, CLUSTER_SYNC_READY_EV); + /* + * Defer assigning workload to those syncronized nodes to + * CLUSTER_AC_STARTED state. + */ + cluster_defer_event (CLUSTER_SYNC_READY_EV, cluster, + node); break; case CLUSTER_AC_STARTED: TRACE1 ("Node sync ready sent from cluster in " - "CLUSTER_AC_STARTED state"); + "CLUSTER_AC_STARTED state"); amf_node_sync_ready (node); break; @@ -363,7 +394,6 @@ void amf_cluster_sync_ready (struct amf_cluster *cluster, struct amf_node *node) assert (0); break; } - } void amf_cluster_init (void) @@ -399,12 +429,14 @@ void amf_cluster_application_started ( } } -struct amf_cluster *amf_cluster_new (void) { - struct amf_cluster *cluster = amf_calloc (1, sizeof (struct amf_cluster)); +struct amf_cluster *amf_cluster_new (void) +{ + struct amf_cluster *cluster = amf_calloc (1, + sizeof (struct amf_cluster)); cluster->saAmfClusterStartupTimeout = -1; cluster->saAmfClusterAdminState = SA_AMF_ADMIN_UNLOCKED; - cluster->deferred_events_head = 0; + cluster->deferred_events = 0; cluster->acsm_state = CLUSTER_AC_UNINSTANTIATED; return cluster; } @@ -462,7 +494,8 @@ void *amf_cluster_serialize (struct amf_cluster *cluster, int *len) return buf; } -struct amf_cluster *amf_cluster_deserialize (char *buf) { +struct amf_cluster *amf_cluster_deserialize (char *buf) +{ char *tmp = buf; struct amf_cluster *cluster = amf_cluster_new (); diff --git a/exec/amfcomp.c b/exec/amfcomp.c index 5f3447c3..53f9aad3 100644 --- a/exec/amfcomp.c +++ b/exec/amfcomp.c @@ -312,16 +312,6 @@ static void report_error_suspected ( 
comp->su, comp, recommended_recovery); } -char *amf_comp_dn_make (struct amf_comp *comp, SaNameT *name) -{ - int i = snprintf ((char*) name->value, SA_MAX_NAME_LENGTH, - "safComp=%s,safSu=%s,safSg=%s,safApp=%s", - comp->name.value, comp->su->name.value, - comp->su->sg->name.value, comp->su->sg->application->name.value); - assert (i <= SA_MAX_NAME_LENGTH); - name->length = i; - return (char *)name->value; -} #ifndef xprintf #define xprintf(...) @@ -665,6 +655,17 @@ static int clc_terminate (struct amf_comp *comp) } #endif +char *amf_comp_dn_make (struct amf_comp *comp, SaNameT *name) +{ + int i = snprintf ((char*) name->value, SA_MAX_NAME_LENGTH, + "safComp=%s,safSu=%s,safSg=%s,safApp=%s", + comp->name.value, comp->su->name.value, + comp->su->sg->name.value, comp->su->sg->application->name.value); + assert (i <= SA_MAX_NAME_LENGTH); + name->length = i; + return (char *)name->value; +} + struct amf_healthcheck *amf_comp_find_healthcheck ( struct amf_comp *comp, SaAmfHealthcheckKeyT *key) { @@ -1203,7 +1204,7 @@ static void lib_csi_set_request ( static void stop_component_instantiate_timer (struct amf_comp *component) { if (component->instantiate_timeout_handle) { - dprintf ("Stop cluster startup timer"); + dprintf ("Stop component instantiate timer"); poll_timer_delete (aisexec_poll_handle, component->instantiate_timeout_handle); component->instantiate_timeout_handle = 0; @@ -1436,7 +1437,7 @@ void amf_comp_instantiate (struct amf_comp *comp) void amf_comp_instantiate_tmo_event (struct amf_comp *comp) { - ENTER ("Comp instantiate timeout after %d seconds '%s' '%s'", + ENTER ("Comp instantiate timeout after %d ms '%s' '%s'", comp->saAmfCompInstantiateTimeout, comp->su->name.value, comp->name.value); @@ -1451,8 +1452,12 @@ void amf_comp_instantiate_tmo_event (struct amf_comp *comp) amf_comp_operational_state_set (comp, SA_AMF_OPERATIONAL_DISABLED); comp_presence_state_set (comp, SA_AMF_PRESENCE_INSTANTIATION_FAILED); + break; + case SA_AMF_PRESENCE_INSTANTIATED: + 
assert (comp->instantiate_timeout_handle == 0); break; default: + dprintf("Presence state = %d", comp->saAmfCompPresenceState); assert (0); break; } @@ -1865,7 +1870,7 @@ SaAmfReadinessStateT amf_comp_get_saAmfCompReadinessState ( * component process is executing has unexpectadly left the * node. If there is a pending interaction between AMF * (component) and the 'real' component process, then component - * will indicate to its subordinate objects the the interaction + * will indicate to its subordinate objects the interaction * failed. Pending presence state changes is indicated by * reporting the new state is uninstantiated while pending csi * operations are indicated by 'operation failed'. diff --git a/exec/amfnode.c b/exec/amfnode.c index d335a8f8..95de253b 100644 --- a/exec/amfnode.c +++ b/exec/amfnode.c @@ -177,14 +177,14 @@ * Internal (static) utility functions *****************************************************************************/ -static void amf_node_acsm_enter_leaving_spontaneously(struct amf_node *node) +static void node_acsm_enter_leaving_spontaneously(struct amf_node *node) { ENTER("'%s'", node->name.value); node->saAmfNodeOperState = SA_AMF_OPERATIONAL_DISABLED; node->nodeid = 0; } -static void amf_node_acsm_enter_failing_over (struct amf_node *node) +static void node_acsm_enter_failing_over (struct amf_node *node) { struct amf_application *app; struct amf_sg *sg; @@ -223,42 +223,6 @@ static void amf_node_acsm_enter_failing_over (struct amf_node *node) } } -#ifdef COMPILE_OUT -static int all_applications_on_node_started (struct amf_node *node, - struct amf_cluster *cluster) -{ - int all_started = 1; - struct amf_application *app; - struct amf_sg *sg; - struct amf_su *su; - - for (app = cluster->application_head; app != NULL; app = app->next) { - for (sg = app->sg_head; sg != NULL; sg = sg->next) { - for (su = sg->su_head; su != NULL; su = su->next) { - /* - * TODO: Replace the if-statement below with the if-statementin - * this comment when 
the real problem is fixed ! - */ - if (su->saAmfSUPresenceState != - SA_AMF_PRESENCE_INSTANTIATED && - name_match(&su->saAmfSUHostedByNode,&node->name)) { - all_started = 0; goto done; - } - if (su->saAmfSUPresenceState != SA_AMF_PRESENCE_INSTANTIATED ) { - all_started = 0; - goto done; - } - } - } - } - - done: - return all_started; - -} -#endif - - /****************************************************************************** * Event methods *****************************************************************************/ @@ -280,14 +244,14 @@ void amf_node_leave (struct amf_node *node) case NODE_ACSM_ESCALLATION_LEVEL_0: case NODE_ACSM_ESCALLATION_LEVEL_2: case NODE_ACSM_ESCALLATION_LEVEL_3: - amf_node_acsm_enter_leaving_spontaneously(node); - amf_node_acsm_enter_failing_over (node); + node_acsm_enter_leaving_spontaneously(node); + node_acsm_enter_failing_over (node); break; case NODE_ACSM_REPAIR_NEEDED: break; default: log_printf (LOG_LEVEL_ERROR, "amf_node_leave()called in state = %d" - " (should have been defered)", node->acsm_state); + " (should have been deferred)", node->acsm_state); openais_exit_error (AIS_DONE_FATAL_ERR); break; @@ -372,7 +336,7 @@ void amf_node_sync_ready (struct amf_node *node) break; default: log_printf (LOG_LEVEL_ERROR, "amf_node_sync_ready()called in state" - " = %d (should have been defered)", node->acsm_state); + " = %d (should have been deferred)", node->acsm_state); openais_exit_error (AIS_DONE_FATAL_ERR); break; @@ -467,10 +431,10 @@ void amf_node_application_workload_assigned (struct amf_node *node, void amf_node_sg_failed_over (struct amf_node *node, struct amf_sg *sg_in) { struct amf_sg *sg; - struct amf_application *app; + struct amf_application *app = 0; int all_sg_has_failed_over = 1; - assert (node != NULL && app != NULL); + assert (node != NULL); ENTER ("Node=%s: SG '%s' started", node->name.value, sg_in->name.value); @@ -485,6 +449,10 @@ void amf_node_sg_failed_over (struct amf_node *node, struct amf_sg *sg_in) } } } + + 
break; + case NODE_ACSM_LEAVING_SPONTANEOUSLY_WAITING_FOR_NODE_TO_JOIN: + /* Accept reports of failed over sg that has completed. */ break; default: log_printf (LOG_LEVEL_ERROR, "amf_node_sg_failed_over()" diff --git a/exec/amfsg.c b/exec/amfsg.c index a93203ee..da6edf39 100644 --- a/exec/amfsg.c +++ b/exec/amfsg.c @@ -157,41 +157,119 @@ static void standby_su_activated_cbfn ( static void dependent_si_deactivated_cbfn ( struct amf_si_assignment *si_assignment, int result); -static const char *sg_recovery_type_text[] = { +static const char *sg_event_type_text[] = { "Unknown", - "FailoverSU", - "FailoverNode" + "Failover su", + "Failover node", + "Failover comp", + "Switchover node", + "Start", + "Autoadjust", + "Assign si" }; -static void return_to_idle (struct amf_sg *sg) +typedef struct sg_event { + amf_sg_event_type_t event_type; + amf_sg_t *sg; + amf_su_t *su; + amf_comp_t *comp; + amf_node_t *node; +} sg_event_t; + +static void sg_set_event (amf_sg_event_type_t sg_event_type, + amf_sg_t *sg, amf_su_t *su, amf_comp_t *comp, amf_node_t * node, + sg_event_t *sg_event) +{ + sg_event->event_type = sg_event_type; + sg_event->node = node; + sg_event->su = su; + sg_event->comp = comp; + sg_event->sg = sg; + +} + + +static void sg_defer_event (amf_sg_event_type_t event_type, + sg_event_t *sg_event) +{ + amf_fifo_put (event_type, &sg_event->sg->deferred_events, + sizeof (sg_event_t), + sg_event); +} + + +static void sg_recall_deferred_events (amf_sg_t *sg) +{ + sg_event_t sg_event; + + ENTER (""); + if (amf_fifo_get (&sg->deferred_events, &sg_event)) { + switch (sg_event.event_type) { + case SG_FAILOVER_SU_EV: + amf_sg_failover_su_req (sg_event.sg, + sg_event.su, sg_event.node); + break; + case SG_FAILOVER_NODE_EV: + amf_sg_failover_node_req (sg_event.sg, + sg_event.node); + break; + case SG_FAILOVER_COMP_EV: + case SG_SWITCH_OVER_NODE_EV: + case SG_START_EV: + case SG_AUTO_ADJUST_EV: + default: + break; + + } + } +} + + +static void 
timer_function_sg_recall_deferred_events (void *data) +{ + amf_sg_t *sg = (amf_sg_t*)data; + ENTER (""); + + sg_recall_deferred_events (sg); +} + +static void sg_enter_idle (amf_sg_t *sg) { SaNameT dn; - ENTER ("sg: %s state: %d", sg->name.value,sg->avail_state); + ENTER ("sg: %s state: %d", sg->name.value, sg->avail_state); sg->avail_state = SG_AC_Idle; - if (sg->recovery_scope.recovery_type != 0) { - switch (sg->recovery_scope.recovery_type) { - case SG_RT_FailoverSU: + if (sg->recovery_scope.event_type != 0) { + switch (sg->recovery_scope.event_type) { + case SG_FAILOVER_SU_EV: assert (sg->recovery_scope.sus[0] != NULL); amf_su_dn_make (sg->recovery_scope.sus[0], &dn); log_printf ( - LOG_NOTICE, "'%s' %s recovery action finished", + LOG_NOTICE, + "'%s' %s recovery action finished", dn.value, - sg_recovery_type_text[sg->recovery_scope.recovery_type]); + sg_event_type_text[sg->recovery_scope.event_type]); break; - case SG_RT_FailoverNode: - amf_node_sg_failed_over (sg->recovery_scope.node, sg); + case SG_FAILOVER_NODE_EV: + amf_node_sg_failed_over ( + sg->recovery_scope.node, sg); log_printf ( - LOG_NOTICE, "'%s for %s' recovery action finished", - sg_recovery_type_text[sg->recovery_scope.recovery_type], + LOG_NOTICE, + "'%s for %s' recovery action finished", + sg_event_type_text[sg->recovery_scope.event_type], sg->name.value); break; + case SG_START_EV: + amf_application_sg_started (sg->application, + sg, this_amf_node); + break; default: log_printf ( - LOG_NOTICE, "'%s' recovery action finished", - sg_recovery_type_text[0]); + LOG_NOTICE, + "'%s' recovery action finished", + sg_event_type_text[0]); break; } } @@ -204,6 +282,10 @@ static void return_to_idle (struct amf_sg *sg) } memset (&sg->recovery_scope, 0, sizeof (struct sg_recovery_scope)); sg->node_to_start = NULL; + + amf_call_function_asynchronous ( + timer_function_sg_recall_deferred_events, sg); + } static int su_instantiated_count (struct amf_sg *sg) @@ -227,7 +309,8 @@ static int 
has_any_su_in_scope_active_workload (struct amf_sg *sg) while (*sus != NULL) { si_assignment = amf_su_get_next_si_assignment (*sus, NULL); while (si_assignment != NULL) { - if (si_assignment->saAmfSISUHAState != SA_AMF_HA_ACTIVE) { + if (si_assignment->saAmfSISUHAState != + SA_AMF_HA_ACTIVE) { break; } si_assignment = amf_su_get_next_si_assignment ( @@ -253,7 +336,8 @@ static int is_standby_for_non_active_si_in_scope (struct amf_sg *sg) while (*sis != NULL) { si_assignment = (*sis)->assigned_sis; while (si_assignment != NULL) { - if (si_assignment->saAmfSISUHAState == SA_AMF_HA_ACTIVE) { + if (si_assignment->saAmfSISUHAState == + SA_AMF_HA_ACTIVE) { break; } si_assignment = si_assignment->next; @@ -262,7 +346,8 @@ static int is_standby_for_non_active_si_in_scope (struct amf_sg *sg) /* There is no ACTIVE assignment ..*/ si_assignment = (*sis)->assigned_sis; while (si_assignment != NULL) { - if (si_assignment->saAmfSISUHAState == SA_AMF_HA_STANDBY) { + if (si_assignment->saAmfSISUHAState == + SA_AMF_HA_STANDBY) { break; } si_assignment = si_assignment->next; @@ -291,6 +376,22 @@ static void acsm_enter_terminating_suspected (struct amf_sg *sg) } } +static inline int su_presense_state_is_ored (amf_su_t *su, + SaAmfPresenceStateT state1,SaAmfPresenceStateT state2, + SaAmfPresenceStateT state3) +{ + return(su->saAmfSUPresenceState == state1 || su->saAmfSUPresenceState == + state2 || su->saAmfSUPresenceState == state3) ? 1 : 0; +} + +static inline int su_presense_state_is_not (amf_su_t *su, + SaAmfPresenceStateT state1,SaAmfPresenceStateT state2, + SaAmfPresenceStateT state3) +{ + return(su->saAmfSUPresenceState != state1 && su->saAmfSUPresenceState != + state2 && su->saAmfSUPresenceState != state3) ? 
1 : 0; +} + /** * Callback function used by SI when there is no dependent SI to @@ -300,19 +401,24 @@ static void acsm_enter_terminating_suspected (struct amf_sg *sg) static void dependent_si_deactivated_cbfn2 (struct amf_sg *sg) { struct amf_su **sus = sg->recovery_scope.sus; + ENTER("'%s'", sg->name.value); - /* Select next state depending on if some SU in the scope is - * needs to be terminated. + /* + * Select next state depending on if some + * SU in the scope is needs to be terminated. */ + while (*sus != NULL) { - ENTER("SU %s pr_state='%d'",(*sus)->name.value, - (*sus)->saAmfSUPresenceState); - if (((*sus)->saAmfSUPresenceState == - SA_AMF_PRESENCE_UNINSTANTIATED) || - ((*sus)->saAmfSUPresenceState == - SA_AMF_PRESENCE_TERMINATION_FAILED) || - ((*sus)->saAmfSUPresenceState == + + amf_su_t *su = *sus; + + ENTER("SU %s pr_state='%d'",su->name.value, + su->saAmfSUPresenceState); + + if (su_presense_state_is_ored (su, + SA_AMF_PRESENCE_UNINSTANTIATED, + SA_AMF_PRESENCE_TERMINATION_FAILED, SA_AMF_PRESENCE_INSTANTIATION_FAILED)) { sus++; continue; @@ -329,10 +435,11 @@ static void dependent_si_deactivated_cbfn2 (struct amf_sg *sg) } -static void timer_function_dependent_si_deactivated2 (void *sg) +static void timer_function_dependent_si_deactivated2 (void *data) { ENTER (""); + amf_sg_t *sg = (amf_sg_t *)data; dependent_si_deactivated_cbfn2 (sg); } @@ -356,7 +463,8 @@ static struct amf_si *si_get_dependent (struct amf_si *si) 2, res_arr); if (is_match) { - tmp_si = amf_si_find (si->application, (char*)res_arr[1].value); + tmp_si = amf_si_find (si->application, + (char*)res_arr[1].value); } else { log_printf (LOG_LEVEL_ERROR, "distinguished name for " "amf_si_depedency failed\n"); @@ -366,7 +474,7 @@ static struct amf_si *si_get_dependent (struct amf_si *si) return tmp_si; } -struct amf_si *amf_dependent_get_next (struct amf_si *si, +static struct amf_si *amf_dependent_get_next (struct amf_si *si, struct amf_si *si_iter) { struct amf_si *tmp_si; @@ -401,7 +509,7 
@@ struct amf_si *amf_dependent_get_next (struct amf_si *si, return tmp_si; } -static void acsm_enter_deactivating_dependent_workload (struct amf_sg *sg) +static void acsm_enter_deactivating_dependent_workload (amf_sg_t *sg) { struct amf_si **sis= sg->recovery_scope.sis; struct amf_si_assignment *si_assignment; @@ -410,7 +518,7 @@ static void acsm_enter_deactivating_dependent_workload (struct amf_sg *sg) sg->avail_state = SG_AC_DeactivatingDependantWorkload; ENTER("'%s'",sg->name.value); - /* + /* * For each SI in the recovery scope, find all active assignments * and request them to be deactivated. */ @@ -425,11 +533,14 @@ static void acsm_enter_deactivating_dependent_workload (struct amf_sg *sg) while (si_assignment != NULL) { - if (si_assignment->saAmfSISUHAState == SA_AMF_HA_ACTIVE) { - si_assignment->requested_ha_state = SA_AMF_HA_QUIESCED; + if (si_assignment->saAmfSISUHAState == + SA_AMF_HA_ACTIVE) { + si_assignment->requested_ha_state = + SA_AMF_HA_QUIESCED; callback_pending = 1; amf_si_ha_state_assume ( - si_assignment, dependent_si_deactivated_cbfn); + si_assignment, + dependent_si_deactivated_cbfn); } si_assignment = si_assignment->next; } @@ -439,10 +550,11 @@ static void acsm_enter_deactivating_dependent_workload (struct amf_sg *sg) } if (callback_pending == 0) { - poll_timer_handle handle; + static poll_timer_handle dependent_si_deactivated_handle; ENTER(""); poll_timer_add (aisexec_poll_handle, 0, sg, - timer_function_dependent_si_deactivated2, &handle); + timer_function_dependent_si_deactivated2, + &dependent_si_deactivated_handle); } } /** @@ -465,8 +577,10 @@ static void acsm_enter_activating_standby (struct amf_sg *sg) while (*sis != NULL) { si_assignment = (*sis)->assigned_sis; while (si_assignment != NULL) { - if (si_assignment->saAmfSISUHAState == SA_AMF_HA_STANDBY) { - si_assignment->requested_ha_state = SA_AMF_HA_ACTIVE; + if (si_assignment->saAmfSISUHAState == + SA_AMF_HA_STANDBY) { + si_assignment->requested_ha_state = + SA_AMF_HA_ACTIVE; 
amf_si_ha_state_assume ( si_assignment, standby_su_activated_cbfn); is_no_standby_activated = 0; @@ -496,14 +610,17 @@ static void acsm_enter_repairing_su (struct amf_sg *sg) while (*sus != NULL) { if (su_instantiated_count ((*sus)->sg) < (*sus)->sg->saAmfSGNumPrefInserviceSUs) { - struct amf_node *node = amf_node_find(&((*sus)->saAmfSUHostedByNode)); + struct amf_node *node = + amf_node_find(&((*sus)->saAmfSUHostedByNode)); if (node == NULL) { - log_printf (LOG_LEVEL_ERROR, "no node to hosted on su found" + log_printf (LOG_LEVEL_ERROR, + "no node to hosted on su found" "amf_si_depedency failed\n"); openais_exit_error (AIS_DONE_FATAL_ERR); } - if (node->saAmfNodeOperState == SA_AMF_OPERATIONAL_ENABLED) { - /* node is synchronized */ + if (node->saAmfNodeOperState == + SA_AMF_OPERATIONAL_ENABLED) { + /* node is synchronized */ is_any_su_instantiated = 1; amf_su_instantiate ((*sus)); } @@ -513,7 +630,7 @@ static void acsm_enter_repairing_su (struct amf_sg *sg) } if (is_any_su_instantiated == 0) { - return_to_idle (sg); + sg_enter_idle (sg); } } @@ -607,7 +724,7 @@ static void set_scope_for_failover_su (struct amf_sg *sg, struct amf_su *su) struct amf_si **sis; struct amf_su **sus; SaNameT dn; - sg->recovery_scope.recovery_type = SG_RT_FailoverSU; + sg->recovery_scope.event_type = SG_FAILOVER_SU_EV; sg->recovery_scope.node = NULL; sg->recovery_scope.comp = NULL; @@ -623,7 +740,7 @@ static void set_scope_for_failover_su (struct amf_sg *sg, struct amf_su *su) amf_su_dn_make (sg->recovery_scope.sus[0], &dn); log_printf ( LOG_NOTICE, "'%s' for %s recovery action started", - sg_recovery_type_text[sg->recovery_scope.recovery_type], + sg_event_type_text[sg->recovery_scope.event_type], dn.value); si_assignment = amf_su_get_next_si_assignment (su, NULL); @@ -655,7 +772,7 @@ static void set_scope_for_failover_node (struct amf_sg *sg, struct amf_node *nod struct amf_su *su; ENTER ("'%s'", node->name.value); - sg->recovery_scope.recovery_type = SG_RT_FailoverNode; + 
sg->recovery_scope.event_type = SG_FAILOVER_NODE_EV; sg->recovery_scope.node = node; sg->recovery_scope.comp = NULL; sg->recovery_scope.sus = (struct amf_su **) @@ -665,7 +782,7 @@ static void set_scope_for_failover_node (struct amf_sg *sg, struct amf_node *nod log_printf ( LOG_NOTICE, "'%s' for node %s recovery action started", - sg_recovery_type_text[sg->recovery_scope.recovery_type], + sg_event_type_text[sg->recovery_scope.event_type], node->name.value); assert ((sg->recovery_scope.sus != NULL) && @@ -684,7 +801,8 @@ static void set_scope_for_failover_node (struct amf_sg *sg, struct amf_node *nod if (is_si_in_scope(sg, si_assignment->si) == 0) { add_si_to_scope(sg, si_assignment->si ); } - si_assignment = amf_su_get_next_si_assignment (su, si_assignment); + si_assignment = amf_su_get_next_si_assignment ( + su, si_assignment); } sus++; } @@ -734,7 +852,8 @@ static void delete_si_assignments (struct amf_su *su) if (si_assignment->su == su) { struct amf_si_assignment *tmp = si_assignment; *prev = si_assignment->next; - dprintf ("SI assignment %s unlinked", tmp->name.value); + dprintf ("SI assignment %s unlinked", + tmp->name.value); free (tmp); } else { prev = &si_assignment->next; @@ -789,7 +908,8 @@ static void dependent_si_deactivated_cbfn ( si_assignment->requested_ha_state) { goto still_wating; } - si_assignment = amf_su_get_next_si_assignment(su, si_assignment); + si_assignment = amf_su_get_next_si_assignment(su, + si_assignment); } @@ -800,15 +920,16 @@ static void dependent_si_deactivated_cbfn ( if (su == NULL) { sus = si_assignment->su->sg->recovery_scope.sus; - /* Select next state depending on if some SU in the scope is - * needs to be terminated. + /* + * Select next state depending on if some + * SU in the scope is needs to be terminated. 
*/ + + while (*sus != NULL) { - if (((*sus)->saAmfSUPresenceState != - SA_AMF_PRESENCE_UNINSTANTIATED) && - ((*sus)->saAmfSUPresenceState != - SA_AMF_PRESENCE_TERMINATION_FAILED) && - ((*sus)->saAmfSUPresenceState != + if (su_presense_state_is_not (*sus, + SA_AMF_PRESENCE_UNINSTANTIATED, + SA_AMF_PRESENCE_TERMINATION_FAILED, SA_AMF_PRESENCE_INSTANTIATION_FAILED)) { break; } @@ -894,7 +1015,7 @@ static void assign_si_assumed_cbfn ( switch (sg->avail_state) { case SG_AC_AssigningOnRequest: if (si_assignment_cnt == confirmed_assignments) { - return_to_idle (sg); + sg_enter_idle (sg); amf_application_sg_assigned (sg->application, sg); } else { dprintf ("%d, %d", si_assignment_cnt, confirmed_assignments); @@ -903,7 +1024,7 @@ static void assign_si_assumed_cbfn ( case SG_AC_AssigningStandBy: { if (si_assignment_cnt == confirmed_assignments) { - return_to_idle (sg); + sg_enter_idle (sg); } break; } @@ -927,32 +1048,6 @@ static inline int div_round (int a, int b) return res; } -#ifdef COMPILE_OUT -static int all_su_has_presence_state ( - struct amf_sg *sg, struct amf_node *node_to_start, - SaAmfPresenceStateT state) -{ - struct amf_su *su; - int all_set = 1; - - for (su = sg->su_head; su != NULL; su = su->next) { - - if (su->saAmfSUPresenceState != state) { - if (node_to_start == NULL) { - all_set = 0; - break; - } else { - if (name_match(&node_to_start->name, - &su->saAmfSUHostedByNode)) { - all_set = 0; - break; - } - } - } - } - return all_set; -} -#endif static int no_su_has_presence_state ( struct amf_sg *sg, struct amf_node *node_to_start, @@ -1011,7 +1106,7 @@ static int sg_si_count_get (struct amf_sg *sg) return(cnt); } -int amf_si_get_saAmfSINumReqActiveAssignments(struct amf_si *si) +static int amf_si_get_saAmfSINumReqActiveAssignments(struct amf_si *si) { struct amf_si_assignment *si_assignment = si->assigned_sis; int number_of_req_active_assignments = 0; @@ -1026,7 +1121,7 @@ int amf_si_get_saAmfSINumReqActiveAssignments(struct amf_si *si) } -int 
amf_si_get_saAmfSINumReqStandbyAssignments(struct amf_si *si) +static int amf_si_get_saAmfSINumReqStandbyAssignments(struct amf_si *si) { struct amf_si_assignment *si_assignment = si->assigned_sis; int number_of_req_active_assignments = 0; @@ -1215,6 +1310,10 @@ static int assign_si (struct amf_sg *sg, int dependency_level) inservice_count = su_inservice_count_get (sg); if (sg->saAmfSGNumPrefActiveSUs > 0) { + dprintf("LHL sg_si_count_get (sg) %d ,sg->saAmfSGMaxActiveSIsperSUs %d, ", + sg_si_count_get (sg), + sg->saAmfSGMaxActiveSIsperSUs); + active_sus_needed = div_round ( sg_si_count_get (sg), sg->saAmfSGMaxActiveSIsperSUs); @@ -1296,10 +1395,6 @@ static int assign_si (struct amf_sg *sg, int dependency_level) if (inservice_count > 0) { assigned = sg_assign_nm_active (sg, su_active_assign); assigned += sg_assign_nm_standby (sg, su_standby_assign); - -#if 0 - assert (assigned > 0); -#endif sg->saAmfSGNumCurrAssignedSUs = inservice_count; /** @@ -1334,102 +1429,157 @@ static int assign_si (struct amf_sg *sg, int dependency_level) int amf_sg_assign_si_req (struct amf_sg *sg, int dependency_level) { int posible_to_assign_si; - + sg->recovery_scope.event_type = SG_ASSIGN_SI_EV; sg->avail_state = SG_AC_AssigningOnRequest; if ((posible_to_assign_si = assign_si (sg, dependency_level)) == 0) { - return_to_idle (sg); + sg_enter_idle (sg); } return posible_to_assign_si; } -void amf_sg_failover_node_req ( - struct amf_sg *sg, struct amf_node *node) + +void amf_sg_failover_node_req (struct amf_sg *sg, struct amf_node *node) { - ENTER("'%s, %s'",node->name.value, sg->name.value); + sg_event_t sg_event; - /* - * TODO: Defer all new events. Workaround is to exit. 
- */ - if (sg->avail_state != SG_AC_Idle) { - log_printf (LOG_LEVEL_ERROR, "To handle multiple simultaneous SG" - " recovery actions is not implemented yet:" - " SG '%s', NODE '%s', avail_state %d", - sg->name.value, node->name.value, sg->avail_state); - openais_exit_error (AIS_DONE_FATAL_ERR); - return; - } + switch (sg->avail_state) { + case SG_AC_Idle: + set_scope_for_failover_node(sg, node); + if (has_any_su_in_scope_active_workload (sg)) { + acsm_enter_deactivating_dependent_workload (sg); + } else { + amf_su_t **sus = sg->recovery_scope.sus; - set_scope_for_failover_node(sg, node); + /* + * Select next state depending on if some + * SU in the scope needs to be terminated. + */ + while (*sus != NULL) { - if (has_any_su_in_scope_active_workload (sg)) { - acsm_enter_deactivating_dependent_workload (sg); - } else { - struct amf_su **sus = sg->recovery_scope.sus; + amf_su_t *su = *sus; + ENTER("SU %s pr_state='%d'",su->name.value, + su->saAmfSUPresenceState); - /* Select next state depending on if some SU in the scope is - * needs to be terminated. 
- */ - while (*sus != NULL) { - ENTER("SU %s pr_state='%d'",(*sus)->name.value, - (*sus)->saAmfSUPresenceState); - if (((*sus)->saAmfSUPresenceState == - SA_AMF_PRESENCE_UNINSTANTIATED) || - ((*sus)->saAmfSUPresenceState == - SA_AMF_PRESENCE_TERMINATION_FAILED) || - ((*sus)->saAmfSUPresenceState == - SA_AMF_PRESENCE_INSTANTIATION_FAILED)) { - sus++; - continue; - } + if (su_presense_state_is_ored (su, + SA_AMF_PRESENCE_UNINSTANTIATED, + SA_AMF_PRESENCE_TERMINATION_FAILED, + SA_AMF_PRESENCE_INSTANTIATION_FAILED)) { + sus++; + continue; + } + break; + } + + if (*sus != NULL) { + acsm_enter_terminating_suspected (sg); + } else { + delete_si_assignments_in_scope (sg); + sg_enter_idle (sg); + } + + } + break; + case SG_AC_DeactivatingDependantWorkload: + case SG_AC_TerminatingSuspected: + case SG_AC_ActivatingStandby: + case SG_AC_AssigningStandbyToSpare: + case SG_AC_ReparingComponent: + case SG_AC_ReparingSu: + case SG_AC_AssigningOnRequest: + case SG_AC_InstantiatingServiceUnits: + case SG_AC_RemovingAssignment: + case SG_AC_AssigningActiveworkload: + case SG_AC_AssigningAutoAdjust: + case SG_AC_AssigningStandBy: + case SG_AC_WaitingAfterOperationFailed: + sg_set_event (SG_FAILOVER_NODE_EV, sg, 0, 0, node, &sg_event); + sg_defer_event (SG_FAILOVER_NODE_EV, &sg_event); + break; + default: + assert (0); break; - } - - if (*sus != NULL) { - acsm_enter_terminating_suspected (sg); - } else { - delete_si_assignments_in_scope (sg); - return_to_idle (sg); - } - - } + } } + + void amf_sg_start (struct amf_sg *sg, struct amf_node *node) { - struct amf_su *su; - sg_avail_control_state_t old_avail_state = sg->avail_state; - int instantiated_sus = 0; - ENTER ("'%s'", sg->name.value); + sg_event_t sg_event; + sg->recovery_scope.event_type = SG_START_EV; + switch (sg->avail_state) { + case SG_AC_Idle: { - sg->node_to_start = node; + amf_su_t *su; + sg_avail_control_state_t old_avail_state = sg->avail_state; + int instantiated_sus = 0; - sg->avail_state = 
SG_AC_InstantiatingServiceUnits; + ENTER ("'%s'", sg->name.value); - for (su = sg->su_head; - (su != NULL) && (instantiated_sus < sg->saAmfSGNumPrefInserviceSUs); - su = su->next) { + sg->node_to_start = node; - if (node == NULL) { - /* Cluster start */ - amf_su_instantiate (su); - instantiated_sus++; - } else { - /* Node start, match if SU is hosted on the specified node*/ - if (name_match (&node->name, &su->saAmfSUHostedByNode)) { - amf_su_instantiate (su); - instantiated_sus++; + sg->avail_state = SG_AC_InstantiatingServiceUnits; + + for (su = sg->su_head; + (su != NULL) && + (instantiated_sus < sg->saAmfSGNumPrefInserviceSUs); + su = su->next) { + + if (node == NULL) { + + /* + * Cluster start + */ + + amf_su_instantiate (su); + instantiated_sus++; + + } else { + + /* + * Node start, match if SU is hosted on the + * specified node + */ + + if (name_match (&node->name, + &su->saAmfSUHostedByNode)) { + amf_su_instantiate (su); + instantiated_sus++; + } + } + } + + if (instantiated_sus == 0) { + sg->avail_state = old_avail_state; + } + break; } - } - } - - if (instantiated_sus == 0) { - sg->avail_state = old_avail_state; + case SG_AC_InstantiatingServiceUnits: + sg_set_event (SG_START_EV, sg, 0, 0, node, &sg_event); + sg_defer_event (SG_START_EV, &sg_event); + break; + case SG_AC_DeactivatingDependantWorkload: + case SG_AC_TerminatingSuspected: + case SG_AC_ActivatingStandby: + case SG_AC_AssigningStandbyToSpare: + case SG_AC_ReparingComponent: + case SG_AC_ReparingSu: + case SG_AC_AssigningOnRequest: + case SG_AC_RemovingAssignment: + case SG_AC_AssigningActiveworkload: + case SG_AC_AssigningAutoAdjust: + case SG_AC_AssigningStandBy: + case SG_AC_WaitingAfterOperationFailed: + default: + assert (0); + break; } } + void amf_sg_su_state_changed (struct amf_sg *sg, struct amf_su *su, SaAmfStateT type, int state) { @@ -1441,16 +1591,14 @@ void amf_sg_su_state_changed (struct amf_sg *sg, if (sg->avail_state == SG_AC_InstantiatingServiceUnits) { if 
(no_su_has_presence_state(sg, sg->node_to_start, SA_AMF_PRESENCE_INSTANTIATING)) { - su->sg->avail_state = SG_AC_Idle; - amf_application_sg_started ( - sg->application, sg, this_amf_node); + sg_enter_idle (sg); } } else if (sg->avail_state == SG_AC_ReparingSu) { if (all_su_in_scope_has_presence_state(su->sg, SA_AMF_PRESENCE_INSTANTIATED)) { su->sg->avail_state = SG_AC_AssigningStandBy; if (assign_si (sg, 0) == 0) { - return_to_idle (sg); + sg_enter_idle (sg); } } else { @@ -1484,9 +1632,7 @@ void amf_sg_su_state_changed (struct amf_sg *sg, if (sg->avail_state == SG_AC_InstantiatingServiceUnits) { if (no_su_has_presence_state(sg, sg->node_to_start, SA_AMF_PRESENCE_INSTANTIATING)) { - su->sg->avail_state = SG_AC_Idle; - amf_application_sg_started ( - sg->application, sg, this_amf_node); + sg_enter_idle (sg); } } } else { @@ -1502,27 +1648,42 @@ void amf_sg_init (void) log_init ("AMF"); } -void amf_sg_failover_su_req ( - struct amf_sg *sg, struct amf_su *su, struct amf_node *node) +void amf_sg_failover_su_req (struct amf_sg *sg, struct amf_su *su, + struct amf_node *node) { ENTER (""); - /* - * TODO: Defer all new events. Workaround is to exit. 
- */ - if (sg->avail_state != SG_AC_Idle) { - log_printf (LOG_LEVEL_ERROR, "To handle multiple simultaneous SG" - " recovery actions is not implemented yet:" - " SG '%s', SU '%s', avail_state %d", - sg->name.value, su->name.value, sg->avail_state); - openais_exit_error (AIS_DONE_FATAL_ERR); - return; - } - set_scope_for_failover_su (sg, su); - if (has_any_su_in_scope_active_workload (sg)) { - acsm_enter_deactivating_dependent_workload (sg); - } else { - acsm_enter_terminating_suspected (sg); - } + sg_event_t sg_event; + + switch (sg->avail_state) { + case SG_AC_Idle: + set_scope_for_failover_su (sg, su); + if (has_any_su_in_scope_active_workload (sg)) { + acsm_enter_deactivating_dependent_workload (sg); + } else { + acsm_enter_terminating_suspected (sg); + } + break; + case SG_AC_DeactivatingDependantWorkload: + case SG_AC_TerminatingSuspected: + case SG_AC_ActivatingStandby: + case SG_AC_AssigningStandbyToSpare: + case SG_AC_ReparingComponent: + case SG_AC_ReparingSu: + case SG_AC_AssigningOnRequest: + case SG_AC_InstantiatingServiceUnits: + case SG_AC_RemovingAssignment: + case SG_AC_AssigningActiveworkload: + case SG_AC_AssigningAutoAdjust: + case SG_AC_AssigningStandBy: + case SG_AC_WaitingAfterOperationFailed: + sg_set_event (SG_FAILOVER_SU_EV, sg, su, 0, 0, &sg_event); + sg_defer_event (SG_FAILOVER_SU_EV, &sg_event); + break; + default: + assert (0); + break; + + } } /** @@ -1555,6 +1716,7 @@ struct amf_sg *amf_sg_new (struct amf_application *app, char *name) sg->application = app; sg->next = app->sg_head; app->sg_head = sg; + sg->deferred_events = NULL; return sg; } diff --git a/exec/amfsu.c b/exec/amfsu.c index 9748fcc6..2b45742c 100644 --- a/exec/amfsu.c +++ b/exec/amfsu.c @@ -576,7 +576,6 @@ void amf_su_comp_error_suspected ( * TODO: delegate to node */ SaNameT dn; - su_operational_state_set (su, SA_AMF_OPERATIONAL_DISABLED); amf_comp_operational_state_set ( comp, SA_AMF_OPERATIONAL_DISABLED); diff --git a/exec/amfutil.c b/exec/amfutil.c index 
675a6c5d..711408ec 100644 --- a/exec/amfutil.c +++ b/exec/amfutil.c @@ -404,7 +404,7 @@ struct amf_cluster *amf_config_read (char **error_string) sg = amf_sg_new (app, trim_str (loc)); sg_cnt++; sg->recovery_scope.comp = NULL; - sg->recovery_scope.recovery_type = 0; + sg->recovery_scope.event_type = 0; sg->recovery_scope.node = NULL; sg->recovery_scope.sis = NULL; sg->recovery_scope.sus = NULL; @@ -1366,3 +1366,62 @@ void amf_util_init (void) { log_init ("AMF"); } + +void amf_fifo_put (int entry_type, amf_fifo_t **root, int size_of_data, + void *data) +{ + amf_fifo_t *fifo; + amf_fifo_t **new_item = root; + + /* Insert newest entry at the end of the single linked list */ + for (fifo = *root; fifo != NULL; fifo = fifo->next) { + if (fifo->next == NULL) { + new_item = &fifo->next; + } + } + *new_item = amf_malloc (size_of_data + sizeof (amf_fifo_t)); + fifo = *new_item; + + /* Set data of this entry*/ + fifo->entry_type = entry_type; + fifo->next = NULL; + fifo->size_of_data = size_of_data; + memcpy (fifo->data, data, size_of_data); +} + +int amf_fifo_get (amf_fifo_t **root, void *data) +{ + amf_fifo_t *fifo; + int result = 0; + + fifo = *root; + if (fifo != NULL) { + /* Unlink oldest entry*/ + *root = fifo->next; + memcpy (data, fifo->data, fifo->size_of_data); + free (fifo); + result = 1; + } + return result; +} + + +/** + * + * Use timer to call function f (void *data) after that current + * execution in this thread has been re-assumed because of a + * time-out. Time-out time is 0 msec so f will be called as soon + * as possible. * + * + * @param async_func + * @param func_param + */ + +void amf_call_function_asynchronous (async_func_t async_func, void *func_param) +{ + + static poll_timer_handle async_func_timer_handle; + poll_timer_add (aisexec_poll_handle, 0, func_param, async_func, + &async_func_timer_handle); +} +