diff --git a/configure.ac b/configure.ac index 14c0e175..a27ba95a 100644 --- a/configure.ac +++ b/configure.ac @@ -470,6 +470,7 @@ if test "x${enable_vqsim}" = xyes; then [], AC_MSG_WARN([vqsim will lack readline support])) PACKAGE_FEATURES="$PACKAGE_FEATURES vqsim" + WITH_LIST="$WITH_LIST --with vqsim" fi AM_CONDITIONAL(VQSIM_READLINE, [test "x${ac_cv_header_readline_readline_h}" = xyes]) diff --git a/corosync.spec.in b/corosync.spec.in index 07c004c3..8af686e0 100644 --- a/corosync.spec.in +++ b/corosync.spec.in @@ -12,6 +12,7 @@ %bcond_with systemd %bcond_with xmlconf %bcond_with nozzle +%bcond_with vqsim %bcond_with runautogen %global gitver %{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}} @@ -70,6 +71,9 @@ Requires(preun): /sbin/chkconfig %if %{with xmlconf} Requires: libxslt %endif +%if %{with vqsim} +BuildRequires: readline-devel +%endif %prep %setup -q -n %{name}-%{version}%{?gittarver} @@ -100,6 +104,9 @@ Requires: libxslt %endif %if %{with nozzle} --enable-nozzle \ +%endif +%if %{with vqsim} + --enable-vqsim \ %endif --with-initddir=%{_initrddir} \ --with-systemddir=%{_unitdir} \ @@ -266,6 +273,25 @@ The Corosync Cluster Engine APIs. %{_mandir}/man3/sam_*3* %{_mandir}/man3/cmap_*3* +%if %{with vqsim} +%package -n corosync-vqsim +Summary: The Corosync Cluster Engine - Votequorum Simulator +Requires: corosynclib%{?_isa} = %{version}-%{release} +Requires: pkgconfig + +%description -n corosync-vqsim +A command-line simulator for the corosync votequorum subsystem. +It uses the same code as the corosync quorum system but forks +them into subprocesses to simulate nodes. 
+Nodes can be added and removed as well as partitioned (to simulate +network splits) + +%files -n corosync-vqsim +%doc LICENSE +%{_bindir}/corosync-vqsim +%{_mandir}/man8/corosync-vqsim.8* +%endif + %changelog * @date@ Autotools generated version - @version@-1-@numcomm@.@alphatag@.@dirty@ - Autotools generated version diff --git a/man/Makefile.am b/man/Makefile.am index d1b7f94e..8e4eda3d 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -139,6 +139,10 @@ dist_man_MANS = corosync.conf.5 \ cmap_overview.3 \ cmap_keys.8 +if BUILD_VQSIM +dist_man_MANS += corosync-vqsim.8 +endif + if INSTALL_XMLCONF dist_man_MANS += $(xml_man) endif diff --git a/man/corosync-vqsim.8 b/man/corosync-vqsim.8 new file mode 100644 index 00000000..26a6468e --- /dev/null +++ b/man/corosync-vqsim.8 @@ -0,0 +1,94 @@ +.\"/* +.\" * Copyright (C) 2019 Red Hat, Inc. +.\" * +.\" * All rights reserved. +.\" * +.\" * Author: Christine Caulfield +.\" * +.\" * This software licensed under BSD license, the text of which follows: +.\" * +.\" * Redistribution and use in source and binary forms, with or without +.\" * modification, are permitted provided that the following conditions are met: +.\" * +.\" * - Redistributions of source code must retain the above copyright notice, +.\" * this list of conditions and the following disclaimer. +.\" * - Redistributions in binary form must reproduce the above copyright notice, +.\" * this list of conditions and the following disclaimer in the documentation +.\" * and/or other materials provided with the distribution. +.\" * - Neither the name of the MontaVista Software, Inc. nor the names of its +.\" * contributors may be used to endorse or promote products derived from this +.\" * software without specific prior written permission. 
+.\" * +.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +.\" * THE POSSIBILITY OF SUCH DAMAGE. +.\" */ +.TH COROSYNC-VQSIM 8 2019-05-10 +.SH NAME +corosync-vqsim \- The votequorum simulator +.SH SYNOPSIS +.B "corosync-vqsim [\-c config_file] [\-o output_file] [\-n] [\-h]" +.SH DESCRIPTION +.B corosync-vqsim +simulates the quorum functions of corosync in a single program. It can simulate +multiple nodes, network splits and a basic quorum device. + +By default vqsim will build a virtual cluster of all the nodes in the corosync.conf file, +each 'node' running in a forked subprocess (and thus asynchronously). It then provides a +command-line interface to add (up) or remove (down) nodes, and cause network splits and +rejoins. After each event it shows the new quorum status for all nodes. + +Nodes in vqsim are always referred to by their nodeid (the IP address is meaningless) and +optionally by a 'partition' which precedes the nodeid with a colon. By default all nodes +are in partition 0. Nodes can be moved between partitions using the split and join commands. +Multiple nodes can be split and joined at the same time. + +To script vqsim you must send input to it via a pipe rather than just redirecting STDIN. 
This +is because it runs asynchronously to enable the virtual 'nodes' to report status when needed. +(e.g. if you kill a subprocess using the 'kill(1)' command it gets removed from the cluster). + +By default vqsim will wait for all nodes in all partitions to reach the same +ring sequence number before returning a prompt. +There is a timeout associated with this in case of a 'node' failure, and exceeding this timeout +can (optionally) quit the program signalling an error. + +You can disable waiting using the 'sync off' command or the -n command-line option. This can easily +cause unexpected behaviour so use it with care. + +The number of votes per node is read from corosync.conf. New nodes added using the 'up' command +will copy their number of votes from the first node in corosync.conf. This may not be what you +expect and it may be fixed in future. As most clusters have only 1 vote per node (and this is +strongly recommended) then this should rarely be a problem. + +Once you have the 'vqsim> ' prompt you can type 'help' and get a list of sub-commands. + +.SH OPTIONS +.TP +.B -c +This specifies the fully qualified path to the corosync configuration file. + +The default is /etc/corosync/corosync.conf. +.TP +.B -o +Specifies the output destination. STDOUT by default. +.TP +.B -n +Don't pause after each command, come straight back to a prompt. Use with care! + +.TP +.B -h +Display a brief help message. +.SH SEE ALSO +.BR corosync (9), +.BR corosync.conf (5), +.SH AUTHOR +Christine Caulfield +.PP diff --git a/vqsim/Makefile.am b/vqsim/Makefile.am index 2a765440..9a7fbf6f 100644 --- a/vqsim/Makefile.am +++ b/vqsim/Makefile.am @@ -30,23 +30,25 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. 
-MAINTAINERCLEANFILES = Makefile.in +MAINTAINERCLEANFILES = Makefile.in if BUILD_VQSIM -noinst_PROGRAMS = vqsim +noinst_HEADERS = vqsim.h -vqsim_LDADD = $(top_builddir)/common_lib/libcorosync_common.la \ - ../exec/corosync-votequorum.o ../exec/corosync-icmap.o ../exec/corosync-logsys.o \ - ../exec/corosync-coroparse.o ../exec/corosync-logconfig.o \ - ../exec/corosync-util.o \ - $(LIBQB_LIBS) +bin_PROGRAMS = corosync-vqsim + +corosync_vqsim_LDADD = $(top_builddir)/common_lib/libcorosync_common.la \ + ../exec/corosync-votequorum.o ../exec/corosync-icmap.o \ + ../exec/corosync-coroparse.o ../exec/corosync-logconfig.o \ + ../exec/corosync-util.o ../exec/corosync-logsys.o \ + $(LIBQB_LIBS) if VQSIM_READLINE -vqsim_LDADD += -lreadline +corosync_vqsim_LDADD += -lreadline endif -vqsim_DEPENDENCIES = $(top_builddir)/common_lib/libcorosync_common.la +corosync_vqsim_DEPENDENCIES = $(top_builddir)/common_lib/libcorosync_common.la -vqsim_SOURCES = vqmain.c parser.c vq_object.c vqsim_vq_engine.c +corosync_vqsim_SOURCES = vqmain.c parser.c vq_object.c vqsim_vq_engine.c endif diff --git a/vqsim/parser.c b/vqsim/parser.c index 2fc10431..d13e6780 100644 --- a/vqsim/parser.c +++ b/vqsim/parser.c @@ -30,21 +30,31 @@ static void do_usage(void) printf(" Enable quorum device in specified nodes\n"); printf("autofence on|off\n"); printf(" automatically 'down' nodes on inquorate side on netsplit\n"); + printf("timeout (default 250)\n"); + printf(" Wait a maximum of milli-seconds for the next command to complete.\n"); + printf("sync on|off (default on)\n"); + printf(" enable/disable synchronous execution of commands (wait for completion)\n"); + printf("assert on|off (default off)\n"); + printf(" Abort the simulation run if a timeout expires\n"); printf("show Show current nodes status\n"); printf("exit\n\n"); } -typedef void (*cmd_routine_t)(int argc, char **argv); +/* Commands return 0 if they return immediately, >1 if we are waiting for replies from nodes */ +typedef int 
(*cmd_routine_t)(int argc, char **argv); -static void run_up_cmd(int argc, char **argv); -static void run_down_cmd(int argc, char **argv); -static void run_join_cmd(int argc, char **argv); -static void run_move_cmd(int argc, char **argv); -static void run_exit_cmd(int argc, char **argv); -static void run_show_cmd(int argc, char **argv); -static void run_autofence_cmd(int argc, char **argv); -static void run_qdevice_cmd(int argc, char **argv); +static int run_up_cmd(int argc, char **argv); +static int run_down_cmd(int argc, char **argv); +static int run_join_cmd(int argc, char **argv); +static int run_move_cmd(int argc, char **argv); +static int run_exit_cmd(int argc, char **argv); +static int run_show_cmd(int argc, char **argv); +static int run_timeout_cmd(int argc, char **argv); +static int run_assert_cmd(int argc, char **argv); +static int run_autofence_cmd(int argc, char **argv); +static int run_qdevice_cmd(int argc, char **argv); +static int run_sync_cmd(int argc, char **argv); static struct cmd_list_struct { const char *cmd; @@ -59,6 +69,9 @@ static struct cmd_list_struct { { "autofence", 1, run_autofence_cmd}, { "qdevice", 1, run_qdevice_cmd}, { "show", 0, run_show_cmd}, + { "timeout", 1, run_timeout_cmd}, + { "sync", 1, run_sync_cmd}, + { "assert", 1, run_assert_cmd}, { "exit", 0, run_exit_cmd}, { "quit", 0, run_exit_cmd}, { "q", 0, run_exit_cmd}, @@ -135,11 +148,16 @@ void parse_input_command(char *rl_cmd) int last_arg_start = 0; int last_was_space = 0; int len; + int ret = 0; char *cmd; /* ^D quits */ if (rl_cmd == NULL) { - run_exit_cmd(0, NULL); + (void)run_exit_cmd(0, NULL); + } + /* '#' starts a comment */ + if (rl_cmd[0] == '#') { + return; } cmd = strdup(rl_cmd); @@ -180,7 +198,8 @@ void parse_input_command(char *rl_cmd) /* Ignore null commands */ if (strlen(argv[0]) == 0) { free(cmd); - return; + resume_kb_input(0); + return; } #ifdef HAVE_READLINE_HISTORY_H add_history(rl_cmd); @@ -193,7 +212,7 @@ void parse_input_command(char *rl_cmd) if (argc < 
cmd_list[i].min_args) { break; } - cmd_list[i].cmd_runner(argc, argv); + ret = cmd_list[i].cmd_runner(argc, argv); valid_cmd = 1; } } @@ -201,64 +220,86 @@ void parse_input_command(char *rl_cmd) do_usage(); } free(cmd); + + /* ret==0 means we can return immediately to command-line input */ + if (ret == 0) { + resume_kb_input(ret); + } } -static void run_up_cmd(int argc, char **argv) +static int run_up_cmd(int argc, char **argv) { int partition; int num_nodes; int *nodelist; int i,j; + int succeeded = 0; if (argc <= 1) { - return; + return 0; } + cmd_start_sync_command(); + for (i=1; i "); + fflush(stdout); + } else { + printf("#vqsim> "); + fflush(stdout); + } + +} + +void resume_kb_input(int show_status) +{ + /* If running synchronously, we don't display + the quorum messages as they come in. So run 'show' commamnd + */ + if (show_status && waiting_for_sync) { + cmd_show_node_states(); + } + + waiting_for_sync = 0; + + if (qb_loop_poll_add(poll_loop, + QB_LOOP_MED, + STDIN_FILENO, + POLLIN | POLLERR, + NULL, + stdin_read_fn)) { + if (errno != EEXIST) { + perror("qb_loop_poll_add1 returned error"); + } + } + /* Always shows the prompt here, cos we cleared waiting_for_sync */ + cmd_show_prompt_if_needed(); +} + +/* Return true (1) if all nodes in each partition have the same ring id, false(0) otherwise */ +static int all_nodes_consistent(void) +{ + int i; + struct vq_node *vqn; + struct memb_ring_id last_ring_id; + + for (i=0; ilast_ring_id.seq) { + return 0; + } + last_ring_id.seq = vqn->last_ring_id.seq; + } + } + return 1; +} + static int vq_parent_read_fn(int32_t fd, int32_t revents, void *data) { char msgbuf[8192]; @@ -162,13 +222,18 @@ static int vq_parent_read_fn(int32_t fd, int32_t revents, void *data) msg = (void*)msgbuf; switch (msg->type) { case VQMSG_QUORUM: - if (!nosync && --wait_count_to_unblock <= 0) - qb_loop_timer_del(poll_loop, kb_timer); qmsg = (void*)msgbuf; save_quorum_state(vqn, qmsg); - print_quorum_state(vqn); - if (!nosync && 
wait_count_to_unblock <= 0) - start_kb_input(); + if (!sync_cmds) { + print_quorum_state(vqn); + } + + /* Have the partitions stabilised? */ + if (sync_cmds && waiting_for_sync && + all_nodes_consistent()) { + qb_loop_timer_del(poll_loop, kb_timer); + resume_kb_input(sync_cmds); + } break; case VQMSG_EXEC: /* Message from votequorum, pass around the partition */ @@ -204,7 +269,7 @@ static int read_corosync_conf(void) logsys_format_set(NULL); res = coroparse_configparse(icmap_get_global_map(), &error_string); if (res == -1) { - log_printf (LOGSYS_LEVEL_INFO, "Error loading corosyc.conf %s", error_string); + log_printf (LOGSYS_LEVEL_INFO, "Error loading corosync.conf %s", error_string); return -1; } else { @@ -234,8 +299,6 @@ static void remove_node(struct vq_node *node) TAILQ_REMOVE(&part->nodelist, node, entries); free(node); - wait_count--; - /* Rebuild quorum */ send_partition_to_nodes(part, 1); } @@ -263,7 +326,7 @@ static int32_t sigchld_handler(int32_t sig, void *data) sprintf(text, "(exit code %d)", WEXITSTATUS(status)); break; } - printf("%d:%02d Quit %s\n", vqn->partition->num, vqn->nodeid, exit_status); + printf("%d:%02d: Quit %s\n", vqn->partition->num, vqn->nodeid, exit_status); remove_node(vqn); } @@ -322,20 +385,24 @@ static void init_partitions(void) } } +static int nodes_in_partition(int part) +{ + struct vq_node *vqn; + int partnodes = 0; + + TAILQ_FOREACH(vqn, &partitions[part].nodelist, entries) { + partnodes++; + } + return partnodes; +} + + static pid_t create_node(int nodeid, int partno) { struct vq_node *newvq; newvq = malloc(sizeof(struct vq_node)); if (newvq) { - if (!nosync) { - /* Number of expected "quorum" vq messages is a square - of the total nodes count, so increment the node - counter and set new square of this value as - a "to observe" counter */ - wait_count++; - wait_count_to_unblock = wait_count * wait_count; - } newvq->last_quorate = -1; /* mark "uninitialized" */ newvq->instance = vq_create_instance(poll_loop, nodeid); if 
(!newvq->instance) { @@ -439,29 +506,39 @@ static struct vq_node *find_by_pid(pid_t pid) } /* Routines called from the parser */ -void cmd_start_new_node(int nodeid, int partition) + + +/* + * The parser calls this before running a command where + * we might have to wait for a result to come back. + */ +void cmd_start_sync_command() +{ + if (sync_cmds) { + qb_loop_poll_del(poll_loop, STDIN_FILENO); + qb_loop_timer_add(poll_loop, + QB_LOOP_MED, + command_timeout, + NULL, + finish_wait_timeout, + &kb_timer); + waiting_for_sync = 1; + } +} + +int cmd_start_new_node(int nodeid, int partition) { struct vq_node *node; node = find_node(nodeid); if (node) { fprintf(stderr, "ERR: nodeid %d already exists in partition %d\n", nodeid, node->partition->num); - return; + return -1; } - qb_loop_poll_del(poll_loop, STDIN_FILENO); - create_node(nodeid, partition); - if (!nosync) { - /* Delay kb input handling by 0.25 second when we've just - added a node; expect that the delay will be cancelled - substantially earlier once it has reported its quorum info - (the delay is in fact a failsafe input enabler here) */ - qb_loop_timer_add(poll_loop, - QB_LOOP_MED, - 250000000, - NULL, - start_kb_input_timeout, - &kb_timer); + if (create_node(nodeid, partition) == -1) { + return -1; } + return 0; } void cmd_stop_all_nodes() @@ -489,20 +566,21 @@ void cmd_show_node_states() fprintf(output_file, "#autofence: %s\n", autofence?"on":"off"); } -void cmd_stop_node(int nodeid) +int cmd_stop_node(int nodeid) { struct vq_node *node; node = find_node(nodeid); if (!node) { fprintf(stderr, "ERR: nodeid %d is not up\n", nodeid); - return; + return -1; } /* Remove processor */ vq_quit(node->instance); /* Node will be removed when the child process exits */ + return 0; } /* Move all nodes in 'nodelist' into partition 'partition' */ @@ -510,6 +588,13 @@ void cmd_move_nodes(int partition, int num_nodes, int *nodelist) { int i; struct vq_node *node; + struct vq_node *vqn; + int total_nodes = num_nodes; + + /* 
Work out the number of nodes affected */ + TAILQ_FOREACH(vqn, &partitions[partition].nodelist, entries) { + total_nodes++; + } for (i=0; i "); - fflush(stdout); - } } #endif @@ -617,50 +732,26 @@ static int stdin_read_fn(int32_t fd, int32_t revents, void *data) return 0; } -static void start_kb_input(void) -{ - wait_count_to_unblock = 0; - -#ifdef HAVE_READLINE_READLINE_H - /* Readline will deal with completed lines when they arrive */ - rl_callback_handler_install("vqsim> ", parse_input_command); -#else - if (is_tty) { - printf("vqsim> "); - fflush(stdout); - } -#endif - - /* Send stdin to readline */ - if (qb_loop_poll_add(poll_loop, - QB_LOOP_MED, - STDIN_FILENO, - POLLIN | POLLERR, - NULL, - stdin_read_fn)) { - if (errno != EEXIST) { - perror("qb_loop_poll_add1 returned error"); - } - } -} static void start_kb_input_timeout(void *data) { -// fprintf(stderr, "Waiting for nodes to report status timed out\n"); - start_kb_input(); + resume_kb_input(1); } static void usage(char *program) { printf("Usage:\n"); printf("\n"); - printf("%s [-f ] [-o ]\n", program); + printf("%s [-c ] [-o ]\n", program); printf("\n"); - printf(" -f config file. defaults to /etc/corosync/corosync.conf\n"); + printf(" -c config file. defaults to /etc/corosync/corosync.conf\n"); printf(" -o output file. 
defaults to stdout\n"); printf(" -n no synchronization (on adding a node)\n"); printf(" -h display this help text\n"); printf("\n"); + printf("%s always takes input from STDIN, but cannot use a file.\n", program); + printf("If you want to script it then use\n cat | %s\n", program); + printf("\n"); } int main(int argc, char **argv) @@ -669,16 +760,16 @@ int main(int argc, char **argv) int ch; char *output_file_name = NULL; - while ((ch = getopt (argc, argv, "f:o:nh")) != EOF) { + while ((ch = getopt (argc, argv, "c:o:nh")) != EOF) { switch (ch) { - case 'f': + case 'c': strncpy(corosync_config_file, optarg, sizeof(corosync_config_file)); break; case 'o': output_file_name = optarg; break; case 'n': - nosync = 1; + sync_cmds = 0; break; default: usage(argv[0]); @@ -696,9 +787,8 @@ int main(int argc, char **argv) else { output_file = stdout; } -#ifndef HAVE_READLINE_READLINE_H + is_tty = isatty(STDIN_FILENO); -#endif qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FUNCTION, "*", LOG_DEBUG); @@ -717,9 +807,26 @@ int main(int argc, char **argv) sigchld_handler, &sigchld_qb_handle); - /* Create a full cluster of nodes from corosync.conf */ + +#ifdef HAVE_READLINE_READLINE_H + /* Readline will deal with completed lines when they arrive */ + /* + * For scripting add '#' to the start of the prompt so that + * parsers can ignore input lines + */ + rl_already_prompted = 1; + if (is_tty) { + rl_callback_handler_install("vqsim> ", parse_input_command); + } else { + rl_callback_handler_install("#vqsim> ", parse_input_command); + } +#endif + + + +/* Create a full cluster of nodes from corosync.conf */ read_corosync_conf(); - if (create_nodes_from_config() && !nosync) { + if (create_nodes_from_config() && sync_cmds) { /* Delay kb input handling by 1 second when we've just added the nodes from corosync.conf; expect that the delay will be cancelled substantially earlier @@ -731,8 +838,9 @@ int main(int argc, char **argv) NULL, start_kb_input_timeout, &kb_timer); + 
waiting_for_sync = 1; } else { - start_kb_input(); + resume_kb_input(0); } qb_loop_run(poll_loop); diff --git a/vqsim/vqsim.h b/vqsim/vqsim.h index 0c999d74..0c4c9738 100644 --- a/vqsim/vqsim.h +++ b/vqsim/vqsim.h @@ -66,12 +66,17 @@ int fork_new_instance(int nodeid, int *vq_sock, pid_t *child_pid); void parse_input_command(char *cmd); /* These are in vqmain.c */ -void cmd_stop_node(int nodeid); +int cmd_stop_node(int nodeid); void cmd_stop_all_nodes(void); -void cmd_start_new_node(int nodeid, int partition); +int cmd_start_new_node(int nodeid, int partition); void cmd_set_autofence(int onoff); +void cmd_set_sync(int onoff); +void cmd_set_assert(int onoff); void cmd_move_nodes(int partition, int num_nodes, int *nodelist); void cmd_join_partitions(int part1, int part2); void cmd_update_all_partitions(int newring); void cmd_qdevice_poll(int nodeid, int onoff); void cmd_show_node_states(void); +void cmd_set_timeout(uint64_t seconds); +void cmd_start_sync_command(void); +void resume_kb_input(int show_state); diff --git a/vqsim/vqsim_vq_engine.c b/vqsim/vqsim_vq_engine.c index cbe1d471..eb35d356 100644 --- a/vqsim/vqsim_vq_engine.c +++ b/vqsim/vqsim_vq_engine.c @@ -191,6 +191,7 @@ static void set_local_node_pos(struct corosync_api_v1 *api) uint32_t nodeid; const char *iter_key; int res; + int found = 0; iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { @@ -205,13 +206,18 @@ static void set_local_node_pos(struct corosync_api_v1 *api) res = icmap_get_uint32(iter_key, &nodeid); if (res == CS_OK) { if (nodeid == our_nodeid) { + found = 1; res = icmap_set_uint32("nodelist.local_node_pos", node_pos); - if (res != CS_OK) { - fprintf(stderr, "Failed to find node %d in corosync.conf. 
Quorum calculations may not be correct:\n", our_nodeid); - } } } } + if (!found) { + /* This probably indicates a dynamically-added node + * set the pos to zero and use the votes of the + * first node in corosync.conf + */ + res = icmap_set_uint32("nodelist.local_node_pos", 0); + } } static int load_quorum_instance(struct corosync_api_v1 *api)