mirror of
https://git.proxmox.com/git/mirror_corosync
synced 2025-10-04 09:48:10 +00:00
vqsim: Enhance vqsim
1. Enable scripting of vqsim and add man page I've added a 'sleep' command to help with scripting as well as documentation on how to do it. 2. Make 'sync' operation much more robust and useful Refactored a lot of code to make sure that in sync mode the prompt appears at the 'right' time. What we do is wait for all of the nodes in all partitions to have the same ring_id. If this doesn't happen then the timeout will fire as before. 3. Rename binary to corosync-vqsim and add a sub-package for it Signed-off-by: Christine Caulfield <ccaulfie@redhat.com> Reviewed-by: Jan Friesse <jfriesse@redhat.com>
This commit is contained in:
parent
01ce5a96ef
commit
c3d69712c6
@ -470,6 +470,7 @@ if test "x${enable_vqsim}" = xyes; then
|
||||
[],
|
||||
AC_MSG_WARN([vqsim will lack readline support]))
|
||||
PACKAGE_FEATURES="$PACKAGE_FEATURES vqsim"
|
||||
WITH_LIST="$WITH_LIST --with vqsim"
|
||||
fi
|
||||
AM_CONDITIONAL(VQSIM_READLINE, [test "x${ac_cv_header_readline_readline_h}" = xyes])
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
%bcond_with systemd
|
||||
%bcond_with xmlconf
|
||||
%bcond_with nozzle
|
||||
%bcond_with vqsim
|
||||
%bcond_with runautogen
|
||||
|
||||
%global gitver %{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}
|
||||
@ -70,6 +71,9 @@ Requires(preun): /sbin/chkconfig
|
||||
%if %{with xmlconf}
|
||||
Requires: libxslt
|
||||
%endif
|
||||
%if %{with vqsim}
|
||||
BuildRequires: readline-devel
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%setup -q -n %{name}-%{version}%{?gittarver}
|
||||
@ -100,6 +104,9 @@ Requires: libxslt
|
||||
%endif
|
||||
%if %{with nozzle}
|
||||
--enable-nozzle \
|
||||
%endif
|
||||
%if %{with vqsim}
|
||||
--enable-vqsim \
|
||||
%endif
|
||||
--with-initddir=%{_initrddir} \
|
||||
--with-systemddir=%{_unitdir} \
|
||||
@ -266,6 +273,25 @@ The Corosync Cluster Engine APIs.
|
||||
%{_mandir}/man3/sam_*3*
|
||||
%{_mandir}/man3/cmap_*3*
|
||||
|
||||
%if %{with vqsim}
|
||||
%package -n corosync-vqsim
|
||||
Summary: The Corosync Cluster Engine - Votequorum Simulator
|
||||
Requires: corosynclib%{?_isa} = %{version}-%{release}
|
||||
Requires: pkgconfig
|
||||
|
||||
%description -n corosync-vqsim
|
||||
A command-line simulator for the corosync votequorum subsystem.
|
||||
It uses the same code as the corosync quorum system but forks
|
||||
them into subprocesses to simulate nodes.
|
||||
Nodes can be added and removed as well as partitioned (to simulate
|
||||
network splits)
|
||||
|
||||
%files -n corosync-vqsim
|
||||
%doc LICENSE
|
||||
%{_bindir}/corosync-vqsim
|
||||
%{_mandir}/man8/corosync-vqsim.8*
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* @date@ Autotools generated version <nobody@nowhere.org> - @version@-1-@numcomm@.@alphatag@.@dirty@
|
||||
- Autotools generated version
|
||||
|
@ -139,6 +139,10 @@ dist_man_MANS = corosync.conf.5 \
|
||||
cmap_overview.3 \
|
||||
cmap_keys.8
|
||||
|
||||
if BUILD_VQSIM
|
||||
dist_man_MANS += corosync-vqsim.8
|
||||
endif
|
||||
|
||||
if INSTALL_XMLCONF
|
||||
dist_man_MANS += $(xml_man)
|
||||
endif
|
||||
|
94
man/corosync-vqsim.8
Normal file
94
man/corosync-vqsim.8
Normal file
@ -0,0 +1,94 @@
|
||||
.\"/*
|
||||
.\" * Copyright (C) 2019 Red Hat, Inc.
|
||||
.\" *
|
||||
.\" * All rights reserved.
|
||||
.\" *
|
||||
.\" * Author: Christine Caulfield <ccaulfie@redhat.com>
|
||||
.\" *
|
||||
.\" * This software licensed under BSD license, the text of which follows:
|
||||
.\" *
|
||||
.\" * Redistribution and use in source and binary forms, with or without
|
||||
.\" * modification, are permitted provided that the following conditions are met:
|
||||
.\" *
|
||||
.\" * - Redistributions of source code must retain the above copyright notice,
|
||||
.\" * this list of conditions and the following disclaimer.
|
||||
.\" * - Redistributions in binary form must reproduce the above copyright notice,
|
||||
.\" * this list of conditions and the following disclaimer in the documentation
|
||||
.\" * and/or other materials provided with the distribution.
|
||||
.\" * - Neither the name of the MontaVista Software, Inc. nor the names of its
|
||||
.\" * contributors may be used to endorse or promote products derived from this
|
||||
.\" * software without specific prior written permission.
|
||||
.\" *
|
||||
.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
.\" * THE POSSIBILITY OF SUCH DAMAGE.
|
||||
.\" */
|
||||
.TH COROSYNC-VQSIM 8 2019-05-10
|
||||
.SH NAME
|
||||
corosync-vqsim \- The votequorum simulator
|
||||
.SH SYNOPSIS
|
||||
.B "corosync-vqsim [\-c config_file] [\-o output_file] [\-n] [\-h]"
|
||||
.SH DESCRIPTION
|
||||
.B corosync-vqsim
|
||||
simulates the quorum functions of corosync in a single program. It can simulate
|
||||
multiple nodes, network splits and a basic quorum device.
|
||||
|
||||
By default vqsim will build a virtual cluster of all the nodes in the corosync.conf file,
|
||||
each 'node' running in a forked subprocess (and thus asynchronously). It then provides a
|
||||
command-line interface to add (up) or remove (down) nodes, and cause network splits and
|
||||
rejoins. After each event it shows the new quorum status for all nodes.
|
||||
|
||||
Nodes in vqsim are always referred to by their nodeid (the IP address is meaningless) and
|
||||
optionally by a 'partition' which precedes the nodeid with a colon. By default all nodes
|
||||
are in partition 0. Nodes can be moved between partitions using the split and join commands.
|
||||
Multiple nodes can be split and joined at the same time.
|
||||
|
||||
To script vqsim you must send input to it via a pipe rather than just redirecting STDIN. This
|
||||
is because it runs asynchronously to enable the virtual 'nodes' to report status when needed.
|
||||
(e.g. if you kill a subprocess using the 'kill(1)' command, it is removed from the cluster).
|
||||
|
||||
By default vqsim will wait for all nodes in all partitions to reach the same
|
||||
ring sequence number before returning a prompt.
|
||||
There is a timeout associated with this in case of a 'node' failure, and exceeding this timeout
|
||||
can (optionally) quit the program signalling an error.
|
||||
|
||||
You can disable waiting using the 'sync off' command or the -n command-line option. This can easily
|
||||
cause unexpected behaviour so use it with care.
|
||||
|
||||
The number of votes per node is read from corosync.conf. New nodes added using the 'up' command
|
||||
will copy their number of votes from the first node in corosync.conf. This may not be what you
|
||||
expect and I might fix it in future. As most clusters have only 1 vote per node (and this is
|
||||
strongly recommended) then this should rarely be a problem.
|
||||
|
||||
Once you have the 'vqsim> ' prompt you can type 'help' and get a list of sub-commands.
|
||||
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B -c
|
||||
This specifies the fully qualified path to the corosync configuration file.
|
||||
|
||||
The default is /etc/corosync/corosync.conf.
|
||||
.TP
|
||||
.B -o
|
||||
Specifies the output destination. STDOUT by default.
|
||||
.TP
|
||||
.B -n
|
||||
Don't pause after each command, come straight back to a prompt. Use with care!
|
||||
|
||||
.TP
|
||||
.B -h
|
||||
Display a brief help message
|
||||
.SH SEE ALSO
|
||||
.BR corosync (8),
|
||||
.BR corosync.conf (5)
|
||||
.SH AUTHOR
|
||||
Christine Caulfield
|
||||
.PP
|
@ -30,23 +30,25 @@
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
# THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
MAINTAINERCLEANFILES = Makefile.in
|
||||
MAINTAINERCLEANFILES = Makefile.in
|
||||
|
||||
if BUILD_VQSIM
|
||||
|
||||
noinst_PROGRAMS = vqsim
|
||||
noinst_HEADERS = vqsim.h
|
||||
|
||||
vqsim_LDADD = $(top_builddir)/common_lib/libcorosync_common.la \
|
||||
../exec/corosync-votequorum.o ../exec/corosync-icmap.o ../exec/corosync-logsys.o \
|
||||
../exec/corosync-coroparse.o ../exec/corosync-logconfig.o \
|
||||
../exec/corosync-util.o \
|
||||
$(LIBQB_LIBS)
|
||||
bin_PROGRAMS = corosync-vqsim
|
||||
|
||||
corosync_vqsim_LDADD = $(top_builddir)/common_lib/libcorosync_common.la \
|
||||
../exec/corosync-votequorum.o ../exec/corosync-icmap.o \
|
||||
../exec/corosync-coroparse.o ../exec/corosync-logconfig.o \
|
||||
../exec/corosync-util.o ../exec/corosync-logsys.o \
|
||||
$(LIBQB_LIBS)
|
||||
if VQSIM_READLINE
|
||||
vqsim_LDADD += -lreadline
|
||||
corosync_vqsim_LDADD += -lreadline
|
||||
endif
|
||||
|
||||
vqsim_DEPENDENCIES = $(top_builddir)/common_lib/libcorosync_common.la
|
||||
corosync_vqsim_DEPENDENCIES = $(top_builddir)/common_lib/libcorosync_common.la
|
||||
|
||||
vqsim_SOURCES = vqmain.c parser.c vq_object.c vqsim_vq_engine.c
|
||||
corosync_vqsim_SOURCES = vqmain.c parser.c vq_object.c vqsim_vq_engine.c
|
||||
|
||||
endif
|
||||
|
141
vqsim/parser.c
141
vqsim/parser.c
@ -30,21 +30,31 @@ static void do_usage(void)
|
||||
printf(" Enable quorum device in specified nodes\n");
|
||||
printf("autofence on|off\n");
|
||||
printf(" automatically 'down' nodes on inquorate side on netsplit\n");
|
||||
printf("timeout <n> (default 250)\n");
|
||||
printf(" Wait a maximum of <n> milli-seconds for the next command to complete.\n");
|
||||
printf("sync on|off (default on)\n");
|
||||
printf(" enable/disable synchronous execution of commands (wait for completion)\n");
|
||||
printf("assert on|off (default off)\n");
|
||||
printf(" Abort the simulation run if a timeout expires\n");
|
||||
printf("show Show current nodes status\n");
|
||||
printf("exit\n\n");
|
||||
}
|
||||
|
||||
|
||||
typedef void (*cmd_routine_t)(int argc, char **argv);
|
||||
/* Commands return 0 if they return immediately, >0 if we are waiting for replies from nodes */
|
||||
typedef int (*cmd_routine_t)(int argc, char **argv);
|
||||
|
||||
static void run_up_cmd(int argc, char **argv);
|
||||
static void run_down_cmd(int argc, char **argv);
|
||||
static void run_join_cmd(int argc, char **argv);
|
||||
static void run_move_cmd(int argc, char **argv);
|
||||
static void run_exit_cmd(int argc, char **argv);
|
||||
static void run_show_cmd(int argc, char **argv);
|
||||
static void run_autofence_cmd(int argc, char **argv);
|
||||
static void run_qdevice_cmd(int argc, char **argv);
|
||||
static int run_up_cmd(int argc, char **argv);
|
||||
static int run_down_cmd(int argc, char **argv);
|
||||
static int run_join_cmd(int argc, char **argv);
|
||||
static int run_move_cmd(int argc, char **argv);
|
||||
static int run_exit_cmd(int argc, char **argv);
|
||||
static int run_show_cmd(int argc, char **argv);
|
||||
static int run_timeout_cmd(int argc, char **argv);
|
||||
static int run_assert_cmd(int argc, char **argv);
|
||||
static int run_autofence_cmd(int argc, char **argv);
|
||||
static int run_qdevice_cmd(int argc, char **argv);
|
||||
static int run_sync_cmd(int argc, char **argv);
|
||||
|
||||
static struct cmd_list_struct {
|
||||
const char *cmd;
|
||||
@ -59,6 +69,9 @@ static struct cmd_list_struct {
|
||||
{ "autofence", 1, run_autofence_cmd},
|
||||
{ "qdevice", 1, run_qdevice_cmd},
|
||||
{ "show", 0, run_show_cmd},
|
||||
{ "timeout", 1, run_timeout_cmd},
|
||||
{ "sync", 1, run_sync_cmd},
|
||||
{ "assert", 1, run_assert_cmd},
|
||||
{ "exit", 0, run_exit_cmd},
|
||||
{ "quit", 0, run_exit_cmd},
|
||||
{ "q", 0, run_exit_cmd},
|
||||
@ -135,11 +148,16 @@ void parse_input_command(char *rl_cmd)
|
||||
int last_arg_start = 0;
|
||||
int last_was_space = 0;
|
||||
int len;
|
||||
int ret = 0;
|
||||
char *cmd;
|
||||
|
||||
/* ^D quits */
|
||||
if (rl_cmd == NULL) {
|
||||
run_exit_cmd(0, NULL);
|
||||
(void)run_exit_cmd(0, NULL);
|
||||
}
|
||||
/* '#' starts a comment */
|
||||
if (rl_cmd[0] == '#') {
|
||||
return;
|
||||
}
|
||||
|
||||
cmd = strdup(rl_cmd);
|
||||
@ -180,7 +198,8 @@ void parse_input_command(char *rl_cmd)
|
||||
/* Ignore null commands */
|
||||
if (strlen(argv[0]) == 0) {
|
||||
free(cmd);
|
||||
return;
|
||||
resume_kb_input(0);
|
||||
return;
|
||||
}
|
||||
#ifdef HAVE_READLINE_HISTORY_H
|
||||
add_history(rl_cmd);
|
||||
@ -193,7 +212,7 @@ void parse_input_command(char *rl_cmd)
|
||||
if (argc < cmd_list[i].min_args) {
|
||||
break;
|
||||
}
|
||||
cmd_list[i].cmd_runner(argc, argv);
|
||||
ret = cmd_list[i].cmd_runner(argc, argv);
|
||||
valid_cmd = 1;
|
||||
}
|
||||
}
|
||||
@ -201,64 +220,86 @@ void parse_input_command(char *rl_cmd)
|
||||
do_usage();
|
||||
}
|
||||
free(cmd);
|
||||
|
||||
/* ret==0 means we can return immediately to command-line input */
|
||||
if (ret == 0) {
|
||||
resume_kb_input(ret);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * 'up' command: start every node named in the partition:nodelist
 * arguments.
 *
 * Returns the number of nodes that were started successfully; 0 tells
 * the caller it can go straight back to the prompt.
 */
static int run_up_cmd(int argc, char **argv)
{
	int arg;
	int started = 0;

	if (argc <= 1) {
		return 0;
	}

	/* We may have to wait for the new nodes to report in */
	cmd_start_sync_command();

	for (arg = 1; arg < argc; arg++) {
		int partition;
		int num_nodes;
		int *nodelist;
		int n;

		if (parse_partition_nodelist(argv[arg], &partition, &num_nodes, &nodelist) != 0) {
			continue;
		}

		for (n = 0; n < num_nodes; n++) {
			if (cmd_start_new_node(nodelist[n], partition) == 0) {
				started++;
			}
		}
		free(nodelist);
	}
	return started;
}
|
||||
|
||||
/*
 * 'down' command: ask each node listed on the command line to stop.
 *
 * Every argument after the command name is treated as a separate nodeid.
 * Returns the number of nodes for which cmd_stop_node() succeeded; 0 tells
 * the caller it can go straight back to the prompt.
 */
static int run_down_cmd(int argc, char **argv)
{
	int nodeid;
	int i;
	int succeeded = 0;

	/* We may have to wait for the remaining nodes to settle */
	cmd_start_sync_command();

	for (i=1; i<argc; i++) {
		/* Use the i'th argument, not always the first one */
		nodeid = atoi(argv[i]);
		if (!cmd_stop_node(nodeid)) {
			succeeded++;
		}
	}
	return succeeded;
}
|
||||
|
||||
/*
 * 'join' command: merge the partitions given as the second and later
 * arguments into the partition given as the first argument, then send
 * the updated membership to all partitions with a new ring.
 *
 * Returns 1 when a join was started (the caller waits for the nodes),
 * 0 when the arguments were insufficient.
 */
static int run_join_cmd(int argc, char **argv)
{
	int i;

	/* argv[0] is the command itself, so two partitions means argc >= 3 */
	if (argc < 3) {
		printf("join needs at least two partition numbers\n");
		return 0;
	}

	cmd_start_sync_command();

	for (i=2; i<argc; i++) {
		cmd_join_partitions(atoi(argv[1]), atoi(argv[i]));
	}
	cmd_update_all_partitions(1);
	return 1;
}
|
||||
|
||||
static void run_move_cmd(int argc, char **argv)
|
||||
static int run_move_cmd(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
int partition;
|
||||
int num_nodes;
|
||||
int *nodelist;
|
||||
|
||||
cmd_start_sync_command();
|
||||
|
||||
for (i=1; i<argc; i++) {
|
||||
if (parse_partition_nodelist(argv[i], &partition, &num_nodes, &nodelist) == 0) {
|
||||
cmd_move_nodes(partition, num_nodes, nodelist);
|
||||
@ -266,9 +307,10 @@ static void run_move_cmd(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
cmd_update_all_partitions(1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void run_autofence_cmd(int argc, char **argv)
|
||||
static int run_autofence_cmd(int argc, char **argv)
|
||||
{
|
||||
int onoff = -1;
|
||||
|
||||
@ -284,9 +326,10 @@ static void run_autofence_cmd(int argc, char **argv)
|
||||
else {
|
||||
cmd_set_autofence(onoff);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void run_qdevice_cmd(int argc, char **argv)
|
||||
static int run_qdevice_cmd(int argc, char **argv)
|
||||
{
|
||||
int i,j;
|
||||
int partition;
|
||||
@ -303,7 +346,7 @@ static void run_qdevice_cmd(int argc, char **argv)
|
||||
|
||||
if (onoff == -1) {
|
||||
fprintf(stderr, "ERR: qdevice should be 'on' or 'off'\n");
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i=2; i<argc; i++) {
|
||||
@ -315,17 +358,61 @@ static void run_qdevice_cmd(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
cmd_update_all_partitions(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * 'show' command: print the current state of the nodes.
 * Always returns 0 (nothing to wait for).
 */
static int run_show_cmd(int argc, char **argv)
{
	cmd_show_node_states();

	return 0;
}
|
||||
|
||||
static void run_exit_cmd(int argc, char **argv)
|
||||
/*
 * 'timeout' command: set the maximum time, in milli-seconds, to wait
 * for a command to complete.
 *
 * The value must be a non-negative decimal number; anything else is
 * reported and ignored rather than being silently turned into 0 (or,
 * for negative input, a huge unsigned value).
 * Always returns 0 (nothing to wait for).
 */
static int run_timeout_cmd(int argc, char **argv)
{
	char *end;
	long msecs;

	if (argc < 2) {
		fprintf(stderr, "ERR: timeout needs a value in milli-seconds\n");
		return 0;
	}

	msecs = strtol(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0' || msecs < 0) {
		fprintf(stderr, "ERR: timeout value must be a non-negative number of milli-seconds\n");
		return 0;
	}

	cmd_set_timeout((uint64_t)msecs);
	return 0;
}
|
||||
|
||||
/*
 * 'sync' command: turn synchronous command execution on or off.
 * The argument is matched case-insensitively against "on" and "off";
 * anything else is reported as an error and leaves the setting alone.
 * Always returns 0 (nothing to wait for).
 */
static int run_sync_cmd(int argc, char **argv)
{
	const char *value = argv[1];

	if (strcasecmp(value, "on") == 0) {
		cmd_set_sync(1);
	} else if (strcasecmp(value, "off") == 0) {
		cmd_set_sync(0);
	} else {
		fprintf(stderr, "ERR: sync value must be 'on' or 'off'\n");
	}
	return 0;
}
|
||||
|
||||
/*
 * 'assert' command: choose whether an expired command timeout aborts
 * the simulation. The argument is matched case-insensitively against
 * "on" and "off"; anything else is reported as an error and leaves the
 * setting alone.
 * Always returns 0 (nothing to wait for).
 */
static int run_assert_cmd(int argc, char **argv)
{
	const char *value = argv[1];

	if (strcasecmp(value, "on") == 0) {
		cmd_set_assert(1);
	} else if (strcasecmp(value, "off") == 0) {
		cmd_set_assert(0);
	} else {
		fprintf(stderr, "ERR: assert value must be 'on' or 'off'\n");
	}
	return 0;
}
|
||||
|
||||
/*
 * 'exit' / 'quit' / 'q' command: stop all the nodes and terminate
 * the program with status 0. Does not return to the caller.
 */
static int run_exit_cmd(int argc, char **argv)
{
	cmd_stop_all_nodes();

	exit(0);
}
|
||||
|
||||
|
||||
|
274
vqsim/vqmain.c
274
vqsim/vqmain.c
@ -25,6 +25,7 @@
|
||||
/* Easier than including the config file with a ton of conflicting dependencies */
|
||||
extern int coroparse_configparse (icmap_map_t config_map, const char **error_string);
|
||||
extern int corosync_log_config_read (const char **error_string);
|
||||
static int stdin_read_fn(int32_t fd, int32_t revents, void *data);
|
||||
|
||||
/* 'Keep the compiler happy' time */
|
||||
const char *corosync_get_config_file(void);
|
||||
@ -56,21 +57,22 @@ static qb_loop_t *poll_loop;
|
||||
static int autofence;
|
||||
static int check_for_quorum;
|
||||
static FILE *output_file;
|
||||
static int nosync;
|
||||
static int sync_cmds = 1;
|
||||
static qb_loop_timer_handle kb_timer;
|
||||
static ssize_t wait_count;
|
||||
static ssize_t wait_count_to_unblock;
|
||||
static int waiting_for_sync = 0;
|
||||
static int is_tty;
|
||||
static int assert_on_timeout;
|
||||
static uint64_t command_timeout = 250000000L;
|
||||
|
||||
static struct vq_node *find_by_pid(pid_t pid);
|
||||
static void send_partition_to_nodes(struct vq_partition *partition, int newring);
|
||||
static void start_kb_input(void);
|
||||
static void start_kb_input_timeout(void *data);
|
||||
static void finish_wait_timeout(void *data);
|
||||
|
||||
#ifndef HAVE_READLINE_READLINE_H
|
||||
#define INPUT_BUF_SIZE 1024
|
||||
static char input_buf[INPUT_BUF_SIZE];
|
||||
static size_t input_buf_term = 0;
|
||||
static int is_tty;
|
||||
#endif
|
||||
|
||||
/* 'Keep the compiler happy' time */
|
||||
@ -78,7 +80,6 @@ static char corosync_config_file[PATH_MAX + 1] = COROSYSCONFDIR "/corosync.conf"
|
||||
|
||||
const char *corosync_get_config_file(void)
|
||||
{
|
||||
|
||||
return (corosync_config_file);
|
||||
}
|
||||
|
||||
@ -144,6 +145,65 @@ static void propogate_vq_message(struct vq_node *vqn, const char *msg, int len)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void cmd_show_prompt_if_needed(void)
|
||||
{
|
||||
qb_loop_timer_del(poll_loop, kb_timer);
|
||||
if (is_tty) {
|
||||
printf("vqsim> ");
|
||||
fflush(stdout);
|
||||
} else {
|
||||
printf("#vqsim> ");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void resume_kb_input(int show_status)
|
||||
{
|
||||
/* If running synchronously, we don't display
|
||||
the quorum messages as they come in. So run 'show' commamnd
|
||||
*/
|
||||
if (show_status && waiting_for_sync) {
|
||||
cmd_show_node_states();
|
||||
}
|
||||
|
||||
waiting_for_sync = 0;
|
||||
|
||||
if (qb_loop_poll_add(poll_loop,
|
||||
QB_LOOP_MED,
|
||||
STDIN_FILENO,
|
||||
POLLIN | POLLERR,
|
||||
NULL,
|
||||
stdin_read_fn)) {
|
||||
if (errno != EEXIST) {
|
||||
perror("qb_loop_poll_add1 returned error");
|
||||
}
|
||||
}
|
||||
/* Always shows the prompt here, cos we cleared waiting_for_sync */
|
||||
cmd_show_prompt_if_needed();
|
||||
}
|
||||
|
||||
/* Return true (1) if all nodes in each partition have the same ring id, false(0) otherwise */
|
||||
static int all_nodes_consistent(void)
|
||||
{
|
||||
int i;
|
||||
struct vq_node *vqn;
|
||||
struct memb_ring_id last_ring_id;
|
||||
|
||||
for (i=0; i<MAX_PARTITIONS; i++) {
|
||||
memset(&last_ring_id, 0, sizeof(last_ring_id));
|
||||
TAILQ_FOREACH(vqn, &partitions[i].nodelist, entries) {
|
||||
if (last_ring_id.seq &&
|
||||
last_ring_id.seq != vqn->last_ring_id.seq) {
|
||||
return 0;
|
||||
}
|
||||
last_ring_id.seq = vqn->last_ring_id.seq;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int vq_parent_read_fn(int32_t fd, int32_t revents, void *data)
|
||||
{
|
||||
char msgbuf[8192];
|
||||
@ -162,13 +222,18 @@ static int vq_parent_read_fn(int32_t fd, int32_t revents, void *data)
|
||||
msg = (void*)msgbuf;
|
||||
switch (msg->type) {
|
||||
case VQMSG_QUORUM:
|
||||
if (!nosync && --wait_count_to_unblock <= 0)
|
||||
qb_loop_timer_del(poll_loop, kb_timer);
|
||||
qmsg = (void*)msgbuf;
|
||||
save_quorum_state(vqn, qmsg);
|
||||
print_quorum_state(vqn);
|
||||
if (!nosync && wait_count_to_unblock <= 0)
|
||||
start_kb_input();
|
||||
if (!sync_cmds) {
|
||||
print_quorum_state(vqn);
|
||||
}
|
||||
|
||||
/* Have the partitions stabilised? */
|
||||
if (sync_cmds && waiting_for_sync &&
|
||||
all_nodes_consistent()) {
|
||||
qb_loop_timer_del(poll_loop, kb_timer);
|
||||
resume_kb_input(sync_cmds);
|
||||
}
|
||||
break;
|
||||
case VQMSG_EXEC:
|
||||
/* Message from votequorum, pass around the partition */
|
||||
@ -204,7 +269,7 @@ static int read_corosync_conf(void)
|
||||
logsys_format_set(NULL);
|
||||
res = coroparse_configparse(icmap_get_global_map(), &error_string);
|
||||
if (res == -1) {
|
||||
log_printf (LOGSYS_LEVEL_INFO, "Error loading corosyc.conf %s", error_string);
|
||||
log_printf (LOGSYS_LEVEL_INFO, "Error loading corosync.conf %s", error_string);
|
||||
return -1;
|
||||
}
|
||||
else {
|
||||
@ -234,8 +299,6 @@ static void remove_node(struct vq_node *node)
|
||||
TAILQ_REMOVE(&part->nodelist, node, entries);
|
||||
free(node);
|
||||
|
||||
wait_count--;
|
||||
|
||||
/* Rebuild quorum */
|
||||
send_partition_to_nodes(part, 1);
|
||||
}
|
||||
@ -263,7 +326,7 @@ static int32_t sigchld_handler(int32_t sig, void *data)
|
||||
sprintf(text, "(exit code %d)", WEXITSTATUS(status));
|
||||
break;
|
||||
}
|
||||
printf("%d:%02d Quit %s\n", vqn->partition->num, vqn->nodeid, exit_status);
|
||||
printf("%d:%02d: Quit %s\n", vqn->partition->num, vqn->nodeid, exit_status);
|
||||
|
||||
remove_node(vqn);
|
||||
}
|
||||
@ -322,20 +385,24 @@ static void init_partitions(void)
|
||||
}
|
||||
}
|
||||
|
||||
static int nodes_in_partition(int part)
|
||||
{
|
||||
struct vq_node *vqn;
|
||||
int partnodes = 0;
|
||||
|
||||
TAILQ_FOREACH(vqn, &partitions[part].nodelist, entries) {
|
||||
partnodes++;
|
||||
}
|
||||
return partnodes;
|
||||
}
|
||||
|
||||
|
||||
static pid_t create_node(int nodeid, int partno)
|
||||
{
|
||||
struct vq_node *newvq;
|
||||
|
||||
newvq = malloc(sizeof(struct vq_node));
|
||||
if (newvq) {
|
||||
if (!nosync) {
|
||||
/* Number of expected "quorum" vq messages is a square
|
||||
of the total nodes count, so increment the node
|
||||
counter and set new square of this value as
|
||||
a "to observe" counter */
|
||||
wait_count++;
|
||||
wait_count_to_unblock = wait_count * wait_count;
|
||||
}
|
||||
newvq->last_quorate = -1; /* mark "uninitialized" */
|
||||
newvq->instance = vq_create_instance(poll_loop, nodeid);
|
||||
if (!newvq->instance) {
|
||||
@ -439,29 +506,39 @@ static struct vq_node *find_by_pid(pid_t pid)
|
||||
}
|
||||
|
||||
/* Routines called from the parser */
|
||||
void cmd_start_new_node(int nodeid, int partition)
|
||||
|
||||
|
||||
/*
|
||||
* The parser calls this before running a command where
|
||||
* we might have to wait for a result to come back.
|
||||
*/
|
||||
void cmd_start_sync_command()
|
||||
{
|
||||
if (sync_cmds) {
|
||||
qb_loop_poll_del(poll_loop, STDIN_FILENO);
|
||||
qb_loop_timer_add(poll_loop,
|
||||
QB_LOOP_MED,
|
||||
command_timeout,
|
||||
NULL,
|
||||
finish_wait_timeout,
|
||||
&kb_timer);
|
||||
waiting_for_sync = 1;
|
||||
}
|
||||
}
|
||||
|
||||
int cmd_start_new_node(int nodeid, int partition)
|
||||
{
|
||||
struct vq_node *node;
|
||||
|
||||
node = find_node(nodeid);
|
||||
if (node) {
|
||||
fprintf(stderr, "ERR: nodeid %d already exists in partition %d\n", nodeid, node->partition->num);
|
||||
return;
|
||||
return -1;
|
||||
}
|
||||
qb_loop_poll_del(poll_loop, STDIN_FILENO);
|
||||
create_node(nodeid, partition);
|
||||
if (!nosync) {
|
||||
/* Delay kb input handling by 0.25 second when we've just
|
||||
added a node; expect that the delay will be cancelled
|
||||
substantially earlier once it has reported its quorum info
|
||||
(the delay is in fact a failsafe input enabler here) */
|
||||
qb_loop_timer_add(poll_loop,
|
||||
QB_LOOP_MED,
|
||||
250000000,
|
||||
NULL,
|
||||
start_kb_input_timeout,
|
||||
&kb_timer);
|
||||
if (create_node(nodeid, partition) == -1) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void cmd_stop_all_nodes()
|
||||
@ -489,20 +566,21 @@ void cmd_show_node_states()
|
||||
fprintf(output_file, "#autofence: %s\n", autofence?"on":"off");
|
||||
}
|
||||
|
||||
void cmd_stop_node(int nodeid)
|
||||
int cmd_stop_node(int nodeid)
|
||||
{
|
||||
struct vq_node *node;
|
||||
|
||||
node = find_node(nodeid);
|
||||
if (!node) {
|
||||
fprintf(stderr, "ERR: nodeid %d is not up\n", nodeid);
|
||||
return;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Remove processor */
|
||||
vq_quit(node->instance);
|
||||
|
||||
/* Node will be removed when the child process exits */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Move all nodes in 'nodelist' into partition 'partition' */
|
||||
@ -510,6 +588,13 @@ void cmd_move_nodes(int partition, int num_nodes, int *nodelist)
|
||||
{
|
||||
int i;
|
||||
struct vq_node *node;
|
||||
struct vq_node *vqn;
|
||||
int total_nodes = num_nodes;
|
||||
|
||||
/* Work out the number of nodes affected */
|
||||
TAILQ_FOREACH(vqn, &partitions[partition].nodelist, entries) {
|
||||
total_nodes++;
|
||||
}
|
||||
|
||||
for (i=0; i<num_nodes; i++) {
|
||||
node = find_node(nodelist[i]);
|
||||
@ -532,6 +617,11 @@ void cmd_move_nodes(int partition, int num_nodes, int *nodelist)
|
||||
void cmd_join_partitions(int part1, int part2)
|
||||
{
|
||||
struct vq_node *vqn;
|
||||
int total_nodes=0;
|
||||
|
||||
/* Work out the number of nodes affected */
|
||||
total_nodes += nodes_in_partition(part1);
|
||||
total_nodes += nodes_in_partition(part2);
|
||||
|
||||
/* TAILQ_FOREACH is not delete safe *sigh* */
|
||||
retry:
|
||||
@ -551,6 +641,18 @@ void cmd_set_autofence(int onoff)
|
||||
fprintf(output_file, "#autofence: %s\n", onoff?"on":"off");
|
||||
}
|
||||
|
||||
void cmd_set_sync(int onoff)
|
||||
{
|
||||
autofence = onoff;
|
||||
fprintf(output_file, "#sync: %s\n", onoff?"on":"off");
|
||||
sync_cmds = onoff;
|
||||
}
|
||||
|
||||
void cmd_set_assert(int onoff)
|
||||
{
|
||||
assert_on_timeout = onoff;
|
||||
}
|
||||
|
||||
void cmd_update_all_partitions(int newring)
|
||||
{
|
||||
int i;
|
||||
@ -571,6 +673,24 @@ void cmd_qdevice_poll(int nodeid, int onoff)
|
||||
}
|
||||
}
|
||||
|
||||
/* If we get called then a command has timed-out */
|
||||
static void finish_wait_timeout(void *data)
|
||||
{
|
||||
if (command_timeout) {
|
||||
fprintf(stderr, "ERR: Partition(s) not stable within timeout\n");
|
||||
if (assert_on_timeout) {
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
|
||||
resume_kb_input(sync_cmds);
|
||||
}
|
||||
|
||||
void cmd_set_timeout(uint64_t seconds)
|
||||
{
|
||||
command_timeout = seconds * QB_TIME_NS_IN_MSEC;
|
||||
}
|
||||
|
||||
/* ---------------------------------- */
|
||||
|
||||
#ifndef HAVE_READLINE_READLINE_H
|
||||
@ -598,11 +718,6 @@ static void dummy_read_char()
|
||||
|
||||
parse_input_command((c == EOF) ? NULL : input_buf);
|
||||
input_buf_term = 0;
|
||||
|
||||
if (is_tty) {
|
||||
printf("vqsim> ");
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -617,50 +732,26 @@ static int stdin_read_fn(int32_t fd, int32_t revents, void *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void start_kb_input(void)
|
||||
{
|
||||
wait_count_to_unblock = 0;
|
||||
|
||||
#ifdef HAVE_READLINE_READLINE_H
|
||||
/* Readline will deal with completed lines when they arrive */
|
||||
rl_callback_handler_install("vqsim> ", parse_input_command);
|
||||
#else
|
||||
if (is_tty) {
|
||||
printf("vqsim> ");
|
||||
fflush(stdout);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Send stdin to readline */
|
||||
if (qb_loop_poll_add(poll_loop,
|
||||
QB_LOOP_MED,
|
||||
STDIN_FILENO,
|
||||
POLLIN | POLLERR,
|
||||
NULL,
|
||||
stdin_read_fn)) {
|
||||
if (errno != EEXIST) {
|
||||
perror("qb_loop_poll_add1 returned error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Timer callback used at startup: the wait for nodes to report their
 * status has expired, so resume keyboard input (asking for the node
 * states to be shown).
 */
static void start_kb_input_timeout(void *data)
{
	resume_kb_input(1);
}
|
||||
|
||||
/* Print the command-line help text for 'program' on stdout */
static void usage(char *program)
{
	printf("Usage:\n"
	       "\n");
	printf("%s [-c <config-file>] [-o <output-file>]\n", program);
	printf("\n"
	       " -c config file. defaults to /etc/corosync/corosync.conf\n"
	       " -o output file. defaults to stdout\n"
	       " -n no synchronization (on adding a node)\n"
	       " -h display this help text\n"
	       "\n");
	printf("%s always takes input from STDIN, but cannot use a file.\n", program);
	printf("If you want to script it then use\n cat | %s\n", program);
	printf("\n");
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
@ -669,16 +760,16 @@ int main(int argc, char **argv)
|
||||
int ch;
|
||||
char *output_file_name = NULL;
|
||||
|
||||
while ((ch = getopt (argc, argv, "f:o:nh")) != EOF) {
|
||||
while ((ch = getopt (argc, argv, "c:o:nh")) != EOF) {
|
||||
switch (ch) {
|
||||
case 'f':
|
||||
case 'c':
|
||||
strncpy(corosync_config_file, optarg, sizeof(corosync_config_file));
|
||||
break;
|
||||
case 'o':
|
||||
output_file_name = optarg;
|
||||
break;
|
||||
case 'n':
|
||||
nosync = 1;
|
||||
sync_cmds = 0;
|
||||
break;
|
||||
default:
|
||||
usage(argv[0]);
|
||||
@ -696,9 +787,8 @@ int main(int argc, char **argv)
|
||||
else {
|
||||
output_file = stdout;
|
||||
}
|
||||
#ifndef HAVE_READLINE_READLINE_H
|
||||
|
||||
is_tty = isatty(STDIN_FILENO);
|
||||
#endif
|
||||
|
||||
qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD,
|
||||
QB_LOG_FILTER_FUNCTION, "*", LOG_DEBUG);
|
||||
@ -717,9 +807,26 @@ int main(int argc, char **argv)
|
||||
sigchld_handler,
|
||||
&sigchld_qb_handle);
|
||||
|
||||
/* Create a full cluster of nodes from corosync.conf */
|
||||
|
||||
#ifdef HAVE_READLINE_READLINE_H
|
||||
/* Readline will deal with completed lines when they arrive */
|
||||
/*
|
||||
* For scripting add '#' to the start of the prompt so that
|
||||
* parsers can ignore input lines
|
||||
*/
|
||||
rl_already_prompted = 1;
|
||||
if (is_tty) {
|
||||
rl_callback_handler_install("vqsim> ", parse_input_command);
|
||||
} else {
|
||||
rl_callback_handler_install("#vqsim> ", parse_input_command);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* Create a full cluster of nodes from corosync.conf */
|
||||
read_corosync_conf();
|
||||
if (create_nodes_from_config() && !nosync) {
|
||||
if (create_nodes_from_config() && sync_cmds) {
|
||||
/* Delay kb input handling by 1 second when we've just
|
||||
added the nodes from corosync.conf; expect that
|
||||
the delay will be cancelled substantially earlier
|
||||
@ -731,8 +838,9 @@ int main(int argc, char **argv)
|
||||
NULL,
|
||||
start_kb_input_timeout,
|
||||
&kb_timer);
|
||||
waiting_for_sync = 1;
|
||||
} else {
|
||||
start_kb_input();
|
||||
resume_kb_input(0);
|
||||
}
|
||||
|
||||
qb_loop_run(poll_loop);
|
||||
|
@ -66,12 +66,17 @@ int fork_new_instance(int nodeid, int *vq_sock, pid_t *child_pid);
|
||||
void parse_input_command(char *cmd);
|
||||
|
||||
/* These are in vqmain.c */
|
||||
void cmd_stop_node(int nodeid);
|
||||
int cmd_stop_node(int nodeid);
|
||||
void cmd_stop_all_nodes(void);
|
||||
void cmd_start_new_node(int nodeid, int partition);
|
||||
int cmd_start_new_node(int nodeid, int partition);
|
||||
void cmd_set_autofence(int onoff);
|
||||
void cmd_set_sync(int onoff);
|
||||
void cmd_set_assert(int onoff);
|
||||
void cmd_move_nodes(int partition, int num_nodes, int *nodelist);
|
||||
void cmd_join_partitions(int part1, int part2);
|
||||
void cmd_update_all_partitions(int newring);
|
||||
void cmd_qdevice_poll(int nodeid, int onoff);
|
||||
void cmd_show_node_states(void);
|
||||
void cmd_set_timeout(uint64_t seconds);
|
||||
void cmd_start_sync_command(void);
|
||||
void resume_kb_input(int show_state);
|
||||
|
@ -191,6 +191,7 @@ static void set_local_node_pos(struct corosync_api_v1 *api)
|
||||
uint32_t nodeid;
|
||||
const char *iter_key;
|
||||
int res;
|
||||
int found = 0;
|
||||
|
||||
iter = icmap_iter_init("nodelist.node.");
|
||||
while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
|
||||
@ -205,13 +206,18 @@ static void set_local_node_pos(struct corosync_api_v1 *api)
|
||||
res = icmap_get_uint32(iter_key, &nodeid);
|
||||
if (res == CS_OK) {
|
||||
if (nodeid == our_nodeid) {
|
||||
found = 1;
|
||||
res = icmap_set_uint32("nodelist.local_node_pos", node_pos);
|
||||
if (res != CS_OK) {
|
||||
fprintf(stderr, "Failed to find node %d in corosync.conf. Quorum calculations may not be correct:\n", our_nodeid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
/* This probably indicates a dynamically-added node
|
||||
* set the pos to zero and use the votes of the
|
||||
* first node in corosync.conf
|
||||
*/
|
||||
res = icmap_set_uint32("nodelist.local_node_pos", 0);
|
||||
}
|
||||
}
|
||||
|
||||
static int load_quorum_instance(struct corosync_api_v1 *api)
|
||||
|
Loading…
Reference in New Issue
Block a user