diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c index 903e2bbe91..122d6db6d7 100644 --- a/watchfrr/watchfrr.c +++ b/watchfrr/watchfrr.c @@ -65,149 +65,136 @@ /* Needs to be global, referenced somewhere inside libzebra. */ struct thread_master *master; -typedef enum -{ - MODE_MONITOR = 0, - MODE_GLOBAL_RESTART, - MODE_SEPARATE_RESTART, - MODE_PHASED_ZEBRA_RESTART, - MODE_PHASED_ALL_RESTART +typedef enum { + MODE_MONITOR = 0, + MODE_GLOBAL_RESTART, + MODE_SEPARATE_RESTART, + MODE_PHASED_ZEBRA_RESTART, + MODE_PHASED_ALL_RESTART } watch_mode_t; -static const char *mode_str[] = -{ - "monitor", - "global restart", - "individual daemon restart", - "phased zebra restart", - "phased global restart for any failure", +static const char *mode_str[] = { + "monitor", + "global restart", + "individual daemon restart", + "phased zebra restart", + "phased global restart for any failure", }; -typedef enum -{ - PHASE_NONE = 0, - PHASE_STOPS_PENDING, - PHASE_WAITING_DOWN, - PHASE_ZEBRA_RESTART_PENDING, - PHASE_WAITING_ZEBRA_UP +typedef enum { + PHASE_NONE = 0, + PHASE_STOPS_PENDING, + PHASE_WAITING_DOWN, + PHASE_ZEBRA_RESTART_PENDING, + PHASE_WAITING_ZEBRA_UP } restart_phase_t; -static const char *phase_str[] = -{ - "None", - "Stop jobs running", - "Waiting for other daemons to come down", - "Zebra restart job running", - "Waiting for zebra to come up", - "Start jobs running", +static const char *phase_str[] = { + "None", + "Stop jobs running", + "Waiting for other daemons to come down", + "Zebra restart job running", + "Waiting for zebra to come up", + "Start jobs running", }; #define PHASE_TIMEOUT (3*gs.restart_timeout) -struct restart_info -{ - const char *name; - const char *what; - pid_t pid; - struct timeval time; - long interval; - struct thread *t_kill; - int kills; +struct restart_info { + const char *name; + const char *what; + pid_t pid; + struct timeval time; + long interval; + struct thread *t_kill; + int kills; }; -static struct global_state -{ - watch_mode_t mode; - restart_phase_t phase; - struct thread *t_phase_hanging; - const char *vtydir; - long period; - long timeout; - long restart_timeout; - long min_restart_interval; - long max_restart_interval; - int do_ping; - struct daemon *daemons; - const char *restart_command; - const char *start_command; - const char *stop_command; - struct restart_info restart; - int unresponsive_restart; - int loglevel; - struct daemon *special; /* points to zebra when doing phased restart */ - int numdaemons; - int numpids; - int numdown; /* # of daemons that are not UP or UNRESPONSIVE */ +static struct global_state { + watch_mode_t mode; + restart_phase_t phase; + struct thread *t_phase_hanging; + const char *vtydir; + long period; + long timeout; + long restart_timeout; + long min_restart_interval; + long max_restart_interval; + int do_ping; + struct daemon *daemons; + const char *restart_command; + const char *start_command; + const char *stop_command; + struct restart_info restart; + int unresponsive_restart; + int loglevel; + struct daemon *special; /* points to zebra when doing phased restart */ + int numdaemons; + int numpids; + int numdown; /* # of daemons that are not UP or UNRESPONSIVE */ } gs = { - .mode = MODE_MONITOR, - .phase = PHASE_NONE, - .vtydir = VTYDIR, - .period = 1000*DEFAULT_PERIOD, - .timeout = DEFAULT_TIMEOUT, - .restart_timeout = DEFAULT_RESTART_TIMEOUT, - .loglevel = DEFAULT_LOGLEVEL, - .min_restart_interval = DEFAULT_MIN_RESTART, - .max_restart_interval = DEFAULT_MAX_RESTART, - .do_ping = 1, -}; +.mode = MODE_MONITOR,.phase = PHASE_NONE,.vtydir = VTYDIR,.period = + 1000 * DEFAULT_PERIOD,.timeout = + DEFAULT_TIMEOUT,.restart_timeout = + DEFAULT_RESTART_TIMEOUT,.loglevel = + DEFAULT_LOGLEVEL,.min_restart_interval = + DEFAULT_MIN_RESTART,.max_restart_interval = + DEFAULT_MAX_RESTART,.do_ping = 1,}; -typedef enum -{ - DAEMON_INIT, - DAEMON_DOWN, - DAEMON_CONNECTING, - DAEMON_UP, - DAEMON_UNRESPONSIVE +typedef enum { + DAEMON_INIT, + DAEMON_DOWN, + DAEMON_CONNECTING, + DAEMON_UP, + DAEMON_UNRESPONSIVE } daemon_state_t; #define IS_UP(DMN) \ (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE)) -static const char *state_str[] = -{ - "Init", - "Down", - "Connecting", - "Up", - "Unresponsive", +static const char *state_str[] = { + "Init", + "Down", + "Connecting", + "Up", + "Unresponsive", }; struct daemon { - const char *name; - daemon_state_t state; - int fd; - struct timeval echo_sent; - u_int connect_tries; - struct thread *t_wakeup; - struct thread *t_read; - struct thread *t_write; - struct daemon *next; - struct restart_info restart; + const char *name; + daemon_state_t state; + int fd; + struct timeval echo_sent; + u_int connect_tries; + struct thread *t_wakeup; + struct thread *t_read; + struct thread *t_write; + struct daemon *next; + struct restart_info restart; }; -static const struct option longopts[] = -{ - { "daemon", no_argument, NULL, 'd'}, - { "statedir", required_argument, NULL, 'S'}, - { "no-echo", no_argument, NULL, 'e'}, - { "loglevel", required_argument, NULL, 'l'}, - { "interval", required_argument, NULL, 'i'}, - { "timeout", required_argument, NULL, 't'}, - { "restart-timeout", required_argument, NULL, 'T'}, - { "restart", required_argument, NULL, 'r'}, - { "start-command", required_argument, NULL, 's'}, - { "kill-command", required_argument, NULL, 'k'}, - { "restart-all", required_argument, NULL, 'R'}, - { "all-restart", no_argument, NULL, 'a'}, - { "always-all-restart", no_argument, NULL, 'A'}, - { "unresponsive-restart", no_argument, NULL, 'z'}, - { "min-restart-interval", required_argument, NULL, 'm'}, - { "max-restart-interval", required_argument, NULL, 'M'}, - { "pid-file", required_argument, NULL, 'p'}, - { "blank-string", required_argument, NULL, 'b'}, - { "help", no_argument, NULL, 'h'}, - { "version", no_argument, NULL, 'v'}, - { NULL, 0, NULL, 0 } +static const struct option longopts[] = { + {"daemon", no_argument, NULL, 'd'}, + {"statedir", required_argument, NULL, 'S'}, + {"no-echo", no_argument, NULL, 'e'}, + {"loglevel", required_argument, NULL, 'l'}, + {"interval", required_argument, NULL, 'i'}, + {"timeout", required_argument, NULL, 't'}, + {"restart-timeout", required_argument, NULL, 'T'}, + {"restart", required_argument, NULL, 'r'}, + {"start-command", required_argument, NULL, 's'}, + {"kill-command", required_argument, NULL, 'k'}, + {"restart-all", required_argument, NULL, 'R'}, + {"all-restart", no_argument, NULL, 'a'}, + {"always-all-restart", no_argument, NULL, 'A'}, + {"unresponsive-restart", no_argument, NULL, 'z'}, + {"min-restart-interval", required_argument, NULL, 'm'}, + {"max-restart-interval", required_argument, NULL, 'M'}, + {"pid-file", required_argument, NULL, 'p'}, + {"blank-string", required_argument, NULL, 'b'}, + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'v'}, + {NULL, 0, NULL, 0} }; static int try_connect(struct daemon *dmn); @@ -215,14 +202,13 @@ static int wakeup_send_echo(struct thread *t_wakeup); static void try_restart(struct daemon *dmn); static void phase_check(void); -static int -usage(const char *progname, int status) +static int usage(const char *progname, int status) { - if (status != 0) - fprintf(stderr, "Try `%s --help' for more information.\n", progname); - else - { - printf("Usage : %s [OPTION...] ...\n\n\ + if (status != 0) + fprintf(stderr, "Try `%s --help' for more information.\n", + progname); + else { + printf("Usage : %s [OPTION...] ...\n\n\ Watchdog program to monitor status of frr daemons and try to restart\n\ them if they are down or unresponsive. It determines whether a daemon is\n\ up based on whether it can connect to the daemon's vty unix stream socket.\n\ @@ -266,12 +252,9 @@ the -m and -M options allow you to control the minimum delay between\n\ restart commands. The minimum restart delay is recalculated each time\n\ a restart is attempted: if the time since the last restart attempt exceeds\n\ twice the -M value, then the restart delay is set to the -m value.\n\ -Otherwise, the interval is doubled (but capped at the -M value).\n\n", - progname,mode_str[0],progname,mode_str[1],progname,mode_str[2], - progname,mode_str[3],progname,mode_str[4],progname,mode_str[2], - mode_str[3]); +Otherwise, the interval is doubled (but capped at the -M value).\n\n", progname, mode_str[0], progname, mode_str[1], progname, mode_str[2], progname, mode_str[3], progname, mode_str[4], progname, mode_str[2], mode_str[3]); - printf("Options:\n\ + printf("Options:\n\ -d, --daemon Run in daemon mode. In this mode, error messages are sent\n\ to syslog instead of stdout.\n\ -S, --statedir Set the vty socket directory (default is %s)\n\ @@ -329,221 +312,203 @@ Otherwise, the interval is doubled (but capped at the -M value).\n\n", it with a space. This is an ugly hack to circumvent problems\n\ passing command-line arguments with embedded spaces.\n\ -v, --version Print program version\n\ --h, --help Display this help and exit\n", - VTYDIR,DEFAULT_LOGLEVEL,LOG_EMERG,LOG_DEBUG,LOG_DEBUG, - DEFAULT_MIN_RESTART,DEFAULT_MAX_RESTART, - DEFAULT_PERIOD,DEFAULT_TIMEOUT,DEFAULT_RESTART_TIMEOUT, - DEFAULT_PIDFILE); - } +-h, --help Display this help and exit\n", VTYDIR, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG, DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD, DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, DEFAULT_PIDFILE); + } - return status; + return status; } -static pid_t -run_background(char *shell_cmd) +static pid_t run_background(char *shell_cmd) { - pid_t child; + pid_t child; - switch (child = fork()) - { - case -1: - zlog_err("fork failed, cannot run command [%s]: %s", - shell_cmd,safe_strerror(errno)); - return -1; - case 0: - /* Child process. */ - /* Use separate process group so child processes can be killed easily. */ - if (setpgid(0,0) < 0) - zlog_warn("warning: setpgid(0,0) failed: %s",safe_strerror(errno)); - { - char shell[] = "sh"; - char dashc[] = "-c"; - char * const argv[4] = { shell, dashc, shell_cmd, NULL}; - execv("/bin/sh", argv); - zlog_err("execv(/bin/sh -c '%s') failed: %s", - shell_cmd,safe_strerror(errno)); - _exit(127); - } - default: - /* Parent process: we will reap the child later. */ - zlog_err("Forked background command [pid %d]: %s",(int)child,shell_cmd); - return child; - } + switch (child = fork()) { + case -1: + zlog_err("fork failed, cannot run command [%s]: %s", + shell_cmd, safe_strerror(errno)); + return -1; + case 0: + /* Child process. */ + /* Use separate process group so child processes can be killed easily. */ + if (setpgid(0, 0) < 0) + zlog_warn("warning: setpgid(0,0) failed: %s", + safe_strerror(errno)); + { + char shell[] = "sh"; + char dashc[] = "-c"; + char *const argv[4] = { shell, dashc, shell_cmd, NULL }; + execv("/bin/sh", argv); + zlog_err("execv(/bin/sh -c '%s') failed: %s", + shell_cmd, safe_strerror(errno)); + _exit(127); + } + default: + /* Parent process: we will reap the child later. */ + zlog_err("Forked background command [pid %d]: %s", (int)child, + shell_cmd); + return child; + } } -static struct timeval * -time_elapsed(struct timeval *result, const struct timeval *start_time) +static struct timeval *time_elapsed(struct timeval *result, + const struct timeval *start_time) { - gettimeofday(result,NULL); - result->tv_sec -= start_time->tv_sec; - result->tv_usec -= start_time->tv_usec; - while (result->tv_usec < 0) - { - result->tv_usec += 1000000L; - result->tv_sec--; - } - return result; + gettimeofday(result, NULL); + result->tv_sec -= start_time->tv_sec; + result->tv_usec -= start_time->tv_usec; + while (result->tv_usec < 0) { + result->tv_usec += 1000000L; + result->tv_sec--; + } + return result; } -static int -restart_kill(struct thread *t_kill) +static int restart_kill(struct thread *t_kill) { - struct restart_info *restart = THREAD_ARG(t_kill); - struct timeval delay; + struct restart_info *restart = THREAD_ARG(t_kill); + struct timeval delay; - time_elapsed(&delay,&restart->time); - zlog_warn("Warning: %s %s child process %d still running after " - "%ld seconds, sending signal %d", - restart->what,restart->name,(int)restart->pid, (long)delay.tv_sec, - (restart->kills ? SIGKILL : SIGTERM)); - kill(-restart->pid,(restart->kills ? SIGKILL : SIGTERM)); - restart->kills++; - restart->t_kill = thread_add_timer(master,restart_kill,restart, - gs.restart_timeout); - return 0; + time_elapsed(&delay, &restart->time); + zlog_warn("Warning: %s %s child process %d still running after " + "%ld seconds, sending signal %d", + restart->what, restart->name, (int)restart->pid, + (long)delay.tv_sec, (restart->kills ? SIGKILL : SIGTERM)); + kill(-restart->pid, (restart->kills ? SIGKILL : SIGTERM)); + restart->kills++; + restart->t_kill = thread_add_timer(master, restart_kill, restart, + gs.restart_timeout); + return 0; } -static struct restart_info * -find_child(pid_t child) +static struct restart_info *find_child(pid_t child) { - if (gs.mode == MODE_GLOBAL_RESTART) - { - if (gs.restart.pid == child) - return &gs.restart; - } - else - { - struct daemon *dmn; - for (dmn = gs.daemons; dmn; dmn = dmn->next) - { - if (dmn->restart.pid == child) - return &dmn->restart; - } - } - return NULL; + if (gs.mode == MODE_GLOBAL_RESTART) { + if (gs.restart.pid == child) + return &gs.restart; + } else { + struct daemon *dmn; + for (dmn = gs.daemons; dmn; dmn = dmn->next) { + if (dmn->restart.pid == child) + return &dmn->restart; + } + } + return NULL; } -static void -sigchild(void) +static void sigchild(void) { - pid_t child; - int status; - const char *name; - const char *what; - struct restart_info *restart; + pid_t child; + int status; + const char *name; + const char *what; + struct restart_info *restart; - switch (child = waitpid(-1,&status,WNOHANG)) - { - case -1: - zlog_err("waitpid failed: %s",safe_strerror(errno)); - return; - case 0: - zlog_warn("SIGCHLD received, but waitpid did not reap a child"); - return; - } + switch (child = waitpid(-1, &status, WNOHANG)) { + case -1: + zlog_err("waitpid failed: %s", safe_strerror(errno)); + return; + case 0: + zlog_warn("SIGCHLD received, but waitpid did not reap a child"); + return; + } - if (child == integrated_write_pid) - { - integrated_write_sigchld(status); - return; - } + if (child == integrated_write_pid) { + integrated_write_sigchld(status); + return; + } - if ((restart = find_child(child)) != NULL) - { - name = restart->name; - what = restart->what; - restart->pid = 0; - gs.numpids--; - thread_cancel(restart->t_kill); - restart->t_kill = NULL; - /* Update restart time to reflect the time the command completed. */ - gettimeofday(&restart->time,NULL); - } - else - { - zlog_err("waitpid returned status for an unknown child process %d", - (int)child); - name = "(unknown)"; - what = "background"; - } - if (WIFSTOPPED(status)) - zlog_warn("warning: %s %s process %d is stopped", - what,name,(int)child); - else if (WIFSIGNALED(status)) - zlog_warn("%s %s process %d terminated due to signal %d", - what,name,(int)child,WTERMSIG(status)); - else if (WIFEXITED(status)) - { - if (WEXITSTATUS(status) != 0) - zlog_warn("%s %s process %d exited with non-zero status %d", - what,name,(int)child,WEXITSTATUS(status)); - else - zlog_debug("%s %s process %d exited normally",what,name,(int)child); - } - else - zlog_err("cannot interpret %s %s process %d wait status 0x%x", - what,name,(int)child,status); - phase_check(); + if ((restart = find_child(child)) != NULL) { + name = restart->name; + what = restart->what; + restart->pid = 0; + gs.numpids--; + thread_cancel(restart->t_kill); + restart->t_kill = NULL; + /* Update restart time to reflect the time the command completed. */ + gettimeofday(&restart->time, NULL); + } else { + zlog_err + ("waitpid returned status for an unknown child process %d", + (int)child); + name = "(unknown)"; + what = "background"; + } + if (WIFSTOPPED(status)) + zlog_warn("warning: %s %s process %d is stopped", + what, name, (int)child); + else if (WIFSIGNALED(status)) + zlog_warn("%s %s process %d terminated due to signal %d", + what, name, (int)child, WTERMSIG(status)); + else if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) + zlog_warn + ("%s %s process %d exited with non-zero status %d", + what, name, (int)child, WEXITSTATUS(status)); + else + zlog_debug("%s %s process %d exited normally", what, + name, (int)child); + } else + zlog_err("cannot interpret %s %s process %d wait status 0x%x", + what, name, (int)child, status); + phase_check(); } static int run_job(struct restart_info *restart, const char *cmdtype, const char *command, int force, int update_interval) { - struct timeval delay; + struct timeval delay; - if (gs.loglevel > LOG_DEBUG+1) - zlog_debug("attempting to %s %s",cmdtype,restart->name); + if (gs.loglevel > LOG_DEBUG + 1) + zlog_debug("attempting to %s %s", cmdtype, restart->name); - if (restart->pid) - { - if (gs.loglevel > LOG_DEBUG+1) - zlog_debug("cannot %s %s, previous pid %d still running", - cmdtype,restart->name,(int)restart->pid); - return -1; - } + if (restart->pid) { + if (gs.loglevel > LOG_DEBUG + 1) + zlog_debug + ("cannot %s %s, previous pid %d still running", + cmdtype, restart->name, (int)restart->pid); + return -1; + } - /* Note: time_elapsed test must come before the force test, since we need - to make sure that delay is initialized for use below in updating the - restart interval. */ - if ((time_elapsed(&delay,&restart->time)->tv_sec < restart->interval) && - !force) - { - if (gs.loglevel > LOG_DEBUG+1) - zlog_debug("postponing %s %s: " - "elapsed time %ld < retry interval %ld", - cmdtype,restart->name,(long)delay.tv_sec,restart->interval); - return -1; - } + /* Note: time_elapsed test must come before the force test, since we need + to make sure that delay is initialized for use below in updating the + restart interval. */ + if ((time_elapsed(&delay, &restart->time)->tv_sec < restart->interval) + && !force) { + if (gs.loglevel > LOG_DEBUG + 1) + zlog_debug("postponing %s %s: " + "elapsed time %ld < retry interval %ld", + cmdtype, restart->name, (long)delay.tv_sec, + restart->interval); + return -1; + } - gettimeofday(&restart->time,NULL); - restart->kills = 0; - { - char cmd[strlen(command)+strlen(restart->name)+1]; - snprintf(cmd,sizeof(cmd),command,restart->name); - if ((restart->pid = run_background(cmd)) > 0) - { - restart->t_kill = thread_add_timer(master,restart_kill,restart, - gs.restart_timeout); - restart->what = cmdtype; - gs.numpids++; - } - else - restart->pid = 0; - } + gettimeofday(&restart->time, NULL); + restart->kills = 0; + { + char cmd[strlen(command) + strlen(restart->name) + 1]; + snprintf(cmd, sizeof(cmd), command, restart->name); + if ((restart->pid = run_background(cmd)) > 0) { + restart->t_kill = + thread_add_timer(master, restart_kill, restart, + gs.restart_timeout); + restart->what = cmdtype; + gs.numpids++; + } else + restart->pid = 0; + } - /* Calculate the new restart interval. */ - if (update_interval) - { - if (delay.tv_sec > 2*gs.max_restart_interval) - restart->interval = gs.min_restart_interval; - else if ((restart->interval *= 2) > gs.max_restart_interval) - restart->interval = gs.max_restart_interval; - if (gs.loglevel > LOG_DEBUG+1) - zlog_debug("restart %s interval is now %ld", - restart->name,restart->interval); - } - return restart->pid; + /* Calculate the new restart interval. */ + if (update_interval) { + if (delay.tv_sec > 2 * gs.max_restart_interval) + restart->interval = gs.min_restart_interval; + else if ((restart->interval *= 2) > gs.max_restart_interval) + restart->interval = gs.max_restart_interval; + if (gs.loglevel > LOG_DEBUG + 1) + zlog_debug("restart %s interval is now %ld", + restart->name, restart->interval); + } + return restart->pid; } #define SET_READ_HANDLER(DMN) \ @@ -551,889 +516,874 @@ run_job(struct restart_info *restart, const char *cmdtype, const char *command, #define SET_WAKEUP_DOWN(DMN) \ (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_down,(DMN), \ - FUZZY(gs.period)) + FUZZY(gs.period)) #define SET_WAKEUP_UNRESPONSIVE(DMN) \ (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_unresponsive,(DMN), \ - FUZZY(gs.period)) + FUZZY(gs.period)) #define SET_WAKEUP_ECHO(DMN) \ (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_send_echo,(DMN), \ FUZZY(gs.period)) -static int -wakeup_down(struct thread *t_wakeup) +static int wakeup_down(struct thread *t_wakeup) { - struct daemon *dmn = THREAD_ARG(t_wakeup); + struct daemon *dmn = THREAD_ARG(t_wakeup); - dmn->t_wakeup = NULL; - if (try_connect(dmn) < 0) - SET_WAKEUP_DOWN(dmn); - if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP)) - try_restart(dmn); - return 0; + dmn->t_wakeup = NULL; + if (try_connect(dmn) < 0) + SET_WAKEUP_DOWN(dmn); + if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP)) + try_restart(dmn); + return 0; } -static int -wakeup_init(struct thread *t_wakeup) +static int wakeup_init(struct thread *t_wakeup) { - struct daemon *dmn = THREAD_ARG(t_wakeup); + struct daemon *dmn = THREAD_ARG(t_wakeup); - dmn->t_wakeup = NULL; - if (try_connect(dmn) < 0) - { - SET_WAKEUP_DOWN(dmn); - zlog_err("%s state -> down : initial connection attempt failed", - dmn->name); - dmn->state = DAEMON_DOWN; - } - return 0; -} - -static void -daemon_down(struct daemon *dmn, const char *why) -{ - if (IS_UP(dmn) || (dmn->state == DAEMON_INIT)) - zlog_err("%s state -> down : %s",dmn->name,why); - else if (gs.loglevel > LOG_DEBUG) - zlog_debug("%s still down : %s",dmn->name,why); - if (IS_UP(dmn)) - gs.numdown++; - dmn->state = DAEMON_DOWN; - if (dmn->fd >= 0) - { - close(dmn->fd); - dmn->fd = -1; - } - THREAD_OFF(dmn->t_read); - THREAD_OFF(dmn->t_write); - THREAD_OFF(dmn->t_wakeup); - if (try_connect(dmn) < 0) - SET_WAKEUP_DOWN(dmn); - phase_check(); -} - -static int -handle_read(struct thread *t_read) -{ - struct daemon *dmn = THREAD_ARG(t_read); - static const char resp[sizeof(PING_TOKEN)+4] = PING_TOKEN "\n"; - char buf[sizeof(resp)+100]; - ssize_t rc; - struct timeval delay; - - dmn->t_read = NULL; - if ((rc = read(dmn->fd,buf,sizeof(buf))) < 0) - { - char why[100]; - - if (ERRNO_IO_RETRY(errno)) - { - /* Pretend it never happened. */ - SET_READ_HANDLER(dmn); - return 0; + dmn->t_wakeup = NULL; + if (try_connect(dmn) < 0) { + SET_WAKEUP_DOWN(dmn); + zlog_err("%s state -> down : initial connection attempt failed", + dmn->name); + dmn->state = DAEMON_DOWN; } - snprintf(why,sizeof(why),"unexpected read error: %s", - safe_strerror(errno)); - daemon_down(dmn,why); - return 0; - } - if (rc == 0) - { - daemon_down(dmn,"read returned EOF"); - return 0; - } - if (!dmn->echo_sent.tv_sec) - { - char why[sizeof(buf)+100]; - snprintf(why,sizeof(why),"unexpected read returns %d bytes: %.*s", - (int)rc,(int)rc,buf); - daemon_down(dmn,why); - return 0; - } + return 0; +} - /* We are expecting an echo response: is there any chance that the - response would not be returned entirely in the first read? That - seems inconceivable... */ - if ((rc != sizeof(resp)) || memcmp(buf,resp,sizeof(resp))) - { - char why[100+sizeof(buf)]; - snprintf(why,sizeof(why),"read returned bad echo response of %d bytes " - "(expecting %u): %.*s", - (int)rc,(u_int)sizeof(resp),(int)rc,buf); - daemon_down(dmn,why); - return 0; - } - - time_elapsed(&delay,&dmn->echo_sent); - dmn->echo_sent.tv_sec = 0; - if (dmn->state == DAEMON_UNRESPONSIVE) - { - if (delay.tv_sec < gs.timeout) - { - dmn->state = DAEMON_UP; - zlog_warn("%s state -> up : echo response received after %ld.%06ld " - "seconds", dmn->name, - (long)delay.tv_sec, (long)delay.tv_usec); +static void daemon_down(struct daemon *dmn, const char *why) +{ + if (IS_UP(dmn) || (dmn->state == DAEMON_INIT)) + zlog_err("%s state -> down : %s", dmn->name, why); + else if (gs.loglevel > LOG_DEBUG) + zlog_debug("%s still down : %s", dmn->name, why); + if (IS_UP(dmn)) + gs.numdown++; + dmn->state = DAEMON_DOWN; + if (dmn->fd >= 0) { + close(dmn->fd); + dmn->fd = -1; } - else - zlog_warn("%s: slow echo response finally received after %ld.%06ld " - "seconds", dmn->name, - (long)delay.tv_sec, (long)delay.tv_usec); - } - else if (gs.loglevel > LOG_DEBUG+1) - zlog_debug("%s: echo response received after %ld.%06ld seconds", - dmn->name, (long)delay.tv_sec, (long)delay.tv_usec); + THREAD_OFF(dmn->t_read); + THREAD_OFF(dmn->t_write); + THREAD_OFF(dmn->t_wakeup); + if (try_connect(dmn) < 0) + SET_WAKEUP_DOWN(dmn); + phase_check(); +} - SET_READ_HANDLER(dmn); - if (dmn->t_wakeup) - thread_cancel(dmn->t_wakeup); - SET_WAKEUP_ECHO(dmn); +static int handle_read(struct thread *t_read) +{ + struct daemon *dmn = THREAD_ARG(t_read); + static const char resp[sizeof(PING_TOKEN) + 4] = PING_TOKEN "\n"; + char buf[sizeof(resp) + 100]; + ssize_t rc; + struct timeval delay; - return 0; + dmn->t_read = NULL; + if ((rc = read(dmn->fd, buf, sizeof(buf))) < 0) { + char why[100]; + + if (ERRNO_IO_RETRY(errno)) { + /* Pretend it never happened. */ + SET_READ_HANDLER(dmn); + return 0; + } + snprintf(why, sizeof(why), "unexpected read error: %s", + safe_strerror(errno)); + daemon_down(dmn, why); + return 0; + } + if (rc == 0) { + daemon_down(dmn, "read returned EOF"); + return 0; + } + if (!dmn->echo_sent.tv_sec) { + char why[sizeof(buf) + 100]; + snprintf(why, sizeof(why), + "unexpected read returns %d bytes: %.*s", (int)rc, + (int)rc, buf); + daemon_down(dmn, why); + return 0; + } + + /* We are expecting an echo response: is there any chance that the + response would not be returned entirely in the first read? That + seems inconceivable... */ + if ((rc != sizeof(resp)) || memcmp(buf, resp, sizeof(resp))) { + char why[100 + sizeof(buf)]; + snprintf(why, sizeof(why), + "read returned bad echo response of %d bytes " + "(expecting %u): %.*s", (int)rc, (u_int) sizeof(resp), + (int)rc, buf); + daemon_down(dmn, why); + return 0; + } + + time_elapsed(&delay, &dmn->echo_sent); + dmn->echo_sent.tv_sec = 0; + if (dmn->state == DAEMON_UNRESPONSIVE) { + if (delay.tv_sec < gs.timeout) { + dmn->state = DAEMON_UP; + zlog_warn + ("%s state -> up : echo response received after %ld.%06ld " + "seconds", dmn->name, (long)delay.tv_sec, + (long)delay.tv_usec); + } else + zlog_warn + ("%s: slow echo response finally received after %ld.%06ld " + "seconds", dmn->name, (long)delay.tv_sec, + (long)delay.tv_usec); + } else if (gs.loglevel > LOG_DEBUG + 1) + zlog_debug("%s: echo response received after %ld.%06ld seconds", + dmn->name, (long)delay.tv_sec, (long)delay.tv_usec); + + SET_READ_HANDLER(dmn); + if (dmn->t_wakeup) + thread_cancel(dmn->t_wakeup); + SET_WAKEUP_ECHO(dmn); + + return 0; } /* * Wait till we notice that all daemons are ready before * we send we are ready to systemd */ -static void -daemon_send_ready (void) +static void daemon_send_ready(void) { - static int sent = 0; - if (!sent && gs.numdown == 0) - { + static int sent = 0; + if (!sent && gs.numdown == 0) { #if defined (HAVE_CUMULUS) - FILE *fp; + FILE *fp; - fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w"); - fclose(fp); + fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w"); + fclose(fp); #endif - zlog_notice ("Watchfrr: Notifying Systemd we are up and running"); - systemd_send_started(master, 0); - sent = 1; - } + zlog_notice + ("Watchfrr: Notifying Systemd we are up and running"); + systemd_send_started(master, 0); + sent = 1; + } } -static void -daemon_up(struct daemon *dmn, const char *why) +static void daemon_up(struct daemon *dmn, const char *why) { - dmn->state = DAEMON_UP; - gs.numdown--; - dmn->connect_tries = 0; - zlog_notice("%s state -> up : %s",dmn->name,why); - daemon_send_ready(); - if (gs.do_ping) - SET_WAKEUP_ECHO(dmn); - phase_check(); + dmn->state = DAEMON_UP; + gs.numdown--; + dmn->connect_tries = 0; + zlog_notice("%s state -> up : %s", dmn->name, why); + daemon_send_ready(); + if (gs.do_ping) + SET_WAKEUP_ECHO(dmn); + phase_check(); } -static int -check_connect(struct thread *t_write) +static int check_connect(struct thread *t_write) { - struct daemon *dmn = THREAD_ARG(t_write); - int sockerr; - socklen_t reslen = sizeof(sockerr); + struct daemon *dmn = THREAD_ARG(t_write); + int sockerr; + socklen_t reslen = sizeof(sockerr); - dmn->t_write = NULL; - if (getsockopt(dmn->fd,SOL_SOCKET,SO_ERROR,(char *)&sockerr,&reslen) < 0) - { - zlog_warn("%s: check_connect: getsockopt failed: %s", - dmn->name,safe_strerror(errno)); - daemon_down(dmn,"getsockopt failed checking connection success"); - return 0; - } - if ((reslen == sizeof(sockerr)) && sockerr) - { - char why[100]; - snprintf(why,sizeof(why), - "getsockopt reports that connection attempt failed: %s", - safe_strerror(sockerr)); - daemon_down(dmn,why); - return 0; - } + dmn->t_write = NULL; + if (getsockopt(dmn->fd, SOL_SOCKET, SO_ERROR, (char *)&sockerr, &reslen) + < 0) { + zlog_warn("%s: check_connect: getsockopt failed: %s", dmn->name, + safe_strerror(errno)); + daemon_down(dmn, + "getsockopt failed checking connection success"); + return 0; + } + if ((reslen == sizeof(sockerr)) && sockerr) { + char why[100]; + snprintf(why, sizeof(why), + "getsockopt reports that connection attempt failed: %s", + safe_strerror(sockerr)); + daemon_down(dmn, why); + return 0; + } - daemon_up(dmn,"delayed connect succeeded"); - return 0; + daemon_up(dmn, "delayed connect succeeded"); + return 0; } -static int -wakeup_connect_hanging(struct thread *t_wakeup) +static int wakeup_connect_hanging(struct thread *t_wakeup) { - struct daemon *dmn = THREAD_ARG(t_wakeup); - char why[100]; + struct daemon *dmn = THREAD_ARG(t_wakeup); + char why[100]; - dmn->t_wakeup = NULL; - snprintf(why,sizeof(why),"connection attempt timed out after %ld seconds", - gs.timeout); - daemon_down(dmn,why); - return 0; + dmn->t_wakeup = NULL; + snprintf(why, sizeof(why), + "connection attempt timed out after %ld seconds", gs.timeout); + daemon_down(dmn, why); + return 0; } /* Making connection to protocol daemon. */ -static int -try_connect(struct daemon *dmn) +static int try_connect(struct daemon *dmn) { - int sock; - struct sockaddr_un addr; - socklen_t len; + int sock; + struct sockaddr_un addr; + socklen_t len; - if (gs.loglevel > LOG_DEBUG+1) - zlog_debug("%s: attempting to connect",dmn->name); - dmn->connect_tries++; + if (gs.loglevel > LOG_DEBUG + 1) + zlog_debug("%s: attempting to connect", dmn->name); + dmn->connect_tries++; - memset (&addr, 0, sizeof (struct sockaddr_un)); - addr.sun_family = AF_UNIX; - snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", - gs.vtydir,dmn->name); + memset(&addr, 0, sizeof(struct sockaddr_un)); + addr.sun_family = AF_UNIX; + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", + gs.vtydir, dmn->name); #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN - len = addr.sun_len = SUN_LEN(&addr); + len = addr.sun_len = SUN_LEN(&addr); #else - len = sizeof (addr.sun_family) + strlen (addr.sun_path); -#endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */ + len = sizeof(addr.sun_family) + strlen(addr.sun_path); +#endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */ - /* Quick check to see if we might succeed before we go to the trouble - of creating a socket. */ - if (access(addr.sun_path, W_OK) < 0) - { - if (errno != ENOENT) - zlog_err("%s: access to socket %s denied: %s", - dmn->name,addr.sun_path,safe_strerror(errno)); - return -1; - } - - if ((sock = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) - { - zlog_err("%s(%s): cannot make socket: %s", - __func__,addr.sun_path, safe_strerror(errno)); - return -1; - } - - if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) - { - zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed", - __func__, addr.sun_path, sock); - close(sock); - return -1; - } - - if (connect (sock, (struct sockaddr *) &addr, len) < 0) - { - if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) - { - if (gs.loglevel > LOG_DEBUG) - zlog_debug("%s(%s): connect failed: %s", - __func__,addr.sun_path, safe_strerror(errno)); - close (sock); - return -1; + /* Quick check to see if we might succeed before we go to the trouble + of creating a socket. */ + if (access(addr.sun_path, W_OK) < 0) { + if (errno != ENOENT) + zlog_err("%s: access to socket %s denied: %s", + dmn->name, addr.sun_path, + safe_strerror(errno)); + return -1; } - if (gs.loglevel > LOG_DEBUG) - zlog_debug("%s: connection in progress",dmn->name); - dmn->state = DAEMON_CONNECTING; - dmn->fd = sock; - dmn->t_write = thread_add_write(master,check_connect,dmn,dmn->fd); - dmn->t_wakeup = thread_add_timer(master,wakeup_connect_hanging,dmn, - gs.timeout); - SET_READ_HANDLER(dmn); - return 0; - } - dmn->fd = sock; - SET_READ_HANDLER(dmn); - daemon_up(dmn,"connect succeeded"); - return 1; -} - -static int -phase_hanging(struct thread *t_hanging) -{ - gs.t_phase_hanging = NULL; - zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart", - phase_str[gs.phase],PHASE_TIMEOUT); - gs.phase = PHASE_NONE; - return 0; -} - -static void -set_phase(restart_phase_t new_phase) -{ - gs.phase = new_phase; - if (gs.t_phase_hanging) - thread_cancel(gs.t_phase_hanging); - gs.t_phase_hanging = thread_add_timer(master,phase_hanging,NULL, - PHASE_TIMEOUT); -} - -static void -phase_check(void) -{ - switch (gs.phase) - { - case PHASE_NONE: - break; - case PHASE_STOPS_PENDING: - if (gs.numpids) - break; - zlog_info("Phased restart: all routing daemon stop jobs have completed."); - set_phase(PHASE_WAITING_DOWN); - /*FALLTHRU*/ - case PHASE_WAITING_DOWN: - if (gs.numdown+IS_UP(gs.special) < gs.numdaemons) - break; - zlog_info("Phased restart: all routing daemons now down."); - run_job(&gs.special->restart,"restart",gs.restart_command,1,1); - set_phase(PHASE_ZEBRA_RESTART_PENDING); - /*FALLTHRU*/ - case PHASE_ZEBRA_RESTART_PENDING: - if (gs.special->restart.pid) - break; - zlog_info("Phased restart: %s restart job completed.",gs.special->name); - set_phase(PHASE_WAITING_ZEBRA_UP); - /*FALLTHRU*/ - case PHASE_WAITING_ZEBRA_UP: - if (!IS_UP(gs.special)) - break; - zlog_info("Phased restart: %s is now up.",gs.special->name); - { - struct daemon *dmn; - for (dmn = gs.daemons; dmn; dmn = dmn->next) - { - if (dmn != gs.special) - run_job(&dmn->restart,"start",gs.start_command,1,0); - } - } - gs.phase = PHASE_NONE; - THREAD_OFF(gs.t_phase_hanging); - zlog_notice("Phased global restart has completed."); - break; - } -} - -static void -try_restart(struct daemon *dmn) -{ - switch (gs.mode) - { - case MODE_MONITOR: - return; - case MODE_GLOBAL_RESTART: - run_job(&gs.restart,"restart",gs.restart_command,0,1); - break; - case MODE_SEPARATE_RESTART: - run_job(&dmn->restart,"restart",gs.restart_command,0,1); - break; - case MODE_PHASED_ZEBRA_RESTART: - if (dmn != gs.special) - { - if ((gs.special->state == DAEMON_UP) && (gs.phase == PHASE_NONE)) - run_job(&dmn->restart,"restart",gs.restart_command,0,1); - else - zlog_debug("%s: postponing restart attempt because master %s daemon " - "not up [%s], or phased restart in progress", - dmn->name,gs.special->name,state_str[gs.special->state]); - break; - } - /*FALLTHRU*/ - case MODE_PHASED_ALL_RESTART: - if ((gs.phase != PHASE_NONE) || gs.numpids) - { - if (gs.loglevel > LOG_DEBUG+1) - zlog_debug("postponing phased global restart: restart already in " - "progress [%s], or outstanding child processes [%d]", - phase_str[gs.phase],gs.numpids); - break; - } - /* Is it too soon for a restart? */ - { - struct timeval delay; - if (time_elapsed(&delay,&gs.special->restart.time)->tv_sec < - gs.special->restart.interval) - { - if (gs.loglevel > LOG_DEBUG+1) - zlog_debug("postponing phased global restart: " - "elapsed time %ld < retry interval %ld", - (long)delay.tv_sec,gs.special->restart.interval); - break; + if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { + zlog_err("%s(%s): cannot make socket: %s", + __func__, addr.sun_path, safe_strerror(errno)); + return -1; } - } - run_job(&gs.restart,"restart",gs.restart_command,0,1); - break; - default: - zlog_err("error: unknown restart mode %d",gs.mode); - break; - } + + if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) { + zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed", + __func__, addr.sun_path, sock); + close(sock); + return -1; + } + + if (connect(sock, (struct sockaddr *)&addr, len) < 0) { + if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) { + if (gs.loglevel > LOG_DEBUG) + zlog_debug("%s(%s): connect failed: %s", + __func__, addr.sun_path, + safe_strerror(errno)); + close(sock); + return -1; + } + if (gs.loglevel > LOG_DEBUG) + zlog_debug("%s: connection in progress", dmn->name); + dmn->state = DAEMON_CONNECTING; + dmn->fd = sock; + dmn->t_write = + thread_add_write(master, check_connect, dmn, dmn->fd); + dmn->t_wakeup = + thread_add_timer(master, wakeup_connect_hanging, dmn, + gs.timeout); + SET_READ_HANDLER(dmn); + return 0; + } + + dmn->fd = sock; + SET_READ_HANDLER(dmn); + daemon_up(dmn, "connect succeeded"); + return 1; } -static int -wakeup_unresponsive(struct thread *t_wakeup) +static int phase_hanging(struct thread *t_hanging) { - struct daemon *dmn = THREAD_ARG(t_wakeup); - - dmn->t_wakeup = NULL; - if (dmn->state != DAEMON_UNRESPONSIVE) - zlog_err("%s: no longer unresponsive (now %s), " - "wakeup should have been cancelled!", - dmn->name,state_str[dmn->state]); - else - { - SET_WAKEUP_UNRESPONSIVE(dmn); - try_restart(dmn); - } - return 0; + gs.t_phase_hanging = NULL; + zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart", + phase_str[gs.phase], PHASE_TIMEOUT); + gs.phase = PHASE_NONE; + return 0; } -static int -wakeup_no_answer(struct thread *t_wakeup) +static void set_phase(restart_phase_t new_phase) { - struct daemon *dmn = THREAD_ARG(t_wakeup); - - dmn->t_wakeup = NULL; - dmn->state = DAEMON_UNRESPONSIVE; - zlog_err("%s state -> unresponsive : no response yet to ping " - "sent %ld seconds ago",dmn->name,gs.timeout); - if (gs.unresponsive_restart) - { - SET_WAKEUP_UNRESPONSIVE(dmn); - try_restart(dmn); - } - return 0; + gs.phase = new_phase; + if (gs.t_phase_hanging) + thread_cancel(gs.t_phase_hanging); + gs.t_phase_hanging = thread_add_timer(master, phase_hanging, NULL, + PHASE_TIMEOUT); } -static int -wakeup_send_echo(struct thread *t_wakeup) +static void phase_check(void) { - static const char echocmd[] = "echo " PING_TOKEN; - ssize_t rc; - struct daemon *dmn = THREAD_ARG(t_wakeup); + switch (gs.phase) { + case PHASE_NONE: + break; + case PHASE_STOPS_PENDING: + if (gs.numpids) + break; + zlog_info + ("Phased restart: all routing daemon stop jobs have completed."); + set_phase(PHASE_WAITING_DOWN); - dmn->t_wakeup = NULL; - if (((rc = write(dmn->fd,echocmd,sizeof(echocmd))) < 0) || - ((size_t)rc != sizeof(echocmd))) - { - char why[100+sizeof(echocmd)]; - snprintf(why,sizeof(why),"write '%s' returned %d instead of %u", - echocmd,(int)rc,(u_int)sizeof(echocmd)); - daemon_down(dmn,why); - } - else - { - gettimeofday(&dmn->echo_sent,NULL); - dmn->t_wakeup = thread_add_timer(master,wakeup_no_answer,dmn,gs.timeout); - } - return 0; + /*FALLTHRU*/ + case PHASE_WAITING_DOWN: + if (gs.numdown + IS_UP(gs.special) < gs.numdaemons) + break; + zlog_info("Phased restart: all routing daemons now down."); + run_job(&gs.special->restart, "restart", gs.restart_command, 1, + 1); + set_phase(PHASE_ZEBRA_RESTART_PENDING); + + /*FALLTHRU*/ + case PHASE_ZEBRA_RESTART_PENDING: + if (gs.special->restart.pid) + break; + zlog_info("Phased restart: %s restart job completed.", + gs.special->name); + set_phase(PHASE_WAITING_ZEBRA_UP); + + /*FALLTHRU*/ + case PHASE_WAITING_ZEBRA_UP: + if (!IS_UP(gs.special)) + break; + zlog_info("Phased restart: %s is now up.", gs.special->name); + { + struct daemon *dmn; + for (dmn = gs.daemons; dmn; dmn = dmn->next) { + if (dmn != gs.special) + run_job(&dmn->restart, "start", + gs.start_command, 1, 0); + } + } + gs.phase = PHASE_NONE; + THREAD_OFF(gs.t_phase_hanging); + zlog_notice("Phased global restart has completed."); + break; + } } -static void -sigint(void) +static void try_restart(struct daemon *dmn) { - zlog_notice("Terminating on signal"); - systemd_send_stopping (); - exit(0); + switch (gs.mode) { + case MODE_MONITOR: + return; + case MODE_GLOBAL_RESTART: + run_job(&gs.restart, "restart", gs.restart_command, 0, 1); + break; + case MODE_SEPARATE_RESTART: + run_job(&dmn->restart, "restart", gs.restart_command, 0, 1); + break; + case MODE_PHASED_ZEBRA_RESTART: + if (dmn != gs.special) { + if ((gs.special->state == DAEMON_UP) + && (gs.phase == PHASE_NONE)) + run_job(&dmn->restart, "restart", + gs.restart_command, 0, 1); + else + zlog_debug + ("%s: postponing restart attempt because master %s daemon " + "not up [%s], or phased restart in progress", + dmn->name, gs.special->name, + state_str[gs.special->state]); + break; + } + + /*FALLTHRU*/ + case MODE_PHASED_ALL_RESTART: + if ((gs.phase != PHASE_NONE) || gs.numpids) { + if (gs.loglevel > LOG_DEBUG + 1) + zlog_debug + ("postponing phased global restart: restart already in " + "progress [%s], or outstanding child processes [%d]", + phase_str[gs.phase], gs.numpids); + break; + } + /* Is it too soon for a restart? */ + { + struct timeval delay; + if (time_elapsed(&delay, &gs.special->restart.time)-> + tv_sec < gs.special->restart.interval) { + if (gs.loglevel > LOG_DEBUG + 1) + zlog_debug + ("postponing phased global restart: " + "elapsed time %ld < retry interval %ld", + (long)delay.tv_sec, + gs.special->restart.interval); + break; + } + } + run_job(&gs.restart, "restart", gs.restart_command, 0, 1); + break; + default: + zlog_err("error: unknown restart mode %d", gs.mode); + break; + } } -static int -valid_command(const char *cmd) +static int wakeup_unresponsive(struct thread *t_wakeup) { - char *p; + struct daemon *dmn = THREAD_ARG(t_wakeup); - return ((p = strchr(cmd,'%')) != NULL) && (*(p+1) == 's') && !strchr(p+1,'%'); + dmn->t_wakeup = NULL; + if (dmn->state != DAEMON_UNRESPONSIVE) + zlog_err("%s: no longer unresponsive (now %s), " + "wakeup should have been cancelled!", + dmn->name, state_str[dmn->state]); + else { + SET_WAKEUP_UNRESPONSIVE(dmn); + try_restart(dmn); + } + return 0; +} + +static int wakeup_no_answer(struct thread *t_wakeup) +{ + struct daemon *dmn = THREAD_ARG(t_wakeup); + + dmn->t_wakeup = NULL; + dmn->state = DAEMON_UNRESPONSIVE; + zlog_err("%s state -> unresponsive : no response yet to ping " + "sent %ld seconds ago", dmn->name, gs.timeout); + if (gs.unresponsive_restart) { + SET_WAKEUP_UNRESPONSIVE(dmn); + try_restart(dmn); + } + return 0; +} + +static int wakeup_send_echo(struct thread *t_wakeup) +{ + static const char echocmd[] = "echo " PING_TOKEN; + ssize_t rc; + struct daemon *dmn = THREAD_ARG(t_wakeup); + + dmn->t_wakeup = NULL; + if (((rc = write(dmn->fd, echocmd, sizeof(echocmd))) < 0) || + ((size_t) rc != sizeof(echocmd))) { + char why[100 + sizeof(echocmd)]; + snprintf(why, sizeof(why), + "write '%s' returned %d instead of %u", echocmd, + (int)rc, (u_int) sizeof(echocmd)); + daemon_down(dmn, why); + } else { + gettimeofday(&dmn->echo_sent, NULL); + dmn->t_wakeup = + thread_add_timer(master, wakeup_no_answer, dmn, gs.timeout); + } + return 0; +} + +static void sigint(void) +{ + zlog_notice("Terminating on signal"); + systemd_send_stopping(); + exit(0); +} + +static int valid_command(const char *cmd) +{ + char *p; + + return ((p = strchr(cmd, '%')) != NULL) && (*(p + 1) == 's') + && !strchr(p + 1, '%'); } /* This is an ugly hack to circumvent problems with passing command-line arguments that contain spaces. The fix is to use a configuration file. */ -static char * -translate_blanks(const char *cmd, const char *blankstr) +static char *translate_blanks(const char *cmd, const char *blankstr) { - char *res; - char *p; - size_t bslen = strlen(blankstr); + char *res; + char *p; + size_t bslen = strlen(blankstr); - if (!(res = strdup(cmd))) - { - perror("strdup"); - exit(1); - } - while ((p = strstr(res,blankstr)) != NULL) - { - *p = ' '; - if (bslen != 1) - memmove(p+1,p+bslen,strlen(p+bslen)+1); - } - return res; + if (!(res = strdup(cmd))) { + perror("strdup"); + exit(1); + } + while ((p = strstr(res, blankstr)) != NULL) { + *p = ' '; + if (bslen != 1) + memmove(p + 1, p + bslen, strlen(p + bslen) + 1); + } + return res; } -struct zebra_privs_t watchfrr_privs = -{ +struct zebra_privs_t watchfrr_privs = { #ifdef VTY_GROUP - .vty_group = VTY_GROUP, + .vty_group = VTY_GROUP, #endif }; -int -main(int argc, char **argv) +int main(int argc, char **argv) { - const char *progname; - int opt; - int daemon_mode = 0; - const char *pidfile = DEFAULT_PIDFILE; - const char *special = "zebra"; - const char *blankstr = NULL; - static struct quagga_signal_t my_signals[] = - { - { - .signal = SIGINT, - .handler = sigint, - }, - { - .signal = SIGTERM, - .handler = sigint, - }, - { - .signal = SIGCHLD, - .handler = sigchild, - }, - }; + const char *progname; + int opt; + int daemon_mode = 0; + const char *pidfile = DEFAULT_PIDFILE; + const char *special = "zebra"; + const char *blankstr = NULL; + static struct quagga_signal_t my_signals[] = { + { + .signal = SIGINT, + .handler = sigint, + }, + { + .signal = SIGTERM, + .handler = sigint, + }, + { + .signal = SIGCHLD, + .handler = sigchild, + }, + }; - if ((progname = strrchr (argv[0], '/')) != NULL) - progname++; - else - progname = argv[0]; - - gs.restart.name = "all"; - while ((opt = getopt_long(argc, argv, "aAb:dek:l:m:M:i:p:r:R:S:s:t:T:zvh", - longopts, 0)) != EOF) - { - switch (opt) - { - case 0: - break; - case 'a': - if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART)) - { - fputs("Ambiguous operating mode selected.\n",stderr); - return usage(progname,1); - } - gs.mode = MODE_PHASED_ZEBRA_RESTART; - break; - case 'A': - if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART)) - { - fputs("Ambiguous operating mode selected.\n",stderr); - return usage(progname,1); - } - gs.mode = MODE_PHASED_ALL_RESTART; - break; - case 'b': - blankstr = optarg; - break; - case 'd': - daemon_mode = 1; - break; - case 'e': - gs.do_ping = 0; - break; - case 'k': - if (!valid_command(optarg)) - { - fprintf(stderr,"Invalid kill command, must contain '%%s': %s\n", - optarg); - return usage(progname,1); - } - gs.stop_command = optarg; - break; - case 'l': - { - char garbage[3]; - if ((sscanf(optarg,"%d%1s",&gs.loglevel,garbage) != 1) || - (gs.loglevel < LOG_EMERG)) - { - fprintf(stderr,"Invalid loglevel argument: %s\n",optarg); - return usage(progname,1); - } - } - break; - case 'm': - { - char garbage[3]; - if ((sscanf(optarg,"%ld%1s", - &gs.min_restart_interval,garbage) != 1) || - (gs.min_restart_interval < 0)) - { - fprintf(stderr,"Invalid min_restart_interval argument: %s\n", - optarg); - return usage(progname,1); - } - } - break; - case 'M': - { - char garbage[3]; - if ((sscanf(optarg,"%ld%1s", - &gs.max_restart_interval,garbage) != 1) || - (gs.max_restart_interval < 0)) - { - fprintf(stderr,"Invalid max_restart_interval argument: %s\n", - optarg); - return usage(progname,1); - } - } - break; - case 'i': - { - char garbage[3]; - int period; - if ((sscanf(optarg,"%d%1s",&period,garbage) != 1) || - (gs.period < 1)) - { - fprintf(stderr,"Invalid interval argument: %s\n",optarg); - return usage(progname,1); - } - gs.period = 1000*period; - } - break; - case 'p': - pidfile = optarg; - break; - case 'r': - if ((gs.mode == MODE_GLOBAL_RESTART) || - (gs.mode == MODE_SEPARATE_RESTART)) - { - fputs("Ambiguous operating mode selected.\n",stderr); - return usage(progname,1); - } - if (!valid_command(optarg)) - { - fprintf(stderr, - "Invalid restart command, must contain '%%s': %s\n", - optarg); - return usage(progname,1); - } - gs.restart_command = optarg; - if (gs.mode == MODE_MONITOR) - gs.mode = MODE_SEPARATE_RESTART; - break; - case 'R': - if (gs.mode != MODE_MONITOR) - { - fputs("Ambiguous operating mode selected.\n",stderr); - return usage(progname,1); - } - if (strchr(optarg,'%')) - { - fprintf(stderr, - "Invalid restart-all arg, must not contain '%%s': %s\n", - optarg); - return usage(progname,1); - } - gs.restart_command = optarg; - gs.mode = MODE_GLOBAL_RESTART; - break; - case 's': - if (!valid_command(optarg)) - { - fprintf(stderr,"Invalid start command, must contain '%%s': %s\n", - optarg); - return usage(progname,1); - } - gs.start_command = optarg; - break; - case 'S': - gs.vtydir = optarg; - break; - case 't': - { - char garbage[3]; - if ((sscanf(optarg,"%ld%1s",&gs.timeout,garbage) != 1) || - (gs.timeout < 1)) - { - fprintf(stderr,"Invalid timeout argument: %s\n",optarg); - return usage(progname,1); - } - } - break; - case 'T': - { - char garbage[3]; - if ((sscanf(optarg,"%ld%1s",&gs.restart_timeout,garbage) != 1) || - (gs.restart_timeout < 1)) - { - fprintf(stderr,"Invalid restart timeout argument: %s\n",optarg); - return usage(progname,1); - } - } - break; - case 'z': - gs.unresponsive_restart = 1; - break; - case 'v': - printf ("%s version %s\n", progname, FRR_VERSION); - puts("Copyright 2004 Andrew J. Schorr"); - return 0; - case 'h': - return usage(progname,0); - default: - fputs("Invalid option.\n",stderr); - return usage(progname,1); - } - } - - if (gs.unresponsive_restart && (gs.mode == MODE_MONITOR)) - { - fputs("Option -z requires a -r or -R restart option.\n",stderr); - return usage(progname,1); - } - switch (gs.mode) - { - case MODE_MONITOR: - if (gs.restart_command || gs.start_command || gs.stop_command) - { - fprintf(stderr,"No kill/(re)start commands needed for %s mode.\n", - mode_str[gs.mode]); - return usage(progname,1); - } - break; - case MODE_GLOBAL_RESTART: - case MODE_SEPARATE_RESTART: - if (!gs.restart_command || gs.start_command || gs.stop_command) - { - fprintf(stderr,"No start/kill commands needed in [%s] mode.\n", - mode_str[gs.mode]); - return usage(progname,1); - } - break; - case MODE_PHASED_ZEBRA_RESTART: - case MODE_PHASED_ALL_RESTART: - if (!gs.restart_command || !gs.start_command || !gs.stop_command) - { - fprintf(stderr, - "Need start, kill, and restart commands in [%s] mode.\n", - mode_str[gs.mode]); - return usage(progname,1); - } - break; - } - - if (blankstr) - { - if (gs.restart_command) - gs.restart_command = translate_blanks(gs.restart_command,blankstr); - if (gs.start_command) - gs.start_command = translate_blanks(gs.start_command,blankstr); - if (gs.stop_command) - gs.stop_command = translate_blanks(gs.stop_command,blankstr); - } - - gs.restart.interval = gs.min_restart_interval; - - zprivs_init (&watchfrr_privs); - - master = thread_master_create(); - cmd_init(-1); - memory_init(); - vty_init(master); - watchfrr_vty_init(); - vty_serv_sock(NULL, 0, WATCHFRR_VTYSH_PATH); - - signal_init (master, array_size(my_signals), my_signals); - srandom(time(NULL)); - - { - int i; - struct daemon *tail = NULL; - - for (i = optind; i < argc; i++) - { - struct daemon *dmn; - - if (!(dmn = (struct daemon *)calloc(1,sizeof(*dmn)))) - { - fprintf(stderr,"calloc(1,%u) failed: %s\n", - (u_int)sizeof(*dmn), safe_strerror(errno)); - return 1; - } - dmn->name = dmn->restart.name = argv[i]; - dmn->state = DAEMON_INIT; - gs.numdaemons++; - gs.numdown++; - dmn->fd = -1; - dmn->t_wakeup = thread_add_timer_msec(master,wakeup_init,dmn, - 100+(random() % 900)); - dmn->restart.interval = gs.min_restart_interval; - if (tail) - tail->next = dmn; + if ((progname = strrchr(argv[0], '/')) != NULL) + progname++; else - gs.daemons = dmn; - tail = dmn; + progname = argv[0]; + gs.restart.name = "all"; + while ((opt = + getopt_long(argc, argv, "aAb:dek:l:m:M:i:p:r:R:S:s:t:T:zvh", + longopts, 0)) != EOF) { + switch (opt) { + case 0: + break; + case 'a': + if ((gs.mode != MODE_MONITOR) + && (gs.mode != MODE_SEPARATE_RESTART)) { + fputs("Ambiguous operating mode selected.\n", + stderr); + return usage(progname, 1); + } + gs.mode = MODE_PHASED_ZEBRA_RESTART; + break; + case 'A': + if ((gs.mode != MODE_MONITOR) + && (gs.mode != MODE_SEPARATE_RESTART)) { + fputs("Ambiguous operating mode selected.\n", + stderr); + return usage(progname, 1); + } + gs.mode = MODE_PHASED_ALL_RESTART; + break; + case 'b': + blankstr = optarg; + break; + case 'd': + daemon_mode = 1; + break; + case 'e': + gs.do_ping = 0; + break; + case 'k': + if (!valid_command(optarg)) { + fprintf(stderr, + "Invalid kill command, must contain '%%s': %s\n", + optarg); + return usage(progname, 1); + } + gs.stop_command = optarg; + break; + case 'l': + { + char garbage[3]; + if ((sscanf + (optarg, "%d%1s", &gs.loglevel, + garbage) != 1) + || (gs.loglevel < LOG_EMERG)) { + fprintf(stderr, + "Invalid loglevel argument: %s\n", + optarg); + return usage(progname, 1); + } + } + break; + case 'm': + { + char garbage[3]; + if ((sscanf(optarg, "%ld%1s", + &gs.min_restart_interval, + garbage) != 1) + || (gs.min_restart_interval < 0)) { + fprintf(stderr, + "Invalid min_restart_interval argument: %s\n", + optarg); + return usage(progname, 1); + } + } + break; + case 'M': + { + char garbage[3]; + if ((sscanf(optarg, "%ld%1s", + &gs.max_restart_interval, + garbage) != 1) + || (gs.max_restart_interval < 0)) { + fprintf(stderr, + "Invalid max_restart_interval argument: %s\n", + optarg); + return usage(progname, 1); + } + } + break; + case 'i': + { + char garbage[3]; + int period; + if ((sscanf(optarg, "%d%1s", &period, garbage) + != 1) || (gs.period < 1)) { + fprintf(stderr, + "Invalid interval argument: %s\n", + optarg); + return usage(progname, 1); + } + gs.period = 1000 * period; + } + break; + case 'p': + pidfile = optarg; + break; + case 'r': + if ((gs.mode == MODE_GLOBAL_RESTART) || + (gs.mode == MODE_SEPARATE_RESTART)) { + fputs("Ambiguous operating mode selected.\n", + stderr); + return usage(progname, 1); + } + if (!valid_command(optarg)) { + fprintf(stderr, + "Invalid restart command, must contain '%%s': %s\n", + optarg); + return usage(progname, 1); + } + gs.restart_command = optarg; + if (gs.mode == MODE_MONITOR) + gs.mode = MODE_SEPARATE_RESTART; + break; + case 'R': + if (gs.mode != MODE_MONITOR) { + fputs("Ambiguous operating mode selected.\n", + stderr); + return usage(progname, 1); + } + if (strchr(optarg, '%')) { + fprintf(stderr, + "Invalid restart-all arg, must not contain '%%s': %s\n", + optarg); + return usage(progname, 1); + } + gs.restart_command = optarg; + gs.mode = MODE_GLOBAL_RESTART; + break; + case 's': + if (!valid_command(optarg)) { + fprintf(stderr, + "Invalid start command, must contain '%%s': %s\n", + optarg); + return usage(progname, 1); + } + gs.start_command = optarg; + break; + case 'S': + gs.vtydir = optarg; + break; + case 't': + { + char garbage[3]; + if ((sscanf + (optarg, "%ld%1s", &gs.timeout, + garbage) != 1) || (gs.timeout < 1)) { + fprintf(stderr, + "Invalid timeout argument: %s\n", + optarg); + return usage(progname, 1); + } + } + break; + case 'T': + { + char garbage[3]; + if ((sscanf + (optarg, "%ld%1s", &gs.restart_timeout, + garbage) != 1) + || (gs.restart_timeout < 1)) { + fprintf(stderr, + "Invalid restart timeout argument: %s\n", + optarg); + return usage(progname, 1); + } + } + break; + case 'z': + gs.unresponsive_restart = 1; + break; + case 'v': + printf("%s version %s\n", progname, FRR_VERSION); + puts("Copyright 2004 Andrew J. Schorr"); + return 0; + case 'h': + return usage(progname, 0); + default: + fputs("Invalid option.\n", stderr); + return usage(progname, 1); + } + } + + if (gs.unresponsive_restart && (gs.mode == MODE_MONITOR)) { + fputs("Option -z requires a -r or -R restart option.\n", + stderr); + return usage(progname, 1); + } + switch (gs.mode) { + case MODE_MONITOR: + if (gs.restart_command || gs.start_command || gs.stop_command) { + fprintf(stderr, + "No kill/(re)start commands needed for %s mode.\n", + mode_str[gs.mode]); + return usage(progname, 1); + } + break; + case MODE_GLOBAL_RESTART: + case MODE_SEPARATE_RESTART: + if (!gs.restart_command || gs.start_command || gs.stop_command) { + fprintf(stderr, + "No start/kill commands needed in [%s] mode.\n", + mode_str[gs.mode]); + return usage(progname, 1); + } + break; + case MODE_PHASED_ZEBRA_RESTART: + case MODE_PHASED_ALL_RESTART: + if (!gs.restart_command || !gs.start_command + || !gs.stop_command) { + fprintf(stderr, + "Need start, kill, and restart commands in [%s] mode.\n", + mode_str[gs.mode]); + return usage(progname, 1); + } + break; + } + + if (blankstr) { + if (gs.restart_command) + gs.restart_command = + translate_blanks(gs.restart_command, blankstr); + if (gs.start_command) + gs.start_command = + translate_blanks(gs.start_command, blankstr); + if (gs.stop_command) + gs.stop_command = + translate_blanks(gs.stop_command, blankstr); + } + + gs.restart.interval = gs.min_restart_interval; + + zprivs_init(&watchfrr_privs); + + master = thread_master_create(); + cmd_init(-1); + memory_init(); + vty_init(master); + watchfrr_vty_init(); + vty_serv_sock(NULL, 0, WATCHFRR_VTYSH_PATH); + + signal_init(master, array_size(my_signals), my_signals); + srandom(time(NULL)); + + { + int i; + struct daemon *tail = NULL; + + for (i = optind; i < argc; i++) { + struct daemon *dmn; + + if (!(dmn = (struct daemon *)calloc(1, sizeof(*dmn)))) { + fprintf(stderr, "calloc(1,%u) failed: %s\n", + (u_int) sizeof(*dmn), + safe_strerror(errno)); + return 1; + } + dmn->name = dmn->restart.name = argv[i]; + dmn->state = DAEMON_INIT; + gs.numdaemons++; + gs.numdown++; + dmn->fd = -1; + dmn->t_wakeup = + thread_add_timer_msec(master, wakeup_init, dmn, + 100 + (random() % 900)); + dmn->restart.interval = gs.min_restart_interval; + if (tail) + tail->next = dmn; + else + gs.daemons = dmn; + tail = dmn; + + if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) || + (gs.mode == MODE_PHASED_ALL_RESTART)) && + !strcmp(dmn->name, special)) + gs.special = dmn; + } + } + if (!gs.daemons) { + fputs("Must specify one or more daemons to monitor.\n", stderr); + return usage(progname, 1); + } if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) || - (gs.mode == MODE_PHASED_ALL_RESTART)) && - !strcmp(dmn->name,special)) - gs.special = dmn; - } - } - if (!gs.daemons) - { - fputs("Must specify one or more daemons to monitor.\n",stderr); - return usage(progname,1); - } - if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) || - (gs.mode == MODE_PHASED_ALL_RESTART)) && !gs.special) - { - fprintf(stderr,"In mode [%s], but cannot find master daemon %s\n", - mode_str[gs.mode],special); - return usage(progname,1); - } - - zlog_default = openzlog(progname, ZLOG_WATCHFRR, 0, - LOG_CONS|LOG_NDELAY|LOG_PID, LOG_DAEMON); - zlog_set_level(NULL, ZLOG_DEST_MONITOR, ZLOG_DISABLED); - if (daemon_mode) - { - zlog_set_level(NULL, ZLOG_DEST_SYSLOG, MIN(gs.loglevel,LOG_DEBUG)); - if (daemon (0, 0) < 0) - { - fprintf(stderr, "Watchfrr daemon failed: %s", strerror(errno)); - exit (1); + (gs.mode == MODE_PHASED_ALL_RESTART)) && !gs.special) { + fprintf(stderr, + "In mode [%s], but cannot find master daemon %s\n", + mode_str[gs.mode], special); + return usage(progname, 1); } - } - else - zlog_set_level(NULL, ZLOG_DEST_STDOUT, MIN(gs.loglevel,LOG_DEBUG)); - /* Make sure we're not already running. */ - pid_output (pidfile); + zlog_default = openzlog(progname, ZLOG_WATCHFRR, 0, + LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON); + zlog_set_level(NULL, ZLOG_DEST_MONITOR, ZLOG_DISABLED); + if (daemon_mode) { + zlog_set_level(NULL, ZLOG_DEST_SYSLOG, + MIN(gs.loglevel, LOG_DEBUG)); + if (daemon(0, 0) < 0) { + fprintf(stderr, "Watchfrr daemon failed: %s", + strerror(errno)); + exit(1); + } + } else + zlog_set_level(NULL, ZLOG_DEST_STDOUT, + MIN(gs.loglevel, LOG_DEBUG)); - /* Announce which daemons are being monitored. */ - { - struct daemon *dmn; - size_t len = 0; + /* Make sure we're not already running. */ + pid_output(pidfile); - for (dmn = gs.daemons; dmn; dmn = dmn->next) - len += strlen(dmn->name)+1; - - { - char buf[len+1]; - char *p = buf; - - for (dmn = gs.daemons; dmn; dmn = dmn->next) + /* Announce which daemons are being monitored. */ { - if (p != buf) - *p++ = ' '; - strcpy(p,dmn->name); - p += strlen(p); + struct daemon *dmn; + size_t len = 0; + + for (dmn = gs.daemons; dmn; dmn = dmn->next) + len += strlen(dmn->name) + 1; + + { + char buf[len + 1]; + char *p = buf; + + for (dmn = gs.daemons; dmn; dmn = dmn->next) { + if (p != buf) + *p++ = ' '; + strcpy(p, dmn->name); + p += strlen(p); + } + zlog_notice("%s %s watching [%s], mode [%s]", + progname, FRR_VERSION, buf, + mode_str[gs.mode]); + } } - zlog_notice("%s %s watching [%s], mode [%s]", - progname, FRR_VERSION, buf, mode_str[gs.mode]); - } - } - { - struct thread thread; + { + struct thread thread; - while (thread_fetch (master, &thread)) - thread_call (&thread); - } + while (thread_fetch(master, &thread)) + thread_call(&thread); + } - systemd_send_stopping (); - /* Not reached. */ - return 0; + systemd_send_stopping(); + /* Not reached. */ + return 0; } diff --git a/watchfrr/watchfrr.h b/watchfrr/watchfrr.h index 4a479c72e6..719ad4dfd8 100644 --- a/watchfrr/watchfrr.h +++ b/watchfrr/watchfrr.h @@ -26,4 +26,4 @@ extern void watchfrr_vty_init(void); extern pid_t integrated_write_pid; extern void integrated_write_sigchld(int status); -#endif /* FRR_WATCHFRR_H */ +#endif /* FRR_WATCHFRR_H */ diff --git a/watchfrr/watchfrr_vty.c b/watchfrr/watchfrr_vty.c index 4fffb020d7..bf3e1510a7 100644 --- a/watchfrr/watchfrr_vty.c +++ b/watchfrr/watchfrr_vty.c @@ -31,18 +31,18 @@ pid_t integrated_write_pid; static int integrated_result_fd; -DEFUN (config_write_integrated, - config_write_integrated_cmd, - "write integrated", - "Write running configuration to memory, network, or terminal\n" - "Write integrated all-daemon Frr.conf file\n") +DEFUN(config_write_integrated, + config_write_integrated_cmd, + "write integrated", + "Write running configuration to memory, network, or terminal\n" + "Write integrated all-daemon Frr.conf file\n") { pid_t child; sigset_t oldmask, sigmask; if (integrated_write_pid != -1) { vty_out(vty, "%% configuration write already in progress.%s", - VTY_NEWLINE); + VTY_NEWLINE); return CMD_WARNING; } @@ -61,20 +61,20 @@ DEFUN (config_write_integrated, child = fork(); if (child == -1) { vty_out(vty, "%% configuration write fork() failed: %s.%s", - safe_strerror(errno), VTY_NEWLINE); + safe_strerror(errno), VTY_NEWLINE); sigprocmask(SIG_SETMASK, &oldmask, NULL); return CMD_WARNING; } if (child != 0) { - /* note: the VTY won't write a command return value to vtysh; the - * session temporarily enters an intentional "hang" state. This is - * to make sure latency in vtysh doing the config write (several - * seconds is not rare to see) does not interfere with watchfrr's - * supervisor job. - * - * The fd is duplicated here so we don't need to hold a vty pointer - * (which could become invalid in the meantime). - */ + /* note: the VTY won't write a command return value to vtysh; the + * session temporarily enters an intentional "hang" state. This is + * to make sure latency in vtysh doing the config write (several + * seconds is not rare to see) does not interfere with watchfrr's + * supervisor job. + * + * The fd is duplicated here so we don't need to hold a vty pointer + * (which could become invalid in the meantime). + */ integrated_write_pid = child; integrated_result_fd = dup(vty->wfd); sigprocmask(SIG_SETMASK, &oldmask, NULL); @@ -93,7 +93,7 @@ DEFUN (config_write_integrated, /* unbuffered write; we just messed with stdout... */ char msg[512]; snprintf(msg, sizeof(msg), "error executing %s: %s\n", - VTYSH_BIN_PATH, safe_strerror(errno)); + VTYSH_BIN_PATH, safe_strerror(errno)); write(1, msg, strlen(msg)); exit(1); } @@ -104,11 +104,11 @@ void integrated_write_sigchld(int status) if (WIFEXITED(status)) { zlog_info("configuration write completed with exit code %d", - WEXITSTATUS(status)); + WEXITSTATUS(status)); reply[3] = WEXITSTATUS(status); } else if (WIFSIGNALED(status)) { zlog_warn("configuration write terminated by signal %d", - WTERMSIG(status)); + WTERMSIG(status)); } else { zlog_warn("configuration write terminated"); }