mirror of
https://git.proxmox.com/git/mirror_frr
synced 2025-08-12 14:01:11 +00:00
Merge pull request #4988 from donaldsharp/watchfrr_ignore_daemon
Watchfrr ignore daemon
This commit is contained in:
commit
f0b7ed8823
@ -58,6 +58,7 @@ Protocols
|
||||
vnc
|
||||
vrrp
|
||||
bmp
|
||||
watchfrr
|
||||
|
||||
########
|
||||
Appendix
|
||||
|
@ -43,6 +43,7 @@ user_RSTFILES = \
|
||||
doc/user/zebra.rst \
|
||||
doc/user/bfd.rst \
|
||||
doc/user/flowspec.rst \
|
||||
doc/user/watchfrr.rst \
|
||||
# end
|
||||
|
||||
EXTRA_DIST += \
|
||||
|
30
doc/user/watchfrr.rst
Normal file
30
doc/user/watchfrr.rst
Normal file
@ -0,0 +1,30 @@
|
||||
.. _watchfrr:
|
||||
|
||||
********
|
||||
WATCHFRR
|
||||
********
|
||||
|
||||
:abbr:`WATCHFRR` is a daemon that handles failed daemon processes and
|
||||
intelligently restarts them as needed.
|
||||
|
||||
Starting WATCHFRR
|
||||
=================
|
||||
|
||||
WATCHFRR is started as per normal systemd startup and typically does not
|
||||
require end-user management.
|
||||
|
||||
WATCHFRR commands
|
||||
=================
|
||||
|
||||
.. index:: show watchfrr
|
||||
.. clicmd:: show watchfrr
|
||||
|
||||
Give status information about the state of the different daemons being
|
||||
watched by WATCHFRR.
|
||||
|
||||
.. index:: [no] watchfrr ignore DAEMON
|
||||
.. clicmd:: [no] watchfrr ignore DAEMON
|
||||
|
||||
Tell WATCHFRR to ignore a particular DAEMON if it goes unresponsive.
|
||||
This is particularly useful when you are a developer and need to debug
|
||||
a working system, without watchfrr pulling the rug out from under you.
|
@ -19,3 +19,6 @@ watchfrr_watchfrr_SOURCES = \
|
||||
watchfrr/watchfrr_errors.c \
|
||||
watchfrr/watchfrr_vty.c \
|
||||
# end
|
||||
|
||||
watchfrr/watchfrr_vty_clippy.c: $(CLIPPY_DEPS)
|
||||
watchfrr/watchfrr_vty.$(OBJEXT): watchfrr/watchfrr_vty_clippy.c
|
||||
|
@ -159,6 +159,15 @@ struct daemon {
|
||||
struct thread *t_write;
|
||||
struct daemon *next;
|
||||
struct restart_info restart;
|
||||
|
||||
/*
|
||||
* For a given daemon, if we've turned on ignore timeouts
|
||||
* ignore the timeout value and assume everything is ok
|
||||
* This is for daemon debugging w/ gdb after we have started
|
||||
* FRR and realize we have something that needs to be looked
|
||||
* at
|
||||
*/
|
||||
bool ignore_timeout;
|
||||
};
|
||||
|
||||
#define OPTION_MINRESTART 2000
|
||||
@ -191,6 +200,25 @@ static void phase_check(void);
|
||||
static void restart_done(struct daemon *dmn);
|
||||
|
||||
static const char *progname;
|
||||
|
||||
void watchfrr_set_ignore_daemon(struct vty *vty, const char *dname, bool ignore)
|
||||
{
|
||||
struct daemon *dmn;
|
||||
|
||||
for (dmn = gs.daemons; dmn; dmn = dmn->next) {
|
||||
if (strncmp(dmn->name, dname, strlen(dmn->name)) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (dmn) {
|
||||
dmn->ignore_timeout = ignore;
|
||||
vty_out(vty, "%s switching to %s\n", dmn->name,
|
||||
ignore ? "ignore" : "watch");
|
||||
} else
|
||||
vty_out(vty, "%s is not configured for running at the moment",
|
||||
dname);
|
||||
}
|
||||
|
||||
static void printhelp(FILE *target)
|
||||
{
|
||||
fprintf(target,
|
||||
@ -533,7 +561,9 @@ static int wakeup_init(struct thread *t_wakeup)
|
||||
static void restart_done(struct daemon *dmn)
|
||||
{
|
||||
if (dmn->state != DAEMON_DOWN) {
|
||||
zlog_warn("wtf?");
|
||||
zlog_warn(
|
||||
"Daemon: %s: is in %s state but expected it to be in DAEMON_DOWN state",
|
||||
dmn->name, state_str[dmn->state]);
|
||||
return;
|
||||
}
|
||||
if (dmn->t_wakeup)
|
||||
@ -961,6 +991,8 @@ static int wakeup_no_answer(struct thread *t_wakeup)
|
||||
|
||||
dmn->t_wakeup = NULL;
|
||||
dmn->state = DAEMON_UNRESPONSIVE;
|
||||
if (dmn->ignore_timeout)
|
||||
return 0;
|
||||
flog_err(EC_WATCHFRR_CONNECTION,
|
||||
"%s state -> unresponsive : no response yet to ping "
|
||||
"sent %ld seconds ago",
|
||||
@ -1014,7 +1046,8 @@ void watchfrr_status(struct vty *vty)
|
||||
(long)gs.restart.pid);
|
||||
|
||||
for (dmn = gs.daemons; dmn; dmn = dmn->next) {
|
||||
vty_out(vty, " %-20s %s\n", dmn->name, state_str[dmn->state]);
|
||||
vty_out(vty, " %-20s %s%s", dmn->name, state_str[dmn->state],
|
||||
dmn->ignore_timeout ? "/Ignoring Timeout\n" : "\n");
|
||||
if (dmn->restart.pid)
|
||||
vty_out(vty, " restart running, pid %ld\n",
|
||||
(long)dmn->restart.pid);
|
||||
|
@ -41,4 +41,6 @@ extern void watchfrr_status(struct vty *vty);
|
||||
*/
|
||||
extern bool check_all_up(void);
|
||||
|
||||
extern void watchfrr_set_ignore_daemon(struct vty *vty, const char *dname,
|
||||
bool ignore);
|
||||
#endif /* FRR_WATCHFRR_H */
|
||||
|
@ -134,6 +134,23 @@ DEFUN (show_watchfrr,
|
||||
return CMD_SUCCESS;
|
||||
}
|
||||
|
||||
#ifndef VTYSH_EXTRACT_PL
|
||||
#include "watchfrr/watchfrr_vty_clippy.c"
|
||||
#endif
|
||||
|
||||
DEFPY (watchfrr_ignore_daemon,
|
||||
watchfrr_ignore_daemon_cmd,
|
||||
"[no] watchfrr ignore DAEMON$dname",
|
||||
NO_STR
|
||||
"Watchfrr Specific sub-command\n"
|
||||
"Ignore a specified daemon when it does not respond to echo request\n"
|
||||
"The daemon to ignore\n")
|
||||
{
|
||||
watchfrr_set_ignore_daemon(vty, dname, no ? false : true );
|
||||
|
||||
return CMD_SUCCESS;
|
||||
}
|
||||
|
||||
void integrated_write_sigchld(int status)
|
||||
{
|
||||
uint8_t reply[4] = {0, 0, 0, CMD_WARNING};
|
||||
@ -168,6 +185,9 @@ void watchfrr_vty_init(void)
|
||||
integrated_write_pid = -1;
|
||||
install_element(ENABLE_NODE, &config_write_integrated_cmd);
|
||||
install_element(ENABLE_NODE, &show_debugging_watchfrr_cmd);
|
||||
|
||||
install_element(ENABLE_NODE, &watchfrr_ignore_daemon_cmd);
|
||||
|
||||
install_element(CONFIG_NODE, &show_debugging_watchfrr_cmd);
|
||||
install_element(VIEW_NODE, &show_watchfrr_cmd);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user