diff --git a/configure.ac b/configure.ac index 5d4b26eba0..9ba95fe98b 100644 --- a/configure.ac +++ b/configure.ac @@ -783,7 +783,6 @@ fi if test "$enable_datacenter" = "yes" ; then AC_DEFINE([HAVE_DATACENTER], [1], [Compile extensions for a DataCenter]) - AC_MSG_WARN([The --enable-datacenter compile time option is deprecated. Please modify the init script to pass -F datacenter to the daemons instead.]) DFLT_NAME="datacenter" else DFLT_NAME="traditional" @@ -2695,6 +2694,16 @@ vici socket path : ${vici_socket} The above user and group must have read/write access to the state file directory and to the config files in the config file directory." +if test -n "$enable_datacenter"; then + AC_MSG_WARN([The --enable-datacenter compile time option is deprecated. Please modify the init script to pass -F datacenter to the daemons instead.]) +fi +if test -n "$enable_time_check"; then + AC_MSG_WARN([The --enable-time-check compile time option is deprecated. Please use the service cputime-stats configuration option instead.]) +fi +if test -n "$enable_cpu_time"; then + AC_MSG_WARN([The --enable-cpu-time compile time option is deprecated. Please use the service cputime-warning NNN configuration option instead.]) +fi + if test "$enable_doc" != "no" -a "$frr_py_mod_sphinx" = "false"; then AC_MSG_WARN([sphinx is missing but required to build documentation]) fi diff --git a/lib/command.c b/lib/command.c index 7be54907ed..e00d84a051 100644 --- a/lib/command.c +++ b/lib/command.c @@ -434,6 +434,36 @@ static int config_write_host(struct vty *vty) } log_config_write(vty); + /* print disable always, but enable only if default is flipped + * => prep for future removal of compile-time knob + */ + if (!cputime_enabled) + vty_out(vty, "no service cputime-stats\n"); +#ifdef EXCLUDE_CPU_TIME + else + vty_out(vty, "service cputime-stats\n"); +#endif + + if (!cputime_threshold) + vty_out(vty, "no service cputime-warning\n"); +#if defined(CONSUMED_TIME_CHECK) && CONSUMED_TIME_CHECK != 5000000 + else /* again, always print non-default */ +#else + else if (cputime_threshold != 5000000) +#endif + vty_out(vty, "service cputime-warning %lu\n", + cputime_threshold); + + if (!walltime_threshold) + vty_out(vty, "no service walltime-warning\n"); +#if defined(CONSUMED_TIME_CHECK) && CONSUMED_TIME_CHECK != 5000000 + else /* again, always print non-default */ +#else + else if (walltime_threshold != 5000000) +#endif + vty_out(vty, "service walltime-warning %lu\n", + walltime_threshold); + if (host.advanced) vty_out(vty, "service advanced-vty\n"); diff --git a/lib/subdir.am b/lib/subdir.am index 4015c67ea8..b4ba078e46 100644 --- a/lib/subdir.am +++ b/lib/subdir.am @@ -168,6 +168,7 @@ clippy_scan += \ lib/northbound_cli.c \ lib/plist.c \ lib/routemap_cli.c \ + lib/thread.c \ lib/vty.c \ # end diff --git a/lib/thread.c b/lib/thread.c index 3af89fad5a..dd5c1fed40 100644 --- a/lib/thread.c +++ b/lib/thread.c @@ -90,7 +90,22 @@ static struct list *masters; static void thread_free(struct thread_master *master, struct thread *thread); +#ifndef EXCLUDE_CPU_TIME +#define EXCLUDE_CPU_TIME 0 +#endif +#ifndef CONSUMED_TIME_CHECK +#define CONSUMED_TIME_CHECK 0 +#endif + +bool cputime_enabled = !EXCLUDE_CPU_TIME; +unsigned long cputime_threshold = CONSUMED_TIME_CHECK; +unsigned long walltime_threshold = CONSUMED_TIME_CHECK; + /* CLI start ---------------------------------------------------------------- */ +#ifndef VTYSH_EXTRACT_PL +#include "lib/thread_clippy.c" +#endif + static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a) { int size = sizeof(a->func); @@ -120,7 +135,6 @@ static void cpu_record_hash_free(void *a) XFREE(MTYPE_THREAD_STATS, hist); } -#ifndef EXCLUDE_CPU_TIME static void vty_out_cpu_thread_history(struct vty *vty, struct cpu_thread_history *a) { @@ -187,6 +201,14 @@ static void cpu_record_print(struct vty *vty, uint8_t filter) struct thread_master *m; struct listnode *ln; + if (!cputime_enabled) + vty_out(vty, + "\n" + "Collecting CPU time statistics is currently disabled. Following statistics\n" + "will be zero or may display data from when collection was enabled. Use the\n" + " \"service cputime-stats\" command to start collecting data.\n" + "\nCounters and wallclock times are always maintained and should be accurate.\n"); + memset(&tmp, 0, sizeof(tmp)); tmp.funcname = "TOTAL"; tmp.types = filter; @@ -236,7 +258,6 @@ static void cpu_record_print(struct vty *vty, uint8_t filter) if (tmp.total_calls > 0) vty_out_cpu_thread_history(vty, &tmp); } -#endif static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[]) { @@ -306,7 +327,6 @@ static uint8_t parse_filter(const char *filterstr) return filter; } -#ifndef EXCLUDE_CPU_TIME DEFUN_NOSH (show_thread_cpu, show_thread_cpu_cmd, "show thread cpu [FILTER]", @@ -331,7 +351,61 @@ DEFUN_NOSH (show_thread_cpu, cpu_record_print(vty, filter); return CMD_SUCCESS; } -#endif + +DEFPY (service_cputime_stats, + service_cputime_stats_cmd, + "[no] service cputime-stats", + NO_STR + "Set up miscellaneous service\n" + "Collect CPU usage statistics\n") +{ + cputime_enabled = !no; + return CMD_SUCCESS; +} + +DEFPY (service_cputime_warning, + service_cputime_warning_cmd, + "[no] service cputime-warning (1-4294967295)", + NO_STR + "Set up miscellaneous service\n" + "Warn for tasks exceeding CPU usage threshold\n" + "Warning threshold in milliseconds\n") +{ + if (no) + cputime_threshold = 0; + else + cputime_threshold = cputime_warning * 1000; + return CMD_SUCCESS; +} + +ALIAS (service_cputime_warning, + no_service_cputime_warning_cmd, + "no service cputime-warning", + NO_STR + "Set up miscellaneous service\n" + "Warn for tasks exceeding CPU usage threshold\n") + +DEFPY (service_walltime_warning, + service_walltime_warning_cmd, + "[no] service walltime-warning (1-4294967295)", + NO_STR + "Set up miscellaneous service\n" + "Warn for tasks exceeding total wallclock threshold\n" + "Warning threshold in milliseconds\n") +{ + if (no) + walltime_threshold = 0; + else + walltime_threshold = walltime_warning * 1000; + return CMD_SUCCESS; +} + +ALIAS (service_walltime_warning, + no_service_walltime_warning_cmd, + "no service walltime-warning", + NO_STR + "Set up miscellaneous service\n" + "Warn for tasks exceeding total wallclock threshold\n") static void show_thread_poll_helper(struct vty *vty, struct thread_master *m) { @@ -421,11 +495,15 @@ DEFUN (clear_thread_cpu, void thread_cmd_init(void) { -#ifndef EXCLUDE_CPU_TIME install_element(VIEW_NODE, &show_thread_cpu_cmd); -#endif install_element(VIEW_NODE, &show_thread_poll_cmd); install_element(ENABLE_NODE, &clear_thread_cpu_cmd); + + install_element(CONFIG_NODE, &service_cputime_stats_cmd); + install_element(CONFIG_NODE, &service_cputime_warning_cmd); + install_element(CONFIG_NODE, &no_service_cputime_warning_cmd); + install_element(CONFIG_NODE, &service_walltime_warning_cmd); + install_element(CONFIG_NODE, &no_service_walltime_warning_cmd); } /* CLI end ------------------------------------------------------------------ */ @@ -1784,9 +1862,8 @@ void thread_getrusage(RUSAGE_T *r) #define FRR_RUSAGE RUSAGE_SELF #endif monotime(&r->real); -#ifndef EXCLUDE_CPU_TIME - getrusage(FRR_RUSAGE, &(r->cpu)); -#endif + if (cputime_enabled) + getrusage(FRR_RUSAGE, &(r->cpu)); } /* @@ -1802,13 +1879,14 @@ void thread_getrusage(RUSAGE_T *r) */ void thread_call(struct thread *thread) { -#ifndef EXCLUDE_CPU_TIME - _Atomic unsigned long realtime, cputime; - unsigned long exp; - unsigned long helper; -#endif RUSAGE_T before, after; + /* if the thread being called is the CLI, it may change cputime_enabled + * ("service cputime-stats" command), which can result in nonsensical + * and very confusing warnings + */ + bool cputime_enabled_here = cputime_enabled; + if (thread->master->ready_run_loop) before = thread->master->last_getrusage; else @@ -1828,43 +1906,45 @@ void thread_call(struct thread *thread) GETRUSAGE(&after); thread->master->last_getrusage = after; -#ifndef EXCLUDE_CPU_TIME - realtime = thread_consumed_time(&after, &before, &helper); - cputime = helper; + unsigned long walltime, cputime; + unsigned long exp; - /* update realtime */ - atomic_fetch_add_explicit(&thread->hist->real.total, realtime, + walltime = thread_consumed_time(&after, &before, &cputime); + + /* update walltime */ + atomic_fetch_add_explicit(&thread->hist->real.total, walltime, memory_order_seq_cst); exp = atomic_load_explicit(&thread->hist->real.max, memory_order_seq_cst); - while (exp < realtime + while (exp < walltime && !atomic_compare_exchange_weak_explicit( - &thread->hist->real.max, &exp, realtime, - memory_order_seq_cst, memory_order_seq_cst)) + &thread->hist->real.max, &exp, walltime, + memory_order_seq_cst, memory_order_seq_cst)) ; - /* update cputime */ - atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime, - memory_order_seq_cst); - exp = atomic_load_explicit(&thread->hist->cpu.max, - memory_order_seq_cst); - while (exp < cputime - && !atomic_compare_exchange_weak_explicit( - &thread->hist->cpu.max, &exp, cputime, - memory_order_seq_cst, memory_order_seq_cst)) - ; + if (cputime_enabled_here && cputime_enabled) { + /* update cputime */ + atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime, + memory_order_seq_cst); + exp = atomic_load_explicit(&thread->hist->cpu.max, + memory_order_seq_cst); + while (exp < cputime + && !atomic_compare_exchange_weak_explicit( + &thread->hist->cpu.max, &exp, cputime, + memory_order_seq_cst, memory_order_seq_cst)) + ; + } atomic_fetch_add_explicit(&thread->hist->total_calls, 1, memory_order_seq_cst); atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type, memory_order_seq_cst); -#ifdef CONSUMED_TIME_CHECK - if (cputime > CONSUMED_TIME_CHECK) { + if (cputime_enabled_here && cputime_enabled && cputime_threshold + && cputime > cputime_threshold) { /* - * We have a CPU Hog on our hands. The time FRR - * has spent doing actual work ( not sleeping ) - * is greater than 5 seconds. + * We have a CPU Hog on our hands. The time FRR has spent + * doing actual work (not sleeping) is greater than 5 seconds. * Whinge about it now, so we're aware this is yet another task * to fix. */ @@ -1874,13 +1954,13 @@ void thread_call(struct thread *thread) EC_LIB_SLOW_THREAD_CPU, "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)", thread->xref->funcname, (unsigned long)thread->func, - realtime / 1000, cputime / 1000); - } else if (realtime > CONSUMED_TIME_CHECK) { + walltime / 1000, cputime / 1000); + + } else if (walltime_threshold && walltime > walltime_threshold) { /* - * The runtime for a task is greater than 5 seconds, but - * the cpu time is under 5 seconds. Let's whine - * about this because this could imply some sort of - * scheduling issue. + * The runtime for a task is greater than 5 seconds, but the + * cpu time is under 5 seconds. Let's whine about this because + * this could imply some sort of scheduling issue. */ atomic_fetch_add_explicit(&thread->hist->total_wall_warn, 1, memory_order_seq_cst); @@ -1888,10 +1968,8 @@ void thread_call(struct thread *thread) EC_LIB_SLOW_THREAD_WALL, "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)", thread->xref->funcname, (unsigned long)thread->func, - realtime / 1000, cputime / 1000); + walltime / 1000, cputime / 1000); } -#endif /* CONSUMED_TIME_CHECK */ -#endif /* Exclude CPU Time */ } /* Execute thread */ diff --git a/lib/thread.h b/lib/thread.h index fee728dbf9..737ed005c5 100644 --- a/lib/thread.h +++ b/lib/thread.h @@ -33,6 +33,13 @@ extern "C" { #endif +extern bool cputime_enabled; +extern unsigned long cputime_threshold; +/* capturing wallclock time is always enabled since it is fast (reading + * hardware TSC w/o syscalls) + */ +extern unsigned long walltime_threshold; + struct rusage_t { struct rusage cpu; struct timeval real; diff --git a/lib/vty.c b/lib/vty.c index 50d116c564..f64ab83847 100644 --- a/lib/vty.c +++ b/lib/vty.c @@ -502,37 +502,37 @@ static int vty_command(struct vty *vty, char *buf) zlog_notice("%s%s", prompt_str, buf); } -#ifdef CONSUMED_TIME_CHECK - { - RUSAGE_T before; - RUSAGE_T after; - unsigned long realtime, cputime; + RUSAGE_T before; + RUSAGE_T after; + unsigned long walltime, cputime; - GETRUSAGE(&before); -#endif /* CONSUMED_TIME_CHECK */ + /* cmd_execute() may change cputime_enabled if we're executing the + * "service cputime-stats" command, which can result in nonsensical + * and very confusing warnings + */ + bool cputime_enabled_here = cputime_enabled; - ret = cmd_execute(vty, buf, NULL, 0); + GETRUSAGE(&before); - /* Get the name of the protocol if any */ - protocolname = frr_protoname; + ret = cmd_execute(vty, buf, NULL, 0); -#ifdef CONSUMED_TIME_CHECK - GETRUSAGE(&after); - realtime = thread_consumed_time(&after, &before, &cputime); - if (cputime > CONSUMED_TIME_CHECK) { - /* Warn about CPU hog that must be fixed. */ - flog_warn( - EC_LIB_SLOW_THREAD_CPU, - "CPU HOG: command took %lums (cpu time %lums): %s", - realtime / 1000, cputime / 1000, buf); - } else if (realtime > CONSUMED_TIME_CHECK) { - flog_warn( - EC_LIB_SLOW_THREAD_WALL, - "STARVATION: command took %lums (cpu time %lums): %s", - realtime / 1000, cputime / 1000, buf); - } - } -#endif /* CONSUMED_TIME_CHECK */ + GETRUSAGE(&after); + + walltime = thread_consumed_time(&after, &before, &cputime); + + if (cputime_enabled_here && cputime_enabled && cputime_threshold + && cputime > cputime_threshold) + /* Warn about CPU hog that must be fixed. */ + flog_warn(EC_LIB_SLOW_THREAD_CPU, + "CPU HOG: command took %lums (cpu time %lums): %s", + walltime / 1000, cputime / 1000, buf); + else if (walltime_threshold && walltime > walltime_threshold) + flog_warn(EC_LIB_SLOW_THREAD_WALL, + "STARVATION: command took %lums (cpu time %lums): %s", + walltime / 1000, cputime / 1000, buf); + + /* Get the name of the protocol if any */ + protocolname = frr_protoname; if (ret != CMD_SUCCESS) switch (ret) { diff --git a/vtysh/vtysh.c b/vtysh/vtysh.c index 41b7bc8a10..507c6ce882 100644 --- a/vtysh/vtysh.c +++ b/vtysh/vtysh.c @@ -2851,7 +2851,6 @@ DEFUN (vtysh_show_poll, return show_per_daemon(vty, argv, argc, "Thread statistics for %s:\n"); } -#ifndef EXCLUDE_CPU_TIME DEFUN (vtysh_show_thread, vtysh_show_thread_cmd, "show thread cpu [FILTER]", @@ -2862,7 +2861,6 @@ DEFUN (vtysh_show_thread, { return show_per_daemon(vty, argv, argc, "Thread statistics for %s:\n"); } -#endif DEFUN (vtysh_show_work_queues, vtysh_show_work_queues_cmd, @@ -4567,9 +4565,7 @@ void vtysh_init_vty(void) install_element(VIEW_NODE, &vtysh_show_modules_cmd); install_element(VIEW_NODE, &vtysh_show_work_queues_cmd); install_element(VIEW_NODE, &vtysh_show_work_queues_daemon_cmd); -#ifndef EXCLUDE_CPU_TIME install_element(VIEW_NODE, &vtysh_show_thread_cmd); -#endif install_element(VIEW_NODE, &vtysh_show_poll_cmd); /* Logging */