diff --git a/doc/developer/topotests.rst b/doc/developer/topotests.rst index 5f90b8c5a4..6ccb00c772 100644 --- a/doc/developer/topotests.rst +++ b/doc/developer/topotests.rst @@ -402,6 +402,63 @@ environment. .. _screen: https://www.gnu.org/software/screen/ .. _tmux: https://github.com/tmux/tmux/wiki +Capturing Packets +""""""""""""""""" + +One can view and capture packets on any of the networks or interfaces defined by +the topotest by specifying the ``--pcap=NET|INTF|all[,NET|INTF,...]`` CLI option +as shown in the examples below. + +.. code:: shell + + # Capture on all networks in isis_topo1 test + sudo -E pytest isis_topo1 --pcap=all + + # Capture on `sw1` network + sudo -E pytest isis_topo1 --pcap=sw1 + + # Capture on `sw1` network and on interface `eth0` on router `r2` + sudo -E pytest isis_topo1 --pcap=sw1,r2:r2-eth0 + +For each capture a window is opened displaying a live summary of the captured +packets. Additionally, the entire packet stream is captured in a pcap file in +the tests log directory e.g.,:: + +.. code:: console + + $ sudo -E pytest isis_topo1 --pcap=sw1,r2:r2-eth0 + ... + $ ls -l /tmp/topotests/isis_topo1.test_isis_topo1/ + -rw------- 1 root root 45172 Apr 19 05:30 capture-r2-r2-eth0.pcap + -rw------- 1 root root 48412 Apr 19 05:30 capture-sw1.pcap + ... +- +Viewing Live Daemon Logs +"""""""""""""""""""""""" + +One can live view daemon or the frr logs in separate windows using the +``--logd`` CLI option as shown below. + +.. code:: shell + + # View `ripd` logs on all routers in test + sudo -E pytest rip_allow_ecmp --logd=ripd + + # View `ripd` logs on all routers and `mgmtd` log on `r1` + sudo -E pytest rip_allow_ecmp --logd=ripd --logd=mgmtd,r1 + +For each capture a window is opened displaying a live summary of the captured +packets. Additionally, the entire packet stream is captured in a pcap file in +the tests log directory e.g.,:: + +When using a unified log file `frr.log` one substitutes `frr` for the daemon +name in the ``--logd`` CLI option, e.g., + +.. code:: shell + + # View `frr` log on all routers in test + sudo -E pytest some_test_suite --logd=frr + Spawning Debugging CLI, ``vtysh`` or Shells on Routers on Test Failure """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" @@ -421,12 +478,30 @@ the help command from within a CLI launched on error: test_bgp_multiview_topo1/test_bgp_routingTable> help - Commands: - help :: this help - sh [hosts] :: execute on - term [hosts] :: open shell terminals for hosts - vtysh [hosts] :: open vtysh terminals for hosts - [hosts] :: execute vtysh-command on hosts + Basic Commands: + cli :: open a secondary CLI window + help :: this help + hosts :: list hosts + quit :: quit the cli + + HOST can be a host or one of the following: + - '*' for all hosts + - '.' for the parent munet + - a regex specified between '/' (e.g., '/rtr.*/') + + New Window Commands: + logd HOST [HOST ...] DAEMON :: tail -f on the logfile of the given DAEMON for the given HOST[S] + pcap NETWORK :: capture packets from NETWORK into file capture-NETWORK.pcap the command is run within a new window which also shows packet summaries. NETWORK can also be an interface specified as HOST:INTF. To capture inside the host namespace. + stderr HOST [HOST ...] DAEMON :: tail -f on the stderr of the given DAEMON for the given HOST[S] + stdlog HOST [HOST ...] :: tail -f on the `frr.log` for the given HOST[S] + stdout HOST [HOST ...] DAEMON :: tail -f on the stdout of the given DAEMON for the given HOST[S] + term HOST [HOST ...] :: open terminal[s] (TMUX or XTerm) on HOST[S], * for all + vtysh ROUTER [ROUTER ...] :: + xterm HOST [HOST ...] :: open XTerm[s] on HOST[S], * for all + Inline Commands: + [ROUTER ...] COMMAND :: execute vtysh COMMAND on the router[s] + [HOST ...] sh :: execute on hosts + [HOST ...] shi :: execute on HOST[s] test_bgp_multiview_topo1/test_bgp_routingTable> r1 show int br ------ Host: r1 ------ diff --git a/tests/topotests/bgp_gr_functionality_topo1/test_bgp_gr_functionality_topo1-3.py b/tests/topotests/bgp_gr_functionality_topo1/test_bgp_gr_functionality_topo1-3.py index f8a01a1c9d..1a8f8302ff 100644 --- a/tests/topotests/bgp_gr_functionality_topo1/test_bgp_gr_functionality_topo1-3.py +++ b/tests/topotests/bgp_gr_functionality_topo1/test_bgp_gr_functionality_topo1-3.py @@ -1159,7 +1159,7 @@ def test_BGP_GR_TC_31_2_p1(request): reset_config_on_routers(tgen) logger.info( - "[Phase 1] : Test Setup " "[Disable Mode]R1-----R2[Restart Mode] initialized " + "[Phase 1] : Test Setup " "[Disable Mode]R1-----R2[Helper Mode] initialized " ) # Configure graceful-restart @@ -1251,7 +1251,7 @@ def test_BGP_GR_TC_31_2_p1(request): tc_name, result ) - logger.info("[Phase 2] : R2 Goes from Disable to Restart Mode ") + logger.info("[Phase 2] : R1 Goes from Disable to Restart Mode ") # Configure graceful-restart input_dict = { @@ -1356,31 +1356,7 @@ def test_BGP_GR_TC_31_2_p1(request): }, } - # here the verify_graceful_restart fro the neighbor would be - # "NotReceived" as the latest GR config is not yet applied. - for addr_type in ADDR_TYPES: - result = verify_graceful_restart( - tgen, topo, addr_type, input_dict, dut="r1", peer="r2" - ) - assert result is True, "Testcase {} : Failed \n Error {}".format( - tc_name, result - ) - - for addr_type in ADDR_TYPES: - # Verifying RIB routes - next_hop = next_hop_per_address_family( - tgen, dut, peer, addr_type, NEXT_HOP_IP_2 - ) - input_topo = {key: topo["routers"][key] for key in ["r2"]} - result = verify_rib(tgen, addr_type, dut, input_topo, next_hop, protocol) - assert result is True, "Testcase {} : Failed \n Error {}".format( - tc_name, result - ) - - logger.info("[Phase 6] : R1 is about to come up now ") - start_router_daemons(tgen, "r1", ["bgpd"]) - - logger.info("[Phase 4] : R1 is UP now, so time to collect GR stats ") + logger.info("[Phase 4] : R1 is UP and GR state is correct ") for addr_type in ADDR_TYPES: result = verify_graceful_restart( diff --git a/tests/topotests/conftest.py b/tests/topotests/conftest.py index 4224b037b0..8e4d13df7d 100755 --- a/tests/topotests/conftest.py +++ b/tests/topotests/conftest.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 eval: (blacken-mode 1) -*- """ Topotest conftest.py file. """ @@ -5,25 +6,24 @@ Topotest conftest.py file. import glob import os -import pdb import re import resource import subprocess import sys import time -import pytest - import lib.fixtures -from lib import topolog -from lib.micronet_compat import Mininet +import pytest +from lib.micronet_compat import ConfigOptionsProxy, Mininet from lib.topogen import diagnose_env, get_topogen from lib.topolog import logger -from lib.topotest import g_extra_config as topotest_extra_config from lib.topotest import json_cmp_result +from munet import cli from munet.base import Commander, proc_error from munet.cleanup import cleanup_current, cleanup_previous -from munet import cli +from munet.testing.util import pause_test + +from lib import topolog, topotest def pytest_addoption(parser): @@ -61,12 +61,28 @@ def pytest_addoption(parser): help="Comma-separated list of routers to spawn gdb on, or 'all'", ) + parser.addoption( + "--logd", + action="append", + metavar="DAEMON[,ROUTER[,...]", + help=( + "Tail-F of DAEMON log file. Specify routers in comma-separated list after " + "daemon to limit to a subset of routers" + ), + ) + parser.addoption( "--pause", action="store_true", help="Pause after each test", ) + parser.addoption( + "--pause-at-end", + action="store_true", + help="Pause before taking munet down", + ) + parser.addoption( "--pause-on-error", action="store_true", @@ -80,6 +96,13 @@ def pytest_addoption(parser): help="Do not pause after (disables default when --shell or -vtysh given)", ) + parser.addoption( + "--pcap", + default="", + metavar="NET[,NET...]", + help="Comma-separated list of networks to capture packets on, or 'all'", + ) + rundir_help = "directory for running in and log files" parser.addini("rundir", rundir_help, default="/tmp/topotests") parser.addoption("--rundir", metavar="DIR", help=rundir_help) @@ -135,7 +158,7 @@ def pytest_addoption(parser): def check_for_memleaks(): - assert topotest_extra_config["valgrind_memleaks"] + assert topotest.g_pytest_config.option.valgrind_memleaks leaks = [] tgen = get_topogen() # pylint: disable=redefined-outer-name @@ -179,16 +202,15 @@ def check_for_memleaks(): @pytest.fixture(autouse=True, scope="module") def module_check_memtest(request): - del request # disable unused warning yield - if topotest_extra_config["valgrind_memleaks"]: + if request.config.option.valgrind_memleaks: if get_topogen() is not None: check_for_memleaks() def pytest_runtest_logstart(nodeid, location): # location is (filename, lineno, testname) - topolog.logstart(nodeid, location, topotest_extra_config["rundir"]) + topolog.logstart(nodeid, location, topotest.g_pytest_config.option.rundir) def pytest_runtest_logfinish(nodeid, location): @@ -199,10 +221,9 @@ def pytest_runtest_logfinish(nodeid, location): @pytest.hookimpl(hookwrapper=True) def pytest_runtest_call(item: pytest.Item) -> None: "Hook the function that is called to execute the test." - del item # disable unused warning # For topology only run the CLI then exit - if topotest_extra_config["topology_only"]: + if item.config.option.topology_only: get_topogen().cli() pytest.exit("exiting after --topology-only") @@ -210,7 +231,7 @@ def pytest_runtest_call(item: pytest.Item) -> None: yield # Check for leaks if requested - if topotest_extra_config["valgrind_memleaks"]: + if item.config.option.valgrind_memleaks: check_for_memleaks() @@ -233,6 +254,7 @@ def pytest_configure(config): """ Assert that the environment is correctly configured, and get extra config. """ + topotest.g_pytest_config = ConfigOptionsProxy(config) if config.getoption("--collect-only"): return @@ -254,11 +276,13 @@ def pytest_configure(config): # Set some defaults for the pytest.ini [pytest] section # --------------------------------------------------- - rundir = config.getoption("--rundir") + rundir = config.option.rundir if not rundir: rundir = config.getini("rundir") if not rundir: rundir = "/tmp/topotests" + config.option.rundir = rundir + if not config.getoption("--junitxml"): config.option.xmlpath = os.path.join(rundir, "topotests.xml") xmlpath = config.option.xmlpath @@ -271,8 +295,6 @@ def pytest_configure(config): mv_path = commander.get_exec_path("mv") commander.cmd_status([mv_path, xmlpath, xmlpath + suffix]) - topotest_extra_config["rundir"] = rundir - # Set the log_file (exec) to inside the rundir if not specified if not config.getoption("--log-file") and not config.getini("log_file"): config.option.log_file = os.path.join(rundir, "exec.log") @@ -302,69 +324,19 @@ def pytest_configure(config): elif b and not is_xdist and not have_windows: pytest.exit("{} use requires byobu/TMUX/SCREEN/XTerm".format(feature)) - # --------------------------------------- - # Record our options in global dictionary - # --------------------------------------- + # + # Check for window capability if given options that require window + # + assert_feature_windows(config.option.gdb_routers, "GDB") + assert_feature_windows(config.option.gdb_daemons, "GDB") + assert_feature_windows(config.option.cli_on_error, "--cli-on-error") + assert_feature_windows(config.option.shell, "--shell") + assert_feature_windows(config.option.shell_on_error, "--shell-on-error") + assert_feature_windows(config.option.vtysh, "--vtysh") + assert_feature_windows(config.option.vtysh_on_error, "--vtysh-on-error") - topotest_extra_config["rundir"] = rundir - - asan_abort = config.getoption("--asan-abort") - topotest_extra_config["asan_abort"] = asan_abort - - gdb_routers = config.getoption("--gdb-routers") - gdb_routers = gdb_routers.split(",") if gdb_routers else [] - topotest_extra_config["gdb_routers"] = gdb_routers - - gdb_daemons = config.getoption("--gdb-daemons") - gdb_daemons = gdb_daemons.split(",") if gdb_daemons else [] - topotest_extra_config["gdb_daemons"] = gdb_daemons - assert_feature_windows(gdb_routers or gdb_daemons, "GDB") - - gdb_breakpoints = config.getoption("--gdb-breakpoints") - gdb_breakpoints = gdb_breakpoints.split(",") if gdb_breakpoints else [] - topotest_extra_config["gdb_breakpoints"] = gdb_breakpoints - - cli_on_error = config.getoption("--cli-on-error") - topotest_extra_config["cli_on_error"] = cli_on_error - assert_feature_windows(cli_on_error, "--cli-on-error") - - shell = config.getoption("--shell") - topotest_extra_config["shell"] = shell.split(",") if shell else [] - assert_feature_windows(shell, "--shell") - - strace = config.getoption("--strace-daemons") - topotest_extra_config["strace_daemons"] = strace.split(",") if strace else [] - - shell_on_error = config.getoption("--shell-on-error") - topotest_extra_config["shell_on_error"] = shell_on_error - assert_feature_windows(shell_on_error, "--shell-on-error") - - topotest_extra_config["valgrind_extra"] = config.getoption("--valgrind-extra") - topotest_extra_config["valgrind_memleaks"] = config.getoption("--valgrind-memleaks") - - vtysh = config.getoption("--vtysh") - topotest_extra_config["vtysh"] = vtysh.split(",") if vtysh else [] - assert_feature_windows(vtysh, "--vtysh") - - vtysh_on_error = config.getoption("--vtysh-on-error") - topotest_extra_config["vtysh_on_error"] = vtysh_on_error - assert_feature_windows(vtysh_on_error, "--vtysh-on-error") - - pause_on_error = vtysh or shell or config.getoption("--pause-on-error") - if config.getoption("--no-pause-on-error"): - pause_on_error = False - - topotest_extra_config["pause_on_error"] = pause_on_error - assert_feature_windows(pause_on_error, "--pause-on-error") - - pause = config.getoption("--pause") - topotest_extra_config["pause"] = pause - assert_feature_windows(pause, "--pause") - - topology_only = config.getoption("--topology-only") - if topology_only and is_xdist: + if config.option.topology_only and is_xdist: pytest.exit("Cannot use --topology-only with distributed test mode") - topotest_extra_config["topology_only"] = topology_only # Check environment now that we have config if not diagnose_env(rundir): @@ -430,10 +402,7 @@ def pytest_runtest_makereport(item, call): # We want to pause, if requested, on any error not just test cases # (e.g., call.when == "setup") if not pause: - pause = ( - topotest_extra_config["pause_on_error"] - or topotest_extra_config["pause"] - ) + pause = item.config.option.pause_on_error or item.config.option.pause # (topogen) Set topology error to avoid advancing in the test. tgen = get_topogen() # pylint: disable=redefined-outer-name @@ -445,9 +414,9 @@ def pytest_runtest_makereport(item, call): isatty = sys.stdout.isatty() error_cmd = None - if error and topotest_extra_config["vtysh_on_error"]: + if error and item.config.option.vtysh_on_error: error_cmd = commander.get_exec_path(["vtysh"]) - elif error and topotest_extra_config["shell_on_error"]: + elif error and item.config.option.shell_on_error: error_cmd = os.getenv("SHELL", commander.get_exec_path(["bash"])) if error_cmd: @@ -499,7 +468,7 @@ def pytest_runtest_makereport(item, call): if p.wait(): logger.warning("xterm proc failed: %s:", proc_error(p, o, e)) - if error and topotest_extra_config["cli_on_error"]: + if error and item.config.option.cli_on_error: # Really would like something better than using this global here. # Not all tests use topogen though so get_topogen() won't work. if Mininet.g_mnet_inst: @@ -507,23 +476,8 @@ def pytest_runtest_makereport(item, call): else: logger.error("Could not launch CLI b/c no mininet exists yet") - while pause and isatty: - try: - user = raw_input( - 'PAUSED, "cli" for CLI, "pdb" to debug, "Enter" to continue: ' - ) - except NameError: - user = input('PAUSED, "cli" for CLI, "pdb" to debug, "Enter" to continue: ') - user = user.strip() - - if user == "cli": - cli.cli(Mininet.g_mnet_inst) - elif user == "pdb": - pdb.set_trace() # pylint: disable=forgotten-debug-statement - elif user: - print('Unrecognized input: "%s"' % user) - else: - break + if pause and isatty: + pause_test() # diff --git a/tests/topotests/lib/micronet_compat.py b/tests/topotests/lib/micronet_compat.py index 211d61fb6d..ccb3e56b61 100644 --- a/tests/topotests/lib/micronet_compat.py +++ b/tests/topotests/lib/micronet_compat.py @@ -12,6 +12,49 @@ from munet import cli from munet.base import BaseMunet, LinuxNamespace +def cli_opt_list(option_list): + if not option_list: + return [] + if isinstance(option_list, str): + return [x for x in option_list.split(",") if x] + return [x for x in option_list if x] + + +def name_in_cli_opt_str(name, option_list): + ol = cli_opt_list(option_list) + return name in ol or "all" in ol + + +class ConfigOptionsProxy: + def __init__(self, pytestconfig=None): + if isinstance(pytestconfig, ConfigOptionsProxy): + self.config = pytestconfig.config + else: + self.config = pytestconfig + self.option = self.config.option + + def getoption(self, opt, defval=None): + if not self.config: + return defval + + value = self.config.getoption(opt) + if value is None: + return defval + + return value + + def get_option(self, opt, defval=None): + return self.getoption(opt, defval) + + def get_option_list(self, opt): + value = self.get_option(opt, "") + return cli_opt_list(value) + + def name_in_option_list(self, name, opt): + optlist = self.get_option_list(opt) + return "all" in optlist or name in optlist + + class Node(LinuxNamespace): """Node (mininet compat).""" @@ -23,6 +66,8 @@ class Node(LinuxNamespace): nkwargs["unet"] = kwargs["unet"] if "private_mounts" in kwargs: nkwargs["private_mounts"] = kwargs["private_mounts"] + if "logger" in kwargs: + nkwargs["logger"] = kwargs["logger"] # This is expected by newer munet CLI code self.config_dirname = "" @@ -120,7 +165,7 @@ class Mininet(BaseMunet): g_mnet_inst = None - def __init__(self, rundir=None): + def __init__(self, rundir=None, pytestconfig=None): """ Create a Micronet. """ @@ -132,6 +177,8 @@ class Mininet(BaseMunet): self.host_params = {} self.prefix_len = 8 + self.cfgopt = ConfigOptionsProxy(pytestconfig) + # SNMPd used to require this, which was set int he mininet shell # that all commands executed from. This is goofy default so let's not # do it if we don't have to. The snmpd.conf files have been updated @@ -268,12 +315,9 @@ ff02::2\tip6-allrouters cli.add_cli_config(self, cdict) - # shellopt = ( - # self.pytest_config.getoption("--shell") if self.pytest_config else None - # ) - # shellopt = shellopt if shellopt is not None else "" - # if shellopt == "all" or "." in shellopt.split(","): - # self.run_in_window("bash") + shellopt = self.cfgopt.get_option_list("--shell") + if "all" in shellopt or "." in shellopt: + self.run_in_window("bash") # This is expected by newer munet CLI code self.config_dirname = "" @@ -335,6 +379,24 @@ ff02::2\tip6-allrouters def start(self): """Start the micronet topology.""" + pcapopt = self.cfgopt.get_option_list("--pcap") + if "all" in pcapopt: + pcapopt = self.switches.keys() + for pcap in pcapopt: + if ":" in pcap: + host, intf = pcap.split(":") + pcap = f"{host}-{intf}" + host = self.hosts[host] + else: + host = self + intf = pcap + pcapfile = f"{self.rundir}/capture-{pcap}.pcap" + host.run_in_window( + f"tshark -s 9200 -i {intf} -P -w {pcapfile}", + background=True, + title=f"cap:{pcap}", + ) + self.logger.debug("%s: Starting (no-op).", self) def stop(self): diff --git a/tests/topotests/lib/topogen.py b/tests/topotests/lib/topogen.py index 7843063165..3583ce17ed 100644 --- a/tests/topotests/lib/topogen.py +++ b/tests/topotests/lib/topogen.py @@ -25,6 +25,7 @@ Basic usage instructions: * After running stop Mininet with: tgen.stop_topology() """ +import configparser import grp import inspect import json @@ -38,16 +39,11 @@ import subprocess import sys from collections import OrderedDict -if sys.version_info[0] > 2: - import configparser -else: - import ConfigParser as configparser - import lib.topolog as topolog from lib.micronet import Commander from lib.micronet_compat import Mininet from lib.topolog import logger -from lib.topotest import g_extra_config +from munet.testing.util import pause_test from lib import topotest @@ -193,7 +189,7 @@ class Topogen(object): self._load_config() # Create new log directory - self.logdir = topotest.get_logs_path(g_extra_config["rundir"]) + self.logdir = topotest.get_logs_path(topotest.g_pytest_config.option.rundir) subprocess.check_call( "mkdir -p {0} && chmod 1777 {0}".format(self.logdir), shell=True ) @@ -213,7 +209,7 @@ class Topogen(object): # Mininet(Micronet) to build the actual topology. assert not inspect.isclass(topodef) - self.net = Mininet(rundir=self.logdir) + self.net = Mininet(rundir=self.logdir, pytestconfig=topotest.g_pytest_config) # Adjust the parent namespace topotest.fix_netns_limits(self.net) @@ -455,7 +451,18 @@ class Topogen(object): first is a simple kill with no sleep, the second will sleep if not killed and try with a different signal. """ + pause = bool(self.net.cfgopt.get_option("--pause-at-end")) + pause = pause or bool(self.net.cfgopt.get_option("--pause")) + if pause: + try: + pause_test("Before MUNET delete") + except KeyboardInterrupt: + print("^C...continuing") + except Exception as error: + self.logger.error("\n...continuing after error: %s", error) + logger.info("stopping topology: {}".format(self.modname)) + errors = "" for gear in self.gears.values(): errors += gear.stop() diff --git a/tests/topotests/lib/topotest.py b/tests/topotests/lib/topotest.py index 2686373ad1..93daf0bd8c 100644 --- a/tests/topotests/lib/topotest.py +++ b/tests/topotests/lib/topotest.py @@ -9,13 +9,13 @@ # Network Device Education Foundation, Inc. ("NetDEF") # +import configparser import difflib import errno import functools import glob import json import os -import pdb import platform import re import resource @@ -24,22 +24,17 @@ import subprocess import sys import tempfile import time +from collections.abc import Mapping from copy import deepcopy import lib.topolog as topolog +from lib.micronet_compat import Node from lib.topolog import logger - -if sys.version_info[0] > 2: - import configparser - from collections.abc import Mapping -else: - import ConfigParser as configparser - from collections import Mapping +from munet.base import Timeout from lib import micronet -from lib.micronet_compat import Node -g_extra_config = {} +g_pytest_config = None def get_logs_path(rundir): @@ -474,32 +469,6 @@ def int2dpid(dpid): ) -def pid_exists(pid): - "Check whether pid exists in the current process table." - - if pid <= 0: - return False - try: - os.waitpid(pid, os.WNOHANG) - except: - pass - try: - os.kill(pid, 0) - except OSError as err: - if err.errno == errno.ESRCH: - # ESRCH == No such process - return False - elif err.errno == errno.EPERM: - # EPERM clearly means there's a process to deny access to - return True - else: - # According to "man 2 kill" possible error values are - # (EINVAL, EPERM, ESRCH) - raise - else: - return True - - def get_textdiff(text1, text2, title1="", title2="", **opts): "Returns empty string if same or formatted diff" @@ -1086,7 +1055,7 @@ def checkAddressSanitizerError(output, router, component, logdir=""): # No Address Sanitizer Error in Output. Now check for AddressSanitizer daemon file if logdir: - filepattern = logdir + "/" + router + "/" + component + ".asan.*" + filepattern = logdir + "/" + router + ".asan." + component + ".*" logger.debug( "Log check for %s on %s, pattern %s\n" % (component, router, filepattern) ) @@ -1303,7 +1272,8 @@ def fix_host_limits(): def setup_node_tmpdir(logdir, name): # Cleanup old log, valgrind, and core files. subprocess.check_call( - "rm -rf {0}/{1}.valgrind.* {1}.*.asan {0}/{1}/".format(logdir, name), shell=True + "rm -rf {0}/{1}.valgrind.* {0}/{1}.asan.* {0}/{1}/".format(logdir, name), + shell=True, ) # Setup the per node directory. @@ -1339,7 +1309,7 @@ class Router(Node): # specified, then attempt to generate an unique logdir. self.logdir = params.get("logdir") if self.logdir is None: - self.logdir = get_logs_path(g_extra_config["rundir"]) + self.logdir = get_logs_path(g_pytest_config.getoption("--rundir")) if not params.get("logger"): # If logger is present topogen has already set this up @@ -1532,7 +1502,7 @@ class Router(Node): ) except Exception as ex: logger.error("%s can't remove IPs %s", self, str(ex)) - # pdb.set_trace() + # breakpoint() # assert False, "can't remove IPs %s" % str(ex) def checkCapability(self, daemon, param): @@ -1598,10 +1568,7 @@ class Router(Node): if (daemon == "zebra") and (self.daemons["mgmtd"] == 0): # Add mgmtd with zebra - if it exists - try: - mgmtd_path = os.path.join(self.daemondir, "mgmtd") - except: - pdb.set_trace() + mgmtd_path = os.path.join(self.daemondir, "mgmtd") if os.path.isfile(mgmtd_path): self.daemons["mgmtd"] = 1 self.daemons_options["mgmtd"] = "" @@ -1609,11 +1576,7 @@ class Router(Node): if (daemon == "zebra") and (self.daemons["staticd"] == 0): # Add staticd with zebra - if it exists - try: - staticd_path = os.path.join(self.daemondir, "staticd") - except: - pdb.set_trace() - + staticd_path = os.path.join(self.daemondir, "staticd") if os.path.isfile(staticd_path): self.daemons["staticd"] = 1 self.daemons_options["staticd"] = "" @@ -1688,8 +1651,7 @@ class Router(Node): # used self.cmd("echo 100000 > /proc/sys/net/mpls/platform_labels") - shell_routers = g_extra_config["shell"] - if "all" in shell_routers or self.name in shell_routers: + if g_pytest_config.name_in_option_list(self.name, "--shell"): self.run_in_window(os.getenv("SHELL", "bash"), title="sh-%s" % self.name) if self.daemons["eigrpd"] == 1: @@ -1706,8 +1668,7 @@ class Router(Node): status = self.startRouterDaemons(tgen=tgen) - vtysh_routers = g_extra_config["vtysh"] - if "all" in vtysh_routers or self.name in vtysh_routers: + if g_pytest_config.name_in_option_list(self.name, "--vtysh"): self.run_in_window("vtysh", title="vt-%s" % self.name) if self.unified_config: @@ -1727,13 +1688,13 @@ class Router(Node): def startRouterDaemons(self, daemons=None, tgen=None): "Starts FRR daemons for this router." - asan_abort = g_extra_config["asan_abort"] - gdb_breakpoints = g_extra_config["gdb_breakpoints"] - gdb_daemons = g_extra_config["gdb_daemons"] - gdb_routers = g_extra_config["gdb_routers"] - valgrind_extra = g_extra_config["valgrind_extra"] - valgrind_memleaks = g_extra_config["valgrind_memleaks"] - strace_daemons = g_extra_config["strace_daemons"] + asan_abort = bool(g_pytest_config.option.asan_abort) + gdb_breakpoints = g_pytest_config.get_option_list("--gdb-breakpoints") + gdb_daemons = g_pytest_config.get_option_list("--gdb-daemons") + gdb_routers = g_pytest_config.get_option_list("--gdb-routers") + valgrind_extra = bool(g_pytest_config.option.valgrind_extra) + valgrind_memleaks = bool(g_pytest_config.option.valgrind_memleaks) + strace_daemons = g_pytest_config.get_option_list("--strace-daemons") # Get global bundle data if not self.path_exists("/etc/frr/support_bundle_commands.conf"): @@ -1757,11 +1718,21 @@ class Router(Node): self.reportCores = True # XXX: glue code forward ported from removed function. - if self.version == None: + if self.version is None: self.version = self.cmd( os.path.join(self.daemondir, "bgpd") + " -v" ).split()[2] logger.info("{}: running version: {}".format(self.name, self.version)) + + logd_options = {} + for logd in g_pytest_config.get_option("--logd", []): + if "," in logd: + daemon, routers = logd.split(",", 1) + logd_options[daemon] = routers.split(",") + else: + daemon = logd + logd_options[daemon] = ["all"] + # If `daemons` was specified then some upper API called us with # specific daemons, otherwise just use our own configuration. daemons_list = [] @@ -1773,22 +1744,36 @@ class Router(Node): if self.daemons[daemon] == 1: daemons_list.append(daemon) + tail_log_files = [] + check_daemon_files = [] + def start_daemon(daemon, extra_opts=None): daemon_opts = self.daemons_options.get(daemon, "") + + # get pid and vty filenames and remove the files + m = re.match(r"(.* |^)-n (\d+)( ?.*|$)", daemon_opts) + dfname = daemon if not m else "{}-{}".format(daemon, m.group(2)) + runbase = "/var/run/{}/{}".format(self.routertype, dfname) + # If this is a new system bring-up remove the pid/vty files, otherwise + # do not since apparently presence of the pidfile impacts BGP GR + self.cmd_status("rm -f {0}.pid {0}.vty".format(runbase)) + rediropt = " > {0}.out 2> {0}.err".format(daemon) if daemon == "snmpd": binary = "/usr/sbin/snmpd" cmdenv = "" cmdopt = "{} -C -c /etc/frr/snmpd.conf -p ".format( daemon_opts - ) + "/var/run/{}/snmpd.pid -x /etc/frr/agentx".format(self.routertype) + ) + "{}.pid -x /etc/frr/agentx".format(runbase) + # check_daemon_files.append(runbase + ".pid") else: binary = os.path.join(self.daemondir, daemon) + check_daemon_files.extend([runbase + ".pid", runbase + ".vty"]) cmdenv = "ASAN_OPTIONS=" if asan_abort: - cmdenv = "abort_on_error=1:" - cmdenv += "log_path={0}/{1}.{2}.asan ".format( + cmdenv += "abort_on_error=1:" + cmdenv += "log_path={0}/{1}.asan.{2} ".format( self.logdir, self.name, daemon ) @@ -1811,9 +1796,16 @@ class Router(Node): daemon, self.logdir, self.name ) - cmdopt = "{} --command-log-always --log file:{}.log --log-level debug".format( - daemon_opts, daemon - ) + cmdopt = "{} --command-log-always ".format(daemon_opts) + cmdopt += "--log file:{}.log --log-level debug".format(daemon) + + if daemon in logd_options: + logdopt = logd_options[daemon] + if "all" in logdopt or self.name in logdopt: + tail_log_files.append( + "{}/{}/{}.log".format(self.logdir, self.name, daemon) + ) + if extra_opts: cmdopt += " " + extra_opts @@ -1840,8 +1832,6 @@ class Router(Node): logger.info( "%s: %s %s launched in gdb window", self, self.routertype, daemon ) - # Need better check for daemons running. - time.sleep(5) else: if daemon != "snmpd": cmdopt += " -d " @@ -1900,16 +1890,50 @@ class Router(Node): start_daemon(daemon) # Check if daemons are running. - rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype) - if re.search(r"No such file or directory", rundaemons): - return "Daemons are not running" + wait_time = 30 if (gdb_routers or gdb_daemons) else 10 + timeout = Timeout(wait_time) + for remaining in timeout: + if not check_daemon_files: + break + check = check_daemon_files[0] + if self.path_exists(check): + check_daemon_files.pop(0) + continue + self.logger.debug("Waiting {}s for {} to appear".format(remaining, check)) + time.sleep(0.5) + + if check_daemon_files: + assert False, "Timeout({}) waiting for {} to appear on {}".format( + wait_time, check_daemon_files[0], self.name + ) # Update the permissions on the log files self.cmd("chown frr:frr -R {}/{}".format(self.logdir, self.name)) self.cmd("chmod ug+rwX,o+r -R {}/{}".format(self.logdir, self.name)) + if "frr" in logd_options: + logdopt = logd_options["frr"] + if "all" in logdopt or self.name in logdopt: + tail_log_files.append("{}/{}/frr.log".format(self.logdir, self.name)) + + for tailf in tail_log_files: + self.run_in_window("tail -f " + tailf, title=tailf, background=True) + return "" + def pid_exists(self, pid): + if pid <= 0: + return False + try: + # If we are not using PID namespaces then we will be a parent of the pid, + # otherwise the init process of the PID namespace will have reaped the proc. + os.waitpid(pid, os.WNOHANG) + except Exception: + pass + + rc, o, e = self.cmd_status("kill -0 " + str(pid), warn=False) + return rc == 0 or "No such process" not in e + def killRouterDaemons( self, daemons, wait=True, assertOnError=True, minErrorVersion="5.1" ): @@ -1929,15 +1953,15 @@ class Router(Node): if re.search(r"%s" % daemon, d): daemonpidfile = d.rstrip() daemonpid = self.cmd("cat %s" % daemonpidfile).rstrip() - if daemonpid.isdigit() and pid_exists(int(daemonpid)): + if daemonpid.isdigit() and self.pid_exists(int(daemonpid)): logger.debug( "{}: killing {}".format( self.name, os.path.basename(daemonpidfile.rsplit(".", 1)[0]), ) ) - os.kill(int(daemonpid), signal.SIGKILL) - if pid_exists(int(daemonpid)): + self.cmd_status("kill -KILL {}".format(daemonpid)) + if self.pid_exists(int(daemonpid)): numRunning += 1 while wait and numRunning > 0: sleep( @@ -1951,7 +1975,7 @@ class Router(Node): for d in dmns[:-1]: if re.search(r"%s" % daemon, d): daemonpid = self.cmd("cat %s" % d.rstrip()).rstrip() - if daemonpid.isdigit() and pid_exists( + if daemonpid.isdigit() and self.pid_exists( int(daemonpid) ): logger.info( @@ -1962,8 +1986,10 @@ class Router(Node): ), ) ) - os.kill(int(daemonpid), signal.SIGKILL) - if daemonpid.isdigit() and not pid_exists( + self.cmd_status( + "kill -KILL {}".format(daemonpid) + ) + if daemonpid.isdigit() and not self.pid_exists( int(daemonpid) ): numRunning -= 1 diff --git a/tests/topotests/munet/cli.py b/tests/topotests/munet/cli.py index 45db3990c7..f58ea99d67 100644 --- a/tests/topotests/munet/cli.py +++ b/tests/topotests/munet/cli.py @@ -24,8 +24,17 @@ import tempfile import termios import tty -from . import linux -from .config import list_to_dict_with_key + +try: + from . import linux + from .config import list_to_dict_with_key +except ImportError: + # We cannot use relative imports and still run this module directly as a script, and + # there are some use cases where we want to run this file as a script. + sys.path.append(os.path.dirname(os.path.realpath(__file__))) + import linux + + from config import list_to_dict_with_key ENDMARKER = b"\x00END\x00" @@ -609,7 +618,16 @@ async def doline( return True await run_command( - unet, outf, nline, execfmt, banner, hosts, toplevel, kinds, ns_only, interactive + unet, + outf, + nline, + execfmt, + banner, + hosts, + toplevel, + kinds, + ns_only, + interactive, ) return True @@ -657,10 +675,10 @@ async def cli_client(sockpath, prompt="munet> "): async def local_cli(unet, outf, prompt, histfile, background): """Implement the user-side CLI for local munet.""" - if unet: - completer = Completer(unet) - readline.parse_and_bind("tab: complete") - readline.set_completer(completer.complete) + assert unet is not None + completer = Completer(unet) + readline.parse_and_bind("tab: complete") + readline.set_completer(completer.complete) print("\n--- Munet CLI Starting ---\n\n") while True: @@ -669,8 +687,6 @@ async def local_cli(unet, outf, prompt, histfile, background): if line is None: return - assert unet is not None - if not await doline(unet, line, outf, background): return except KeyboardInterrupt: @@ -706,10 +722,19 @@ async def cli_client_connected(unet, background, reader, writer): break line = line.decode("utf-8").strip() - # def writef(x): - # writer.write(x.encode("utf-8")) + class EncodingFile: + """Wrap a writer to encode in utf-8.""" - if not await doline(unet, line, writer, background, notty=True): + def __init__(self, writer): + self.writer = writer + + def write(self, x): + self.writer.write(x.encode("utf-8")) + + def flush(self): + self.writer.flush() + + if not await doline(unet, line, EncodingFile(writer), background, notty=True): logging.debug("server closing cli connection") return diff --git a/tests/topotests/munet/mutini.py b/tests/topotests/munet/mutini.py index e805f3245d..e5f9931714 100755 --- a/tests/topotests/munet/mutini.py +++ b/tests/topotests/munet/mutini.py @@ -296,8 +296,12 @@ def be_init(new_pg, exec_args): # No exec so we are the "child". new_process_group() + # Reap children as init process + vdebug("installing local handler for SIGCHLD") + signal.signal(signal.SIGCHLD, sig_sigchld) + while True: - logging.info("parent: waiting to reap zombies") + logging.info("init: waiting to reap zombies") linux.pause() # NOTREACHED