From 6ec742d95fc444457d27f5d5b59c6045550fe722 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Thu, 19 May 2022 15:54:59 -0400 Subject: [PATCH 1/3] tests: Ensure routes are not queued when calling verify_rib Lots of tests call verify_rib that takes a list of routes that need to be verified in some fashion. This verify_rib functionality will try up to 12 seconds before failing the check that zebra has the route and has installed it. Unfortunately the verify_rib code was not looking to see if the route was queued for installation and was then allowing tests to immediately do subsequent steps that depended on that route actually being installed sometimes causing tests to fail. Write a bit of additional code that looks at the queued status and allows the test to wait a bit longer for zebra to finish processing before allowing the test to move on to the next bit. Signed-off-by: Donald Sharp --- tests/topotests/lib/common_config.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/topotests/lib/common_config.py b/tests/topotests/lib/common_config.py index 5a5c7e3df4..5c8b760544 100644 --- a/tests/topotests/lib/common_config.py +++ b/tests/topotests/lib/common_config.py @@ -3383,6 +3383,10 @@ def verify_rib( st_found = True found_routes.append(st_rt) + if "queued" in rib_routes_json[st_rt][0]: + errormsg = "Route {} is queued\n".format(st_rt) + return errormsg + if fib and next_hop: if type(next_hop) is not list: next_hop = [next_hop] @@ -3607,6 +3611,10 @@ def verify_rib( st_found = True found_routes.append(st_rt) + if "queued" in rib_routes_json[st_rt][0]: + errormsg = "Route {} is queued\n".format(st_rt) + return errormsg + if next_hop: if type(next_hop) is not list: next_hop = [next_hop] From cb8018f4c3f07cd88946c1d248af34de15eaacc4 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Thu, 19 May 2022 19:08:39 -0400 Subject: [PATCH 2/3] tests: Make verify_rib wait up to 40 seconds. 
Under heavy load I am seeing verify_rib failing after 12 seconds but succeeding after 17: 2022-05-19 18:52:54,374 DEBUG: topolog: Exiting lib API: verify_rib 2022-05-19 18:52:54,374 DEBUG: topolog: Function returned True 2022-05-19 18:52:54,374 WARNING: topolog: RETRY DIAGNOSTIC: SUCCEED after FAILED with requested timeout of 12.0s; however, succeeded in 14.7s, investigate timeout timing There is no reason to not have the test wait a bit longer for very heavily loaded systems. Change the time to 40 seconds. Signed-off-by: Donald Sharp --- tests/topotests/lib/common_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/topotests/lib/common_config.py b/tests/topotests/lib/common_config.py index 5c8b760544..3f70dbddf7 100644 --- a/tests/topotests/lib/common_config.py +++ b/tests/topotests/lib/common_config.py @@ -3245,7 +3245,7 @@ def configure_interface_mac(tgen, input_dict): ############################################# # Verification APIs ############################################# -@retry(retry_timeout=12) +@retry(retry_timeout=40) def verify_rib( tgen, addr_type, From 006ef829d84f3d9d0a2bee486af1aca987dec36c Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Thu, 19 May 2022 19:28:43 -0400 Subject: [PATCH 3/3] tests: Allow a bit longer for bfd topo tests to synchronize Allowing only 4 seconds for a bfd test to synchronize is going to run into problems on extremely loaded systems. The test system should value that it actually converged over whether it converged in a reasonable time, especially on test systems that are loaded because of many multiples of tests running at the same time. If it is important to actually test that something got done by the RFC, the CI system as it is currently written is not the correct place for this. 
Signed-off-by: Donald Sharp --- tests/topotests/bfd_topo1/test_bfd_topo1.py | 2 +- tests/topotests/bfd_topo2/test_bfd_topo2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/topotests/bfd_topo1/test_bfd_topo1.py b/tests/topotests/bfd_topo1/test_bfd_topo1.py index adf02b02d4..c9020f16d3 100644 --- a/tests/topotests/bfd_topo1/test_bfd_topo1.py +++ b/tests/topotests/bfd_topo1/test_bfd_topo1.py @@ -100,7 +100,7 @@ def test_bfd_connection(): test_func = partial( topotest.router_json_cmp, router, "show bfd peers json", expected ) - _, result = topotest.run_and_expect(test_func, None, count=8, wait=0.5) + _, result = topotest.run_and_expect(test_func, None, count=30, wait=0.5) assertmsg = '"{}" JSON output mismatches'.format(router.name) assert result is None, assertmsg diff --git a/tests/topotests/bfd_topo2/test_bfd_topo2.py b/tests/topotests/bfd_topo2/test_bfd_topo2.py index 57ce0cdf09..a9b9358ef0 100644 --- a/tests/topotests/bfd_topo2/test_bfd_topo2.py +++ b/tests/topotests/bfd_topo2/test_bfd_topo2.py @@ -144,7 +144,7 @@ def test_bfd_connection(): test_func = partial( topotest.router_json_cmp, router, "show bfd peers json", expected ) - _, result = topotest.run_and_expect(test_func, None, count=8, wait=0.5) + _, result = topotest.run_and_expect(test_func, None, count=30, wait=0.5) assertmsg = '"{}" JSON output mismatches'.format(router.name) assert result is None, assertmsg