From 6ec742d95fc444457d27f5d5b59c6045550fe722 Mon Sep 17 00:00:00 2001
From: Donald Sharp <sharpd@nvidia.com>
Date: Thu, 19 May 2022 15:54:59 -0400
Subject: [PATCH 1/3] tests: Ensure routes are not queued when calling
 verify_rib

Lots of tests call verify_rib that takes a list of routes that
need to be verified in some fashion.  This verify_rib functionality
will try up to 12 seconds before failing the check that zebra
has the route and has installed it.

Unfortunately the verify_rib code was not looking to see if
the route was queued for installation and was then allowing
tests to immediately do subsequent steps that depended on
that route actually being installed, sometimes causing tests
to fail.

Write a bit of additional code that looks at the queued
status and allows the test to wait a bit longer for zebra
to finish processing before allowing the test to move on
to the next bit.

Signed-off-by: Donald Sharp <sharpd@nvidia.com>
---
 tests/topotests/lib/common_config.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/topotests/lib/common_config.py b/tests/topotests/lib/common_config.py
index 5a5c7e3df4..5c8b760544 100644
--- a/tests/topotests/lib/common_config.py
+++ b/tests/topotests/lib/common_config.py
@@ -3383,6 +3383,10 @@ def verify_rib(
                             st_found = True
                             found_routes.append(st_rt)
 
+                            if "queued" in rib_routes_json[st_rt][0]:
+                                errormsg = "Route {} is queued\n".format(st_rt)
+                                return errormsg
+
                             if fib and next_hop:
                                 if type(next_hop) is not list:
                                     next_hop = [next_hop]
@@ -3607,6 +3611,10 @@ def verify_rib(
                         st_found = True
                         found_routes.append(st_rt)
 
+                        if "queued" in rib_routes_json[st_rt][0]:
+                            errormsg = "Route {} is queued\n".format(st_rt)
+                            return errormsg
+
                         if next_hop:
                             if type(next_hop) is not list:
                                 next_hop = [next_hop]

From cb8018f4c3f07cd88946c1d248af34de15eaacc4 Mon Sep 17 00:00:00 2001
From: Donald Sharp <sharpd@nvidia.com>
Date: Thu, 19 May 2022 19:08:39 -0400
Subject: [PATCH 2/3] tests: Make verify_rib wait up to 40 seconds.

Under heavy load I am seeing verify_rib failing after 12 seconds
but succeeding shortly afterwards (14.7 seconds in the log below):

2022-05-19 18:52:54,374 DEBUG: topolog: Exiting lib API: verify_rib
2022-05-19 18:52:54,374 DEBUG: topolog: Function returned True
2022-05-19 18:52:54,374 WARNING: topolog: RETRY DIAGNOSTIC: SUCCEED after FAILED with requested timeout of 12.0s; however, succeeded in 14.7s, investigate timeout timing

There is no reason not to let the test wait a bit longer on very
heavily loaded systems.  Change the timeout to 40 seconds.

Signed-off-by: Donald Sharp <sharpd@nvidia.com>
---
 tests/topotests/lib/common_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/topotests/lib/common_config.py b/tests/topotests/lib/common_config.py
index 5c8b760544..3f70dbddf7 100644
--- a/tests/topotests/lib/common_config.py
+++ b/tests/topotests/lib/common_config.py
@@ -3245,7 +3245,7 @@ def configure_interface_mac(tgen, input_dict):
 #############################################
 # Verification APIs
 #############################################
-@retry(retry_timeout=12)
+@retry(retry_timeout=40)
 def verify_rib(
     tgen,
     addr_type,

From 006ef829d84f3d9d0a2bee486af1aca987dec36c Mon Sep 17 00:00:00 2001
From: Donald Sharp <sharpd@nvidia.com>
Date: Thu, 19 May 2022 19:28:43 -0400
Subject: [PATCH 3/3] tests: Allow a bit longer for bfd topo tests to
 synchronize

Allowing only 4 seconds for a bfd test to synchronize is going
to run into problems on extremely loaded systems.  The test
system should value that the session actually converged over
whether it converged in a reasonable time, especially on test
systems that are loaded because many tests are running at the
same time.  If it is important to test that something completes
within the time the RFC requires, the CI system as it is
currently written is not the correct place for this.

Signed-off-by: Donald Sharp <sharpd@nvidia.com>
---
 tests/topotests/bfd_topo1/test_bfd_topo1.py | 2 +-
 tests/topotests/bfd_topo2/test_bfd_topo2.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/topotests/bfd_topo1/test_bfd_topo1.py b/tests/topotests/bfd_topo1/test_bfd_topo1.py
index adf02b02d4..c9020f16d3 100644
--- a/tests/topotests/bfd_topo1/test_bfd_topo1.py
+++ b/tests/topotests/bfd_topo1/test_bfd_topo1.py
@@ -100,7 +100,7 @@ def test_bfd_connection():
         test_func = partial(
             topotest.router_json_cmp, router, "show bfd peers json", expected
         )
-        _, result = topotest.run_and_expect(test_func, None, count=8, wait=0.5)
+        _, result = topotest.run_and_expect(test_func, None, count=30, wait=0.5)
         assertmsg = '"{}" JSON output mismatches'.format(router.name)
         assert result is None, assertmsg
 
diff --git a/tests/topotests/bfd_topo2/test_bfd_topo2.py b/tests/topotests/bfd_topo2/test_bfd_topo2.py
index 57ce0cdf09..a9b9358ef0 100644
--- a/tests/topotests/bfd_topo2/test_bfd_topo2.py
+++ b/tests/topotests/bfd_topo2/test_bfd_topo2.py
@@ -144,7 +144,7 @@ def test_bfd_connection():
         test_func = partial(
             topotest.router_json_cmp, router, "show bfd peers json", expected
         )
-        _, result = topotest.run_and_expect(test_func, None, count=8, wait=0.5)
+        _, result = topotest.run_and_expect(test_func, None, count=30, wait=0.5)
         assertmsg = '"{}" JSON output mismatches'.format(router.name)
         assert result is None, assertmsg