From c32c32c931b96531acdb25fd1fc37cd3b0a2f42e Mon Sep 17 00:00:00 2001
From: Donald Sharp <sharpd@nvidia.com>
Date: Wed, 1 Dec 2021 16:28:42 -0500
Subject: [PATCH 1/2] zebra: Ensure zebra_nhg_sweep_table accounts for double
 deletes

I'm seeing this crash in various forms:
Program terminated with signal SIGSEGV, Segmentation fault.
50 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
[Current thread is 1 (Thread 0x7f418efbc7c0 (LWP 3580253))]
(gdb) bt
(gdb) f 4
267 (*func)(hb, arg);
(gdb) p hb
$1 = (struct hash_bucket *) 0x558cdaafb250
(gdb) p *hb
$2 = {len = 0, next = 0x0, key = 0, data = 0x0}
(gdb)

I've also seen a crash where data is 0x03.

My suspicion is that hash_iterate is calling zebra_nhg_sweep_entry which
does delete the particular entry we are looking at as well as possibly other
entries when the ref count for those entries gets set to 0 as well.

Then we have this loop in hash_iterate.c:

   for (i = 0; i < hash->size; i++)
            for (hb = hash->index[i]; hb; hb = hbnext) {
                    /* get pointer to next hash bucket here, in case (*func)
                     * decides to delete hb by calling hash_release
                     */
                    hbnext = hb->next;
                    (*func)(hb, arg);
            }
Suppose in the previous loop hbnext is set to hb->next and we call
zebra_nhg_sweep_entry. This deletes the previous entry and also
happens to cause the hbnext entry to be deleted as well, because of nhg
refcounts. At this point in time the memory pointed to by hbnext is
not owned by the pthread anymore and we can end up on a state where
it's overwritten by another pthread in zebra with data for other incoming events.

What to do?  Let's change the sweep function to a hash_walk and have
it stop iterating and to start over if there is a possible double
delete operation.

Signed-off-by: Donald Sharp <sharpd@nvidia.com>
(cherry picked from commit 07b9ebca65832813cc00722401f282a51a11ac17)
---
 zebra/zebra_nhg.c | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c
index aa015992d5..fac312cf7c 100644
--- a/zebra/zebra_nhg.c
+++ b/zebra/zebra_nhg.c
@@ -2995,7 +2995,7 @@ void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx)
 	dplane_ctx_fini(&ctx);
 }
 
-static void zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg)
+static int zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg)
 {
 	struct nhg_hash_entry *nhe = NULL;
 
@@ -3009,7 +3009,7 @@ static void zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg)
 	 * from an upper level proto.
 	 */
 	if (zrouter.startup_time < nhe->uptime)
-		return;
+		return HASHWALK_CONTINUE;
 
 	/*
 	 * If it's proto-owned and not being used by a route, remove it since
@@ -3019,20 +3019,41 @@ static void zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg)
 	 */
 	if (PROTO_OWNED(nhe) && nhe->refcnt == 1) {
 		zebra_nhg_decrement_ref(nhe);
-		return;
+		return HASHWALK_ABORT;
 	}
 
 	/*
 	 * If its being ref'd by routes, just let it be uninstalled via a route
 	 * removal.
 	 */
-	if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0)
+	if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0) {
 		zebra_nhg_uninstall_kernel(nhe);
+		return HASHWALK_ABORT;
+	}
+
+	return HASHWALK_CONTINUE;
 }
 
 void zebra_nhg_sweep_table(struct hash *hash)
 {
-	hash_iterate(hash, zebra_nhg_sweep_entry, NULL);
+	uint32_t count;
+
+	/*
+	 * Yes this is extremely odd.  Effectively nhg's have
+	 * other nexthop groups that depend on them and when you
+	 * remove them, you can have other entries blown up.
+	 * our hash code does not work with deleting multiple
+	 * entries at a time and will possibly cause crashes
+	 * So what to do?  Whenever zebra_nhg_sweep_entry
+	 * deletes an entry it will return HASHWALK_ABORT,
+	 * cause that deletion might have triggered more.
+	 * then we can just keep sweeping this table
+	 * until nothing more is found to do.
+	 */
+	do {
+		count = hashcount(hash);
+		hash_walk(hash, zebra_nhg_sweep_entry, NULL);
+	} while (count != hashcount(hash));
 }
 
 static void zebra_nhg_mark_keep_entry(struct hash_bucket *bucket, void *arg)

From 21ab672e6cf97d2849e51792918db1e9c88240e5 Mon Sep 17 00:00:00 2001
From: Donald Sharp <sharpd@nvidia.com>
Date: Wed, 1 Dec 2021 17:03:38 -0500
Subject: [PATCH 2/2] lib: Update hash.h documentation to warn of a possible
 crash

Multiple deletions from the hash_walk or hash_iteration calls
during a single invocation of the passed in function can and
will cause the program to crash.  Warn against doing such a
thing.

Signed-off-by: Donald Sharp <sharpd@nvidia.com>
(cherry picked from commit 341743ac5b2a11fe27416a2b34cc470f36ea4d17)
---
 lib/hash.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/hash.h b/lib/hash.h
index f3b24f051b..91770d1813 100644
--- a/lib/hash.h
+++ b/lib/hash.h
@@ -235,9 +235,10 @@ extern void *hash_release(struct hash *hash, void *data);
 /*
  * Iterate over the elements in a hash table.
  *
- * It is safe to delete items passed to the iteration function from the hash
- * table during iteration. More than one item cannot be deleted during each
- * iteration. Please note that adding entries to the hash
+ * The passed in arg to the handler function is the only safe
+ * item to delete from the hash.
+ *
+ * Please note that adding entries to the hash
  * during the walk will cause undefined behavior in that some new entries
  * will be walked and some will not.  So do not do this.
  *
@@ -258,8 +259,10 @@ extern void hash_iterate(struct hash *hash,
 /*
  * Iterate over the elements in a hash table, stopping on condition.
  *
- * It is safe to delete items passed to the iteration function from the hash
- * table during iteration.  Please note that adding entries to the hash
+ * The passed in arg to the handler function is the only safe item
+ * to delete from the hash.
+ *
+ * Please note that adding entries to the hash
  * during the walk will cause undefined behavior in that some new entries
  * will be walked and some will not.  So do not do this.
  *