diff --git a/lib/hash.h b/lib/hash.h index f3b24f051b..91770d1813 100644 --- a/lib/hash.h +++ b/lib/hash.h @@ -235,9 +235,10 @@ extern void *hash_release(struct hash *hash, void *data); /* * Iterate over the elements in a hash table. * - * It is safe to delete items passed to the iteration function from the hash - * table during iteration. More than one item cannot be deleted during each - * iteration. Please note that adding entries to the hash + * The passed in arg to the handler function is the only safe + * item to delete from the hash. + * + * Please note that adding entries to the hash * during the walk will cause undefined behavior in that some new entries * will be walked and some will not. So do not do this. * @@ -258,8 +259,10 @@ extern void hash_iterate(struct hash *hash, /* * Iterate over the elements in a hash table, stopping on condition. * - * It is safe to delete items passed to the iteration function from the hash - * table during iteration. Please note that adding entries to the hash + * The passed in arg to the handler function is the only safe item + * to delete from the hash. + * + * Please note that adding entries to the hash * during the walk will cause undefined behavior in that some new entries * will be walked and some will not. So do not do this. * diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index aa015992d5..fac312cf7c 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -2995,7 +2995,7 @@ void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx) dplane_ctx_fini(&ctx); } -static void zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg) +static int zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg) { struct nhg_hash_entry *nhe = NULL; @@ -3009,7 +3009,7 @@ static void zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg) * from an upper level proto. */ if (zrouter.startup_time < nhe->uptime) - return; + return HASHWALK_CONTINUE; /* * If it's proto-owned and not being used by a route, remove it since @@ -3019,20 +3019,41 @@ static void zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg) */ if (PROTO_OWNED(nhe) && nhe->refcnt == 1) { zebra_nhg_decrement_ref(nhe); - return; + return HASHWALK_ABORT; } /* * If its being ref'd by routes, just let it be uninstalled via a route * removal. */ - if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0) + if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0) { zebra_nhg_uninstall_kernel(nhe); + return HASHWALK_ABORT; + } + + return HASHWALK_CONTINUE; } void zebra_nhg_sweep_table(struct hash *hash) { - hash_iterate(hash, zebra_nhg_sweep_entry, NULL); + uint32_t count; + + /* + * Yes this is extremely odd. Effectively nhg's have + * other nexthop groups that depend on them and when you + * remove them, you can have other entries blown up. + * our hash code does not work with deleting multiple + * entries at a time and will possibly cause crashes + * So what to do? Whenever zebra_nhg_sweep_entry + * deletes an entry it will return HASHWALK_ABORT, + * cause that deletion might have triggered more. + * then we can just keep sweeping this table + * until nothing more is found to do. + */ + do { + count = hashcount(hash); + hash_walk(hash, zebra_nhg_sweep_entry, NULL); + } while (count != hashcount(hash)); } static void zebra_nhg_mark_keep_entry(struct hash_bucket *bucket, void *arg)