diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index aa015992d5..fac312cf7c 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -2995,7 +2995,7 @@ void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx) dplane_ctx_fini(&ctx); } -static void zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg) +static int zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg) { struct nhg_hash_entry *nhe = NULL; @@ -3009,7 +3009,7 @@ static void zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg) * from an upper level proto. */ if (zrouter.startup_time < nhe->uptime) - return; + return HASHWALK_CONTINUE; /* * If it's proto-owned and not being used by a route, remove it since @@ -3019,20 +3019,41 @@ static void zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg) */ if (PROTO_OWNED(nhe) && nhe->refcnt == 1) { zebra_nhg_decrement_ref(nhe); - return; + return HASHWALK_ABORT; } /* * If its being ref'd by routes, just let it be uninstalled via a route * removal. */ - if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0) + if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0) { zebra_nhg_uninstall_kernel(nhe); + return HASHWALK_ABORT; + } + + return HASHWALK_CONTINUE; } void zebra_nhg_sweep_table(struct hash *hash) { - hash_iterate(hash, zebra_nhg_sweep_entry, NULL); + uint32_t count; + + /* + * Yes this is extremely odd. Effectively nhg's have + * other nexthop groups that depend on them and when you + * remove them, you can have other entries blown up. + * our hash code does not work with deleting multiple + * entries at a time and will possibly cause crashes + * So what to do? Whenever zebra_nhg_sweep_entry + * deletes an entry it will return HASHWALK_ABORT, + * cause that deletion might have triggered more. + * then we can just keep sweeping this table + * until nothing more is found to do. + */ + do { + count = hashcount(hash); + hash_walk(hash, zebra_nhg_sweep_entry, NULL); + } while (count != hashcount(hash)); } static void zebra_nhg_mark_keep_entry(struct hash_bucket *bucket, void *arg)