diff --git a/zebra/rib.h b/zebra/rib.h index 931c97638e..3717a12814 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -107,7 +107,7 @@ struct route_entry { /* Uptime. */ time_t uptime; - /* Type fo this route. */ + /* Type of this route. */ int type; /* VRF identifier. */ @@ -347,10 +347,16 @@ extern int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, struct prefix_ipv6 *src_p, const struct nexthop *nh, uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint32_t mtu, uint8_t distance, route_tag_t tag); - +/* + * Multipath route apis. + */ extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, struct prefix_ipv6 *src_p, struct route_entry *re, struct nexthop_group *ng); +extern int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p, + struct prefix_ipv6 *src_p, + struct route_entry *re, + struct nhg_hash_entry *nhe); extern void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, unsigned short instance, int flags, struct prefix *p, diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index ea24333cb2..1d41b3ea14 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -1425,9 +1425,6 @@ static struct nexthop *nexthop_from_zapi(struct route_entry *re, struct interface *ifp; char nhbuf[INET6_ADDRSTRLEN] = ""; - if (IS_ZEBRA_DEBUG_RECV) - zlog_debug("nh type %d", api_nh->type); - switch (api_nh->type) { case NEXTHOP_TYPE_IFINDEX: nexthop = nexthop_from_ifindex(api_nh->ifindex, api_nh->vrf_id); @@ -1526,6 +1523,18 @@ static struct nexthop *nexthop_from_zapi(struct route_entry *re, if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_WEIGHT)) nexthop->weight = api_nh->weight; + if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_HAS_BACKUP)) { + if (api_nh->backup_idx < api->backup_nexthop_num) { + /* Capture backup info */ + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP); + nexthop->backup_idx = api_nh->backup_idx; + } else { + /* Warn about invalid backup index */ + if (IS_ZEBRA_DEBUG_RECV || IS_ZEBRA_DEBUG_EVENT) + zlog_debug("%s: invalid backup nh idx %d", + __func__, api_nh->backup_idx); + } + } done: return nexthop; } @@ -1540,9 +1549,13 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) struct route_entry *re; struct nexthop *nexthop = NULL, *last_nh; struct nexthop_group *ng = NULL; - struct nexthop_group *backup_ng = NULL; + struct nhg_backup_info *bnhg = NULL; int i, ret; vrf_id_t vrf_id; + struct nhg_hash_entry nhe; + enum lsp_types_t label_type; + char nhbuf[NEXTHOP_STRLEN]; + char labelbuf[MPLS_LABEL_STRLEN]; s = msg; if (zapi_route_decode(s, &api) < 0) { @@ -1622,30 +1635,48 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) && api_nh->type != NEXTHOP_TYPE_IFINDEX && api_nh->type != NEXTHOP_TYPE_BLACKHOLE && api_nh->label_num > 0) { - enum lsp_types_t label_type; label_type = lsp_type_from_re_type(client->proto); - - if (IS_ZEBRA_DEBUG_RECV) { - zlog_debug( - "%s: adding %d labels of type %d (1st=%u)", - __func__, api_nh->label_num, label_type, - api_nh->labels[0]); - } - nexthop_add_labels(nexthop, label_type, api_nh->label_num, &api_nh->labels[0]); } - /* Add new nexthop to temporary list */ + if (IS_ZEBRA_DEBUG_RECV) { + labelbuf[0] = '\0'; + nhbuf[0] = '\0'; + + nexthop2str(nexthop, nhbuf, sizeof(nhbuf)); + + if (nexthop->nh_label && + nexthop->nh_label->num_labels > 0) { + mpls_label2str(nexthop->nh_label->num_labels, + nexthop->nh_label->label, + labelbuf, sizeof(labelbuf), + false); + } + + zlog_debug("%s: nh=%s, vrf_id=%d %s", + __func__, nhbuf, api_nh->vrf_id, labelbuf); + } + + /* Add new nexthop to temporary list. This list is + * canonicalized - sorted - so that it can be hashed later + * in route processing. We expect that the sender has sent + * the list sorted, and the zapi client api attempts to enforce + * that, so this should be inexpensive - but it is necessary + * to support shared nexthop-groups. + */ nexthop_group_add_sorted(ng, nexthop); - nexthop = NULL; } /* Allocate temporary list of backup nexthops, if necessary */ if (api.backup_nexthop_num > 0) { - backup_ng = nexthop_group_new(); + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: adding %d backup nexthops", + __func__, api.backup_nexthop_num); + + bnhg = zebra_nhg_backup_alloc(); nexthop = NULL; last_nh = NULL; } @@ -1662,42 +1693,61 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) "%s: Backup Nexthops Specified: %d but we failed to properly create one", __func__, api.backup_nexthop_num); nexthop_group_delete(&ng); - nexthop_group_delete(&backup_ng); + zebra_nhg_backup_free(&bnhg); XFREE(MTYPE_RE, re); return; } + /* Backup nexthops can't have backups; that's not valid. */ + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { + if (IS_ZEBRA_DEBUG_RECV) { + nexthop2str(nexthop, nhbuf, sizeof(nhbuf)); + zlog_debug("%s: backup nh %s with BACKUP flag!", + __func__, nhbuf); + } + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP); + nexthop->backup_idx = 0; + } + /* MPLS labels for BGP-LU or Segment Routing */ if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_LABEL) && api_nh->type != NEXTHOP_TYPE_IFINDEX && api_nh->type != NEXTHOP_TYPE_BLACKHOLE && api_nh->label_num > 0) { - enum lsp_types_t label_type; label_type = lsp_type_from_re_type(client->proto); - - if (IS_ZEBRA_DEBUG_RECV) { - zlog_debug( - "%s: adding %d labels of type %d (1st=%u)", - __func__, api_nh->label_num, label_type, - api_nh->labels[0]); - } - nexthop_add_labels(nexthop, label_type, api_nh->label_num, &api_nh->labels[0]); } - /* Note that the order of the backup nexthops is significant - * at this point - we don't sort this list as we do the - * primary nexthops, we just append. - */ - if (last_nh) { - NEXTHOP_APPEND(last_nh, nexthop); - } else { - backup_ng->nexthop = nexthop; + if (IS_ZEBRA_DEBUG_RECV) { + labelbuf[0] = '\0'; + nhbuf[0] = '\0'; + + nexthop2str(nexthop, nhbuf, sizeof(nhbuf)); + + if (nexthop->nh_label && + nexthop->nh_label->num_labels > 0) { + mpls_label2str(nexthop->nh_label->num_labels, + nexthop->nh_label->label, + labelbuf, sizeof(labelbuf), + false); + } + + zlog_debug("%s: backup nh=%s, vrf_id=%d %s", + __func__, nhbuf, api_nh->vrf_id, labelbuf); } + /* Note that the order of the backup nexthops is significant, + * so we don't sort this list as we do the primary nexthops, + * we just append. + */ + if (last_nh) + NEXTHOP_APPEND(last_nh, nexthop); + else + bnhg->nhe->nhg.nexthop = nexthop; + last_nh = nexthop; } @@ -1716,7 +1766,7 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) "%s: Received SRC Prefix but afi is not v6", __func__); nexthop_group_delete(&ng); - nexthop_group_delete(&backup_ng); + zebra_nhg_backup_free(&bnhg); XFREE(MTYPE_RE, re); return; } @@ -1728,10 +1778,17 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) "%s: Received safi: %d but we can only accept UNICAST or MULTICAST", __func__, api.safi); nexthop_group_delete(&ng); + zebra_nhg_backup_free(&bnhg); XFREE(MTYPE_RE, re); return; } - ret = rib_add_multipath(afi, api.safi, &api.prefix, src_p, re, ng); + + /* Include backup info with the route */ + memset(&nhe, 0, sizeof(nhe)); + nhe.nhg.nexthop = ng->nexthop; + nhe.backup_info = bnhg; + ret = rib_add_multipath_nhe(afi, api.safi, &api.prefix, src_p, + re, &nhe); /* Stats */ switch (api.prefix.family) { diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 459d2bc620..a2365ee76b 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -113,10 +113,15 @@ struct dplane_route_info { struct dplane_nexthop_info nhe; /* Nexthops */ + uint32_t zd_nhg_id; struct nexthop_group zd_ng; + /* Backup nexthops (if present) */ + struct nexthop_group backup_ng; + /* "Previous" nexthops, used only in route updates without netlink */ struct nexthop_group zd_old_ng; + struct nexthop_group old_backup_ng; /* TODO -- use fixed array of nexthops, to avoid mallocs? */ @@ -472,6 +477,14 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) (*pctx)->u.rinfo.zd_ng.nexthop = NULL; } + /* Free backup info also (if present) */ + if ((*pctx)->u.rinfo.backup_ng.nexthop) { + /* This deals with recursive nexthops too */ + nexthops_free((*pctx)->u.rinfo.backup_ng.nexthop); + + (*pctx)->u.rinfo.backup_ng.nexthop = NULL; + } + if ((*pctx)->u.rinfo.zd_old_ng.nexthop) { /* This deals with recursive nexthops too */ nexthops_free((*pctx)->u.rinfo.zd_old_ng.nexthop); @@ -479,6 +492,13 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx) (*pctx)->u.rinfo.zd_old_ng.nexthop = NULL; } + if ((*pctx)->u.rinfo.old_backup_ng.nexthop) { + /* This deals with recursive nexthops too */ + nexthops_free((*pctx)->u.rinfo.old_backup_ng.nexthop); + + (*pctx)->u.rinfo.old_backup_ng.nexthop = NULL; + } + break; case DPLANE_OP_NH_INSTALL: @@ -1038,6 +1058,12 @@ void dplane_ctx_set_nexthops(struct zebra_dplane_ctx *ctx, struct nexthop *nh) nexthop_group_copy_nh_sorted(&(ctx->u.rinfo.zd_ng), nh); } +uint32_t dplane_ctx_get_nhg_id(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.zd_nhg_id; +} + const struct nexthop_group *dplane_ctx_get_ng( const struct zebra_dplane_ctx *ctx) { @@ -1046,14 +1072,30 @@ const struct nexthop_group *dplane_ctx_get_ng( return &(ctx->u.rinfo.zd_ng); } -const struct nexthop_group *dplane_ctx_get_old_ng( - const struct zebra_dplane_ctx *ctx) +const struct nexthop_group * +dplane_ctx_get_backup_ng(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rinfo.backup_ng); +} + +const struct nexthop_group * +dplane_ctx_get_old_ng(const struct zebra_dplane_ctx *ctx) { DPLANE_CTX_VALID(ctx); return &(ctx->u.rinfo.zd_old_ng); } +const struct nexthop_group * +dplane_ctx_get_old_backup_ng(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.rinfo.old_backup_ng); +} + const struct zebra_dplane_info *dplane_ctx_get_ns( const struct zebra_dplane_ctx *ctx) { @@ -1514,6 +1556,13 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, /* Copy nexthops; recursive info is included too */ copy_nexthops(&(ctx->u.rinfo.zd_ng.nexthop), re->nhe->nhg.nexthop, NULL); + ctx->u.rinfo.zd_nhg_id = re->nhe->id; + + /* Copy backup nexthop info, if present */ + if (re->nhe->backup_info && re->nhe->backup_info->nhe) { + copy_nexthops(&(ctx->u.rinfo.backup_ng.nexthop), + re->nhe->backup_info->nhe->nhg.nexthop, NULL); + } /* Ensure that the dplane nexthops' flags are clear. */ for (ALL_NEXTHOPS(ctx->u.rinfo.zd_ng, nexthop)) @@ -1532,9 +1581,8 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, dplane_ctx_ns_init(ctx, zns, (op == DPLANE_OP_ROUTE_UPDATE)); #ifdef HAVE_NETLINK - if (re->nhe_id) { - struct nhg_hash_entry *nhe = - zebra_nhg_resolve(zebra_nhg_lookup_id(re->nhe_id)); + if (re->nhe) { + struct nhg_hash_entry *nhe = zebra_nhg_resolve(re->nhe); ctx->u.rinfo.nhe.id = nhe->id; /* @@ -1581,7 +1629,6 @@ static int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx, { struct zebra_vrf *zvrf = NULL; struct zebra_ns *zns = NULL; - int ret = EINVAL; if (!ctx || !nhe) @@ -1850,6 +1897,17 @@ dplane_route_update_internal(struct route_node *rn, */ copy_nexthops(&(ctx->u.rinfo.zd_old_ng.nexthop), old_re->nhe->nhg.nexthop, NULL); + + if (zebra_nhg_get_backup_nhg(old_re->nhe) != NULL) { + struct nexthop_group *nhg; + struct nexthop **nh; + + nhg = zebra_nhg_get_backup_nhg(old_re->nhe); + nh = &(ctx->u.rinfo.old_backup_ng.nexthop); + + if (nhg->nexthop) + copy_nexthops(nh, nhg->nexthop, NULL); + } #endif /* !HAVE_NETLINK */ } diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index c0b04e71b0..9ce4df197c 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -270,11 +270,19 @@ void dplane_ctx_set_distance(struct zebra_dplane_ctx *ctx, uint8_t distance); uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx); void dplane_ctx_set_nexthops(struct zebra_dplane_ctx *ctx, struct nexthop *nh); + +uint32_t dplane_ctx_get_nhg_id(const struct zebra_dplane_ctx *ctx); const struct nexthop_group *dplane_ctx_get_ng( const struct zebra_dplane_ctx *ctx); const struct nexthop_group *dplane_ctx_get_old_ng( const struct zebra_dplane_ctx *ctx); +/* Backup nexthop information (list of nexthops) if present. */ +const struct nexthop_group * +dplane_ctx_get_backup_ng(const struct zebra_dplane_ctx *ctx); +const struct nexthop_group * +dplane_ctx_get_old_backup_ng(const struct zebra_dplane_ctx *ctx); + /* Accessors for nexthop information */ uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx); afi_t dplane_ctx_get_nhe_afi(const struct zebra_dplane_ctx *ctx); diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index dc0af050d7..672611049c 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -63,6 +63,9 @@ static struct nhg_hash_entry * depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id); static void depends_decrement_free(struct nhg_connected_tree_head *head); +static struct nhg_backup_info * +nhg_backup_copy(const struct nhg_backup_info *orig); + static void nhg_connected_free(struct nhg_connected *dep) { @@ -341,7 +344,7 @@ struct nhg_hash_entry *zebra_nhg_alloc(void) return nhe; } -static struct nhg_hash_entry *zebra_nhg_copy(const struct nhg_hash_entry *copy, +static struct nhg_hash_entry *zebra_nhg_copy(const struct nhg_hash_entry *orig, uint32_t id) { struct nhg_hash_entry *nhe; @@ -350,14 +353,18 @@ static struct nhg_hash_entry *zebra_nhg_copy(const struct nhg_hash_entry *copy, nhe->id = id; - nexthop_group_copy(&(nhe->nhg), &(copy->nhg)); + nexthop_group_copy(&(nhe->nhg), &(orig->nhg)); - nhe->vrf_id = copy->vrf_id; - nhe->afi = copy->afi; - nhe->type = copy->type ? copy->type : ZEBRA_ROUTE_NHG; + nhe->vrf_id = orig->vrf_id; + nhe->afi = orig->afi; + nhe->type = orig->type ? orig->type : ZEBRA_ROUTE_NHG; nhe->refcnt = 0; nhe->dplane_ref = zebra_router_get_next_sequence(); + /* Copy backup info also, if present */ + if (orig->backup_info) + nhe->backup_info = nhg_backup_copy(orig->backup_info); + return nhe; } @@ -381,12 +388,17 @@ static void *zebra_nhg_hash_alloc(void *arg) uint32_t zebra_nhg_hash_key(const void *arg) { const struct nhg_hash_entry *nhe = arg; + uint32_t val, key = 0x5a351234; - uint32_t key = 0x5a351234; + val = nexthop_group_hash(&(nhe->nhg)); + if (nhe->backup_info) { + val = jhash_2words(val, + nexthop_group_hash( + &(nhe->backup_info->nhe->nhg)), + key); + } - key = jhash_3words(nhe->vrf_id, nhe->afi, - nexthop_group_hash(&(nhe->nhg)), - key); + key = jhash_3words(nhe->vrf_id, nhe->afi, val, key); return key; } @@ -398,6 +410,50 @@ uint32_t zebra_nhg_id_key(const void *arg) return nhe->id; } +/* Helper with common nhg/nhe nexthop comparison logic */ +static bool nhg_compare_nexthops(const struct nexthop *nh1, + const struct nexthop *nh2) +{ + if (nh1 && !nh2) + return false; + + if (!nh1 && nh2) + return false; + + /* + * We have to check the active flag of each individual one, + * not just the overall active_num. This solves the special case + * issue of a route with a nexthop group with one nexthop + * resolving to itself and thus marking it inactive. If we + * have two different routes each wanting to mark a different + * nexthop inactive, they need to hash to two different groups. + * + * If we just hashed on num_active, they would hash the same + * which is incorrect. + * + * ex) + * 1.1.1.0/24 + * -> 1.1.1.1 dummy1 (inactive) + * -> 1.1.2.1 dummy2 + * + * 1.1.2.0/24 + * -> 1.1.1.1 dummy1 + * -> 1.1.2.1 dummy2 (inactive) + * + * Without checking each individual one, they would hash to + * the same group and both have 1.1.1.1 dummy1 marked inactive. + * + */ + if (CHECK_FLAG(nh1->flags, NEXTHOP_FLAG_ACTIVE) + != CHECK_FLAG(nh2->flags, NEXTHOP_FLAG_ACTIVE)) + return false; + + if (!nexthop_same(nh1, nh2)) + return false; + + return true; +} + bool zebra_nhg_hash_equal(const void *arg1, const void *arg2) { const struct nhg_hash_entry *nhe1 = arg1; @@ -415,45 +471,44 @@ bool zebra_nhg_hash_equal(const void *arg1, const void *arg2) if (nhe1->afi != nhe2->afi) return false; - /* Nexthops should be sorted */ + /* Nexthops should be in-order, so we simply compare them in-place */ for (nexthop1 = nhe1->nhg.nexthop, nexthop2 = nhe2->nhg.nexthop; nexthop1 || nexthop2; nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) { - if (nexthop1 && !nexthop2) - return false; - if (!nexthop1 && nexthop2) + if (!nhg_compare_nexthops(nexthop1, nexthop2)) return false; + } - /* - * We have to check the active flag of each individual one, - * not just the overall active_num. This solves the special case - * issue of a route with a nexthop group with one nexthop - * resolving to itself and thus marking it inactive. If we - * have two different routes each wanting to mark a different - * nexthop inactive, they need to hash to two different groups. - * - * If we just hashed on num_active, they would hash the same - * which is incorrect. - * - * ex) - * 1.1.1.0/24 - * -> 1.1.1.1 dummy1 (inactive) - * -> 1.1.2.1 dummy2 - * - * 1.1.2.0/24 - * -> 1.1.1.1 dummy1 - * -> 1.1.2.1 dummy2 (inactive) - * - * Without checking each individual one, they would hash to - * the same group and both have 1.1.1.1 dummy1 marked inactive. - * - */ - if (CHECK_FLAG(nexthop1->flags, NEXTHOP_FLAG_ACTIVE) - != CHECK_FLAG(nexthop2->flags, NEXTHOP_FLAG_ACTIVE)) - return false; + /* If there's no backup info, comparison is done. */ + if ((nhe1->backup_info == NULL) && (nhe2->backup_info == NULL)) + return true; - if (!nexthop_same(nexthop1, nexthop2)) + /* Compare backup info also - test the easy things first */ + if (nhe1->backup_info && (nhe2->backup_info == NULL)) + return false; + if (nhe2->backup_info && (nhe1->backup_info == NULL)) + return false; + + /* Compare number of backups before actually comparing any */ + for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop, + nexthop2 = nhe2->backup_info->nhe->nhg.nexthop; + nexthop1 && nexthop2; + nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) { + ; + } + + /* Did we find the end of one list before the other? */ + if (nexthop1 || nexthop2) + return false; + + /* Have to compare the backup nexthops */ + for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop, + nexthop2 = nhe2->backup_info->nhe->nhg.nexthop; + nexthop1 || nexthop2; + nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) { + + if (!nhg_compare_nexthops(nexthop1, nexthop2)) return false; } @@ -530,6 +585,11 @@ static bool zebra_nhg_find(struct nhg_hash_entry **nhe, uint32_t id, bool created = false; bool recursive = false; + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: id %u, nhg %p, vrf %d, type %d, depends %p", + __func__, id, nhg, vrf_id, type, + nhg_depends); + /* * If it has an id at this point, we must have gotten it from the kernel */ @@ -1154,6 +1214,10 @@ depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh, depend = depends_find(nh, afi); + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: nh %pNHv => %p", + __func__, nh, depend); + if (depend) depends_add(head, depend); @@ -1198,10 +1262,87 @@ zebra_nhg_rib_find(uint32_t id, struct nexthop_group *nhg, afi_t rt_afi) return nhe; } +/* + * Allocate backup nexthop info object. Typically these are embedded in + * nhg_hash_entry objects. + */ +struct nhg_backup_info *zebra_nhg_backup_alloc(void) +{ + struct nhg_backup_info *p; + + p = XCALLOC(MTYPE_NHG, sizeof(struct nhg_backup_info)); + + p->nhe = zebra_nhg_alloc(); + + /* Identify the embedded group used to hold the list of backups */ + SET_FLAG(p->nhe->flags, NEXTHOP_GROUP_BACKUP); + + return p; +} + +/* + * Free backup nexthop info object, deal with any embedded allocations + */ +void zebra_nhg_backup_free(struct nhg_backup_info **p) +{ + if (p && *p) { + if ((*p)->nhe) + zebra_nhg_free((*p)->nhe); + + XFREE(MTYPE_NHG, (*p)); + } +} + +/* Accessor for backup nexthop info */ +struct nhg_hash_entry *zebra_nhg_get_backup_nhe(struct nhg_hash_entry *nhe) +{ + struct nhg_hash_entry *p = NULL; + + if (nhe) { + if (nhe->backup_info) + p = nhe->backup_info->nhe; + } + + return p; +} + +/* Accessor for backup nexthop group */ +struct nexthop_group *zebra_nhg_get_backup_nhg(struct nhg_hash_entry *nhe) +{ + struct nexthop_group *p = NULL; + + if (nhe) { + if (nhe->backup_info && nhe->backup_info->nhe) + p = &(nhe->backup_info->nhe->nhg); + } + + return p; +} + +/* + * Helper to return a copy of a backup_info - note that this is a shallow + * copy, meant to be used when creating a new nhe from info passed in with + * a route e.g. + */ +static struct nhg_backup_info * +nhg_backup_copy(const struct nhg_backup_info *orig) +{ + struct nhg_backup_info *b; + + b = zebra_nhg_backup_alloc(); + + /* Copy list of nexthops */ + nexthop_group_copy(&(b->nhe->nhg), &(orig->nhe->nhg)); + + return b; +} + static void zebra_nhg_free_members(struct nhg_hash_entry *nhe) { nexthops_free(nhe->nhg.nexthop); + zebra_nhg_backup_free(&nhe->backup_info); + /* Decrement to remove connection ref */ nhg_connected_tree_decrement_ref(&nhe->nhg_depends); nhg_connected_tree_free(&nhe->nhg_depends); diff --git a/zebra/zebra_nhg.h b/zebra/zebra_nhg.h index dc3a47c020..340de952be 100644 --- a/zebra/zebra_nhg.h +++ b/zebra/zebra_nhg.h @@ -50,6 +50,9 @@ struct nhg_hash_entry { struct nexthop_group nhg; + /* If supported, a mapping of backup nexthops. */ + struct nhg_backup_info *backup_info; + /* If this is not a group, it * will be a single nexthop * and must have an interface @@ -72,6 +75,7 @@ struct nhg_hash_entry { * faster with ID's. */ struct nhg_connected_tree_head nhg_depends, nhg_dependents; + /* * Is this nexthop group valid, ie all nexthops are fully resolved. * What is fully resolved? It's a nexthop that is either self contained @@ -102,11 +106,25 @@ struct nhg_hash_entry { * from the kernel. Therefore, it is unhashable. */ #define NEXTHOP_GROUP_UNHASHABLE (1 << 4) + +/* + * Backup nexthop support - identify groups that are backups for + * another group. + */ +#define NEXTHOP_GROUP_BACKUP (1 << 5) + }; /* Was this one we created, either this session or previously? */ #define ZEBRA_NHG_CREATED(NHE) ((NHE->type) == ZEBRA_ROUTE_NHG) +/* + * Backup nexthops: this is a group object itself, so + * that the backup nexthops can use the same code as a normal object. + */ +struct nhg_backup_info { + struct nhg_hash_entry *nhe; +}; enum nhg_ctx_op_e { NHG_CTX_OP_NONE = 0, @@ -162,13 +180,20 @@ bool zebra_nhg_kernel_nexthops_enabled(void); /** * NHE abstracted tree functions. - * Use these where possible instead of the direct ones access ones. + * Use these where possible instead of direct access. */ struct nhg_hash_entry *zebra_nhg_alloc(void); void zebra_nhg_free(struct nhg_hash_entry *nhe); /* In order to clear a generic hash, we need a generic api, sigh. */ void zebra_nhg_hash_free(void *p); +/* Allocate, free backup nexthop info objects */ +struct nhg_backup_info *zebra_nhg_backup_alloc(void); +void zebra_nhg_backup_free(struct nhg_backup_info **p); + +struct nhg_hash_entry *zebra_nhg_get_backup_nhe(struct nhg_hash_entry *nhe); +struct nexthop_group *zebra_nhg_get_backup_nhg(struct nhg_hash_entry *nhe); + extern struct nhg_hash_entry *zebra_nhg_resolve(struct nhg_hash_entry *nhe); extern unsigned int zebra_nhg_depends_count(const struct nhg_hash_entry *nhe); diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 29d59b515f..d059bc9290 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -2338,7 +2338,6 @@ static void rib_addnode(struct route_node *rn, void rib_unlink(struct route_node *rn, struct route_entry *re) { rib_dest_t *dest; - struct nhg_hash_entry *nhe = NULL; assert(rn && re); @@ -2353,11 +2352,10 @@ void rib_unlink(struct route_node *rn, struct route_entry *re) if (dest->selected_fib == re) dest->selected_fib = NULL; - if (re->nhe_id) { - nhe = zebra_nhg_lookup_id(re->nhe_id); - if (nhe) - zebra_nhg_decrement_ref(nhe); - } else if (re->nhe->nhg.nexthop) + if (re->nhe && re->nhe_id) { + assert(re->nhe->id == re->nhe_id); + zebra_nhg_decrement_ref(re->nhe); + } else if (re->nhe && re->nhe->nhg.nexthop) nexthops_free(re->nhe->nhg.nexthop); nexthops_free(re->fib_ng.nexthop); @@ -2574,51 +2572,52 @@ void rib_lookup_and_pushup(struct prefix_ipv4 *p, vrf_id_t vrf_id) } } -int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, - struct prefix_ipv6 *src_p, struct route_entry *re, - struct nexthop_group *ng) +/* + * Internal route-add implementation; there are a couple of different public + * signatures. Callers in this path are responsible for the memory they + * allocate: if they allocate a nexthop_group or backup nexthop info, they + * must free those objects. If this returns < 0, an error has occurred and the + * route_entry 're' has not been captured; the caller should free that also. + */ +int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p, + struct prefix_ipv6 *src_p, + struct route_entry *re, + struct nhg_hash_entry *re_nhe) { struct nhg_hash_entry *nhe = NULL; struct route_table *table; struct route_node *rn; struct route_entry *same = NULL; int ret = 0; + struct nexthop_group *ng; - if (!re) - return 0; + if (!re || !re_nhe) + return -1; assert(!src_p || !src_p->prefixlen || afi == AFI_IP6); + /* TODO */ + ng = &(re_nhe->nhg); + /* Lookup table. */ table = zebra_vrf_get_table_with_table_id(afi, safi, re->vrf_id, re->table); - if (!table) { - if (ng) - nexthop_group_delete(&ng); - XFREE(MTYPE_RE, re); - return 0; - } + if (!table) + return -1; - if (re->nhe_id) { - nhe = zebra_nhg_lookup_id(re->nhe_id); + if (re_nhe->id > 0) { + nhe = zebra_nhg_lookup_id(re_nhe->id); if (!nhe) { flog_err( EC_ZEBRA_TABLE_LOOKUP_FAILED, "Zebra failed to find the nexthop hash entry for id=%u in a route entry", - re->nhe_id); - XFREE(MTYPE_RE, re); + re_nhe->id); + return -1; } } else { nhe = zebra_nhg_rib_find(0, ng, afi); - - /* - * The nexthops got copied over into an nhe, - * so free them now. - */ - nexthop_group_delete(&ng); - if (!nhe) { char buf[PREFIX_STRLEN] = ""; char buf2[PREFIX_STRLEN] = ""; @@ -2631,7 +2630,6 @@ int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, src_p ? prefix2str(src_p, buf2, sizeof(buf2)) : ""); - XFREE(MTYPE_RE, re); return -1; } } @@ -2709,15 +2707,50 @@ int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, ret = 1; /* Free implicit route.*/ - if (same) { + if (same) rib_delnode(rn, same); - ret = -1; - } route_unlock_node(rn); return ret; } +/* + * Add a single route. + */ +int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, + struct prefix_ipv6 *src_p, struct route_entry *re, + struct nexthop_group *ng) +{ + int ret; + struct nhg_hash_entry nhe = {}; + + if (!re) + return -1; + + /* We either need nexthop(s) or an existing nexthop id */ + if (ng == NULL && re->nhe_id == 0) + return -1; + + /* + * Use a temporary nhe to convey info to the common/main api. + */ + if (ng) + nhe.nhg.nexthop = ng->nexthop; + else if (re->nhe_id > 0) + nhe.id = re->nhe_id; + + ret = rib_add_multipath_nhe(afi, safi, p, src_p, re, &nhe); + + /* In this path, the callers expect memory to be freed. */ + nexthop_group_delete(&ng); + + /* In error cases, free the route also */ + if (ret < 0) + XFREE(MTYPE_RE, re); + + return ret; +} + void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, unsigned short instance, int flags, struct prefix *p, struct prefix_ipv6 *src_p, const struct nexthop *nh,