zebra: add per-nexthop backup index

Use a backup index in a nexthop directly (if it has a backup
nexthop); revise the zebra nhe/nhg code; revise zapi route
decoding to match; revise the dataplane route data structures.

Refactor some of the rib_add_multipath code so that it can be
called with an nhe that carries nexthop and (possibly) backup
info together.

Signed-off-by: Mark Stapp <mjs@voltanet.io>
This commit is contained in:
Mark Stapp 2019-12-24 14:22:03 -05:00
parent 1df3b1dc65
commit 1d48702ede
7 changed files with 446 additions and 118 deletions

View File

@ -107,7 +107,7 @@ struct route_entry {
/* Uptime. */ /* Uptime. */
time_t uptime; time_t uptime;
/* Type fo this route. */ /* Type of this route. */
int type; int type;
/* VRF identifier. */ /* VRF identifier. */
@ -347,10 +347,16 @@ extern int rib_add(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
struct prefix_ipv6 *src_p, const struct nexthop *nh, struct prefix_ipv6 *src_p, const struct nexthop *nh,
uint32_t nhe_id, uint32_t table_id, uint32_t metric, uint32_t nhe_id, uint32_t table_id, uint32_t metric,
uint32_t mtu, uint8_t distance, route_tag_t tag); uint32_t mtu, uint8_t distance, route_tag_t tag);
/*
* Multipath route apis.
*/
extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, extern int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p, struct route_entry *re, struct prefix_ipv6 *src_p, struct route_entry *re,
struct nexthop_group *ng); struct nexthop_group *ng);
extern int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p,
struct route_entry *re,
struct nhg_hash_entry *nhe);
extern void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, extern void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
unsigned short instance, int flags, struct prefix *p, unsigned short instance, int flags, struct prefix *p,

View File

@ -1425,9 +1425,6 @@ static struct nexthop *nexthop_from_zapi(struct route_entry *re,
struct interface *ifp; struct interface *ifp;
char nhbuf[INET6_ADDRSTRLEN] = ""; char nhbuf[INET6_ADDRSTRLEN] = "";
if (IS_ZEBRA_DEBUG_RECV)
zlog_debug("nh type %d", api_nh->type);
switch (api_nh->type) { switch (api_nh->type) {
case NEXTHOP_TYPE_IFINDEX: case NEXTHOP_TYPE_IFINDEX:
nexthop = nexthop_from_ifindex(api_nh->ifindex, api_nh->vrf_id); nexthop = nexthop_from_ifindex(api_nh->ifindex, api_nh->vrf_id);
@ -1526,6 +1523,18 @@ static struct nexthop *nexthop_from_zapi(struct route_entry *re,
if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_WEIGHT)) if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_WEIGHT))
nexthop->weight = api_nh->weight; nexthop->weight = api_nh->weight;
if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_HAS_BACKUP)) {
if (api_nh->backup_idx < api->backup_nexthop_num) {
/* Capture backup info */
SET_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP);
nexthop->backup_idx = api_nh->backup_idx;
} else {
/* Warn about invalid backup index */
if (IS_ZEBRA_DEBUG_RECV || IS_ZEBRA_DEBUG_EVENT)
zlog_debug("%s: invalid backup nh idx %d",
__func__, api_nh->backup_idx);
}
}
done: done:
return nexthop; return nexthop;
} }
@ -1540,9 +1549,13 @@ static void zread_route_add(ZAPI_HANDLER_ARGS)
struct route_entry *re; struct route_entry *re;
struct nexthop *nexthop = NULL, *last_nh; struct nexthop *nexthop = NULL, *last_nh;
struct nexthop_group *ng = NULL; struct nexthop_group *ng = NULL;
struct nexthop_group *backup_ng = NULL; struct nhg_backup_info *bnhg = NULL;
int i, ret; int i, ret;
vrf_id_t vrf_id; vrf_id_t vrf_id;
struct nhg_hash_entry nhe;
enum lsp_types_t label_type;
char nhbuf[NEXTHOP_STRLEN];
char labelbuf[MPLS_LABEL_STRLEN];
s = msg; s = msg;
if (zapi_route_decode(s, &api) < 0) { if (zapi_route_decode(s, &api) < 0) {
@ -1622,30 +1635,48 @@ static void zread_route_add(ZAPI_HANDLER_ARGS)
&& api_nh->type != NEXTHOP_TYPE_IFINDEX && api_nh->type != NEXTHOP_TYPE_IFINDEX
&& api_nh->type != NEXTHOP_TYPE_BLACKHOLE && api_nh->type != NEXTHOP_TYPE_BLACKHOLE
&& api_nh->label_num > 0) { && api_nh->label_num > 0) {
enum lsp_types_t label_type;
label_type = lsp_type_from_re_type(client->proto); label_type = lsp_type_from_re_type(client->proto);
if (IS_ZEBRA_DEBUG_RECV) {
zlog_debug(
"%s: adding %d labels of type %d (1st=%u)",
__func__, api_nh->label_num, label_type,
api_nh->labels[0]);
}
nexthop_add_labels(nexthop, label_type, nexthop_add_labels(nexthop, label_type,
api_nh->label_num, api_nh->label_num,
&api_nh->labels[0]); &api_nh->labels[0]);
} }
/* Add new nexthop to temporary list */ if (IS_ZEBRA_DEBUG_RECV) {
labelbuf[0] = '\0';
nhbuf[0] = '\0';
nexthop2str(nexthop, nhbuf, sizeof(nhbuf));
if (nexthop->nh_label &&
nexthop->nh_label->num_labels > 0) {
mpls_label2str(nexthop->nh_label->num_labels,
nexthop->nh_label->label,
labelbuf, sizeof(labelbuf),
false);
}
zlog_debug("%s: nh=%s, vrf_id=%d %s",
__func__, nhbuf, api_nh->vrf_id, labelbuf);
}
/* Add new nexthop to temporary list. This list is
* canonicalized - sorted - so that it can be hashed later
* in route processing. We expect that the sender has sent
* the list sorted, and the zapi client api attempts to enforce
* that, so this should be inexpensive - but it is necessary
* to support shared nexthop-groups.
*/
nexthop_group_add_sorted(ng, nexthop); nexthop_group_add_sorted(ng, nexthop);
nexthop = NULL;
} }
/* Allocate temporary list of backup nexthops, if necessary */ /* Allocate temporary list of backup nexthops, if necessary */
if (api.backup_nexthop_num > 0) { if (api.backup_nexthop_num > 0) {
backup_ng = nexthop_group_new(); if (IS_ZEBRA_DEBUG_RECV)
zlog_debug("%s: adding %d backup nexthops",
__func__, api.backup_nexthop_num);
bnhg = zebra_nhg_backup_alloc();
nexthop = NULL; nexthop = NULL;
last_nh = NULL; last_nh = NULL;
} }
@ -1662,42 +1693,61 @@ static void zread_route_add(ZAPI_HANDLER_ARGS)
"%s: Backup Nexthops Specified: %d but we failed to properly create one", "%s: Backup Nexthops Specified: %d but we failed to properly create one",
__func__, api.backup_nexthop_num); __func__, api.backup_nexthop_num);
nexthop_group_delete(&ng); nexthop_group_delete(&ng);
nexthop_group_delete(&backup_ng); zebra_nhg_backup_free(&bnhg);
XFREE(MTYPE_RE, re); XFREE(MTYPE_RE, re);
return; return;
} }
/* Backup nexthops can't have backups; that's not valid. */
if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) {
if (IS_ZEBRA_DEBUG_RECV) {
nexthop2str(nexthop, nhbuf, sizeof(nhbuf));
zlog_debug("%s: backup nh %s with BACKUP flag!",
__func__, nhbuf);
}
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP);
nexthop->backup_idx = 0;
}
/* MPLS labels for BGP-LU or Segment Routing */ /* MPLS labels for BGP-LU or Segment Routing */
if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_LABEL) if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_LABEL)
&& api_nh->type != NEXTHOP_TYPE_IFINDEX && api_nh->type != NEXTHOP_TYPE_IFINDEX
&& api_nh->type != NEXTHOP_TYPE_BLACKHOLE && api_nh->type != NEXTHOP_TYPE_BLACKHOLE
&& api_nh->label_num > 0) { && api_nh->label_num > 0) {
enum lsp_types_t label_type;
label_type = lsp_type_from_re_type(client->proto); label_type = lsp_type_from_re_type(client->proto);
if (IS_ZEBRA_DEBUG_RECV) {
zlog_debug(
"%s: adding %d labels of type %d (1st=%u)",
__func__, api_nh->label_num, label_type,
api_nh->labels[0]);
}
nexthop_add_labels(nexthop, label_type, nexthop_add_labels(nexthop, label_type,
api_nh->label_num, api_nh->label_num,
&api_nh->labels[0]); &api_nh->labels[0]);
} }
/* Note that the order of the backup nexthops is significant if (IS_ZEBRA_DEBUG_RECV) {
* at this point - we don't sort this list as we do the labelbuf[0] = '\0';
* primary nexthops, we just append. nhbuf[0] = '\0';
*/
if (last_nh) { nexthop2str(nexthop, nhbuf, sizeof(nhbuf));
NEXTHOP_APPEND(last_nh, nexthop);
} else { if (nexthop->nh_label &&
backup_ng->nexthop = nexthop; nexthop->nh_label->num_labels > 0) {
mpls_label2str(nexthop->nh_label->num_labels,
nexthop->nh_label->label,
labelbuf, sizeof(labelbuf),
false);
}
zlog_debug("%s: backup nh=%s, vrf_id=%d %s",
__func__, nhbuf, api_nh->vrf_id, labelbuf);
} }
/* Note that the order of the backup nexthops is significant,
* so we don't sort this list as we do the primary nexthops,
* we just append.
*/
if (last_nh)
NEXTHOP_APPEND(last_nh, nexthop);
else
bnhg->nhe->nhg.nexthop = nexthop;
last_nh = nexthop; last_nh = nexthop;
} }
@ -1716,7 +1766,7 @@ static void zread_route_add(ZAPI_HANDLER_ARGS)
"%s: Received SRC Prefix but afi is not v6", "%s: Received SRC Prefix but afi is not v6",
__func__); __func__);
nexthop_group_delete(&ng); nexthop_group_delete(&ng);
nexthop_group_delete(&backup_ng); zebra_nhg_backup_free(&bnhg);
XFREE(MTYPE_RE, re); XFREE(MTYPE_RE, re);
return; return;
} }
@ -1728,10 +1778,17 @@ static void zread_route_add(ZAPI_HANDLER_ARGS)
"%s: Received safi: %d but we can only accept UNICAST or MULTICAST", "%s: Received safi: %d but we can only accept UNICAST or MULTICAST",
__func__, api.safi); __func__, api.safi);
nexthop_group_delete(&ng); nexthop_group_delete(&ng);
zebra_nhg_backup_free(&bnhg);
XFREE(MTYPE_RE, re); XFREE(MTYPE_RE, re);
return; return;
} }
ret = rib_add_multipath(afi, api.safi, &api.prefix, src_p, re, ng);
/* Include backup info with the route */
memset(&nhe, 0, sizeof(nhe));
nhe.nhg.nexthop = ng->nexthop;
nhe.backup_info = bnhg;
ret = rib_add_multipath_nhe(afi, api.safi, &api.prefix, src_p,
re, &nhe);
/* Stats */ /* Stats */
switch (api.prefix.family) { switch (api.prefix.family) {

View File

@ -113,10 +113,15 @@ struct dplane_route_info {
struct dplane_nexthop_info nhe; struct dplane_nexthop_info nhe;
/* Nexthops */ /* Nexthops */
uint32_t zd_nhg_id;
struct nexthop_group zd_ng; struct nexthop_group zd_ng;
/* Backup nexthops (if present) */
struct nexthop_group backup_ng;
/* "Previous" nexthops, used only in route updates without netlink */ /* "Previous" nexthops, used only in route updates without netlink */
struct nexthop_group zd_old_ng; struct nexthop_group zd_old_ng;
struct nexthop_group old_backup_ng;
/* TODO -- use fixed array of nexthops, to avoid mallocs? */ /* TODO -- use fixed array of nexthops, to avoid mallocs? */
@ -472,6 +477,14 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx)
(*pctx)->u.rinfo.zd_ng.nexthop = NULL; (*pctx)->u.rinfo.zd_ng.nexthop = NULL;
} }
/* Free backup info also (if present) */
if ((*pctx)->u.rinfo.backup_ng.nexthop) {
/* This deals with recursive nexthops too */
nexthops_free((*pctx)->u.rinfo.backup_ng.nexthop);
(*pctx)->u.rinfo.backup_ng.nexthop = NULL;
}
if ((*pctx)->u.rinfo.zd_old_ng.nexthop) { if ((*pctx)->u.rinfo.zd_old_ng.nexthop) {
/* This deals with recursive nexthops too */ /* This deals with recursive nexthops too */
nexthops_free((*pctx)->u.rinfo.zd_old_ng.nexthop); nexthops_free((*pctx)->u.rinfo.zd_old_ng.nexthop);
@ -479,6 +492,13 @@ static void dplane_ctx_free(struct zebra_dplane_ctx **pctx)
(*pctx)->u.rinfo.zd_old_ng.nexthop = NULL; (*pctx)->u.rinfo.zd_old_ng.nexthop = NULL;
} }
if ((*pctx)->u.rinfo.old_backup_ng.nexthop) {
/* This deals with recursive nexthops too */
nexthops_free((*pctx)->u.rinfo.old_backup_ng.nexthop);
(*pctx)->u.rinfo.old_backup_ng.nexthop = NULL;
}
break; break;
case DPLANE_OP_NH_INSTALL: case DPLANE_OP_NH_INSTALL:
@ -1038,6 +1058,12 @@ void dplane_ctx_set_nexthops(struct zebra_dplane_ctx *ctx, struct nexthop *nh)
nexthop_group_copy_nh_sorted(&(ctx->u.rinfo.zd_ng), nh); nexthop_group_copy_nh_sorted(&(ctx->u.rinfo.zd_ng), nh);
} }
uint32_t dplane_ctx_get_nhg_id(const struct zebra_dplane_ctx *ctx)
{
DPLANE_CTX_VALID(ctx);
return ctx->u.rinfo.zd_nhg_id;
}
const struct nexthop_group *dplane_ctx_get_ng( const struct nexthop_group *dplane_ctx_get_ng(
const struct zebra_dplane_ctx *ctx) const struct zebra_dplane_ctx *ctx)
{ {
@ -1046,14 +1072,30 @@ const struct nexthop_group *dplane_ctx_get_ng(
return &(ctx->u.rinfo.zd_ng); return &(ctx->u.rinfo.zd_ng);
} }
const struct nexthop_group *dplane_ctx_get_old_ng( const struct nexthop_group *
const struct zebra_dplane_ctx *ctx) dplane_ctx_get_backup_ng(const struct zebra_dplane_ctx *ctx)
{
DPLANE_CTX_VALID(ctx);
return &(ctx->u.rinfo.backup_ng);
}
const struct nexthop_group *
dplane_ctx_get_old_ng(const struct zebra_dplane_ctx *ctx)
{ {
DPLANE_CTX_VALID(ctx); DPLANE_CTX_VALID(ctx);
return &(ctx->u.rinfo.zd_old_ng); return &(ctx->u.rinfo.zd_old_ng);
} }
const struct nexthop_group *
dplane_ctx_get_old_backup_ng(const struct zebra_dplane_ctx *ctx)
{
DPLANE_CTX_VALID(ctx);
return &(ctx->u.rinfo.old_backup_ng);
}
const struct zebra_dplane_info *dplane_ctx_get_ns( const struct zebra_dplane_info *dplane_ctx_get_ns(
const struct zebra_dplane_ctx *ctx) const struct zebra_dplane_ctx *ctx)
{ {
@ -1514,6 +1556,13 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx,
/* Copy nexthops; recursive info is included too */ /* Copy nexthops; recursive info is included too */
copy_nexthops(&(ctx->u.rinfo.zd_ng.nexthop), copy_nexthops(&(ctx->u.rinfo.zd_ng.nexthop),
re->nhe->nhg.nexthop, NULL); re->nhe->nhg.nexthop, NULL);
ctx->u.rinfo.zd_nhg_id = re->nhe->id;
/* Copy backup nexthop info, if present */
if (re->nhe->backup_info && re->nhe->backup_info->nhe) {
copy_nexthops(&(ctx->u.rinfo.backup_ng.nexthop),
re->nhe->backup_info->nhe->nhg.nexthop, NULL);
}
/* Ensure that the dplane nexthops' flags are clear. */ /* Ensure that the dplane nexthops' flags are clear. */
for (ALL_NEXTHOPS(ctx->u.rinfo.zd_ng, nexthop)) for (ALL_NEXTHOPS(ctx->u.rinfo.zd_ng, nexthop))
@ -1532,9 +1581,8 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx,
dplane_ctx_ns_init(ctx, zns, (op == DPLANE_OP_ROUTE_UPDATE)); dplane_ctx_ns_init(ctx, zns, (op == DPLANE_OP_ROUTE_UPDATE));
#ifdef HAVE_NETLINK #ifdef HAVE_NETLINK
if (re->nhe_id) { if (re->nhe) {
struct nhg_hash_entry *nhe = struct nhg_hash_entry *nhe = zebra_nhg_resolve(re->nhe);
zebra_nhg_resolve(zebra_nhg_lookup_id(re->nhe_id));
ctx->u.rinfo.nhe.id = nhe->id; ctx->u.rinfo.nhe.id = nhe->id;
/* /*
@ -1581,7 +1629,6 @@ static int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx,
{ {
struct zebra_vrf *zvrf = NULL; struct zebra_vrf *zvrf = NULL;
struct zebra_ns *zns = NULL; struct zebra_ns *zns = NULL;
int ret = EINVAL; int ret = EINVAL;
if (!ctx || !nhe) if (!ctx || !nhe)
@ -1850,6 +1897,17 @@ dplane_route_update_internal(struct route_node *rn,
*/ */
copy_nexthops(&(ctx->u.rinfo.zd_old_ng.nexthop), copy_nexthops(&(ctx->u.rinfo.zd_old_ng.nexthop),
old_re->nhe->nhg.nexthop, NULL); old_re->nhe->nhg.nexthop, NULL);
if (zebra_nhg_get_backup_nhg(old_re->nhe) != NULL) {
struct nexthop_group *nhg;
struct nexthop **nh;
nhg = zebra_nhg_get_backup_nhg(old_re->nhe);
nh = &(ctx->u.rinfo.old_backup_ng.nexthop);
if (nhg->nexthop)
copy_nexthops(nh, nhg->nexthop, NULL);
}
#endif /* !HAVE_NETLINK */ #endif /* !HAVE_NETLINK */
} }

View File

@ -270,11 +270,19 @@ void dplane_ctx_set_distance(struct zebra_dplane_ctx *ctx, uint8_t distance);
uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx); uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx);
void dplane_ctx_set_nexthops(struct zebra_dplane_ctx *ctx, struct nexthop *nh); void dplane_ctx_set_nexthops(struct zebra_dplane_ctx *ctx, struct nexthop *nh);
uint32_t dplane_ctx_get_nhg_id(const struct zebra_dplane_ctx *ctx);
const struct nexthop_group *dplane_ctx_get_ng( const struct nexthop_group *dplane_ctx_get_ng(
const struct zebra_dplane_ctx *ctx); const struct zebra_dplane_ctx *ctx);
const struct nexthop_group *dplane_ctx_get_old_ng( const struct nexthop_group *dplane_ctx_get_old_ng(
const struct zebra_dplane_ctx *ctx); const struct zebra_dplane_ctx *ctx);
/* Backup nexthop information (list of nexthops) if present. */
const struct nexthop_group *
dplane_ctx_get_backup_ng(const struct zebra_dplane_ctx *ctx);
const struct nexthop_group *
dplane_ctx_get_old_backup_ng(const struct zebra_dplane_ctx *ctx);
/* Accessors for nexthop information */ /* Accessors for nexthop information */
uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx); uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx);
afi_t dplane_ctx_get_nhe_afi(const struct zebra_dplane_ctx *ctx); afi_t dplane_ctx_get_nhe_afi(const struct zebra_dplane_ctx *ctx);

View File

@ -63,6 +63,9 @@ static struct nhg_hash_entry *
depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id); depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id);
static void depends_decrement_free(struct nhg_connected_tree_head *head); static void depends_decrement_free(struct nhg_connected_tree_head *head);
static struct nhg_backup_info *
nhg_backup_copy(const struct nhg_backup_info *orig);
static void nhg_connected_free(struct nhg_connected *dep) static void nhg_connected_free(struct nhg_connected *dep)
{ {
@ -341,7 +344,7 @@ struct nhg_hash_entry *zebra_nhg_alloc(void)
return nhe; return nhe;
} }
static struct nhg_hash_entry *zebra_nhg_copy(const struct nhg_hash_entry *copy, static struct nhg_hash_entry *zebra_nhg_copy(const struct nhg_hash_entry *orig,
uint32_t id) uint32_t id)
{ {
struct nhg_hash_entry *nhe; struct nhg_hash_entry *nhe;
@ -350,14 +353,18 @@ static struct nhg_hash_entry *zebra_nhg_copy(const struct nhg_hash_entry *copy,
nhe->id = id; nhe->id = id;
nexthop_group_copy(&(nhe->nhg), &(copy->nhg)); nexthop_group_copy(&(nhe->nhg), &(orig->nhg));
nhe->vrf_id = copy->vrf_id; nhe->vrf_id = orig->vrf_id;
nhe->afi = copy->afi; nhe->afi = orig->afi;
nhe->type = copy->type ? copy->type : ZEBRA_ROUTE_NHG; nhe->type = orig->type ? orig->type : ZEBRA_ROUTE_NHG;
nhe->refcnt = 0; nhe->refcnt = 0;
nhe->dplane_ref = zebra_router_get_next_sequence(); nhe->dplane_ref = zebra_router_get_next_sequence();
/* Copy backup info also, if present */
if (orig->backup_info)
nhe->backup_info = nhg_backup_copy(orig->backup_info);
return nhe; return nhe;
} }
@ -381,12 +388,17 @@ static void *zebra_nhg_hash_alloc(void *arg)
uint32_t zebra_nhg_hash_key(const void *arg) uint32_t zebra_nhg_hash_key(const void *arg)
{ {
const struct nhg_hash_entry *nhe = arg; const struct nhg_hash_entry *nhe = arg;
uint32_t val, key = 0x5a351234;
uint32_t key = 0x5a351234; val = nexthop_group_hash(&(nhe->nhg));
if (nhe->backup_info) {
val = jhash_2words(val,
nexthop_group_hash(
&(nhe->backup_info->nhe->nhg)),
key);
}
key = jhash_3words(nhe->vrf_id, nhe->afi, key = jhash_3words(nhe->vrf_id, nhe->afi, val, key);
nexthop_group_hash(&(nhe->nhg)),
key);
return key; return key;
} }
@ -398,6 +410,50 @@ uint32_t zebra_nhg_id_key(const void *arg)
return nhe->id; return nhe->id;
} }
/* Helper with common nhg/nhe nexthop comparison logic */
static bool nhg_compare_nexthops(const struct nexthop *nh1,
const struct nexthop *nh2)
{
if (nh1 && !nh2)
return false;
if (!nh1 && nh2)
return false;
/*
* We have to check the active flag of each individual one,
* not just the overall active_num. This solves the special case
* issue of a route with a nexthop group with one nexthop
* resolving to itself and thus marking it inactive. If we
* have two different routes each wanting to mark a different
* nexthop inactive, they need to hash to two different groups.
*
* If we just hashed on num_active, they would hash the same
* which is incorrect.
*
* ex)
* 1.1.1.0/24
* -> 1.1.1.1 dummy1 (inactive)
* -> 1.1.2.1 dummy2
*
* 1.1.2.0/24
* -> 1.1.1.1 dummy1
* -> 1.1.2.1 dummy2 (inactive)
*
* Without checking each individual one, they would hash to
* the same group and both have 1.1.1.1 dummy1 marked inactive.
*
*/
if (CHECK_FLAG(nh1->flags, NEXTHOP_FLAG_ACTIVE)
!= CHECK_FLAG(nh2->flags, NEXTHOP_FLAG_ACTIVE))
return false;
if (!nexthop_same(nh1, nh2))
return false;
return true;
}
bool zebra_nhg_hash_equal(const void *arg1, const void *arg2) bool zebra_nhg_hash_equal(const void *arg1, const void *arg2)
{ {
const struct nhg_hash_entry *nhe1 = arg1; const struct nhg_hash_entry *nhe1 = arg1;
@ -415,45 +471,44 @@ bool zebra_nhg_hash_equal(const void *arg1, const void *arg2)
if (nhe1->afi != nhe2->afi) if (nhe1->afi != nhe2->afi)
return false; return false;
/* Nexthops should be sorted */ /* Nexthops should be in-order, so we simply compare them in-place */
for (nexthop1 = nhe1->nhg.nexthop, nexthop2 = nhe2->nhg.nexthop; for (nexthop1 = nhe1->nhg.nexthop, nexthop2 = nhe2->nhg.nexthop;
nexthop1 || nexthop2; nexthop1 || nexthop2;
nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) { nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) {
if (nexthop1 && !nexthop2)
return false;
if (!nexthop1 && nexthop2) if (!nhg_compare_nexthops(nexthop1, nexthop2))
return false; return false;
}
/* /* If there's no backup info, comparison is done. */
* We have to check the active flag of each individual one, if ((nhe1->backup_info == NULL) && (nhe2->backup_info == NULL))
* not just the overall active_num. This solves the special case return true;
* issue of a route with a nexthop group with one nexthop
* resolving to itself and thus marking it inactive. If we
* have two different routes each wanting to mark a different
* nexthop inactive, they need to hash to two different groups.
*
* If we just hashed on num_active, they would hash the same
* which is incorrect.
*
* ex)
* 1.1.1.0/24
* -> 1.1.1.1 dummy1 (inactive)
* -> 1.1.2.1 dummy2
*
* 1.1.2.0/24
* -> 1.1.1.1 dummy1
* -> 1.1.2.1 dummy2 (inactive)
*
* Without checking each individual one, they would hash to
* the same group and both have 1.1.1.1 dummy1 marked inactive.
*
*/
if (CHECK_FLAG(nexthop1->flags, NEXTHOP_FLAG_ACTIVE)
!= CHECK_FLAG(nexthop2->flags, NEXTHOP_FLAG_ACTIVE))
return false;
if (!nexthop_same(nexthop1, nexthop2)) /* Compare backup info also - test the easy things first */
if (nhe1->backup_info && (nhe2->backup_info == NULL))
return false;
if (nhe2->backup_info && (nhe1->backup_info == NULL))
return false;
/* Compare number of backups before actually comparing any */
for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop,
nexthop2 = nhe2->backup_info->nhe->nhg.nexthop;
nexthop1 && nexthop2;
nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) {
;
}
/* Did we find the end of one list before the other? */
if (nexthop1 || nexthop2)
return false;
/* Have to compare the backup nexthops */
for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop,
nexthop2 = nhe2->backup_info->nhe->nhg.nexthop;
nexthop1 || nexthop2;
nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) {
if (!nhg_compare_nexthops(nexthop1, nexthop2))
return false; return false;
} }
@ -530,6 +585,11 @@ static bool zebra_nhg_find(struct nhg_hash_entry **nhe, uint32_t id,
bool created = false; bool created = false;
bool recursive = false; bool recursive = false;
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
zlog_debug("%s: id %u, nhg %p, vrf %d, type %d, depends %p",
__func__, id, nhg, vrf_id, type,
nhg_depends);
/* /*
* If it has an id at this point, we must have gotten it from the kernel * If it has an id at this point, we must have gotten it from the kernel
*/ */
@ -1154,6 +1214,10 @@ depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh,
depend = depends_find(nh, afi); depend = depends_find(nh, afi);
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
zlog_debug("%s: nh %pNHv => %p",
__func__, nh, depend);
if (depend) if (depend)
depends_add(head, depend); depends_add(head, depend);
@ -1198,10 +1262,87 @@ zebra_nhg_rib_find(uint32_t id, struct nexthop_group *nhg, afi_t rt_afi)
return nhe; return nhe;
} }
/*
* Allocate backup nexthop info object. Typically these are embedded in
* nhg_hash_entry objects.
*/
struct nhg_backup_info *zebra_nhg_backup_alloc(void)
{
struct nhg_backup_info *p;
p = XCALLOC(MTYPE_NHG, sizeof(struct nhg_backup_info));
p->nhe = zebra_nhg_alloc();
/* Identify the embedded group used to hold the list of backups */
SET_FLAG(p->nhe->flags, NEXTHOP_GROUP_BACKUP);
return p;
}
/*
* Free backup nexthop info object, deal with any embedded allocations
*/
void zebra_nhg_backup_free(struct nhg_backup_info **p)
{
if (p && *p) {
if ((*p)->nhe)
zebra_nhg_free((*p)->nhe);
XFREE(MTYPE_NHG, (*p));
}
}
/* Accessor for backup nexthop info */
struct nhg_hash_entry *zebra_nhg_get_backup_nhe(struct nhg_hash_entry *nhe)
{
struct nhg_hash_entry *p = NULL;
if (nhe) {
if (nhe->backup_info)
p = nhe->backup_info->nhe;
}
return p;
}
/* Accessor for backup nexthop group */
struct nexthop_group *zebra_nhg_get_backup_nhg(struct nhg_hash_entry *nhe)
{
struct nexthop_group *p = NULL;
if (nhe) {
if (nhe->backup_info && nhe->backup_info->nhe)
p = &(nhe->backup_info->nhe->nhg);
}
return p;
}
/*
* Helper to return a copy of a backup_info - note that this is a shallow
* copy, meant to be used when creating a new nhe from info passed in with
* a route e.g.
*/
static struct nhg_backup_info *
nhg_backup_copy(const struct nhg_backup_info *orig)
{
struct nhg_backup_info *b;
b = zebra_nhg_backup_alloc();
/* Copy list of nexthops */
nexthop_group_copy(&(b->nhe->nhg), &(orig->nhe->nhg));
return b;
}
static void zebra_nhg_free_members(struct nhg_hash_entry *nhe) static void zebra_nhg_free_members(struct nhg_hash_entry *nhe)
{ {
nexthops_free(nhe->nhg.nexthop); nexthops_free(nhe->nhg.nexthop);
zebra_nhg_backup_free(&nhe->backup_info);
/* Decrement to remove connection ref */ /* Decrement to remove connection ref */
nhg_connected_tree_decrement_ref(&nhe->nhg_depends); nhg_connected_tree_decrement_ref(&nhe->nhg_depends);
nhg_connected_tree_free(&nhe->nhg_depends); nhg_connected_tree_free(&nhe->nhg_depends);

View File

@ -50,6 +50,9 @@ struct nhg_hash_entry {
struct nexthop_group nhg; struct nexthop_group nhg;
/* If supported, a mapping of backup nexthops. */
struct nhg_backup_info *backup_info;
/* If this is not a group, it /* If this is not a group, it
* will be a single nexthop * will be a single nexthop
* and must have an interface * and must have an interface
@ -72,6 +75,7 @@ struct nhg_hash_entry {
* faster with ID's. * faster with ID's.
*/ */
struct nhg_connected_tree_head nhg_depends, nhg_dependents; struct nhg_connected_tree_head nhg_depends, nhg_dependents;
/* /*
* Is this nexthop group valid, ie all nexthops are fully resolved. * Is this nexthop group valid, ie all nexthops are fully resolved.
* What is fully resolved? It's a nexthop that is either self contained * What is fully resolved? It's a nexthop that is either self contained
@ -102,11 +106,25 @@ struct nhg_hash_entry {
* from the kernel. Therefore, it is unhashable. * from the kernel. Therefore, it is unhashable.
*/ */
#define NEXTHOP_GROUP_UNHASHABLE (1 << 4) #define NEXTHOP_GROUP_UNHASHABLE (1 << 4)
/*
* Backup nexthop support - identify groups that are backups for
* another group.
*/
#define NEXTHOP_GROUP_BACKUP (1 << 5)
}; };
/* Was this one we created, either this session or previously? */ /* Was this one we created, either this session or previously? */
#define ZEBRA_NHG_CREATED(NHE) ((NHE->type) == ZEBRA_ROUTE_NHG) #define ZEBRA_NHG_CREATED(NHE) ((NHE->type) == ZEBRA_ROUTE_NHG)
/*
* Backup nexthops: this is a group object itself, so
* that the backup nexthops can use the same code as a normal object.
*/
struct nhg_backup_info {
struct nhg_hash_entry *nhe;
};
enum nhg_ctx_op_e { enum nhg_ctx_op_e {
NHG_CTX_OP_NONE = 0, NHG_CTX_OP_NONE = 0,
@ -162,13 +180,20 @@ bool zebra_nhg_kernel_nexthops_enabled(void);
/** /**
* NHE abstracted tree functions. * NHE abstracted tree functions.
* Use these where possible instead of the direct ones access ones. * Use these where possible instead of direct access.
*/ */
struct nhg_hash_entry *zebra_nhg_alloc(void); struct nhg_hash_entry *zebra_nhg_alloc(void);
void zebra_nhg_free(struct nhg_hash_entry *nhe); void zebra_nhg_free(struct nhg_hash_entry *nhe);
/* In order to clear a generic hash, we need a generic api, sigh. */ /* In order to clear a generic hash, we need a generic api, sigh. */
void zebra_nhg_hash_free(void *p); void zebra_nhg_hash_free(void *p);
/* Allocate, free backup nexthop info objects */
struct nhg_backup_info *zebra_nhg_backup_alloc(void);
void zebra_nhg_backup_free(struct nhg_backup_info **p);
struct nhg_hash_entry *zebra_nhg_get_backup_nhe(struct nhg_hash_entry *nhe);
struct nexthop_group *zebra_nhg_get_backup_nhg(struct nhg_hash_entry *nhe);
extern struct nhg_hash_entry *zebra_nhg_resolve(struct nhg_hash_entry *nhe); extern struct nhg_hash_entry *zebra_nhg_resolve(struct nhg_hash_entry *nhe);
extern unsigned int zebra_nhg_depends_count(const struct nhg_hash_entry *nhe); extern unsigned int zebra_nhg_depends_count(const struct nhg_hash_entry *nhe);

View File

@ -2338,7 +2338,6 @@ static void rib_addnode(struct route_node *rn,
void rib_unlink(struct route_node *rn, struct route_entry *re) void rib_unlink(struct route_node *rn, struct route_entry *re)
{ {
rib_dest_t *dest; rib_dest_t *dest;
struct nhg_hash_entry *nhe = NULL;
assert(rn && re); assert(rn && re);
@ -2353,11 +2352,10 @@ void rib_unlink(struct route_node *rn, struct route_entry *re)
if (dest->selected_fib == re) if (dest->selected_fib == re)
dest->selected_fib = NULL; dest->selected_fib = NULL;
if (re->nhe_id) { if (re->nhe && re->nhe_id) {
nhe = zebra_nhg_lookup_id(re->nhe_id); assert(re->nhe->id == re->nhe_id);
if (nhe) zebra_nhg_decrement_ref(re->nhe);
zebra_nhg_decrement_ref(nhe); } else if (re->nhe && re->nhe->nhg.nexthop)
} else if (re->nhe->nhg.nexthop)
nexthops_free(re->nhe->nhg.nexthop); nexthops_free(re->nhe->nhg.nexthop);
nexthops_free(re->fib_ng.nexthop); nexthops_free(re->fib_ng.nexthop);
@ -2574,51 +2572,52 @@ void rib_lookup_and_pushup(struct prefix_ipv4 *p, vrf_id_t vrf_id)
} }
} }
int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p, /*
struct prefix_ipv6 *src_p, struct route_entry *re, * Internal route-add implementation; there are a couple of different public
struct nexthop_group *ng) * signatures. Callers in this path are responsible for the memory they
* allocate: if they allocate a nexthop_group or backup nexthop info, they
* must free those objects. If this returns < 0, an error has occurred and the
* route_entry 're' has not been captured; the caller should free that also.
*/
int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p,
struct route_entry *re,
struct nhg_hash_entry *re_nhe)
{ {
struct nhg_hash_entry *nhe = NULL; struct nhg_hash_entry *nhe = NULL;
struct route_table *table; struct route_table *table;
struct route_node *rn; struct route_node *rn;
struct route_entry *same = NULL; struct route_entry *same = NULL;
int ret = 0; int ret = 0;
struct nexthop_group *ng;
if (!re) if (!re || !re_nhe)
return 0; return -1;
assert(!src_p || !src_p->prefixlen || afi == AFI_IP6); assert(!src_p || !src_p->prefixlen || afi == AFI_IP6);
/* TODO */
ng = &(re_nhe->nhg);
/* Lookup table. */ /* Lookup table. */
table = zebra_vrf_get_table_with_table_id(afi, safi, re->vrf_id, table = zebra_vrf_get_table_with_table_id(afi, safi, re->vrf_id,
re->table); re->table);
if (!table) { if (!table)
if (ng) return -1;
nexthop_group_delete(&ng);
XFREE(MTYPE_RE, re);
return 0;
}
if (re->nhe_id) { if (re_nhe->id > 0) {
nhe = zebra_nhg_lookup_id(re->nhe_id); nhe = zebra_nhg_lookup_id(re_nhe->id);
if (!nhe) { if (!nhe) {
flog_err( flog_err(
EC_ZEBRA_TABLE_LOOKUP_FAILED, EC_ZEBRA_TABLE_LOOKUP_FAILED,
"Zebra failed to find the nexthop hash entry for id=%u in a route entry", "Zebra failed to find the nexthop hash entry for id=%u in a route entry",
re->nhe_id); re_nhe->id);
XFREE(MTYPE_RE, re);
return -1; return -1;
} }
} else { } else {
nhe = zebra_nhg_rib_find(0, ng, afi); nhe = zebra_nhg_rib_find(0, ng, afi);
/*
* The nexthops got copied over into an nhe,
* so free them now.
*/
nexthop_group_delete(&ng);
if (!nhe) { if (!nhe) {
char buf[PREFIX_STRLEN] = ""; char buf[PREFIX_STRLEN] = "";
char buf2[PREFIX_STRLEN] = ""; char buf2[PREFIX_STRLEN] = "";
@ -2631,7 +2630,6 @@ int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
src_p ? prefix2str(src_p, buf2, sizeof(buf2)) src_p ? prefix2str(src_p, buf2, sizeof(buf2))
: ""); : "");
XFREE(MTYPE_RE, re);
return -1; return -1;
} }
} }
@ -2709,15 +2707,50 @@ int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
ret = 1; ret = 1;
/* Free implicit route.*/ /* Free implicit route.*/
if (same) { if (same)
rib_delnode(rn, same); rib_delnode(rn, same);
ret = -1;
}
route_unlock_node(rn); route_unlock_node(rn);
return ret; return ret;
} }
/*
* Add a single route.
*/
int rib_add_multipath(afi_t afi, safi_t safi, struct prefix *p,
struct prefix_ipv6 *src_p, struct route_entry *re,
struct nexthop_group *ng)
{
int ret;
struct nhg_hash_entry nhe = {};
if (!re)
return -1;
/* We either need nexthop(s) or an existing nexthop id */
if (ng == NULL && re->nhe_id == 0)
return -1;
/*
* Use a temporary nhe to convey info to the common/main api.
*/
if (ng)
nhe.nhg.nexthop = ng->nexthop;
else if (re->nhe_id > 0)
nhe.id = re->nhe_id;
ret = rib_add_multipath_nhe(afi, safi, p, src_p, re, &nhe);
/* In this path, the callers expect memory to be freed. */
nexthop_group_delete(&ng);
/* In error cases, free the route also */
if (ret < 0)
XFREE(MTYPE_RE, re);
return ret;
}
void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type,
unsigned short instance, int flags, struct prefix *p, unsigned short instance, int flags, struct prefix *p,
struct prefix_ipv6 *src_p, const struct nexthop *nh, struct prefix_ipv6 *src_p, const struct nexthop *nh,