diff --git a/doc/user/pbr.rst b/doc/user/pbr.rst index 99ef258cb2..c869c6bc45 100644 --- a/doc/user/pbr.rst +++ b/doc/user/pbr.rst @@ -39,7 +39,7 @@ listing of ECMP nexthops used to forward packets for when a pbr-map is matched. sub-mode where you can specify individual nexthops. To exit this mode type exit or end as per normal conventions for leaving a sub-mode. -.. clicmd:: nexthop [A.B.C.D|X:X::X:XX] [interface] [nexthop-vrf NAME] [label LABELS] +.. clicmd:: nexthop [A.B.C.D|X:X::X:XX] [interface [onlink]] [nexthop-vrf NAME] [label LABELS] Create a v4 or v6 nexthop. All normal rules for creating nexthops that you are used to are allowed here. The syntax was intentionally kept the same as diff --git a/lib/hash.h b/lib/hash.h index e7ba3187f5..00953ff3b3 100644 --- a/lib/hash.h +++ b/lib/hash.h @@ -236,7 +236,8 @@ extern void *hash_release(struct hash *hash, void *data); * Iterate over the elements in a hash table. * * It is safe to delete items passed to the iteration function from the hash - * table during iteration. Please note that adding entries to the hash + * table during iteration. More than one item cannot be deleted during each + * iteration. Please note that adding entries to the hash * during the walk will cause undefined behavior in that some new entries * will be walked and some will not. So do not do this. * diff --git a/lib/log.c b/lib/log.c index 4054185019..b629658f75 100644 --- a/lib/log.c +++ b/lib/log.c @@ -452,7 +452,10 @@ static const struct zebra_desc_table command_types[] = { DESC_ENTRY(ZEBRA_OPAQUE_MESSAGE), DESC_ENTRY(ZEBRA_OPAQUE_REGISTER), DESC_ENTRY(ZEBRA_OPAQUE_UNREGISTER), - DESC_ENTRY(ZEBRA_NEIGH_DISCOVER)}; + DESC_ENTRY(ZEBRA_NEIGH_DISCOVER), + DESC_ENTRY(ZEBRA_NHG_ADD), + DESC_ENTRY(ZEBRA_NHG_DEL), + DESC_ENTRY(ZEBRA_NHG_NOTIFY_OWNER)}; #undef DESC_ENTRY static const struct zebra_desc_table unknown = {0, "unknown", '?'}; diff --git a/lib/nexthop_group.c b/lib/nexthop_group.c index 687cac4062..83905abe43 100644 --- a/lib/nexthop_group.c +++ b/lib/nexthop_group.c @@ -41,6 +41,7 @@ struct nexthop_hold { char *nhvrf_name; union sockunion *addr; char *intf; + bool onlink; char *labels; uint32_t weight; char *backup_str; @@ -560,6 +561,10 @@ static int nhgl_cmp(struct nexthop_hold *nh1, struct nexthop_hold *nh2) if (ret) return ret; + ret = ((int)nh2->onlink) - ((int)nh1->onlink); + if (ret) + return ret; + return nhgc_cmp_helper(nh1->labels, nh2->labels); } @@ -673,8 +678,8 @@ DEFPY(no_nexthop_group_backup, no_nexthop_group_backup_cmd, static void nexthop_group_save_nhop(struct nexthop_group_cmd *nhgc, const char *nhvrf_name, const union sockunion *addr, - const char *intf, const char *labels, - const uint32_t weight, + const char *intf, bool onlink, + const char *labels, const uint32_t weight, const char *backup_str) { struct nexthop_hold *nh; @@ -690,6 +695,8 @@ static void nexthop_group_save_nhop(struct nexthop_group_cmd *nhgc, if (labels) nh->labels = XSTRDUP(MTYPE_TMP, labels); + nh->onlink = onlink; + nh->weight = weight; if (backup_str) @@ -738,9 +745,10 @@ static void nexthop_group_unsave_nhop(struct nexthop_group_cmd *nhgc, */ static bool nexthop_group_parse_nexthop(struct nexthop *nhop, const union sockunion *addr, - const char *intf, const char *name, - const char *labels, int *lbl_ret, - uint32_t weight, const char *backup_str) + const char *intf, bool onlink, + const char *name, const char *labels, + int *lbl_ret, uint32_t weight, + const char *backup_str) { int ret = 0; struct vrf *vrf; @@ -764,6 +772,9 @@ static bool 
nexthop_group_parse_nexthop(struct nexthop *nhop, return false; } + if (onlink) + SET_FLAG(nhop->flags, NEXTHOP_FLAG_ONLINK); + if (addr) { if (addr->sa.sa_family == AF_INET) { nhop->gate.ipv4.s_addr = addr->sin.sin_addr.s_addr; @@ -820,15 +831,15 @@ static bool nexthop_group_parse_nexthop(struct nexthop *nhop, static bool nexthop_group_parse_nhh(struct nexthop *nhop, const struct nexthop_hold *nhh) { - return (nexthop_group_parse_nexthop(nhop, nhh->addr, nhh->intf, - nhh->nhvrf_name, nhh->labels, NULL, - nhh->weight, nhh->backup_str)); + return (nexthop_group_parse_nexthop( + nhop, nhh->addr, nhh->intf, nhh->onlink, nhh->nhvrf_name, + nhh->labels, NULL, nhh->weight, nhh->backup_str)); } DEFPY(ecmp_nexthops, ecmp_nexthops_cmd, "[no] nexthop\ <\ - $addr [INTERFACE$intf]\ + $addr [INTERFACE$intf [onlink$onlink]]\ |INTERFACE$intf\ >\ [{ \ @@ -842,6 +853,7 @@ DEFPY(ecmp_nexthops, ecmp_nexthops_cmd, "v4 Address\n" "v6 Address\n" "Interface to use\n" + "Treat nexthop as directly attached to the interface\n" "Interface to use\n" "If the nexthop is in a different vrf tell us\n" "The nexthop-vrf Name\n" @@ -870,8 +882,9 @@ DEFPY(ecmp_nexthops, ecmp_nexthops_cmd, } } - legal = nexthop_group_parse_nexthop(&nhop, addr, intf, vrf_name, label, - &lbl_ret, weight, backup_idx); + legal = nexthop_group_parse_nexthop(&nhop, addr, intf, !!onlink, + vrf_name, label, &lbl_ret, weight, + backup_idx); if (nhop.type == NEXTHOP_TYPE_IPV6 && IN6_IS_ADDR_LINKLOCAL(&nhop.gate.ipv6)) { @@ -933,8 +946,8 @@ DEFPY(ecmp_nexthops, ecmp_nexthops_cmd, } /* Save config always */ - nexthop_group_save_nhop(nhgc, vrf_name, addr, intf, label, - weight, backup_idx); + nexthop_group_save_nhop(nhgc, vrf_name, addr, intf, !!onlink, + label, weight, backup_idx); if (legal && nhg_hooks.add_nexthop) nhg_hooks.add_nexthop(nhgc, nh); @@ -1106,6 +1119,9 @@ static void nexthop_group_write_nexthop_internal(struct vty *vty, if (nh->intf) vty_out(vty, " %s", nh->intf); + if (nh->onlink) + vty_out(vty, " onlink"); + if (nh->nhvrf_name) vty_out(vty, " nexthop-vrf %s", nh->nhvrf_name); diff --git a/lib/route_types.txt b/lib/route_types.txt index b549c11cfc..37cc2fb590 100644 --- a/lib/route_types.txt +++ b/lib/route_types.txt @@ -84,7 +84,7 @@ ZEBRA_ROUTE_PBR, pbr, pbrd, 'F', 1, 1, 0, "PBR" ZEBRA_ROUTE_BFD, bfd, bfdd, '-', 0, 0, 0, "BFD" ZEBRA_ROUTE_OPENFABRIC, openfabric, fabricd, 'f', 1, 1, 1, "OpenFabric" ZEBRA_ROUTE_VRRP, vrrp, vrrpd, '-', 0, 0, 0, "VRRP" -ZEBRA_ROUTE_NHG, nhg, none, '-', 0, 0, 0, "Nexthop Group" +ZEBRA_ROUTE_NHG, zebra, none, '-', 0, 0, 0, "Nexthop Group" ZEBRA_ROUTE_SRTE, srte, none, '-', 0, 0, 0, "SR-TE" ZEBRA_ROUTE_ALL, wildcard, none, '-', 0, 0, 0, "-" diff --git a/lib/zclient.c b/lib/zclient.c index c5016d22e2..b7d240b4e8 100644 --- a/lib/zclient.c +++ b/lib/zclient.c @@ -1017,6 +1017,57 @@ done: return ret; } +int zapi_nhg_encode(struct stream *s, int cmd, struct zapi_nhg *api_nhg) +{ + int i; + + if (cmd != ZEBRA_NHG_DEL && cmd != ZEBRA_NHG_ADD) { + flog_err(EC_LIB_ZAPI_ENCODE, + "%s: Specified zapi NHG command (%d) doesn't exist\n", + __func__, cmd); + return -1; + } + + stream_reset(s); + zclient_create_header(s, cmd, VRF_DEFAULT); + + stream_putw(s, api_nhg->proto); + stream_putl(s, api_nhg->id); + + if (cmd == ZEBRA_NHG_ADD) { + /* Nexthops */ + zapi_nexthop_group_sort(api_nhg->nexthops, + api_nhg->nexthop_num); + + stream_putw(s, api_nhg->nexthop_num); + + for (i = 0; i < api_nhg->nexthop_num; i++) + zapi_nexthop_encode(s, &api_nhg->nexthops[i], 0, 0); + + /* Backup nexthops */ + + stream_putw(s, 
api_nhg->backup_nexthop_num); + + for (i = 0; i < api_nhg->backup_nexthop_num; i++) + zapi_nexthop_encode(s, &api_nhg->backup_nexthops[i], 0, + 0); + } + + stream_putw_at(s, 0, stream_get_endp(s)); + + return 0; +} + +int zclient_nhg_send(struct zclient *zclient, int cmd, struct zapi_nhg *api_nhg) +{ + api_nhg->proto = zclient->redist_default; + + if (zapi_nhg_encode(zclient->obuf, cmd, api_nhg)) + return -1; + + return zclient_send_message(zclient); +} + int zapi_route_encode(uint8_t cmd, struct stream *s, struct zapi_route *api) { struct zapi_nexthop *api_nh; @@ -1058,6 +1109,9 @@ int zapi_route_encode(uint8_t cmd, struct stream *s, struct zapi_route *api) stream_write(s, (uint8_t *)&api->src_prefix.prefix, psize); } + if (CHECK_FLAG(api->message, ZAPI_MESSAGE_NHG)) + stream_putl(s, api->nhgid); + /* Nexthops. */ if (CHECK_FLAG(api->message, ZAPI_MESSAGE_NEXTHOP)) { /* limit the number of nexthops if necessary */ @@ -1171,8 +1225,8 @@ int zapi_route_encode(uint8_t cmd, struct stream *s, struct zapi_route *api) /* * Decode a single zapi nexthop object */ -static int zapi_nexthop_decode(struct stream *s, struct zapi_nexthop *api_nh, - uint32_t api_flags, uint32_t api_message) +int zapi_nexthop_decode(struct stream *s, struct zapi_nexthop *api_nh, + uint32_t api_flags, uint32_t api_message) { int i, ret = -1; @@ -1328,6 +1382,9 @@ int zapi_route_decode(struct stream *s, struct zapi_route *api) } } + if (CHECK_FLAG(api->message, ZAPI_MESSAGE_NHG)) + STREAM_GETL(s, api->nhgid); + /* Nexthops. */ if (CHECK_FLAG(api->message, ZAPI_MESSAGE_NEXTHOP)) { STREAM_GETW(s, api->nexthop_num); @@ -1432,6 +1489,22 @@ int zapi_pbr_rule_encode(uint8_t cmd, struct stream *s, struct pbr_rule *zrule) return 0; } +bool zapi_nhg_notify_decode(struct stream *s, uint32_t *id, + enum zapi_nhg_notify_owner *note) +{ + uint32_t read_id; + + STREAM_GET(note, s, sizeof(*note)); + STREAM_GETL(s, read_id); + + *id = read_id; + + return true; + +stream_failure: + return false; +} + bool zapi_route_notify_decode(struct stream *s, struct prefix *p, uint32_t *tableid, enum zapi_route_notify_owner *note) @@ -1582,6 +1655,9 @@ int zapi_nexthop_from_nexthop(struct zapi_nexthop *znh, znh->ifindex = nh->ifindex; znh->gate = nh->gate; + if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK)) + SET_FLAG(znh->flags, ZAPI_NEXTHOP_FLAG_ONLINK); + if (nh->nh_label && (nh->nh_label->num_labels > 0)) { /* Validate */ @@ -3733,6 +3809,11 @@ static int zclient_read(struct thread *thread) (*zclient->rule_notify_owner)(command, zclient, length, vrf_id); break; + case ZEBRA_NHG_NOTIFY_OWNER: + if (zclient->nhg_notify_owner) + (*zclient->nhg_notify_owner)(command, zclient, length, + vrf_id); + break; case ZEBRA_GET_LABEL_CHUNK: if (zclient->label_chunk) (*zclient->label_chunk)(command, zclient, length, @@ -4001,3 +4082,13 @@ int zclient_send_neigh_discovery_req(struct zclient *zclient, stream_putw_at(s, 0, stream_get_endp(s)); return zclient_send_message(zclient); } + +/* + * Get a starting nhg point for a routing protocol + */ +uint32_t zclient_get_nhg_start(uint32_t proto) +{ + assert(proto < ZEBRA_ROUTE_MAX); + + return ZEBRA_NHG_PROTO_SPACING * proto; +} diff --git a/lib/zclient.h b/lib/zclient.h index 050877f27a..959a101395 100644 --- a/lib/zclient.h +++ b/lib/zclient.h @@ -209,6 +209,9 @@ typedef enum { ZEBRA_MLAG_CLIENT_REGISTER, ZEBRA_MLAG_CLIENT_UNREGISTER, ZEBRA_MLAG_FORWARD_MSG, + ZEBRA_NHG_ADD, + ZEBRA_NHG_DEL, + ZEBRA_NHG_NOTIFY_OWNER, ZEBRA_ERROR, ZEBRA_CLIENT_CAPABILITIES, ZEBRA_OPAQUE_MESSAGE, @@ -354,6 +357,7 @@ struct zclient { int 
(*mlag_process_up)(void); int (*mlag_process_down)(void); int (*mlag_handle_msg)(struct stream *msg, int len); + int (*nhg_notify_owner)(ZAPI_CALLBACK_ARGS); int (*handle_error)(enum zebra_error_types error); int (*opaque_msg_handler)(ZAPI_CALLBACK_ARGS); int (*opaque_register_handler)(ZAPI_CALLBACK_ARGS); @@ -370,6 +374,7 @@ struct zclient { #define ZAPI_MESSAGE_SRCPFX 0x20 /* Backup nexthops are present */ #define ZAPI_MESSAGE_BACKUP_NEXTHOPS 0x40 +#define ZAPI_MESSAGE_NHG 0x80 /* * This should only be used by a DAEMON that needs to communicate @@ -433,6 +438,20 @@ struct zapi_nexthop { #define ZAPI_NEXTHOP_FLAG_WEIGHT 0x04 #define ZAPI_NEXTHOP_FLAG_HAS_BACKUP 0x08 /* Nexthop has a backup */ +/* + * ZAPI Nexthop Group. For use with protocol creation of nexthop groups. + */ +struct zapi_nhg { + uint16_t proto; + uint32_t id; + + uint16_t nexthop_num; + struct zapi_nexthop nexthops[MULTIPATH_NUM]; + + uint16_t backup_nexthop_num; + struct zapi_nexthop backup_nexthops[MULTIPATH_NUM]; +}; + /* * Some of these data structures do not map easily to * a actual data structure size giving different compilers @@ -514,6 +533,8 @@ struct zapi_route { uint16_t backup_nexthop_num; struct zapi_nexthop backup_nexthops[MULTIPATH_NUM]; + uint32_t nhgid; + uint8_t distance; uint32_t metric; @@ -592,6 +613,13 @@ enum zapi_route_notify_owner { ZAPI_ROUTE_REMOVE_FAIL, }; +enum zapi_nhg_notify_owner { + ZAPI_NHG_FAIL_INSTALL, + ZAPI_NHG_INSTALLED, + ZAPI_NHG_REMOVED, + ZAPI_NHG_REMOVE_FAIL, +}; + enum zapi_rule_notify_owner { ZAPI_RULE_FAIL_INSTALL, ZAPI_RULE_INSTALLED, @@ -671,6 +699,22 @@ struct zclient_options { extern struct zclient_options zclient_options_default; +/* + * We reserve the top 4 bits for l2-NHG, everything else + * is for zebra/proto l3-NHG. + * + * Each client is going to get it's own nexthop group space + * and we'll separate them, we'll figure out where to start based upon + * the route_types.h + */ +#define ZEBRA_NHG_PROTO_UPPER \ + ((uint32_t)250000000) /* Bottom 28 bits then rounded down */ +#define ZEBRA_NHG_PROTO_SPACING (ZEBRA_NHG_PROTO_UPPER / ZEBRA_ROUTE_MAX) +#define ZEBRA_NHG_PROTO_LOWER \ + (ZEBRA_NHG_PROTO_SPACING * (ZEBRA_ROUTE_CONNECT + 1)) + +extern uint32_t zclient_get_nhg_start(uint32_t proto); + extern struct zclient *zclient_new(struct thread_master *m, struct zclient_options *opt); @@ -853,7 +897,11 @@ extern int zclient_send_rnh(struct zclient *zclient, int command, int zapi_nexthop_encode(struct stream *s, const struct zapi_nexthop *api_nh, uint32_t api_flags, uint32_t api_message); extern int zapi_route_encode(uint8_t, struct stream *, struct zapi_route *); -extern int zapi_route_decode(struct stream *, struct zapi_route *); +extern int zapi_route_decode(struct stream *s, struct zapi_route *api); +extern int zapi_nexthop_decode(struct stream *s, struct zapi_nexthop *api_nh, + uint32_t api_flags, uint32_t api_message); +bool zapi_nhg_notify_decode(struct stream *s, uint32_t *id, + enum zapi_nhg_notify_owner *note); bool zapi_route_notify_decode(struct stream *s, struct prefix *p, uint32_t *tableid, enum zapi_route_notify_owner *note); @@ -864,6 +912,12 @@ bool zapi_ipset_notify_decode(struct stream *s, uint32_t *unique, enum zapi_ipset_notify_owner *note); + +extern int zapi_nhg_encode(struct stream *s, int cmd, struct zapi_nhg *api_nhg); +extern int zapi_nhg_decode(struct stream *s, int cmd, struct zapi_nhg *api_nhg); +extern int zclient_nhg_send(struct zclient *zclient, int cmd, + struct zapi_nhg *api_nhg); + #define ZEBRA_IPSET_NAME_SIZE 32 bool 
zapi_ipset_entry_notify_decode(struct stream *s, diff --git a/sharpd/sharp_globals.h b/sharpd/sharp_globals.h index 8eba57f4dd..0bd47454a9 100644 --- a/sharpd/sharp_globals.h +++ b/sharpd/sharp_globals.h @@ -31,6 +31,7 @@ struct sharp_routes { /* The nexthop info we are using for installation */ struct nexthop nhop; struct nexthop backup_nhop; + uint32_t nhgid; struct nexthop_group nhop_group; struct nexthop_group backup_nhop_group; diff --git a/sharpd/sharp_main.c b/sharpd/sharp_main.c index ccf34b10dd..4cd92c7f3d 100644 --- a/sharpd/sharp_main.c +++ b/sharpd/sharp_main.c @@ -47,6 +47,7 @@ #include "sharp_zebra.h" #include "sharp_vty.h" #include "sharp_globals.h" +#include "sharp_nht.h" DEFINE_MGROUP(SHARPD, "sharpd") @@ -164,7 +165,7 @@ int main(int argc, char **argv, char **envp) sharp_global_init(); - nexthop_group_init(NULL, NULL, NULL, NULL); + sharp_nhgroup_init(); vrf_init(NULL, NULL, NULL, NULL, NULL); sharp_zebra_init(); diff --git a/sharpd/sharp_nht.c b/sharpd/sharp_nht.c index 174f186863..7484dd3b06 100644 --- a/sharpd/sharp_nht.c +++ b/sharpd/sharp_nht.c @@ -25,11 +25,15 @@ #include "nexthop.h" #include "nexthop_group.h" #include "vty.h" +#include "typesafe.h" +#include "zclient.h" #include "sharp_nht.h" #include "sharp_globals.h" +#include "sharp_zebra.h" DEFINE_MTYPE_STATIC(SHARPD, NH_TRACKER, "Nexthop Tracker") +DEFINE_MTYPE_STATIC(SHARPD, NHG, "Nexthop Group") struct sharp_nh_tracker *sharp_nh_tracker_get(struct prefix *p) { @@ -65,3 +69,157 @@ void sharp_nh_tracker_dump(struct vty *vty) nht->updates); } } + +PREDECL_RBTREE_UNIQ(sharp_nhg_rb); + +struct sharp_nhg { + struct sharp_nhg_rb_item mylistitem; + + uint32_t id; + + char name[256]; + + bool installed; +}; + +static uint32_t nhg_id; + +static uint32_t sharp_get_next_nhid(void) +{ + zlog_debug("NHG ID assigned: %u", nhg_id); + return nhg_id++; +} + +struct sharp_nhg_rb_head nhg_head; + +static int sharp_nhg_compare_func(const struct sharp_nhg *a, + const struct sharp_nhg *b) +{ + return strncmp(a->name, b->name, strlen(a->name)); +} + +DECLARE_RBTREE_UNIQ(sharp_nhg_rb, struct sharp_nhg, mylistitem, + sharp_nhg_compare_func); + +static struct sharp_nhg *sharp_nhgroup_find_id(uint32_t id) +{ + struct sharp_nhg *lookup; + + /* Yea its just a for loop, I don't want add complexity + * to sharpd with another RB tree for just IDs + */ + + frr_each (sharp_nhg_rb, &nhg_head, lookup) { + if (lookup->id == id) + return lookup; + } + + return NULL; +} + +static void sharp_nhgroup_add_cb(const char *name) +{ + struct sharp_nhg *snhg; + + snhg = XCALLOC(MTYPE_NHG, sizeof(*snhg)); + snhg->id = sharp_get_next_nhid(); + strlcpy(snhg->name, name, sizeof(snhg->name)); + + sharp_nhg_rb_add(&nhg_head, snhg); +} + +static void sharp_nhgroup_add_nexthop_cb(const struct nexthop_group_cmd *nhgc, + const struct nexthop *nhop) +{ + struct sharp_nhg lookup; + struct sharp_nhg *snhg; + struct nexthop_group_cmd *bnhgc = NULL; + + strlcpy(lookup.name, nhgc->name, sizeof(lookup.name)); + snhg = sharp_nhg_rb_find(&nhg_head, &lookup); + + if (nhgc->backup_list_name[0]) + bnhgc = nhgc_find(nhgc->backup_list_name); + + nhg_add(snhg->id, &nhgc->nhg, (bnhgc ? 
&bnhgc->nhg : NULL)); +} + +static void sharp_nhgroup_del_nexthop_cb(const struct nexthop_group_cmd *nhgc, + const struct nexthop *nhop) +{ + struct sharp_nhg lookup; + struct sharp_nhg *snhg; + struct nexthop_group_cmd *bnhgc = NULL; + + strlcpy(lookup.name, nhgc->name, sizeof(lookup.name)); + snhg = sharp_nhg_rb_find(&nhg_head, &lookup); + + if (nhgc->backup_list_name[0]) + bnhgc = nhgc_find(nhgc->backup_list_name); + + nhg_add(snhg->id, &nhgc->nhg, (bnhgc ? &bnhgc->nhg : NULL)); +} + +static void sharp_nhgroup_delete_cb(const char *name) +{ + struct sharp_nhg lookup; + struct sharp_nhg *snhg; + + strlcpy(lookup.name, name, sizeof(lookup.name)); + snhg = sharp_nhg_rb_find(&nhg_head, &lookup); + if (!snhg) + return; + + nhg_del(snhg->id); + sharp_nhg_rb_del(&nhg_head, snhg); + XFREE(MTYPE_NHG, snhg); +} + +uint32_t sharp_nhgroup_get_id(const char *name) +{ + struct sharp_nhg lookup; + struct sharp_nhg *snhg; + + strlcpy(lookup.name, name, sizeof(lookup.name)); + snhg = sharp_nhg_rb_find(&nhg_head, &lookup); + if (!snhg) + return 0; + + return snhg->id; +} + +void sharp_nhgroup_id_set_installed(uint32_t id, bool installed) +{ + struct sharp_nhg *snhg; + + snhg = sharp_nhgroup_find_id(id); + if (!snhg) { + zlog_debug("%s: nhg %u not found", __func__, id); + return; + } + + snhg->installed = installed; +} + +bool sharp_nhgroup_id_is_installed(uint32_t id) +{ + struct sharp_nhg *snhg; + + snhg = sharp_nhgroup_find_id(id); + if (!snhg) { + zlog_debug("%s: nhg %u not found", __func__, id); + return false; + } + + return snhg->installed; +} + +void sharp_nhgroup_init(void) +{ + sharp_nhg_rb_init(&nhg_head); + nhg_id = zclient_get_nhg_start(ZEBRA_ROUTE_SHARP); + + nexthop_group_init(sharp_nhgroup_add_cb, sharp_nhgroup_add_nexthop_cb, + sharp_nhgroup_del_nexthop_cb, + sharp_nhgroup_delete_cb); +} diff --git a/sharpd/sharp_nht.h b/sharpd/sharp_nht.h index 0b00774a81..da33502878 100644 --- a/sharpd/sharp_nht.h +++ b/sharpd/sharp_nht.h @@ -35,4 +35,10 @@ struct sharp_nh_tracker { extern struct sharp_nh_tracker *sharp_nh_tracker_get(struct prefix *p); extern void sharp_nh_tracker_dump(struct vty *vty); + +extern uint32_t sharp_nhgroup_get_id(const char *name); +extern void sharp_nhgroup_id_set_installed(uint32_t id, bool installed); +extern bool sharp_nhgroup_id_is_installed(uint32_t id); + +extern void sharp_nhgroup_init(void); #endif diff --git a/sharpd/sharp_vty.c b/sharpd/sharp_vty.c index d390ea8192..d062f027ab 100644 --- a/sharpd/sharp_vty.c +++ b/sharpd/sharp_vty.c @@ -192,6 +192,7 @@ DEFPY (install_routes, struct vrf *vrf; struct prefix prefix; uint32_t rts; + uint32_t nhgid = 0; sg.r.total_routes = routes; sg.r.installed_routes = 0; @@ -244,6 +245,8 @@ DEFPY (install_routes, return CMD_WARNING; } + nhgid = sharp_nhgroup_get_id(nexthop_group); + sg.r.nhgid = nhgid; sg.r.nhop_group.nexthop = nhgc->nhg.nexthop; /* Use group's backup nexthop info if present */ @@ -296,7 +299,7 @@ DEFPY (install_routes, sg.r.inst = instance; sg.r.vrf_id = vrf->vrf_id; rts = routes; - sharp_install_routes_helper(&prefix, sg.r.vrf_id, sg.r.inst, + sharp_install_routes_helper(&prefix, sg.r.vrf_id, sg.r.inst, nhgid, &sg.r.nhop_group, &sg.r.backup_nhop_group, rts); diff --git a/sharpd/sharp_zebra.c b/sharpd/sharp_zebra.c index 08f5a07b7e..d167e8e277 100644 --- a/sharpd/sharp_zebra.c +++ b/sharpd/sharp_zebra.c @@ -217,7 +217,7 @@ int sharp_install_lsps_helper(bool install_p, bool update_p, } void sharp_install_routes_helper(struct prefix *p, vrf_id_t vrf_id, - uint8_t instance, + uint8_t instance, uint32_t nhgid, const 
struct nexthop_group *nhg, const struct nexthop_group *backup_nhg, uint32_t routes) @@ -239,7 +239,7 @@ void sharp_install_routes_helper(struct prefix *p, vrf_id_t vrf_id, monotime(&sg.r.t_start); for (i = 0; i < routes; i++) { - route_add(p, vrf_id, (uint8_t)instance, nhg, backup_nhg); + route_add(p, vrf_id, (uint8_t)instance, nhgid, nhg, backup_nhg); if (v4) p->u.prefix4.s_addr = htonl(++temp); else @@ -288,7 +288,7 @@ static void handle_repeated(bool installed) if (!installed) { sg.r.installed_routes = 0; sharp_install_routes_helper(&p, sg.r.vrf_id, sg.r.inst, - &sg.r.nhop_group, + sg.r.nhgid, &sg.r.nhop_group, &sg.r.backup_nhop_group, sg.r.total_routes); } @@ -357,8 +357,58 @@ void vrf_label_add(vrf_id_t vrf_id, afi_t afi, mpls_label_t label) zclient_send_vrf_label(zclient, vrf_id, afi, label, ZEBRA_LSP_SHARP); } -void route_add(const struct prefix *p, vrf_id_t vrf_id, - uint8_t instance, const struct nexthop_group *nhg, +void nhg_add(uint32_t id, const struct nexthop_group *nhg, + const struct nexthop_group *backup_nhg) +{ + struct zapi_nhg api_nhg = {}; + struct zapi_nexthop *api_nh; + struct nexthop *nh; + + api_nhg.id = id; + for (ALL_NEXTHOPS_PTR(nhg, nh)) { + if (api_nhg.nexthop_num >= MULTIPATH_NUM) { + zlog_warn( + "%s: number of nexthops greater than max multipath size, truncating", + __func__); + break; + } + + api_nh = &api_nhg.nexthops[api_nhg.nexthop_num]; + + zapi_nexthop_from_nexthop(api_nh, nh); + api_nhg.nexthop_num++; + } + + if (backup_nhg) { + for (ALL_NEXTHOPS_PTR(backup_nhg, nh)) { + if (api_nhg.backup_nexthop_num >= MULTIPATH_NUM) { + zlog_warn( + "%s: number of backup nexthops greater than max multipath size, truncating", + __func__); + break; + } + api_nh = &api_nhg.backup_nexthops + [api_nhg.backup_nexthop_num]; + + zapi_backup_nexthop_from_nexthop(api_nh, nh); + api_nhg.backup_nexthop_num++; + } + } + + zclient_nhg_send(zclient, ZEBRA_NHG_ADD, &api_nhg); +} + +void nhg_del(uint32_t id) +{ + struct zapi_nhg api_nhg = {}; + + api_nhg.id = id; + + zclient_nhg_send(zclient, ZEBRA_NHG_DEL, &api_nhg); +} + +void route_add(const struct prefix *p, vrf_id_t vrf_id, uint8_t instance, + uint32_t nhgid, const struct nexthop_group *nhg, const struct nexthop_group *backup_nhg) { struct zapi_route api; @@ -376,14 +426,20 @@ void route_add(const struct prefix *p, vrf_id_t vrf_id, SET_FLAG(api.flags, ZEBRA_FLAG_ALLOW_RECURSION); SET_FLAG(api.message, ZAPI_MESSAGE_NEXTHOP); - for (ALL_NEXTHOPS_PTR(nhg, nh)) { - api_nh = &api.nexthops[i]; + /* Only send via ID if nhgroup has been successfully installed */ + if (nhgid && sharp_nhgroup_id_is_installed(nhgid)) { + SET_FLAG(api.message, ZAPI_MESSAGE_NHG); + api.nhgid = nhgid; + } else { + for (ALL_NEXTHOPS_PTR(nhg, nh)) { + api_nh = &api.nexthops[i]; - zapi_nexthop_from_nexthop(api_nh, nh); + zapi_nexthop_from_nexthop(api_nh, nh); - i++; + i++; + } + api.nexthop_num = i; } - api.nexthop_num = i; /* Include backup nexthops, if present */ if (backup_nhg && backup_nhg->nexthop) { @@ -668,6 +724,33 @@ void sharp_zebra_send_arp(const struct interface *ifp, const struct prefix *p) zclient_send_neigh_discovery_req(zclient, ifp, p); } +static int nhg_notify_owner(ZAPI_CALLBACK_ARGS) +{ + enum zapi_nhg_notify_owner note; + uint32_t id; + + if (!zapi_nhg_notify_decode(zclient->ibuf, &id, ¬e)) + return -1; + + switch (note) { + case ZAPI_NHG_INSTALLED: + sharp_nhgroup_id_set_installed(id, true); + zlog_debug("Installed nhg %u", id); + break; + case ZAPI_NHG_FAIL_INSTALL: + zlog_debug("Failed install of nhg %u", id); + break; + case 
ZAPI_NHG_REMOVED: + zlog_debug("Removed nhg %u", id); + break; + case ZAPI_NHG_REMOVE_FAIL: + zlog_debug("Failed removal of nhg %u", id); + break; + } + + return 0; +} + void sharp_zebra_init(void) { struct zclient_options opt = {.receive_notify = true}; @@ -684,6 +767,7 @@ void sharp_zebra_init(void) zclient->route_notify_owner = route_notify_owner; zclient->nexthop_update = sharp_nexthop_update; zclient->import_check_update = sharp_nexthop_update; + zclient->nhg_notify_owner = nhg_notify_owner; zclient->redistribute_route_add = sharp_redistribute_route; zclient->redistribute_route_del = sharp_redistribute_route; diff --git a/sharpd/sharp_zebra.h b/sharpd/sharp_zebra.h index 0a44fa694f..4a767ababf 100644 --- a/sharpd/sharp_zebra.h +++ b/sharpd/sharp_zebra.h @@ -29,15 +29,18 @@ int sharp_zclient_create(uint32_t session_id); int sharp_zclient_delete(uint32_t session_id); extern void vrf_label_add(vrf_id_t vrf_id, afi_t afi, mpls_label_t label); +extern void nhg_add(uint32_t id, const struct nexthop_group *nhg, + const struct nexthop_group *backup_nhg); +extern void nhg_del(uint32_t id); extern void route_add(const struct prefix *p, vrf_id_t, uint8_t instance, - const struct nexthop_group *nhg, + uint32_t nhgid, const struct nexthop_group *nhg, const struct nexthop_group *backup_nhg); extern void route_delete(struct prefix *p, vrf_id_t vrf_id, uint8_t instance); extern void sharp_zebra_nexthop_watch(struct prefix *p, vrf_id_t vrf_id, bool import, bool watch, bool connected); extern void sharp_install_routes_helper(struct prefix *p, vrf_id_t vrf_id, - uint8_t instance, + uint8_t instance, uint32_t nhgid, const struct nexthop_group *nhg, const struct nexthop_group *backup_nhg, uint32_t routes); diff --git a/tests/topotests/all-protocol-startup/test_all_protocol_startup.py b/tests/topotests/all-protocol-startup/test_all_protocol_startup.py index 2a00398e28..f2af5fdc5c 100755 --- a/tests/topotests/all-protocol-startup/test_all_protocol_startup.py +++ b/tests/topotests/all-protocol-startup/test_all_protocol_startup.py @@ -374,26 +374,36 @@ def route_get_nhg_id(route_str): nhg_id = int(match.group(1)) return nhg_id -def verify_nexthop_group(nhg_id, recursive=False): +def verify_nexthop_group(nhg_id, recursive=False, ecmp=0): # Verify NHG is valid/installed output = net["r1"].cmd('vtysh -c "show nexthop-group rib %d"' % nhg_id) match = re.search(r"Valid", output) assert match is not None, "Nexthop Group ID=%d not marked Valid" % nhg_id - # If recursive, we need to look at its resolved group - if recursive: - match = re.search(r"Depends: \((\d+)\)", output) - resolved_id = int(match.group(1)) - verify_nexthop_group(resolved_id, False) + if ecmp or recursive: + match = re.search(r"Depends:.*\n", output) + assert match is not None, "Nexthop Group ID=%d has no depends" % nhg_id + + # list of IDs in group + depends = re.findall(r"\((\d+)\)", match.group(0)) + + if ecmp: + assert (len(depends) == ecmp), "Nexthop Group ID=%d doesn't match ecmp size" % nhg_id + else: + # If recursive, we need to look at its resolved group + assert (len(depends) == 1), "Nexthop Group ID=%d should only have one recursive depend" % nhg_id + resolved_id = int(depends[0]) + verify_nexthop_group(resolved_id, False) + else: match = re.search(r"Installed", output) assert match is not None, "Nexthop Group ID=%d not marked Installed" % nhg_id -def verify_route_nexthop_group(route_str, recursive=False): +def verify_route_nexthop_group(route_str, recursive=False, ecmp=0): # Verify route and that zebra created NHGs for and they are 
valid/installed nhg_id = route_get_nhg_id(route_str) - verify_nexthop_group(nhg_id, recursive) + verify_nexthop_group(nhg_id, recursive, ecmp) def test_nexthop_groups(): global fatal_error @@ -1101,6 +1111,34 @@ def test_nexthop_groups_with_route_maps(): net["r1"].cmd('vtysh -c "c t" -c "no route-map NOPE"') net["r1"].cmd('vtysh -c "c t" -c "no ip prefix-list NOPE seq 5 permit %s/32"' % permit_route_str) +def test_nexthop_group_replace(): + global fatal_error + global net + + # Skip if previous fatal error condition is raised + if (fatal_error != ""): + pytest.skip(fatal_error) + + print("\n\n** Verifying Nexthop Groups") + print("******************************************\n") + + ### Nexthop Group Tests + + ## 2-Way ECMP Directly Connected + + net["r1"].cmd('vtysh -c "c t" -c "nexthop-group replace" -c "nexthop 1.1.1.1 r1-eth1 onlink" -c "nexthop 1.1.1.2 r1-eth2 onlink"') + + # Create with sharpd using nexthop-group + net["r1"].cmd('vtysh -c "sharp install routes 3.3.3.1 nexthop-group replace 1"') + + verify_route_nexthop_group("3.3.3.1/32") + + # Change the nexthop group + net["r1"].cmd('vtysh -c "c t" -c "nexthop-group replace" -c "no nexthop 1.1.1.1 r1-eth1 onlink" -c "nexthop 1.1.1.3 r1-eth1 onlink" -c "nexthop 1.1.1.4 r1-eth4 onlink"') + + # Verify it updated. We can just check install and ecmp count here. + verify_route_nexthop_group("3.3.3.1/32", False, 3) + def test_mpls_interfaces(): global fatal_error global net diff --git a/zebra/rib.h b/zebra/rib.h index be680a112f..9ddc35b091 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -333,6 +333,10 @@ extern void route_entry_copy_nexthops(struct route_entry *re, int route_entry_update_nhe(struct route_entry *re, struct nhg_hash_entry *new_nhghe); +/* NHG replace has happend, we have to update route_entry pointers to new one */ +void rib_handle_nhg_replace(struct nhg_hash_entry *old, + struct nhg_hash_entry *new); + #define route_entry_dump(prefix, src, re) _route_entry_dump(__func__, prefix, src, re) extern void _route_entry_dump(const char *func, union prefixconstptr pp, union prefixconstptr src_pp, diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index 146650f602..1ce3c435fe 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -125,6 +125,31 @@ static bool kernel_nexthops_supported(void) && zebra_nhg_kernel_nexthops_enabled()); } +/* + * Some people may only want to use NHGs created by protos and not + * implicitly created by Zebra. This check accounts for that. + */ +static bool proto_nexthops_only(void) +{ + return zebra_nhg_proto_nexthops_only(); +} + +/* Is this a proto created NHG? */ +static bool is_proto_nhg(uint32_t id, int type) +{ + /* If type is available, use it as the source of truth */ + if (type) { + if (type != ZEBRA_ROUTE_NHG) + return true; + return false; + } + + if (id >= ZEBRA_NHG_PROTO_LOWER) + return true; + + return false; +} + /* * The ipv4_ll data structure is used for all 5549 * additions to the kernel. 
Let's figure out the @@ -1748,7 +1773,10 @@ ssize_t netlink_route_multipath_msg_encode(int cmd, nl_attr_nest_end(&req->n, nest); } - if ((!fpm && kernel_nexthops_supported()) || (fpm && force_nhg)) { + if ((!fpm && kernel_nexthops_supported() + && (!proto_nexthops_only() + || is_proto_nhg(dplane_ctx_get_nhe_id(ctx), 0))) + || (fpm && force_nhg)) { /* Kernel supports nexthop objects */ if (IS_ZEBRA_DEBUG_KERNEL) zlog_debug("%s: %pFX nhg_id is %u", __func__, p, @@ -2072,6 +2100,35 @@ ssize_t netlink_nexthop_msg_encode(uint16_t cmd, mpls_lse_t out_lse[MPLS_MAX_LABELS]; char label_buf[256]; int num_labels = 0; + uint32_t id = dplane_ctx_get_nhe_id(ctx); + int type = dplane_ctx_get_nhe_type(ctx); + + if (!id) { + flog_err( + EC_ZEBRA_NHG_FIB_UPDATE, + "Failed trying to update a nexthop group in the kernel that does not have an ID"); + return -1; + } + + /* + * Nothing to do if the kernel doesn't support nexthop objects or + * we dont want to install this type of NHG + */ + if (!kernel_nexthops_supported()) { + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s: nhg_id %u (%s): kernel nexthops not supported, ignoring", + __func__, id, zebra_route_string(type)); + return 0; + } + + if (proto_nexthops_only() && !is_proto_nhg(id, type)) { + if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG) + zlog_debug( + "%s: nhg_id %u (%s): proto-based nexthops only, ignoring", + __func__, id, zebra_route_string(type)); + return 0; + } label_buf[0] = '\0'; @@ -2092,15 +2149,6 @@ ssize_t netlink_nexthop_msg_encode(uint16_t cmd, req->nhm.nh_family = AF_UNSPEC; /* TODO: Scope? */ - uint32_t id = dplane_ctx_get_nhe_id(ctx); - - if (!id) { - flog_err( - EC_ZEBRA_NHG_FIB_UPDATE, - "Failed trying to update a nexthop group in the kernel that does not have an ID"); - return -1; - } - if (!nl_attr_put32(&req->n, buflen, NHA_ID, id)) return 0; @@ -2221,8 +2269,7 @@ nexthop_done: nh->vrf_id, label_buf); } - req->nhm.nh_protocol = - zebra2proto(dplane_ctx_get_nhe_type(ctx)); +req->nhm.nh_protocol = zebra2proto(type); } else if (cmd != RTM_DELNEXTHOP) { flog_err( diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index e436e5a288..c33bca3d86 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -712,6 +712,34 @@ static int zsend_ipv4_nexthop_lookup_mrib(struct zserv *client, return zserv_send_message(client, s); } +static int nhg_notify(uint16_t type, uint16_t instance, uint32_t id, + enum zapi_nhg_notify_owner note) +{ + struct zserv *client; + struct stream *s; + + client = zserv_find_client(type, instance); + if (!client) { + if (IS_ZEBRA_DEBUG_PACKET) { + zlog_debug("Not Notifying Owner: %u(%u) about %u(%d)", + type, instance, id, note); + } + return 0; + } + + s = stream_new(ZEBRA_MAX_PACKET_SIZ); + stream_reset(s); + + zclient_create_header(s, ZEBRA_NHG_NOTIFY_OWNER, VRF_DEFAULT); + + stream_put(s, ¬e, sizeof(note)); + stream_putl(s, id); + + stream_putw_at(s, 0, stream_get_endp(s)); + + return zserv_send_message(client, s); +} + /* * Common utility send route notification, called from a path using a * route_entry and from a path using a dataplane context. 
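
A note on how a client daemon is expected to use the ZEBRA_NHG_ADD / ZEBRA_NHG_DEL API added in lib/zclient.[ch] above and handled by zread_nhg_add() / zread_nhg_del() and nhg_notify() in this file: the daemon installs a nexthop group under an ID from its own reserved range, waits for the owner notification, and only then references the group from routes. Below is a minimal sketch of that flow, modeled on sharpd's nhg_add() and nhg_notify_owner() elsewhere in this patch; the my_* identifiers are hypothetical illustrations, while struct zapi_nhg, zapi_nexthop_from_nexthop(), zclient_nhg_send(), zapi_nhg_notify_decode() and the nhg_notify_owner callback are the API introduced here.

    #include "zclient.h"   /* assumes the lib/zclient.h additions in this patch */
    #include "nexthop.h"

    /* Hypothetical daemon-side helpers; my_* names are illustrative only. */
    static uint32_t my_next_id; /* seeded from zclient_get_nhg_start(<proto>) */

    static void my_nhg_install(struct zclient *my_zclient, struct nexthop *my_nh)
    {
        struct zapi_nhg api_nhg = {};
        struct zapi_nexthop *api_nh;

        /* ID must stay inside this daemon's reserved range */
        api_nhg.id = my_next_id++;

        api_nh = &api_nhg.nexthops[api_nhg.nexthop_num++];
        zapi_nexthop_from_nexthop(api_nh, my_nh);

        /* api_nhg.proto is filled in from zclient->redist_default */
        zclient_nhg_send(my_zclient, ZEBRA_NHG_ADD, &api_nhg);
    }

    /* Registered as zclient->nhg_notify_owner */
    static int my_nhg_notify_owner(ZAPI_CALLBACK_ARGS)
    {
        enum zapi_nhg_notify_owner note;
        uint32_t id;

        if (!zapi_nhg_notify_decode(zclient->ibuf, &id, &note))
            return -1;

        switch (note) {
        case ZAPI_NHG_INSTALLED:
            /* Routes may now reference this id via ZAPI_MESSAGE_NHG */
            break;
        case ZAPI_NHG_FAIL_INSTALL:
        case ZAPI_NHG_REMOVED:
        case ZAPI_NHG_REMOVE_FAIL:
            break;
        }

        return 0;
    }

A route should only set ZAPI_MESSAGE_NHG and api.nhgid after ZAPI_NHG_INSTALLED has been seen for that ID; that is exactly the check sharpd's route_add() change makes with sharp_nhgroup_id_is_installed() before falling back to explicit nexthops.
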
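The ownership split that the lib/zclient.h constants and the zebra_nhg.c changes later in this diff rely on is also worth spelling out: zclient_get_nhg_start() gives each daemon a starting point inside [0, ZEBRA_NHG_PROTO_UPPER), spaced ZEBRA_NHG_PROTO_SPACING apart; zebra's own allocator (nhg_get_next_id()) wraps so its IDs stay below ZEBRA_NHG_PROTO_LOWER; and any ID at or above that bound is treated as proto-owned. Below is a small sketch of that arithmetic under those assumptions; owns_id() and id_is_proto_owned() are hypothetical helpers (the latter mirrors is_proto_nhg() in rt_netlink.c), while the constants and zclient_get_nhg_start() are the ones added in lib/zclient.h.

    #include <stdbool.h>
    #include <stdint.h>
    #include "zclient.h" /* ZEBRA_NHG_PROTO_* and zclient_get_nhg_start() */

    /* Hypothetical: does 'id' fall inside the slice reserved for 'proto'?
     * Each daemon's slice is assumed to be [start, start + SPACING).
     */
    static bool owns_id(uint32_t proto, uint32_t id)
    {
        uint32_t start = zclient_get_nhg_start(proto); /* SPACING * proto */

        return id >= start && id < start + ZEBRA_NHG_PROTO_SPACING;
    }

    /* Mirrors is_proto_nhg(): IDs below ZEBRA_NHG_PROTO_LOWER are zebra-owned,
     * IDs at or above it belong to a protocol daemon.
     */
    static bool id_is_proto_owned(uint32_t id)
    {
        return id >= ZEBRA_NHG_PROTO_LOWER;
    }

Per the zebra_nhe_find() change below, proto-owned entries are inserted into zrouter.nhgs_id only, so they are looked up strictly by ID rather than by zebra's attribute hash.
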
@@ -1411,27 +1439,32 @@ stream_failure: return; } - -void zserv_nexthop_num_warn(const char *caller, const struct prefix *p, +bool zserv_nexthop_num_warn(const char *caller, const struct prefix *p, const unsigned int nexthop_num) { if (nexthop_num > zrouter.multipath_num) { char buff[PREFIX2STR_BUFFER]; - prefix2str(p, buff, sizeof(buff)); + if (p) + prefix2str(p, buff, sizeof(buff)); + flog_warn( EC_ZEBRA_MORE_NH_THAN_MULTIPATH, "%s: Prefix %s has %d nexthops, but we can only use the first %d", - caller, buff, nexthop_num, zrouter.multipath_num); + caller, (p ? buff : "(NULL)"), nexthop_num, + zrouter.multipath_num); + return true; } + + return false; } /* * Create a new nexthop based on a zapi nexthop. */ -static struct nexthop *nexthop_from_zapi(struct route_entry *re, - const struct zapi_nexthop *api_nh, - const struct zapi_route *api) +static struct nexthop *nexthop_from_zapi(const struct zapi_nexthop *api_nh, + uint32_t flags, struct prefix *p, + uint16_t backup_nexthop_num) { struct nexthop *nexthop = NULL; struct ipaddr vtep_ip; @@ -1469,14 +1502,13 @@ static struct nexthop *nexthop_from_zapi(struct route_entry *re, /* Special handling for IPv4 routes sourced from EVPN: * the nexthop and associated MAC need to be installed. */ - if (CHECK_FLAG(api->flags, ZEBRA_FLAG_EVPN_ROUTE)) { + if (CHECK_FLAG(flags, ZEBRA_FLAG_EVPN_ROUTE)) { memset(&vtep_ip, 0, sizeof(struct ipaddr)); vtep_ip.ipa_type = IPADDR_V4; memcpy(&(vtep_ip.ipaddr_v4), &(api_nh->gate.ipv4), sizeof(struct in_addr)); zebra_vxlan_evpn_vrf_route_add( - api_nh->vrf_id, &api_nh->rmac, - &vtep_ip, &api->prefix); + api_nh->vrf_id, &api_nh->rmac, &vtep_ip, p); } break; case NEXTHOP_TYPE_IPV6: @@ -1503,14 +1535,13 @@ static struct nexthop *nexthop_from_zapi(struct route_entry *re, /* Special handling for IPv6 routes sourced from EVPN: * the nexthop and associated MAC need to be installed. 
*/ - if (CHECK_FLAG(api->flags, ZEBRA_FLAG_EVPN_ROUTE)) { + if (CHECK_FLAG(flags, ZEBRA_FLAG_EVPN_ROUTE)) { memset(&vtep_ip, 0, sizeof(struct ipaddr)); vtep_ip.ipa_type = IPADDR_V6; memcpy(&vtep_ip.ipaddr_v6, &(api_nh->gate.ipv6), sizeof(struct in6_addr)); zebra_vxlan_evpn_vrf_route_add( - api_nh->vrf_id, &api_nh->rmac, - &vtep_ip, &api->prefix); + api_nh->vrf_id, &api_nh->rmac, &vtep_ip, p); } break; case NEXTHOP_TYPE_BLACKHOLE: @@ -1558,7 +1589,7 @@ static struct nexthop *nexthop_from_zapi(struct route_entry *re, for (i = 0; i < api_nh->backup_num; i++) { /* Validate backup index */ - if (api_nh->backup_idx[i] < api->backup_nexthop_num) { + if (api_nh->backup_idx[i] < backup_nexthop_num) { nexthop->backup_idx[i] = api_nh->backup_idx[i]; } else { if (IS_ZEBRA_DEBUG_RECV || IS_ZEBRA_DEBUG_EVENT) @@ -1576,99 +1607,69 @@ done: return nexthop; } -static void zread_route_add(ZAPI_HANDLER_ARGS) +static bool zapi_read_nexthops(struct zserv *client, struct prefix *p, + struct zapi_nexthop *nhops, uint32_t flags, + uint32_t message, uint16_t nexthop_num, + uint16_t backup_nh_num, + struct nexthop_group **png, + struct nhg_backup_info **pbnhg) { - struct stream *s; - struct zapi_route api; - struct zapi_nexthop *api_nh; - afi_t afi; - struct prefix_ipv6 *src_p = NULL; - struct route_entry *re; - struct nexthop *nexthop = NULL, *last_nh; struct nexthop_group *ng = NULL; struct nhg_backup_info *bnhg = NULL; - int i, ret; - vrf_id_t vrf_id; - struct nhg_hash_entry nhe; - enum lsp_types_t label_type; - char nhbuf[NEXTHOP_STRLEN]; - char labelbuf[MPLS_LABEL_STRLEN]; + uint16_t i; + struct nexthop *last_nh = NULL; - s = msg; - if (zapi_route_decode(s, &api) < 0) { + assert(!(png && pbnhg)); + + if (png) + ng = nexthop_group_new(); + + if (pbnhg && backup_nh_num > 0) { if (IS_ZEBRA_DEBUG_RECV) - zlog_debug("%s: Unable to decode zapi_route sent", - __func__); - return; + zlog_debug("%s: adding %d backup nexthops", __func__, + backup_nh_num); + + bnhg = zebra_nhg_backup_alloc(); } - vrf_id = zvrf_id(zvrf); - - if (IS_ZEBRA_DEBUG_RECV) { - char buf_prefix[PREFIX_STRLEN]; - - prefix2str(&api.prefix, buf_prefix, sizeof(buf_prefix)); - zlog_debug("%s: p=(%u:%u)%s, msg flags=0x%x, flags=0x%x", - __func__, vrf_id, api.tableid, buf_prefix, (int)api.message, api.flags); - } - - /* Allocate new route. */ - re = XCALLOC(MTYPE_RE, sizeof(struct route_entry)); - re->type = api.type; - re->instance = api.instance; - re->flags = api.flags; - re->uptime = monotime(NULL); - re->vrf_id = vrf_id; - - if (api.tableid) - re->table = api.tableid; - else - re->table = zvrf->table_id; - - if (!CHECK_FLAG(api.message, ZAPI_MESSAGE_NEXTHOP) - || api.nexthop_num == 0) { - flog_warn(EC_ZEBRA_RX_ROUTE_NO_NEXTHOPS, - "%s: received a route without nexthops for prefix %pFX from client %s", - __func__, &api.prefix, - zebra_route_string(client->proto)); - - XFREE(MTYPE_RE, re); - return; - } - - /* Report misuse of the backup flag */ - if (CHECK_FLAG(api.message, ZAPI_MESSAGE_BACKUP_NEXTHOPS) && - api.backup_nexthop_num == 0) { - if (IS_ZEBRA_DEBUG_RECV || IS_ZEBRA_DEBUG_EVENT) - zlog_debug("%s: client %s: BACKUP flag set but no backup nexthops, prefix %pFX", - __func__, - zebra_route_string(client->proto), &api.prefix); - } - - /* Use temporary list of nexthops */ - ng = nexthop_group_new(); - /* * TBD should _all_ of the nexthop add operations use * api_nh->vrf_id instead of re->vrf_id ? I only changed * for cases NEXTHOP_TYPE_IPV4 and NEXTHOP_TYPE_IPV6. 
*/ - for (i = 0; i < api.nexthop_num; i++) { - api_nh = &api.nexthops[i]; + for (i = 0; i < nexthop_num; i++) { + struct nexthop *nexthop; + enum lsp_types_t label_type; + char nhbuf[NEXTHOP_STRLEN]; + char labelbuf[MPLS_LABEL_STRLEN]; + struct zapi_nexthop *api_nh = &nhops[i]; /* Convert zapi nexthop */ - nexthop = nexthop_from_zapi(re, api_nh, &api); + nexthop = nexthop_from_zapi(api_nh, flags, p, backup_nh_num); if (!nexthop) { flog_warn( EC_ZEBRA_NEXTHOP_CREATION_FAILED, - "%s: Nexthops Specified: %d but we failed to properly create one", - __func__, api.nexthop_num); - nexthop_group_delete(&ng); - XFREE(MTYPE_RE, re); - return; + "%s: Nexthops Specified: %u(%u) but we failed to properly create one", + __func__, nexthop_num, i); + if (ng) + nexthop_group_delete(&ng); + if (bnhg) + zebra_nhg_backup_free(&bnhg); + return false; } - if (CHECK_FLAG(api.message, ZAPI_MESSAGE_SRTE)) { + if (bnhg + && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { + if (IS_ZEBRA_DEBUG_RECV) { + nexthop2str(nexthop, nhbuf, sizeof(nhbuf)); + zlog_debug("%s: backup nh %s with BACKUP flag!", + __func__, nhbuf); + } + UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP); + nexthop->backup_num = 0; + } + + if (CHECK_FLAG(message, ZAPI_MESSAGE_SRTE)) { SET_FLAG(nexthop->flags, NEXTHOP_FLAG_SRTE); nexthop->srte_color = api_nh->srte_color; } @@ -1703,100 +1704,266 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) __func__, nhbuf, api_nh->vrf_id, labelbuf); } - /* Add new nexthop to temporary list. This list is - * canonicalized - sorted - so that it can be hashed later - * in route processing. We expect that the sender has sent - * the list sorted, and the zapi client api attempts to enforce - * that, so this should be inexpensive - but it is necessary - * to support shared nexthop-groups. - */ - nexthop_group_add_sorted(ng, nexthop); + if (ng) { + /* Add new nexthop to temporary list. This list is + * canonicalized - sorted - so that it can be hashed + * later in route processing. We expect that the sender + * has sent the list sorted, and the zapi client api + * attempts to enforce that, so this should be + * inexpensive - but it is necessary to support shared + * nexthop-groups. + */ + nexthop_group_add_sorted(ng, nexthop); + } + if (bnhg) { + /* Note that the order of the backup nexthops is + * significant, so we don't sort this list as we do the + * primary nexthops, we just append. 
+ */ + if (last_nh) + NEXTHOP_APPEND(last_nh, nexthop); + else + bnhg->nhe->nhg.nexthop = nexthop; + + last_nh = nexthop; + } } - /* Allocate temporary list of backup nexthops, if necessary */ - if (api.backup_nexthop_num > 0) { + + /* succesfully read, set caller pointers now */ + if (png) + *png = ng; + + if (pbnhg) + *pbnhg = bnhg; + + return true; +} + +int zapi_nhg_decode(struct stream *s, int cmd, struct zapi_nhg *api_nhg) +{ + uint16_t i; + struct zapi_nexthop *znh; + + STREAM_GETW(s, api_nhg->proto); + STREAM_GETL(s, api_nhg->id); + + if (cmd == ZEBRA_NHG_DEL) + goto done; + + /* Nexthops */ + STREAM_GETW(s, api_nhg->nexthop_num); + + if (zserv_nexthop_num_warn(__func__, NULL, api_nhg->nexthop_num)) + return -1; + + if (api_nhg->nexthop_num <= 0) { + flog_warn(EC_ZEBRA_NEXTHOP_CREATION_FAILED, + "%s: No nexthops sent", __func__); + return -1; + } + + for (i = 0; i < api_nhg->nexthop_num; i++) { + znh = &((api_nhg->nexthops)[i]); + + if (zapi_nexthop_decode(s, znh, 0, 0) != 0) { + flog_warn(EC_ZEBRA_NEXTHOP_CREATION_FAILED, + "%s: Nexthop creation failed", __func__); + return -1; + } + } + + /* Backup Nexthops */ + STREAM_GETW(s, api_nhg->backup_nexthop_num); + + if (zserv_nexthop_num_warn(__func__, NULL, api_nhg->backup_nexthop_num)) + return -1; + + for (i = 0; i < api_nhg->backup_nexthop_num; i++) { + znh = &((api_nhg->backup_nexthops)[i]); + + if (zapi_nexthop_decode(s, znh, 0, 0) != 0) { + flog_warn(EC_ZEBRA_NEXTHOP_CREATION_FAILED, + "%s: Backup Nexthop creation failed", + __func__); + return -1; + } + } + +done: + return 0; + +stream_failure: + flog_warn( + EC_ZEBRA_NEXTHOP_CREATION_FAILED, + "%s: Nexthop Group decode failed with some sort of stream read failure", + __func__); + return -1; +} + +static void zread_nhg_del(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_nhg api_nhg = {}; + struct nhg_hash_entry *nhe; + + s = msg; + if (zapi_nhg_decode(s, hdr->command, &api_nhg) < 0) { if (IS_ZEBRA_DEBUG_RECV) - zlog_debug("%s: adding %d backup nexthops", - __func__, api.backup_nexthop_num); - - bnhg = zebra_nhg_backup_alloc(); - nexthop = NULL; - last_nh = NULL; + zlog_debug("%s: Unable to decode zapi_nhg sent", + __func__); + return; } - /* Copy backup nexthops also, if present */ - for (i = 0; i < api.backup_nexthop_num; i++) { - api_nh = &api.backup_nexthops[i]; + /* + * Delete the received nhg id + */ - /* Convert zapi backup nexthop */ - nexthop = nexthop_from_zapi(re, api_nh, &api); - if (!nexthop) { - flog_warn( - EC_ZEBRA_NEXTHOP_CREATION_FAILED, - "%s: Backup Nexthops Specified: %d but we failed to properly create one", - __func__, api.backup_nexthop_num); - nexthop_group_delete(&ng); - zebra_nhg_backup_free(&bnhg); - XFREE(MTYPE_RE, re); - return; - } + nhe = zebra_nhg_proto_del(api_nhg.id, api_nhg.proto); - /* Backup nexthops can't have backups; that's not valid. 
*/ - if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { - if (IS_ZEBRA_DEBUG_RECV) { - nexthop2str(nexthop, nhbuf, sizeof(nhbuf)); - zlog_debug("%s: backup nh %s with BACKUP flag!", - __func__, nhbuf); - } - UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP); - nexthop->backup_num = 0; - } + if (nhe) { + zebra_nhg_decrement_ref(nhe); + nhg_notify(api_nhg.proto, client->instance, api_nhg.id, + ZAPI_NHG_REMOVED); + } else + nhg_notify(api_nhg.proto, client->instance, api_nhg.id, + ZAPI_NHG_REMOVE_FAIL); +} - if (CHECK_FLAG(api.message, ZAPI_MESSAGE_SRTE)) { - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_SRTE); - nexthop->srte_color = api_nh->srte_color; - } +static void zread_nhg_add(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_nhg api_nhg = {}; + struct nexthop_group *nhg = NULL; + struct nhg_backup_info *bnhg = NULL; + struct nhg_hash_entry *nhe; - /* MPLS labels for BGP-LU or Segment Routing */ - if (CHECK_FLAG(api_nh->flags, ZAPI_NEXTHOP_FLAG_LABEL) - && api_nh->type != NEXTHOP_TYPE_IFINDEX - && api_nh->type != NEXTHOP_TYPE_BLACKHOLE - && api_nh->label_num > 0) { + s = msg; + if (zapi_nhg_decode(s, hdr->command, &api_nhg) < 0) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: Unable to decode zapi_nhg sent", + __func__); + return; + } - label_type = lsp_type_from_re_type(client->proto); - nexthop_add_labels(nexthop, label_type, - api_nh->label_num, - &api_nh->labels[0]); - } + if ((!zapi_read_nexthops(client, NULL, api_nhg.nexthops, 0, 0, + api_nhg.nexthop_num, + api_nhg.backup_nexthop_num, &nhg, NULL)) + || (!zapi_read_nexthops(client, NULL, api_nhg.backup_nexthops, 0, 0, + api_nhg.backup_nexthop_num, + api_nhg.backup_nexthop_num, NULL, &bnhg))) { - if (IS_ZEBRA_DEBUG_RECV) { - labelbuf[0] = '\0'; - nhbuf[0] = '\0'; + flog_warn(EC_ZEBRA_NEXTHOP_CREATION_FAILED, + "%s: Nexthop Group Creation failed", __func__); + return; + } - nexthop2str(nexthop, nhbuf, sizeof(nhbuf)); + /* + * Create the nhg + */ + nhe = zebra_nhg_proto_add(api_nhg.id, api_nhg.proto, nhg, 0); - if (nexthop->nh_label && - nexthop->nh_label->num_labels > 0) { - mpls_label2str(nexthop->nh_label->num_labels, - nexthop->nh_label->label, - labelbuf, sizeof(labelbuf), - false); - } + nexthop_group_delete(&nhg); + zebra_nhg_backup_free(&bnhg); - zlog_debug("%s: backup nh=%s, vrf_id=%d %s", - __func__, nhbuf, api_nh->vrf_id, labelbuf); - } + /* + * TODO: + * Assume fully resolved for now and install. + * + * Resolution is going to need some more work. + */ + if (nhe) + nhg_notify(api_nhg.proto, client->instance, api_nhg.id, + ZAPI_NHG_INSTALLED); + else + nhg_notify(api_nhg.proto, client->instance, api_nhg.id, + ZAPI_NHG_FAIL_INSTALL); +} - /* Note that the order of the backup nexthops is significant, - * so we don't sort this list as we do the primary nexthops, - * we just append. 
- */ - if (last_nh) - NEXTHOP_APPEND(last_nh, nexthop); - else - bnhg->nhe->nhg.nexthop = nexthop; +static void zread_route_add(ZAPI_HANDLER_ARGS) +{ + struct stream *s; + struct zapi_route api; + afi_t afi; + struct prefix_ipv6 *src_p = NULL; + struct route_entry *re; + struct nexthop_group *ng = NULL; + struct nhg_backup_info *bnhg = NULL; + int ret; + vrf_id_t vrf_id; + struct nhg_hash_entry nhe; - last_nh = nexthop; + s = msg; + if (zapi_route_decode(s, &api) < 0) { + if (IS_ZEBRA_DEBUG_RECV) + zlog_debug("%s: Unable to decode zapi_route sent", + __func__); + return; + } + + vrf_id = zvrf_id(zvrf); + + if (IS_ZEBRA_DEBUG_RECV) { + char buf_prefix[PREFIX_STRLEN]; + + prefix2str(&api.prefix, buf_prefix, sizeof(buf_prefix)); + zlog_debug("%s: p=(%u:%u)%s, msg flags=0x%x, flags=0x%x", + __func__, vrf_id, api.tableid, buf_prefix, + (int)api.message, api.flags); + } + + /* Allocate new route. */ + re = XCALLOC(MTYPE_RE, sizeof(struct route_entry)); + re->type = api.type; + re->instance = api.instance; + re->flags = api.flags; + re->uptime = monotime(NULL); + re->vrf_id = vrf_id; + + if (api.tableid) + re->table = api.tableid; + else + re->table = zvrf->table_id; + + if (!CHECK_FLAG(api.message, ZAPI_MESSAGE_NHG) + && (!CHECK_FLAG(api.message, ZAPI_MESSAGE_NEXTHOP) + || api.nexthop_num == 0)) { + flog_warn( + EC_ZEBRA_RX_ROUTE_NO_NEXTHOPS, + "%s: received a route without nexthops for prefix %pFX from client %s", + __func__, &api.prefix, + zebra_route_string(client->proto)); + + XFREE(MTYPE_RE, re); + return; + } + + /* Report misuse of the backup flag */ + if (CHECK_FLAG(api.message, ZAPI_MESSAGE_BACKUP_NEXTHOPS) + && api.backup_nexthop_num == 0) { + if (IS_ZEBRA_DEBUG_RECV || IS_ZEBRA_DEBUG_EVENT) + zlog_debug( + "%s: client %s: BACKUP flag set but no backup nexthops, prefix %pFX", + __func__, zebra_route_string(client->proto), + &api.prefix); + } + + if (CHECK_FLAG(api.message, ZAPI_MESSAGE_NHG)) + re->nhe_id = api.nhgid; + + if (!re->nhe_id + && (!zapi_read_nexthops(client, &api.prefix, api.nexthops, + api.flags, api.message, api.nexthop_num, + api.backup_nexthop_num, &ng, NULL) + || !zapi_read_nexthops(client, &api.prefix, api.backup_nexthops, + api.flags, api.message, + api.backup_nexthop_num, + api.backup_nexthop_num, NULL, &bnhg))) { + + nexthop_group_delete(&ng); + zebra_nhg_backup_free(&bnhg); + XFREE(MTYPE_RE, re); + return; } if (CHECK_FLAG(api.message, ZAPI_MESSAGE_DISTANCE)) @@ -1831,13 +1998,21 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) return; } - /* Include backup info with the route. We use a temporary nhe here; + /* + * If we have an ID, this proto owns the NHG it sent along with the + * route, so we just send the ID into rib code with it. + * + * Havent figured out how to handle backup NHs with this yet, so lets + * keep that separate. + * Include backup info with the route. We use a temporary nhe here; * if this is a new/unknown nhe, a new copy will be allocated * and stored. 
*/ - zebra_nhe_init(&nhe, afi, ng->nexthop); - nhe.nhg.nexthop = ng->nexthop; - nhe.backup_info = bnhg; + if (!re->nhe_id) { + zebra_nhe_init(&nhe, afi, ng->nexthop); + nhe.nhg.nexthop = ng->nexthop; + nhe.backup_info = bnhg; + } ret = rib_add_multipath_nhe(afi, api.safi, &api.prefix, src_p, re, &nhe); @@ -3118,7 +3293,10 @@ void (*const zserv_handlers[])(ZAPI_HANDLER_ARGS) = { [ZEBRA_MLAG_CLIENT_UNREGISTER] = zebra_mlag_client_unregister, [ZEBRA_MLAG_FORWARD_MSG] = zebra_mlag_forward_client_msg, [ZEBRA_CLIENT_CAPABILITIES] = zread_client_capabilities, - [ZEBRA_NEIGH_DISCOVER] = zread_neigh_discover}; + [ZEBRA_NEIGH_DISCOVER] = zread_neigh_discover, + [ZEBRA_NHG_ADD] = zread_nhg_add, + [ZEBRA_NHG_DEL] = zread_nhg_del, +}; /* * Process a batch of zapi messages. diff --git a/zebra/zapi_msg.h b/zebra/zapi_msg.h index e7c755c2bf..9f23a313bf 100644 --- a/zebra/zapi_msg.h +++ b/zebra/zapi_msg.h @@ -90,7 +90,7 @@ zsend_ipset_entry_notify_owner(struct zebra_pbr_ipset_entry *ipset, enum zapi_ipset_entry_notify_owner note); extern void zsend_iptable_notify_owner(struct zebra_pbr_iptable *iptable, enum zapi_iptable_notify_owner note); -extern void zserv_nexthop_num_warn(const char *caller, const struct prefix *p, +extern bool zserv_nexthop_num_warn(const char *caller, const struct prefix *p, const unsigned int nexthop_num); extern void zsend_capabilities_all_clients(void); diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index abd0adb64e..76d7d00e4a 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -73,6 +73,7 @@ const uint32_t DPLANE_DEFAULT_NEW_WORK = 100; */ struct dplane_nexthop_info { uint32_t id; + uint32_t old_id; afi_t afi; vrf_id_t vrf_id; int type; @@ -1242,6 +1243,12 @@ uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx) return ctx->u.rinfo.nhe.id; } +uint32_t dplane_ctx_get_old_nhe_id(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + return ctx->u.rinfo.nhe.old_id; +} + afi_t dplane_ctx_get_nhe_afi(const struct zebra_dplane_ctx *ctx) { DPLANE_CTX_VALID(ctx); @@ -1912,6 +1919,7 @@ int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, struct nhg_hash_entry *nhe = zebra_nhg_resolve(re->nhe); ctx->u.rinfo.nhe.id = nhe->id; + ctx->u.rinfo.nhe.old_id = 0; /* * Check if the nhe is installed/queued before doing anything * with this route. @@ -2328,6 +2336,7 @@ dplane_route_update_internal(struct route_node *rn, ctx->u.rinfo.zd_old_instance = old_re->instance; ctx->u.rinfo.zd_old_distance = old_re->distance; ctx->u.rinfo.zd_old_metric = old_re->metric; + ctx->u.rinfo.nhe.old_id = old_re->nhe->id; #ifndef HAVE_NETLINK /* For bsd, capture previous re's nexthops too, sigh. @@ -2349,6 +2358,40 @@ dplane_route_update_internal(struct route_node *rn, #endif /* !HAVE_NETLINK */ } + /* + * If the old and new context type, and nexthop group id + * are the same there is no need to send down a route replace + * as that we know we have sent a nexthop group replace + * or an upper level protocol has sent us the exact + * same route again. 
+ */ + if ((dplane_ctx_get_type(ctx) == dplane_ctx_get_old_type(ctx)) + && (dplane_ctx_get_nhe_id(ctx) + == dplane_ctx_get_old_nhe_id(ctx)) + && (dplane_ctx_get_nhe_id(ctx) >= ZEBRA_NHG_PROTO_LOWER)) { + struct nexthop *nexthop; + + if (IS_ZEBRA_DEBUG_DPLANE) + zlog_debug( + "%s: Ignoring Route exactly the same", + __func__); + + for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), + nexthop)) { + if (CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_RECURSIVE)) + continue; + + if (CHECK_FLAG(nexthop->flags, + NEXTHOP_FLAG_ACTIVE)) + SET_FLAG(nexthop->flags, + NEXTHOP_FLAG_FIB); + } + + dplane_ctx_free(&ctx); + return ZEBRA_DPLANE_REQUEST_SUCCESS; + } + /* Enqueue context for processing */ ret = dplane_update_enqueue(ctx); } diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index 1d852b1bac..fd70211f7c 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -316,6 +316,7 @@ dplane_ctx_get_old_backup_ng(const struct zebra_dplane_ctx *ctx); /* Accessors for nexthop information */ uint32_t dplane_ctx_get_nhe_id(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_get_old_nhe_id(const struct zebra_dplane_ctx *ctx); afi_t dplane_ctx_get_nhe_afi(const struct zebra_dplane_ctx *ctx); vrf_id_t dplane_ctx_get_nhe_vrf_id(const struct zebra_dplane_ctx *ctx); int dplane_ctx_get_nhe_type(const struct zebra_dplane_ctx *ctx); diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index aabbd875ec..6aa9ba0ebc 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -53,14 +53,15 @@ uint32_t id_counter; /* */ static bool g_nexthops_enabled = true; +static bool proto_nexthops_only; -static struct nhg_hash_entry *depends_find(const struct nexthop *nh, - afi_t afi); +static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi, + int type); static void depends_add(struct nhg_connected_tree_head *head, struct nhg_hash_entry *depend); static struct nhg_hash_entry * depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh, - afi_t afi); + afi_t afi, int type); static struct nhg_hash_entry * depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id); static void depends_decrement_free(struct nhg_connected_tree_head *head); @@ -68,6 +69,35 @@ static void depends_decrement_free(struct nhg_connected_tree_head *head); static struct nhg_backup_info * nhg_backup_copy(const struct nhg_backup_info *orig); +/* Helper function for getting the next allocatable ID */ +static uint32_t nhg_get_next_id(void) +{ + while (1) { + id_counter++; + + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: ID %u checking", __func__, id_counter); + + if (id_counter == ZEBRA_NHG_PROTO_LOWER) { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: ID counter wrapped", __func__); + + id_counter = 0; + continue; + } + + if (zebra_nhg_lookup_id(id_counter)) { + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: ID already exists", __func__); + + continue; + } + + break; + } + + return id_counter; +} static void nhg_connected_free(struct nhg_connected *dep) { @@ -431,7 +461,6 @@ static void *zebra_nhg_hash_alloc(void *arg) nhe->nhg.nexthop->vrf_id, nhe->id); } - zebra_nhg_insert_id(nhe); return nhe; } @@ -439,17 +468,17 @@ static void *zebra_nhg_hash_alloc(void *arg) uint32_t zebra_nhg_hash_key(const void *arg) { const struct nhg_hash_entry *nhe = arg; - uint32_t val, key = 0x5a351234; + uint32_t key = 0x5a351234; + uint32_t primary = 0; + uint32_t backup = 0; - val = nexthop_group_hash(&(nhe->nhg)); - if (nhe->backup_info) { - val = jhash_2words(val, - nexthop_group_hash( - &(nhe->backup_info->nhe->nhg)), 
- key); - } + primary = nexthop_group_hash(&(nhe->nhg)); + if (nhe->backup_info) + backup = nexthop_group_hash(&(nhe->backup_info->nhe->nhg)); - key = jhash_3words(nhe->vrf_id, nhe->afi, val, key); + key = jhash_3words(primary, backup, nhe->type, key); + + key = jhash_2words(nhe->vrf_id, nhe->afi, key); return key; } @@ -512,6 +541,9 @@ bool zebra_nhg_hash_equal(const void *arg1, const void *arg2) if (nhe1->id && nhe2->id && (nhe1->id == nhe2->id)) return true; + if (nhe1->type != nhe2->type) + return false; + if (nhe1->vrf_id != nhe2->vrf_id) return false; @@ -611,7 +643,7 @@ static int zebra_nhg_process_grp(struct nexthop_group *nhg, } static void handle_recursive_depend(struct nhg_connected_tree_head *nhg_depends, - struct nexthop *nh, afi_t afi) + struct nexthop *nh, afi_t afi, int type) { struct nhg_hash_entry *depend = NULL; struct nexthop_group resolved_ng = {}; @@ -622,7 +654,7 @@ static void handle_recursive_depend(struct nhg_connected_tree_head *nhg_depends, zlog_debug("%s: head %p, nh %pNHv", __func__, nhg_depends, nh); - depend = zebra_nhg_rib_find(0, &resolved_ng, afi); + depend = zebra_nhg_rib_find(0, &resolved_ng, afi, type); if (IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug("%s: nh %pNHv => %p (%u)", @@ -671,8 +703,26 @@ static bool zebra_nhe_find(struct nhg_hash_entry **nhe, /* return value */ * assign the next global id value if necessary. */ if (lookup->id == 0) - lookup->id = ++id_counter; - newnhe = hash_get(zrouter.nhgs, lookup, zebra_nhg_hash_alloc); + lookup->id = nhg_get_next_id(); + + if (lookup->id < ZEBRA_NHG_PROTO_LOWER) { + /* + * This is a zebra hashed/owned NHG. + * + * It goes in HASH and ID table. + */ + newnhe = hash_get(zrouter.nhgs, lookup, zebra_nhg_hash_alloc); + zebra_nhg_insert_id(newnhe); + } else { + /* + * This is upperproto owned NHG and should not be hashed to. + * + * It goes in ID table. + */ + newnhe = + hash_get(zrouter.nhgs_id, lookup, zebra_nhg_hash_alloc); + } + created = true; /* Mail back the new object */ @@ -709,14 +759,16 @@ static bool zebra_nhe_find(struct nhg_hash_entry **nhe, /* return value */ if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE)) SET_FLAG(newnhe->flags, NEXTHOP_GROUP_VALID); - if (nh->next == NULL) { + if (nh->next == NULL && newnhe->id < ZEBRA_NHG_PROTO_LOWER) { if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) { /* Single recursive nexthop */ handle_recursive_depend(&newnhe->nhg_depends, - nh->resolved, afi); + nh->resolved, afi, + newnhe->type); recursive = true; } } else { + /* Proto-owned are groups by default */ /* List of nexthops */ for (nh = newnhe->nhg.nexthop; nh; nh = nh->next) { if (IS_ZEBRA_DEBUG_NHG_DETAIL) @@ -726,7 +778,8 @@ static bool zebra_nhe_find(struct nhg_hash_entry **nhe, /* return value */ NEXTHOP_FLAG_RECURSIVE) ? "(R)" : ""); - depends_find_add(&newnhe->nhg_depends, nh, afi); + depends_find_add(&newnhe->nhg_depends, nh, afi, + newnhe->type); } } @@ -753,8 +806,8 @@ static bool zebra_nhe_find(struct nhg_hash_entry **nhe, /* return value */ __func__, nh); /* Single recursive nexthop */ - handle_recursive_depend(&backup_nhe->nhg_depends, - nh->resolved, afi); + handle_recursive_depend(&backup_nhe->nhg_depends, nh->resolved, + afi, backup_nhe->type); recursive = true; } else { /* One or more backup NHs */ @@ -766,8 +819,8 @@ static bool zebra_nhe_find(struct nhg_hash_entry **nhe, /* return value */ NEXTHOP_FLAG_RECURSIVE) ? 
"(R)" : ""); - depends_find_add(&backup_nhe->nhg_depends, - nh, afi); + depends_find_add(&backup_nhe->nhg_depends, nh, afi, + backup_nhe->type); } } @@ -960,30 +1013,6 @@ static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh, return ctx; } -static bool zebra_nhg_contains_unhashable(struct nhg_hash_entry *nhe) -{ - struct nhg_connected *rb_node_dep = NULL; - - frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) { - if (CHECK_FLAG(rb_node_dep->nhe->flags, - NEXTHOP_GROUP_UNHASHABLE)) - return true; - } - - return false; -} - -static void zebra_nhg_set_unhashable(struct nhg_hash_entry *nhe) -{ - SET_FLAG(nhe->flags, NEXTHOP_GROUP_UNHASHABLE); - SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); - - flog(LOG_INFO, - EC_ZEBRA_DUPLICATE_NHG_MESSAGE, - "Nexthop Group with ID (%d) is a duplicate, therefore unhashable, ignoring", - nhe->id); -} - static void zebra_nhg_set_valid(struct nhg_hash_entry *nhe) { struct nhg_connected *rb_node_dep; @@ -1025,23 +1054,27 @@ done: zebra_nhg_set_invalid(nhe); } +static void zebra_nhg_release_all_deps(struct nhg_hash_entry *nhe) +{ + /* Remove it from any lists it may be on */ + zebra_nhg_depends_release(nhe); + zebra_nhg_dependents_release(nhe); + if (nhe->ifp) + if_nhg_dependents_del(nhe->ifp, nhe); +} static void zebra_nhg_release(struct nhg_hash_entry *nhe) { if (IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug("%s: nhe %p (%u)", __func__, nhe, nhe->id); - /* Remove it from any lists it may be on */ - zebra_nhg_depends_release(nhe); - zebra_nhg_dependents_release(nhe); - if (nhe->ifp) - if_nhg_dependents_del(nhe->ifp, nhe); + zebra_nhg_release_all_deps(nhe); /* - * If its unhashable, we didn't store it here and have to be + * If its not zebra owned, we didn't store it here and have to be * sure we don't clear one thats actually being used. */ - if (!CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_UNHASHABLE)) + if (nhe->id < ZEBRA_NHG_PROTO_LOWER) hash_release(zrouter.nhgs, nhe); hash_release(zrouter.nhgs_id, nhe); @@ -1117,8 +1150,8 @@ static int nhg_ctx_process_new(struct nhg_ctx *ctx) return -ENOENT; } - if (!zebra_nhg_find(&nhe, id, nhg, &nhg_depends, vrf_id, type, - afi)) + if (!zebra_nhg_find(&nhe, id, nhg, &nhg_depends, vrf_id, afi, + type)) depends_decrement_free(&nhg_depends); /* These got copied over in zebra_nhg_alloc() */ @@ -1127,54 +1160,7 @@ static int nhg_ctx_process_new(struct nhg_ctx *ctx) nhe = zebra_nhg_find_nexthop(id, nhg_ctx_get_nh(ctx), afi, type); - if (nhe) { - if (id != nhe->id) { - struct nhg_hash_entry *kernel_nhe = NULL; - - /* Duplicate but with different ID from - * the kernel - */ - - /* The kernel allows duplicate nexthops - * as long as they have different IDs. - * We are ignoring those to prevent - * syncing problems with the kernel - * changes. - * - * We maintain them *ONLY* in the ID hash table to - * track them and set the flag to indicated - * their attributes are unhashable. - */ - - kernel_nhe = zebra_nhe_copy(nhe, id); - - if (IS_ZEBRA_DEBUG_NHG_DETAIL) - zlog_debug("%s: copying kernel nhe (%u), dup of %u", - __func__, id, nhe->id); - - zebra_nhg_insert_id(kernel_nhe); - zebra_nhg_set_unhashable(kernel_nhe); - } else if (zebra_nhg_contains_unhashable(nhe)) { - /* The group we got contains an unhashable/duplicated - * depend, so lets mark this group as unhashable as well - * and release it from the non-ID hash. 
- */ - if (IS_ZEBRA_DEBUG_NHG_DETAIL) - zlog_debug("%s: nhe %p (%u) unhashable", - __func__, nhe, nhe->id); - - hash_release(zrouter.nhgs, nhe); - zebra_nhg_set_unhashable(nhe); - } else { - /* It actually created a new nhe */ - if (IS_ZEBRA_DEBUG_NHG_DETAIL) - zlog_debug("%s: nhe %p (%u) is new", - __func__, nhe, nhe->id); - - SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); - SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); - } - } else { + if (!nhe) { flog_err( EC_ZEBRA_TABLE_LOOKUP_FAILED, "Zebra failed to find or create a nexthop hash entry for ID (%u)", @@ -1182,6 +1168,12 @@ static int nhg_ctx_process_new(struct nhg_ctx *ctx) return -1; } + if (IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: nhe %p (%u) is new", __func__, nhe, nhe->id); + + SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID); + SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED); + return 0; } @@ -1283,7 +1275,7 @@ int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh, struct nh_grp *grp, zlog_debug("%s: nh %pNHv, id %u, count %d", __func__, nh, id, (int)count); - if (id > id_counter) + if (id > id_counter && id < ZEBRA_NHG_PROTO_LOWER) /* Increase our counter so we don't try to create * an ID that already exists */ @@ -1326,14 +1318,14 @@ int zebra_nhg_kernel_del(uint32_t id, vrf_id_t vrf_id) /* Some dependency helper functions */ static struct nhg_hash_entry *depends_find_recursive(const struct nexthop *nh, - afi_t afi) + afi_t afi, int type) { struct nhg_hash_entry *nhe; struct nexthop *lookup = NULL; lookup = nexthop_dup(nh, NULL); - nhe = zebra_nhg_find_nexthop(0, lookup, afi, 0); + nhe = zebra_nhg_find_nexthop(0, lookup, afi, type); nexthops_free(lookup); @@ -1341,7 +1333,7 @@ static struct nhg_hash_entry *depends_find_recursive(const struct nexthop *nh, } static struct nhg_hash_entry *depends_find_singleton(const struct nexthop *nh, - afi_t afi) + afi_t afi, int type) { struct nhg_hash_entry *nhe; struct nexthop lookup = {}; @@ -1351,7 +1343,7 @@ static struct nhg_hash_entry *depends_find_singleton(const struct nexthop *nh, */ nexthop_copy_no_recurse(&lookup, nh, NULL); - nhe = zebra_nhg_find_nexthop(0, &lookup, afi, 0); + nhe = zebra_nhg_find_nexthop(0, &lookup, afi, type); /* The copy may have allocated labels; free them if necessary. 
*/ nexthop_del_labels(&lookup); @@ -1363,7 +1355,8 @@ static struct nhg_hash_entry *depends_find_singleton(const struct nexthop *nh, return nhe; } -static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi) +static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi, + int type) { struct nhg_hash_entry *nhe = NULL; @@ -1374,9 +1367,9 @@ static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi) * in the non-recursive case (by not alloc/freeing) */ if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) - nhe = depends_find_recursive(nh, afi); + nhe = depends_find_recursive(nh, afi, type); else - nhe = depends_find_singleton(nh, afi); + nhe = depends_find_singleton(nh, afi, type); if (IS_ZEBRA_DEBUG_NHG_DETAIL) { @@ -1409,11 +1402,11 @@ static void depends_add(struct nhg_connected_tree_head *head, static struct nhg_hash_entry * depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh, - afi_t afi) + afi_t afi, int type) { struct nhg_hash_entry *depend = NULL; - depend = depends_find(nh, afi); + depend = depends_find(nh, afi, type); if (IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug("%s: nh %pNHv => %p", @@ -1445,8 +1438,9 @@ static void depends_decrement_free(struct nhg_connected_tree_head *head) } /* Find an nhe based on a list of nexthops */ -struct nhg_hash_entry * -zebra_nhg_rib_find(uint32_t id, struct nexthop_group *nhg, afi_t rt_afi) +struct nhg_hash_entry *zebra_nhg_rib_find(uint32_t id, + struct nexthop_group *nhg, + afi_t rt_afi, int type) { struct nhg_hash_entry *nhe = NULL; vrf_id_t vrf_id; @@ -1458,7 +1452,7 @@ zebra_nhg_rib_find(uint32_t id, struct nexthop_group *nhg, afi_t rt_afi) assert(nhg->nexthop); vrf_id = !vrf_is_backend_netns() ? VRF_DEFAULT : nhg->nexthop->vrf_id; - zebra_nhg_find(&nhe, id, nhg, NULL, vrf_id, rt_afi, 0); + zebra_nhg_find(&nhe, id, nhg, NULL, vrf_id, rt_afi, type); if (IS_ZEBRA_DEBUG_NHG_DETAIL) zlog_debug("%s: => nhe %p (%u)", @@ -2326,6 +2320,22 @@ static uint32_t nexthop_list_active_update(struct route_node *rn, return counter; } + +static uint32_t proto_nhg_nexthop_active_update(struct nexthop_group *nhg) +{ + struct nexthop *nh; + uint32_t curr_active = 0; + + /* Assume all active for now */ + + for (nh = nhg->nexthop; nh; nh = nh->next) { + SET_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE); + curr_active++; + } + + return curr_active; +} + /* * Iterate over all nexthops of the given RIB entry and refresh their * ACTIVE flag. If any nexthop is found to toggle the ACTIVE flag, @@ -2338,6 +2348,9 @@ int nexthop_active_update(struct route_node *rn, struct route_entry *re) struct nhg_hash_entry *curr_nhe; uint32_t curr_active = 0, backup_active = 0; + if (re->nhe->id >= ZEBRA_NHG_PROTO_LOWER) + return proto_nhg_nexthop_active_update(&re->nhe->nhg); + afi_t rt_afi = family2afi(rn->p.family); UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED); @@ -2543,7 +2556,8 @@ void zebra_nhg_install_kernel(struct nhg_hash_entry *nhe) && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED) && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED)) { /* Change its type to us since we are installing it */ - nhe->type = ZEBRA_ROUTE_NHG; + if (!ZEBRA_NHG_CREATED(nhe)) + nhe->type = ZEBRA_ROUTE_NHG; int ret = dplane_nexthop_add(nhe); @@ -2700,3 +2714,251 @@ bool zebra_nhg_kernel_nexthops_enabled(void) { return g_nexthops_enabled; } + +/* + * Global control to only use kernel nexthops for protocol created NHGs. 
+ * There are some use cases where you may not want zebra to implicitly
+ * create kernel nexthops for all routes, and instead only create them
+ * for NHGs passed down by upper level protos.
+ *
+ * Default is off.
+ */
+void zebra_nhg_set_proto_nexthops_only(bool set)
+{
+	proto_nexthops_only = set;
+}
+
+bool zebra_nhg_proto_nexthops_only(void)
+{
+	return proto_nexthops_only;
+}
+
+/* Add NHE from upper level proto */
+struct nhg_hash_entry *zebra_nhg_proto_add(uint32_t id, int type,
+					   struct nexthop_group *nhg, afi_t afi)
+{
+	struct nhg_hash_entry lookup;
+	struct nhg_hash_entry *new, *old;
+	struct nhg_connected *rb_node_dep = NULL;
+	struct nexthop *newhop;
+	bool replace = false;
+
+	if (!nhg->nexthop) {
+		if (IS_ZEBRA_DEBUG_NHG)
+			zlog_debug("%s: id %u, no nexthops passed to add",
+				   __func__, id);
+		return NULL;
+	}
+
+
+	/* Set nexthop list as active, since they won't go through rib
+	 * processing.
+	 *
+	 * Assuming valid/onlink for now.
+	 *
+	 * Once resolution is figured out, we won't need this!
+	 */
+	for (ALL_NEXTHOPS_PTR(nhg, newhop)) {
+		if (CHECK_FLAG(newhop->flags, NEXTHOP_FLAG_HAS_BACKUP)) {
+			if (IS_ZEBRA_DEBUG_NHG)
+				zlog_debug(
+					"%s: id %u, backup nexthops not supported",
+					__func__, id);
+			return NULL;
+		}
+
+		if (newhop->type == NEXTHOP_TYPE_BLACKHOLE) {
+			if (IS_ZEBRA_DEBUG_NHG)
+				zlog_debug(
+					"%s: id %u, blackhole nexthop not supported",
+					__func__, id);
+			return NULL;
+		}
+
+		if (newhop->type == NEXTHOP_TYPE_IFINDEX) {
+			if (IS_ZEBRA_DEBUG_NHG)
+				zlog_debug(
+					"%s: id %u, nexthop without gateway not supported",
+					__func__, id);
+			return NULL;
+		}
+
+		if (!newhop->ifindex) {
+			if (IS_ZEBRA_DEBUG_NHG)
+				zlog_debug(
+					"%s: id %u, nexthop without ifindex is not supported",
+					__func__, id);
+			return NULL;
+		}
+		SET_FLAG(newhop->flags, NEXTHOP_FLAG_ACTIVE);
+	}
+
+	zebra_nhe_init(&lookup, afi, nhg->nexthop);
+	lookup.nhg.nexthop = nhg->nexthop;
+	lookup.id = id;
+	lookup.type = type;
+
+	old = zebra_nhg_lookup_id(id);
+
+	if (old) {
+		/*
+		 * This is a replace; just release the NHE from the ID table
+		 * for now. The depends/dependents may still be used in the
+		 * replacement.
+		 */
+		replace = true;
+		hash_release(zrouter.nhgs_id, old);
+	}
+
+	new = zebra_nhg_rib_find_nhe(&lookup, afi);
+
+	zebra_nhg_increment_ref(new);
+
+	zebra_nhg_set_valid_if_active(new);
+
+	zebra_nhg_install_kernel(new);
+
+	if (old) {
+		/*
+		 * Check to handle receiving a DEL while routes are still in
+		 * use, followed by a replace.
+		 *
+		 * In this case we would have decremented the refcnt already
+		 * but set the FLAG here. Go ahead and increment once to fix
+		 * the misordering we have been sent.
+		 */
+		if (CHECK_FLAG(old->flags, NEXTHOP_GROUP_PROTO_RELEASED))
+			zebra_nhg_increment_ref(old);
+
+		rib_handle_nhg_replace(old, new);
+
+		/* if this != 1 at this point, we have a bug */
+		assert(old->refcnt == 1);
+
+		/* We have to decrement its singletons
+		 * because some might not exist in NEW.
+		 */
+		if (!zebra_nhg_depends_is_empty(old)) {
+			frr_each (nhg_connected_tree, &old->nhg_depends,
+				  rb_node_dep)
+				zebra_nhg_decrement_ref(rb_node_dep->nhe);
+		}
+
+		/* Free all the things */
+		zebra_nhg_release_all_deps(old);
+
+		/* Don't call the dec API, we don't want to uninstall the ID */
+		old->refcnt = 0;
+		zebra_nhg_free(old);
+		old = NULL;
+	}
+
+	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
+		zlog_debug("%s: %s nhe %p (%u), vrf %d, type %s", __func__,
+			   (replace ? "replaced" : "added"), new, new->id,
+			   new->vrf_id, zebra_route_string(new->type));
+
+	return new;
+}
+
+/* Delete NHE from upper level proto, caller must decrement ref */
+struct nhg_hash_entry *zebra_nhg_proto_del(uint32_t id, int type)
+{
+	struct nhg_hash_entry *nhe;
+
+	nhe = zebra_nhg_lookup_id(id);
+
+	if (!nhe) {
+		if (IS_ZEBRA_DEBUG_NHG)
+			zlog_debug("%s: id %u, lookup failed", __func__, id);
+
+		return NULL;
+	}
+
+	if (type != nhe->type) {
+		if (IS_ZEBRA_DEBUG_NHG)
+			zlog_debug(
+				"%s: id %u, type %s mismatch, sent by %s, ignoring",
+				__func__, id, zebra_route_string(nhe->type),
+				zebra_route_string(type));
+		return NULL;
+	}
+
+	if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_PROTO_RELEASED)) {
+		if (IS_ZEBRA_DEBUG_NHG)
+			zlog_debug("%s: id %u, already released", __func__, id);
+
+		return NULL;
+	}
+
+	SET_FLAG(nhe->flags, NEXTHOP_GROUP_PROTO_RELEASED);
+
+	if (nhe->refcnt > 1) {
+		if (IS_ZEBRA_DEBUG_NHG)
+			zlog_debug(
+				"%s: id %u, still being used by routes refcnt %u",
+				__func__, nhe->id, nhe->refcnt);
+		return nhe;
+	}
+
+	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
+		zlog_debug("%s: deleted nhe %p (%u), vrf %d, type %s", __func__,
+			   nhe, nhe->id, nhe->vrf_id,
+			   zebra_route_string(nhe->type));
+
+	return nhe;
+}
+
+struct nhg_score_proto_iter {
+	int type;
+	struct list *found;
+};
+
+static void zebra_nhg_score_proto_entry(struct hash_bucket *bucket, void *arg)
+{
+	struct nhg_hash_entry *nhe;
+	struct nhg_score_proto_iter *iter;
+
+	nhe = (struct nhg_hash_entry *)bucket->data;
+	iter = arg;
+
+	/* Needs to match the type and be outside the zebra ID space */
+	if (nhe->type == iter->type && nhe->id >= ZEBRA_NHG_PROTO_LOWER) {
+		if (IS_ZEBRA_DEBUG_NHG_DETAIL)
+			zlog_debug(
+				"%s: found nhe %p (%u), vrf %d, type %s after client disconnect",
+				__func__, nhe, nhe->id, nhe->vrf_id,
+				zebra_route_string(nhe->type));
+
+		/* Add to removal list */
+		listnode_add(iter->found, nhe);
+	}
+}
+
+/* Remove all NHGs owned by the given proto */
+unsigned long zebra_nhg_score_proto(int type)
+{
+	struct nhg_hash_entry *nhe;
+	struct nhg_score_proto_iter iter = {};
+	struct listnode *ln;
+	unsigned long count;
+
+	iter.type = type;
+	iter.found = list_new();
+
+	/* Find matching entries to remove */
+	hash_iterate(zrouter.nhgs_id, zebra_nhg_score_proto_entry, &iter);
+
+	/* Now remove them */
+	for (ALL_LIST_ELEMENTS_RO(iter.found, ln, nhe)) {
+		/*
+		 * This should be the last ref if we remove client routes too,
+		 * and thus should remove and free them.
+		 */
+		zebra_nhg_decrement_ref(nhe);
+	}
+
+	count = iter.found->count;
+	list_delete(&iter.found);
+
+	return count;
+}
diff --git a/zebra/zebra_nhg.h b/zebra/zebra_nhg.h
index de5f097472..052fa65d06 100644
--- a/zebra/zebra_nhg.h
+++ b/zebra/zebra_nhg.h
@@ -102,20 +102,23 @@ struct nhg_hash_entry {
  * Is this a nexthop that is recursively resolved?
  */
 #define NEXTHOP_GROUP_RECURSIVE (1 << 3)
-/*
- * This is a nexthop group we got from the kernel, it is identical to
- * one we already have. (The kernel allows duplicate nexthops, we don't
- * since we hash on them). We are only tracking it in our ID table,
- * it is unusable by our created routes but may be used by routes we get
- * from the kernel. Therefore, it is unhashable.
- */
-#define NEXTHOP_GROUP_UNHASHABLE (1 << 4)
 
 /*
  * Backup nexthop support - identify groups that are backups for
  * another group.
  */
-#define NEXTHOP_GROUP_BACKUP (1 << 5)
+#define NEXTHOP_GROUP_BACKUP (1 << 4)
+
+/*
+ * The NHG has been released by an upper level protocol via the
+ * `zebra_nhg_proto_del()` API.
+ *
+ * We use this flag to track this state in case the NHG is still being used
+ * by routes that are therefore still holding refcnts on it. Otherwise, the
+ * NHG will be removed and uninstalled.
+ *
+ */
+#define NEXTHOP_GROUP_PROTO_RELEASED (1 << 5)
 
 /*
  * Track FPM installation status..
@@ -124,7 +127,11 @@ struct nhg_hash_entry {
 };
 
 /* Was this one we created, either this session or previously? */
-#define ZEBRA_NHG_CREATED(NHE) ((NHE->type) == ZEBRA_ROUTE_NHG)
+#define ZEBRA_NHG_CREATED(NHE) \
+	(((NHE->type) <= ZEBRA_ROUTE_MAX) && (NHE->type != ZEBRA_ROUTE_KERNEL))
+
+/* Is this an NHE owned by zebra and not an upper level protocol? */
+#define ZEBRA_OWNED(NHE) (NHE->type == ZEBRA_ROUTE_NHG)
 
 /*
  * Backup nexthops: this is a group object itself, so
@@ -186,6 +193,10 @@ struct nhg_ctx {
 void zebra_nhg_enable_kernel_nexthops(bool set);
 bool zebra_nhg_kernel_nexthops_enabled(void);
 
+/* Global control for zebra to only use proto-owned nexthops */
+void zebra_nhg_set_proto_nexthops_only(bool set);
+bool zebra_nhg_proto_nexthops_only(void);
+
 /**
 * NHE abstracted tree functions.
 * Use these where possible instead of direct access.
@@ -249,13 +260,50 @@ extern int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh,
 extern int zebra_nhg_kernel_del(uint32_t id, vrf_id_t vrf_id);
 
 /* Find an nhe based on a nexthop_group */
-extern struct nhg_hash_entry *
-zebra_nhg_rib_find(uint32_t id, struct nexthop_group *nhg, afi_t rt_afi);
+extern struct nhg_hash_entry *zebra_nhg_rib_find(uint32_t id,
+						 struct nexthop_group *nhg,
+						 afi_t rt_afi, int type);
 
 /* Find an nhe based on a route's nhe, used during route creation */
 struct nhg_hash_entry *
 zebra_nhg_rib_find_nhe(struct nhg_hash_entry *rt_nhe, afi_t rt_afi);
 
+
+/**
+ * Functions for Add/Del/Replace via protocol NHG creation.
+ *
+ * The NHEs will not be hashed. They will only be present in the
+ * ID table and therefore not sharable.
+ *
+ * It is the owning protocol's job to manage these.
+ */
+
+/*
+ * Add NHE. If it already exists, replace it.
+ *
+ * Returns allocated NHE on success, otherwise NULL.
+ */
+struct nhg_hash_entry *zebra_nhg_proto_add(uint32_t id, int type,
+					   struct nexthop_group *nhg,
+					   afi_t afi);
+
+/*
+ * Del NHE.
+ *
+ * Returns deleted NHE on success, otherwise NULL.
+ *
+ * Caller must decrement ref with zebra_nhg_decrement_ref() when done.
+ */
+struct nhg_hash_entry *zebra_nhg_proto_del(uint32_t id, int type);
+
+/*
+ * Remove all NHGs owned by the given proto.
+ *
+ * Called after client disconnect.
+ * + */ +unsigned long zebra_nhg_score_proto(int type); + /* Reference counter functions */ extern void zebra_nhg_decrement_ref(struct nhg_hash_entry *nhe); extern void zebra_nhg_increment_ref(struct nhg_hash_entry *nhe); diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index ff30de18a3..aac0e628fe 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -235,7 +235,7 @@ int route_entry_update_nhe(struct route_entry *re, goto done; } - if ((re->nhe_id != 0) && (re->nhe_id != new_nhghe->id)) { + if ((re->nhe_id != 0) && re->nhe && (re->nhe != new_nhghe)) { old = re->nhe; route_entry_attach_ref(re, new_nhghe); @@ -250,6 +250,29 @@ done: return ret; } +void rib_handle_nhg_replace(struct nhg_hash_entry *old, + struct nhg_hash_entry *new) +{ + struct zebra_router_table *zrt; + struct route_node *rn; + struct route_entry *re, *next; + + if (IS_ZEBRA_DEBUG_RIB_DETAILED || IS_ZEBRA_DEBUG_NHG_DETAIL) + zlog_debug("%s: replacing routes nhe (%u) OLD %p NEW %p", + __func__, new->id, new, old); + + /* We have to do them ALL */ + RB_FOREACH (zrt, zebra_router_table_head, &zrouter.tables) { + for (rn = route_top(zrt->table); rn; + rn = srcdest_route_next(rn)) { + RNODE_FOREACH_RE_SAFE (rn, re, next) { + if (re->nhe && re->nhe == old) + route_entry_update_nhe(re, new); + } + } + } +} + struct route_entry *rib_match(afi_t afi, safi_t safi, vrf_id_t vrf_id, union g_addr *addr, struct route_node **rn_out) { @@ -2906,14 +2929,14 @@ int rib_add_multipath_nhe(afi_t afi, safi_t safi, struct prefix *p, if (!table) return -1; - if (re_nhe->id > 0) { - nhe = zebra_nhg_lookup_id(re_nhe->id); + if (re->nhe_id > 0) { + nhe = zebra_nhg_lookup_id(re->nhe_id); if (!nhe) { flog_err( EC_ZEBRA_TABLE_LOOKUP_FAILED, "Zebra failed to find the nexthop hash entry for id=%u in a route entry", - re_nhe->id); + re->nhe_id); return -1; } diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c index 0088b49512..6785151705 100644 --- a/zebra/zebra_vty.c +++ b/zebra/zebra_vty.c @@ -1300,13 +1300,10 @@ static void show_nexthop_group_out(struct vty *vty, struct nhg_hash_entry *nhe) struct nhg_connected *rb_node_dep = NULL; struct nexthop_group *backup_nhg; - vty_out(vty, "ID: %u\n", nhe->id); + vty_out(vty, "ID: %u (%s)\n", nhe->id, zebra_route_string(nhe->type)); vty_out(vty, " RefCnt: %d\n", nhe->refcnt); vty_out(vty, " VRF: %s\n", vrf_id_to_name(nhe->vrf_id)); - if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_UNHASHABLE)) - vty_out(vty, " Duplicate - from kernel not hashable\n"); - if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_VALID)) { vty_out(vty, " Valid"); if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED)) @@ -1569,6 +1566,17 @@ DEFPY_HIDDEN(nexthop_group_use_enable, return CMD_SUCCESS; } +DEFPY_HIDDEN(proto_nexthop_group_only, proto_nexthop_group_only_cmd, + "[no] zebra nexthop proto only", + NO_STR ZEBRA_STR + "Nexthop configuration\n" + "Configure exclusive use of proto nexthops\n" + "Only use proto nexthops\n") +{ + zebra_nhg_set_proto_nexthops_only(!no); + return CMD_SUCCESS; +} + DEFUN (no_ip_nht_default_route, no_ip_nht_default_route_cmd, "no ip nht resolve-via-default", @@ -3451,6 +3459,9 @@ static int config_write_protocol(struct vty *vty) if (!zebra_nhg_kernel_nexthops_enabled()) vty_out(vty, "no zebra nexthop kernel enable\n"); + if (zebra_nhg_proto_nexthops_only()) + vty_out(vty, "zebra nexthop proto only\n"); + #ifdef HAVE_NETLINK /* Include netlink info */ netlink_config_write_helper(vty); @@ -3885,6 +3896,7 @@ void zebra_vty_init(void) install_element(CONFIG_NODE, &zebra_packet_process_cmd); install_element(CONFIG_NODE, 
&no_zebra_packet_process_cmd); install_element(CONFIG_NODE, &nexthop_group_use_enable_cmd); + install_element(CONFIG_NODE, &proto_nexthop_group_only_cmd); install_element(VIEW_NODE, &show_nexthop_group_cmd); install_element(VIEW_NODE, &show_interface_nexthop_group_cmd); diff --git a/zebra/zserv.c b/zebra/zserv.c index 4c8656af0d..44f4641fcf 100644 --- a/zebra/zserv.c +++ b/zebra/zserv.c @@ -590,6 +590,7 @@ static void zserv_client_free(struct zserv *client) /* Close file descriptor. */ if (client->sock) { unsigned long nroutes; + unsigned long nnhgs; close(client->sock); @@ -600,6 +601,13 @@ static void zserv_client_free(struct zserv *client) "client %d disconnected %lu %s routes removed from the rib", client->sock, nroutes, zebra_route_string(client->proto)); + + /* Not worrying about instance for now */ + nnhgs = zebra_nhg_score_proto(client->proto); + zlog_notice( + "client %d disconnected %lu %s nhgs removed from the rib", + client->sock, nnhgs, + zebra_route_string(client->proto)); } client->sock = -1; }
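
For context, below is a minimal sketch (not part of this patch) of how an upper-level client inside zebra is expected to drive the proto-NHG API declared in zebra_nhg.h above. The wrapper function, its callers and the specific id/type values are hypothetical, and the snippet assumes zebra's normal build environment and headers.

/*
 * Illustrative sketch only -- not part of this change.
 */
#include "lib/nexthop_group.h"
#include "zebra/zebra_nhg.h"

/* Hypothetical helper: 'id' is expected to live in the protocol ID space
 * (>= ZEBRA_NHG_PROTO_LOWER) and 'type' is the owning protocol's
 * ZEBRA_ROUTE_* value.
 */
static void example_proto_nhg_lifecycle(uint32_t id, int type,
					struct nexthop_group *nhg, afi_t afi)
{
	struct nhg_hash_entry *nhe;

	/* Add (or replace) the proto-owned NHG; it is tracked only in the
	 * ID table and is never hashed/shared with zebra-owned groups.
	 */
	nhe = zebra_nhg_proto_add(id, type, nhg, afi);
	if (!nhe)
		return;

	/* ... routes referencing this NHG id are installed and used ... */

	/* The owning protocol later deletes it; the entry is handed back so
	 * the caller can drop the reference taken by zebra_nhg_proto_add().
	 */
	nhe = zebra_nhg_proto_del(id, type);
	if (nhe)
		zebra_nhg_decrement_ref(nhe);
}

The key point is the ownership contract spelled out in the header: zebra_nhg_proto_del() only flags the entry as released and leaves the final zebra_nhg_decrement_ref() to the caller, so an NHG that routes are still referencing stays installed until those references are gone.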