From ce7f5b21221f0b3557d1f4a40793230d8bc4cf02 Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Wed, 22 Jan 2025 11:13:21 +0100 Subject: [PATCH 1/6] bgpd: don't reuse nexthop variable in loop/switch While the loop is currently exited in all cases after using nexthop, it is a footgun to have "nh" around to be reused in another iteration of the loop. This would leave nexthop with partial data from the previous use. Make it local where needed instead. Signed-off-by: David Lamparter --- bgpd/bgp_pbr.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/bgpd/bgp_pbr.c b/bgpd/bgp_pbr.c index 2d61c0f00a..b85a8e2254 100644 --- a/bgpd/bgp_pbr.c +++ b/bgpd/bgp_pbr.c @@ -2624,7 +2624,6 @@ static void bgp_pbr_policyroute_add_to_zebra(struct bgp *bgp, static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, struct bgp_pbr_entry_main *api, bool add) { - struct nexthop nh; int i = 0; int continue_loop = 1; float rate = 0; @@ -2639,7 +2638,6 @@ static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, struct bgp_pbr_val_mask bpvm; memset(&range, 0, sizeof(range)); - memset(&nh, 0, sizeof(nh)); memset(&bpf, 0, sizeof(bpf)); memset(&bpof, 0, sizeof(bpof)); if (CHECK_FLAG(api->match_bitmask, PREFIX_SRC_PRESENT) || @@ -2652,8 +2650,6 @@ static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, dst = &api->dst_prefix; if (api->type == BGP_PBR_IPRULE) bpf.type = api->type; - memset(&nh, 0, sizeof(nh)); - nh.vrf_id = VRF_UNKNOWN; if (api->match_protocol_num) { proto = (uint8_t)api->protocol[0].value; if (api->afi == AF_INET6 && proto == IPPROTO_ICMPV6) @@ -2778,8 +2774,10 @@ static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, case ACTION_TRAFFICRATE: /* drop packet */ if (api->actions[i].u.r.rate == 0) { - nh.vrf_id = api->vrf_id; - nh.type = NEXTHOP_TYPE_BLACKHOLE; + struct nexthop nh = { + .vrf_id = api->vrf_id, + .type = NEXTHOP_TYPE_BLACKHOLE, + }; bgp_pbr_policyroute_add_to_zebra( bgp, path, &bpf, &bpof, &nh, &rate); } else { @@ -2802,18 +2800,15 @@ static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, /* terminate action: run other filters */ break; - case ACTION_REDIRECT_IP: - nh.vrf_id = api->vrf_id; + case ACTION_REDIRECT_IP: { + struct nexthop nh = { .vrf_id = api->vrf_id }; + if (api->afi == AFI_IP) { nh.type = NEXTHOP_TYPE_IPV4; - nh.gate.ipv4.s_addr = - api->actions[i].u.zr. - redirect_ip_v4.s_addr; + nh.gate.ipv4 = api->actions[i].u.zr.redirect_ip_v4; } else { nh.type = NEXTHOP_TYPE_IPV6; - memcpy(&nh.gate.ipv6, - &api->actions[i].u.zr.redirect_ip_v6, - sizeof(struct in6_addr)); + nh.gate.ipv6 = api->actions[i].u.zr.redirect_ip_v6; } bgp_pbr_policyroute_add_to_zebra(bgp, path, &bpf, &bpof, &nh, &rate); @@ -2822,7 +2817,10 @@ static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, */ continue_loop = 0; break; - case ACTION_REDIRECT: + } + case ACTION_REDIRECT: { + struct nexthop nh = {}; + if (api->afi == AFI_IP) nh.type = NEXTHOP_TYPE_IPV4; else @@ -2832,6 +2830,7 @@ static void bgp_pbr_handle_entry(struct bgp *bgp, struct bgp_path_info *path, &nh, &rate); continue_loop = 0; break; + } case ACTION_MARKING: if (BGP_DEBUG(pbr, PBR)) { bgp_pbr_print_policy_route(api); From 95cf0b227980999e2af22a2c171e5237e5ffca8e Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Wed, 22 Jan 2025 11:15:17 +0100 Subject: [PATCH 2/6] fpm: guard against garbage in unused address bytes Zero out the 12 unused bytes (for the IPv6 address) when reading in an IPv4 address. Signed-off-by: David Lamparter --- fpm/fpm_pb.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fpm/fpm_pb.h b/fpm/fpm_pb.h index 23d7e43993..8847365a37 100644 --- a/fpm/fpm_pb.h +++ b/fpm/fpm_pb.h @@ -111,6 +111,7 @@ static inline int fpm__nexthop__get(const Fpm__Nexthop *nh, nexthop->vrf_id = VRF_DEFAULT; nexthop->type = NEXTHOP_TYPE_IPV4; + memset(&nexthop->gate, 0, sizeof(nexthop->gate)); nexthop->gate.ipv4 = ipv4; if (ifindex) { nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX; From c88589f5e9351654c04322eb395003297656989d Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Wed, 22 Jan 2025 11:16:10 +0100 Subject: [PATCH 3/6] pbrd: initialize structs used in hash_lookup Doesn't seem to break anything but really poor style to pass potentially uninitialized data to hash_lookup. Signed-off-by: David Lamparter --- pbrd/pbr_nht.c | 8 ++++---- pbrd/pbr_vty.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pbrd/pbr_nht.c b/pbrd/pbr_nht.c index ff252f8505..d5cee5f3e4 100644 --- a/pbrd/pbr_nht.c +++ b/pbrd/pbr_nht.c @@ -493,7 +493,7 @@ void pbr_nht_change_group(const char *name) } for (ALL_NEXTHOPS(nhgc->nhg, nhop)) { - struct pbr_nexthop_cache lookup; + struct pbr_nexthop_cache lookup = {}; struct pbr_nexthop_cache *pnhc; lookup.nexthop = *nhop; @@ -565,7 +565,7 @@ void pbr_nht_add_individual_nexthop(struct pbr_map_sequence *pbrms, struct pbr_nexthop_group_cache *pnhgc; struct pbr_nexthop_group_cache find; struct pbr_nexthop_cache *pnhc; - struct pbr_nexthop_cache lookup; + struct pbr_nexthop_cache lookup = {}; struct nexthop *nh; char buf[PBR_NHC_NAMELEN]; @@ -624,7 +624,7 @@ static void pbr_nht_release_individual_nexthop(struct pbr_map_sequence *pbrms) struct pbr_nexthop_group_cache *pnhgc; struct pbr_nexthop_group_cache find; struct pbr_nexthop_cache *pnhc; - struct pbr_nexthop_cache lup; + struct pbr_nexthop_cache lup = {}; struct nexthop *nh; enum nexthop_types_t nh_type = 0; @@ -690,7 +690,7 @@ struct pbr_nexthop_group_cache *pbr_nht_add_group(const char *name) DEBUGD(&pbr_dbg_nht, "%s: Retrieved NHGC @ %p", __func__, pnhgc); for (ALL_NEXTHOPS(nhgc->nhg, nhop)) { - struct pbr_nexthop_cache lookupc; + struct pbr_nexthop_cache lookupc = {}; struct pbr_nexthop_cache *pnhc; lookupc.nexthop = *nhop; diff --git a/pbrd/pbr_vty.c b/pbrd/pbr_vty.c index 08fe56c7bb..aa98913571 100644 --- a/pbrd/pbr_vty.c +++ b/pbrd/pbr_vty.c @@ -1488,7 +1488,7 @@ pbrms_nexthop_group_write_individual_nexthop( { struct pbr_nexthop_group_cache find; struct pbr_nexthop_group_cache *pnhgc; - struct pbr_nexthop_cache lookup; + struct pbr_nexthop_cache lookup = {}; struct pbr_nexthop_cache *pnhc; memset(&find, 0, sizeof(find)); From b666ee510eb480da50476b1bbc84bdf8365df95c Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Wed, 22 Jan 2025 11:17:21 +0100 Subject: [PATCH 4/6] zebra: guard against junk in nexthop->rmap_src rmap_src wasn't initialized, so for IPv4 the unused 12 bytes would contain whatever junk is on the stack on function entry. Also move the IPv4 parse before the IPv6 parse so if it's successful we can be sure the other bytes haven't been touched. Signed-off-by: David Lamparter --- zebra/zebra_routemap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/zebra/zebra_routemap.c b/zebra/zebra_routemap.c index 73ffa09c16..2813f037a2 100644 --- a/zebra/zebra_routemap.c +++ b/zebra/zebra_routemap.c @@ -959,10 +959,11 @@ route_set_src(void *rule, const struct prefix *prefix, void *object) /* set src compilation. */ static void *route_set_src_compile(const char *arg) { - union g_addr src, *psrc; + union g_addr src = {}, *psrc; - if ((inet_pton(AF_INET6, arg, &src.ipv6) == 1) - || (inet_pton(AF_INET, arg, &src.ipv4) == 1)) { + /* IPv4 first, to ensure no garbage in the 12 unused bytes */ + if ((inet_pton(AF_INET, arg, &src.ipv4) == 1) || + (inet_pton(AF_INET6, arg, &src.ipv6) == 1)) { psrc = XMALLOC(MTYPE_ROUTE_MAP_COMPILED, sizeof(union g_addr)); *psrc = src; return psrc; From 4a0e1419a69d07496c7adfb744beecd00e1efef2 Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Wed, 22 Jan 2025 11:19:04 +0100 Subject: [PATCH 5/6] lib: guard against padding garbage in ZAPI read When reading in a nexthop from ZAPI, only set the fields that actually have meaning. While it shouldn't happen to begin with, we can otherwise carry padding garbage into the unused leftover union bytes. Signed-off-by: David Lamparter --- lib/zclient.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/lib/zclient.c b/lib/zclient.c index d8c75c9029..5deea8f0cf 100644 --- a/lib/zclient.c +++ b/lib/zclient.c @@ -2300,7 +2300,27 @@ struct nexthop *nexthop_from_zapi_nexthop(const struct zapi_nexthop *znh) n->type = znh->type; n->vrf_id = znh->vrf_id; n->ifindex = znh->ifindex; - n->gate = znh->gate; + + /* only copy values that have meaning - make sure "spare bytes" are + * left zeroed for hashing (look at _nexthop_hash_bytes) + */ + switch (znh->type) { + case NEXTHOP_TYPE_BLACKHOLE: + n->bh_type = znh->bh_type; + break; + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + n->gate.ipv4 = znh->gate.ipv4; + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + n->gate.ipv6 = znh->gate.ipv6; + break; + case NEXTHOP_TYPE_IFINDEX: + /* nothing, ifindex is always copied */ + break; + } + n->srte_color = znh->srte_color; n->weight = znh->weight; From 001fcfa1dd9f7dc2639b4f5c7a52ab59cc425452 Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Wed, 22 Jan 2025 11:23:31 +0100 Subject: [PATCH 6/6] lib: clean up nexthop hashing mess We were hashing 4 bytes of the address. Even for IPv6 addresses. Oops. The reason this was done was to try to make it faster, but made a complex maze out of everything. Time for a refactor. Signed-off-by: David Lamparter --- lib/nexthop.c | 87 ++++++----------------------------------- lib/nexthop.h | 104 +++++++++++++++++++++++++++++++++----------------- 2 files changed, 80 insertions(+), 111 deletions(-) diff --git a/lib/nexthop.c b/lib/nexthop.c index 332581fbd8..ee6c2b7ec0 100644 --- a/lib/nexthop.c +++ b/lib/nexthop.c @@ -772,68 +772,30 @@ unsigned int nexthop_level(const struct nexthop *nexthop) return rv; } -/* Only hash word-sized things, let cmp do the rest. */ -uint32_t nexthop_hash_quick(const struct nexthop *nexthop) +uint32_t nexthop_hash(const struct nexthop *nexthop) { uint32_t key = 0x45afe398; - int i; - key = jhash_3words(nexthop->type, nexthop->vrf_id, - nexthop->nh_label_type, key); + /* type, vrf, ifindex, ip addresses - see nexthop.h */ + key = _nexthop_hash_bytes(nexthop, key); + + key = jhash_1word(nexthop->flags & NEXTHOP_FLAGS_HASHED, key); if (nexthop->nh_label) { - int labels = nexthop->nh_label->num_labels; + const struct mpls_label_stack *ls = nexthop->nh_label; - i = 0; - - while (labels >= 3) { - key = jhash_3words(nexthop->nh_label->label[i], - nexthop->nh_label->label[i + 1], - nexthop->nh_label->label[i + 2], - key); - labels -= 3; - i += 3; - } - - if (labels >= 2) { - key = jhash_2words(nexthop->nh_label->label[i], - nexthop->nh_label->label[i + 1], - key); - labels -= 2; - i += 2; - } - - if (labels >= 1) - key = jhash_1word(nexthop->nh_label->label[i], key); + /* num_labels itself isn't useful to hash, if the number of + * labels is different, the hash value will change just due to + * that already. + */ + key = jhash(ls->label, sizeof(ls->label[0]) * ls->num_labels, key); } - key = jhash_2words(nexthop->ifindex, - CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK), - key); - /* Include backup nexthops, if present */ if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) { int backups = nexthop->backup_num; - i = 0; - - while (backups >= 3) { - key = jhash_3words(nexthop->backup_idx[i], - nexthop->backup_idx[i + 1], - nexthop->backup_idx[i + 2], key); - backups -= 3; - i += 3; - } - - while (backups >= 2) { - key = jhash_2words(nexthop->backup_idx[i], - nexthop->backup_idx[i + 1], key); - backups -= 2; - i += 2; - } - - if (backups >= 1) - key = jhash_1word(nexthop->backup_idx[i], key); + key = jhash(nexthop->backup_idx, sizeof(nexthop->backup_idx[0]) * backups, key); } if (nexthop->nh_srv6) { @@ -868,31 +830,6 @@ uint32_t nexthop_hash_quick(const struct nexthop *nexthop) return key; } - -#define GATE_SIZE 4 /* Number of uint32_t words in struct g_addr */ - -/* For a more granular hash */ -uint32_t nexthop_hash(const struct nexthop *nexthop) -{ - uint32_t gate_src_rmap_raw[GATE_SIZE * 3] = {}; - /* Get all the quick stuff */ - uint32_t key = nexthop_hash_quick(nexthop); - - assert(((sizeof(nexthop->gate) + sizeof(nexthop->src) - + sizeof(nexthop->rmap_src)) - / 3) - == (GATE_SIZE * sizeof(uint32_t))); - - memcpy(gate_src_rmap_raw, &nexthop->gate, GATE_SIZE); - memcpy(gate_src_rmap_raw + GATE_SIZE, &nexthop->src, GATE_SIZE); - memcpy(gate_src_rmap_raw + (2 * GATE_SIZE), &nexthop->rmap_src, - GATE_SIZE); - - key = jhash2(gate_src_rmap_raw, (GATE_SIZE * 3), key); - - return key; -} - void nexthop_copy_no_recurse(struct nexthop *copy, const struct nexthop *nexthop, struct nexthop *rparent) diff --git a/lib/nexthop.h b/lib/nexthop.h index 5dfb58d846..cea7c77e3e 100644 --- a/lib/nexthop.h +++ b/lib/nexthop.h @@ -8,6 +8,7 @@ #ifndef _LIB_NEXTHOP_H #define _LIB_NEXTHOP_H +#include "jhash.h" #include "prefix.h" #include "mpls.h" #include "vxlan.h" @@ -56,15 +57,48 @@ struct nexthop { struct nexthop *next; struct nexthop *prev; - /* - * What vrf is this nexthop associated with? + + /* begin of hashed data - all fields from here onwards are given to + * jhash() as one consecutive chunk. DO NOT create "padding holes". + * DO NOT insert pointers that need to be deep-hashed. + * + * static_assert() below needs to be updated when fields are added */ + char _hash_begin[0]; + + /* see above */ + enum nexthop_types_t type; + + /* What vrf is this nexthop associated with? */ vrf_id_t vrf_id; /* Interface index. */ ifindex_t ifindex; - enum nexthop_types_t type; + /* Type of label(s), if any */ + enum lsp_types_t nh_label_type; + + /* padding: keep 16 byte alignment here */ + + /* Nexthop address + * make sure all 16 bytes for IPv6 are zeroed when putting in an IPv4 + * address since the entire thing is hashed as-is + */ + union { + union g_addr gate; + enum blackhole_type bh_type; + }; + union g_addr src; + union g_addr rmap_src; /* Src is set via routemap */ + + /* end of hashed data - remaining fields in this struct are not + * directly fed into jhash(). Most of them are actually part of the + * hash but have special rules or handling attached. + */ + char _hash_end[0]; + + /* Weight of the nexthop ( for unequal cost ECMP ) */ + uint8_t weight; uint16_t flags; #define NEXTHOP_FLAG_ACTIVE (1 << 0) /* This nexthop is alive. */ @@ -82,18 +116,15 @@ struct nexthop { #define NEXTHOP_FLAG_EVPN (1 << 8) /* nexthop is EVPN */ #define NEXTHOP_FLAG_LINKDOWN (1 << 9) /* is not removed on link down */ + /* which flags are part of nexthop_hash(). Should probably be split + * off into a separate field... + */ +#define NEXTHOP_FLAGS_HASHED NEXTHOP_FLAG_ONLINK + #define NEXTHOP_IS_ACTIVE(flags) \ (CHECK_FLAG(flags, NEXTHOP_FLAG_ACTIVE) \ && !CHECK_FLAG(flags, NEXTHOP_FLAG_DUPLICATE)) - /* Nexthop address */ - union { - union g_addr gate; - enum blackhole_type bh_type; - }; - union g_addr src; - union g_addr rmap_src; /* Src is set via routemap */ - /* Nexthops obtained by recursive resolution. * * If the nexthop struct needs to be resolved recursively, @@ -104,15 +135,9 @@ struct nexthop { /* Recursive parent */ struct nexthop *rparent; - /* Type of label(s), if any */ - enum lsp_types_t nh_label_type; - /* Label(s) associated with this nexthop. */ struct mpls_label_stack *nh_label; - /* Weight of the nexthop ( for unequal cost ECMP ) */ - uint8_t weight; - /* Count and index of corresponding backup nexthop(s) in a backup list; * only meaningful if the HAS_BACKUP flag is set. */ @@ -138,6 +163,29 @@ struct nexthop { struct nexthop_srv6 *nh_srv6; }; +/* all hashed fields (including padding, if it is necessary to add) need to + * be listed in the static_assert below + */ + +#define S(field) sizeof(((struct nexthop *)NULL)->field) + +static_assert( + offsetof(struct nexthop, _hash_end) - offsetof(struct nexthop, _hash_begin) == + S(type) + S(vrf_id) + S(ifindex) + S(nh_label_type) + S(gate) + S(src) + S(rmap_src), + "struct nexthop contains padding, this can break things. insert _pad fields at appropriate places"); + +#undef S + +/* this is here to show exactly what is meant by the comments above about + * the hashing + */ +static inline uint32_t _nexthop_hash_bytes(const struct nexthop *nh, uint32_t seed) +{ + return jhash(&nh->_hash_begin, + offsetof(struct nexthop, _hash_end) - offsetof(struct nexthop, _hash_begin), + seed); +} + /* Utility to append one nexthop to another. */ #define NEXTHOP_APPEND(to, new) \ do { \ @@ -183,27 +231,11 @@ struct nexthop *nexthop_from_blackhole(enum blackhole_type bh_type, /* * Hash a nexthop. Suitable for use with hash tables. * - * This function uses the following values when computing the hash: - * - vrf_id - * - ifindex - * - type - * - gate - * - * nexthop - * The nexthop to hash - * - * Returns: - * 32-bit hash of nexthop + * Please double check the code on what is included in the hash, there was + * documentation here but it got outdated and the only thing worse than no + * doc is incorrect doc. */ uint32_t nexthop_hash(const struct nexthop *nexthop); -/* - * Hash a nexthop only on word-sized attributes: - * - vrf_id - * - ifindex - * - type - * - (some) flags - */ -uint32_t nexthop_hash_quick(const struct nexthop *nexthop); extern bool nexthop_same(const struct nexthop *nh1, const struct nexthop *nh2); extern bool nexthop_same_no_labels(const struct nexthop *nh1,