From e5218ec873949bea27a116a48372e04c6b70291f Mon Sep 17 00:00:00 2001 From: Ameya Dharkar Date: Thu, 16 May 2019 13:28:25 -0700 Subject: [PATCH 1/6] Zebra: Data structures for RMAC processing in FPM - FPM MAC structure: This data structure will contain all the information required for FPM message generation for an RMAC. struct fpm_mac_info_t { struct ethaddr macaddr; uint32_t zebra_flags; /* Could be used to build FPM messages */ vni_t vni; ifindex_t vxlan_if; ifindex_t svi_if; /* L2 or L3 Bridge interface */ struct in_addr r_vtep_ip; /* Remote VTEP IP */ /* Linkage to put MAC on the FPM processing queue. */ TAILQ_ENTRY(fpm_mac_info_t) fpm_mac_q_entries; uint8_t fpm_flags; }; - Queue structure for FPM processing: For FPM processing, we build a queue of "fpm_mac_info_t". When an RMAC is added or deleted from zebra, an fpm_mac_info_t node is enqueued in this queue for the corresponding operation. FPM thread will dequeue these nodes one by one to generate a netlink message. TAILQ_HEAD(zfpm_mac_q, fpm_mac_info_t) mac_q; - Hash table for "fpm_mac_info_t" When zebra tries to enqueue fpm_mac_info_t for a new RMAC add/delete operation, it is possible that this RMAC is already present in the queue. So, to avoid multiple messages for duplicate RMAC nodes, insert fpm_mac_info_t into a hash table. struct hash *fpm_mac_info_table; - Before enqueueing any MAC, try to fetch the fpm_mac_info_t from the hash table first. - Entry is deleted from the hash table when the node is dequeued. - For hash table key generation, parameters used are "mac address" and "vni". This will provide a fairly unique key for a MAC (zfpm_mac_info_hash_keymake). - Compare function uses "mac address", "RVTEP address" and "VNI" as the key which is sufficient to distinguish any two RMACs. This compare function is used for fpm_mac_info_t lookup (zfpm_mac_info_cmp). 
Signed-off-by: Ameya Dharkar --- zebra/zebra_fpm.c | 100 ++++++++++++++++++++++++++++++++++++++ zebra/zebra_fpm_private.h | 28 +++++++++++ 2 files changed, 128 insertions(+) diff --git a/zebra/zebra_fpm.c b/zebra/zebra_fpm.c index 35a5d69ee3..a98515fafc 100644 --- a/zebra/zebra_fpm.c +++ b/zebra/zebra_fpm.c @@ -30,15 +30,20 @@ #include "network.h" #include "command.h" #include "version.h" +#include "jhash.h" #include "zebra/rib.h" #include "zebra/zserv.h" #include "zebra/zebra_ns.h" #include "zebra/zebra_vrf.h" #include "zebra/zebra_errors.h" +#include "zebra/zebra_memory.h" #include "fpm/fpm.h" #include "zebra_fpm_private.h" +#include "zebra/zebra_router.h" + +DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO"); /* * Interval at which we attempt to connect to the FPM. @@ -178,6 +183,25 @@ typedef struct zfpm_glob_t_ { */ TAILQ_HEAD(zfpm_dest_q, rib_dest_t_) dest_q; + /* + * List of fpm_mac_info structures to be processed + */ + TAILQ_HEAD(zfpm_mac_q, fpm_mac_info_t) mac_q; + + /* + * Hash table of fpm_mac_info_t entries + * + * While adding fpm_mac_info_t for a MAC to the mac_q, + * it is possible that another fpm_mac_info_t node for the this MAC + * is already present in the queue. + * This is possible in the case of consecutive add->delete operations. + * To avoid such duplicate insertions in the mac_q, + * define a hash table for fpm_mac_info_t which can be looked up + * to see if an fpm_mac_info_t node for a MAC is already present + * in the mac_q. + */ + struct hash *fpm_mac_info_table; + /* * Stream socket to the FPM. 
*/ @@ -1276,6 +1300,75 @@ static int zfpm_trigger_update(struct route_node *rn, const char *reason) return 0; } +/* + * Generate Key for FPM MAC info hash entry + * Key is generated using MAC address and VNI id which should be sufficient + * to provide uniqueness + */ +static unsigned int zfpm_mac_info_hash_keymake(const void *p) +{ + struct fpm_mac_info_t *fpm_mac = (struct fpm_mac_info_t *)p; + uint32_t mac_key; + + mac_key = jhash(fpm_mac->macaddr.octet, ETH_ALEN, 0xa5a5a55a); + + return jhash_2words(mac_key, fpm_mac->vni, 0); +} + +/* + * Compare function for FPM MAC info hash lookup + */ +static bool zfpm_mac_info_cmp(const void *p1, const void *p2) +{ + const struct fpm_mac_info_t *fpm_mac1 = p1; + const struct fpm_mac_info_t *fpm_mac2 = p2; + + if (memcmp(fpm_mac1->macaddr.octet, fpm_mac2->macaddr.octet, ETH_ALEN) + != 0) + return false; + if (fpm_mac1->r_vtep_ip.s_addr != fpm_mac2->r_vtep_ip.s_addr) + return false; + if (fpm_mac1->vni != fpm_mac2->vni) + return false; + + return true; +} + +/* + * Lookup FPM MAC info hash entry. + */ +static struct fpm_mac_info_t *zfpm_mac_info_lookup(struct fpm_mac_info_t *key) +{ + return hash_lookup(zfpm_g->fpm_mac_info_table, key); +} + +/* + * Callback to allocate fpm_mac_info_t structure. + */ +static void *zfpm_mac_info_alloc(void *p) +{ + const struct fpm_mac_info_t *key = p; + struct fpm_mac_info_t *fpm_mac; + + fpm_mac = XCALLOC(MTYPE_FPM_MAC_INFO, sizeof(struct fpm_mac_info_t)); + + memcpy(&fpm_mac->macaddr, &key->macaddr, ETH_ALEN); + memcpy(&fpm_mac->r_vtep_ip, &key->r_vtep_ip, sizeof(struct in_addr)); + fpm_mac->vni = key->vni; + + return (void *)fpm_mac; +} + +/* + * Delink and free fpm_mac_info_t. 
+ */ +static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac) +{ + hash_release(zfpm_g->fpm_mac_info_table, fpm_mac); + TAILQ_REMOVE(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries); + XFREE(MTYPE_FPM_MAC_INFO, fpm_mac); +} + /* * zfpm_stats_timer_cb */ @@ -1589,6 +1682,13 @@ static int zfpm_init(struct thread_master *master) memset(zfpm_g, 0, sizeof(*zfpm_g)); zfpm_g->master = master; TAILQ_INIT(&zfpm_g->dest_q); + TAILQ_INIT(&zfpm_g->mac_q); + + /* Create hash table for fpm_mac_info_t enties */ + zfpm_g->fpm_mac_info_table = hash_create(zfpm_mac_info_hash_keymake, + zfpm_mac_info_cmp, + "FPM MAC info hash table"); + zfpm_g->sock = -1; zfpm_g->state = ZFPM_STATE_IDLE; diff --git a/zebra/zebra_fpm_private.h b/zebra/zebra_fpm_private.h index 943aad9864..3ca39c45a9 100644 --- a/zebra/zebra_fpm_private.h +++ b/zebra/zebra_fpm_private.h @@ -53,6 +53,34 @@ static inline void zfpm_debug(const char *format, ...) } #endif +/* This structure contains the MAC addresses enqueued for FPM processing. */ +struct fpm_mac_info_t { + struct ethaddr macaddr; + uint32_t zebra_flags; /* Could be used to build FPM messages */ + vni_t vni; + ifindex_t vxlan_if; + ifindex_t svi_if; /* L2 or L3 Bridge interface */ + struct in_addr r_vtep_ip; /* Remote VTEP IP */ + + /* Linkage to put MAC on the FPM processing queue. */ + TAILQ_ENTRY(fpm_mac_info_t) fpm_mac_q_entries; + + uint8_t fpm_flags; + +#define ZEBRA_MAC_UPDATE_FPM 0x1 /* This flag indicates if we want to upadte + * data plane for this MAC. If a MAC is added + * and then deleted immediately, we do not want + * to update data plane for such operation. + * Unset the ZEBRA_MAC_UPDATE_FPM flag in this + * case. FPM thread while processing the queue + * node will check this flag and dequeue the + * node silently without sending any update to + * the data plane. + */ +#define ZEBRA_MAC_DELETE_FPM 0x2 /* This flag is set if it is a delete operation + * for the MAC. 
+ */ +}; /* * Externs From a780a738968ffc077a81d108b906cf9a06f3ef4d Mon Sep 17 00:00:00 2001 From: Ameya Dharkar Date: Thu, 16 May 2019 14:43:41 -0700 Subject: [PATCH 2/6] Zebra: Handle RMAC add/delete operation and add fpm_mac_info_t - Define a hook "zebra_rmac_update" which can be registered by multiple data plane components (e.g. FPM, dplane). DEFINE_HOOK(zebra_rmac_update, (zebra_mac_t *rmac, zebra_l3vni_t *zl3vni, bool delete, const char *reason), (rmac, zl3vni, delete, reason)) - While performing RMAC add/delete for an L3VNI, call the "zebra_rmac_update" hook. - This hook call triggers "zfpm_trigger_rmac_update". In this function, we do a lookup for the RMAC in fpm_mac_info_table. If already present, this node is updated with the latest RMAC info. Else, a new fpm_mac_info_t node is created and inserted in the queue and hash data structures. Signed-off-by: Ameya Dharkar --- zebra/zebra_fpm.c | 124 ++++++++++++++++++++++++++++++++++++ zebra/zebra_vxlan.c | 21 ++++-- zebra/zebra_vxlan_private.h | 6 ++ 3 files changed, 147 insertions(+), 4 deletions(-) diff --git a/zebra/zebra_fpm.c b/zebra/zebra_fpm.c index a98515fafc..2875324be9 100644 --- a/zebra/zebra_fpm.c +++ b/zebra/zebra_fpm.c @@ -42,6 +42,7 @@ #include "fpm/fpm.h" #include "zebra_fpm_private.h" #include "zebra/zebra_router.h" +#include "zebra_vxlan_private.h" DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO"); @@ -283,6 +284,7 @@ static int zfpm_write_cb(struct thread *thread); static void zfpm_set_state(zfpm_state_t state, const char *reason); static void zfpm_start_connect_timer(const char *reason); static void zfpm_start_stats_timer(void); +static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac); /* * zfpm_thread_should_yield @@ -615,9 +617,17 @@ static int zfpm_conn_down_thread_cb(struct thread *thread) struct route_node *rnode; zfpm_rnodes_iter_t *iter; rib_dest_t *dest; + struct fpm_mac_info_t *mac = NULL; assert(zfpm_g->state == ZFPM_STATE_IDLE); + /* + * Delink and free all fpm_mac_info_t 
nodes + * in the mac_q and fpm_mac_info_hash + */ + while ((mac = TAILQ_FIRST(&zfpm_g->mac_q)) != NULL) + zfpm_mac_info_del(mac); + zfpm_g->t_conn_down = NULL; iter = &zfpm_g->t_conn_down_state.iter; @@ -1369,6 +1379,119 @@ static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac) XFREE(MTYPE_FPM_MAC_INFO, fpm_mac); } +/* + * zfpm_trigger_rmac_update + * + * Zebra code invokes this function to indicate that we should + * send an update to FPM for given MAC entry. + * + * This function checks if we already have enqueued an update for this RMAC, + * If yes, update the same fpm_mac_info_t. Else, create and enqueue an update. + */ +static int zfpm_trigger_rmac_update(zebra_mac_t *rmac, zebra_l3vni_t *zl3vni, + bool delete, const char *reason) +{ + char buf[ETHER_ADDR_STRLEN]; + struct fpm_mac_info_t *fpm_mac, key; + struct interface *vxlan_if, *svi_if; + + /* + * Ignore if the connection is down. We will update the FPM about + * all destinations once the connection comes up. + */ + if (!zfpm_conn_is_up()) + return 0; + + if (reason) { + zfpm_debug("triggering update to FPM - Reason: %s - %s", + reason, + prefix_mac2str(&rmac->macaddr, buf, sizeof(buf))); + } + + vxlan_if = zl3vni_map_to_vxlan_if(zl3vni); + svi_if = zl3vni_map_to_svi_if(zl3vni); + + memset(&key, 0, sizeof(struct fpm_mac_info_t)); + + memcpy(&key.macaddr, &rmac->macaddr, ETH_ALEN); + key.r_vtep_ip.s_addr = rmac->fwd_info.r_vtep_ip.s_addr; + key.vni = zl3vni->vni; + + /* Check if this MAC is already present in the queue. */ + fpm_mac = zfpm_mac_info_lookup(&key); + + if (fpm_mac) { + if (!!CHECK_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) + == delete) { + /* + * MAC is already present in the queue + * with the same op as this one. Do nothing + */ + zfpm_g->stats.redundant_triggers++; + return 0; + } + + /* + * A new op for an already existing fpm_mac_info_t node. + * Update the existing node for the new op. + */ + if (!delete) { + /* + * New op is "add". Previous op is "delete". 
+ * Update the fpm_mac_info_t for the new add. + */ + fpm_mac->zebra_flags = rmac->flags; + + fpm_mac->vxlan_if = vxlan_if ? vxlan_if->ifindex : 0; + fpm_mac->svi_if = svi_if ? svi_if->ifindex : 0; + + UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM); + SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM); + } else { + /* + * New op is "delete". Previous op is "add". + * Thus, no-op. Unset ZEBRA_MAC_UPDATE_FPM flag. + */ + SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM); + UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM); + } + + return 0; + } + + fpm_mac = hash_get(zfpm_g->fpm_mac_info_table, &key, + zfpm_mac_info_alloc); + if (!fpm_mac) + return 0; + + memcpy(&fpm_mac->zebra_flags, &rmac->flags, sizeof(uint32_t)); + + fpm_mac->vxlan_if = vxlan_if ? vxlan_if->ifindex : 0; + fpm_mac->svi_if = svi_if ? svi_if->ifindex : 0; + + SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM); + if (delete) + SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM); + + TAILQ_INSERT_TAIL(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries); + + zfpm_g->stats.updates_triggered++; + + /* + * For now, since we do not have mac_q processing code which takes care + * of delinkng and deleting fpm_mac, delete fpm_mac anyway. + * Remove this delete when that code is added in the subsequent commit. + */ + zfpm_mac_info_del(fpm_mac); + + /* If writes are already enabled, return. 
*/ + if (zfpm_g->t_write) + return 0; + + zfpm_write_on(); + return 0; +} + /* * zfpm_stats_timer_cb */ @@ -1731,6 +1854,7 @@ static int zfpm_init(struct thread_master *master) static int zebra_fpm_module_init(void) { hook_register(rib_update, zfpm_trigger_update); + hook_register(zebra_rmac_update, zfpm_trigger_rmac_update); hook_register(frr_late_init, zfpm_init); return 0; } diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c index 077c1ff8f0..7596387fc7 100644 --- a/zebra/zebra_vxlan.c +++ b/zebra/zebra_vxlan.c @@ -60,6 +60,9 @@ DEFINE_MTYPE_STATIC(ZEBRA, MAC, "VNI MAC"); DEFINE_MTYPE_STATIC(ZEBRA, NEIGH, "VNI Neighbor"); DEFINE_MTYPE_STATIC(ZEBRA, ZVXLAN_SG, "zebra VxLAN multicast group"); +DEFINE_HOOK(zebra_rmac_update, (zebra_mac_t *rmac, zebra_l3vni_t *zl3vni, + bool delete, const char *reason), (rmac, zl3vni, delete, reason)) + /* definitions */ /* PMSI strings. */ #define VXLAN_FLOOD_STR_NO_INFO "-" @@ -143,8 +146,6 @@ static zebra_l3vni_t *zl3vni_lookup(vni_t vni); static void *zl3vni_alloc(void *p); static zebra_l3vni_t *zl3vni_add(vni_t vni, vrf_id_t vrf_id); static int zl3vni_del(zebra_l3vni_t *zl3vni); -static struct interface *zl3vni_map_to_svi_if(zebra_l3vni_t *zl3vni); -static struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni); static void zebra_vxlan_process_l3vni_oper_up(zebra_l3vni_t *zl3vni); static void zebra_vxlan_process_l3vni_oper_down(zebra_l3vni_t *zl3vni); @@ -4459,6 +4460,10 @@ static int zl3vni_remote_rmac_add(zebra_l3vni_t *zl3vni, struct ethaddr *rmac, memset(&zrmac->fwd_info, 0, sizeof(zrmac->fwd_info)); zrmac->fwd_info.r_vtep_ip = vtep_ip->ipaddr_v4; + /* Send RMAC for FPM processing */ + hook_call(zebra_rmac_update, zrmac, zl3vni, false, + "new RMAC added"); + /* install rmac in kernel */ zl3vni_rmac_install(zl3vni, zrmac); } @@ -4479,6 +4484,10 @@ static void zl3vni_remote_rmac_del(zebra_l3vni_t *zl3vni, zebra_mac_t *zrmac, /* uninstall from kernel */ zl3vni_rmac_uninstall(zl3vni, zrmac); + /* Send RMAC for FPM 
processing */ + hook_call(zebra_rmac_update, zrmac, zl3vni, true, + "RMAC deleted"); + /* del the rmac entry */ zl3vni_rmac_del(zl3vni, zrmac); } @@ -4790,7 +4799,7 @@ static int zl3vni_del(zebra_l3vni_t *zl3vni) return 0; } -static struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni) +struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni) { struct zebra_ns *zns = NULL; struct route_node *rn = NULL; @@ -4821,7 +4830,7 @@ static struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni) return NULL; } -static struct interface *zl3vni_map_to_svi_if(zebra_l3vni_t *zl3vni) +struct interface *zl3vni_map_to_svi_if(zebra_l3vni_t *zl3vni) { struct zebra_if *zif = NULL; /* zebra_if for vxlan_if */ struct zebra_l2info_vxlan *vxl = NULL; /* l2 info for vxlan_if */ @@ -5089,6 +5098,10 @@ static void zl3vni_del_rmac_hash_entry(struct hash_bucket *bucket, void *ctx) zrmac = (zebra_mac_t *)bucket->data; zl3vni = (zebra_l3vni_t *)ctx; zl3vni_rmac_uninstall(zl3vni, zrmac); + + /* Send RMAC for FPM processing */ + hook_call(zebra_rmac_update, zrmac, zl3vni, true, "RMAC deleted"); + zl3vni_rmac_del(zl3vni, zrmac); } diff --git a/zebra/zebra_vxlan_private.h b/zebra/zebra_vxlan_private.h index 1dd42b7083..8e78042646 100644 --- a/zebra/zebra_vxlan_private.h +++ b/zebra/zebra_vxlan_private.h @@ -431,6 +431,12 @@ struct nh_walk_ctx { }; extern zebra_l3vni_t *zl3vni_from_vrf(vrf_id_t vrf_id); +extern struct interface *zl3vni_map_to_vxlan_if(zebra_l3vni_t *zl3vni); +extern struct interface *zl3vni_map_to_svi_if(zebra_l3vni_t *zl3vni); + +DECLARE_HOOK(zebra_rmac_update, (zebra_mac_t *rmac, zebra_l3vni_t *zl3vni, + bool delete, const char *reason), (rmac, zl3vni, delete, reason)) + #ifdef __cplusplus } From 21d814eb0bf5918ed8d34de6e8c1c866e6681f09 Mon Sep 17 00:00:00 2001 From: Ameya Dharkar Date: Thu, 16 May 2019 15:53:46 -0700 Subject: [PATCH 3/6] Zebra: FPM processing of mac_q and dest_q - FPM write thread calls "zfpm_build_updates()" to process mac_q and dest_q 
and to write update buffer over the FPM socket. - "zfpm_build_updates()" processes all the update queues one by one in a while loop. It will break the while loop and return if Queue processing function returns "FPM_WRITE_STOP" OR FPM write buffer is full OR all the queues are empty (no more update to process). - "zfpm_build_route_updates()" dequeues and processes route nodes from "dest_q". - "zfpm_build_mac_updates()" dequeues and processes MAC nodes from "mac_q" - These queue processing functions return with "FPM_WRITE_STOP" if the write buffer is full. Return value is "FPM_GOTO_NEXT_Q" if enough updates are processed from this queue and we want to move on to the next queue. - In each call, a queue processing function will process max "FPM_QUEUE_PROCESS_LIMIT (10000)" updates to avoid starvation of other queues. Signed-off-by: Ameya Dharkar --- zebra/zebra_fpm.c | 180 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 161 insertions(+), 19 deletions(-) diff --git a/zebra/zebra_fpm.c b/zebra/zebra_fpm.c index 2875324be9..46fd405761 100644 --- a/zebra/zebra_fpm.c +++ b/zebra/zebra_fpm.c @@ -50,6 +50,7 @@ DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO"); * Interval at which we attempt to connect to the FPM. */ #define ZFPM_CONNECT_RETRY_IVL 5 +#define FPM_MAX_MAC_MSG_LEN 512 /* * Sizes of outgoing and incoming stream buffers for writing/reading @@ -812,6 +813,14 @@ done: return 0; } +static bool zfpm_updates_pending(void) +{ + if (!(TAILQ_EMPTY(&zfpm_g->dest_q)) || !(TAILQ_EMPTY(&zfpm_g->mac_q))) + return true; + + return false; +} + /* * zfpm_writes_pending * @@ -828,9 +837,9 @@ static int zfpm_writes_pending(void) return 1; /* - * Check if there are any prefixes on the outbound queue. + * Check if there are any updates scheduled on the outbound queues. 
*/ - if (!TAILQ_EMPTY(&zfpm_g->dest_q)) + if (zfpm_updates_pending()) return 1; return 0; @@ -895,12 +904,29 @@ struct route_entry *zfpm_route_for_update(rib_dest_t *dest) } /* - * zfpm_build_updates + * Define an enum for return codes for queue processing functions * - * Process the outgoing queue and write messages to the outbound - * buffer. + * FPM_WRITE_STOP: This return code indicates that the write buffer is full. + * Stop processing all the queues and empty the buffer by writing its content + * to the socket. + * + * FPM_GOTO_NEXT_Q: This return code indicates that either this queue is + * empty or we have processed enough updates from this queue. + * So, move on to the next queue. */ -static void zfpm_build_updates(void) +enum { + FPM_WRITE_STOP = 0, + FPM_GOTO_NEXT_Q = 1 +}; + +#define FPM_QUEUE_PROCESS_LIMIT 10000 + +/* + * zfpm_build_route_updates + * + * Process the dest_q queue and write FPM messages to the outbound buffer. + */ +static int zfpm_build_route_updates(void) { struct stream *s; rib_dest_t *dest; @@ -911,25 +937,27 @@ static void zfpm_build_updates(void) struct route_entry *re; int is_add, write_msg; fpm_msg_type_e msg_type; + uint16_t q_limit; + + if (TAILQ_EMPTY(&zfpm_g->dest_q)) + return FPM_GOTO_NEXT_Q; s = zfpm_g->obuf; + q_limit = FPM_QUEUE_PROCESS_LIMIT; - assert(stream_empty(s)); - - do { - + do { /* * Make sure there is enough space to write another message. */ if (STREAM_WRITEABLE(s) < FPM_MAX_MSG_LEN) - break; + return FPM_WRITE_STOP; buf = STREAM_DATA(s) + stream_get_endp(s); buf_end = buf + STREAM_WRITEABLE(s); dest = TAILQ_FIRST(&zfpm_g->dest_q); if (!dest) - break; + return FPM_GOTO_NEXT_Q; assert(CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)); @@ -989,9 +1017,130 @@ static void zfpm_build_updates(void) if (rib_gc_dest(dest->rnode)) zfpm_g->stats.dests_del_after_update++; + q_limit--; + if (q_limit == 0) { + /* + * We have processed enough updates in this queue. + * Now yield for other queues. 
+ */ + return FPM_GOTO_NEXT_Q; + } } while (1); } +/* + * zfpm_encode_mac + * + * Encode a message to FPM with information about the given MAC. + * + * Returns the number of bytes written to the buffer. + */ +static inline int zfpm_encode_mac(struct fpm_mac_info_t *mac, char *in_buf, + size_t in_buf_len, fpm_msg_type_e *msg_type) +{ + size_t len = 0; + + *msg_type = FPM_MSG_TYPE_NONE; + + switch (zfpm_g->message_format) { + + case ZFPM_MSG_FORMAT_NONE: + break; + case ZFPM_MSG_FORMAT_NETLINK: + break; + case ZFPM_MSG_FORMAT_PROTOBUF: + break; + } + return len; +} + +static int zfpm_build_mac_updates(void) +{ + struct stream *s; + struct fpm_mac_info_t *mac; + unsigned char *buf, *data, *buf_end; + fpm_msg_hdr_t *hdr; + size_t data_len, msg_len; + fpm_msg_type_e msg_type; + uint16_t q_limit; + + if (TAILQ_EMPTY(&zfpm_g->mac_q)) + return FPM_GOTO_NEXT_Q; + + s = zfpm_g->obuf; + q_limit = FPM_QUEUE_PROCESS_LIMIT; + + do { + /* Make sure there is enough space to write another message. */ + if (STREAM_WRITEABLE(s) < FPM_MAX_MAC_MSG_LEN) + return FPM_WRITE_STOP; + + buf = STREAM_DATA(s) + stream_get_endp(s); + buf_end = buf + STREAM_WRITEABLE(s); + + mac = TAILQ_FIRST(&zfpm_g->mac_q); + if (!mac) + return FPM_GOTO_NEXT_Q; + + /* Check for no-op */ + if (!CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM)) { + zfpm_g->stats.nop_deletes_skipped++; + zfpm_mac_info_del(mac); + continue; + } + + hdr = (fpm_msg_hdr_t *)buf; + hdr->version = FPM_PROTO_VERSION; + + data = fpm_msg_data(hdr); + data_len = zfpm_encode_mac(mac, (char *)data, buf_end - data, + &msg_type); + /* assert(data_len); */ + + hdr->msg_type = msg_type; + msg_len = fpm_data_len_to_msg_len(data_len); + hdr->msg_len = htons(msg_len); + stream_forward_endp(s, msg_len); + + /* Remove the MAC from the queue, and delete it. */ + zfpm_mac_info_del(mac); + + q_limit--; + if (q_limit == 0) { + /* + * We have processed enough updates in this queue. + * Now yield for other queues. 
+ */ + return FPM_GOTO_NEXT_Q; + } + } while (1); +} + +/* + * zfpm_build_updates + * + * Process the outgoing queues and write messages to the outbound + * buffer. + */ +static void zfpm_build_updates(void) +{ + struct stream *s; + + s = zfpm_g->obuf; + assert(stream_empty(s)); + + do { + /* + * Stop processing the queues if zfpm_g->obuf is full + * or we do not have more updates to process + */ + if (zfpm_build_mac_updates() == FPM_WRITE_STOP) + break; + if (zfpm_build_route_updates() == FPM_WRITE_STOP) + break; + } while (zfpm_updates_pending()); +} + /* * zfpm_write_cb */ @@ -1477,13 +1626,6 @@ static int zfpm_trigger_rmac_update(zebra_mac_t *rmac, zebra_l3vni_t *zl3vni, zfpm_g->stats.updates_triggered++; - /* - * For now, since we do not have mac_q processing code which takes care - * of delinkng and deleting fpm_mac, delete fpm_mac anyway. - * Remove this delete when that code is added in the subsequent commit. - */ - zfpm_mac_info_del(fpm_mac); - /* If writes are already enabled, return. */ if (zfpm_g->t_write) return 0; From fbe748e59fdff41a0ae8a07c42041bcfecb39f92 Mon Sep 17 00:00:00 2001 From: Ameya Dharkar Date: Thu, 16 May 2019 16:40:19 -0700 Subject: [PATCH 4/6] Zebra: Handle FPM connection up/down events - When the connection with the FPM socket is established, iterate through all the L3VNIs and send all the RMACs for FPM processing ("zfpm_conn_up_thread_cb"). - We have already handled the connection down event in previous commits. When the FPM connection goes down, empty mac_q and FPM mac info hash table ("zfpm_conn_down_thread_cb"). Signed-off-by: Ameya Dharkar --- zebra/zebra_fpm.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/zebra/zebra_fpm.c b/zebra/zebra_fpm.c index 46fd405761..83d0dba61f 100644 --- a/zebra/zebra_fpm.c +++ b/zebra/zebra_fpm.c @@ -50,7 +50,6 @@ DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO"); * Interval at which we attempt to connect to the FPM. 
*/ #define ZFPM_CONNECT_RETRY_IVL 5 -#define FPM_MAX_MAC_MSG_LEN 512 /* * Sizes of outgoing and incoming stream buffers for writing/reading @@ -69,6 +68,9 @@ DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO"); * Interval over which we collect statistics. */ #define ZFPM_STATS_IVL_SECS 10 +#define FPM_MAX_MAC_MSG_LEN 512 + +static void zfpm_iterate_rmac_table(struct hash_backet *backet, void *args); /* * Structure that holds state for iterating over all route_node @@ -519,6 +521,9 @@ static int zfpm_conn_up_thread_cb(struct thread *thread) goto done; } + /* Enqueue FPM updates for all the RMAC entries */ + hash_iterate(zrouter.l3vni_table, zfpm_iterate_rmac_table, NULL); + while ((rnode = zfpm_rnodes_iter_next(iter))) { dest = rib_dest_from_rnode(rnode); @@ -1634,6 +1639,33 @@ static int zfpm_trigger_rmac_update(zebra_mac_t *rmac, zebra_l3vni_t *zl3vni, return 0; } +/* + * This function is called when the FPM connections is established. + * Iterate over all the RMAC entries for the given L3VNI + * and enqueue the RMAC for FPM processing. + */ +static void zfpm_trigger_rmac_update_wrapper(struct hash_backet *backet, + void *args) +{ + zebra_mac_t *zrmac = (zebra_mac_t *)backet->data; + zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)args; + + zfpm_trigger_rmac_update(zrmac, zl3vni, false, "RMAC added"); +} + +/* + * This function is called when the FPM connections is established. + * This function iterates over all the L3VNIs to trigger + * FPM updates for RMACs currently available. 
+ */ +static void zfpm_iterate_rmac_table(struct hash_backet *backet, void *args) +{ + zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)backet->data; + + hash_iterate(zl3vni->rmac_table, zfpm_trigger_rmac_update_wrapper, + (void *)zl3vni); +} + /* * zfpm_stats_timer_cb */ @@ -1951,8 +1983,8 @@ static int zfpm_init(struct thread_master *master) /* Create hash table for fpm_mac_info_t enties */ zfpm_g->fpm_mac_info_table = hash_create(zfpm_mac_info_hash_keymake, - zfpm_mac_info_cmp, - "FPM MAC info hash table"); + zfpm_mac_info_cmp, + "FPM MAC info hash table"); zfpm_g->sock = -1; zfpm_g->state = ZFPM_STATE_IDLE; From 9da60d0a19b1a838865e37e6554481dc851c1bcb Mon Sep 17 00:00:00 2001 From: Ameya Dharkar Date: Thu, 16 May 2019 17:29:08 -0700 Subject: [PATCH 5/6] Zebra: Build netlink message for RMAC updates - Function "zfpm_netlink_encode_mac()" builds a netlink message for RMAC updates. - To build a netlink message for RMAC updates, we use "ndmsg" in rtnetlink. - FPM Message structure is: FPM header -> nlmsg header -> ndmsg fields -> ndmsg attributes - Netlink message will look like: {'ndm_type': 0, 'family': 7, '__pad': (), 'header': {'flags': 1281, 'length':64, 'type': 28, 'pid': 0, 'sequence_number': 0}, 'state': 2, 'flags': 22, 'attrs': [('NDA_LLADDR', 'b2:66:eb:b9:5b:d3'), ('NDA_DST', '10.100.0.2'), ('NDA_MASTER', 11), ('NDA_VNI', 1000)], 'ifindex': 18} - Message details: nlmsghdr.nlmsg_type = RTM_NEWNEIGH(28) or RTM_DELNEIGH(29) nlmsghdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE for "add", "NLM_F_REQUEST" for delete. ndmsg.ndm_family = AF_BRIDGE ndmsg.ndm_ifindex = vxlan_if (ifindex) ndmsg.ndm_state = NUD_REACHABLE ndmsg.ndm_flags |= NTF_SELF | NTF_MASTER | NTF_EXT_LEARNED Attribute "NDA_LLADDR" for MAC address Attribute "NDA_DST" for remote VTEP IP Attribute "NDA_MASTER" for bridge interface ifindex. Attribute "NDA_VNI" for VNI id. 
Signed-off-by: Ameya Dharkar --- zebra/zebra_fpm.c | 7 ++++- zebra/zebra_fpm_netlink.c | 63 +++++++++++++++++++++++++++++++++++++++ zebra/zebra_fpm_private.h | 3 ++ 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/zebra/zebra_fpm.c b/zebra/zebra_fpm.c index 83d0dba61f..3e74db8111 100644 --- a/zebra/zebra_fpm.c +++ b/zebra/zebra_fpm.c @@ -1052,6 +1052,11 @@ static inline int zfpm_encode_mac(struct fpm_mac_info_t *mac, char *in_buf, case ZFPM_MSG_FORMAT_NONE: break; case ZFPM_MSG_FORMAT_NETLINK: +#ifdef HAVE_NETLINK + len = zfpm_netlink_encode_mac(mac, in_buf, in_buf_len); + assert(fpm_msg_align(len) == len); + *msg_type = FPM_MSG_TYPE_NETLINK; +#endif /* HAVE_NETLINK */ break; case ZFPM_MSG_FORMAT_PROTOBUF: break; @@ -1100,7 +1105,7 @@ static int zfpm_build_mac_updates(void) data = fpm_msg_data(hdr); data_len = zfpm_encode_mac(mac, (char *)data, buf_end - data, &msg_type); - /* assert(data_len); */ + assert(data_len); hdr->msg_type = msg_type; msg_len = fpm_data_len_to_msg_len(data_len); diff --git a/zebra/zebra_fpm_netlink.c b/zebra/zebra_fpm_netlink.c index 2ac79b100c..4f2013585d 100644 --- a/zebra/zebra_fpm_netlink.c +++ b/zebra/zebra_fpm_netlink.c @@ -580,4 +580,67 @@ int zfpm_netlink_encode_route(int cmd, rib_dest_t *dest, struct route_entry *re, return netlink_route_info_encode(ri, in_buf, in_buf_len); } +/* + * zfpm_netlink_encode_mac + * + * Create a netlink message corresponding to the given MAC. + * + * Returns the number of bytes written to the buffer. 0 or a negative + * value indicates an error. 
+ */ +int zfpm_netlink_encode_mac(struct fpm_mac_info_t *mac, char *in_buf, + size_t in_buf_len) +{ + char buf1[ETHER_ADDR_STRLEN]; + size_t buf_offset; + + struct { + struct nlmsghdr hdr; + struct ndmsg ndm; + char buf[0]; + } *req; + req = (void *)in_buf; + + buf_offset = ((char *)req->buf) - ((char *)req); + if (in_buf_len < buf_offset) + return 0; + memset(req, 0, buf_offset); + + /* Construct nlmsg header */ + req->hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)); + req->hdr.nlmsg_type = CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) ? + RTM_DELNEIGH : RTM_NEWNEIGH; + req->hdr.nlmsg_flags = NLM_F_REQUEST; + if (req->hdr.nlmsg_type == RTM_NEWNEIGH) + req->hdr.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE); + + /* Construct ndmsg */ + req->ndm.ndm_family = AF_BRIDGE; + req->ndm.ndm_ifindex = mac->vxlan_if; + + req->ndm.ndm_state = NUD_REACHABLE; + req->ndm.ndm_flags |= NTF_SELF | NTF_MASTER; + if (CHECK_FLAG(mac->zebra_flags, + (ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW))) + req->ndm.ndm_state |= NUD_NOARP; + else + req->ndm.ndm_flags |= NTF_EXT_LEARNED; + + /* Add attributes */ + addattr_l(&req->hdr, in_buf_len, NDA_LLADDR, &mac->macaddr, 6); + addattr_l(&req->hdr, in_buf_len, NDA_DST, &mac->r_vtep_ip, 4); + addattr32(&req->hdr, in_buf_len, NDA_MASTER, mac->svi_if); + addattr32(&req->hdr, in_buf_len, NDA_VNI, mac->vni); + + assert(req->hdr.nlmsg_len < in_buf_len); + + zfpm_debug("Tx %s family %s ifindex %u MAC %s DEST %s", + nl_msg_type_to_str(req->hdr.nlmsg_type), + nl_family_to_str(req->ndm.ndm_family), req->ndm.ndm_ifindex, + prefix_mac2str(&mac->macaddr, buf1, sizeof(buf1)), + inet_ntoa(mac->r_vtep_ip)); + + return req->hdr.nlmsg_len; +} + #endif /* HAVE_NETLINK */ diff --git a/zebra/zebra_fpm_private.h b/zebra/zebra_fpm_private.h index 3ca39c45a9..c169ee8c22 100644 --- a/zebra/zebra_fpm_private.h +++ b/zebra/zebra_fpm_private.h @@ -92,6 +92,9 @@ extern int zfpm_netlink_encode_route(int cmd, rib_dest_t *dest, extern int 
zfpm_protobuf_encode_route(rib_dest_t *dest, struct route_entry *re, uint8_t *in_buf, size_t in_buf_len); +extern int zfpm_netlink_encode_mac(struct fpm_mac_info_t *mac, char *in_buf, + size_t in_buf_len); + extern struct route_entry *zfpm_route_for_update(rib_dest_t *dest); #ifdef __cplusplus From c5431822debf21339dda12defd645683caa29e92 Mon Sep 17 00:00:00 2001 From: Ameya Dharkar Date: Mon, 17 Jun 2019 12:03:41 -0700 Subject: [PATCH 6/6] Zebra: Address review comments for RMAC FPM feature 1 Address minor review comments. Signed-off-by: Ameya Dharkar --- zebra/zebra_fpm.c | 5 ++--- zebra/zebra_fpm_netlink.c | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/zebra/zebra_fpm.c b/zebra/zebra_fpm.c index 3e74db8111..32b9763c56 100644 --- a/zebra/zebra_fpm.c +++ b/zebra/zebra_fpm.c @@ -1030,7 +1030,7 @@ static int zfpm_build_route_updates(void) */ return FPM_GOTO_NEXT_Q; } - } while (1); + } while (true); } /* @@ -1623,8 +1623,7 @@ static int zfpm_trigger_rmac_update(zebra_mac_t *rmac, zebra_l3vni_t *zl3vni, if (!fpm_mac) return 0; - memcpy(&fpm_mac->zebra_flags, &rmac->flags, sizeof(uint32_t)); - + fpm_mac->zebra_flags = rmac->flags; fpm_mac->vxlan_if = vxlan_if ? vxlan_if->ifindex : 0; fpm_mac->svi_if = svi_if ? svi_if->ifindex : 0; diff --git a/zebra/zebra_fpm_netlink.c b/zebra/zebra_fpm_netlink.c index 4f2013585d..d5479bc627 100644 --- a/zebra/zebra_fpm_netlink.c +++ b/zebra/zebra_fpm_netlink.c @@ -594,14 +594,14 @@ int zfpm_netlink_encode_mac(struct fpm_mac_info_t *mac, char *in_buf, char buf1[ETHER_ADDR_STRLEN]; size_t buf_offset; - struct { + struct macmsg { struct nlmsghdr hdr; struct ndmsg ndm; char buf[0]; } *req; req = (void *)in_buf; - buf_offset = ((char *)req->buf) - ((char *)req); + buf_offset = offsetof(struct macmsg, buf); if (in_buf_len < buf_offset) return 0; memset(req, 0, buf_offset);