diff --git a/bgpd/bgp_errors.c b/bgpd/bgp_errors.c index 8a33ce6789..0f9d5fc73a 100644 --- a/bgpd/bgp_errors.c +++ b/bgpd/bgp_errors.c @@ -462,6 +462,18 @@ static struct log_ref ferr_bgp_err[] = { .description = "As part of normal collision detection for opening a connection to a peer, BGP has detected that the remote peer's router-id is the same as ours", .suggestion = "Change one of the two router-id's", }, + { + .code = EC_BGP_INVALID_BGP_INSTANCE, + .title = "BGP instance for the specifc vrf is invalid", + .description = "Indicates that specified bgp instance is NULL", + .suggestion = "Get log files from router and open an issue", + }, + { + .code = EC_BGP_INVALID_ROUTE, + .title = "BGP route node is invalid", + .description = "BGP route for the specified AFI/SAFI is NULL", + .suggestion = "Get log files from router and open an issue", + }, { .code = END_FERR, } diff --git a/bgpd/bgp_errors.h b/bgpd/bgp_errors.h index 49c58ae6b0..20056d382a 100644 --- a/bgpd/bgp_errors.h +++ b/bgpd/bgp_errors.h @@ -99,6 +99,8 @@ enum bgp_log_refs { EC_BGP_INVALID_NEXTHOP_LENGTH, EC_BGP_DOPPELGANGER_CONFIG, EC_BGP_ROUTER_ID_SAME, + EC_BGP_INVALID_BGP_INSTANCE, + EC_BGP_INVALID_ROUTE, }; extern void bgp_error_init(void); diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 4468408415..dd31a9b7cb 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -792,9 +792,13 @@ void bgp_adjust_routeadv(struct peer *peer) BGP_TIMER_OFF(peer->t_routeadv); peer->synctime = bgp_clock(); - thread_add_timer_msec(bm->master, bgp_generate_updgrp_packets, - peer, 0, - &peer->t_generate_updgrp_packets); + /* If suppress fib pending is enabled, route is advertised to + * peers when the status is received from the FIB. The delay + * is added to update group packet generate which will allow + * more routes to be sent in the update message + */ + BGP_UPDATE_GROUP_TIMER_ON(&peer->t_generate_updgrp_packets, + bgp_generate_updgrp_packets); return; } diff --git a/bgpd/bgp_fsm.h b/bgpd/bgp_fsm.h index b9156df617..cd464d8c58 100644 --- a/bgpd/bgp_fsm.h +++ b/bgpd/bgp_fsm.h @@ -47,6 +47,18 @@ thread_cancel_event(bm->master, (P)); \ } while (0) +#define BGP_UPDATE_GROUP_TIMER_ON(T, F) \ + do { \ + if (BGP_SUPPRESS_FIB_ENABLED(peer->bgp) && \ + PEER_ROUTE_ADV_DELAY(peer)) \ + thread_add_timer_msec(bm->master, (F), peer, \ + (BGP_DEFAULT_UPDATE_ADVERTISEMENT_TIME * 1000),\ + T); \ + else \ + thread_add_timer_msec(bm->master, (F), peer, \ + 0, T); \ + } while (0) \ + #define BGP_MSEC_JITTER 10 /* Status codes for bgp_event_update() */ diff --git a/bgpd/bgp_io.c b/bgpd/bgp_io.c index 38455b5e02..53fd3b5fe3 100644 --- a/bgpd/bgp_io.c +++ b/bgpd/bgp_io.c @@ -149,12 +149,17 @@ static int bgp_process_writes(struct thread *thread) fatal = true; } + /* If suppress fib pending is enabled, route is advertised to peers when + * the status is received from the FIB. The delay is added + * to update group packet generate which will allow more routes to be + * sent in the update message + */ if (reschedule) { thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd, &peer->t_write); } else if (!fatal) { - BGP_TIMER_ON(peer->t_generate_updgrp_packets, - bgp_generate_updgrp_packets, 0); + BGP_UPDATE_GROUP_TIMER_ON(&peer->t_generate_updgrp_packets, + bgp_generate_updgrp_packets); } return 0; diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index f8a29821d8..9873057fa2 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -2497,10 +2497,13 @@ void subgroup_process_announce_selected(struct update_subgroup *subgrp, struct attr attr; afi_t afi; safi_t safi; + struct bgp *bgp; + bool advertise; p = bgp_dest_get_prefix(dest); afi = SUBGRP_AFI(subgrp); safi = SUBGRP_SAFI(subgrp); + bgp = SUBGRP_INST(subgrp); onlypeer = ((SUBGRP_PCOUNT(subgrp) == 1) ? (SUBGRP_PFIRST(subgrp))->peer : NULL); @@ -2515,13 +2518,23 @@ void subgroup_process_announce_selected(struct update_subgroup *subgrp, memset(&attr, 0, sizeof(struct attr)); /* It's initialized in bgp_announce_check() */ - /* Announcement to the subgroup. If the route is filtered withdraw it. + /* Announcement to the subgroup. If the route is filtered withdraw it. + * If BGP_NODE_FIB_INSTALL_PENDING is set and data plane install status + * is pending (BGP_NODE_FIB_INSTALL_PENDING), do not advertise the + * route */ + advertise = bgp_check_advertise(bgp, dest); + if (selected) { if (subgroup_announce_check(dest, selected, subgrp, p, &attr, - false)) - bgp_adj_out_set_subgroup(dest, subgrp, &attr, selected); - else + false)) { + /* Route is selected, if the route is already installed + * in FIB, then it is advertised + */ + if (advertise) + bgp_adj_out_set_subgroup(dest, subgrp, &attr, + selected); + } else bgp_adj_out_unset_subgroup(dest, subgrp, 1, addpath_tx_id); } @@ -3525,6 +3538,19 @@ int bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id, else has_valid_label = bgp_is_valid_label(label); + /* The flag BGP_NODE_FIB_INSTALL_PENDING is for the following + * condition : + * Suppress fib is enabled + * BGP_OPT_NO_FIB is not enabled + * Route type is BGP_ROUTE_NORMAL (peer learnt routes) + * Route is being installed first time (BGP_NODE_FIB_INSTALLED not set) + */ + if (BGP_SUPPRESS_FIB_ENABLED(bgp) && + (sub_type == BGP_ROUTE_NORMAL) && + (!bgp_option_check(BGP_OPT_NO_FIB)) && + (!CHECK_FLAG(dest->flags, BGP_NODE_FIB_INSTALLED))) + SET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING); + /* When peer's soft reconfiguration enabled. Record input packet in Adj-RIBs-In. */ if (!soft_reconfig @@ -6789,6 +6815,12 @@ void bgp_aggregate_route(struct bgp *bgp, const struct prefix *p, afi_t afi, if (dest_p->prefixlen <= p->prefixlen) continue; + /* If suppress fib is enabled and route not installed + * in FIB, skip the route + */ + if (!bgp_check_advertise(bgp, dest)) + continue; + match = 0; for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { @@ -7284,6 +7316,12 @@ void bgp_aggregate_increment(struct bgp *bgp, const struct prefix *p, if (BGP_PATH_HOLDDOWN(pi)) return; + /* If suppress fib is enabled and route not installed + * in FIB, do not update the aggregate route + */ + if (!bgp_check_advertise(bgp, pi->net)) + return; + child = bgp_node_get(table, p); /* Aggregate address configuration check. */ diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index e4c6f9a0e2..0b76d7504b 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -524,6 +524,13 @@ static inline void prep_for_rmap_apply(struct bgp_path_info *dst_pi, } } +static inline bool bgp_check_advertise(struct bgp *bgp, struct bgp_dest *dest) +{ + return (!(BGP_SUPPRESS_FIB_ENABLED(bgp) && + CHECK_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING) && + (!bgp_option_check(BGP_OPT_NO_FIB)))); +} + /* called before bgp_process() */ DECLARE_HOOK(bgp_process, (struct bgp * bgp, afi_t afi, safi_t safi, struct bgp_dest *bn, diff --git a/bgpd/bgp_table.h b/bgpd/bgp_table.h index 4e9abf863d..738d41ee6d 100644 --- a/bgpd/bgp_table.h +++ b/bgpd/bgp_table.h @@ -102,6 +102,8 @@ struct bgp_node { #define BGP_NODE_LABEL_CHANGED (1 << 2) #define BGP_NODE_REGISTERED_FOR_LABEL (1 << 3) #define BGP_NODE_SELECT_DEFER (1 << 4) +#define BGP_NODE_FIB_INSTALL_PENDING (1 << 5) +#define BGP_NODE_FIB_INSTALLED (1 << 6) struct bgp_addpath_node_data tx_addpath; diff --git a/bgpd/bgp_updgrp_adv.c b/bgpd/bgp_updgrp_adv.c index 3cfb73d8a8..a03232466c 100644 --- a/bgpd/bgp_updgrp_adv.c +++ b/bgpd/bgp_updgrp_adv.c @@ -315,6 +315,7 @@ static void updgrp_show_adj(struct bgp *bgp, afi_t afi, safi_t safi, static int subgroup_coalesce_timer(struct thread *thread) { struct update_subgroup *subgrp; + struct bgp *bgp; subgrp = THREAD_ARG(thread); if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0)) @@ -323,6 +324,7 @@ static int subgroup_coalesce_timer(struct thread *thread) subgrp->v_coalesce); subgrp->t_coalesce = NULL; subgrp->v_coalesce = 0; + bgp = SUBGRP_INST(subgrp); subgroup_announce_route(subgrp); @@ -334,7 +336,8 @@ static int subgroup_coalesce_timer(struct thread *thread) * to * announce, this is the method currently employed to trigger the EOR. */ - if (!bgp_update_delay_active(SUBGRP_INST(subgrp))) { + if (!bgp_update_delay_active(SUBGRP_INST(subgrp)) && + !(BGP_SUPPRESS_FIB_ENABLED(bgp))) { struct peer_af *paf; struct peer *peer; @@ -458,10 +461,14 @@ void bgp_adj_out_set_subgroup(struct bgp_dest *dest, struct peer *peer; afi_t afi; safi_t safi; + struct peer *adv_peer; + struct peer_af *paf; + struct bgp *bgp; peer = SUBGRP_PEER(subgrp); afi = SUBGRP_AFI(subgrp); safi = SUBGRP_SAFI(subgrp); + bgp = SUBGRP_INST(subgrp); if (DISABLE_BGP_ANNOUNCE) return; @@ -504,9 +511,21 @@ void bgp_adj_out_set_subgroup(struct bgp_dest *dest, * mrai timers so the socket writes can happen. */ if (!bgp_adv_fifo_count(&subgrp->sync->update)) { - struct peer_af *paf; - SUBGRP_FOREACH_PEER (subgrp, paf) { + /* If there are no routes in the withdraw list, set + * the flag PEER_STATUS_ADV_DELAY which will allow + * more routes to be sent in the update message + */ + if (BGP_SUPPRESS_FIB_ENABLED(bgp)) { + adv_peer = PAF_PEER(paf); + if (!bgp_adv_fifo_count( + &subgrp->sync->withdraw)) + SET_FLAG(adv_peer->thread_flags, + PEER_THREAD_SUBGRP_ADV_DELAY); + else + UNSET_FLAG(adv_peer->thread_flags, + PEER_THREAD_SUBGRP_ADV_DELAY); + } bgp_adjust_routeadv(PAF_PEER(paf)); } } @@ -617,10 +636,13 @@ void subgroup_announce_table(struct update_subgroup *subgrp, afi_t afi; safi_t safi; int addpath_capable; + struct bgp *bgp; + bool advertise; peer = SUBGRP_PEER(subgrp); afi = SUBGRP_AFI(subgrp); safi = SUBGRP_SAFI(subgrp); + bgp = SUBGRP_INST(subgrp); addpath_capable = bgp_addpath_encode_tx(peer, afi, safi); if (safi == SAFI_LABELED_UNICAST) @@ -637,6 +659,9 @@ void subgroup_announce_table(struct update_subgroup *subgrp, for (dest = bgp_table_top(table); dest; dest = bgp_route_next(dest)) { const struct prefix *dest_p = bgp_dest_get_prefix(dest); + /* Check if the route can be advertised */ + advertise = bgp_check_advertise(bgp, dest); + for (ri = bgp_dest_get_bgp_path_info(dest); ri; ri = ri->next) if (CHECK_FLAG(ri->flags, BGP_PATH_SELECTED) @@ -646,10 +671,14 @@ void subgroup_announce_table(struct update_subgroup *subgrp, ri))) { if (subgroup_announce_check(dest, ri, subgrp, dest_p, &attr, - false)) - bgp_adj_out_set_subgroup(dest, subgrp, - &attr, ri); - else { + false)) { + /* Check if route can be advertised */ + if (advertise) + bgp_adj_out_set_subgroup(dest, + subgrp, + &attr, + ri); + } else { /* If default originate is enabled for * the peer, do not send explicit * withdraw. This will prevent deletion @@ -944,6 +973,13 @@ void group_announce_route(struct bgp *bgp, afi_t afi, safi_t safi, struct updwalk_context ctx; ctx.pi = pi; ctx.dest = dest; + + /* If suppress fib is enabled, the route will be advertised when + * FIB status is received + */ + if (!bgp_check_advertise(bgp, dest)) + return; + update_group_af_walk(bgp, afi, safi, group_announce_route_walkcb, &ctx); } diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index 6977223b83..40e9707866 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -1514,6 +1514,20 @@ void cli_show_router_bgp_router_id(struct vty *vty, struct lyd_node *dnode, vty_out(vty, " bgp router-id %s\n", yang_dnode_get_string(dnode, NULL)); } +DEFPY (bgp_suppress_fib_pending, + bgp_suppress_fib_pending_cmd, + "[no] bgp suppress-fib-pending", + NO_STR + BGP_STR + "Advertise only routes that are programmed in kernel to peers\n") +{ + VTY_DECLVAR_CONTEXT(bgp, bgp); + + bgp_suppress_fib_pending_set(bgp, !no); + return CMD_SUCCESS; +} + + /* BGP Cluster ID. */ DEFUN_YANG(bgp_cluster_id, bgp_cluster_id_cmd, @@ -16833,6 +16847,10 @@ int bgp_config_write(struct vty *vty) vty_out(vty, " bgp router-id %pI4\n", &bgp->router_id_static); + /* Suppress fib pending */ + if (CHECK_FLAG(bgp->flags, BGP_FLAG_SUPPRESS_FIB_PENDING)) + vty_out(vty, " bgp suppress-fib-pending\n"); + /* BGP log-neighbor-changes. */ if (!!CHECK_FLAG(bgp->flags, BGP_FLAG_LOG_NEIGHBOR_CHANGES) != SAVE_BGP_LOG_NEIGHBOR_CHANGES) @@ -17348,6 +17366,9 @@ void bgp_vty_init(void) install_element(BGP_NODE, &bgp_router_id_cmd); install_element(BGP_NODE, &no_bgp_router_id_cmd); + /* "bgp suppress-fib-pending" command */ + install_element(BGP_NODE, &bgp_suppress_fib_pending_cmd); + /* "bgp cluster-id" commands. */ install_element(BGP_NODE, &bgp_cluster_id_cmd); install_element(BGP_NODE, &no_bgp_cluster_id_cmd); diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 957db4cbc1..17bb41cb9f 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -2380,6 +2380,99 @@ static int iptable_notify_owner(ZAPI_CALLBACK_ARGS) return 0; } +/* Process route notification messages from RIB */ +static int bgp_zebra_route_notify_owner(int command, struct zclient *zclient, + zebra_size_t length, vrf_id_t vrf_id) +{ + struct prefix p; + enum zapi_route_notify_owner note; + uint32_t table_id; + char buf[PREFIX_STRLEN]; + afi_t afi; + safi_t safi; + struct bgp_dest *dest; + struct bgp *bgp; + struct bgp_path_info *pi, *new_select; + + if (!zapi_route_notify_decode(zclient->ibuf, &p, &table_id, ¬e, + &afi, &safi)) { + zlog_err("%s : error in msg decode", __PRETTY_FUNCTION__); + return -1; + } + + /* Get the bgp instance */ + bgp = bgp_lookup_by_vrf_id(vrf_id); + if (!bgp) { + flog_err(EC_BGP_INVALID_BGP_INSTANCE, + "%s : bgp instance not found vrf %d", + __PRETTY_FUNCTION__, vrf_id); + return -1; + } + + if (BGP_DEBUG(zebra, ZEBRA)) + prefix2str(&p, buf, sizeof(buf)); + + /* Find the bgp route node */ + dest = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi, &p, + &bgp->vrf_prd); + if (!dest) + return -1; + + bgp_dest_unlock_node(dest); + + switch (note) { + case ZAPI_ROUTE_INSTALLED: + new_select = NULL; + /* Clear the flags so that route can be processed */ + if (CHECK_FLAG(dest->flags, + BGP_NODE_FIB_INSTALL_PENDING)) { + UNSET_FLAG(dest->flags, + BGP_NODE_FIB_INSTALL_PENDING); + SET_FLAG(dest->flags, BGP_NODE_FIB_INSTALLED); + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("route %s : INSTALLED", buf); + /* Find the best route */ + for (pi = dest->info; pi; pi = pi->next) { + /* Process aggregate route */ + bgp_aggregate_increment(bgp, &p, pi, + afi, safi); + if (CHECK_FLAG(pi->flags, + BGP_PATH_SELECTED)) + new_select = pi; + } + /* Advertise the route */ + if (new_select) + group_announce_route(bgp, afi, safi, + dest, new_select); + else { + flog_err(EC_BGP_INVALID_ROUTE, + "selected route %s not found", + buf); + return -1; + } + } + break; + case ZAPI_ROUTE_REMOVED: + /* Route deleted from dataplane, reset the installed flag + * so that route can be reinstalled when client sends + * route add later + */ + UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALLED); + break; + case ZAPI_ROUTE_FAIL_INSTALL: + /* Error will be logged by zebra module */ + break; + case ZAPI_ROUTE_BETTER_ADMIN_WON: + /* No action required */ + break; + case ZAPI_ROUTE_REMOVE_FAIL: + zlog_warn("%s: Route %s failure to remove", + __func__, buf); + break; + } + return 0; +} + /* this function is used to forge ip rule, * - either for iptable/ipset using fwmark id * - or for sample ip rule cmd @@ -2910,6 +3003,7 @@ void bgp_zebra_init(struct thread_master *master, unsigned short instance) zclient->ipset_notify_owner = ipset_notify_owner; zclient->ipset_entry_notify_owner = ipset_entry_notify_owner; zclient->iptable_notify_owner = iptable_notify_owner; + zclient->route_notify_owner = bgp_zebra_route_notify_owner; zclient->instance = instance; } diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 4cd603ee8b..26c4579013 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -109,6 +109,9 @@ struct community_list_handler *bgp_clist; unsigned int multipath_num = MULTIPATH_NUM; +/* Number of bgp instances configured for suppress fib config */ +unsigned int bgp_suppress_fib_count; + static void bgp_if_finish(struct bgp *bgp); static void peer_drop_dynamic_neighbor(struct peer *peer); @@ -390,6 +393,43 @@ void bgp_router_id_static_set(struct bgp *bgp, struct in_addr id) true /* is config */); } +/* Set the suppress fib pending for the bgp configuration */ +void bgp_suppress_fib_pending_set(struct bgp *bgp, bool set) +{ + bool send_msg = false; + + if (bgp->inst_type == BGP_INSTANCE_TYPE_VIEW) + return; + + if (set) { + SET_FLAG(bgp->flags, BGP_FLAG_SUPPRESS_FIB_PENDING); + /* Send msg to zebra for the first instance of bgp enabled + * with suppress fib + */ + if (bgp_suppress_fib_count == 0) + send_msg = true; + bgp_suppress_fib_count++; + } else { + UNSET_FLAG(bgp->flags, BGP_FLAG_SUPPRESS_FIB_PENDING); + bgp_suppress_fib_count--; + + /* Send msg to zebra if there are no instances enabled + * with suppress fib + */ + if (bgp_suppress_fib_count == 0) + send_msg = true; + } + /* Send route notify request to RIB */ + if (send_msg) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Sending ZEBRA_ROUTE_NOTIFY_REQUEST"); + + if (zclient) + zebra_route_notify_send(ZEBRA_ROUTE_NOTIFY_REQUEST, + zclient, set); + } +} + /* BGP's cluster-id control. */ int bgp_cluster_id_set(struct bgp *bgp, struct in_addr *cluster_id) { diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 287aeca143..a2a3f374e2 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -455,11 +455,12 @@ struct bgp { #define BGP_FLAG_DELETE_IN_PROGRESS (1 << 22) #define BGP_FLAG_SELECT_DEFER_DISABLE (1 << 23) #define BGP_FLAG_GR_DISABLE_EOR (1 << 24) -#define BGP_FLAG_EBGP_REQUIRES_POLICY (1 << 25) -#define BGP_FLAG_SHOW_NEXTHOP_HOSTNAME (1 << 26) +#define BGP_FLAG_EBGP_REQUIRES_POLICY (1 << 25) +#define BGP_FLAG_SHOW_NEXTHOP_HOSTNAME (1 << 26) /* This flag is set if the instance is in administrative shutdown */ -#define BGP_FLAG_SHUTDOWN (1 << 27) +#define BGP_FLAG_SHUTDOWN (1 << 27) +#define BGP_FLAG_SUPPRESS_FIB_PENDING (1 << 28) enum global_mode GLOBAL_GR_FSM[BGP_GLOBAL_GR_MODE] [BGP_GLOBAL_GR_EVENT_CMD]; @@ -712,6 +713,9 @@ struct afi_safi_info { #define BGP_SELECT_DEFER_DISABLE(bgp) \ (CHECK_FLAG(bgp->flags, BGP_FLAG_SELECT_DEFER_DISABLE)) +#define BGP_SUPPRESS_FIB_ENABLED(bgp) \ + (CHECK_FLAG(bgp->flags, BGP_FLAG_SUPPRESS_FIB_PENDING)) + /* BGP peer-group support. */ struct peer_group { /* Name of the peer-group. */ @@ -1283,6 +1287,8 @@ struct peer { #define PEER_THREAD_WRITES_ON (1U << 0) #define PEER_THREAD_READS_ON (1U << 1) #define PEER_THREAD_KEEPALIVES_ON (1U << 2) +#define PEER_THREAD_SUBGRP_ADV_DELAY (1U << 3) + /* workqueues */ struct work_queue *clear_node_queue; @@ -1507,6 +1513,9 @@ DECLARE_QOBJ_TYPE(peer) || CHECK_FLAG((P)->sflags, PEER_STATUS_PREFIX_OVERFLOW) \ || CHECK_FLAG((P)->bgp->flags, BGP_FLAG_SHUTDOWN)) +#define PEER_ROUTE_ADV_DELAY(peer) \ + (CHECK_FLAG(peer->thread_flags, PEER_THREAD_SUBGRP_ADV_DELAY)) + #define PEER_PASSWORD_MINLEN (1) #define PEER_PASSWORD_MAXLEN (80) @@ -1677,6 +1686,7 @@ struct bgp_nlri { #define BGP_DEFAULT_STALEPATH_TIME 360 #define BGP_DEFAULT_SELECT_DEFERRAL_TIME 360 #define BGP_DEFAULT_RIB_STALE_TIME 500 +#define BGP_DEFAULT_UPDATE_ADVERTISEMENT_TIME 1 /* BGP uptime string length. */ #define BGP_UPTIME_LEN 25 @@ -1853,6 +1863,7 @@ extern int bgp_handle_socket(struct bgp *bgp, struct vrf *vrf, extern void bgp_router_id_zebra_bump(vrf_id_t, const struct prefix *); extern void bgp_router_id_static_set(struct bgp *, struct in_addr); +extern void bgp_suppress_fib_pending_set(struct bgp *bgp, bool set); extern int bgp_cluster_id_set(struct bgp *, struct in_addr *); extern int bgp_cluster_id_unset(struct bgp *); diff --git a/doc/user/bgp.rst b/doc/user/bgp.rst index 24c4a7dbf6..00ae4c082e 100644 --- a/doc/user/bgp.rst +++ b/doc/user/bgp.rst @@ -3462,6 +3462,48 @@ starting the daemon and the configuration gets saved, the option will persist unless removed from the configuration with the negating command prior to the configuration write operation. +.. _bgp-suppress-fib: + +Suppressing routes not installed in FIB +======================================= + +The FRR implementation of BGP advertises prefixes learnt from a peer to other +peers even if the routes do not get installed in the FIB. There can be +scenarios where the hardware tables in some of the routers (along the path from +the source to destination) is full which will result in all routes not getting +installed in the FIB. If these routes are advertised to the downstream routers +then traffic will start flowing and will be dropped at the intermediate router. + +The solution is to provide a configurable option to check for the FIB install +status of the prefixes and advertise to peers if the prefixes are successfully +installed in the FIB. The advertisement of the prefixes are suppressed if it is +not installed in FIB. + +The following conditions apply will apply when checking for route installation +status in FIB: +1. The advertisement or suppression of routes based on FIB install status + applies only for newly learnt routes from peer (routes which are not in + BGP local RIB). +2. If the route received from peer already exists in BGP local RIB and route + attributes have changed (best path changed), the old path is deleted and + new path is installed in FIB. The FIB install status will not have any + effect. Therefore only when the route is received first time the checks + apply. +3. The feature will not apply for routes learnt through other means like + redistribution to bgp from other protocols. This is applicable only to + peer learnt routes. +4. If a route is installed in FIB and then gets deleted from the dataplane, + then routes will not be withdrawn from peers. This will be considered as + dataplane issue. +5. The feature will slightly increase the time required to advertise the routes + to peers since the route install status needs to be received from the FIB +6. If routes are received by the peer before the configuration is applied, then + the bgp sessions need to be reset for the configuration to take effect. +7. If the route which is already installed in dataplane is removed for some + reason, sending withdraw message to peers is not currently supported. + +.. index:: [no] bgp suppress-fib-pending +.. clicmd:: [no] bgp suppress-fib-pending .. _routing-policy: diff --git a/eigrpd/eigrp_zebra.c b/eigrpd/eigrp_zebra.c index 473cc75a2a..0795fbd6df 100644 --- a/eigrpd/eigrp_zebra.c +++ b/eigrpd/eigrp_zebra.c @@ -88,7 +88,8 @@ static int eigrp_zebra_route_notify_owner(ZAPI_CALLBACK_ARGS) enum zapi_route_notify_owner note; uint32_t table; - if (!zapi_route_notify_decode(zclient->ibuf, &p, &table, ¬e)) + if (!zapi_route_notify_decode(zclient->ibuf, &p, &table, ¬e, NULL, + NULL)) return -1; return 0; diff --git a/lib/log.c b/lib/log.c index b629658f75..7b37ba7f27 100644 --- a/lib/log.c +++ b/lib/log.c @@ -455,7 +455,8 @@ static const struct zebra_desc_table command_types[] = { DESC_ENTRY(ZEBRA_NEIGH_DISCOVER), DESC_ENTRY(ZEBRA_NHG_ADD), DESC_ENTRY(ZEBRA_NHG_DEL), - DESC_ENTRY(ZEBRA_NHG_NOTIFY_OWNER)}; + DESC_ENTRY(ZEBRA_NHG_NOTIFY_OWNER), + DESC_ENTRY(ZEBRA_ROUTE_NOTIFY_REQUEST)}; #undef DESC_ENTRY static const struct zebra_desc_table unknown = {0, "unknown", '?'}; diff --git a/lib/zclient.c b/lib/zclient.c index d0144279e5..bab1acf667 100644 --- a/lib/zclient.c +++ b/lib/zclient.c @@ -1494,9 +1494,12 @@ stream_failure: bool zapi_route_notify_decode(struct stream *s, struct prefix *p, uint32_t *tableid, - enum zapi_route_notify_owner *note) + enum zapi_route_notify_owner *note, + afi_t *afi, safi_t *safi) { uint32_t t; + afi_t afi_val; + safi_t safi_val; STREAM_GET(note, s, sizeof(*note)); @@ -1504,9 +1507,16 @@ bool zapi_route_notify_decode(struct stream *s, struct prefix *p, STREAM_GETC(s, p->prefixlen); STREAM_GET(&p->u.prefix, s, prefix_blen(p)); STREAM_GETL(s, t); + STREAM_GETC(s, afi_val); + STREAM_GETC(s, safi_val); *tableid = t; + if (afi) + *afi = afi_val; + if (safi) + *safi = safi_val; + return true; stream_failure: @@ -1815,6 +1825,22 @@ int zebra_redistribute_default_send(int command, struct zclient *zclient, return zclient_send_message(zclient); } +/* Send route notify request to zebra */ +int zebra_route_notify_send(int command, struct zclient *zclient, bool set) +{ + struct stream *s; + + s = zclient->obuf; + stream_reset(s); + + zclient_create_header(s, command, 0); + stream_putc(s, !!set); + + stream_putw_at(s, 0, stream_get_endp(s)); + + return zclient_send_message(zclient); +} + /* Get prefix in ZServ format; family should be filled in on prefix */ static int zclient_stream_get_prefix(struct stream *s, struct prefix *p) { diff --git a/lib/zclient.h b/lib/zclient.h index 80dca3fc56..231fdad09b 100644 --- a/lib/zclient.h +++ b/lib/zclient.h @@ -218,6 +218,7 @@ typedef enum { ZEBRA_OPAQUE_REGISTER, ZEBRA_OPAQUE_UNREGISTER, ZEBRA_NEIGH_DISCOVER, + ZEBRA_ROUTE_NOTIFY_REQUEST, } zebra_message_types_t; enum zebra_error_types { @@ -778,6 +779,10 @@ extern int zebra_redistribute_send(int command, struct zclient *, afi_t, extern int zebra_redistribute_default_send(int command, struct zclient *zclient, afi_t afi, vrf_id_t vrf_id); +/* Send route notify request to zebra */ +extern int zebra_route_notify_send(int command, struct zclient *zclient, + bool set); + /* If state has changed, update state and call zebra_redistribute_send. */ extern void zclient_redistribute(int command, struct zclient *, afi_t, int type, unsigned short instance, vrf_id_t vrf_id); @@ -910,7 +915,8 @@ bool zapi_nhg_notify_decode(struct stream *s, uint32_t *id, enum zapi_nhg_notify_owner *note); bool zapi_route_notify_decode(struct stream *s, struct prefix *p, uint32_t *tableid, - enum zapi_route_notify_owner *note); + enum zapi_route_notify_owner *note, + afi_t *afi, safi_t *safi); bool zapi_rule_notify_decode(struct stream *s, uint32_t *seqno, uint32_t *priority, uint32_t *unique, char *ifname, enum zapi_rule_notify_owner *note); diff --git a/pbrd/pbr_zebra.c b/pbrd/pbr_zebra.c index 32660b4dee..82a2d2feb5 100644 --- a/pbrd/pbr_zebra.c +++ b/pbrd/pbr_zebra.c @@ -167,7 +167,8 @@ static int route_notify_owner(ZAPI_CALLBACK_ARGS) enum zapi_route_notify_owner note; uint32_t table_id; - if (!zapi_route_notify_decode(zclient->ibuf, &p, &table_id, ¬e)) + if (!zapi_route_notify_decode(zclient->ibuf, &p, &table_id, ¬e, + NULL, NULL)) return -1; switch (note) { diff --git a/sharpd/sharp_zebra.c b/sharpd/sharp_zebra.c index 231de6403d..7a335fca3b 100644 --- a/sharpd/sharp_zebra.c +++ b/sharpd/sharp_zebra.c @@ -301,7 +301,8 @@ static int route_notify_owner(ZAPI_CALLBACK_ARGS) enum zapi_route_notify_owner note; uint32_t table; - if (!zapi_route_notify_decode(zclient->ibuf, &p, &table, ¬e)) + if (!zapi_route_notify_decode(zclient->ibuf, &p, &table, ¬e, + NULL, NULL)) return -1; switch (note) { diff --git a/staticd/static_zebra.c b/staticd/static_zebra.c index a9b570de83..0bccbfbead 100644 --- a/staticd/static_zebra.c +++ b/staticd/static_zebra.c @@ -110,7 +110,8 @@ static int route_notify_owner(ZAPI_CALLBACK_ARGS) enum zapi_route_notify_owner note; uint32_t table_id; - if (!zapi_route_notify_decode(zclient->ibuf, &p, &table_id, ¬e)) + if (!zapi_route_notify_decode(zclient->ibuf, &p, &table_id, ¬e, + NULL, NULL)) return -1; switch (note) { diff --git a/tests/topotests/bgp_suppress_fib/r1/bgpd.conf b/tests/topotests/bgp_suppress_fib/r1/bgpd.conf new file mode 100644 index 0000000000..69c563d37c --- /dev/null +++ b/tests/topotests/bgp_suppress_fib/r1/bgpd.conf @@ -0,0 +1,15 @@ +! exit1 +router bgp 1 + no bgp ebgp-requires-policy + neighbor 10.0.0.2 remote-as 2 + + address-family ipv4 unicast + redistribute static + neighbor 10.0.0.2 route-map rmap out + exit-address-family + +ip prefix-list plist seq 5 permit any + +route-map rmap permit 1 + match ip address prefix-list plist +! diff --git a/tests/topotests/bgp_suppress_fib/r1/zebra.conf b/tests/topotests/bgp_suppress_fib/r1/zebra.conf new file mode 100644 index 0000000000..7b442164ff --- /dev/null +++ b/tests/topotests/bgp_suppress_fib/r1/zebra.conf @@ -0,0 +1,9 @@ +! exit1 +interface r1-eth0 + ip address 10.0.0.1/30 +! +ip forwarding +! +ip route 40.0.0.0/8 blackhole +ip route 50.0.0.0/8 blackhole +! diff --git a/tests/topotests/bgp_suppress_fib/r2/bgpd.conf b/tests/topotests/bgp_suppress_fib/r2/bgpd.conf new file mode 100644 index 0000000000..8321c915e3 --- /dev/null +++ b/tests/topotests/bgp_suppress_fib/r2/bgpd.conf @@ -0,0 +1,6 @@ +! +router bgp 2 + no bgp ebgp-requires-policy + bgp suppress-fib-pending + neighbor 10.0.0.1 remote-as 1 + neighbor 10.0.0.10 remote-as 3 diff --git a/tests/topotests/bgp_suppress_fib/r2/zebra.conf b/tests/topotests/bgp_suppress_fib/r2/zebra.conf new file mode 100644 index 0000000000..443fffc703 --- /dev/null +++ b/tests/topotests/bgp_suppress_fib/r2/zebra.conf @@ -0,0 +1,13 @@ +! +interface r2-eth0 + ip address 10.0.0.2/30 +! +interface r2-eth1 + ip address 10.0.0.9/30 + +access-list access seq 5 permit 40.0.0.0/8 + +route-map LIMIT permit 10 + match ip address access + +ip protocol bgp route-map LIMIT diff --git a/tests/topotests/bgp_suppress_fib/r3/bgpd.conf b/tests/topotests/bgp_suppress_fib/r3/bgpd.conf new file mode 100644 index 0000000000..11715d45d7 --- /dev/null +++ b/tests/topotests/bgp_suppress_fib/r3/bgpd.conf @@ -0,0 +1,9 @@ +! +router bgp 3 + no bgp ebgp-requires-policy + neighbor 10.0.0.9 remote-as 2 + +route-map rmap permit 1 + match ip address prefix-list plist + ! +! diff --git a/tests/topotests/bgp_suppress_fib/r3/v4_route.json b/tests/topotests/bgp_suppress_fib/r3/v4_route.json new file mode 100644 index 0000000000..19294eb492 --- /dev/null +++ b/tests/topotests/bgp_suppress_fib/r3/v4_route.json @@ -0,0 +1,29 @@ +{ + "40.0.0.0\/8":[ + { + "prefix":"40.0.0.0\/8", + "protocol":"bgp", + "selected":true, + "destSelected":true, + "distance":20, + "metric":0, + "installed":true, + "table":254, + "internalStatus":16, + "internalFlags":8, + "internalNextHopNum":1, + "internalNextHopActiveNum":1, + "nexthops":[ + { + "flags":3, + "fib":true, + "ip":"10.0.0.9", + "afi":"ipv4", + "interfaceIndex":2, + "interfaceName":"r3-eth0", + "active":true + } + ] + } + ] +} diff --git a/tests/topotests/bgp_suppress_fib/r3/v4_route2.json b/tests/topotests/bgp_suppress_fib/r3/v4_route2.json new file mode 100644 index 0000000000..a35d49e9e8 --- /dev/null +++ b/tests/topotests/bgp_suppress_fib/r3/v4_route2.json @@ -0,0 +1,4 @@ +{ + "0.0.0.0\/0":[ + ] +} diff --git a/tests/topotests/bgp_suppress_fib/r3/zebra.conf b/tests/topotests/bgp_suppress_fib/r3/zebra.conf new file mode 100644 index 0000000000..793b043a7b --- /dev/null +++ b/tests/topotests/bgp_suppress_fib/r3/zebra.conf @@ -0,0 +1,6 @@ +! +interface r3-eth0 + ip address 10.0.0.10/30 +! +ip forwarding +! diff --git a/tests/topotests/bgp_suppress_fib/test_bgp_suppress_fib.py b/tests/topotests/bgp_suppress_fib/test_bgp_suppress_fib.py new file mode 100644 index 0000000000..cf8be5f44f --- /dev/null +++ b/tests/topotests/bgp_suppress_fib/test_bgp_suppress_fib.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python + +# +# test_bgp_suppress_fib.py +# +# Copyright (c) 2019 by +# +# Permission to use, copy, modify, and/or distribute this software +# for any purpose with or without fee is hereby granted, provided +# that the above copyright notice and this permission notice appear +# in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND NETDEF DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NETDEF BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# + +""" +""" + +import os +import sys +import json +import time +import pytest +from functools import partial +from time import sleep + +CWD = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(CWD, "../")) + +# pylint: disable=C0413 +from lib import topotest +from lib.topogen import Topogen, TopoRouter, get_topogen +from lib.topolog import logger +from mininet.topo import Topo + + +class TemplateTopo(Topo): + def build(self, *_args, **_opts): + tgen = get_topogen(self) + + for routern in range(1, 4): + tgen.add_router("r{}".format(routern)) + + switch = tgen.add_switch("s1") + switch.add_link(tgen.gears["r1"]) + switch.add_link(tgen.gears["r2"]) + + switch = tgen.add_switch("s2") + switch.add_link(tgen.gears["r2"]) + switch.add_link(tgen.gears["r3"]) + +def setup_module(mod): + tgen = Topogen(TemplateTopo, mod.__name__) + tgen.start_topology() + + router_list = tgen.routers() + + for i, (rname, router) in enumerate(router_list.items(), 1): + router.load_config( + TopoRouter.RD_ZEBRA, os.path.join(CWD, "{}/zebra.conf".format(rname)) + ) + router.load_config( + TopoRouter.RD_BGP, os.path.join(CWD, "{}/bgpd.conf".format(rname)) + ) + + tgen.start_router() + + +def teardown_module(mod): + tgen = get_topogen() + tgen.stop_topology() + + +def test_bgp_route(): + tgen = get_topogen() + + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + r3 = tgen.gears["r3"] + + sleep(5) + + json_file = "{}/r3/v4_route.json".format(CWD) + expected = json.loads(open(json_file).read()) + + test_func = partial( + topotest.router_json_cmp, + r3, + "show ip route 40.0.0.0 json", + expected, + ) + _, result = topotest.run_and_expect(test_func, None, count=2, wait=0.5) + assertmsg = '"r3" JSON output mismatches' + assert result is None, assertmsg + + json_file = "{}/r3/v4_route2.json".format(CWD) + expected = json.loads(open(json_file).read()) + + test_func = partial( + topotest.router_json_cmp, + r3, + "show ip route 50.0.0.0 json", + expected, + ) + _, result = topotest.run_and_expect(test_func, None, count=3, wait=0.5) + assertmsg = '"r3" JSON output mismatches' + assert result is None, assertmsg + +if __name__ == "__main__": + args = ["-s"] + sys.argv[1:] + sys.exit(pytest.main(args)) diff --git a/zebra/zapi_msg.c b/zebra/zapi_msg.c index 18017c9700..1d68019909 100644 --- a/zebra/zapi_msg.c +++ b/zebra/zapi_msg.c @@ -742,7 +742,8 @@ static int nhg_notify(uint16_t type, uint16_t instance, uint32_t id, static int route_notify_internal(const struct prefix *p, int type, uint16_t instance, vrf_id_t vrf_id, uint32_t table_id, - enum zapi_route_notify_owner note) + enum zapi_route_notify_owner note, + afi_t afi, safi_t safi) { struct zserv *client; struct stream *s; @@ -778,16 +779,21 @@ static int route_notify_internal(const struct prefix *p, int type, stream_putl(s, table_id); + /* Encode AFI, SAFI in the message */ + stream_putc(s, afi); + stream_putc(s, safi); + stream_putw_at(s, 0, stream_get_endp(s)); return zserv_send_message(client, s); } int zsend_route_notify_owner(struct route_entry *re, const struct prefix *p, - enum zapi_route_notify_owner note) + enum zapi_route_notify_owner note, + afi_t afi, safi_t safi) { return (route_notify_internal(p, re->type, re->instance, re->vrf_id, - re->table, note)); + re->table, note, afi, safi)); } /* @@ -801,7 +807,19 @@ int zsend_route_notify_owner_ctx(const struct zebra_dplane_ctx *ctx, dplane_ctx_get_instance(ctx), dplane_ctx_get_vrf(ctx), dplane_ctx_get_table(ctx), - note)); + note, + dplane_ctx_get_afi(ctx), + dplane_ctx_get_safi(ctx))); +} + +static void zread_route_notify_request(ZAPI_HANDLER_ARGS) +{ + uint8_t notify; + + STREAM_GETC(msg, notify); + client->notify_owner = notify; +stream_failure: + return; } void zsend_rule_notify_owner(const struct zebra_dplane_ctx *ctx, @@ -3275,6 +3293,7 @@ void (*const zserv_handlers[])(ZAPI_HANDLER_ARGS) = { [ZEBRA_NEIGH_DISCOVER] = zread_neigh_discover, [ZEBRA_NHG_ADD] = zread_nhg_add, [ZEBRA_NHG_DEL] = zread_nhg_del, + [ZEBRA_ROUTE_NOTIFY_REQUEST] = zread_route_notify_request, }; /* diff --git a/zebra/zapi_msg.h b/zebra/zapi_msg.h index 9f23a313bf..efc52059b6 100644 --- a/zebra/zapi_msg.h +++ b/zebra/zapi_msg.h @@ -77,7 +77,8 @@ extern int zsend_interface_link_params(struct zserv *zclient, extern int zsend_pw_update(struct zserv *client, struct zebra_pw *pw); extern int zsend_route_notify_owner(struct route_entry *re, const struct prefix *p, - enum zapi_route_notify_owner note); + enum zapi_route_notify_owner note, + afi_t afi, safi_t safi); extern int zsend_route_notify_owner_ctx(const struct zebra_dplane_ctx *ctx, enum zapi_route_notify_owner note); diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index e76ecc85a6..b688704962 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -481,7 +481,8 @@ void rib_install_kernel(struct route_node *rn, struct route_entry *re, * know that they've lost */ if (old && (old != re) && (old->type != re->type)) - zsend_route_notify_owner(old, p, ZAPI_ROUTE_BETTER_ADMIN_WON); + zsend_route_notify_owner(old, p, ZAPI_ROUTE_BETTER_ADMIN_WON, + info->afi, info->safi); /* Update fib selection */ dest->selected_fib = re; @@ -1748,6 +1749,7 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx) uint32_t seq; rib_dest_t *dest; bool fib_changed = false; + struct rib_table_info *info; zvrf = vrf_info_lookup(dplane_ctx_get_vrf(ctx)); vrf = vrf_lookup_by_id(dplane_ctx_get_vrf(ctx)); @@ -1767,6 +1769,7 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx) dest = rib_dest_from_rnode(rn); srcdest_rnode_prefixes(rn, &dest_pfx, &src_pfx); + info = srcdest_rnode_table_info(rn); op = dplane_ctx_get_op(ctx); status = dplane_ctx_get_status(ctx); @@ -1906,7 +1909,8 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx) SET_FLAG(old_re->status, ROUTE_ENTRY_FAILED); if (re) zsend_route_notify_owner(re, dest_pfx, - ZAPI_ROUTE_FAIL_INSTALL); + ZAPI_ROUTE_FAIL_INSTALL, + info->afi, info->safi); zlog_warn("%s(%u:%u):%pFX: Route install failed", VRF_LOGNAME(vrf), dplane_ctx_get_vrf(ctx),