From f7e1c681f4bf4e2cc1486318d24c42c9e5fb6349 Mon Sep 17 00:00:00 2001 From: vivek Date: Tue, 24 Mar 2020 14:38:37 -0700 Subject: [PATCH] bgpd: Implement options for link bandwidth handling Support configurable options to control how link bandwidth is handled by the receiver. The default behavior is to automatically honor the link bandwidths received and use it to perform a weighted ECMP BUT only if all paths in the multipath have associated link bandwidth; if one or more paths do not have link bandwidth, normal ECMP is performed among the multipaths. This behavior is as recommended by https://tools.ietf.org/html/draft-ietf-idr-link-bandwidth. The additional options available are to (a) completely ignore any link bandwidth (i.e., weighted ECMP is effectively disabled), (b) skip paths in the multipath which do not have link bandwidth and perform weighted ECMP among the other paths (if at least some paths have the bandwidth) or (c) use a default weight (value chosen is 1) for the paths which do not have link bandwidth. The command syntax is bgp bestpath bandwidth Signed-off-by: Vivek Venkatraman --- bgpd/bgp_mpath.c | 24 ++++++++++++++++----- bgpd/bgp_mpath.h | 7 ++++--- bgpd/bgp_route.c | 2 +- bgpd/bgp_vty.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++ bgpd/bgp_zebra.c | 44 ++++++++++++++++++++++++++++++++------- bgpd/bgp_zebra.h | 3 +++ bgpd/bgpd.c | 1 + bgpd/bgpd.h | 17 +++++++++++++++ 8 files changed, 135 insertions(+), 17 deletions(-) diff --git a/bgpd/bgp_mpath.c b/bgpd/bgp_mpath.c index a3d40849ce..f66f56cb49 100644 --- a/bgpd/bgp_mpath.c +++ b/bgpd/bgp_mpath.c @@ -419,6 +419,10 @@ static void bgp_path_info_mpath_lb_update(struct bgp_path_info *path, bool set, return; } if (set) { + if (cum_bw) + SET_FLAG(mpath->mp_flags, BGP_MP_LB_PRESENT); + else + UNSET_FLAG(mpath->mp_flags, BGP_MP_LB_PRESENT); if (all_paths_lb) SET_FLAG(mpath->mp_flags, BGP_MP_LB_ALL); else @@ -446,14 +450,24 @@ struct attr *bgp_path_info_mpath_attr(struct bgp_path_info *path) /* * bgp_path_info_chkwtd * - * Given bestpath bgp_path_info, return if we should attempt to - * do weighted ECMP or not + * Return if we should attempt to do weighted ECMP or not + * The path passed in is the bestpath. */ -bool bgp_path_info_mpath_chkwtd(struct bgp_path_info *path) +bool bgp_path_info_mpath_chkwtd(struct bgp *bgp, struct bgp_path_info *path) { - if (!path->mpath) + /* Check if told to ignore weights or not multipath */ + if (bgp->lb_handling == BGP_LINK_BW_IGNORE_BW || !path->mpath) return false; - return (path->mpath->mp_flags & BGP_MP_LB_ALL); + + /* All paths in multipath should have associated weight (bandwidth) + * unless told explicitly otherwise. + */ + if (bgp->lb_handling != BGP_LINK_BW_SKIP_MISSING && + bgp->lb_handling != BGP_LINK_BW_DEFWT_4_MISSING) + return (path->mpath->mp_flags & BGP_MP_LB_ALL); + + /* At least one path should have bandwidth. */ + return (path->mpath->mp_flags & BGP_MP_LB_PRESENT); } /* diff --git a/bgpd/bgp_mpath.h b/bgpd/bgp_mpath.h index 6d3df29415..34f94b256b 100644 --- a/bgpd/bgp_mpath.h +++ b/bgpd/bgp_mpath.h @@ -40,8 +40,8 @@ struct bgp_path_info_mpath { /* Flags - relevant as noted. */ uint16_t mp_flags; -/* Attached to best path, indicates that all multipaths have link-bandwidth */ -#define BGP_MP_LB_ALL 0x1 +#define BGP_MP_LB_PRESENT 0x1 /* Link-bandwidth present for >= 1 path */ +#define BGP_MP_LB_ALL 0x2 /* Link-bandwidth present for all multipaths */ /* Aggregated attribute for advertising multipath route */ struct attr *mp_attr; @@ -86,7 +86,8 @@ bgp_path_info_mpath_next(struct bgp_path_info *path); /* Accessors for multipath information */ extern uint32_t bgp_path_info_mpath_count(struct bgp_path_info *path); extern struct attr *bgp_path_info_mpath_attr(struct bgp_path_info *path); -extern bool bgp_path_info_mpath_chkwtd(struct bgp_path_info *path); +extern bool bgp_path_info_mpath_chkwtd(struct bgp *bgp, + struct bgp_path_info *path); extern uint64_t bgp_path_info_mpath_cumbw(struct bgp_path_info *path); #endif /* _QUAGGA_BGP_MPATH_H */ diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 8bb4bb075b..2009375c89 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -2066,7 +2066,7 @@ bool subgroup_announce_check(struct bgp_node *rn, struct bgp_path_info *pi, * been explicitly set by user policy. */ if (nh_reset && - bgp_path_info_mpath_chkwtd(pi) && + bgp_path_info_mpath_chkwtd(bgp, pi) && (cum_bw = bgp_path_info_mpath_cumbw(pi)) != 0 && !CHECK_FLAG(attr->rmap_change_flags, BATTR_RMAP_LINK_BW_SET)) attr->ecommunity = ecommunity_replace_linkbw( diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index aea6dbbf4e..544bee2a54 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -2973,6 +2973,49 @@ DEFUN (no_bgp_bestpath_med, return CMD_SUCCESS; } +/* "bgp bestpath bandwidth" configuration. */ +DEFPY (bgp_bestpath_bw, + bgp_bestpath_bw_cmd, + "[no$no] bgp bestpath bandwidth [$bw_cfg]", + NO_STR + "BGP specific commands\n" + "Change the default bestpath selection\n" + "Link Bandwidth attribute\n" + "Ignore link bandwidth (i.e., do regular ECMP, not weighted)\n" + "Ignore paths without link bandwidth for ECMP (if other paths have it)\n" + "Assign a low default weight (value 1) to paths not having link bandwidth\n") +{ + VTY_DECLVAR_CONTEXT(bgp, bgp); + afi_t afi; + safi_t safi; + + if (no) { + bgp->lb_handling = BGP_LINK_BW_ECMP; + } else { + if (!bw_cfg) { + vty_out(vty, "%% Bandwidth configuration must be specified\n"); + return CMD_ERR_INCOMPLETE; + } + if (!strcmp(bw_cfg, "ignore")) + bgp->lb_handling = BGP_LINK_BW_IGNORE_BW; + else if (!strcmp(bw_cfg, "skip-missing")) + bgp->lb_handling = BGP_LINK_BW_SKIP_MISSING; + else if (!strcmp(bw_cfg, "default-weight-for-missing")) + bgp->lb_handling = BGP_LINK_BW_DEFWT_4_MISSING; + else + return CMD_ERR_NO_MATCH; + } + + /* This config is used in route install, so redo that. */ + FOREACH_AFI_SAFI (afi, safi) { + if (!bgp_fibupd_safi(safi)) + continue; + bgp_zebra_announce_table(bgp, afi, safi); + } + + return CMD_SUCCESS; +} + /* "no bgp default ipv4-unicast". */ DEFUN (no_bgp_default_ipv4_unicast, no_bgp_default_ipv4_unicast_cmd, @@ -15161,6 +15204,14 @@ int bgp_config_write(struct vty *vty) vty_out(vty, "\n"); } + /* Link bandwidth handling. */ + if (bgp->lb_handling == BGP_LINK_BW_IGNORE_BW) + vty_out(vty, " bgp bestpath bandwidth ignore\n"); + else if (bgp->lb_handling == BGP_LINK_BW_SKIP_MISSING) + vty_out(vty, " bgp bestpath bandwidth skip-missing\n"); + else if (bgp->lb_handling == BGP_LINK_BW_DEFWT_4_MISSING) + vty_out(vty, " bgp bestpath bandwidth default-weight-for-missing\n"); + /* BGP network import check. */ if (!!CHECK_FLAG(bgp->flags, BGP_FLAG_IMPORT_CHECK) != SAVE_BGP_IMPORT_CHECK) @@ -15567,6 +15618,9 @@ void bgp_vty_init(void) install_element(BGP_NODE, &bgp_bestpath_med_cmd); install_element(BGP_NODE, &no_bgp_bestpath_med_cmd); + /* "bgp bestpath bandwidth" commands */ + install_element(BGP_NODE, &bgp_bestpath_bw_cmd); + /* "no bgp default ipv4-unicast" commands. */ install_element(BGP_NODE, &no_bgp_default_ipv4_unicast_cmd); install_element(BGP_NODE, &bgp_default_ipv4_unicast_cmd); diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index db99811d5c..b01bb788a5 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -1148,10 +1148,29 @@ static bool update_ipv6nh_for_route_install(int nh_othervrf, struct bgp *nh_bgp, return true; } -static uint32_t bgp_zebra_nhop_weight(uint32_t bw, uint64_t tot_bw) +static bool bgp_zebra_use_nhop_weighted(struct bgp *bgp, struct attr *attr, + uint64_t tot_bw, uint32_t *nh_weight) { - uint64_t tmp = (uint64_t)bw * 100; - return ((uint32_t)(tmp / tot_bw)); + uint32_t bw; + uint64_t tmp; + + bw = attr->link_bw; + /* zero link-bandwidth and link-bandwidth not present are treated + * as the same situation. + */ + if (!bw) { + /* the only situations should be if we're either told + * to skip or use default weight. + */ + if (bgp->lb_handling == BGP_LINK_BW_SKIP_MISSING) + return false; + *nh_weight = BGP_ZEBRA_DEFAULT_NHOP_WEIGHT; + } else { + tmp = (uint64_t)bw * 100; + *nh_weight = ((uint32_t)(tmp / tot_bw)); + } + + return true; } void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p, @@ -1250,15 +1269,18 @@ void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p, metric = info->attr->med; /* Determine if we're doing weighted ECMP or not */ - do_wt_ecmp = bgp_path_info_mpath_chkwtd(info); + do_wt_ecmp = bgp_path_info_mpath_chkwtd(bgp, info); if (do_wt_ecmp) cum_bw = bgp_path_info_mpath_cumbw(info); for (mpinfo = info; mpinfo; mpinfo = bgp_path_info_mpath_next(mpinfo)) { + uint32_t nh_weight; + if (valid_nh_count >= multipath_num) break; *mpinfo_cp = *mpinfo; + nh_weight = 0; /* Get nexthop address-family */ if (p->family == AF_INET @@ -1271,6 +1293,15 @@ void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p, else continue; + /* If processing for weighted ECMP, determine the next hop's + * weight. Based on user setting, we may skip the next hop + * in some situations. + */ + if (do_wt_ecmp) { + if (!bgp_zebra_use_nhop_weighted(bgp, mpinfo->attr, + cum_bw, &nh_weight)) + continue; + } api_nh = &api.nexthops[valid_nh_count]; if (nh_family == AF_INET) { if (bgp_debug_zebra(&api.prefix)) { @@ -1370,11 +1401,8 @@ void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p, } memcpy(&api_nh->rmac, &(mpinfo->attr->rmac), sizeof(struct ethaddr)); + api_nh->weight = nh_weight; - /* Update next hop's weight for weighted ECMP */ - if (do_wt_ecmp) - api_nh->weight = bgp_zebra_nhop_weight( - mpinfo->attr->link_bw, cum_bw); valid_nh_count++; } diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h index e546cd5da7..a069d01503 100644 --- a/bgpd/bgp_zebra.h +++ b/bgpd/bgp_zebra.h @@ -23,6 +23,9 @@ #include "vxlan.h" +/* Default weight for next hop, if doing weighted ECMP. */ +#define BGP_ZEBRA_DEFAULT_NHOP_WEIGHT 1 + extern void bgp_zebra_init(struct thread_master *master, unsigned short instance); extern void bgp_zebra_init_tm_connect(struct bgp *bgp); diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index d1363b70a4..3124efbb65 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -2971,6 +2971,7 @@ static struct bgp *bgp_create(as_t *as, const char *name, bgp->dynamic_neighbors_limit = BGP_DYNAMIC_NEIGHBORS_LIMIT_DEFAULT; bgp->dynamic_neighbors_count = 0; bgp->lb_ref_bw = BGP_LINK_BW_REF_BW; + bgp->lb_handling = BGP_LINK_BW_ECMP; bgp->ebgp_requires_policy = DEFAULT_EBGP_POLICY_DISABLED; bgp->reject_as_sets = BGP_REJECT_AS_SETS_DISABLED; bgp_addpath_init_bgp_data(&bgp->tx_addpath); diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 24ea66b5a1..f6f9687783 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -284,6 +284,20 @@ enum global_gr_command { #define BGP_GR_SUCCESS 0 #define BGP_GR_FAILURE 1 +/* Handling of BGP link bandwidth (LB) on receiver - whether and how to + * do weighted ECMP. Note: This applies after multipath computation. + */ +enum bgp_link_bw_handling { + /* Do ECMP if some paths don't have LB - default */ + BGP_LINK_BW_ECMP, + /* Completely ignore LB, just do regular ECMP */ + BGP_LINK_BW_IGNORE_BW, + /* Skip paths without LB, do wECMP on others */ + BGP_LINK_BW_SKIP_MISSING, + /* Do wECMP with default weight for paths not having LB */ + BGP_LINK_BW_DEFWT_4_MISSING +}; + /* BGP instance structure. */ struct bgp { /* AS number of this BGP instance. */ @@ -658,6 +672,9 @@ struct bgp { /* Count of peers in established state */ uint32_t established_peers; + /* Weighted ECMP related config. */ + enum bgp_link_bw_handling lb_handling; + QOBJ_FIELDS }; DECLARE_QOBJ_TYPE(bgp)