mirror of
https://git.proxmox.com/git/mirror_frr
synced 2025-08-13 19:02:58 +00:00
bgpd: Implement options for link bandwidth handling
Support configurable options to control how link bandwidth is handled by the receiver. The default behavior is to automatically honor the link bandwidths received and use it to perform a weighted ECMP BUT only if all paths in the multipath have associated link bandwidth; if one or more paths do not have link bandwidth, normal ECMP is performed among the multipaths. This behavior is as recommended by https://tools.ietf.org/html/draft-ietf-idr-link-bandwidth. The additional options available are to (a) completely ignore any link bandwidth (i.e., weighted ECMP is effectively disabled), (b) skip paths in the multipath which do not have link bandwidth and perform weighted ECMP among the other paths (if at least some paths have the bandwidth) or (c) use a default weight (value chosen is 1) for the paths which do not have link bandwidth. The command syntax is bgp bestpath bandwidth <ignore|skip-missing|default-weight-for-missing> Signed-off-by: Vivek Venkatraman <vivek@cumulusnetworks.com>
This commit is contained in:
parent
7b651a321e
commit
f7e1c681f4
@ -419,6 +419,10 @@ static void bgp_path_info_mpath_lb_update(struct bgp_path_info *path, bool set,
|
||||
return;
|
||||
}
|
||||
if (set) {
|
||||
if (cum_bw)
|
||||
SET_FLAG(mpath->mp_flags, BGP_MP_LB_PRESENT);
|
||||
else
|
||||
UNSET_FLAG(mpath->mp_flags, BGP_MP_LB_PRESENT);
|
||||
if (all_paths_lb)
|
||||
SET_FLAG(mpath->mp_flags, BGP_MP_LB_ALL);
|
||||
else
|
||||
@ -446,14 +450,24 @@ struct attr *bgp_path_info_mpath_attr(struct bgp_path_info *path)
|
||||
/*
|
||||
* bgp_path_info_chkwtd
|
||||
*
|
||||
* Given bestpath bgp_path_info, return if we should attempt to
|
||||
* do weighted ECMP or not
|
||||
* Return if we should attempt to do weighted ECMP or not
|
||||
* The path passed in is the bestpath.
|
||||
*/
|
||||
bool bgp_path_info_mpath_chkwtd(struct bgp_path_info *path)
|
||||
bool bgp_path_info_mpath_chkwtd(struct bgp *bgp, struct bgp_path_info *path)
|
||||
{
|
||||
if (!path->mpath)
|
||||
/* Check if told to ignore weights or not multipath */
|
||||
if (bgp->lb_handling == BGP_LINK_BW_IGNORE_BW || !path->mpath)
|
||||
return false;
|
||||
return (path->mpath->mp_flags & BGP_MP_LB_ALL);
|
||||
|
||||
/* All paths in multipath should have associated weight (bandwidth)
|
||||
* unless told explicitly otherwise.
|
||||
*/
|
||||
if (bgp->lb_handling != BGP_LINK_BW_SKIP_MISSING &&
|
||||
bgp->lb_handling != BGP_LINK_BW_DEFWT_4_MISSING)
|
||||
return (path->mpath->mp_flags & BGP_MP_LB_ALL);
|
||||
|
||||
/* At least one path should have bandwidth. */
|
||||
return (path->mpath->mp_flags & BGP_MP_LB_PRESENT);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -40,8 +40,8 @@ struct bgp_path_info_mpath {
|
||||
|
||||
/* Flags - relevant as noted. */
|
||||
uint16_t mp_flags;
|
||||
/* Attached to best path, indicates that all multipaths have link-bandwidth */
|
||||
#define BGP_MP_LB_ALL 0x1
|
||||
#define BGP_MP_LB_PRESENT 0x1 /* Link-bandwidth present for >= 1 path */
|
||||
#define BGP_MP_LB_ALL 0x2 /* Link-bandwidth present for all multipaths */
|
||||
|
||||
/* Aggregated attribute for advertising multipath route */
|
||||
struct attr *mp_attr;
|
||||
@ -86,7 +86,8 @@ bgp_path_info_mpath_next(struct bgp_path_info *path);
|
||||
/* Accessors for multipath information */
|
||||
extern uint32_t bgp_path_info_mpath_count(struct bgp_path_info *path);
|
||||
extern struct attr *bgp_path_info_mpath_attr(struct bgp_path_info *path);
|
||||
extern bool bgp_path_info_mpath_chkwtd(struct bgp_path_info *path);
|
||||
extern bool bgp_path_info_mpath_chkwtd(struct bgp *bgp,
|
||||
struct bgp_path_info *path);
|
||||
extern uint64_t bgp_path_info_mpath_cumbw(struct bgp_path_info *path);
|
||||
|
||||
#endif /* _QUAGGA_BGP_MPATH_H */
|
||||
|
@ -2066,7 +2066,7 @@ bool subgroup_announce_check(struct bgp_node *rn, struct bgp_path_info *pi,
|
||||
* been explicitly set by user policy.
|
||||
*/
|
||||
if (nh_reset &&
|
||||
bgp_path_info_mpath_chkwtd(pi) &&
|
||||
bgp_path_info_mpath_chkwtd(bgp, pi) &&
|
||||
(cum_bw = bgp_path_info_mpath_cumbw(pi)) != 0 &&
|
||||
!CHECK_FLAG(attr->rmap_change_flags, BATTR_RMAP_LINK_BW_SET))
|
||||
attr->ecommunity = ecommunity_replace_linkbw(
|
||||
|
@ -2973,6 +2973,49 @@ DEFUN (no_bgp_bestpath_med,
|
||||
return CMD_SUCCESS;
|
||||
}
|
||||
|
||||
/* "bgp bestpath bandwidth" configuration. */
|
||||
DEFPY (bgp_bestpath_bw,
|
||||
bgp_bestpath_bw_cmd,
|
||||
"[no$no] bgp bestpath bandwidth [<ignore|skip-missing|default-weight-for-missing>$bw_cfg]",
|
||||
NO_STR
|
||||
"BGP specific commands\n"
|
||||
"Change the default bestpath selection\n"
|
||||
"Link Bandwidth attribute\n"
|
||||
"Ignore link bandwidth (i.e., do regular ECMP, not weighted)\n"
|
||||
"Ignore paths without link bandwidth for ECMP (if other paths have it)\n"
|
||||
"Assign a low default weight (value 1) to paths not having link bandwidth\n")
|
||||
{
|
||||
VTY_DECLVAR_CONTEXT(bgp, bgp);
|
||||
afi_t afi;
|
||||
safi_t safi;
|
||||
|
||||
if (no) {
|
||||
bgp->lb_handling = BGP_LINK_BW_ECMP;
|
||||
} else {
|
||||
if (!bw_cfg) {
|
||||
vty_out(vty, "%% Bandwidth configuration must be specified\n");
|
||||
return CMD_ERR_INCOMPLETE;
|
||||
}
|
||||
if (!strcmp(bw_cfg, "ignore"))
|
||||
bgp->lb_handling = BGP_LINK_BW_IGNORE_BW;
|
||||
else if (!strcmp(bw_cfg, "skip-missing"))
|
||||
bgp->lb_handling = BGP_LINK_BW_SKIP_MISSING;
|
||||
else if (!strcmp(bw_cfg, "default-weight-for-missing"))
|
||||
bgp->lb_handling = BGP_LINK_BW_DEFWT_4_MISSING;
|
||||
else
|
||||
return CMD_ERR_NO_MATCH;
|
||||
}
|
||||
|
||||
/* This config is used in route install, so redo that. */
|
||||
FOREACH_AFI_SAFI (afi, safi) {
|
||||
if (!bgp_fibupd_safi(safi))
|
||||
continue;
|
||||
bgp_zebra_announce_table(bgp, afi, safi);
|
||||
}
|
||||
|
||||
return CMD_SUCCESS;
|
||||
}
|
||||
|
||||
/* "no bgp default ipv4-unicast". */
|
||||
DEFUN (no_bgp_default_ipv4_unicast,
|
||||
no_bgp_default_ipv4_unicast_cmd,
|
||||
@ -15161,6 +15204,14 @@ int bgp_config_write(struct vty *vty)
|
||||
vty_out(vty, "\n");
|
||||
}
|
||||
|
||||
/* Link bandwidth handling. */
|
||||
if (bgp->lb_handling == BGP_LINK_BW_IGNORE_BW)
|
||||
vty_out(vty, " bgp bestpath bandwidth ignore\n");
|
||||
else if (bgp->lb_handling == BGP_LINK_BW_SKIP_MISSING)
|
||||
vty_out(vty, " bgp bestpath bandwidth skip-missing\n");
|
||||
else if (bgp->lb_handling == BGP_LINK_BW_DEFWT_4_MISSING)
|
||||
vty_out(vty, " bgp bestpath bandwidth default-weight-for-missing\n");
|
||||
|
||||
/* BGP network import check. */
|
||||
if (!!CHECK_FLAG(bgp->flags, BGP_FLAG_IMPORT_CHECK)
|
||||
!= SAVE_BGP_IMPORT_CHECK)
|
||||
@ -15567,6 +15618,9 @@ void bgp_vty_init(void)
|
||||
install_element(BGP_NODE, &bgp_bestpath_med_cmd);
|
||||
install_element(BGP_NODE, &no_bgp_bestpath_med_cmd);
|
||||
|
||||
/* "bgp bestpath bandwidth" commands */
|
||||
install_element(BGP_NODE, &bgp_bestpath_bw_cmd);
|
||||
|
||||
/* "no bgp default ipv4-unicast" commands. */
|
||||
install_element(BGP_NODE, &no_bgp_default_ipv4_unicast_cmd);
|
||||
install_element(BGP_NODE, &bgp_default_ipv4_unicast_cmd);
|
||||
|
@ -1148,10 +1148,29 @@ static bool update_ipv6nh_for_route_install(int nh_othervrf, struct bgp *nh_bgp,
|
||||
return true;
|
||||
}
|
||||
|
||||
static uint32_t bgp_zebra_nhop_weight(uint32_t bw, uint64_t tot_bw)
|
||||
static bool bgp_zebra_use_nhop_weighted(struct bgp *bgp, struct attr *attr,
|
||||
uint64_t tot_bw, uint32_t *nh_weight)
|
||||
{
|
||||
uint64_t tmp = (uint64_t)bw * 100;
|
||||
return ((uint32_t)(tmp / tot_bw));
|
||||
uint32_t bw;
|
||||
uint64_t tmp;
|
||||
|
||||
bw = attr->link_bw;
|
||||
/* zero link-bandwidth and link-bandwidth not present are treated
|
||||
* as the same situation.
|
||||
*/
|
||||
if (!bw) {
|
||||
/* the only situations should be if we're either told
|
||||
* to skip or use default weight.
|
||||
*/
|
||||
if (bgp->lb_handling == BGP_LINK_BW_SKIP_MISSING)
|
||||
return false;
|
||||
*nh_weight = BGP_ZEBRA_DEFAULT_NHOP_WEIGHT;
|
||||
} else {
|
||||
tmp = (uint64_t)bw * 100;
|
||||
*nh_weight = ((uint32_t)(tmp / tot_bw));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p,
|
||||
@ -1250,15 +1269,18 @@ void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p,
|
||||
metric = info->attr->med;
|
||||
|
||||
/* Determine if we're doing weighted ECMP or not */
|
||||
do_wt_ecmp = bgp_path_info_mpath_chkwtd(info);
|
||||
do_wt_ecmp = bgp_path_info_mpath_chkwtd(bgp, info);
|
||||
if (do_wt_ecmp)
|
||||
cum_bw = bgp_path_info_mpath_cumbw(info);
|
||||
|
||||
for (mpinfo = info; mpinfo; mpinfo = bgp_path_info_mpath_next(mpinfo)) {
|
||||
uint32_t nh_weight;
|
||||
|
||||
if (valid_nh_count >= multipath_num)
|
||||
break;
|
||||
|
||||
*mpinfo_cp = *mpinfo;
|
||||
nh_weight = 0;
|
||||
|
||||
/* Get nexthop address-family */
|
||||
if (p->family == AF_INET
|
||||
@ -1271,6 +1293,15 @@ void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p,
|
||||
else
|
||||
continue;
|
||||
|
||||
/* If processing for weighted ECMP, determine the next hop's
|
||||
* weight. Based on user setting, we may skip the next hop
|
||||
* in some situations.
|
||||
*/
|
||||
if (do_wt_ecmp) {
|
||||
if (!bgp_zebra_use_nhop_weighted(bgp, mpinfo->attr,
|
||||
cum_bw, &nh_weight))
|
||||
continue;
|
||||
}
|
||||
api_nh = &api.nexthops[valid_nh_count];
|
||||
if (nh_family == AF_INET) {
|
||||
if (bgp_debug_zebra(&api.prefix)) {
|
||||
@ -1370,11 +1401,8 @@ void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p,
|
||||
}
|
||||
memcpy(&api_nh->rmac, &(mpinfo->attr->rmac),
|
||||
sizeof(struct ethaddr));
|
||||
api_nh->weight = nh_weight;
|
||||
|
||||
/* Update next hop's weight for weighted ECMP */
|
||||
if (do_wt_ecmp)
|
||||
api_nh->weight = bgp_zebra_nhop_weight(
|
||||
mpinfo->attr->link_bw, cum_bw);
|
||||
valid_nh_count++;
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,9 @@
|
||||
|
||||
#include "vxlan.h"
|
||||
|
||||
/* Default weight for next hop, if doing weighted ECMP. */
|
||||
#define BGP_ZEBRA_DEFAULT_NHOP_WEIGHT 1
|
||||
|
||||
extern void bgp_zebra_init(struct thread_master *master,
|
||||
unsigned short instance);
|
||||
extern void bgp_zebra_init_tm_connect(struct bgp *bgp);
|
||||
|
@ -2971,6 +2971,7 @@ static struct bgp *bgp_create(as_t *as, const char *name,
|
||||
bgp->dynamic_neighbors_limit = BGP_DYNAMIC_NEIGHBORS_LIMIT_DEFAULT;
|
||||
bgp->dynamic_neighbors_count = 0;
|
||||
bgp->lb_ref_bw = BGP_LINK_BW_REF_BW;
|
||||
bgp->lb_handling = BGP_LINK_BW_ECMP;
|
||||
bgp->ebgp_requires_policy = DEFAULT_EBGP_POLICY_DISABLED;
|
||||
bgp->reject_as_sets = BGP_REJECT_AS_SETS_DISABLED;
|
||||
bgp_addpath_init_bgp_data(&bgp->tx_addpath);
|
||||
|
17
bgpd/bgpd.h
17
bgpd/bgpd.h
@ -284,6 +284,20 @@ enum global_gr_command {
|
||||
#define BGP_GR_SUCCESS 0
|
||||
#define BGP_GR_FAILURE 1
|
||||
|
||||
/* Handling of BGP link bandwidth (LB) on receiver - whether and how to
|
||||
* do weighted ECMP. Note: This applies after multipath computation.
|
||||
*/
|
||||
enum bgp_link_bw_handling {
|
||||
/* Do ECMP if some paths don't have LB - default */
|
||||
BGP_LINK_BW_ECMP,
|
||||
/* Completely ignore LB, just do regular ECMP */
|
||||
BGP_LINK_BW_IGNORE_BW,
|
||||
/* Skip paths without LB, do wECMP on others */
|
||||
BGP_LINK_BW_SKIP_MISSING,
|
||||
/* Do wECMP with default weight for paths not having LB */
|
||||
BGP_LINK_BW_DEFWT_4_MISSING
|
||||
};
|
||||
|
||||
/* BGP instance structure. */
|
||||
struct bgp {
|
||||
/* AS number of this BGP instance. */
|
||||
@ -658,6 +672,9 @@ struct bgp {
|
||||
/* Count of peers in established state */
|
||||
uint32_t established_peers;
|
||||
|
||||
/* Weighted ECMP related config. */
|
||||
enum bgp_link_bw_handling lb_handling;
|
||||
|
||||
QOBJ_FIELDS
|
||||
};
|
||||
DECLARE_QOBJ_TYPE(bgp)
|
||||
|
Loading…
Reference in New Issue
Block a user