diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index fd2c29124d..2e37cce7e2 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -474,6 +474,17 @@ static void derive_rd_rt_for_vni(struct bgp *bgp, struct bgpevpn *vpn) bgp_evpn_derive_auto_rt_export(bgp, vpn); } +/* + * Convert nexthop (remote VTEP IP) into an IPv6 address. + */ +static void evpn_convert_nexthop_to_ipv6(struct attr *attr) +{ + if (BGP_ATTR_NEXTHOP_AFI_IP6(attr)) + return; + ipv4_to_ipv4_mapped_ipv6(&attr->mp_nexthop_global, attr->nexthop); + attr->mp_nexthop_len = IPV6_MAX_BYTELEN; +} + /* * Add (update) or delete MACIP from zebra. */ @@ -622,17 +633,17 @@ static void build_evpn_type5_route_extcomm(struct bgp *bgp_vrf, } /* - * Build extended communities for EVPN route. RT and ENCAP are - * applicable to all routes. - * TODO: currently kernel doesnt support ipv6 routes with ipv4 nexthops. - * This means that we can't do symmetric routing for ipv6 hosts routes - * in the same way as ipv4 host routes. - * We wont attach l3-vni related RTs for ipv6 routes. - * For now, We will only adevrtise ipv4 host routes - * with L3-VNI related ext-comm. + * Build extended communities for EVPN route. + * This function is applicable for type-2 and type-3 routes. The layer-2 RT + * and ENCAP extended communities are applicable for all routes. + * The default gateway extended community and MAC mobility (sticky) extended + * community are added as needed based on passed settings - only for type-2 + * routes. Likewise, the layer-3 RT and Router MAC extended communities are + * added, if present, based on passed settings - only for non-link-local + * type-2 routes. 
*/ static void build_evpn_route_extcomm(struct bgpevpn *vpn, struct attr *attr, - afi_t afi) + int add_l3_ecomm) { struct ecommunity ecom_encap; struct ecommunity ecom_sticky; @@ -662,11 +673,10 @@ static void build_evpn_route_extcomm(struct bgpevpn *vpn, struct attr *attr, for (ALL_LIST_ELEMENTS(vpn->export_rtl, node, nnode, ecom)) attr->ecommunity = ecommunity_merge(attr->ecommunity, ecom); - /* - * only attach l3-vni export rts for ipv4 address family and if we are - * advertising both the labels in type-2 routes + /* Add the export RTs for L3VNI if told to - caller determines + * when this should be done. */ - if (afi == AFI_IP && CHECK_FLAG(vpn->flags, VNI_FLAG_USE_TWO_LABELS)) { + if (add_l3_ecomm) { vrf_export_rtl = bgpevpn_get_vrf_export_rtl(vpn); if (vrf_export_rtl && !list_isempty(vrf_export_rtl)) { for (ALL_LIST_ELEMENTS(vrf_export_rtl, node, nnode, @@ -676,6 +686,7 @@ static void build_evpn_route_extcomm(struct bgpevpn *vpn, struct attr *attr, } } + /* Add MAC mobility (sticky) if needed. */ if (attr->sticky) { seqnum = 0; memset(&ecom_sticky, 0, sizeof(ecom_sticky)); @@ -686,12 +697,8 @@ static void build_evpn_route_extcomm(struct bgpevpn *vpn, struct attr *attr, ecommunity_merge(attr->ecommunity, &ecom_sticky); } - /* - * only attach l3-vni rmac for ipv4 address family and if we are - * advertising both the labels in type-2 routes - */ - if (afi == AFI_IP && !is_zero_mac(&attr->rmac) - && CHECK_FLAG(vpn->flags, VNI_FLAG_USE_TWO_LABELS)) { + /* Add RMAC, if told to. */ + if (add_l3_ecomm) { memset(&ecom_rmac, 0, sizeof(ecom_rmac)); encode_rmac_extcomm(&eval_rmac, &attr->rmac); ecom_rmac.size = 1; @@ -700,6 +707,7 @@ static void build_evpn_route_extcomm(struct bgpevpn *vpn, struct attr *attr, ecommunity_merge(attr->ecommunity, &ecom_rmac); } + /* Add default gateway, if needed. 
*/ if (attr->default_gw) { memset(&ecom_default_gw, 0, sizeof(ecom_default_gw)); encode_default_gw_extcomm(&eval_default_gw); @@ -1260,6 +1268,7 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, struct bgp_node *rn; struct attr attr; struct attr *attr_new; + int add_l3_ecomm = 0; struct bgp_info *ri; afi_t afi = AFI_L2VPN; safi_t safi = SAFI_EVPN; @@ -1279,14 +1288,23 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, if (p->prefix.route_type == BGP_EVPN_IMET_ROUTE) attr.flag |= ATTR_FLAG_BIT(BGP_ATTR_PMSI_TUNNEL); - /* router mac is only needed for type-2 and type-5 routes */ + /* router mac is only needed for type-2 routes here. */ if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) bgpevpn_get_rmac(vpn, &attr.rmac); vni2label(vpn->vni, &(attr.label)); - /* Set up RT and ENCAP extended community. */ - build_evpn_route_extcomm( - vpn, &attr, IS_EVPN_PREFIX_IPADDR_V4(p) ? AFI_IP : AFI_IP6); + /* Include L3 VNI related RTs and RMAC for type-2 routes, if they're + * IPv4 or IPv6 global addresses and we're advertising L3VNI with + * these routes. + */ + if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE && + (IS_EVPN_PREFIX_IPADDR_V4(p) || + !IN6_IS_ADDR_LINKLOCAL(&p->prefix.ip.ipaddr_v6)) && + CHECK_FLAG(vpn->flags, VNI_FLAG_USE_TWO_LABELS)) + add_l3_ecomm = 1; + + /* Set up extended community. */ + build_evpn_route_extcomm(vpn, &attr, add_l3_ecomm); /* First, create (or fetch) route node within the VNI. */ /* NOTE: There is no RD here. 
*/ @@ -1466,22 +1484,20 @@ static int update_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn) struct attr attr; struct attr attr_sticky; struct attr attr_def_gw; - struct attr attr_ip6; - struct attr attr_sticky_ip6; - struct attr attr_def_gw_ip6; + struct attr attr_ip6_ll; struct attr *attr_new; + int add_l3_ecomm = 0; afi = AFI_L2VPN; safi = SAFI_EVPN; memset(&attr, 0, sizeof(struct attr)); memset(&attr_sticky, 0, sizeof(struct attr)); memset(&attr_def_gw, 0, sizeof(struct attr)); - memset(&attr_ip6, 0, sizeof(struct attr)); - memset(&attr_sticky_ip6, 0, sizeof(struct attr)); - memset(&attr_def_gw_ip6, 0, sizeof(struct attr)); + memset(&attr_ip6_ll, 0, sizeof(struct attr)); - /* Build path-attribute - all type-2 routes for this VNI will share the - * same path attribute. + /* Build path-attribute - multiple type-2 routes for this VNI will share + * the same path attribute, but we need separate structures for sticky + * MACs, default gateway and IPv6 link-local addresses (no L3 RT/RMAC). 
*/ bgp_attr_default_set(&attr, BGP_ORIGIN_IGP); bgp_attr_default_set(&attr_sticky, BGP_ORIGIN_IGP); @@ -1500,31 +1516,21 @@ static int update_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn) attr_def_gw.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; attr_def_gw.default_gw = 1; bgpevpn_get_rmac(vpn, &attr_def_gw.rmac); - bgp_attr_default_set(&attr_ip6, BGP_ORIGIN_IGP); - bgp_attr_default_set(&attr_sticky_ip6, BGP_ORIGIN_IGP); - bgp_attr_default_set(&attr_def_gw_ip6, BGP_ORIGIN_IGP); - attr_ip6.nexthop = vpn->originator_ip; - attr_ip6.mp_nexthop_global_in = vpn->originator_ip; - attr_ip6.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; - bgpevpn_get_rmac(vpn, &attr_ip6.rmac); - attr_sticky_ip6.nexthop = vpn->originator_ip; - attr_sticky_ip6.mp_nexthop_global_in = vpn->originator_ip; - attr_sticky_ip6.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; - attr_sticky_ip6.sticky = 1; - bgpevpn_get_rmac(vpn, &attr_sticky_ip6.rmac); - attr_def_gw_ip6.nexthop = vpn->originator_ip; - attr_def_gw_ip6.mp_nexthop_global_in = vpn->originator_ip; - attr_def_gw_ip6.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; - attr_def_gw_ip6.default_gw = 1; - bgpevpn_get_rmac(vpn, &attr_def_gw_ip6.rmac); + bgp_attr_default_set(&attr_ip6_ll, BGP_ORIGIN_IGP); + attr_ip6_ll.nexthop = vpn->originator_ip; + attr_ip6_ll.mp_nexthop_global_in = vpn->originator_ip; + attr_ip6_ll.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; - /* Set up RT, ENCAP and sticky MAC extended community. */ - build_evpn_route_extcomm(vpn, &attr, AFI_IP); - build_evpn_route_extcomm(vpn, &attr_sticky, AFI_IP); - build_evpn_route_extcomm(vpn, &attr_def_gw, AFI_IP); - build_evpn_route_extcomm(vpn, &attr_ip6, AFI_IP6); - build_evpn_route_extcomm(vpn, &attr_sticky_ip6, AFI_IP6); - build_evpn_route_extcomm(vpn, &attr_def_gw_ip6, AFI_IP); + /* Add L3 VNI RTs and RMAC for non IPv6 link-local attributes if + * using L3 VNI for type-2 routes also. 
+ */ + if (CHECK_FLAG(vpn->flags, VNI_FLAG_USE_TWO_LABELS)) + add_l3_ecomm = 1; + + build_evpn_route_extcomm(vpn, &attr, add_l3_ecomm); + build_evpn_route_extcomm(vpn, &attr_sticky, add_l3_ecomm); + build_evpn_route_extcomm(vpn, &attr_def_gw, add_l3_ecomm); + build_evpn_route_extcomm(vpn, &attr_ip6_ll, 0); /* Walk this VNI's route table and update local type-2 routes. For any * routes updated, update corresponding entry in the global table too. @@ -1538,7 +1544,11 @@ static int update_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn) if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE) continue; - if (IS_EVPN_PREFIX_IPADDR_V4(evp)) { + if (IS_EVPN_PREFIX_IPADDR_V6(evp) && + IN6_IS_ADDR_LINKLOCAL(&evp->prefix.ip.ipaddr_v6)) + update_evpn_route_entry(bgp, vpn, afi, safi, rn, + &attr_ip6_ll, 0, 1, &ri, 0); + else { if (evpn_route_is_sticky(bgp, rn)) update_evpn_route_entry(bgp, vpn, afi, safi, rn, &attr_sticky, 0, 1, &ri, @@ -1550,19 +1560,6 @@ static int update_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn) else update_evpn_route_entry(bgp, vpn, afi, safi, rn, &attr, 0, 1, &ri, 0); - } else { - if (evpn_route_is_sticky(bgp, rn)) - update_evpn_route_entry(bgp, vpn, afi, safi, rn, - &attr_sticky_ip6, 0, 1, - &ri, 0); - else if (evpn_route_is_def_gw(bgp, rn)) - update_evpn_route_entry(bgp, vpn, afi, safi, rn, - &attr_def_gw_ip6, 0, 1, - &ri, 0); - else - update_evpn_route_entry(bgp, vpn, afi, safi, rn, - &attr_ip6, 0, 1, &ri, - 0); } /* If a local route exists for this prefix, we need to update @@ -1593,11 +1590,9 @@ static int update_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn) /* Unintern temporary. 
*/ aspath_unintern(&attr.aspath); - aspath_unintern(&attr_ip6.aspath); aspath_unintern(&attr_sticky.aspath); - aspath_unintern(&attr_sticky_ip6.aspath); aspath_unintern(&attr_def_gw.aspath); - aspath_unintern(&attr_def_gw_ip6.aspath); + aspath_unintern(&attr_ip6_ll.aspath); return 0; } @@ -1791,6 +1786,7 @@ static int install_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, { struct bgp_node *rn; struct bgp_info *ri; + struct attr attr; struct attr *attr_new; int ret = 0; struct prefix p; @@ -1827,6 +1823,15 @@ static int install_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, } else return 0; + /* EVPN routes currently only support an IPv4 next hop which corresponds + * to the remote VTEP. When importing into a VRF, if it is an IPv6 host + * route, we have to convert the next hop to an IPv4-mapped address + * for the rest of the code to flow through. + */ + bgp_attr_dup(&attr, parent_ri->attr); + if (afi == AFI_IP6) + evpn_convert_nexthop_to_ipv6(&attr); + /* Check if route entry is already present. */ for (ri = rn->info; ri; ri = ri->next) if (ri->extra @@ -1835,7 +1840,7 @@ static int install_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, if (!ri) { /* Add (or update) attribute to hash. */ - attr_new = bgp_attr_intern(parent_ri->attr); + attr_new = bgp_attr_intern(&attr); /* Create new route with its attribute. */ ri = info_make(parent_ri->type, parent_ri->sub_type, 0, @@ -1850,21 +1855,25 @@ static int install_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, } bgp_info_add(rn, ri); } else { - if (attrhash_cmp(ri->attr, parent_ri->attr) + if (attrhash_cmp(ri->attr, &attr) && !CHECK_FLAG(ri->flags, BGP_INFO_REMOVED)) { bgp_unlock_node(rn); return 0; } /* The attribute has changed. */ /* Add (or update) attribute to hash. */ - attr_new = bgp_attr_intern(parent_ri->attr); + attr_new = bgp_attr_intern(&attr); /* Restore route, if needed. */ if (CHECK_FLAG(ri->flags, BGP_INFO_REMOVED)) bgp_info_restore(rn, ri); /* Mark if nexthop has changed. 
*/ - if (!IPV4_ADDR_SAME(&ri->attr->nexthop, &attr_new->nexthop)) + if ((afi == AFI_IP && + !IPV4_ADDR_SAME(&ri->attr->nexthop, &attr_new->nexthop)) || + (afi == AFI_IP6 && + !IPV6_ADDR_SAME(&ri->attr->mp_nexthop_global, + &attr_new->mp_nexthop_global))) SET_FLAG(ri->flags, BGP_INFO_IGP_CHANGED); /* Unintern existing, set to new. */ diff --git a/bgpd/bgp_evpn_vty.c b/bgpd/bgp_evpn_vty.c index 401529184c..58487a682c 100644 --- a/bgpd/bgp_evpn_vty.c +++ b/bgpd/bgp_evpn_vty.c @@ -2842,7 +2842,7 @@ DEFUN (no_bgp_evpn_advertise_type5, argv_find_and_parse_afi(argv, argc, &idx_afi, &afi); argv_find_and_parse_safi(argv, argc, &idx_safi, &safi); - if (!(afi == AFI_IP) || (afi == AFI_IP6)) { + if (!(afi == AFI_IP || afi == AFI_IP6)) { vty_out(vty, "%%only ipv4 or ipv6 address families are supported"); return CMD_WARNING; diff --git a/lib/ipaddr.h b/lib/ipaddr.h index 98c28008dc..44bde45add 100644 --- a/lib/ipaddr.h +++ b/lib/ipaddr.h @@ -85,4 +85,21 @@ static inline char *ipaddr2str(struct ipaddr *ip, char *buf, int size) } return buf; } + +/* + * Convert IPv4 address to IPv4-mapped IPv6 address which is of the + * form ::FFFF:a.b.c.d (RFC 4291). This IPv6 address can then + * be used to represent the IPv4 address, wherever only an IPv6 address + * is required. 
+ */ +static inline void ipv4_to_ipv4_mapped_ipv6(struct in6_addr *in6, + struct in_addr in) +{ + u_int32_t addr_type = htonl(0xFFFF); + + memset(in6, 0, sizeof(struct in6_addr)); + memcpy((char *)in6 + 8, &addr_type, sizeof(addr_type)); + memcpy((char *)in6 + 12, &in, sizeof(struct in_addr)); +} + #endif /* __IPADDR_H__ */ diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 1204da92fb..72dbfb12fc 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -305,6 +305,8 @@ struct nexthop *route_entry_nexthop_ipv6_ifindex_add(struct route_entry *re, nexthop->type = NEXTHOP_TYPE_IPV6_IFINDEX; nexthop->gate.ipv6 = *ipv6; nexthop->ifindex = ifindex; + if (CHECK_FLAG(re->flags, ZEBRA_FLAG_EVPN_ROUTE)) + SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK); route_entry_nexthop_add(re, nexthop); @@ -421,6 +423,10 @@ static int nexthop_active(afi_t afi, struct route_entry *re, re->nexthop_mtu = 0; } + /* Next hops (remote VTEPs) for EVPN routes are fully resolved. */ + if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_EVPN_RVTEP)) + return 1; + /* Skip nexthops that have been filtered out due to route-map */ /* The nexthops are specific to this route and so the same */ /* nexthop for a different route may not have this flag set */ @@ -859,9 +865,7 @@ static unsigned nexthop_active_check(struct route_node *rn, case NEXTHOP_TYPE_IPV4: case NEXTHOP_TYPE_IPV4_IFINDEX: family = AFI_IP; - if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_EVPN_RVTEP)) - SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); - else if (nexthop_active(AFI_IP, re, nexthop, set, rn)) + if (nexthop_active(AFI_IP, re, nexthop, set, rn)) SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); else UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE); @@ -2554,10 +2558,17 @@ void rib_delete(afi_t afi, safi_t safi, vrf_id_t vrf_id, int type, struct ipaddr vtep_ip; memset(&vtep_ip, 0, sizeof(struct ipaddr)); - vtep_ip.ipa_type = IPADDR_V4; - memcpy(&(vtep_ip.ipaddr_v4), - &(tmp_nh->gate.ipv4), - sizeof(struct in_addr)); + if (afi == AFI_IP) { 
+ vtep_ip.ipa_type = IPADDR_V4; + memcpy(&(vtep_ip.ipaddr_v4), + &(tmp_nh->gate.ipv4), + sizeof(struct in_addr)); + } else { + vtep_ip.ipa_type = IPADDR_V6; + memcpy(&(vtep_ip.ipaddr_v6), + &(tmp_nh->gate.ipv6), + sizeof(struct in6_addr)); + } zebra_vxlan_evpn_vrf_route_del(re->vrf_id, rmac, &vtep_ip, p); } diff --git a/zebra/zserv.c b/zebra/zserv.c index f53baf65d0..0def903803 100644 --- a/zebra/zserv.c +++ b/zebra/zserv.c @@ -1144,6 +1144,7 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) struct nexthop *nexthop = NULL; int i, ret; vrf_id_t vrf_id = 0; + struct ipaddr vtep_ip; s = msg; zapi_route_decode(s, &api); @@ -1176,9 +1177,7 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) re, &api_nh->gate.ipv4, NULL, api_nh->vrf_id); break; - case NEXTHOP_TYPE_IPV4_IFINDEX: { - - struct ipaddr vtep_ip; + case NEXTHOP_TYPE_IPV4_IFINDEX: memset(&vtep_ip, 0, sizeof(struct ipaddr)); if (CHECK_FLAG(api.flags, @@ -1193,7 +1192,7 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) api_nh->vrf_id); /* if this an EVPN route entry, - program the nh as neigh + * program the nh as neigh */ if (CHECK_FLAG(api.flags, ZEBRA_FLAG_EVPN_ROUTE)) { @@ -1208,15 +1207,41 @@ static void zread_route_add(ZAPI_HANDLER_ARGS) &api.prefix); } break; - } case NEXTHOP_TYPE_IPV6: nexthop = route_entry_nexthop_ipv6_add( re, &api_nh->gate.ipv6, api_nh->vrf_id); break; case NEXTHOP_TYPE_IPV6_IFINDEX: + memset(&vtep_ip, 0, sizeof(struct ipaddr)); + if (CHECK_FLAG(api.flags, + ZEBRA_FLAG_EVPN_ROUTE)) { + ifindex = + get_l3vni_svi_ifindex(vrf_id); + } else { + ifindex = api_nh->ifindex; + } + nexthop = route_entry_nexthop_ipv6_ifindex_add( - re, &api_nh->gate.ipv6, api_nh->ifindex, + re, &api_nh->gate.ipv6, ifindex, api_nh->vrf_id); + + /* if this is an EVPN route entry, + * program the nh as neigh + */ + if (CHECK_FLAG(api.flags, + ZEBRA_FLAG_EVPN_ROUTE)) { + SET_FLAG(nexthop->flags, + NEXTHOP_FLAG_EVPN_RVTEP); + vtep_ip.ipa_type = IPADDR_V6; + memcpy(&vtep_ip.ipaddr_v6, + &(api_nh->gate.ipv6), + sizeof(struct 
in6_addr)); + zebra_vxlan_evpn_vrf_route_add( + vrf_id, + &api.rmac, + &vtep_ip, + &api.prefix); + } break; case NEXTHOP_TYPE_BLACKHOLE: nexthop = route_entry_nexthop_blackhole_add(