From 128ea8abbd38eddd2a9bfa18b596acd95f081353 Mon Sep 17 00:00:00 2001 From: vivek Date: Mon, 15 May 2017 14:34:04 -0700 Subject: [PATCH] bgpd: EVPN route handling Core EVPN route handling functionality. This includes support for the following: - interface with zebra to learn about local VNIs and MACIPs as well as to install remote VTEPs (per VNI) and remote MACIPs - create/update/delete EVPN type-2 and type-3 routes - attribute creation, route selection and install - route handling per VNI and for the global routing table - parsing of received EVPN routes and handling by route type - encoding attributes for EVPN routes and EVPN prefix creation (for Updates) Signed-off-by: Vivek Venkatraman Reviewed-by: Donald Sharp Reviewed-by: Daniel Walton --- bgpd/bgp_attr.c | 11 +- bgpd/bgp_attr.h | 12 + bgpd/bgp_attr_evpn.c | 186 +-- bgpd/bgp_attr_evpn.h | 42 +- bgpd/bgp_ecommunity.h | 3 + bgpd/bgp_evpn.c | 2465 ++++++++++++++++++++++++++++++++++--- bgpd/bgp_evpn.h | 40 +- bgpd/bgp_evpn_private.h | 127 ++ bgpd/bgp_evpn_vty.c | 4 +- bgpd/bgp_memory.c | 4 + bgpd/bgp_memory.h | 4 + bgpd/bgp_route.c | 194 ++- bgpd/bgp_route.h | 32 +- bgpd/bgp_updgrp_packet.c | 24 + bgpd/bgp_zebra.c | 82 ++ bgpd/rfapi/rfapi_import.c | 4 +- 16 files changed, 2923 insertions(+), 311 deletions(-) diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c index 9ceded921a..e5bebfff7d 100644 --- a/bgpd/bgp_attr.c +++ b/bgpd/bgp_attr.c @@ -2136,6 +2136,9 @@ bgp_attr_ext_communities (struct bgp_attr_parser_args *args) attr->flag |= ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES); + /* Extract MAC mobility sequence number, if any. 
*/ + attr->extra->mm_seqnum = bgp_attr_mac_mobility_seqnum (attr); + return BGP_ATTR_PARSE_PROCEED; } @@ -2957,9 +2960,11 @@ bgp_packet_mpattr_prefix (struct stream *s, afi_t afi, safi_t safi, stream_put (s, prd->val, 8); stream_put (s, &p->u.prefix, PSIZE (p->prefixlen)); } - else if (safi == SAFI_EVPN) + else if (afi == AFI_L2VPN && safi == SAFI_EVPN) { - bgp_packet_mpattr_route_type_5(s, p, prd, label, attr); + /* EVPN prefix - contents depend on type */ + bgp_evpn_encode_prefix (s, p, prd, label, attr, + addpath_encode, addpath_tx_id); } else if (safi == SAFI_LABELED_UNICAST) { @@ -2976,6 +2981,8 @@ bgp_packet_mpattr_prefix_size (afi_t afi, safi_t safi, struct prefix *p) int size = PSIZE (p->prefixlen); if (safi == SAFI_MPLS_VPN) size += 88; + else if (afi == AFI_L2VPN && safi == SAFI_EVPN) + size += 232; // TODO: Maximum possible for type-2, type-3 and type-5 return size; } diff --git a/bgpd/bgp_attr.h b/bgpd/bgp_attr.h index f3c1b5e3c2..c7dca435ba 100644 --- a/bgpd/bgp_attr.h +++ b/bgpd/bgp_attr.h @@ -140,6 +140,9 @@ struct attr_extra /* MP Nexthop preference */ u_char mp_nexthop_prefer_global; + /* Static MAC for EVPN */ + u_char sticky; + /* route tag */ route_tag_t tag; @@ -157,6 +160,9 @@ struct attr_extra #endif /* EVPN */ struct overlay_index evpn_overlay; + + /* EVPN MAC Mobility sequence number, if any. */ + u_int32_t mm_seqnum; }; /* BGP core attribute structure. */ @@ -333,4 +339,10 @@ bgp_rmap_nhop_changed(u_int32_t out_rmap_flags, u_int32_t in_rmap_flags) CHECK_FLAG(in_rmap_flags, BATTR_RMAP_NEXTHOP_UNCHANGED)) ? 1 : 0); } +static inline u_int32_t +mac_mobility_seqnum (struct attr *attr) +{ + return (attr && attr->extra) ? 
attr->extra->mm_seqnum : 0; +} + #endif /* _QUAGGA_BGP_ATTR_H */ diff --git a/bgpd/bgp_attr_evpn.c b/bgpd/bgp_attr_evpn.c index 6970d5a679..740f517a9e 100644 --- a/bgpd/bgp_attr_evpn.c +++ b/bgpd/bgp_attr_evpn.c @@ -33,21 +33,23 @@ #include "bgpd/bgp_attr_evpn.h" #include "bgpd/bgp_ecommunity.h" #include "bgpd/bgp_evpn.h" +#include "bgpd/bgp_evpn_private.h" void bgp_add_routermac_ecom(struct attr *attr, struct ethaddr *routermac) { - struct ecommunity_val routermac_ecom; + struct ecommunity_val routermac_ecom; - if (attr->extra) { - memset(&routermac_ecom, 0, sizeof(struct ecommunity_val)); - routermac_ecom.val[0] = ECOMMUNITY_ENCODE_EVPN; - routermac_ecom.val[1] = ECOMMUNITY_EVPN_SUBTYPE_ROUTERMAC; - memcpy(&routermac_ecom.val[2], routermac->octet, ETHER_ADDR_LEN); - if (!attr->extra->ecommunity) - attr->extra->ecommunity = ecommunity_new(); - ecommunity_add_val(attr->extra->ecommunity, &routermac_ecom); - ecommunity_str (attr->extra->ecommunity); - } + if (attr->extra) + { + memset(&routermac_ecom, 0, sizeof(struct ecommunity_val)); + routermac_ecom.val[0] = ECOMMUNITY_ENCODE_EVPN; + routermac_ecom.val[1] = ECOMMUNITY_EVPN_SUBTYPE_ROUTERMAC; + memcpy(&routermac_ecom.val[2], routermac->octet, ETHER_ADDR_LEN); + if (!attr->extra->ecommunity) + attr->extra->ecommunity = ecommunity_new(); + ecommunity_add_val(attr->extra->ecommunity, &routermac_ecom); + ecommunity_str (attr->extra->ecommunity); + } } /* converts to an esi @@ -57,85 +59,131 @@ void bgp_add_routermac_ecom(struct attr *attr, struct ethaddr *routermac) */ int str2esi(const char *str, struct eth_segment_id *id) { - unsigned int a[ESI_LEN]; - int i; + unsigned int a[ESI_LEN]; + int i; - if (!str) - return 0; - if (sscanf (str, "%2x:%2x:%2x:%2x:%2x:%2x:%2x:%2x:%2x:%2x", - a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, - a + 6, a + 7, a + 8, a + 9) != ESI_LEN) - { - /* error in incoming str length */ - return 0; - } - /* valid mac address */ - if (!id) - return 1; - for (i = 0; i < ESI_LEN; ++i) - id->val[i] = 
a[i] & 0xff; - return 1; + if (!str) + return 0; + if (sscanf (str, "%2x:%2x:%2x:%2x:%2x:%2x:%2x:%2x:%2x:%2x", + a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, + a + 6, a + 7, a + 8, a + 9) != ESI_LEN) + { + /* error in incoming str length */ + return 0; + } + /* valid mac address */ + if (!id) + return 1; + for (i = 0; i < ESI_LEN; ++i) + id->val[i] = a[i] & 0xff; + return 1; } char *esi2str(struct eth_segment_id *id) { - char *ptr; - u_char *val; + char *ptr; + u_char *val; - if (!id) - return NULL; + if (!id) + return NULL; - val = id->val; - ptr = (char *)XMALLOC(MTYPE_TMP, (ESI_LEN * 2 + ESI_LEN - 1 + 1) * sizeof(char)); + val = id->val; + ptr = (char *)XMALLOC(MTYPE_TMP, (ESI_LEN * 2 + ESI_LEN - 1 + 1) * sizeof(char)); - snprintf(ptr, (ESI_LEN * 2 + ESI_LEN - 1 + 1), - "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", - val[0], val[1], val[2], val[3], val[4], - val[5], val[6], val[7], val[8], val[9]); + snprintf(ptr, (ESI_LEN * 2 + ESI_LEN - 1 + 1), + "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", + val[0], val[1], val[2], val[3], val[4], + val[5], val[6], val[7], val[8], val[9]); - return ptr; + return ptr; } char *ecom_mac2str(char *ecom_mac) { - char *en; + char *en; - en = ecom_mac; - en += 2; - return prefix_mac2str((struct ethaddr *)en, NULL, 0); + en = ecom_mac; + en += 2; + + return prefix_mac2str((struct ethaddr *)en, NULL, 0); +} + +/* + * Fetch and return the sequence number from MAC Mobility extended + * community, if present, else 0. + */ +u_int32_t +bgp_attr_mac_mobility_seqnum (struct attr *attr) +{ + struct ecommunity *ecom; + int i; + + ecom = attr->extra->ecommunity; + if (!ecom || !ecom->size) + return 0; + + /* If there is a MAC Mobility extended community, return its + * sequence number. + * TODO: RFC is silent on handling of multiple MAC mobility extended + * communities for the same route. We will bail out upon the first + * one. 
+ */ + for (i = 0; i < ecom->size; i++) + { + u_char *pnt; + u_char type, sub_type; + u_int32_t seq_num; + + pnt = (ecom->val + (i * ECOMMUNITY_SIZE)); + type = *pnt++; + sub_type = *pnt++; + if (!(type == ECOMMUNITY_ENCODE_EVPN && + sub_type == ECOMMUNITY_EVPN_SUBTYPE_MACMOBILITY)) + continue; + pnt++; + pnt++; + seq_num = (*pnt++ << 24); + seq_num |= (*pnt++ << 16); + seq_num |= (*pnt++ << 8); + seq_num |= (*pnt++); + + return seq_num; + } + + return 0; } /* dst prefix must be AF_INET or AF_INET6 prefix, to forge EVPN prefix */ extern int bgp_build_evpn_prefix(int evpn_type, uint32_t eth_tag, struct prefix *dst) { - struct evpn_addr *p_evpn_p; - struct prefix p2; - struct prefix *src = &p2; + struct evpn_addr *p_evpn_p; + struct prefix p2; + struct prefix *src = &p2; - if (!dst || dst->family == 0) - return -1; - /* store initial prefix in src */ - prefix_copy(src, dst); - memset(dst, 0, sizeof(struct prefix)); - p_evpn_p = &(dst->u.prefix_evpn); - dst->family = AF_ETHERNET; - p_evpn_p->route_type = evpn_type; - if (evpn_type == EVPN_IP_PREFIX) { - p_evpn_p->eth_tag = eth_tag; - p_evpn_p->ip_prefix_length = p2.prefixlen; - if (src->family == AF_INET) { + if (!dst || dst->family == 0) + return -1; + /* store initial prefix in src */ + prefix_copy(src, dst); + memset(dst, 0, sizeof(struct prefix)); + p_evpn_p = &(dst->u.prefix_evpn); + dst->family = AF_ETHERNET; + p_evpn_p->route_type = evpn_type; + if (evpn_type == BGP_EVPN_IP_PREFIX_ROUTE) { + p_evpn_p->eth_tag = eth_tag; + p_evpn_p->ip_prefix_length = p2.prefixlen; + if (src->family == AF_INET) { SET_IPADDR_V4 (&p_evpn_p->ip); - memcpy(&p_evpn_p->ip.ipaddr_v4, &src->u.prefix4, - sizeof(struct in_addr)); - dst->prefixlen = (u_char) PREFIX_LEN_ROUTE_TYPE_5_IPV4; - } else { + memcpy(&p_evpn_p->ip.ipaddr_v4, &src->u.prefix4, + sizeof(struct in_addr)); + dst->prefixlen = (u_char) PREFIX_LEN_ROUTE_TYPE_5_IPV4; + } else { SET_IPADDR_V6 (&p_evpn_p->ip); - memcpy(&p_evpn_p->ip.ipaddr_v6, &src->u.prefix6, - sizeof(struct 
in6_addr)); - dst->prefixlen = (u_char) PREFIX_LEN_ROUTE_TYPE_5_IPV6; - } - } else - return -1; - return 0; + memcpy(&p_evpn_p->ip.ipaddr_v6, &src->u.prefix6, + sizeof(struct in6_addr)); + dst->prefixlen = (u_char) PREFIX_LEN_ROUTE_TYPE_5_IPV6; + } + } else + return -1; + return 0; } diff --git a/bgpd/bgp_attr_evpn.h b/bgpd/bgp_attr_evpn.h index 3a93f6ae62..8978731d5c 100644 --- a/bgpd/bgp_attr_evpn.h +++ b/bgpd/bgp_attr_evpn.h @@ -22,31 +22,37 @@ #define _QUAGGA_BGP_ATTR_EVPN_H /* value of first byte of ESI */ -#define ESI_TYPE_ARBITRARY 0 /* */ -#define ESI_TYPE_LACP 1 /* <> */ -#define ESI_TYPE_BRIDGE 2 /* ::00 */ -#define ESI_TYPE_MAC 3 /* : */ -#define ESI_TYPE_ROUTER 4 /* : */ -#define ESI_TYPE_AS 5 /* : */ +#define ESI_TYPE_ARBITRARY 0 /* */ +#define ESI_TYPE_LACP 1 /* <> */ +#define ESI_TYPE_BRIDGE 2 /* ::00 */ +#define ESI_TYPE_MAC 3 /* : */ +#define ESI_TYPE_ROUTER 4 /* : */ +#define ESI_TYPE_AS 5 /* : */ + + #define MAX_ESI {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff} #define ESI_LEN 10 #define MAX_ET 0xffffffff + u_long eth_tag_id; struct attr; -struct eth_segment_id { - u_char val[ESI_LEN]; +struct eth_segment_id +{ + u_char val[ESI_LEN]; }; -union gw_addr { - struct in_addr ipv4; - struct in6_addr ipv6; +union gw_addr +{ + struct in_addr ipv4; + struct in6_addr ipv6; }; -struct bgp_route_evpn { - struct eth_segment_id eth_s_id; - union gw_addr gw_ip; +struct bgp_route_evpn +{ + struct eth_segment_id eth_s_id; + union gw_addr gw_ip; }; extern int str2esi(const char *str, struct eth_segment_id *id); @@ -55,5 +61,9 @@ extern char *ecom_mac2str(char *ecom_mac); extern void bgp_add_routermac_ecom(struct attr *attr, struct ethaddr *routermac); extern int bgp_build_evpn_prefix(int type, uint32_t eth_tag, - struct prefix *dst); -#endif /* _QUAGGA_BGP_ATTR_EVPN_H */ + struct prefix *dst); + +extern u_int32_t +bgp_attr_mac_mobility_seqnum (struct attr *attr); + +#endif /* _QUAGGA_BGP_ATTR_EVPN_H */ diff --git a/bgpd/bgp_ecommunity.h 
b/bgpd/bgp_ecommunity.h index 8981ea3465..9281c0d995 100644 --- a/bgpd/bgp_ecommunity.h +++ b/bgpd/bgp_ecommunity.h @@ -32,12 +32,15 @@ #define ECOMMUNITY_ROUTE_TARGET 0x02 #define ECOMMUNITY_SITE_ORIGIN 0x03 +/* Low-order octet of the Extended Communities type field for EVPN types */ #define ECOMMUNITY_EVPN_SUBTYPE_MACMOBILITY 0x00 #define ECOMMUNITY_EVPN_SUBTYPE_ESI_LABEL 0x01 #define ECOMMUNITY_EVPN_SUBTYPE_ES_IMPORT_RT 0x02 #define ECOMMUNITY_EVPN_SUBTYPE_ROUTERMAC 0x03 #define ECOMMUNITY_EVPN_SUBTYPE_DEF_GW 0x0d +#define ECOMMUNITY_EVPN_SUBTYPE_MACMOBILITY_FLAG_STICKY 0x01 + /* Low-order octet of the Extended Communities type field for OPAQUE types */ #define ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP 0x0c diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index 0e191ffff1..d021def1b8 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -1,7 +1,8 @@ /* Ethernet-VPN Packet and vty Processing File * Copyright (C) 2016 6WIND + * Copyright (C) 2017 Cumulus Networks, Inc. * - * This file is part of FRRouting. + * This file is part of FRR. * * FRRouting is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -29,6 +30,7 @@ #include "hash.h" #include "jhash.h" #include "bitfield.h" +#include "zclient.h" #include "bgpd/bgp_attr_evpn.h" #include "bgpd/bgpd.h" @@ -40,6 +42,27 @@ #include "bgpd/bgp_evpn.h" #include "bgpd/bgp_evpn_private.h" #include "bgpd/bgp_ecommunity.h" +#include "bgpd/bgp_encap_types.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_aspath.h" + +/* + * Definitions and external declarations. + */ +extern struct zclient *zclient; + +DEFINE_QOBJ_TYPE(bgpevpn) + + +/* + * Static function declarations + */ +static void +delete_evpn_route_entry (struct bgp *bgp, struct bgpevpn *vpn, + afi_t afi, safi_t safi, struct bgp_node *rn, + struct bgp_info **ri); +static int +delete_all_vni_routes (struct bgp *bgp, struct bgpevpn *vpn); /* * Private functions. 
@@ -112,12 +135,1758 @@ import_rt_hash_cmp (const void *p1, const void *p2) return(memcmp(irt1->rt.val, irt2->rt.val, ECOMMUNITY_SIZE) == 0); } +/* + * Create a new import_rt + */ +static struct irt_node * +import_rt_new (struct bgp *bgp, struct ecommunity_val *rt) +{ + struct irt_node *irt; + + if (!bgp) + return NULL; + + irt = XCALLOC (MTYPE_BGP_EVPN_IMPORT_RT, sizeof (struct irt_node)); + if (!irt) + return NULL; + + irt->rt = *rt; + irt->vnis = list_new (); + + /* Add to hash */ + if (!hash_get(bgp->import_rt_hash, irt, hash_alloc_intern)) + { + XFREE(MTYPE_BGP_EVPN_IMPORT_RT, irt); + return NULL; + } + + return irt; +} + +/* + * Free the import rt node + */ +static void +import_rt_free (struct bgp *bgp, struct irt_node *irt) +{ + hash_release(bgp->import_rt_hash, irt); + XFREE(MTYPE_BGP_EVPN_IMPORT_RT, irt); +} + +/* + * Function to lookup Import RT node - used to map a RT to set of + * VNIs importing routes with that RT. + */ +static struct irt_node * +lookup_import_rt (struct bgp *bgp, struct ecommunity_val *rt) +{ + struct irt_node *irt; + struct irt_node tmp; + + memset(&tmp, 0, sizeof(struct irt_node)); + memcpy(&tmp.rt, rt, ECOMMUNITY_SIZE); + irt = hash_lookup(bgp->import_rt_hash, &tmp); + return irt; +} + +/* + * Is specified VNI present on the RT's list of "importing" VNIs? + */ +static int +is_vni_present_in_irt_vnis (struct list *vnis, struct bgpevpn *vpn) +{ + struct listnode *node, *nnode; + struct bgpevpn *tmp_vpn; + + for (ALL_LIST_ELEMENTS (vnis, node, nnode, tmp_vpn)) + { + if (tmp_vpn == vpn) + return 1; + } + + return 0; +} + +/* + * Compare Route Targets. 
+ */ +static int +evpn_route_target_cmp (struct ecommunity *ecom1, struct ecommunity *ecom2) +{ + if (ecom1 && !ecom2) + return -1; + + if (!ecom1 && ecom2) + return 1; + + if (!ecom1 && !ecom2) + return 0; + + if (ecom1->str && !ecom2->str) + return -1; + + if (!ecom1->str && ecom2->str) + return 1; + + if (!ecom1->str && !ecom2->str) + return 0; + + return strcmp(ecom1->str, ecom2->str); +} + +/* + * Mask off global-admin field of specified extended community (RT), + * just retain the local-admin field. + */ +static inline void +mask_ecom_global_admin (struct ecommunity_val *dst, + struct ecommunity_val *src) +{ + u_char type; + + type = src->val[0]; + dst->val[0] = 0; + if (type == ECOMMUNITY_ENCODE_AS) + { + dst->val[2] = dst->val[3] = 0; + } + else if (type == ECOMMUNITY_ENCODE_AS4 || + type == ECOMMUNITY_ENCODE_IP) + { + dst->val[2] = dst->val[3] = 0; + dst->val[4] = dst->val[5] = 0; + } +} + +/* + * Map one RT to specified VNI. + */ +static void +map_vni_to_rt (struct bgp *bgp, struct bgpevpn *vpn, + struct ecommunity_val *eval) +{ + struct irt_node *irt; + struct ecommunity_val eval_tmp; + + /* If using "automatic" RT, we only care about the local-admin sub-field. + * This is to facilitate using VNI as the RT for EBGP peering too. + */ + memcpy (&eval_tmp, eval, ECOMMUNITY_SIZE); + if (!is_import_rt_configured (vpn)) + mask_ecom_global_admin (&eval_tmp, eval); + + irt = lookup_import_rt (bgp, &eval_tmp); + if (irt && irt->vnis) + if (is_vni_present_in_irt_vnis (irt->vnis, vpn)) + /* Already mapped. */ + return; + + if (!irt) + { + irt = import_rt_new (bgp, &eval_tmp); + assert (irt); + } + + /* Add VNI to the hash list for this RT. */ + listnode_add (irt->vnis, vpn); +} + +/* + * Unmap specified VNI from specified RT. If there are no other + * VNIs for this RT, then the RT hash is deleted. + */ +static void +unmap_vni_from_rt (struct bgp *bgp, struct bgpevpn *vpn, + struct irt_node *irt) +{ + /* Delete VNI from hash list for this RT. 
*/ + listnode_delete (irt->vnis, vpn); + if (!listnode_head (irt->vnis)) + { + list_free (irt->vnis); + import_rt_free (bgp, irt); + } +} + +/* + * Create RT extended community automatically from passed information: + * of the form AS:VNI. + * NOTE: We use only the lower 16 bits of the AS. This is sufficient as + * the need is to get a RT value that will be unique across different + * VNIs but the same across routers (in the same AS) for a particular + * VNI. + */ +static void +form_auto_rt (struct bgp *bgp, struct bgpevpn *vpn, + struct list *rtl) +{ + struct ecommunity_val eval; + struct ecommunity *ecomadd; + + encode_route_target_as ((bgp->as & 0xFFFF), vpn->vni, &eval); + + ecomadd = ecommunity_new (); + ecommunity_add_val (ecomadd, &eval); + listnode_add_sort (rtl, ecomadd); +} + +/* + * Derive RD and RT for a VNI automatically. Invoked at the time of + * creation of a VNI. + */ +static void +derive_rd_rt_for_vni (struct bgp *bgp, struct bgpevpn *vpn) +{ + bgp_evpn_derive_auto_rd (bgp, vpn); + bgp_evpn_derive_auto_rt_import (bgp, vpn); + bgp_evpn_derive_auto_rt_export (bgp, vpn); +} + +/* + * Add (update) or delete MACIP from zebra. + */ +static int +bgp_zebra_send_remote_macip (struct bgp *bgp, struct bgpevpn *vpn, + struct prefix_evpn *p, + struct in_addr remote_vtep_ip, + int add) +{ + struct stream *s; + int ipa_len; + char buf1[ETHER_ADDR_STRLEN]; + char buf2[INET6_ADDRSTRLEN]; + char buf3[INET6_ADDRSTRLEN]; + + /* Check socket. */ + if (!zclient || zclient->sock < 0) + return 0; + + /* Don't try to register if Zebra doesn't know of this instance. */ + if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp)) + return 0; + + s = zclient->obuf; + stream_reset (s); + + zclient_create_header (s, add ? ZEBRA_REMOTE_MACIP_ADD : ZEBRA_REMOTE_MACIP_DEL, + bgp->vrf_id); + stream_putl(s, vpn->vni); + stream_put (s, &p->prefix.mac.octet, ETHER_ADDR_LEN); /* Mac Addr */ + /* IP address length and IP address, if any. 
*/ + if (IS_EVPN_PREFIX_IPADDR_NONE(p)) + stream_putl(s, 0); + else + { + ipa_len = IS_EVPN_PREFIX_IPADDR_V4(p) ? + IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN; + stream_putl(s, ipa_len); + stream_put (s, &p->prefix.ip.ip.addr, ipa_len); + } + stream_put_in_addr(s, &remote_vtep_ip); + + stream_putw_at (s, 0, stream_get_endp (s)); + + if (bgp_debug_zebra (NULL)) + zlog_debug("Tx %s MACIP, VNI %u MAC %s IP %s remote VTEP %s", + add ? "ADD" : "DEL", vpn->vni, + prefix_mac2str (&p->prefix.mac, buf1, sizeof(buf1)), + ipaddr2str (&p->prefix.ip, buf3, sizeof(buf3)), + inet_ntop(AF_INET, &remote_vtep_ip, buf2, sizeof(buf2))); + + return zclient_send_message(zclient); +} + +/* + * Add (update) or delete remote VTEP from zebra. + */ +static int +bgp_zebra_send_remote_vtep (struct bgp *bgp, struct bgpevpn *vpn, + struct prefix_evpn *p, int add) +{ + struct stream *s; + + /* Check socket. */ + if (!zclient || zclient->sock < 0) + return 0; + + /* Don't try to register if Zebra doesn't know of this instance. */ + if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp)) + return 0; + + s = zclient->obuf; + stream_reset (s); + + zclient_create_header (s, add ? ZEBRA_REMOTE_VTEP_ADD : ZEBRA_REMOTE_VTEP_DEL, + bgp->vrf_id); + stream_putl(s, vpn->vni); + if (IS_EVPN_PREFIX_IPADDR_V4(p)) + stream_put_in_addr(s, &p->prefix.ip.ipaddr_v4); + else if (IS_EVPN_PREFIX_IPADDR_V6(p)) + { + zlog_err ("Bad remote IP when trying to %s remote VTEP for VNI %u", + add ? "ADD" : "DEL", vpn->vni); + return -1; + } + + stream_putw_at (s, 0, stream_get_endp (s)); + + if (bgp_debug_zebra (NULL)) + zlog_debug("Tx %s Remote VTEP, VNI %u remote VTEP %s", + add ? "ADD" : "DEL", vpn->vni, + inet_ntoa (p->prefix.ip.ipaddr_v4)); + + return zclient_send_message(zclient); +} + +/* + * Build extended communities for EVPN route. RT and ENCAP are + * applicable to all routes. 
+ */
+static void
+build_evpn_route_extcomm (struct bgpevpn *vpn, struct attr *attr)
+{
+  struct attr_extra *attre;
+  struct ecommunity ecom_encap;
+  struct ecommunity_val eval;
+  bgp_encap_types tnl_type;
+  struct listnode *node, *nnode;
+  struct ecommunity *ecom;
+
+  attre = bgp_attr_extra_get (attr);
+
+  /* Encap: a one-entry list holding the VXLAN tunnel-type community. */
+  tnl_type = BGP_ENCAP_TYPE_VXLAN;
+  memset (&ecom_encap, 0, sizeof (ecom_encap));
+  encode_encap_extcomm (tnl_type, &eval);
+  ecom_encap.size = 1;
+  ecom_encap.val = (u_int8_t *)eval.val;
+
+  /* Add Encap: the dup replaces attre->ecommunity, it is not a merge. */
+  attre->ecommunity = ecommunity_dup (&ecom_encap);
+
+  /* Add the export RTs */
+  for (ALL_LIST_ELEMENTS (vpn->export_rtl, node, nnode, ecom))
+    attre->ecommunity = ecommunity_merge (attre->ecommunity, ecom);
+
+  attr->flag |= ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES);
+}
+
+/*
+ * Add MAC mobility extended community to attribute.
+ *
+ * Encodes a MAC Mobility community carrying 'seq_num' (the encoder's first
+ * argument is passed as 0).  An entry already present in the attribute's
+ * extended communities is overwritten in place; otherwise the new entry is
+ * merged into the list.
+ */
+static void
+add_mac_mobility_to_attr (u_int32_t seq_num, struct attr *attr)
+{
+  struct attr_extra *attre;
+  struct ecommunity ecom_tmp;
+  struct ecommunity_val eval;
+  u_int8_t *ecom_val_ptr = NULL;
+  u_int8_t *pnt;
+  int i;
+
+  attre = bgp_attr_extra_get (attr);
+
+  /* Build MM */
+  encode_mac_mobility_extcomm (0, seq_num, &eval);
+
+  /* Find current MM ecommunity: octet 0 is the type, octet 1 the sub-type. */
+  if (attre->ecommunity)
+    {
+      for (i = 0; i < attre->ecommunity->size; i++)
+        {
+          pnt = attre->ecommunity->val + (i * ECOMMUNITY_SIZE);
+          if (pnt[0] == ECOMMUNITY_ENCODE_EVPN &&
+              pnt[1] == ECOMMUNITY_EVPN_SUBTYPE_MACMOBILITY)
+            {
+              /* Remember the entry as a byte pointer: 'val' is an array of
+               * raw octets, not of 'struct ecommunity' objects.
+               */
+              ecom_val_ptr = pnt;
+              break;
+            }
+        }
+    }
+
+  /* Update the existing MM ecommunity */
+  if (ecom_val_ptr)
+    memcpy (ecom_val_ptr, eval.val, ECOMMUNITY_SIZE);
+
+  /* Add MM to existing */
+  else
+    {
+      memset (&ecom_tmp, 0, sizeof (ecom_tmp));
+      ecom_tmp.size = 1;
+      ecom_tmp.val = (u_int8_t *)eval.val;
+
+      attre->ecommunity = ecommunity_merge (attre->ecommunity, &ecom_tmp);
+    }
+}
+
+/*
+ * Install EVPN route into zebra: a MAC/IP route is sent as a remote MACIP
+ * add, any other route type as a remote VTEP add.  Returns the result of
+ * whichever send routine was used.
+ */
+static int
+evpn_zebra_install (struct bgp *bgp, struct bgpevpn *vpn,
+                    struct prefix_evpn *p, struct in_addr remote_vtep_ip)
+{
+  if (p->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE)
+    return bgp_zebra_send_remote_vtep (bgp, vpn, p, 1);
+
+  return bgp_zebra_send_remote_macip (bgp, vpn, p, remote_vtep_ip, 1);
+}
+
+/*
+ * Uninstall EVPN route from zebra: a MAC/IP route is sent as a remote MACIP
+ * delete, any other route type as a remote VTEP delete.  Returns the result
+ * of whichever send routine was used.
+ */
+static int
+evpn_zebra_uninstall (struct bgp *bgp, struct bgpevpn *vpn,
+                      struct prefix_evpn *p, struct in_addr remote_vtep_ip)
+{
+  if (p->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE)
+    return bgp_zebra_send_remote_vtep (bgp, vpn, p, 0);
+
+  return bgp_zebra_send_remote_macip (bgp, vpn, p, remote_vtep_ip, 0);
+}
+
+/*
+ * MAC mobility: a "remote" best route has supplanted the prior "local"
+ * best route 'old_local' on VNI node 'rn'.  Delete the local entry from
+ * the global EVPN table, schedule that node so the route is withdrawn
+ * from peers, and mark 'old_local' for deletion in the VNI table.
+ */
+static void
+evpn_delete_old_local_route (struct bgp *bgp, struct bgpevpn *vpn,
+                             struct bgp_node *rn, struct bgp_info *old_local)
+{
+  struct bgp_info *global_ri;
+  struct bgp_node *grn;
+
+  /* The global EVPN routing table is a 2-level tree (RD-level +
+   * Prefix-level) similar to L3VPN routes; look the node up under vpn->prd.
+   */
+  grn = bgp_afi_node_lookup (bgp->rib[AFI_L2VPN][SAFI_EVPN],
+                             AFI_L2VPN, SAFI_EVPN,
+                             (struct prefix *)&rn->p, &vpn->prd);
+  if (grn)
+    {
+      /* Delete route entry in the global EVPN table; global_ri comes
+       * back NULL when no matching entry was found there.
+       */
+      delete_evpn_route_entry (bgp, vpn, AFI_L2VPN, SAFI_EVPN,
+                               grn, &global_ri);
+
+      /* Withdraws to peers happen from this table. */
+      if (global_ri)
+        bgp_process (bgp, grn, AFI_L2VPN, SAFI_EVPN);
+      bgp_unlock_node (grn);
+    }
+
+  /* Delete route entry in the VNI route table, caller to remove. */
+  bgp_info_delete (rn, old_local);
+}
+
+/*
+ * Calculate the best path for an EVPN route. Install/update best path in zebra,
+ * if appropriate.
+ */ +static int +evpn_route_select_install (struct bgp *bgp, struct bgpevpn *vpn, + struct bgp_node *rn) +{ + struct bgp_info *old_select, *new_select; + struct bgp_info_pair old_and_new; + afi_t afi = AFI_L2VPN; + safi_t safi = SAFI_EVPN; + int ret = 0; + + /* Compute the best path. */ + bgp_best_selection (bgp, rn, &bgp->maxpaths[afi][safi], + &old_and_new, afi, safi); + old_select = old_and_new.old; + new_select = old_and_new.new; + + /* If the best path hasn't changed - see if there is still something to update + * to zebra RIB. + */ + if (old_select && old_select == new_select && + old_select->type == ZEBRA_ROUTE_BGP && + old_select->sub_type == BGP_ROUTE_NORMAL && + !CHECK_FLAG(rn->flags, BGP_NODE_USER_CLEAR) && + !CHECK_FLAG(old_select->flags, BGP_INFO_ATTR_CHANGED) && + !bgp->addpath_tx_used[afi][safi]) + { + if (bgp_zebra_has_route_changed (rn, old_select)) + ret = evpn_zebra_install (bgp, vpn, (struct prefix_evpn *)&rn->p, + old_select->attr->nexthop); + UNSET_FLAG (old_select->flags, BGP_INFO_MULTIPATH_CHG); + bgp_zebra_clear_route_change_flags (rn); + return ret; + } + + /* If the user did a "clear" this flag will be set */ + UNSET_FLAG(rn->flags, BGP_NODE_USER_CLEAR); + + /* bestpath has changed; update relevant fields and install or uninstall + * into the zebra RIB. + */ + if (old_select || new_select) + bgp_bump_version(rn); + + if (old_select) + bgp_info_unset_flag (rn, old_select, BGP_INFO_SELECTED); + if (new_select) + { + bgp_info_set_flag (rn, new_select, BGP_INFO_SELECTED); + bgp_info_unset_flag (rn, new_select, BGP_INFO_ATTR_CHANGED); + UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG); + } + + if (new_select + && new_select->type == ZEBRA_ROUTE_BGP + && new_select->sub_type == BGP_ROUTE_NORMAL) + { + ret = evpn_zebra_install (bgp, vpn, (struct prefix_evpn *) &rn->p, + new_select->attr->nexthop); + /* If an old best existed and it was a "local" route, the only reason + * it would be supplanted is due to MAC mobility procedures. 
So, we + * need to do an implicit delete and withdraw that route from peers. + */ + if (old_select + && old_select->peer == bgp->peer_self + && old_select->type == ZEBRA_ROUTE_BGP + && old_select->sub_type == BGP_ROUTE_STATIC) + evpn_delete_old_local_route (bgp, vpn, rn, old_select); + } + else + { + if (old_select + && old_select->type == ZEBRA_ROUTE_BGP + && old_select->sub_type == BGP_ROUTE_NORMAL) + ret = evpn_zebra_uninstall (bgp, vpn, (struct prefix_evpn *) &rn->p, + old_select->attr->nexthop); + } + + /* Clear any route change flags. */ + bgp_zebra_clear_route_change_flags (rn); + + /* Reap old select bgp_info, if it has been removed */ + if (old_select && CHECK_FLAG (old_select->flags, BGP_INFO_REMOVED)) + bgp_info_reap (rn, old_select); + + return ret; +} + +/* + * Create or update EVPN route entry. This could be in the VNI route table + * or the global route table. + */ +static int +update_evpn_route_entry (struct bgp *bgp, struct bgpevpn *vpn, afi_t afi, + safi_t safi, struct bgp_node *rn, struct attr *attr, + int add, int vni_table, struct bgp_info **ri) +{ + struct bgp_info *tmp_ri; + struct bgp_info *local_ri, *remote_ri; + struct attr *attr_new; + mpls_label_t label = MPLS_INVALID_LABEL; + int route_change = 1; + + *ri = NULL; + + /* See if this is an update of an existing route, or a new add. Also, + * identify if already known from remote, and if so, the one with the + * highest sequence number; this is only when adding to the VNI routing + * table. 
+ */ + local_ri = remote_ri = NULL; + for (tmp_ri = rn->info; tmp_ri; tmp_ri = tmp_ri->next) + { + if (tmp_ri->peer == bgp->peer_self + && tmp_ri->type == ZEBRA_ROUTE_BGP + && tmp_ri->sub_type == BGP_ROUTE_STATIC) + local_ri = tmp_ri; + if (vni_table) + { + if (tmp_ri->type == ZEBRA_ROUTE_BGP + && tmp_ri->sub_type == BGP_ROUTE_NORMAL + && CHECK_FLAG (tmp_ri->flags, BGP_INFO_VALID)) + { + if (!remote_ri) + remote_ri = tmp_ri; + else if (mac_mobility_seqnum (tmp_ri->attr) > + mac_mobility_seqnum (remote_ri->attr)) + remote_ri = tmp_ri; + } + } + } + + /* If route doesn't exist already, create a new one, if told to. + * Otherwise act based on whether the attributes of the route have + * changed or not. + */ + if (!local_ri && !add) + return 0; + + if (!local_ri) + { + /* When learnt locally for the first time but already known from + * remote, we have to initiate appropriate MAC mobility steps. This + * is applicable when updating the VNI routing table. + */ + if (remote_ri) + { + u_int32_t cur_seqnum; + + /* Add MM extended community to route. */ + cur_seqnum = mac_mobility_seqnum (remote_ri->attr); + add_mac_mobility_to_attr (cur_seqnum + 1, attr); + } + + /* Add (or update) attribute to hash. */ + attr_new = bgp_attr_intern (attr); + + /* Extract MAC mobility sequence number, if any. */ + attr_new->extra->mm_seqnum = bgp_attr_mac_mobility_seqnum (attr_new); + + /* Create new route with its attribute. */ + tmp_ri = info_make (ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, 0, + bgp->peer_self, attr_new, rn); + SET_FLAG (tmp_ri->flags, BGP_INFO_VALID); + bgp_info_extra_get(tmp_ri); + + /* The VNI goes into the 'label' field of the route */ + vni2label (vpn->vni, &label); + + memcpy (&tmp_ri->extra->label, &label, BGP_LABEL_BYTES); + bgp_info_add (rn, tmp_ri); + } + else + { + tmp_ri = local_ri; + if (attrhash_cmp (tmp_ri->attr, attr) && + !CHECK_FLAG(tmp_ri->flags, BGP_INFO_REMOVED)) + route_change = 0; + else + { + /* The attribute has changed. 
*/ + /* Add (or update) attribute to hash. */ + attr_new = bgp_attr_intern (attr); + bgp_info_set_flag (rn, tmp_ri, BGP_INFO_ATTR_CHANGED); + + /* Restore route, if needed. */ + if (CHECK_FLAG(tmp_ri->flags, BGP_INFO_REMOVED)) + bgp_info_restore(rn, tmp_ri); + + /* Unintern existing, set to new. */ + bgp_attr_unintern (&tmp_ri->attr); + tmp_ri->attr = attr_new; + tmp_ri->uptime = bgp_clock (); + } + } + + /* Return back the route entry. */ + *ri = tmp_ri; + return route_change; +} + +/* + * Create or update EVPN route (of type based on prefix) for specified VNI + * and schedule for processing. + */ +static int +update_evpn_route (struct bgp *bgp, struct bgpevpn *vpn, + struct prefix_evpn *p) +{ + struct bgp_node *rn; + struct attr attr; + struct attr *attr_new; + struct bgp_info *ri; + afi_t afi = AFI_L2VPN; + safi_t safi = SAFI_EVPN; + int route_change; + + memset (&attr, 0, sizeof (struct attr)); + + /* Build path-attribute for this route. */ + bgp_attr_default_set (&attr, BGP_ORIGIN_IGP); + attr.nexthop = vpn->originator_ip; + attr.extra->mp_nexthop_global_in = vpn->originator_ip; + attr.extra->mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; + + /* Set up RT and ENCAP extended community. */ + build_evpn_route_extcomm (vpn, &attr); + + /* First, create (or fetch) route node within the VNI. */ + /* NOTE: There is no RD here. */ + rn = bgp_node_get (vpn->route_table, (struct prefix *)p); + + /* Create or update route entry. */ + route_change = update_evpn_route_entry (bgp, vpn, afi, safi, + rn, &attr, 1, 1, &ri); + assert (ri); + attr_new = ri->attr; + + /* Perform route selection; this is just to set the flags correctly + * as local route in the VNI always wins. + */ + evpn_route_select_install (bgp, vpn, rn); + bgp_unlock_node (rn); + + /* If this is a new route or some attribute has changed, export the + * route to the global table. The route will be advertised to peers + * from there. 
Note that this table is a 2-level tree (RD-level + + * Prefix-level) similar to L3VPN routes. + */ + if (route_change) + { + struct bgp_info *global_ri; + + rn = bgp_afi_node_get (bgp->rib[afi][safi], afi, safi, + (struct prefix *)p, &vpn->prd); + update_evpn_route_entry (bgp, vpn, afi, safi, rn, + attr_new, 1, 0, &global_ri); + + /* Schedule for processing and unlock node. */ + bgp_process (bgp, rn, afi, safi); + bgp_unlock_node (rn); + } + + /* Unintern temporary. */ + aspath_unintern (&attr.aspath); + bgp_attr_extra_free (&attr); + + return 0; +} + +/* + * Delete EVPN route entry. This could be in the VNI route table + * or the global route table. + */ +static void +delete_evpn_route_entry (struct bgp *bgp, struct bgpevpn *vpn, + afi_t afi, safi_t safi, struct bgp_node *rn, + struct bgp_info **ri) +{ + struct bgp_info *tmp_ri; + + *ri = NULL; + + /* Now, find matching route. */ + for (tmp_ri = rn->info; tmp_ri; tmp_ri = tmp_ri->next) + if (tmp_ri->peer == bgp->peer_self + && tmp_ri->type == ZEBRA_ROUTE_BGP + && tmp_ri->sub_type == BGP_ROUTE_STATIC) + break; + + *ri = tmp_ri; + + /* Mark route for delete. */ + if (tmp_ri) + bgp_info_delete (rn, tmp_ri); +} + +/* + * Delete EVPN route (of type based on prefix) for specified VNI and + * schedule for processing. + */ +static int +delete_evpn_route (struct bgp *bgp, struct bgpevpn *vpn, + struct prefix_evpn *p) +{ + struct bgp_node *rn, *global_rn; + struct bgp_info *ri; + afi_t afi = AFI_L2VPN; + safi_t safi = SAFI_EVPN; + + /* First, locate the route node within the VNI. If it doesn't exist, there + * is nothing further to do. + */ + /* NOTE: There is no RD here. */ + rn = bgp_node_lookup (vpn->route_table, (struct prefix *)p); + if (!rn) + return 0; + + /* Next, locate route node in the global EVPN routing table. Note that + * this table is a 2-level tree (RD-level + Prefix-level) similar to + * L3VPN routes. 
+ */ + global_rn = bgp_afi_node_lookup (bgp->rib[afi][safi], afi, safi, + (struct prefix *)p, &vpn->prd); + if (global_rn) + { + /* Delete route entry in the global EVPN table. */ + delete_evpn_route_entry (bgp, vpn, afi, safi, global_rn, &ri); + + /* Schedule for processing - withdraws to peers happen from + * this table. + */ + if (ri) + bgp_process (bgp, global_rn, afi, safi); + bgp_unlock_node (global_rn); + } + + /* Delete route entry in the VNI route table. This can just be removed. */ + delete_evpn_route_entry (bgp, vpn, afi, safi, rn, &ri); + if (ri) + bgp_info_reap (rn, ri); + bgp_unlock_node (rn); + + return 0; +} + +/* + * Update all type-2 (MACIP) local routes for this VNI - these should also + * be scheduled for advertise to peers. + */ +static int +update_all_type2_routes (struct bgp *bgp, struct bgpevpn *vpn) +{ + afi_t afi; + safi_t safi; + struct bgp_node *rn; + struct bgp_info *ri; + struct attr attr; + struct attr *attr_new; + + afi = AFI_L2VPN; + safi = SAFI_EVPN; + memset (&attr, 0, sizeof (struct attr)); + + /* Build path-attribute - all type-2 routes for this VNI will share the + * same path attribute. + */ + bgp_attr_default_set (&attr, BGP_ORIGIN_IGP); + attr.nexthop = vpn->originator_ip; + attr.extra->mp_nexthop_global_in = vpn->originator_ip; + attr.extra->mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; + + /* Set up RT, ENCAP extended community. */ + build_evpn_route_extcomm (vpn, &attr); + + /* Walk this VNI's route table and update local type-2 routes. For any + * routes updated, update corresponding entry in the global table too. 
+ */ + for (rn = bgp_table_top (vpn->route_table); rn; rn = bgp_route_next (rn)) + { + struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p; + struct bgp_node *rd_rn; + struct bgp_info *global_ri; + + if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE) + continue; + + update_evpn_route_entry (bgp, vpn, afi, safi, rn, &attr, 0, 1, &ri); + + /* If a local route exists for this prefix, we need to update + * the global routing table too. + */ + if (!ri) + continue; + + /* Perform route selection; this is just to set the flags correctly + * as local route in the VNI always wins. + */ + evpn_route_select_install (bgp, vpn, rn); + + attr_new = ri->attr; + + /* Update route in global routing table. */ + rd_rn = bgp_afi_node_get (bgp->rib[afi][safi], afi, safi, + (struct prefix *)evp, &vpn->prd); + assert (rd_rn); + update_evpn_route_entry (bgp, vpn, afi, safi, rd_rn, + attr_new, 0, 0, &global_ri); + + /* Schedule for processing and unlock node. */ + bgp_process (bgp, rd_rn, afi, safi); + bgp_unlock_node (rd_rn); + } + + /* Unintern temporary. */ + aspath_unintern (&attr.aspath); + bgp_attr_extra_free (&attr); + + return 0; +} + +/* + * Delete all type-2 (MACIP) local routes for this VNI - only from the + * global routing table. These are also scheduled for withdraw from peers. 
+ */ +static int +delete_global_type2_routes (struct bgp *bgp, struct bgpevpn *vpn) +{ + afi_t afi; + safi_t safi; + struct bgp_node *rdrn, *rn; + struct bgp_table *table; + struct bgp_info *ri; + + afi = AFI_L2VPN; + safi = SAFI_EVPN; + + rdrn = bgp_node_lookup (bgp->rib[afi][safi], (struct prefix *) &vpn->prd); + if (rdrn && rdrn->info) + { + table = (struct bgp_table *)rdrn->info; + for (rn = bgp_table_top (table); rn; rn = bgp_route_next (rn)) + { + struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p; + + if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE) + continue; + + delete_evpn_route_entry (bgp, vpn, afi, safi, rn, &ri); + if (ri) + bgp_process (bgp, rn, afi, safi); + } + } + + /* Unlock RD node. */ + if (rdrn) + bgp_unlock_node (rdrn); + + return 0; +} + +/* + * Delete all type-2 (MACIP) local routes for this VNI - from the global + * table as well as the per-VNI route table. + */ +static int +delete_all_type2_routes (struct bgp *bgp, struct bgpevpn *vpn) +{ + afi_t afi; + safi_t safi; + struct bgp_node *rn; + struct bgp_info *ri; + + afi = AFI_L2VPN; + safi = SAFI_EVPN; + + /* First, walk the global route table for this VNI's type-2 local routes. + * EVPN routes are a 2-level table, first get the RD table. + */ + delete_global_type2_routes (bgp, vpn); + + /* Next, walk this VNI's route table and delete local type-2 routes. */ + for (rn = bgp_table_top (vpn->route_table); rn; rn = bgp_route_next (rn)) + { + struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p; + + if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE) + continue; + + delete_evpn_route_entry (bgp, vpn, afi, safi, rn, &ri); + + /* Route entry in local table gets deleted immediately. */ + if (ri) + bgp_info_reap (rn, ri); + } + + return 0; +} + +/* + * Delete all routes in the per-VNI route table. 
+ */ +static int +delete_all_vni_routes (struct bgp *bgp, struct bgpevpn *vpn) +{ + struct bgp_node *rn; + struct bgp_info *ri, *nextri; + + /* Walk this VNI's route table and delete all routes. */ + for (rn = bgp_table_top (vpn->route_table); rn; rn = bgp_route_next (rn)) + { + for (ri = rn->info; (ri != NULL) && (nextri = ri->next, 1); ri = nextri) + { + bgp_info_delete (rn, ri); + bgp_info_reap (rn, ri); + } + } + + return 0; +} + +/* + * Update (and advertise) local routes for a VNI. Invoked upon the VNI + * export RT getting modified or change to tunnel IP. Note that these + * situations need the route in the per-VNI table as well as the global + * table to be updated (as attributes change). + */ +static int +update_routes_for_vni (struct bgp *bgp, struct bgpevpn *vpn) +{ + int ret; + struct prefix_evpn p; + + /* Update and advertise the type-3 route (only one) followed by the + * locally learnt type-2 routes (MACIP) - for this VNI. + */ + build_evpn_type3_prefix (&p, vpn->originator_ip); + ret = update_evpn_route (bgp, vpn, &p); + if (ret) + return ret; + + return update_all_type2_routes (bgp, vpn); +} + +/* + * Delete (and withdraw) local routes for specified VNI from the global + * table and per-VNI table. After this, remove all other routes from + * the per-VNI table. Invoked upon the VNI being deleted or EVPN + * (advertise-all-vni) being disabled. + */ +static int +delete_routes_for_vni (struct bgp *bgp, struct bgpevpn *vpn) +{ + int ret; + struct prefix_evpn p; + + /* Delete and withdraw locally learnt type-2 routes (MACIP) + * followed by type-3 routes (only one) - for this VNI. + */ + ret = delete_all_type2_routes (bgp, vpn); + if (ret) + return ret; + + build_evpn_type3_prefix (&p, vpn->originator_ip); + ret = delete_evpn_route (bgp, vpn, &p); + if (ret) + return ret; + + /* Delete all routes from the per-VNI table. 
*/
+  return delete_all_vni_routes (bgp, vpn);
+}
+
+/*
+ * There is a tunnel endpoint IP address change for this VNI,
+ * need to re-advertise routes with the new nexthop.
+ */
+static int
+handle_tunnel_ip_change (struct bgp *bgp, struct bgpevpn *vpn,
+                         struct in_addr originator_ip)
+{
+  struct prefix_evpn p;
+
+  /* Need to withdraw type-3 route as the originator IP is part
+   * of the key.
+   */
+  build_evpn_type3_prefix (&p, vpn->originator_ip);
+  delete_evpn_route (bgp, vpn, &p);
+
+  /* Update the tunnel IP and re-advertise all routes for this VNI. */
+  vpn->originator_ip = originator_ip;
+  return update_routes_for_vni (bgp, vpn);
+}
+
+/*
+ * Install route entry into the VNI routing table and invoke route selection.
+ *
+ * bgp       - BGP instance, handed on to route selection.
+ * vpn       - VNI whose route_table receives the entry.
+ * p         - EVPN prefix used as the key in the VNI table (no RD).
+ * parent_ri - route being imported; its type, sub-type, peer, attribute
+ *             and label are copied and it is recorded as extra->parent
+ *             of the VNI entry.
+ *
+ * Returns 0 if an identical, non-removed entry already exists; otherwise
+ * the result of evpn_route_select_install().
+ *
+ * The node reference obtained from bgp_node_get() is dropped on every
+ * return path, the same way update_evpn_route() drops its reference
+ * after route selection.
+ */
+static int
+install_evpn_route_entry (struct bgp *bgp, struct bgpevpn *vpn,
+                          struct prefix_evpn *p, struct bgp_info *parent_ri)
+{
+  struct bgp_node *rn;
+  struct bgp_info *ri;
+  struct attr *attr_new;
+  int ret;
+
+  /* Create (or fetch) route within the VNI. */
+  /* NOTE: There is no RD here. */
+  rn = bgp_node_get (vpn->route_table, (struct prefix *)p);
+
+  /* Check if route entry is already present. */
+  for (ri = rn->info; ri; ri = ri->next)
+    if (ri->extra &&
+        (struct bgp_info *)ri->extra->parent == parent_ri)
+      break;
+
+  if (!ri)
+    {
+      /* Add (or update) attribute to hash. */
+      attr_new = bgp_attr_intern (parent_ri->attr);
+
+      /* Create new route with its attribute. */
+      ri = info_make (parent_ri->type, parent_ri->sub_type, 0,
+                      parent_ri->peer, attr_new, rn);
+      SET_FLAG (ri->flags, BGP_INFO_VALID);
+      bgp_info_extra_get(ri);
+      ri->extra->parent = parent_ri;
+      if (parent_ri->extra)
+        memcpy (&ri->extra->label, &parent_ri->extra->label, BGP_LABEL_BYTES);
+      bgp_info_add (rn, ri);
+    }
+  else
+    {
+      /* Nothing to do if the attribute is unchanged and the entry is live. */
+      if (attrhash_cmp (ri->attr, parent_ri->attr) &&
+          !CHECK_FLAG(ri->flags, BGP_INFO_REMOVED))
+        {
+          bgp_unlock_node (rn);
+          return 0;
+        }
+      /* The attribute has changed. */
+      /* Add (or update) attribute to hash. */
+      attr_new = bgp_attr_intern (parent_ri->attr);
+
+      /* Restore route, if needed. */
+      if (CHECK_FLAG(ri->flags, BGP_INFO_REMOVED))
+        bgp_info_restore(rn, ri);
+
+      /* Mark if nexthop has changed. */
+      if (!IPV4_ADDR_SAME (&ri->attr->nexthop, &attr_new->nexthop))
+        SET_FLAG(ri->flags, BGP_INFO_IGP_CHANGED);
+
+      /* Unintern existing, set to new. */
+      bgp_attr_unintern (&ri->attr);
+      ri->attr = attr_new;
+      ri->uptime = bgp_clock ();
+    }
+
+  /* Perform route selection and update zebra, if required. */
+  ret = evpn_route_select_install (bgp, vpn, rn);
+
+  /* Drop the reference taken by bgp_node_get() above. */
+  bgp_unlock_node (rn);
+
+  return ret;
+}
+
+/*
+ * Uninstall route entry from the VNI routing table and send message
+ * to zebra, if appropriate.
+ *
+ * Returns 0 if the prefix or a matching entry (one whose extra->parent
+ * is parent_ri) is not present in the VNI table; otherwise the result of
+ * evpn_route_select_install().
+ *
+ * The node reference obtained from bgp_node_lookup() is dropped on every
+ * return path once the node has been found.
+ */
+static int
+uninstall_evpn_route_entry (struct bgp *bgp, struct bgpevpn *vpn,
+                            struct prefix_evpn *p, struct bgp_info *parent_ri)
+{
+  struct bgp_node *rn;
+  struct bgp_info *ri;
+  int ret;
+
+  /* Locate route within the VNI. */
+  /* NOTE: There is no RD here. */
+  rn = bgp_node_lookup (vpn->route_table, (struct prefix *)p);
+  if (!rn)
+    return 0;
+
+  /* Find matching route entry. */
+  for (ri = rn->info; ri; ri = ri->next)
+    if (ri->extra &&
+        (struct bgp_info *)ri->extra->parent == parent_ri)
+      break;
+
+  if (!ri)
+    {
+      /* No matching entry - still release the looked-up node. */
+      bgp_unlock_node (rn);
+      return 0;
+    }
+
+  /* Mark entry for deletion */
+  bgp_info_delete (rn, ri);
+
+  /* Perform route selection and update zebra, if required. */
+  ret = evpn_route_select_install (bgp, vpn, rn);
+
+  /* Unlock route node. */
+  bgp_unlock_node (rn);
+
+  return ret;
+}
+
+/*
+ * Given a route entry and a VNI, see if this route entry should be
+ * imported into the VNI i.e., RTs match.
+ */
+static int
+is_route_matching_for_vni (struct bgp *bgp, struct bgpevpn *vpn,
+                           struct bgp_info *ri)
+{
+  struct attr *attr = ri->attr;
+  struct ecommunity *ecom;
+  int i;
+
+  assert (attr);
+  /* Route should have valid RT to be even considered.
*/ + if (!(attr->flag & ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES))) + return 0; + + ecom = attr->extra->ecommunity; + if (!ecom || !ecom->size) + return 0; + + /* For each extended community RT, see if it matches this VNI. If any RT + * matches, we're done. + */ + for (i = 0; i < ecom->size; i++) + { + u_char *pnt; + u_char type, sub_type; + struct ecommunity_val *eval; + struct ecommunity_val eval_tmp; + struct irt_node *irt; + + /* Only deal with RTs */ + pnt = (ecom->val + (i * ECOMMUNITY_SIZE)); + eval = (struct ecommunity_val *) (ecom->val + (i * ECOMMUNITY_SIZE)); + type = *pnt++; + sub_type = *pnt++; + if (sub_type != ECOMMUNITY_ROUTE_TARGET) + continue; + + /* See if this RT matches specified VNIs import RTs */ + irt = lookup_import_rt (bgp, eval); + if (irt && irt->vnis) + if (is_vni_present_in_irt_vnis (irt->vnis, vpn)) + return 1; + + /* Also check for non-exact match. In this, we mask out the AS and + * only check on the local-admin sub-field. This is to facilitate using + * VNI as the RT for EBGP peering too. + */ + irt = NULL; + if (type == ECOMMUNITY_ENCODE_AS || + type == ECOMMUNITY_ENCODE_AS4 || + type == ECOMMUNITY_ENCODE_IP) + { + memcpy (&eval_tmp, eval, ECOMMUNITY_SIZE); + mask_ecom_global_admin (&eval_tmp, eval); + irt = lookup_import_rt (bgp, &eval_tmp); + } + if (irt && irt->vnis) + if (is_vni_present_in_irt_vnis (irt->vnis, vpn)) + return 1; + } + + return 0; +} + +/* + * Install or uninstall routes of specified type that are appropriate for this + * particular VNI. + */ +static int +install_uninstall_routes_for_vni (struct bgp *bgp, struct bgpevpn *vpn, + bgp_evpn_route_type rtype, int install) +{ + afi_t afi; + safi_t safi; + struct bgp_node *rd_rn, *rn; + struct bgp_table *table; + struct bgp_info *ri; + int ret; + + afi = AFI_L2VPN; + safi = SAFI_EVPN; + + /* Walk entire global routing table and evaluate routes which could be + * imported into this VPN. 
Note that we cannot just look at the routes for + * the VNI's RD - remote routes applicable for this VNI could have any RD. + */ + /* EVPN routes are a 2-level table. */ + for (rd_rn = bgp_table_top(bgp->rib[afi][safi]); rd_rn; rd_rn = bgp_route_next (rd_rn)) + { + table = (struct bgp_table *)(rd_rn->info); + if (!table) + continue; + + for (rn = bgp_table_top (table); rn; rn = bgp_route_next (rn)) + { + struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p; + + if (evp->prefix.route_type != rtype) + continue; + + for (ri = rn->info; ri; ri = ri->next) + { + /* Consider "valid" remote routes applicable for this VNI. */ + if (!(CHECK_FLAG (ri->flags, BGP_INFO_VALID) + && ri->type == ZEBRA_ROUTE_BGP + && ri->sub_type == BGP_ROUTE_NORMAL)) + continue; + + if (is_route_matching_for_vni (bgp, vpn, ri)) + { + if (install) + ret = install_evpn_route_entry (bgp, vpn, evp, ri); + else + ret = uninstall_evpn_route_entry (bgp, vpn, evp, ri); + + if (ret) + { + zlog_err ("%u: Failed to %s EVPN %s route in VNI %u", + bgp->vrf_id, install ? "install" : "uninstall", + rtype == BGP_EVPN_MAC_IP_ROUTE ? \ + "MACIP" : "IMET", vpn->vni); + return ret; + } + } + } + } + } + + return 0; +} + +/* + * Install any existing remote routes applicable for this VNI into its + * routing table. This is invoked when a VNI becomes "live" or its Import + * RT is changed. + */ +static int +install_routes_for_vni (struct bgp *bgp, struct bgpevpn *vpn) +{ + int ret; + + /* Install type-3 routes followed by type-2 routes - the ones applicable + * for this VNI. + */ + ret = install_uninstall_routes_for_vni (bgp, vpn, BGP_EVPN_IMET_ROUTE, 1); + if (ret) + return ret; + + return install_uninstall_routes_for_vni (bgp, vpn, BGP_EVPN_MAC_IP_ROUTE, 1); +} + +/* + * Install or uninstall route in matching VNIs (list). 
+ */ +static int +install_uninstall_route_in_vnis (struct bgp *bgp, afi_t afi, safi_t safi, + struct prefix_evpn *evp, struct bgp_info *ri, + struct list *vnis, int install) +{ + struct bgpevpn *vpn; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS (vnis, node, nnode, vpn)) + { + int ret; + + if (!is_vni_live (vpn)) + continue; + + if (install) + ret = install_evpn_route_entry (bgp, vpn, evp, ri); + else + ret = uninstall_evpn_route_entry (bgp, vpn, evp, ri); + + if (ret) + { + zlog_err ("%u: Failed to %s EVPN %s route in VNI %u", + bgp->vrf_id, install ? "install" : "uninstall", + evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE ? \ + "MACIP" : "IMET", vpn->vni); + return ret; + } + } + + return 0; +} + +/* + * Install or uninstall route for appropriate VNIs. + */ +static int +install_uninstall_evpn_route (struct bgp *bgp, afi_t afi, safi_t safi, + struct prefix *p, struct bgp_info *ri, + int import) +{ + struct prefix_evpn *evp = (struct prefix_evpn *)p; + struct attr *attr = ri->attr; + struct ecommunity *ecom; + int i; + + assert (attr); + + /* Only type-2 and type-3 routes go into a L2 VNI. */ + if (!(evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE || + evp->prefix.route_type == BGP_EVPN_IMET_ROUTE)) + return 0; + + /* If we don't have Route Target, nothing much to do. */ + if (!(attr->flag & ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES))) + return 0; + + ecom = attr->extra->ecommunity; + if (!ecom || !ecom->size) + return -1; + + /* For each extended community RT, see which VNIs match and import + * the route into matching VNIs. 
+ */ + for (i = 0; i < ecom->size; i++) + { + u_char *pnt; + u_char type, sub_type; + struct ecommunity_val *eval; + struct ecommunity_val eval_tmp; + struct irt_node *irt; + + /* Only deal with RTs */ + pnt = (ecom->val + (i * ECOMMUNITY_SIZE)); + eval = (struct ecommunity_val *) (ecom->val + (i * ECOMMUNITY_SIZE)); + type = *pnt++; + sub_type = *pnt++; + if (sub_type != ECOMMUNITY_ROUTE_TARGET) + continue; + + /* Are we interested in this RT? */ + irt = lookup_import_rt (bgp, eval); + if (irt && irt->vnis) + install_uninstall_route_in_vnis (bgp, afi, safi, evp, + ri, irt->vnis, import); + + /* Also check for non-exact match. In this, we mask out the AS and + * only check on the local-admin sub-field. This is to facilitate using + * VNI as the RT for EBGP peering too. + */ + irt = NULL; + if (type == ECOMMUNITY_ENCODE_AS || + type == ECOMMUNITY_ENCODE_AS4 || + type == ECOMMUNITY_ENCODE_IP) + { + memcpy (&eval_tmp, eval, ECOMMUNITY_SIZE); + mask_ecom_global_admin (&eval_tmp, eval); + irt = lookup_import_rt (bgp, &eval_tmp); + } + if (irt && irt->vnis) + install_uninstall_route_in_vnis (bgp, afi, safi, evp, + ri, irt->vnis, import); + } + + return 0; +} + +/* + * Process received EVPN type-2 route (advertise or withdraw). 
+ *
+ * peer       - peer the NLRI was received from.
+ * afi, safi  - address family, passed through to bgp_update/bgp_withdraw.
+ * attr       - path attributes; NULL selects the withdraw path.
+ * pfx, psize - start and length (bytes) of the route-type specific part
+ *              of the NLRI, beginning at the RD.
+ * addpath_id - AddPath identifier passed through unchanged.
+ *
+ * Returns -1 if the NLRI is malformed, otherwise the result of
+ * bgp_update() or bgp_withdraw().
+ */
+static int
+process_type2_route (struct peer *peer, afi_t afi, safi_t safi,
+                     struct attr *attr, u_char *pfx, int psize,
+                     u_int32_t addpath_id)
+{
+  struct prefix_rd prd;
+  struct prefix_evpn p;
+  u_char ipaddr_len;
+  u_char macaddr_len;
+  mpls_label_t *label_pnt;
+  int ret;
+
+  /* Type-2 route should be either 33, 37 or 49 bytes or an
+   * additional 3 bytes if there is a second label (VNI):
+   * RD (8), ESI (10), Eth Tag (4), MAC Addr Len (1),
+   * MAC Addr (6), IP len (1), IP (0, 4 or 16),
+   * MPLS Lbl1 (3), MPLS Lbl2 (0 or 3)
+   */
+  if (psize != 33 && psize != 37 && psize != 49 &&
+      psize != 36 && psize != 40 && psize != 52)
+    {
+      zlog_err ("%u:%s - Rx EVPN Type-2 NLRI with invalid length %d",
+                peer->bgp->vrf_id, peer->host, psize);
+      return -1;
+    }
+
+  /* Make prefix_rd */
+  prd.family = AF_UNSPEC;
+  prd.prefixlen = 64;
+  memcpy (&prd.val, pfx, 8);
+  pfx += 8;
+
+  /* Make EVPN prefix. */
+  memset (&p, 0, sizeof (struct prefix_evpn));
+  p.family = AF_ETHERNET;
+  p.prefixlen = EVPN_TYPE_2_ROUTE_PREFIXLEN;
+  p.prefix.route_type = BGP_EVPN_MAC_IP_ROUTE;
+
+  /* Skip over Ethernet Seg Identifier for now. */
+  pfx += 10;
+
+  /* Skip over Ethernet Tag for now. */
+  pfx += 4;
+
+  /* Get the MAC Addr len */
+  macaddr_len = *pfx++;
+
+  /* Get the MAC Addr */
+  if (macaddr_len == (ETHER_ADDR_LEN * 8))
+    {
+      memcpy (&p.prefix.mac.octet, pfx, ETHER_ADDR_LEN);
+      pfx += ETHER_ADDR_LEN;
+    }
+  else
+    {
+      zlog_err ("%u:%s - Rx EVPN Type-2 NLRI with unsupported MAC address length %d",
+                peer->bgp->vrf_id, peer->host, macaddr_len);
+      return -1;
+    }
+
+
+  /* Get the IP. */
+  ipaddr_len = *pfx++;
+  if (ipaddr_len != 0 &&
+      ipaddr_len != IPV4_MAX_BITLEN &&
+      ipaddr_len != IPV6_MAX_BITLEN)
+    {
+      zlog_err ("%u:%s - Rx EVPN Type-2 NLRI with unsupported IP address length %d",
+                peer->bgp->vrf_id, peer->host, ipaddr_len);
+      return -1;
+    }
+
+  /* The total NLRI length must agree with the advertised IP address
+   * length (33 fixed bytes + IP with one label, 36 + IP with two).
+   * Without this check the label pointer computed below can point past
+   * the end of this NLRI, e.g. a 33-byte NLRI claiming a 128-bit IP.
+   */
+  if (psize != 33 + (ipaddr_len / 8) && psize != 36 + (ipaddr_len / 8))
+    {
+      zlog_err ("%u:%s - Rx EVPN Type-2 NLRI length %d inconsistent with IP address length %d",
+                peer->bgp->vrf_id, peer->host, psize, ipaddr_len);
+      return -1;
+    }
+
+  if (ipaddr_len)
+    {
+      ipaddr_len /= 8; /* Convert to bytes. */
+      p.prefix.ip.ipa_type = (ipaddr_len == IPV4_MAX_BYTELEN) ?
+                              IPADDR_V4 : IPADDR_V6;
+      memcpy (&p.prefix.ip.ip.addr, pfx, ipaddr_len);
+    }
+  pfx += ipaddr_len;
+
+  /* Get the VNI (in MPLS label field). */
+  /* Note: We ignore the second VNI, if any. */
+  label_pnt = (mpls_label_t *) pfx;
+
+  /* Process the route. */
+  if (attr)
+    ret = bgp_update (peer, (struct prefix *)&p, addpath_id, attr, afi, safi,
+                      ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, &prd, label_pnt, 0, NULL);
+  else
+    ret = bgp_withdraw (peer, (struct prefix *)&p, addpath_id, attr, afi, safi,
+                        ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, &prd, label_pnt, NULL);
+  return ret;
+}
+
+/*
+ * Process received EVPN type-3 route (advertise or withdraw).
+ */
+static int
+process_type3_route (struct peer *peer, afi_t afi, safi_t safi,
+                     struct attr *attr, u_char *pfx, int psize,
+                     u_int32_t addpath_id)
+{
+  struct prefix_rd prd;
+  struct prefix_evpn p;
+  u_char ipaddr_len;
+  int ret;
+
+  /* Type-3 route should be either 17 or 29 bytes: RD (8), Eth Tag (4),
+   * IP len (1) and IP (4 or 16).
+   */
+  if (psize != 17 && psize != 29)
+    {
+      zlog_err ("%u:%s - Rx EVPN Type-3 NLRI with invalid length %d",
+                peer->bgp->vrf_id, peer->host, psize);
+      return -1;
+    }
+
+  /* Make prefix_rd */
+  prd.family = AF_UNSPEC;
+  prd.prefixlen = 64;
+  memcpy (&prd.val, pfx, 8);
+  pfx += 8;
+
+  /* Make EVPN prefix. */
+  memset (&p, 0, sizeof (struct prefix_evpn));
+  p.family = AF_ETHERNET;
+  p.prefixlen = EVPN_TYPE_3_ROUTE_PREFIXLEN;
+  p.prefix.route_type = BGP_EVPN_IMET_ROUTE;
+
+  /* Skip over Ethernet Tag for now. */
+  pfx += 4;
+
+  /* Get the IP. */
+  ipaddr_len = *pfx++;
+  if (ipaddr_len == IPV4_MAX_BITLEN)
+    {
+      p.prefix.ip.ipa_type = IPADDR_V4;
+      memcpy (&p.prefix.ip.ip.addr, pfx, IPV4_MAX_BYTELEN);
+    }
+  else
+    {
+      zlog_err ("%u:%s - Rx EVPN Type-3 NLRI with unsupported IP address length %d",
+                peer->bgp->vrf_id, peer->host, ipaddr_len);
+      return -1;
+    }
+
+  /* Process the route.
*/
+  if (attr)
+    ret = bgp_update (peer, (struct prefix *)&p, addpath_id, attr, afi, safi,
+                      ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, &prd, NULL, 0, NULL);
+  else
+    ret = bgp_withdraw (peer, (struct prefix *)&p, addpath_id, attr, afi, safi,
+                        ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, &prd, NULL, NULL);
+  return ret;
+}
+
+/*
+ * Process received EVPN type-5 route (advertise or withdraw).
+ *
+ * peer       - peer the NLRI was received from.
+ * afi, safi  - address family, passed through to bgp_update/bgp_withdraw.
+ * attr       - path attributes, passed through on both paths.
+ * pfx, psize - start and length (bytes) of the route-type specific part
+ *              of the NLRI, beginning at the RD.
+ * addpath_id - AddPath identifier passed through unchanged.
+ * withdraw   - non-zero to withdraw the route, zero to advertise it.
+ *
+ * Returns -1 if the NLRI is malformed, otherwise the result of
+ * bgp_update() or bgp_withdraw().
+ */
+static int
+process_type5_route (struct peer *peer, afi_t afi, safi_t safi,
+                     struct attr *attr, u_char *pfx, int psize,
+                     u_int32_t addpath_id, int withdraw)
+{
+  struct prefix_rd prd;
+  struct prefix_evpn p;
+  struct bgp_route_evpn evpn;
+  u_char ippfx_len;
+  u_int32_t eth_tag;
+  mpls_label_t *label_pnt;
+  int ret;
+
+  /* Type-5 route should be 34 or 58 bytes:
+   * RD (8), ESI (10), Eth Tag (4), IP len (1), IP (4 or 16),
+   * GW (4 or 16) and VNI (3).
+   * Note that the IP and GW should both be IPv4 or both IPv6.
+   */
+  if (psize != 34 && psize != 58)
+    {
+      zlog_err ("%u:%s - Rx EVPN Type-5 NLRI with invalid length %d",
+                peer->bgp->vrf_id, peer->host, psize);
+      return -1;
+    }
+
+  /* Make prefix_rd */
+  prd.family = AF_UNSPEC;
+  prd.prefixlen = 64;
+  memcpy (&prd.val, pfx, 8);
+  pfx += 8;
+
+  /* Make EVPN prefix. */
+  memset (&p, 0, sizeof (struct prefix_evpn));
+  p.family = AF_ETHERNET;
+  p.prefix.route_type = BGP_EVPN_IP_PREFIX_ROUTE;
+
+  /* Additional information outside of prefix - ESI and GW IP */
+  memset(&evpn, 0, sizeof(evpn));
+
+  /* Fetch ESI */
+  memcpy (&evpn.eth_s_id.val, pfx, 10);
+  pfx += 10;
+
+  /* Fetch Ethernet Tag. Copy into a local first so the 4 wire bytes are
+   * converted from network order without reading through a cast pointer.
+   */
+  memcpy (&eth_tag, pfx, 4);
+  p.prefix.eth_tag = ntohl (eth_tag);
+  pfx += 4;
+
+  /* Fetch IP prefix length. */
+  ippfx_len = *pfx++;
+  if (ippfx_len > IPV6_MAX_BITLEN)
+    {
+      zlog_err ("%u:%s - Rx EVPN Type-5 NLRI with invalid IP Prefix length %d",
+                peer->bgp->vrf_id, peer->host, ippfx_len);
+      return -1;
+    }
+  p.prefix.ip_prefix_length = ippfx_len;
+
+  /* Determine IPv4 or IPv6 prefix */
+  /* Since the address and GW are from the same family, this just becomes
+   * a simple check on the total size.
+   */
+  if (psize == 34)
+    {
+      SET_IPADDR_V4 (&p.prefix.ip);
+      memcpy (&p.prefix.ip.ipaddr_v4, pfx, 4);
+      pfx += 4;
+      memcpy (&evpn.gw_ip.ipv4, pfx, 4);
+      pfx += 4;
+      p.prefixlen = PREFIX_LEN_ROUTE_TYPE_5_IPV4;
+    }
+  else
+    {
+      SET_IPADDR_V6 (&p.prefix.ip);
+      memcpy (&p.prefix.ip.ipaddr_v6, pfx, 16);
+      pfx += 16;
+      memcpy (&evpn.gw_ip.ipv6, pfx, 16);
+      pfx += 16;
+      p.prefixlen = PREFIX_LEN_ROUTE_TYPE_5_IPV6;
+    }
+
+  label_pnt = (mpls_label_t *) pfx;
+
+  /* Process the route. */
+  if (!withdraw)
+    ret = bgp_update (peer, (struct prefix *)&p, addpath_id, attr, afi, safi,
+                      ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, &prd, label_pnt, 0, &evpn);
+  else
+    ret = bgp_withdraw (peer, (struct prefix *)&p, addpath_id, attr, afi, safi,
+                        ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, &prd, label_pnt, &evpn);
+
+  return ret;
+}
+
+static void
+evpn_mpattr_encode_type5 (struct stream *s, struct prefix *p,
+                          struct prefix_rd *prd, mpls_label_t * label,
+                          struct attr *attr)
+{
+  int len;
+  char temp[16];
+  struct evpn_addr *p_evpn_p;
+
+  memset(&temp, 0, 16);
+  if (p->family != AF_ETHERNET)
+    return;
+  p_evpn_p = &(p->u.prefix_evpn);
+
+  if (IS_IPADDR_V4(&p_evpn_p->ip))
+    len = 8; /* ipv4 */
+  else
+    len = 32; /* ipv6 */
+  stream_putc(s, BGP_EVPN_IP_PREFIX_ROUTE);
+  /* Prefix contains RD, ESI, EthTag, IP length, IP, GWIP and VNI */
+  stream_putc(s, 8 + 10 + 4 + 1 + len + 3);
+  stream_put(s, prd->val, 8);
+  if (attr && attr->extra)
+    stream_put(s, &(attr->extra->evpn_overlay.eth_s_id), 10);
+  else
+    stream_put(s, &temp, 10);
+  stream_putl(s, p_evpn_p->eth_tag);
+  stream_putc(s, p_evpn_p->ip_prefix_length);
+  if
(IS_IPADDR_V4(&p_evpn_p->ip)) + stream_put_ipv4(s, p_evpn_p->ip.ipaddr_v4.s_addr); + else + stream_put(s, &p_evpn_p->ip.ipaddr_v6, 16); + if (attr && attr->extra) + { + if (IS_IPADDR_V4(&p_evpn_p->ip)) + stream_put_ipv4(s, attr->extra->evpn_overlay.gw_ip.ipv4. s_addr); + else + stream_put(s, &(attr->extra->evpn_overlay.gw_ip.ipv6), 16); + } + else + { + if (IS_IPADDR_V4(&p_evpn_p->ip)) + stream_put_ipv4(s, 0); + else + stream_put(s, &temp, 16); + } + + if (label) + stream_put(s, label, 3); + else + stream_put3(s, 0); +} + /* * Cleanup specific VNI upon EVPN (advertise-all-vni) being disabled. */ static void cleanup_vni_on_disable (struct hash_backet *backet, struct bgp *bgp) { + struct bgpevpn *vpn = (struct bgpevpn *) backet->data; + + /* Remove EVPN routes and schedule for processing. */ + delete_routes_for_vni (bgp, vpn); + + /* Clear "live" flag and see if hash needs to be freed. */ + UNSET_FLAG (vpn->flags, VNI_FLAG_LIVE); + if (!is_vni_configured (vpn)) + bgp_evpn_free (bgp, vpn); } /* @@ -126,6 +1895,11 @@ cleanup_vni_on_disable (struct hash_backet *backet, struct bgp *bgp) static void free_vni_entry (struct hash_backet *backet, struct bgp *bgp) { + struct bgpevpn *vpn; + + vpn = (struct bgpevpn *) backet->data; + delete_all_vni_routes (bgp, vpn); + bgp_evpn_free(bgp, vpn); } @@ -133,194 +1907,529 @@ free_vni_entry (struct hash_backet *backet, struct bgp *bgp) * Public functions. 
*/ -int -bgp_nlri_parse_evpn(struct peer *peer, struct attr *attr, - struct bgp_nlri *packet, int withdraw) +/* + * Encode EVPN prefix in Update (MP_REACH) + */ +void +bgp_evpn_encode_prefix (struct stream *s, struct prefix *p, + struct prefix_rd *prd, mpls_label_t *label, + struct attr *attr, int addpath_encode, + u_int32_t addpath_tx_id) { - u_char *pnt; - u_char *lim; - struct prefix p; - struct prefix_rd prd; - struct evpn_addr *p_evpn_p; - struct bgp_route_evpn evpn; - uint8_t route_type, route_length; - mpls_label_t label; - u_int32_t addpath_id = 0; + struct prefix_evpn *evp = (struct prefix_evpn *)p; + int ipa_len = 0; - /* Check peer status. */ - if (peer->status != Established) - return 0; + if (addpath_encode) + stream_putl (s, addpath_tx_id); - /* Make prefix_rd */ - prd.family = AF_UNSPEC; - prd.prefixlen = 64; + /* Route type */ + stream_putc (s, evp->prefix.route_type); - p_evpn_p = &p.u.prefix_evpn; - pnt = packet->nlri; - lim = pnt + packet->length; - while (pnt < lim) { - /* clear evpn structure */ - memset(&evpn, 0, sizeof(evpn)); + switch (evp->prefix.route_type) + { + case BGP_EVPN_MAC_IP_ROUTE: + if (IS_EVPN_PREFIX_IPADDR_V4(evp)) + ipa_len = IPV4_MAX_BYTELEN; + else if (IS_EVPN_PREFIX_IPADDR_V6(evp)) + ipa_len = IPV6_MAX_BYTELEN; + stream_putc (s, 33 + ipa_len); // 1 VNI + stream_put (s, prd->val, 8); /* RD */ + stream_put (s, 0, 10); /* ESI */ + stream_putl (s, 0); /* Ethernet Tag ID */ + stream_putc (s, 8*ETHER_ADDR_LEN); /* Mac Addr Len - bits */ + stream_put (s, evp->prefix.mac.octet, 6); /* Mac Addr */ + stream_putc (s, 8*ipa_len); /* IP address Length */ + if (ipa_len) + stream_put (s, &evp->prefix.ip.ip.addr, ipa_len); /* IP */ + stream_put (s, label, BGP_LABEL_BYTES); /* VNI is contained in 'tag' */ + break; - /* Clear prefix structure. 
*/ - memset(&p, 0, sizeof(struct prefix)); - memset(&evpn.gw_ip, 0, sizeof(union gw_addr)); - memset(&evpn.eth_s_id, 0, sizeof(struct eth_segment_id)); + case BGP_EVPN_IMET_ROUTE: + stream_putc (s, 17); // TODO: length - assumes IPv4 address + stream_put (s, prd->val, 8); /* RD */ + stream_putl (s, 0); /* Ethernet Tag ID */ + stream_putc (s, IPV4_MAX_BITLEN); /* IP address Length - bits */ + /* Originating Router's IP Addr */ + stream_put_in_addr (s, &evp->prefix.ip.ipaddr_v4); + break; - /* Fetch Route Type */ - route_type = *pnt++; - route_length = *pnt++; - /* simply ignore. goto next route type if any */ - if (route_type != EVPN_IP_PREFIX) { - if (pnt + route_length > lim) { - zlog_err - ("not enough bytes for New Route Type left in NLRI?"); - return -1; - } - pnt += route_length; - continue; - } + case BGP_EVPN_IP_PREFIX_ROUTE: + /* TODO: AddPath support. */ + evpn_mpattr_encode_type5 (s, p, prd, label, attr); + break; - /* Fetch RD */ - if (pnt + 8 > lim) { - zlog_err("not enough bytes for RD left in NLRI?"); - return -1; - } - - /* Copy routing distinguisher to rd. */ - memcpy(&prd.val, pnt, 8); - pnt += 8; - - /* Fetch ESI */ - if (pnt + 10 > lim) { - zlog_err("not enough bytes for ESI left in NLRI?"); - return -1; - } - memcpy(&evpn.eth_s_id.val, pnt, 10); - pnt += 10; - - /* Fetch Ethernet Tag */ - if (pnt + 4 > lim) { - zlog_err("not enough bytes for Eth Tag left in NLRI?"); - return -1; - } - - if (route_type == EVPN_IP_PREFIX) { - p_evpn_p->route_type = route_type; - memcpy(&(p_evpn_p->eth_tag), pnt, 4); - p_evpn_p->eth_tag = ntohl(p_evpn_p->eth_tag); - pnt += 4; - - /* Fetch IP prefix length. 
*/ - p_evpn_p->ip_prefix_length = *pnt++; - - if (p_evpn_p->ip_prefix_length > 128) { - zlog_err("invalid prefixlen %d in EVPN NLRI?", - p.prefixlen); - return -1; - } - /* determine IPv4 or IPv6 prefix */ - if (route_length - 4 - 10 - 8 - - 3 /* label to be read */ >= 32) { - SET_IPADDR_V6 (&p_evpn_p->ip); - memcpy(&(p_evpn_p->ip.ipaddr_v6), pnt, 16); - pnt += 16; - memcpy(&evpn.gw_ip.ipv6, pnt, 16); - pnt += 16; - } else { - SET_IPADDR_V4 (&p_evpn_p->ip); - memcpy(&(p_evpn_p->ip.ipaddr_v4), pnt, 4); - pnt += 4; - memcpy(&evpn.gw_ip.ipv4, pnt, 4); - pnt += 4; - } - p.family = AFI_L2VPN; - if (IS_IPADDR_V4(&p_evpn_p->ip)) - p.prefixlen = - (u_char) PREFIX_LEN_ROUTE_TYPE_5_IPV4; - else - p.prefixlen = PREFIX_LEN_ROUTE_TYPE_5_IPV6; - p.family = AF_ETHERNET; - } - - /* Fetch Label */ - if (pnt + BGP_LABEL_BYTES > lim) { - zlog_err("not enough bytes for Label left in NLRI?"); - return -1; - } - - memcpy(&label, pnt, BGP_LABEL_BYTES); - bgp_set_valid_label(&label); - pnt += BGP_LABEL_BYTES; - - if (!withdraw) { - bgp_update(peer, &p, addpath_id, attr, AFI_L2VPN, - SAFI_EVPN, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, - &prd, &label, 0, &evpn); - } else { - bgp_withdraw(peer, &p, addpath_id, attr, AFI_L2VPN, - SAFI_EVPN, ZEBRA_ROUTE_BGP, - BGP_ROUTE_NORMAL, &prd, &label, &evpn); - } - } - - /* Packet length consistency check. 
*/ - if (pnt != lim) - return -1; - return 0; + default: + break; + } } -void -bgp_packet_mpattr_route_type_5(struct stream *s, - struct prefix *p, struct prefix_rd *prd, - mpls_label_t *label, struct attr *attr) +int +bgp_nlri_parse_evpn(struct peer *peer, struct attr *attr, + struct bgp_nlri *packet, int withdraw) { - int len; - char temp[16]; - struct evpn_addr *p_evpn_p; + u_char *pnt; + u_char *lim; + afi_t afi; + safi_t safi; + u_int32_t addpath_id; + int addpath_encoded; + int psize = 0; + u_char rtype; + u_char rlen; + struct prefix p; - memset(&temp, 0, 16); - if (p->family != AF_ETHERNET) - return; - p_evpn_p = &(p->u.prefix_evpn); - if (IS_IPADDR_V4(&p_evpn_p->ip)) - len = 8; /* ipv4 */ - else - len = 32; /* ipv6 */ - stream_putc(s, EVPN_IP_PREFIX); - stream_putc(s, - 8 /* RD */ + 10 /* ESI */ + 4 /* EthTag */ + 1 + len + - 3 /* label */ ); - stream_put(s, prd->val, 8); - if (attr && attr->extra) - stream_put(s, &(attr->extra->evpn_overlay.eth_s_id), 10); - else - stream_put(s, &temp, 10); - stream_putl(s, p_evpn_p->eth_tag); - stream_putc(s, p_evpn_p->ip_prefix_length); - if (IS_IPADDR_V4(&p_evpn_p->ip)) - stream_put_ipv4(s, p_evpn_p->ip.ipaddr_v4.s_addr); - else - stream_put(s, &p_evpn_p->ip.ipaddr_v6, 16); - if (attr && attr->extra) { - if (IS_IPADDR_V4(&p_evpn_p->ip)) - stream_put_ipv4(s, - attr->extra->evpn_overlay.gw_ip.ipv4. - s_addr); - else - stream_put(s, &(attr->extra->evpn_overlay.gw_ip.ipv6), - 16); - } else { - if (IS_IPADDR_V4(&p_evpn_p->ip)) - stream_put_ipv4(s, 0); - else - stream_put(s, &temp, 16); - } - if (label) - stream_put(s, label, 3); - else - stream_put3(s, 0); - return; + /* Check peer status. 
*/ + if (peer->status != Established) + { + zlog_err ("%u:%s - EVPN update received in state %d", + peer->bgp->vrf_id, peer->host, peer->status); + return -1; + } + + /* Start processing the NLRI - there may be multiple in the MP_REACH */ + pnt = packet->nlri; + lim = pnt + packet->length; + afi = packet->afi; + safi = packet->safi; + addpath_id = 0; + + addpath_encoded = (CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ADDPATH_AF_RX_ADV) && + CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ADDPATH_AF_TX_RCV)); + + for (; pnt < lim; pnt += psize) + { + /* Clear prefix structure. */ + memset (&p, 0, sizeof (struct prefix)); + + /* Deal with path-id if AddPath is supported. */ + if (addpath_encoded) + { + /* When packet overflow occurs return immediately. */ + if (pnt + BGP_ADDPATH_ID_LEN > lim) + return -1; + + addpath_id = ntohl(*((uint32_t*) pnt)); + pnt += BGP_ADDPATH_ID_LEN; + } + + /* All EVPN NLRI types start with type and length. */ + if (pnt + 2 > lim) + return -1; + + rtype = *pnt++; + psize = rlen = *pnt++; + + /* When packet overflow occur return immediately. */ + if (pnt + psize > lim) + return -1; + + switch (rtype) + { + case BGP_EVPN_MAC_IP_ROUTE: + if (process_type2_route (peer, afi, safi, + withdraw ? NULL : attr, + pnt, psize, addpath_id)) + { + zlog_err ("%u:%s - Error in processing EVPN type-2 NLRI size %d", + peer->bgp->vrf_id, peer->host, psize); + return -1; + } + break; + + case BGP_EVPN_IMET_ROUTE: + if (process_type3_route (peer, afi, safi, + withdraw ? 
NULL : attr, + pnt, psize, addpath_id)) + { + zlog_err ("%u:%s - Error in processing EVPN type-3 NLRI size %d", + peer->bgp->vrf_id, peer->host, psize); + return -1; + } + break; + + case BGP_EVPN_IP_PREFIX_ROUTE: + if (process_type5_route (peer, afi, safi, attr, + pnt, psize, addpath_id, withdraw)) + { + zlog_err ("%u:%s - Error in processing EVPN type-5 NLRI size %d", + peer->bgp->vrf_id, peer->host, psize); + return -1; + } + break; + + default: + break; + } + + } + + /* Packet length consistency check. */ + if (pnt != lim) + return -1; + + return 0; +} + + +/* + * Map the RTs (configured or automatically derived) of a VNI to the VNI. + * The mapping will be used during route processing. + */ +void +bgp_evpn_map_vni_to_its_rts (struct bgp *bgp, struct bgpevpn *vpn) +{ + int i; + struct ecommunity_val *eval; + struct listnode *node, *nnode; + struct ecommunity *ecom; + + for (ALL_LIST_ELEMENTS (vpn->import_rtl, node, nnode, ecom)) + { + for (i = 0; i < ecom->size; i++) + { + eval = (struct ecommunity_val *) (ecom->val + (i * ECOMMUNITY_SIZE)); + map_vni_to_rt (bgp, vpn, eval); + } + } +} + +/* + * Unmap the RTs (configured or automatically derived) of a VNI from the VNI. + */ +void +bgp_evpn_unmap_vni_from_its_rts (struct bgp *bgp, struct bgpevpn *vpn) +{ + int i; + struct ecommunity_val *eval; + struct listnode *node, *nnode; + struct ecommunity *ecom; + + for (ALL_LIST_ELEMENTS (vpn->import_rtl, node, nnode, ecom)) + { + for (i = 0; i < ecom->size; i++) + { + struct irt_node *irt; + struct ecommunity_val eval_tmp; + + eval = (struct ecommunity_val *) (ecom->val + (i * ECOMMUNITY_SIZE)); + /* If using "automatic" RT, we only care about the local-admin sub-field. + * This is to facilitate using VNI as the RT for EBGP peering too. 
+           */
+          memcpy (&eval_tmp, eval, ECOMMUNITY_SIZE);
+          if (!is_import_rt_configured (vpn))
+            mask_ecom_global_admin (&eval_tmp, eval);
+
+          irt = lookup_import_rt (bgp, &eval_tmp);
+          if (irt)
+            unmap_vni_from_rt (bgp, vpn, irt);
+        }
+    }
+}
+
+/*
+ * Derive Import RT automatically for VNI and map VNI to RT.
+ * The mapping will be used during route processing.
+ *
+ * Fills vpn->import_rtl via form_auto_rt(), clears VNI_FLAG_IMPRT_CFGD
+ * (the RT is no longer "configured") and then maps every import RT of
+ * the VNI back to the VNI.
+ */
+void
+bgp_evpn_derive_auto_rt_import (struct bgp *bgp, struct bgpevpn *vpn)
+{
+  form_auto_rt (bgp, vpn, vpn->import_rtl);
+  UNSET_FLAG (vpn->flags, VNI_FLAG_IMPRT_CFGD);
+
+  /* Map RT to VNI */
+  bgp_evpn_map_vni_to_its_rts (bgp, vpn);
+}
+
+/*
+ * Derive Export RT automatically for VNI.
+ *
+ * Fills vpn->export_rtl via form_auto_rt() and clears
+ * VNI_FLAG_EXPRT_CFGD.  Unlike the import variant, no RT-to-VNI
+ * mapping is created here.
+ */
+void
+bgp_evpn_derive_auto_rt_export (struct bgp *bgp, struct bgpevpn *vpn)
+{
+  form_auto_rt (bgp, vpn, vpn->export_rtl);
+  UNSET_FLAG (vpn->flags, VNI_FLAG_EXPRT_CFGD);
+}
+
+/*
+ * Derive RD automatically for VNI using passed information - it
+ * is of the form RouterId:unique-id-for-vni.
+ *
+ * The RD is built as text ("<router-id>:<rd_id>") and parsed into
+ * vpn->prd with str2prefix_rd(); VNI_FLAG_RD_CFGD is cleared afterwards.
+ */
+void
+bgp_evpn_derive_auto_rd (struct bgp *bgp, struct bgpevpn *vpn)
+{
+  char buf[100];
+
+  vpn->prd.family = AF_UNSPEC;
+  vpn->prd.prefixlen = 64;
+  /* Bounded formatting so the write can never run past buf. */
+  snprintf (buf, sizeof (buf), "%s:%hu",
+            inet_ntoa (bgp->router_id), vpn->rd_id);
+  str2prefix_rd (buf, &vpn->prd);
+  UNSET_FLAG (vpn->flags, VNI_FLAG_RD_CFGD);
+}
+
+/*
+ * Lookup VNI.
+ *
+ * Builds a zeroed key carrying only 'vni' and returns whatever
+ * hash_lookup() finds for it in bgp->vnihash.  Callers in this file
+ * treat a NULL result as "VNI not known".
+ */
+struct bgpevpn *
+bgp_evpn_lookup_vni (struct bgp *bgp, vni_t vni)
+{
+  struct bgpevpn *vpn;
+  struct bgpevpn tmp;
+
+  memset(&tmp, 0, sizeof(struct bgpevpn));
+  tmp.vni = vni;
+  vpn = hash_lookup (bgp->vnihash, &tmp);
+  return vpn;
+}
+
+/*
+ * Create a new vpn - invoked upon configuration or zebra notification.
+ *
+ * Allocates the VNI entry, sets up its import/export RT lists, assigns
+ * an RD id, derives RD/RT, creates the per-VNI route table and inserts
+ * the entry into bgp->vnihash.
+ *
+ * Returns the new entry, or NULL if 'bgp' is NULL, allocation fails or
+ * the hash insertion fails.  On the hash-insertion failure everything
+ * acquired so far is released again.
+ */
+struct bgpevpn *
+bgp_evpn_new (struct bgp *bgp, vni_t vni, struct in_addr originator_ip)
+{
+  struct bgpevpn *vpn;
+
+  if (!bgp)
+    return NULL;
+
+  vpn = XCALLOC (MTYPE_BGP_EVPN, sizeof (struct bgpevpn));
+  if (!vpn)
+    return NULL;
+
+  /* Set values - RD and RT set to defaults. */
+  vpn->vni = vni;
+  vpn->originator_ip = originator_ip;
+
+  /* Initialize route-target import and export lists */
+  vpn->import_rtl = list_new ();
+  vpn->import_rtl->cmp = (int (*)(void *, void *)) evpn_route_target_cmp;
+  vpn->export_rtl = list_new ();
+  vpn->export_rtl->cmp = (int (*)(void *, void *)) evpn_route_target_cmp;
+  bf_assign_index(bgp->rd_idspace, vpn->rd_id);
+  derive_rd_rt_for_vni (bgp, vpn);
+
+  /* Initialize EVPN route table. */
+  vpn->route_table = bgp_table_init (AFI_L2VPN, SAFI_EVPN);
+
+  /* Add to hash */
+  if (!hash_get(bgp->vnihash, vpn, hash_alloc_intern))
+    {
+      /* Undo every acquisition made above, in the same order that
+       * bgp_evpn_free() uses (minus the hash/QOBJ steps, which never
+       * happened).  Freeing only the struct would leak the RT lists,
+       * the route table and the RD id.
+       * NOTE(review): derive_rd_rt_for_vni() presumably maps the import
+       * RTs to this VNI; the unmap below keeps those mappings from
+       * referencing freed memory and is harmless if nothing was mapped.
+       */
+      bgp_table_unlock (vpn->route_table);
+      bgp_evpn_unmap_vni_from_its_rts (bgp, vpn);
+      list_delete (vpn->import_rtl);
+      list_delete (vpn->export_rtl);
+      vpn->import_rtl = NULL;
+      vpn->export_rtl = NULL;
+      bf_release_index(bgp->rd_idspace, vpn->rd_id);
+      XFREE(MTYPE_BGP_EVPN, vpn);
+      return NULL;
+    }
+  QOBJ_REG (vpn, bgpevpn);
+  return vpn;
+}
+
+/*
+ * Free a given VPN - called in multiple scenarios such as zebra
+ * notification, configuration being deleted, advertise-all-vni disabled etc.
+ * This just frees appropriate memory, caller should have taken other
+ * needed actions.
+ *
+ * Releases, in order: the route table reference, the RT-to-VNI
+ * mappings, both RT lists, the RD id, the hash entry, the QOBJ
+ * registration and finally the structure itself.  'vpn' must not be
+ * used after this returns.
+ */
+void
+bgp_evpn_free (struct bgp *bgp, struct bgpevpn *vpn)
+{
+  bgp_table_unlock (vpn->route_table);
+  bgp_evpn_unmap_vni_from_its_rts (bgp, vpn);
+  list_delete (vpn->import_rtl);
+  list_delete (vpn->export_rtl);
+  vpn->import_rtl = NULL;
+  vpn->export_rtl = NULL;
+  bf_release_index(bgp->rd_idspace, vpn->rd_id);
+  hash_release (bgp->vnihash, vpn);
+  QOBJ_UNREG (vpn);
+  XFREE(MTYPE_BGP_EVPN, vpn);
+}
+
+/*
+ * Import route into matching VNI(s).
+ *
+ * Thin wrapper: calls install_uninstall_evpn_route() with the final
+ * argument set to 1 and returns its result unchanged.
+ */
+int
+bgp_evpn_import_route (struct bgp *bgp, afi_t afi, safi_t safi,
+                       struct prefix *p, struct bgp_info *ri)
+{
+  return install_uninstall_evpn_route (bgp, afi, safi, p, ri, 1);
+}
+
+/*
+ * Unimport route from matching VNI(s).
+ *
+ * Thin wrapper: calls install_uninstall_evpn_route() with the final
+ * argument set to 0 and returns its result unchanged.
+ */
+int
+bgp_evpn_unimport_route (struct bgp *bgp, afi_t afi, safi_t safi,
+                         struct prefix *p, struct bgp_info *ri)
+{
+  return install_uninstall_evpn_route (bgp, afi, safi, p, ri, 0);
+}
+
+/*
+ * Handle del of a local MACIP.
+ */
+int
+bgp_evpn_local_macip_del (struct bgp *bgp, vni_t vni,
+                          struct ethaddr *mac, struct ipaddr *ip)
+{
+  struct prefix_evpn evp;
+  struct bgpevpn *entry;
+  const char *reason;
+
+  /* Nothing can be looked up without the per-instance VNI hash. */
+  if (bgp->vnihash == NULL)
+    {
+      zlog_err ("%u: VNI hash not created", bgp->vrf_id);
+      return -1;
+    }
+
+  /* The VNI must both exist and be flagged live. */
+  entry = bgp_evpn_lookup_vni (bgp, vni);
+  if (entry == NULL || !is_vni_live (entry))
+    {
+      reason = entry ? "not live" : "not found";
+      zlog_warn ("%u: VNI hash entry for VNI %u %s at MACIP DEL",
+                 bgp->vrf_id, vni, reason);
+      return -1;
+    }
+
+  /* Build the type-2 prefix for this MAC/IP and delete its route. */
+  build_evpn_type2_prefix (&evp, mac, ip);
+  delete_evpn_route (bgp, entry, &evp);
+
+  return 0;
+}
+
+/*
+ * Handle add of a local MACIP.
+ */
+int
+bgp_evpn_local_macip_add (struct bgp *bgp, vni_t vni,
+                          struct ethaddr *mac, struct ipaddr *ip)
+{
+  struct prefix_evpn evp;
+  struct bgpevpn *entry;
+  const char *reason;
+  char mac_str[ETHER_ADDR_STRLEN];
+  char ip_str[INET6_ADDRSTRLEN];
+
+  /* Nothing can be looked up without the per-instance VNI hash. */
+  if (bgp->vnihash == NULL)
+    {
+      zlog_err ("%u: VNI hash not created", bgp->vrf_id);
+      return -1;
+    }
+
+  /* The VNI must both exist and be flagged live. */
+  entry = bgp_evpn_lookup_vni (bgp, vni);
+  if (entry == NULL || !is_vni_live (entry))
+    {
+      reason = entry ? "not live" : "not found";
+      zlog_warn ("%u: VNI hash entry for VNI %u %s at MACIP ADD",
+                 bgp->vrf_id, vni, reason);
+      return -1;
+    }
+
+  /* Build the type-2 prefix and create/update its route; a zero
+   * return from update_evpn_route() is the success case.
+   */
+  build_evpn_type2_prefix (&evp, mac, ip);
+  if (update_evpn_route (bgp, entry, &evp) == 0)
+    return 0;
+
+  zlog_err ("%u:Failed to create Type-2 route, VNI %u MAC %s IP %s",
+            bgp->vrf_id, entry->vni,
+            prefix_mac2str (mac, mac_str, sizeof (mac_str)),
+            ipaddr2str (ip, ip_str, sizeof(ip_str)));
+  return -1;
+}
+
+/*
+ * Handle del of a local VNI.
+ */
+int
+bgp_evpn_local_vni_del (struct bgp *bgp, vni_t vni)
+{
+  struct bgpevpn *entry;
+
+  /* Nothing can be looked up without the per-instance VNI hash. */
+  if (bgp->vnihash == NULL)
+    {
+      zlog_err ("%u: VNI hash not created", bgp->vrf_id);
+      return -1;
+    }
+
+  /* An unknown VNI is only worth a warning; the delete is a no-op. */
+  entry = bgp_evpn_lookup_vni (bgp, vni);
+  if (entry == NULL)
+    {
+      zlog_warn ("%u: VNI hash entry for VNI %u not found at DEL",
+                 bgp->vrf_id, vni);
+      return 0;
+    }
+
+  /* Drop every local EVPN route of this VNI so that it gets scheduled
+   * for processing (withdrawn from peers).
+   */
+  delete_routes_for_vni (bgp, entry);
+
+  /* The VNI is no longer live.  Keep the entry only while it is still
+   * configured; otherwise release it.
+   */
+  UNSET_FLAG (entry->flags, VNI_FLAG_LIVE);
+  if (is_vni_configured (entry))
+    return 0;
+
+  bgp_evpn_free (bgp, entry);
+  return 0;
+}
+
+/*
+ * Handle add (or update) of a local VNI. The only VNI change we care
+ * about is change to local-tunnel-ip.
+ */
+int
+bgp_evpn_local_vni_add (struct bgp *bgp, vni_t vni, struct in_addr originator_ip)
+{
+  struct bgpevpn *vpn;
+  struct prefix_evpn p;
+
+  if (!bgp->vnihash)
+    {
+      zlog_err ("%u: VNI hash not created", bgp->vrf_id);
+      return -1;
+    }
+
+  /* Lookup VNI. If present and no change, exit. */
+  vpn = bgp_evpn_lookup_vni (bgp, vni);
+  if (vpn && is_vni_live (vpn))
+    {
+      if (IPV4_ADDR_SAME (&vpn->originator_ip, &originator_ip))
+        /* Probably some other param has changed that we don't care about. */
+        return 0;
+
+      /* Local tunnel endpoint IP address has changed */
+      return handle_tunnel_ip_change (bgp, vpn, originator_ip);
+    }
+
+  /* Create or update as appropriate. */
+  if (!vpn)
+    {
+      vpn = bgp_evpn_new (bgp, vni, originator_ip);
+      if (!vpn)
+        {
+          zlog_err ("%u: Failed to allocate VNI entry for VNI %u - at Add",
+                    bgp->vrf_id, vni);
+          return -1;
+        }
+    }
+
+  /* Mark as "live" */
+  SET_FLAG (vpn->flags, VNI_FLAG_LIVE);
+
+  /* Create EVPN type-3 route and schedule for processing.
*/ + build_evpn_type3_prefix (&p, vpn->originator_ip); + if (update_evpn_route (bgp, vpn, &p)) + { + zlog_err ("%u: Type3 route creation failure for VNI %u", + bgp->vrf_id, vni); + return -1; + } + + /* If we have learnt and retained remote routes (VTEPs, MACs) for this VNI, + * install them. + */ + install_routes_for_vni (bgp, vpn); + + return 0; } /* diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h index c87f3ffc77..40b61bb00c 100644 --- a/bgpd/bgp_evpn.h +++ b/bgpd/bgp_evpn.h @@ -21,14 +21,31 @@ #ifndef _QUAGGA_BGP_EVPN_H #define _QUAGGA_BGP_EVPN_H -extern int bgp_nlri_parse_evpn(struct peer *peer, struct attr *attr, - struct bgp_nlri *packet, int withdraw); +#include "vxlan.h" extern void -bgp_packet_mpattr_route_type_5(struct stream *s, - struct prefix *p, struct prefix_rd *prd, - mpls_label_t *label, struct attr *attr); - +bgp_evpn_encode_prefix (struct stream *s, struct prefix *p, + struct prefix_rd *prd, mpls_label_t *label, + struct attr *attr, int addpath_encode, + u_int32_t addpath_tx_id); +extern int bgp_nlri_parse_evpn(struct peer *peer, struct attr *attr, + struct bgp_nlri *packet, int withdraw); +extern int +bgp_evpn_import_route (struct bgp *bgp, afi_t afi, safi_t safi, + struct prefix *p, struct bgp_info *ri); +extern int +bgp_evpn_unimport_route (struct bgp *bgp, afi_t afi, safi_t safi, + struct prefix *p, struct bgp_info *ri); +extern int +bgp_evpn_local_macip_del (struct bgp *bgp, vni_t vni, + struct ethaddr *mac, struct ipaddr *ip); +extern int +bgp_evpn_local_macip_add (struct bgp *bgp, vni_t vni, + struct ethaddr *mac, struct ipaddr *ip); +extern int +bgp_evpn_local_vni_del (struct bgp *bgp, vni_t vni); +extern int +bgp_evpn_local_vni_add (struct bgp *bgp, vni_t vni, struct in_addr originator_ip); extern void bgp_evpn_cleanup_on_disable (struct bgp *bgp); extern void @@ -36,13 +53,4 @@ bgp_evpn_cleanup (struct bgp *bgp); extern void bgp_evpn_init (struct bgp *bgp); -/* EVPN route types as per RFC7432 and - * as per 
draft-ietf-bess-evpn-prefix-advertisement-02 - */ -#define EVPN_ETHERNET_AUTO_DISCOVERY 1 -#define EVPN_MACIP_ADVERTISEMENT 2 -#define EVPN_INCLUSIVE_MULTICAST_ETHERNET_TAG 3 -#define EVPN_ETHERNET_SEGMENT 4 -#define EVPN_IP_PREFIX 5 - -#endif /* _QUAGGA_BGP_EVPN_H */ +#endif /* _QUAGGA_BGP_EVPN_H */ diff --git a/bgpd/bgp_evpn_private.h b/bgpd/bgp_evpn_private.h index e9e8a75e28..f37e0ef2e3 100644 --- a/bgpd/bgp_evpn_private.h +++ b/bgpd/bgp_evpn_private.h @@ -94,4 +94,131 @@ struct irt_node struct list *vnis; }; +#define RT_TYPE_IMPORT 1 +#define RT_TYPE_EXPORT 2 +#define RT_TYPE_BOTH 3 + +static inline int +is_vni_configured (struct bgpevpn *vpn) +{ + return (CHECK_FLAG (vpn->flags, VNI_FLAG_CFGD)); +} + +static inline int +is_vni_live (struct bgpevpn *vpn) +{ + return (CHECK_FLAG (vpn->flags, VNI_FLAG_LIVE)); +} + +static inline int +is_rd_configured (struct bgpevpn *vpn) +{ + return (CHECK_FLAG (vpn->flags, VNI_FLAG_RD_CFGD)); +} + +static inline int +bgp_evpn_rd_matches_existing (struct bgpevpn *vpn, struct prefix_rd *prd) +{ + return(memcmp (&vpn->prd.val, prd->val, ECOMMUNITY_SIZE) == 0); +} + +static inline int +is_import_rt_configured (struct bgpevpn *vpn) +{ + return (CHECK_FLAG (vpn->flags, VNI_FLAG_IMPRT_CFGD)); +} + +static inline int +is_export_rt_configured (struct bgpevpn *vpn) +{ + return (CHECK_FLAG (vpn->flags, VNI_FLAG_EXPRT_CFGD)); +} + +static inline int +is_vni_param_configured (struct bgpevpn *vpn) +{ + return (is_rd_configured (vpn) || + is_import_rt_configured (vpn) || + is_export_rt_configured (vpn)); +} + +static inline void +vni2label (vni_t vni, mpls_label_t *label) +{ + u_char *tag = (u_char *) label; + tag[0] = (vni >> 16) & 0xFF; + tag[1] = (vni >> 8) & 0xFF; + tag[2] = vni & 0xFF; +} + +static inline vni_t +label2vni (mpls_label_t *label) +{ + u_char *tag = (u_char *) label; + vni_t vni; + + vni = ((u_int32_t) *tag++ << 16); + vni |= (u_int32_t) *tag++ << 8; + vni |= (u_int32_t) (*tag & 0xFF); + + return vni; +} + +static inline 
void +encode_mac_mobility_extcomm (int static_mac, u_int32_t seq, + struct ecommunity_val *eval) +{ + memset (eval, 0, sizeof (*eval)); + eval->val[0] = ECOMMUNITY_ENCODE_EVPN; + eval->val[1] = ECOMMUNITY_EVPN_SUBTYPE_MACMOBILITY; + if (static_mac) + eval->val[2] = ECOMMUNITY_EVPN_SUBTYPE_MACMOBILITY_FLAG_STICKY; + eval->val[4] = (seq >> 24) & 0xff; + eval->val[5] = (seq >> 16) & 0xff; + eval->val[6] = (seq >> 8) & 0xff; + eval->val[7] = seq & 0xff; +} + +static inline void +build_evpn_type2_prefix (struct prefix_evpn *p, struct ethaddr *mac, + struct ipaddr *ip) +{ + memset (p, 0, sizeof (struct prefix_evpn)); + p->family = AF_ETHERNET; + p->prefixlen = EVPN_TYPE_2_ROUTE_PREFIXLEN; + p->prefix.route_type = BGP_EVPN_MAC_IP_ROUTE; + memcpy(&p->prefix.mac.octet, mac->octet, ETHER_ADDR_LEN); + p->prefix.ip.ipa_type = IPADDR_NONE; + if (ip) + memcpy(&p->prefix.ip, ip, sizeof (*ip)); +} + +static inline void +build_evpn_type3_prefix (struct prefix_evpn *p, struct in_addr originator_ip) +{ + memset (p, 0, sizeof (struct prefix_evpn)); + p->family = AF_ETHERNET; + p->prefixlen = EVPN_TYPE_3_ROUTE_PREFIXLEN; + p->prefix.route_type = BGP_EVPN_IMET_ROUTE; + p->prefix.ip.ipa_type = IPADDR_V4; + p->prefix.ip.ipaddr_v4 = originator_ip; +} + + +extern void +bgp_evpn_map_vni_to_its_rts (struct bgp *bgp, struct bgpevpn *vpn); +extern void +bgp_evpn_unmap_vni_from_its_rts (struct bgp *bgp, struct bgpevpn *vpn); +extern void +bgp_evpn_derive_auto_rt_import (struct bgp *bgp, struct bgpevpn *vpn); +extern void +bgp_evpn_derive_auto_rt_export (struct bgp *bgp, struct bgpevpn *vpn); +extern void +bgp_evpn_derive_auto_rd (struct bgp *bgp, struct bgpevpn *vpn); +extern struct bgpevpn * +bgp_evpn_lookup_vni (struct bgp *bgp, vni_t vni); +extern struct bgpevpn * +bgp_evpn_new (struct bgp *bgp, vni_t vni, struct in_addr originator_ip); +extern void +bgp_evpn_free (struct bgp *bgp, struct bgpevpn *vpn); #endif /* _BGP_EVPN_PRIVATE_H */ diff --git a/bgpd/bgp_evpn_vty.c b/bgpd/bgp_evpn_vty.c 
index ac1faf9de1..6335b9d468 100644 --- a/bgpd/bgp_evpn_vty.c +++ b/bgpd/bgp_evpn_vty.c @@ -681,7 +681,7 @@ DEFUN(evpnrt5_network, argv[idx_ext_community]->arg, argv[idx_word]->arg, argv[idx_rmap] ? argv[idx_gwip]->arg : NULL, - EVPN_IP_PREFIX, argv[idx_esi]->arg, + BGP_EVPN_IP_PREFIX_ROUTE, argv[idx_esi]->arg, argv[idx_gwip]->arg, argv[idx_ethtag]->arg, argv[idx_routermac]->arg); } @@ -713,7 +713,7 @@ DEFUN(no_evpnrt5_network, return bgp_static_unset_safi(AFI_L2VPN, SAFI_EVPN, vty, argv[idx_ipv4_prefixlen]->arg, argv[idx_ext_community]->arg, - argv[idx_label]->arg, EVPN_IP_PREFIX, + argv[idx_label]->arg, BGP_EVPN_IP_PREFIX_ROUTE, argv[idx_esi]->arg, argv[idx_gwip]->arg, argv[idx_ethtag]->arg); } diff --git a/bgpd/bgp_memory.c b/bgpd/bgp_memory.c index c457f4b3e9..1913706587 100644 --- a/bgpd/bgp_memory.c +++ b/bgpd/bgp_memory.c @@ -114,3 +114,7 @@ DEFINE_MTYPE(BGPD, BGP_TEA_OPTIONS_VALUE, "BGP TEA Options Value") DEFINE_MTYPE(BGPD, LCOMMUNITY, "Large Community") DEFINE_MTYPE(BGPD, LCOMMUNITY_STR, "Large Community display string") DEFINE_MTYPE(BGPD, LCOMMUNITY_VAL, "Large Community value") + +DEFINE_MTYPE(BGPD, BGP_EVPN, "BGP EVPN Information") +DEFINE_MTYPE(BGPD, BGP_EVPN_IMPORT_RT, "BGP EVPN Import RT") +DEFINE_MTYPE(BGPD, BGP_EVPN_MACIP, "BGP EVPN MAC IP") diff --git a/bgpd/bgp_memory.h b/bgpd/bgp_memory.h index 454092cef3..185369d230 100644 --- a/bgpd/bgp_memory.h +++ b/bgpd/bgp_memory.h @@ -110,4 +110,8 @@ DECLARE_MTYPE(BGP_TEA_OPTIONS_VALUE) DECLARE_MTYPE(LCOMMUNITY) DECLARE_MTYPE(LCOMMUNITY_STR) DECLARE_MTYPE(LCOMMUNITY_VAL) + +DECLARE_MTYPE(BGP_EVPN) +DECLARE_MTYPE(BGP_EVPN_IMPORT_RT) +DECLARE_MTYPE(BGP_EVPN_MACIP) #endif /* _QUAGGA_BGP_MEMORY_H */ diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index ca6d138eaa..e55cd7b949 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -110,6 +110,36 @@ bgp_afi_node_get (struct bgp_table *table, afi_t afi, safi_t safi, struct prefix return rn; } +struct bgp_node * +bgp_afi_node_lookup (struct bgp_table *table, 
afi_t afi, safi_t safi, struct prefix *p, + struct prefix_rd *prd) +{ + struct bgp_node *rn; + struct bgp_node *prn = NULL; + + if (!table) + return NULL; + + if ((safi == SAFI_MPLS_VPN) || (safi == SAFI_ENCAP) || (safi == SAFI_EVPN)) + { + prn = bgp_node_lookup (table, (struct prefix *) prd); + if (!prn) + return NULL; + + if (prn->info == NULL) + { + bgp_unlock_node (prn); + return NULL; + } + + table = prn->info; + } + + rn = bgp_node_lookup (table, p); + + return rn; +} + /* Allocate bgp_info_extra */ static struct bgp_info_extra * bgp_info_extra_new (void) @@ -224,7 +254,7 @@ bgp_info_add (struct bgp_node *rn, struct bgp_info *ri) /* Do the actual removal of info from RIB, for use by bgp_process completion callback *only* */ -static void +void bgp_info_reap (struct bgp_node *rn, struct bgp_info *ri) { if (ri->next) @@ -359,7 +389,7 @@ bgp_info_path_with_addpath_rx_str (struct bgp_info *ri, char *buf) static int bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, int *paths_eq, struct bgp_maxpaths_cfg *mpath_cfg, int debug, - const char *pfx_buf) + char *pfx_buf, afi_t afi, safi_t safi) { struct attr *newattr, *existattr; struct attr_extra *newattre, *existattre; @@ -381,6 +411,8 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, int ret; char new_buf[PATH_ADDPATH_STR_BUFFER]; char exist_buf[PATH_ADDPATH_STR_BUFFER]; + u_int32_t new_mm_seq; + u_int32_t exist_mm_seq; *paths_eq = 0; @@ -414,6 +446,61 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, newattre = newattr->extra; existattre = existattr->extra; + /* For EVPN routes, we cannot just go by local vs remote, we have to + * look at the MAC mobility sequence number, if present. + */ + if (safi == SAFI_EVPN) + { + /* This is an error condition described in RFC 7432 Section 15.2. The RFC + * states that in this scenario "the PE MUST alert the operator" but it + * does not state what other action to take. 
In order to provide some + * consistency in this scenario we are going to prefer the path with the + * sticky flag. + */ + if (newattre->sticky != existattre->sticky) + { + if (!debug) + { + prefix2str (&new->net->p, pfx_buf, sizeof (*pfx_buf) * PREFIX2STR_BUFFER); + bgp_info_path_with_addpath_rx_str (new, new_buf); + bgp_info_path_with_addpath_rx_str (exist, exist_buf); + } + + if (newattre->sticky && !existattre->sticky) + { + zlog_warn("%s: %s wins over %s due to sticky MAC flag", + pfx_buf, new_buf, exist_buf); + return 1; + } + + if (!newattre->sticky && existattre->sticky) + { + zlog_warn("%s: %s loses to %s due to sticky MAC flag", + pfx_buf, new_buf, exist_buf); + return 0; + } + } + + new_mm_seq = mac_mobility_seqnum (newattr); + exist_mm_seq = mac_mobility_seqnum (existattr); + + if (new_mm_seq > exist_mm_seq) + { + if (debug) + zlog_debug("%s: %s wins over %s due to MM seq %u > %u", + pfx_buf, new_buf, exist_buf, new_mm_seq, exist_mm_seq); + return 1; + } + + if (new_mm_seq < exist_mm_seq) + { + if (debug) + zlog_debug("%s: %s loses to %s due to MM seq %u < %u", + pfx_buf, new_buf, exist_buf, new_mm_seq, exist_mm_seq); + return 0; + } + } + /* 1. Weight check. 
*/ new_weight = exist_weight = 0; @@ -891,11 +978,12 @@ bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, * This version is compatible with */ int bgp_info_cmp_compatible (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist, - afi_t afi, safi_t safi) + char *pfx_buf, afi_t afi, safi_t safi) { int paths_eq; int ret; - ret = bgp_info_cmp (bgp, new, exist, &paths_eq, NULL, 0, __func__); + ret = bgp_info_cmp (bgp, new, exist, &paths_eq, NULL, 0, + pfx_buf, afi, safi); if (paths_eq) ret = 0; @@ -1599,16 +1687,11 @@ subgroup_announce_check (struct bgp_node *rn, struct bgp_info *ri, return 1; } -struct bgp_info_pair -{ - struct bgp_info *old; - struct bgp_info *new; -}; - -static void +void bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_maxpaths_cfg *mpath_cfg, - struct bgp_info_pair *result) + struct bgp_info_pair *result, + afi_t afi, safi_t safi) { struct bgp_info *new_select; struct bgp_info *old_select; @@ -1668,7 +1751,7 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, ri2->attr->aspath)) { if (bgp_info_cmp (bgp, ri2, new_select, &paths_eq, - mpath_cfg, debug, pfx_buf)) + mpath_cfg, debug, pfx_buf, afi, safi)) { bgp_info_unset_flag (rn, new_select, BGP_INFO_DMED_SELECTED); new_select = ri2; @@ -1725,7 +1808,8 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, bgp_info_unset_flag (rn, ri, BGP_INFO_DMED_CHECK); - if (bgp_info_cmp (bgp, ri, new_select, &paths_eq, mpath_cfg, debug, pfx_buf)) + if (bgp_info_cmp (bgp, ri, new_select, &paths_eq, mpath_cfg, + debug, pfx_buf, afi, safi)) { new_select = ri; } @@ -1779,7 +1863,8 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, continue; } - bgp_info_cmp (bgp, ri, new_select, &paths_eq, mpath_cfg, debug, pfx_buf); + bgp_info_cmp (bgp, ri, new_select, &paths_eq, mpath_cfg, + debug, pfx_buf, afi, safi); if (paths_eq) { @@ -1855,7 +1940,7 @@ subgroup_process_announce_selected (struct update_subgroup *subgrp, * Clear IGP changed flag and 
attribute changed flag for a route (all paths). * This is called at the end of route processing. */ -static void +void bgp_zebra_clear_route_change_flags (struct bgp_node *rn) { struct bgp_info *ri; @@ -1874,7 +1959,7 @@ bgp_zebra_clear_route_change_flags (struct bgp_node *rn) * if the route selection returns the same best route as earlier - to * determine if we need to update zebra or not. */ -static int +int bgp_zebra_has_route_changed (struct bgp_node *rn, struct bgp_info *selected) { struct bgp_info *mpinfo; @@ -1943,7 +2028,8 @@ bgp_process_main (struct work_queue *wq, void *data) } /* Best path selection. */ - bgp_best_selection (bgp, rn, &bgp->maxpaths[afi][safi], &old_and_new); + bgp_best_selection (bgp, rn, &bgp->maxpaths[afi][safi], + &old_and_new, afi, safi); old_select = old_and_new.old; new_select = old_and_new.new; @@ -2343,12 +2429,17 @@ bgp_rib_withdraw (struct bgp_node *rn, struct bgp_info *ri, struct peer *peer, } } #endif + + /* If this is an EVPN route, process for un-import. */ + if (safi == SAFI_EVPN) + bgp_evpn_unimport_route (peer->bgp, afi, safi, &rn->p, ri); + bgp_rib_remove (rn, ri, peer, afi, safi); } -static struct bgp_info * -info_make (int type, int sub_type, u_short instance, struct peer *peer, struct attr *attr, - struct bgp_node *rn) +struct bgp_info * +info_make (int type, int sub_type, u_short instance, struct peer *peer, + struct attr *attr, struct bgp_node *rn) { struct bgp_info *new; @@ -2435,7 +2526,8 @@ bgp_update_martian_nexthop (struct bgp *bgp, afi_t afi, safi_t safi, struct attr int ret = 0; /* Only validated for unicast and multicast currently. */ - if (safi != SAFI_UNICAST && safi != SAFI_MULTICAST) + /* Also valid for EVPN where the nexthop is an IP address. */ + if (safi != SAFI_UNICAST && safi != SAFI_MULTICAST && safi != SAFI_EVPN) return 0; /* If NEXT_HOP is present, validate it. 
*/ @@ -2504,6 +2596,7 @@ bgp_update (struct peer *peer, struct prefix *p, u_int32_t addpath_id, #if ENABLE_BGP_VNC int vnc_implicit_withdraw = 0; #endif + int same_attr=0; memset (&new_attr, 0, sizeof(struct attr)); memset (&new_extra, 0, sizeof(struct attr_extra)); @@ -2613,6 +2706,7 @@ bgp_update (struct peer *peer, struct prefix *p, u_int32_t addpath_id, if (ri) { ri->uptime = bgp_clock (); + same_attr = attrhash_cmp (ri->attr, attr_new); /* Same attribute comes in. */ if (!CHECK_FLAG (ri->flags, BGP_INFO_REMOVED) @@ -2734,7 +2828,32 @@ bgp_update (struct peer *peer, struct prefix *p, u_int32_t addpath_id, } } #endif - + + /* Special handling for EVPN update of an existing route. If the + * extended community attribute has changed, we need to un-import + * the route using its existing extended community. It will be + * subsequently processed for import with the new extended community. + */ + if (safi == SAFI_EVPN && !same_attr) + { + if ((ri->attr->flag & ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES)) && + (attr_new->flag & ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES))) + { + int cmp; + + cmp = ecommunity_cmp (ri->attr->extra->ecommunity, + attr_new->extra->ecommunity); + if (!cmp) + { + if (bgp_debug_update(peer, p, NULL, 1)) + zlog_debug ("Change in EXT-COMM, existing %s new %s", + ecommunity_str (ri->attr->extra->ecommunity), + ecommunity_str (attr_new->extra->ecommunity)); + bgp_evpn_unimport_route (bgp, afi, safi, p, ri); + } + } + } + /* Update to new attribute. */ bgp_attr_unintern (&ri->attr); ri->attr = attr_new; @@ -2833,6 +2952,16 @@ bgp_update (struct peer *peer, struct prefix *p, u_int32_t addpath_id, } #endif + /* If this is an EVPN route and some attribute has changed, process + * route for import. If the extended community has changed, we would + * have done the un-import earlier and the import would result in the + * route getting injected into appropriate L2 VNIs. 
If it is just + * some other attribute change, the import will result in updating + * the attributes for the route in the VNI(s). + */ + if (safi == SAFI_EVPN && !same_attr) + bgp_evpn_import_route (bgp, afi, safi, p, ri); + /* Process change. */ bgp_aggregate_increment (bgp, p, ri, afi, safi); @@ -2953,6 +3082,10 @@ bgp_update (struct peer *peer, struct prefix *p, u_int32_t addpath_id, if (bgp_maximum_prefix_overflow (peer, afi, safi, 0)) return -1; + /* If this is an EVPN route, process for import. */ + if (safi == SAFI_EVPN) + bgp_evpn_import_route (bgp, afi, safi, p, new); + /* Process change. */ bgp_process (bgp, rn, afi, safi); @@ -2991,7 +3124,13 @@ bgp_update (struct peer *peer, struct prefix *p, u_int32_t addpath_id, } if (ri) - bgp_rib_remove (rn, ri, peer, afi, safi); + { + /* If this is an EVPN route, un-import it as it is now filtered. */ + if (safi == SAFI_EVPN) + bgp_evpn_unimport_route (bgp, afi, safi, p, ri); + + bgp_rib_remove (rn, ri, peer, afi, safi); + } bgp_unlock_node (rn); @@ -3271,7 +3410,12 @@ bgp_clear_route_node (struct work_queue *wq, void *data) && ! CHECK_FLAG (ri->flags, BGP_INFO_UNUSEABLE)) bgp_info_set_flag (rn, ri, BGP_INFO_STALE); else - bgp_rib_remove (rn, ri, peer, afi, safi); + { + /* If this is an EVPN route, process for un-import. */ + if (safi == SAFI_EVPN) + bgp_evpn_unimport_route (peer->bgp, afi, safi, &rn->p, ri); + bgp_rib_remove (rn, ri, peer, afi, safi); + } } return WQ_SUCCESS; } diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index 0c77cc1ee0..01d8e62d43 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -99,6 +99,9 @@ struct bgp_info_extra } vnc; #endif + + /* For imported routes into a VNI (or VRF), this points to the parent. */ + void *parent; }; struct bgp_info @@ -174,6 +177,13 @@ struct bgp_info }; +/* Structure used in BGP path selection */ +struct bgp_info_pair +{ + struct bgp_info *old; + struct bgp_info *new; +}; + /* BGP static route configuration. 
*/ struct bgp_static { @@ -309,6 +319,7 @@ extern struct bgp_node *bgp_afi_node_get (struct bgp_table *table, afi_t afi, extern struct bgp_info *bgp_info_lock (struct bgp_info *); extern struct bgp_info *bgp_info_unlock (struct bgp_info *); extern void bgp_info_add (struct bgp_node *rn, struct bgp_info *ri); +extern void bgp_info_reap (struct bgp_node *rn, struct bgp_info *ri); extern void bgp_info_delete (struct bgp_node *rn, struct bgp_info *ri); extern struct bgp_info_extra *bgp_info_extra_get (struct bgp_info *); extern void bgp_info_set_flag (struct bgp_node *, struct bgp_info *, u_int32_t); @@ -372,6 +383,10 @@ extern u_char bgp_distance_apply (struct prefix *, struct bgp_info *, afi_t, saf extern afi_t bgp_node_afi (struct vty *); extern safi_t bgp_node_safi (struct vty *); +extern struct bgp_info * +info_make (int type, int sub_type, u_short instance, struct peer *peer, + struct attr *attr, struct bgp_node *rn); + extern void route_vty_out (struct vty *, struct prefix *, struct bgp_info *, int, safi_t, json_object *); extern void route_vty_out_tag (struct vty *, struct prefix *, struct bgp_info *, int, safi_t, json_object *); extern void route_vty_out_tmp (struct vty *, struct prefix *, struct attr *, safi_t, u_char, json_object *); @@ -396,10 +411,23 @@ extern void bgp_process_queues_drain_immediate (void); extern struct bgp_node * bgp_afi_node_get (struct bgp_table *, afi_t , safi_t , struct prefix *, struct prefix_rd *); +extern struct bgp_node * +bgp_afi_node_lookup (struct bgp_table *table, afi_t afi, safi_t safi, + struct prefix *p, struct prefix_rd *prd); extern struct bgp_info *bgp_info_new (void); extern void bgp_info_restore (struct bgp_node *, struct bgp_info *); -extern int bgp_info_cmp_compatible (struct bgp *, struct bgp_info *, - struct bgp_info *, afi_t, safi_t ); +extern int +bgp_info_cmp_compatible (struct bgp *, struct bgp_info *, struct bgp_info *, + char *pfx_buf, afi_t afi, safi_t safi); + +extern void +bgp_best_selection (struct bgp 
*bgp, struct bgp_node *rn, + struct bgp_maxpaths_cfg *mpath_cfg, + struct bgp_info_pair *result, + afi_t afi, safi_t safi); +extern void bgp_zebra_clear_route_change_flags (struct bgp_node *rn); +extern int +bgp_zebra_has_route_changed (struct bgp_node *rn, struct bgp_info *selected); #endif /* _QUAGGA_BGP_ROUTE_H */ diff --git a/bgpd/bgp_updgrp_packet.c b/bgpd/bgp_updgrp_packet.c index cecb844fce..872ead000c 100644 --- a/bgpd/bgp_updgrp_packet.c +++ b/bgpd/bgp_updgrp_packet.c @@ -607,6 +607,30 @@ bpacket_reformat_for_peer (struct bpacket *pkt, struct peer_af *paf) (nhlen == 24 ? " and RD" : "")); } } + else if (paf->afi == AFI_L2VPN) + { + struct in_addr v4nh, *mod_v4nh; + int nh_modified = 0; + + stream_get_from (&v4nh, s, vec->offset + 1, 4); + mod_v4nh = &v4nh; + + /* No route-map changes allowed for EVPN nexthops. */ + if (!v4nh.s_addr) + { + mod_v4nh = &peer->nexthop.v4; + nh_modified = 1; + } + + if (nh_modified) + stream_put_in_addr_at (s, vec->offset + 1, mod_v4nh); + + if (bgp_debug_update(peer, NULL, NULL, 0)) + zlog_debug ("u%" PRIu64 ":s%" PRIu64 " %s send UPDATE w/ nexthop %s", + PAF_SUBGRP(paf)->update_group->id, PAF_SUBGRP(paf)->id, + peer->host, inet_ntoa (*mod_v4nh)); + + } } bgp_packet_add (peer, s); diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 19f0dd98ff..a085fecc76 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -35,6 +35,7 @@ #include "lib/bfd.h" #include "filter.h" #include "mpls.h" +#include "vxlan.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_route.h" @@ -52,6 +53,7 @@ # include "bgpd/rfapi/rfapi_backend.h" # include "bgpd/rfapi/vnc_export_bgp.h" #endif +#include "bgpd/bgp_evpn.h" /* All information about zebra. 
*/ struct zclient *zclient = NULL; @@ -2139,6 +2141,82 @@ bgp_zebra_connected (struct zclient *zclient) */ } +static int +bgp_zebra_process_local_vni (int command, struct zclient *zclient, + zebra_size_t length, vrf_id_t vrf_id) +{ + struct stream *s; + vni_t vni; + struct bgp *bgp; + struct in_addr vtep_ip; + + s = zclient->ibuf; + vni = stream_getl (s); + if (command == ZEBRA_VNI_ADD) + vtep_ip.s_addr = stream_get_ipv4 (s); + bgp = bgp_lookup_by_vrf_id (vrf_id); + if (!bgp) + return 0; + + if (BGP_DEBUG (zebra, ZEBRA)) + zlog_debug("Rx VNI %s VRF %u VNI %u", + (command == ZEBRA_VNI_ADD) ? "add" : "del", vrf_id, vni); + + if (command == ZEBRA_VNI_ADD) + return bgp_evpn_local_vni_add (bgp, vni, vtep_ip.s_addr? vtep_ip : bgp->router_id); + else + return bgp_evpn_local_vni_del (bgp, vni); +} + +static int +bgp_zebra_process_local_macip (int command, struct zclient *zclient, + zebra_size_t length, vrf_id_t vrf_id) +{ + struct stream *s; + vni_t vni; + struct bgp *bgp; + struct ethaddr mac; + struct ipaddr ip; + int ipa_len; + char buf[ETHER_ADDR_STRLEN]; + char buf1[INET6_ADDRSTRLEN]; + + memset (&ip, 0, sizeof (ip)); + s = zclient->ibuf; + vni = stream_getl (s); + stream_get (&mac.octet, s, ETHER_ADDR_LEN); + ipa_len = stream_getl (s); + if (ipa_len != 0 && + ipa_len != IPV4_MAX_BYTELEN && + ipa_len != IPV6_MAX_BYTELEN) + { + zlog_err ("%u:Recv MACIP %s with invalid IP addr length %d", + vrf_id, (command == ZEBRA_MACIP_ADD) ? "Add" : "Del", + ipa_len); + return -1; + } + + if (ipa_len) + { + ip.ipa_type = (ipa_len == IPV4_MAX_BYTELEN) ? IPADDR_V4: IPADDR_V6; + stream_get (&ip.ip.addr, s, ipa_len); + } + + bgp = bgp_lookup_by_vrf_id (vrf_id); + if (!bgp) + return 0; + + if (BGP_DEBUG (zebra, ZEBRA)) + zlog_debug ("%u:Recv MACIP %s MAC %s IP %s VNI %u", + vrf_id, (command == ZEBRA_MACIP_ADD) ? 
"Add" : "Del", + prefix_mac2str (&mac, buf, sizeof (buf)), + ipaddr2str (&ip, buf1, sizeof(buf1)), vni); + + if (command == ZEBRA_MACIP_ADD) + return bgp_evpn_local_macip_add (bgp, vni, &mac, &ip); + else + return bgp_evpn_local_macip_del (bgp, vni, &mac, &ip); +} void bgp_zebra_init (struct thread_master *master) @@ -2166,6 +2244,10 @@ bgp_zebra_init (struct thread_master *master) zclient->nexthop_update = bgp_read_nexthop_update; zclient->import_check_update = bgp_read_import_check_update; zclient->fec_update = bgp_read_fec_update; + zclient->local_vni_add = bgp_zebra_process_local_vni; + zclient->local_vni_del = bgp_zebra_process_local_vni; + zclient->local_macip_add = bgp_zebra_process_local_macip; + zclient->local_macip_del = bgp_zebra_process_local_macip; bgp_nexthop_buf = stream_new(multipath_num * sizeof (struct in6_addr)); bgp_ifindices_buf = stream_new(multipath_num * sizeof (unsigned int)); diff --git a/bgpd/rfapi/rfapi_import.c b/bgpd/rfapi/rfapi_import.c index b0c6db2a1e..ec52b5742b 100644 --- a/bgpd/rfapi/rfapi_import.c +++ b/bgpd/rfapi/rfapi_import.c @@ -2121,6 +2121,8 @@ rfapiBgpInfoAttachSorted ( struct bgp *bgp; struct bgp_info *prev; struct bgp_info *next; + char pfx_buf[PREFIX2STR_BUFFER]; + bgp = bgp_get_default (); /* assume 1 instance for now */ @@ -2136,7 +2138,7 @@ rfapiBgpInfoAttachSorted ( if (!bgp || (!CHECK_FLAG (info_new->flags, BGP_INFO_REMOVED) && CHECK_FLAG (next->flags, BGP_INFO_REMOVED)) || - bgp_info_cmp_compatible (bgp, info_new, next, afi, safi) == -1) + bgp_info_cmp_compatible (bgp, info_new, next, pfx_buf, afi, safi) == -1) { /* -1 if 1st is better */ break; }