diff --git a/bgpd/bgp_advertise.c b/bgpd/bgp_advertise.c index 497fb0749e..76a65f7f04 100644 --- a/bgpd/bgp_advertise.c +++ b/bgpd/bgp_advertise.c @@ -194,6 +194,7 @@ void bgp_adj_in_set(struct bgp_node *rn, struct peer *peer, struct attr *attr, adj = XCALLOC(MTYPE_BGP_ADJ_IN, sizeof(struct bgp_adj_in)); adj->peer = peer_lock(peer); /* adj_in peer reference */ adj->attr = bgp_attr_intern(attr); + adj->uptime = bgp_clock(); adj->addpath_rx_id = addpath_id; BGP_ADJ_IN_ADD(rn, adj); bgp_lock_node(rn); diff --git a/bgpd/bgp_advertise.h b/bgpd/bgp_advertise.h index 1b55b6e64b..c983598756 100644 --- a/bgpd/bgp_advertise.h +++ b/bgpd/bgp_advertise.h @@ -101,6 +101,9 @@ struct bgp_adj_in { /* Received attribute. */ struct attr *attr; + /* timestamp (monotime) */ + time_t uptime; + /* Addpath identifier */ uint32_t addpath_rx_id; }; diff --git a/bgpd/bgp_bmp.c b/bgpd/bgp_bmp.c new file mode 100644 index 0000000000..8fca202345 --- /dev/null +++ b/bgpd/bgp_bmp.c @@ -0,0 +1,2240 @@ +/* BMP support. + * Copyright (C) 2018 Yasuhiro Ohara + * Copyright (C) 2019 David Lamparter for NetDEF, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "log.h" +#include "stream.h" +#include "sockunion.h" +#include "command.h" +#include "prefix.h" +#include "thread.h" +#include "linklist.h" +#include "queue.h" +#include "pullwr.h" +#include "memory.h" +#include "network.h" +#include "filter.h" +#include "lib_errors.h" +#include "stream.h" +#include "libfrr.h" +#include "version.h" +#include "jhash.h" +#include "termtable.h" + +#include "bgpd/bgp_table.h" +#include "bgpd/bgpd.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_attr.h" +#include "bgpd/bgp_dump.h" +#include "bgpd/bgp_errors.h" +#include "bgpd/bgp_packet.h" +#include "bgpd/bgp_bmp.h" +#include "bgpd/bgp_fsm.h" +#include "bgpd/bgp_updgrp.h" +#include "bgpd/bgp_vty.h" + +static void bmp_close(struct bmp *bmp); +static struct bmp_bgp *bmp_bgp_find(struct bgp *bgp); +static void bmp_targets_put(struct bmp_targets *bt); +static struct bmp_bgp_peer *bmp_bgp_peer_find(uint64_t peerid); +static struct bmp_bgp_peer *bmp_bgp_peer_get(struct peer *peer); +static void bmp_active_disconnected(struct bmp_active *ba); +static void bmp_active_put(struct bmp_active *ba); + +DEFINE_MGROUP(BMP, "BMP (BGP Monitoring Protocol)") + +DEFINE_MTYPE_STATIC(BMP, BMP_CONN, "BMP connection state") +DEFINE_MTYPE_STATIC(BMP, BMP_TARGETS, "BMP targets") +DEFINE_MTYPE_STATIC(BMP, BMP_TARGETSNAME, "BMP targets name") +DEFINE_MTYPE_STATIC(BMP, BMP_LISTENER, "BMP listener") +DEFINE_MTYPE_STATIC(BMP, BMP_ACTIVE, "BMP active connection config") +DEFINE_MTYPE_STATIC(BMP, BMP_ACLNAME, "BMP access-list name") +DEFINE_MTYPE_STATIC(BMP, BMP_QUEUE, "BMP update queue item") +DEFINE_MTYPE_STATIC(BMP, BMP, "BMP instance state") +DEFINE_MTYPE_STATIC(BMP, BMP_MIRRORQ, "BMP route 
mirroring buffer") +DEFINE_MTYPE_STATIC(BMP, BMP_PEER, "BMP per BGP peer data") +DEFINE_MTYPE_STATIC(BMP, BMP_OPEN, "BMP stored BGP OPEN message") + +DEFINE_QOBJ_TYPE(bmp_targets) + +static int bmp_bgp_cmp(const struct bmp_bgp *a, const struct bmp_bgp *b) +{ + if (a->bgp < b->bgp) + return -1; + if (a->bgp > b->bgp) + return 1; + return 0; +} + +static uint32_t bmp_bgp_hash(const struct bmp_bgp *e) +{ + return jhash(&e->bgp, sizeof(e->bgp), 0x55aa5a5a); +} + +DECLARE_HASH(bmp_bgph, struct bmp_bgp, bbi, bmp_bgp_cmp, bmp_bgp_hash) + +struct bmp_bgph_head bmp_bgph; + +static int bmp_bgp_peer_cmp(const struct bmp_bgp_peer *a, + const struct bmp_bgp_peer *b) +{ + if (a->peerid < b->peerid) + return -1; + if (a->peerid > b->peerid) + return 1; + return 0; +} + +static uint32_t bmp_bgp_peer_hash(const struct bmp_bgp_peer *e) +{ + return e->peerid; +} + +DECLARE_HASH(bmp_peerh, struct bmp_bgp_peer, bpi, + bmp_bgp_peer_cmp, bmp_bgp_peer_hash) + +struct bmp_peerh_head bmp_peerh; + +DECLARE_LIST(bmp_mirrorq, struct bmp_mirrorq, bmi) + +/* listener management */ + +static int bmp_listener_cmp(const struct bmp_listener *a, + const struct bmp_listener *b) +{ + int c; + + c = sockunion_cmp(&a->addr, &b->addr); + if (c) + return c; + if (a->port < b->port) + return -1; + if (a->port > b->port) + return 1; + return 0; +} + +DECLARE_SORTLIST_UNIQ(bmp_listeners, struct bmp_listener, bli, bmp_listener_cmp) + +static int bmp_targets_cmp(const struct bmp_targets *a, + const struct bmp_targets *b) +{ + return strcmp(a->name, b->name); +} + +DECLARE_SORTLIST_UNIQ(bmp_targets, struct bmp_targets, bti, bmp_targets_cmp) + +DECLARE_LIST(bmp_session, struct bmp, bsi) + +DECLARE_DLIST(bmp_qlist, struct bmp_queue_entry, bli) + +static int bmp_qhash_cmp(const struct bmp_queue_entry *a, + const struct bmp_queue_entry *b) +{ + int ret; + ret = prefix_cmp(&a->p, &b->p); + if (ret) + return ret; + ret = memcmp(&a->peerid, &b->peerid, + offsetof(struct bmp_queue_entry, refcount) - + offsetof(struct bmp_queue_entry, peerid)); + return ret; +} + +static uint32_t bmp_qhash_hkey(const struct bmp_queue_entry *e) +{ + uint32_t key; + + key = prefix_hash_key((void *)&e->p); + key = jhash(&e->peerid, + offsetof(struct bmp_queue_entry, refcount) - + offsetof(struct bmp_queue_entry, peerid), + key); + return key; +} + +DECLARE_HASH(bmp_qhash, struct bmp_queue_entry, bhi, + bmp_qhash_cmp, bmp_qhash_hkey) + +static int bmp_active_cmp(const struct bmp_active *a, + const struct bmp_active *b) +{ + int c; + + c = strcmp(a->hostname, b->hostname); + if (c) + return c; + if (a->port < b->port) + return -1; + if (a->port > b->port) + return 1; + return 0; +} + +DECLARE_SORTLIST_UNIQ(bmp_actives, struct bmp_active, bai, bmp_active_cmp) + +static struct bmp *bmp_new(struct bmp_targets *bt, int bmp_sock) +{ + struct bmp *new = XCALLOC(MTYPE_BMP_CONN, sizeof(struct bmp)); + afi_t afi; + safi_t safi; + + monotime(&new->t_up); + new->targets = bt; + new->socket = bmp_sock; + new->syncafi = AFI_MAX; + + FOREACH_AFI_SAFI (afi, safi) { + new->afistate[afi][safi] = bt->afimon[afi][safi] + ? BMP_AFI_NEEDSYNC : BMP_AFI_INACTIVE; + } + + bmp_session_add_tail(&bt->sessions, new); + return new; +} + +static void bmp_free(struct bmp *bmp) +{ + bmp_session_del(&bmp->targets->sessions, bmp); + XFREE(MTYPE_BMP_CONN, bmp); +} + +static void bmp_common_hdr(struct stream *s, uint8_t ver, uint8_t type) +{ + stream_putc(s, ver); + stream_putl(s, 0); //dummy message length. will be set later. 
+ stream_putc(s, type); +} + +static void bmp_per_peer_hdr(struct stream *s, struct peer *peer, + uint8_t flags, const struct timeval *tv) +{ + char peer_distinguisher[8]; + +#define BMP_PEER_TYPE_GLOBAL_INSTANCE 0 +#define BMP_PEER_TYPE_RD_INSTANCE 1 +#define BMP_PEER_TYPE_LOCAL_INSTANCE 2 + +#define BMP_PEER_FLAG_V (1 << 7) +#define BMP_PEER_FLAG_L (1 << 6) +#define BMP_PEER_FLAG_A (1 << 5) + + /* Peer Type */ + stream_putc(s, BMP_PEER_TYPE_GLOBAL_INSTANCE); + + /* Peer Flags */ + if (peer->su.sa.sa_family == AF_INET6) + SET_FLAG(flags, BMP_PEER_FLAG_V); + else + UNSET_FLAG(flags, BMP_PEER_FLAG_V); + stream_putc(s, flags); + + /* Peer Distinguisher */ + memset (&peer_distinguisher[0], 0, 8); + stream_put(s, &peer_distinguisher[0], 8); + + /* Peer Address */ + if (peer->su.sa.sa_family == AF_INET6) + stream_put(s, &peer->su.sin6.sin6_addr, 16); + else if (peer->su.sa.sa_family == AF_INET) { + stream_putl(s, 0); + stream_putl(s, 0); + stream_putl(s, 0); + stream_put_in_addr(s, &peer->su.sin.sin_addr); + } else { + stream_putl(s, 0); + stream_putl(s, 0); + stream_putl(s, 0); + stream_putl(s, 0); + } + + /* Peer AS */ + stream_putl(s, peer->as); + + /* Peer BGP ID */ + stream_put_in_addr(s, &peer->remote_id); + + /* Timestamp */ + if (tv) { + stream_putl(s, tv->tv_sec); + stream_putl(s, tv->tv_usec); + } else { + stream_putl(s, 0); + stream_putl(s, 0); + } +} + +static void bmp_put_info_tlv(struct stream *s, uint16_t type, + const char *string) +{ + int len = strlen (string); + stream_putw(s, type); + stream_putw(s, len); + stream_put(s, string, len); +} + +static int bmp_send_initiation(struct bmp *bmp) +{ + int len; + struct stream *s; + s = stream_new(BGP_MAX_PACKET_SIZE); + bmp_common_hdr(s, BMP_VERSION_3, BMP_TYPE_INITIATION); + +#define BMP_INFO_TYPE_SYSDESCR 1 +#define BMP_INFO_TYPE_SYSNAME 2 + bmp_put_info_tlv(s, BMP_INFO_TYPE_SYSDESCR, + FRR_FULL_NAME " " FRR_VER_SHORT); + bmp_put_info_tlv(s, BMP_INFO_TYPE_SYSNAME, cmd_hostname_get()); + + len = stream_get_endp(s); + stream_putl_at(s, BMP_LENGTH_POS, len); //message length is set. 
+ + pullwr_write_stream(bmp->pullwr, s); + stream_free(s); + return 0; +} + +static void bmp_notify_put(struct stream *s, struct bgp_notify *nfy) +{ + size_t len_pos; + uint8_t marker[16] = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + }; + + stream_put(s, marker, sizeof(marker)); + len_pos = stream_get_endp(s); + stream_putw(s, 0); + stream_putc(s, BGP_MSG_NOTIFY); + stream_putc(s, nfy->code); + stream_putc(s, nfy->subcode); + stream_put(s, nfy->data, nfy->length); + + stream_putw_at(s, len_pos, stream_get_endp(s) - len_pos + + sizeof(marker)); +} + +static struct stream *bmp_peerstate(struct peer *peer, bool down) +{ + struct stream *s; + size_t len; + struct timeval uptime, uptime_real; + + uptime.tv_sec = peer->uptime; + uptime.tv_usec = 0; + monotime_to_realtime(&uptime, &uptime_real); + +#define BGP_BMP_MAX_PACKET_SIZE 1024 + s = stream_new(BGP_MAX_PACKET_SIZE); + + if (peer->status == Established && !down) { + struct bmp_bgp_peer *bbpeer; + + bmp_common_hdr(s, BMP_VERSION_3, + BMP_TYPE_PEER_UP_NOTIFICATION); + bmp_per_peer_hdr(s, peer, 0, &uptime_real); + + /* Local Address (16 bytes) */ + if (peer->su_local->sa.sa_family == AF_INET6) + stream_put(s, &peer->su_local->sin6.sin6_addr, 16); + else if (peer->su_local->sa.sa_family == AF_INET) { + stream_putl(s, 0); + stream_putl(s, 0); + stream_putl(s, 0); + stream_put_in_addr(s, &peer->su_local->sin.sin_addr); + } + + /* Local Port, Remote Port */ + if (peer->su_local->sa.sa_family == AF_INET6) + stream_putw(s, peer->su_local->sin6.sin6_port); + else if (peer->su_local->sa.sa_family == AF_INET) + stream_putw(s, peer->su_local->sin.sin_port); + if (peer->su_remote->sa.sa_family == AF_INET6) + stream_putw(s, peer->su_remote->sin6.sin6_port); + else if (peer->su_remote->sa.sa_family == AF_INET) + stream_putw(s, peer->su_remote->sin.sin_port); + + static const uint8_t dummy_open[] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x13, 0x01, + }; + + bbpeer = bmp_bgp_peer_find(peer->qobj_node.nid); + + if (bbpeer && bbpeer->open_tx) + stream_put(s, bbpeer->open_tx, bbpeer->open_tx_len); + else { + stream_put(s, dummy_open, sizeof(dummy_open)); + zlog_warn("bmp: missing TX OPEN message for peer %s\n", + peer->host); + } + if (bbpeer && bbpeer->open_rx) + stream_put(s, bbpeer->open_rx, bbpeer->open_rx_len); + else { + stream_put(s, dummy_open, sizeof(dummy_open)); + zlog_warn("bmp: missing RX OPEN message for peer %s\n", + peer->host); + } + + if (peer->desc) + bmp_put_info_tlv(s, 0, peer->desc); + } else { + uint8_t type; + size_t type_pos; + + bmp_common_hdr(s, BMP_VERSION_3, + BMP_TYPE_PEER_DOWN_NOTIFICATION); + bmp_per_peer_hdr(s, peer, 0, &uptime_real); + + type_pos = stream_get_endp(s); + stream_putc(s, 0); /* placeholder for down reason */ + + switch (peer->last_reset) { + case PEER_DOWN_NOTIFY_RECEIVED: + type = BMP_PEERDOWN_REMOTE_NOTIFY; + bmp_notify_put(s, &peer->notify); + break; + case PEER_DOWN_CLOSE_SESSION: + type = BMP_PEERDOWN_REMOTE_CLOSE; + break; + default: + type = BMP_PEERDOWN_LOCAL_NOTIFY; + stream_put(s, peer->last_reset_cause, + peer->last_reset_cause_size); + break; + } + stream_putc_at(s, type_pos, type); + } + + len = stream_get_endp(s); + stream_putl_at(s, BMP_LENGTH_POS, len); //message length is set. 
+ return s; +} + + +static int bmp_send_peerup(struct bmp *bmp) +{ + struct peer *peer; + struct listnode *node; + struct stream *s; + + /* Walk down all peers */ + for (ALL_LIST_ELEMENTS_RO(bmp->targets->bgp->peer, node, peer)) { + s = bmp_peerstate(peer, false); + pullwr_write_stream(bmp->pullwr, s); + stream_free(s); + } + + return 0; +} + +/* XXX: kludge - filling the pullwr's buffer */ +static void bmp_send_all(struct bmp_bgp *bmpbgp, struct stream *s) +{ + struct bmp_targets *bt; + struct bmp *bmp; + + frr_each(bmp_targets, &bmpbgp->targets, bt) + frr_each(bmp_session, &bt->sessions, bmp) + pullwr_write_stream(bmp->pullwr, s); + stream_free(s); +} + +/* + * Route Mirroring + */ + +#define BMP_MIRROR_TLV_TYPE_BGP_MESSAGE 0 +#define BMP_MIRROR_TLV_TYPE_INFO 1 + +#define BMP_MIRROR_INFO_CODE_ERRORPDU 0 +#define BMP_MIRROR_INFO_CODE_LOSTMSGS 1 + +static struct bmp_mirrorq *bmp_pull_mirror(struct bmp *bmp) +{ + struct bmp_mirrorq *bmq; + + bmq = bmp->mirrorpos; + if (!bmq) + return NULL; + + bmp->mirrorpos = bmp_mirrorq_next(&bmp->targets->bmpbgp->mirrorq, bmq); + + bmq->refcount--; + if (!bmq->refcount) { + bmp->targets->bmpbgp->mirror_qsize -= sizeof(*bmq) + bmq->len; + bmp_mirrorq_del(&bmp->targets->bmpbgp->mirrorq, bmq); + } + return bmq; +} + +static void bmp_mirror_cull(struct bmp_bgp *bmpbgp) +{ + while (bmpbgp->mirror_qsize > bmpbgp->mirror_qsizelimit) { + struct bmp_mirrorq *bmq, *inner; + struct bmp_targets *bt; + struct bmp *bmp; + + bmq = bmp_mirrorq_first(&bmpbgp->mirrorq); + + frr_each(bmp_targets, &bmpbgp->targets, bt) { + if (!bt->mirror) + continue; + frr_each(bmp_session, &bt->sessions, bmp) { + if (bmp->mirrorpos != bmq) + continue; + + while ((inner = bmp_pull_mirror(bmp))) { + if (!inner->refcount) + XFREE(MTYPE_BMP_MIRRORQ, + inner); + } + + zlog_warn("bmp[%s] lost mirror messages due to buffer size limit", + bmp->remote); + bmp->mirror_lost = true; + pullwr_bump(bmp->pullwr); + } + } + } +} + +static int bmp_mirror_packet(struct peer *peer, uint8_t type, bgp_size_t size, + struct stream *packet) +{ + struct bmp_bgp *bmpbgp = bmp_bgp_find(peer->bgp); + struct timeval tv; + struct bmp_mirrorq *qitem; + struct bmp_targets *bt; + struct bmp *bmp; + + gettimeofday(&tv, NULL); + + if (type == BGP_MSG_OPEN) { + struct bmp_bgp_peer *bbpeer = bmp_bgp_peer_get(peer); + + XFREE(MTYPE_BMP_OPEN, bbpeer->open_rx); + + bbpeer->open_rx_len = size; + bbpeer->open_rx = XMALLOC(MTYPE_BMP_OPEN, size); + memcpy(bbpeer->open_rx, packet->data, size); + } + + if (!bmpbgp) + return 0; + + qitem = XCALLOC(MTYPE_BMP_MIRRORQ, sizeof(*qitem) + size); + qitem->peerid = peer->qobj_node.nid; + qitem->tv = tv; + qitem->len = size; + memcpy(qitem->data, packet->data, size); + + frr_each(bmp_targets, &bmpbgp->targets, bt) { + if (!bt->mirror) + continue; + frr_each(bmp_session, &bt->sessions, bmp) { + qitem->refcount++; + if (!bmp->mirrorpos) + bmp->mirrorpos = qitem; + pullwr_bump(bmp->pullwr); + } + } + if (qitem->refcount == 0) + XFREE(MTYPE_BMP_MIRRORQ, qitem); + else { + bmpbgp->mirror_qsize += sizeof(*qitem) + size; + bmp_mirrorq_add_tail(&bmpbgp->mirrorq, qitem); + + bmp_mirror_cull(bmpbgp); + + bmpbgp->mirror_qsizemax = MAX(bmpbgp->mirror_qsizemax, + bmpbgp->mirror_qsize); + } + return 0; +} + +static void bmp_wrmirror_lost(struct bmp *bmp, struct pullwr *pullwr) +{ + struct stream *s; + struct timeval tv; + + gettimeofday(&tv, NULL); + + s = stream_new(BGP_MAX_PACKET_SIZE); + + bmp_common_hdr(s, BMP_VERSION_3, BMP_TYPE_ROUTE_MIRRORING); + bmp_per_peer_hdr(s, bmp->targets->bgp->peer_self, 0, 
&tv); + + stream_putw(s, BMP_MIRROR_TLV_TYPE_INFO); + stream_putw(s, 2); + stream_putw(s, BMP_MIRROR_INFO_CODE_LOSTMSGS); + stream_putl_at(s, BMP_LENGTH_POS, stream_get_endp(s)); + + bmp->cnt_mirror_overruns++; + pullwr_write_stream(bmp->pullwr, s); + stream_free(s); +} + +static bool bmp_wrmirror(struct bmp *bmp, struct pullwr *pullwr) +{ + struct bmp_mirrorq *bmq; + struct peer *peer; + bool written = false; + + if (bmp->mirror_lost) { + bmp_wrmirror_lost(bmp, pullwr); + bmp->mirror_lost = false; + return true; + } + + bmq = bmp_pull_mirror(bmp); + if (!bmq) + return false; + + peer = QOBJ_GET_TYPESAFE(bmq->peerid, peer); + if (!peer) { + zlog_info("bmp: skipping mirror message for deleted peer"); + goto out; + } + + struct stream *s; + s = stream_new(BGP_MAX_PACKET_SIZE); + + bmp_common_hdr(s, BMP_VERSION_3, BMP_TYPE_ROUTE_MIRRORING); + bmp_per_peer_hdr(s, peer, 0, &bmq->tv); + + /* BMP Mirror TLV. */ + stream_putw(s, BMP_MIRROR_TLV_TYPE_BGP_MESSAGE); + stream_putw(s, bmq->len); + stream_putl_at(s, BMP_LENGTH_POS, stream_get_endp(s) + bmq->len); + + bmp->cnt_mirror++; + pullwr_write_stream(bmp->pullwr, s); + pullwr_write(bmp->pullwr, bmq->data, bmq->len); + + stream_free(s); + written = true; + +out: + if (!bmq->refcount) + XFREE(MTYPE_BMP_MIRRORQ, bmq); + return written; +} + +static int bmp_outgoing_packet(struct peer *peer, uint8_t type, bgp_size_t size, + struct stream *packet) +{ + if (type == BGP_MSG_OPEN) { + struct bmp_bgp_peer *bbpeer = bmp_bgp_peer_get(peer); + + XFREE(MTYPE_BMP_OPEN, bbpeer->open_tx); + + bbpeer->open_tx_len = size; + bbpeer->open_tx = XMALLOC(MTYPE_BMP_OPEN, size); + memcpy(bbpeer->open_tx, packet->data, size); + } + return 0; +} + +static int bmp_peer_established(struct peer *peer) +{ + struct bmp_bgp *bmpbgp = bmp_bgp_find(peer->bgp); + + if (!bmpbgp) + return 0; + + if (peer->doppelganger && (peer->doppelganger->status != Deleted)) { + struct bmp_bgp_peer *bbpeer, *bbdopp; + + bbpeer = bmp_bgp_peer_get(peer); + bbdopp = bmp_bgp_peer_find(peer->doppelganger->qobj_node.nid); + if (bbdopp) { + XFREE(MTYPE_BMP_OPEN, bbpeer->open_tx); + XFREE(MTYPE_BMP_OPEN, bbpeer->open_rx); + + bbpeer->open_tx = bbdopp->open_tx; + bbpeer->open_tx_len = bbdopp->open_tx_len; + bbpeer->open_rx = bbdopp->open_rx; + bbpeer->open_rx_len = bbdopp->open_rx_len; + + bmp_peerh_del(&bmp_peerh, bbdopp); + XFREE(MTYPE_BMP_PEER, bbdopp); + } + } + + bmp_send_all(bmpbgp, bmp_peerstate(peer, false)); + return 0; +} + +static int bmp_peer_backward(struct peer *peer) +{ + struct bmp_bgp *bmpbgp = bmp_bgp_find(peer->bgp); + struct bmp_bgp_peer *bbpeer; + + if (!bmpbgp) + return 0; + + bbpeer = bmp_bgp_peer_find(peer->qobj_node.nid); + if (bbpeer) { + XFREE(MTYPE_BMP_OPEN, bbpeer->open_tx); + bbpeer->open_tx_len = 0; + XFREE(MTYPE_BMP_OPEN, bbpeer->open_rx); + bbpeer->open_rx_len = 0; + } + + bmp_send_all(bmpbgp, bmp_peerstate(peer, true)); + return 0; +} + +static void bmp_eor(struct bmp *bmp, afi_t afi, safi_t safi, uint8_t flags) +{ + struct peer *peer; + struct listnode *node; + struct stream *s, *s2; + iana_afi_t pkt_afi; + iana_safi_t pkt_safi; + + s = stream_new(BGP_MAX_PACKET_SIZE); + + /* Make BGP update packet. */ + bgp_packet_set_marker(s, BGP_MSG_UPDATE); + + /* Unfeasible Routes Length */ + stream_putw(s, 0); + + if (afi == AFI_IP && safi == SAFI_UNICAST) { + /* Total Path Attribute Length */ + stream_putw(s, 0); + } else { + /* Convert AFI, SAFI to values for packet. 
*/ + bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi); + + /* Total Path Attribute Length */ + stream_putw(s, 6); + stream_putc(s, BGP_ATTR_FLAG_OPTIONAL); + stream_putc(s, BGP_ATTR_MP_UNREACH_NLRI); + stream_putc(s, 3); + stream_putw(s, pkt_afi); + stream_putc(s, pkt_safi); + } + + bgp_packet_set_size(s); + + for (ALL_LIST_ELEMENTS_RO(bmp->targets->bgp->peer, node, peer)) { + if (!peer->afc_nego[afi][safi]) + continue; + + s2 = stream_new(BGP_MAX_PACKET_SIZE); + + bmp_common_hdr(s2, BMP_VERSION_3, + BMP_TYPE_ROUTE_MONITORING); + bmp_per_peer_hdr(s2, peer, flags, NULL); + + stream_putl_at(s2, BMP_LENGTH_POS, + stream_get_endp(s) + stream_get_endp(s2)); + + bmp->cnt_update++; + pullwr_write_stream(bmp->pullwr, s2); + pullwr_write_stream(bmp->pullwr, s); + stream_free(s2); + } + stream_free(s); +} + +static struct stream *bmp_update(struct prefix *p, struct peer *peer, + struct attr *attr, afi_t afi, safi_t safi) +{ + struct bpacket_attr_vec_arr vecarr; + struct stream *s; + size_t attrlen_pos = 0, mpattrlen_pos = 0; + bgp_size_t total_attr_len = 0; + + bpacket_attr_vec_arr_reset(&vecarr); + + s = stream_new(BGP_MAX_PACKET_SIZE); + bgp_packet_set_marker(s, BGP_MSG_UPDATE); + + /* 2: withdrawn routes length */ + stream_putw(s, 0); + + /* 3: total attributes length - attrlen_pos stores the position */ + attrlen_pos = stream_get_endp(s); + stream_putw(s, 0); + + /* 5: Encode all the attributes, except MP_REACH_NLRI attr. */ + total_attr_len = bgp_packet_attribute(NULL, peer, s, attr, + &vecarr, NULL, afi, safi, peer, NULL, NULL, 0, 0, 0); + + /* space check? */ + + /* peer_cap_enhe & add-path removed */ + if (afi == AFI_IP && safi == SAFI_UNICAST) + stream_put_prefix(s, p); + else { + size_t p1 = stream_get_endp(s); + + /* MPLS removed for now */ + + mpattrlen_pos = bgp_packet_mpattr_start(s, peer, afi, safi, + &vecarr, attr); + bgp_packet_mpattr_prefix(s, afi, safi, p, NULL, NULL, 0, + 0, 0, attr); + bgp_packet_mpattr_end(s, mpattrlen_pos); + total_attr_len += stream_get_endp(s) - p1; + } + + /* set the total attribute length correctly */ + stream_putw_at(s, attrlen_pos, total_attr_len); + bgp_packet_set_size(s); + return s; +} + +static struct stream *bmp_withdraw(struct prefix *p, afi_t afi, safi_t safi) +{ + struct stream *s; + size_t attrlen_pos = 0, mp_start, mplen_pos; + bgp_size_t total_attr_len = 0; + bgp_size_t unfeasible_len; + + s = stream_new(BGP_MAX_PACKET_SIZE); + + bgp_packet_set_marker(s, BGP_MSG_UPDATE); + stream_putw(s, 0); + + if (afi == AFI_IP && safi == SAFI_UNICAST) { + stream_put_prefix(s, p); + unfeasible_len = stream_get_endp(s) - BGP_HEADER_SIZE + - BGP_UNFEASIBLE_LEN; + stream_putw_at(s, BGP_HEADER_SIZE, unfeasible_len); + stream_putw(s, 0); + } else { + attrlen_pos = stream_get_endp(s); + /* total attr length = 0 for now. reevaluate later */ + stream_putw(s, 0); + mp_start = stream_get_endp(s); + mplen_pos = bgp_packet_mpunreach_start(s, afi, safi); + + bgp_packet_mpunreach_prefix(s, p, afi, safi, NULL, NULL, 0, + 0, 0, NULL); + /* Set the mp_unreach attr's length */ + bgp_packet_mpunreach_end(s, mplen_pos); + + /* Set total path attribute length. 
*/ + total_attr_len = stream_get_endp(s) - mp_start; + stream_putw_at(s, attrlen_pos, total_attr_len); + } + + bgp_packet_set_size(s); + return s; +} + +static void bmp_monitor(struct bmp *bmp, struct peer *peer, uint8_t flags, + struct prefix *p, struct attr *attr, afi_t afi, + safi_t safi, time_t uptime) +{ + struct stream *hdr, *msg; + struct timeval tv = { .tv_sec = uptime, .tv_usec = 0 }; + + if (attr) + msg = bmp_update(p, peer, attr, afi, safi); + else + msg = bmp_withdraw(p, afi, safi); + + hdr = stream_new(BGP_MAX_PACKET_SIZE); + bmp_common_hdr(hdr, BMP_VERSION_3, BMP_TYPE_ROUTE_MONITORING); + bmp_per_peer_hdr(hdr, peer, flags, &tv); + + stream_putl_at(hdr, BMP_LENGTH_POS, + stream_get_endp(hdr) + stream_get_endp(msg)); + + bmp->cnt_update++; + pullwr_write_stream(bmp->pullwr, hdr); + pullwr_write_stream(bmp->pullwr, msg); + stream_free(hdr); + stream_free(msg); +} + +static bool bmp_wrsync(struct bmp *bmp, struct pullwr *pullwr) +{ + afi_t afi; + safi_t safi; + + if (bmp->syncafi == AFI_MAX) { + FOREACH_AFI_SAFI (afi, safi) { + if (bmp->afistate[afi][safi] != BMP_AFI_NEEDSYNC) + continue; + + bmp->afistate[afi][safi] = BMP_AFI_SYNC; + + bmp->syncafi = afi; + bmp->syncsafi = safi; + bmp->syncpeerid = 0; + memset(&bmp->syncpos, 0, sizeof(bmp->syncpos)); + bmp->syncpos.family = afi2family(afi); + zlog_info("bmp[%s] %s %s sending table", + bmp->remote, + afi2str(bmp->syncafi), + safi2str(bmp->syncsafi)); + /* break does not work here, 2 loops... */ + goto afibreak; + } + if (bmp->syncafi == AFI_MAX) + return false; + } + +afibreak: + afi = bmp->syncafi; + safi = bmp->syncsafi; + + if (!bmp->targets->afimon[afi][safi]) { + /* shouldn't happen */ + bmp->afistate[afi][safi] = BMP_AFI_INACTIVE; + bmp->syncafi = AFI_MAX; + bmp->syncsafi = SAFI_MAX; + return true; + } + + struct bgp_table *table = bmp->targets->bgp->rib[afi][safi]; + struct bgp_node *bn; + struct bgp_path_info *bpi = NULL, *bpiter; + struct bgp_adj_in *adjin = NULL, *adjiter; + + bn = bgp_node_lookup(table, &bmp->syncpos); + do { + if (!bn) { + bn = bgp_table_get_next(table, &bmp->syncpos); + if (!bn) { + zlog_info("bmp[%s] %s %s table completed (EoR)", + bmp->remote, afi2str(afi), + safi2str(safi)); + bmp_eor(bmp, afi, safi, BMP_PEER_FLAG_L); + bmp_eor(bmp, afi, safi, 0); + + bmp->afistate[afi][safi] = BMP_AFI_LIVE; + bmp->syncafi = AFI_MAX; + bmp->syncsafi = SAFI_MAX; + return true; + } + bmp->syncpeerid = 0; + prefix_copy(&bmp->syncpos, &bn->p); + } + + if (bmp->targets->afimon[afi][safi] & BMP_MON_POSTPOLICY) { + for (bpiter = bn->info; bpiter; bpiter = bpiter->next) { + if (!CHECK_FLAG(bpiter->flags, BGP_PATH_VALID)) + continue; + if (bpiter->peer->qobj_node.nid + <= bmp->syncpeerid) + continue; + if (bpi && bpiter->peer->qobj_node.nid + > bpi->peer->qobj_node.nid) + continue; + bpi = bpiter; + } + } + if (bmp->targets->afimon[afi][safi] & BMP_MON_PREPOLICY) { + for (adjiter = bn->adj_in; adjiter; + adjiter = adjiter->next) { + if (adjiter->peer->qobj_node.nid + <= bmp->syncpeerid) + continue; + if (adjin && adjiter->peer->qobj_node.nid + > adjin->peer->qobj_node.nid) + continue; + adjin = adjiter; + } + } + if (bpi || adjin) + break; + + bn = NULL; + } while (1); + + if (adjin && bpi + && adjin->peer->qobj_node.nid < bpi->peer->qobj_node.nid) { + bpi = NULL; + bmp->syncpeerid = adjin->peer->qobj_node.nid; + } else if (adjin && bpi + && adjin->peer->qobj_node.nid > bpi->peer->qobj_node.nid) { + adjin = NULL; + bmp->syncpeerid = bpi->peer->qobj_node.nid; + } else if (bpi) { + bmp->syncpeerid = bpi->peer->qobj_node.nid; 
+ } else if (adjin) { + bmp->syncpeerid = adjin->peer->qobj_node.nid; + } + + if (bpi) + bmp_monitor(bmp, bpi->peer, BMP_PEER_FLAG_L, &bn->p, bpi->attr, + afi, safi, bpi->uptime); + if (adjin) + bmp_monitor(bmp, adjin->peer, 0, &bn->p, adjin->attr, + afi, safi, adjin->uptime); + + return true; +} + +static struct bmp_queue_entry *bmp_pull(struct bmp *bmp) +{ + struct bmp_queue_entry *bqe; + + bqe = bmp->queuepos; + if (!bqe) + return NULL; + + bmp->queuepos = bmp_qlist_next(&bmp->targets->updlist, bqe); + + bqe->refcount--; + if (!bqe->refcount) { + bmp_qhash_del(&bmp->targets->updhash, bqe); + bmp_qlist_del(&bmp->targets->updlist, bqe); + } + return bqe; +} + +static bool bmp_wrqueue(struct bmp *bmp, struct pullwr *pullwr) +{ + struct bmp_queue_entry *bqe; + struct peer *peer; + struct bgp_node *bn; + bool written = false; + + bqe = bmp_pull(bmp); + if (!bqe) + return false; + + afi_t afi = bqe->afi; + safi_t safi = bqe->safi; + + switch (bmp->afistate[afi][safi]) { + case BMP_AFI_INACTIVE: + case BMP_AFI_NEEDSYNC: + goto out; + case BMP_AFI_SYNC: + if (prefix_cmp(&bqe->p, &bmp->syncpos) <= 0) + /* currently syncing but have already passed this + * prefix => send it. */ + break; + + /* currently syncing & haven't reached this prefix yet + * => it'll be sent as part of the table sync, no need here */ + goto out; + case BMP_AFI_LIVE: + break; + } + + peer = QOBJ_GET_TYPESAFE(bqe->peerid, peer); + if (!peer) { + zlog_info("bmp: skipping queued item for deleted peer"); + goto out; + } + if (peer->status != Established) + goto out; + + bn = bgp_node_lookup(bmp->targets->bgp->rib[afi][safi], &bqe->p); + + if (bmp->targets->afimon[afi][safi] & BMP_MON_POSTPOLICY) { + struct bgp_path_info *bpi; + + for (bpi = bn ? bn->info : NULL; bpi; bpi = bpi->next) { + if (!CHECK_FLAG(bpi->flags, BGP_PATH_VALID)) + continue; + if (bpi->peer == peer) + break; + } + + bmp_monitor(bmp, peer, BMP_PEER_FLAG_L, &bqe->p, + bpi ? bpi->attr : NULL, afi, safi, + bpi ? bpi->uptime : monotime(NULL)); + written = true; + } + + if (bmp->targets->afimon[afi][safi] & BMP_MON_PREPOLICY) { + struct bgp_adj_in *adjin; + + for (adjin = bn ? bn->adj_in : NULL; adjin; + adjin = adjin->next) { + if (adjin->peer == peer) + break; + } + bmp_monitor(bmp, peer, BMP_PEER_FLAG_L, &bqe->p, + adjin ? adjin->attr : NULL, afi, safi, + adjin ? 
adjin->uptime : monotime(NULL)); + written = true; + } + +out: + if (!bqe->refcount) + XFREE(MTYPE_BMP_QUEUE, bqe); + return written; +} + +static void bmp_wrfill(struct bmp *bmp, struct pullwr *pullwr) +{ + switch(bmp->state) { + case BMP_PeerUp: + bmp_send_peerup(bmp); + bmp->state = BMP_Run; + break; + + case BMP_Run: + if (bmp_wrmirror(bmp, pullwr)) + break; + if (bmp_wrqueue(bmp, pullwr)) + break; + if (bmp_wrsync(bmp, pullwr)) + break; + break; + } +} + +static void bmp_wrerr(struct bmp *bmp, struct pullwr *pullwr, bool eof) +{ + if (eof) + zlog_info("bmp[%s] disconnected", bmp->remote); + else + flog_warn(EC_LIB_SYSTEM_CALL, "bmp[%s] connection error: %s", + bmp->remote, strerror(errno)); + + bmp_close(bmp); + bmp_free(bmp); +} + +static void bmp_process_one(struct bmp_targets *bt, struct bgp *bgp, + afi_t afi, safi_t safi, struct bgp_node *bn, struct peer *peer) +{ + struct bmp *bmp; + struct bmp_queue_entry *bqe, bqeref; + size_t refcount; + char buf[256]; + + prefix2str(&bn->p, buf, sizeof(buf)); + + refcount = bmp_session_count(&bt->sessions); + if (refcount == 0) + return; + + memset(&bqeref, 0, sizeof(bqeref)); + prefix_copy(&bqeref.p, &bn->p); + bqeref.peerid = peer->qobj_node.nid; + bqeref.afi = afi; + bqeref.safi = safi; + + bqe = bmp_qhash_find(&bt->updhash, &bqeref); + if (bqe) { + if (bqe->refcount >= refcount) + /* nothing to do here */ + return; + + bmp_qlist_del(&bt->updlist, bqe); + } else { + bqe = XMALLOC(MTYPE_BMP_QUEUE, sizeof(*bqe)); + memcpy(bqe, &bqeref, sizeof(*bqe)); + + bmp_qhash_add(&bt->updhash, bqe); + } + + bqe->refcount = refcount; + bmp_qlist_add_tail(&bt->updlist, bqe); + + frr_each (bmp_session, &bt->sessions, bmp) + if (!bmp->queuepos) + bmp->queuepos = bqe; +} + +static int bmp_process(struct bgp *bgp, afi_t afi, safi_t safi, + struct bgp_node *bn, struct peer *peer, bool withdraw) +{ + struct bmp_bgp *bmpbgp = bmp_bgp_find(peer->bgp); + struct bmp_targets *bt; + struct bmp *bmp; + + if (!bmpbgp) + return 0; + + frr_each(bmp_targets, &bmpbgp->targets, bt) { + if (!bt->afimon[afi][safi]) + continue; + + bmp_process_one(bt, bgp, afi, safi, bn, peer); + + frr_each(bmp_session, &bt->sessions, bmp) { + pullwr_bump(bmp->pullwr); + } + } + return 0; +} + +static void bmp_stat_put_u32(struct stream *s, size_t *cnt, uint16_t type, + uint32_t value) +{ + stream_putw(s, type); + stream_putw(s, 4); + stream_putl(s, value); + (*cnt)++; +} + +static int bmp_stats(struct thread *thread) +{ + struct bmp_targets *bt = THREAD_ARG(thread); + struct stream *s; + struct peer *peer; + struct listnode *node; + struct timeval tv; + + if (bt->stat_msec) + thread_add_timer_msec(bm->master, bmp_stats, bt, bt->stat_msec, + &bt->t_stats); + + gettimeofday(&tv, NULL); + + /* Walk down all peers */ + for (ALL_LIST_ELEMENTS_RO(bt->bgp->peer, node, peer)) { + size_t count = 0, count_pos, len; + + if (peer->status != Established) + continue; + + s = stream_new(BGP_MAX_PACKET_SIZE); + bmp_common_hdr(s, BMP_VERSION_3, BMP_TYPE_STATISTICS_REPORT); + bmp_per_peer_hdr(s, peer, 0, &tv); + + count_pos = stream_get_endp(s); + stream_putl(s, 0); + + bmp_stat_put_u32(s, &count, BMP_STATS_PFX_REJECTED, + peer->stat_pfx_filter); + bmp_stat_put_u32(s, &count, BMP_STATS_UPD_LOOP_ASPATH, + peer->stat_pfx_aspath_loop); + bmp_stat_put_u32(s, &count, BMP_STATS_UPD_LOOP_ORIGINATOR, + peer->stat_pfx_originator_loop); + bmp_stat_put_u32(s, &count, BMP_STATS_UPD_LOOP_CLUSTER, + peer->stat_pfx_cluster_loop); + bmp_stat_put_u32(s, &count, BMP_STATS_PFX_DUP_WITHDRAW, + peer->stat_pfx_dup_withdraw); + 
bmp_stat_put_u32(s, &count, BMP_STATS_UPD_7606_WITHDRAW, + peer->stat_upd_7606); + bmp_stat_put_u32(s, &count, BMP_STATS_FRR_NH_INVALID, + peer->stat_pfx_nh_invalid); + + stream_putl_at(s, count_pos, count); + + len = stream_get_endp(s); + stream_putl_at(s, BMP_LENGTH_POS, len); + + bmp_send_all(bt->bmpbgp, s); + } + return 0; +} + +static struct bmp *bmp_open(struct bmp_targets *bt, int bmp_sock) +{ + union sockunion su, *sumem; + struct prefix p; + int on = 1; + struct access_list *acl = NULL; + enum filter_type ret; + char buf[SU_ADDRSTRLEN]; + struct bmp *bmp; + + sumem = sockunion_getpeername(bmp_sock); + if (!sumem) { + close(bmp_sock); + return NULL; + } + memcpy(&su, sumem, sizeof(su)); + sockunion_free(sumem); + + set_nonblocking(bmp_sock); + set_cloexec(bmp_sock); + shutdown(bmp_sock, SHUT_RD); + + sockunion2hostprefix(&su, &p); + + acl = NULL; + switch (p.family) { + case AF_INET: + acl = access_list_lookup(AFI_IP, bt->acl_name); + break; + case AF_INET6: + acl = access_list_lookup(AFI_IP6, bt->acl6_name); + break; + default: + break; + } + + ret = FILTER_PERMIT; + if (acl) { + ret = access_list_apply(acl, &p); + } + + sockunion2str(&su, buf, SU_ADDRSTRLEN); + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ":%u", + su.sa.sa_family == AF_INET + ? ntohs(su.sin.sin_port) + : ntohs(su.sin6.sin6_port)); + + if (ret == FILTER_DENY) { + bt->cnt_aclrefused++; + zlog_info("bmp[%s] connection refused by access-list", buf); + close(bmp_sock); + return NULL; + } + bt->cnt_accept++; + + setsockopt(bmp_sock, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); + setsockopt(bmp_sock, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); + + zlog_info("bmp[%s] connection established", buf); + + /* Allocate new BMP structure and set up default values. */ + bmp = bmp_new(bt, bmp_sock); + strlcpy(bmp->remote, buf, sizeof(bmp->remote)); + + bmp->state = BMP_PeerUp; + bmp->pullwr = pullwr_new(bm->master, bmp_sock, bmp, bmp_wrfill, + bmp_wrerr); + bmp_send_initiation(bmp); + + return bmp; +} + +/* Accept BMP connection. */ +static int bmp_accept(struct thread *thread) +{ + union sockunion su; + struct bmp_listener *bl = THREAD_ARG(thread); + int bmp_sock; + + /* We continue hearing BMP socket. */ + thread_add_read(bm->master, bmp_accept, bl, bl->sock, &bl->t_accept); + + memset(&su, 0, sizeof(union sockunion)); + + /* We can handle IPv4 or IPv6 socket. 
*/ + bmp_sock = sockunion_accept(bl->sock, &su); + if (bmp_sock < 0) { + zlog_info("bmp: accept_sock failed: %s\n", + safe_strerror (errno)); + return -1; + } + bmp_open(bl->targets, bmp_sock); + return 0; +} + +static void bmp_close(struct bmp *bmp) +{ + struct bmp_queue_entry *bqe; + struct bmp_mirrorq *bmq; + + if (bmp->active) + bmp_active_disconnected(bmp->active); + + while ((bmq = bmp_pull_mirror(bmp))) + if (!bmq->refcount) + XFREE(MTYPE_BMP_MIRRORQ, bmq); + while ((bqe = bmp_pull(bmp))) + if (!bqe->refcount) + XFREE(MTYPE_BMP_QUEUE, bqe); + + THREAD_OFF(bmp->t_read); + pullwr_del(bmp->pullwr); + close(bmp->socket); +} + +static struct bmp_bgp *bmp_bgp_find(struct bgp *bgp) +{ + struct bmp_bgp dummy = { .bgp = bgp }; + return bmp_bgph_find(&bmp_bgph, &dummy); +} + +static struct bmp_bgp *bmp_bgp_get(struct bgp *bgp) +{ + struct bmp_bgp *bmpbgp; + + bmpbgp = bmp_bgp_find(bgp); + if (bmpbgp) + return bmpbgp; + + bmpbgp = XCALLOC(MTYPE_BMP, sizeof(*bmpbgp)); + bmpbgp->bgp = bgp; + bmpbgp->mirror_qsizelimit = ~0UL; + bmp_mirrorq_init(&bmpbgp->mirrorq); + bmp_bgph_add(&bmp_bgph, bmpbgp); + + return bmpbgp; +} + +static void bmp_bgp_put(struct bmp_bgp *bmpbgp) +{ + struct bmp_targets *bt; + + bmp_bgph_del(&bmp_bgph, bmpbgp); + + frr_each_safe(bmp_targets, &bmpbgp->targets, bt) + bmp_targets_put(bt); + + bmp_mirrorq_fini(&bmpbgp->mirrorq); + XFREE(MTYPE_BMP, bmpbgp); +} + +static int bmp_bgp_del(struct bgp *bgp) +{ + struct bmp_bgp *bmpbgp = bmp_bgp_find(bgp); + + if (bmpbgp) + bmp_bgp_put(bmpbgp); + return 0; +} + +static struct bmp_bgp_peer *bmp_bgp_peer_find(uint64_t peerid) +{ + struct bmp_bgp_peer dummy = { .peerid = peerid }; + return bmp_peerh_find(&bmp_peerh, &dummy); +} + +static struct bmp_bgp_peer *bmp_bgp_peer_get(struct peer *peer) +{ + struct bmp_bgp_peer *bbpeer; + + bbpeer = bmp_bgp_peer_find(peer->qobj_node.nid); + if (bbpeer) + return bbpeer; + + bbpeer = XCALLOC(MTYPE_BMP_PEER, sizeof(*bbpeer)); + bbpeer->peerid = peer->qobj_node.nid; + bmp_peerh_add(&bmp_peerh, bbpeer); + + return bbpeer; +} + +static struct bmp_targets *bmp_targets_find1(struct bgp *bgp, const char *name) +{ + struct bmp_bgp *bmpbgp = bmp_bgp_find(bgp); + struct bmp_targets dummy; + + if (!bmpbgp) + return NULL; + dummy.name = (char *)name; + return bmp_targets_find(&bmpbgp->targets, &dummy); +} + +static struct bmp_targets *bmp_targets_get(struct bgp *bgp, const char *name) +{ + struct bmp_targets *bt; + + bt = bmp_targets_find1(bgp, name); + if (bt) + return bt; + + bt = XCALLOC(MTYPE_BMP_TARGETS, sizeof(*bt)); + bt->name = XSTRDUP(MTYPE_BMP_TARGETSNAME, name); + bt->bgp = bgp; + bt->bmpbgp = bmp_bgp_get(bgp); + bmp_session_init(&bt->sessions); + bmp_qhash_init(&bt->updhash); + bmp_qlist_init(&bt->updlist); + bmp_actives_init(&bt->actives); + bmp_listeners_init(&bt->listeners); + + QOBJ_REG(bt, bmp_targets); + bmp_targets_add(&bt->bmpbgp->targets, bt); + return bt; +} + +static void bmp_targets_put(struct bmp_targets *bt) +{ + struct bmp *bmp; + struct bmp_active *ba; + + frr_each_safe (bmp_actives, &bt->actives, ba) + bmp_active_put(ba); + + frr_each_safe(bmp_session, &bt->sessions, bmp) { + bmp_close(bmp); + bmp_free(bmp); + } + + bmp_targets_del(&bt->bmpbgp->targets, bt); + QOBJ_UNREG(bt); + + bmp_listeners_fini(&bt->listeners); + bmp_actives_fini(&bt->actives); + bmp_qhash_fini(&bt->updhash); + bmp_qlist_fini(&bt->updlist); + + XFREE(MTYPE_BMP_ACLNAME, bt->acl_name); + XFREE(MTYPE_BMP_ACLNAME, bt->acl6_name); + bmp_session_fini(&bt->sessions); + + XFREE(MTYPE_BMP_TARGETSNAME, bt->name); + 
XFREE(MTYPE_BMP_TARGETS, bt); +} + +static struct bmp_listener *bmp_listener_find(struct bmp_targets *bt, + const union sockunion *su, + int port) +{ + struct bmp_listener dummy; + dummy.addr = *su; + dummy.port = port; + return bmp_listeners_find(&bt->listeners, &dummy); +} + +static struct bmp_listener *bmp_listener_get(struct bmp_targets *bt, + const union sockunion *su, + int port) +{ + struct bmp_listener *bl = bmp_listener_find(bt, su, port); + + if (bl) + return bl; + + bl = XCALLOC(MTYPE_BMP_LISTENER, sizeof(*bl)); + bl->targets = bt; + bl->addr = *su; + bl->port = port; + bl->sock = -1; + + bmp_listeners_add(&bt->listeners, bl); + return bl; +} + +static void bmp_listener_put(struct bmp_listener *bl) +{ + bmp_listeners_del(&bl->targets->listeners, bl); + XFREE(MTYPE_BMP_LISTENER, bl); +} + +static void bmp_listener_start(struct bmp_listener *bl) +{ + int sock, ret; + + sock = socket(bl->addr.sa.sa_family, SOCK_STREAM, 0); + if (sock < 0) + return; + + sockopt_reuseaddr(sock); + sockopt_reuseport(sock); + sockopt_v6only(bl->addr.sa.sa_family, sock); + set_cloexec(sock); + + ret = sockunion_bind(sock, &bl->addr, bl->port, &bl->addr); + if (ret < 0) + goto out_sock; + + ret = listen(sock, 3); + if (ret < 0) + goto out_sock; + + bl->sock = sock; + thread_add_read(bm->master, bmp_accept, bl, sock, &bl->t_accept); + return; +out_sock: + close(sock); +} + +static void bmp_listener_stop(struct bmp_listener *bl) +{ + THREAD_OFF(bl->t_accept); + + if (bl->sock != -1) + close(bl->sock); + bl->sock = -1; +} + +static struct bmp_active *bmp_active_find(struct bmp_targets *bt, + const char *hostname, int port) +{ + struct bmp_active dummy; + dummy.hostname = (char *)hostname; + dummy.port = port; + return bmp_actives_find(&bt->actives, &dummy); +} + +static struct bmp_active *bmp_active_get(struct bmp_targets *bt, + const char *hostname, int port) +{ + struct bmp_active *ba; + + ba = bmp_active_find(bt, hostname, port); + if (ba) + return ba; + + ba = XCALLOC(MTYPE_BMP_ACTIVE, sizeof(*ba)); + ba->targets = bt; + ba->hostname = XSTRDUP(MTYPE_TMP, hostname); + ba->port = port; + ba->minretry = BMP_DFLT_MINRETRY; + ba->maxretry = BMP_DFLT_MAXRETRY; + ba->socket = -1; + + bmp_actives_add(&bt->actives, ba); + return ba; +} + +static void bmp_active_put(struct bmp_active *ba) +{ + THREAD_OFF(ba->t_timer); + THREAD_OFF(ba->t_read); + THREAD_OFF(ba->t_write); + + bmp_actives_del(&ba->targets->actives, ba); + + if (ba->bmp) { + ba->bmp->active = NULL; + bmp_close(ba->bmp); + bmp_free(ba->bmp); + } + if (ba->socket != -1) + close(ba->socket); + + XFREE(MTYPE_TMP, ba->hostname); + XFREE(MTYPE_BMP_ACTIVE, ba); +} + +static void bmp_active_setup(struct bmp_active *ba); + +static void bmp_active_connect(struct bmp_active *ba) +{ + enum connect_result res; + char buf[SU_ADDRSTRLEN]; + + for (; ba->addrpos < ba->addrtotal; ba->addrpos++) { + ba->socket = sockunion_socket(&ba->addrs[ba->addrpos]); + if (ba->socket < 0) { + zlog_warn("bmp[%s]: failed to create socket", + ba->hostname); + continue; + } + + set_nonblocking(ba->socket); + res = sockunion_connect(ba->socket, &ba->addrs[ba->addrpos], + htons(ba->port), 0); + switch (res) { + case connect_error: + sockunion2str(&ba->addrs[ba->addrpos], buf, + sizeof(buf)); + zlog_warn("bmp[%s]: failed to connect to %s:%d", + ba->hostname, buf, ba->port); + close(ba->socket); + ba->socket = -1; + continue; + case connect_success: + break; + case connect_in_progress: + bmp_active_setup(ba); + return; + } + } + + /* exhausted all addresses */ + ba->curretry += 
ba->curretry / 2; + bmp_active_setup(ba); +} + +static void bmp_active_resolved(struct resolver_query *resq, int numaddrs, + union sockunion *addr) +{ + struct bmp_active *ba = container_of(resq, struct bmp_active, resq); + unsigned i; + + if (numaddrs <= 0) { + zlog_warn("bmp[%s]: hostname resolution failed", ba->hostname); + ba->curretry += ba->curretry / 2; + bmp_active_setup(ba); + return; + } + if (numaddrs > (int)array_size(ba->addrs)) + numaddrs = array_size(ba->addrs); + + ba->addrpos = 0; + ba->addrtotal = numaddrs; + for (i = 0; i < ba->addrtotal; i++) + memcpy(&ba->addrs[i], &addr[i], sizeof(ba->addrs[0])); + + bmp_active_connect(ba); +} + +static int bmp_active_thread(struct thread *t) +{ + struct bmp_active *ba = THREAD_ARG(t); + socklen_t slen; + int status, ret; + char buf[SU_ADDRSTRLEN]; + + /* all 3 end up here, though only timer or read+write are active + * at a time */ + THREAD_OFF(ba->t_timer); + THREAD_OFF(ba->t_read); + THREAD_OFF(ba->t_write); + + if (ba->socket == -1) { + resolver_resolve(&ba->resq, AF_UNSPEC, ba->hostname, + bmp_active_resolved); + return 0; + } + + slen = sizeof(status); + ret = getsockopt(ba->socket, SOL_SOCKET, SO_ERROR, (void *)&status, + &slen); + + sockunion2str(&ba->addrs[ba->addrpos], buf, sizeof(buf)); + if (ret < 0 || status != 0) { + zlog_warn("bmp[%s]: failed to connect to %s:%d", + ba->hostname, buf, ba->port); + goto out_next; + } + + zlog_warn("bmp[%s]: outbound connection to %s:%d", + ba->hostname, buf, ba->port); + + ba->bmp = bmp_open(ba->targets, ba->socket); + if (!ba->bmp) + goto out_next; + + ba->bmp->active = ba; + ba->socket = -1; + ba->curretry = ba->minretry; + return 0; + +out_next: + close(ba->socket); + ba->socket = -1; + ba->addrpos++; + bmp_active_connect(ba); + return 0; +} + +static void bmp_active_disconnected(struct bmp_active *ba) +{ + ba->bmp = NULL; + bmp_active_setup(ba); +} + +static void bmp_active_setup(struct bmp_active *ba) +{ + THREAD_OFF(ba->t_timer); + THREAD_OFF(ba->t_read); + THREAD_OFF(ba->t_write); + + if (ba->bmp) + return; + if (ba->resq.callback) + return; + + if (ba->curretry > ba->maxretry) + ba->curretry = ba->maxretry; + + if (ba->socket == -1) + thread_add_timer_msec(bm->master, bmp_active_thread, ba, + ba->curretry, &ba->t_timer); + else { + thread_add_read(bm->master, bmp_active_thread, ba, ba->socket, + &ba->t_read); + thread_add_write(bm->master, bmp_active_thread, ba, ba->socket, + &ba->t_write); + } +} + +static struct cmd_node bmp_node = {BMP_NODE, "%s(config-bgp-bmp)# "}; + +#define BMP_STR "BGP Monitoring Protocol\n" + +#ifndef VTYSH_EXTRACT_PL +#include "bgp_bmp_clippy.c" +#endif + +DEFPY_NOSH(bmp_targets_main, + bmp_targets_cmd, + "bmp targets BMPTARGETS", + BMP_STR + "Create BMP target group\n" + "Name of the BMP target group\n") +{ + VTY_DECLVAR_CONTEXT(bgp, bgp); + struct bmp_targets *bt; + + bt = bmp_targets_get(bgp, bmptargets); + + VTY_PUSH_CONTEXT_SUB(BMP_NODE, bt); + return CMD_SUCCESS; +} + +DEFPY(no_bmp_targets_main, + no_bmp_targets_cmd, + "no bmp targets BMPTARGETS", + NO_STR + BMP_STR + "Delete BMP target group\n" + "Name of the BMP target group\n") +{ + VTY_DECLVAR_CONTEXT(bgp, bgp); + struct bmp_targets *bt; + + bt = bmp_targets_find1(bgp, bmptargets); + if (!bt) { + vty_out(vty, "%% BMP target group not found\n"); + return CMD_WARNING; + } + bmp_targets_put(bt); + return CMD_SUCCESS; +} + +DEFPY(bmp_listener_main, + bmp_listener_cmd, + "bmp listener port (1-65535)", + BMP_STR + "Listen for inbound BMP connections\n" + "IPv6 address to listen on\n" + "IPv4 address 
to listen on\n" + "TCP Port number\n" + "TCP Port number\n") +{ + VTY_DECLVAR_CONTEXT_SUB(bmp_targets, bt); + struct bmp_listener *bl; + + bl = bmp_listener_get(bt, listener, port); + if (bl->sock == -1) + bmp_listener_start(bl); + + return CMD_SUCCESS; +} + +DEFPY(no_bmp_listener_main, + no_bmp_listener_cmd, + "no bmp listener port (1-65535)", + NO_STR + BMP_STR + "Create BMP listener\n" + "IPv6 address to listen on\n" + "IPv4 address to listen on\n" + "TCP Port number\n" + "TCP Port number\n") +{ + VTY_DECLVAR_CONTEXT_SUB(bmp_targets, bt); + struct bmp_listener *bl; + + bl = bmp_listener_find(bt, listener, port); + if (!bl) { + vty_out(vty, "%% BMP listener not found\n"); + return CMD_WARNING; + } + bmp_listener_stop(bl); + bmp_listener_put(bl); + return CMD_SUCCESS; +} + +DEFPY(bmp_connect, + bmp_connect_cmd, + "[no] bmp connect HOSTNAME port (1-65535) " + "{min-retry (100-86400000)" + "|max-retry (100-86400000)}", + NO_STR + BMP_STR + "Actively establish connection to monitoring station\n" + "Monitoring station hostname or address\n" + "TCP port\n" + "TCP port\n" + "Minimum connection retry interval\n" + "Minimum connection retry interval (milliseconds)\n" + "Maximum connection retry interval\n" + "Maximum connection retry interval (milliseconds)\n") +{ + VTY_DECLVAR_CONTEXT_SUB(bmp_targets, bt); + struct bmp_active *ba; + + if (no) { + ba = bmp_active_find(bt, hostname, port); + if (!ba) { + vty_out(vty, "%% No such active connection found\n"); + return CMD_WARNING; + } + bmp_active_put(ba); + return CMD_SUCCESS; + } + + ba = bmp_active_get(bt, hostname, port); + if (min_retry_str) + ba->minretry = min_retry; + if (max_retry_str) + ba->maxretry = max_retry; + ba->curretry = ba->minretry; + bmp_active_setup(ba); + + return CMD_SUCCESS; +} + +DEFPY(bmp_acl, + bmp_acl_cmd, + "[no] $af access-list WORD", + NO_STR + IP_STR + IPV6_STR + "Access list to restrict BMP sessions\n" + "Access list name\n") +{ + VTY_DECLVAR_CONTEXT_SUB(bmp_targets, bt); + char **what; + + if (no) + access_list = NULL; + if (!strcmp(af, "ipv6")) + what = &bt->acl6_name; + else + what = &bt->acl_name; + + XFREE(MTYPE_BMP_ACLNAME, *what); + if (access_list) + *what = XSTRDUP(MTYPE_BMP_ACLNAME, access_list); + + return CMD_SUCCESS; +} + +DEFPY(bmp_stats_cfg, + bmp_stats_cmd, + "[no] bmp stats [interval (100-86400000)]", + NO_STR + BMP_STR + "Send BMP statistics messages\n" + "Specify BMP stats interval\n" + "Interval (milliseconds) to send BMP Stats in\n") +{ + VTY_DECLVAR_CONTEXT_SUB(bmp_targets, bt); + + THREAD_OFF(bt->t_stats); + if (no) + bt->stat_msec = 0; + else if (interval_str) + bt->stat_msec = interval; + else + bt->stat_msec = BMP_STAT_DEFAULT_TIMER; + + if (bt->stat_msec) + thread_add_timer_msec(bm->master, bmp_stats, bt, bt->stat_msec, + &bt->t_stats); + return CMD_SUCCESS; +} + +DEFPY(bmp_monitor_cfg, + bmp_monitor_cmd, + "[no] bmp monitor "BGP_AFI_CMD_STR" $policy", + NO_STR + BMP_STR + "Send BMP route monitoring messages\n" + BGP_AFI_HELP_STR + "Address family modifier\n" + "Address family modifier\n" + "Send state before policy and filter processing\n" + "Send state with policy and filters applied\n") +{ + int index = 0; + uint8_t flag, prev; + afi_t afi; + safi_t safi; + + VTY_DECLVAR_CONTEXT_SUB(bmp_targets, bt); + struct bmp *bmp; + + argv_find_and_parse_afi(argv, argc, &index, &afi); + argv_find_and_parse_safi(argv, argc, &index, &safi); + + if (policy[1] == 'r') + flag = BMP_MON_PREPOLICY; + else + flag = BMP_MON_POSTPOLICY; + + prev = bt->afimon[afi][safi]; + if (no) + bt->afimon[afi][safi] &= 
~flag; + else + bt->afimon[afi][safi] |= flag; + + if (prev == bt->afimon[afi][safi]) + return CMD_SUCCESS; + + frr_each (bmp_session, &bt->sessions, bmp) { + if (bmp->syncafi == afi && bmp->syncsafi == safi) { + bmp->syncafi = AFI_MAX; + bmp->syncsafi = SAFI_MAX; + } + + if (!bt->afimon[afi][safi]) { + bmp->afistate[afi][safi] = BMP_AFI_INACTIVE; + continue; + } + + bmp->afistate[afi][safi] = BMP_AFI_NEEDSYNC; + } + + return CMD_SUCCESS; +} + +DEFPY(bmp_mirror_cfg, + bmp_mirror_cmd, + "[no] bmp mirror", + NO_STR + BMP_STR + "Send BMP route mirroring messages\n") +{ + VTY_DECLVAR_CONTEXT_SUB(bmp_targets, bt); + struct bmp *bmp; + + if (bt->mirror == !no) + return CMD_SUCCESS; + + bt->mirror = !no; + if (bt->mirror) + return CMD_SUCCESS; + + frr_each (bmp_session, &bt->sessions, bmp) { + struct bmp_mirrorq *bmq; + + while ((bmq = bmp_pull_mirror(bmp))) + if (!bmq->refcount) + XFREE(MTYPE_BMP_MIRRORQ, bmq); + } + return CMD_SUCCESS; +} + +DEFPY(bmp_mirror_limit_cfg, + bmp_mirror_limit_cmd, + "bmp mirror buffer-limit (0-4294967294)", + BMP_STR + "Route Mirroring settings\n" + "Configure maximum memory used for buffered mirroring messages\n" + "Limit in bytes\n") +{ + VTY_DECLVAR_CONTEXT(bgp, bgp); + struct bmp_bgp *bmpbgp; + + bmpbgp = bmp_bgp_get(bgp); + bmpbgp->mirror_qsizelimit = buffer_limit; + + return CMD_SUCCESS; +} + +DEFPY(no_bmp_mirror_limit_cfg, + no_bmp_mirror_limit_cmd, + "no bmp mirror buffer-limit [(0-4294967294)]", + NO_STR + BMP_STR + "Route Mirroring settings\n" + "Configure maximum memory used for buffered mirroring messages\n" + "Limit in bytes\n") +{ + VTY_DECLVAR_CONTEXT(bgp, bgp); + struct bmp_bgp *bmpbgp; + + bmpbgp = bmp_bgp_get(bgp); + bmpbgp->mirror_qsizelimit = ~0UL; + + return CMD_SUCCESS; +} + + +DEFPY(show_bmp, + show_bmp_cmd, + "show bmp", + SHOW_STR + BMP_STR) +{ + struct bmp_bgp *bmpbgp; + struct bmp_targets *bt; + struct bmp_listener *bl; + struct bmp *bmp; + struct ttable *tt; + char buf[SU_ADDRSTRLEN]; + + frr_each(bmp_bgph, &bmp_bgph, bmpbgp) { + vty_out(vty, "BMP state for BGP %s:\n\n", + bmpbgp->bgp->name_pretty); + vty_out(vty, " Route Mirroring %9zu bytes (%zu messages) pending\n", + bmpbgp->mirror_qsize, + bmp_mirrorq_count(&bmpbgp->mirrorq)); + vty_out(vty, " %9zu bytes maximum buffer used\n", + bmpbgp->mirror_qsizemax); + if (bmpbgp->mirror_qsizelimit != ~0UL) + vty_out(vty, " %9zu bytes buffer size limit\n", + bmpbgp->mirror_qsizelimit); + vty_out(vty, "\n"); + + frr_each(bmp_targets, &bmpbgp->targets, bt) { + vty_out(vty, " Targets \"%s\":\n", bt->name); + vty_out(vty, " Route Mirroring %sabled\n", + bt->mirror ? 
"en" : "dis"); + + afi_t afi; + safi_t safi; + + FOREACH_AFI_SAFI (afi, safi) { + const char *str = NULL; + + switch (bt->afimon[afi][safi]) { + case BMP_MON_PREPOLICY: + str = "pre-policy"; + break; + case BMP_MON_POSTPOLICY: + str = "post-policy"; + break; + case BMP_MON_PREPOLICY | BMP_MON_POSTPOLICY: + str = "pre-policy and post-policy"; + break; + } + if (!str) + continue; + vty_out(vty, " Route Monitoring %s %s %s\n", + afi2str(afi), safi2str(safi), str); + } + + vty_out(vty, " Listeners:\n"); + frr_each (bmp_listeners, &bt->listeners, bl) + vty_out(vty, " %s:%d\n", + sockunion2str(&bl->addr, buf, + SU_ADDRSTRLEN), bl->port); + + vty_out(vty, "\n %zu connected clients:\n", + bmp_session_count(&bt->sessions)); + tt = ttable_new(&ttable_styles[TTSTYLE_BLANK]); + ttable_add_row(tt, "remote|uptime|MonSent|MirrSent|MirrLost|ByteSent|ByteQ|ByteQKernel"); + ttable_rowseps(tt, 0, BOTTOM, true, '-'); + + frr_each (bmp_session, &bt->sessions, bmp) { + uint64_t total; + size_t q, kq; + + pullwr_stats(bmp->pullwr, &total, &q, &kq); + + ttable_add_row(tt, "%s|-|%Lu|%Lu|%Lu|%Lu|%zu|%zu", + bmp->remote, + bmp->cnt_update, + bmp->cnt_mirror, + bmp->cnt_mirror_overruns, + total, q, kq); + } + char *out = ttable_dump(tt, "\n"); + vty_out(vty, "%s", out); + XFREE(MTYPE_TMP, out); + ttable_del(tt); + vty_out(vty, "\n"); + } + } + + return CMD_SUCCESS; +} + +static int bmp_config_write(struct bgp *bgp, struct vty *vty) +{ + struct bmp_bgp *bmpbgp = bmp_bgp_find(bgp); + struct bmp_targets *bt; + struct bmp_listener *bl; + struct bmp_active *ba; + char buf[SU_ADDRSTRLEN]; + afi_t afi; + safi_t safi; + + if (!bmpbgp) + return 0; + + if (bmpbgp->mirror_qsizelimit != ~0UL) + vty_out(vty, " !\n bmp mirror buffer-limit %zu\n", + bmpbgp->mirror_qsizelimit); + + frr_each(bmp_targets, &bmpbgp->targets, bt) { + vty_out(vty, " !\n bmp targets %s\n", bt->name); + + if (bt->acl6_name) + vty_out(vty, " ipv6 access-list %s\n", bt->acl6_name); + if (bt->acl_name) + vty_out(vty, " ip access-list %s\n", bt->acl_name); + + if (bt->stat_msec) + vty_out(vty, " bmp stats interval %d\n", + bt->stat_msec); + + if (bt->mirror) + vty_out(vty, " bmp mirror\n"); + + FOREACH_AFI_SAFI (afi, safi) { + const char *afi_str = (afi == AFI_IP) ? 
"ipv4" : "ipv6"; + + if (bt->afimon[afi][safi] & BMP_MON_PREPOLICY) + vty_out(vty, " bmp monitor %s %s pre-policy\n", + afi_str, safi2str(safi)); + if (bt->afimon[afi][safi] & BMP_MON_POSTPOLICY) + vty_out(vty, " bmp monitor %s %s post-policy\n", + afi_str, safi2str(safi)); + } + frr_each (bmp_listeners, &bt->listeners, bl) + vty_out(vty, " \n bmp listener %s port %d\n", + sockunion2str(&bl->addr, buf, SU_ADDRSTRLEN), + bl->port); + + frr_each (bmp_actives, &bt->actives, ba) + vty_out(vty, " bmp connect %s port %u min-retry %u max-retry %u\n", + ba->hostname, ba->port, ba->minretry, ba->maxretry); + } + + return 0; +} + +static int bgp_bmp_init(struct thread_master *tm) +{ + install_node(&bmp_node, NULL); + install_default(BMP_NODE); + install_element(BGP_NODE, &bmp_targets_cmd); + install_element(BGP_NODE, &no_bmp_targets_cmd); + + install_element(BMP_NODE, &bmp_listener_cmd); + install_element(BMP_NODE, &no_bmp_listener_cmd); + install_element(BMP_NODE, &bmp_connect_cmd); + install_element(BMP_NODE, &bmp_acl_cmd); + install_element(BMP_NODE, &bmp_stats_cmd); + install_element(BMP_NODE, &bmp_monitor_cmd); + install_element(BMP_NODE, &bmp_mirror_cmd); + + install_element(BGP_NODE, &bmp_mirror_limit_cmd); + install_element(BGP_NODE, &no_bmp_mirror_limit_cmd); + + install_element(VIEW_NODE, &show_bmp_cmd); + + resolver_init(tm); + return 0; +} + +static int bgp_bmp_module_init(void) +{ + hook_register(bgp_packet_dump, bmp_mirror_packet); + hook_register(bgp_packet_send, bmp_outgoing_packet); + hook_register(peer_established, bmp_peer_established); + hook_register(peer_backward_transition, bmp_peer_backward); + hook_register(bgp_process, bmp_process); + hook_register(bgp_inst_config_write, bmp_config_write); + hook_register(bgp_inst_delete, bmp_bgp_del); + hook_register(frr_late_init, bgp_bmp_init); + return 0; +} + +FRR_MODULE_SETUP(.name = "bgpd_bmp", .version = FRR_VERSION, + .description = "bgpd BMP module", + .init = bgp_bmp_module_init) diff --git a/bgpd/bgp_bmp.h b/bgpd/bgp_bmp.h new file mode 100644 index 0000000000..9d270e808c --- /dev/null +++ b/bgpd/bgp_bmp.h @@ -0,0 +1,303 @@ +/* BMP support. + * Copyright (C) 2018 Yasuhiro Ohara + * Copyright (C) 2019 David Lamparter for NetDEF, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _BGP_BMP_H_ +#define _BGP_BMP_H_ + +#include "zebra.h" +#include "typesafe.h" +#include "pullwr.h" +#include "qobj.h" +#include "resolver.h" + +#define BMP_VERSION_3 3 + +#define BMP_LENGTH_POS 1 + +/* BMP message types */ +#define BMP_TYPE_ROUTE_MONITORING 0 +#define BMP_TYPE_STATISTICS_REPORT 1 +#define BMP_TYPE_PEER_DOWN_NOTIFICATION 2 +#define BMP_TYPE_PEER_UP_NOTIFICATION 3 +#define BMP_TYPE_INITIATION 4 +#define BMP_TYPE_TERMINATION 5 +#define BMP_TYPE_ROUTE_MIRRORING 6 + +#define BMP_READ_BUFSIZ 1024 + +/* bmp->state */ +#define BMP_None 0 +#define BMP_PeerUp 2 +#define BMP_Run 3 + +/* This one is for BMP Route Monitoring messages, i.e. delivering updates + * in somewhat processed (as opposed to fully raw, see mirroring below) form. + * RFC explicitly says that we can skip old updates if we haven't sent them out + * yet and another newer update for the same prefix arrives. + * + * So, at most one of these can exist for each (bgp, afi, safi, prefix, peerid) + * tuple; if some prefix is "re-added" to the queue, the existing entry is + * instead moved to the end of the queue. This ensures that the queue size is + * bounded by the BGP table size. + * + * bmp_qlist is the queue itself while bmp_qhash is used to efficiently check + * whether a tuple is already on the list. The queue is maintained per + * bmp_target. + * + * refcount = number of "struct bmp *" whose queue position is before this + * entry, i.e. number of BMP sessions where we still want to send this out. + * Decremented on send so we know when we're done with an entry (i.e. this + * always happens from the front of the queue.) + */ + +PREDECL_DLIST(bmp_qlist) +PREDECL_HASH(bmp_qhash) + +struct bmp_queue_entry { + struct bmp_qlist_item bli; + struct bmp_qhash_item bhi; + + struct prefix p; + uint64_t peerid; + afi_t afi; + safi_t safi; + + size_t refcount; +}; + +/* This is for BMP Route Mirroring, which feeds fully raw BGP PDUs out to BMP + * receivers. So, this goes directly off packet RX/TX handling instead of + * grabbing bits from tables. + * + * There is *one* queue for each "struct bgp *" where we throw everything on, + * with a size limit. Refcount works the same as for monitoring above. + */ + +PREDECL_LIST(bmp_mirrorq) + +struct bmp_mirrorq { + struct bmp_mirrorq_item bmi; + + size_t refcount; + uint64_t peerid; + struct timeval tv; + + size_t len; + uint8_t data[0]; +}; + +enum { + BMP_AFI_INACTIVE = 0, + BMP_AFI_NEEDSYNC, + BMP_AFI_SYNC, + BMP_AFI_LIVE, +}; + +PREDECL_LIST(bmp_session) + +struct bmp_active; +struct bmp_targets; + +/* an established BMP session to a peer */ +struct bmp { + struct bmp_session_item bsi; + struct bmp_targets *targets; + struct bmp_active *active; + + int socket; + char remote[SU_ADDRSTRLEN + 6]; + struct thread *t_read; + + struct pullwr *pullwr; + + int state; + + /* queue positions must remain synced with refcounts in the items. + * Whenever appending a queue item, we need to know the correct number + * of "struct bmp *" that want it, and when moving these positions + * ahead we need to make sure that refcount is decremented. Also, on + * disconnects we need to walk the queue and drop our reference. 
+ */ + struct bmp_queue_entry *queuepos; + struct bmp_mirrorq *mirrorpos; + bool mirror_lost; + + /* enum BMP_AFI_* */ + uint8_t afistate[AFI_MAX][SAFI_MAX]; + + /* counters for the various BMP packet types */ + uint64_t cnt_update, cnt_mirror; + /* number of times this peer wasn't fast enough in consuming the + * mirror queue + */ + uint64_t cnt_mirror_overruns; + struct timeval t_up; + + /* synchronization / startup works by repeatedly finding the next + * table entry, the sync* fields note down what we sent last + */ + struct prefix syncpos; + uint64_t syncpeerid; + afi_t syncafi; + safi_t syncsafi; +}; + +/* config & state for an active outbound connection. When the connection + * succeeds, "bmp" is set up. + */ + +PREDECL_SORTLIST_UNIQ(bmp_actives) + +#define BMP_DFLT_MINRETRY 30000 +#define BMP_DFLT_MAXRETRY 720000 + +struct bmp_active { + struct bmp_actives_item bai; + struct bmp_targets *targets; + struct bmp *bmp; + + char *hostname; + int port; + unsigned minretry, maxretry; + + struct resolver_query resq; + + unsigned curretry; + unsigned addrpos, addrtotal; + union sockunion addrs[8]; + int socket; + struct thread *t_timer, *t_read, *t_write; +}; + +/* config & state for passive / listening sockets */ +PREDECL_SORTLIST_UNIQ(bmp_listeners) + +struct bmp_listener { + struct bmp_listeners_item bli; + + struct bmp_targets *targets; + + union sockunion addr; + int port; + + struct thread *t_accept; + int sock; +}; + +/* bmp_targets - plural since it may contain multiple bmp_listener & + * bmp_active items. If they have the same config, BMP session should be + * put in the same targets since that's a bit more effective. + */ +PREDECL_SORTLIST_UNIQ(bmp_targets) + +struct bmp_targets { + struct bmp_targets_item bti; + + struct bmp_bgp *bmpbgp; + struct bgp *bgp; + char *name; + + struct bmp_listeners_head listeners; + + char *acl_name; + char *acl6_name; +#define BMP_STAT_DEFAULT_TIMER 60000 + int stat_msec; + + /* only IPv4 & IPv6 / unicast & multicast supported for now */ +#define BMP_MON_PREPOLICY (1 << 0) +#define BMP_MON_POSTPOLICY (1 << 1) + uint8_t afimon[AFI_MAX][SAFI_MAX]; + bool mirror; + + struct bmp_actives_head actives; + + struct thread *t_stats; + struct bmp_session_head sessions; + + struct bmp_qhash_head updhash; + struct bmp_qlist_head updlist; + + uint64_t cnt_accept, cnt_aclrefused; + + QOBJ_FIELDS +}; +DECLARE_QOBJ_TYPE(bmp_targets) + +/* per struct peer * data. Lookup by peer->qobj_node.nid, created on demand, + * deleted in peer_backward hook. 
*/ +PREDECL_HASH(bmp_peerh) + +struct bmp_bgp_peer { + struct bmp_peerh_item bpi; + + uint64_t peerid; + /* struct peer *peer; */ + + uint8_t *open_rx; + size_t open_rx_len; + + uint8_t *open_tx; + size_t open_tx_len; +}; + +/* per struct bgp * data */ +PREDECL_HASH(bmp_bgph) + +struct bmp_bgp { + struct bmp_bgph_item bbi; + + struct bgp *bgp; + struct bmp_targets_head targets; + + struct bmp_mirrorq_head mirrorq; + size_t mirror_qsize, mirror_qsizemax; + + size_t mirror_qsizelimit; +}; + +enum { + BMP_PEERDOWN_LOCAL_NOTIFY = 1, + BMP_PEERDOWN_LOCAL_FSM = 2, + BMP_PEERDOWN_REMOTE_NOTIFY = 3, + BMP_PEERDOWN_REMOTE_CLOSE = 4, + BMP_PEERDOWN_ENDMONITOR = 5, +}; + +enum { + BMP_STATS_PFX_REJECTED = 0, + BMP_STATS_PFX_DUP_ADV = 1, + BMP_STATS_PFX_DUP_WITHDRAW = 2, + BMP_STATS_UPD_LOOP_CLUSTER = 3, + BMP_STATS_UPD_LOOP_ASPATH = 4, + BMP_STATS_UPD_LOOP_ORIGINATOR = 5, + BMP_STATS_UPD_LOOP_CONFED = 6, + BMP_STATS_SIZE_ADJ_RIB_IN = 7, + BMP_STATS_SIZE_LOC_RIB = 8, + BMP_STATS_SIZE_ADJ_RIB_IN_SAFI = 9, + BMP_STATS_SIZE_LOC_RIB_IN_SAFI = 10, + BMP_STATS_UPD_7606_WITHDRAW = 11, + BMP_STATS_PFX_7606_WITHDRAW = 12, + BMP_STATS_UPD_DUP = 13, + BMP_STATS_FRR_NH_INVALID = 65531, +}; + +DECLARE_MGROUP(BMP) + +#endif /*_BGP_BMP_H_*/ diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index d3a08d23d9..cd94f421ef 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -41,6 +41,7 @@ #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" #include "bgpd/bgp_dump.h" +#include "bgpd/bgp_bmp.h" #include "bgpd/bgp_attr.h" #include "bgpd/bgp_debug.h" #include "bgpd/bgp_errors.h" diff --git a/bgpd/subdir.am b/bgpd/subdir.am index d281fe4e59..b338fd4f3d 100644 --- a/bgpd/subdir.am +++ b/bgpd/subdir.am @@ -27,6 +27,7 @@ vtysh_scan += \ # can be loaded as DSO - always include for vtysh vtysh_scan += $(top_srcdir)/bgpd/bgp_rpki.c +vtysh_scan += $(top_srcdir)/bgpd/bgp_bmp.c if ENABLE_BGP_VNC vtysh_scan += \ @@ -42,6 +43,9 @@ endif if RPKI module_LTLIBRARIES += bgpd/bgpd_rpki.la endif +if BGP_BMP +module_LTLIBRARIES += bgpd/bgpd_bmp.la +endif man8 += $(MANBUILD)/bgpd.8 endif @@ -129,6 +133,7 @@ noinst_HEADERS += \ bgpd/bgp_damp.h \ bgpd/bgp_debug.h \ bgpd/bgp_dump.h \ + bgpd/bgp_bmp.h \ bgpd/bgp_ecommunity.h \ bgpd/bgp_encap_tlv.h \ bgpd/bgp_encap_types.h \ @@ -216,6 +221,10 @@ bgpd_bgpd_rpki_la_CFLAGS = $(WERROR) $(RTRLIB_CFLAGS) bgpd_bgpd_rpki_la_LDFLAGS = -avoid-version -module -shared -export-dynamic bgpd_bgpd_rpki_la_LIBADD = $(RTRLIB_LIBS) +bgpd_bgpd_bmp_la_SOURCES = bgpd/bgp_bmp.c +bgpd_bgpd_bmp_la_LIBADD = lib/libfrrcares.la +bgpd_bgpd_bmp_la_LDFLAGS = -avoid-version -module -shared -export-dynamic + bgpd/bgp_evpn_vty_clippy.c: $(CLIPPY_DEPS) bgpd/bgp_evpn_vty.$(OBJEXT): bgpd/bgp_evpn_vty_clippy.c bgpd/bgp_vty_clippy.c: $(CLIPPY_DEPS) @@ -229,3 +238,5 @@ bgpd/bgp_routemap.$(OBJEXT): bgpd/bgp_routemap_clippy.c bgpd/bgp_rpki_clippy.c: $(CLIPPY_DEPS) $(AUTOMAKE_DUMMY)bgpd/bgpd_bgpd_rpki_la-bgp_rpki.lo: bgpd/bgp_rpki_clippy.c $(AUTOMAKE_DUMMY)bgpd/bgpd_rpki_la-bgp_rpki.lo: bgpd/bgp_rpki_clippy.c +bgpd/bgp_bmp_clippy.c: $(CLIPPY_DEPS) +bgpd/bgp_bmp.lo: bgpd/bgp_bmp_clippy.c diff --git a/configure.ac b/configure.ac index 676c984385..a940739ae4 100755 --- a/configure.ac +++ b/configure.ac @@ -479,12 +479,14 @@ AC_ARG_ENABLE([staticd], AS_HELP_STRING([--disable-staticd], [do not build staticd])) AC_ARG_ENABLE([fabricd], AS_HELP_STRING([--disable-fabricd], [do not build fabricd])) -AC_ARG_ENABLE([bgp-announce], - AS_HELP_STRING([--disable-bgp-announce,], [turn off BGP route announcement])) AC_ARG_ENABLE([vrrpd], 
AS_HELP_STRING([--disable-vrrpd], [do not build vrrpd])) +AC_ARG_ENABLE([bgp-announce], + AS_HELP_STRING([--disable-bgp-announce,], [turn off BGP route announcement])) AC_ARG_ENABLE([bgp-vnc], AS_HELP_STRING([--disable-bgp-vnc],[turn off BGP VNC support])) +AC_ARG_ENABLE([bgp-bmp], + AS_HELP_STRING([--disable-bgp-bmp],[turn off BGP BMP support])) AC_ARG_ENABLE([snmp], AS_HELP_STRING([--enable-snmp], [enable SNMP support for agentx])) AC_ARG_ENABLE([config_rollbacks], @@ -1450,6 +1452,16 @@ if test "x$enable_pcreposix" = "xyes"; then fi AC_SUBST([HAVE_LIBPCREPOSIX]) +dnl ------------------ +dnl check C-Ares library +dnl ------------------ +PKG_CHECK_MODULES([CARES], [libcares], [ + c_ares_found=true +],[ + c_ares_found=false +]) +AM_CONDITIONAL([CARES], [$c_ares_found]) + dnl ########################################################################## dnl test "${enable_clippy_only}" != "yes" fi @@ -1519,9 +1531,21 @@ fi NHRPD="" case "$host_os" in linux*) - if test "${enable_nhrpd}" != "no"; then - NHRPD="nhrpd" - fi + case "${enable_nhrpd}" in + no) + ;; + yes) + if test "$c_ares_found" != "true" ; then + AC_MSG_ERROR([nhrpd requires libcares. Please install c-ares and its -dev headers.]) + fi + NHRPD="nhrpd" + ;; + *) + if test "$c_ares_found" = "true" ; then + NHRPD="nhrpd" + fi + ;; + esac ;; *) if test "${enable_nhrpd}" = "yes"; then @@ -1555,21 +1579,28 @@ if test "${enable_bgp_vnc}" != "no";then AC_DEFINE([ENABLE_BGP_VNC], [1], [Enable BGP VNC support]) fi +bgpd_bmp=false +case "${enable_bmp}" in + no) + ;; + yes) + if test "$c_ares_found" != "true" ; then + AC_MSG_ERROR([BMP support requires libcares. Please install c-ares and its -dev headers.]) + fi + bgpd_bmp=true + ;; + *) + if test "$c_ares_found" = "true" ; then + bgpd_bmp=true + fi + ;; +esac + dnl ########################################################################## dnl LARGE if block if test "${enable_clippy_only}" != "yes"; then dnl ########################################################################## -dnl ------------------ -dnl check C-Ares library -dnl ------------------ -if test "${NHRPD}" != ""; then - PKG_CHECK_MODULES([CARES], [libcares], ,[ - AC_MSG_ERROR([trying to build nhrpd, but libcares not found. install c-ares and its -dev headers.]) - ]) -fi -AM_CONDITIONAL([CARES], [test "${NHRPD}" != ""]) - dnl ------------------ dnl check Net-SNMP library dnl ------------------ @@ -2193,6 +2224,7 @@ AC_DEFINE_UNQUOTED([WATCHFRR_SH_PATH], ["${CFG_SBIN%/}/watchfrr.sh"], [path to w dnl various features AM_CONDITIONAL([SUPPORT_REALMS], [test "${enable_realms}" = "yes"]) AM_CONDITIONAL([ENABLE_BGP_VNC], [test x${enable_bgp_vnc} != xno]) +AM_CONDITIONAL([BGP_BMP], [$bgpd_bmp]) dnl northbound AM_CONDITIONAL([SQLITE3], [$SQLITE3]) AM_CONDITIONAL([CONFD], [test "x$enable_confd" != "x"]) diff --git a/debian/frr.install b/debian/frr.install index fe34b23d02..09bddf0fc6 100644 --- a/debian/frr.install +++ b/debian/frr.install @@ -10,6 +10,7 @@ usr/lib/frr/watchfrr usr/lib/frr/zebra usr/lib/*/frr/modules/zebra_irdp.so usr/lib/*/frr/modules/zebra_fpm.so +usr/lib/*/frr/modules/bgpd_bmp.so usr/share/doc/frr/examples usr/share/man/ usr/share/yang/ diff --git a/doc/user/bmp.rst b/doc/user/bmp.rst new file mode 100644 index 0000000000..061800c14e --- /dev/null +++ b/doc/user/bmp.rst @@ -0,0 +1,170 @@ +.. _bmp: + +*** +BMP +*** + +:abbr:`BMP` (BGP Monitoring Protocol, :rfc:`7854`) is used to send monitoring +data from BGP routers to network management entities. 
+ +Implementation characteristics +============================== + +The `BMP` implementation in FRR has the following properties: + +- only the :rfc:`7854` features are currently implemented. This means protocol + version 3 without any extensions. It is not possible to use an older draft + protocol version of BMP. + +- the following statistics codes are implemented: + + - 0: count of prefixes rejected + - 2: count of duplicate prefix withdrawals + - 3: count of **prefixes** with loop in cluster id + - 4: count of **prefixes** with loop in AS-path + - 5: count of **prefixes** with loop in originator + - 11: count of updates subjected to :rfc:`7607` "treat as withdrawal" + handling due to errors + - 65531: *experimental* count of prefixes rejected due to invalid next-hop + + Note that stat items 3, 4 and 5 are specified to count updates, but FRR + implements them as prefix-based counters. + +- **route mirroring** is fully implemented, however BGP OPEN messages are not + currently included in route mirroring messages. Their contents can be + extracted from the "peer up" notification for sessions that established + successfully. OPEN messages for failed sessions cannot currently be + mirrored. + +- **route monitoring** is available for IPv4 and IPv6 AFIs, unicast and + multicast SAFIs. Other SAFIs (VPN, Labeled-Unicast, Flowspec, etc.) are not + currently supported. + +- monitoring peers that have BGP **add-path** enabled on the session will + result in somewhat unpredictable behaviour. Currently, the outcome is: + + - route mirroring functions as intended, messages are copied verbatim + - the add-path ID is never included in route monitoring messages + - if multiple paths were received from a peer, an unpredictable path is + picked and sent on the BMP session. The selection will differ for + pre-policy and post-policy monitoring sessions. + - as long as any path is present, something will be advertised on BMP + sessions. Only after the last path is gone a withdrawal will be sent on + BMP sessions. + - updates to additional paths will trigger BMP route monitoring messages. + There is no guarantee on consistency regarding which path is sent in these + messages. + +- monitoring peers with :rfc:`5549` extended next-hops has not been tested. + +Starting BMP +============ + +BMP is implemented as a loadable module. This means that to use BMP, ``bgpd`` +must be started with the ``-M bmp`` option. It is not possible to enable BMP +if ``bgpd`` was started without this option. + +Configuring BMP +=============== + +All of FRR's BMP configuration options are located inside the +:clicmd:`router bgp ASN` block. Configure BGP first before proceeding to BMP +setup. + +There is one option that applies to the BGP instance as a whole: + +.. index:: bmp mirror buffer-limit(0-4294967294) +.. clicmd:: [no] bmp mirror buffer-limit(0-4294967294) + + This sets the maximum amount of memory used for buffering BGP messages + (updates, keepalives, ...) for sending in BMP Route Mirroring. + + The buffer is for the entire BGP instance; if multiple BMP targets are + configured they reference the same buffer and do not consume additional + memory. Queue overhead is included in accounting this memory, so the + actual space available for BGP messages is slightly less than the value + configured here. 
+ + If the buffer fills up, the oldest messages are removed from the buffer and + any BMP sessions where the now-removed messages were still pending have + their **entire** queue flushed and a "Mirroring Messages Lost" BMP message + is sent. + + BMP Route Monitoring is not affected by this option. + +All other configuration is managed per targets: + +.. index:: bmp targets NAME +.. clicmd:: [no] bmp targets NAME + + Create/delete a targets group. As implied by the plural name, targets may + cover multiple outbound active BMP sessions as well as inbound passive + listeners. + + If BMP sessions have the same configuration, putting them in the same + ``bmp targets`` will reduce overhead. + +BMP session configuration +------------------------- + +Inside a ``bmp targets`` block, the following commands control session +establishment: + +.. index:: bmp connect HOSTNAME port (1-65535) {min-retry MSEC|max-retry MSEC} +.. clicmd:: [no] bmp connect HOSTNAME port (1-65535) {min-retry MSEC|max-retry MSEC} + + Add/remove an active outbound BMP session. HOSTNAME is resolved via DNS, + if multiple addresses are returned they are tried in nondeterministic + order. Only one connection will be established even if multiple addresses + are returned. ``min-retry`` and ``max-retry`` specify (in milliseconds) + bounds for exponential backoff. + +.. warning:: + + ``ip access-list`` and ``ipv6 access-list`` are checked for outbound + connections resulting from ``bmp connect`` statements. + +.. index:: bmp listener port (1-65535) +.. clicmd:: [no] bmp listener port (1-65535) + + Accept incoming BMP sessions on the specified address and port. You can + use ``0.0.0.0`` and ``::`` to listen on all IPv4/IPv6 addresses. + +.. clicmd:: [no] ip access-list NAME +.. clicmd:: [no] ipv6 access-list NAME + + Restrict BMP sessions to the addresses allowed by the respective access + lists. The access lists are checked for both passive and active BMP + sessions. Changes do not affect currently established sessions. + +BMP data feed configuration +--------------------------- + +The following commands configure what BMP messages are sent on sessions +associated with a particular ``bmp targets``: + +.. index:: bmp stats [interval (100-86400000)] +.. clicmd:: [no] bmp stats [interval (100-86400000)] + + Send BMP Statistics (counter) messages at the specified interval (in + milliseconds.) + +.. index:: bmp monitor AFI SAFI +.. clicmd:: [no] bmp monitor AFI SAFI + + Perform Route Monitoring for the specified AFI and SAFI. Only IPv4 and + IPv6 are currently valid for AFI, and only unicast and multicast are valid + for SAFI. Other AFI/SAFI combinations may be added in the future. + + All BGP neighbors are included in Route Monitoring. Options to select + a subset of BGP sessions may be added in the future. + +.. index:: bmp mirror +.. clicmd:: [no] bmp mirror + + Perform Route Mirroring for all BGP neighbors. Since this provides a + direct feed of BGP messages, there are no AFI/SAFI options to be + configured. + + All BGP neighbors are included in Route Mirroring. Options to select + a subset of BGP sessions may be added in the future. 
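Putting the commands above together, a minimal illustrative configuration sketch follows (not part of the patch itself; the AS number, target name, collector hostname, buffer size and port are arbitrary placeholders, and the listener address argument takes an IPv4/IPv6 address as described above — min-retry/max-retry are shown at their documented defaults):

  router bgp 65001
   bmp mirror buffer-limit 4194304
   bmp targets collector
    bmp stats interval 60000
    bmp monitor ipv4 unicast pre-policy
    bmp monitor ipv6 unicast post-policy
    bmp mirror
    bmp listener :: port 11019
    bmp connect bmp-collector.example.com port 11019 min-retry 30000 max-retry 720000
   exit

This defines one targets group that accepts incoming BMP sessions on any IPv6 address and also actively connects out to the named collector, sending statistics every 60 seconds, Route Monitoring for IPv4/IPv6 unicast, and a raw Route Mirroring feed.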
diff --git a/doc/user/index.rst b/doc/user/index.rst index 4e14de6737..6c3b14e062 100644 --- a/doc/user/index.rst +++ b/doc/user/index.rst @@ -57,6 +57,7 @@ Protocols static vnc vrrp + bmp ######## Appendix diff --git a/doc/user/subdir.am b/doc/user/subdir.am index 1e4d86c722..0f0a8a0774 100644 --- a/doc/user/subdir.am +++ b/doc/user/subdir.am @@ -7,6 +7,7 @@ user_RSTFILES = \ doc/user/ldpd.rst \ doc/user/basic.rst \ doc/user/bgp.rst \ + doc/user/bmp.rst \ doc/user/bugs.rst \ doc/user/conf.py \ doc/user/eigrpd.rst \ diff --git a/lib/command.c b/lib/command.c index 9dabc2af7e..d4c35a80d5 100644 --- a/lib/command.c +++ b/lib/command.c @@ -151,6 +151,7 @@ const char *node_names[] = { "bfd peer", /* BFD_PEER_NODE */ "openfabric", // OPENFABRIC_NODE "vrrp", /* VRRP_NODE */ + "bmp", /* BMP_NODE */ }; /* clang-format on */ @@ -975,6 +976,7 @@ enum node_type node_parent(enum node_type node) case BGP_IPV6M_NODE: case BGP_EVPN_NODE: case BGP_IPV6L_NODE: + case BMP_NODE: ret = BGP_NODE; break; case BGP_EVPN_VNI_NODE: @@ -1491,6 +1493,7 @@ void cmd_exit(struct vty *vty) case BGP_IPV6M_NODE: case BGP_EVPN_NODE: case BGP_IPV6L_NODE: + case BMP_NODE: vty->node = BGP_NODE; break; case BGP_EVPN_VNI_NODE: diff --git a/lib/command.h b/lib/command.h index 8dc35a0fdc..137d3748ae 100644 --- a/lib/command.h +++ b/lib/command.h @@ -159,6 +159,7 @@ enum node_type { BFD_PEER_NODE, /* BFD peer configuration mode. */ OPENFABRIC_NODE, /* OpenFabric router configuration node */ VRRP_NODE, /* VRRP node */ + BMP_NODE, /* BMP config under router bgp */ NODE_TYPE_MAX, /* maximum */ }; diff --git a/lib/monotime.h b/lib/monotime.h index ca27c45dc6..e246f177de 100644 --- a/lib/monotime.h +++ b/lib/monotime.h @@ -84,6 +84,20 @@ static inline int64_t monotime_until(const struct timeval *ref, return (int64_t)tv.tv_sec * 1000000LL + tv.tv_usec; } +static inline time_t monotime_to_realtime(const struct timeval *mono, + struct timeval *realout) +{ + struct timeval delta, real; + + monotime_since(mono, &delta); + gettimeofday(&real, NULL); + + timersub(&real, &delta, &real); + if (realout) + *realout = real; + return real.tv_sec; +} + /* Char buffer size for time-to-string api */ #define MONOTIME_STRLEN 32 diff --git a/lib/pullwr.c b/lib/pullwr.c new file mode 100644 index 0000000000..0c326f29d4 --- /dev/null +++ b/lib/pullwr.c @@ -0,0 +1,275 @@ +/* + * Pull-driven write event handler + * Copyright (C) 2019 David Lamparter + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "zebra.h" + +#include "pullwr.h" +#include "memory.h" +#include "monotime.h" + +/* defaults */ +#define PULLWR_THRESH 16384 /* size at which we start to call write() */ +#define PULLWR_MAXSPIN 2500 /* max µs to spend grabbing more data */ + +struct pullwr { + int fd; + struct thread_master *tm; + /* writer == NULL <=> we're idle */ + struct thread *writer; + + void *arg; + void (*fill)(void *, struct pullwr *); + void (*err)(void *, struct pullwr *, bool); + + /* ring buffer (although it's "un-ringed" on resizing, it WILL wrap + * around if data is trickling in while keeping it at a constant size) + */ + size_t bufsz, valid, pos; + uint64_t total_written; + char *buffer; + + size_t thresh; /* PULLWR_THRESH */ + int64_t maxspin; /* PULLWR_MAXSPIN */ +}; + +DEFINE_MTYPE_STATIC(LIB, PULLWR_HEAD, "pull-driven write controller") +DEFINE_MTYPE_STATIC(LIB, PULLWR_BUF, "pull-driven write buffer") + +static int pullwr_run(struct thread *t); + +struct pullwr *_pullwr_new(struct thread_master *tm, int fd, + void *arg, + void (*fill)(void *, struct pullwr *), + void (*err)(void *, struct pullwr *, bool)) +{ + struct pullwr *pullwr; + + pullwr = XCALLOC(MTYPE_PULLWR_HEAD, sizeof(*pullwr)); + pullwr->fd = fd; + pullwr->tm = tm; + pullwr->arg = arg; + pullwr->fill = fill; + pullwr->err = err; + + pullwr->thresh = PULLWR_THRESH; + pullwr->maxspin = PULLWR_MAXSPIN; + + return pullwr; +} + +void pullwr_del(struct pullwr *pullwr) +{ + THREAD_OFF(pullwr->writer); + + XFREE(MTYPE_PULLWR_BUF, pullwr->buffer); + XFREE(MTYPE_PULLWR_HEAD, pullwr); +} + +void pullwr_cfg(struct pullwr *pullwr, int64_t max_spin_usec, + size_t write_threshold) +{ + pullwr->maxspin = max_spin_usec ?: PULLWR_MAXSPIN; + pullwr->thresh = write_threshold ?: PULLWR_THRESH; +} + +void pullwr_bump(struct pullwr *pullwr) +{ + if (pullwr->writer) + return; + + thread_add_timer(pullwr->tm, pullwr_run, pullwr, 0, &pullwr->writer); +} + +static size_t pullwr_iov(struct pullwr *pullwr, struct iovec *iov) +{ + size_t len1; + + if (pullwr->valid == 0) + return 0; + + if (pullwr->pos + pullwr->valid <= pullwr->bufsz) { + iov[0].iov_base = pullwr->buffer + pullwr->pos; + iov[0].iov_len = pullwr->valid; + return 1; + } + + len1 = pullwr->bufsz - pullwr->pos; + + iov[0].iov_base = pullwr->buffer + pullwr->pos; + iov[0].iov_len = len1; + iov[1].iov_base = pullwr->buffer; + iov[1].iov_len = pullwr->valid - len1; + return 2; +} + +static void pullwr_resize(struct pullwr *pullwr, size_t need) +{ + struct iovec iov[2]; + size_t niov, newsize; + char *newbuf; + + /* the buffer is maintained at pullwr->thresh * 2 since we'll be + * trying to fill it as long as it's anywhere below pullwr->thresh. + * That means we frequently end up a little short of it and then write + * something that goes over the threshold. So, just use double. 
+ */ + if (need) { + /* resize up */ + if (pullwr->bufsz - pullwr->valid >= need) + return; + + newsize = MAX((pullwr->valid + need) * 2, pullwr->thresh * 2); + newbuf = XMALLOC(MTYPE_PULLWR_BUF, newsize); + } else if (!pullwr->valid) { + /* resize down, buffer empty */ + newsize = 0; + newbuf = NULL; + } else { + /* resize down */ + if (pullwr->bufsz - pullwr->valid < pullwr->thresh) + return; + newsize = MAX(pullwr->valid, pullwr->thresh * 2); + newbuf = XMALLOC(MTYPE_PULLWR_BUF, newsize); + } + + niov = pullwr_iov(pullwr, iov); + if (niov >= 1) { + memcpy(newbuf, iov[0].iov_base, iov[0].iov_len); + if (niov >= 2) + memcpy(newbuf + iov[0].iov_len, + iov[1].iov_base, iov[1].iov_len); + } + + XFREE(MTYPE_PULLWR_BUF, pullwr->buffer); + pullwr->buffer = newbuf; + pullwr->bufsz = newsize; + pullwr->pos = 0; +} + +void pullwr_write(struct pullwr *pullwr, const void *data, size_t len) +{ + pullwr_resize(pullwr, len); + + if (pullwr->pos + pullwr->valid > pullwr->bufsz) { + size_t pos; + + pos = (pullwr->pos + pullwr->valid) % pullwr->bufsz; + memcpy(pullwr->buffer + pos, data, len); + } else { + size_t max1, len1; + max1 = pullwr->bufsz - (pullwr->pos + pullwr->valid); + max1 = MIN(max1, len); + + memcpy(pullwr->buffer + pullwr->pos + pullwr->valid, + data, max1); + len1 = len - max1; + + if (len1) + memcpy(pullwr->buffer, (char *)data + max1, len1); + + } + pullwr->valid += len; + + pullwr_bump(pullwr); +} + +static int pullwr_run(struct thread *t) +{ + struct pullwr *pullwr = THREAD_ARG(t); + struct iovec iov[2]; + size_t niov, lastvalid; + ssize_t nwr; + struct timeval t0; + bool maxspun = false; + + monotime(&t0); + + do { + lastvalid = pullwr->valid - 1; + while (pullwr->valid < pullwr->thresh + && pullwr->valid != lastvalid + && !maxspun) { + lastvalid = pullwr->valid; + pullwr->fill(pullwr->arg, pullwr); + + /* check after doing at least one fill() call so we + * don't spin without making progress on slow boxes + */ + if (!maxspun && monotime_since(&t0, NULL) + >= pullwr->maxspin) + maxspun = true; + } + + if (pullwr->valid == 0) { + /* we made a fill() call above that didn't feed any + * data in, and we have nothing more queued, so we go + * into idle, i.e. no calling thread_add_write() + */ + pullwr_resize(pullwr, 0); + return 0; + } + + niov = pullwr_iov(pullwr, iov); + assert(niov); + + nwr = writev(pullwr->fd, iov, niov); + if (nwr < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) + break; + pullwr->err(pullwr->arg, pullwr, false); + return 0; + } + + if (nwr == 0) { + pullwr->err(pullwr->arg, pullwr, true); + return 0; + } + + pullwr->total_written += nwr; + pullwr->valid -= nwr; + pullwr->pos += nwr; + pullwr->pos %= pullwr->bufsz; + } while (pullwr->valid == 0 && !maxspun); + /* pullwr->valid != 0 implies we did an incomplete write, i.e. socket + * is full and we go wait until it's available for writing again. + */ + + thread_add_write(pullwr->tm, pullwr_run, pullwr, pullwr->fd, + &pullwr->writer); + + /* if we hit the time limit, just keep the buffer, we'll probably need + * it anyway & another run is already coming up. 
+ */ + if (!maxspun) + pullwr_resize(pullwr, 0); + return 0; +} + +void pullwr_stats(struct pullwr *pullwr, uint64_t *total_written, + size_t *pending, size_t *kernel_pending) +{ + int tmp; + + *total_written = pullwr->total_written; + *pending = pullwr->valid; + + if (ioctl(pullwr->fd, TIOCOUTQ, &tmp) != 0) + tmp = 0; + *kernel_pending = tmp; +} diff --git a/lib/pullwr.h b/lib/pullwr.h new file mode 100644 index 0000000000..601eac1b79 --- /dev/null +++ b/lib/pullwr.h @@ -0,0 +1,110 @@ +/* + * Pull-driven write event handler + * Copyright (C) 2019 David Lamparter + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _WRITEPOLL_H +#define _WRITEPOLL_H + +#include +#include + +#include "thread.h" +#include "stream.h" + +struct pullwr; + +/* This is a "pull-driven" write event handler. Instead of having some buffer + * or being driven by the availability of data, it triggers on the space being + * available on the socket for data to be written on and then calls fill() to + * get data to be sent. + * + * pullwr_* maintains an "idle" vs. "active" state, going into idle when a + * fill() call completes without feeding more data into it. The overall + * semantics are: + * - to put data out, call pullwr_write(). This is possible from both inside + * fill() callbacks or anywhere else. Doing so puts the pullwr into + * active state. + * - in active state, the fill() callback will be called and should feed more + * data in. It should NOT loop to push out more than one "unit" of data; + * the pullwr code handles this by calling fill() until it has enough data. + * - if there's nothing more to be sent, fill() returns without doing anything + * and pullwr goes into idle state after flushing all buffered data out. + * - when new data becomes available, pullwr_bump() should be called to put + * the pullwr back into active mode so it will collect data from fill(), + * or you can directly call pullwr_write(). + * - only calling pullwr_write() from within fill() is the cleanest way of + * doing things. + * + * When the err() callback is called, the pullwr should be considered unusable + * and released with pullwr_del(). This can be done from inside the callback, + * the pullwr code holds no more references on it when calling err(). + */ +extern struct pullwr *_pullwr_new(struct thread_master *tm, int fd, + void *arg, + void (*fill)(void *, struct pullwr *), + void (*err)(void *, struct pullwr *, bool eof)); +extern void pullwr_del(struct pullwr *pullwr); + +/* type-checking wrapper. makes sure fill() and err() take a first argument + * whose type is identical to the type of arg.
+ * => use "void fill(struct mystruct *arg, ...)" - no "void *arg" + */ +#define pullwr_new(tm, fd, arg, fill, err) ({ \ + void (*fill_typechk)(typeof(arg), struct pullwr *) = fill; \ + void (*err_typechk)(typeof(arg), struct pullwr *, bool) = err; \ + _pullwr_new(tm, fd, arg, (void *)fill_typechk, (void *)err_typechk); \ +}) + +/* max_spin_usec is the time after which the pullwr event handler will stop + * trying to get more data from fill() and yield control back to the + * thread_master. It does reschedule itself to continue later; this is + * only to make sure we don't freeze the entire process if we're piping a + * lot of data to a local endpoint that reads quickly (i.e. no backpressure) + * + * default: 2500 (2.5 ms) + * + * write_threshold is the amount of data buffered from fill() calls at which + * the pullwr code starts calling write(). But this is not a "limit". + * pullwr will keep poking fill() for more data until + * (a) max_spin_usec is reached; fill() will be called again later after + * returning to the thread_master to give other events a chance to run + * (b) fill() returns without pushing any data onto the pullwr with + * pullwr_write(), so fill() will NOT be called again until a call to + * pullwr_bump() or pullwr_write() comes in. + * + * default: 16384 (16 kB) + * + * passing 0 for either value (or not calling it at all) uses the default. + */ +extern void pullwr_cfg(struct pullwr *pullwr, int64_t max_spin_usec, + size_t write_threshold); + +extern void pullwr_bump(struct pullwr *pullwr); +extern void pullwr_write(struct pullwr *pullwr, + const void *data, size_t len); + +static inline void pullwr_write_stream(struct pullwr *pullwr, + struct stream *s) +{ + pullwr_write(pullwr, s->data, stream_get_endp(s)); +} + +extern void pullwr_stats(struct pullwr *pullwr, uint64_t *total_written, + size_t *pending, size_t *kernel_pending); + +#endif /* _WRITEPOLL_H */ diff --git a/lib/subdir.am b/lib/subdir.am index 2be7537bcc..e0f1352380 100644 --- a/lib/subdir.am +++ b/lib/subdir.am @@ -65,6 +65,7 @@ lib_libfrr_la_SOURCES = \ lib/prefix.c \ lib/privs.c \ lib/ptm_lib.c \ + lib/pullwr.c \ lib/qobj.c \ lib/ringbuf.c \ lib/routemap.c \ @@ -203,6 +204,7 @@ pkginclude_HEADERS += \ lib/printfrr.h \ lib/privs.h \ lib/ptm_lib.h \ + lib/pullwr.h \ lib/pw.h \ lib/qobj.h \ lib/queue.h \ diff --git a/python/clidef.py b/python/clidef.py index bc2f5caebf..baa6ed52b2 100644 --- a/python/clidef.py +++ b/python/clidef.py @@ -351,6 +351,7 @@ if __name__ == '__main__': macros = Macros() macros.load('lib/route_types.h') macros.load(os.path.join(basepath, 'lib/command.h')) + macros.load(os.path.join(basepath, 'bgpd/bgp_vty.h')) # sigh :( macros['PROTO_REDIST_STR'] = 'FRR_REDIST_STR_ISISD' diff --git a/redhat/frr.spec.in b/redhat/frr.spec.in index 014cae02ee..fa0a6d8a0a 100644 --- a/redhat/frr.spec.in +++ b/redhat/frr.spec.in @@ -634,6 +634,7 @@ fi %{_libdir}/frr/modules/bgpd_rpki.so %endif %{_libdir}/frr/modules/zebra_irdp.so +%{_libdir}/frr/modules/bgpd_bmp.so %{_bindir}/* %config(noreplace) %{configdir}/[!v]*.conf* %config(noreplace) %attr(750,%{frr_user},%{frr_user}) %{configdir}/daemons diff --git a/vtysh/vtysh.c b/vtysh/vtysh.c index b053392bff..eee864940c 100644 --- a/vtysh/vtysh.c +++ b/vtysh/vtysh.c @@ -1260,6 +1260,8 @@ static struct cmd_node bgp_vrf_policy_node = {BGP_VRF_POLICY_NODE, static struct cmd_node bgp_vnc_l2_group_node = { BGP_VNC_L2_GROUP_NODE, "%s(config-router-vnc-l2-group)# "}; +static struct cmd_node bmp_node = {BMP_NODE, "%s(config-bgp-bmp)# "}; + static struct cmd_node 
ospf_node = {OSPF_NODE, "%s(config-router)# "}; static struct cmd_node eigrp_node = {EIGRP_NODE, "%s(config-router)# "}; @@ -1335,7 +1337,7 @@ DEFUNSH(VTYSH_REALLYALL, vtysh_end_all, vtysh_end_all_cmd, "end", } DEFUNSH(VTYSH_BGPD, router_bgp, router_bgp_cmd, - "router bgp [(1-4294967295) [ WORD]]", + "router bgp [(1-4294967295)$instasn [ WORD]]", ROUTER_STR BGP_STR AS_STR "BGP view\nBGP VRF\n" "View/VRF name\n") @@ -1478,6 +1480,18 @@ DEFUNSH(VTYSH_BGPD, return CMD_SUCCESS; } +DEFUNSH(VTYSH_BGPD, + bmp_targets, + bmp_targets_cmd, + "bmp targets BMPTARGETS", + "BGP Monitoring Protocol\n" + "Create BMP target group\n" + "Name of the BMP target group\n") +{ + vty->node = BMP_NODE; + return CMD_SUCCESS; +} + DEFUNSH(VTYSH_BGPD, address_family_evpn, address_family_evpn_cmd, "address-family ", "Enter Address Family command mode\n" @@ -1842,6 +1856,7 @@ static int vtysh_exit(struct vty *vty) case BGP_VNC_DEFAULTS_NODE: case BGP_VNC_NVE_GROUP_NODE: case BGP_VNC_L2_GROUP_NODE: + case BMP_NODE: vty->node = BGP_NODE; break; case BGP_EVPN_VNI_NODE: @@ -1932,6 +1947,19 @@ DEFUNSH(VTYSH_BGPD, rpki_quit, rpki_quit_cmd, "quit", return rpki_exit(self, vty, argc, argv); } +DEFUNSH(VTYSH_BGPD, bmp_exit, bmp_exit_cmd, "exit", + "Exit current mode and down to previous mode\n") +{ + vtysh_exit(vty); + return CMD_SUCCESS; +} + +DEFUNSH(VTYSH_BGPD, bmp_quit, bmp_quit_cmd, "quit", + "Exit current mode and down to previous mode\n") +{ + return bmp_exit(self, vty, argc, argv); +} + DEFUNSH(VTYSH_VRF, exit_vrf_config, exit_vrf_config_cmd, "exit-vrf", "Exit from VRF configuration mode\n") { @@ -3620,6 +3648,7 @@ void vtysh_init_vty(void) install_node(&openfabric_node, NULL); install_node(&vty_node, NULL); install_node(&rpki_node, NULL); + install_node(&bmp_node, NULL); #if HAVE_BFDD > 0 install_node(&bfd_node, NULL); install_node(&bfd_peer_node, NULL); @@ -3853,6 +3882,11 @@ void vtysh_init_vty(void) install_element(BGP_FLOWSPECV4_NODE, &exit_address_family_cmd); install_element(BGP_FLOWSPECV6_NODE, &exit_address_family_cmd); + install_element(BGP_NODE, &bmp_targets_cmd); + install_element(BMP_NODE, &bmp_exit_cmd); + install_element(BMP_NODE, &bmp_quit_cmd); + install_element(BMP_NODE, &vtysh_end_all_cmd); + install_element(CONFIG_NODE, &rpki_cmd); install_element(RPKI_NODE, &rpki_exit_cmd); install_element(RPKI_NODE, &rpki_quit_cmd);