Merge pull request #4709 from opensourcerouting/frr-bmp

BMP support
This commit is contained in:
Russ White 2019-09-03 07:58:11 -04:00 committed by GitHub
commit 6f76b3f6f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 3221 additions and 16 deletions

View File

@ -194,6 +194,7 @@ void bgp_adj_in_set(struct bgp_node *rn, struct peer *peer, struct attr *attr,
adj = XCALLOC(MTYPE_BGP_ADJ_IN, sizeof(struct bgp_adj_in));
adj->peer = peer_lock(peer); /* adj_in peer reference */
adj->attr = bgp_attr_intern(attr);
adj->uptime = bgp_clock();
adj->addpath_rx_id = addpath_id;
BGP_ADJ_IN_ADD(rn, adj);
bgp_lock_node(rn);

View File

@ -101,6 +101,9 @@ struct bgp_adj_in {
/* Received attribute. */
struct attr *attr;
/* timestamp (monotime) */
time_t uptime;
/* Addpath identifier */
uint32_t addpath_rx_id;
};

2240
bgpd/bgp_bmp.c Normal file

File diff suppressed because it is too large Load Diff

303
bgpd/bgp_bmp.h Normal file
View File

@ -0,0 +1,303 @@
/* BMP support.
* Copyright (C) 2018 Yasuhiro Ohara
* Copyright (C) 2019 David Lamparter for NetDEF, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; see the file COPYING; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _BGP_BMP_H_
#define _BGP_BMP_H_
#include "zebra.h"
#include "typesafe.h"
#include "pullwr.h"
#include "qobj.h"
#include "resolver.h"
/* BMP protocol version implemented here (the user docs describe this as
* RFC 7854, version 3, without extensions)
*/
#define BMP_VERSION_3 3
/* NOTE(review): presumably the byte offset of the length field inside a BMP
* message header; the users of this constant are in bgp_bmp.c - confirm there
*/
#define BMP_LENGTH_POS 1
/* BMP message types */
#define BMP_TYPE_ROUTE_MONITORING 0
#define BMP_TYPE_STATISTICS_REPORT 1
#define BMP_TYPE_PEER_DOWN_NOTIFICATION 2
#define BMP_TYPE_PEER_UP_NOTIFICATION 3
#define BMP_TYPE_INITIATION 4
#define BMP_TYPE_TERMINATION 5
#define BMP_TYPE_ROUTE_MIRRORING 6
/* NOTE(review): presumably the size of the buffer used when reading from a
* BMP session socket - confirm against bgp_bmp.c
*/
#define BMP_READ_BUFSIZ 1024
/* bmp->state; note that the value 1 is not assigned */
#define BMP_None 0
#define BMP_PeerUp 2
#define BMP_Run 3
/* This one is for BMP Route Monitoring messages, i.e. delivering updates
* in somewhat processed (as opposed to fully raw, see mirroring below) form.
* RFC explicitly says that we can skip old updates if we haven't sent them out
* yet and another newer update for the same prefix arrives.
*
* So, at most one of these can exist for each (bgp, afi, safi, prefix, peerid)
* tuple; if some prefix is "re-added" to the queue, the existing entry is
* instead moved to the end of the queue. This ensures that the queue size is
* bounded by the BGP table size.
*
* bmp_qlist is the queue itself while bmp_qhash is used to efficiently check
* whether a tuple is already on the list. The queue is maintained per
* bmp_target.
*
* refcount = number of "struct bmp *" whose queue position is before this
* entry, i.e. number of BMP sessions where we still want to send this out.
* Decremented on send so we know when we're done with an entry (i.e. this
* always happens from the front of the queue.)
*/
PREDECL_DLIST(bmp_qlist)
PREDECL_HASH(bmp_qhash)
/* One pending Route Monitoring update. An entry is identified by its
* (afi, safi, p, peerid) fields and sits on both the bmp_qlist queue and the
* bmp_qhash lookup table of a bmp_targets.
*/
struct bmp_queue_entry {
/* linkage into the bmp_qlist queue */
struct bmp_qlist_item bli;
/* linkage into the bmp_qhash table used to find existing entries */
struct bmp_qhash_item bhi;
/* identifying tuple of this entry */
struct prefix p;
uint64_t peerid;
afi_t afi;
safi_t safi;
/* number of "struct bmp *" sessions that still have to send this entry */
size_t refcount;
};
/* This is for BMP Route Mirroring, which feeds fully raw BGP PDUs out to BMP
* receivers. So, this goes directly off packet RX/TX handling instead of
* grabbing bits from tables.
*
* There is *one* queue for each "struct bgp *" where we throw everything on,
* with a size limit. Refcount works the same as for monitoring above.
*/
PREDECL_LIST(bmp_mirrorq)
/* One queued item for BMP Route Mirroring.
 *
 * Items are variable-sized: the payload bytes live in "data", directly behind
 * the fixed-size part of the struct.
 */
struct bmp_mirrorq {
	/* linkage into the bmp_mirrorq list */
	struct bmp_mirrorq_item bmi;

	/* number of BMP sessions that still have to send this item; works
	 * like the refcount in struct bmp_queue_entry
	 */
	size_t refcount;
	/* identifies the BGP peer this item belongs to */
	uint64_t peerid;
	/* NOTE(review): presumably the time the PDU was captured - confirm
	 * against the code filling the queue in bgp_bmp.c
	 */
	struct timeval tv;

	/* NOTE(review): presumably the number of valid bytes in data[] */
	size_t len;
	/* C99 flexible array member (formerly the GNU zero-length array
	 * "data[0]"); sizeof(struct bmp_mirrorq) is not affected.
	 */
	uint8_t data[];
};
/* Values for the per-(AFI, SAFI) afistate[][] array in struct bmp.
* NOTE(review): the names suggest a progression from "not monitored" over
* "initial table sync pending / in progress" to "live updates"; the state
* machine itself is in bgp_bmp.c - confirm there.
*/
enum {
BMP_AFI_INACTIVE = 0,
BMP_AFI_NEEDSYNC,
BMP_AFI_SYNC,
BMP_AFI_LIVE,
};
PREDECL_LIST(bmp_session)
struct bmp_active;
struct bmp_targets;
/* an established BMP session to a peer */
struct bmp {
/* linkage into the bmp_session list (see "sessions" in bmp_targets) */
struct bmp_session_item bsi;
/* targets group this session belongs to */
struct bmp_targets *targets;
/* outbound connection config this session was set up from;
* NOTE(review): presumably NULL for sessions accepted on a listener
*/
struct bmp_active *active;
int socket;
/* textual name of the remote end; NOTE(review): the "+ 6" presumably
* leaves room for a ":port" suffix - confirm
*/
char remote[SU_ADDRSTRLEN + 6];
struct thread *t_read;
/* pull-driven writer (lib/pullwr.h) used for sending on this session */
struct pullwr *pullwr;
/* one of BMP_None / BMP_PeerUp / BMP_Run */
int state;
/* queue positions must remain synced with refcounts in the items.
* Whenever appending a queue item, we need to know the correct number
* of "struct bmp *" that want it, and when moving these positions
* ahead we need to make sure that refcount is decremented. Also, on
* disconnects we need to walk the queue and drop our reference.
*/
struct bmp_queue_entry *queuepos;
struct bmp_mirrorq *mirrorpos;
/* NOTE(review): presumably set when mirror queue items were dropped
* before this session could send them - confirm against bgp_bmp.c
*/
bool mirror_lost;
/* enum BMP_AFI_* */
uint8_t afistate[AFI_MAX][SAFI_MAX];
/* counters for the various BMP packet types */
uint64_t cnt_update, cnt_mirror;
/* number of times this peer wasn't fast enough in consuming the
* mirror queue
*/
uint64_t cnt_mirror_overruns;
/* NOTE(review): presumably the time this session came up */
struct timeval t_up;
/* synchronization / startup works by repeatedly finding the next
* table entry, the sync* fields note down what we sent last
*/
struct prefix syncpos;
uint64_t syncpeerid;
afi_t syncafi;
safi_t syncsafi;
};
/* config & state for an active outbound connection. When the connection
* succeeds, "bmp" is set up.
*/
PREDECL_SORTLIST_UNIQ(bmp_actives)
#define BMP_DFLT_MINRETRY 30000
#define BMP_DFLT_MAXRETRY 720000
/* One configured outbound connection ("bmp connect HOSTNAME port ..." in the
* user documentation).
*/
struct bmp_active {
/* linkage into the bmp_actives sorted list of the owning targets */
struct bmp_actives_item bai;
struct bmp_targets *targets;
/* the session, set up once the connection succeeds */
struct bmp *bmp;
/* where to connect to */
char *hostname;
int port;
/* retry interval bounds in milliseconds (cf. BMP_DFLT_MINRETRY /
* BMP_DFLT_MAXRETRY); the user docs describe them as the bounds for
* exponential backoff
*/
unsigned minretry, maxretry;
/* DNS resolver query state for "hostname" */
struct resolver_query resq;
/* NOTE(review): presumably the retry interval currently in effect */
unsigned curretry;
/* NOTE(review): presumably the index of the address being tried and the
* number of valid entries in addrs[] - confirm against bgp_bmp.c
*/
unsigned addrpos, addrtotal;
union sockunion addrs[8];
int socket;
struct thread *t_timer, *t_read, *t_write;
};
/* config & state for passive / listening sockets */
PREDECL_SORTLIST_UNIQ(bmp_listeners)
/* One configured listening socket ("bmp listener ADDRESS port ..." in the
* user documentation).
*/
struct bmp_listener {
/* linkage into the bmp_listeners sorted list of the owning targets */
struct bmp_listeners_item bli;
struct bmp_targets *targets;
/* address and port to listen on */
union sockunion addr;
int port;
/* NOTE(review): presumably the read event that accepts incoming
* connections on "sock"
*/
struct thread *t_accept;
int sock;
};
/* bmp_targets - plural since it may contain multiple bmp_listener &
* bmp_active items. If they have the same config, BMP sessions should be
* put in the same targets since that's a bit more efficient (the update
* queue is maintained per bmp_targets.)
*/
PREDECL_SORTLIST_UNIQ(bmp_targets)
struct bmp_targets {
/* linkage into the "targets" list of the owning bmp_bgp */
struct bmp_targets_item bti;
struct bmp_bgp *bmpbgp;
struct bgp *bgp;
/* name of the targets group, as given in "bmp targets NAME" */
char *name;
struct bmp_listeners_head listeners;
/* names of the IPv4 / IPv6 access lists restricting BMP sessions */
char *acl_name;
char *acl6_name;
#define BMP_STAT_DEFAULT_TIMER 60000
/* interval between BMP Statistics messages, in milliseconds */
int stat_msec;
/* only IPv4 & IPv6 / unicast & multicast supported for now */
#define BMP_MON_PREPOLICY (1 << 0)
#define BMP_MON_POSTPOLICY (1 << 1)
/* bitmask of BMP_MON_* flags for each AFI/SAFI */
uint8_t afimon[AFI_MAX][SAFI_MAX];
/* Route Mirroring enabled ("bmp mirror") */
bool mirror;
struct bmp_actives_head actives;
struct thread *t_stats;
/* established sessions (struct bmp) using this configuration */
struct bmp_session_head sessions;
/* Route Monitoring queue: updhash is used to check whether a tuple is
* already queued, updlist is the queue itself
*/
struct bmp_qhash_head updhash;
struct bmp_qlist_head updlist;
/* NOTE(review): presumably counts accepted connections and connections
* refused by the access lists
*/
uint64_t cnt_accept, cnt_aclrefused;
QOBJ_FIELDS
};
DECLARE_QOBJ_TYPE(bmp_targets)
/* per struct peer * data. Lookup by peer->qobj_node.nid, created on demand,
* deleted in peer_backward hook. */
PREDECL_HASH(bmp_peerh)
struct bmp_bgp_peer {
/* linkage into the bmp_peerh hash */
struct bmp_peerh_item bpi;
/* lookup key, taken from peer->qobj_node.nid */
uint64_t peerid;
/* struct peer *peer; */
/* NOTE(review): presumably copies of the BGP OPEN messages received
* from / sent to the peer, kept for the "peer up" notification -
* confirm against bgp_bmp.c
*/
uint8_t *open_rx;
size_t open_rx_len;
uint8_t *open_tx;
size_t open_tx_len;
};
/* per struct bgp * data */
PREDECL_HASH(bmp_bgph)
struct bmp_bgp {
/* linkage into the bmp_bgph hash */
struct bmp_bgph_item bbi;
struct bgp *bgp;
/* all "bmp targets" groups configured on this BGP instance */
struct bmp_targets_head targets;
/* the single Route Mirroring queue of this BGP instance */
struct bmp_mirrorq_head mirrorq;
/* NOTE(review): presumably the current and the highest observed size of
* the mirror queue - confirm against bgp_bmp.c
*/
size_t mirror_qsize, mirror_qsizemax;
/* NOTE(review): presumably the limit set with
* "bmp mirror buffer-limit" - confirm against bgp_bmp.c
*/
size_t mirror_qsizelimit;
};
/* Reason codes for BMP Peer Down Notification messages.
* NOTE(review): the numbering looks like the reason codes of RFC 7854
* section 4.9 (1/2 = closed by the local system with/without a NOTIFICATION,
* 3/4 = closed by the remote system with/without a NOTIFICATION) - verify
* against the RFC before relying on the details.
*/
enum {
BMP_PEERDOWN_LOCAL_NOTIFY = 1,
BMP_PEERDOWN_LOCAL_FSM = 2,
BMP_PEERDOWN_REMOTE_NOTIFY = 3,
BMP_PEERDOWN_REMOTE_CLOSE = 4,
BMP_PEERDOWN_ENDMONITOR = 5,
};
/* Statistics codes for BMP Statistics Report messages.
* According to doc/user/bmp.rst only codes 0, 2, 3, 4, 5, 11 and 65531 are
* actually implemented, and codes 3, 4 and 5 are counted per prefix rather
* than per update.
* NOTE(review): codes 0-13 look like the stat types of RFC 7854 section 4.8 -
* verify against the RFC.
*/
enum {
BMP_STATS_PFX_REJECTED = 0,
BMP_STATS_PFX_DUP_ADV = 1,
BMP_STATS_PFX_DUP_WITHDRAW = 2,
BMP_STATS_UPD_LOOP_CLUSTER = 3,
BMP_STATS_UPD_LOOP_ASPATH = 4,
BMP_STATS_UPD_LOOP_ORIGINATOR = 5,
BMP_STATS_UPD_LOOP_CONFED = 6,
BMP_STATS_SIZE_ADJ_RIB_IN = 7,
BMP_STATS_SIZE_LOC_RIB = 8,
BMP_STATS_SIZE_ADJ_RIB_IN_SAFI = 9,
BMP_STATS_SIZE_LOC_RIB_IN_SAFI = 10,
BMP_STATS_UPD_7606_WITHDRAW = 11,
BMP_STATS_PFX_7606_WITHDRAW = 12,
BMP_STATS_UPD_DUP = 13,
/* FRR-specific, documented as experimental: prefixes rejected due to an
* invalid next-hop
*/
BMP_STATS_FRR_NH_INVALID = 65531,
};
DECLARE_MGROUP(BMP)
#endif /*_BGP_BMP_H_*/

View File

@ -41,6 +41,7 @@
#include "bgpd/bgpd.h"
#include "bgpd/bgp_table.h"
#include "bgpd/bgp_dump.h"
#include "bgpd/bgp_bmp.h"
#include "bgpd/bgp_attr.h"
#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_errors.h"

View File

@ -27,6 +27,7 @@ vtysh_scan += \
# can be loaded as DSO - always include for vtysh
vtysh_scan += $(top_srcdir)/bgpd/bgp_rpki.c
vtysh_scan += $(top_srcdir)/bgpd/bgp_bmp.c
if ENABLE_BGP_VNC
vtysh_scan += \
@ -42,6 +43,9 @@ endif
if RPKI
module_LTLIBRARIES += bgpd/bgpd_rpki.la
endif
if BGP_BMP
module_LTLIBRARIES += bgpd/bgpd_bmp.la
endif
man8 += $(MANBUILD)/bgpd.8
endif
@ -129,6 +133,7 @@ noinst_HEADERS += \
bgpd/bgp_damp.h \
bgpd/bgp_debug.h \
bgpd/bgp_dump.h \
bgpd/bgp_bmp.h \
bgpd/bgp_ecommunity.h \
bgpd/bgp_encap_tlv.h \
bgpd/bgp_encap_types.h \
@ -216,6 +221,10 @@ bgpd_bgpd_rpki_la_CFLAGS = $(WERROR) $(RTRLIB_CFLAGS)
bgpd_bgpd_rpki_la_LDFLAGS = -avoid-version -module -shared -export-dynamic
bgpd_bgpd_rpki_la_LIBADD = $(RTRLIB_LIBS)
bgpd_bgpd_bmp_la_SOURCES = bgpd/bgp_bmp.c
bgpd_bgpd_bmp_la_LIBADD = lib/libfrrcares.la
bgpd_bgpd_bmp_la_LDFLAGS = -avoid-version -module -shared -export-dynamic
bgpd/bgp_evpn_vty_clippy.c: $(CLIPPY_DEPS)
bgpd/bgp_evpn_vty.$(OBJEXT): bgpd/bgp_evpn_vty_clippy.c
bgpd/bgp_vty_clippy.c: $(CLIPPY_DEPS)
@ -229,3 +238,5 @@ bgpd/bgp_routemap.$(OBJEXT): bgpd/bgp_routemap_clippy.c
bgpd/bgp_rpki_clippy.c: $(CLIPPY_DEPS)
$(AUTOMAKE_DUMMY)bgpd/bgpd_bgpd_rpki_la-bgp_rpki.lo: bgpd/bgp_rpki_clippy.c
$(AUTOMAKE_DUMMY)bgpd/bgpd_rpki_la-bgp_rpki.lo: bgpd/bgp_rpki_clippy.c
bgpd/bgp_bmp_clippy.c: $(CLIPPY_DEPS)
bgpd/bgp_bmp.lo: bgpd/bgp_bmp_clippy.c

View File

@ -479,12 +479,14 @@ AC_ARG_ENABLE([staticd],
AS_HELP_STRING([--disable-staticd], [do not build staticd]))
AC_ARG_ENABLE([fabricd],
AS_HELP_STRING([--disable-fabricd], [do not build fabricd]))
AC_ARG_ENABLE([bgp-announce],
AS_HELP_STRING([--disable-bgp-announce,], [turn off BGP route announcement]))
AC_ARG_ENABLE([vrrpd],
AS_HELP_STRING([--disable-vrrpd], [do not build vrrpd]))
AC_ARG_ENABLE([bgp-announce],
AS_HELP_STRING([--disable-bgp-announce,], [turn off BGP route announcement]))
AC_ARG_ENABLE([bgp-vnc],
AS_HELP_STRING([--disable-bgp-vnc],[turn off BGP VNC support]))
AC_ARG_ENABLE([bgp-bmp],
AS_HELP_STRING([--disable-bgp-bmp],[turn off BGP BMP support]))
AC_ARG_ENABLE([snmp],
AS_HELP_STRING([--enable-snmp], [enable SNMP support for agentx]))
AC_ARG_ENABLE([config_rollbacks],
@ -1450,6 +1452,16 @@ if test "x$enable_pcreposix" = "xyes"; then
fi
AC_SUBST([HAVE_LIBPCREPOSIX])
dnl ------------------
dnl check C-Ares library
dnl ------------------
PKG_CHECK_MODULES([CARES], [libcares], [
c_ares_found=true
],[
c_ares_found=false
])
AM_CONDITIONAL([CARES], [$c_ares_found])
dnl ##########################################################################
dnl test "${enable_clippy_only}" != "yes"
fi
@ -1519,9 +1531,21 @@ fi
NHRPD=""
case "$host_os" in
linux*)
if test "${enable_nhrpd}" != "no"; then
NHRPD="nhrpd"
fi
case "${enable_nhrpd}" in
no)
;;
yes)
if test "$c_ares_found" != "true" ; then
AC_MSG_ERROR([nhrpd requires libcares. Please install c-ares and its -dev headers.])
fi
NHRPD="nhrpd"
;;
*)
if test "$c_ares_found" = "true" ; then
NHRPD="nhrpd"
fi
;;
esac
;;
*)
if test "${enable_nhrpd}" = "yes"; then
@ -1555,21 +1579,28 @@ if test "${enable_bgp_vnc}" != "no";then
AC_DEFINE([ENABLE_BGP_VNC], [1], [Enable BGP VNC support])
fi
dnl BMP is built when requested (--enable-bgp-bmp, c-ares mandatory) or, by
dnl default, whenever c-ares is available.  AC_ARG_ENABLE([bgp-bmp]) stores
dnl the user's choice in $enable_bgp_bmp, so that is the variable to test.
bgpd_bmp=false
case "${enable_bgp_bmp}" in
no)
  ;;
yes)
  if test "$c_ares_found" != "true" ; then
    AC_MSG_ERROR([BMP support requires libcares. Please install c-ares and its -dev headers.])
  fi
  bgpd_bmp=true
  ;;
*)
  if test "$c_ares_found" = "true" ; then
    bgpd_bmp=true
  fi
  ;;
esac
dnl ##########################################################################
dnl LARGE if block
if test "${enable_clippy_only}" != "yes"; then
dnl ##########################################################################
dnl ------------------
dnl check C-Ares library
dnl ------------------
if test "${NHRPD}" != ""; then
PKG_CHECK_MODULES([CARES], [libcares], ,[
AC_MSG_ERROR([trying to build nhrpd, but libcares not found. install c-ares and its -dev headers.])
])
fi
AM_CONDITIONAL([CARES], [test "${NHRPD}" != ""])
dnl ------------------
dnl check Net-SNMP library
dnl ------------------
@ -2193,6 +2224,7 @@ AC_DEFINE_UNQUOTED([WATCHFRR_SH_PATH], ["${CFG_SBIN%/}/watchfrr.sh"], [path to w
dnl various features
AM_CONDITIONAL([SUPPORT_REALMS], [test "${enable_realms}" = "yes"])
AM_CONDITIONAL([ENABLE_BGP_VNC], [test x${enable_bgp_vnc} != xno])
AM_CONDITIONAL([BGP_BMP], [$bgpd_bmp])
dnl northbound
AM_CONDITIONAL([SQLITE3], [$SQLITE3])
AM_CONDITIONAL([CONFD], [test "x$enable_confd" != "x"])

1
debian/frr.install vendored
View File

@ -10,6 +10,7 @@ usr/lib/frr/watchfrr
usr/lib/frr/zebra
usr/lib/*/frr/modules/zebra_irdp.so
usr/lib/*/frr/modules/zebra_fpm.so
usr/lib/*/frr/modules/bgpd_bmp.so
usr/share/doc/frr/examples
usr/share/man/
usr/share/yang/

170
doc/user/bmp.rst Normal file
View File

@ -0,0 +1,170 @@
.. _bmp:
***
BMP
***
:abbr:`BMP` (BGP Monitoring Protocol, :rfc:`7854`) is used to send monitoring
data from BGP routers to network management entities.
Implementation characteristics
==============================
The `BMP` implementation in FRR has the following properties:
- only the :rfc:`7854` features are currently implemented. This means protocol
version 3 without any extensions. It is not possible to use an older draft
protocol version of BMP.
- the following statistics codes are implemented:
- 0: count of prefixes rejected
- 2: count of duplicate prefix withdrawals
- 3: count of **prefixes** with loop in cluster id
- 4: count of **prefixes** with loop in AS-path
- 5: count of **prefixes** with loop in originator
- 11: count of updates subjected to :rfc:`7606` "treat as withdrawal"
handling due to errors
- 65531: *experimental* count of prefixes rejected due to invalid next-hop
Note that stat items 3, 4 and 5 are specified to count updates, but FRR
implements them as prefix-based counters.
- **route mirroring** is fully implemented, however BGP OPEN messages are not
currently included in route mirroring messages. Their contents can be
extracted from the "peer up" notification for sessions that established
successfully. OPEN messages for failed sessions cannot currently be
mirrored.
- **route monitoring** is available for IPv4 and IPv6 AFIs, unicast and
multicast SAFIs. Other SAFIs (VPN, Labeled-Unicast, Flowspec, etc.) are not
currently supported.
- monitoring peers that have BGP **add-path** enabled on the session will
result in somewhat unpredictable behaviour. Currently, the outcome is:
- route mirroring functions as intended, messages are copied verbatim
- the add-path ID is never included in route monitoring messages
- if multiple paths were received from a peer, an unpredictable path is
picked and sent on the BMP session. The selection will differ for
pre-policy and post-policy monitoring sessions.
- as long as any path is present, something will be advertised on BMP
sessions. Only after the last path is gone a withdrawal will be sent on
BMP sessions.
- updates to additional paths will trigger BMP route monitoring messages.
There is no guarantee on consistency regarding which path is sent in these
messages.
- monitoring peers with :rfc:`5549` extended next-hops has not been tested.
Starting BMP
============
BMP is implemented as a loadable module. This means that to use BMP, ``bgpd``
must be started with the ``-M bmp`` option. It is not possible to enable BMP
if ``bgpd`` was started without this option.
Configuring BMP
===============
All of FRR's BMP configuration options are located inside the
:clicmd:`router bgp ASN` block. Configure BGP first before proceeding to BMP
setup.
There is one option that applies to the BGP instance as a whole:
.. index:: bmp mirror buffer-limit (0-4294967294)
.. clicmd:: [no] bmp mirror buffer-limit (0-4294967294)
This sets the maximum amount of memory used for buffering BGP messages
(updates, keepalives, ...) for sending in BMP Route Mirroring.
The buffer is for the entire BGP instance; if multiple BMP targets are
configured they reference the same buffer and do not consume additional
memory. Queue overhead is included in accounting this memory, so the
actual space available for BGP messages is slightly less than the value
configured here.
If the buffer fills up, the oldest messages are removed from the buffer and
any BMP sessions where the now-removed messages were still pending have
their **entire** queue flushed and a "Mirroring Messages Lost" BMP message
is sent.
BMP Route Monitoring is not affected by this option.
All other configuration is managed per targets:
.. index:: bmp targets NAME
.. clicmd:: [no] bmp targets NAME
Create/delete a targets group. As implied by the plural name, targets may
cover multiple outbound active BMP sessions as well as inbound passive
listeners.
If BMP sessions have the same configuration, putting them in the same
``bmp targets`` will reduce overhead.
BMP session configuration
-------------------------
Inside a ``bmp targets`` block, the following commands control session
establishment:
.. index:: bmp connect HOSTNAME port (1-65535) {min-retry MSEC|max-retry MSEC}
.. clicmd:: [no] bmp connect HOSTNAME port (1-65535) {min-retry MSEC|max-retry MSEC}
Add/remove an active outbound BMP session. HOSTNAME is resolved via DNS,
if multiple addresses are returned they are tried in nondeterministic
order. Only one connection will be established even if multiple addresses
are returned. ``min-retry`` and ``max-retry`` specify (in milliseconds)
bounds for exponential backoff.
.. warning::
``ip access-list`` and ``ipv6 access-list`` are checked for outbound
connections resulting from ``bmp connect`` statements.
.. index:: bmp listener <X:X::X:X|A.B.C.D> port (1-65535)
.. clicmd:: [no] bmp listener <X:X::X:X|A.B.C.D> port (1-65535)
Accept incoming BMP sessions on the specified address and port. You can
use ``0.0.0.0`` and ``::`` to listen on all IPv4/IPv6 addresses.
.. clicmd:: [no] ip access-list NAME
.. clicmd:: [no] ipv6 access-list NAME
Restrict BMP sessions to the addresses allowed by the respective access
lists. The access lists are checked for both passive and active BMP
sessions. Changes do not affect currently established sessions.
BMP data feed configuration
---------------------------
The following commands configure what BMP messages are sent on sessions
associated with a particular ``bmp targets``:
.. index:: bmp stats [interval (100-86400000)]
.. clicmd:: [no] bmp stats [interval (100-86400000)]
Send BMP Statistics (counter) messages at the specified interval (in
milliseconds.)
.. index:: bmp monitor AFI SAFI <pre-policy|post-policy>
.. clicmd:: [no] bmp monitor AFI SAFI <pre-policy|post-policy>
Perform Route Monitoring for the specified AFI and SAFI. Only IPv4 and
IPv6 are currently valid for AFI, and only unicast and multicast are valid
for SAFI. Other AFI/SAFI combinations may be added in the future.
All BGP neighbors are included in Route Monitoring. Options to select
a subset of BGP sessions may be added in the future.
.. index:: bmp mirror
.. clicmd:: [no] bmp mirror
Perform Route Mirroring for all BGP neighbors. Since this provides a
direct feed of BGP messages, there are no AFI/SAFI options to be
configured.
All BGP neighbors are included in Route Mirroring. Options to select
a subset of BGP sessions may be added in the future.

View File

@ -57,6 +57,7 @@ Protocols
static
vnc
vrrp
bmp
########
Appendix

View File

@ -7,6 +7,7 @@ user_RSTFILES = \
doc/user/ldpd.rst \
doc/user/basic.rst \
doc/user/bgp.rst \
doc/user/bmp.rst \
doc/user/bugs.rst \
doc/user/conf.py \
doc/user/eigrpd.rst \

View File

@ -151,6 +151,7 @@ const char *node_names[] = {
"bfd peer", /* BFD_PEER_NODE */
"openfabric", // OPENFABRIC_NODE
"vrrp", /* VRRP_NODE */
"bmp", /* BMP_NODE */
};
/* clang-format on */
@ -975,6 +976,7 @@ enum node_type node_parent(enum node_type node)
case BGP_IPV6M_NODE:
case BGP_EVPN_NODE:
case BGP_IPV6L_NODE:
case BMP_NODE:
ret = BGP_NODE;
break;
case BGP_EVPN_VNI_NODE:
@ -1491,6 +1493,7 @@ void cmd_exit(struct vty *vty)
case BGP_IPV6M_NODE:
case BGP_EVPN_NODE:
case BGP_IPV6L_NODE:
case BMP_NODE:
vty->node = BGP_NODE;
break;
case BGP_EVPN_VNI_NODE:

View File

@ -159,6 +159,7 @@ enum node_type {
BFD_PEER_NODE, /* BFD peer configuration mode. */
OPENFABRIC_NODE, /* OpenFabric router configuration node */
VRRP_NODE, /* VRRP node */
BMP_NODE, /* BMP config under router bgp */
NODE_TYPE_MAX, /* maximum */
};

View File

@ -84,6 +84,20 @@ static inline int64_t monotime_until(const struct timeval *ref,
return (int64_t)tv.tv_sec * 1000000LL + tv.tv_usec;
}
/* Translate a monotonic-clock timestamp into wall-clock time.
 *
 * The result of monotime_since() for "mono" is subtracted from the current
 * gettimeofday() value.  The full result is stored in *realout when realout
 * is non-NULL; the seconds part is returned either way.
 */
static inline time_t monotime_to_realtime(const struct timeval *mono,
					  struct timeval *realout)
{
	struct timeval elapsed, wallclock;

	monotime_since(mono, &elapsed);
	gettimeofday(&wallclock, NULL);

	/* wallclock := wallclock - elapsed */
	timersub(&wallclock, &elapsed, &wallclock);

	if (realout != NULL)
		*realout = wallclock;

	return wallclock.tv_sec;
}
/* Char buffer size for time-to-string api */
#define MONOTIME_STRLEN 32

275
lib/pullwr.c Normal file
View File

@ -0,0 +1,275 @@
/*
* Pull-driven write event handler
* Copyright (C) 2019 David Lamparter
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; see the file COPYING; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "zebra.h"
#include "pullwr.h"
#include "memory.h"
#include "monotime.h"
/* defaults */
#define PULLWR_THRESH 16384 /* size at which we start to call write() */
#define PULLWR_MAXSPIN 2500 /* max µs to spend grabbing more data */
/* State of one pull-driven writer; allocated by _pullwr_new() and released
* by pullwr_del().
*/
struct pullwr {
/* file descriptor data is written to */
int fd;
/* event loop the writer schedules itself on */
struct thread_master *tm;
/* writer == NULL <=> we're idle */
struct thread *writer;
/* opaque pointer handed back as first argument to fill() and err() */
void *arg;
/* called to ask the user for more data (via pullwr_write()) */
void (*fill)(void *, struct pullwr *);
/* called when writing failed; the bool is true when writev() returned 0
* and false when it returned an error
*/
void (*err)(void *, struct pullwr *, bool);
/* ring buffer (although it's "un-ringed" on resizing, it WILL wrap
* around if data is trickling in while keeping it at a constant size)
*
* bufsz = allocated size of buffer, valid = number of bytes queued,
* pos = offset of the first queued byte
*/
size_t bufsz, valid, pos;
/* running total of bytes handed to writev() successfully */
uint64_t total_written;
char *buffer;
size_t thresh; /* PULLWR_THRESH */
int64_t maxspin; /* PULLWR_MAXSPIN */
};
DEFINE_MTYPE_STATIC(LIB, PULLWR_HEAD, "pull-driven write controller")
DEFINE_MTYPE_STATIC(LIB, PULLWR_BUF, "pull-driven write buffer")
static int pullwr_run(struct thread *t);
struct pullwr *_pullwr_new(struct thread_master *tm, int fd,
void *arg,
void (*fill)(void *, struct pullwr *),
void (*err)(void *, struct pullwr *, bool))
{
struct pullwr *pullwr;
pullwr = XCALLOC(MTYPE_PULLWR_HEAD, sizeof(*pullwr));
pullwr->fd = fd;
pullwr->tm = tm;
pullwr->arg = arg;
pullwr->fill = fill;
pullwr->err = err;
pullwr->thresh = PULLWR_THRESH;
pullwr->maxspin = PULLWR_MAXSPIN;
return pullwr;
}
/* Destroy a pull-driven writer: cancel its pending event (if any) and free
* the data buffer and the pullwr itself. Data still sitting in the buffer is
* discarded; the file descriptor is not closed here.
*/
void pullwr_del(struct pullwr *pullwr)
{
THREAD_OFF(pullwr->writer);
/* the buffer has to go before the struct that points to it */
XFREE(MTYPE_PULLWR_BUF, pullwr->buffer);
XFREE(MTYPE_PULLWR_HEAD, pullwr);
}
/* Set the tunables of a pull-driven writer.
 *
 * max_spin_usec:   value stored in pullwr->maxspin (time budget of one run)
 * write_threshold: value stored in pullwr->thresh (fill level to aim for)
 *
 * A value of 0 selects the built-in default for that tunable.
 */
void pullwr_cfg(struct pullwr *pullwr, int64_t max_spin_usec,
		size_t write_threshold)
{
	if (max_spin_usec == 0)
		max_spin_usec = PULLWR_MAXSPIN;
	if (write_threshold == 0)
		write_threshold = PULLWR_THRESH;

	pullwr->maxspin = max_spin_usec;
	pullwr->thresh = write_threshold;
}
/* Put the writer into active state: unless an event is already pending,
 * schedule pullwr_run() through a zero-delay timer.
 */
void pullwr_bump(struct pullwr *pullwr)
{
	/* a non-NULL writer means a run is already scheduled */
	if (!pullwr->writer) {
		thread_add_timer(pullwr->tm, pullwr_run, pullwr, 0,
				 &pullwr->writer);
	}
}
/* Describe the queued bytes of the ring buffer as an iovec array.
 *
 * iov must have room for 2 entries.  Returns the number of entries filled
 * in: 0 if nothing is queued, 1 if the data is contiguous, 2 if it wraps
 * around the end of the buffer.
 */
static size_t pullwr_iov(struct pullwr *pullwr, struct iovec *iov)
{
	size_t tail;

	if (!pullwr->valid)
		return 0;

	/* the first segment always starts at the read position */
	iov[0].iov_base = pullwr->buffer + pullwr->pos;

	if (pullwr->pos + pullwr->valid > pullwr->bufsz) {
		/* wrapped: tail end of the buffer, then its beginning */
		tail = pullwr->bufsz - pullwr->pos;

		iov[0].iov_len = tail;
		iov[1].iov_base = pullwr->buffer;
		iov[1].iov_len = pullwr->valid - tail;
		return 2;
	}

	iov[0].iov_len = pullwr->valid;
	return 1;
}
/* Resize the ring buffer.
 *
 * need != 0: make sure at least "need" more bytes fit, growing the buffer if
 *            necessary.
 * need == 0: shrink request; the buffer is freed if nothing is queued, and
 *            otherwise reduced (down to pullwr->thresh * 2) if at least
 *            pullwr->thresh bytes of it are unused.
 *
 * Whenever the buffer is actually reallocated, queued data is moved to the
 * start of the new buffer ("un-ringed") and pullwr->pos is reset to 0.
 */
static void pullwr_resize(struct pullwr *pullwr, size_t need)
{
	struct iovec iov[2];
	size_t niov, newsize;
	char *newbuf;

	/* the buffer is maintained at pullwr->thresh * 2 since we'll be
	 * trying to fill it as long as it's anywhere below pullwr->thresh.
	 * That means we frequently end up a little short of it and then write
	 * something that goes over the threshold.  So, just use double.
	 */
	if (need) {
		/* resize up */
		if (pullwr->bufsz - pullwr->valid >= need)
			return;

		newsize = MAX((pullwr->valid + need) * 2, pullwr->thresh * 2);
		newbuf = XMALLOC(MTYPE_PULLWR_BUF, newsize);
	} else if (!pullwr->valid) {
		/* resize down, buffer empty */
		newsize = 0;
		newbuf = NULL;
	} else {
		/* resize down */
		if (pullwr->bufsz - pullwr->valid < pullwr->thresh)
			return;

		newsize = MAX(pullwr->valid, pullwr->thresh * 2);
		/* Already at the target size (the normal case for a buffer of
		 * thresh * 2 bytes after a partial write): nothing to gain
		 * from allocating a same-sized buffer and copying everything
		 * over.  The data simply stays where it is, possibly wrapped.
		 */
		if (newsize == pullwr->bufsz)
			return;
		newbuf = XMALLOC(MTYPE_PULLWR_BUF, newsize);
	}

	/* copy queued data over, linearizing it in the process */
	niov = pullwr_iov(pullwr, iov);
	if (niov >= 1) {
		memcpy(newbuf, iov[0].iov_base, iov[0].iov_len);
		if (niov >= 2)
			memcpy(newbuf + iov[0].iov_len,
			       iov[1].iov_base, iov[1].iov_len);
	}

	XFREE(MTYPE_PULLWR_BUF, pullwr->buffer);
	pullwr->buffer = newbuf;
	pullwr->bufsz = newsize;
	pullwr->pos = 0;
}
/* Queue "len" bytes from "data" for writing and put the writer into active
 * state.  The data is copied into the ring buffer, which is grown as needed.
 *
 * A zero-length write queues nothing but still activates the writer.
 */
void pullwr_write(struct pullwr *pullwr, const void *data, size_t len)
{
	/* Handle len == 0 up front: pullwr_resize(pullwr, 0) is the "shrink"
	 * request rather than "make room for 0 bytes", and with no buffer
	 * allocated the copy below would hand a NULL pointer to memcpy().
	 */
	if (len == 0) {
		pullwr_bump(pullwr);
		return;
	}

	pullwr_resize(pullwr, len);

	if (pullwr->pos + pullwr->valid > pullwr->bufsz) {
		/* queued data already wraps around; the free space is one
		 * contiguous region in the middle of the buffer
		 */
		size_t pos;

		pos = (pullwr->pos + pullwr->valid) % pullwr->bufsz;
		memcpy(pullwr->buffer + pos, data, len);
	} else {
		/* free space is at the end of the buffer and, if that is not
		 * enough, continues at its beginning
		 */
		size_t max1, len1;

		max1 = pullwr->bufsz - (pullwr->pos + pullwr->valid);
		max1 = MIN(max1, len);

		memcpy(pullwr->buffer + pullwr->pos + pullwr->valid,
		       data, max1);
		len1 = len - max1;

		if (len1)
			memcpy(pullwr->buffer, (char *)data + max1, len1);
	}
	pullwr->valid += len;

	pullwr_bump(pullwr);
}
/* Event handler doing the actual work; scheduled as a timer event by
* pullwr_bump() and as a write event by itself.
*
* Each round calls fill() until enough data is buffered (or fill() stops
* delivering, or the time budget is used up) and then pushes the buffer out
* with writev(). Always returns 0.
*/
static int pullwr_run(struct thread *t)
{
struct pullwr *pullwr = THREAD_ARG(t);
struct iovec iov[2];
size_t niov, lastvalid;
ssize_t nwr;
struct timeval t0;
bool maxspun = false;
monotime(&t0);
do {
/* start with lastvalid != valid so the "no progress" check below does
* not prevent the first fill() call
*/
lastvalid = pullwr->valid - 1;
/* keep asking for data until the threshold is reached, fill() adds
* nothing anymore, or the time budget is exhausted
*/
while (pullwr->valid < pullwr->thresh
&& pullwr->valid != lastvalid
&& !maxspun) {
lastvalid = pullwr->valid;
pullwr->fill(pullwr->arg, pullwr);
/* check after doing at least one fill() call so we
* don't spin without making progress on slow boxes
*/
if (!maxspun && monotime_since(&t0, NULL)
>= pullwr->maxspin)
maxspun = true;
}
if (pullwr->valid == 0) {
/* we made a fill() call above that didn't feed any
* data in, and we have nothing more queued, so we go
* into idle, i.e. no calling thread_add_write()
*/
pullwr_resize(pullwr, 0);
return 0;
}
niov = pullwr_iov(pullwr, iov);
assert(niov);
nwr = writev(pullwr->fd, iov, niov);
if (nwr < 0) {
/* socket buffer is full; wait for the fd to become writable */
if (errno == EAGAIN || errno == EWOULDBLOCK)
break;
/* real write error; pullwr is not touched after err() since the
* callback is allowed to pullwr_del() it
*/
pullwr->err(pullwr->arg, pullwr, false);
return 0;
}
if (nwr == 0) {
/* reported to the user with the err() flag set to true */
pullwr->err(pullwr->arg, pullwr, true);
return 0;
}
/* consume the written bytes from the front of the ring buffer */
pullwr->total_written += nwr;
pullwr->valid -= nwr;
pullwr->pos += nwr;
pullwr->pos %= pullwr->bufsz;
} while (pullwr->valid == 0 && !maxspun);
/* pullwr->valid != 0 implies we did an incomplete write, i.e. socket
* is full and we go wait until it's available for writing again.
*/
thread_add_write(pullwr->tm, pullwr_run, pullwr, pullwr->fd,
&pullwr->writer);
/* if we hit the time limit, just keep the buffer, we'll probably need
* it anyway & another run is already coming up.
*/
if (!maxspun)
pullwr_resize(pullwr, 0);
return 0;
}
/* Report counters of a pull-driven writer.
 *
 * *total_written:  bytes successfully passed to writev() so far
 * *pending:        bytes currently queued in the pullwr buffer
 * *kernel_pending: result of the TIOCOUTQ ioctl on the fd, or 0 if that
 *                  ioctl fails
 */
void pullwr_stats(struct pullwr *pullwr, uint64_t *total_written,
		size_t *pending, size_t *kernel_pending)
{
	int outq = 0;

	*total_written = pullwr->total_written;
	*pending = pullwr->valid;

	*kernel_pending = (ioctl(pullwr->fd, TIOCOUTQ, &outq) == 0) ? outq : 0;
}

110
lib/pullwr.h Normal file
View File

@ -0,0 +1,110 @@
/*
* Pull-driven write event handler
* Copyright (C) 2019 David Lamparter
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; see the file COPYING; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _WRITEPOLL_H
#define _WRITEPOLL_H
#include <stdbool.h>
#include <stdint.h>
#include "thread.h"
#include "stream.h"
struct pullwr;
/* This is a "pull-driven" write event handler. Instead of having some buffer
* or being driven by the availability of data, it triggers on the space being
* available on the socket for data to be written on and then calls fill() to
* get data to be sent.
*
* pullwr_* maintains an "idle" vs. "active" state, going into idle when a
* fill() call completes without feeding more data into it. The overall
* semantics are:
* - to put data out, call pullwr_write(). This is possible from both inside
* fill() callbacks or anywhere else. Doing so puts the pullwr into
* active state.
* - in active state, the fill() callback will be called and should feed more
* data in. It should NOT loop to push out more than one "unit" of data;
* the pullwr code handles this by calling fill() until it has enough data.
* - if there's nothing more to be sent, fill() returns without doing anything
* and pullwr goes into idle state after flushing all buffered data out.
* - when new data becomes available, pullwr_bump() should be called to put
* the pullwr back into active mode so it will collect data from fill(),
* or you can directly call pullwr_write().
* - only calling pullwr_write() from within fill() is the cleanest way of
* doing things.
*
* When the err() callback is called, the pullwr should be considered unusable
* and released with pullwr_del(). This can be done from inside the callback,
* the pullwr code holds no more references on it when calling err().
*/
extern struct pullwr *_pullwr_new(struct thread_master *tm, int fd,
void *arg,
void (*fill)(void *, struct pullwr *),
void (*err)(void *, struct pullwr *, bool eof));
extern void pullwr_del(struct pullwr *pullwr);
/* type-checking wrapper around _pullwr_new(). makes sure fill() and err()
* take a first argument whose type is identical to the type of arg.
* => use "void fill(struct mystruct *arg, ...)" - no "void *arg"
*
* The callbacks are first assigned to function pointers built with
* typeof(arg), so a mismatching callback signature is diagnosed by the
* compiler; the (void *) casts afterwards only adapt them to the generic
* _pullwr_new() prototype. Relies on the typeof and statement expression
* compiler extensions, and evaluates to the new "struct pullwr *".
*/
#define pullwr_new(tm, fd, arg, fill, err) ({ \
void (*fill_typechk)(typeof(arg), struct pullwr *) = fill; \
void (*err_typechk)(typeof(arg), struct pullwr *, bool) = err; \
_pullwr_new(tm, fd, arg, (void *)fill_typechk, (void *)err_typechk); \
})
/* max_spin_usec is the time after which the pullwr event handler will stop
* trying to get more data from fill() and yield control back to the
* thread_master. It does reschedule itself to continue later; this is
* only to make sure we don't freeze the entire process if we're piping a
* lot of data to a local endpoint that reads quickly (i.e. no backpressure)
*
* default: 2500 (2.5 ms)
*
* write_threshold is the amount of data buffered from fill() calls at which
* the pullwr code starts calling write(). But this is not a "limit".
* pullwr will keep poking fill() for more data until
* (a) max_spin_usec is reached; fill() will be called again later after
* returning to the thread_master to give other events a chance to run
* (b) fill() returns without pushing any data onto the pullwr with
* pullwr_write(), so fill() will NOT be called again until a call to
* pullwr_bump() or pullwr_write() comes in.
*
* default: 16384 (16 kB)
*
* passing 0 for either value (or not calling it at all) uses the default.
*/
extern void pullwr_cfg(struct pullwr *pullwr, int64_t max_spin_usec,
size_t write_threshold);
extern void pullwr_bump(struct pullwr *pullwr);
extern void pullwr_write(struct pullwr *pullwr,
const void *data, size_t len);
/* Convenience wrapper: queue the contents of stream "s", from the start of
 * its data up to stream_get_endp(s), with pullwr_write().
 */
static inline void pullwr_write_stream(struct pullwr *pullwr,
		struct stream *s)
{
	const void *payload = s->data;
	size_t nbytes = stream_get_endp(s);

	pullwr_write(pullwr, payload, nbytes);
}
extern void pullwr_stats(struct pullwr *pullwr, uint64_t *total_written,
size_t *pending, size_t *kernel_pending);
#endif /* _WRITEPOLL_H */

View File

@ -65,6 +65,7 @@ lib_libfrr_la_SOURCES = \
lib/prefix.c \
lib/privs.c \
lib/ptm_lib.c \
lib/pullwr.c \
lib/qobj.c \
lib/ringbuf.c \
lib/routemap.c \
@ -203,6 +204,7 @@ pkginclude_HEADERS += \
lib/printfrr.h \
lib/privs.h \
lib/ptm_lib.h \
lib/pullwr.h \
lib/pw.h \
lib/qobj.h \
lib/queue.h \

View File

@ -351,6 +351,7 @@ if __name__ == '__main__':
macros = Macros()
macros.load('lib/route_types.h')
macros.load(os.path.join(basepath, 'lib/command.h'))
macros.load(os.path.join(basepath, 'bgpd/bgp_vty.h'))
# sigh :(
macros['PROTO_REDIST_STR'] = 'FRR_REDIST_STR_ISISD'

View File

@ -634,6 +634,7 @@ fi
%{_libdir}/frr/modules/bgpd_rpki.so
%endif
%{_libdir}/frr/modules/zebra_irdp.so
%{_libdir}/frr/modules/bgpd_bmp.so
%{_bindir}/*
%config(noreplace) %{configdir}/[!v]*.conf*
%config(noreplace) %attr(750,%{frr_user},%{frr_user}) %{configdir}/daemons

View File

@ -1260,6 +1260,8 @@ static struct cmd_node bgp_vrf_policy_node = {BGP_VRF_POLICY_NODE,
static struct cmd_node bgp_vnc_l2_group_node = {
BGP_VNC_L2_GROUP_NODE, "%s(config-router-vnc-l2-group)# "};
static struct cmd_node bmp_node = {BMP_NODE, "%s(config-bgp-bmp)# "};
static struct cmd_node ospf_node = {OSPF_NODE, "%s(config-router)# "};
static struct cmd_node eigrp_node = {EIGRP_NODE, "%s(config-router)# "};
@ -1335,7 +1337,7 @@ DEFUNSH(VTYSH_REALLYALL, vtysh_end_all, vtysh_end_all_cmd, "end",
}
DEFUNSH(VTYSH_BGPD, router_bgp, router_bgp_cmd,
"router bgp [(1-4294967295) [<view|vrf> WORD]]",
"router bgp [(1-4294967295)$instasn [<view|vrf> WORD]]",
ROUTER_STR BGP_STR AS_STR
"BGP view\nBGP VRF\n"
"View/VRF name\n")
@ -1478,6 +1480,18 @@ DEFUNSH(VTYSH_BGPD,
return CMD_SUCCESS;
}
/* "bmp targets BMPTARGETS" as seen by vtysh: the body here only moves the
* CLI into BMP_NODE; it does not touch any BMP configuration itself.
*/
DEFUNSH(VTYSH_BGPD,
bmp_targets,
bmp_targets_cmd,
"bmp targets BMPTARGETS",
"BGP Monitoring Protocol\n"
"Create BMP target group\n"
"Name of the BMP target group\n")
{
vty->node = BMP_NODE;
return CMD_SUCCESS;
}
DEFUNSH(VTYSH_BGPD, address_family_evpn, address_family_evpn_cmd,
"address-family <l2vpn evpn>",
"Enter Address Family command mode\n"
@ -1842,6 +1856,7 @@ static int vtysh_exit(struct vty *vty)
case BGP_VNC_DEFAULTS_NODE:
case BGP_VNC_NVE_GROUP_NODE:
case BGP_VNC_L2_GROUP_NODE:
case BMP_NODE:
vty->node = BGP_NODE;
break;
case BGP_EVPN_VNI_NODE:
@ -1932,6 +1947,19 @@ DEFUNSH(VTYSH_BGPD, rpki_quit, rpki_quit_cmd, "quit",
return rpki_exit(self, vty, argc, argv);
}
/* "exit" inside BMP_NODE: delegates to vtysh_exit() for the node change and
* always reports success.
*/
DEFUNSH(VTYSH_BGPD, bmp_exit, bmp_exit_cmd, "exit",
"Exit current mode and down to previous mode\n")
{
vtysh_exit(vty);
return CMD_SUCCESS;
}
/* "quit" inside BMP_NODE: alias that forwards to the bmp_exit handler */
DEFUNSH(VTYSH_BGPD, bmp_quit, bmp_quit_cmd, "quit",
"Exit current mode and down to previous mode\n")
{
return bmp_exit(self, vty, argc, argv);
}
DEFUNSH(VTYSH_VRF, exit_vrf_config, exit_vrf_config_cmd, "exit-vrf",
"Exit from VRF configuration mode\n")
{
@ -3620,6 +3648,7 @@ void vtysh_init_vty(void)
install_node(&openfabric_node, NULL);
install_node(&vty_node, NULL);
install_node(&rpki_node, NULL);
install_node(&bmp_node, NULL);
#if HAVE_BFDD > 0
install_node(&bfd_node, NULL);
install_node(&bfd_peer_node, NULL);
@ -3853,6 +3882,11 @@ void vtysh_init_vty(void)
install_element(BGP_FLOWSPECV4_NODE, &exit_address_family_cmd);
install_element(BGP_FLOWSPECV6_NODE, &exit_address_family_cmd);
install_element(BGP_NODE, &bmp_targets_cmd);
install_element(BMP_NODE, &bmp_exit_cmd);
install_element(BMP_NODE, &bmp_quit_cmd);
install_element(BMP_NODE, &vtysh_end_all_cmd);
install_element(CONFIG_NODE, &rpki_cmd);
install_element(RPKI_NODE, &rpki_exit_cmd);
install_element(RPKI_NODE, &rpki_quit_cmd);