bgpd: Add BGP Extended message support

Implement https://www.rfc-editor.org/rfc/rfc8654.txt

```
> | jq '."192.168.10.25".neighborCapabilities.extendedMessage'
"advertisedAndReceived"
```

The other side of the session is BIRD:

```
BIRD 2.0.7 ready.
Name       Proto      Table      State  Since         Info
v4         BGP        ---        up     19:39:15.689  Established
  BGP state:          Established
    Neighbor address: 192.168.10.123
    Neighbor AS:      65534
    Local AS:         65025
    Neighbor ID:      192.168.100.1
    Local capabilities
      Multiprotocol
        AF announced: ipv4
      Route refresh
      Extended message
      Graceful restart
      4-octet AS numbers
      Enhanced refresh
      Long-lived graceful restart
    Neighbor capabilities
      Multiprotocol
        AF announced: ipv4
      Route refresh
      Extended message
      Graceful restart
      4-octet AS numbers
      ADD-PATH
        RX: ipv4
        TX:
      Enhanced refresh
    Session:          external AS4
    Source address:   192.168.10.25
    Hold timer:       140.139/180
    Keepalive timer:  9.484/60
  Channel ipv4
    State:          UP
    Table:          master4
    Preference:     100
    Input filter:   ACCEPT
    Output filter:  ACCEPT
    Routes:         9 imported, 3 exported, 8 preferred
    Route change stats:     received   rejected   filtered    ignored   accepted
      Import updates:              9          0          0          0          9
      Import withdraws:            2          0        ---          2          0
      Export updates:             11          8          0        ---          3
      Export withdraws:            0        ---        ---        ---          0
    BGP Next hop:   192.168.10.25
```

Also tested against the following peers to make sure backward compatibility is preserved:
	ExaBGP 4.0.2-1c737d99.
	Arista vEOS 4.21.14M

Testing by injecting 10k routes with:
```
sharp install routes 172.16.0.1 nexthop 192.168.10.123 10000
```

Before extended message support:
```
2021/03/01 07:18:51 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:51 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:51 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:51 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:51 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:51 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:52 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:52 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:52 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:52 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:52 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:52 BGP: u1:s1 send UPDATE len 2186 (max message len: 4096) numpfx 427
2021/03/01 07:18:53 BGP: u1:s1 send UPDATE len 3421 (max message len: 4096) numpfx 674
```

After extended message support:
```
2021/03/01 07:20:11 BGP: u1:s1 send UPDATE len 50051 (max message len: 65535) numpfx 10000
```

Signed-off-by: Donatas Abraitis <donatas.abraitis@gmail.com>
This commit is contained in:
Donatas Abraitis 2021-02-25 19:46:49 +02:00
parent b764e4682a
commit ef56aee47c
10 changed files with 124 additions and 28 deletions

View File

@ -3103,7 +3103,7 @@ bgp_attr_parse_ret_t bgp_attr_parse(struct peer *peer, struct attr *attr,
* a stack buffer, since they perform bounds checking
* and we are working with untrusted data.
*/
unsigned char ndata[BGP_MAX_PACKET_SIZE];
unsigned char ndata[peer->max_packet_size];
memset(ndata, 0x00, sizeof(ndata));
size_t lfl =
CHECK_FLAG(flag, BGP_ATTR_FLAG_EXTLEN) ? 2 : 1;

View File

@ -228,7 +228,7 @@ static int bgp_process_reads(struct thread *thread)
pktsize = ntohs(pktsize);
/* if this fails we are seriously screwed */
assert(pktsize <= BGP_MAX_PACKET_SIZE);
assert(pktsize <= peer->max_packet_size);
/*
* If we have that much data, chuck it into its own
@ -255,7 +255,7 @@ static int bgp_process_reads(struct thread *thread)
/* wipe buffer just in case someone screwed up */
ringbuf_wipe(peer->ibuf_work);
} else {
assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);
assert(ringbuf_space(peer->ibuf_work) >= peer->max_packet_size);
thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
&peer->t_read);
@ -454,7 +454,7 @@ static uint16_t bgp_read(struct peer *peer)
size_t readsize; // how many bytes we want to read
ssize_t nbytes; // how many bytes we actually read
uint16_t status = 0;
static uint8_t ibw[BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX];
uint8_t ibw[peer->max_packet_size * BGP_READ_PACKET_MAX];
readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw));
nbytes = read(peer->fd, ibw, readsize);
@ -558,7 +558,7 @@ static bool validate_header(struct peer *peer)
}
/* Minimum packet length check. */
if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE)
if ((size < BGP_HEADER_SIZE) || (size > peer->max_packet_size)
|| (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE)
|| (type == BGP_MSG_UPDATE && size < BGP_MSG_UPDATE_MIN_SIZE)
|| (type == BGP_MSG_NOTIFY && size < BGP_MSG_NOTIFY_MIN_SIZE)

View File

@ -538,6 +538,22 @@ static as_t bgp_capability_as4(struct peer *peer, struct capability_header *hdr)
return as4;
}
/*
 * Process a received BGP Extended Message capability.
 *
 * The capability is accepted only when its advertised data length equals
 * CAPABILITY_CODE_EXT_MESSAGE_LEN; in that case the
 * PEER_CAP_EXTENDED_MESSAGE_RCV flag is set in peer->cap.
 *
 * Returns 0 when the capability was accepted, -1 (after logging an
 * EC_BGP_PKT_OPEN error) when the length field is wrong.
 */
static int bgp_capability_ext_message(struct peer *peer,
				      struct capability_header *hdr)
{
	/* Well-formed capability: remember that the peer sent it. */
	if (hdr->length == CAPABILITY_CODE_EXT_MESSAGE_LEN) {
		SET_FLAG(peer->cap, PEER_CAP_EXTENDED_MESSAGE_RCV);
		return 0;
	}

	/* Anything else is a malformed capability; report and reject it. */
	flog_err(
		EC_BGP_PKT_OPEN,
		"%s: BGP Extended Message capability has incorrect data length %d",
		peer->host, hdr->length);

	return -1;
}
static int bgp_capability_addpath(struct peer *peer,
struct capability_header *hdr)
{
@ -761,6 +777,7 @@ static const struct message capcode_str[] = {
{CAPABILITY_CODE_ORF_OLD, "ORF (Old)"},
{CAPABILITY_CODE_FQDN, "FQDN"},
{CAPABILITY_CODE_ENHANCED_RR, "Enhanced Route Refresh"},
{CAPABILITY_CODE_EXT_MESSAGE, "BGP Extended Message"},
{0}};
/* Minimum sizes for length field of each cap (so not inc. the header) */
@ -778,6 +795,7 @@ static const size_t cap_minsizes[] = {
[CAPABILITY_CODE_ORF_OLD] = CAPABILITY_CODE_ORF_LEN,
[CAPABILITY_CODE_FQDN] = CAPABILITY_CODE_MIN_FQDN_LEN,
[CAPABILITY_CODE_ENHANCED_RR] = CAPABILITY_CODE_ENHANCED_LEN,
[CAPABILITY_CODE_EXT_MESSAGE] = CAPABILITY_CODE_EXT_MESSAGE_LEN,
};
/* value the capability must be a multiple of.
@ -799,6 +817,7 @@ static const size_t cap_modsizes[] = {
[CAPABILITY_CODE_ORF_OLD] = 1,
[CAPABILITY_CODE_FQDN] = 1,
[CAPABILITY_CODE_ENHANCED_RR] = 1,
[CAPABILITY_CODE_EXT_MESSAGE] = 1,
};
/**
@ -867,6 +886,7 @@ static int bgp_capability_parse(struct peer *peer, size_t length,
case CAPABILITY_CODE_ENHE:
case CAPABILITY_CODE_FQDN:
case CAPABILITY_CODE_ENHANCED_RR:
case CAPABILITY_CODE_EXT_MESSAGE:
/* Check length. */
if (caphdr.length < cap_minsizes[caphdr.code]) {
zlog_info(
@ -955,6 +975,9 @@ static int bgp_capability_parse(struct peer *peer, size_t length,
case CAPABILITY_CODE_ENHE:
ret = bgp_capability_enhe(peer, &caphdr);
break;
case CAPABILITY_CODE_EXT_MESSAGE:
ret = bgp_capability_ext_message(peer, &caphdr);
break;
case CAPABILITY_CODE_FQDN:
ret = bgp_capability_hostname(peer, &caphdr);
break;
@ -1191,6 +1214,12 @@ int bgp_open_option_parse(struct peer *peer, uint8_t length, int *mp_capability)
}
}
/* Extended Message Support */
peer->max_packet_size =
CHECK_FLAG(peer->cap, PEER_CAP_EXTENDED_MESSAGE_RCV)
? BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE
: BGP_MAX_PACKET_SIZE;
/* Check there are no common AFI/SAFIs and send Unsupported Capability
error. */
if (*mp_capability
@ -1476,6 +1505,13 @@ void bgp_open_capability(struct stream *s, struct peer *peer)
local_as = peer->local_as;
stream_putl(s, local_as);
/* Extended Message Support */
SET_FLAG(peer->cap, PEER_CAP_EXTENDED_MESSAGE_ADV);
stream_putc(s, BGP_OPEN_OPT_CAP);
stream_putc(s, CAPABILITY_CODE_EXT_MESSAGE_LEN + 2);
stream_putc(s, CAPABILITY_CODE_EXT_MESSAGE);
stream_putc(s, CAPABILITY_CODE_EXT_MESSAGE_LEN);
/* AddPath */
FOREACH_AFI_SAFI (afi, safi) {
if (peer->afc[afi][safi]) {

View File

@ -54,6 +54,7 @@ struct graceful_restart_af {
#define CAPABILITY_CODE_ENHE 5 /* Extended Next Hop Encoding */
#define CAPABILITY_CODE_REFRESH_OLD 128 /* Route Refresh Capability(cisco) */
#define CAPABILITY_CODE_ORF_OLD 130 /* Cooperative Route Filtering Capability(cisco) */
#define CAPABILITY_CODE_EXT_MESSAGE 6 /* Extended Message Support */
/* Capability Length */
#define CAPABILITY_CODE_MP_LEN 4
@ -66,6 +67,7 @@ struct graceful_restart_af {
#define CAPABILITY_CODE_MIN_FQDN_LEN 2
#define CAPABILITY_CODE_ENHANCED_LEN 0
#define CAPABILITY_CODE_ORF_LEN 5
#define CAPABILITY_CODE_EXT_MESSAGE_LEN 0 /* Extended Message Support */
/* Cooperative Route Filtering Capability. */

View File

@ -144,7 +144,7 @@ static struct stream *bgp_update_packet_eor(struct peer *peer, afi_t afi,
zlog_debug("send End-of-RIB for %s to %s",
get_afi_safi_str(afi, safi, false), peer->host);
s = stream_new(BGP_MAX_PACKET_SIZE);
s = stream_new(peer->max_packet_size);
/* Make BGP update packet. */
bgp_packet_set_marker(s, BGP_MSG_UPDATE);
@ -726,7 +726,7 @@ void bgp_notify_send_with_data(struct peer *peer, uint8_t code,
/* ============================================== */
/* Allocate new stream. */
s = stream_new(BGP_MAX_PACKET_SIZE);
s = stream_new(peer->max_packet_size);
/* Make notify packet. */
bgp_packet_set_marker(s, BGP_MSG_NOTIFY);
@ -864,7 +864,7 @@ void bgp_route_refresh_send(struct peer *peer, afi_t afi, safi_t safi,
/* Convert AFI, SAFI to values for packet. */
bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
s = stream_new(BGP_MAX_PACKET_SIZE);
s = stream_new(peer->max_packet_size);
/* Make BGP update packet. */
if (CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_NEW_RCV))
@ -963,7 +963,7 @@ void bgp_capability_send(struct peer *peer, afi_t afi, safi_t safi,
/* Convert AFI, SAFI to values for packet. */
bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
s = stream_new(BGP_MAX_PACKET_SIZE);
s = stream_new(peer->max_packet_size);
/* Make BGP update packet. */
bgp_packet_set_marker(s, BGP_MSG_CAPABILITY);

View File

@ -79,8 +79,11 @@ static void update_subgroup_checkin(struct update_subgroup *subgrp,
subgrp->uptime = bgp_clock();
}
static void sync_init(struct update_subgroup *subgrp)
static void sync_init(struct update_subgroup *subgrp,
struct update_group *updgrp)
{
struct peer *peer = UPDGRP_PEER(updgrp);
subgrp->sync =
XCALLOC(MTYPE_BGP_SYNCHRONISE, sizeof(struct bgp_synchronize));
bgp_adv_fifo_init(&subgrp->sync->update);
@ -91,7 +94,7 @@ static void sync_init(struct update_subgroup *subgrp)
/* We use a larger buffer for subgrp->work in the event that:
* - We RX a BGP_UPDATE where the attributes alone are just
* under BGP_MAX_PACKET_SIZE
* under 4096 or 65535 (if Extended Message capability negotiated).
* - The user configures an outbound route-map that does many as-path
* prepends or adds many communities. At most they can have
* CMD_ARGC_MAX
@ -103,9 +106,9 @@ static void sync_init(struct update_subgroup *subgrp)
* bounds
* checking for every single attribute as we construct an UPDATE.
*/
subgrp->work =
stream_new(BGP_MAX_PACKET_SIZE + BGP_MAX_PACKET_SIZE_OVERFLOW);
subgrp->scratch = stream_new(BGP_MAX_PACKET_SIZE);
subgrp->work = stream_new(peer->max_packet_size
+ BGP_MAX_PACKET_SIZE_OVERFLOW);
subgrp->scratch = stream_new(peer->max_packet_size);
}
static void sync_delete(struct update_subgroup *subgrp)
@ -143,6 +146,7 @@ static void conf_copy(struct peer *dst, struct peer *src, afi_t afi,
dst->flags = src->flags;
dst->af_flags[afi][safi] = src->af_flags[afi][safi];
dst->pmax_out[afi][safi] = src->pmax_out[afi][safi];
dst->max_packet_size = src->max_packet_size;
XFREE(MTYPE_BGP_PEER_HOST, dst->host);
dst->host = XSTRDUP(MTYPE_BGP_PEER_HOST, src->host);
@ -800,7 +804,7 @@ update_subgroup_create(struct update_group *updgrp)
subgrp = XCALLOC(MTYPE_BGP_UPD_SUBGRP, sizeof(struct update_subgroup));
update_subgroup_checkin(subgrp, updgrp);
subgrp->v_coalesce = (UPDGRP_INST(updgrp))->coalesce_time;
sync_init(subgrp);
sync_init(subgrp, updgrp);
bpacket_queue_init(SUBGRP_PKTQ(subgrp));
bpacket_queue_add(SUBGRP_PKTQ(subgrp), NULL, NULL);
TAILQ_INIT(&(subgrp->adjq));

View File

@ -898,11 +898,13 @@ next:
packet = stream_dup(s);
bgp_packet_set_size(packet);
if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0))
zlog_debug("u%" PRIu64 ":s%" PRIu64" send UPDATE len %zd numpfx %d",
subgrp->update_group->id, subgrp->id,
(stream_get_endp(packet)
- stream_get_getp(packet)),
num_pfx);
zlog_debug(
"u%" PRIu64 ":s%" PRIu64
" send UPDATE len %zd (max message len: %hu) numpfx %d",
subgrp->update_group->id, subgrp->id,
(stream_get_endp(packet)
- stream_get_getp(packet)),
peer->max_packet_size, num_pfx);
pkt = bpacket_queue_add(SUBGRP_PKTQ(subgrp), packet, &vecarr);
stream_reset(s);
stream_reset(snlri);
@ -1128,7 +1130,7 @@ void subgroup_default_update_packet(struct update_subgroup *subgrp,
tx_id_buf, attrstr);
}
s = stream_new(BGP_MAX_PACKET_SIZE);
s = stream_new(peer->max_packet_size);
/* Make BGP update packet. */
bgp_packet_set_marker(s, BGP_MSG_UPDATE);
@ -1206,7 +1208,7 @@ void subgroup_default_withdraw_packet(struct update_subgroup *subgrp)
tx_id_buf);
}
s = stream_new(BGP_MAX_PACKET_SIZE);
s = stream_new(peer->max_packet_size);
/* Make BGP update packet. */
bgp_packet_set_marker(s, BGP_MSG_UPDATE);

View File

@ -12971,6 +12971,28 @@ static void bgp_show_peer(struct vty *vty, struct peer *p, bool use_json,
"received");
}
/* Extended Message Support */
if (CHECK_FLAG(p->cap,
PEER_CAP_EXTENDED_MESSAGE_ADV)
&& CHECK_FLAG(
p->cap,
PEER_CAP_EXTENDED_MESSAGE_RCV))
json_object_string_add(
json_cap, "extendedMessage",
"advertisedAndReceived");
else if (CHECK_FLAG(
p->cap,
PEER_CAP_EXTENDED_MESSAGE_ADV))
json_object_string_add(
json_cap, "extendedMessage",
"advertised");
else if (CHECK_FLAG(
p->cap,
PEER_CAP_EXTENDED_MESSAGE_RCV))
json_object_string_add(
json_cap, "extendedMessage",
"received");
/* AddPath */
if (CHECK_FLAG(p->cap, PEER_CAP_ADDPATH_RCV)
|| CHECK_FLAG(p->cap,
@ -13449,6 +13471,29 @@ static void bgp_show_peer(struct vty *vty, struct peer *p, bool use_json,
vty_out(vty, "\n");
}
/* Extended Message Support */
if (CHECK_FLAG(p->cap,
PEER_CAP_EXTENDED_MESSAGE_RCV)
|| CHECK_FLAG(
p->cap,
PEER_CAP_EXTENDED_MESSAGE_ADV)) {
vty_out(vty, " Extended Message:");
if (CHECK_FLAG(
p->cap,
PEER_CAP_EXTENDED_MESSAGE_ADV))
vty_out(vty, " advertised");
if (CHECK_FLAG(
p->cap,
PEER_CAP_EXTENDED_MESSAGE_RCV))
vty_out(vty, " %sreceived",
CHECK_FLAG(
p->cap,
PEER_CAP_EXTENDED_MESSAGE_ADV)
? "and "
: "");
vty_out(vty, "\n");
}
/* AddPath */
if (CHECK_FLAG(p->cap, PEER_CAP_ADDPATH_RCV)
|| CHECK_FLAG(p->cap,

View File

@ -1345,6 +1345,7 @@ struct peer *peer_new(struct bgp *bgp)
peer->bgp = bgp_lock(bgp);
peer = peer_lock(peer); /* initial reference */
peer->password = NULL;
peer->max_packet_size = BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE;
/* Set default flags. */
FOREACH_AFI_SAFI (afi, safi) {
@ -1379,7 +1380,7 @@ struct peer *peer_new(struct bgp *bgp)
/* We use a larger buffer for peer->obuf_work in the event that:
* - We RX a BGP_UPDATE where the attributes alone are just
* under BGP_MAX_PACKET_SIZE
* under BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE.
* - The user configures an outbound route-map that does many as-path
* prepends or adds many communities. At most they can have
* CMD_ARGC_MAX args in a route-map so there is a finite limit on how
@ -1389,12 +1390,12 @@ struct peer *peer_new(struct bgp *bgp)
* bounds checking for every single attribute as we construct an
* UPDATE.
*/
peer->obuf_work =
stream_new(BGP_MAX_PACKET_SIZE + BGP_MAX_PACKET_SIZE_OVERFLOW);
peer->ibuf_work =
ringbuf_new(BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX);
peer->obuf_work = stream_new(BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE
+ BGP_MAX_PACKET_SIZE_OVERFLOW);
peer->ibuf_work = ringbuf_new(BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE
* BGP_READ_PACKET_MAX);
peer->scratch = stream_new(BGP_MAX_PACKET_SIZE);
peer->scratch = stream_new(BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE);
bgp_sync_init(peer);

View File

@ -859,6 +859,7 @@ typedef enum {
#define BGP_MARKER_SIZE 16
#define BGP_HEADER_SIZE 19
#define BGP_MAX_PACKET_SIZE 4096
#define BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE 65535
#define BGP_MAX_PACKET_SIZE_OVERFLOW 1024
/*
@ -1122,6 +1123,8 @@ struct peer {
#define PEER_CAP_HOSTNAME_RCV (1U << 16) /* hostname received */
#define PEER_CAP_ENHANCED_RR_ADV (1U << 17) /* enhanced rr advertised */
#define PEER_CAP_ENHANCED_RR_RCV (1U << 18) /* enhanced rr received */
#define PEER_CAP_EXTENDED_MESSAGE_ADV (1U << 19)
#define PEER_CAP_EXTENDED_MESSAGE_RCV (1U << 20)
/* Capability flags (reset in bgp_stop) */
uint32_t af_cap[AFI_MAX][SAFI_MAX];
@ -1564,6 +1567,9 @@ struct peer {
/* Sender side AS path loop detection. */
bool as_path_loop_detection;
/* Extended Message Support */
uint16_t max_packet_size;
/* Conditional advertisement */
bool advmap_config_change[AFI_MAX][SAFI_MAX];
bool advmap_table_change;