diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c index dc8cc81042..bd57b63997 100644 --- a/bgpd/bgp_attr.c +++ b/bgpd/bgp_attr.c @@ -3103,7 +3103,7 @@ bgp_attr_parse_ret_t bgp_attr_parse(struct peer *peer, struct attr *attr, * a stack buffer, since they perform bounds checking * and we are working with untrusted data. */ - unsigned char ndata[BGP_MAX_PACKET_SIZE]; + unsigned char ndata[peer->max_packet_size]; memset(ndata, 0x00, sizeof(ndata)); size_t lfl = CHECK_FLAG(flag, BGP_ATTR_FLAG_EXTLEN) ? 2 : 1; diff --git a/bgpd/bgp_io.c b/bgpd/bgp_io.c index 53fd3b5fe3..644f9633f3 100644 --- a/bgpd/bgp_io.c +++ b/bgpd/bgp_io.c @@ -228,7 +228,7 @@ static int bgp_process_reads(struct thread *thread) pktsize = ntohs(pktsize); /* if this fails we are seriously screwed */ - assert(pktsize <= BGP_MAX_PACKET_SIZE); + assert(pktsize <= peer->max_packet_size); /* * If we have that much data, chuck it into its own @@ -255,7 +255,7 @@ static int bgp_process_reads(struct thread *thread) /* wipe buffer just in case someone screwed up */ ringbuf_wipe(peer->ibuf_work); } else { - assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE); + assert(ringbuf_space(peer->ibuf_work) >= peer->max_packet_size); thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd, &peer->t_read); @@ -454,7 +454,9 @@ static uint16_t bgp_read(struct peer *peer) size_t readsize; // how many bytes we want to read ssize_t nbytes; // how many bytes we actually read uint16_t status = 0; - static uint8_t ibw[BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX]; + /* static and max-sized: avoids a large per-call VLA on the stack */ + static uint8_t ibw[BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE + * BGP_READ_PACKET_MAX]; readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw)); nbytes = read(peer->fd, ibw, readsize); @@ -558,7 +560,7 @@ static bool validate_header(struct peer *peer) } /* Minimum packet length check. 
*/ - if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE) + if ((size < BGP_HEADER_SIZE) || (size > peer->max_packet_size) || (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE) || (type == BGP_MSG_UPDATE && size < BGP_MSG_UPDATE_MIN_SIZE) || (type == BGP_MSG_NOTIFY && size < BGP_MSG_NOTIFY_MIN_SIZE) diff --git a/bgpd/bgp_open.c b/bgpd/bgp_open.c index 533518cf93..7642640218 100644 --- a/bgpd/bgp_open.c +++ b/bgpd/bgp_open.c @@ -538,6 +538,22 @@ static as_t bgp_capability_as4(struct peer *peer, struct capability_header *hdr) return as4; } +static int bgp_capability_ext_message(struct peer *peer, + struct capability_header *hdr) +{ + if (hdr->length != CAPABILITY_CODE_EXT_MESSAGE_LEN) { + flog_err( + EC_BGP_PKT_OPEN, + "%s: BGP Extended Message capability has incorrect data length %d", + peer->host, hdr->length); + return -1; + } + + SET_FLAG(peer->cap, PEER_CAP_EXTENDED_MESSAGE_RCV); + + return 0; +} + static int bgp_capability_addpath(struct peer *peer, struct capability_header *hdr) { @@ -761,6 +777,7 @@ static const struct message capcode_str[] = { {CAPABILITY_CODE_ORF_OLD, "ORF (Old)"}, {CAPABILITY_CODE_FQDN, "FQDN"}, {CAPABILITY_CODE_ENHANCED_RR, "Enhanced Route Refresh"}, + {CAPABILITY_CODE_EXT_MESSAGE, "BGP Extended Message"}, {0}}; /* Minimum sizes for length field of each cap (so not inc. the header) */ @@ -778,6 +795,7 @@ static const size_t cap_minsizes[] = { [CAPABILITY_CODE_ORF_OLD] = CAPABILITY_CODE_ORF_LEN, [CAPABILITY_CODE_FQDN] = CAPABILITY_CODE_MIN_FQDN_LEN, [CAPABILITY_CODE_ENHANCED_RR] = CAPABILITY_CODE_ENHANCED_LEN, + [CAPABILITY_CODE_EXT_MESSAGE] = CAPABILITY_CODE_EXT_MESSAGE_LEN, }; /* value the capability must be a multiple of. 
@@ -799,6 +817,7 @@ static const size_t cap_modsizes[] = { [CAPABILITY_CODE_ORF_OLD] = 1, [CAPABILITY_CODE_FQDN] = 1, [CAPABILITY_CODE_ENHANCED_RR] = 1, + [CAPABILITY_CODE_EXT_MESSAGE] = 1, }; /** @@ -867,6 +886,7 @@ static int bgp_capability_parse(struct peer *peer, size_t length, case CAPABILITY_CODE_ENHE: case CAPABILITY_CODE_FQDN: case CAPABILITY_CODE_ENHANCED_RR: + case CAPABILITY_CODE_EXT_MESSAGE: /* Check length. */ if (caphdr.length < cap_minsizes[caphdr.code]) { zlog_info( @@ -955,6 +975,9 @@ static int bgp_capability_parse(struct peer *peer, size_t length, case CAPABILITY_CODE_ENHE: ret = bgp_capability_enhe(peer, &caphdr); break; + case CAPABILITY_CODE_EXT_MESSAGE: + ret = bgp_capability_ext_message(peer, &caphdr); + break; case CAPABILITY_CODE_FQDN: ret = bgp_capability_hostname(peer, &caphdr); break; @@ -1191,6 +1214,12 @@ int bgp_open_option_parse(struct peer *peer, uint8_t length, int *mp_capability) } } + /* Extended Message: both sides must advertise it (RFC 8654) */ + peer->max_packet_size = BGP_MAX_PACKET_SIZE; + if (CHECK_FLAG(peer->cap, PEER_CAP_EXTENDED_MESSAGE_ADV) + && CHECK_FLAG(peer->cap, PEER_CAP_EXTENDED_MESSAGE_RCV)) + peer->max_packet_size = BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE; + /* Check there are no common AFI/SAFIs and send Unsupported Capability error. 
*/ if (*mp_capability @@ -1476,6 +1505,13 @@ void bgp_open_capability(struct stream *s, struct peer *peer) local_as = peer->local_as; stream_putl(s, local_as); + /* Extended Message Support */ + SET_FLAG(peer->cap, PEER_CAP_EXTENDED_MESSAGE_ADV); + stream_putc(s, BGP_OPEN_OPT_CAP); + stream_putc(s, CAPABILITY_CODE_EXT_MESSAGE_LEN + 2); + stream_putc(s, CAPABILITY_CODE_EXT_MESSAGE); + stream_putc(s, CAPABILITY_CODE_EXT_MESSAGE_LEN); + /* AddPath */ FOREACH_AFI_SAFI (afi, safi) { if (peer->afc[afi][safi]) { diff --git a/bgpd/bgp_open.h b/bgpd/bgp_open.h index 471ac05c7c..bc6eedac85 100644 --- a/bgpd/bgp_open.h +++ b/bgpd/bgp_open.h @@ -54,6 +54,7 @@ struct graceful_restart_af { #define CAPABILITY_CODE_ENHE 5 /* Extended Next Hop Encoding */ #define CAPABILITY_CODE_REFRESH_OLD 128 /* Route Refresh Capability(cisco) */ #define CAPABILITY_CODE_ORF_OLD 130 /* Cooperative Route Filtering Capability(cisco) */ +#define CAPABILITY_CODE_EXT_MESSAGE 6 /* Extended Message Support */ /* Capability Length */ #define CAPABILITY_CODE_MP_LEN 4 @@ -66,6 +67,7 @@ struct graceful_restart_af { #define CAPABILITY_CODE_MIN_FQDN_LEN 2 #define CAPABILITY_CODE_ENHANCED_LEN 0 #define CAPABILITY_CODE_ORF_LEN 5 +#define CAPABILITY_CODE_EXT_MESSAGE_LEN 0 /* Extended Message Support */ /* Cooperative Route Filtering Capability. */ diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index c2e2de1c73..ff2cc26d42 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -144,7 +144,7 @@ static struct stream *bgp_update_packet_eor(struct peer *peer, afi_t afi, zlog_debug("send End-of-RIB for %s to %s", get_afi_safi_str(afi, safi, false), peer->host); - s = stream_new(BGP_MAX_PACKET_SIZE); + s = stream_new(peer->max_packet_size); /* Make BGP update packet. */ bgp_packet_set_marker(s, BGP_MSG_UPDATE); @@ -726,7 +726,7 @@ void bgp_notify_send_with_data(struct peer *peer, uint8_t code, /* ============================================== */ /* Allocate new stream. 
*/ - s = stream_new(BGP_MAX_PACKET_SIZE); + s = stream_new(peer->max_packet_size); /* Make notify packet. */ bgp_packet_set_marker(s, BGP_MSG_NOTIFY); @@ -864,7 +864,7 @@ void bgp_route_refresh_send(struct peer *peer, afi_t afi, safi_t safi, /* Convert AFI, SAFI to values for packet. */ bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi); - s = stream_new(BGP_MAX_PACKET_SIZE); + s = stream_new(peer->max_packet_size); /* Make BGP update packet. */ if (CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_NEW_RCV)) @@ -963,7 +963,7 @@ void bgp_capability_send(struct peer *peer, afi_t afi, safi_t safi, /* Convert AFI, SAFI to values for packet. */ bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi); - s = stream_new(BGP_MAX_PACKET_SIZE); + s = stream_new(peer->max_packet_size); /* Make BGP update packet. */ bgp_packet_set_marker(s, BGP_MSG_CAPABILITY); diff --git a/bgpd/bgp_updgrp.c b/bgpd/bgp_updgrp.c index 621a14014f..2600eda42e 100644 --- a/bgpd/bgp_updgrp.c +++ b/bgpd/bgp_updgrp.c @@ -79,8 +79,11 @@ static void update_subgroup_checkin(struct update_subgroup *subgrp, subgrp->uptime = bgp_clock(); } -static void sync_init(struct update_subgroup *subgrp) +static void sync_init(struct update_subgroup *subgrp, + struct update_group *updgrp) { + struct peer *peer = UPDGRP_PEER(updgrp); + subgrp->sync = XCALLOC(MTYPE_BGP_SYNCHRONISE, sizeof(struct bgp_synchronize)); bgp_adv_fifo_init(&subgrp->sync->update); @@ -91,7 +94,7 @@ static void sync_init(struct update_subgroup *subgrp) /* We use a larger buffer for subgrp->work in the event that: * - We RX a BGP_UPDATE where the attributes alone are just - * under BGP_MAX_PACKET_SIZE + * under 4096 or 65535 (if Extended Message capability negotiated). * - The user configures an outbound route-map that does many as-path * prepends or adds many communities. 
At most they can have * CMD_ARGC_MAX @@ -103,9 +106,9 @@ static void sync_init(struct update_subgroup *subgrp) * bounds * checking for every single attribute as we construct an UPDATE. */ - subgrp->work = - stream_new(BGP_MAX_PACKET_SIZE + BGP_MAX_PACKET_SIZE_OVERFLOW); - subgrp->scratch = stream_new(BGP_MAX_PACKET_SIZE); + subgrp->work = stream_new(peer->max_packet_size + + BGP_MAX_PACKET_SIZE_OVERFLOW); + subgrp->scratch = stream_new(peer->max_packet_size); } static void sync_delete(struct update_subgroup *subgrp) @@ -143,6 +146,7 @@ static void conf_copy(struct peer *dst, struct peer *src, afi_t afi, dst->flags = src->flags; dst->af_flags[afi][safi] = src->af_flags[afi][safi]; dst->pmax_out[afi][safi] = src->pmax_out[afi][safi]; + dst->max_packet_size = src->max_packet_size; XFREE(MTYPE_BGP_PEER_HOST, dst->host); dst->host = XSTRDUP(MTYPE_BGP_PEER_HOST, src->host); @@ -800,7 +804,7 @@ update_subgroup_create(struct update_group *updgrp) subgrp = XCALLOC(MTYPE_BGP_UPD_SUBGRP, sizeof(struct update_subgroup)); update_subgroup_checkin(subgrp, updgrp); subgrp->v_coalesce = (UPDGRP_INST(updgrp))->coalesce_time; - sync_init(subgrp); + sync_init(subgrp, updgrp); bpacket_queue_init(SUBGRP_PKTQ(subgrp)); bpacket_queue_add(SUBGRP_PKTQ(subgrp), NULL, NULL); TAILQ_INIT(&(subgrp->adjq)); diff --git a/bgpd/bgp_updgrp_packet.c b/bgpd/bgp_updgrp_packet.c index a13a5395b4..05452beacd 100644 --- a/bgpd/bgp_updgrp_packet.c +++ b/bgpd/bgp_updgrp_packet.c @@ -898,11 +898,13 @@ next: packet = stream_dup(s); bgp_packet_set_size(packet); if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0)) - zlog_debug("u%" PRIu64 ":s%" PRIu64" send UPDATE len %zd numpfx %d", - subgrp->update_group->id, subgrp->id, - (stream_get_endp(packet) - - stream_get_getp(packet)), - num_pfx); + zlog_debug( + "u%" PRIu64 ":s%" PRIu64 + " send UPDATE len %zd (max message len: %hu) numpfx %d", + subgrp->update_group->id, subgrp->id, + (stream_get_endp(packet) + - stream_get_getp(packet)), + 
peer->max_packet_size, num_pfx); pkt = bpacket_queue_add(SUBGRP_PKTQ(subgrp), packet, &vecarr); stream_reset(s); stream_reset(snlri); @@ -1128,7 +1130,7 @@ void subgroup_default_update_packet(struct update_subgroup *subgrp, tx_id_buf, attrstr); } - s = stream_new(BGP_MAX_PACKET_SIZE); + s = stream_new(peer->max_packet_size); /* Make BGP update packet. */ bgp_packet_set_marker(s, BGP_MSG_UPDATE); @@ -1206,7 +1208,7 @@ void subgroup_default_withdraw_packet(struct update_subgroup *subgrp) tx_id_buf); } - s = stream_new(BGP_MAX_PACKET_SIZE); + s = stream_new(peer->max_packet_size); /* Make BGP update packet. */ bgp_packet_set_marker(s, BGP_MSG_UPDATE); diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index 6a76237410..a4e56c95c8 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -12971,6 +12971,28 @@ static void bgp_show_peer(struct vty *vty, struct peer *p, bool use_json, "received"); } + /* Extended Message Support */ + if (CHECK_FLAG(p->cap, + PEER_CAP_EXTENDED_MESSAGE_ADV) + && CHECK_FLAG( + p->cap, + PEER_CAP_EXTENDED_MESSAGE_RCV)) + json_object_string_add( + json_cap, "extendedMessage", + "advertisedAndReceived"); + else if (CHECK_FLAG( + p->cap, + PEER_CAP_EXTENDED_MESSAGE_ADV)) + json_object_string_add( + json_cap, "extendedMessage", + "advertised"); + else if (CHECK_FLAG( + p->cap, + PEER_CAP_EXTENDED_MESSAGE_RCV)) + json_object_string_add( + json_cap, "extendedMessage", + "received"); + /* AddPath */ if (CHECK_FLAG(p->cap, PEER_CAP_ADDPATH_RCV) || CHECK_FLAG(p->cap, @@ -13449,6 +13471,29 @@ static void bgp_show_peer(struct vty *vty, struct peer *p, bool use_json, vty_out(vty, "\n"); } + /* Extended Message Support */ + if (CHECK_FLAG(p->cap, + PEER_CAP_EXTENDED_MESSAGE_RCV) + || CHECK_FLAG( + p->cap, + PEER_CAP_EXTENDED_MESSAGE_ADV)) { + vty_out(vty, " Extended Message:"); + if (CHECK_FLAG( + p->cap, + PEER_CAP_EXTENDED_MESSAGE_ADV)) + vty_out(vty, " advertised"); + if (CHECK_FLAG( + p->cap, + PEER_CAP_EXTENDED_MESSAGE_RCV)) + vty_out(vty, " %sreceived", + 
CHECK_FLAG( + p->cap, + PEER_CAP_EXTENDED_MESSAGE_ADV) + ? "and " + : ""); + vty_out(vty, "\n"); + } + /* AddPath */ if (CHECK_FLAG(p->cap, PEER_CAP_ADDPATH_RCV) || CHECK_FLAG(p->cap, diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 07ca247ee6..33c8f3c1f0 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -1345,6 +1345,8 @@ struct peer *peer_new(struct bgp *bgp) peer->bgp = bgp_lock(bgp); peer = peer_lock(peer); /* initial reference */ peer->password = NULL; + /* 4096 until Extended Message is negotiated in OPEN (RFC 8654) */ + peer->max_packet_size = BGP_MAX_PACKET_SIZE; /* Set default flags. */ FOREACH_AFI_SAFI (afi, safi) { @@ -1379,7 +1381,7 @@ struct peer *peer_new(struct bgp *bgp) /* We use a larger buffer for peer->obuf_work in the event that: * - We RX a BGP_UPDATE where the attributes alone are just - * under BGP_MAX_PACKET_SIZE + * under BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE. * - The user configures an outbound route-map that does many as-path * prepends or adds many communities. At most they can have * CMD_ARGC_MAX args in a route-map so there is a finite limit on how @@ -1389,12 +1391,12 @@ struct peer *peer_new(struct bgp *bgp) * bounds checking for every single attribute as we construct an * UPDATE. 
*/ - peer->obuf_work = - stream_new(BGP_MAX_PACKET_SIZE + BGP_MAX_PACKET_SIZE_OVERFLOW); - peer->ibuf_work = - ringbuf_new(BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX); + peer->obuf_work = stream_new(BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE + + BGP_MAX_PACKET_SIZE_OVERFLOW); + peer->ibuf_work = ringbuf_new(BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE + * BGP_READ_PACKET_MAX); - peer->scratch = stream_new(BGP_MAX_PACKET_SIZE); + peer->scratch = stream_new(BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE); bgp_sync_init(peer); diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index e79dccdab8..23d0e9dfb1 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -859,6 +859,7 @@ typedef enum { #define BGP_MARKER_SIZE 16 #define BGP_HEADER_SIZE 19 #define BGP_MAX_PACKET_SIZE 4096 +#define BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE 65535 #define BGP_MAX_PACKET_SIZE_OVERFLOW 1024 /* @@ -1122,6 +1123,8 @@ struct peer { #define PEER_CAP_HOSTNAME_RCV (1U << 16) /* hostname received */ #define PEER_CAP_ENHANCED_RR_ADV (1U << 17) /* enhanced rr advertised */ #define PEER_CAP_ENHANCED_RR_RCV (1U << 18) /* enhanced rr received */ +#define PEER_CAP_EXTENDED_MESSAGE_ADV (1U << 19) +#define PEER_CAP_EXTENDED_MESSAGE_RCV (1U << 20) /* Capability flags (reset in bgp_stop) */ uint32_t af_cap[AFI_MAX][SAFI_MAX]; @@ -1564,6 +1567,9 @@ struct peer { /* Sender side AS path loop detection. */ bool as_path_loop_detection; + /* Extended Message Support */ + uint16_t max_packet_size; + /* Conditional advertisement */ bool advmap_config_change[AFI_MAX][SAFI_MAX]; bool advmap_table_change;