diff --git a/bgpd/Makefile.am b/bgpd/Makefile.am index 63836a1bd5..b005a0caf1 100644 --- a/bgpd/Makefile.am +++ b/bgpd/Makefile.am @@ -16,14 +16,15 @@ libbgp_a_SOURCES = \ bgp_packet.c bgp_network.c bgp_filter.c bgp_regex.c bgp_clist.c \ bgp_dump.c bgp_snmp.c bgp_ecommunity.c bgp_mplsvpn.c bgp_nexthop.c \ bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c bgp_mpath.c \ - bgp_nht.c + bgp_nht.c bgp_updgrp.c bgp_updgrp_packet.c bgp_updgrp_adv.c noinst_HEADERS = \ bgp_aspath.h bgp_attr.h bgp_community.h bgp_debug.h bgp_fsm.h \ bgp_network.h bgp_open.h bgp_packet.h bgp_regex.h bgp_route.h \ bgpd.h bgp_filter.h bgp_clist.h bgp_dump.h bgp_zebra.h \ bgp_ecommunity.h bgp_mplsvpn.h bgp_nexthop.h bgp_damp.h bgp_table.h \ - bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h bgp_nht.h + bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h bgp_nht.h \ + bgp_updgrp.h bgpd_SOURCES = bgp_main.c bgpd_LDADD = libbgp.a ../lib/libzebra.la @LIBCAP@ @LIBM@ diff --git a/bgpd/bgp_advertise.c b/bgpd/bgp_advertise.c index c71e83bc72..bd60ca10a9 100644 --- a/bgpd/bgp_advertise.c +++ b/bgpd/bgp_advertise.c @@ -25,6 +25,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "prefix.h" #include "hash.h" #include "thread.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -36,11 +37,12 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_packet.h" #include "bgpd/bgp_fsm.h" #include "bgpd/bgp_mplsvpn.h" +#include "bgpd/bgp_updgrp.h" /* BGP advertise attribute is used for pack same attribute update into one packet. To do that we maintain attribute hash in struct peer. 
*/ -static struct bgp_advertise_attr * +struct bgp_advertise_attr * baa_new (void) { return (struct bgp_advertise_attr *) @@ -64,7 +66,7 @@ baa_hash_alloc (void *p) return baa; } -static unsigned int +unsigned int baa_hash_key (void *p) { struct bgp_advertise_attr * baa = (struct bgp_advertise_attr *) p; @@ -72,7 +74,7 @@ baa_hash_key (void *p) return attrhash_key_make (baa->attr); } -static int +int baa_hash_cmp (const void *p1, const void *p2) { const struct bgp_advertise_attr * baa1 = p1; @@ -84,14 +86,14 @@ baa_hash_cmp (const void *p1, const void *p2) /* BGP update and withdraw information is stored in BGP advertise structure. This structure is referred from BGP adjacency information. */ -static struct bgp_advertise * +struct bgp_advertise * bgp_advertise_new (void) { - return (struct bgp_advertise *) + return (struct bgp_advertise *) XCALLOC (MTYPE_BGP_ADVERTISE, sizeof (struct bgp_advertise)); } -static void +void bgp_advertise_free (struct bgp_advertise *adv) { if (adv->binfo) @@ -99,7 +101,7 @@ bgp_advertise_free (struct bgp_advertise *adv) XFREE (MTYPE_BGP_ADVERTISE, adv); } -static void +void bgp_advertise_add (struct bgp_advertise_attr *baa, struct bgp_advertise *adv) { @@ -109,7 +111,7 @@ bgp_advertise_add (struct bgp_advertise_attr *baa, baa->adv = adv; } -static void +void bgp_advertise_delete (struct bgp_advertise_attr *baa, struct bgp_advertise *adv) { @@ -121,7 +123,7 @@ bgp_advertise_delete (struct bgp_advertise_attr *baa, baa->adv = adv->next; } -static struct bgp_advertise_attr * +struct bgp_advertise_attr * bgp_advertise_intern (struct hash *hash, struct attr *attr) { struct bgp_advertise_attr ref; @@ -134,7 +136,7 @@ bgp_advertise_intern (struct hash *hash, struct attr *attr) return baa; } -static void +void bgp_advertise_unintern (struct hash *hash, struct bgp_advertise_attr *baa) { if (baa->refcnt) @@ -153,12 +155,17 @@ bgp_advertise_unintern (struct hash *hash, struct bgp_advertise_attr *baa) } } -/* BGP adjacency keeps minimal 
advertisement information. */ -static void -bgp_adj_out_free (struct bgp_adj_out *adj) +struct bgp_adj_out * +bgp_adj_peer_lookup (struct peer *peer, struct bgp_node *rn) { - peer_unlock (adj->peer); /* adj_out peer reference */ - XFREE (MTYPE_BGP_ADJ_OUT, adj); + struct bgp_adj_out *adj; + struct peer_af *paf; + + for (adj = rn->adj_out; adj; adj = adj->next) + SUBGRP_FOREACH_PEER(adj->subgroup, paf) + if (paf->peer == peer) + return adj; + return NULL; } int @@ -166,203 +173,20 @@ bgp_adj_out_lookup (struct peer *peer, struct prefix *p, afi_t afi, safi_t safi, struct bgp_node *rn) { struct bgp_adj_out *adj; + struct peer_af *paf; for (adj = rn->adj_out; adj; adj = adj->next) - if (adj->peer == peer) - break; + SUBGRP_FOREACH_PEER(adj->subgroup, paf) + if (paf->peer == peer) + { + return (adj->adv + ? (adj->adv->baa ? 1 : 0) + : (adj->attr ? 1 : 0)); + } - if (! adj) - return 0; - - return (adj->adv - ? (adj->adv->baa ? 1 : 0) - : (adj->attr ? 1 : 0)); + return 0; } -struct bgp_advertise * -bgp_advertise_clean (struct peer *peer, struct bgp_adj_out *adj, - afi_t afi, safi_t safi) -{ - struct bgp_advertise *adv; - struct bgp_advertise_attr *baa; - struct bgp_advertise *next; - struct bgp_advertise_fifo *fhead; - - adv = adj->adv; - baa = adv->baa; - next = NULL; - fhead = &peer->sync[afi][safi]->withdraw; - - if (baa) - { - /* Unlink myself from advertise attribute FIFO. */ - bgp_advertise_delete (baa, adv); - - /* Fetch next advertise candidate. */ - next = baa->adv; - - /* Unintern BGP advertise attribute. */ - bgp_advertise_unintern (peer->hash[afi][safi], baa); - - fhead = &peer->sync[afi][safi]->update; - } - - /* Unlink myself from advertisement FIFO. */ - BGP_ADV_FIFO_DEL (fhead, adv); - - /* Free memory. 
*/ - bgp_advertise_free (adj->adv); - adj->adv = NULL; - - return next; -} - -void -bgp_adj_out_set (struct bgp_node *rn, struct peer *peer, struct prefix *p, - struct attr *attr, afi_t afi, safi_t safi, - struct bgp_info *binfo) -{ - struct bgp_adj_out *adj = NULL; - struct bgp_advertise *adv; - - if (DISABLE_BGP_ANNOUNCE) - return; - - /* Look for adjacency information. */ - if (rn) - { - for (adj = rn->adj_out; adj; adj = adj->next) - if (adj->peer == peer) - break; - } - - if (! adj) - { - adj = XCALLOC (MTYPE_BGP_ADJ_OUT, sizeof (struct bgp_adj_out)); - adj->peer = peer_lock (peer); /* adj_out peer reference */ - - if (rn) - { - BGP_ADJ_OUT_ADD (rn, adj); - bgp_lock_node (rn); - } - } - - if (adj->adv) - bgp_advertise_clean (peer, adj, afi, safi); - - adj->adv = bgp_advertise_new (); - - adv = adj->adv; - adv->rn = rn; - - assert (adv->binfo == NULL); - adv->binfo = bgp_info_lock (binfo); /* bgp_info adj_out reference */ - - if (attr) - adv->baa = bgp_advertise_intern (peer->hash[afi][safi], attr); - else - adv->baa = baa_new (); - adv->adj = adj; - - /* Add new advertisement to advertisement attribute list. */ - bgp_advertise_add (adv->baa, adv); - - BGP_ADV_FIFO_ADD (&peer->sync[afi][safi]->update, &adv->fifo); - - /* - * Schedule write thread (by triggering adjustment of MRAI timer) only if - * update FIFO has grown. Otherwise, it will be done upon the work queue - * being fully processed. Only adjust timer if needed. 
- */ - if (!BGP_ROUTE_ADV_HOLD(peer->bgp) && - (BGP_ADV_FIFO_COUNT(&peer->sync[afi][safi]->update) >= - peer->bgp->adv_quanta)) - { - if (!peer->radv_adjusted) - { - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("%s scheduling MRAI timer after adj_out_set", peer->host); - bgp_adjust_routeadv(peer); - } - } -} - -void -bgp_adj_out_unset (struct bgp_node *rn, struct peer *peer, struct prefix *p, - afi_t afi, safi_t safi) -{ - struct bgp_adj_out *adj; - struct bgp_advertise *adv; - - if (DISABLE_BGP_ANNOUNCE) - return; - - /* Lookup existing adjacency, if it is not there return immediately. */ - for (adj = rn->adj_out; adj; adj = adj->next) - if (adj->peer == peer) - break; - - if (! adj) - return; - - /* Clearn up previous advertisement. */ - if (adj->adv) - bgp_advertise_clean (peer, adj, afi, safi); - - if (adj->attr) - { - /* We need advertisement structure. */ - adj->adv = bgp_advertise_new (); - adv = adj->adv; - adv->rn = rn; - adv->adj = adj; - - /* Add to synchronization entry for withdraw announcement. */ - BGP_ADV_FIFO_ADD (&peer->sync[afi][safi]->withdraw, &adv->fifo); - - /* - * Schedule write thread only if withdraw FIFO has grown. Otherwise, - * it will be done upon the work queue being fully processed. - */ - if (!BGP_ROUTE_ADV_HOLD(peer->bgp) && - (BGP_ADV_FIFO_COUNT(&peer->sync[afi][safi]->withdraw) >= - peer->bgp->wd_quanta)) - { - if (!peer->t_write) - { - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("%s scheduling write thread after adj_out_unset", - peer->host); - BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); - } - } - } - else - { - /* Remove myself from adjacency. */ - BGP_ADJ_OUT_DEL (rn, adj); - - /* Free allocated information. 
*/ - bgp_adj_out_free (adj); - - bgp_unlock_node (rn); - } -} - -void -bgp_adj_out_remove (struct bgp_node *rn, struct bgp_adj_out *adj, - struct peer *peer, afi_t afi, safi_t safi) -{ - if (adj->attr) - bgp_attr_unintern (&adj->attr); - - if (adj->adv) - bgp_advertise_clean (peer, adj, afi, safi); - - BGP_ADJ_OUT_DEL (rn, adj); - bgp_adj_out_free (adj); -} void bgp_adj_in_set (struct bgp_node *rn, struct peer *peer, struct attr *attr) diff --git a/bgpd/bgp_advertise.h b/bgpd/bgp_advertise.h index 36ab576989..a147437470 100644 --- a/bgpd/bgp_advertise.h +++ b/bgpd/bgp_advertise.h @@ -21,6 +21,8 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #ifndef _QUAGGA_BGP_ADVERTISE_H #define _QUAGGA_BGP_ADVERTISE_H +struct update_subgroup; + /* BGP advertise FIFO. */ struct bgp_advertise_fifo { @@ -71,8 +73,14 @@ struct bgp_adj_out struct bgp_adj_out *next; struct bgp_adj_out *prev; - /* Advertised peer. */ - struct peer *peer; + /* Advertised subgroup. */ + struct update_subgroup *subgroup; + + /* Threading that makes the adj part of subgroup's adj queue */ + TAILQ_ENTRY(bgp_adj_out) subgrp_adj_train; + + /* Prefix information. */ + struct bgp_node *rn; /* Advertised attribute. */ struct attr *attr; @@ -149,13 +157,14 @@ struct bgp_synchronize #define BGP_ADV_FIFO_COUNT(F) \ (F)->count +#define BGP_ADV_FIFO_EMPTY(F) \ + (((struct bgp_advertise_fifo *)(F))->next == (struct bgp_advertise *)(F)) + +#define BGP_ADV_FIFO_HEAD(F) \ + ((((struct bgp_advertise_fifo *)(F))->next == (struct bgp_advertise *)(F)) \ + ? NULL : (F)->next) + /* Prototypes. 
*/ -extern void bgp_adj_out_set (struct bgp_node *, struct peer *, struct prefix *, - struct attr *, afi_t, safi_t, struct bgp_info *); -extern void bgp_adj_out_unset (struct bgp_node *, struct peer *, struct prefix *, - afi_t, safi_t); -extern void bgp_adj_out_remove (struct bgp_node *, struct bgp_adj_out *, - struct peer *, afi_t, safi_t); extern int bgp_adj_out_lookup (struct peer *, struct prefix *, afi_t, safi_t, struct bgp_node *); @@ -163,10 +172,23 @@ extern void bgp_adj_in_set (struct bgp_node *, struct peer *, struct attr *); extern void bgp_adj_in_unset (struct bgp_node *, struct peer *); extern void bgp_adj_in_remove (struct bgp_node *, struct bgp_adj_in *); -extern struct bgp_advertise * -bgp_advertise_clean (struct peer *, struct bgp_adj_out *, afi_t, safi_t); - extern void bgp_sync_init (struct peer *); extern void bgp_sync_delete (struct peer *); +extern unsigned int baa_hash_key (void *p); +extern int baa_hash_cmp (const void *p1, const void *p2); +extern void bgp_advertise_add (struct bgp_advertise_attr *baa, + struct bgp_advertise *adv); +extern struct bgp_advertise *bgp_advertise_new (void); +extern void bgp_advertise_free (struct bgp_advertise *adv); +extern struct bgp_advertise_attr * +bgp_advertise_intern (struct hash *hash, struct attr *attr); +extern struct bgp_advertise_attr *baa_new (void); +extern void +bgp_advertise_delete (struct bgp_advertise_attr *baa, + struct bgp_advertise *adv); +extern void +bgp_advertise_unintern (struct hash *hash, struct bgp_advertise_attr *baa); +extern struct bgp_adj_out * +bgp_adj_peer_lookup (struct peer *peer, struct bgp_node *rn); #endif /* _QUAGGA_BGP_ADVERTISE_H */ diff --git a/bgpd/bgp_aspath.c b/bgpd/bgp_aspath.c index 450f5f5273..0441f8ed8c 100644 --- a/bgpd/bgp_aspath.c +++ b/bgpd/bgp_aspath.c @@ -29,6 +29,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" #include "stream.h" #include "jhash.h" +#include "queue.h" #include "bgpd/bgpd.h" #include 
"bgpd/bgp_aspath.h" diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c index 506583b3e5..e0c5cc8937 100644 --- a/bgpd/bgp_attr.c +++ b/bgpd/bgp_attr.c @@ -29,6 +29,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" #include "hash.h" #include "jhash.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_attr.h" @@ -38,6 +39,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_debug.h" #include "bgpd/bgp_packet.h" #include "bgpd/bgp_ecommunity.h" +#include "bgpd/bgp_updgrp.h" /* Attribute strings for logging. */ static const struct message attr_str [] = @@ -474,7 +476,8 @@ attrhash_cmp (const void *p1, const void *p2) && attr1->aspath == attr2->aspath && attr1->community == attr2->community && attr1->med == attr2->med - && attr1->local_pref == attr2->local_pref) + && attr1->local_pref == attr2->local_pref + && attr1->rmap_change_flags == attr2->rmap_change_flags) { const struct attr_extra *ae1 = attr1->extra; const struct attr_extra *ae2 = attr2->extra; @@ -607,6 +610,40 @@ bgp_attr_intern (struct attr *attr) return find; } +/** + * Increment the refcount on various structures that attr holds. + * Note on usage: call _only_ when the 'attr' object has already + * been 'intern'ed and exists in 'attrhash' table. The function + * serves to hold a reference to that (real) object. + * Note also that the caller can safely call bgp_attr_unintern() + * after calling bgp_attr_refcount(). That would release the + * reference and could result in a free() of the attr object. + */ +struct attr * +bgp_attr_refcount (struct attr *attr) +{ + /* Intern referenced structure. 
*/ + if (attr->aspath) + attr->aspath->refcnt++; + + if (attr->community) + attr->community->refcnt++; + + if (attr->extra) + { + struct attr_extra *attre = attr->extra; + if (attre->ecommunity) + attre->ecommunity->refcnt++; + + if (attre->cluster) + attre->cluster->refcnt++; + + if (attre->transit) + attre->transit->refcnt++; + } + attr->refcnt++; + return attr; +} /* Make network statement's attribute. */ struct attr * @@ -1565,7 +1602,7 @@ bgp_mp_reach_parse (struct bgp_attr_parser_args *args, char buf1[INET6_ADDRSTRLEN]; char buf2[INET6_ADDRSTRLEN]; - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug ("%s sent two nexthops %s %s but second one is not a link-local nexthop", peer->host, inet_ntop (AF_INET6, &attre->mp_nexthop_global, buf1, INET6_ADDRSTRLEN), @@ -1716,7 +1753,7 @@ bgp_attr_unknown (struct bgp_attr_parser_args *args) const u_char flag = args->flags; const bgp_size_t length = args->length; - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug ("%s Unknown attribute is received (type %d, length %d)", peer->host, type, length); @@ -2098,6 +2135,7 @@ int stream_put_prefix (struct stream *, struct prefix *); size_t bgp_packet_mpattr_start (struct stream *s, afi_t afi, safi_t safi, + struct bpacket_attr_vec_arr *vecarr, struct attr *attr) { size_t sizep; @@ -2118,10 +2156,12 @@ bgp_packet_mpattr_start (struct stream *s, afi_t afi, safi_t safi, { case SAFI_UNICAST: case SAFI_MULTICAST: + bpacket_attr_vec_arr_set_vec (vecarr, BGP_ATTR_VEC_NH, s, attr); stream_putc (s, 4); stream_put_ipv4 (s, attr->nexthop.s_addr); break; case SAFI_MPLS_VPN: + bpacket_attr_vec_arr_set_vec (vecarr, BGP_ATTR_VEC_NH, s, attr); stream_putc (s, 12); stream_putl (s, 0); stream_putl (s, 0); @@ -2142,6 +2182,7 @@ bgp_packet_mpattr_start (struct stream *s, afi_t afi, safi_t safi, struct attr_extra *attre = attr->extra; assert (attr->extra); + bpacket_attr_vec_arr_set_vec (vecarr, BGP_ATTR_VEC_NH, s, 
attr); stream_putc (s, attre->mp_nexthop_len); stream_put (s, &attre->mp_nexthop_global, 16); if (attre->mp_nexthop_len == 32) @@ -2194,6 +2235,7 @@ bgp_packet_mpattr_end (struct stream *s, size_t sizep) bgp_size_t bgp_packet_attribute (struct bgp *bgp, struct peer *peer, struct stream *s, struct attr *attr, + struct bpacket_attr_vec_arr *vecarr, struct prefix *p, afi_t afi, safi_t safi, struct peer *from, struct prefix_rd *prd, u_char *tag) { @@ -2202,6 +2244,7 @@ bgp_packet_attribute (struct bgp *bgp, struct peer *peer, struct aspath *aspath; int send_as4_path = 0; int send_as4_aggregator = 0; + int i = 0; int use32bit = (CHECK_FLAG (peer->cap, PEER_CAP_AS4_RCV)) ? 1 : 0; size_t mpattrlen_pos = 0; @@ -2213,7 +2256,7 @@ bgp_packet_attribute (struct bgp *bgp, struct peer *peer, if (p && !(afi == AFI_IP && safi == SAFI_UNICAST)) { - mpattrlen_pos = bgp_packet_mpattr_start(s, afi, safi, attr); + mpattrlen_pos = bgp_packet_mpattr_start(s, afi, safi, vecarr, attr); bgp_packet_mpattr_prefix(s, afi, safi, p, prd, tag); bgp_packet_mpattr_end(s, mpattrlen_pos); } @@ -2290,16 +2333,9 @@ bgp_packet_attribute (struct bgp *bgp, struct peer *peer, { stream_putc (s, BGP_ATTR_FLAG_TRANS); stream_putc (s, BGP_ATTR_NEXT_HOP); + bpacket_attr_vec_arr_set_vec (vecarr, BGP_ATTR_VEC_NH, s, attr); stream_putc (s, 4); - if (safi == SAFI_MPLS_VPN) - { - if (attr->nexthop.s_addr == 0) - stream_put_ipv4 (s, peer->nexthop.v4.s_addr); - else - stream_put_ipv4 (s, attr->nexthop.s_addr); - } - else - stream_put_ipv4 (s, attr->nexthop.s_addr); + stream_put_ipv4 (s, attr->nexthop.s_addr); } /* MED attribute. */ diff --git a/bgpd/bgp_attr.h b/bgpd/bgp_attr.h index 4285c75522..57eca3abbb 100644 --- a/bgpd/bgp_attr.h +++ b/bgpd/bgp_attr.h @@ -116,8 +116,17 @@ struct attr /* Path origin attribute */ u_char origin; + + /* has the route-map changed any attribute? + Used on the peer outbound side. 
*/ + u_int32_t rmap_change_flags; }; +/* rmap_change_flags definition */ +#define BATTR_RMAP_NEXTHOP_CHANGED (1 << 0) +#define BATTR_RMAP_NEXTHOP_PEER_ADDRESS (1 << 1) +#define BATTR_REFLECTED (1 << 2) + /* Router Reflector related structure. */ struct cluster_list { @@ -149,6 +158,8 @@ typedef enum { BGP_ATTR_PARSE_ERROR_NOTIFYPLS = -3, } bgp_attr_parse_ret_t; +struct bpacket_attr_vec_arr; + /* Prototypes. */ extern void bgp_attr_init (void); extern void bgp_attr_finish (void); @@ -162,6 +173,7 @@ extern void bgp_attr_dup (struct attr *, struct attr *); extern void bgp_attr_deep_dup (struct attr *, struct attr *); extern void bgp_attr_deep_free (struct attr *); extern struct attr *bgp_attr_intern (struct attr *attr); +extern struct attr *bgp_attr_refcount (struct attr *attr); extern void bgp_attr_unintern_sub (struct attr *); extern void bgp_attr_unintern (struct attr **); extern void bgp_attr_flush (struct attr *); @@ -172,6 +184,7 @@ extern struct attr *bgp_attr_aggregate_intern (struct bgp *, u_char, struct community *, int as_set, u_char); extern bgp_size_t bgp_packet_attribute (struct bgp *bgp, struct peer *, struct stream *, struct attr *, + struct bpacket_attr_vec_arr *vecarr, struct prefix *, afi_t, safi_t, struct peer *, struct prefix_rd *, u_char *); @@ -212,6 +225,7 @@ extern int bgp_mp_unreach_parse (struct bgp_attr_parser_args *args, * finally the _end() function. 
*/ extern size_t bgp_packet_mpattr_start(struct stream *s, afi_t afi, safi_t safi, + struct bpacket_attr_vec_arr *vecarr, struct attr *attr); extern void bgp_packet_mpattr_prefix(struct stream *s, afi_t afi, safi_t safi, struct prefix *p, struct prefix_rd *prd, diff --git a/bgpd/bgp_clist.c b/bgpd/bgp_clist.c index 80564df4b7..33444c46f3 100644 --- a/bgpd/bgp_clist.c +++ b/bgpd/bgp_clist.c @@ -23,6 +23,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "command.h" #include "prefix.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_community.h" diff --git a/bgpd/bgp_damp.c b/bgpd/bgp_damp.c index 0ffafb7a08..468a3e91bb 100644 --- a/bgpd/bgp_damp.c +++ b/bgpd/bgp_damp.c @@ -26,6 +26,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "command.h" #include "log.h" #include "thread.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_damp.h" diff --git a/bgpd/bgp_debug.c b/bgpd/bgp_debug.c index 9033b94df4..27d7e69782 100644 --- a/bgpd/bgp_debug.c +++ b/bgpd/bgp_debug.c @@ -29,6 +29,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" #include "sockunion.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_aspath.h" @@ -36,6 +37,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_attr.h" #include "bgpd/bgp_debug.h" #include "bgpd/bgp_community.h" +#include "bgpd/bgp_updgrp.h" unsigned long conf_bgp_debug_as4; unsigned long conf_bgp_debug_neighbor_events; @@ -46,6 +48,7 @@ unsigned long conf_bgp_debug_keepalive; unsigned long conf_bgp_debug_update; unsigned long conf_bgp_debug_zebra; unsigned long conf_bgp_debug_nht; +unsigned long conf_bgp_debug_update_groups; unsigned long term_bgp_debug_as4; unsigned long term_bgp_debug_neighbor_events; @@ -56,6 +59,7 @@ unsigned long term_bgp_debug_keepalive; unsigned long term_bgp_debug_update; unsigned long 
term_bgp_debug_zebra; unsigned long term_bgp_debug_nht; +unsigned long term_bgp_debug_update_groups; struct list *bgp_debug_neighbor_events_peers = NULL; struct list *bgp_debug_keepalive_peers = NULL; @@ -313,6 +317,12 @@ bgp_debug_list_has_entry(struct list *list, struct peer *peer, struct prefix *p) return 0; } +int +bgp_debug_peer_updout_enabled(struct peer *peer) +{ + return (bgp_debug_list_has_entry(bgp_debug_update_out_peers, peer, NULL)); +} + /* Dump attribute. */ int bgp_dump_attr (struct peer *peer, struct attr *attr, char *buf, size_t size) @@ -438,6 +448,15 @@ bgp_notify_print(struct peer *peer, struct bgp_notify *bgp_notify, bgp_notify->data ? bgp_notify->data : ""); } +static void +bgp_debug_clear_updgrp_update_dbg(struct bgp *bgp) +{ + if (!bgp) + bgp = bgp_get_default(); + update_group_walk (bgp, update_group_clear_update_dbg, NULL); +} + + /* Debug option setting interface. */ unsigned long bgp_debug_option = 0; @@ -912,7 +931,19 @@ DEFUN (debug_bgp_update_direct_peer, if (inbound) bgp_debug_list_add_entry(bgp_debug_update_in_peers, peer, NULL); else - bgp_debug_list_add_entry(bgp_debug_update_out_peers, peer, NULL); + { + struct peer_af *paf; + int af; + + bgp_debug_list_add_entry(bgp_debug_update_out_peers, peer, NULL); + PEERAF_FOREACH (peer, paf, af) + { + if (PAF_SUBGRP (paf)) + { + UPDGRP_PEER_DBG_EN(PAF_SUBGRP(paf)->update_group); + } + } + } if (vty->node == CONFIG_NODE) { @@ -1006,6 +1037,20 @@ DEFUN (no_debug_bgp_update_direct_peer, vty_out (vty, "BGP updates debugging (outbound) is off%s", VTY_NEWLINE); } } + + if (found_peer) + { + struct peer_af *paf; + int af; + + PEERAF_FOREACH (peer, paf, af) + { + if (PAF_SUBGRP (paf)) + { + UPDGRP_PEER_DBG_DIS(PAF_SUBGRP(paf)->update_group); + } + } + } } if (found_peer) @@ -1133,6 +1178,8 @@ DEFUN (no_debug_bgp_update, bgp_debug_list_free(bgp_debug_update_out_peers); bgp_debug_list_free(bgp_debug_update_prefixes); + bgp_debug_clear_updgrp_update_dbg(vty->index); + if (vty->node == CONFIG_NODE) { 
DEBUG_OFF (update, UPDATE_IN); @@ -1281,6 +1328,42 @@ DEFUN (no_debug_bgp_zebra_prefix, return CMD_SUCCESS; } +/* debug bgp update-groups */ +DEFUN (debug_bgp_update_groups, + debug_bgp_update_groups_cmd, + "debug bgp update-groups", + DEBUG_STR + BGP_STR + "BGP update-groups\n") +{ + if (vty->node == CONFIG_NODE) + DEBUG_ON (update_groups, UPDATE_GROUPS); + else + { + TERM_DEBUG_ON (update_groups, UPDATE_GROUPS); + vty_out (vty, "BGP update-groups debugging is on%s", VTY_NEWLINE); + } + return CMD_SUCCESS; +} + +DEFUN (no_debug_bgp_update_groups, + no_debug_bgp_update_groups_cmd, + "no debug bgp update-groups", + NO_STR + DEBUG_STR + BGP_STR + "BGP update-groups\n") +{ + if (vty->node == CONFIG_NODE) + DEBUG_OFF (update_groups, UPDATE_GROUPS); + else + { + TERM_DEBUG_OFF (update_groups, UPDATE_GROUPS); + vty_out (vty, "BGP update-groups debugging is off%s", VTY_NEWLINE); + } + return CMD_SUCCESS; +} + DEFUN (no_debug_bgp, no_debug_bgp_cmd, "no debug bgp", @@ -1295,6 +1378,8 @@ DEFUN (no_debug_bgp, bgp_debug_list_free(bgp_debug_update_prefixes); bgp_debug_list_free(bgp_debug_zebra_prefixes); + bgp_debug_clear_updgrp_update_dbg(vty->index); + TERM_DEBUG_OFF (keepalive, KEEPALIVE); TERM_DEBUG_OFF (update, UPDATE_IN); TERM_DEBUG_OFF (update, UPDATE_OUT); @@ -1350,6 +1435,9 @@ DEFUN (show_debugging_bgp, bgp_debug_list_print (vty, " BGP zebra debugging is on", bgp_debug_zebra_prefixes); + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + vty_out (vty, " BGP update-groups debugging is on%s", VTY_NEWLINE); + vty_out (vty, "%s", VTY_NEWLINE); return CMD_SUCCESS; } @@ -1411,6 +1499,12 @@ bgp_config_write_debug (struct vty *vty) write++; } + if (CONF_BGP_DEBUG (update_groups, UPDATE_GROUPS)) + { + vty_out (vty, "debug bgp update-groups%s", VTY_NEWLINE); + write++; + } + return write; } @@ -1445,6 +1539,8 @@ bgp_debug_init (void) install_element (CONFIG_NODE, &debug_bgp_update_direct_cmd); install_element (ENABLE_NODE, &debug_bgp_zebra_cmd); install_element (CONFIG_NODE, 
&debug_bgp_zebra_cmd); + install_element (ENABLE_NODE, &debug_bgp_update_groups_cmd); + install_element (CONFIG_NODE, &debug_bgp_update_groups_cmd); /* deb bgp updates [in|out] A.B.C.D */ install_element (ENABLE_NODE, &debug_bgp_update_direct_peer_cmd); @@ -1491,6 +1587,8 @@ bgp_debug_init (void) install_element (CONFIG_NODE, &no_debug_bgp_update_cmd); install_element (ENABLE_NODE, &no_debug_bgp_zebra_cmd); install_element (CONFIG_NODE, &no_debug_bgp_zebra_cmd); + install_element (ENABLE_NODE, &no_debug_bgp_update_groups_cmd); + install_element (CONFIG_NODE, &no_debug_bgp_update_groups_cmd); install_element (ENABLE_NODE, &no_debug_bgp_cmd); } @@ -1577,7 +1675,8 @@ bgp_debug_keepalive (struct peer *peer) } int -bgp_debug_update (struct peer *peer, struct prefix *p, unsigned int inbound) +bgp_debug_update (struct peer *peer, struct prefix *p, + struct update_group *updgrp, unsigned int inbound) { if (inbound) { @@ -1593,6 +1692,10 @@ bgp_debug_update (struct peer *peer, struct prefix *p, unsigned int inbound) BGP_DEBUG_UPDATE_OUT, bgp_debug_update_out_peers)) return 1; + + /* Check if update debugging implicitly enabled for the group. 
*/ + if (updgrp && UPDGRP_DBG_ON(updgrp)) + return 1; } diff --git a/bgpd/bgp_debug.h b/bgpd/bgp_debug.h index b8ad7b9cfa..a80d00440e 100644 --- a/bgpd/bgp_debug.h +++ b/bgpd/bgp_debug.h @@ -22,6 +22,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #define _QUAGGA_BGP_DEBUG_H #include "bgp_attr.h" +#include "bgp_updgrp.h" /* sort of packet direction */ #define DUMP_ON 1 @@ -65,6 +66,7 @@ extern unsigned long conf_bgp_debug_keepalive; extern unsigned long conf_bgp_debug_update; extern unsigned long conf_bgp_debug_zebra; extern unsigned long conf_bgp_debug_nht; +extern unsigned long conf_bgp_debug_update_groups; extern unsigned long term_bgp_debug_as4; extern unsigned long term_bgp_debug_neighbor_events; @@ -73,6 +75,7 @@ extern unsigned long term_bgp_debug_keepalive; extern unsigned long term_bgp_debug_update; extern unsigned long term_bgp_debug_zebra; extern unsigned long term_bgp_debug_nht; +extern unsigned long term_bgp_debug_update_groups; extern struct list *bgp_debug_neighbor_events_peers; extern struct list *bgp_debug_keepalive_peers; @@ -98,6 +101,7 @@ struct bgp_debug_filter #define BGP_DEBUG_UPDATE_PREFIX 0x04 #define BGP_DEBUG_ZEBRA 0x01 #define BGP_DEBUG_NHT 0x01 +#define BGP_DEBUG_UPDATE_GROUPS 0x01 #define BGP_DEBUG_PACKET_SEND 0x01 #define BGP_DEBUG_PACKET_SEND_DETAIL 0x02 @@ -125,13 +129,15 @@ struct bgp_debug_filter extern const char *bgp_type_str[]; extern int bgp_dump_attr (struct peer *, struct attr *, char *, size_t); +extern int bgp_debug_peer_updout_enabled(struct peer *peer); extern void bgp_notify_print (struct peer *, struct bgp_notify *, const char *); extern const struct message bgp_status_msg[]; extern const int bgp_status_msg_max; extern int bgp_debug_neighbor_events(struct peer *peer); extern int bgp_debug_keepalive(struct peer *peer); -extern int bgp_debug_update(struct peer *peer, struct prefix *p, unsigned int inbound); +extern int bgp_debug_update(struct peer *peer, struct prefix *p, + struct update_group 
*updgrp, unsigned int inbound); extern int bgp_debug_zebra(struct prefix *p); #endif /* _QUAGGA_BGP_DEBUG_H */ diff --git a/bgpd/bgp_dump.c b/bgpd/bgp_dump.c index a3c9526fd7..9ee3285def 100644 --- a/bgpd/bgp_dump.c +++ b/bgpd/bgp_dump.c @@ -27,6 +27,8 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "prefix.h" #include "thread.h" #include "linklist.h" +#include "queue.h" + #include "bgpd/bgp_table.h" #include "bgpd/bgpd.h" diff --git a/bgpd/bgp_ecommunity.c b/bgpd/bgp_ecommunity.c index 8a326a8bb5..05e0c01722 100644 --- a/bgpd/bgp_ecommunity.c +++ b/bgpd/bgp_ecommunity.c @@ -24,6 +24,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "memory.h" #include "prefix.h" #include "command.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_ecommunity.h" diff --git a/bgpd/bgp_filter.c b/bgpd/bgp_filter.c index fa0889cdbe..ca9ce67e5f 100644 --- a/bgpd/bgp_filter.c +++ b/bgpd/bgp_filter.c @@ -24,6 +24,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" #include "memory.h" #include "buffer.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_aspath.h" diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index f70ad25aac..28cfb45af2 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -31,6 +31,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "memory.h" #include "plist.h" #include "workqueue.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_attr.h" @@ -45,6 +46,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #ifdef HAVE_SNMP #include "bgpd/bgp_snmp.h" #endif /* HAVE_SNMP */ +#include "bgpd/bgp_updgrp.h" /* BGP FSM (finite state machine) has three types of functions. Type one is thread functions. Type two is event functions. 
Type three @@ -93,11 +95,17 @@ peer_xfer_conn(struct peer *from_peer) if (!peer || !CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE)) return from_peer; + if (bgp_debug_neighbor_events(peer)) + zlog_debug ("peer transfer (%s -> %s)", from_peer->host, peer->host); + BGP_WRITE_OFF(peer->t_write); BGP_READ_OFF(peer->t_read); BGP_WRITE_OFF(from_peer->t_write); BGP_READ_OFF(from_peer->t_read); + BGP_TIMER_OFF(peer->t_routeadv); + BGP_TIMER_OFF(from_peer->t_routeadv); + fd = peer->fd; peer->fd = from_peer->fd; from_peer->fd = fd; @@ -390,27 +398,26 @@ bgp_keepalive_timer (struct thread *thread) static int bgp_routeq_empty (struct peer *peer) { - afi_t afi; - safi_t safi; + struct peer_af *paf; + int af; - for (afi = AFI_IP; afi < AFI_MAX; afi++) - for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) - { - if (!FIFO_EMPTY(&peer->sync[afi][safi]->withdraw) || - !FIFO_EMPTY(&peer->sync[afi][safi]->update)) - return 0; - } + PEERAF_FOREACH(peer, paf, af) + { + if (!PAF_SUBGRP(paf)) + continue; + if (!advertise_list_is_empty(PAF_SUBGRP(paf))) + return 0; + } return 1; } -static int +int bgp_routeadv_timer (struct thread *thread) { struct peer *peer; peer = THREAD_ARG (thread); peer->t_routeadv = NULL; - peer->radv_adjusted = 0; if (bgp_debug_neighbor_events(peer)) zlog_debug ("%s [FSM] Timer (routeadv timer expire)", peer->host); @@ -419,10 +426,9 @@ bgp_routeadv_timer (struct thread *thread) BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); - /* MRAI timer is no longer restarted here, it would be done - * when the FIFO is built. + /* MRAI timer will be started again when FIFO is built, no need to + * do it here. 
*/ - return 0; } @@ -630,9 +636,6 @@ bgp_adjust_routeadv (struct peer *peer) return; } - /* Mark that we've adjusted the timer */ - peer->radv_adjusted = 1; - /* * CASE I: @@ -655,8 +658,6 @@ bgp_adjust_routeadv (struct peer *peer) { BGP_TIMER_OFF(peer->t_routeadv); BGP_TIMER_ON(peer->t_routeadv, bgp_routeadv_timer, 0); - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug ("%s: MRAI timer to expire instantly", peer->host); return; } @@ -685,8 +686,6 @@ bgp_adjust_routeadv (struct peer *peer) { BGP_TIMER_OFF(peer->t_routeadv); BGP_TIMER_ON(peer->t_routeadv, bgp_routeadv_timer, diff); - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug ("%s: MRAI timer to expire in %f secs", peer->host, diff); } } @@ -720,8 +719,6 @@ bgp_maxmed_onstartup_active (struct bgp *bgp) void bgp_maxmed_update (struct bgp *bgp) { - struct listnode *node, *nnode; - struct peer *peer; u_char maxmed_active; u_int32_t maxmed_value; @@ -747,8 +744,7 @@ bgp_maxmed_update (struct bgp *bgp) bgp->maxmed_active = maxmed_active; bgp->maxmed_value = maxmed_value; - for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) - bgp_announce_route_all (peer); + update_group_announce(bgp); } } @@ -1005,6 +1001,10 @@ bgp_stop (struct peer *peer) /* set last reset time */ peer->resettime = peer->uptime = bgp_clock (); + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("%s remove from all update group", peer->host); + update_group_remove_peer_afs(peer); + #ifdef HAVE_SNMP bgpTrapBackwardTransition (peer); #endif /* HAVE_SNMP */ @@ -1339,6 +1339,7 @@ static int bgp_establish (struct peer *peer) { struct bgp_notify *notify; + struct peer_af *paf; afi_t afi; safi_t safi; int nsf_af_count = 0; @@ -1377,6 +1378,9 @@ bgp_establish (struct peer *peer) if (bgp_flag_check (peer->bgp, BGP_FLAG_LOG_NEIGHBOR_CHANGES)) zlog_info ("%%ADJCHANGE: neighbor %s Up", peer->host); + /* assign update-group/subgroup */ + update_group_adjust_peer_afs(peer); + /* graceful restart */ UNSET_FLAG (peer->sflags, PEER_STATUS_NSF_WAIT); 
for (afi = AFI_IP ; afi < AFI_MAX ; afi++) @@ -1449,14 +1453,17 @@ bgp_establish (struct peer *peer) || CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_OLD_RCV)) SET_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_ORF_WAIT_REFRESH); - bgp_announce_route_all (peer); + bgp_announce_peer (peer); /* Start the route advertisement timer to send updates to the peer - if BGP * is not in read-only mode. If it is, the timer will be started at the end * of read-only mode. */ if (!bgp_update_delay_active(peer->bgp)) - BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, 0); + { + BGP_TIMER_OFF(peer->t_routeadv); + BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, 0); + } if (peer->doppelganger && (peer->doppelganger->status != Deleted)) { diff --git a/bgpd/bgp_fsm.h b/bgpd/bgp_fsm.h index bd6c416d32..6937a6c3b7 100644 --- a/bgpd/bgp_fsm.h +++ b/bgpd/bgp_fsm.h @@ -40,7 +40,13 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA if (!(T) && (peer->status != Deleted)) \ THREAD_WRITE_ON(master,(T),(F),peer,(V)); \ } while (0) - + +#define BGP_PEER_WRITE_ON(T,F,V, peer) \ + do { \ + if (!(T) && ((peer)->status != Deleted)) \ + THREAD_WRITE_ON(master,(T),(F),(peer),(V)); \ + } while (0) + #define BGP_WRITE_OFF(T) \ do { \ if (T) \ @@ -79,6 +85,7 @@ extern int bgp_event (struct thread *); extern int bgp_event_update (struct peer *, int event); extern int bgp_stop (struct peer *peer); extern void bgp_timer_set (struct peer *); +extern int bgp_routeadv_timer (struct thread *); extern void bgp_fsm_change_status (struct peer *peer, int status); extern const char *peer_down_str[]; extern void bgp_update_delay_end (struct bgp *); diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index 234f17d1f9..4da30133e3 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -36,6 +36,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "filter.h" #include "plist.h" #include "stream.h" +#include "queue.h" #include "bgpd/bgpd.h" #include 
"bgpd/bgp_attr.h" diff --git a/bgpd/bgp_mpath.c b/bgpd/bgp_mpath.c index b354520ed8..2d080a12bf 100644 --- a/bgpd/bgp_mpath.c +++ b/bgpd/bgp_mpath.c @@ -28,6 +28,7 @@ #include "linklist.h" #include "sockunion.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -412,7 +413,8 @@ bgp_info_mpath_update (struct bgp_node *rn, struct bgp_info *new_best, old_mpath_count = 0; prev_mpath = new_best; mp_node = listhead (mp_list); - debug = bgp_debug_update(NULL, &rn->p, 1) || bgp_debug_update(NULL, &rn->p, 0); + debug = bgp_debug_update(NULL, &rn->p, NULL, 1) || + bgp_debug_update(NULL, &rn->p, NULL, 0); if (debug) prefix2str (&rn->p, pfx_buf, sizeof (pfx_buf)); diff --git a/bgpd/bgp_mplsvpn.c b/bgpd/bgp_mplsvpn.c index 22ae54e0e0..a497efaf38 100644 --- a/bgpd/bgp_mplsvpn.c +++ b/bgpd/bgp_mplsvpn.c @@ -25,6 +25,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" #include "memory.h" #include "stream.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" diff --git a/bgpd/bgp_network.c b/bgpd/bgp_network.c index a36af72ff9..eb8bad7627 100644 --- a/bgpd/bgp_network.c +++ b/bgpd/bgp_network.c @@ -31,6 +31,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "privs.h" #include "linklist.h" #include "network.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_open.h" diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c index df3c903431..8095e6233e 100644 --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -31,6 +31,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "hash.h" #include "jhash.h" #include "nexthop.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" diff --git a/bgpd/bgp_open.c b/bgpd/bgp_open.c index 7aef76d53d..d35be4b1f3 100644 --- a/bgpd/bgp_open.c +++ b/bgpd/bgp_open.c @@ -27,6 +27,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" 
#include "command.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_attr.h" diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index 7213d1a00b..5289f9cd58 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -30,6 +30,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "sockunion.h" /* for inet_ntop () */ #include "linklist.h" #include "plist.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -47,11 +48,12 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_mplsvpn.h" #include "bgpd/bgp_advertise.h" #include "bgpd/bgp_vty.h" +#include "bgpd/bgp_updgrp.h" int stream_put_prefix (struct stream *, struct prefix *); /* Set up BGP packet marker and packet type. */ -static int +int bgp_packet_set_marker (struct stream *s, u_char type) { int i; @@ -72,7 +74,7 @@ bgp_packet_set_marker (struct stream *s, u_char type) /* Set BGP packet header size entry. If size is zero then use current stream size. */ -static int +int bgp_packet_set_size (struct stream *s) { int cp; @@ -85,7 +87,7 @@ bgp_packet_set_size (struct stream *s) } /* Add new packet to the peer. */ -static void +void bgp_packet_add (struct peer *peer, struct stream *s) { /* Add packet to the end of list. */ @@ -140,181 +142,6 @@ bgp_connect_check (struct peer *peer, int change_state) } } -/* Make BGP update packet. 
*/ -static struct stream * -bgp_update_packet (struct peer *peer, afi_t afi, safi_t safi) -{ - struct stream *s; - struct stream *snlri; - struct bgp_adj_out *adj; - struct bgp_advertise *adv; - struct stream *packet; - struct bgp_node *rn = NULL; - struct bgp_info *binfo = NULL; - bgp_size_t total_attr_len = 0; - unsigned long attrlen_pos = 0; - int space_remaining = 0; - int space_needed = 0; - size_t mpattrlen_pos = 0; - size_t mpattr_pos = 0; - int num_pfx_adv = 0; - char send_attr_str[BUFSIZ]; - int send_attr_printed; - - s = peer->work; - stream_reset (s); - snlri = peer->scratch; - stream_reset (snlri); - - adv = FIFO_HEAD (&peer->sync[afi][safi]->update); - - while (adv) - { - assert (adv->rn); - rn = adv->rn; - adj = adv->adj; - if (adv->binfo) - binfo = adv->binfo; - - space_remaining = STREAM_CONCAT_REMAIN (s, snlri, STREAM_SIZE(s)) - - BGP_MAX_PACKET_SIZE_OVERFLOW; - space_needed = BGP_NLRI_LENGTH + PSIZE (rn->p.prefixlen); - - /* When remaining space can't include NLRI and it's length. */ - if (space_remaining < space_needed) - break; - - /* If packet is empty, set attribute. */ - if (stream_empty (s)) - { - struct peer *from = NULL; - - if (binfo) - from = binfo->peer; - - /* 1: Write the BGP message header - 16 bytes marker, 2 bytes length, - * one byte message type. - */ - bgp_packet_set_marker (s, BGP_MSG_UPDATE); - - /* 2: withdrawn routes length */ - stream_putw (s, 0); - - /* 3: total attributes length - attrlen_pos stores the position */ - attrlen_pos = stream_get_endp (s); - stream_putw (s, 0); - - /* 4: if there is MP_REACH_NLRI attribute, that should be the first - * attribute, according to draft-ietf-idr-error-handling. Save the - * position. - */ - mpattr_pos = stream_get_endp(s); - - /* 5: Encode all the attributes, except MP_REACH_NLRI attr. 
*/ - total_attr_len = bgp_packet_attribute (NULL, peer, s, - adv->baa->attr, - NULL, afi, safi, - from, NULL, NULL); - - space_remaining = STREAM_CONCAT_REMAIN (s, snlri, STREAM_SIZE(s)) - - BGP_MAX_PACKET_SIZE_OVERFLOW; - space_needed = BGP_NLRI_LENGTH + PSIZE (rn->p.prefixlen); - - /* If the attributes alone do not leave any room for NLRI then - * return */ - if (space_remaining < space_needed) - { - zlog_err ("%s cannot send UPDATE, the attributes do not leave " - "room for NLRI", peer->host); - /* Flush the FIFO update queue */ - while (adv) - adv = bgp_advertise_clean (peer, adv->adj, afi, safi); - return NULL; - } - - if (BGP_DEBUG (update, UPDATE_OUT) || - BGP_DEBUG (update, UPDATE_PREFIX)) - { - memset (send_attr_str, 0, BUFSIZ); - send_attr_printed = 0; - bgp_dump_attr (peer, adv->baa->attr, send_attr_str, BUFSIZ); - } - } - - if (afi == AFI_IP && safi == SAFI_UNICAST) - stream_put_prefix (s, &rn->p); - else - { - /* Encode the prefix in MP_REACH_NLRI attribute */ - struct prefix_rd *prd = NULL; - u_char *tag = NULL; - - if (rn->prn) - prd = (struct prefix_rd *) &rn->prn->p; - if (binfo && binfo->extra) - tag = binfo->extra->tag; - - if (stream_empty(snlri)) - mpattrlen_pos = bgp_packet_mpattr_start(snlri, afi, safi, - adv->baa->attr); - bgp_packet_mpattr_prefix(snlri, afi, safi, &rn->p, prd, tag); - } - num_pfx_adv++; - - if (bgp_debug_update(peer, &rn->p, 0)) - { - if (!send_attr_printed) - { - zlog_debug ("%s send UPDATE w/ attr: %s", peer->host, send_attr_str); - send_attr_printed = 1; - } - char buf[INET6_BUFSIZ]; - - zlog_debug ("%s send UPDATE %s/%d", - peer->host, - inet_ntop (rn->p.family, &(rn->p.u.prefix), buf, INET6_BUFSIZ), - rn->p.prefixlen); - } - - /* Synchnorize attribute. */ - if (adj->attr) - bgp_attr_unintern (&adj->attr); - else - peer->scount[afi][safi]++; - - adj->attr = bgp_attr_intern (adv->baa->attr); - - adv = bgp_advertise_clean (peer, adj, afi, safi); - } - - if (! 
stream_empty (s)) - { - if (!stream_empty(snlri)) - { - bgp_packet_mpattr_end(snlri, mpattrlen_pos); - total_attr_len += stream_get_endp(snlri); - } - - /* set the total attribute length correctly */ - stream_putw_at (s, attrlen_pos, total_attr_len); - - if (!stream_empty(snlri)) - packet = stream_dupcat(s, snlri, mpattr_pos); - else - packet = stream_dup (s); - bgp_packet_set_size (packet); - if (BGP_DEBUG (update, UPDATE_OUT)) - zlog_debug("%s form UPDATE (adv) total len %d numPfx %d", - peer->host, - (stream_get_endp (s) - stream_get_getp (s)), num_pfx_adv); - bgp_packet_add (peer, packet); - stream_reset (s); - stream_reset (snlri); - return packet; - } - return NULL; -} - static struct stream * bgp_update_packet_eor (struct peer *peer, afi_t afi, safi_t safi) { @@ -358,299 +185,28 @@ bgp_update_packet_eor (struct peer *peer, afi_t afi, safi_t safi) return packet; } -/* Make BGP withdraw packet. */ -/* For ipv4 unicast: - 16-octet marker | 2-octet length | 1-octet type | - 2-octet withdrawn route length | withdrawn prefixes | 2-octet attrlen (=0) -*/ -/* For other afi/safis: - 16-octet marker | 2-octet length | 1-octet type | - 2-octet withdrawn route length (=0) | 2-octet attrlen | - mp_unreach attr type | attr len | afi | safi | withdrawn prefixes -*/ -static struct stream * -bgp_withdraw_packet (struct peer *peer, afi_t afi, safi_t safi) -{ - struct stream *s; - struct stream *packet; - struct bgp_adj_out *adj; - struct bgp_advertise *adv; - struct bgp_node *rn; - unsigned long pos; - bgp_size_t unfeasible_len; - bgp_size_t total_attr_len; - size_t mp_start = 0; - size_t attrlen_pos = 0; - size_t mplen_pos = 0; - u_char first_time = 1; - int space_remaining = 0; - int space_needed = 0; - int num_pfx_wd = 0; - - s = peer->work; - stream_reset (s); - - while ((adv = FIFO_HEAD (&peer->sync[afi][safi]->withdraw)) != NULL) - { - assert (adv->rn); - adj = adv->adj; - rn = adv->rn; - - space_remaining = STREAM_REMAIN (s) - - BGP_MAX_PACKET_SIZE_OVERFLOW; - 
space_needed = (BGP_NLRI_LENGTH + BGP_TOTAL_ATTR_LEN + - PSIZE (rn->p.prefixlen)); - - if (space_remaining < space_needed) - break; - - if (stream_empty (s)) - { - bgp_packet_set_marker (s, BGP_MSG_UPDATE); - stream_putw (s, 0); /* unfeasible routes length */ - } - else - first_time = 0; - - if (afi == AFI_IP && safi == SAFI_UNICAST) - stream_put_prefix (s, &rn->p); - else - { - struct prefix_rd *prd = NULL; - - if (rn->prn) - prd = (struct prefix_rd *) &rn->prn->p; - - /* If first time, format the MP_UNREACH header */ - if (first_time) - { - attrlen_pos = stream_get_endp (s); - /* total attr length = 0 for now. reevaluate later */ - stream_putw (s, 0); - mp_start = stream_get_endp (s); - mplen_pos = bgp_packet_mpunreach_start(s, afi, safi); - } - - bgp_packet_mpunreach_prefix(s, &rn->p, afi, safi, prd, NULL); - } - num_pfx_wd++; - - if (bgp_debug_update(peer, &rn->p, 0)) - { - char buf[INET6_BUFSIZ]; - - zlog_debug ("%s send UPDATE %s/%d -- unreachable", - peer->host, - inet_ntop (rn->p.family, &(rn->p.u.prefix), buf, INET6_BUFSIZ), - rn->p.prefixlen); - } - - peer->scount[afi][safi]--; - - bgp_adj_out_remove (rn, adj, peer, afi, safi); - bgp_unlock_node (rn); - } - - if (! stream_empty (s)) - { - if (afi == AFI_IP && safi == SAFI_UNICAST) - { - unfeasible_len - = stream_get_endp (s) - BGP_HEADER_SIZE - BGP_UNFEASIBLE_LEN; - stream_putw_at (s, BGP_HEADER_SIZE, unfeasible_len); - stream_putw (s, 0); - } - else - { - /* Set the mp_unreach attr's length */ - bgp_packet_mpunreach_end(s, mplen_pos); - - /* Set total path attribute length. 
*/ - total_attr_len = stream_get_endp(s) - mp_start; - stream_putw_at (s, attrlen_pos, total_attr_len); - } - bgp_packet_set_size (s); - if (BGP_DEBUG (update, UPDATE_OUT)) - zlog_debug("%s form UPDATE (wd) total len %d numPfx %d", - peer->host, - (stream_get_endp (s) - stream_get_getp (s)), num_pfx_wd); - packet = stream_dup (s); - bgp_packet_add (peer, packet); - stream_reset (s); - return packet; - } - - return NULL; -} - -void -bgp_default_update_send (struct peer *peer, struct attr *attr, - afi_t afi, safi_t safi, struct peer *from) -{ - struct stream *s; - struct stream *packet; - struct prefix p; - unsigned long pos; - bgp_size_t total_attr_len; - - if (DISABLE_BGP_ANNOUNCE) - return; - - if (afi == AFI_IP) - str2prefix ("0.0.0.0/0", &p); -#ifdef HAVE_IPV6 - else - str2prefix ("::/0", &p); -#endif /* HAVE_IPV6 */ - - /* Logging the attribute. */ - if (bgp_debug_update(peer, &p, 0)) - { - char attrstr[BUFSIZ]; - char buf[INET6_BUFSIZ]; - attrstr[0] = '\0'; - - bgp_dump_attr (peer, attr, attrstr, BUFSIZ); - zlog_debug ("%s send UPDATE %s/%d %s", - peer->host, inet_ntop(p.family, &(p.u.prefix), buf, INET6_BUFSIZ), - p.prefixlen, attrstr); - } - - s = stream_new (BGP_MAX_PACKET_SIZE); - - /* Make BGP update packet. */ - bgp_packet_set_marker (s, BGP_MSG_UPDATE); - - /* Unfeasible Routes Length. */ - stream_putw (s, 0); - - /* Make place for total attribute length. */ - pos = stream_get_endp (s); - stream_putw (s, 0); - total_attr_len = bgp_packet_attribute (NULL, peer, s, attr, &p, afi, safi, from, NULL, NULL); - - /* Set Total Path Attribute Length. */ - stream_putw_at (s, pos, total_attr_len); - - /* NLRI set. */ - if (p.family == AF_INET && safi == SAFI_UNICAST) - stream_put_prefix (s, &p); - - /* Set size. */ - bgp_packet_set_size (s); - - packet = stream_dup (s); - stream_free (s); - - /* Dump packet if debug option is set. */ -#ifdef DEBUG - /* bgp_packet_dump (packet); */ -#endif /* DEBUG */ - - /* Add packet to the peer. 
*/ - bgp_packet_add (peer, packet); - - BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); -} - -void -bgp_default_withdraw_send (struct peer *peer, afi_t afi, safi_t safi) -{ - struct stream *s; - struct stream *packet; - struct prefix p; - unsigned long attrlen_pos = 0; - unsigned long cp; - bgp_size_t unfeasible_len; - bgp_size_t total_attr_len; - size_t mp_start = 0; - size_t mplen_pos = 0; - - if (DISABLE_BGP_ANNOUNCE) - return; - - if (afi == AFI_IP) - str2prefix ("0.0.0.0/0", &p); -#ifdef HAVE_IPV6 - else - str2prefix ("::/0", &p); -#endif /* HAVE_IPV6 */ - - total_attr_len = 0; - - if (bgp_debug_update(peer, &p, 0)) - { - char buf[INET6_BUFSIZ]; - - zlog_debug ("%s send UPDATE %s/%d -- unreachable", - peer->host, inet_ntop(p.family, &(p.u.prefix), buf, INET6_BUFSIZ), - p.prefixlen); - } - - s = stream_new (BGP_MAX_PACKET_SIZE); - - /* Make BGP update packet. */ - bgp_packet_set_marker (s, BGP_MSG_UPDATE); - - /* Unfeasible Routes Length. */; - cp = stream_get_endp (s); - stream_putw (s, 0); - - /* Withdrawn Routes. */ - if (p.family == AF_INET && safi == SAFI_UNICAST) - { - stream_put_prefix (s, &p); - - unfeasible_len = stream_get_endp (s) - cp - 2; - - /* Set unfeasible len. */ - stream_putw_at (s, cp, unfeasible_len); - - /* Set total path attribute length. */ - stream_putw (s, 0); - } - else - { - attrlen_pos = stream_get_endp (s); - stream_putw (s, 0); - mp_start = stream_get_endp (s); - mplen_pos = bgp_packet_mpunreach_start(s, afi, safi); - bgp_packet_mpunreach_prefix(s, &p, afi, safi, NULL, NULL); - - /* Set the mp_unreach attr's length */ - bgp_packet_mpunreach_end(s, mplen_pos); - - /* Set total path attribute length. */ - total_attr_len = stream_get_endp(s) - mp_start; - stream_putw_at (s, attrlen_pos, total_attr_len); - } - - bgp_packet_set_size (s); - - packet = stream_dup (s); - stream_free (s); - - /* Add packet to the peer. 
*/ - bgp_packet_add (peer, packet); - - BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); -} - /* Get next packet to be written. */ static struct stream * bgp_write_packet (struct peer *peer) { + struct stream *s = NULL; + struct peer_af *paf; + struct bpacket *next_pkt; afi_t afi; safi_t safi; - struct stream *s = NULL; - struct bgp_advertise *adv; s = stream_fifo_head (peer->obuf); if (s) return s; - /* The code beyond this part deals with update packets, check if updates - are on hold as part of the update-delay post processing stages. */ + /* + * The code beyond this part deals with update packets, proceed only + * if peer is Established and updates are not on hold (as part of + * update-delay post processing). + */ + if (peer->status != Established) + return NULL; + if (peer->bgp && (peer->bgp->main_peers_update_hold || peer->bgp->rsclient_peers_update_hold)) return NULL; @@ -658,138 +214,109 @@ bgp_write_packet (struct peer *peer) for (afi = AFI_IP; afi < AFI_MAX; afi++) for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) { - adv = FIFO_HEAD (&peer->sync[afi][safi]->withdraw); - if (adv) + paf = peer_af_find (peer, afi, safi); + if (!paf || !PAF_SUBGRP(paf)) + continue; + next_pkt = paf->next_pkt_to_send; + + /* Try to generate a packet for the peer if we are at the end of + * the list. Always try to push out WITHDRAWs first. 
*/ + if (!next_pkt || !next_pkt->buffer) { - s = bgp_withdraw_packet (peer, afi, safi); - if (s) - return s; + next_pkt = subgroup_withdraw_packet(PAF_SUBGRP(paf)); + if (!next_pkt || !next_pkt->buffer) + subgroup_update_packet (PAF_SUBGRP(paf)); + next_pkt = paf->next_pkt_to_send; } - } - - for (afi = AFI_IP; afi < AFI_MAX; afi++) - for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) - { - adv = FIFO_HEAD (&peer->sync[afi][safi]->update); - if (adv) - { - if (adv->binfo && adv->binfo->uptime <= peer->synctime) + + /* If we still don't have a packet to send to the peer, then + * try to find out out if we have to send eor or if not, skip to + * the next AFI, SAFI. + * Don't send the EOR prematurely... if the subgroup's coalesce + * timer is running, the adjacency-out structure is not created + * yet. + */ + if (!next_pkt || !next_pkt->buffer) + { + if (CHECK_FLAG (peer->cap, PEER_CAP_RESTART_RCV)) { - if (CHECK_FLAG (adv->binfo->peer->cap, PEER_CAP_RESTART_RCV) - && CHECK_FLAG (adv->binfo->peer->cap, PEER_CAP_RESTART_ADV) - && ! (CHECK_FLAG (adv->binfo->peer->cap, - PEER_CAP_RESTART_BIT_RCV) && - CHECK_FLAG (adv->binfo->peer->cap, - PEER_CAP_RESTART_BIT_ADV)) - && ! CHECK_FLAG (adv->binfo->flags, BGP_INFO_STALE) + if (!(PAF_SUBGRP(paf))->t_coalesce && + peer->afc_nego[afi][safi] && peer->synctime + && ! CHECK_FLAG (peer->af_sflags[afi][safi], + PEER_STATUS_EOR_SEND) && safi != SAFI_MPLS_VPN) { - if (CHECK_FLAG (adv->binfo->peer->af_sflags[afi][safi], - PEER_STATUS_EOR_RECEIVED)) - s = bgp_update_packet (peer, afi, safi); + SET_FLAG (peer->af_sflags[afi][safi], + PEER_STATUS_EOR_SEND); + return bgp_update_packet_eor (peer, afi, safi); } - else - s = bgp_update_packet (peer, afi, safi); - } - if (s) - return s; + } + continue; } - if (CHECK_FLAG (peer->cap, PEER_CAP_RESTART_RCV)) - { - if (peer->afc_nego[afi][safi] && peer->synctime - && ! 
CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_EOR_SEND) - && safi != SAFI_MPLS_VPN) - { - SET_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_EOR_SEND); - return bgp_update_packet_eor (peer, afi, safi); - } - } + + /* + * Found a packet template to send, overwrite packet with appropriate + * attributes from peer and advance peer + */ + s = bpacket_reformat_for_peer (next_pkt, paf); + bpacket_queue_advance_peer (paf); + if (bgp_debug_update(peer, NULL, NULL, 0)) + zlog_debug ("u%llu:s%llu %s send UPDATE len %d ", + PAF_SUBGRP(paf)->update_group->id, PAF_SUBGRP(paf)->id, + peer->host, (stream_get_endp(s) - stream_get_getp(s))); + return s; } return NULL; } -/* Are there prefixes queued for being withdrawn? */ -int -bgp_peer_wd_fifo_exists (struct peer *peer) +/* The next action for the peer from a write perspective */ +static void +bgp_write_proceed_actions (struct peer *peer) { afi_t afi; safi_t safi; - struct bgp_advertise *adv; + struct peer_af *paf; + struct bpacket *next_pkt; + int fullq_found = 0; - for (afi = AFI_IP; afi < AFI_MAX; afi++) - for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) - if (FIFO_HEAD (&peer->sync[afi][safi]->withdraw)) - return 1; - - return 0; -} - -/* Are there prefixes queued for being advertised? - * Are they recent? - */ -int -bgp_peer_adv_fifo_exists (struct peer *peer, int chk_recent) -{ - afi_t afi; - safi_t safi; - struct bgp_advertise *adv; - - for (afi = AFI_IP; afi < AFI_MAX; afi++) - for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) - if ((adv = FIFO_HEAD (&peer->sync[afi][safi]->update)) != NULL) - { - if (!chk_recent) - return 1; - if (adv->binfo->uptime < peer->synctime) - return 1; - } - - return 0; -} - -/* - * Schedule updates for the peer, if needed. 
- */ -void -bgp_peer_schedule_updates(struct peer *peer) -{ - /* If withdraw FIFO exists, immediately schedule write */ - if (bgp_peer_wd_fifo_exists(peer) && !peer->t_write) - { - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("%s scheduling write thread", peer->host); - BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); - } - - /* If update FIFO exists, fire MRAI timer */ - if (bgp_peer_adv_fifo_exists(peer, 0) && !peer->radv_adjusted) - { - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("%s scheduling MRAI timer", peer->host); - bgp_adjust_routeadv(peer); - } -} - -/* Is there partially written packet or updates we can send right - now. */ -static int -bgp_write_proceed (struct peer *peer) -{ - /* If queued packet exists, we should try to write it */ if (stream_fifo_head (peer->obuf)) - return 1; + { + BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + return; + } - /* If there are prefixes to be withdrawn or to be advertised (and - * queued before last MRAI timer expiry), schedule write - */ - if (bgp_peer_wd_fifo_exists(peer) - || bgp_peer_adv_fifo_exists(peer, 1)) - return 1; - - return 0; + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + paf = peer_af_find (peer, afi, safi); + if (!paf) + continue; + next_pkt = paf->next_pkt_to_send; + if (next_pkt && next_pkt->buffer) + { + BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + return; + } + /* No packets readily available for AFI/SAFI, are there subgroup packets + * that need to be generated? */ + if (paf->subgroup && + bpacket_queue_is_full(SUBGRP_INST(paf->subgroup), + SUBGRP_PKTQ(paf->subgroup))) + fullq_found = 1; + else if (subgroup_packets_to_build (paf->subgroup)) + { + BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + return; + } + } + if (fullq_found) + { + BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + return; + } } /* Write packet to the peer. 
*/ @@ -798,7 +325,7 @@ bgp_write (struct thread *thread) { struct peer *peer; u_char type; - struct stream *s; + struct stream *s; int num; unsigned int count = 0; int oc = 0; @@ -816,7 +343,10 @@ bgp_write (struct thread *thread) s = bgp_write_packet (peer); if (!s) - return 0; /* nothing to send */ + { + bgp_write_proceed_actions (peer); + return 0; + } sockopt_cork (peer->fd, 1); @@ -892,8 +422,7 @@ bgp_write (struct thread *thread) while (++count < peer->bgp->wpkt_quanta && (s = bgp_write_packet (peer)) != NULL); - if (bgp_write_proceed (peer)) - BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + bgp_write_proceed_actions (peer); done: /* Update the last write if some updates were written. */ @@ -910,7 +439,7 @@ bgp_write_notify (struct peer *peer) { int ret, val; u_char type; - struct stream *s; + struct stream *s; /* There should be at least one packet. */ s = stream_fifo_head (peer->obuf); @@ -1801,8 +1330,6 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) if (ret < 0) return -1; - zlog_err ("%s [Update:RECV] Unfeasible NLRI received", peer->host); - withdraw.afi = AFI_IP; withdraw.safi = SAFI_UNICAST; withdraw.nlri = stream_pnt (s); @@ -1871,7 +1398,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) zlog_err ("%s rcvd UPDATE with errors in attr(s)!! 
Withdrawing route.", peer->host); - if (ret && bgp_debug_update(peer, NULL, 1)) + if (ret && bgp_debug_update(peer, NULL, NULL, 1)) { zlog_debug ("%s rcvd UPDATE w/ attr: %s", peer->host, peer->rcvd_attr_str); peer->rcvd_attr_printed = 1; @@ -2052,7 +1579,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) if (peer->nsf[AFI_IP6][SAFI_MULTICAST]) bgp_clear_stale_route (peer, AFI_IP6, SAFI_MULTICAST); - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug ("rcvd End-of-RIB for IPv6 Multicast from %s", peer->host); } } @@ -2082,7 +1609,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) bgp_update_explicit_eors(peer); } - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug ("rcvd End-of-RIB for VPNv4 Unicast from %s", peer->host); } } @@ -2227,7 +1754,7 @@ bgp_route_refresh_receive (struct peer *peer, bgp_size_t size) reserved = stream_getc (s); safi = stream_getc (s); - if (bgp_debug_update(peer, NULL, 0)) + if (bgp_debug_update(peer, NULL, NULL, 0)) zlog_debug ("%s rcvd REFRESH_REQ for afi/safi: %d/%d", peer->host, afi, safi); diff --git a/bgpd/bgp_packet.h b/bgpd/bgp_packet.h index bb3903cf78..34b666fe72 100644 --- a/bgpd/bgp_packet.h +++ b/bgpd/bgp_packet.h @@ -26,12 +26,6 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #define BGP_UNFEASIBLE_LEN 2U #define BGP_WRITE_PACKET_MAX 10U -/* Size of FIFOs upon which write thread is triggered. Note that write - * thread is also triggered upon BGP work-queue completion. 
- */ -#define BGP_ADV_FIFO_QUANTA 500 -#define BGP_WD_FIFO_QUANTA 200 - /* When to refresh */ #define REFRESH_IMMEDIATE 1 #define REFRESH_DEFER 2 @@ -63,8 +57,10 @@ extern int bgp_capability_receive (struct peer *, bgp_size_t); extern void bgp_update_restarted_peers (struct peer *); extern void bgp_update_implicit_eors (struct peer *); extern void bgp_check_update_delay (struct bgp *); -extern int bgp_peer_wd_fifo_exists (struct peer *); -extern int bgp_peer_adv_fifo_exists (struct peer *, int); -extern void bgp_peer_schedule_updates(struct peer *peer); extern int bgp_valid_host_address (unsigned long addr); + +extern int bgp_packet_set_marker (struct stream *s, u_char type); +extern int bgp_packet_set_size (struct stream *s); +extern void bgp_packet_add (struct peer *peer, struct stream *s); + #endif /* _QUAGGA_BGP_PACKET_H */ diff --git a/bgpd/bgp_regex.c b/bgpd/bgp_regex.c index 9b65f7cb15..15c23c9ff9 100644 --- a/bgpd/bgp_regex.c +++ b/bgpd/bgp_regex.c @@ -23,6 +23,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" #include "command.h" #include "memory.h" +#include "queue.h" #include "bgpd.h" #include "bgp_aspath.h" diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index f0b1e5ddf9..7094e8ca45 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -34,6 +34,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "plist.h" #include "thread.h" #include "workqueue.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -56,6 +57,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_vty.h" #include "bgpd/bgp_mpath.h" #include "bgpd/bgp_nht.h" +#include "bgpd/bgp_updgrp.h" /* Extern from bgp_dump.c */ extern const char *bgp_origin_str[]; @@ -986,7 +988,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, { if (IPV4_ADDR_SAME (&peer->remote_id, &riattr->extra->originator_id)) { - if (bgp_debug_update(peer, p, 0)) + if 
(bgp_debug_update(peer, p, NULL, 0)) zlog_debug("%s [Update:SEND] %s/%d originator-id is same as remote router-id", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -1008,7 +1010,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, /* Output filter check. */ if (bgp_output_filter (peer, p, riattr, afi, safi) == FILTER_DENY) { - if (bgp_debug_update(peer, p, 0)) + if (bgp_debug_update(peer, p, NULL, 0)) zlog_debug("%s [Update:SEND] %s/%d is filtered", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -1020,7 +1022,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, /* AS path loop check. */ if (aspath_loop_check (riattr->aspath, peer->as)) { - if (bgp_debug_update(peer, p, 0)) + if (bgp_debug_update(peer, p, NULL, 0)) zlog_debug("%s [Update:SEND] suppress announcement to peer AS %u is AS path.", peer->host, peer->as); return 0; @@ -1032,7 +1034,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, { if (aspath_loop_check(riattr->aspath, bgp->confed_id)) { - if (bgp_debug_update(peer, p, 0)) + if (bgp_debug_update(peer, p, NULL, 0)) zlog_debug("%s [Update:SEND] suppress announcement to peer AS %u is AS path.", peer->host, bgp->confed_id); @@ -1228,6 +1230,331 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, return 1; } +static void +subgroup_announce_reset_nhop (u_char family, struct attr *attr) +{ + if (family == AF_INET) + attr->nexthop.s_addr = 0; +#ifdef HAVE_IPV6 + if (family == AF_INET6) + memset (&attr->extra->mp_nexthop_global, 0, IPV6_MAX_BYTELEN); +#endif +} + +int +subgroup_announce_check (struct bgp_info *ri, struct update_subgroup *subgrp, + struct prefix *p, struct attr *attr) +{ + struct bgp_filter *filter; + struct peer *from; + struct peer *peer; + struct peer *onlypeer; + struct bgp *bgp; + struct attr *riattr; + struct peer_af *paf; + char buf[SU_ADDRSTRLEN]; + int ret; + int 
transparent; + int reflect; + afi_t afi; + safi_t safi; + + if (DISABLE_BGP_ANNOUNCE) + return 0; + + afi = SUBGRP_AFI(subgrp); + safi = SUBGRP_SAFI(subgrp); + peer = SUBGRP_PEER(subgrp); + onlypeer = NULL; + if (CHECK_FLAG (peer->flags, PEER_FLAG_LONESOUL)) + onlypeer = SUBGRP_PFIRST(subgrp)->peer; + + from = ri->peer; + filter = &peer->filter[afi][safi]; + bgp = SUBGRP_INST(subgrp); + riattr = bgp_info_mpath_count (ri) ? bgp_info_mpath_attr (ri) : ri->attr; + + /* Aggregate-address suppress check. */ + if (ri->extra && ri->extra->suppress) + if (! UNSUPPRESS_MAP_NAME (filter)) + { + return 0; + } + + /* Do not send announces to RS-clients from the 'normal' bgp_table. */ + if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) + { + return 0; + } + + /* Do not send back route to sender. */ + if (onlypeer && from == onlypeer) + { + return 0; + } + + /* Transparency check. */ + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT) + && CHECK_FLAG (from->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) + transparent = 1; + else + transparent = 0; + + /* If community is not disabled check the no-export and local. */ + if (! transparent && bgp_community_filter (peer, riattr)) + { + if (bgp_debug_update(NULL, p, subgrp->update_group, 0)) + zlog_debug ("subgrpannouncecheck: community filter check fail"); + return 0; + } + + /* If the attribute has originator-id and it is same as remote + peer's id. 
*/ + if (onlypeer && + riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID) && + (IPV4_ADDR_SAME (&onlypeer->remote_id, &riattr->extra->originator_id))) + { + if (bgp_debug_update(NULL, p, subgrp->update_group, 0)) + zlog_debug ("%s [Update:SEND] %s/%d originator-id is same as " + "remote router-id", + onlypeer->host, + inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), + p->prefixlen); + return 0; + } + + /* ORF prefix-list filter check */ + if (CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_RM_ADV) + && (CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_RCV) + || CHECK_FLAG (peer->af_cap[afi][safi], + PEER_CAP_ORF_PREFIX_SM_OLD_RCV))) + if (peer->orf_plist[afi][safi]) + { + if (prefix_list_apply (peer->orf_plist[afi][safi], p) == PREFIX_DENY) + { + return 0; + } + } + + /* Output filter check. */ + if (bgp_output_filter (peer, p, riattr, afi, safi) == FILTER_DENY) + { + if (bgp_debug_update(NULL, p, subgrp->update_group, 0)) + zlog_debug ("%s [Update:SEND] %s/%d is filtered", + peer->host, + inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), + p->prefixlen); + return 0; + } + +#ifdef BGP_SEND_ASPATH_CHECK + /* AS path loop check. */ + if (onlypeer && aspath_loop_check (riattr->aspath, onlypeer->as)) + { + if (bgp_debug_update(NULL, p, subgrp->update_group, 0)) + zlog_debug ("%s [Update:SEND] suppress announcement to peer AS %u " + "that is part of AS path.", + onlypeer->host, onlypeer->as); + return 0; + } +#endif /* BGP_SEND_ASPATH_CHECK */ + + /* If we're a CONFED we need to loop check the CONFED ID too */ + if (CHECK_FLAG(bgp->config, BGP_CONFIG_CONFEDERATION)) + { + if (aspath_loop_check(riattr->aspath, bgp->confed_id)) + { + if (bgp_debug_update(NULL, p, subgrp->update_group, 0)) + zlog_debug ("%s [Update:SEND] suppress announcement to peer AS %u" + " is AS path.", + peer->host, + bgp->confed_id); + return 0; + } + } + + /* Route-Reflect check. 
*/ + if (from->sort == BGP_PEER_IBGP && peer->sort == BGP_PEER_IBGP) + reflect = 1; + else + reflect = 0; + + /* IBGP reflection check. */ + if (reflect) + { + /* A route from a Client peer. */ + if (CHECK_FLAG (from->af_flags[afi][safi], PEER_FLAG_REFLECTOR_CLIENT)) + { + /* Reflect to all the Non-Client peers and also to the + Client peers other than the originator. Originator check + is already done. So there is nothing to do. */ + /* no bgp client-to-client reflection check. */ + if (bgp_flag_check (bgp, BGP_FLAG_NO_CLIENT_TO_CLIENT)) + if (CHECK_FLAG (peer->af_flags[afi][safi], + PEER_FLAG_REFLECTOR_CLIENT)) + return 0; + } + else + { + /* A route from a Non-client peer. Reflect to all other + clients. */ + if (! CHECK_FLAG (peer->af_flags[afi][safi], + PEER_FLAG_REFLECTOR_CLIENT)) + return 0; + } + } + + /* For modify attribute, copy it to temporary structure. */ + bgp_attr_dup (attr, riattr); + + /* If local-preference is not set. */ + if ((peer->sort == BGP_PEER_IBGP + || peer->sort == BGP_PEER_CONFED) + && (! (attr->flag & ATTR_FLAG_BIT (BGP_ATTR_LOCAL_PREF)))) + { + attr->flag |= ATTR_FLAG_BIT (BGP_ATTR_LOCAL_PREF); + attr->local_pref = bgp->default_local_pref; + } + + /* If originator-id is not set and the route is to be reflected, + set the originator id */ + if (reflect && (!(attr->flag & ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID)))) + { + attr->extra = bgp_attr_extra_get(attr); + IPV4_ADDR_COPY(&(attr->extra->originator_id), &(from->remote_id)); + SET_FLAG(attr->flag, BGP_ATTR_ORIGINATOR_ID); + } + + /* Remove MED if it's an EBGP peer - will get overwritten by route-maps */ + if (peer->sort == BGP_PEER_EBGP + && attr->flag & ATTR_FLAG_BIT (BGP_ATTR_MULTI_EXIT_DISC)) + { + if (ri->peer != bgp->peer_self && ! transparent + && ! 
CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MED_UNCHANGED)) + attr->flag &= ~(ATTR_FLAG_BIT (BGP_ATTR_MULTI_EXIT_DISC)); + } + + /* Since the nexthop attribute can vary per peer, it is not explicitly set + * in announce check, only certain flags and length (or number of nexthops + * -- for IPv6/MP_REACH) are set here in order to guide the update formation + * code in setting the nexthop(s) on a per peer basis in reformat_peer(). + * Typically, the source nexthop in the attribute is preserved but in the + * scenarios where we know it will always be overwritten, we reset the + * nexthop to "0" in an attempt to achieve better Update packing. An + * example of this is when a prefix from each of 2 IBGP peers needs to be + * announced to an EBGP peer (and they have the same attributes barring + * their nexthop). + */ + if (reflect) + SET_FLAG(attr->rmap_change_flags, BATTR_REFLECTED); + +#ifdef HAVE_IPV6 + /* IPv6/MP starts with 1 nexthop, the link-local address is passed only if + * we're not reflecting the route and the peer (group) to whom we're going + * to announce is on a shared network (directly connected peers) or the + * peer (group) is configured to receive link-local nexthop and it is + * available in the prefix. + * Of course, the operator can always set it through the route-map, if + * so desired. + */ + if (p->family == AF_INET6) + { + attr->extra->mp_nexthop_len = 16; + if (!reflect) + { + if (peer->shared_network || + (CHECK_FLAG (peer->af_flags[afi][safi], + PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED) && + IN6_IS_ADDR_LINKLOCAL (&attr->extra->mp_nexthop_local))) + attr->extra->mp_nexthop_len = 32; + } + + /* Clear off link-local nexthop in source, if not needed. This may help + * more prefixes share the same attribute for announcement. 
+ */ + if (!(CHECK_FLAG (peer->af_flags[afi][safi], + PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED))) + memset (&attr->extra->mp_nexthop_local, 0, IPV6_MAX_BYTELEN); + } +#endif /* HAVE_IPV6 */ + + bgp_peer_remove_private_as(bgp, afi, safi, peer, attr); + bgp_peer_as_override(bgp, afi, safi, peer, attr); + + /* Route map & unsuppress-map apply. */ + if (ROUTE_MAP_OUT_NAME (filter) + || (ri->extra && ri->extra->suppress) ) + { + struct bgp_info info; + struct attr dummy_attr; + struct attr_extra dummy_extra; + + dummy_attr.extra = &dummy_extra; + + info.peer = peer; + info.attr = attr; + + /* + * The route reflector is not allowed to modify the attributes + * of the reflected IBGP routes unless explicitly allowed. + */ + if ((from->sort == BGP_PEER_IBGP && peer->sort == BGP_PEER_IBGP) + && !bgp_flag_check(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY)) + { + bgp_attr_dup (&dummy_attr, attr); + info.attr = &dummy_attr; + } + + SET_FLAG (peer->rmap_type, PEER_RMAP_TYPE_OUT); + + if (ri->extra && ri->extra->suppress) + ret = route_map_apply (UNSUPPRESS_MAP (filter), p, RMAP_BGP, &info); + else + ret = route_map_apply (ROUTE_MAP_OUT (filter), p, RMAP_BGP, &info); + + peer->rmap_type = 0; + + if (ret == RMAP_DENYMATCH) + { + bgp_attr_flush (attr); + return 0; + } + } + + /* After route-map has been applied, we check to see if the nexthop to + * be carried in the attribute (that is used for the announcement) can + * be cleared off or not. We do this in all cases where we would be + * setting the nexthop to "ourselves". For IPv6, we only need to consider + * the global nexthop here; the link-local nexthop would have been cleared + * already, and if not, it is required by the update formation code. + * Also see earlier comments in this function. 
+ */ + if (!(CHECK_FLAG(attr->rmap_change_flags, BATTR_RMAP_NEXTHOP_CHANGED) || + transparent || + CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_NEXTHOP_UNCHANGED))) + { + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_NEXTHOP_SELF)) + { + if (!reflect || + CHECK_FLAG (peer->af_flags[afi][safi], + PEER_FLAG_FORCE_NEXTHOP_SELF)) + subgroup_announce_reset_nhop (p->family, attr); + } + else if (peer->sort == BGP_PEER_EBGP) + { + SUBGRP_FOREACH_PEER (subgrp, paf) + { + if (bgp_multiaccess_check_v4 (riattr->nexthop, paf->peer)) + break; + } + if (!paf) + subgroup_announce_reset_nhop (p->family, attr); + } + } + + return 1; +} + static int bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, struct prefix *p, struct attr *attr, afi_t afi, safi_t safi) @@ -1238,11 +1565,9 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, struct bgp_info info; struct peer *from; struct attr *riattr; - struct bgp *bgp; from = ri->peer; filter = &rsclient->filter[afi][safi]; - bgp = rsclient->bgp; riattr = bgp_info_mpath_count (ri) ? bgp_info_mpath_attr (ri) : ri->attr; if (DISABLE_BGP_ANNOUNCE) @@ -1273,15 +1598,15 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, peer's id. */ if (riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID)) { - if (IPV4_ADDR_SAME (&rsclient->remote_id, + if (IPV4_ADDR_SAME (&rsclient->remote_id, &riattr->extra->originator_id)) { - if (bgp_debug_update(rsclient, p, 0)) + if (bgp_debug_update(rsclient, p, NULL, 0)) zlog_debug ("%s [Update:SEND] %s/%d originator-id is same as remote router-id", - rsclient->host, - inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), - p->prefixlen); - return 0; + rsclient->host, + inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), + p->prefixlen); + return 0; } } @@ -1298,11 +1623,11 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, /* Output filter check. 
*/ if (bgp_output_filter (rsclient, p, riattr, afi, safi) == FILTER_DENY) { - if (bgp_debug_update(rsclient, p, 0)) + if (bgp_debug_update(rsclient, p, NULL, 0)) zlog_debug ("%s [Update:SEND] %s/%d is filtered", - rsclient->host, - inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), - p->prefixlen); + rsclient->host, + inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), + p->prefixlen); return 0; } @@ -1310,9 +1635,9 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, /* AS path loop check. */ if (aspath_loop_check (riattr->aspath, rsclient->as)) { - if (bgp_debug_update(rsclient, p, 0)) + if (bgp_debug_update(rsclient, p, NULL, 0)) zlog_debug ("%s [Update:SEND] suppress announcement to peer AS %u is AS path.", - rsclient->host, rsclient->as); + rsclient->host, rsclient->as); return 0; } #endif /* BGP_SEND_ASPATH_CHECK */ @@ -1355,7 +1680,7 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, struct attr_extra *attre = attr->extra; /* Left nexthop_local unchanged if so configured. */ - if ( CHECK_FLAG (rsclient->af_flags[afi][safi], + if ( CHECK_FLAG (rsclient->af_flags[afi][safi], PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED) ) { if ( IN6_IS_ADDR_LINKLOCAL (&attre->mp_nexthop_local) ) @@ -1363,11 +1688,218 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, else attre->mp_nexthop_len=16; } - + /* Default nexthop_local treatment for RS-Clients */ - else - { - /* Announcer and RS-Client are both in the same network */ + else + { + /* Announcer and RS-Client are both in the same network */ + if (rsclient->shared_network && from->shared_network && + (rsclient->ifindex == from->ifindex)) + { + if ( IN6_IS_ADDR_LINKLOCAL (&attre->mp_nexthop_local) ) + attre->mp_nexthop_len=32; + else + attre->mp_nexthop_len=16; + } + + /* Set link-local address for shared network peer. 
*/ + else if (rsclient->shared_network + && IN6_IS_ADDR_LINKLOCAL (&rsclient->nexthop.v6_local)) + { + memcpy (&attre->mp_nexthop_local, &rsclient->nexthop.v6_local, + IPV6_MAX_BYTELEN); + attre->mp_nexthop_len = 32; + } + + else + attre->mp_nexthop_len = 16; + } + + } +#endif /* HAVE_IPV6 */ + + + /* If this is EBGP peer and remove-private-AS is set. */ + if (rsclient->sort == BGP_PEER_EBGP + && peer_af_flag_check (rsclient, afi, safi, PEER_FLAG_REMOVE_PRIVATE_AS) + && aspath_private_as_check (attr->aspath)) + attr->aspath = aspath_empty_get (); + + /* Route map & unsuppress-map apply. */ + if (ROUTE_MAP_OUT_NAME (filter) || (ri->extra && ri->extra->suppress) ) + { + info.peer = rsclient; + info.attr = attr; + + SET_FLAG (rsclient->rmap_type, PEER_RMAP_TYPE_OUT); + + if (ri->extra && ri->extra->suppress) + ret = route_map_apply (UNSUPPRESS_MAP (filter), p, RMAP_BGP, &info); + else + ret = route_map_apply (ROUTE_MAP_OUT (filter), p, RMAP_BGP, &info); + + rsclient->rmap_type = 0; + + if (ret == RMAP_DENYMATCH) + { + bgp_attr_flush (attr); + return 0; + } + } + + return 1; +} + +static int +subgroup_announce_check_rsclient (struct bgp_info *ri, + struct update_subgroup *subgrp, + struct prefix *p, struct attr *attr) +{ + int ret; + char buf[SU_ADDRSTRLEN]; + struct bgp_filter *filter; + struct bgp_info info; + struct peer *from; + struct peer *rsclient; + struct peer *onlypeer; + struct attr *riattr; + struct bgp *bgp; + afi_t afi; + safi_t safi; + + if (DISABLE_BGP_ANNOUNCE) + return 0; + + afi = SUBGRP_AFI(subgrp); + safi = SUBGRP_SAFI(subgrp); + rsclient = SUBGRP_PEER(subgrp); + onlypeer = ((SUBGRP_PCOUNT(subgrp) == 1) ? + (SUBGRP_PFIRST(subgrp))->peer : NULL); + from = ri->peer; + filter = &rsclient->filter[afi][safi]; + bgp = rsclient->bgp; + riattr = bgp_info_mpath_count (ri) ? bgp_info_mpath_attr (ri) : ri->attr; + + /* Do not send back route to sender. */ + if (onlypeer && (from == onlypeer)) + return 0; + + /* Aggregate-address suppress check. 
*/ + if (ri->extra && ri->extra->suppress) + if (! UNSUPPRESS_MAP_NAME (filter)) + return 0; + + /* Default route check. */ + if (CHECK_FLAG (rsclient->af_sflags[afi][safi], + PEER_STATUS_DEFAULT_ORIGINATE)) + { + if (p->family == AF_INET && p->u.prefix4.s_addr == INADDR_ANY) + return 0; +#ifdef HAVE_IPV6 + else if (p->family == AF_INET6 && p->prefixlen == 0) + return 0; +#endif /* HAVE_IPV6 */ + } + + /* If the attribute has originator-id and it is same as remote + peer's id. */ + if (onlypeer && riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID)) + { + if (IPV4_ADDR_SAME (&onlypeer->remote_id, + &riattr->extra->originator_id)) + { + if (bgp_debug_update(rsclient, p, subgrp->update_group, 0)) + zlog_debug ("%s [Update:SEND] %s/%d originator-id is same as remote router-id", + onlypeer->host, + inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), + p->prefixlen); + return 0; + } + } + + /* ORF prefix-list filter check */ + if (CHECK_FLAG (rsclient->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_RM_ADV) + && (CHECK_FLAG (rsclient->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_RCV) + || CHECK_FLAG (rsclient->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_OLD_RCV))) + if (rsclient->orf_plist[afi][safi]) + { + if (prefix_list_apply (rsclient->orf_plist[afi][safi], p) == PREFIX_DENY) + return 0; + } + + /* Output filter check. */ + if (bgp_output_filter (rsclient, p, riattr, afi, safi) == FILTER_DENY) + { + if (bgp_debug_update(rsclient, p, subgrp->update_group, 0)) + zlog_debug ("%s [Update:SEND] %s/%d is filtered", + rsclient->host, + inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), + p->prefixlen); + return 0; + } + +#ifdef BGP_SEND_ASPATH_CHECK + /* AS path loop check. 
*/ + if (onlypeer && aspath_loop_check (riattr->aspath, onlypeer->as)) + { + if (bgp_debug_update(rsclient, p, subgrp->update_group, 0)) + zlog_debug ("%s [Update:SEND] suppress announcement to peer AS %u is AS path.", + onlypeer->host, onlypeer->as); + return 0; + } +#endif /* BGP_SEND_ASPATH_CHECK */ + + /* For modify attribute, copy it to temporary structure. */ + bgp_attr_dup (attr, riattr); + + /* next-hop-set */ + if ((p->family == AF_INET && attr->nexthop.s_addr == 0) +#ifdef HAVE_IPV6 + || (p->family == AF_INET6 && + IN6_IS_ADDR_UNSPECIFIED(&attr->extra->mp_nexthop_global)) +#endif /* HAVE_IPV6 */ + ) + { + /* Set IPv4 nexthop. */ + if (p->family == AF_INET) + { + if (safi == SAFI_MPLS_VPN) + memcpy (&attr->extra->mp_nexthop_global_in, &rsclient->nexthop.v4, + IPV4_MAX_BYTELEN); + else + memcpy (&attr->nexthop, &rsclient->nexthop.v4, IPV4_MAX_BYTELEN); + } +#ifdef HAVE_IPV6 + /* Set IPv6 nexthop. */ + if (p->family == AF_INET6) + { + /* IPv6 global nexthop must be included. */ + memcpy (&attr->extra->mp_nexthop_global, &rsclient->nexthop.v6_global, + IPV6_MAX_BYTELEN); + attr->extra->mp_nexthop_len = 16; + } +#endif /* HAVE_IPV6 */ + } + +#ifdef HAVE_IPV6 + if (p->family == AF_INET6) + { + struct attr_extra *attre = attr->extra; + + /* Left nexthop_local unchanged if so configured. 
*/ + if ( CHECK_FLAG (rsclient->af_flags[afi][safi], + PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED) ) + { + if ( IN6_IS_ADDR_LINKLOCAL (&attre->mp_nexthop_local) ) + attre->mp_nexthop_len=32; + else + attre->mp_nexthop_len=16; + } + + /* Default nexthop_local treatment for RS-Clients */ + else + { + /* Announcer and RS-Client are both in the same network */ if (rsclient->shared_network && from->shared_network && (rsclient->ifindex == from->ifindex)) { @@ -1561,27 +2093,32 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, return; } -static int -bgp_process_announce_selected (struct peer *peer, struct bgp_info *selected, - struct bgp_node *rn, afi_t afi, safi_t safi) +/* + * A new route/change in bestpath of an existing route. Evaluate the path + * for advertisement to the subgroup. + */ +int +subgroup_process_announce_selected (struct update_subgroup *subgrp, + struct bgp_info *selected, + struct bgp_node *rn) { struct prefix *p; + struct peer_af *paf; + struct peer *onlypeer; struct attr attr; struct attr_extra extra; + afi_t afi; + safi_t safi; p = &rn->p; - - /* Announce route to Established peer. */ - if (peer->status != Established) - return 0; - - /* Address family configuration check. */ - if (! peer->afc_nego[afi][safi]) - return 0; + afi = SUBGRP_AFI(subgrp); + safi = SUBGRP_SAFI(subgrp); + onlypeer = ((SUBGRP_PCOUNT(subgrp) == 1) ? + (SUBGRP_PFIRST(subgrp))->peer : NULL); /* First update is deferred until ORF or ROUTE-REFRESH is received */ - if (CHECK_FLAG (peer->af_sflags[afi][safi], - PEER_STATUS_ORF_WAIT_REFRESH)) + if (onlypeer && CHECK_FLAG (onlypeer->af_sflags[afi][safi], + PEER_STATUS_ORF_WAIT_REFRESH)) return 0; /* It's initialized in bgp_announce_[check|check_rsclient]() */ @@ -1590,28 +2127,29 @@ bgp_process_announce_selected (struct peer *peer, struct bgp_info *selected, switch (bgp_node_table (rn)->type) { case BGP_TABLE_MAIN: - /* Announcement to peer->conf. If the route is filtered, + /* Announcement to the subgroup. 
If the route is filtered, withdraw it. */ - if (selected && bgp_announce_check (selected, peer, p, &attr, afi, safi)) - bgp_adj_out_set (rn, peer, p, &attr, afi, safi, selected); + if (selected && subgroup_announce_check(selected, subgrp, p, &attr)) + bgp_adj_out_set_subgroup(rn, subgrp, &attr, selected); else - bgp_adj_out_unset (rn, peer, p, afi, safi); + bgp_adj_out_unset_subgroup(rn, subgrp); + break; case BGP_TABLE_RSCLIENT: - /* Announcement to peer->conf. If the route is filtered, + /* Announcement to peer->conf. If the route is filtered, withdraw it. */ - if (selected && - bgp_announce_check_rsclient (selected, peer, p, &attr, afi, safi)) - bgp_adj_out_set (rn, peer, p, &attr, afi, safi, selected); + if (selected && + subgroup_announce_check_rsclient (selected, subgrp, p, &attr)) + bgp_adj_out_set_subgroup (rn, subgrp, &attr, selected); else - bgp_adj_out_unset (rn, peer, p, afi, safi); + bgp_adj_out_unset_subgroup(rn, subgrp); break; } return 0; } -struct bgp_process_queue +struct bgp_process_queue { struct bgp *bgp; struct bgp_node *rn; @@ -1632,6 +2170,8 @@ bgp_process_rsclient (struct work_queue *wq, void *data) struct bgp_info_pair old_and_new; struct listnode *node, *nnode; struct peer *rsclient; + struct peer_af *paf; + struct update_subgroup *subgrp; /* Is it end of initial update? 
(after startup) */ if (!rn) @@ -1672,8 +2212,12 @@ bgp_process_rsclient (struct work_queue *wq, void *data) UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG); } - bgp_process_announce_selected (rsclient, new_select, rn, - afi, safi); + paf = peer_af_find(rsclient, afi, safi); + assert(paf); + subgrp = PAF_SUBGRP(paf); + if (!subgrp) /* not an established session */ + continue; + subgroup_process_announce_selected (subgrp, new_select, rn); } } else @@ -1686,12 +2230,14 @@ bgp_process_rsclient (struct work_queue *wq, void *data) bgp_info_unset_flag (rn, new_select, BGP_INFO_ATTR_CHANGED); UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG); } - bgp_process_announce_selected (rsclient, new_select, rn, afi, safi); + paf = peer_af_find(rsclient, afi, safi); + if (paf && (subgrp = PAF_SUBGRP(paf))) /* if an established session */ + subgroup_process_announce_selected (subgrp, new_select, rn); } if (old_select && CHECK_FLAG (old_select->flags, BGP_INFO_REMOVED)) bgp_info_reap (rn, old_select); - + UNSET_FLAG (rn->flags, BGP_NODE_PROCESS_SCHEDULED); return WQ_SUCCESS; } @@ -1752,6 +2298,10 @@ bgp_process_main (struct work_queue *wq, void *data) /* If the user did "clear ip bgp prefix x.x.x.x" this flag will be set */ UNSET_FLAG(rn->flags, BGP_NODE_USER_CLEAR); + /* bestpath has changed; bump version */ + if (old_select || new_select) + bgp_bump_version(rn); + if (old_select) bgp_info_unset_flag (rn, old_select, BGP_INFO_SELECTED); if (new_select) @@ -1761,12 +2311,7 @@ bgp_process_main (struct work_queue *wq, void *data) UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG); } - - /* Check each BGP peer. */ - for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) - { - bgp_process_announce_selected (peer, new_select, rn, afi, safi); - } + group_announce_route(bgp, afi, safi, rn, new_select); /* FIB update. */ if ((safi == SAFI_UNICAST || safi == SAFI_MULTICAST) && (! 
bgp->name && @@ -1810,27 +2355,6 @@ bgp_processq_del (struct work_queue *wq, void *data) XFREE (MTYPE_BGP_PROCESS_QUEUE, pq); } -static void -bgp_process_queue_complete (struct work_queue *wq) -{ - struct bgp *bgp; - struct peer *peer; - struct listnode *node, *nnode; - - /* Schedule write thread either directly or through the MRAI timer - * if needed. - */ - bgp = bgp_get_default (); - if (!bgp) - return; - - if (BGP_ROUTE_ADV_HOLD(bgp)) - return; - - for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) - bgp_peer_schedule_updates(peer); -} - void bgp_process_queue_init (void) { @@ -1847,7 +2371,6 @@ bgp_process_queue_init (void) bm->process_main_queue->spec.workfunc = &bgp_process_main; bm->process_main_queue->spec.del_item_data = &bgp_processq_del; - bm->process_main_queue->spec.completion_func = &bgp_process_queue_complete; bm->process_main_queue->spec.max_retries = 0; bm->process_main_queue->spec.hold = 50; /* Use a higher yield value of 50ms for main queue processing */ @@ -2165,7 +2688,7 @@ bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi, && attrhash_cmp (ri->attr, attr_new)) { - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd %s/%d for RS-client %s...duplicate ignored", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -2183,7 +2706,7 @@ bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi, bgp_info_restore (rn, ri); /* Received Logging. */ - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd %s/%d for RS-client %s", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -2210,7 +2733,7 @@ bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi, } /* Received Logging. 
*/ - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) { zlog_debug ("%s rcvd %s/%d for RS-client %s", peer->host, @@ -2240,7 +2763,7 @@ bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi, filtered: /* This BGP update is filtered. Log the reason then update BGP entry. */ - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd UPDATE about %s/%d -- DENIED for RS-client %s due to: %s", peer->host, inet_ntop (p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -2276,7 +2799,7 @@ bgp_withdraw_rsclient (struct peer *rsclient, afi_t afi, safi_t safi, /* Withdraw specified route from routing table. */ if (ri && ! CHECK_FLAG (ri->flags, BGP_INFO_HISTORY)) bgp_rib_withdraw (rn, ri, peer, afi, safi); - else if (bgp_debug_update(peer, p, 1)) + else if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s Can't find the route %s/%d", peer->host, inet_ntop (p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), p->prefixlen); @@ -2408,7 +2931,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, u_int32_t addpath_id, && peer->sort == BGP_PEER_EBGP && CHECK_FLAG (ri->flags, BGP_INFO_HISTORY)) { - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd %s/%d", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -2422,7 +2945,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, u_int32_t addpath_id, } else /* Duplicate - odd */ { - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) { if (!peer->rcvd_attr_printed) { @@ -2453,7 +2976,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, u_int32_t addpath_id, /* Withdraw/Announce before we fully processed the withdraw */ if (CHECK_FLAG(ri->flags, BGP_INFO_REMOVED)) { - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd %s/%d, flapped quicker than processing", peer->host, inet_ntop(p->family, &p->u.prefix, buf, 
SU_ADDRSTRLEN), @@ -2462,7 +2985,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, u_int32_t addpath_id, } /* Received Logging. */ - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd %s/%d", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -2546,7 +3069,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, u_int32_t addpath_id, } // End of implicit withdraw /* Received Logging. */ - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) { if (!peer->rcvd_attr_printed) { @@ -2618,7 +3141,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, u_int32_t addpath_id, /* This BGP update is filtered. Log the reason then update BGP entry. */ filtered: - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) { if (!peer->rcvd_attr_printed) { @@ -2689,7 +3212,7 @@ bgp_withdraw (struct peer *peer, struct prefix *p, u_int32_t addpath_id, } /* Logging. */ - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd UPDATE about %s/%d -- withdrawn", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -2713,7 +3236,7 @@ bgp_withdraw (struct peer *peer, struct prefix *p, u_int32_t addpath_id, /* Withdraw specified route from routing table. */ if (ri && ! 
CHECK_FLAG (ri->flags, BGP_INFO_HISTORY)) bgp_rib_withdraw (rn, ri, peer, afi, safi); - else if (bgp_debug_update(peer, p, 1)) + else if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s Can't find the route %s/%d", peer->host, inet_ntop (p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), p->prefixlen); @@ -2727,175 +3250,103 @@ bgp_withdraw (struct peer *peer, struct prefix *p, u_int32_t addpath_id, void bgp_default_originate (struct peer *peer, afi_t afi, safi_t safi, int withdraw) { - struct bgp *bgp; - struct attr attr; - struct aspath *aspath; - struct prefix p; - struct peer *from; - struct bgp_node *rn; - struct bgp_info *ri; - int ret = RMAP_DENYMATCH; - - if (!(afi == AFI_IP || afi == AFI_IP6)) - return; - - bgp = peer->bgp; - from = bgp->peer_self; - - bgp_attr_default_set (&attr, BGP_ORIGIN_IGP); - aspath = attr.aspath; - attr.local_pref = bgp->default_local_pref; - memcpy (&attr.nexthop, &peer->nexthop.v4, IPV4_MAX_BYTELEN); - - if (afi == AFI_IP) - str2prefix ("0.0.0.0/0", &p); -#ifdef HAVE_IPV6 - else if (afi == AFI_IP6) - { - struct attr_extra *ae = attr.extra; - - str2prefix ("::/0", &p); - - /* IPv6 global nexthop must be included. */ - memcpy (&ae->mp_nexthop_global, &peer->nexthop.v6_global, - IPV6_MAX_BYTELEN); - ae->mp_nexthop_len = 16; - - /* If the peer is on shared nextwork and we have link-local - nexthop set it. 
*/ - if (peer->shared_network - && !IN6_IS_ADDR_UNSPECIFIED (&peer->nexthop.v6_local)) - { - memcpy (&ae->mp_nexthop_local, &peer->nexthop.v6_local, - IPV6_MAX_BYTELEN); - ae->mp_nexthop_len = 32; - } - } -#endif /* HAVE_IPV6 */ - - if (peer->default_rmap[afi][safi].name) - { - SET_FLAG (bgp->peer_self->rmap_type, PEER_RMAP_TYPE_DEFAULT); - for (rn = bgp_table_top(bgp->rib[afi][safi]); rn; rn = bgp_route_next(rn)) - { - for (ri = rn->info; ri; ri = ri->next) - { - struct attr dummy_attr; - struct attr_extra dummy_extra; - struct bgp_info info; - - /* Provide dummy so the route-map can't modify the attributes */ - dummy_attr.extra = &dummy_extra; - bgp_attr_dup(&dummy_attr, ri->attr); - info.peer = ri->peer; - info.attr = &dummy_attr; - - ret = route_map_apply(peer->default_rmap[afi][safi].map, &rn->p, - RMAP_BGP, &info); - - /* The route map might have set attributes. If we don't flush them - * here, they will be leaked. */ - bgp_attr_flush(&dummy_attr); - if (ret != RMAP_DENYMATCH) - break; - } - if (ret != RMAP_DENYMATCH) - break; - } - bgp->peer_self->rmap_type = 0; - - if (ret == RMAP_DENYMATCH) - withdraw = 1; - } - - if (withdraw) - { - if (CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_DEFAULT_ORIGINATE)) - bgp_default_withdraw_send (peer, afi, safi); - UNSET_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_DEFAULT_ORIGINATE); - } - else - { - if (! 
CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_DEFAULT_ORIGINATE)) - { - SET_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_DEFAULT_ORIGINATE); - bgp_default_update_send (peer, &attr, afi, safi, from); - } - } - - bgp_attr_extra_free (&attr); - aspath_unintern (&aspath); + struct update_subgroup *subgrp; + subgrp = peer_subgroup(peer, afi, safi); + subgroup_default_originate(subgrp, withdraw); } -static void -bgp_announce_table (struct peer *peer, afi_t afi, safi_t safi, - struct bgp_table *table, int rsclient) + +/* + * bgp_stop_announce_route_timer + */ +void +bgp_stop_announce_route_timer (struct peer_af *paf) { - struct bgp_node *rn; - struct bgp_info *ri; - struct attr attr; - struct attr_extra extra; - - if (! table) - table = (rsclient) ? peer->rib[afi][safi] : peer->bgp->rib[afi][safi]; - - if (safi != SAFI_MPLS_VPN - && CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE)) - bgp_default_originate (peer, afi, safi, 0); - - /* It's initialized in bgp_announce_[check|check_rsclient]() */ - attr.extra = &extra; - - for (rn = bgp_table_top (table); rn; rn = bgp_route_next(rn)) - for (ri = rn->info; ri; ri = ri->next) - if (CHECK_FLAG (ri->flags, BGP_INFO_SELECTED) && ri->peer != peer) - { - if ( (rsclient) ? - (bgp_announce_check_rsclient (ri, peer, &rn->p, &attr, afi, safi)) - : (bgp_announce_check (ri, peer, &rn->p, &attr, afi, safi))) - bgp_adj_out_set (rn, peer, &rn->p, &attr, afi, safi, ri); - else - bgp_adj_out_unset (rn, peer, &rn->p, afi, safi); - } + if (!paf->t_announce_route) + return; + + THREAD_TIMER_OFF (paf->t_announce_route); } +/* + * bgp_announce_route_timer_expired + * + * Callback that is invoked when the route announcement timer for a + * peer_af expires. 
+ */ +static int +bgp_announce_route_timer_expired (struct thread *t) +{ + struct peer_af *paf; + struct peer *peer; + + + paf = THREAD_ARG (t); + peer = paf->peer; + + assert (paf->t_announce_route); + paf->t_announce_route = NULL; + + if (peer->status != Established) + return 0; + + if (!peer->afc_nego[paf->afi][paf->safi]) + return 0; + + peer_af_announce_route (paf, 1); + return 0; +} + +/* + * bgp_announce_route + * + * *Triggers* announcement of routes of a given AFI/SAFI to a peer. + */ void bgp_announce_route (struct peer *peer, afi_t afi, safi_t safi) { - struct bgp_node *rn; - struct bgp_table *table; + struct peer_af *paf; + struct update_subgroup *subgrp; - if (peer->status != Established) + paf = peer_af_find (peer, afi, safi); + if (!paf) return; - - if (! peer->afc_nego[afi][safi]) - return; - - /* First update is deferred until ORF or ROUTE-REFRESH is received */ - if (CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_ORF_WAIT_REFRESH)) - return; - - if (safi != SAFI_MPLS_VPN) - bgp_announce_table (peer, afi, safi, NULL, 0); - else - for (rn = bgp_table_top (peer->bgp->rib[afi][safi]); rn; - rn = bgp_route_next(rn)) - if ((table = (rn->info)) != NULL) - bgp_announce_table (peer, afi, safi, table, 0); - - if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) - bgp_announce_table (peer, afi, safi, NULL, 1); + subgrp = PAF_SUBGRP(paf); /* - * The write thread needs to be scheduled since it may not be done as - * part of building adj_out. + * Ignore if subgroup doesn't exist (implies AF is not negotiated) + * or a refresh has already been triggered. */ - bgp_peer_schedule_updates(peer); + if (!subgrp || paf->t_announce_route) + return; + + /* + * Start a timer to stagger/delay the announce. This serves + * two purposes - announcement can potentially be combined for + * multiple peers and the announcement doesn't happen in the + * vty context. 
+ */ + THREAD_TIMER_MSEC_ON (master, paf->t_announce_route, + bgp_announce_route_timer_expired, paf, + (subgrp->peer_count == 1) ? + BGP_ANNOUNCE_ROUTE_SHORT_DELAY_MS : + BGP_ANNOUNCE_ROUTE_DELAY_MS); } +/* + * Announce routes from all AF tables to a peer. + * + * This should ONLY be called when there is a need to refresh the + * routes to the peer based on a policy change for this peer alone + * or a route refresh request received from the peer. + * The operation will result in splitting the peer from its existing + * subgroups and putting it in new subgroups. + */ void bgp_announce_route_all (struct peer *peer) { + struct peer_af *paf; + int af; afi_t afi; safi_t safi; @@ -3146,6 +3597,10 @@ bgp_clear_route_table (struct peer *peer, afi_t afi, safi_t safi, bgp_unlock_node (rn); break; } + + /* + * Can't do this anymore. adj-outs are not maintained per peer. + * for (aout = rn->adj_out; aout; aout = aout->next) if (aout->peer == peer || purpose == BGP_CLEAR_ROUTE_MY_RSCLIENT) { @@ -3153,7 +3608,7 @@ bgp_clear_route_table (struct peer *peer, afi_t afi, safi_t safi, bgp_unlock_node (rn); break; } - + */ for (ri = rn->info; ri; ri = ri->next) if (ri->peer == peer || purpose == BGP_CLEAR_ROUTE_MY_RSCLIENT) { @@ -3641,7 +4096,7 @@ bgp_static_update_rsclient (struct peer *rsclient, struct prefix *p, == RMAP_DENY) { /* This BGP update is filtered. Log the reason then update BGP entry. 
*/ - if (bgp_debug_update(rsclient, p, 1)) + if (bgp_debug_update(rsclient, p, NULL, 1)) zlog_debug ("Static UPDATE about %s/%d -- DENIED for RS-client %s due to: import-policy", inet_ntop (p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), p->prefixlen, rsclient->host); @@ -6613,11 +7068,6 @@ route_vty_out_detail (struct vty *vty, struct bgp *bgp, struct prefix *p, vty_out (vty, "%s", VTY_NEWLINE); } -#define BGP_SHOW_SCODE_HEADER "Status codes: s suppressed, d damped, "\ - "h history, * valid, > best, = multipath,%s"\ - " i internal, r RIB-failure, S Stale, R Removed%s" -#define BGP_SHOW_OCODE_HEADER "Origin codes: i - IGP, e - EGP, ? - incomplete%s%s" -#define BGP_SHOW_HEADER " Network Next Hop Metric LocPrf Weight Path%s" #define BGP_SHOW_HEADER_CSV "Flags, Network, Next Hop, Metric, LocPrf, Weight, Path%s" #define BGP_SHOW_DAMP_HEADER " Network From Reuse Path%s" #define BGP_SHOW_FLAP_HEADER " Network From Flaps Duration Reuse Path%s" @@ -6826,7 +7276,7 @@ bgp_show_table (struct vty *vty, struct bgp_table *table, struct in_addr *router } else if (header) { - vty_out (vty, "BGP table version is 0, local router ID is %s%s", inet_ntoa (*router_id), VTY_NEWLINE); + vty_out (vty, "BGP table version is %llu, local router ID is %s%s", table->version, inet_ntoa (*router_id), VTY_NEWLINE); vty_out (vty, BGP_SHOW_SCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); vty_out (vty, BGP_SHOW_OCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); if (type == bgp_show_type_dampend_paths @@ -10614,7 +11064,7 @@ show_adj_route (struct vty *vty, struct peer *peer, afi_t afi, safi_t safi, if (! 
in && CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_DEFAULT_ORIGINATE)) { - vty_out (vty, "BGP table version is 0, local router ID is %s%s", inet_ntoa (bgp->router_id), VTY_NEWLINE); + vty_out (vty, "BGP table version is %llu, local router ID is %s%s", table->version, inet_ntoa (bgp->router_id), VTY_NEWLINE); vty_out (vty, BGP_SHOW_SCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); vty_out (vty, BGP_SHOW_OCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); @@ -10661,47 +11111,48 @@ show_adj_route (struct vty *vty, struct peer *peer, afi_t afi, safi_t safi, } else { - for (adj = rn->adj_out; adj; adj = adj->next) - if (adj->peer == peer) - { - if (header1) - { - vty_out (vty, "BGP table version is 0, local router ID is %s%s", inet_ntoa (bgp->router_id), VTY_NEWLINE); - vty_out (vty, BGP_SHOW_SCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); - vty_out (vty, BGP_SHOW_OCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); - header1 = 0; - } - if (header2) - { - if (delim) - vty_out (vty, BGP_SHOW_HEADER_CSV, VTY_NEWLINE); - else - vty_out (vty, BGP_SHOW_HEADER, VTY_NEWLINE); - header2 = 0; - } - if (adj->attr) - { - if (!CHECK_FLAG(peer->af_flags[afi][safi], - PEER_FLAG_REFLECTOR_CLIENT) - || bgp_flag_check(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY)) - { + adj = bgp_adj_peer_lookup(peer, rn); + if (adj) + { + if (header1) + { + vty_out (vty, "BGP table version is %llu, local router ID " + "is %s%s", table->version, + inet_ntoa (bgp->router_id), VTY_NEWLINE); + vty_out (vty, BGP_SHOW_SCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); + vty_out (vty, BGP_SHOW_OCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); + header1 = 0; + } + if (header2) + { + if (delim) + vty_out (vty, BGP_SHOW_HEADER_CSV, VTY_NEWLINE); + else + vty_out (vty, BGP_SHOW_HEADER, VTY_NEWLINE); + header2 = 0; + } + if (adj->attr) + { + if (!CHECK_FLAG(peer->af_flags[afi][safi], + PEER_FLAG_REFLECTOR_CLIENT) + || bgp_flag_check(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY)) + { + bgp_attr_dup(&attr, adj->attr); + ret = bgp_output_modifier(peer, &rn->p, &attr, afi, + 
safi, rmap_name); + } + else + ret = RMAP_PERMIT; - bgp_attr_dup(&attr, adj->attr); - ret = bgp_output_modifier(peer, &rn->p, &attr, afi, - safi, rmap_name); - } - else - ret = RMAP_PERMIT; - - if (ret != RMAP_DENY) - { - route_vty_out_tmp (vty, &rn->p, &attr, safi, delim); - output_count++; - } - else - filtered_count++; - } - } + if (ret != RMAP_DENY) + { + route_vty_out_tmp (vty, &rn->p, &attr, safi, delim); + output_count++; + } + else + filtered_count++; + } + } } if (output_count != 0) diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index c35b5f1115..d6af446687 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -26,6 +26,12 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA struct bgp_nexthop_cache; +#define BGP_SHOW_SCODE_HEADER "Status codes: s suppressed, d damped, "\ + "h history, * valid, > best, = multipath,%s"\ + " i internal, r RIB-failure, S Stale, R Removed%s" +#define BGP_SHOW_OCODE_HEADER "Origin codes: i - IGP, e - EGP, ? - incomplete%s%s" +#define BGP_SHOW_HEADER " Network Next Hop Metric LocPrf Weight Path%s" + /* Ancillary information to struct bgp_info, * used for uncommonly used data (aggregation, MPLS, etc.) * and lazily allocated to save memory. @@ -199,12 +205,19 @@ enum bgp_path_type BGP_PATH_MULTIPATH }; +static inline void +bgp_bump_version (struct bgp_node *node) +{ + node->version = bgp_table_next_version(bgp_node_table(node)); +} + /* Prototypes. 
*/ extern void bgp_process_queue_init (void); extern void bgp_route_init (void); extern void bgp_route_finish (void); extern void bgp_cleanup_routes (void); extern void bgp_announce_route (struct peer *, afi_t, safi_t); +extern void bgp_stop_announce_route_timer(struct peer_af *paf); extern void bgp_announce_route_all (struct peer *); extern void bgp_default_originate (struct peer *, afi_t, safi_t, int); extern void bgp_soft_reconfig_in (struct peer *, afi_t, safi_t); @@ -280,4 +293,13 @@ extern void route_vty_out (struct vty *, struct prefix *, struct bgp_info *, int extern void route_vty_out_tag (struct vty *, struct prefix *, struct bgp_info *, int, safi_t); extern void route_vty_out_tmp (struct vty *, struct prefix *, struct attr *, safi_t, char *); +extern int +subgroup_process_announce_selected (struct update_subgroup *subgrp, + struct bgp_info *selected, + struct bgp_node *rn); + +extern int subgroup_announce_check(struct bgp_info *ri, + struct update_subgroup *subgrp, + struct prefix *p, struct attr *attr); + #endif /* _QUAGGA_BGP_ROUTE_H */ diff --git a/bgpd/bgp_routemap.c b/bgpd/bgp_routemap.c index 2e68f31abc..c2dba97a70 100644 --- a/bgpd/bgp_routemap.c +++ b/bgpd/bgp_routemap.c @@ -40,6 +40,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "buffer.h" #include "sockunion.h" #include "hash.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -1103,19 +1104,26 @@ route_set_ip_nexthop (void *rule, struct prefix *prefix, bgp_info->attr->nexthop.s_addr = sockunion2ip (peer->su_remote); bgp_info->attr->flag |= ATTR_FLAG_BIT (BGP_ATTR_NEXT_HOP); } - else if (CHECK_FLAG (peer->rmap_type, PEER_RMAP_TYPE_OUT) - && peer->su_local - && sockunion_family (peer->su_local) == AF_INET) + else if (CHECK_FLAG (peer->rmap_type, PEER_RMAP_TYPE_OUT)) { - bgp_info->attr->nexthop.s_addr = sockunion2ip (peer->su_local); - bgp_info->attr->flag |= ATTR_FLAG_BIT (BGP_ATTR_NEXT_HOP); + /* The next hop value will be set as part 
of packet rewrite. + * Set the flags here to indicate that rewrite needs to be done. + * Also, clear the value. + */ + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_PEER_ADDRESS); + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_CHANGED); + bgp_info->attr->nexthop.s_addr = 0; } } else { - /* Set next hop value. */ + /* Set next hop value. */ bgp_info->attr->flag |= ATTR_FLAG_BIT (BGP_ATTR_NEXT_HOP); bgp_info->attr->nexthop = *rins->address; + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_CHANGED); } } @@ -2173,10 +2181,13 @@ route_set_ipv6_nexthop_global (void *rule, struct prefix *prefix, /* Set next hop value. */ (bgp_attr_extra_get (bgp_info->attr))->mp_nexthop_global = *address; - + /* Set nexthop length. */ if (bgp_info->attr->extra->mp_nexthop_len == 0) bgp_info->attr->extra->mp_nexthop_len = 16; + + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_CHANGED); } return RMAP_OKAY; @@ -2241,6 +2252,9 @@ route_set_ipv6_nexthop_local (void *rule, struct prefix *prefix, /* Set nexthop length. */ if (bgp_info->attr->extra->mp_nexthop_len != 32) bgp_info->attr->extra->mp_nexthop_len = 32; + + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_CHANGED); } return RMAP_OKAY; @@ -2313,20 +2327,20 @@ route_set_ipv6_nexthop_peer (void *rule, struct prefix *prefix, INET6_ADDRSTRLEN), &peer_address); } - else if (CHECK_FLAG (peer->rmap_type, PEER_RMAP_TYPE_OUT) - && peer->su_local - && sockunion_family (peer->su_local) == AF_INET6) + else if (CHECK_FLAG (peer->rmap_type, PEER_RMAP_TYPE_OUT)) { - inet_pton (AF_INET, sockunion2str (peer->su_local, - peer_addr_buf, - INET6_ADDRSTRLEN), - &peer_address); + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_PEER_ADDRESS); + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_CHANGED); + /* clear next hop value. 
*/ + memset (&((bgp_attr_extra_get (bgp_info->attr))->mp_nexthop_global), + 0, sizeof (struct in6_addr)); } if (IN6_IS_ADDR_LINKLOCAL(&peer_address)) { - /* Set next hop value. */ - (bgp_attr_extra_get (bgp_info->attr))->mp_nexthop_local = peer_address; + /* The next hop value will be set as part of packet rewrite. */ /* Set nexthop length. */ if (bgp_info->attr->extra->mp_nexthop_len != 32) @@ -2334,8 +2348,7 @@ route_set_ipv6_nexthop_peer (void *rule, struct prefix *prefix, } else { - /* Set next hop value. */ - (bgp_attr_extra_get (bgp_info->attr))->mp_nexthop_global = peer_address; + /* The next hop value will be set as part of packet rewrite. */ /* Set nexthop length. */ if (bgp_info->attr->extra->mp_nexthop_len == 0) @@ -2652,7 +2665,7 @@ bgp_route_map_process_peer (char *rmap_name, struct peer *peer, if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SOFT_RECONFIG)) { - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug("Processing route_map %s update on " "peer %s (inbound, soft-reconfig)", rmap_name, peer->host); @@ -2663,7 +2676,7 @@ bgp_route_map_process_peer (char *rmap_name, struct peer *peer, || CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_NEW_RCV)) { - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug("Processing route_map %s update on " "peer %s (inbound, route-refresh)", rmap_name, peer->host); @@ -2699,7 +2712,7 @@ bgp_route_map_process_peer (char *rmap_name, struct peer *peer, if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SOFT_RECONFIG)) { - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug("Processing route_map %s update on " "peer %s (import, soft-reconfig)", rmap_name, peer->host); @@ -2709,7 +2722,7 @@ bgp_route_map_process_peer (char *rmap_name, struct peer *peer, else if (CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_OLD_RCV) || CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_NEW_RCV)) { - if (bgp_debug_update(peer, NULL, 
1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug("Processing route_map %s update on " "peer %s (import, route-refresh)", rmap_name, peer->host); @@ -2719,27 +2732,24 @@ bgp_route_map_process_peer (char *rmap_name, struct peer *peer, } } + /* + * For outbound, unsuppress and default-originate map change (content or + * map created), merely update the "config" here, the actual route + * announcement happens at the group level. + */ if (filter->map[RMAP_OUT].name && - (strcmp(rmap_name, filter->map[RMAP_OUT].name) == 0)) - { - filter->map[RMAP_OUT].map = - route_map_lookup_by_name (filter->map[RMAP_OUT].name); - - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("Processing route_map %s update on peer %s (outbound)", - rmap_name, peer->host); - - if (route_update) - bgp_announce_route_all(peer); - } + (strcmp(rmap_name, filter->map[RMAP_OUT].name) == 0)) + filter->map[RMAP_OUT].map = + route_map_lookup_by_name (filter->map[RMAP_OUT].name); if (filter->usmap.name && (strcmp(rmap_name, filter->usmap.name) == 0)) - { - filter->usmap.map = route_map_lookup_by_name (filter->usmap.name); - if (route_update) - bgp_announce_route_all(peer); - } + filter->usmap.map = route_map_lookup_by_name (filter->usmap.name); + + if (peer->default_rmap[afi][safi].name && + (strcmp (rmap_name, peer->default_rmap[afi][safi].name) == 0)) + peer->default_rmap[afi][safi].map = + route_map_lookup_by_name (peer->default_rmap[afi][safi].name); } static void @@ -2807,26 +2817,16 @@ bgp_route_map_process_update (void *arg, char *rmap_name, int route_update) if (! 
peer->afc[afi][safi]) continue; - /* process in/out/import/export route-maps */ + /* process in/out/import/export/default-orig route-maps */ bgp_route_map_process_peer(rmap_name, peer, afi, safi, route_update); - - /* process default-originate route-map */ - if (peer->default_rmap[afi][safi].name && - (strcmp (rmap_name, peer->default_rmap[afi][safi].name) == 0)) - { - peer->default_rmap[afi][safi].map = - route_map_lookup_by_name (peer->default_rmap[afi][safi].name); - - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("Processing route_map %s update on " - "default-originate", rmap_name); - - if (route_update) - bgp_default_originate (peer, afi, safi, 0); - } } } + /* for outbound/default-orig route-maps, process for groups */ + update_group_policy_update(bgp, BGP_POLICY_ROUTE_MAP, rmap_name, + route_update, 0); + + /* update peer-group config (template) */ bgp_route_map_update_peer_group(rmap_name, bgp); /* For table route-map updates. */ @@ -2893,7 +2893,7 @@ bgp_route_map_process_update (void *arg, char *rmap_name, int route_update) if (route_update) { - if (bgp_debug_update(peer, NULL, 0)) + if (BGP_DEBUG (zebra, ZEBRA)) zlog_debug("Processing route_map %s update on " "redistributed routes", rmap_name); @@ -2932,13 +2932,18 @@ bgp_route_map_mark_update (char *rmap_name) for (ALL_LIST_ELEMENTS (bm->bgp, node, nnode, bgp)) { + if (bgp->t_rmap_update == NULL) { /* rmap_update_timer of 0 means don't do route updates */ if (bgp->rmap_update_timer) - bgp->t_rmap_update = - thread_add_timer(master, bgp_route_map_update_timer, bgp, - bgp->rmap_update_timer); + { + bgp->t_rmap_update = + thread_add_timer(master, bgp_route_map_update_timer, bgp, + bgp->rmap_update_timer); + /* Signal the groups that a route-map update event has started */ + update_group_policy_update(bgp, BGP_POLICY_ROUTE_MAP, rmap_name, 1, 1); + } else bgp_route_map_process_update((void *)bgp, rmap_name, 0); } diff --git a/bgpd/bgp_table.c b/bgpd/bgp_table.c index 7a6c675dc2..45d5a9eb71 100644 --- 
a/bgpd/bgp_table.c +++ b/bgpd/bgp_table.c @@ -24,6 +24,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "memory.h" #include "sockunion.h" #include "vty.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" diff --git a/bgpd/bgp_table.h b/bgpd/bgp_table.h index cfda368570..ce9b9fac0e 100644 --- a/bgpd/bgp_table.h +++ b/bgpd/bgp_table.h @@ -43,6 +43,7 @@ struct bgp_table struct peer *owner; struct route_table *route_table; + u_int64_t version; }; struct bgp_node @@ -63,6 +64,7 @@ struct bgp_node struct bgp_node *prn; + u_int64_t version; u_char flags; #define BGP_NODE_PROCESS_SCHEDULED (1 << 0) #define BGP_NODE_USER_CLEAR (1 << 1) @@ -353,4 +355,18 @@ bgp_table_iter_started (bgp_table_iter_t * iter) return route_table_iter_started (&iter->rt_iter); } +/* This would benefit from a real atomic operation... + * until then. */ +static inline u_int64_t +bgp_table_next_version (struct bgp_table *table) +{ + return ++table->version; +} + +static inline u_int64_t +bgp_table_version (struct bgp_table *table) +{ + return table->version; +} + #endif /* _QUAGGA_BGP_TABLE_H */ diff --git a/bgpd/bgp_updgrp.c b/bgpd/bgp_updgrp.c new file mode 100644 index 0000000000..6d18e8346b --- /dev/null +++ b/bgpd/bgp_updgrp.c @@ -0,0 +1,1842 @@ +/** + * bgp_updgrp.c: BGP update group structures + * + * @copyright Copyright (C) 2014 Cumulus Networks, Inc. + * + * @author Avneesh Sachdev + * @author Rajesh Varadarajan + * @author Pradosh Mohapatra + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include + +#include "prefix.h" +#include "thread.h" +#include "buffer.h" +#include "stream.h" +#include "command.h" +#include "sockunion.h" +#include "network.h" +#include "memory.h" +#include "filter.h" +#include "routemap.h" +#include "str.h" +#include "log.h" +#include "plist.h" +#include "linklist.h" +#include "workqueue.h" +#include "hash.h" +#include "jhash.h" +#include "queue.h" + +#include "bgpd/bgpd.h" +#include "bgpd/bgp_table.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_fsm.h" +#include "bgpd/bgp_advertise.h" +#include "bgpd/bgp_packet.h" +#include "bgpd/bgp_updgrp.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_filter.h" + +/******************** + * PRIVATE FUNCTIONS + ********************/ + +/** + * assign a unique ID to update group and subgroup. Mostly for display/ + * debugging purposes. It's a 64-bit space - used leisurely without a + * worry about its wrapping and about filling gaps. While at it, timestamp + * the creation. 
+ */ +static void +update_group_checkin (struct update_group *updgrp) +{ + updgrp->id = ++bm->updgrp_idspace; + updgrp->uptime = bgp_clock (); +} + +static void +update_subgroup_checkin (struct update_subgroup *subgrp, + struct update_group *updgrp) +{ + subgrp->id = ++bm->subgrp_idspace; + subgrp->uptime = bgp_clock (); +} + +static void +sync_init (struct update_subgroup *subgrp) +{ + subgrp->sync = XCALLOC (MTYPE_BGP_SYNCHRONISE, + sizeof (struct bgp_synchronize)); + BGP_ADV_FIFO_INIT (&subgrp->sync->update); + BGP_ADV_FIFO_INIT (&subgrp->sync->withdraw); + BGP_ADV_FIFO_INIT (&subgrp->sync->withdraw_low); + subgrp->hash = hash_create (baa_hash_key, baa_hash_cmp); + + /* We use a larger buffer for subgrp->work in the event that: + * - We RX a BGP_UPDATE where the attributes alone are just + * under BGP_MAX_PACKET_SIZE + * - The user configures an outbound route-map that does many as-path + * prepends or adds many communities. At most they can have CMD_ARGC_MAX + * args in a route-map so there is a finite limit on how large they can + * make the attributes. + * + * Having a buffer with BGP_MAX_PACKET_SIZE_OVERFLOW allows us to avoid bounds + * checking for every single attribute as we construct an UPDATE. 
+ */ + subgrp->work = stream_new (BGP_MAX_PACKET_SIZE + BGP_MAX_PACKET_SIZE_OVERFLOW); + subgrp->scratch = stream_new (BGP_MAX_PACKET_SIZE); +} + +static void +sync_delete (struct update_subgroup *subgrp) +{ + if (subgrp->sync) + XFREE (MTYPE_BGP_SYNCHRONISE, subgrp->sync); + subgrp->sync = NULL; + if (subgrp->hash) + hash_free (subgrp->hash); + subgrp->hash = NULL; + if (subgrp->work) + stream_free (subgrp->work); + subgrp->work = NULL; + if (subgrp->scratch) + stream_free (subgrp->scratch); + subgrp->scratch = NULL; +} + +/** + * conf_copy + * + * copy only those fields that are relevant to update group match + */ +static void +conf_copy (struct peer *dst, struct peer *src, afi_t afi, safi_t safi) +{ + struct bgp_filter *srcfilter; + struct bgp_filter *dstfilter; + + srcfilter = &src->filter[afi][safi]; + dstfilter = &dst->filter[afi][safi]; + + dst->bgp = src->bgp; + dst->sort = src->sort; + dst->as = src->as; + dst->weight = src->weight; + dst->v_routeadv = src->v_routeadv; + dst->flags = src->flags; + dst->af_flags[afi][safi] = src->af_flags[afi][safi]; + dst->host = strdup (src->host); + dst->cap = src->cap; + dst->af_cap[afi][safi] = src->af_cap[afi][safi]; + dst->afc_nego[afi][safi] = src->afc_nego[afi][safi]; + dst->local_as = src->local_as; + dst->change_local_as = src->change_local_as; + dst->shared_network = src->shared_network; + memcpy (&(dst->nexthop), &(src->nexthop), sizeof (struct bgp_nexthop)); + + dst->group = src->group; + + if (src->default_rmap[afi][safi].name) + { + dst->default_rmap[afi][safi].name = + strdup (src->default_rmap[afi][safi].name); + dst->default_rmap[afi][safi].map = src->default_rmap[afi][safi].map; + } + + if (DISTRIBUTE_OUT_NAME(srcfilter)) + { + DISTRIBUTE_OUT_NAME(dstfilter) = strdup(DISTRIBUTE_OUT_NAME(srcfilter)); + DISTRIBUTE_OUT(dstfilter) = DISTRIBUTE_OUT(srcfilter); + } + + if (PREFIX_LIST_OUT_NAME(srcfilter)) + { + PREFIX_LIST_OUT_NAME(dstfilter) = strdup(PREFIX_LIST_OUT_NAME(srcfilter)); + 
PREFIX_LIST_OUT(dstfilter) = PREFIX_LIST_OUT(srcfilter); + } + + if (FILTER_LIST_OUT_NAME(srcfilter)) + { + FILTER_LIST_OUT_NAME(dstfilter) = strdup(FILTER_LIST_OUT_NAME(srcfilter)); + FILTER_LIST_OUT(dstfilter) = FILTER_LIST_OUT(srcfilter); + } + + if (ROUTE_MAP_OUT_NAME(srcfilter)) + { + ROUTE_MAP_OUT_NAME(dstfilter) = strdup(ROUTE_MAP_OUT_NAME(srcfilter)); + ROUTE_MAP_OUT(dstfilter) = ROUTE_MAP_OUT(srcfilter); + } + + if (UNSUPPRESS_MAP_NAME(srcfilter)) + { + UNSUPPRESS_MAP_NAME(dstfilter) = strdup(UNSUPPRESS_MAP_NAME(srcfilter)); + UNSUPPRESS_MAP(dstfilter) = UNSUPPRESS_MAP(srcfilter); + } +} + +/** + * conf_release + * + * Free the strings that conf_copy strdup'ed into a peer conf: the host + * name, the default-originate route-map name and the outbound + * distribute-list, prefix-list, filter-list, route-map and + * unsuppress-map names for the given afi/safi + * + * @param src peer conf previously filled in by conf_copy + * @param afi address family whose names are released + * @param safi sub-address family whose names are released + */ +static void +conf_release (struct peer *src, afi_t afi, safi_t safi) +{ + struct bgp_filter *srcfilter; + + srcfilter = &src->filter[afi][safi]; + + /* + * conf_copy strdup's the host as well; release it here and clear the + * pointer so that releasing the same conf again cannot double-free it + */ + if (src->host) + free (src->host); + src->host = NULL; + + if (src->default_rmap[afi][safi].name) + free (src->default_rmap[afi][safi].name); + + if (srcfilter->dlist[FILTER_OUT].name) + free (srcfilter->dlist[FILTER_OUT].name); + + if (srcfilter->plist[FILTER_OUT].name) + free (srcfilter->plist[FILTER_OUT].name); + + if (srcfilter->aslist[FILTER_OUT].name) + free (srcfilter->aslist[FILTER_OUT].name); + + if (srcfilter->map[RMAP_OUT].name) + free (srcfilter->map[RMAP_OUT].name); + + if (srcfilter->usmap.name) + free (srcfilter->usmap.name); +} + +static void +peer2_updgrp_copy (struct update_group *updgrp, struct peer_af *paf) +{ + struct peer *src; + struct peer *dst; + + if (!updgrp || !paf) + return; + + src = paf->peer; + dst = updgrp->conf; + if (!src || !dst) + return; + + updgrp->afi = paf->afi; + updgrp->safi = paf->safi; + updgrp->afid = paf->afid; + updgrp->bgp = src->bgp; + + conf_copy (dst, src, paf->afi, paf->safi); +} + +/** + * auxiliary functions to maintain the hash table. + * - updgrp_hash_alloc - to create a new entry, passed to hash_get + * - updgrp_hash_key_make - makes the key for update group search + * - updgrp_hash_cmp - compare two update groups. 
+ */ +static void * +updgrp_hash_alloc (void *p) +{ + struct update_group *updgrp; + struct update_group *in; + + in = p; + updgrp = XCALLOC (MTYPE_BGP_UPDGRP, sizeof (struct update_group)); + memcpy (updgrp, in, sizeof (struct update_group)); + updgrp->conf = XCALLOC (MTYPE_BGP_PEER, sizeof (struct peer)); + conf_copy (updgrp->conf, in->conf, in->afi, in->safi); + return updgrp; +} + +/** + * The hash value for a peer is computed from the following variables: + * v = f( + * 1. IBGP (1) or EBGP (2) + * 2. FLAGS based on configuration: + * LOCAL_AS_NO_PREPEND + * LOCAL_AS_REPLACE_AS + * 3. AF_FLAGS based on configuration: + * Refer to definition in bgp_updgrp.h + * 4. (AF-independent) Capability flags: + * AS4_RCV capability + * 5. (AF-dependent) Capability flags: + * ORF_PREFIX_SM_RCV (peer can send prefix ORF) + * 6. MRAI + * 7. peer-group name + * 8. Outbound route-map name (neighbor route-map <> out) + * 9. Outbound distribute-list name (neighbor distribute-list <> out) + * 10. Outbound prefix-list name (neighbor prefix-list <> out) + * 11. Outbound as-list name (neighbor filter-list <> out) + * 12. Unsuppress map name (neighbor unsuppress-map <>) + * 13. default rmap name (neighbor default-originate route-map <>) + * 14. encoding both global and link-local nexthop? + * 15. If peer is configured to be a lonesoul, peer ip address + * 16. Local-as should match, if configured. 
+ * ) + */ +static unsigned int +updgrp_hash_key_make (void *p) +{ + const struct update_group *updgrp; + const struct peer *peer; + const struct bgp_filter *filter; + uint32_t flags; + uint32_t key; + afi_t afi; + safi_t safi; + +#define SEED1 999331 +#define SEED2 2147483647 + + updgrp = p; + peer = updgrp->conf; + afi = updgrp->afi; + safi = updgrp->safi; + flags = peer->af_flags[afi][safi]; + filter = &peer->filter[afi][safi]; + + key = 0; + + key = jhash_1word (peer->sort, key); /* EBGP or IBGP */ + key = jhash_1word ((peer->flags & PEER_UPDGRP_FLAGS), key); + key = jhash_1word ((flags & PEER_UPDGRP_AF_FLAGS), key); + key = jhash_1word ((peer->cap & PEER_UPDGRP_CAP_FLAGS), key); + key = jhash_1word ((peer->af_cap[afi][safi] & + PEER_UPDGRP_AF_CAP_FLAGS), key); + key = jhash_1word (peer->v_routeadv, key); + key = jhash_1word (peer->change_local_as, key); + + if (peer->group) + key = jhash_1word (jhash (peer->group->name, + strlen (peer->group->name), SEED1), key); + + if (filter->map[RMAP_OUT].name) + key = jhash_1word (jhash (filter->map[RMAP_OUT].name, + strlen (filter->map[RMAP_OUT].name), SEED1), + key); + + if (filter->dlist[FILTER_OUT].name) + key = jhash_1word (jhash (filter->dlist[FILTER_OUT].name, + strlen (filter->dlist[FILTER_OUT].name), SEED1), + key); + + if (filter->plist[FILTER_OUT].name) + key = jhash_1word (jhash (filter->plist[FILTER_OUT].name, + strlen (filter->plist[FILTER_OUT].name), SEED1), + key); + + if (filter->aslist[FILTER_OUT].name) + key = jhash_1word (jhash (filter->aslist[FILTER_OUT].name, + strlen (filter->aslist[FILTER_OUT].name), + SEED1), key); + + if (filter->usmap.name) + key = jhash_1word (jhash (filter->usmap.name, + strlen (filter->usmap.name), SEED1), key); + + if (peer->default_rmap[afi][safi].name) + key = jhash_1word (jhash (peer->default_rmap[afi][safi].name, + strlen (peer->default_rmap[afi][safi].name), + SEED1), key); + + /* If peer is on a shared network and is exchanging IPv6 prefixes, + * it needs to include 
link-local address. That's different from + * non-shared-network peers (nexthop encoded with 32 bytes vs 16 + * bytes). We create different update groups to take care of that. + */ + key = jhash_1word ((peer->shared_network && + peer_afi_active_nego (peer, AFI_IP6)), + key); + + /* + * Every peer configured to be a lonesoul gets its own update group. + * + * Every route server client gets its own update group as well. Optimize + * later. + */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_LONESOUL) || + CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) + key = jhash_1word (jhash (peer->host, strlen (peer->host), SEED2), key); + + return key; +} + +static int +updgrp_hash_cmp (const void *p1, const void *p2) +{ + const struct update_group *grp1; + const struct update_group *grp2; + const struct peer *pe1; + const struct peer *pe2; + uint32_t flags1; + uint32_t flags2; + const struct bgp_filter *fl1; + const struct bgp_filter *fl2; + afi_t afi; + safi_t safi; + + if (!p1 || !p2) + return 0; + + grp1 = p1; + grp2 = p2; + pe1 = grp1->conf; + pe2 = grp2->conf; + afi = grp1->afi; + safi = grp1->safi; + flags1 = pe1->af_flags[afi][safi]; + flags2 = pe2->af_flags[afi][safi]; + fl1 = &pe1->filter[afi][safi]; + fl2 = &pe2->filter[afi][safi]; + + /* put EBGP and IBGP peers in different update groups */ + if (pe1->sort != pe2->sort) + return 0; + + /* check peer flags */ + if ((pe1->flags & PEER_UPDGRP_FLAGS) != + (pe2->flags & PEER_UPDGRP_FLAGS)) + return 0; + + /* If there is 'local-as' configured, it should match. 
*/ + if (pe1->change_local_as != pe2->change_local_as) + return 0; + + /* flags like route reflector client */ + if ((flags1 & PEER_UPDGRP_AF_FLAGS) != (flags2 & PEER_UPDGRP_AF_FLAGS)) + return 0; + + if ((pe1->cap & PEER_UPDGRP_CAP_FLAGS) != + (pe2->cap & PEER_UPDGRP_CAP_FLAGS)) + return 0; + + if ((pe1->af_cap[afi][safi] & PEER_UPDGRP_AF_CAP_FLAGS) != + (pe2->af_cap[afi][safi] & PEER_UPDGRP_AF_CAP_FLAGS)) + return 0; + + if (pe1->v_routeadv != pe2->v_routeadv) + return 0; + + if (pe1->group != pe2->group) + return 0; + + /* route-map names should be the same */ + if ((fl1->map[RMAP_OUT].name && !fl2->map[RMAP_OUT].name) || + (!fl1->map[RMAP_OUT].name && fl2->map[RMAP_OUT].name) || + (fl1->map[RMAP_OUT].name && fl2->map[RMAP_OUT].name && + strcmp (fl1->map[RMAP_OUT].name, fl2->map[RMAP_OUT].name))) + return 0; + + if ((fl1->dlist[FILTER_OUT].name && !fl2->dlist[FILTER_OUT].name) || + (!fl1->dlist[FILTER_OUT].name && fl2->dlist[FILTER_OUT].name) || + (fl1->dlist[FILTER_OUT].name && fl2->dlist[FILTER_OUT].name && + strcmp (fl1->dlist[FILTER_OUT].name, fl2->dlist[FILTER_OUT].name))) + return 0; + + if ((fl1->plist[FILTER_OUT].name && !fl2->plist[FILTER_OUT].name) || + (!fl1->plist[FILTER_OUT].name && fl2->plist[FILTER_OUT].name) || + (fl1->plist[FILTER_OUT].name && fl2->plist[FILTER_OUT].name && + strcmp (fl1->plist[FILTER_OUT].name, fl2->plist[FILTER_OUT].name))) + return 0; + + if ((fl1->aslist[FILTER_OUT].name && !fl2->aslist[FILTER_OUT].name) || + (!fl1->aslist[FILTER_OUT].name && fl2->aslist[FILTER_OUT].name) || + (fl1->aslist[FILTER_OUT].name && fl2->aslist[FILTER_OUT].name && + strcmp (fl1->aslist[FILTER_OUT].name, fl2->aslist[FILTER_OUT].name))) + return 0; + + if ((fl1->usmap.name && !fl2->usmap.name) || + (!fl1->usmap.name && fl2->usmap.name) || + (fl1->usmap.name && fl2->usmap.name && + strcmp (fl1->usmap.name, fl2->usmap.name))) + return 0; + + if ((pe1->default_rmap[afi][safi].name && + !pe2->default_rmap[afi][safi].name) || + 
(!pe1->default_rmap[afi][safi].name && + pe2->default_rmap[afi][safi].name) || + (pe1->default_rmap[afi][safi].name && + pe2->default_rmap[afi][safi].name && + strcmp (pe1->default_rmap[afi][safi].name, + pe2->default_rmap[afi][safi].name))) + return 0; + + if ((afi == AFI_IP6) && (pe1->shared_network != pe2->shared_network)) + return 0; + + if ((CHECK_FLAG (pe1->flags, PEER_FLAG_LONESOUL) || + CHECK_FLAG (pe1->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) && + !sockunion_same (&pe1->su, &pe2->su)) + return 0; + + return 1; +} + +static void +peer_lonesoul_or_not (struct peer *peer, int set) +{ + /* no change in status? */ + if (set == (CHECK_FLAG (peer->flags, PEER_FLAG_LONESOUL) > 0)) + return; + + if (set) + SET_FLAG (peer->flags, PEER_FLAG_LONESOUL); + else + UNSET_FLAG (peer->flags, PEER_FLAG_LONESOUL); + + update_group_adjust_peer_afs (peer); +} + +/* + * subgroup_total_packets_enqueued + * + * Returns the total number of packets enqueued to a subgroup. + */ +static unsigned int +subgroup_total_packets_enqueued (struct update_subgroup *subgrp) +{ + struct bpacket *pkt; + + pkt = bpacket_queue_last (SUBGRP_PKTQ (subgrp)); + + return pkt->ver - 1; +} + +static int +update_group_show_walkcb (struct update_group *updgrp, void *arg) +{ + struct vty *vty = arg; + struct update_subgroup *subgrp; + struct peer_af *paf; + struct bgp_filter *filter; + + vty_out (vty, "Update-group %llu:%s", updgrp->id, VTY_NEWLINE); + vty_out (vty, " Created: %s", timestamp_string (updgrp->uptime)); + filter = &updgrp->conf->filter[updgrp->afi][updgrp->safi]; + if (filter->map[RMAP_OUT].name) + vty_out (vty, " Outgoing route map: %s%s%s", + filter->map[RMAP_OUT].map ? "X" : "", + filter->map[RMAP_OUT].name, VTY_NEWLINE); + vty_out (vty, " MRAI value (seconds): %d%s", + updgrp->conf->v_routeadv, VTY_NEWLINE); + if (updgrp->conf->change_local_as) + vty_out (vty, " Local AS %u%s%s%s", + updgrp->conf->change_local_as, + CHECK_FLAG (updgrp->conf->flags, + PEER_FLAG_LOCAL_AS_NO_PREPEND) ? 
" no-prepend" : "", + CHECK_FLAG (updgrp->conf->flags, + PEER_FLAG_LOCAL_AS_REPLACE_AS) ? " replace-as" : "", + VTY_NEWLINE); + + UPDGRP_FOREACH_SUBGRP (updgrp, subgrp) + { + vty_out (vty, "%s", VTY_NEWLINE); + vty_out (vty, " Update-subgroup %llu:%s", subgrp->id, VTY_NEWLINE); + vty_out (vty, " Created: %s", timestamp_string (subgrp->uptime)); + + if (subgrp->split_from.update_group_id || subgrp->split_from.subgroup_id) + { + vty_out (vty, " Split from group id: %llu%s", + subgrp->split_from.update_group_id, VTY_NEWLINE); + vty_out (vty, " Split from subgroup id: %llu%s", + subgrp->split_from.subgroup_id, VTY_NEWLINE); + } + + vty_out (vty, " Join events: %u%s", subgrp->join_events, VTY_NEWLINE); + vty_out (vty, " Prune events: %u%s", + subgrp->prune_events, VTY_NEWLINE); + vty_out (vty, " Merge events: %u%s", + subgrp->merge_events, VTY_NEWLINE); + vty_out (vty, " Split events: %u%s", + subgrp->split_events, VTY_NEWLINE); + vty_out (vty, " Update group switch events: %u%s", + subgrp->updgrp_switch_events, VTY_NEWLINE); + vty_out (vty, " Peer refreshes combined: %u%s", + subgrp->peer_refreshes_combined, VTY_NEWLINE); + vty_out (vty, " Merge checks triggered: %u%s", + subgrp->merge_checks_triggered, VTY_NEWLINE); + vty_out (vty, " Version: %llu%s", subgrp->version, VTY_NEWLINE); + vty_out (vty, " Packet queue length: %d%s", + bpacket_queue_length (SUBGRP_PKTQ (subgrp)), VTY_NEWLINE); + vty_out (vty, " Total packets enqueued: %u%s", + subgroup_total_packets_enqueued (subgrp), VTY_NEWLINE); + vty_out (vty, " Packet queue high watermark: %d%s", + bpacket_queue_hwm_length (SUBGRP_PKTQ (subgrp)), VTY_NEWLINE); + vty_out (vty, " Adj-out list count: %u%s", + subgrp->adj_count, VTY_NEWLINE); + vty_out (vty, " Advertise list: %s%s", + advertise_list_is_empty (subgrp) ? "empty" : "not empty", + VTY_NEWLINE); + vty_out (vty, " Flags: %s%s", + CHECK_FLAG (subgrp->flags, + SUBGRP_FLAG_NEEDS_REFRESH) ? 
"R" : "", VTY_NEWLINE); + if (subgrp->peer_count > 0) + { + vty_out (vty, " Peers:%s", VTY_NEWLINE); + SUBGRP_FOREACH_PEER (subgrp, paf) + vty_out (vty, " - %s%s", paf->peer->host, VTY_NEWLINE); + } + } + return UPDWALK_CONTINUE; +} + +/* + * Helper function to show the packet queue for each subgroup of update group. + * Will be constrained to a particular subgroup id if id !=0 + */ +static int +updgrp_show_packet_queue_walkcb (struct update_group *updgrp, void *arg) +{ + struct updwalk_context *ctx = arg; + struct update_subgroup *subgrp; + struct vty *vty; + + vty = ctx->vty; + UPDGRP_FOREACH_SUBGRP (updgrp, subgrp) + { + if (ctx->subgrp_id && (ctx->subgrp_id != subgrp->id)) + continue; + vty_out (vty, "update group %llu, subgroup %llu%s", updgrp->id, + subgrp->id, VTY_NEWLINE); + bpacket_queue_show_vty (SUBGRP_PKTQ (subgrp), vty); + } + return UPDWALK_CONTINUE; +} + +/* + * Show the packet queue for each subgroup of update group. Will be + * constrained to a particular subgroup id if id !=0 + */ +void +update_group_show_packet_queue (struct bgp *bgp, afi_t afi, safi_t safi, + struct vty *vty, u_int64_t id) +{ + struct updwalk_context ctx; + + memset (&ctx, 0, sizeof (ctx)); + ctx.vty = vty; + ctx.subgrp_id = id; + ctx.flags = 0; + update_group_af_walk (bgp, afi, safi, updgrp_show_packet_queue_walkcb, + &ctx); +} + +static struct update_group * +update_group_find (struct peer_af *paf) +{ + struct update_group *updgrp; + struct update_group tmp; + struct peer tmp_conf; + + if (!peer_established (PAF_PEER (paf))) + return NULL; + + memset (&tmp, 0, sizeof (tmp)); + memset (&tmp_conf, 0, sizeof (tmp_conf)); + tmp.conf = &tmp_conf; + peer2_updgrp_copy (&tmp, paf); + + updgrp = hash_lookup (paf->peer->bgp->update_groups[paf->afid], &tmp); + conf_release (&tmp_conf, paf->afi, paf->safi); + return updgrp; +} + +static struct update_group * +update_group_create (struct peer_af *paf) +{ + struct update_group *updgrp; + struct update_group tmp; + struct peer tmp_conf; + + 
memset (&tmp, 0, sizeof (tmp)); + memset (&tmp_conf, 0, sizeof (tmp_conf)); + tmp.conf = &tmp_conf; + peer2_updgrp_copy (&tmp, paf); + + updgrp = hash_get (paf->peer->bgp->update_groups[paf->afid], &tmp, + updgrp_hash_alloc); + if (!updgrp) + return NULL; + update_group_checkin (updgrp); + + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("create update group %llu", updgrp->id); + + UPDGRP_GLOBAL_STAT (updgrp, updgrps_created) += 1; + + return updgrp; +} + +static void +update_group_delete (struct update_group *updgrp) +{ + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("delete update group %llu", updgrp->id); + + UPDGRP_GLOBAL_STAT (updgrp, updgrps_deleted) += 1; + + hash_release (updgrp->bgp->update_groups[updgrp->afid], updgrp); + conf_release (updgrp->conf, updgrp->afi, updgrp->safi); + XFREE (MTYPE_BGP_PEER, updgrp->conf); + XFREE (MTYPE_BGP_UPDGRP, updgrp); +} + +static void +update_group_add_subgroup (struct update_group *updgrp, + struct update_subgroup *subgrp) +{ + if (!updgrp || !subgrp) + return; + + LIST_INSERT_HEAD (&(updgrp->subgrps), subgrp, updgrp_train); + subgrp->update_group = updgrp; +} + +static void +update_group_remove_subgroup (struct update_group *updgrp, + struct update_subgroup *subgrp) +{ + if (!updgrp || !subgrp) + return; + + LIST_REMOVE (subgrp, updgrp_train); + subgrp->update_group = NULL; + if (LIST_EMPTY (&(updgrp->subgrps))) + update_group_delete (updgrp); +} + +static struct update_subgroup * +update_subgroup_create (struct update_group *updgrp) +{ + struct update_subgroup *subgrp; + + subgrp = XCALLOC (MTYPE_BGP_UPD_SUBGRP, sizeof (struct update_subgroup)); + update_subgroup_checkin (subgrp, updgrp); + subgrp->v_coalesce = (UPDGRP_INST (updgrp))->coalesce_time; + sync_init (subgrp); + bpacket_queue_init (SUBGRP_PKTQ (subgrp)); + bpacket_queue_add (SUBGRP_PKTQ (subgrp), NULL, NULL); + TAILQ_INIT (&(subgrp->adjq)); + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("create subgroup 
u%llu:s%llu", + updgrp->id, subgrp->id); + + update_group_add_subgroup (updgrp, subgrp); + + UPDGRP_INCR_STAT (updgrp, subgrps_created); + + return subgrp; +} + +static void +update_subgroup_delete (struct update_subgroup *subgrp) +{ + if (!subgrp) + return; + + if (subgrp->update_group) + UPDGRP_INCR_STAT (subgrp->update_group, subgrps_deleted); + + if (subgrp->t_merge_check) + THREAD_OFF (subgrp->t_merge_check); + + if (subgrp->t_coalesce) + THREAD_TIMER_OFF (subgrp->t_coalesce); + + bpacket_queue_cleanup (SUBGRP_PKTQ (subgrp)); + subgroup_clear_table (subgrp); + + if (subgrp->t_coalesce) + THREAD_TIMER_OFF (subgrp->t_coalesce); + sync_delete (subgrp); + + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("delete subgroup u%llu:s%llu", + subgrp->update_group->id, subgrp->id); + + update_group_remove_subgroup (subgrp->update_group, subgrp); + + XFREE (MTYPE_BGP_UPD_SUBGRP, subgrp); +} + +void +update_subgroup_inherit_info (struct update_subgroup *to, + struct update_subgroup *from) +{ + if (!to || !from) + return; + + to->sflags = from->sflags; +} + +/* + * update_subgroup_check_delete + * + * Delete a subgroup if it is ready to be deleted. + * + * Returns TRUE if the subgroup was deleted. + */ +static int +update_subgroup_check_delete (struct update_subgroup *subgrp) +{ + if (!subgrp) + return 0; + + if (!LIST_EMPTY (&(subgrp->peers))) + return 0; + + update_subgroup_delete (subgrp); + + return 1; +} + +/* + * update_subgroup_add_peer + * + * @param send_enqueued_packets If true all currently enqueued packets will + * also be sent to the peer. 
+ */
+static void
+update_subgroup_add_peer (struct update_subgroup *subgrp, struct peer_af *paf,
+                          int send_enqueued_pkts)
+{
+  struct bpacket *pkt;
+
+  if (subgrp == NULL || paf == NULL)
+    return;
+
+  /* Link the peer into the subgroup's membership list. */
+  LIST_INSERT_HEAD (&(subgrp->peers), paf, subgrp_train);
+  paf->subgroup = subgrp;
+  subgrp->peer_count++;
+
+  if (bgp_debug_peer_updout_enabled(paf->peer))
+    {
+      UPDGRP_PEER_DBG_EN(subgrp->update_group);
+    }
+
+  SUBGRP_INCR_STAT (subgrp, join_events);
+
+  if (!send_enqueued_pkts)
+    {
+      /*
+       * Hang the peer off of the last, placeholder, packet in the
+       * queue. This means it won't see any of the packets that are
+       * currently in the queue.
+       */
+      pkt = bpacket_queue_last (SUBGRP_PKTQ (subgrp));
+      assert (pkt->buffer == NULL);
+    }
+  else
+    {
+      /* Start at the head so every queued packet is sent to the peer. */
+      pkt = bpacket_queue_first (SUBGRP_PKTQ (subgrp));
+    }
+
+  bpacket_add_peer (pkt, paf);
+
+  bpacket_queue_sanity_check (SUBGRP_PKTQ (subgrp));
+}
+
+/*
+ * update_subgroup_remove_peer_internal
+ *
+ * Unlinks a peer from a subgroup (packet queue position, membership
+ * list, peer count) without deleting the subgroup, even if it ends up
+ * empty. Callers are expected to follow up with
+ * update_subgroup_check_delete().
+ *
+ * @see update_subgroup_remove_peer
+ */
+static void
+update_subgroup_remove_peer_internal (struct update_subgroup *subgrp,
+                                      struct peer_af *paf)
+{
+  assert (subgrp && paf);
+
+  if (bgp_debug_peer_updout_enabled(paf->peer))
+    {
+      UPDGRP_PEER_DBG_DIS(subgrp->update_group);
+    }
+
+  /* Detach from the packet queue first, then from the peer list. */
+  bpacket_queue_remove_peer (paf);
+  LIST_REMOVE (paf, subgrp_train);
+  paf->subgroup = NULL;
+  subgrp->peer_count--;
+
+  SUBGRP_INCR_STAT (subgrp, prune_events);
+}
+
+/*
+ * update_subgroup_remove_peer
+ *
+ * Removes a peer from its subgroup. The subgroup is deleted if that
+ * leaves it without peers; otherwise a merge check is run on it.
+ */
+void
+update_subgroup_remove_peer (struct update_subgroup *subgrp,
+                             struct peer_af *paf)
+{
+  if (subgrp == NULL || paf == NULL)
+    return;
+
+  update_subgroup_remove_peer_internal (subgrp, paf);
+
+  /*
+   * If the subgroup survived, the deletion of the peer may have caused
+   * some packets to be deleted from the subgroup packet queue. Check
+   * if the subgroup can be merged now.
+   */
+  if (!update_subgroup_check_delete (subgrp))
+    update_subgroup_check_merge (subgrp, "removed peer from subgroup");
+}
+
+/*
+ * update_subgroup_find
+ *
+ * Looks for an existing subgroup of 'updgrp' that a peer which is not
+ * yet in any subgroup can join: one that is still at version 0 and
+ * does not need a refresh. Returns NULL if there is none, or if the
+ * peer is not established.
+ */
+static struct update_subgroup *
+update_subgroup_find (struct update_group *updgrp, struct peer_af *paf)
+{
+  const uint64_t wanted_version = 0;
+  struct update_subgroup *subgrp = NULL;
+
+  /* The peer must not already belong to a subgroup. */
+  if (paf->subgroup)
+    {
+      assert (0);
+      return NULL;
+    }
+
+  if (!peer_established (PAF_PEER (paf)))
+    return NULL;
+
+  UPDGRP_FOREACH_SUBGRP (updgrp, subgrp)
+  {
+    /*
+     * The version number is not meaningful on a subgroup that needs
+     * a refresh, so such subgroups are never a match.
+     */
+    if (subgrp->version == wanted_version
+        && !update_subgroup_needs_refresh (subgrp))
+      break;
+  }
+
+  return subgrp;
+}
+
+/*
+ * update_subgroup_ready_for_merge
+ *
+ * Returns TRUE if this subgroup is in a state that allows it to be
+ * merged into another subgroup:
+ *
+ *  - no encoded packets are waiting to be written out to peers,
+ *  - no enqueued updates are waiting to be encoded,
+ *  - it does not need a refresh (for one, we can't determine if the
+ *    adj_out of such a group matches that of another group).
+ */
+static inline int
+update_subgroup_ready_for_merge (struct update_subgroup *subgrp)
+{
+  return bpacket_queue_is_empty (SUBGRP_PKTQ (subgrp))
+         && advertise_list_is_empty (subgrp)
+         && !update_subgroup_needs_refresh (subgrp);
+}
+
+/*
+ * update_subgroup_can_merge_into
+ *
+ * Returns TRUE if the first subgroup can merge into the second
+ * subgroup.
+ *
+ * Both must have processed the BRIB to the same point in order to be
+ * merged.
+ *
+ * If there are any adv entries on the target, then its adj-out (the
+ * set of advertised routes) does not match that of the other subgrp,
+ * and we cannot merge the two. The adj-out is used when generating a
+ * route refresh to a peer in a subgroup. If it is not accurate, say it
+ * is missing an entry, we may miss sending a withdraw for an entry as
+ * part of a refresh.
+ */
+static inline int
+update_subgroup_can_merge_into (struct update_subgroup *subgrp,
+                                struct update_subgroup *target)
+{
+  if (subgrp == target)
+    return 0;
+
+  return subgrp->version == target->version
+         && advertise_list_is_empty (target)
+         && !update_subgroup_needs_refresh (target);
+}
+
+/*
+ * update_subgroup_merge
+ *
+ * Merge the first subgroup into the second one: every peer is moved
+ * across and the emptied source subgroup is then deleted.
+ */
+static void
+update_subgroup_merge (struct update_subgroup *subgrp,
+                       struct update_subgroup *target, const char *reason)
+{
+  struct peer_af *paf;
+  int result;
+  int peer_count;
+
+  assert (subgrp->adj_count == target->adj_count);
+
+  /* Remember the size for the debug message; the list is drained below. */
+  peer_count = subgrp->peer_count;
+
+  while ((paf = LIST_FIRST (&subgrp->peers)) != NULL)
+    {
+      update_subgroup_remove_peer_internal (subgrp, paf);
+
+      /*
+       * Add the peer to the target subgroup, while making sure that
+       * any currently enqueued packets won't be sent to it. Enqueued
+       * packets could, for example, result in an unnecessary withdraw
+       * followed by an advertise.
+       */
+      update_subgroup_add_peer (target, paf, 0);
+    }
+
+  SUBGRP_INCR_STAT (target, merge_events);
+
+  if (BGP_DEBUG (update_groups, UPDATE_GROUPS))
+    zlog_debug ("u%llu:s%llu (%d peers) merged into u%llu:s%llu, "
+                "trigger: %s", subgrp->update_group->id, subgrp->id, peer_count,
+                target->update_group->id, target->id, reason ? reason : "unknown");
+
+  /* The source subgroup has no peers left, so it must get deleted. */
+  result = update_subgroup_check_delete (subgrp);
+  assert (result);
+}
+
+/*
+ * update_subgroup_check_merge
+ *
+ * Merge this subgroup into another subgroup if possible.
+ *
+ * Returns TRUE if the subgroup has been merged. The subgroup pointer
+ * should not be accessed in this case.
+ */
+int
+update_subgroup_check_merge (struct update_subgroup *subgrp,
+                             const char *reason)
+{
+  struct update_subgroup *candidate;
+
+  if (!update_subgroup_ready_for_merge (subgrp))
+    return 0;
+
+  /*
+   * Merge into the first sibling subgroup that can take us.
+   */
+  UPDGRP_FOREACH_SUBGRP (subgrp->update_group, candidate)
+  {
+    if (!update_subgroup_can_merge_into (subgrp, candidate))
+      continue;
+
+    update_subgroup_merge (subgrp, candidate, reason);
+    return 1;
+  }
+
+  return 0;
+}
+
+/*
+ * update_subgroup_merge_check_thread_cb
+ *
+ * Thread callback scheduled by update_subgroup_trigger_merge_check().
+ */
+static int
+update_subgroup_merge_check_thread_cb (struct thread *thread)
+{
+  struct update_subgroup *subgrp = THREAD_ARG (thread);
+
+  /* The thread has fired; forget it so a new check can be scheduled. */
+  subgrp->t_merge_check = NULL;
+
+  update_subgroup_check_merge (subgrp, "triggered merge check");
+  return 0;
+}
+
+/*
+ * update_subgroup_trigger_merge_check
+ *
+ * Triggers a call to update_subgroup_check_merge() on a clean context.
+ *
+ * @param force If true, the merge check will be triggered even if the
+ *              subgroup doesn't currently look ready for a merge.
+ *
+ * Returns TRUE if a merge check will be performed shortly.
+ */
+int
+update_subgroup_trigger_merge_check (struct update_subgroup *subgrp,
+                                     int force)
+{
+  /* A check is already pending. */
+  if (subgrp->t_merge_check != NULL)
+    return 1;
+
+  if (!(force || update_subgroup_ready_for_merge (subgrp)))
+    return 0;
+
+  subgrp->t_merge_check =
+    thread_add_background (master,
+                           update_subgroup_merge_check_thread_cb,
+                           subgrp, 0);
+
+  SUBGRP_INCR_STAT (subgrp, merge_checks_triggered);
+
+  return 1;
+}
+
+/*
+ * update_subgroup_copy_adj_out
+ *
+ * Helper function that clones the adj out (state about advertised
+ * routes) from one subgroup to another. It assumes that the adj out
+ * of the target subgroup is empty.
+ */
+static void
+update_subgroup_copy_adj_out (struct update_subgroup *source,
+                              struct update_subgroup *dest)
+{
+  struct bgp_adj_out *aout, *aout_copy;
+
+  SUBGRP_FOREACH_ADJ (source, aout)
+  {
+    /*
+     * Copy the adj out; the attribute, if any, is shared through an
+     * extra reference rather than duplicated.
+     */
+    aout_copy = bgp_adj_out_alloc (dest, aout->rn);
+    if (aout->attr)
+      aout_copy->attr = bgp_attr_refcount (aout->attr);
+    else
+      aout_copy->attr = NULL;
+  }
+}
+
+/*
+ * update_subgroup_copy_packets
+ *
+ * Copy packets after and including the given packet to the subgroup
+ * 'dest'. Copying stops at the first packet without a buffer.
+ *
+ * Returns the number of packets copied.
+ */
+static int
+update_subgroup_copy_packets (struct update_subgroup *dest,
+                              struct bpacket *pkt)
+{
+  int count = 0;
+
+  for (; pkt && pkt->buffer; pkt = bpacket_next (pkt))
+    {
+      bpacket_queue_add (SUBGRP_PKTQ (dest), stream_dup (pkt->buffer),
+                         &pkt->arr);
+      count++;
+    }
+
+  bpacket_queue_sanity_check (SUBGRP_PKTQ (dest));
+
+  return count;
+}
+
+/*
+ * Re-resolve the update group's outbound prefix-list if it is the one
+ * called 'name'. Returns 1 if it was, 0 otherwise.
+ */
+static int
+updgrp_prefix_list_update (struct update_group *updgrp, char *name)
+{
+  struct peer *peer;
+  struct bgp_filter *filter;
+
+  peer = UPDGRP_PEER (updgrp);
+  filter = &peer->filter[UPDGRP_AFI(updgrp)][UPDGRP_SAFI(updgrp)];
+
+  if (!PREFIX_LIST_OUT_NAME(filter)
+      || strcmp (name, PREFIX_LIST_OUT_NAME(filter)) != 0)
+    return 0;
+
+  PREFIX_LIST_OUT(filter) =
+    prefix_list_lookup (UPDGRP_AFI(updgrp), PREFIX_LIST_OUT_NAME(filter));
+  return 1;
+}
+
+/*
+ * Re-resolve the update group's outbound filter-list (AS path list) if
+ * it is the one called 'name'. Returns 1 if it was, 0 otherwise.
+ */
+static int
+updgrp_filter_list_update (struct update_group *updgrp, char *name)
+{
+  struct peer *peer;
+  struct bgp_filter *filter;
+
+  peer = UPDGRP_PEER (updgrp);
+  filter = &peer->filter[UPDGRP_AFI(updgrp)][UPDGRP_SAFI(updgrp)];
+
+  if (!FILTER_LIST_OUT_NAME(filter)
+      || strcmp (name, FILTER_LIST_OUT_NAME(filter)) != 0)
+    return 0;
+
+  FILTER_LIST_OUT(filter) = as_list_lookup (FILTER_LIST_OUT_NAME(filter));
+  return 1;
+}
+
+/*
+ * Re-resolve the update group's outbound distribute-list (access list)
+ * if it is the one called 'name'. Returns 1 if it was, 0 otherwise.
+ */
+static int
+updgrp_distribute_list_update (struct update_group *updgrp, char *name)
+{
+  struct peer *peer;
+  struct bgp_filter *filter;
+
+  peer = UPDGRP_PEER(updgrp);
+  filter = &peer->filter[UPDGRP_AFI(updgrp)][UPDGRP_SAFI(updgrp)];
+
+  if (!DISTRIBUTE_OUT_NAME(filter)
+      || strcmp (name, DISTRIBUTE_OUT_NAME(filter)) != 0)
+    return 0;
+
+  DISTRIBUTE_OUT(filter) = access_list_lookup(UPDGRP_AFI(updgrp),
+                                              DISTRIBUTE_OUT_NAME(filter));
+  return 1;
+}
+
+/*
+ * Re-resolve every route-map of the update group that is called 'name':
+ * the outbound route-map, the unsuppress-map and the default-originate
+ * route-map.
+ *
+ * Returns 1 if the outbound route-map or the unsuppress-map matched.
+ * A match on the default-originate route-map is reported separately
+ * through *def_rmap_changed (when the pointer is not NULL) and does not
+ * influence the return value.
+ */
+static int
+updgrp_route_map_update (struct update_group *updgrp, char *name,
+                         int *def_rmap_changed)
+{
+  struct peer *peer = UPDGRP_PEER (updgrp);
+  afi_t afi = UPDGRP_AFI (updgrp);
+  safi_t safi = UPDGRP_SAFI (updgrp);
+  struct bgp_filter *filter = &peer->filter[afi][safi];
+  int touched = 0;
+
+  if (ROUTE_MAP_OUT_NAME(filter)
+      && !strcmp (name, ROUTE_MAP_OUT_NAME(filter)))
+    {
+      ROUTE_MAP_OUT(filter) = route_map_lookup_by_name (name);
+      touched = 1;
+    }
+
+  if (UNSUPPRESS_MAP_NAME(filter)
+      && !strcmp (name, UNSUPPRESS_MAP_NAME(filter)))
+    {
+      UNSUPPRESS_MAP(filter) = route_map_lookup_by_name (name);
+      touched = 1;
+    }
+
+  /* process default-originate route-map */
+  if (peer->default_rmap[afi][safi].name
+      && !strcmp (name, peer->default_rmap[afi][safi].name))
+    {
+      peer->default_rmap[afi][safi].map = route_map_lookup_by_name (name);
+      if (def_rmap_changed)
+        *def_rmap_changed = 1;
+    }
+
+  return touched;
+}
+
+/*
+ * hash iteration callback function to process a policy change for an
+ * update group. Check if the changed policy matches the updgrp's
+ * outbound route-map or unsuppress-map or default-originate map or
+ * filter-list or prefix-list or distribute-list.
+ * Trigger update generation accordingly.
+ */
+static int
+updgrp_policy_update_walkcb (struct update_group *updgrp, void *arg)
+{
+  struct updwalk_context *ctx = arg;
+  struct update_subgroup *subgrp;
+  int changed = 0;
+  int def_changed = 0;
+
+  if (updgrp == NULL || ctx == NULL || ctx->policy_name == NULL)
+    return UPDWALK_CONTINUE;
+
+  /* Refresh the cached policy pointers ("config") of the update group. */
+  switch (ctx->policy_type)
+    {
+    case BGP_POLICY_ROUTE_MAP:
+      changed = updgrp_route_map_update(updgrp, ctx->policy_name, &def_changed);
+      break;
+    case BGP_POLICY_FILTER_LIST:
+      changed = updgrp_filter_list_update(updgrp, ctx->policy_name);
+      break;
+    case BGP_POLICY_PREFIX_LIST:
+      changed = updgrp_prefix_list_update(updgrp, ctx->policy_name);
+      break;
+    case BGP_POLICY_DISTRIBUTE_LIST:
+      changed = updgrp_distribute_list_update(updgrp, ctx->policy_name);
+      break;
+    default:
+      break;
+    }
+
+  /*
+   * Done after updating "config" if no route update was requested, or
+   * if this update group does not use the policy at all.
+   */
+  if (!ctx->policy_route_update || !(changed || def_changed))
+    return UPDWALK_CONTINUE;
+
+  /*
+   * If something has changed, at the beginning of a route-map modification
+   * event, mark each subgroup's needs-refresh bit. For one, it signals to
+   * whoever that the subgroup needs a refresh. Second, it prevents premature
+   * merge of this subgroup with another before a complete (outbound) refresh.
+   */
+  if (ctx->policy_event_start_flag)
+    {
+      UPDGRP_FOREACH_SUBGRP(updgrp, subgrp)
+        {
+          update_subgroup_set_needs_refresh(subgrp, 1);
+        }
+      return UPDWALK_CONTINUE;
+    }
+
+  /* Otherwise re-announce now and clear the needs-refresh bit. */
+  UPDGRP_FOREACH_SUBGRP (updgrp, subgrp)
+    {
+      if (changed)
+        {
+          if (bgp_debug_update(NULL, NULL, updgrp, 0))
+            zlog_debug ("u%llu:s%llu announcing routes upon policy %s (type %d) change",
+                        updgrp->id, subgrp->id, ctx->policy_name, ctx->policy_type);
+          subgroup_announce_route (subgrp);
+        }
+      if (def_changed)
+        {
+          if (bgp_debug_update(NULL, NULL, updgrp, 0))
+            zlog_debug ("u%llu:s%llu announcing default upon default routemap %s change",
+                        updgrp->id, subgrp->id, ctx->policy_name);
+          subgroup_default_originate (subgrp, 0);
+        }
+      update_subgroup_set_needs_refresh(subgrp, 0);
+    }
+  return UPDWALK_CONTINUE;
+}
+
+/*
+ * Adapter between hash_walk() and the updgrp_walkcb interface: unwraps
+ * the update group stored in the hash bucket and hands it, together
+ * with the caller's context, to the callback kept in the walk context.
+ */
+static int
+update_group_walkcb (struct hash_backet *backet, void *arg)
+{
+  struct updwalk_context *wctx = arg;
+
+  return (*wctx->cb) (backet->data, wctx->context);
+}
+
+/*
+ * Walk callback that runs a merge check on every subgroup of an update
+ * group. 'arg' is the reason string passed on to the merge check.
+ */
+static int
+update_group_periodic_merge_walkcb (struct update_group *updgrp, void *arg)
+{
+  struct update_subgroup *subgrp;
+  struct update_subgroup *next_subgrp;
+
+  /* The "safe" iterator is used as a merge can delete 'subgrp'. */
+  UPDGRP_FOREACH_SUBGRP_SAFE (updgrp, subgrp, next_subgrp)
+    update_subgroup_check_merge (subgrp, (const char *) arg);
+
+  return UPDWALK_CONTINUE;
+}
+
+/********************
+ * PUBLIC FUNCTIONS
+ ********************/
+
+/*
+ * trigger function when a policy (route-map/filter-list/prefix-list/
+ * distribute-list etc.) content changes. Go through all the
+ * update groups and process the change.
+ *
+ * bgp: the bgp instance
+ * ptype: the type of policy that got modified, see bgpd.h
+ * pname: name of the policy
+ * route_update: flag to control if an automatic update generation should
+ * occur
+ * start_event: flag that indicates if it's the beginning of the change.
+ * Esp. when the user is changing the content interactively
+ * over multiple statements.
Useful to set dirty flag on
+ * update groups.
+ */
+void
+update_group_policy_update (struct bgp *bgp, bgp_policy_type_e ptype,
+                            char *pname, int route_update, int start_event)
+{
+  struct updwalk_context ctx;
+
+  /* Package the description of the change for the per-group callback. */
+  memset (&ctx, 0, sizeof (ctx));
+  ctx.flags = 0;
+  ctx.policy_type = ptype;
+  ctx.policy_name = pname;
+  ctx.policy_route_update = route_update;
+  ctx.policy_event_start_flag = start_event;
+
+  update_group_walk (bgp, updgrp_policy_update_walkcb, &ctx);
+}
+
+/*
+ * update_subgroup_split_peer
+ *
+ * Ensure that the given peer is in a subgroup of its own in the
+ * specified update group. If 'updgrp' is NULL the peer's current
+ * update group is used.
+ */
+void
+update_subgroup_split_peer (struct peer_af *paf, struct update_group *updgrp)
+{
+  struct update_subgroup *old_subgrp, *new_subgrp;
+  uint64_t old_id;
+
+  old_subgrp = paf->subgroup;
+
+  if (updgrp == NULL)
+    updgrp = old_subgrp->update_group;
+
+  /*
+   * If the peer is alone in its subgroup, reuse the existing
+   * subgroup.
+   */
+  if (old_subgrp->peer_count == 1)
+    {
+      /* Already alone and already in the right update group. */
+      if (old_subgrp->update_group == updgrp)
+        return;
+
+      /* Saved now: removing the subgroup may delete its old group. */
+      old_id = old_subgrp->update_group->id;
+
+      if (bgp_debug_peer_updout_enabled(paf->peer))
+        {
+          UPDGRP_PEER_DBG_DIS(old_subgrp->update_group);
+        }
+
+      /* Re-parent the whole subgroup under the new update group. */
+      update_group_remove_subgroup (old_subgrp->update_group, old_subgrp);
+      update_group_add_subgroup (updgrp, old_subgrp);
+
+      if (bgp_debug_peer_updout_enabled(paf->peer))
+        {
+          UPDGRP_PEER_DBG_EN(updgrp);
+        }
+      if (BGP_DEBUG (update_groups, UPDATE_GROUPS))
+        zlog_debug ("u%llu:s%llu peer %s moved to u%llu:s%llu",
+                    old_id, old_subgrp->id, paf->peer->host, updgrp->id,
+                    old_subgrp->id);
+
+      /*
+       * The state of the subgroup (adj_out, advs, packet queue etc)
+       * is consistent internally, but may not be identical to other
+       * subgroups in the new update group even if the version number
+       * matches up. Make sure a full refresh is done before the
+       * subgroup is merged with another.
+       */
+      update_subgroup_set_needs_refresh (old_subgrp, 1);
+
+      SUBGRP_INCR_STAT (old_subgrp, updgrp_switch_events);
+      return;
+    }
+
+  /*
+   * Create a new subgroup under the specified update group, and copy
+   * over relevant state to it.
+   */
+  new_subgrp = update_subgroup_create (updgrp);
+  update_subgroup_inherit_info (new_subgrp, old_subgrp);
+
+  new_subgrp->split_from.update_group_id = old_subgrp->update_group->id;
+  new_subgrp->split_from.subgroup_id = old_subgrp->id;
+
+  /*
+   * Copy out relevant state from the old subgroup: what has been
+   * advertised so far and the packets the peer has yet to be sent.
+   */
+  update_subgroup_copy_adj_out (old_subgrp, new_subgrp);
+  update_subgroup_copy_packets (new_subgrp, paf->next_pkt_to_send);
+
+  if (BGP_DEBUG (update_groups, UPDATE_GROUPS))
+    zlog_debug ("u%llu:s%llu peer %s split and moved into u%llu:s%llu",
+                old_subgrp->update_group->id, old_subgrp->id,
+                paf->peer->host, updgrp->id, new_subgrp->id);
+
+  SUBGRP_INCR_STAT (old_subgrp, split_events);
+
+  /*
+   * Since queued advs were left behind, this new subgroup needs a
+   * refresh.
+   */
+  update_subgroup_set_needs_refresh (new_subgrp, 1);
+
+  /*
+   * Remove peer from old subgroup, and add it to the new one.
+   */
+  update_subgroup_remove_peer (old_subgrp, paf);
+
+  update_subgroup_add_peer (new_subgrp, paf, 1);
+}
+
+/*
+ * Create the (initially empty) update group hash of every address
+ * family index of a bgp instance.
+ */
+void
+update_group_init (struct bgp *bgp)
+{
+  int afid;
+
+  AF_FOREACH (afid)
+    bgp->update_groups[afid] = hash_create (updgrp_hash_key_make,
+                                            updgrp_hash_cmp);
+}
+
+/*
+ * Print every update group of the given address family, together with
+ * its subgroups, to the vty.
+ */
+void
+update_group_show (struct bgp *bgp, afi_t afi, safi_t safi, struct vty *vty)
+{
+  update_group_af_walk (bgp, afi, safi, update_group_show_walkcb, vty);
+}
+
+/*
+ * update_group_show_stats
+ *
+ * Show global statistics about update groups.
+ */
+void
+update_group_show_stats (struct bgp *bgp, struct vty *vty)
+{
+  vty_out (vty, "Update groups created: %u%s",
+           bgp->update_group_stats.updgrps_created, VTY_NEWLINE);
+  vty_out (vty, "Update groups deleted: %u%s",
+           bgp->update_group_stats.updgrps_deleted, VTY_NEWLINE);
+  vty_out (vty, "Update subgroups created: %u%s",
+           bgp->update_group_stats.subgrps_created, VTY_NEWLINE);
+  vty_out (vty, "Update subgroups deleted: %u%s",
+           bgp->update_group_stats.subgrps_deleted, VTY_NEWLINE);
+  vty_out (vty, "Join events: %u%s",
+           bgp->update_group_stats.join_events, VTY_NEWLINE);
+  vty_out (vty, "Prune events: %u%s",
+           bgp->update_group_stats.prune_events, VTY_NEWLINE);
+  vty_out (vty, "Merge events: %u%s",
+           bgp->update_group_stats.merge_events, VTY_NEWLINE);
+  vty_out (vty, "Split events: %u%s",
+           bgp->update_group_stats.split_events, VTY_NEWLINE);
+  vty_out (vty, "Update group switch events: %u%s",
+           bgp->update_group_stats.updgrp_switch_events, VTY_NEWLINE);
+  vty_out (vty, "Peer route refreshes combined: %u%s",
+           bgp->update_group_stats.peer_refreshes_combined, VTY_NEWLINE);
+  vty_out (vty, "Merge checks triggered: %u%s",
+           bgp->update_group_stats.merge_checks_triggered, VTY_NEWLINE);
+}
+
+/*
+ * update_group_adjust_peer
+ *
+ * Place a peer_af in the update group (and a subgroup of it) that
+ * matches its current configuration, creating either if necessary.
+ * Nothing is done unless the peer is established, is a configured
+ * node and has negotiated the address family.
+ */
+void
+update_group_adjust_peer (struct peer_af *paf)
+{
+  struct update_group *updgrp;
+  struct update_subgroup *subgrp, *old_subgrp;
+  struct peer *peer;
+
+  if (!paf)
+    return;
+
+  peer = PAF_PEER (paf);
+  if (!peer_established (peer))
+    {
+      return;
+    }
+
+  if (!CHECK_FLAG (peer->flags, PEER_FLAG_CONFIG_NODE))
+    {
+      return;
+    }
+
+  if (!peer->afc_nego[paf->afi][paf->safi])
+    {
+      return;
+    }
+
+  updgrp = update_group_find (paf);
+  if (!updgrp)
+    {
+      updgrp = update_group_create (paf);
+      if (!updgrp)
+        {
+          zlog_err ("couldn't create update group for peer %s",
+                    paf->peer->host);
+          return;
+        }
+    }
+
+  old_subgrp = paf->subgroup;
+
+  if (old_subgrp)
+    {
+
+      /*
+       * If the update group of the peer is unchanged, the peer can stay
+       * in its existing subgroup and we're done.
+       */
+      if (old_subgrp->update_group == updgrp)
+        return;
+
+      /*
+       * The peer is switching between update groups. Put it in its
+       * own subgroup under the new update group.
+       */
+      update_subgroup_split_peer (paf, updgrp);
+      return;
+    }
+
+  subgrp = update_subgroup_find (updgrp, paf);
+  if (!subgrp)
+    {
+      subgrp = update_subgroup_create (updgrp);
+      if (!subgrp)
+        return;
+    }
+
+  update_subgroup_add_peer (subgrp, paf, 1);
+  if (BGP_DEBUG (update_groups, UPDATE_GROUPS))
+    zlog_debug ("u%llu:s%llu add peer %s",
+                updgrp->id, subgrp->id, paf->peer->host);
+
+  return;
+}
+
+/*
+ * update_group_adjust_soloness
+ *
+ * Set or clear the "lonesoul" state of a peer, or of every member when
+ * 'peer' stands for a peer group, and re-announce routes to the peers
+ * that are Established.
+ *
+ * Returns 0 on success, or BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER if the
+ * peer is an active member of a peer group.
+ */
+int
+update_group_adjust_soloness (struct peer *peer, int set)
+{
+  struct peer_group *group;
+  struct listnode *node, *nnode;
+
+  if (peer_group_active (peer))
+    return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER;
+
+  if (!CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    {
+      peer_lonesoul_or_not (peer, set);
+      if (peer->status == Established)
+        bgp_announce_route_all (peer);
+    }
+  else
+    {
+      /* 'peer' is reused as the iterator over the group's members. */
+      group = peer->group;
+      for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer))
+        {
+          peer_lonesoul_or_not (peer, set);
+          if (peer->status == Established)
+            bgp_announce_route_all (peer);
+        }
+    }
+  return 0;
+}
+
+/*
+ * update_subgroup_rib
+ *
+ * Returns the RIB table of the subgroup's bgp instance for the
+ * subgroup's address family, or NULL if there is no bgp instance.
+ */
+struct bgp_table *
+update_subgroup_rib (struct update_subgroup *subgrp)
+{
+  struct bgp *bgp;
+
+  bgp = SUBGRP_INST (subgrp);
+  if (!bgp)
+    return NULL;
+
+  return bgp->rib[SUBGRP_AFI (subgrp)][SUBGRP_SAFI (subgrp)];
+}
+
+/*
+ * update_group_af_walk
+ *
+ * Invoke 'cb' with 'ctx' on every update group of one address family
+ * of a bgp instance. Does nothing if 'bgp' is NULL or the afi/safi pair
+ * does not map to a valid address family index.
+ */
+void
+update_group_af_walk (struct bgp *bgp, afi_t afi, safi_t safi,
+                      updgrp_walkcb cb, void *ctx)
+{
+  struct updwalk_context wctx;
+  int afid;
+
+  if (!bgp)
+    return;
+  afid = afindex (afi, safi);
+  if (afid >= BGP_AF_MAX)
+    return;
+
+  memset (&wctx, 0, sizeof (wctx));
+  wctx.cb = cb;
+  wctx.context = ctx;
+  hash_walk (bgp->update_groups[afid], update_group_walkcb, &wctx);
+}
+
+/*
+ * update_group_walk
+ *
+ * Invoke 'cb' with 'ctx' on every update group of a bgp instance,
+ * across all address families.
+ */
+void
+update_group_walk (struct bgp *bgp, updgrp_walkcb cb, void *ctx)
+{
+  afi_t afi;
+  safi_t safi;
+
+  FOREACH_AFI_SAFI (afi, safi)
+    {
+      update_group_af_walk (bgp, afi, safi, cb, ctx);
+    }
+}
+
+/*
+ * update_group_periodic_merge
+ *
+ * Run a merge check on every subgroup of every update group.
+ */
+void
+update_group_periodic_merge (struct bgp *bgp)
+{
+  char reason[] = "periodic merge check";
+
+  update_group_walk (bgp, update_group_periodic_merge_walkcb,
+                     (void *) reason);
+}
+
+/*
+ * peer_af_announce_route
+ *
+ * Refreshes routes out to a peer_af immediately.
+ *
+ * If the combine parameter is TRUE, then this function will try to
+ * gather other peers in the subgroup for which a route announcement
+ * is pending and efficiently announce routes to all of them.
+ *
+ * For now, the 'combine' option has an effect only if all peers in
+ * the subgroup have a route announcement pending.
+ */
+void
+peer_af_announce_route (struct peer_af *paf, int combine)
+{
+  struct update_subgroup *subgrp;
+  struct peer_af *cur_paf;
+  int all_pending;
+
+  subgrp = paf->subgroup;
+  all_pending = 0;
+
+  if (combine)
+    {
+      /*
+       * If there are other peers in the old subgroup that also need
+       * routes to be announced, pull them into the peer's new
+       * subgroup.
+       * Combine route announcement with other peers if possible.
+       *
+       * For now, we combine only if all peers in the subgroup have an
+       * announcement pending.
+       */
+      all_pending = 1;
+
+      SUBGRP_FOREACH_PEER (subgrp, cur_paf)
+        {
+          if (cur_paf == paf)
+            continue;
+
+          if (cur_paf->t_announce_route)
+            continue;
+
+          all_pending = 0;
+          break;
+        }
+    }
+  /*
+   * Announce to the peer alone if we were not asked to combine peers,
+   * or if some peers don't have a route announcement pending.
+   */
+  if (!combine || !all_pending)
+    {
+      update_subgroup_split_peer (paf, NULL);
+
+      /*
+       * Re-read the subgroup. The split normally moves the peer into a
+       * new subgroup of its own, and the subgroup it left behind may
+       * have been merged into another one and freed on the way (via
+       * update_subgroup_remove_peer()). The pointer cached above must
+       * therefore not be dereferenced any more.
+       */
+      subgrp = paf->subgroup;
+      if (!subgrp)
+        return;
+
+      if (bgp_debug_update(paf->peer, NULL, subgrp->update_group, 0))
+        zlog_debug ("u%llu:s%llu %s announcing routes",
+                    subgrp->update_group->id, subgrp->id, paf->peer->host);
+
+      subgroup_announce_route (subgrp);
+      return;
+    }
+
+  /*
+   * We will announce routes to the entire subgroup.
+   *
+   * First stop refresh timers on all the other peers.
+   */
+  SUBGRP_FOREACH_PEER (subgrp, cur_paf)
+    {
+      if (cur_paf == paf)
+        continue;
+
+      bgp_stop_announce_route_timer (cur_paf);
+    }
+
+  if (bgp_debug_update(paf->peer, NULL, subgrp->update_group, 0))
+    zlog_debug ("u%llu:s%llu announcing routes to %s, combined into %d peers",
+                subgrp->update_group->id, subgrp->id,
+                paf->peer->host, subgrp->peer_count);
+
+  subgroup_announce_route (subgrp);
+
+  /* Every peer other than 'paf' had its own refresh folded into this one. */
+  SUBGRP_INCR_STAT_BY (subgrp, peer_refreshes_combined,
+                       subgrp->peer_count - 1);
+}
+
+/*
+ * subgroup_trigger_write
+ *
+ * Turn on the write event (BGP_PEER_WRITE_ON with bgp_write) of every
+ * peer of the subgroup that is in Established state.
+ */
+void
+subgroup_trigger_write (struct update_subgroup *subgrp)
+{
+  struct peer_af *paf;
+
+#if 0
+  if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0))
+    zlog_debug("u%llu:s%llu scheduling write thread for peers",
+               subgrp->update_group->id, subgrp->id);
+#endif
+  SUBGRP_FOREACH_PEER (subgrp, paf)
+    {
+      if (paf->peer->status == Established)
+        {
+          BGP_PEER_WRITE_ON (paf->peer->t_write, bgp_write, paf->peer->fd,
+                             paf->peer);
+        }
+    }
+}
+
+/*
+ * update_group_clear_update_dbg
+ *
+ * Walk callback (the 'arg' parameter is unused) that applies
+ * UPDGRP_PEER_DBG_OFF to the update group.
+ */
+int
+update_group_clear_update_dbg (struct update_group *updgrp, void *arg)
+{
+  UPDGRP_PEER_DBG_OFF(updgrp);
+  return UPDWALK_CONTINUE;
+}
diff --git a/bgpd/bgp_updgrp.h b/bgpd/bgp_updgrp.h
new file mode 100644
index 0000000000..aa2ef50362
--- /dev/null
+++ b/bgpd/bgp_updgrp.h
@@ -0,0 +1,594 @@
+/**
+ * bgp_updgrp.h: BGP update group structures
+ *
+ * @copyright Copyright (C) 2014 Cumulus Networks, Inc.
+ *
+ * @author Avneesh Sachdev
+ * @author Rajesh Varadarajan
+ * @author Pradosh Mohapatra
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef _QUAGGA_BGP_UPDGRP_H
+#define _QUAGGA_BGP_UPDGRP_H
+
+#include "bgp_advertise.h"
+
+#define BGP_DEFAULT_SUBGROUP_COALESCE_TIME 200 /* NOTE(review): unit not stated here, presumably milliseconds - confirm */
+
+#define PEER_UPDGRP_FLAGS (PEER_FLAG_LOCAL_AS_NO_PREPEND | \
+                           PEER_FLAG_LOCAL_AS_REPLACE_AS)
+
+#define PEER_UPDGRP_AF_FLAGS (PEER_FLAG_SEND_COMMUNITY | \
+                              PEER_FLAG_SEND_EXT_COMMUNITY | \
+                              PEER_FLAG_DEFAULT_ORIGINATE | \
+                              PEER_FLAG_REFLECTOR_CLIENT | \
+                              PEER_FLAG_NEXTHOP_SELF | \
+                              PEER_FLAG_NEXTHOP_UNCHANGED | \
+                              PEER_FLAG_AS_PATH_UNCHANGED | \
+                              PEER_FLAG_MED_UNCHANGED | \
+                              PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED | \
+                              PEER_FLAG_REMOVE_PRIVATE_AS | \
+                              PEER_FLAG_REMOVE_PRIVATE_AS_ALL | \
+                              PEER_FLAG_REMOVE_PRIVATE_AS_REPLACE | \
+                              PEER_FLAG_AS_OVERRIDE)
+
+#define PEER_UPDGRP_CAP_FLAGS (PEER_CAP_AS4_RCV)
+
+#define PEER_UPDGRP_AF_CAP_FLAGS (PEER_CAP_ORF_PREFIX_SM_RCV | \
+                                  PEER_CAP_ORF_PREFIX_SM_OLD_RCV)
+
+typedef enum
+{
+  BGP_ATTR_VEC_NH = 0,
+  BGP_ATTR_VEC_MAX /* number of entries; sizes bpacket_attr_vec_arr.entries */
+} bpacket_attr_vec_type;
+
+typedef struct
+{
+  u_int32_t flags; /* presumably BPACKET_ATTRVEC_FLAGS_* bits - confirm */
+  unsigned long offset;
+} bpacket_attr_vec;
+
+#define BPACKET_ATTRVEC_FLAGS_UPDATED (1 << 0)
+#define BPACKET_ATTRVEC_FLAGS_RMAP_CHANGED (1 << 1)
+#define BPACKET_ATTRVEC_FLAGS_RMAP_NH_PEER_ADDRESS (1 << 2)
+#define BPACKET_ATTRVEC_FLAGS_REFLECTED (1 << 3)
+
+typedef struct bpacket_attr_vec_arr
+{
+  bpacket_attr_vec entries[BGP_ATTR_VEC_MAX]; /* one slot per bpacket_attr_vec_type */
+} bpacket_attr_vec_arr;
+
+struct bpacket
+{
+  /* for being part of an update subgroup's message list */
+  TAILQ_ENTRY (bpacket) pkt_train;
+
+  /* list of peers (well, peer_afs) that the packet needs to be sent to */
+  LIST_HEAD (pkt_peer_list, peer_af) peers;
+
+  struct stream *buffer; /* NULL on the placeholder packet kept at the tail of a queue */
+  bpacket_attr_vec_arr arr;
+
+  unsigned int ver; /* the last packet's ver minus one is shown as "Total packets enqueued" */
+};
+
+struct bpacket_queue
+{
+  TAILQ_HEAD (pkt_queue, bpacket) pkts;
+
+#if 0
+  /* A dummy packet that is used to thread all peers that have
+     completed their work */
+  struct bpacket sentinel;
+#endif
+
+  unsigned int conf_max_count;
+  unsigned int curr_count;
+  unsigned int hwm_count; /* presumably the high watermark of curr_count - confirm */
+  unsigned int max_count_reached_count;
+};
+
+struct update_group
+{
+  /* back pointer to the BGP instance */
+  struct bgp *bgp;
+
+  /* list of subgroups that belong to the update group */
+  LIST_HEAD (subgrp_list, update_subgroup) subgrps;
+
+  /* lazy way to store configuration common to all peers
+     hash function will compute from this data */
+  struct peer *conf;
+
+  afi_t afi;
+  safi_t safi;
+  int afid; /* index into bgp->update_groups[] */
+
+  u_int64_t id;
+  time_t uptime; /* printed as "Created" by the show command */
+
+  u_int32_t join_events;
+  u_int32_t prune_events;
+  u_int32_t merge_events;
+  u_int32_t updgrp_switch_events;
+  u_int32_t peer_refreshes_combined;
+  u_int32_t adj_count;
+  u_int32_t split_events;
+  u_int32_t merge_checks_triggered;
+
+  u_int32_t subgrps_created;
+  u_int32_t subgrps_deleted;
+
+  u_int32_t num_dbg_en_peers;
+};
+
+/*
+ * Shorthand for a global statistics counter.
+ */
+#define UPDGRP_GLOBAL_STAT(updgrp, stat) \
+  ((updgrp)->bgp->update_group_stats.stat)
+
+/*
+ * Add the given value to a counter on an update group and the bgp
+ * instance.
+ */
+#define UPDGRP_INCR_STAT_BY(updgrp, stat, value) \
+  do { \
+    (updgrp)->stat += (value); \
+    UPDGRP_GLOBAL_STAT(updgrp, stat) += (value); \
+  } while (0)
+
+/*
+ * Increment a counter on an update group and its parent structures.
+ *
+ * Note: despite its name, the 'subgrp' argument is an update group; it
+ * is handed unchanged to UPDGRP_INCR_STAT_BY.
+ */
+#define UPDGRP_INCR_STAT(subgrp, stat) \
+  UPDGRP_INCR_STAT_BY(subgrp, stat, 1)
+
+struct update_subgroup
+{
+  /* back pointer to the parent update group */
+  struct update_group *update_group;
+
+  /* list of peers that belong to the subgroup */
+  LIST_HEAD (peer_list, peer_af) peers;
+  int peer_count;
+
+  /* for being part of an update group's subgroup list */
+  LIST_ENTRY (update_subgroup) updgrp_train;
+
+  struct bpacket_queue pkt_queue;
+
+  /*
+   * List of adj-out structures for this subgroup.
+   * It essentially represents the snapshot of every prefix that
+   * has been advertised to the members of the subgroup
+   */
+  TAILQ_HEAD (adjout_queue, bgp_adj_out) adjq;
+
+  /* packet buffer for update generation */
+  struct stream *work;
+
+  /* We use a separate stream to encode MP_REACH_NLRI for efficient
+   * NLRI packing. peer->work stores all the other attributes. The
+   * actual packet is then constructed by concatenating the two.
+   */
+  struct stream *scratch;
+
+  /* synchronization list and time */
+  struct bgp_synchronize *sync;
+
+  /* send prefix count */
+  unsigned long scount;
+
+  /* announcement attribute hash */
+  struct hash *hash;
+
+  struct thread *t_coalesce;
+  u_int32_t v_coalesce; /* initialised from the bgp instance's coalesce_time */
+
+  struct thread *t_merge_check; /* pending merge check, NULL when none is scheduled */
+
+  /* table version that the subgroup has caught up to. */
+  uint64_t version;
+
+  /* version maintained to record adj changes */
+  uint64_t adj_version;
+
+  time_t uptime;
+
+  /*
+   * Identifying information about the subgroup that this subgroup was split
+   * from, if any.
+   */
+  struct
+  {
+    u_int64_t update_group_id;
+    u_int64_t subgroup_id;
+  } split_from;
+
+  u_int32_t join_events;
+  u_int32_t prune_events;
+
+  /*
+   * This is bumped up when another subgroup merges into this one.
+   */
+  u_int32_t merge_events;
+  u_int32_t updgrp_switch_events;
+  u_int32_t peer_refreshes_combined;
+  u_int32_t adj_count;
+  u_int32_t split_events;
+  u_int32_t merge_checks_triggered;
+
+  u_int64_t id;
+  struct zlog *log;
+
+  u_int16_t sflags; /* presumably SUBGRP_STATUS_* bits - confirm; copied to a subgroup split off this one */
+
+  /* Subgroup flags, see below */
+  u_int16_t flags;
+};
+
+/*
+ * We need to do an outbound refresh to get this subgroup into a
+ * consistent state.
+ */
+#define SUBGRP_FLAG_NEEDS_REFRESH (1 << 0)
+
+#define SUBGRP_STATUS_DEFAULT_ORIGINATE (1 << 0)
+
+/*
+ * Add the given value to the specified counter on a subgroup and its
+ * parent structures.
+ */ +#define SUBGRP_INCR_STAT_BY(subgrp, stat, value) \ + do { \ + (subgrp)->stat += (value); \ + if ((subgrp)->update_group) \ + UPDGRP_INCR_STAT_BY((subgrp)->update_group, stat, value); \ + } while (0) + +/* + * Increment a counter on a subgroup and its parent structures. + */ +#define SUBGRP_INCR_STAT(subgrp, stat) \ + SUBGRP_INCR_STAT_BY(subgrp, stat, 1) + +/* + * Decrement a counter on a subgroup and its parent structures. + */ +#define SUBGRP_DECR_STAT(subgrp, stat) \ + SUBGRP_INCR_STAT_BY(subgrp, stat, -1) + + +typedef int (*updgrp_walkcb) (struct update_group * updgrp, void *ctx); + +/* really a private structure */ +struct updwalk_context +{ + struct vty *vty; + struct bgp_node *rn; + struct bgp_info *ri; + u_int64_t updgrp_id; + u_int64_t subgrp_id; + bgp_policy_type_e policy_type; + char *policy_name; + int policy_event_start_flag; + int policy_route_update; + updgrp_walkcb cb; + void *context; + u_int8_t flags; + +#define UPDWALK_FLAGS_ADVQUEUE (1 << 0) +#define UPDWALK_FLAGS_ADVERTISED (1 << 1) +}; + +#define UPDWALK_CONTINUE HASHWALK_CONTINUE +#define UPDWALK_ABORT HASHWALK_ABORT + +#define PAF_PEER(p) ((p)->peer) +#define PAF_SUBGRP(p) ((p)->subgroup) +#define PAF_UPDGRP(p) ((p)->subgroup->update_group) +#define PAF_PKTQ(f) SUBGRP_PKTQ((f)->subgroup) + +#define UPDGRP_PEER(u) ((u)->conf) +#define UPDGRP_AFI(u) ((u)->afi) +#define UPDGRP_SAFI(u) ((u)->safi) +#define UPDGRP_INST(u) ((u)->bgp) +#define UPDGRP_AFFLAGS(u) \ + ((u)->conf->af_flags[UPDGRP_AFI(u)][UPDGRP_SAFI(u)]) +#define UPDGRP_DBG_ON(u) ((u)->num_dbg_en_peers) +#define UPDGRP_PEER_DBG_EN(u) (((u)->num_dbg_en_peers)++) +#define UPDGRP_PEER_DBG_DIS(u) (((u)->num_dbg_en_peers)--) +#define UPDGRP_PEER_DBG_OFF(u) (u)->num_dbg_en_peers = 0 + +#define SUBGRP_AFI(s) UPDGRP_AFI((s)->update_group) +#define SUBGRP_SAFI(s) UPDGRP_SAFI((s)->update_group) +#define SUBGRP_PEER(s) UPDGRP_PEER((s)->update_group) +#define SUBGRP_PCOUNT(s) ((s)->peer_count) +#define SUBGRP_PFIRST(s) 
LIST_FIRST(&((s)->peers)) +/* Fully parenthesized so the address-of result composes safely in any expression. */ +#define SUBGRP_PKTQ(s) (&((s)->pkt_queue)) +#define SUBGRP_INST(s) UPDGRP_INST((s)->update_group) +#define SUBGRP_AFFLAGS(s) UPDGRP_AFFLAGS((s)->update_group) +#define SUBGRP_UPDGRP(s) ((s)->update_group) + +/* + * Walk all subgroups in an update group. + */ +#define UPDGRP_FOREACH_SUBGRP(updgrp, subgrp) \ + LIST_FOREACH(subgrp, &((updgrp)->subgrps), updgrp_train) + +#define UPDGRP_FOREACH_SUBGRP_SAFE(updgrp, subgrp, tmp_subgrp) \ + LIST_FOREACH_SAFE(subgrp, &((updgrp)->subgrps), updgrp_train, tmp_subgrp) + +/* + * Walk all peer_af structures on a subgroup's peer list. The subgrp + * argument is parenthesized so that any pointer expression may be passed. + */ +#define SUBGRP_FOREACH_PEER(subgrp, paf) \ + LIST_FOREACH(paf, &((subgrp)->peers), subgrp_train) + +#define SUBGRP_FOREACH_PEER_SAFE(subgrp, paf, temp_paf) \ + LIST_FOREACH_SAFE(paf, &((subgrp)->peers), subgrp_train, temp_paf) + +/* + * Walk all adj-out structures on a subgroup's adjq list. + */ +#define SUBGRP_FOREACH_ADJ(subgrp, adj) \ + TAILQ_FOREACH(adj, &((subgrp)->adjq), subgrp_adj_train) + +#define SUBGRP_FOREACH_ADJ_SAFE(subgrp, adj, adj_temp) \ + TAILQ_FOREACH_SAFE(adj, &((subgrp)->adjq), subgrp_adj_train, adj_temp) + +/* Prototypes. 
*/ +/* bgp_updgrp.c */ +extern void update_group_init (struct bgp *); +extern void +update_group_show (struct bgp *bgp, afi_t afi, safi_t safi, struct vty *vty); +extern void update_group_show_stats (struct bgp *bgp, struct vty *vty); +extern void update_group_adjust_peer (struct peer_af *paf); +extern int update_group_adjust_soloness (struct peer *peer, int set); + +extern void +update_subgroup_remove_peer (struct update_subgroup *, struct peer_af *); +extern struct bgp_table *update_subgroup_rib (struct update_subgroup *); +extern void +update_subgroup_split_peer (struct peer_af *, struct update_group *); +extern int +update_subgroup_check_merge (struct update_subgroup *, const char *); +extern int +update_subgroup_trigger_merge_check (struct update_subgroup *, + int force); +extern void update_group_policy_update (struct bgp *bgp, + bgp_policy_type_e ptype, char *pname, + int route_update, int start_event); +extern void update_group_af_walk (struct bgp *bgp, afi_t afi, safi_t safi, + updgrp_walkcb cb, void *ctx); +extern void update_group_walk (struct bgp *bgp, updgrp_walkcb cb, void *ctx); +extern void update_group_periodic_merge (struct bgp *bgp); +extern void update_group_start_advtimer (struct bgp *bgp); + +extern void update_subgroup_inherit_info (struct update_subgroup *to, + struct update_subgroup *from); + +/* bgp_updgrp_packet.c */ +extern struct bpacket *bpacket_alloc (void); +extern void bpacket_free (struct bpacket *pkt); +extern void bpacket_queue_init (struct bpacket_queue *q); +extern void bpacket_queue_cleanup (struct bpacket_queue *q); +extern void bpacket_queue_sanity_check (struct bpacket_queue *q); +extern struct bpacket *bpacket_queue_add (struct bpacket_queue *q, + struct stream *s, + struct bpacket_attr_vec_arr + *vecarr); +struct bpacket *bpacket_queue_remove (struct bpacket_queue *q); +extern struct bpacket *bpacket_queue_first (struct bpacket_queue *q); +struct bpacket *bpacket_queue_last (struct bpacket_queue *q); +unsigned int 
bpacket_queue_length (struct bpacket_queue *q); +unsigned int bpacket_queue_hwm_length (struct bpacket_queue *q); +int bpacket_queue_is_full (struct bgp *bgp, struct bpacket_queue *q); +extern void bpacket_queue_advance_peer (struct peer_af *paf); +extern void bpacket_queue_remove_peer (struct peer_af *paf); +extern void bpacket_add_peer (struct bpacket *pkt, struct peer_af *paf); +unsigned int bpacket_queue_virtual_length (struct peer_af *paf); +extern void bpacket_queue_show_vty (struct bpacket_queue *q, struct vty *vty); +int subgroup_packets_to_build (struct update_subgroup *subgrp); +extern struct bpacket *subgroup_update_packet (struct update_subgroup *s); +extern struct bpacket *subgroup_withdraw_packet (struct update_subgroup *s); +extern struct stream *bpacket_reformat_for_peer (struct bpacket *pkt, + struct peer_af *paf); +extern void bpacket_attr_vec_arr_reset (struct bpacket_attr_vec_arr *vecarr); +extern void bpacket_attr_vec_arr_set_vec (struct bpacket_attr_vec_arr *vecarr, + bpacket_attr_vec_type type, + struct stream *s, + struct attr *attr); +extern void +subgroup_default_update_packet (struct update_subgroup *subgrp, + struct attr *attr, struct peer *from); +extern void subgroup_default_withdraw_packet (struct update_subgroup *subgrp); + +/* bgp_updgrp_adv.c */ +extern struct bgp_advertise *bgp_advertise_clean_subgroup (struct + update_subgroup + *subgrp, + struct bgp_adj_out + *adj); +extern void update_group_show_adj_queue (struct bgp *bgp, afi_t afi, + safi_t safi, struct vty *vty, + u_int64_t id); +extern void update_group_show_advertised (struct bgp *bgp, afi_t afi, + safi_t safi, struct vty *vty, + u_int64_t id); +extern void update_group_show_packet_queue (struct bgp *bgp, afi_t afi, + safi_t safi, struct vty *vty, + u_int64_t id); +extern void subgroup_announce_route (struct update_subgroup *subgrp); +extern void subgroup_announce_all (struct update_subgroup *subgrp); + +extern void +subgroup_default_originate (struct update_subgroup 
*subgrp, int withdraw); +extern void +group_announce_route (struct bgp *bgp, afi_t afi, safi_t safi, + struct bgp_node *rn, struct bgp_info *ri); +extern void subgroup_clear_table (struct update_subgroup *subgrp); +extern void update_group_announce (struct bgp *bgp); +extern void update_group_announce_rrclients (struct bgp *bgp); +extern void peer_af_announce_route (struct peer_af *paf, int combine); +extern struct bgp_adj_out *bgp_adj_out_alloc (struct update_subgroup *subgrp, + struct bgp_node *rn); +extern void bgp_adj_out_remove_subgroup (struct bgp_node *rn, + struct bgp_adj_out *adj, + struct update_subgroup *subgrp); +extern void +bgp_adj_out_set_subgroup (struct bgp_node *rn, + struct update_subgroup *subgrp, + struct attr *attr, struct bgp_info *binfo); +extern void +bgp_adj_out_unset_subgroup (struct bgp_node *rn, + struct update_subgroup *subgrp); +void +subgroup_announce_table (struct update_subgroup *subgrp, + struct bgp_table *table, int rsclient); +extern void +subgroup_trigger_write (struct update_subgroup *subgrp); + +extern int +update_group_clear_update_dbg (struct update_group *updgrp, void *arg); + +/* + * Inline functions + */ + +/* + * bpacket_queue_is_empty + */ +static inline int +bpacket_queue_is_empty (struct bpacket_queue *queue) +{ + + /* + * The packet queue is empty if it only contains a sentinel. + */ + if (queue->curr_count != 1) + return 0; + + assert (bpacket_queue_first (queue)->buffer == NULL); + return 1; +} + +/* + * bpacket_next + * + * Returns the packet after the given packet in a bpacket queue. + */ +static inline struct bpacket * +bpacket_next (struct bpacket *pkt) +{ + return TAILQ_NEXT (pkt, pkt_train); +} + +/* + * update_group_adjust_peer_afs + * + * Adjust all peer_af structures for the given peer. 
+ */ +static inline void +update_group_adjust_peer_afs (struct peer *peer) +{ + struct peer_af *paf; + afi_t afi; + + PEERAF_FOREACH (peer, paf, afi) update_group_adjust_peer (paf); +} + +/* + * update_group_remove_peer_afs + * + * Remove all peer_af structures for the given peer from their subgroups. + */ +static inline void +update_group_remove_peer_afs (struct peer *peer) +{ + struct peer_af *paf; + afi_t afi; + + PEERAF_FOREACH (peer, paf, afi) + update_subgroup_remove_peer (PAF_SUBGRP (paf), paf); +} + +/* + * update_subgroup_needs_refresh + * + * Returns 1 if SUBGRP_FLAG_NEEDS_REFRESH is set on the subgroup, 0 otherwise. + */ +static inline int +update_subgroup_needs_refresh (const struct update_subgroup *subgrp) +{ + if (CHECK_FLAG (subgrp->flags, SUBGRP_FLAG_NEEDS_REFRESH)) + return 1; + else + return 0; +} + +/* + * update_subgroup_set_needs_refresh + * + * Set SUBGRP_FLAG_NEEDS_REFRESH on the subgroup when value is non-zero, + * clear it otherwise. + */ +static inline void +update_subgroup_set_needs_refresh (struct update_subgroup *subgrp, int value) +{ + if (value) + SET_FLAG (subgrp->flags, SUBGRP_FLAG_NEEDS_REFRESH); + else + UNSET_FLAG (subgrp->flags, SUBGRP_FLAG_NEEDS_REFRESH); +} + +static inline struct update_subgroup * +peer_subgroup (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_af *paf; + + paf = peer_af_find (peer, afi, safi); + if (paf) + return PAF_SUBGRP (paf); + return NULL; +} + +/* + * bgp_announce_peer + * + * Call subgroup_announce_all() on the subgroup of every peer_af structure + * of the given peer. 
+ */ +static inline void +bgp_announce_peer (struct peer *peer) +{ + struct peer_af *paf; + int af; + + PEERAF_FOREACH (peer, paf, af) subgroup_announce_all (PAF_SUBGRP (paf)); +} + +/** + * advertise_list_is_empty + */ +static inline int +advertise_list_is_empty (struct update_subgroup *subgrp) +{ + if (!BGP_ADV_FIFO_EMPTY (&subgrp->sync->update) || + !BGP_ADV_FIFO_EMPTY (&subgrp->sync->withdraw) || + !BGP_ADV_FIFO_EMPTY (&subgrp->sync->withdraw_low)) + { + return 0; + } + + return 1; +} + +#endif /* _QUAGGA_BGP_UPDGRP_H */ diff --git a/bgpd/bgp_updgrp_adv.c b/bgpd/bgp_updgrp_adv.c new file mode 100644 index 0000000000..54da37c48c --- /dev/null +++ b/bgpd/bgp_updgrp_adv.c @@ -0,0 +1,765 @@ +/** + * bgp_updgrp_adv.c: BGP update group advertisement and adjacency + * maintenance + * + * + * @copyright Copyright (C) 2014 Cumulus Networks, Inc. + * + * @author Avneesh Sachdev + * @author Rajesh Varadarajan + * @author Pradosh Mohapatra + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#include + +#include "command.h" +#include "memory.h" +#include "prefix.h" +#include "hash.h" +#include "thread.h" +#include "queue.h" +#include "routemap.h" + +#include "bgpd/bgpd.h" +#include "bgpd/bgp_table.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_advertise.h" +#include "bgpd/bgp_attr.h" +#include "bgpd/bgp_aspath.h" +#include "bgpd/bgp_packet.h" +#include "bgpd/bgp_fsm.h" +#include "bgpd/bgp_mplsvpn.h" +#include "bgpd/bgp_updgrp.h" +#include "bgpd/bgp_advertise.h" + + +/******************** + * PRIVATE FUNCTIONS + ********************/ + +static inline struct bgp_adj_out * +adj_lookup (struct bgp_node *rn, struct update_subgroup *subgrp) +{ + struct bgp_adj_out *adj; + + if (!rn || !subgrp) + return NULL; + for (adj = rn->adj_out; adj; adj = adj->next) + if (adj->subgroup == subgrp) + break; + return adj; +} + +static void +adj_free (struct bgp_adj_out *adj) +{ + TAILQ_REMOVE (&(adj->subgroup->adjq), adj, subgrp_adj_train); + SUBGRP_DECR_STAT (adj->subgroup, adj_count); + XFREE (MTYPE_BGP_ADJ_OUT, adj); +} + +static int +group_announce_route_walkcb (struct update_group *updgrp, void *arg) +{ + struct updwalk_context *ctx = arg; + struct update_subgroup *subgrp; + + UPDGRP_FOREACH_SUBGRP (updgrp, subgrp) + { + + /* + * Skip the subgroups that have coalesce timer running. We will + * walk the entire prefix table for those subgroups when the + * coalesce timer fires. 
+ */ + if (!subgrp->t_coalesce) + subgroup_process_announce_selected (subgrp, ctx->ri, ctx->rn); + } + + return UPDWALK_CONTINUE; +} + +/* + * subgrp_show_adjq_vty + * + * Walk the BGP table of the subgroup's AFI/SAFI and print, for every + * adj-out that belongs to the subgroup, the attributes queued for + * advertisement (UPDWALK_FLAGS_ADVQUEUE) and/or the attributes already + * recorded on the adj-out (UPDWALK_FLAGS_ADVERTISED). The table headers + * are printed once, before the first matching entry; a total is printed + * only when at least one prefix was shown. + */ +static void +subgrp_show_adjq_vty (struct update_subgroup *subgrp, struct vty *vty, + u_int8_t flags) +{ + struct bgp_table *table; + struct bgp_adj_out *adj; + unsigned long output_count; + struct bgp_node *rn; + int header1 = 1; + struct bgp *bgp; + int header2 = 1; + + bgp = SUBGRP_INST (subgrp); + if (!bgp) + return; + + table = bgp->rib[SUBGRP_AFI (subgrp)][SUBGRP_SAFI (subgrp)]; + + output_count = 0; + + for (rn = bgp_table_top (table); rn; rn = bgp_route_next (rn)) + for (adj = rn->adj_out; adj; adj = adj->next) + if (adj->subgroup == subgrp) + { + if (header1) + { + vty_out (vty, + "BGP table version is %llu, local router ID is %s%s", + table->version, inet_ntoa (bgp->router_id), + VTY_NEWLINE); + vty_out (vty, BGP_SHOW_SCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); + vty_out (vty, BGP_SHOW_OCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); + header1 = 0; + } + if (header2) + { + vty_out (vty, BGP_SHOW_HEADER, VTY_NEWLINE); + header2 = 0; + } + if ((flags & UPDWALK_FLAGS_ADVQUEUE) && adj->adv && adj->adv->baa) + { + route_vty_out_tmp (vty, &rn->p, adj->adv->baa->attr, + SUBGRP_SAFI (subgrp), NULL); + output_count++; + } + if ((flags & UPDWALK_FLAGS_ADVERTISED) && adj->attr) + { + route_vty_out_tmp (vty, &rn->p, adj->attr, SUBGRP_SAFI (subgrp), + NULL); + output_count++; + } + } + /* output_count is unsigned long, so it must be printed with %lu. */ + if (output_count != 0) + vty_out (vty, "%sTotal number of prefixes %lu%s", + VTY_NEWLINE, output_count, VTY_NEWLINE); +} + +static int +updgrp_show_adj_walkcb (struct update_group *updgrp, void *arg) +{ + struct updwalk_context *ctx = arg; + struct update_subgroup *subgrp; + struct vty *vty; + + vty = ctx->vty; + UPDGRP_FOREACH_SUBGRP (updgrp, subgrp) + { + if (ctx->subgrp_id && (ctx->subgrp_id != subgrp->id)) + continue; + vty_out (vty, "update group %llu, subgroup %llu%s", updgrp->id, + subgrp->id, VTY_NEWLINE); + subgrp_show_adjq_vty (subgrp, vty, ctx->flags); 
+ } + return UPDWALK_CONTINUE; +} + +static void +updgrp_show_adj (struct bgp *bgp, afi_t afi, safi_t safi, + struct vty *vty, u_int64_t id, u_int8_t flags) +{ + struct updwalk_context ctx; + memset (&ctx, 0, sizeof (ctx)); + ctx.vty = vty; + ctx.subgrp_id = id; + ctx.flags = flags; + + update_group_af_walk (bgp, afi, safi, updgrp_show_adj_walkcb, &ctx); +} + +static int +subgroup_coalesce_timer (struct thread *thread) +{ + struct update_subgroup *subgrp; + + subgrp = THREAD_ARG (thread); + if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0)) + zlog_debug ("u%llu:s%llu announcing routes upon coalesce timer expiry", + (SUBGRP_UPDGRP (subgrp))->id, subgrp->id); + subgrp->t_coalesce = NULL; + subgrp->v_coalesce = 0; + subgroup_announce_route (subgrp); + + + /* While the announce_route() may kick off the route advertisement timer for + * the members of the subgroup, we'd like to send the initial updates much + * faster (i.e., without enforcing MRAI). Also, if there were no routes to + * announce, this is the method currently employed to trigger the EOR. 
+ */ + if (!bgp_update_delay_active(SUBGRP_INST(subgrp))) + { + struct peer_af *paf; + struct peer *peer; + + SUBGRP_FOREACH_PEER (subgrp, paf) + { + peer = PAF_PEER(paf); + BGP_TIMER_OFF(peer->t_routeadv); + BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, 0); + } + } + + return 0; +} + +static int +update_group_announce_walkcb (struct update_group *updgrp, void *arg) +{ + struct update_subgroup *subgrp; + + UPDGRP_FOREACH_SUBGRP (updgrp, subgrp) + { + subgroup_announce_all (subgrp); + } + + return UPDWALK_CONTINUE; +} + +static int +update_group_announce_rrc_walkcb (struct update_group *updgrp, void *arg) +{ + struct update_subgroup *subgrp; + afi_t afi; + safi_t safi; + struct peer *peer; + + afi = UPDGRP_AFI (updgrp); + safi = UPDGRP_SAFI (updgrp); + peer = UPDGRP_PEER (updgrp); + + /* Only announce if this is a group of route-reflector-clients */ + if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_REFLECTOR_CLIENT)) + { + UPDGRP_FOREACH_SUBGRP (updgrp, subgrp) + { + subgroup_announce_all (subgrp); + } + } + + return UPDWALK_CONTINUE; +} + +/******************** + * PUBLIC FUNCTIONS + ********************/ + +/** + * Allocate an adj-out object. Do proper initialization of its fields, + * primarily its association with the subgroup and the prefix. 
+ */ +struct bgp_adj_out * +bgp_adj_out_alloc (struct update_subgroup *subgrp, struct bgp_node *rn) +{ + struct bgp_adj_out *adj; + + adj = XCALLOC (MTYPE_BGP_ADJ_OUT, sizeof (struct bgp_adj_out)); + adj->subgroup = subgrp; + if (rn) + { + BGP_ADJ_OUT_ADD (rn, adj); + bgp_lock_node (rn); + adj->rn = rn; + } + TAILQ_INSERT_TAIL (&(subgrp->adjq), adj, subgrp_adj_train); + SUBGRP_INCR_STAT (subgrp, adj_count); + return adj; +} + + +struct bgp_advertise * +bgp_advertise_clean_subgroup (struct update_subgroup *subgrp, + struct bgp_adj_out *adj) +{ + struct bgp_advertise *adv; + struct bgp_advertise_attr *baa; + struct bgp_advertise *next; + struct bgp_advertise_fifo *fhead; + + adv = adj->adv; + baa = adv->baa; + next = NULL; + + if (baa) + { + fhead = &subgrp->sync->update; + + /* Unlink myself from advertise attribute FIFO. */ + bgp_advertise_delete (baa, adv); + + /* Fetch next advertise candidate. */ + next = baa->adv; + + /* Unintern BGP advertise attribute. */ + bgp_advertise_unintern (subgrp->hash, baa); + } + else + fhead = &subgrp->sync->withdraw; + + + /* Unlink myself from advertisement FIFO. */ + BGP_ADV_FIFO_DEL (fhead, adv); + + /* Free memory. */ + bgp_advertise_free (adj->adv); + adj->adv = NULL; + + return next; +} + +void +bgp_adj_out_set_subgroup (struct bgp_node *rn, + struct update_subgroup *subgrp, + struct attr *attr, struct bgp_info *binfo) +{ + struct bgp_adj_out *adj = NULL; + struct bgp_advertise *adv; + + if (DISABLE_BGP_ANNOUNCE) + return; + + /* Look for adjacency information. 
*/ + adj = adj_lookup (rn, subgrp); + + if (!adj) + { + adj = bgp_adj_out_alloc (subgrp, rn); + if (!adj) + return; + } + + if (adj->adv) + bgp_advertise_clean_subgroup (subgrp, adj); + adj->adv = bgp_advertise_new (); + + adv = adj->adv; + adv->rn = rn; + assert (adv->binfo == NULL); + adv->binfo = bgp_info_lock (binfo); /* bgp_info adj_out reference */ + + if (attr) + adv->baa = bgp_advertise_intern (subgrp->hash, attr); + else + adv->baa = baa_new (); + adv->adj = adj; + + /* Add new advertisement to advertisement attribute list. */ + bgp_advertise_add (adv->baa, adv); + + /* + * If the update adv list is empty, trigger the member peers' + * mrai timers so the socket writes can happen. + */ + if (BGP_ADV_FIFO_EMPTY (&subgrp->sync->update)) + { + struct peer_af *paf; + + SUBGRP_FOREACH_PEER (subgrp, paf) + { + bgp_adjust_routeadv (PAF_PEER (paf)); + } + } + + BGP_ADV_FIFO_ADD (&subgrp->sync->update, &adv->fifo); + + subgrp->version = max (subgrp->version, rn->version); +} + +void +bgp_adj_out_unset_subgroup (struct bgp_node *rn, + struct update_subgroup *subgrp) +{ + struct bgp_adj_out *adj; + struct bgp_advertise *adv; + + if (DISABLE_BGP_ANNOUNCE) + return; + + /* Lookup existing adjacency, if it is not there return immediately. */ + adj = adj_lookup (rn, subgrp); + + if (!adj) + goto done; + + /* Clearn up previous advertisement. */ + if (adj->adv) + bgp_advertise_clean_subgroup (subgrp, adj); + + if (adj->attr) + { + /* We need advertisement structure. */ + adj->adv = bgp_advertise_new (); + adv = adj->adv; + adv->rn = rn; + adv->adj = adj; + + /* Schedule packet write, if FIFO is getting its first entry. */ + if (BGP_ADV_FIFO_EMPTY (&subgrp->sync->withdraw)) + subgroup_trigger_write(subgrp); + + /* Add to synchronization entry for withdraw announcement. */ + BGP_ADV_FIFO_ADD (&subgrp->sync->withdraw, &adv->fifo); + } + else + { + /* Remove myself from adjacency. */ + BGP_ADJ_OUT_DEL (rn, adj); + + /* Free allocated information. 
*/ + adj_free (adj); + + bgp_unlock_node (rn); + } + + /* + * Fall through. + */ + +done: + subgrp->version = max (subgrp->version, rn->version); +} + +void +bgp_adj_out_remove_subgroup (struct bgp_node *rn, struct bgp_adj_out *adj, + struct update_subgroup *subgrp) +{ + if (adj->attr) + bgp_attr_unintern (&adj->attr); + + if (adj->adv) + bgp_advertise_clean_subgroup (subgrp, adj); + + BGP_ADJ_OUT_DEL (rn, adj); + adj_free (adj); +} + +/* + * Go through all the routes and clean up the adj/adv structures corresponding + * to the subgroup. + * + * Every adj-out took a lock on its route node when it was allocated, and + * bgp_adj_out_remove_subgroup() still unlinks the adj from that node. The + * node pointer is therefore saved and the lock is dropped only after the + * adj has been removed and freed, so the node is never touched after its + * reference has been given up. + */ +void +subgroup_clear_table (struct update_subgroup *subgrp) +{ + struct bgp_adj_out *aout, *taout; + struct bgp_node *rn; + + SUBGRP_FOREACH_ADJ_SAFE (subgrp, aout, taout) + { + rn = aout->rn; + bgp_adj_out_remove_subgroup (rn, aout, subgrp); + bgp_unlock_node (rn); + } +} + +/* + * subgroup_announce_table + */ +void +subgroup_announce_table (struct update_subgroup *subgrp, + struct bgp_table *table, int rsclient) +{ + struct bgp_node *rn; + struct bgp_info *ri; + struct attr attr; + struct attr_extra extra; + struct peer *peer; + struct peer *onlypeer; + afi_t afi; + safi_t safi; + + peer = SUBGRP_PEER (subgrp); + afi = SUBGRP_AFI (subgrp); + safi = SUBGRP_SAFI (subgrp); + + onlypeer = ((SUBGRP_PCOUNT (subgrp) == 1) ? + (SUBGRP_PFIRST (subgrp))->peer : NULL); + if (rsclient) + assert(onlypeer); + + if (!table) + table = (rsclient) ? 
onlypeer->rib[afi][safi] : peer->bgp->rib[afi][safi]; + + if (safi != SAFI_MPLS_VPN + && CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE)) + subgroup_default_originate (subgrp, 0); + + /* It's initialized in bgp_announce_[check|check_rsclient]() */ + attr.extra = &extra; + + for (rn = bgp_table_top (table); rn; rn = bgp_route_next (rn)) + for (ri = rn->info; ri; ri = ri->next) + + if (CHECK_FLAG (ri->flags, BGP_INFO_SELECTED)) + { + if (!rsclient + && subgroup_announce_check (ri, subgrp, &rn->p, &attr)) + bgp_adj_out_set_subgroup (rn, subgrp, &attr, ri); + else + bgp_adj_out_unset_subgroup (rn, subgrp); + } + + /* + * We walked through the whole table -- make sure our version number + * is consistent with the one on the table. This should allow + * subgroups to merge sooner if a peer comes up when the route node + * with the largest version is no longer in the table. This also + * covers the pathological case where all routes in the table have + * now been deleted. + */ + subgrp->version = max (subgrp->version, table->version); + + /* + * Start a task to merge the subgroup if necessary. + */ + update_subgroup_trigger_merge_check (subgrp, 0); +} + +/* + * subgroup_announce_route + * + * Refresh all routes out to a subgroup. + */ +void +subgroup_announce_route (struct update_subgroup *subgrp) +{ + struct bgp_node *rn; + struct bgp_table *table; + struct peer *onlypeer; + struct peer *peer; + + if (update_subgroup_needs_refresh (subgrp)) + { + update_subgroup_set_needs_refresh (subgrp, 0); + } + + /* + * First update is deferred until ORF or ROUTE-REFRESH is received + */ + onlypeer = ((SUBGRP_PCOUNT (subgrp) == 1) ? 
+ (SUBGRP_PFIRST (subgrp))->peer : NULL); + if (onlypeer && + CHECK_FLAG (onlypeer-> + af_sflags[SUBGRP_AFI (subgrp)][SUBGRP_SAFI (subgrp)], + PEER_STATUS_ORF_WAIT_REFRESH)) + return; + + if (SUBGRP_SAFI (subgrp) != SAFI_MPLS_VPN) + subgroup_announce_table (subgrp, NULL, 0); + else + for (rn = bgp_table_top (update_subgroup_rib (subgrp)); rn; + rn = bgp_route_next (rn)) + if ((table = (rn->info)) != NULL) + subgroup_announce_table (subgrp, table, 0); + + peer = SUBGRP_PEER(subgrp); + if (CHECK_FLAG(peer->af_flags[SUBGRP_AFI(subgrp)][SUBGRP_SAFI(subgrp)], + PEER_FLAG_RSERVER_CLIENT)) + subgroup_announce_table (subgrp, NULL, 1); +} + +void +subgroup_default_originate (struct update_subgroup *subgrp, int withdraw) +{ + struct bgp *bgp; + struct attr attr; + struct aspath *aspath; + struct prefix p; + struct peer *from; + struct bgp_node *rn; + struct bgp_info *ri; + struct peer *peer; + int ret = RMAP_DENYMATCH; + afi_t afi; + safi_t safi; + + if (!subgrp) + return; + + peer = SUBGRP_PEER (subgrp); + afi = SUBGRP_AFI (subgrp); + safi = SUBGRP_SAFI (subgrp); + + if (!(afi == AFI_IP || afi == AFI_IP6)) + return; + + bgp = peer->bgp; + from = bgp->peer_self; + + bgp_attr_default_set (&attr, BGP_ORIGIN_IGP); + aspath = attr.aspath; + attr.local_pref = bgp->default_local_pref; + memcpy (&attr.nexthop, &peer->nexthop.v4, IPV4_MAX_BYTELEN); + + if (afi == AFI_IP) + str2prefix ("0.0.0.0/0", &p); +#ifdef HAVE_IPV6 + else if (afi == AFI_IP6) + { + struct attr_extra *ae = attr.extra; + + str2prefix ("::/0", &p); + + /* IPv6 global nexthop must be included. */ + memcpy (&ae->mp_nexthop_global, &peer->nexthop.v6_global, + IPV6_MAX_BYTELEN); + ae->mp_nexthop_len = 16; + + /* If the peer is on shared nextwork and we have link-local + nexthop set it. 
*/ + if (peer->shared_network + && !IN6_IS_ADDR_UNSPECIFIED (&peer->nexthop.v6_local)) + { + memcpy (&ae->mp_nexthop_local, &peer->nexthop.v6_local, + IPV6_MAX_BYTELEN); + ae->mp_nexthop_len = 32; + } + } +#endif /* HAVE_IPV6 */ + + if (peer->default_rmap[afi][safi].name) + { + SET_FLAG (bgp->peer_self->rmap_type, PEER_RMAP_TYPE_DEFAULT); + for (rn = bgp_table_top (bgp->rib[afi][safi]); rn; + rn = bgp_route_next (rn)) + { + for (ri = rn->info; ri; ri = ri->next) + { + struct attr dummy_attr; + struct attr_extra dummy_extra; + struct bgp_info info; + + /* Provide dummy so the route-map can't modify the attributes */ + dummy_attr.extra = &dummy_extra; + bgp_attr_dup (&dummy_attr, ri->attr); + info.peer = ri->peer; + info.attr = &dummy_attr; + + ret = + route_map_apply (peer->default_rmap[afi][safi].map, &rn->p, + RMAP_BGP, &info); + + /* The route map might have set attributes. If we don't flush them + * here, they will be leaked. */ + bgp_attr_flush (&dummy_attr); + if (ret != RMAP_DENYMATCH) + break; + } + if (ret != RMAP_DENYMATCH) + break; + } + bgp->peer_self->rmap_type = 0; + + if (ret == RMAP_DENYMATCH) + withdraw = 1; + } + + if (withdraw) + { + if (CHECK_FLAG (subgrp->sflags, SUBGRP_STATUS_DEFAULT_ORIGINATE)) + subgroup_default_withdraw_packet (subgrp); + UNSET_FLAG (subgrp->sflags, SUBGRP_STATUS_DEFAULT_ORIGINATE); + } + else + { + if (!CHECK_FLAG (subgrp->sflags, SUBGRP_STATUS_DEFAULT_ORIGINATE)) + { + SET_FLAG (subgrp->sflags, SUBGRP_STATUS_DEFAULT_ORIGINATE); + subgroup_default_update_packet (subgrp, &attr, from); + } + } + + bgp_attr_extra_free (&attr); + aspath_unintern (&aspath); +} + +/* + * Announce the BGP table to a subgroup. + * + * At startup, we try to optimize route announcement by coalescing the + * peer-up events. This is done only the first time - from then on, + * subgrp->v_coalesce will be set to zero and the normal logic + * prevails. 
+ */ +void +subgroup_announce_all (struct update_subgroup *subgrp) +{ + if (!subgrp) + return; + + /* + * If coalesce timer value is not set, announce routes immediately. + */ + if (!subgrp->v_coalesce) + { + if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0)) + zlog_debug ("u%llu:s%llu announcing all routes", + subgrp->update_group->id, subgrp->id); + subgroup_announce_route (subgrp); + return; + } + + /* + * We should wait for the coalesce timer. Arm the timer if not done. + */ + if (!subgrp->t_coalesce) + { + THREAD_TIMER_MSEC_ON (master, subgrp->t_coalesce, subgroup_coalesce_timer, + subgrp, subgrp->v_coalesce); + } +} + +/* + * Go through all update subgroups and set up the adv queue for the + * input route. + * + * The walk context is zeroed before use so that the fields this caller + * does not set are never handed to the walker uninitialized. + */ +void +group_announce_route (struct bgp *bgp, afi_t afi, safi_t safi, + struct bgp_node *rn, struct bgp_info *ri) +{ + struct updwalk_context ctx; + + memset (&ctx, 0, sizeof (ctx)); + ctx.ri = ri; + ctx.rn = rn; + update_group_af_walk (bgp, afi, safi, group_announce_route_walkcb, &ctx); +} + +void +update_group_show_adj_queue (struct bgp *bgp, afi_t afi, safi_t safi, + struct vty *vty, u_int64_t id) +{ + updgrp_show_adj (bgp, afi, safi, vty, id, UPDWALK_FLAGS_ADVQUEUE); +} + +void +update_group_show_advertised (struct bgp *bgp, afi_t afi, safi_t safi, + struct vty *vty, u_int64_t id) +{ + updgrp_show_adj (bgp, afi, safi, vty, id, UPDWALK_FLAGS_ADVERTISED); +} + +void +update_group_announce (struct bgp *bgp) +{ + update_group_walk (bgp, update_group_announce_walkcb, NULL); +} + +void +update_group_announce_rrclients (struct bgp *bgp) +{ + update_group_walk (bgp, update_group_announce_rrc_walkcb, NULL); +} diff --git a/bgpd/bgp_updgrp_packet.c b/bgpd/bgp_updgrp_packet.c new file mode 100644 index 0000000000..8ca4e79319 --- /dev/null +++ b/bgpd/bgp_updgrp_packet.c @@ -0,0 +1,1136 @@ +/** + * bgp_updgrp_packet.c: BGP update group packet handling routines + * + * @copyright Copyright (C) 2014 Cumulus Networks, Inc. 
+ *
+ * @author Avneesh Sachdev
+ * @author Rajesh Varadarajan
+ * @author Pradosh Mohapatra
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#include "prefix.h"
+#include "thread.h"
+#include "buffer.h"
+#include "stream.h"
+#include "command.h"
+#include "sockunion.h"
+#include "network.h"
+#include "memory.h"
+#include "filter.h"
+#include "routemap.h"
+#include "str.h"
+#include "log.h"
+#include "plist.h"
+#include "linklist.h"
+#include "workqueue.h"
+#include "hash.h"
+#include "queue.h"
+
+#include "bgpd/bgpd.h"
+#include "bgpd/bgp_debug.h"
+#include "bgpd/bgp_fsm.h"
+#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_packet.h"
+#include "bgpd/bgp_advertise.h"
+#include "bgpd/bgp_updgrp.h"
+#include "bgpd/bgp_nexthop.h"
+#include "bgpd/bgp_nht.h"
+
+/********************
+ * PRIVATE FUNCTIONS
+ ********************/
+
+/********************
+ * PUBLIC FUNCTIONS
+ ********************/
+
+/*
+ * Allocate a zero-filled bpacket (MTYPE_BGP_PACKET).
+ */
+struct bpacket *
+bpacket_alloc ()
+{
+  return XCALLOC (MTYPE_BGP_PACKET, sizeof (struct bpacket));
+}
+
+/*
+ * Release a bpacket together with the stream it holds, if any.
+ */
+void
+bpacket_free (struct bpacket *pkt)
+{
+  if (pkt->buffer)
+    {
+      stream_free (pkt->buffer);
+      pkt->buffer = NULL;
+    }
+  XFREE (MTYPE_BGP_PACKET, pkt);
+}
+
+/*
+ * Initialize the packet list of 'q'. The counters are not touched here.
+ */
+void
+bpacket_queue_init (struct bpacket_queue *q)
+{
+  TAILQ_INIT (&(q->pkts));
+}
+
+/*
+ * bpacket_queue_sanity_check
+ *
+ * The checks are compiled out (#if 0), so this is currently a no-op.
+ */
+void
+bpacket_queue_sanity_check (struct bpacket_queue __attribute__ ((__unused__)) *q)
+{
+#if 0
+  struct bpacket *pkt;
+
+  pkt = bpacket_queue_last (q);
+  assert (pkt);
+  assert (!pkt->buffer);
+
+  /*
+   * Make sure the count of packets is correct.
+   */
+  int num_pkts = 0;
+
+  pkt = bpacket_queue_first (q);
+  while (pkt)
+    {
+      num_pkts++;
+
+      if (num_pkts > q->curr_count)
+        assert (0);
+
+      pkt = TAILQ_NEXT (pkt, pkt_train);
+    }
+
+  assert (num_pkts == q->curr_count);
+#endif
+}
+
+/*
+ * bpacket_queue_add_packet
+ *
+ * Internal function of bpacket_queue - and adds a
+ * packet entry to the end of the list.
+ *
+ * Users of bpacket_queue should use bpacket_queue_add instead.
+ */
+static void
+bpacket_queue_add_packet (struct bpacket_queue *q, struct bpacket *pkt)
+{
+  /*
+   * TAILQ_INSERT_TAIL covers both the empty and the non-empty list, so
+   * no separate "insert after the last element" branch is needed.
+   */
+  TAILQ_INSERT_TAIL (&(q->pkts), pkt, pkt_train);
+
+  q->curr_count++;
+
+  /* Track the high-water mark of the queue length. */
+  if (q->hwm_count < q->curr_count)
+    q->hwm_count = q->curr_count;
+}
+
+/*
+ * Adds a packet to the bpacket_queue.
+ *
+ * The stream passed is consumed by this function. So, the caller should
+ * not free or use the stream after
+ * invoking this function.
+ *
+ * 'vecarrp' is copied into the entry that receives the stream; when it
+ * is NULL that entry's attribute vector array is reset instead.
+ *
+ * Returns the queue entry that now holds 's'.
+ */
+struct bpacket *
+bpacket_queue_add (struct bpacket_queue *q, struct stream *s,
+                   struct bpacket_attr_vec_arr *vecarrp)
+{
+  struct bpacket *new_pkt;
+  struct bpacket *holder;
+
+  new_pkt = bpacket_alloc ();
+
+  if (TAILQ_EMPTY (&(q->pkts)))
+    {
+      /* Empty queue: the new entry itself carries the stream. */
+      new_pkt->ver = 1;
+      holder = new_pkt;
+    }
+  else
+    {
+      /*
+       * Fill in the new information into the current sentinel and create a
+       * new sentinel.
+       */
+      bpacket_queue_sanity_check (q);
+      holder = bpacket_queue_last (q);
+      assert (holder->buffer == NULL);
+      new_pkt->ver = holder->ver + 1;
+    }
+
+  holder->buffer = s;
+  if (vecarrp)
+    memcpy (&holder->arr, vecarrp, sizeof (struct bpacket_attr_vec_arr));
+  else
+    bpacket_attr_vec_arr_reset (&holder->arr);
+
+  bpacket_queue_add_packet (q, new_pkt);
+  bpacket_queue_sanity_check (q);
+  return holder;
+}
+
+/*
+ * First entry of the queue, or NULL when the queue is empty.
+ */
+struct bpacket *
+bpacket_queue_first (struct bpacket_queue *q)
+{
+  return (TAILQ_FIRST (&(q->pkts)));
+}
+
+/*
+ * Last entry of the queue, or NULL when the queue is empty.
+ */
+struct bpacket *
+bpacket_queue_last (struct bpacket_queue *q)
+{
+  return TAILQ_LAST (&(q->pkts), pkt_queue);
+}
+
+/*
+ * Unlink and return the first entry of the queue (NULL when empty).
+ * The entry is not freed; that is left to the caller.
+ */
+struct bpacket *
+bpacket_queue_remove (struct bpacket_queue *q)
+{
+  struct bpacket *first;
+
+  first = bpacket_queue_first (q);
+  if (!first)
+    return NULL;
+
+  TAILQ_REMOVE (&(q->pkts), first, pkt_train);
+  q->curr_count--;
+  return first;
+}
+
+/*
+ * Current number of entries minus one.
+ * NOTE(review): the "- 1" presumably discounts the sentinel entry; the
+ * result wraps around if curr_count is 0 -- confirm callers never ask
+ * before the first bpacket_queue_add().
+ */
+unsigned int
+bpacket_queue_length (struct bpacket_queue *q)
+{
+  return q->curr_count - 1;
+}
+
+/*
+ * High-water mark of the entry count minus one (see the note on
+ * bpacket_queue_length about the "- 1").
+ */
+unsigned int
+bpacket_queue_hwm_length (struct bpacket_queue *q)
+{
+  return q->hwm_count - 1;
+}
+
+/*
+ * Return 1 when the queue holds at least
+ * bgp->default_subgroup_pkt_queue_max entries, 0 otherwise.
+ */
+int
+bpacket_queue_is_full (struct bgp *bgp, struct bpacket_queue *q)
+{
+  return (q->curr_count >= bgp->default_subgroup_pkt_queue_max) ? 1 : 0;
+}
+
+/*
+ * Put 'paf' on the peer list of 'pkt' and record 'pkt' as the next
+ * packet to send to that peer. Does nothing if either argument is NULL.
+ */
+void
+bpacket_add_peer (struct bpacket *pkt, struct peer_af *paf)
+{
+  if (!pkt || !paf)
+    return;
+
+  LIST_INSERT_HEAD (&(pkt->peers), paf, pkt_train);
+  paf->next_pkt_to_send = pkt;
+}
+
+/*
+ * bpacket_queue_cleanup
+ *
+ * Remove and free every entry of the queue.
+ */
+void
+bpacket_queue_cleanup (struct bpacket_queue *q)
+{
+  struct bpacket *pkt;
+
+  while ((pkt = bpacket_queue_remove (q)) != NULL)
+    bpacket_free (pkt);
+}
+
+/*
+ * bpacket_queue_compact
+ *
+ * Delete packets that do not need to be transmitted to any peer from
+ * the queue.
+ *
+ * @return the number of packets deleted.
+ */
+static int
+bpacket_queue_compact (struct bpacket_queue *q)
+{
+  int num_deleted = 0;
+  struct bpacket *pkt, *removed_pkt;
+
+  while ((pkt = bpacket_queue_first (q)) != NULL)
+    {
+      /*
+       * Don't delete the sentinel.
+       */
+      if (!pkt->buffer)
+        break;
+
+      /* Some peer still has to be sent this packet: stop here. */
+      if (!LIST_EMPTY (&(pkt->peers)))
+        break;
+
+      removed_pkt = bpacket_queue_remove (q);
+      assert (pkt == removed_pkt);
+      bpacket_free (removed_pkt);
+
+      num_deleted++;
+    }
+
+  bpacket_queue_sanity_check (q);
+  return num_deleted;
+}
+
+/*
+ * bpacket_queue_advance_peer
+ *
+ * Move 'paf' from the packet it currently points at to the following
+ * packet in the queue, then compact the queue. If compaction deleted
+ * anything, check whether the peer's subgroup can be merged.
+ *
+ * Nothing is done when the peer is not positioned on any packet
+ * (next_pkt_to_send is NULL, as left by bpacket_queue_remove_peer) or
+ * is already on the buffer-less entry at the end of the list.
+ */
+void
+bpacket_queue_advance_peer (struct peer_af *paf)
+{
+  struct bpacket *pkt;
+  struct bpacket *old_pkt;
+
+  old_pkt = paf->next_pkt_to_send;
+  if (!old_pkt || old_pkt->buffer == NULL)
+    /* Not on the queue, or already at end of list */
+    return;
+
+  LIST_REMOVE (paf, pkt_train);
+  pkt = TAILQ_NEXT (old_pkt, pkt_train);
+  bpacket_add_peer (pkt, paf);
+
+  if (!bpacket_queue_compact (PAF_PKTQ (paf)))
+    return;
+
+  /*
+   * Deleted one or more packets. Check if we can now merge this
+   * peer's subgroup into another subgroup.
+   */
+  update_subgroup_check_merge (paf->subgroup, "advanced peer in queue");
+}
+
+/*
+ * bpacket_queue_remove_peer
+ *
+ * Remove the peer from the packet queue of the subgroup it belongs
+ * to.
+ */
+void
+bpacket_queue_remove_peer (struct peer_af *paf)
+{
+  struct bpacket_queue *q;
+
+  q = PAF_PKTQ (paf);
+  assert (q);
+  /* Still guard against a NULL queue when assert() is compiled out. */
+  if (!q)
+    return;
+
+  LIST_REMOVE (paf, pkt_train);
+  paf->next_pkt_to_send = NULL;
+
+  /* The packet the peer was waiting on may now be unreferenced. */
+  bpacket_queue_compact (q);
+}
+
+/*
+ * Number of packets this peer is behind the end of the queue, computed
+ * from the version numbers of the peer's next packet and of the last
+ * queue entry. Returns 0 when the peer has no next packet, is already
+ * at the buffer-less entry at the end, or the queue is empty.
+ */
+unsigned int
+bpacket_queue_virtual_length (struct peer_af *paf)
+{
+  struct bpacket *pkt;
+  struct bpacket *last;
+  struct bpacket_queue *q;
+
+  pkt = paf->next_pkt_to_send;
+  if (!pkt || (pkt->buffer == NULL))
+    /* Already at end of list */
+    return 0;
+
+  q = PAF_PKTQ (paf);
+  if (TAILQ_EMPTY (&(q->pkts)))
+    return 0;
+
+  last = TAILQ_LAST (&(q->pkts), pkt_queue);
+  if (last->ver >= pkt->ver)
+    return last->ver - pkt->ver;
+
+  /* sequence # rolled over */
+  return (UINT_MAX - pkt->ver + 1) + last->ver;
+}
+
+/*
+ * Dump the bpacket queue: one line per packet (address, version,
+ * buffer) followed by one line per peer attached to that packet.
+ */
+void
+bpacket_queue_show_vty (struct bpacket_queue *q, struct vty *vty)
+{
+  struct bpacket *pkt;
+  struct peer_af *paf;
+
+  pkt = bpacket_queue_first (q);
+  while (pkt)
+    {
+      vty_out (vty, " Packet %p ver %u buffer %p%s", pkt, pkt->ver,
+               pkt->buffer, VTY_NEWLINE);
+
+      LIST_FOREACH (paf, &(pkt->peers), pkt_train)
+      {
+        vty_out (vty, " - %s%s", paf->peer->host, VTY_NEWLINE);
+      }
+      pkt = bpacket_next (pkt);
+    }
+  return;
+}
+
+/*
+ * Build the per-peer copy of a queued packet.
+ *
+ * The packet's stream is duplicated. If the next-hop attribute vector
+ * entry (BGP_ATTR_VEC_NH) is flagged as updated, the next-hop bytes at
+ * the recorded offset are inspected and, under the conditions below,
+ * overwritten in the copy with the peer's own next-hop address(es).
+ * The copy is handed to bgp_packet_add() for the peer and also returned.
+ */
+struct stream *
+bpacket_reformat_for_peer (struct bpacket *pkt, struct peer_af *paf)
+{
+  struct stream *s = NULL;
+  bpacket_attr_vec *vec;
+
+  s = stream_dup (pkt->buffer);
+
+  vec = &pkt->arr.entries[BGP_ATTR_VEC_NH];
+  if (CHECK_FLAG (vec->flags, BPACKET_ATTRVEC_FLAGS_UPDATED))
+    {
+      u_int8_t nhlen;
+      int route_map_sets_nh;
+      /* The byte at vec->offset is the next-hop length; the address(es)
+       * follow at vec->offset + 1. */
+      nhlen = stream_getc_from (s, vec->offset);
+
+      route_map_sets_nh = CHECK_FLAG (vec->flags,
+                                      BPACKET_ATTRVEC_FLAGS_RMAP_CHANGED);
+
+      if (paf->afi == AFI_IP)
+        {
+          struct in_addr v4nh;
+
+          stream_get_from (&v4nh, s, vec->offset + 1, 4);
+
+          /* If NH unavailable from attribute or the route-map has set it to
+           * be the peering address, use peer's NH. The "NH unavailable" case
+           * also covers next-hop-self and some other scenarios -- see
+           * subgroup_announce_check(). The only other case where we use the
+           * peer's NH is if it is an EBGP multiaccess scenario and there is
+           * no next-hop-unchanged setting.
+           */
+          if (!v4nh.s_addr ||
+              (route_map_sets_nh &&
+               CHECK_FLAG(vec->flags,
+                          BPACKET_ATTRVEC_FLAGS_RMAP_NH_PEER_ADDRESS)))
+            stream_put_in_addr_at (s, vec->offset + 1, &paf->peer->nexthop.v4);
+          else if (paf->peer->sort == BGP_PEER_EBGP &&
+                   !peer_af_flag_check (paf->peer, paf->afi, paf->safi,
+                                        PEER_FLAG_NEXTHOP_UNCHANGED))
+            {
+              if (bgp_multiaccess_check_v4 (v4nh, paf->peer) == 0)
+                stream_put_in_addr_at (s, vec->offset + 1,
+                                       &paf->peer->nexthop.v4);
+            }
+
+          /* NOTE(review): the region below is compiled out and refers to a
+           * variable (nhtouse) that is not declared in this function. */
+#if 0
+          if (!v4nh.s_addr)
+            nhtouse = paf->peer->nexthop.v4;
+
+          /*
+           * If NH is available from attribute (which is after outbound
+           * policy application), always use it if it has been specified
+           * by the policy. Otherwise, the decision to make is whether
+           * we need to set ourselves as the next-hop or not. Here are
+           * the conditions for that (1 OR 2):
+           *
+           * (1) if the configuration says: 'next-hop-self'
+           * (2) if the peer is EBGP AND not a third-party-nexthop type
+           *
+           * There are some exceptions even if the above conditions apply.
+           * Those are:
+           * (a) if the configuration says: 'next-hop-unchanged'. Honor that
+           * always. Not set 'self' as next-hop.
+           * (b) if we are reflecting the routes (IBGP->IBGP) and the config
+           * is _not_ forcing next-hop-self. We should pass on the
+           * next-hop unchanged for reflected routes.
+           */
+          if (route_map_sets_nh)
+            {
+              /*
+               * If address is specified, nothing to do; if specified as
+               * 'peer-address', compute the value to use.
+               *
+               * NOTE: If we are reflecting routes, the policy could have set
+               * this only if outbound policy has been allowed for route
+               * reflection -- handled in announce_check().
+               */
+              if (CHECK_FLAG(vec->flags,
+                             BPACKET_ATTRVEC_FLAGS_RMAP_NH_PEER_ADDRESS))
+                nhtouse = paf->peer->nexthop.v4;
+            }
+          else if (peer_af_flag_check (paf->peer, paf->afi, paf->safi,
+                                       PEER_FLAG_NEXTHOP_SELF)
+                   || (paf->peer->sort == BGP_PEER_EBGP &&
+                       (bgp_multiaccess_check_v4 (v4nh, paf->peer) == 0)))
+            {
+              if (!(peer_af_flag_check (paf->peer, paf->afi, paf->safi,
+                                        PEER_FLAG_NEXTHOP_UNCHANGED)
+                    || (CHECK_FLAG(vec->flags, BPACKET_ATTRVEC_FLAGS_REFLECTED) &&
+                        !peer_af_flag_check(paf->peer, paf->afi, paf->safi,
+                                            PEER_FLAG_FORCE_NEXTHOP_SELF))))
+                nhtouse = paf->peer->nexthop.v4;
+            }
+#endif
+
+        }
+      else if (paf->afi == AFI_IP6)
+        {
+          struct in6_addr v6nhglobal;
+          struct in6_addr v6nhlocal;
+
+          /*
+           * The logic here is rather similar to that for IPv4, the
+           * additional work being to handle 1 or 2 nexthops.
+           */
+          stream_get_from (&v6nhglobal, s, vec->offset + 1, 16);
+          if (IN6_IS_ADDR_UNSPECIFIED (&v6nhglobal) ||
+              (route_map_sets_nh &&
+               CHECK_FLAG(vec->flags,
+                          BPACKET_ATTRVEC_FLAGS_RMAP_NH_PEER_ADDRESS)))
+            stream_put_in6_addr_at (s, vec->offset + 1,
+                                    &paf->peer->nexthop.v6_global);
+          else if (paf->peer->sort == BGP_PEER_EBGP &&
+                   !peer_af_flag_check (paf->peer, paf->afi, paf->safi,
+                                        PEER_FLAG_NEXTHOP_UNCHANGED))
+            {
+              /* Unlike the IPv4 branch, no multiaccess check is made
+               * before overwriting the global next-hop here. */
+              stream_put_in6_addr_at (s, vec->offset + 1,
+                                      &paf->peer->nexthop.v6_global);
+            }
+
+          /* A length of 32 means a second (link-local) next-hop follows
+           * the 16-byte global one. */
+          if (nhlen == 32)
+            {
+              stream_get_from (&v6nhlocal, s, vec->offset + 1 + 16, 16);
+              if (IN6_IS_ADDR_UNSPECIFIED (&v6nhlocal))
+                stream_put_in6_addr_at (s, vec->offset + 1 + 16,
+                                        &paf->peer->nexthop.v6_local);
+            }
+        }
+    }
+
+  bgp_packet_add (paf->peer, s);
+  return s;
+}
+
+/*
+ * Update the vecarr offsets to go beyond 'pos' bytes, i.e. add 'pos'
+ * to each offset. A NULL 'vecarr' is ignored.
+ */
+static void
+bpacket_attr_vec_arr_update (struct bpacket_attr_vec_arr *vecarr, size_t pos)
+{
+  int i;
+
+  if (!vecarr)
+    return;
+
+  for (i = 0; i < BGP_ATTR_VEC_MAX; i++)
+    vecarr->entries[i].offset += pos;
+}
+
+/*
+ * Return if there are packets to build for this subgroup.
+ */
+int
+subgroup_packets_to_build (struct update_subgroup *subgrp)
+{
+  struct bgp_advertise *adv;
+
+  if (!subgrp)
+    return 0;
+
+  /* Anything pending on either the withdraw or the update FIFO counts. */
+  adv = BGP_ADV_FIFO_HEAD (&subgrp->sync->withdraw);
+  if (adv)
+    return 1;
+
+  adv = BGP_ADV_FIFO_HEAD (&subgrp->sync->update);
+  if (adv)
+    return 1;
+
+  return 0;
+}
+
+/*
+ * Make BGP update packet.
+ *
+ * Drains advertisements from the subgroup's update FIFO into one UPDATE
+ * message, built in subgrp->work (header, attributes, IPv4 unicast
+ * NLRI) and subgrp->scratch (MP_REACH_NLRI for other afi/safi). The
+ * attributes are encoded once, from the first advertisement taken.
+ * Draining stops when the FIFO walk ends or the next prefix does not fit.
+ *
+ * Returns the queue entry holding the finished packet, or NULL when
+ * there is no subgroup, the packet queue is full, nothing was encoded,
+ * or the attributes alone leave no room for a prefix.
+ */
+struct bpacket *
+subgroup_update_packet (struct update_subgroup *subgrp)
+{
+  struct bpacket_attr_vec_arr vecarr;
+  struct bpacket *pkt;
+  struct peer *peer;
+  struct stream *s;
+  struct stream *snlri;
+  struct stream *packet;
+  struct bgp_adj_out *adj;
+  struct bgp_advertise *adv;
+  struct bgp_node *rn = NULL;
+  struct bgp_info *binfo = NULL;
+  bgp_size_t total_attr_len = 0;
+  unsigned long attrlen_pos = 0;
+  size_t mpattrlen_pos = 0;
+  size_t mpattr_pos = 0;
+  afi_t afi;
+  safi_t safi;
+  int space_remaining = 0;
+  int space_needed = 0;
+  char send_attr_str[BUFSIZ];
+  int send_attr_printed = 0;
+  int num_pfx = 0;
+
+
+  if (!subgrp)
+    return NULL;
+
+  if (bpacket_queue_is_full (SUBGRP_INST (subgrp), SUBGRP_PKTQ (subgrp)))
+    return NULL;
+
+  /*
+   * The attribute string is only filled in when the BGP_DEBUG flags are
+   * on, but it is printed under the separate bgp_debug_update() check.
+   * Start from an empty string so that path never reads garbage.
+   */
+  send_attr_str[0] = '\0';
+
+  peer = SUBGRP_PEER (subgrp);
+  afi = SUBGRP_AFI (subgrp);
+  safi = SUBGRP_SAFI (subgrp);
+  s = subgrp->work;
+  stream_reset (s);
+  snlri = subgrp->scratch;
+  stream_reset (snlri);
+
+  bpacket_attr_vec_arr_reset (&vecarr);
+
+  adv = BGP_ADV_FIFO_HEAD (&subgrp->sync->update);
+  while (adv)
+    {
+      assert (adv->rn);
+      rn = adv->rn;
+      adj = adv->adj;
+      if (adv->binfo)
+        binfo = adv->binfo;
+
+      space_remaining = STREAM_CONCAT_REMAIN (s, snlri, STREAM_SIZE(s)) -
+                        BGP_MAX_PACKET_SIZE_OVERFLOW;
+      space_needed = BGP_NLRI_LENGTH + PSIZE (rn->p.prefixlen);
+
+      /* When remaining space can't include NLRI and its length. */
+      if (space_remaining < space_needed)
+        break;
+
+      /* If packet is empty, set attribute. */
+      if (stream_empty (s))
+        {
+          struct peer *from = NULL;
+
+          if (binfo)
+            from = binfo->peer;
+
+          /* 1: Write the BGP message header - 16 bytes marker, 2 bytes length,
+           * one byte message type.
+           */
+          bgp_packet_set_marker (s, BGP_MSG_UPDATE);
+
+          /* 2: withdrawn routes length */
+          stream_putw (s, 0);
+
+          /* 3: total attributes length - attrlen_pos stores the position */
+          attrlen_pos = stream_get_endp (s);
+          stream_putw (s, 0);
+
+          /* 4: if there is MP_REACH_NLRI attribute, that should be the first
+           * attribute, according to draft-ietf-idr-error-handling. Save the
+           * position.
+           */
+          mpattr_pos = stream_get_endp (s);
+
+          /* 5: Encode all the attributes, except MP_REACH_NLRI attr. */
+          total_attr_len = bgp_packet_attribute (NULL, peer, s,
+                                                 adv->baa->attr, &vecarr,
+                                                 NULL, afi, safi,
+                                                 from, NULL, NULL);
+
+          space_remaining = STREAM_CONCAT_REMAIN (s, snlri, STREAM_SIZE(s)) -
+                            BGP_MAX_PACKET_SIZE_OVERFLOW;
+          space_needed = BGP_NLRI_LENGTH + PSIZE (rn->p.prefixlen);
+
+          /* If the attributes alone do not leave any room for NLRI then
+           * return */
+          if (space_remaining < space_needed)
+            {
+              zlog_err ("u%llu:s%llu attributes too long, cannot send UPDATE",
+                        subgrp->update_group->id, subgrp->id);
+
+              /* Flush the FIFO update queue. Each advertisement must be
+               * cleaned through its own adj-out entry; reusing the first
+               * entry's 'adj' for the following ones would clean the
+               * wrong (already cleaned) entry. */
+              while (adv)
+                adv = bgp_advertise_clean_subgroup (subgrp, adv->adj);
+              return NULL;
+            }
+
+          if (BGP_DEBUG (update, UPDATE_OUT) ||
+              BGP_DEBUG (update, UPDATE_PREFIX))
+            {
+              memset (send_attr_str, 0, BUFSIZ);
+              send_attr_printed = 0;
+              bgp_dump_attr (peer, adv->baa->attr, send_attr_str, BUFSIZ);
+            }
+        }
+
+      if (afi == AFI_IP && safi == SAFI_UNICAST)
+        stream_put_prefix (s, &rn->p);
+      else
+        {
+          /* Encode the prefix in MP_REACH_NLRI attribute */
+          struct prefix_rd *prd = NULL;
+          u_char *tag = NULL;
+
+          if (rn->prn)
+            prd = (struct prefix_rd *) &rn->prn->p;
+          if (binfo && binfo->extra)
+            tag = binfo->extra->tag;
+
+          /* The MP_REACH_NLRI header is written once, with the first
+           * prefix that goes into the scratch stream. */
+          if (stream_empty (snlri))
+            mpattrlen_pos = bgp_packet_mpattr_start (snlri, afi, safi,
+                                                     &vecarr, adv->baa->attr);
+          bgp_packet_mpattr_prefix (snlri, afi, safi, &rn->p, prd, tag);
+        }
+
+      num_pfx++;
+
+      if (bgp_debug_update(NULL, &rn->p, subgrp->update_group, 0))
+        {
+          char buf[INET6_BUFSIZ];
+
+          if (!send_attr_printed)
+            {
+              zlog_debug ("u%llu:s%llu send UPDATE w/ attr: %s",
+                          subgrp->update_group->id, subgrp->id, send_attr_str);
+              send_attr_printed = 1;
+            }
+
+          zlog_debug ("u%llu:s%llu send UPDATE %s/%d",
+                      subgrp->update_group->id, subgrp->id,
+                      inet_ntop (rn->p.family, &(rn->p.u.prefix), buf,
+                                 INET6_BUFSIZ), rn->p.prefixlen);
+        }
+
+      /* Synchronize attribute. A prefix that had no attribute recorded
+       * in the adj-out entry is counted in scount. */
+      if (adj->attr)
+        bgp_attr_unintern (&adj->attr);
+      else
+        subgrp->scount++;
+
+      adj->attr = bgp_attr_intern (adv->baa->attr);
+
+      adv = bgp_advertise_clean_subgroup (subgrp, adj);
+    }
+
+  if (!stream_empty (s))
+    {
+      if (!stream_empty (snlri))
+        {
+          bgp_packet_mpattr_end (snlri, mpattrlen_pos);
+          total_attr_len += stream_get_endp (snlri);
+        }
+
+      /* set the total attribute length correctly */
+      stream_putw_at (s, attrlen_pos, total_attr_len);
+
+      if (!stream_empty (snlri))
+        {
+          /* Splice the MP_REACH_NLRI bytes in at mpattr_pos and shift the
+           * recorded attribute offsets accordingly. */
+          packet = stream_dupcat (s, snlri, mpattr_pos);
+          bpacket_attr_vec_arr_update (&vecarr, mpattr_pos);
+        }
+      else
+        packet = stream_dup (s);
+      bgp_packet_set_size (packet);
+      if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0))
+        zlog_debug ("u%llu:s%llu UPDATE len %d numpfx %d",
+                    subgrp->update_group->id, subgrp->id,
+                    (int) (stream_get_endp(packet) - stream_get_getp(packet)),
+                    num_pfx);
+      pkt = bpacket_queue_add (SUBGRP_PKTQ (subgrp), packet, &vecarr);
+      stream_reset (s);
+      stream_reset (snlri);
+      return pkt;
+    }
+  return NULL;
+}
+
+/* Make BGP withdraw packet.
*/
+/* For ipv4 unicast:
+   16-octet marker | 2-octet length | 1-octet type |
+    2-octet withdrawn route length | withdrawn prefixes | 2-octet attrlen (=0)
+*/
+/* For other afi/safis:
+   16-octet marker | 2-octet length | 1-octet type |
+    2-octet withdrawn route length (=0) | 2-octet attrlen |
+     mp_unreach attr type | attr len | afi | safi | withdrawn prefixes
+*/
+/*
+ * Drains the subgroup's withdraw FIFO into one UPDATE built in
+ * subgrp->work, until the FIFO is empty or the next prefix does not
+ * fit. Each drained prefix has its adj-out entry removed and its route
+ * node unlocked. A copy of the finished packet is queued.
+ *
+ * Returns the queue entry holding the packet, or NULL when there is no
+ * subgroup, the packet queue is full, or nothing was encoded.
+ */
+struct bpacket *
+subgroup_withdraw_packet (struct update_subgroup *subgrp)
+{
+  struct bpacket *pkt;
+  struct stream *s;
+  struct bgp_adj_out *adj;
+  struct bgp_advertise *adv;
+  struct bgp_node *rn;
+  bgp_size_t unfeasible_len;
+  bgp_size_t total_attr_len;
+  size_t mp_start = 0;
+  size_t attrlen_pos = 0;
+  size_t mplen_pos = 0;
+  u_char first_time = 1;
+  afi_t afi;
+  safi_t safi;
+  int space_remaining = 0;
+  int space_needed = 0;
+  int num_pfx = 0;
+
+  if (!subgrp)
+    return NULL;
+
+  if (bpacket_queue_is_full (SUBGRP_INST (subgrp), SUBGRP_PKTQ (subgrp)))
+    return NULL;
+
+
+  afi = SUBGRP_AFI (subgrp);
+  safi = SUBGRP_SAFI (subgrp);
+  s = subgrp->work;
+  stream_reset (s);
+
+  while ((adv = BGP_ADV_FIFO_HEAD (&subgrp->sync->withdraw)) != NULL)
+    {
+      assert (adv->rn);
+      adj = adv->adj;
+      rn = adv->rn;
+
+      space_remaining = STREAM_REMAIN (s) -
+                        BGP_MAX_PACKET_SIZE_OVERFLOW;
+      space_needed = (BGP_NLRI_LENGTH + BGP_TOTAL_ATTR_LEN +
+                      PSIZE (rn->p.prefixlen));
+
+      if (space_remaining < space_needed)
+        break;
+
+      if (stream_empty (s))
+        {
+          bgp_packet_set_marker (s, BGP_MSG_UPDATE);
+          stream_putw (s, 0); /* unfeasible routes length */
+        }
+      else
+        first_time = 0;
+
+      if (afi == AFI_IP && safi == SAFI_UNICAST)
+        stream_put_prefix (s, &rn->p);
+      else
+        {
+          struct prefix_rd *prd = NULL;
+
+          if (rn->prn)
+            prd = (struct prefix_rd *) &rn->prn->p;
+
+          /* If first time, format the MP_UNREACH header */
+          if (first_time)
+            {
+              attrlen_pos = stream_get_endp (s);
+              /* total attr length = 0 for now. reevaluate later */
+              stream_putw (s, 0);
+              mp_start = stream_get_endp (s);
+              mplen_pos = bgp_packet_mpunreach_start (s, afi, safi);
+            }
+
+          bgp_packet_mpunreach_prefix (s, &rn->p, afi, safi, prd, NULL);
+        }
+
+      num_pfx++;
+
+      if (bgp_debug_update(NULL, &rn->p, subgrp->update_group, 0))
+        {
+          char buf[INET6_BUFSIZ];
+
+          zlog_debug ("u%llu:s%llu send UPDATE %s/%d -- unreachable",
+                      subgrp->update_group->id, subgrp->id,
+                      inet_ntop (rn->p.family, &(rn->p.u.prefix), buf,
+                                 INET6_BUFSIZ), rn->p.prefixlen);
+        }
+
+      subgrp->scount--;
+
+      /* NOTE(review): the loop re-reads the FIFO head each time, so it
+       * relies on this call taking 'adv' off the withdraw FIFO. */
+      bgp_adj_out_remove_subgroup (rn, adj, subgrp);
+      bgp_unlock_node (rn);
+    }
+
+  if (!stream_empty (s))
+    {
+      if (afi == AFI_IP && safi == SAFI_UNICAST)
+        {
+          unfeasible_len
+            = stream_get_endp (s) - BGP_HEADER_SIZE - BGP_UNFEASIBLE_LEN;
+          stream_putw_at (s, BGP_HEADER_SIZE, unfeasible_len);
+          stream_putw (s, 0);
+        }
+      else
+        {
+          /* Set the mp_unreach attr's length */
+          bgp_packet_mpunreach_end (s, mplen_pos);
+
+          /* Set total path attribute length. */
+          total_attr_len = stream_get_endp (s) - mp_start;
+          stream_putw_at (s, attrlen_pos, total_attr_len);
+        }
+      bgp_packet_set_size (s);
+      if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0))
+        zlog_debug ("u%llu:s%llu UPDATE (withdraw) len %d numpfx %d",
+                    subgrp->update_group->id, subgrp->id,
+                    (int) (stream_get_endp(s) - stream_get_getp(s)), num_pfx);
+      pkt = bpacket_queue_add (SUBGRP_PKTQ (subgrp), stream_dup (s), NULL);
+      stream_reset (s);
+      return pkt;
+    }
+
+  return NULL;
+}
+
+/*
+ * Build an UPDATE announcing the default route (0.0.0.0/0 for AFI_IP,
+ * ::/0 otherwise when IPv6 is compiled in) with the given attributes,
+ * queue it on the subgroup's packet queue and trigger a write.
+ *
+ * Does nothing when DISABLE_BGP_ANNOUNCE is set or 'subgrp' is NULL.
+ */
+void
+subgroup_default_update_packet (struct update_subgroup *subgrp,
+                                struct attr *attr, struct peer *from)
+{
+  struct stream *s;
+  struct stream *packet;
+  struct peer *peer;
+  struct prefix p;
+  unsigned long pos;
+  bgp_size_t total_attr_len;
+  afi_t afi;
+  safi_t safi;
+  struct bpacket_attr_vec_arr vecarr;
+
+  if (DISABLE_BGP_ANNOUNCE)
+    return;
+
+  if (!subgrp)
+    return;
+
+  peer = SUBGRP_PEER (subgrp);
+  afi = SUBGRP_AFI (subgrp);
+  safi = SUBGRP_SAFI (subgrp);
+  bpacket_attr_vec_arr_reset (&vecarr);
+
+  /* Without HAVE_IPV6 a non-IPv4 afi would otherwise leave 'p' unset. */
+  memset (&p, 0, sizeof (p));
+  if (afi == AFI_IP)
+    str2prefix ("0.0.0.0/0", &p);
+#ifdef HAVE_IPV6
+  else
+    str2prefix ("::/0", &p);
+#endif /* HAVE_IPV6 */
+
+  /* Logging the attribute. */
+  if (bgp_debug_update(NULL, &p, subgrp->update_group, 0))
+    {
+      char attrstr[BUFSIZ];
+      char buf[INET6_BUFSIZ];
+      attrstr[0] = '\0';
+
+      bgp_dump_attr (peer, attr, attrstr, BUFSIZ);
+      zlog_debug ("u%llu:s%llu send UPDATE %s/%d %s",
+                  (SUBGRP_UPDGRP (subgrp))->id, subgrp->id,
+                  inet_ntop (p.family, &(p.u.prefix), buf, INET6_BUFSIZ),
+                  p.prefixlen, attrstr);
+    }
+
+  s = stream_new (BGP_MAX_PACKET_SIZE);
+
+  /* Make BGP update packet. */
+  bgp_packet_set_marker (s, BGP_MSG_UPDATE);
+
+  /* Unfeasible Routes Length. */
+  stream_putw (s, 0);
+
+  /* Make place for total attribute length. */
+  pos = stream_get_endp (s);
+  stream_putw (s, 0);
+  total_attr_len = bgp_packet_attribute (NULL, peer, s, attr, &vecarr, &p,
+                                         afi, safi, from, NULL, NULL);
+
+  /* Set Total Path Attribute Length. */
+  stream_putw_at (s, pos, total_attr_len);
+
+  /* NLRI set. */
+  if (p.family == AF_INET && safi == SAFI_UNICAST)
+    stream_put_prefix (s, &p);
+
+  /* Set size. */
+  bgp_packet_set_size (s);
+
+  /* The queue takes ownership of the copy; the work stream is freed. */
+  packet = stream_dup (s);
+  stream_free (s);
+  (void) bpacket_queue_add (SUBGRP_PKTQ (subgrp), packet, &vecarr);
+  subgroup_trigger_write(subgrp);
+}
+
+/*
+ * Build an UPDATE withdrawing the default route (0.0.0.0/0 for AFI_IP,
+ * ::/0 otherwise when IPv6 is compiled in), queue it on the subgroup's
+ * packet queue and trigger a write.
+ *
+ * Does nothing when DISABLE_BGP_ANNOUNCE is set or 'subgrp' is NULL.
+ */
+void
+subgroup_default_withdraw_packet (struct update_subgroup *subgrp)
+{
+  struct stream *s;
+  struct stream *packet;
+  struct prefix p;
+  unsigned long attrlen_pos = 0;
+  unsigned long cp;
+  bgp_size_t unfeasible_len;
+  bgp_size_t total_attr_len;
+  size_t mp_start = 0;
+  size_t mplen_pos = 0;
+  afi_t afi;
+  safi_t safi;
+
+  if (DISABLE_BGP_ANNOUNCE)
+    return;
+
+  /* Same guard as subgroup_default_update_packet(). */
+  if (!subgrp)
+    return;
+
+  afi = SUBGRP_AFI (subgrp);
+  safi = SUBGRP_SAFI (subgrp);
+
+  /* Without HAVE_IPV6 a non-IPv4 afi would otherwise leave 'p' unset. */
+  memset (&p, 0, sizeof (p));
+  if (afi == AFI_IP)
+    str2prefix ("0.0.0.0/0", &p);
+#ifdef HAVE_IPV6
+  else
+    str2prefix ("::/0", &p);
+#endif /* HAVE_IPV6 */
+
+  total_attr_len = 0;
+
+  if (bgp_debug_update(NULL, &p, subgrp->update_group, 0))
+    {
+      char buf[INET6_BUFSIZ];
+
+      zlog_debug ("u%llu:s%llu send UPDATE %s/%d -- unreachable",
+                  (SUBGRP_UPDGRP (subgrp))->id, subgrp->id,
+                  inet_ntop (p.family, &(p.u.prefix), buf, INET6_BUFSIZ),
+                  p.prefixlen);
+    }
+
+  s = stream_new (BGP_MAX_PACKET_SIZE);
+
+  /* Make BGP update packet. */
+  bgp_packet_set_marker (s, BGP_MSG_UPDATE);
+
+  /* Unfeasible Routes Length. */
+  cp = stream_get_endp (s);
+  stream_putw (s, 0);
+
+  /* Withdrawn Routes. */
+  if (p.family == AF_INET && safi == SAFI_UNICAST)
+    {
+      stream_put_prefix (s, &p);
+
+      unfeasible_len = stream_get_endp (s) - cp - 2;
+
+      /* Set unfeasible len. */
+      stream_putw_at (s, cp, unfeasible_len);
+
+      /* Set total path attribute length. */
+      stream_putw (s, 0);
+    }
+  else
+    {
+      attrlen_pos = stream_get_endp (s);
+      stream_putw (s, 0);
+      mp_start = stream_get_endp (s);
+      mplen_pos = bgp_packet_mpunreach_start (s, afi, safi);
+      bgp_packet_mpunreach_prefix (s, &p, afi, safi, NULL, NULL);
+
+      /* Set the mp_unreach attr's length */
+      bgp_packet_mpunreach_end (s, mplen_pos);
+
+      /* Set total path attribute length. */
+      total_attr_len = stream_get_endp (s) - mp_start;
+      stream_putw_at (s, attrlen_pos, total_attr_len);
+    }
+
+  bgp_packet_set_size (s);
+
+  /* The queue takes ownership of the copy; the work stream is freed. */
+  packet = stream_dup (s);
+  stream_free (s);
+
+  (void) bpacket_queue_add (SUBGRP_PKTQ (subgrp), packet, NULL);
+  subgroup_trigger_write(subgrp);
+}
+
+/*
+ * Copy the route-map / reflection markers found in
+ * attr->rmap_change_flags onto the next-hop entry of 'vecarr'.
+ * 'type' is not consulted; the flags always go to BGP_ATTR_VEC_NH.
+ */
+static void
+bpacket_vec_arr_inherit_attr_flags (struct bpacket_attr_vec_arr *vecarr,
+                                    bpacket_attr_vec_type type,
+                                    struct attr *attr)
+{
+  if (CHECK_FLAG (attr->rmap_change_flags,
+                  BATTR_RMAP_NEXTHOP_CHANGED))
+    SET_FLAG (vecarr->entries[BGP_ATTR_VEC_NH].flags,
+              BPACKET_ATTRVEC_FLAGS_RMAP_CHANGED);
+
+  if (CHECK_FLAG (attr->rmap_change_flags,
+                  BATTR_RMAP_NEXTHOP_PEER_ADDRESS))
+    SET_FLAG (vecarr->entries[BGP_ATTR_VEC_NH].flags,
+              BPACKET_ATTRVEC_FLAGS_RMAP_NH_PEER_ADDRESS);
+
+  if (CHECK_FLAG (attr->rmap_change_flags, BATTR_REFLECTED))
+    SET_FLAG (vecarr->entries[BGP_ATTR_VEC_NH].flags,
+              BPACKET_ATTRVEC_FLAGS_REFLECTED);
+}
+
+/* Reset the Attributes vector array. The vector array is used to override
+ * certain output parameters in the packet for a particular peer
+ */
+void
+bpacket_attr_vec_arr_reset (struct bpacket_attr_vec_arr *vecarr)
+{
+  int i;
+
+  if (!vecarr)
+    return;
+
+  for (i = 0; i < BGP_ATTR_VEC_MAX; i++)
+    {
+      vecarr->entries[i].flags = 0;
+      vecarr->entries[i].offset = 0;
+    }
+}
+
+/* Setup a particular node entry in the vecarr: mark it updated, record
+ * the current end position of 's' as its offset and, when 'attr' is
+ * given, inherit the attribute's route-map flags. */
+void
+bpacket_attr_vec_arr_set_vec (struct bpacket_attr_vec_arr *vecarr,
+                              bpacket_attr_vec_type type, struct stream *s,
+                              struct attr *attr)
+{
+  if (!vecarr)
+    return;
+  assert (type < BGP_ATTR_VEC_MAX);
+
+  SET_FLAG (vecarr->entries[type].flags, BPACKET_ATTRVEC_FLAGS_UPDATED);
+  vecarr->entries[type].offset = stream_get_endp (s);
+  if (attr)
+    bpacket_vec_arr_inherit_attr_flags(vecarr, type, attr);
+}
diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c
index eb3c01e9f5..5cfbaebeb3 100644
--- a/bgpd/bgp_vty.c
+++ b/bgpd/bgp_vty.c
@@ -30,6 +30,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 #include
"log.h"
 #include "memory.h"
 #include "hash.h"
+#include "queue.h"
 
 #include "bgpd/bgpd.h"
 #include "bgpd/bgp_advertise.h"
@@ -50,6 +51,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 #include "bgpd/bgp_vty.h"
 #include "bgpd/bgp_mpath.h"
 #include "bgpd/bgp_packet.h"
+#include "bgpd/bgp_updgrp.h"
 
 extern struct in_addr router_id_zebra;
 
@@ -1027,6 +1029,51 @@ DEFUN (no_bgp_wpkt_quanta,
   return bgp_wpkt_quanta_config_vty(vty, argv[0], 0);
 }
 
+/*
+ * Set (set != 0) the coalesce time of the BGP instance in vty->index
+ * from 'num', or reset it to BGP_DEFAULT_SUBGROUP_COALESCE_TIME.
+ */
+int
+bgp_coalesce_config_vty (struct vty *vty, const char *num, char set)
+{
+  struct bgp *bgp;
+
+  bgp = vty->index;
+
+  if (set)
+    VTY_GET_INTEGER_RANGE ("coalesce-time", bgp->coalesce_time, num,
+                           0, 4294967295);
+  else
+    bgp->coalesce_time = BGP_DEFAULT_SUBGROUP_COALESCE_TIME;
+
+  return CMD_SUCCESS;
+}
+
+/*
+ * Write the "coalesce-time" line when the value differs from the
+ * default. The CLI accepts 0-4294967295, so the value is printed as
+ * unsigned; %d would show large values as negative numbers.
+ */
+int
+bgp_config_write_coalesce_time (struct vty *vty, struct bgp *bgp)
+{
+  if (bgp->coalesce_time != BGP_DEFAULT_SUBGROUP_COALESCE_TIME)
+    vty_out (vty, " coalesce-time %u%s",
+             bgp->coalesce_time, VTY_NEWLINE);
+
+  return 0;
+}
+
+
+DEFUN (bgp_coalesce_time,
+       bgp_coalesce_time_cmd,
+       "coalesce-time <0-4294967295>",
+       "Subgroup coalesce timer\n"
+       "Subgroup coalesce timer value (in ms)\n")
+{
+  return bgp_coalesce_config_vty(vty, argv[0], 1);
+}
+
+/* One help string per command token: NO_STR covers the leading "no". */
+DEFUN (no_bgp_coalesce_time,
+       no_bgp_coalesce_time_cmd,
+       "no coalesce-time <0-4294967295>",
+       NO_STR
+       "Subgroup coalesce timer\n"
+       "Subgroup coalesce timer value (in ms)\n")
+{
+  return bgp_coalesce_config_vty(vty, argv[0], 0);
+}
+
 /* Maximum-paths configuration */
 DEFUN (bgp_maxpaths,
        bgp_maxpaths_cmd,
@@ -1757,37 +1804,39 @@ ALIAS (no_bgp_default_local_preference,
        "local preference (higher=more preferred)\n"
        "Configure default local preference value\n")
 
-static void
-peer_announce_routes_if_rmap_out (struct bgp *bgp)
+DEFUN (bgp_default_subgroup_pkt_queue_max,
+       bgp_default_subgroup_pkt_queue_max_cmd,
+       "bgp default subgroup-pkt-queue-max <20-100>",
+       "BGP specific commands\n"
+       "Configure BGP defaults\n"
+       "subgroup-pkt-queue-max\n"
+       "Configure subgroup packet queue max\n")
 {
-  struct peer *peer;
-  struct listnode *node, *nnode;
-  struct bgp_filter *filter;
-  afi_t afi;
-  safi_t safi;
+  struct bgp *bgp;
+  u_int32_t max_size;
 
-  /* Reannounce all routes to appropriate neighbors */
-  for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer))
-    {
-      for (afi = AFI_IP; afi < AFI_MAX; afi++)
-        for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
-          {
-            if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_REFLECTOR_CLIENT))
-              {
-                /* check if there's an out route-map on this client */
-                filter = &peer->filter[afi][safi];
-                if (ROUTE_MAP_OUT_NAME(filter))
-                  {
-                    if (bgp_debug_update(peer, NULL, 0))
-                      zlog_debug("%s: Announcing routes again for peer %s"
-                                 "(afi=%d, safi=%d", __func__, peer->host, afi,
-                                 safi);
+  bgp = vty->index;
 
-                    bgp_announce_route_all(peer);
-                  }
-              }
-          }
-    }
+  VTY_GET_INTEGER ("subgroup packet queue max", max_size, argv[0]);
+
+  bgp_default_subgroup_pkt_queue_max_set (bgp, max_size);
+
+  return CMD_SUCCESS;
+}
+
+DEFUN (no_bgp_default_subgroup_pkt_queue_max,
+       no_bgp_default_subgroup_pkt_queue_max_cmd,
+       "no bgp default subgroup-pkt-queue-max",
+       NO_STR
+       "BGP specific commands\n"
+       "Configure BGP defaults\n"
+       "subgroup-pkt-queue-max\n")
+{
+  struct bgp *bgp;
+
+  bgp = vty->index;
+  bgp_default_subgroup_pkt_queue_max_unset (bgp);
+  return CMD_SUCCESS;
 }
 
 DEFUN (bgp_rr_allow_outbound_policy,
@@ -1806,7 +1855,7 @@ DEFUN (bgp_rr_allow_outbound_policy,
   if (!bgp_flag_check(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY))
     {
       bgp_flag_set(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY);
-      peer_announce_routes_if_rmap_out(bgp);
+      update_group_announce_rrclients(bgp);
     }
 
   return CMD_SUCCESS;
@@ -1828,7 +1877,7 @@ DEFUN (no_bgp_rr_allow_outbound_policy,
   if (bgp_flag_check(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY))
     {
       bgp_flag_unset(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY);
-      peer_announce_routes_if_rmap_out(bgp);
+      update_group_announce_rrclients(bgp);
     }
 
   return CMD_SUCCESS;
@@ -2197,6 +2246,43 @@ ALIAS (no_neighbor_local_as,
        "Do not prepend
local-as to updates from ebgp peers\n" "Do not prepend local-as to updates from ibgp peers\n") +DEFUN (neighbor_solo, + neighbor_solo_cmd, + NEIGHBOR_CMD2 "solo", + NEIGHBOR_STR + NEIGHBOR_ADDR_STR2 + "Solo peer - part of its own update group\n") +{ + struct peer *peer; + int ret; + + peer = peer_and_group_lookup_vty (vty, argv[0]); + if (! peer) + return CMD_WARNING; + + ret = update_group_adjust_soloness(peer, 1); + return bgp_vty_return (vty, ret); +} + +DEFUN (no_neighbor_solo, + no_neighbor_solo_cmd, + NO_NEIGHBOR_CMD2 "solo", + NO_STR + NEIGHBOR_STR + NEIGHBOR_ADDR_STR2 + "Solo peer - part of its own update group\n") +{ + struct peer *peer; + int ret; + + peer = peer_and_group_lookup_vty (vty, argv[0]); + if (! peer) + return CMD_WARNING; + + ret = update_group_adjust_soloness(peer, 0); + return bgp_vty_return (vty, ret); +} + DEFUN (neighbor_password, neighbor_password_cmd, NEIGHBOR_CMD2 "password LINE", @@ -7684,6 +7770,12 @@ DEFUN (show_bgp_memory, mtype_memstr (memstrbuf, sizeof (memstrbuf), count * sizeof (struct bgp_static)), VTY_NEWLINE); + + if ((count = mtype_stats_alloc (MTYPE_BGP_PACKET))) + vty_out (vty, "%ld Packets, using %s of memory%s", count, + mtype_memstr (memstrbuf, sizeof (memstrbuf), + count * sizeof (struct bpacket)), + VTY_NEWLINE); /* Adj-In/Out */ if ((count = mtype_stats_alloc (MTYPE_BGP_ADJ_IN))) @@ -7876,6 +7968,9 @@ bgp_show_summary (struct vty *vty, struct bgp *bgp, int afi, int safi, char *del if (bgp->v_maxmed_admin) vty_out (vty, "Max-med administrative active%s", VTY_NEWLINE); + vty_out(vty, "BGP table version %llu%s", + bgp_table_version(bgp->rib[afi][safi]), VTY_NEWLINE); + ents = bgp_table_count (bgp->rib[afi][safi]); vty_out (vty, "RIB entries %ld, using %s of memory%s", ents, mtype_memstr (memstrbuf, sizeof (memstrbuf), @@ -7927,7 +8022,7 @@ bgp_show_summary (struct vty *vty, struct bgp *bgp, int afi, int safi, char *del vty_out(vty, "%c", *delimit); if (!delimit) - vty_out (vty, "%5u %7d %7d %8d %4d %4u ", + vty_out 
(vty, "%5u %7d %7d %8lu %4d %4u ", peer->as, peer->open_in + peer->update_in + peer->keepalive_in + peer->notify_in + peer->refresh_in @@ -7935,12 +8030,11 @@ bgp_show_summary (struct vty *vty, struct bgp *bgp, int afi, int safi, char *del peer->open_out + peer->update_out + peer->keepalive_out + peer->notify_out + peer->refresh_out + peer->dynamic_cap_out, + peer->version[afi][safi], 0, - 0, - peer->sync[afi][safi]->update.count + - peer->sync[afi][safi]->withdraw.count); + (unsigned long) peer->obuf->count); else - vty_out (vty, "%5u %c %7d %c %7d %c %8d %c %4d %c %4u %c", + vty_out (vty, "%5u %c %7d %c %7d %c %8lu %c %4d %c %4u %c", peer->as, *delimit, peer->open_in + peer->update_in + peer->keepalive_in + peer->notify_in + peer->refresh_in @@ -7948,10 +8042,9 @@ bgp_show_summary (struct vty *vty, struct bgp *bgp, int afi, int safi, char *del peer->open_out + peer->update_out + peer->keepalive_out + peer->notify_out + peer->refresh_out + peer->dynamic_cap_out, *delimit, + peer->version[afi][safi], *delimit, 0, *delimit, - 0, *delimit, - peer->sync[afi][safi]->update.count + - peer->sync[afi][safi]->withdraw.count, *delimit); + (unsigned long) peer->obuf->count, *delimit); vty_out (vty, "%8s", peer_uptime (peer->uptime, timebuf, BGP_UPTIME_LEN)); @@ -8320,6 +8413,7 @@ static void bgp_show_peer_afi (struct vty *vty, struct peer *p, afi_t afi, safi_t safi) { struct bgp_filter *filter; + struct peer_af *paf; char orf_pfx_name[BUFSIZ]; int orf_pfx_count; @@ -8331,6 +8425,17 @@ bgp_show_peer_afi (struct vty *vty, struct peer *p, afi_t afi, safi_t safi) if (p->af_group[afi][safi]) vty_out (vty, " %s peer-group member%s", p->group->name, VTY_NEWLINE); + paf = peer_af_find(p, afi, safi); + if (paf && PAF_SUBGRP(paf)) + { + vty_out (vty, " Update group %llu, subgroup %llu%s", + PAF_UPDGRP(paf)->id, PAF_SUBGRP(paf)->id, VTY_NEWLINE); + vty_out (vty, " Packet Queue length %d%s", + bpacket_queue_virtual_length(paf), VTY_NEWLINE); + } else + { + vty_out(vty, " Not part of any 
update group%s", VTY_NEWLINE); + } if (CHECK_FLAG (p->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_ADV) || CHECK_FLAG (p->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_RCV) || CHECK_FLAG (p->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_OLD_RCV) @@ -8425,7 +8530,8 @@ bgp_show_peer_afi (struct vty *vty, struct peer *p, afi_t afi, safi_t safi) vty_out (vty, " default route-map %s%s,", p->default_rmap[afi][safi].map ? "*" : "", p->default_rmap[afi][safi].name); - if (CHECK_FLAG (p->af_sflags[afi][safi], PEER_STATUS_DEFAULT_ORIGINATE)) + if (paf && PAF_SUBGRP(paf) && CHECK_FLAG(PAF_SUBGRP(paf)->sflags, + SUBGRP_STATUS_DEFAULT_ORIGINATE)) vty_out (vty, " default sent%s", VTY_NEWLINE); else vty_out (vty, " default not sent%s", VTY_NEWLINE); @@ -8962,7 +9068,7 @@ bgp_show_peer (struct vty *vty, struct peer *p) vty_out (vty, "Next connect timer due in %ld seconds%s", thread_timer_remain_second (p->t_connect), VTY_NEWLINE); if (p->t_routeadv) - vty_out (vty, "MRAI (interval %ld) timer expires in %ld seconds%s", + vty_out (vty, "MRAI (interval %u) timer expires in %ld seconds%s", p->v_routeadv, thread_timer_remain_second (p->t_routeadv), VTY_NEWLINE); @@ -9664,6 +9770,205 @@ ALIAS (show_bgp_instance_ipv6_safi_rsclient_summary, #endif /* HAVE IPV6 */ +DEFUN (show_ip_bgp_updgrps, + show_ip_bgp_updgrps_cmd, + "show ip bgp update-groups summary", + SHOW_STR + IP_STR + BGP_STR + "BGP update groups\n" + "Summary information\n") +{ + struct bgp *bgp; + + bgp = bgp_get_default(); + if (bgp) + update_group_show(bgp, AFI_IP, SAFI_UNICAST, vty); + return CMD_SUCCESS; +} + +DEFUN (show_bgp_ipv6_updgrps, + show_bgp_ipv6_updgrps_cmd, + "show bgp update-groups summary", + SHOW_STR + BGP_STR + "BGP update groups\n" + "Summary information\n") +{ + struct bgp *bgp; + + bgp = bgp_get_default(); + if (bgp) + update_group_show(bgp, AFI_IP6, SAFI_UNICAST, vty); + return CMD_SUCCESS; +} + +DEFUN (show_bgp_updgrps, + show_bgp_updgrps_cmd, + "show bgp (ipv4|ipv6) (unicast|multicast) update-groups summary", + 
SHOW_STR + BGP_STR + "Address family\n" + "Address family\n" + "Address Family modifier\n" + "Address Family modifier\n" + "BGP update groups\n" + "Summary information\n") +{ + struct bgp *bgp; + afi_t afi; + safi_t safi; + + afi = (strcmp(argv[0], "ipv4") == 0) ? AFI_IP : AFI_IP6; + safi = (strncmp (argv[1], "m", 1) == 0) ? SAFI_MULTICAST : SAFI_UNICAST; + bgp = bgp_get_default(); + if (bgp) + update_group_show(bgp, afi, safi, vty); + return CMD_SUCCESS; +} + +DEFUN (show_bgp_updgrps_stats, + show_bgp_updgrps_stats_cmd, + "show bgp update-groups statistics", + SHOW_STR + BGP_STR + "BGP update groups\n" + "Statistics\n") +{ + struct bgp *bgp; + + bgp = bgp_get_default(); + if (bgp) + update_group_show_stats(bgp, vty); + + return CMD_SUCCESS; +} + +static void +show_bgp_updgrps_adj_info_aux (struct vty *vty, afi_t afi, safi_t safi, + const char *what, u_int64_t subgrp_id) +{ + struct bgp *bgp; + bgp = bgp_get_default(); + if (bgp) + { + if (!strcmp(what, "advertise-queue")) + update_group_show_adj_queue(bgp, afi, safi, vty, subgrp_id); + else if (!strcmp(what, "advertised-routes")) + update_group_show_advertised(bgp, afi, safi, vty, subgrp_id); + else if (!strcmp(what, "packet-queue")) + update_group_show_packet_queue(bgp, afi, safi, vty, subgrp_id); + } +} + +DEFUN (show_ip_bgp_updgrps_adj, + show_ip_bgp_updgrps_adj_cmd, + "show ip bgp update-groups (advertise-queue|advertised-routes|packet-queue)", + SHOW_STR + IP_STR + BGP_STR + "BGP update groups\n" + "Advertisement queue\n" + "Announced routes\n" + "Packet queue\n") +{ + show_bgp_updgrps_adj_info_aux(vty, AFI_IP, SAFI_UNICAST, argv[0], 0); + return CMD_SUCCESS; +} + +DEFUN (show_bgp_updgrps_afi_adj, + show_bgp_updgrps_afi_adj_cmd, + "show bgp (ipv4|ipv6) (unicast|multicast) update-groups (advertise-queue|advertised-routes|packet-queue)", + SHOW_STR + BGP_STR + "Address family\n" + "Address family\n" + "Address Family modifier\n" + "Address Family modifier\n" + "BGP update groups\n" + "Advertisement queue\n" + 
"Announced routes\n" + "Packet queue\n") +{ + afi_t afi; + safi_t safi; + + afi = (strcmp(argv[0], "ipv4") == 0) ? AFI_IP : AFI_IP6; + safi = (strncmp (argv[1], "m", 1) == 0) ? SAFI_MULTICAST : SAFI_UNICAST; + show_bgp_updgrps_adj_info_aux(vty, afi, safi, argv[2], 0); +} + +DEFUN (show_bgp_updgrps_adj, + show_bgp_updgrps_adj_cmd, + "show bgp update-groups (advertise-queue|advertised-routes|packet-queue)", + SHOW_STR + BGP_STR + "BGP update groups\n" + "Advertisement queue\n" + "Announced routes\n" + "Packet queue\n") +{ + show_bgp_updgrps_adj_info_aux(vty, AFI_IP6, SAFI_UNICAST, argv[0], 0); + return CMD_SUCCESS; +} + +DEFUN (show_ip_bgp_updgrps_adj_s, + show_ip_bgp_updgrps_adj_subgroup_cmd, + "show ip bgp update-groups SUBGROUP-ID (advertise-queue|advertised-routes|packet-queue)", + SHOW_STR + IP_STR + BGP_STR + "BGP update groups\n" + "64-bit subgroup id\n" + "Advertisement queue\n" + "Announced routes\n" + "Packet queue\n") +{ + show_bgp_updgrps_adj_info_aux(vty, AFI_IP, SAFI_UNICAST, argv[1], + atoll(argv[0])); + return CMD_SUCCESS; +} + +DEFUN (show_bgp_updgrps_adj_s, + show_bgp_updgrps_adj_subgroup_cmd, + "show bgp update-groups SUBGROUP-ID (advertise-queue|advertised-routes|packet-queue)", + SHOW_STR + BGP_STR + "BGP update groups\n" + "64-bit subgroup id\n" + "Advertisement queue\n" + "Announced routes\n" + "Packet queue\n") +{ + show_bgp_updgrps_adj_info_aux(vty, AFI_IP6, SAFI_UNICAST, argv[1], + atoll(argv[0])); + return CMD_SUCCESS; +} + +DEFUN (show_bgp_updgrps_afi_adj_subgroup, + show_bgp_updgrps_afi_adj_subgroup_cmd, + "show bgp (ipv4|ipv6) (unicast|multicast) update-groups SUBGROUP-ID (advertise-queue|advertised-routes|packet-queue)", + SHOW_STR + BGP_STR + "Address family\n" + "Address family\n" + "Address Family modifier\n" + "Address Family modifier\n" + "BGP update groups\n" + "64-bit subgroup id\n" + "Advertisement queue\n" + "Announced routes\n" + "Packet queue\n") +{ + afi_t afi; + safi_t safi; + + afi = (strcmp(argv[0], "ipv4") == 0) ? 
AFI_IP : AFI_IP6; + safi = (strncmp (argv[1], "m", 1) == 0) ? SAFI_MULTICAST : SAFI_UNICAST; + show_bgp_updgrps_adj_info_aux(vty, afi, safi, argv[3], atoll(argv[2])); +} + + /* Redistribute VTY commands. */ DEFUN (bgp_redistribute_ipv4, @@ -10393,6 +10698,9 @@ bgp_vty_init (void) install_element (BGP_NODE, &bgp_wpkt_quanta_cmd); install_element (BGP_NODE, &no_bgp_wpkt_quanta_cmd); + install_element (BGP_NODE, &bgp_coalesce_time_cmd); + install_element (BGP_NODE, &no_bgp_coalesce_time_cmd); + /* "maximum-paths" commands. */ install_element (BGP_NODE, &bgp_maxpaths_cmd); install_element (BGP_NODE, &no_bgp_maxpaths_cmd); @@ -10496,6 +10804,10 @@ bgp_vty_init (void) install_element (BGP_NODE, &no_bgp_default_local_preference_cmd); install_element (BGP_NODE, &no_bgp_default_local_preference_val_cmd); + /* "bgp default subgroup-pkt-queue-max" commands. */ + install_element (BGP_NODE, &bgp_default_subgroup_pkt_queue_max_cmd); + install_element (BGP_NODE, &no_bgp_default_subgroup_pkt_queue_max_cmd); + /* bgp ibgp-allow-policy-mods command */ install_element (BGP_NODE, &bgp_rr_allow_outbound_policy_cmd); install_element (BGP_NODE, &no_bgp_rr_allow_outbound_policy_cmd); @@ -10521,6 +10833,10 @@ bgp_vty_init (void) install_element (BGP_NODE, &no_neighbor_local_as_val2_cmd); install_element (BGP_NODE, &no_neighbor_local_as_val3_cmd); + /* "neighbor solo" commands. */ + install_element (BGP_NODE, &neighbor_solo_cmd); + install_element (BGP_NODE, &no_neighbor_solo_cmd); + /* "neighbor password" commands. */ install_element (BGP_NODE, &neighbor_password_cmd); install_element (BGP_NODE, &no_neighbor_password_cmd); @@ -11353,6 +11669,15 @@ bgp_vty_init (void) /* "show ip bgp summary" commands. 
*/ install_element (VIEW_NODE, &show_ip_bgp_summary_cmd); install_element (VIEW_NODE, &show_ip_bgp_summary_csv_cmd); + install_element (VIEW_NODE, &show_ip_bgp_updgrps_cmd); + install_element (VIEW_NODE, &show_bgp_updgrps_cmd); + install_element (VIEW_NODE, &show_bgp_ipv6_updgrps_cmd); + install_element (VIEW_NODE, &show_ip_bgp_updgrps_adj_cmd); + install_element (VIEW_NODE, &show_bgp_updgrps_adj_cmd); + install_element (VIEW_NODE, &show_bgp_updgrps_afi_adj_cmd); + install_element (VIEW_NODE, &show_ip_bgp_updgrps_adj_subgroup_cmd); + install_element (VIEW_NODE, &show_bgp_updgrps_adj_subgroup_cmd); + install_element (VIEW_NODE, &show_bgp_updgrps_afi_adj_subgroup_cmd); install_element (VIEW_NODE, &show_ip_bgp_instance_summary_cmd); install_element (VIEW_NODE, &show_ip_bgp_ipv4_summary_cmd); install_element (VIEW_NODE, &show_bgp_ipv4_safi_summary_cmd); @@ -11371,6 +11696,15 @@ bgp_vty_init (void) #endif /* HAVE_IPV6 */ install_element (RESTRICTED_NODE, &show_ip_bgp_summary_cmd); install_element (RESTRICTED_NODE, &show_ip_bgp_summary_csv_cmd); + install_element (RESTRICTED_NODE, &show_ip_bgp_updgrps_cmd); + install_element (RESTRICTED_NODE, &show_bgp_updgrps_cmd); + install_element (RESTRICTED_NODE, &show_bgp_ipv6_updgrps_cmd); + install_element (RESTRICTED_NODE, &show_ip_bgp_updgrps_adj_cmd); + install_element (RESTRICTED_NODE, &show_bgp_updgrps_adj_cmd); + install_element (RESTRICTED_NODE, &show_bgp_updgrps_afi_adj_cmd); + install_element (RESTRICTED_NODE, &show_ip_bgp_updgrps_adj_subgroup_cmd); + install_element (RESTRICTED_NODE, &show_bgp_updgrps_adj_subgroup_cmd); + install_element (RESTRICTED_NODE, &show_bgp_updgrps_afi_adj_subgroup_cmd); install_element (RESTRICTED_NODE, &show_ip_bgp_instance_summary_cmd); install_element (RESTRICTED_NODE, &show_ip_bgp_ipv4_summary_cmd); install_element (RESTRICTED_NODE, &show_bgp_ipv4_safi_summary_cmd); @@ -11389,6 +11723,16 @@ bgp_vty_init (void) #endif /* HAVE_IPV6 */ install_element (ENABLE_NODE, &show_ip_bgp_summary_cmd); 
install_element (ENABLE_NODE, &show_ip_bgp_summary_csv_cmd); + install_element (ENABLE_NODE, &show_ip_bgp_updgrps_cmd); + install_element (ENABLE_NODE, &show_bgp_updgrps_cmd); + install_element (ENABLE_NODE, &show_bgp_ipv6_updgrps_cmd); + install_element (ENABLE_NODE, &show_bgp_updgrps_stats_cmd); + install_element (ENABLE_NODE, &show_ip_bgp_updgrps_adj_cmd); + install_element (ENABLE_NODE, &show_bgp_updgrps_adj_cmd); + install_element (ENABLE_NODE, &show_bgp_updgrps_afi_adj_cmd); + install_element (ENABLE_NODE, &show_ip_bgp_updgrps_adj_subgroup_cmd); + install_element (ENABLE_NODE, &show_bgp_updgrps_adj_subgroup_cmd); + install_element (ENABLE_NODE, &show_bgp_updgrps_afi_adj_subgroup_cmd); install_element (ENABLE_NODE, &show_ip_bgp_instance_summary_cmd); install_element (ENABLE_NODE, &show_ip_bgp_ipv4_summary_cmd); install_element (ENABLE_NODE, &show_bgp_ipv4_safi_summary_cmd); diff --git a/bgpd/bgp_vty.h b/bgpd/bgp_vty.h index 9caf0baace..1357e3c25a 100644 --- a/bgpd/bgp_vty.h +++ b/bgpd/bgp_vty.h @@ -27,5 +27,6 @@ extern void bgp_vty_init (void); extern const char *afi_safi_print (afi_t, safi_t); extern int bgp_config_write_update_delay (struct vty *, struct bgp *); extern int bgp_config_write_wpkt_quanta(struct vty *vty, struct bgp *bgp); +extern int bgp_config_write_coalesce_time(struct vty *vty, struct bgp *bgp); #endif /* _QUAGGA_BGP_VTY_H */ diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 78735caded..4c3f3dbb21 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -29,6 +29,7 @@ Boston, MA 02111-1307, USA. 
*/ #include "zclient.h" #include "routemap.h" #include "thread.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_route.h" diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 8ec27e338d..17bf8cf5d6 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -35,6 +35,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "plist.h" #include "linklist.h" #include "workqueue.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -62,6 +63,8 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #ifdef HAVE_SNMP #include "bgpd/bgp_snmp.h" #endif /* HAVE_SNMP */ +#include "bgpd/bgp_updgrp.h" + /* BGP process wide configuration. */ static struct bgp_master bgp_master; @@ -547,6 +550,104 @@ bgp_default_local_preference_unset (struct bgp *bgp) return 0; } +/* Local preference configuration. */ +int +bgp_default_subgroup_pkt_queue_max_set (struct bgp *bgp, u_int32_t queue_size) +{ + if (! bgp) + return -1; + + bgp->default_subgroup_pkt_queue_max = queue_size; + + return 0; +} + +int +bgp_default_subgroup_pkt_queue_max_unset (struct bgp *bgp) +{ + if (! 
bgp) + return -1; + bgp->default_subgroup_pkt_queue_max = BGP_DEFAULT_SUBGROUP_PKT_QUEUE_MAX; + + return 0; +} + +struct peer_af * +peer_af_create (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_af *af; + int afid; + + if (!peer) + return NULL; + + afid = afindex(afi, safi); + if (afid >= BGP_AF_MAX) + return NULL; + + assert(peer->peer_af_array[afid] == NULL); + + /* Allocate new peer af */ + af = XCALLOC (MTYPE_BGP_PEER_AF, sizeof (struct peer_af)); + peer->peer_af_array[afid] = af; + af->afi = afi; + af->safi = safi; + af->afid = afid; + af->peer = peer; + + //update_group_adjust_peer(af); + return af; +} + +struct peer_af * +peer_af_find (struct peer *peer, afi_t afi, safi_t safi) +{ + int afid; + + if (!peer) + return NULL; + + afid = afindex(afi, safi); + if (afid >= BGP_AF_MAX) + return NULL; + + return peer->peer_af_array[afid]; +} + +int +peer_af_delete (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_af *af; + int afid; + + if (!peer) + return -1; + + afid = afindex(afi, safi); + if (afid >= BGP_AF_MAX) + return -1; + + af = peer->peer_af_array[afid]; + if (!af) + return -1; + + bgp_stop_announce_route_timer (af); + + if (PAF_SUBGRP(af)) + { + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("u%llu:s%llu remove peer %s", + af->subgroup->update_group->id, af->subgroup->id, peer->host); + } + + update_subgroup_remove_peer (af->subgroup, af); + + peer->peer_af_array[afid] = NULL; + XFREE(MTYPE_BGP_PEER_AF, af); + return 0; +} + + /* If peer is RSERVER_CLIENT in at least one address family and is not member of a peer_group for that family, return 1. Used to check wether the peer is included in list bgp->rsclient. 
*/ @@ -906,8 +1007,10 @@ peer_new (struct bgp *bgp) void peer_xfer_config (struct peer *peer_dst, struct peer *peer_src) { + struct peer_af *paf; afi_t afi; safi_t safi; + int afindex; assert(peer_src); assert(peer_dst); @@ -953,6 +1056,9 @@ peer_xfer_config (struct peer *peer_dst, struct peer *peer_src) peer_dst->allowas_in[afi][safi] = peer_src->allowas_in[afi][safi]; } + PEERAF_FOREACH(peer_src, paf, afindex) + peer_af_create(peer_dst, paf->afi, paf->safi); + /* update-source apply */ if (peer_src->update_source) { @@ -1057,9 +1163,6 @@ peer_create (union sockunion *su, const char *conf_if, struct bgp *bgp, active = peer_active (peer); - if (afi && safi) - peer->afc[afi][safi] = 1; - /* Last read and reset time set */ peer->readtime = peer->resettime = bgp_clock (); @@ -1068,6 +1171,15 @@ peer_create (union sockunion *su, const char *conf_if, struct bgp *bgp, SET_FLAG (peer->flags, PEER_FLAG_CONFIG_NODE); + if (afi && safi) + { + peer->afc[afi][safi] = 1; + if (peer_af_create(peer, afi, safi) == NULL) + { + zlog_err("couldn't create af structure for peer %s", peer->host); + } + } + /* Set up peer's events and timers. */ if (! active && peer_active (peer)) bgp_timer_set (peer); @@ -1273,6 +1385,11 @@ peer_activate (struct peer *peer, afi_t afi, safi_t safi) peer->afc[afi][safi] = 1; + if (peer_af_create(peer, afi, safi) == NULL) + { + zlog_err("couldn't create af structure for peer %s", peer->host); + } + if (! active && peer_active (peer)) bgp_timer_set (peer); else @@ -1332,6 +1449,10 @@ peer_deactivate (struct peer *peer, afi_t afi, safi_t safi) /* De-activate the address family configuration. */ peer->afc[afi][safi] = 0; peer_af_flag_reset (peer, afi, safi); + if (peer_af_delete(peer, afi, safi) != 0) + { + zlog_err("couldn't delete af structure for peer %s", peer->host); + } if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { @@ -1551,6 +1672,9 @@ peer_delete (struct peer *peer) filter->usmap.name = NULL; peer->default_rmap[afi][safi].name = NULL; } + + FOREACH_AFI_SAFI (afi, safi) + peer_af_delete (peer, afi, safi); peer_unlock (peer); /* initial reference */ @@ -1563,19 +1687,6 @@ peer_group_cmp (struct peer_group *g1, struct peer_group *g2) return strcmp (g1->name, g2->name); } -/* If peer is configured at least one address family return 1. */ -static int -peer_group_active (struct peer *peer) -{ - if (peer->af_group[AFI_IP][SAFI_UNICAST] - || peer->af_group[AFI_IP][SAFI_MULTICAST] - || peer->af_group[AFI_IP][SAFI_MPLS_VPN] - || peer->af_group[AFI_IP6][SAFI_UNICAST] - || peer->af_group[AFI_IP6][SAFI_MULTICAST]) - return 1; - return 0; -} - /* Peer group cofiguration. */ static struct peer_group * peer_group_new (void) @@ -2052,6 +2163,11 @@ peer_group_bind (struct bgp *bgp, union sockunion *su, struct peer *peer, peer->af_group[afi][safi] = 1; peer->afc[afi][safi] = 1; + if (!peer_af_find(peer, afi, safi) && + peer_af_create(peer, afi, safi) == NULL) + { + zlog_err("couldn't create af structure for peer %s", peer->host); + } if (! 
peer->group) { peer->group = group; @@ -2151,6 +2267,10 @@ peer_group_unbind (struct bgp *bgp, struct peer *peer, peer->af_group[afi][safi] = 0; peer->afc[afi][safi] = 0; peer_af_flag_reset (peer, afi, safi); + if (peer_af_delete(peer, afi, safi) != 0) + { + zlog_err("couldn't delete af structure for peer %s", peer->host); + } if (peer->rib[afi][safi]) peer->rib[afi][safi] = NULL; @@ -2240,6 +2360,7 @@ bgp_create (as_t *as, const char *name) bgp->v_update_delay = BGP_UPDATE_DELAY_DEF; bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF; + bgp->default_subgroup_pkt_queue_max = BGP_DEFAULT_SUBGROUP_PKT_QUEUE_MAX; bgp->default_holdtime = BGP_DEFAULT_HOLDTIME; bgp->default_keepalive = BGP_DEFAULT_KEEPALIVE; bgp->restart_time = BGP_DEFAULT_RESTART_TIME; @@ -2251,12 +2372,12 @@ bgp_create (as_t *as, const char *name) bgp->name = strdup (name); bgp->wpkt_quanta = BGP_WRITE_PACKET_MAX; - bgp->adv_quanta = BGP_ADV_FIFO_QUANTA; - bgp->wd_quanta = BGP_WD_FIFO_QUANTA; + bgp->coalesce_time = BGP_DEFAULT_SUBGROUP_COALESCE_TIME; THREAD_TIMER_ON (master, bgp->t_startup, bgp_startup_timer_expire, bgp, bgp->restart_time); + update_group_init(bgp); return bgp; } @@ -2614,7 +2735,10 @@ peer_change_action (struct peer *peer, afi_t afi, safi_t safi, } } else if (type == peer_change_reset_out) - bgp_announce_route (peer, afi, safi); + { + update_group_adjust_peer(peer_af_find(peer, afi, safi)); + bgp_announce_route (peer, afi, safi); + } } struct peer_flag_action @@ -3386,8 +3510,11 @@ peer_default_originate_set (struct peer *peer, afi_t afi, safi_t safi, if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { - if (peer->status == Established && peer->afc_nego[afi][safi]) + if (peer->status == Established && peer->afc_nego[afi][safi]) { + update_group_adjust_peer(peer_af_find(peer, afi, safi)); bgp_default_originate (peer, afi, safi, 0); + bgp_announce_route (peer, afi, safi); + } return 0; } @@ -3405,8 +3532,11 @@ peer_default_originate_set (struct peer *peer, afi_t afi, safi_t safi, peer->default_rmap[afi][safi].map = route_map_lookup_by_name (rmap); } - if (peer->status == Established && peer->afc_nego[afi][safi]) + if (peer->status == Established && peer->afc_nego[afi][safi]) { + update_group_adjust_peer(peer_af_find(peer, afi, safi)); bgp_default_originate (peer, afi, safi, 0); + bgp_announce_route (peer, afi, safi); + } } return 0; } @@ -3437,8 +3567,11 @@ peer_default_originate_unset (struct peer *peer, afi_t afi, safi_t safi) if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { - if (peer->status == Established && peer->afc_nego[afi][safi]) + if (peer->status == Established && peer->afc_nego[afi][safi]) { + update_group_adjust_peer(peer_af_find(peer, afi, safi)); bgp_default_originate (peer, afi, safi, 1); + bgp_announce_route (peer, afi, safi); + } return 0; } @@ -3453,8 +3586,11 @@ peer_default_originate_unset (struct peer *peer, afi_t afi, safi_t safi) peer->default_rmap[afi][safi].name = NULL; peer->default_rmap[afi][safi].map = NULL; - if (peer->status == Established && peer->afc_nego[afi][safi]) + if (peer->status == Established && peer->afc_nego[afi][safi]) { + update_group_adjust_peer(peer_af_find(peer, afi, safi)); bgp_default_originate (peer, afi, safi, 1); + bgp_announce_route (peer, afi, safi); + } } return 0; } @@ -3670,8 +3806,12 @@ peer_advertise_interval_set (struct peer *peer, u_int32_t routeadv) peer->routeadv = routeadv; peer->v_routeadv = routeadv; - if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { + update_group_adjust_peer_afs (peer); + if (peer->status == Established) + bgp_announce_route_all (peer); return 0; + } /* peer-group member updates. */ group = peer->group; @@ -3680,6 +3820,9 @@ peer_advertise_interval_set (struct peer *peer, u_int32_t routeadv) SET_FLAG (peer->config, PEER_CONFIG_ROUTEADV); peer->routeadv = routeadv; peer->v_routeadv = routeadv; + update_group_adjust_peer_afs (peer); + if (peer->status == Established) + bgp_announce_route_all (peer); } return 0; @@ -3702,8 +3845,12 @@ peer_advertise_interval_unset (struct peer *peer) else peer->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; - if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { + update_group_adjust_peer_afs (peer); + if (peer->status == Established) + bgp_announce_route_all (peer); return 0; + } /* peer-group member updates. */ group = peer->group; @@ -3716,6 +3863,10 @@ peer_advertise_interval_unset (struct peer *peer) peer->v_routeadv = BGP_DEFAULT_IBGP_ROUTEADV; else peer->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + + update_group_adjust_peer_afs (peer); + if (peer->status == Established) + bgp_announce_route_all (peer); } return 0; @@ -4035,6 +4186,40 @@ peer_password_unset (struct peer *peer) return 0; } +/* + * peer_on_policy_change + * + * Helper function that is called after the name of the policy + * being used by a peer_af has changed. + */ +static void +peer_on_policy_change (struct peer *peer, afi_t afi, safi_t safi) +{ + update_group_adjust_peer (peer_af_find (peer, afi, safi)); +} + +/* Set route-map to the peer. 
*/ +static void +peer_reprocess_routes (struct peer *peer, int direct, + afi_t afi, safi_t safi) +{ + if (peer->status != Established) + return; + + if (direct != RMAP_OUT) + { + if (CHECK_FLAG (peer->af_flags[afi][safi], + PEER_FLAG_SOFT_RECONFIG)) + bgp_soft_reconfig_in (peer, afi, safi); + else if (CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_OLD_RCV) + || CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_NEW_RCV)) + bgp_route_refresh_send (peer, afi, safi, 0, 0, 0); + } + else + bgp_announce_route(peer, afi, safi); +} + + /* Set distribute list to the peer. */ int peer_distribute_set (struct peer *peer, afi_t afi, safi_t safi, int direct, @@ -4064,7 +4249,11 @@ peer_distribute_set (struct peer *peer, afi_t afi, safi_t safi, int direct, filter->dlist[direct].alist = access_list_lookup (afi, name); if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) - return 0; + { + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); + return 0; + } group = peer->group; for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) @@ -4078,6 +4267,9 @@ peer_distribute_set (struct peer *peer, afi_t afi, safi_t safi, int direct, free (filter->dlist[direct].name); filter->dlist[direct].name = strdup (name); filter->dlist[direct].alist = access_list_lookup (afi, name); + + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); } return 0; @@ -4113,6 +4305,8 @@ peer_distribute_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) free (filter->dlist[direct].name); filter->dlist[direct].name = strdup (gfilter->dlist[direct].name); filter->dlist[direct].alist = gfilter->dlist[direct].alist; + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); return 0; } } @@ -4123,7 +4317,11 @@ peer_distribute_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) filter->dlist[direct].alist = NULL; if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) - return 0; + { + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); + return 0; + } group = peer->group; for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) @@ -4137,6 +4335,8 @@ peer_distribute_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) free (filter->dlist[direct].name); filter->dlist[direct].name = NULL; filter->dlist[direct].alist = NULL; + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); } return 0; @@ -4158,6 +4358,9 @@ peer_distribute_update (struct access_list *access) for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) { + if (access->name) + update_group_policy_update(bgp, BGP_POLICY_FILTER_LIST, access->name, + 0, 0); for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) { for (afi = AFI_IP; afi < AFI_MAX; afi++) @@ -4224,7 +4427,11 @@ peer_prefix_list_set (struct peer *peer, afi_t afi, safi_t safi, int direct, filter->plist[direct].plist = prefix_list_lookup (afi, name); if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) - return 0; + { + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); + return 0; + } group = peer->group; for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) @@ -4238,6 +4445,9 @@ peer_prefix_list_set (struct peer *peer, afi_t afi, safi_t safi, int direct, free (filter->plist[direct].name); filter->plist[direct].name = strdup (name); filter->plist[direct].plist = prefix_list_lookup (afi, name); + + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); } return 0; } @@ -4272,6 +4482,8 @@ peer_prefix_list_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) free (filter->plist[direct].name); filter->plist[direct].name = strdup (gfilter->plist[direct].name); filter->plist[direct].plist = gfilter->plist[direct].plist; + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); return 0; } } @@ -4282,7 +4494,11 @@ peer_prefix_list_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) filter->plist[direct].plist = NULL; if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) - return 0; + { + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); + return 0; + } group = peer->group; for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) @@ -4296,6 +4512,9 @@ peer_prefix_list_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) free (filter->plist[direct].name); filter->plist[direct].name = NULL; filter->plist[direct].plist = NULL; + + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); } return 0; @@ -4317,6 +4536,13 @@ peer_prefix_list_update (struct prefix_list *plist) for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) { + + /* + * Update the prefix-list on update groups. + */ + update_group_policy_update(bgp, BGP_POLICY_PREFIX_LIST, + plist ? 
plist->name : NULL, 0, 0); + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) { for (afi = AFI_IP; afi < AFI_MAX; afi++) @@ -4379,7 +4605,11 @@ peer_aslist_set (struct peer *peer, afi_t afi, safi_t safi, int direct, filter->aslist[direct].aslist = as_list_lookup (name); if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) - return 0; + { + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); + return 0; + } group = peer->group; for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) @@ -4393,6 +4623,8 @@ peer_aslist_set (struct peer *peer, afi_t afi, safi_t safi, int direct, free (filter->aslist[direct].name); filter->aslist[direct].name = strdup (name); filter->aslist[direct].aslist = as_list_lookup (name); + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); } return 0; } @@ -4427,6 +4659,8 @@ peer_aslist_unset (struct peer *peer,afi_t afi, safi_t safi, int direct) free (filter->aslist[direct].name); filter->aslist[direct].name = strdup (gfilter->aslist[direct].name); filter->aslist[direct].aslist = gfilter->aslist[direct].aslist; + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); return 0; } } @@ -4437,7 +4671,11 @@ peer_aslist_unset (struct peer *peer,afi_t afi, safi_t safi, int direct) filter->aslist[direct].aslist = NULL; if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) - return 0; + { + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); + return 0; + } group = peer->group; for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) @@ -4451,6 +4689,9 @@ peer_aslist_unset (struct peer *peer,afi_t afi, safi_t safi, int direct) free (filter->aslist[direct].name); filter->aslist[direct].name = NULL; filter->aslist[direct].aslist = NULL; + + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); } return 0; @@ -4471,6 +4712,9 @@ peer_aslist_update (char *aslist_name) for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) { + update_group_policy_update(bgp, BGP_POLICY_FILTER_LIST, aslist_name, + 0, 0); + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) { for (afi = AFI_IP; afi < AFI_MAX; afi++) @@ -4507,6 +4751,7 @@ peer_aslist_update (char *aslist_name) } } } + static void peer_aslist_add (char *aslist_name) { @@ -4522,27 +4767,6 @@ peer_aslist_del (char *aslist_name) } -/* Set route-map to the peer. */ -static void -peer_reprocess_routes (struct peer *peer, int direct, - afi_t afi, safi_t safi) -{ - if (peer->status != Established) - return; - - if (direct != RMAP_OUT) - { - if (CHECK_FLAG (peer->af_flags[afi][safi], - PEER_FLAG_SOFT_RECONFIG)) - bgp_soft_reconfig_in (peer, afi, safi); - else if (CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_OLD_RCV) - || CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_NEW_RCV)) - bgp_route_refresh_send (peer, afi, safi, 0, 0, 0); - } - else - bgp_announce_route(peer, afi, safi); -} - int peer_route_map_set (struct peer *peer, afi_t afi, safi_t safi, int direct, const char *name) @@ -4572,6 +4796,8 @@ peer_route_map_set (struct peer *peer, afi_t afi, safi_t safi, int direct, if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { + if (direct == RMAP_OUT) + peer_on_policy_change (peer, afi, safi); peer_reprocess_routes(peer, direct, afi, safi); return 0; } @@ -4588,6 +4814,8 @@ peer_route_map_set (struct peer *peer, afi_t afi, safi_t safi, int direct, free (filter->map[direct].name); filter->map[direct].name = strdup (name); filter->map[direct].map = route_map_lookup_by_name (name); + if (direct == RMAP_OUT) + peer_on_policy_change (peer, afi, safi); peer_reprocess_routes (peer, direct, afi, safi); } return 0; @@ -4626,6 +4854,8 @@ peer_route_map_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) free (filter->map[direct].name); filter->map[direct].name = strdup (gfilter->map[direct].name); filter->map[direct].map = gfilter->map[direct].map; + if (direct == RMAP_OUT) + peer_on_policy_change (peer, afi, safi); return 0; } } @@ -4637,6 +4867,8 @@ peer_route_map_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { + if (direct == RMAP_OUT) + peer_on_policy_change (peer, afi, safi); peer_reprocess_routes(peer, direct, afi, safi); return 0; } @@ -4653,6 +4885,8 @@ peer_route_map_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) free (filter->map[direct].name); filter->map[direct].name = NULL; filter->map[direct].map = NULL; + if (direct == RMAP_OUT) + peer_on_policy_change (peer, afi, safi); peer_reprocess_routes(peer, direct, afi, safi); } return 0; @@ -4683,6 +4917,7 @@ peer_unsuppress_map_set (struct peer *peer, afi_t afi, safi_t safi, if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { + peer_on_policy_change (peer, afi, safi); bgp_announce_route (peer, afi, safi); return 0; } @@ -4699,6 +4934,7 @@ peer_unsuppress_map_set (struct peer *peer, afi_t afi, safi_t safi, free (filter->usmap.name); filter->usmap.name = strdup (name); filter->usmap.map = route_map_lookup_by_name (name); + peer_on_policy_change (peer, afi, safi); bgp_announce_route (peer, afi, safi); } return 0; @@ -4727,6 +4963,7 @@ peer_unsuppress_map_unset (struct peer *peer, afi_t afi, safi_t safi) if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { + peer_on_policy_change (peer, afi, safi); bgp_announce_route(peer, afi, safi); return 0; } @@ -4743,6 +4980,7 @@ peer_unsuppress_map_unset (struct peer *peer, afi_t afi, safi_t safi) free (filter->usmap.name); filter->usmap.name = NULL; filter->usmap.map = NULL; + peer_on_policy_change (peer, afi, safi); bgp_announce_route(peer, afi, safi); } return 0; @@ -5301,6 +5539,11 @@ bgp_config_write_peer (struct vty *vty, struct bgp *bgp, vty_out (vty, " neighbor %s password %s%s", addr, peer->password, VTY_NEWLINE); + /* neighbor solo */ + if (CHECK_FLAG(peer->flags, PEER_FLAG_LONESOUL)) + if (!peer_group_active (peer)) + vty_out (vty, " neighbor %s solo%s", addr, VTY_NEWLINE); + /* BGP port. */ if (peer->port != BGP_PORT_DEFAULT) vty_out (vty, " neighbor %s port %d%s", addr, peer->port, @@ -5741,6 +5984,11 @@ bgp_config_write (struct vty *vty) vty_out (vty, " bgp default local-preference %d%s", bgp->default_local_pref, VTY_NEWLINE); + /* BGP default subgroup-pkt-queue-max. */ + if (bgp->default_subgroup_pkt_queue_max != BGP_DEFAULT_SUBGROUP_PKT_QUEUE_MAX) + vty_out (vty, " bgp default subgroup-pkt-queue-max %d%s", + bgp->default_subgroup_pkt_queue_max, VTY_NEWLINE); + /* BGP client-to-client reflection. 
*/ if (bgp_flag_check (bgp, BGP_FLAG_NO_CLIENT_TO_CLIENT)) vty_out (vty, " no bgp client-to-client reflection%s", VTY_NEWLINE); @@ -5797,6 +6045,9 @@ bgp_config_write (struct vty *vty) /* write quanta */ bgp_config_write_wpkt_quanta (vty, bgp); + /* coalesce time */ + bgp_config_write_coalesce_time(vty, bgp); + /* BGP graceful-restart. */ if (bgp->stalepath_time != BGP_DEFAULT_STALEPATH_TIME) vty_out (vty, " bgp graceful-restart stalepath-time %d%s", diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 6eafb59b20..9863649525 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -22,14 +22,42 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #define _QUAGGA_BGPD_H /* For union sockunion. */ +#include "queue.h" #include "sockunion.h" #include "routemap.h" +struct update_subgroup; +struct bpacket; + /* Typedef BGP specific types. */ typedef u_int32_t as_t; typedef u_int16_t as16_t; /* we may still encounter 16 Bit asnums */ typedef u_int16_t bgp_size_t; +#define max(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a > _b ? _a : _b; }) + +enum bgp_af_index +{ + BGP_AF_START, + BGP_AF_IPV4_UNICAST = BGP_AF_START, + BGP_AF_IPV4_MULTICAST, + BGP_AF_IPV4_VPN, + BGP_AF_IPV6_UNICAST, + BGP_AF_IPV6_MULTICAST, + BGP_AF_MAX +}; + +#define AF_FOREACH(af) \ + for ((af) = BGP_AF_START; (af) < BGP_AF_MAX; (af)++) + +#define FOREACH_AFI_SAFI(afi, safi) \ + for (afi = AFI_IP; afi < AFI_MAX; afi++) \ + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + + /* BGP master for system wide configurations and variables. */ struct bgp_master { @@ -61,6 +89,9 @@ struct bgp_master #define BGP_OPT_MULTIPLE_INSTANCE (1 << 1) #define BGP_OPT_CONFIG_CISCO (1 << 2) #define BGP_OPT_NO_LISTEN (1 << 3) + + u_int64_t updgrp_idspace; + u_int64_t subgrp_idspace; }; /* BGP route-map structure. */ @@ -106,6 +137,27 @@ struct bgp /* BGP route-server-clients. */ struct list *rsclient; + struct hash *update_groups[BGP_AF_MAX]; + + /* + * Global statistics for update groups. 
+ */ + struct { + u_int32_t join_events; + u_int32_t prune_events; + u_int32_t merge_events; + u_int32_t split_events; + u_int32_t updgrp_switch_events; + u_int32_t peer_refreshes_combined; + u_int32_t adj_count; + u_int32_t merge_checks_triggered; + + u_int32_t updgrps_created; + u_int32_t updgrps_deleted; + u_int32_t subgrps_created; + u_int32_t subgrps_deleted; + } update_group_stats; + /* BGP configuration. */ u_int16_t config; #define BGP_CONFIG_ROUTER_ID (1 << 0) @@ -212,6 +264,9 @@ struct bgp /* BGP default local-preference. */ u_int32_t default_local_pref; + /* BGP default subgroup pkt queue max */ + u_int32_t default_subgroup_pkt_queue_max; + /* BGP default timer. */ u_int32_t default_holdtime; u_int32_t default_keepalive; @@ -229,8 +284,7 @@ struct bgp } maxpaths[AFI_MAX][SAFI_MAX]; u_int32_t wpkt_quanta; /* per peer packet quanta to write */ - u_int32_t adv_quanta; /* adv FIFO size that triggers write */ - u_int32_t wd_quanta; /* withdraw FIFO size that triggers write */ + u_int32_t coalesce_time; }; #define BGP_ROUTE_ADV_HOLD(bgp) \ @@ -346,6 +400,38 @@ typedef enum #define BGP_MAX_PACKET_SIZE 4096 #define BGP_MAX_PACKET_SIZE_OVERFLOW 1024 +/* + * Trigger delay for bgp_announce_route(). + */ +#define BGP_ANNOUNCE_ROUTE_SHORT_DELAY_MS 100 +#define BGP_ANNOUNCE_ROUTE_DELAY_MS 500 + +struct peer_af +{ + /* back pointer to the peer */ + struct peer *peer; + + /* which subgroup the peer_af belongs to */ + struct update_subgroup *subgroup; + + /* for being part of an update subgroup's peer list */ + LIST_ENTRY(peer_af) subgrp_train; + + /* for being part of a packet's peer list */ + LIST_ENTRY(peer_af) pkt_train; + + struct bpacket *next_pkt_to_send; + + /* + * Trigger timer for bgp_announce_route(). + */ + struct thread *t_announce_route; + + afi_t afi; + safi_t safi; + int afid; +}; + /* BGP neighbor structure. */ struct peer { @@ -362,6 +448,10 @@ struct peer /* BGP peer group. 
*/ struct peer_group *group; u_char af_group[AFI_MAX][SAFI_MAX]; + u_int64_t version[AFI_MAX][SAFI_MAX]; + + /* BGP peer_af structures, per configured AF on this peer */ + struct peer_af *peer_af_array[BGP_AF_MAX]; /* Peer's remote AS number. */ as_t as; @@ -479,6 +569,7 @@ struct peer #define PEER_FLAG_DELETE (1 << 9) /* mark the peer for deleting */ #define PEER_FLAG_CONFIG_NODE (1 << 10) /* the node to update configs on */ #define PEER_FLAG_BFD (1 << 11) /* bfd */ +#define PEER_FLAG_LONESOUL (1 << 12) /* NSF mode (graceful restart) */ u_char nsf[AFI_MAX][SAFI_MAX]; @@ -572,8 +663,6 @@ struct peer struct thread *t_gr_restart; struct thread *t_gr_stale; - int radv_adjusted; /* flag if MRAI has been adjusted or not */ - /* workqueues */ struct work_queue *clear_node_queue; @@ -699,6 +788,12 @@ struct bgp_nlri bgp_size_t length; }; +/* Visit every non-NULL entry of (peer)->peer_af_array. (paf) is loaded + * only while (afi) < BGP_AF_MAX, so the array is never read out of bounds. */ +#define PEERAF_FOREACH(peer, paf, afi) \ + for ((afi) = BGP_AF_START; (afi) < BGP_AF_MAX; (afi)++) \ + if (((paf) = (peer)->peer_af_array[(afi)]) != NULL) + /* BGP versions. */ #define BGP_VERSION_4 4 @@ -852,6 +947,9 @@ struct bgp_nlri /* BGP default local preference. */ #define BGP_DEFAULT_LOCAL_PREF 100 +/* BGP default subgroup packet queue max. */ +#define BGP_DEFAULT_SUBGROUP_PKT_QUEUE_MAX 40 + /* BGP graceful restart */ #define BGP_DEFAULT_RESTART_TIME 120 #define BGP_DEFAULT_STALEPATH_TIME 360 @@ -930,6 +1028,17 @@ enum bgp_clear_type #define BGP_ERR_AS_OVERRIDE -34 #define BGP_ERR_MAX -35 +/* + * Enumeration of different policy kinds a peer can be configured with. 
+ */ +typedef enum +{ + BGP_POLICY_ROUTE_MAP, + BGP_POLICY_FILTER_LIST, + BGP_POLICY_PREFIX_LIST, + BGP_POLICY_DISTRIBUTE_LIST, +} bgp_policy_type_e; + extern struct bgp_master *bm; extern struct thread_master *master; @@ -1001,6 +1110,9 @@ extern int bgp_timers_unset (struct bgp *); extern int bgp_default_local_preference_set (struct bgp *, u_int32_t); extern int bgp_default_local_preference_unset (struct bgp *); +extern int bgp_default_subgroup_pkt_queue_max_set (struct bgp *bgp, u_int32_t); +extern int bgp_default_subgroup_pkt_queue_max_unset (struct bgp *bgp); + extern int bgp_update_delay_active (struct bgp *); extern int bgp_update_delay_configured (struct bgp *); extern int peer_rsclient_active (struct peer *); @@ -1096,4 +1208,95 @@ extern int bgp_route_map_update_timer (struct thread *thread); extern void bgp_route_map_terminate(void); extern int peer_cmp (struct peer *p1, struct peer *p2); + +extern struct peer_af * peer_af_create (struct peer *, afi_t, safi_t); +extern struct peer_af * peer_af_find (struct peer *, afi_t, safi_t); +extern int peer_af_delete (struct peer *, afi_t, safi_t); + +static inline int +afindex (afi_t afi, safi_t safi) +{ + switch (afi) + { + case AFI_IP: + switch (safi) + { + case SAFI_UNICAST: + return BGP_AF_IPV4_UNICAST; + break; + case SAFI_MULTICAST: + return BGP_AF_IPV4_MULTICAST; + break; + case SAFI_MPLS_VPN: + return BGP_AF_IPV4_VPN; + break; + default: + return BGP_AF_MAX; + break; + } + break; + case AFI_IP6: + switch (safi) + { + case SAFI_UNICAST: + return BGP_AF_IPV6_UNICAST; + break; + case SAFI_MULTICAST: + return BGP_AF_IPV6_MULTICAST; + break; + default: + return BGP_AF_MAX; + break; + } + break; + default: + return BGP_AF_MAX; + break; + } +} + +/* Return 1 if the peer has af_group set for at least one address family. 
*/ +static inline int +peer_group_active (struct peer *peer) +{ + if (peer->af_group[AFI_IP][SAFI_UNICAST] + || peer->af_group[AFI_IP][SAFI_MULTICAST] + || peer->af_group[AFI_IP][SAFI_MPLS_VPN] + || peer->af_group[AFI_IP6][SAFI_UNICAST] + || peer->af_group[AFI_IP6][SAFI_MULTICAST]) + return 1; + return 0; +} + +/* Return 1 if the peer has negotiated at least one SAFI for the given AFI. */ +static inline int +peer_afi_active_nego (const struct peer *peer, afi_t afi) +{ + if (peer->afc_nego[afi][SAFI_UNICAST] + || peer->afc_nego[afi][SAFI_MULTICAST] + || peer->afc_nego[afi][SAFI_MPLS_VPN]) + return 1; + return 0; +} + +static inline char * +timestamp_string (time_t ts) +{ +#ifdef HAVE_CLOCK_MONOTONIC + time_t tbuf; + tbuf = time(NULL) - (bgp_clock() - ts); + return ctime(&tbuf); +#else + return ctime(&ts); +#endif /* HAVE_CLOCK_MONOTONIC */ +} + +static inline int +peer_established (struct peer *peer) +{ + if (peer->status == Established) + return 1; + return 0; +} + #endif /* _QUAGGA_BGPD_H */ diff --git a/configure.ac b/configure.ac index f1df482b32..0821b2fa4a 100755 --- a/configure.ac +++ b/configure.ac @@ -60,6 +60,17 @@ elif test -n "$CFLAGS" ; then cflags_specified=yes ; fi +AC_ARG_ENABLE(tcmalloc, +[ --enable-tcmalloc Turn on tcmalloc], +[case "${enableval}" in + yes) tcmalloc_enabled=true +LIBS="$LIBS -ltcmalloc_minimal" + ;; + no) tcmalloc_enabled=false ;; + *) AC_MSG_ERROR(bad value ${enableval} for --enable-tcmalloc) ;; +esac],[tcmalloc_enabled=false]) + + dnl -------------------- dnl Check CC and friends dnl -------------------- diff --git a/lib/hash.c b/lib/hash.c index 56e41fa826..4d3da66d79 100644 --- a/lib/hash.c +++ b/lib/hash.c @@ -225,6 +225,31 @@ hash_iterate (struct hash *hash, } } +/* Iterate over the hash, stopping as soon as func returns HASHWALK_ABORT. 
*/ +void +hash_walk (struct hash *hash, + int (*func) (struct hash_backet *, void *), void *arg) +{ + unsigned int i; + struct hash_backet *hb; + struct hash_backet *hbnext; + int ret = HASHWALK_CONTINUE; + + for (i = 0; i < hash->size; i++) + { + for (hb = hash->index[i]; hb; hb = hbnext) + { + /* get pointer to next hash backet here, in case (*func) + * decides to delete hb by calling hash_release + */ + hbnext = hb->next; + ret = (*func) (hb, arg); + if (ret == HASHWALK_ABORT) + return; + } + } +} + /* Clean up hash. */ void hash_clean (struct hash *hash, void (*free_func) (void *)) diff --git a/lib/hash.h b/lib/hash.h index 920c6685fb..9707dbd1bf 100644 --- a/lib/hash.h +++ b/lib/hash.h @@ -25,6 +25,9 @@ Boston, MA 02111-1307, USA. */ #define HASH_INITIAL_SIZE 256 /* initial number of backets. */ #define HASH_THRESHOLD 10 /* expand when backet. */ +#define HASHWALK_CONTINUE 0 +#define HASHWALK_ABORT -1 + struct hash_backet { /* Linked list. */ @@ -71,6 +74,9 @@ extern void *hash_release (struct hash *, void *); extern void hash_iterate (struct hash *, void (*) (struct hash_backet *, void *), void *); +extern void hash_walk (struct hash *, + int (*) (struct hash_backet *, void *), void *); + extern void hash_clean (struct hash *, void (*) (void *)); extern void hash_free (struct hash *); diff --git a/lib/memtypes.c b/lib/memtypes.c index ca3a4a4f76..c32c08817f 100644 --- a/lib/memtypes.c +++ b/lib/memtypes.c @@ -99,6 +99,10 @@ struct memory_list memory_list_bgp[] = { MTYPE_PEER_GROUP, "Peer group" }, { MTYPE_PEER_DESC, "Peer description" }, { MTYPE_PEER_PASSWORD, "Peer password string" }, + { MTYPE_BGP_PEER_AF, "BGP peer af" }, + { MTYPE_BGP_UPDGRP, "BGP update group" }, + { MTYPE_BGP_UPD_SUBGRP, "BGP update subgroup" }, + { MTYPE_BGP_PACKET, "BGP packet" }, { MTYPE_ATTR, "BGP attribute" }, { MTYPE_ATTR_EXTRA, "BGP extra attributes" }, { MTYPE_AS_PATH, "BGP aspath" }, diff --git a/lib/plist.c b/lib/plist.c index f5950c331f..10012f3dc4 100644 --- a/lib/plist.c 
+++ b/lib/plist.c @@ -329,7 +329,7 @@ prefix_list_delete (struct prefix_list *plist) route_map_notify_dependencies(plist->name, RMAP_EVENT_PLIST_DELETED); if (master->delete_hook) - (*master->delete_hook) (NULL); + (*master->delete_hook) (plist); if (plist->name) XFREE (MTYPE_PREFIX_LIST_STR, plist->name); diff --git a/lib/sockunion.c b/lib/sockunion.c index 5dcf72563a..3cbb59cc60 100644 --- a/lib/sockunion.c +++ b/lib/sockunion.c @@ -552,7 +552,7 @@ sockopt_v6only (int family, int sock) /* If same family and same prefix return 1. */ int -sockunion_same (union sockunion *su1, union sockunion *su2) +sockunion_same (const union sockunion *su1, const union sockunion *su2) { int ret = 0; diff --git a/lib/sockunion.h b/lib/sockunion.h index b9f3514246..8f0a9be37c 100644 --- a/lib/sockunion.h +++ b/lib/sockunion.h @@ -86,7 +86,7 @@ enum connect_result extern int str2sockunion (const char *, union sockunion *); extern const char *sockunion2str (union sockunion *, char *, size_t); extern int sockunion_cmp (union sockunion *, union sockunion *); -extern int sockunion_same (union sockunion *, union sockunion *); +extern int sockunion_same (const union sockunion *, const union sockunion *); extern union sockunion *sockunion_str2su (const char *str); extern int sockunion_accept (int sock, union sockunion *); diff --git a/lib/stream.c b/lib/stream.c index 0fc3c3b118..cc5898a6db 100644 --- a/lib/stream.c +++ b/lib/stream.c @@ -401,6 +401,21 @@ stream_getl_from (struct stream *s, size_t from) return l; } +/* Copy from stream at specific location to destination. 
*/ +void +stream_get_from (void *dst, struct stream *s, size_t from, size_t size) +{ + STREAM_VERIFY_SANE(s); + + if (!GETP_VALID (s, from + size)) + { + STREAM_BOUND_WARN (s, "get from"); + return; + } + + memcpy (dst, s->data + from, size); +} + u_int32_t stream_getl (struct stream *s) { @@ -709,6 +724,38 @@ stream_put_in_addr (struct stream *s, struct in_addr *addr) return sizeof (u_int32_t); } +/* Put in_addr at location in the stream. */ +int +stream_put_in_addr_at (struct stream *s, size_t putp, struct in_addr *addr) +{ + STREAM_VERIFY_SANE(s); + + if (!PUT_AT_VALID (s, putp + 4)) + { + STREAM_BOUND_WARN (s, "put"); + return 0; + } + + memcpy (&s->data[putp], addr, 4); + return 4; +} + +/* Put in6_addr at location in the stream. */ +int +stream_put_in6_addr_at (struct stream *s, size_t putp, struct in6_addr *addr) +{ + STREAM_VERIFY_SANE(s); + + if (!PUT_AT_VALID (s, putp + 16)) + { + STREAM_BOUND_WARN (s, "put"); + return 0; + } + + memcpy (&s->data[putp], addr, 16); + return 16; +} + /* Put prefix by nlri type format. 
*/ int stream_put_prefix (struct stream *s, struct prefix *p) diff --git a/lib/stream.h b/lib/stream.h index f0c742c052..3efabe358d 100644 --- a/lib/stream.h +++ b/lib/stream.h @@ -173,9 +173,12 @@ extern int stream_putq (struct stream *, uint64_t); extern int stream_putq_at (struct stream *, size_t, uint64_t); extern int stream_put_ipv4 (struct stream *, u_int32_t); extern int stream_put_in_addr (struct stream *, struct in_addr *); +extern int stream_put_in_addr_at (struct stream *, size_t, struct in_addr *); +extern int stream_put_in6_addr_at (struct stream *, size_t, struct in6_addr *); extern int stream_put_prefix (struct stream *, struct prefix *); extern void stream_get (void *, struct stream *, size_t); +extern void stream_get_from (void *, struct stream *, size_t, size_t); extern u_char stream_getc (struct stream *); extern u_char stream_getc_from (struct stream *, size_t); extern u_int16_t stream_getw (struct stream *); diff --git a/tests/aspath_test.c b/tests/aspath_test.c index 7fdb5e2210..8ba77b122b 100644 --- a/tests/aspath_test.c +++ b/tests/aspath_test.c @@ -3,6 +3,7 @@ #include "vty.h" #include "stream.h" #include "privs.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_aspath.h" diff --git a/tests/bgp_capability_test.c b/tests/bgp_capability_test.c index 31976e9ad8..c981504e54 100644 --- a/tests/bgp_capability_test.c +++ b/tests/bgp_capability_test.c @@ -4,6 +4,7 @@ #include "stream.h" #include "privs.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_open.h" diff --git a/tests/bgp_mp_attr_test.c b/tests/bgp_mp_attr_test.c index 831c755b98..10369cc74f 100644 --- a/tests/bgp_mp_attr_test.c +++ b/tests/bgp_mp_attr_test.c @@ -4,6 +4,7 @@ #include "stream.h" #include "privs.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_attr.h" diff --git a/tests/bgp_mpath_test.c b/tests/bgp_mpath_test.c index a6ca9c5371..aa20ae4069 100644 --- a/tests/bgp_mpath_test.c +++ 
b/tests/bgp_mpath_test.c @@ -29,6 +29,7 @@ #include "linklist.h" #include "memory.h" #include "zclient.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" diff --git a/tests/ecommunity_test.c b/tests/ecommunity_test.c index 87f20f282d..6006bd7350 100644 --- a/tests/ecommunity_test.c +++ b/tests/ecommunity_test.c @@ -4,6 +4,7 @@ #include "stream.h" #include "privs.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_ecommunity.h"