diff --git a/bgpd/bgp_advertise.c b/bgpd/bgp_advertise.c index adf40a0bb9..9043c83b24 100644 --- a/bgpd/bgp_advertise.c +++ b/bgpd/bgp_advertise.c @@ -267,6 +267,9 @@ bgp_adj_out_set (struct bgp_node *rn, struct peer *peer, struct prefix *p, /* Add new advertisement to advertisement attribute list. */ bgp_advertise_add (adv->baa, adv); + if (FIFO_EMPTY(&peer->sync[afi][safi]->update)) + bgp_adjust_routeadv(peer); + BGP_ADV_FIFO_ADD (&peer->sync[afi][safi]->update, &adv->fifo); } diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 653dd7b1be..9dbe904e88 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -294,6 +294,22 @@ bgp_keepalive_timer (struct thread *thread) return 0; } +static int +bgp_routeq_empty (struct peer *peer) +{ + afi_t afi; + safi_t safi; + + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + if (!FIFO_EMPTY(&peer->sync[afi][safi]->withdraw) || + !FIFO_EMPTY(&peer->sync[afi][safi]->update)) + return 0; + } + return 1; +} + static int bgp_routeadv_timer (struct thread *thread) { @@ -311,8 +327,14 @@ bgp_routeadv_timer (struct thread *thread) BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); - BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, - peer->v_routeadv); + /* + * If there is no UPDATE to send, don't start the timer. We will start + * it when the queues go non-empty. + */ + if (bgp_routeq_empty(peer)) + return 0; + + BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, peer->v_routeadv); return 0; } @@ -448,6 +470,13 @@ bgp_update_delay_end (struct bgp *bgp) quagga_timestamp(3, bgp->update_delay_end_time, sizeof(bgp->update_delay_end_time)); + /* + * Add an end-of-initial-update marker to the main process queues so that + * the route advertisement timer for the peers can be started. + */ + bgp_add_eoiu_mark(bgp, BGP_TABLE_MAIN); + bgp_add_eoiu_mark(bgp, BGP_TABLE_RSCLIENT); + /* Route announcements were postponed for all the peers during read-only mode, send those now. */ for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) @@ -459,6 +488,92 @@ bgp_update_delay_end (struct bgp *bgp) work_queue_unplug(bm->process_rsclient_queue); } +/** + * see bgp_fsm.h + */ +void +bgp_start_routeadv (struct bgp *bgp) +{ + struct listnode *node, *nnode; + struct peer *peer; + + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + if (peer->status != Established) + continue; + BGP_TIMER_OFF(peer->t_routeadv); + BGP_TIMER_ON(peer->t_routeadv, bgp_routeadv_timer, 0); + } +} + +/** + * see bgp_fsm.h + */ +void +bgp_adjust_routeadv (struct peer *peer) +{ + time_t nowtime = bgp_clock(); + double diff; + unsigned long remain; + + /* + * CASE I: + * If the last update was written more than MRAI back, expire the timer + * instantly so that we can send the update out sooner. + * + * <------- MRAI ---------> + * |-----------------|-----------------------| + * <------------- m ------------> + * ^ ^ ^ + * | | | + * | | current time + * | timer start + * last write + * + * m > MRAI + */ + diff = difftime(nowtime, peer->last_write); + if (diff > (double) peer->v_routeadv) + { + BGP_TIMER_OFF(peer->t_routeadv); + BGP_TIMER_ON(peer->t_routeadv, bgp_routeadv_timer, 0); + if (BGP_DEBUG (update, UPDATE_OUT)) + zlog (peer->log, LOG_DEBUG, "%s: MRAI timer to expire instantly\n", + peer->host); + return; + } + + /* + * CASE II: + * - Find when to expire the MRAI timer. + * If MRAI timer is not active, assume we can start it now. + * + * <------- MRAI ---------> + * |------------|-----------------------| + * <-------- m ----------><----- r -----> + * ^ ^ ^ + * | | | + * | | current time + * | timer start + * last write + * + * (MRAI - m) < r + */ + if (peer->t_routeadv) + remain = thread_timer_remain_second(peer->t_routeadv); + else + remain = peer->v_routeadv; + diff = peer->v_routeadv - diff; + if (diff <= (double) remain) + { + BGP_TIMER_OFF(peer->t_routeadv); + BGP_TIMER_ON(peer->t_routeadv, bgp_routeadv_timer, diff); + if (BGP_DEBUG (update, UPDATE_OUT)) + zlog (peer->log, LOG_DEBUG, "%s: MRAI timer to expire in %f secs\n", + peer->host, diff); + } +} + /* The update delay timer expiry callback. */ static int bgp_update_delay_timer (struct thread *thread) @@ -920,16 +1035,12 @@ bgp_fsm_open (struct peer *peer) static int bgp_fsm_keepalive_expire (struct peer *peer) { - afi_t afi; - safi_t safi; - - for (afi = AFI_IP; afi < AFI_MAX; afi++) - for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) - { - if (!FIFO_EMPTY(&peer->sync[afi][safi]->withdraw) || - !FIFO_EMPTY(&peer->sync[afi][safi]->update)) - return 0; - } + /* + * If there are UPDATE messages to send, no need to send keepalive. The + * peer will note our progress through the UPDATEs. + */ + if (!bgp_routeq_empty(peer)) + return 0; bgp_keepalive_send (peer); return 0; @@ -1062,7 +1173,12 @@ bgp_establish (struct peer *peer) bgp_announce_route_all (peer); - BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, 1); + /* Start the route advertisement timer to send updates to the peer - if BGP + * is not in read-only mode. If it is, the timer will be started at the end + * of read-only mode. + */ + if (!bgp_update_delay_active(peer->bgp)) + BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, 0); return 0; } diff --git a/bgpd/bgp_fsm.h b/bgpd/bgp_fsm.h index bef5e0a596..eecd131d66 100644 --- a/bgpd/bgp_fsm.h +++ b/bgpd/bgp_fsm.h @@ -71,6 +71,8 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA thread_cancel_event (master, (P)); \ } while (0) +#define BGP_MSEC_JITTER 10 + /* Prototypes. */ extern int bgp_event (struct thread *); extern int bgp_stop (struct peer *peer); @@ -79,4 +81,20 @@ extern void bgp_fsm_change_status (struct peer *peer, int status); extern const char *peer_down_str[]; extern void bgp_update_delay_end (struct bgp *); +/** + * Start the route advertisement timer (that honors MRAI) for all the + * peers. Typically called at the end of initial convergence, coming + * out of read-only mode. + */ +extern void bgp_start_routeadv (struct bgp *); + +/** + * See if the route advertisement timer needs to be adjusted for a + * peer. For example, if the last update was written to the peer a + * long while back, we don't need to wait for the periodic advertisement + * timer to expire to send the new set of prefixes. It should fire + * instantly and updates should go out sooner. + */ +extern void bgp_adjust_routeadv (struct peer *); + #endif /* _QUAGGA_BGP_FSM_H */ diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index 674892515b..0c12df891c 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -616,7 +616,7 @@ bgp_write_packet (struct peer *peer) adv = FIFO_HEAD (&peer->sync[afi][safi]->update); if (adv) { - if (adv->binfo && adv->binfo->uptime < peer->synctime) + if (adv->binfo && adv->binfo->uptime <= peer->synctime) { if (CHECK_FLAG (adv->binfo->peer->cap, PEER_CAP_RESTART_RCV) && CHECK_FLAG (adv->binfo->peer->cap, PEER_CAP_RESTART_ADV) @@ -689,6 +689,7 @@ bgp_write (struct thread *thread) struct stream *s; int num; unsigned int count = 0; + int oc = 0; /* Yes first of all get peer pointer. */ peer = THREAD_ARG (thread); @@ -707,6 +708,8 @@ bgp_write (struct thread *thread) sockopt_cork (peer->fd, 1); + oc = peer->update_out; + /* Nonblocking write until TCP output buffer is full. */ do { @@ -774,13 +777,17 @@ bgp_write (struct thread *thread) /* OK we send packet so delete it. */ bgp_packet_delete (peer); } - while (++count < BGP_WRITE_PACKET_MAX && + while (++count < peer->bgp->wpkt_quanta && (s = bgp_write_packet (peer)) != NULL); - + if (bgp_write_proceed (peer)) BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); done: + /* Update the last write if some updates were written. */ + if (peer->update_out > oc) + peer->last_write = bgp_clock (); + sockopt_cork (peer->fd, 0); return 0; } diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 134c7c07dc..b2ec4811af 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -1521,8 +1521,17 @@ bgp_process_rsclient (struct work_queue *wq, void *data) struct bgp_info *old_select; struct bgp_info_pair old_and_new; struct listnode *node, *nnode; - struct peer *rsclient = bgp_node_table (rn)->owner; - + struct peer *rsclient; + + /* Is it end of initial update? (after startup) */ + if (!rn) + { + bgp_start_routeadv(bgp); + return WQ_SUCCESS; + } + + rsclient = bgp_node_table (rn)->owner; + /* Best path selection. */ bgp_best_selection (bgp, rn, &bgp->maxpaths[afi][safi], &old_and_new); new_select = old_and_new.new; @@ -1585,7 +1594,14 @@ bgp_process_main (struct work_queue *wq, void *data) struct bgp_info_pair old_and_new; struct listnode *node, *nnode; struct peer *peer; - + + /* Is it end of initial update? (after startup) */ + if (!rn) + { + bgp_start_routeadv(bgp); + return WQ_SUCCESS; + } + /* Best path selection. */ bgp_best_selection (bgp, rn, &bgp->maxpaths[afi][safi], &old_and_new); old_select = old_and_new.old; @@ -1652,11 +1668,15 @@ static void bgp_processq_del (struct work_queue *wq, void *data) { struct bgp_process_queue *pq = data; - struct bgp_table *table = bgp_node_table (pq->rn); - + struct bgp_table *table; + bgp_unlock (pq->bgp); - bgp_unlock_node (pq->rn); - bgp_table_unlock (table); + if (pq->rn) + { + table = bgp_node_table (pq->rn); + bgp_unlock_node (pq->rn); + bgp_table_unlock (table); + } XFREE (MTYPE_BGP_PROCESS_QUEUE, pq); } @@ -1724,6 +1744,36 @@ bgp_process (struct bgp *bgp, struct bgp_node *rn, afi_t afi, safi_t safi) return; } +void +bgp_add_eoiu_mark (struct bgp *bgp, bgp_table_t type) +{ + struct bgp_process_queue *pqnode; + + if ( (bm->process_main_queue == NULL) || + (bm->process_rsclient_queue == NULL) ) + bgp_process_queue_init (); + + pqnode = XCALLOC (MTYPE_BGP_PROCESS_QUEUE, + sizeof (struct bgp_process_queue)); + if (!pqnode) + return; + + pqnode->rn = NULL; + pqnode->bgp = bgp; + bgp_lock (bgp); + switch (type) + { + case BGP_TABLE_MAIN: + work_queue_add (bm->process_main_queue, pqnode); + break; + case BGP_TABLE_RSCLIENT: + work_queue_add (bm->process_rsclient_queue, pqnode); + break; + } + + return; +} + static int bgp_maximum_prefix_restart_timer (struct thread *thread) { diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index 7f31a38204..b8d2ed7c73 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -237,6 +237,12 @@ extern int bgp_withdraw (struct peer *, struct prefix *, struct attr *, /* for bgp_nexthop and bgp_damp */ extern void bgp_process (struct bgp *, struct bgp_node *, afi_t, safi_t); + +/* + * Add an end-of-initial-update marker to the process queue. This is just a + * queue element with NULL bgp node. + */ +extern void bgp_add_eoiu_mark (struct bgp *, bgp_table_t); extern int bgp_config_write_table_map (struct vty *, struct bgp *, afi_t, safi_t, int *); extern int bgp_config_write_network (struct vty *, struct bgp *, afi_t, safi_t, int *); diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c index 12e6b8b845..db09d583b7 100644 --- a/bgpd/bgp_vty.c +++ b/bgpd/bgp_vty.c @@ -49,6 +49,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_table.h" #include "bgpd/bgp_vty.h" #include "bgpd/bgp_mpath.h" +#include "bgpd/bgp_packet.h" extern struct in_addr router_id_zebra; @@ -796,6 +797,54 @@ ALIAS (no_bgp_update_delay, "Wait for peers to be established\n" "Seconds\n") +int +bgp_wpkt_quanta_config_vty (struct vty *vty, char *num, char set) +{ + struct bgp *bgp; + u_int16_t update_delay; + + bgp = vty->index; + + if (set) + VTY_GET_INTEGER_RANGE ("write-quanta", bgp->wpkt_quanta, num, + 1, 4294967295); + else + bgp->wpkt_quanta = BGP_WRITE_PACKET_MAX; + + return CMD_SUCCESS; +} + +int +bgp_config_write_wpkt_quanta (struct vty *vty, struct bgp *bgp) +{ + if (bgp->wpkt_quanta != BGP_WRITE_PACKET_MAX) + vty_out (vty, " write-quanta %d%s", + bgp->wpkt_quanta, VTY_NEWLINE); + + return 0; +} + + +/* Update-delay configuration */ +DEFUN (bgp_wpkt_quanta, + bgp_wpkt_quanta_cmd, + "write-quanta <1-4294967295>", + "How many packets to write to peer socket per run\n" + "Number of packets\n") +{ + return bgp_wpkt_quanta_config_vty(vty, argv[0], 1); +} + +/* Update-delay deconfiguration */ +DEFUN (no_bgp_wpkt_quanta, + no_bgp_wpkt_quanta_cmd, + "no write-quanta <1-4294967295>", + "How many packets to write to peer socket per run\n" + "Number of packets\n") +{ + return bgp_wpkt_quanta_config_vty(vty, argv[0], 0); +} + /* Maximum-paths configuration */ DEFUN (bgp_maxpaths, bgp_maxpaths_cmd, @@ -7835,9 +7884,11 @@ bgp_show_peer (struct vty *vty, struct peer *p) /* read timer */ vty_out (vty, " Last read %s", peer_uptime (p->readtime, timebuf, BGP_UPTIME_LEN)); + vty_out (vty, ", Last write %s%s", + peer_uptime (p->last_write, timebuf, BGP_UPTIME_LEN), VTY_NEWLINE); /* Configured timer values. */ - vty_out (vty, ", hold time is %d, keepalive interval is %d seconds%s", + vty_out (vty, " Hold time is %d, keepalive interval is %d seconds%s", p->v_holdtime, p->v_keepalive, VTY_NEWLINE); if (CHECK_FLAG (p->config, PEER_CONFIG_TIMER)) { @@ -8133,8 +8184,12 @@ bgp_show_peer (struct vty *vty, struct peer *p) if (p->t_connect) vty_out (vty, "Next connect timer due in %ld seconds%s", thread_timer_remain_second (p->t_connect), VTY_NEWLINE); - - vty_out (vty, "Read thread: %s Write thread: %s%s", + if (p->t_routeadv) + vty_out (vty, "MRAI (interval %ld) timer expires in %ld seconds%s", + p->v_routeadv, thread_timer_remain_second (p->t_routeadv), + VTY_NEWLINE); + + vty_out (vty, "Read thread: %s Write thread: %s%s", p->t_read ? "on" : "off", p->t_write ? "on" : "off", VTY_NEWLINE); @@ -9383,6 +9438,9 @@ bgp_vty_init (void) install_element (BGP_NODE, &bgp_update_delay_establish_wait_cmd); install_element (BGP_NODE, &no_bgp_update_delay_establish_wait_cmd); + install_element (BGP_NODE, &bgp_wpkt_quanta_cmd); + install_element (BGP_NODE, &no_bgp_wpkt_quanta_cmd); + /* "maximum-paths" commands. */ install_element (BGP_NODE, &bgp_maxpaths_cmd); install_element (BGP_NODE, &no_bgp_maxpaths_cmd); diff --git a/bgpd/bgp_vty.h b/bgpd/bgp_vty.h index e9dc09a061..9caf0baace 100644 --- a/bgpd/bgp_vty.h +++ b/bgpd/bgp_vty.h @@ -26,5 +26,6 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA extern void bgp_vty_init (void); extern const char *afi_safi_print (afi_t, safi_t); extern int bgp_config_write_update_delay (struct vty *, struct bgp *); +extern int bgp_config_write_wpkt_quanta(struct vty *vty, struct bgp *bgp); #endif /* _QUAGGA_BGP_VTY_H */ diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index acca578c80..f1903630f0 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -1997,6 +1997,8 @@ bgp_create (as_t *as, const char *name) if (name) bgp->name = strdup (name); + bgp->wpkt_quanta = BGP_WRITE_PACKET_MAX; + THREAD_TIMER_ON (master, bgp->t_startup, bgp_startup_timer_expire, bgp, bgp->restart_time); @@ -5388,6 +5390,9 @@ bgp_config_write (struct vty *vty) /* BGP update-delay. */ bgp_config_write_update_delay (vty, bgp); + /* write quanta */ + bgp_config_write_wpkt_quanta (vty, bgp); + /* BGP graceful-restart. */ if (bgp->stalepath_time != BGP_DEFAULT_STALEPATH_TIME) vty_out (vty, " bgp graceful-restart stalepath-time %d%s", diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index b2d421ed1f..d32bdcf043 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -196,6 +196,8 @@ struct bgp u_int16_t ibgp_flags; #define BGP_FLAG_IBGP_MULTIPATH_SAME_CLUSTERLEN (1 << 0) } maxpaths[AFI_MAX][SAFI_MAX]; + + u_int32_t wpkt_quanta; /* per peer packet quanta to write */ }; /* BGP peer-group support. */ @@ -535,6 +537,7 @@ struct peer /* Syncronization list and time. */ struct bgp_synchronize *sync[AFI_MAX][SAFI_MAX]; time_t synctime; + time_t last_write; /* timestamp when the last UPDATE msg was written */ /* Send prefix count. */ unsigned long scount[AFI_MAX][SAFI_MAX];