[bgpd] Record afi/safi in bgp_table. Serialise peer clear with FSM.

2006-02-21 Paul Jakma <paul.jakma@sun.com>

	* bgpd.h: move the clear_node_queue to be peer specific.
	  Add a new peer status flag, PEER_STATUS_CLEARING.
	* bgp_table.h: (struct bgp_table) Add fields to record afi,
          safi of the table.
          (bgp_table_init) Take afi and safi to create table for.
        * bgp_table.c: (bgp_table_init) record the afi and safi.
        * bgp_nexthop.c: Update all calls to bgp_table_init.
        * bgp_vty.c: ditto.
        * bgpd.c: ditto.
        * bgp_fsm.c: (bgp_timer_set) dont bring up a session which is
	  clearing.
        * bgp_route.c: (general) Update all bgp_table_init calls.
          (bgp_process_{rsclient,main}) clear_node is serialised
          via PEER_STATUS_CLEARING and fsm now.
          (struct bgp_clear_node_queue) can be removed. struct bgp_node
          can be the queue item data directly, as struct peer can be
          kept in the new wq global user data and afi/safi can be
          retrieved via bgp_node -> bgp_table.
          (bgp_clear_route_node) fix to get peer via wq->spec.data,
          afi/safi via bgp_node->bgp_table.
          (bgp_clear_node_queue_del) no more item data to delete, only
          unlock the bgp_node.
          (bgp_clear_node_complete) only need to unset CLEARING flag
          and unlock struct peer.
          (bgp_clear_node_queue_init) queue attaches to struct peer
          now. record peer name as queue name.
          (bgp_clear_route_table) If queue transitions to active,
          serialise clearing by setting PEER_STATUS_CLEARING rather
          than plugging process queue, and lock peer while queue
          active.
          Update to pass only bgp_node as per-queue-item specific data.
This commit is contained in:
Paul Jakma 2006-02-21 01:09:01 +00:00
parent 4528ffa280
commit afb226bebd
9 changed files with 126 additions and 96 deletions

View File

@ -1,3 +1,37 @@
2006-02-21 Paul Jakma <paul.jakma@sun.com>
* bgpd.h: move the clear_node_queue to be peer specific.
Add a new peer status flag, PEER_STATUS_CLEARING.
* bgp_table.h: (struct bgp_table) Add fields to record afi,
safi of the table.
(bgp_table_init) Take afi and safi to create table for.
* bgp_table.c: (bgp_table_init) record the afi and safi.
* bgp_nexthop.c: Update all calls to bgp_table_init.
* bgp_vty.c: ditto.
* bgpd.c: ditto.
* bgp_fsm.c: (bgp_timer_set) dont bring up a session which is
clearing.
* bgp_route.c: (general) Update all bgp_table_init calls.
(bgp_process_{rsclient,main}) clear_node is serialised
via PEER_STATUS_CLEARING and fsm now.
(struct bgp_clear_node_queue) can be removed. struct bgp_node
can be the queue item data directly, as struct peer can be
kept in the new wq global user data and afi/safi can be
retrieved via bgp_node -> bgp_table.
(bgp_clear_route_node) fix to get peer via wq->spec.data,
afi/safi via bgp_node->bgp_table.
(bgp_clear_node_queue_del) no more item data to delete, only
unlock the bgp_node.
(bgp_clear_node_complete) only need to unset CLEARING flag
and unlock struct peer.
(bgp_clear_node_queue_init) queue attaches to struct peer
now. record peer name as queue name.
(bgp_clear_route_table) If queue transitions to active,
serialise clearing by setting PEER_STATUS_CLEARING rather
than plugging process queue, and lock peer while queue
active.
Update to pass only bgp_node as per-queue-item specific data.
2006-02-18 Paul Jakma <paul.jakma@sun.com>
* bgp_routemap.c: (route_set_community) Quick, very hacky, fix

View File

@ -84,6 +84,7 @@ bgp_timer_set (struct peer *peer)
inactive. All other timer must be turned off */
if (CHECK_FLAG (peer->flags, PEER_FLAG_SHUTDOWN)
|| CHECK_FLAG (peer->sflags, PEER_STATUS_PREFIX_OVERFLOW)
|| CHECK_FLAG (peer->sflags, PEER_STATUS_CLEARING)
|| ! peer_active (peer))
{
BGP_TIMER_OFF (peer->t_start);

View File

@ -1330,17 +1330,17 @@ bgp_scan_init ()
bgp_scan_interval = BGP_SCAN_INTERVAL_DEFAULT;
bgp_import_interval = BGP_IMPORT_INTERVAL_DEFAULT;
cache1_table[AFI_IP] = bgp_table_init ();
cache2_table[AFI_IP] = bgp_table_init ();
cache1_table[AFI_IP] = bgp_table_init (AFI_IP, SAFI_UNICAST);
cache2_table[AFI_IP] = bgp_table_init (AFI_IP, SAFI_UNICAST);
bgp_nexthop_cache_table[AFI_IP] = cache1_table[AFI_IP];
bgp_connected_table[AFI_IP] = bgp_table_init ();
bgp_connected_table[AFI_IP] = bgp_table_init (AFI_IP, SAFI_UNICAST);
#ifdef HAVE_IPV6
cache1_table[AFI_IP6] = bgp_table_init ();
cache2_table[AFI_IP6] = bgp_table_init ();
cache1_table[AFI_IP6] = bgp_table_init (AFI_IP6, SAFI_UNICAST);
cache2_table[AFI_IP6] = bgp_table_init (AFI_IP6, SAFI_UNICAST);
bgp_nexthop_cache_table[AFI_IP6] = cache1_table[AFI_IP6];
bgp_connected_table[AFI_IP6] = bgp_table_init ();
bgp_connected_table[AFI_IP6] = bgp_table_init (AFI_IP6, SAFI_UNICAST);
#endif /* HAVE_IPV6 */
/* Make BGP scan thread. */

View File

@ -71,7 +71,7 @@ bgp_afi_node_get (struct bgp_table *table, afi_t afi, safi_t safi, struct prefix
prn = bgp_node_get (table, (struct prefix *) prd);
if (prn->info == NULL)
prn->info = bgp_table_init ();
prn->info = bgp_table_init (afi, safi);
else
bgp_unlock_node (prn);
table = prn->info;
@ -1257,13 +1257,6 @@ bgp_process_rsclient (struct work_queue *wq, void *data)
struct listnode *node, *nnode;
struct peer *rsclient = rn->table->owner;
/* we shouldn't run if the clear_route_node queue is still running
* or scheduled to run, or we can race with session coming up
* and adding routes back before we've cleared them
*/
if (bm->clear_node_queue && bm->clear_node_queue->thread)
return WQ_QUEUE_BLOCKED;
/* Best path selection. */
bgp_best_selection (bgp, rn, &old_and_new);
new_select = old_and_new.new;
@ -1326,13 +1319,6 @@ bgp_process_main (struct work_queue *wq, void *data)
struct peer *peer;
struct attr attr;
/* we shouldn't run if the clear_route_node queue is still running
* or scheduled to run, or we can race with session coming up
* and adding routes back before we've cleared them
*/
if (bm->clear_node_queue && bm->clear_node_queue->thread)
return WQ_QUEUE_BLOCKED;
/* Best path selection. */
bgp_best_selection (bgp, rn, &old_and_new);
old_select = old_and_new.old;
@ -2465,54 +2451,49 @@ bgp_soft_reconfig_in (struct peer *peer, afi_t afi, safi_t safi)
bgp_soft_reconfig_table (peer, afi, safi, table);
}
struct bgp_clear_node_queue
{
struct bgp_node *rn;
struct peer *peer;
afi_t afi;
safi_t safi;
};
static wq_item_status
bgp_clear_route_node (struct work_queue *wq, void *data)
{
struct bgp_clear_node_queue *cq = data;
struct bgp_node *rn = data;
struct peer *peer = wq->spec.data;
struct bgp_adj_in *ain;
struct bgp_adj_out *aout;
struct bgp_info *ri;
afi_t afi = rn->table->afi;
safi_t safi = rn->table->safi;
assert (cq->rn && cq->peer);
assert (rn && peer);
for (ri = cq->rn->info; ri; ri = ri->next)
if (ri->peer == cq->peer)
for (ri = rn->info; ri; ri = ri->next)
if (ri->peer == peer)
{
/* graceful restart STALE flag set. */
if (CHECK_FLAG (cq->peer->sflags, PEER_STATUS_NSF_WAIT)
&& cq->peer->nsf[cq->afi][cq->safi]
if (CHECK_FLAG (peer->sflags, PEER_STATUS_NSF_WAIT)
&& peer->nsf[afi][safi]
&& ! CHECK_FLAG (ri->flags, BGP_INFO_STALE)
&& ! CHECK_FLAG (ri->flags, BGP_INFO_HISTORY)
&& ! CHECK_FLAG (ri->flags, BGP_INFO_DAMPED)
&& ! CHECK_FLAG (ri->flags, BGP_INFO_REMOVED))
{
bgp_pcount_decrement (cq->rn, ri, cq->afi, cq->safi);
bgp_pcount_decrement (rn, ri, afi, safi);
SET_FLAG (ri->flags, BGP_INFO_STALE);
}
else
bgp_rib_remove (cq->rn, ri, cq->peer, cq->afi, cq->safi);
bgp_rib_remove (rn, ri, peer, afi, safi);
break;
}
for (ain = cq->rn->adj_in; ain; ain = ain->next)
if (ain->peer == cq->peer)
for (ain = rn->adj_in; ain; ain = ain->next)
if (ain->peer == peer)
{
bgp_adj_in_remove (cq->rn, ain);
bgp_unlock_node (cq->rn);
bgp_adj_in_remove (rn, ain);
bgp_unlock_node (rn);
break;
}
for (aout = cq->rn->adj_out; aout; aout = aout->next)
if (aout->peer == cq->peer)
for (aout = rn->adj_out; aout; aout = aout->next)
if (aout->peer == peer)
{
bgp_adj_out_remove (cq->rn, aout, cq->peer, cq->afi, cq->safi);
bgp_unlock_node (cq->rn);
bgp_adj_out_remove (rn, aout, peer, afi, safi);
bgp_unlock_node (rn);
break;
}
return WQ_SUCCESS;
@ -2521,78 +2502,84 @@ bgp_clear_route_node (struct work_queue *wq, void *data)
static void
bgp_clear_node_queue_del (struct work_queue *wq, void *data)
{
struct bgp_clear_node_queue *cq = data;
bgp_unlock_node (cq->rn);
peer_unlock (cq->peer); /* bgp_clear_node_queue_del */
XFREE (MTYPE_BGP_CLEAR_NODE_QUEUE, cq);
struct bgp_node *rn = data;
bgp_unlock_node (rn);
}
static void
bgp_clear_node_complete (struct work_queue *wq)
{
/* unplug the 2 processing queues */
if (bm->process_main_queue)
work_queue_unplug (bm->process_main_queue);
if (bm->process_rsclient_queue)
work_queue_unplug (bm->process_rsclient_queue);
struct peer *peer = wq->spec.data;
UNSET_FLAG (peer->sflags, PEER_STATUS_CLEARING);
peer_unlock (peer); /* bgp_clear_node_complete */
}
static void
bgp_clear_node_queue_init (void)
bgp_clear_node_queue_init (struct peer *peer)
{
if ( (bm->clear_node_queue
= work_queue_new (bm->master, "clear_route_node")) == NULL)
#define CLEAR_QUEUE_NAME_LEN 26 /* "clear 2001:123:123:123::1" */
char wname[CLEAR_QUEUE_NAME_LEN];
snprintf (wname, CLEAR_QUEUE_NAME_LEN, "clear %s", peer->host);
#undef CLEAR_QUEUE_NAME_LEN
if ( (peer->clear_node_queue = work_queue_new (bm->master, wname)) == NULL)
{
zlog_err ("%s: Failed to allocate work queue", __func__);
exit (1);
}
bm->clear_node_queue->spec.hold = 10;
bm->clear_node_queue->spec.workfunc = &bgp_clear_route_node;
bm->clear_node_queue->spec.del_item_data = &bgp_clear_node_queue_del;
bm->clear_node_queue->spec.completion_func = &bgp_clear_node_complete;
bm->clear_node_queue->spec.max_retries = 0;
peer->clear_node_queue->spec.hold = 10;
peer->clear_node_queue->spec.workfunc = &bgp_clear_route_node;
peer->clear_node_queue->spec.del_item_data = &bgp_clear_node_queue_del;
peer->clear_node_queue->spec.completion_func = &bgp_clear_node_complete;
peer->clear_node_queue->spec.max_retries = 0;
/* we only 'lock' this peer reference when the queue is actually active */
peer->clear_node_queue->spec.data = peer;
}
static void
bgp_clear_route_table (struct peer *peer, afi_t afi, safi_t safi,
struct bgp_table *table, struct peer *rsclient)
{
struct bgp_clear_node_queue *cqnode;
struct bgp_node *rn;
if (! table)
table = (rsclient) ? rsclient->rib[afi][safi] : peer->bgp->rib[afi][safi];
/* If still no table => afi/safi isn't configured at all or smth. */
if (! table)
return;
if (bm->clear_node_queue == NULL)
bgp_clear_node_queue_init ();
if (peer->clear_node_queue == NULL)
bgp_clear_node_queue_init (peer);
/* plug the two bgp_process queues to avoid any chance of racing
* with a session coming back up and adding routes before we've
* cleared them all. We'll unplug them with completion callback.
/* bgp_fsm.c will not bring CLEARING sessions out of Idle this
* protects against peers which flap faster than we can we clear,
* which could lead to:
*
* a) race with routes from the new session being installed before
* clear_route_node visits the node (to delete the route of that
* peer)
* b) resource exhaustion, clear_route_node likely leads to an entry
* on the process_main queue. Fast-flapping could cause that queue
* to grow and grow.
*/
if (bm->process_main_queue)
work_queue_plug (bm->process_main_queue);
if (bm->process_rsclient_queue)
work_queue_plug (bm->process_rsclient_queue);
if (!CHECK_FLAG (peer->sflags, PEER_STATUS_CLEARING))
{
SET_FLAG (peer->sflags, PEER_STATUS_CLEARING);
peer_lock (peer); /* bgp_clear_node_complete */
}
for (rn = bgp_table_top (table); rn; rn = bgp_route_next (rn))
{
if (rn->info == NULL)
continue;
if ( (cqnode = XCALLOC (MTYPE_BGP_CLEAR_NODE_QUEUE,
sizeof (struct bgp_clear_node_queue))) == NULL)
continue;
cqnode->rn = bgp_lock_node (rn); /* unlocked: bgp_clear_node_queue_del */
cqnode->afi = afi;
cqnode->safi = safi;
cqnode->peer = peer_lock (peer); /* bgp_clear_node_queue_del */
work_queue_add (bm->clear_node_queue, cqnode);
bgp_lock_node (rn); /* unlocked: bgp_clear_node_queue_del */
work_queue_add (peer->clear_node_queue, rn);
}
return;
}
@ -3524,7 +3511,7 @@ bgp_static_set_vpnv4 (struct vty *vty, const char *ip_str, const char *rd_str,
prn = bgp_node_get (bgp->route[AFI_IP][SAFI_MPLS_VPN],
(struct prefix *)&prd);
if (prn->info == NULL)
prn->info = bgp_table_init ();
prn->info = bgp_table_init (AFI_IP, SAFI_MPLS_VPN);
else
bgp_unlock_node (prn);
table = prn->info;
@ -3593,7 +3580,7 @@ bgp_static_unset_vpnv4 (struct vty *vty, const char *ip_str,
prn = bgp_node_get (bgp->route[AFI_IP][SAFI_MPLS_VPN],
(struct prefix *)&prd);
if (prn->info == NULL)
prn->info = bgp_table_init ();
prn->info = bgp_table_init (AFI_IP, SAFI_MPLS_VPN);
else
bgp_unlock_node (prn);
table = prn->info;
@ -10408,7 +10395,7 @@ void
bgp_route_init ()
{
/* Init BGP distance table. */
bgp_distance_table = bgp_table_init ();
bgp_distance_table = bgp_table_init (AFI_IP, SAFI_UNICAST);
/* IPv4 BGP commands. */
install_element (BGP_NODE, &bgp_network_cmd);

View File

@ -32,7 +32,7 @@ void bgp_node_delete (struct bgp_node *);
void bgp_table_free (struct bgp_table *);
struct bgp_table *
bgp_table_init (void)
bgp_table_init (afi_t afi, safi_t safi)
{
struct bgp_table *rt;
@ -40,7 +40,9 @@ bgp_table_init (void)
memset (rt, 0, sizeof (struct bgp_table));
rt->type = BGP_TABLE_MAIN;
rt->afi = afi;
rt->safi = safi;
return rt;
}

View File

@ -30,7 +30,11 @@ typedef enum
struct bgp_table
{
bgp_table_t type;
/* afi/safi of this table */
afi_t afi;
safi_t safi;
/* The owner of this 'bgp_table' structure. */
void *owner;
@ -63,7 +67,7 @@ struct bgp_node
#define BGP_NODE_PROCESS_SCHEDULED (1 << 0)
};
extern struct bgp_table *bgp_table_init (void);
extern struct bgp_table *bgp_table_init (afi_t, safi_t);
extern void bgp_table_finish (struct bgp_table *);
extern void bgp_unlock_node (struct bgp_node *node);
extern void bgp_node_delete (struct bgp_node *node);

View File

@ -2048,7 +2048,7 @@ peer_rsclient_set_vty (struct vty *vty, const char *peer_str,
if (ret < 0)
return bgp_vty_return (vty, ret);
peer->rib[afi][safi] = bgp_table_init ();
peer->rib[afi][safi] = bgp_table_init (afi, safi);
peer->rib[afi][safi]->type = BGP_TABLE_RSCLIENT;
peer->rib[afi][safi]->owner = peer;

View File

@ -1866,9 +1866,9 @@ bgp_create (as_t *as, const char *name)
for (afi = AFI_IP; afi < AFI_MAX; afi++)
for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
{
bgp->route[afi][safi] = bgp_table_init ();
bgp->aggregate[afi][safi] = bgp_table_init ();
bgp->rib[afi][safi] = bgp_table_init ();
bgp->route[afi][safi] = bgp_table_init (afi, safi);
bgp->aggregate[afi][safi] = bgp_table_init (afi, safi);
bgp->rib[afi][safi] = bgp_table_init (afi, safi);
}
bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF;

View File

@ -40,7 +40,6 @@ struct bgp_master
/* work queues */
struct work_queue *process_main_queue;
struct work_queue *process_rsclient_queue;
struct work_queue *clear_node_queue;
/* BGP port number. */
u_int16_t port;
@ -387,6 +386,7 @@ struct peer
#define PEER_STATUS_GROUP (1 << 4) /* peer-group conf */
#define PEER_STATUS_NSF_MODE (1 << 5) /* NSF aware peer */
#define PEER_STATUS_NSF_WAIT (1 << 6) /* wait comeback peer */
#define PEER_STATUS_CLEARING (1 << 7) /* peers table being cleared */
/* Peer status af flags (reset in bgp_stop) */
u_int16_t af_sflags[AFI_MAX][SAFI_MAX];
@ -398,7 +398,6 @@ struct peer
#define PEER_STATUS_EOR_SEND (1 << 5) /* end-of-rib send to peer */
#define PEER_STATUS_EOR_RECEIVED (1 << 6) /* end-of-rib received from peer */
/* Default attribute value for the peer. */
u_int32_t config;
#define PEER_CONFIG_WEIGHT (1 << 0) /* Default weight. */
@ -433,7 +432,10 @@ struct peer
struct thread *t_pmax_restart;
struct thread *t_gr_restart;
struct thread *t_gr_stale;
/* workqueues */
struct work_queue *clear_node_queue;
/* Statistics field */
u_int32_t open_in; /* Open message input count */
u_int32_t open_out; /* Open message output count */