pimd: NHT upstream list is inefficient

The NHT upstream list is horribly inefficient at scale because it
keeps a sorted list of upstream entries.  Looking up an upstream
and inserting it into the upstream_list was consuming a large
amount of CPU cycles.

Convert the list to a hash so that additions and deletions
effectively become O(1) operations.

Signed-off-by: Donald Sharp <sharpd@cumulusnetworks.com>
This commit is contained in:
Donald Sharp 2017-07-12 18:17:31 -04:00
parent 6e1ef388f3
commit 7c59195031
5 changed files with 124 additions and 127 deletions

View File

@ -130,8 +130,9 @@ static struct pim_nexthop_cache *pim_nexthop_cache_add(struct pim_instance *pim,
pnc->rp_list = list_new(); pnc->rp_list = list_new();
pnc->rp_list->cmp = pim_rp_list_cmp; pnc->rp_list->cmp = pim_rp_list_cmp;
pnc->upstream_list = list_new(); pnc->upstream_hash = hash_create_size(8192, pim_upstream_hash_key,
pnc->upstream_list->cmp = pim_upstream_compare; pim_upstream_equal,
"PNC Upstream Hash");
return pnc; return pnc;
} }
@ -187,11 +188,8 @@ int pim_find_or_track_nexthop(struct pim_instance *pim, struct prefix *addr,
listnode_add_sort(pnc->rp_list, rp); listnode_add_sort(pnc->rp_list, rp);
} }
if (up != NULL) { if (up != NULL)
ch_node = listnode_lookup(pnc->upstream_list, up); up = hash_get(pnc->upstream_hash, up, hash_alloc_intern);
if (ch_node == NULL)
listnode_add_sort(pnc->upstream_list, up);
}
if (pnc && CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID)) { if (pnc && CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID)) {
memcpy(out_pnc, pnc, sizeof(struct pim_nexthop_cache)); memcpy(out_pnc, pnc, sizeof(struct pim_nexthop_cache));
@ -217,24 +215,24 @@ void pim_delete_tracked_nexthop(struct pim_instance *pim, struct prefix *addr,
if (rp) if (rp)
listnode_delete(pnc->rp_list, rp); listnode_delete(pnc->rp_list, rp);
if (up) if (up)
listnode_delete(pnc->upstream_list, up); hash_release(pnc->upstream_hash, up);
if (PIM_DEBUG_PIM_NHT) { if (PIM_DEBUG_PIM_NHT) {
char buf[PREFIX_STRLEN]; char buf[PREFIX_STRLEN];
prefix2str(addr, buf, sizeof buf); prefix2str(addr, buf, sizeof buf);
zlog_debug( zlog_debug(
"%s: NHT %s(%s) rp_list count:%d upstream_list count:%d ", "%s: NHT %s(%s) rp_list count:%d upstream count:%ld",
__PRETTY_FUNCTION__, buf, pim->vrf->name, __PRETTY_FUNCTION__, buf, pim->vrf->name,
pnc->rp_list->count, pnc->upstream_list->count); pnc->rp_list->count, pnc->upstream_hash->count);
} }
if (pnc->rp_list->count == 0 if (pnc->rp_list->count == 0
&& pnc->upstream_list->count == 0) { && pnc->upstream_hash->count == 0) {
pim_sendmsg_zebra_rnh(pim, zclient, pnc, pim_sendmsg_zebra_rnh(pim, zclient, pnc,
ZEBRA_NEXTHOP_UNREGISTER); ZEBRA_NEXTHOP_UNREGISTER);
list_delete(pnc->rp_list); list_delete(pnc->rp_list);
list_delete(pnc->upstream_list); hash_free(pnc->upstream_hash);
hash_release(pim->rpf_hash, pnc); hash_release(pim->rpf_hash, pnc);
if (pnc->nexthop) if (pnc->nexthop)
@ -306,44 +304,35 @@ void pim_resolve_upstream_nh(struct pim_instance *pim, struct prefix *nht_p)
} }
/* Update Upstream nexthop info based on Nexthop update received from Zebra.*/ /* Update Upstream nexthop info based on Nexthop update received from Zebra.*/
static int pim_update_upstream_nh(struct pim_instance *pim, static int pim_update_upstream_nh_helper(struct hash_backet *backet, void *arg)
struct pim_nexthop_cache *pnc)
{ {
struct listnode *up_node; struct pim_instance *pim = (struct pim_instance *)arg;
struct listnode *ifnode; struct pim_upstream *up = (struct pim_upstream *)backet->data;
struct listnode *up_nextnode;
struct listnode *node;
struct pim_upstream *up = NULL;
struct interface *ifp = NULL;
int vif_index = 0; int vif_index = 0;
for (ALL_LIST_ELEMENTS(pnc->upstream_list, up_node, up_nextnode, up)) {
enum pim_rpf_result rpf_result; enum pim_rpf_result rpf_result;
struct pim_rpf old; struct pim_rpf old;
old.source_nexthop.interface = up->rpf.source_nexthop.interface; old.source_nexthop.interface = up->rpf.source_nexthop.interface;
rpf_result = pim_rpf_update(pim, up, &old, 0); rpf_result = pim_rpf_update(pim, up, &old, 0);
if (rpf_result == PIM_RPF_FAILURE) if (rpf_result == PIM_RPF_FAILURE)
continue; return HASHWALK_CONTINUE;
/* update kernel multicast forwarding cache (MFC) */ /* update kernel multicast forwarding cache (MFC) */
if (up->channel_oil) { if (up->channel_oil) {
ifindex_t ifindex = ifindex_t ifindex = up->rpf.source_nexthop.interface->ifindex;
up->rpf.source_nexthop.interface->ifindex;
vif_index = vif_index = pim_if_find_vifindex_by_ifindex(pim, ifindex);
pim_if_find_vifindex_by_ifindex(pim, ifindex);
/* Pass Current selected NH vif index to mroute download /* Pass Current selected NH vif index to mroute download
*/ */
if (vif_index) if (vif_index)
pim_scan_individual_oil(up->channel_oil, pim_scan_individual_oil(up->channel_oil, vif_index);
vif_index);
else { else {
if (PIM_DEBUG_PIM_NHT) if (PIM_DEBUG_PIM_NHT)
zlog_debug( zlog_debug(
"%s: NHT upstream %s channel_oil IIF %s vif_index is not valid", "%s: NHT upstream %s channel_oil IIF %s vif_index is not valid",
__PRETTY_FUNCTION__, up->sg_str, __PRETTY_FUNCTION__, up->sg_str,
up->rpf.source_nexthop up->rpf.source_nexthop.interface->name);
.interface->name);
} }
} }
@ -353,8 +342,7 @@ static int pim_update_upstream_nh(struct pim_instance *pim,
nbr = pim_neighbor_find(old.source_nexthop.interface, nbr = pim_neighbor_find(old.source_nexthop.interface,
old.rpf_addr.u.prefix4); old.rpf_addr.u.prefix4);
if (nbr) if (nbr)
pim_jp_agg_remove_group(nbr->upstream_jp_agg, pim_jp_agg_remove_group(nbr->upstream_jp_agg, up);
up);
/* /*
* We have detected a case where we might need to rescan * We have detected a case where we might need to rescan
@ -378,44 +366,49 @@ static int pim_update_upstream_nh(struct pim_instance *pim,
__PRETTY_FUNCTION__); __PRETTY_FUNCTION__);
/* /*
RFC 4601: 4.5.7. Sending (S,G) Join/Prune * RFC 4601: 4.5.7. Sending (S,G) Join/Prune Messages
Messages *
* Transitions from Joined State
Transitions from Joined State *
* RPF'(S,G) changes not due to an Assert
RPF'(S,G) changes not due to an Assert *
* The upstream (S,G) state machine remains in Joined
The upstream (S,G) state machine remains in * state. Send Join(S,G) to the new upstream
Joined * neighbor, which is the new value of RPF'(S,G).
state. Send Join(S,G) to the new upstream * Send Prune(S,G) to the old upstream neighbor, which
neighbor, which is * is the old value of RPF'(S,G). Set the Join
the new value of RPF'(S,G). Send Prune(S,G) * Timer (JT) to expire after t_periodic seconds.
to the old
upstream neighbor, which is the old value of
RPF'(S,G). Set
the Join Timer (JT) to expire after
t_periodic seconds.
*/ */
pim_jp_agg_switch_interface(&old, &up->rpf, up); pim_jp_agg_switch_interface(&old, &up->rpf, up);
pim_upstream_join_timer_restart(up, &old); pim_upstream_join_timer_restart(up, &old);
} /* up->join_state == PIM_UPSTREAM_JOINED */ } /* up->join_state == PIM_UPSTREAM_JOINED */
/* FIXME can join_desired actually be changed by /*
pim_rpf_update() * FIXME can join_desired actually be changed by
returning PIM_RPF_CHANGED ? */ * pim_rpf_update() returning PIM_RPF_CHANGED ?
*/
pim_upstream_update_join_desired(pim, up); pim_upstream_update_join_desired(pim, up);
} /* PIM_RPF_CHANGED */ } /* PIM_RPF_CHANGED */
if (PIM_DEBUG_PIM_NHT) { if (PIM_DEBUG_PIM_NHT) {
zlog_debug( zlog_debug("%s: NHT upstream %s(%s) old ifp %s new ifp %s",
"%s: NHT upstream %s(%s) old ifp %s new ifp %s",
__PRETTY_FUNCTION__, up->sg_str, pim->vrf->name, __PRETTY_FUNCTION__, up->sg_str, pim->vrf->name,
old.source_nexthop.interface->name, old.source_nexthop.interface->name,
up->rpf.source_nexthop.interface->name); up->rpf.source_nexthop.interface->name);
} }
} /* for (pnc->upstream_list) */
return HASHWALK_CONTINUE;
}
static int pim_update_upstream_nh(struct pim_instance *pim,
struct pim_nexthop_cache *pnc)
{
struct listnode *node, *ifnode;
struct interface *ifp;
hash_walk(pnc->upstream_hash, pim_update_upstream_nh_helper, pim);
for (ALL_LIST_ELEMENTS_RO(vrf_iflist(pim->vrf_id), ifnode, ifp)) for (ALL_LIST_ELEMENTS_RO(vrf_iflist(pim->vrf_id), ifnode, ifp))
if (ifp->info) { if (ifp->info) {
@ -809,9 +802,9 @@ int pim_parse_nexthop_update(int command, struct zclient *zclient,
char buf[PREFIX2STR_BUFFER]; char buf[PREFIX2STR_BUFFER];
prefix2str(&p, buf, sizeof(buf)); prefix2str(&p, buf, sizeof(buf));
zlog_debug( zlog_debug(
"%s: NHT Update for %s(%s) num_nh %d num_pim_nh %d vrf:%d up %d rp %d", "%s: NHT Update for %s(%s) num_nh %d num_pim_nh %d vrf:%d up %ld rp %d",
__PRETTY_FUNCTION__, buf, pim->vrf->name, nexthop_num, __PRETTY_FUNCTION__, buf, pim->vrf->name, nexthop_num,
pnc->nexthop_num, vrf_id, listcount(pnc->upstream_list), pnc->nexthop_num, vrf_id, pnc->upstream_hash->count,
listcount(pnc->rp_list)); listcount(pnc->rp_list));
} }
@ -819,7 +812,7 @@ int pim_parse_nexthop_update(int command, struct zclient *zclient,
if (listcount(pnc->rp_list)) if (listcount(pnc->rp_list))
pim_update_rp_nh(pim, pnc); pim_update_rp_nh(pim, pnc);
if (listcount(pnc->upstream_list)) if (pnc->upstream_hash->count)
pim_update_upstream_nh(pim, pnc); pim_update_upstream_nh(pim, pnc);
return 0; return 0;

View File

@ -43,7 +43,7 @@ struct pim_nexthop_cache {
#define PIM_NEXTHOP_VALID (1 << 0) #define PIM_NEXTHOP_VALID (1 << 0)
struct list *rp_list; struct list *rp_list;
struct list *upstream_list; struct hash *upstream_hash;
}; };
int pim_parse_nexthop_update(int command, struct zclient *zclient, int pim_parse_nexthop_update(int command, struct zclient *zclient,

View File

@ -48,13 +48,14 @@
/* Cleanup pim->rpf_hash each node data */ /* Cleanup pim->rpf_hash each node data */
void pim_rp_list_hash_clean(void *data) void pim_rp_list_hash_clean(void *data)
{ {
struct pim_nexthop_cache *pnc; struct pim_nexthop_cache *pnc = (struct pim_nexthop_cache *)data;
list_delete(pnc->rp_list); list_delete(pnc->rp_list);
pnc->rp_list = NULL; pnc->rp_list = NULL;
list_delete(pnc->upstream_list); hash_clean(pnc->upstream_hash, NULL);
pnc->upstream_list = NULL; hash_free(pnc->upstream_hash);
pnc->upstream_hash = NULL;
XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc); XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc);
} }

View File

@ -1527,7 +1527,7 @@ void pim_upstream_find_new_rpf(struct pim_instance *pim)
} }
} }
static unsigned int pim_upstream_hash_key(void *arg) unsigned int pim_upstream_hash_key(void *arg)
{ {
struct pim_upstream *up = (struct pim_upstream *)arg; struct pim_upstream *up = (struct pim_upstream *)arg;
@ -1545,7 +1545,7 @@ void pim_upstream_terminate(struct pim_instance *pim)
pim->upstream_hash = NULL; pim->upstream_hash = NULL;
} }
static int pim_upstream_equal(const void *arg1, const void *arg2) int pim_upstream_equal(const void *arg1, const void *arg2)
{ {
const struct pim_upstream *up1 = (const struct pim_upstream *)arg1; const struct pim_upstream *up1 = (const struct pim_upstream *)arg1;
const struct pim_upstream *up2 = (const struct pim_upstream *)arg2; const struct pim_upstream *up2 = (const struct pim_upstream *)arg2;

View File

@ -222,4 +222,7 @@ void pim_upstream_remove_lhr_star_pimreg(struct pim_instance *pim,
void pim_upstream_spt_prefix_list_update(struct pim_instance *pim, void pim_upstream_spt_prefix_list_update(struct pim_instance *pim,
struct prefix_list *pl); struct prefix_list *pl);
unsigned int pim_upstream_hash_key(void *arg);
int pim_upstream_equal(const void *arg1, const void *arg2);
#endif /* PIM_UPSTREAM_H */ #endif /* PIM_UPSTREAM_H */