From 9dc1cb0424d5b7033dd9a58aaf2165d302e20fd6 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 19 Feb 2020 09:57:43 -0500 Subject: [PATCH 01/14] pimd: Put subdir.am into alphabetical order The subdir.am had lost alphabetical order. Put this back for easy finding in the future. Signed-off-by: Donald Sharp --- pimd/subdir.am | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pimd/subdir.am b/pimd/subdir.am index b5d135d032..db43f0a7c6 100644 --- a/pimd/subdir.am +++ b/pimd/subdir.am @@ -34,6 +34,7 @@ pimd_libpim_a_SOURCES = \ pimd/pim_jp_agg.c \ pimd/pim_macro.c \ pimd/pim_memory.c \ + pimd/pim_mlag.c \ pimd/pim_mroute.c \ pimd/pim_msdp.c \ pimd/pim_msdp_packet.c \ @@ -62,7 +63,6 @@ pimd_libpim_a_SOURCES = \ pimd/pim_zebra.c \ pimd/pim_zlookup.c \ pimd/pim_vxlan.c \ - pimd/pim_mlag.c \ pimd/pimd.c \ # end @@ -88,6 +88,7 @@ noinst_HEADERS += \ pimd/pim_jp_agg.h \ pimd/pim_macro.h \ pimd/pim_memory.h \ + pimd/pim_mlag.h \ pimd/pim_mroute.h \ pimd/pim_msdp.h \ pimd/pim_msdp_packet.h \ @@ -115,7 +116,6 @@ noinst_HEADERS += \ pimd/pim_zebra.h \ pimd/pim_zlookup.h \ pimd/pim_vxlan.h \ - pimd/pim_mlag.h \ pimd/pim_vxlan_instance.h \ pimd/pimd.h \ pimd/mtracebis_netlink.h \ From a054f6d73e43f70e7e10f026d7cc3f3b09f3fc09 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 20 Dec 2019 07:57:28 -0500 Subject: [PATCH 02/14] pimd: There is nothing to do with a WRVIFWHOLE for iifp of pimreg When the WRVIFWHOLE callback is made with a iifp of the pimreg device we *know* that the packet is a PIM Register packet ( see net/ipv4/ipmr.c for kernel behavior ). As such we know that we will shortly read the pim register packet and handle it through those mechanics. There is nothing to do here so we can move along. Ticket: CM-27729 Signed-off-by: Donald Sharp --- pimd/pim_mroute.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pimd/pim_mroute.c b/pimd/pim_mroute.c index 14f8a8312b..d162f0f52e 100644 --- a/pimd/pim_mroute.c +++ b/pimd/pim_mroute.c @@ -443,6 +443,7 @@ static int pim_mroute_msg_wrvifwhole(int fd, struct interface *ifp, { const struct ip *ip_hdr = (const struct ip *)buf; struct pim_interface *pim_ifp; + struct pim_instance *pim; struct pim_ifchannel *ch; struct pim_upstream *up; struct prefix_sg star_g; @@ -465,16 +466,18 @@ static int pim_mroute_msg_wrvifwhole(int fd, struct interface *ifp, star_g = sg; star_g.src.s_addr = INADDR_ANY; -#if 0 - ch = pim_ifchannel_find(ifp, &star_g); - if (ch) - { - if (PIM_DEBUG_MROUTE) - zlog_debug ("WRVIFWHOLE (*,G)=%s found ifchannel on interface %s", - pim_str_sg_dump (&star_g), ifp->name); - return -1; - } -#endif + + pim = pim_ifp->pim; + /* + * If the incoming interface is the pimreg, then + * we know the callback is associated with a pim register + * packet and there is nothing to do here as that + * normal pim processing will see the packet and allow + * us to do the right thing. + */ + if (ifp == pim->regiface) { + return 0; + } up = pim_upstream_find(pim_ifp->pim, &sg); if (up) { From 17823cdd2c943746ffee0a6bf87b75a3a8b1c0ff Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 19 Feb 2020 09:52:17 -0500 Subject: [PATCH 03/14] pimd: Add accidently missed code during upstreaming process There was some code missed during the upstreaming process due to code squash. Identify and put into a commit to keep code consistent and correct. Signed-off-by: Satheesh Kumar K Signed-off-by: Anuradha Karuppiah Signed-off-by: Donald Sharp --- pimd/pim_cmd.c | 269 ++++++++++++++++++++----------------- pimd/pim_ifchannel.c | 3 + pimd/pim_mlag.c | 19 +-- pimd/pim_mlag.h | 6 + pimd/pim_oil.c | 5 +- pimd/pim_oil.h | 4 +- pimd/pim_vty.c | 5 + pimd/pim_zpthread.c | 225 +++++++++++++++++++++++++++++++ pimd/pimd.c | 2 + pimd/subdir.am | 1 + zebra/zebra_mlag.h | 1 + zebra/zebra_mlag_private.c | 2 + 12 files changed, 396 insertions(+), 146 deletions(-) create mode 100644 pimd/pim_zpthread.c diff --git a/pimd/pim_cmd.c b/pimd/pim_cmd.c index a3e5151a78..2de5a7923b 100644 --- a/pimd/pim_cmd.c +++ b/pimd/pim_cmd.c @@ -909,7 +909,7 @@ static void igmp_show_interface_join(struct pim_instance *pim, struct vty *vty) static void pim_show_interfaces_single(struct pim_instance *pim, struct vty *vty, const char *ifname, - bool uj) + bool mlag, bool uj) { struct in_addr ifaddr; struct interface *ifp; @@ -952,6 +952,9 @@ static void pim_show_interfaces_single(struct pim_instance *pim, if (!pim_ifp) continue; + if (mlag == true && pim_ifp->activeactive == false) + continue; + if (strcmp(ifname, "detail") && strcmp(ifname, ifp->name)) continue; @@ -1380,7 +1383,7 @@ static void igmp_show_statistics(struct pim_instance *pim, struct vty *vty, } static void pim_show_interfaces(struct pim_instance *pim, struct vty *vty, - bool uj) + bool mlag, bool uj) { struct interface *ifp; struct pim_interface *pim_ifp; @@ -1400,6 +1403,9 @@ static void pim_show_interfaces(struct pim_instance *pim, struct vty *vty, if (!pim_ifp) continue; + if (mlag == true && pim_ifp->activeactive == false) + continue; + pim_nbrs = pim_ifp->pim_neighbor_list->count; pim_ifchannels = pim_if_ifchannel_count(pim_ifp); fhr = 0; @@ -4295,6 +4301,113 @@ DEFUN (show_ip_igmp_statistics, return CMD_SUCCESS; } +DEFUN (show_ip_pim_mlag_summary, + show_ip_pim_mlag_summary_cmd, + "show ip pim mlag summary [json]", + SHOW_STR + IP_STR + PIM_STR + "MLAG\n" + "status and stats\n" + JSON_STR) +{ + bool uj = use_json(argc, argv); + char role_buf[MLAG_ROLE_STRSIZE]; + char addr_buf[INET_ADDRSTRLEN]; + + if (uj) { + json_object *json = NULL; + json_object *json_stat = NULL; + + json = json_object_new_object(); + if (router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP) + json_object_boolean_true_add(json, "mlagConnUp"); + if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) + json_object_boolean_true_add(json, "mlagPeerConnUp"); + if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) + json_object_boolean_true_add(json, "mlagPeerZebraUp"); + json_object_string_add(json, "mlagRole", + mlag_role2str(router->mlag_role, + role_buf, sizeof(role_buf))); + inet_ntop(AF_INET, &router->local_vtep_ip, + addr_buf, INET_ADDRSTRLEN); + json_object_string_add(json, "localVtepIp", addr_buf); + inet_ntop(AF_INET, &router->anycast_vtep_ip, + addr_buf, INET_ADDRSTRLEN); + json_object_string_add(json, "anycastVtepIp", addr_buf); + json_object_string_add(json, "peerlinkRif", + router->peerlink_rif); + + json_stat = json_object_new_object(); + json_object_int_add(json_stat, "mlagConnFlaps", + router->mlag_stats.mlagd_session_downs); + json_object_int_add(json_stat, "mlagPeerConnFlaps", + router->mlag_stats.peer_session_downs); + json_object_int_add(json_stat, "mlagPeerZebraFlaps", + router->mlag_stats.peer_zebra_downs); + json_object_int_add(json_stat, "mrouteAddRx", + router->mlag_stats.msg.mroute_add_rx); + json_object_int_add(json_stat, "mrouteAddTx", + router->mlag_stats.msg.mroute_add_tx); + json_object_int_add(json_stat, "mrouteDelRx", + router->mlag_stats.msg.mroute_del_rx); + json_object_int_add(json_stat, "mrouteDelTx", + router->mlag_stats.msg.mroute_del_tx); + json_object_int_add(json_stat, "mlagStatusUpdates", + router->mlag_stats.msg.mlag_status_updates); + json_object_int_add(json_stat, "peerZebraStatusUpdates", + router->mlag_stats.msg.peer_zebra_status_updates); + json_object_int_add(json_stat, "pimStatusUpdates", + router->mlag_stats.msg.pim_status_updates); + json_object_int_add(json_stat, "vxlanUpdates", + router->mlag_stats.msg.vxlan_updates); + json_object_object_add(json, "connStats", json_stat); + + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json, JSON_C_TO_STRING_PRETTY)); + json_object_free(json); + return CMD_SUCCESS; + } + + vty_out(vty, "MLAG daemon connection: %s\n", + (router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP) + ? "up" : "down"); + vty_out(vty, "MLAG peer state: %s\n", + (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) + ? "up" : "down"); + vty_out(vty, "Zebra peer state: %s\n", + (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) + ? "up" : "down"); + vty_out(vty, "MLAG role: %s\n", + mlag_role2str(router->mlag_role, role_buf, sizeof(role_buf))); + inet_ntop(AF_INET, &router->local_vtep_ip, + addr_buf, INET_ADDRSTRLEN); + vty_out(vty, "Local VTEP IP: %s\n", addr_buf); + inet_ntop(AF_INET, &router->anycast_vtep_ip, + addr_buf, INET_ADDRSTRLEN); + vty_out(vty, "Anycast VTEP IP: %s\n", addr_buf); + vty_out(vty, "Peerlink: %s\n", router->peerlink_rif); + vty_out(vty, "Session flaps: mlagd: %d mlag-peer: %d zebra-peer: %d\n", + router->mlag_stats.mlagd_session_downs, + router->mlag_stats.peer_session_downs, + router->mlag_stats.peer_zebra_downs); + vty_out(vty, "Message Statistics:\n"); + vty_out(vty, " mroute adds: rx: %d, tx: %d\n", + router->mlag_stats.msg.mroute_add_rx, + router->mlag_stats.msg.mroute_add_tx); + vty_out(vty, " mroute dels: rx: %d, tx: %d\n", + router->mlag_stats.msg.mroute_del_rx, + router->mlag_stats.msg.mroute_del_tx); + vty_out(vty, " peer zebra status updates: %d\n", + router->mlag_stats.msg.peer_zebra_status_updates); + vty_out(vty, " PIM status updates: %d\n", + router->mlag_stats.msg.pim_status_updates); + vty_out(vty, " VxLAN updates: %d\n", + router->mlag_stats.msg.vxlan_updates); + + return CMD_SUCCESS; +} + DEFUN (show_ip_pim_assert, show_ip_pim_assert_cmd, "show ip pim [vrf NAME] assert", @@ -4377,10 +4490,11 @@ DEFUN (show_ip_pim_assert_winner_metric, DEFUN (show_ip_pim_interface, show_ip_pim_interface_cmd, - "show ip pim [vrf NAME] interface [detail|WORD] [json]", + "show ip pim [mlag] [vrf NAME] interface [detail|WORD] [json]", SHOW_STR IP_STR PIM_STR + "MLAG\n" VRF_CMD_HELP_STR "PIM interface information\n" "Detailed output\n" @@ -4390,36 +4504,47 @@ DEFUN (show_ip_pim_interface, int idx = 2; struct vrf *vrf = pim_cmd_lookup_vrf(vty, argv, argc, &idx); bool uj = use_json(argc, argv); + bool mlag = false; if (!vrf) return CMD_WARNING; + if (argv_find(argv, argc, "mlag", &idx)) + mlag = true; + if (argv_find(argv, argc, "WORD", &idx) || argv_find(argv, argc, "detail", &idx)) - pim_show_interfaces_single(vrf->info, vty, argv[idx]->arg, uj); + pim_show_interfaces_single(vrf->info, vty, argv[idx]->arg, mlag, + uj); else - pim_show_interfaces(vrf->info, vty, uj); + pim_show_interfaces(vrf->info, vty, mlag, uj); return CMD_SUCCESS; } DEFUN (show_ip_pim_interface_vrf_all, show_ip_pim_interface_vrf_all_cmd, - "show ip pim vrf all interface [detail|WORD] [json]", + "show ip pim [mlag] vrf all interface [detail|WORD] [json]", SHOW_STR IP_STR PIM_STR + "MLAG\n" VRF_CMD_HELP_STR "PIM interface information\n" "Detailed output\n" "interface name\n" JSON_STR) { - int idx = 6; + int idx = 2; bool uj = use_json(argc, argv); struct vrf *vrf; bool first = true; + bool mlag = false; + if (argv_find(argv, argc, "mlag", &idx)) + mlag = true; + + idx = 6; if (uj) vty_out(vty, "{ "); RB_FOREACH (vrf, vrf_name_head, &vrfs_by_name) { @@ -4433,9 +4558,9 @@ DEFUN (show_ip_pim_interface_vrf_all, if (argv_find(argv, argc, "WORD", &idx) || argv_find(argv, argc, "detail", &idx)) pim_show_interfaces_single(vrf->info, vty, - argv[idx]->arg, uj); + argv[idx]->arg, mlag, uj); else - pim_show_interfaces(vrf->info, vty, uj); + pim_show_interfaces(vrf->info, vty, mlag, uj); } if (uj) vty_out(vty, "}\n"); @@ -4625,113 +4750,6 @@ DEFUN (show_ip_pim_local_membership, return CMD_SUCCESS; } -DEFUN (show_ip_pim_mlag_summary, - show_ip_pim_mlag_summary_cmd, - "show ip pim mlag summary [json]", - SHOW_STR - IP_STR - PIM_STR - "MLAG\n" - "status and stats\n" - JSON_STR) -{ - bool uj = use_json(argc, argv); - char role_buf[MLAG_ROLE_STRSIZE]; - char addr_buf[INET_ADDRSTRLEN]; - - if (uj) { - json_object *json = NULL; - json_object *json_stat = NULL; - - json = json_object_new_object(); - if (router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP) - json_object_boolean_true_add(json, "mlagConnUp"); - if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) - json_object_boolean_true_add(json, "mlagPeerConnUp"); - if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) - json_object_boolean_true_add(json, "mlagPeerZebraUp"); - json_object_string_add(json, "mlagRole", - mlag_role2str(router->mlag_role, - role_buf, sizeof(role_buf))); - inet_ntop(AF_INET, &router->local_vtep_ip, - addr_buf, INET_ADDRSTRLEN); - json_object_string_add(json, "localVtepIp", addr_buf); - inet_ntop(AF_INET, &router->anycast_vtep_ip, - addr_buf, INET_ADDRSTRLEN); - json_object_string_add(json, "anycastVtepIp", addr_buf); - json_object_string_add(json, "peerlinkRif", - router->peerlink_rif); - - json_stat = json_object_new_object(); - json_object_int_add(json_stat, "mlagConnFlaps", - router->mlag_stats.mlagd_session_downs); - json_object_int_add(json_stat, "mlagPeerConnFlaps", - router->mlag_stats.peer_session_downs); - json_object_int_add(json_stat, "mlagPeerZebraFlaps", - router->mlag_stats.peer_zebra_downs); - json_object_int_add(json_stat, "mrouteAddRx", - router->mlag_stats.msg.mroute_add_rx); - json_object_int_add(json_stat, "mrouteAddTx", - router->mlag_stats.msg.mroute_add_tx); - json_object_int_add(json_stat, "mrouteDelRx", - router->mlag_stats.msg.mroute_del_rx); - json_object_int_add(json_stat, "mrouteDelTx", - router->mlag_stats.msg.mroute_del_tx); - json_object_int_add(json_stat, "mlagStatusUpdates", - router->mlag_stats.msg.mlag_status_updates); - json_object_int_add(json_stat, "peerZebraStatusUpdates", - router->mlag_stats.msg.peer_zebra_status_updates); - json_object_int_add(json_stat, "pimStatusUpdates", - router->mlag_stats.msg.pim_status_updates); - json_object_int_add(json_stat, "vxlanUpdates", - router->mlag_stats.msg.vxlan_updates); - json_object_object_add(json, "connStats", json_stat); - - vty_out(vty, "%s\n", json_object_to_json_string_ext( - json, JSON_C_TO_STRING_PRETTY)); - json_object_free(json); - return CMD_SUCCESS; - } - - vty_out(vty, "MLAG daemon connection: %s\n", - (router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP) - ? "up" : "down"); - vty_out(vty, "MLAG peer state: %s\n", - (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) - ? "up" : "down"); - vty_out(vty, "Zebra peer state: %s\n", - (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) - ? "up" : "down"); - vty_out(vty, "MLAG role: %s\n", - mlag_role2str(router->mlag_role, role_buf, sizeof(role_buf))); - inet_ntop(AF_INET, &router->local_vtep_ip, - addr_buf, INET_ADDRSTRLEN); - vty_out(vty, "Local VTEP IP: %s\n", addr_buf); - inet_ntop(AF_INET, &router->anycast_vtep_ip, - addr_buf, INET_ADDRSTRLEN); - vty_out(vty, "Anycast VTEP IP: %s\n", addr_buf); - vty_out(vty, "Peerlink: %s\n", router->peerlink_rif); - vty_out(vty, "Session flaps: mlagd: %d mlag-peer: %d zebra-peer: %d\n", - router->mlag_stats.mlagd_session_downs, - router->mlag_stats.peer_session_downs, - router->mlag_stats.peer_zebra_downs); - vty_out(vty, "Message Statistics:\n"); - vty_out(vty, " mroute adds: rx: %d, tx: %d\n", - router->mlag_stats.msg.mroute_add_rx, - router->mlag_stats.msg.mroute_add_tx); - vty_out(vty, " mroute dels: rx: %d, tx: %d\n", - router->mlag_stats.msg.mroute_del_rx, - router->mlag_stats.msg.mroute_del_tx); - vty_out(vty, " peer zebra status updates: %d\n", - router->mlag_stats.msg.peer_zebra_status_updates); - vty_out(vty, " PIM status updates: %d\n", - router->mlag_stats.msg.pim_status_updates); - vty_out(vty, " VxLAN updates: %d\n", - router->mlag_stats.msg.vxlan_updates); - - return CMD_SUCCESS; -} - static void pim_show_mlag_up_entry_detail(struct vrf *vrf, struct vty *vty, struct pim_upstream *up, char *src_str, char *grp_str, json_object *json) @@ -8035,13 +8053,13 @@ DEFPY_HIDDEN (pim_test_sg_keepalive, return CMD_SUCCESS; } -DEFPY_HIDDEN (interface_ip_pim_activeactive, - interface_ip_pim_activeactive_cmd, - "[no$no] ip pim active-active", - NO_STR - IP_STR - PIM_STR - "Mark interface as Active-Active for MLAG operations, Hidden because not finished yet\n") +DEFPY (interface_ip_pim_activeactive, + interface_ip_pim_activeactive_cmd, + "[no$no] ip pim active-active", + NO_STR + IP_STR + PIM_STR + "Mark interface as Active-Active for MLAG operations, Hidden because not finished yet\n") { VTY_DECLVAR_CONTEXT(interface, ifp); struct pim_interface *pim_ifp; @@ -8051,6 +8069,11 @@ DEFPY_HIDDEN (interface_ip_pim_activeactive, return CMD_WARNING_CONFIG_FAILED; } + + if (PIM_DEBUG_MLAG) + zlog_debug("%sConfiguring PIM active-active on Interface: %s", + no ? "Un-":" ", ifp->name); + pim_ifp = ifp->info; if (no) pim_if_unconfigure_mlag_dualactive(pim_ifp); diff --git a/pimd/pim_ifchannel.c b/pimd/pim_ifchannel.c index ead9d6dbcc..528c93ce16 100644 --- a/pimd/pim_ifchannel.c +++ b/pimd/pim_ifchannel.c @@ -1073,6 +1073,9 @@ int pim_ifchannel_local_membership_add(struct interface *ifp, } } + /* vxlan term mroutes use ipmr-lo as local member to + * pull down multicast vxlan tunnel traffic + */ up_flags = is_vxlan ? PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM : PIM_UPSTREAM_FLAG_MASK_SRC_IGMP; ch = pim_ifchannel_add(ifp, sg, 0, up_flags); diff --git a/pimd/pim_mlag.c b/pimd/pim_mlag.c index cbde45abb8..defe41674c 100644 --- a/pimd/pim_mlag.c +++ b/pimd/pim_mlag.c @@ -312,14 +312,6 @@ static void pim_mlag_up_peer_del_all(void) list_delete(&temp); } -static int pim_mlag_signal_zpthread(void) -{ - /* XXX - This is a temporary stub; the MLAG thread code is planned for - * a separate commit - */ - return (0); -} - /* Send upstream entry to the local MLAG daemon (which will subsequently * send it to the peer MLAG switch). */ @@ -872,7 +864,7 @@ static int pim_mlag_deregister_handler(struct thread *thread) void pim_mlag_deregister(void) { /* if somebody still interested in the MLAG channel skip de-reg */ - if (router->pim_mlag_intf_cnt) + if (router->pim_mlag_intf_cnt || pim_vxlan_do_mlag_reg()) return; /* not registered; nothing do */ @@ -890,10 +882,6 @@ void pim_if_configure_mlag_dualactive(struct pim_interface *pim_ifp) if (!pim_ifp || !pim_ifp->pim || pim_ifp->activeactive == true) return; - if (PIM_DEBUG_MLAG) - zlog_debug("%s: Configuring active-active on Interface: %s", - __func__, "NULL"); - pim_ifp->activeactive = true; if (pim_ifp->pim) pim_ifp->pim->inst_mlag_intf_cnt++; @@ -919,10 +907,6 @@ void pim_if_unconfigure_mlag_dualactive(struct pim_interface *pim_ifp) if (!pim_ifp || !pim_ifp->pim || pim_ifp->activeactive == false) return; - if (PIM_DEBUG_MLAG) - zlog_debug("%s: UnConfiguring active-active on Interface: %s", - __func__, "NULL"); - pim_ifp->activeactive = false; pim_ifp->pim->inst_mlag_intf_cnt--; @@ -939,6 +923,7 @@ void pim_if_unconfigure_mlag_dualactive(struct pim_interface *pim_ifp) * De-register to Zebra */ pim_mlag_deregister(); + pim_mlag_param_reset(); } } diff --git a/pimd/pim_mlag.h b/pimd/pim_mlag.h index dab29cc9a2..4639f56826 100644 --- a/pimd/pim_mlag.h +++ b/pimd/pim_mlag.h @@ -37,6 +37,12 @@ extern void pim_mlag_deregister(void); extern int pim_zebra_mlag_process_up(void); extern int pim_zebra_mlag_process_down(void); extern int pim_zebra_mlag_handle_msg(struct stream *msg, int len); + +/* pm_zpthread.c */ +extern int pim_mlag_signal_zpthread(void); +extern void pim_zpthread_init(void); +extern void pim_zpthread_terminate(void); + extern void pim_mlag_up_local_add(struct pim_instance *pim, struct pim_upstream *upstream); extern void pim_mlag_up_local_del(struct pim_instance *pim, diff --git a/pimd/pim_oil.c b/pimd/pim_oil.c index 0618308ba8..b165bcbee7 100644 --- a/pimd/pim_oil.c +++ b/pimd/pim_oil.c @@ -33,9 +33,6 @@ #include "pim_time.h" #include "pim_vxlan.h" -// struct list *pim_channel_oil_list = NULL; -// struct hash *pim_channel_oil_hash = NULL; - static void pim_channel_update_mute(struct channel_oil *c_oil); char *pim_channel_oil_dump(struct channel_oil *c_oil, char *buf, size_t size) @@ -174,7 +171,7 @@ struct channel_oil *pim_channel_oil_add(struct pim_instance *pim, } struct channel_oil *pim_channel_oil_del(struct channel_oil *c_oil, - const char *name) + const char *name) { if (PIM_DEBUG_MROUTE) { struct prefix_sg sg = {.src = c_oil->oil.mfcc_mcastgrp, diff --git a/pimd/pim_oil.h b/pimd/pim_oil.h index 788ddaa16c..8a808afa73 100644 --- a/pimd/pim_oil.h +++ b/pimd/pim_oil.h @@ -130,7 +130,7 @@ void pim_channel_oil_change_iif(struct pim_instance *pim, struct channel_oil *c_oil, int input_vif_index, const char *name); struct channel_oil *pim_channel_oil_del(struct channel_oil *c_oil, - const char *name); + const char *name); int pim_channel_add_oif(struct channel_oil *c_oil, struct interface *oif, uint32_t proto_mask, const char *caller); @@ -146,6 +146,6 @@ void pim_channel_update_oif_mute(struct channel_oil *c_oil, void pim_channel_oil_upstream_deref(struct channel_oil *c_oil); void pim_channel_del_inherited_oif(struct channel_oil *c_oil, - struct interface *oif, const char *caller); + struct interface *oif, const char *caller); #endif /* PIM_OIL_H */ diff --git a/pimd/pim_vty.c b/pimd/pim_vty.c index b5a5089ae7..8a87dfbb55 100644 --- a/pimd/pim_vty.c +++ b/pimd/pim_vty.c @@ -117,6 +117,11 @@ int pim_debug_config_write(struct vty *vty) ++writes; } + if (PIM_DEBUG_MLAG) { + vty_out(vty, "debug pim mlag\n"); + ++writes; + } + if (PIM_DEBUG_BSM) { vty_out(vty, "debug pim bsm\n"); ++writes; diff --git a/pimd/pim_zpthread.c b/pimd/pim_zpthread.c new file mode 100644 index 0000000000..518b024749 --- /dev/null +++ b/pimd/pim_zpthread.c @@ -0,0 +1,225 @@ +/* + * PIM for Quagga + * Copyright (C) 2008 Everton da Silva Marques + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +#include "pimd.h" +#include "pim_mlag.h" +#include "pim_zebra.h" + +extern struct zclient *zclient; + +#define PIM_MLAG_POST_LIMIT 100 + +int32_t mlag_bulk_cnt; + +static void pim_mlag_zebra_fill_header(enum mlag_msg_type msg_type) +{ + uint32_t fill_msg_type = msg_type; + uint16_t data_len; + uint16_t msg_cnt = 1; + + if (msg_type == MLAG_MSG_NONE) + return; + + switch (msg_type) { + case MLAG_REGISTER: + case MLAG_DEREGISTER: + data_len = sizeof(struct mlag_msg); + break; + case MLAG_MROUTE_ADD: + data_len = sizeof(struct mlag_mroute_add); + fill_msg_type = MLAG_MROUTE_ADD_BULK; + break; + case MLAG_MROUTE_DEL: + data_len = sizeof(struct mlag_mroute_del); + fill_msg_type = MLAG_MROUTE_DEL_BULK; + break; + default: + data_len = 0; + break; + } + + stream_reset(router->mlag_stream); + /* ADD Hedaer */ + stream_putl(router->mlag_stream, fill_msg_type); + /* + * In case of Bulk actual size & msg_cnt will be updated + * just before writing onto zebra + */ + stream_putw(router->mlag_stream, data_len); + stream_putw(router->mlag_stream, msg_cnt); + + if (PIM_DEBUG_MLAG) + zlog_debug(":%s: msg_type: %d/%d len %d", + __func__, msg_type, fill_msg_type, data_len); +} + +static void pim_mlag_zebra_flush_buffer(void) +{ + uint32_t msg_type; + + /* Stream had bulk messages update the Hedaer */ + if (mlag_bulk_cnt > 1) { + /* + * No need to reset the pointer, below api reads from data[0] + */ + STREAM_GETL(router->mlag_stream, msg_type); + if (msg_type == MLAG_MROUTE_ADD_BULK) { + stream_putw_at( + router->mlag_stream, 4, + (mlag_bulk_cnt * sizeof(struct mlag_mroute_add))); + stream_putw_at(router->mlag_stream, 6, mlag_bulk_cnt); + } else if (msg_type == MLAG_MROUTE_DEL_BULK) { + stream_putw_at( + router->mlag_stream, 4, + (mlag_bulk_cnt * sizeof(struct mlag_mroute_del))); + stream_putw_at(router->mlag_stream, 6, mlag_bulk_cnt); + } else { + flog_err(EC_LIB_ZAPI_ENCODE, + "unknown bulk message type %d bulk_count %d", + msg_type, mlag_bulk_cnt); + stream_reset(router->mlag_stream); + mlag_bulk_cnt = 0; + return; + } + } + + zclient_send_mlag_data(zclient, router->mlag_stream); +stream_failure: + stream_reset(router->mlag_stream); + mlag_bulk_cnt = 0; +} + +/* + * Only ROUTE add & Delete will be bulked. + * Buffer will be flushed, when + * 1) there were no messages in the queue + * 2) Curr_msg_type != prev_msg_type + */ + +static void pim_mlag_zebra_check_for_buffer_flush(uint32_t curr_msg_type, + uint32_t prev_msg_type) +{ + /* First Message, keep bulking */ + if (prev_msg_type == MLAG_MSG_NONE) { + mlag_bulk_cnt = 1; + return; + } + + /*msg type is route add & delete, keep bulking */ + if (curr_msg_type == prev_msg_type + && (curr_msg_type == MLAG_MROUTE_ADD + || curr_msg_type == MLAG_MROUTE_DEL)) { + mlag_bulk_cnt++; + return; + } + + pim_mlag_zebra_flush_buffer(); +} + +/* + * Thsi thread reads the clients data from the Gloabl queue and encodes with + * protobuf and pass on to the MLAG socket. + */ +static int pim_mlag_zthread_handler(struct thread *event) +{ + struct stream *read_s; + uint32_t wr_count = 0; + uint32_t prev_msg_type = MLAG_MSG_NONE; + uint32_t curr_msg_type = MLAG_MSG_NONE; + + router->zpthread_mlag_write = NULL; + wr_count = stream_fifo_count_safe(router->mlag_fifo); + + if (PIM_DEBUG_MLAG) + zlog_debug(":%s: Processing MLAG write, %d messages in queue", + __func__, wr_count); + + if (wr_count == 0) + return 0; + + for (wr_count = 0; wr_count < PIM_MLAG_POST_LIMIT; wr_count++) { + /* FIFO is empty,wait for teh message to be add */ + if (stream_fifo_count_safe(router->mlag_fifo) == 0) + break; + + read_s = stream_fifo_pop_safe(router->mlag_fifo); + if (!read_s) { + zlog_debug(":%s: Got a NULL Messages, some thing wrong", + __func__); + break; + } + STREAM_GETL(read_s, curr_msg_type); + /* + * Check for Buffer Overflow, + * MLAG Can't process more than 'PIM_MLAG_BUF_LIMIT' bytes + */ + if (router->mlag_stream->endp + read_s->endp + ZEBRA_HEADER_SIZE + > MLAG_BUF_LIMIT) + pim_mlag_zebra_flush_buffer(); + + pim_mlag_zebra_check_for_buffer_flush(curr_msg_type, + prev_msg_type); + + /* + * First message to Buffer, fill the Header + */ + if (router->mlag_stream->endp == 0) + pim_mlag_zebra_fill_header(curr_msg_type); + + /* + * add the data now + */ + stream_put(router->mlag_stream, read_s->data + read_s->getp, + read_s->endp - read_s->getp); + + stream_free(read_s); + prev_msg_type = curr_msg_type; + } + +stream_failure: + /* + * we are here , because + * 1. Queue might be empty + * 2. we crossed the max Q Read limit + * In any acse flush the buffer towards zebra + */ + pim_mlag_zebra_flush_buffer(); + + if (wr_count >= PIM_MLAG_POST_LIMIT) + pim_mlag_signal_zpthread(); + + return 0; +} + + +int pim_mlag_signal_zpthread(void) +{ + if (router->master) { + if (PIM_DEBUG_MLAG) + zlog_debug(":%s: Scheduling PIM MLAG write Thread", + __func__); + thread_add_event(router->master, pim_mlag_zthread_handler, NULL, + 0, &router->zpthread_mlag_write); + } + return (0); +} diff --git a/pimd/pimd.c b/pimd/pimd.c index a2af66fdc7..889d63c518 100644 --- a/pimd/pimd.c +++ b/pimd/pimd.c @@ -102,6 +102,8 @@ void pim_router_init(void) router->packet_process = PIM_DEFAULT_PACKET_PROCESS; router->register_probe_time = PIM_REGISTER_PROBE_TIME_DEFAULT; router->vrf_id = VRF_DEFAULT; + router->pim_mlag_intf_cnt = 0; + router->connected_to_mlag = false; } void pim_router_terminate(void) diff --git a/pimd/subdir.am b/pimd/subdir.am index db43f0a7c6..0e30590079 100644 --- a/pimd/subdir.am +++ b/pimd/subdir.am @@ -63,6 +63,7 @@ pimd_libpim_a_SOURCES = \ pimd/pim_zebra.c \ pimd/pim_zlookup.c \ pimd/pim_vxlan.c \ + pimd/pim_zpthread.c \ pimd/pimd.c \ # end diff --git a/zebra/zebra_mlag.h b/zebra/zebra_mlag.h index c35fa15561..d44a400666 100644 --- a/zebra/zebra_mlag.h +++ b/zebra/zebra_mlag.h @@ -46,6 +46,7 @@ extern uint32_t mlag_rd_buf_offset; static inline void zebra_mlag_reset_read_buffer(void) { + memset(mlag_wr_buffer, 0, ZEBRA_MLAG_BUF_LIMIT); mlag_rd_buf_offset = 0; } diff --git a/zebra/zebra_mlag_private.c b/zebra/zebra_mlag_private.c index 3024407ada..83d0d44097 100644 --- a/zebra/zebra_mlag_private.c +++ b/zebra/zebra_mlag_private.c @@ -78,6 +78,8 @@ static int zebra_mlag_read(struct thread *thread) uint32_t h_msglen; uint32_t tot_len, curr_len = mlag_rd_buf_offset; + zrouter.mlag_info.t_read = NULL; + /* * Received message in sock_stream looks like below * | len-1 (4 Bytes) | payload-1 (len-1) | From aeb672466fb7e38a846db757556fbe64d28e58ff Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 22 Nov 2019 16:16:26 -0500 Subject: [PATCH 04/14] pimd: Skip nexthop lookup for register source in some cases There exists the possibility that a RP exists as a anycast pair for a lan segment. As such one side may receive the register and properly handle the registration mechanics. The one that does not receive the register packets will still get S,G state and WRVIFWHOLE upcalls across the lan. In this case notice that we have not received the Registration packets and prevent nexthop lookups. Ticket: CM-27466 Signed-off-by: Donald Sharp --- pimd/pim_mroute.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pimd/pim_mroute.c b/pimd/pim_mroute.c index d162f0f52e..f366fdbe79 100644 --- a/pimd/pim_mroute.c +++ b/pimd/pim_mroute.c @@ -505,8 +505,17 @@ static int pim_mroute_msg_wrvifwhole(int fd, struct interface *ifp, * the pimreg period, so I believe we can ignore this packet */ if (!PIM_UPSTREAM_FLAG_TEST_FHR(up->flags)) { - // No if channel, but upstream we are at the RP. - if (pim_nexthop_lookup(pim_ifp->pim, &source, + /* + * No if channel, but upstream we are at the RP. + * + * This could be a anycast RP too and we may + * not have received a register packet from + * the source here at all. So gracefully + * bow out of doing a nexthop lookup and + * setting the SPTBIT to true + */ + if (up->upstream_register.s_addr != INADDR_ANY && + pim_nexthop_lookup(pim_ifp->pim, &source, up->upstream_register, 0)) { pim_register_stop_send(source.interface, &sg, pim_ifp->primary_address, From ec85b101e6ba032c17981b9f354f358a0bc67c01 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Wed, 10 Jul 2019 08:00:04 -0700 Subject: [PATCH 05/14] pimd: run DF election only on (*, G) termination mroutes (S,G) entries that inherit ipmr-lo into the OIL also inherit the DF role from the parent (*, G) entry. This change is done primarily to simplify the sync process and to prevent the MLAG peers from having to track (S, G) activity etc. Signed-off-by: Anuradha Karuppiah --- pimd/pim_upstream.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/pimd/pim_upstream.c b/pimd/pim_upstream.c index cf333ffccf..c905dd146a 100644 --- a/pimd/pim_upstream.c +++ b/pimd/pim_upstream.c @@ -853,9 +853,23 @@ static struct pim_upstream *pim_upstream_new(struct pim_instance *pim, up->ifchannels = list_new(); up->ifchannels->cmp = (int (*)(void *, void *))pim_ifchannel_compare; - if (up->sg.src.s_addr != INADDR_ANY) + if (up->sg.src.s_addr != INADDR_ANY) { wheel_add_item(pim->upstream_sg_wheel, up); + /* Inherit the DF role from the parent (*, G) entry for + * VxLAN BUM groups + */ + if (up->parent + && PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->parent->flags) + && PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->parent->flags)) { + PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(up->flags); + if (PIM_DEBUG_VXLAN) + zlog_debug( + "upstream %s inherited mlag non-df flag from parent", + up->sg_str); + } + } + if (PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up->flags) || PIM_UPSTREAM_FLAG_TEST_SRC_NOCACHE(up->flags)) { pim_upstream_fill_static_iif(up, incoming); @@ -885,19 +899,6 @@ static struct pim_upstream *pim_upstream_new(struct pim_instance *pim, } } - /* If (S, G) inherit the MLAG_VXLAN from the parent - * (*, G) entry. - */ - if ((up->sg.src.s_addr != INADDR_ANY) && - up->parent && - PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->parent->flags) && - !PIM_UPSTREAM_FLAG_TEST_SRC_VXLAN_ORIG(up->flags)) { - PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(up->flags); - if (PIM_DEBUG_VXLAN) - zlog_debug("upstream %s inherited mlag vxlan flag from parent", - up->sg_str); - } - /* send the entry to the MLAG peer */ /* XXX - duplicate send is possible here if pim_rpf_update * successfully resolved the nexthop From 22c35834ea43ebec121c0c042bbb2ef3b6e25591 Mon Sep 17 00:00:00 2001 From: Satheesh Kumar K Date: Mon, 19 Aug 2019 02:06:00 -0700 Subject: [PATCH 06/14] pimd: Use PIM EVPN MLAG Infra for syncing PIM MLAG Entries Initially, MLAG Sync is happened at pim_ifchannel, this is mainly to support even config mismatches(missing configuration of dual active). But this causes more syncs for each entry. and also it is not In-line with PIM EVPN. to avoid that moving to pm_upstream based syncing. Signed-off-by: Satheesh Kumar K --- pimd/pim_iface.h | 1 + pimd/pim_ifchannel.c | 34 ++++++++++++++++ pimd/pim_mlag.c | 95 ++++++++++++++++++++++++++++++++++++++++++-- pimd/pim_mlag.h | 11 ++--- pimd/pim_upstream.c | 25 +++++++++++- pimd/pim_upstream.h | 2 + 6 files changed, 158 insertions(+), 10 deletions(-) diff --git a/pimd/pim_iface.h b/pimd/pim_iface.h index 1b76b52305..570bf5eac3 100644 --- a/pimd/pim_iface.h +++ b/pimd/pim_iface.h @@ -55,6 +55,7 @@ #define PIM_IF_DONT_PIM_CAN_DISABLE_JOIN_SUPRESSION(options) ((options) &= ~PIM_IF_MASK_PIM_CAN_DISABLE_JOIN_SUPRESSION) #define PIM_I_am_DR(pim_ifp) (pim_ifp)->pim_dr_addr.s_addr == (pim_ifp)->primary_address.s_addr +#define PIM_I_am_DualActive(pim_ifp) (pim_ifp)->activeactive == true struct pim_iface_upstream_switch { struct in_addr address; diff --git a/pimd/pim_ifchannel.c b/pimd/pim_ifchannel.c index 528c93ce16..e23d3dc3da 100644 --- a/pimd/pim_ifchannel.c +++ b/pimd/pim_ifchannel.c @@ -43,6 +43,7 @@ #include "pim_upstream.h" #include "pim_ssm.h" #include "pim_rp.h" +#include "pim_mlag.h" RB_GENERATE(pim_ifchannel_rb, pim_ifchannel, pim_ifp_rb, pim_ifchannel_compare); @@ -130,6 +131,21 @@ void pim_ifchannel_delete(struct pim_ifchannel *ch) pim_ifp = ch->interface->info; + if (PIM_I_am_DualActive(pim_ifp)) { + if (PIM_DEBUG_MLAG) + zlog_debug( + "%s: if-chnanel-%s is deleted from a Dual " + "active Interface", + __func__, ch->sg_str); + /* Post Delete only if it is the last Dual-active Interface */ + if (ch->upstream->dualactive_ifchannel_count == 1) { + pim_mlag_up_local_del(pim_ifp->pim, ch->upstream); + PIM_UPSTREAM_FLAG_UNSET_MLAG_INTERFACE( + ch->upstream->flags); + } + ch->upstream->dualactive_ifchannel_count--; + } + if (ch->upstream->channel_oil) { uint32_t mask = PIM_OIF_FLAG_PROTO_PIM; if (ch->upstream->flags & PIM_UPSTREAM_FLAG_MASK_SRC_IGMP) @@ -586,6 +602,24 @@ struct pim_ifchannel *pim_ifchannel_add(struct interface *ifp, else PIM_IF_FLAG_UNSET_ASSERT_TRACKING_DESIRED(ch->flags); + /* + * advertise MLAG Data to MLAG peer + */ + if (PIM_I_am_DualActive(pim_ifp)) { + up->dualactive_ifchannel_count++; + /* Sync once for upstream */ + if (up->dualactive_ifchannel_count == 1) { + PIM_UPSTREAM_FLAG_SET_MLAG_INTERFACE(up->flags); + pim_mlag_up_local_add(pim_ifp->pim, up); + } + if (PIM_DEBUG_MLAG) + zlog_debug( + "%s: New Dual active if-chnanel is added to upstream:%s " + "count:%d, flags:0x%x", + __func__, up->sg_str, + up->dualactive_ifchannel_count, up->flags); + } + if (PIM_DEBUG_PIM_TRACE) zlog_debug("%s: ifchannel %s is created ", __func__, ch->sg_str); diff --git a/pimd/pim_mlag.c b/pimd/pim_mlag.c index defe41674c..321745590d 100644 --- a/pimd/pim_mlag.c +++ b/pimd/pim_mlag.c @@ -32,6 +32,76 @@ extern struct zclient *zclient; #define PIM_MLAG_METADATA_LEN 4 +/*********************ACtual Data processing *****************************/ +/* TBD: There can be duplicate updates to FIB***/ +#define PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil) \ + do { \ + if (PIM_DEBUG_MLAG) \ + zlog_debug( \ + "%s: add Dual-active Interface to %s " \ + "to oil:%s", \ + __func__, ch->interface->name, ch->sg_str); \ + pim_channel_add_oif(ch_oil, ch->interface, \ + PIM_OIF_FLAG_PROTO_IGMP, __func__); \ + } while (0) + +#define PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil) \ + do { \ + if (PIM_DEBUG_MLAG) \ + zlog_debug( \ + "%s: del Dual-active Interface to %s " \ + "to oil:%s", \ + __func__, ch->interface->name, ch->sg_str); \ + pim_channel_del_oif(ch_oil, ch->interface, \ + PIM_OIF_FLAG_PROTO_IGMP, __func__); \ + } while (0) + + +static void pim_mlag_calculate_df_for_ifchannels(struct pim_upstream *up, + bool is_df) +{ + struct listnode *chnode; + struct listnode *chnextnode; + struct pim_ifchannel *ch; + struct pim_interface *pim_ifp = NULL; + struct channel_oil *ch_oil = NULL; + + ch_oil = (up) ? up->channel_oil : NULL; + + if (!ch_oil) + return; + + if (PIM_DEBUG_MLAG) + zlog_debug("%s: Calculating DF for Dual active if-channel%s", + __func__, up->sg_str); + + for (ALL_LIST_ELEMENTS(up->ifchannels, chnode, chnextnode, ch)) { + pim_ifp = (ch->interface) ? ch->interface->info : NULL; + if (!pim_ifp || !PIM_I_am_DualActive(pim_ifp)) + continue; + + if (is_df) + PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil); + else + PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil); + } +} + +static void pim_mlag_inherit_mlag_flags(struct pim_upstream *up, bool is_df) +{ + struct listnode *listnode; + struct pim_upstream *child; + + for (ALL_LIST_ELEMENTS_RO(up->sources, listnode, child)) { + PIM_UPSTREAM_FLAG_SET_MLAG_PEER(child->flags); + if (is_df) + PIM_UPSTREAM_FLAG_UNSET_MLAG_NON_DF(child->flags); + else + PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(child->flags); + pim_mlag_calculate_df_for_ifchannels(child, is_df); + } +} + /******************************* pim upstream sync **************************/ /* Update DF role for the upstream entry and return true on role change */ bool pim_mlag_up_df_role_update(struct pim_instance *pim, @@ -59,6 +129,15 @@ bool pim_mlag_up_df_role_update(struct pim_instance *pim, PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(up->flags); + /* + * This Upstream entry synced to peer Because of Dual-active + * Interface configuration + */ + if (PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE(up->flags)) { + pim_mlag_calculate_df_for_ifchannels(up, is_df); + pim_mlag_inherit_mlag_flags(up, is_df); + } + /* If the DF role has changed check if ipmr-lo needs to be * muted/un-muted. Active-Active devices and vxlan termination * devices (ipmr-lo) are suppressed on the non-DF. @@ -91,7 +170,8 @@ static bool pim_mlag_up_df_role_elect(struct pim_instance *pim, uint32_t local_cost; bool rv; - if (!pim_up_mlag_is_local(up)) + if (!pim_up_mlag_is_local(up) + && !PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE(up->flags)) return false; /* We are yet to rx a status update from the local MLAG daemon so @@ -417,7 +497,8 @@ static void pim_mlag_up_local_replay(void) RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) { pim = vrf->info; frr_each (rb_pim_upstream, &pim->upstream_head, up) { - if (pim_up_mlag_is_local(up)) + if (pim_up_mlag_is_local(up) + || PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE(up->flags)) pim_mlag_up_local_add_send(pim, up); } } @@ -438,7 +519,9 @@ static void pim_mlag_up_local_reeval(bool mlagd_send, const char *reason_code) RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) { pim = vrf->info; frr_each (rb_pim_upstream, &pim->upstream_head, up) { - if (!pim_up_mlag_is_local(up)) + if (!pim_up_mlag_is_local(up) + && !PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE( + up->flags)) continue; /* if role changes re-send to peer */ if (pim_mlag_up_df_role_elect(pim, up) && @@ -772,6 +855,12 @@ int pim_zebra_mlag_process_up(void) if (PIM_DEBUG_MLAG) zlog_debug("%s: Received Process-Up from Mlag", __func__); + /* + * Incase of local MLAG restart, PIM needs to replay all the data + * since MLAG is empty. + */ + router->connected_to_mlag = true; + router->mlag_flags |= PIM_MLAGF_LOCAL_CONN_UP; return 0; } diff --git a/pimd/pim_mlag.h b/pimd/pim_mlag.h index 4639f56826..eb316695f7 100644 --- a/pimd/pim_mlag.h +++ b/pimd/pim_mlag.h @@ -32,8 +32,6 @@ extern void pim_instance_mlag_init(struct pim_instance *pim); extern void pim_instance_mlag_terminate(struct pim_instance *pim); extern void pim_if_configure_mlag_dualactive(struct pim_interface *pim_ifp); extern void pim_if_unconfigure_mlag_dualactive(struct pim_interface *pim_ifp); -extern void pim_mlag_register(void); -extern void pim_mlag_deregister(void); extern int pim_zebra_mlag_process_up(void); extern int pim_zebra_mlag_process_down(void); extern int pim_zebra_mlag_handle_msg(struct stream *msg, int len); @@ -43,10 +41,13 @@ extern int pim_mlag_signal_zpthread(void); extern void pim_zpthread_init(void); extern void pim_zpthread_terminate(void); +extern void pim_mlag_register(void); +extern void pim_mlag_deregister(void); extern void pim_mlag_up_local_add(struct pim_instance *pim, - struct pim_upstream *upstream); + struct pim_upstream *upstream); extern void pim_mlag_up_local_del(struct pim_instance *pim, - struct pim_upstream *upstream); + struct pim_upstream *upstream); extern bool pim_mlag_up_df_role_update(struct pim_instance *pim, - struct pim_upstream *up, bool is_df, const char *reason); + struct pim_upstream *up, bool is_df, + const char *reason); #endif diff --git a/pimd/pim_upstream.c b/pimd/pim_upstream.c index c905dd146a..998720e8f6 100644 --- a/pimd/pim_upstream.c +++ b/pimd/pim_upstream.c @@ -141,6 +141,18 @@ static struct pim_upstream *pim_upstream_find_parent(struct pim_instance *pim, if (up) listnode_add(up->sources, child); + /* + * In case parent is MLAG entry copy the data to child + */ + if (up && PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE(up->flags)) { + PIM_UPSTREAM_FLAG_SET_MLAG_INTERFACE(child->flags); + if (PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags)) + PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(child->flags); + else + PIM_UPSTREAM_FLAG_UNSET_MLAG_NON_DF( + child->flags); + } + return up; } @@ -903,7 +915,8 @@ static struct pim_upstream *pim_upstream_new(struct pim_instance *pim, /* XXX - duplicate send is possible here if pim_rpf_update * successfully resolved the nexthop */ - if (pim_up_mlag_is_local(up)) + if (pim_up_mlag_is_local(up) + || PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE(up->flags)) pim_mlag_up_local_add(pim, up); if (PIM_DEBUG_PIM_TRACE) { @@ -918,7 +931,8 @@ static struct pim_upstream *pim_upstream_new(struct pim_instance *pim, uint32_t pim_up_mlag_local_cost(struct pim_upstream *up) { - if (!(pim_up_mlag_is_local(up))) + if (!(pim_up_mlag_is_local(up)) + && !(up->flags & PIM_UPSTREAM_FLAG_MASK_MLAG_INTERFACE)) return router->infinite_assert_metric.route_metric; if ((up->rpf.source_nexthop.interface == @@ -1752,6 +1766,7 @@ int pim_upstream_inherited_olist_decide(struct pim_instance *pim, up->sg_str); FOR_ALL_INTERFACES (pim->vrf, ifp) { + struct pim_interface *pim_ifp; if (!ifp->info) continue; @@ -1765,6 +1780,12 @@ int pim_upstream_inherited_olist_decide(struct pim_instance *pim, if (!ch && !starch) continue; + pim_ifp = ifp->info; + if (PIM_I_am_DualActive(pim_ifp) + && PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE(up->flags) + && (PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags) + || !PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))) + continue; if (pim_upstream_evaluate_join_desired_interface(up, ch, starch)) { int flag = PIM_OIF_FLAG_PROTO_PIM; diff --git a/pimd/pim_upstream.h b/pimd/pim_upstream.h index 4d693b8b64..ca693ee73f 100644 --- a/pimd/pim_upstream.h +++ b/pimd/pim_upstream.h @@ -237,6 +237,8 @@ struct pim_upstream { struct channel_oil *channel_oil; struct list *sources; struct list *ifchannels; + /* Counter for Dual active ifchannels*/ + uint32_t dualactive_ifchannel_count; enum pim_upstream_state join_state; enum pim_reg_state reg_state; From fa696b37276b7c396e55aba1a55b0b7441af14e9 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 9 Oct 2019 13:50:42 -0400 Subject: [PATCH 07/14] zebra: SO_PEERCRED is a getsockopt call This code is effectively dead code. SO_PEERCRED is a getsockopt call not *setsockopt* call. Additionally we are not doing anything with the failed setsockopt call at all. Signed-off-by: Donald Sharp --- zebra/zebra_mlag_private.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/zebra/zebra_mlag_private.c b/zebra/zebra_mlag_private.c index 83d0d44097..0f0285ed31 100644 --- a/zebra/zebra_mlag_private.c +++ b/zebra/zebra_mlag_private.c @@ -159,8 +159,6 @@ static int zebra_mlag_read(struct thread *thread) static int zebra_mlag_connect(struct thread *thread) { struct sockaddr_un svr = {0}; - struct ucred ucred; - socklen_t len = 0; /* Reset the Timer-running flag */ zrouter.mlag_info.timer_running = false; @@ -184,11 +182,8 @@ static int zebra_mlag_connect(struct thread *thread) &zrouter.mlag_info.t_read); return 0; } - len = sizeof(struct ucred); - ucred.pid = getpid(); set_nonblocking(mlag_socket); - setsockopt(mlag_socket, SOL_SOCKET, SO_PEERCRED, &ucred, len); if (IS_ZEBRA_DEBUG_MLAG) zlog_debug("%s: Connection with MLAG is established ", From 83f8a12b8ecbc3ffb285a59b6ce0a86e3a0cfb8f Mon Sep 17 00:00:00 2001 From: Satheesh Kumar K Date: Thu, 10 Oct 2019 21:33:19 -0700 Subject: [PATCH 08/14] lib, pimd, zebra: Provide some insurance against reading bad stream data This patch does two things: 1) Ensure the decoding of stream data between pim <-> zebra is properly decoded and we don't read beyond the end of the stream. 2) In zebra when we are freeing memory alloced ensure that we actually have memory to delete before we do so. Ticket: CM-27055 Signed-off-by: Satheesh Kumar K Signed-off-by: Donald Sharp --- lib/mlag.c | 28 +++++++++++---- lib/mlag.h | 9 +++-- pimd/pim_mlag.c | 13 ++++--- zebra/zebra_mlag.c | 87 +++++++++++++++++++++++++++++++++------------- 4 files changed, 96 insertions(+), 41 deletions(-) diff --git a/lib/mlag.c b/lib/mlag.c index 1daf290725..733dd41ea8 100644 --- a/lib/mlag.c +++ b/lib/mlag.c @@ -81,22 +81,33 @@ char *mlag_lib_msgid_to_str(enum mlag_msg_type msg_type, char *buf, size_t size) } -int mlag_lib_decode_mlag_hdr(struct stream *s, struct mlag_msg *msg) +int mlag_lib_decode_mlag_hdr(struct stream *s, struct mlag_msg *msg, + size_t *length) { - if (s == NULL || msg == NULL) +#define LIB_MLAG_HDR_LENGTH 8 + *length = stream_get_endp(s); + + if (s == NULL || msg == NULL || *length < LIB_MLAG_HDR_LENGTH) return -1; + *length -= LIB_MLAG_HDR_LENGTH; + STREAM_GETL(s, msg->msg_type); STREAM_GETW(s, msg->data_len); STREAM_GETW(s, msg->msg_cnt); + return 0; stream_failure: return -1; } -int mlag_lib_decode_mroute_add(struct stream *s, struct mlag_mroute_add *msg) +#define MLAG_MROUTE_ADD_LENGTH \ + (VRF_NAMSIZ + INTERFACE_NAMSIZ + 4 + 4 + 4 + 4 + 1 + 1 + 4) + +int mlag_lib_decode_mroute_add(struct stream *s, struct mlag_mroute_add *msg, + size_t *length) { - if (s == NULL || msg == NULL) + if (s == NULL || msg == NULL || *length < MLAG_MROUTE_ADD_LENGTH) return -1; STREAM_GET(msg->vrf_name, s, VRF_NAMSIZ); @@ -108,14 +119,18 @@ int mlag_lib_decode_mroute_add(struct stream *s, struct mlag_mroute_add *msg) STREAM_GETC(s, msg->am_i_dual_active); STREAM_GETL(s, msg->vrf_id); STREAM_GET(msg->intf_name, s, INTERFACE_NAMSIZ); + return 0; stream_failure: return -1; } -int mlag_lib_decode_mroute_del(struct stream *s, struct mlag_mroute_del *msg) +#define MLAG_MROUTE_DEL_LENGTH (VRF_NAMSIZ + INTERFACE_NAMSIZ + 4 + 4 + 4 + 4) + +int mlag_lib_decode_mroute_del(struct stream *s, struct mlag_mroute_del *msg, + size_t *length) { - if (s == NULL || msg == NULL) + if (s == NULL || msg == NULL || *length < MLAG_MROUTE_DEL_LENGTH) return -1; STREAM_GET(msg->vrf_name, s, VRF_NAMSIZ); @@ -124,6 +139,7 @@ int mlag_lib_decode_mroute_del(struct stream *s, struct mlag_mroute_del *msg) STREAM_GETL(s, msg->owner_id); STREAM_GETL(s, msg->vrf_id); STREAM_GET(msg->intf_name, s, INTERFACE_NAMSIZ); + return 0; stream_failure: return -1; diff --git a/lib/mlag.h b/lib/mlag.h index c531fb5b68..37bb3aa6db 100644 --- a/lib/mlag.h +++ b/lib/mlag.h @@ -125,11 +125,14 @@ struct mlag_msg { extern char *mlag_role2str(enum mlag_role role, char *buf, size_t size); extern char *mlag_lib_msgid_to_str(enum mlag_msg_type msg_type, char *buf, size_t size); -extern int mlag_lib_decode_mlag_hdr(struct stream *s, struct mlag_msg *msg); +extern int mlag_lib_decode_mlag_hdr(struct stream *s, struct mlag_msg *msg, + size_t *length); extern int mlag_lib_decode_mroute_add(struct stream *s, - struct mlag_mroute_add *msg); + struct mlag_mroute_add *msg, + size_t *length); extern int mlag_lib_decode_mroute_del(struct stream *s, - struct mlag_mroute_del *msg); + struct mlag_mroute_del *msg, + size_t *length); extern int mlag_lib_decode_mlag_status(struct stream *s, struct mlag_status *msg); extern int mlag_lib_decode_vxlan_update(struct stream *s, diff --git a/pimd/pim_mlag.c b/pimd/pim_mlag.c index 321745590d..0a6ff41114 100644 --- a/pimd/pim_mlag.c +++ b/pimd/pim_mlag.c @@ -765,8 +765,9 @@ int pim_zebra_mlag_handle_msg(struct stream *s, int len) struct mlag_msg mlag_msg; char buf[ZLOG_FILTER_LENGTH_MAX]; int rc = 0; + size_t length; - rc = mlag_lib_decode_mlag_hdr(s, &mlag_msg); + rc = mlag_lib_decode_mlag_hdr(s, &mlag_msg, &length); if (rc) return (rc); @@ -805,7 +806,7 @@ int pim_zebra_mlag_handle_msg(struct stream *s, int len) case MLAG_MROUTE_ADD: { struct mlag_mroute_add msg; - rc = mlag_lib_decode_mroute_add(s, &msg); + rc = mlag_lib_decode_mroute_add(s, &msg, &length); if (rc) return (rc); pim_mlag_process_mroute_add(msg); @@ -813,7 +814,7 @@ int pim_zebra_mlag_handle_msg(struct stream *s, int len) case MLAG_MROUTE_DEL: { struct mlag_mroute_del msg; - rc = mlag_lib_decode_mroute_del(s, &msg); + rc = mlag_lib_decode_mroute_del(s, &msg, &length); if (rc) return (rc); pim_mlag_process_mroute_del(msg); @@ -823,8 +824,7 @@ int pim_zebra_mlag_handle_msg(struct stream *s, int len) int i; for (i = 0; i < mlag_msg.msg_cnt; i++) { - - rc = mlag_lib_decode_mroute_add(s, &msg); + rc = mlag_lib_decode_mroute_add(s, &msg, &length); if (rc) return (rc); pim_mlag_process_mroute_add(msg); @@ -835,8 +835,7 @@ int pim_zebra_mlag_handle_msg(struct stream *s, int len) int i; for (i = 0; i < mlag_msg.msg_cnt; i++) { - - rc = mlag_lib_decode_mroute_del(s, &msg); + rc = mlag_lib_decode_mroute_del(s, &msg, &length); if (rc) return (rc); pim_mlag_process_mroute_del(msg); diff --git a/zebra/zebra_mlag.c b/zebra/zebra_mlag.c index 5b721a8eac..edd71b9f77 100644 --- a/zebra/zebra_mlag.c +++ b/zebra/zebra_mlag.c @@ -667,14 +667,17 @@ int zebra_mlag_protobuf_encode_client_data(struct stream *s, uint32_t *msg_type) int n_len = 0; int rc = 0; char buf[ZLOG_FILTER_LENGTH_MAX]; + size_t length; if (IS_ZEBRA_DEBUG_MLAG) zlog_debug("%s: Entering..", __func__); - rc = mlag_lib_decode_mlag_hdr(s, &mlag_msg); + rc = mlag_lib_decode_mlag_hdr(s, &mlag_msg, &length); if (rc) return rc; + memset(tmp_buf, 0, ZEBRA_MLAG_BUF_LIMIT); + if (IS_ZEBRA_DEBUG_MLAG) zlog_debug("%s: Mlag ProtoBuf encoding of message:%s, len:%d", __func__, @@ -688,9 +691,10 @@ int zebra_mlag_protobuf_encode_client_data(struct stream *s, uint32_t *msg_type) ZebraMlagMrouteAdd pay_load = ZEBRA_MLAG_MROUTE_ADD__INIT; uint32_t vrf_name_len = 0; - rc = mlag_lib_decode_mroute_add(s, &msg); + rc = mlag_lib_decode_mroute_add(s, &msg, &length); if (rc) return rc; + vrf_name_len = strlen(msg.vrf_name) + 1; pay_load.vrf_name = XMALLOC(MTYPE_MLAG_PBUF, vrf_name_len); strlcpy(pay_load.vrf_name, msg.vrf_name, vrf_name_len); @@ -720,7 +724,7 @@ int zebra_mlag_protobuf_encode_client_data(struct stream *s, uint32_t *msg_type) ZebraMlagMrouteDel pay_load = ZEBRA_MLAG_MROUTE_DEL__INIT; uint32_t vrf_name_len = 0; - rc = mlag_lib_decode_mroute_del(s, &msg); + rc = mlag_lib_decode_mroute_del(s, &msg, &length); if (rc) return rc; vrf_name_len = strlen(msg.vrf_name) + 1; @@ -749,18 +753,18 @@ int zebra_mlag_protobuf_encode_client_data(struct stream *s, uint32_t *msg_type) ZebraMlagMrouteAddBulk Bulk_msg = ZEBRA_MLAG_MROUTE_ADD_BULK__INIT; ZebraMlagMrouteAdd **pay_load = NULL; - int i; bool cleanup = false; + uint32_t i, actual; Bulk_msg.n_mroute_add = mlag_msg.msg_cnt; pay_load = XMALLOC(MTYPE_MLAG_PBUF, sizeof(ZebraMlagMrouteAdd *) * mlag_msg.msg_cnt); - for (i = 0; i < mlag_msg.msg_cnt; i++) { + for (i = 0, actual = 0; i < mlag_msg.msg_cnt; i++, actual++) { uint32_t vrf_name_len = 0; - rc = mlag_lib_decode_mroute_add(s, &msg); + rc = mlag_lib_decode_mroute_add(s, &msg, &length); if (rc) { cleanup = true; break; @@ -796,8 +800,17 @@ int zebra_mlag_protobuf_encode_client_data(struct stream *s, uint32_t *msg_type) tmp_buf); } - for (i = 0; i < mlag_msg.msg_cnt; i++) { - XFREE(MTYPE_MLAG_PBUF, pay_load[i]->vrf_name); + for (i = 0; i < actual; i++) { + /* + * The mlag_lib_decode_mroute_add can + * fail to properly decode and cause nothing + * to be allocated. Prevent a crash + */ + if (!pay_load[i]) + continue; + + if (pay_load[i]->vrf_name) + XFREE(MTYPE_MLAG_PBUF, pay_load[i]->vrf_name); if (pay_load[i]->owner_id == MLAG_OWNER_INTERFACE && pay_load[i]->intf_name) XFREE(MTYPE_MLAG_PBUF, pay_load[i]->intf_name); @@ -812,18 +825,18 @@ int zebra_mlag_protobuf_encode_client_data(struct stream *s, uint32_t *msg_type) ZebraMlagMrouteDelBulk Bulk_msg = ZEBRA_MLAG_MROUTE_DEL_BULK__INIT; ZebraMlagMrouteDel **pay_load = NULL; - int i; bool cleanup = false; + uint32_t i, actual; Bulk_msg.n_mroute_del = mlag_msg.msg_cnt; pay_load = XMALLOC(MTYPE_MLAG_PBUF, sizeof(ZebraMlagMrouteDel *) * mlag_msg.msg_cnt); - for (i = 0; i < mlag_msg.msg_cnt; i++) { + for (i = 0, actual = 0; i < mlag_msg.msg_cnt; i++, actual++) { uint32_t vrf_name_len = 0; - rc = mlag_lib_decode_mroute_del(s, &msg); + rc = mlag_lib_decode_mroute_del(s, &msg, &length); if (rc) { cleanup = true; break; @@ -858,8 +871,17 @@ int zebra_mlag_protobuf_encode_client_data(struct stream *s, uint32_t *msg_type) tmp_buf); } - for (i = 0; i < mlag_msg.msg_cnt; i++) { - XFREE(MTYPE_MLAG_PBUF, pay_load[i]->vrf_name); + for (i = 0; i < actual; i++) { + /* + * The mlag_lib_decode_mroute_add can + * fail to properly decode and cause nothing + * to be allocated. Prevent a crash + */ + if (!pay_load[i]) + continue; + + if (pay_load[i]->vrf_name) + XFREE(MTYPE_MLAG_PBUF, pay_load[i]->vrf_name); if (pay_load[i]->owner_id == MLAG_OWNER_INTERFACE && pay_load[i]->intf_name) XFREE(MTYPE_MLAG_PBUF, pay_load[i]->intf_name); @@ -915,6 +937,15 @@ int zebra_mlag_protobuf_encode_client_data(struct stream *s, uint32_t *msg_type) return len; } +static void zebra_fill_protobuf_msg(struct stream *s, char *name, int len) +{ + int str_len = strlen(name) + 1; + + stream_put(s, name, str_len); + /* Fill the rest with Null Character for aligning */ + stream_put(s, NULL, len - str_len); +} + int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data, uint32_t len) { @@ -966,7 +997,8 @@ int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data, /* No Batching */ stream_putw(s, MLAG_MSG_NO_BATCH); /* Actual Data */ - stream_put(s, msg->peerlink, INTERFACE_NAMSIZ); + zebra_fill_protobuf_msg(s, msg->peerlink, + INTERFACE_NAMSIZ); stream_putl(s, msg->my_role); stream_putl(s, msg->peer_state); zebra_mlag_status_update__free_unpacked(msg, NULL); @@ -1003,7 +1035,7 @@ int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data, /* No Batching */ stream_putw(s, MLAG_MSG_NO_BATCH); /* Actual Data */ - stream_put(s, msg->vrf_name, VRF_NAMSIZ); + zebra_fill_protobuf_msg(s, msg->vrf_name, VRF_NAMSIZ); stream_putl(s, msg->source_ip); stream_putl(s, msg->group_ip); @@ -1013,7 +1045,8 @@ int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data, stream_putc(s, msg->am_i_dual_active); stream_putl(s, msg->vrf_id); if (msg->owner_id == MLAG_OWNER_INTERFACE) - stream_put(s, msg->intf_name, INTERFACE_NAMSIZ); + zebra_fill_protobuf_msg(s, msg->intf_name, + INTERFACE_NAMSIZ); else stream_put(s, NULL, INTERFACE_NAMSIZ); zebra_mlag_mroute_add__free_unpacked(msg, NULL); @@ -1032,15 +1065,15 @@ int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data, /* No Batching */ stream_putw(s, MLAG_MSG_NO_BATCH); /* Actual Data */ - stream_put(s, msg->vrf_name, VRF_NAMSIZ); + zebra_fill_protobuf_msg(s, msg->vrf_name, VRF_NAMSIZ); stream_putl(s, msg->source_ip); stream_putl(s, msg->group_ip); - stream_putl(s, msg->group_ip); stream_putl(s, msg->owner_id); stream_putl(s, msg->vrf_id); if (msg->owner_id == MLAG_OWNER_INTERFACE) - stream_put(s, msg->intf_name, INTERFACE_NAMSIZ); + zebra_fill_protobuf_msg(s, msg->intf_name, + INTERFACE_NAMSIZ); else stream_put(s, NULL, INTERFACE_NAMSIZ); zebra_mlag_mroute_del__free_unpacked(msg, NULL); @@ -1067,7 +1100,8 @@ int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data, msg = Bulk_msg->mroute_add[i]; - stream_put(s, msg->vrf_name, VRF_NAMSIZ); + zebra_fill_protobuf_msg(s, msg->vrf_name, + VRF_NAMSIZ); stream_putl(s, msg->source_ip); stream_putl(s, msg->group_ip); stream_putl(s, msg->cost_to_rp); @@ -1076,8 +1110,9 @@ int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data, stream_putc(s, msg->am_i_dual_active); stream_putl(s, msg->vrf_id); if (msg->owner_id == MLAG_OWNER_INTERFACE) - stream_put(s, msg->intf_name, - INTERFACE_NAMSIZ); + zebra_fill_protobuf_msg( + s, msg->intf_name, + INTERFACE_NAMSIZ); else stream_put(s, NULL, INTERFACE_NAMSIZ); } @@ -1106,14 +1141,16 @@ int zebra_mlag_protobuf_decode_message(struct stream *s, uint8_t *data, msg = Bulk_msg->mroute_del[i]; - stream_put(s, msg->vrf_name, VRF_NAMSIZ); + zebra_fill_protobuf_msg(s, msg->vrf_name, + VRF_NAMSIZ); stream_putl(s, msg->source_ip); stream_putl(s, msg->group_ip); stream_putl(s, msg->owner_id); stream_putl(s, msg->vrf_id); if (msg->owner_id == MLAG_OWNER_INTERFACE) - stream_put(s, msg->intf_name, - INTERFACE_NAMSIZ); + zebra_fill_protobuf_msg( + s, msg->intf_name, + INTERFACE_NAMSIZ); else stream_put(s, NULL, INTERFACE_NAMSIZ); } From b900ad16ee2571d33c29a9f8541a78ce1681cb5a Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Mon, 14 Oct 2019 12:41:33 -0700 Subject: [PATCH 09/14] pimd: logs to help debug leaked channel OIF problems Additional protocols were being set on the OIF proto-mask without logs. Added logs in that area. Also added start and end logs to ifchannel_delete to help identify state machine changes that play out as a part of this event handling. Ticket: CM-26732 Signed-off-by: Anuradha Karuppiah --- pimd/pim_ifchannel.c | 8 ++++++-- pimd/pim_oil.c | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/pimd/pim_ifchannel.c b/pimd/pim_ifchannel.c index e23d3dc3da..c402ea8e7e 100644 --- a/pimd/pim_ifchannel.c +++ b/pimd/pim_ifchannel.c @@ -131,6 +131,10 @@ void pim_ifchannel_delete(struct pim_ifchannel *ch) pim_ifp = ch->interface->info; + if (PIM_DEBUG_PIM_TRACE) + zlog_debug("%s: ifchannel entry %s(%s) del start", __func__, + ch->sg_str, ch->interface->name); + if (PIM_I_am_DualActive(pim_ifp)) { if (PIM_DEBUG_MLAG) zlog_debug( @@ -621,8 +625,8 @@ struct pim_ifchannel *pim_ifchannel_add(struct interface *ifp, } if (PIM_DEBUG_PIM_TRACE) - zlog_debug("%s: ifchannel %s is created ", __func__, - ch->sg_str); + zlog_debug("%s: ifchannel %s(%s) is created ", __func__, + ch->sg_str, ch->interface->name); return ch; } diff --git a/pimd/pim_oil.c b/pimd/pim_oil.c index b165bcbee7..21febcc969 100644 --- a/pimd/pim_oil.c +++ b/pimd/pim_oil.c @@ -493,6 +493,23 @@ int pim_channel_add_oif(struct channel_oil *channel_oil, struct interface *oif, } } + if (PIM_DEBUG_MROUTE) { + char group_str[INET_ADDRSTRLEN]; + char source_str[INET_ADDRSTRLEN]; + pim_inet4_dump("", + channel_oil->oil.mfcc_mcastgrp, + group_str, sizeof(group_str)); + pim_inet4_dump("", + channel_oil->oil.mfcc_origin, source_str, + sizeof(source_str)); + zlog_debug( + "%s(%s): (S,G)=(%s,%s): proto_mask=%u OIF=%s vif_index=%d added to 0x%x", + __func__, caller, source_str, group_str, + proto_mask, oif->name, + pim_ifp->mroute_vif_index, + channel_oil + ->oif_flags[pim_ifp->mroute_vif_index]); + } return 0; } From 0f31a82a11360bcbe44e667f58b3d2813aa526da Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Mon, 14 Oct 2019 16:02:36 -0700 Subject: [PATCH 10/14] pimd: fix problem with oif being re-added during ifchannel del Series of events leading to the problem - 1. (S,G) has been pruned on the rp on downlink-1 2. a (*,G) join is rxed on downlink-1 without the source S. This results in the (S,G,rpt) prune state being cleared on downlink-1. As a part of the clear the ifchannel associated with downlink-1 is deleted. 3. The ifchannel_delete handling is expected to add downlink-1 as an inherited OIF to the channel OIL (which it does). However it is also added in as an immediate OIF (accidentally) as the ifchannel is still present (in the process of being deleted). To avoid the problem defer pim_upstream_update_join_desired evaluation until after the channel is deleted. Relevant debug logs - PIM: pim_ifchannel_delete: ifchannel entry (27.0.0.15,239.1.1.106)(downlink-1) del start PIM: pim_channel_add_oif(pim_ifchannel_delete): (S,G)=(27.0.0.15,239.1.1.106): proto_mask=4 OIF=downlink-1 vif_index=7: DONE PIM: pimd/pim_oil.c pim_channel_del_oif: no existing protocol mask 2(4) for requested OIF downlink-1 (vif_index=7, min_ttl=1) for channel (S,G)=(27.0.0.15,239.1.1.106) PIM: pim_upstream_switch: PIM_UPSTREAM_(27.0.0.15,239.1.1.106): (S,G) old: NotJoined new: Joined PIM: pim_channel_add_oif(pim_upstream_inherited_olist_decide): (S,G)=(27.0.0.15,239.1.1.106): proto_mask=2 OIF=downlink-1 vif_index=7 added to 0x6 >>>>>>>>>>>>>>>>>> PIM: pim_upstream_del(pim_ifchannel_delete): Delete (27.0.0.15,239.1.1.106)[default] ref count: 2 , flags: 81 c_oil ref count 1 (Pre decrement) PIM: pim_ifchannel_delete: ifchannel entry (27.0.0.15,239.1.1.106)(downlink-1) del end Ticket: CM-26732 Signed-off-by: Anuradha Karuppiah --- pimd/pim_ifchannel.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pimd/pim_ifchannel.c b/pimd/pim_ifchannel.c index c402ea8e7e..8d8534a794 100644 --- a/pimd/pim_ifchannel.c +++ b/pimd/pim_ifchannel.c @@ -128,6 +128,7 @@ static void pim_ifchannel_find_new_children(struct pim_ifchannel *ch) void pim_ifchannel_delete(struct pim_ifchannel *ch) { struct pim_interface *pim_ifp; + struct pim_upstream *up; pim_ifp = ch->interface->info; @@ -201,14 +202,14 @@ void pim_ifchannel_delete(struct pim_ifchannel *ch) listnode_delete(ch->upstream->ifchannels, ch); - pim_upstream_update_join_desired(pim_ifp->pim, ch->upstream); + up = ch->upstream; /* upstream is common across ifchannels, check if upstream's ifchannel list is empty before deleting upstream_del ref count will take care of it. */ if (ch->upstream->ref_count > 0) - pim_upstream_del(pim_ifp->pim, ch->upstream, __func__); + up = pim_upstream_del(pim_ifp->pim, ch->upstream, __func__); else { if (PIM_DEBUG_PIM_TRACE) @@ -237,6 +238,9 @@ void pim_ifchannel_delete(struct pim_ifchannel *ch) ch->sg_str); XFREE(MTYPE_PIM_IFCHANNEL, ch); + + if (up) + pim_upstream_update_join_desired(pim_ifp->pim, up); } void pim_ifchannel_delete_all(struct interface *ifp) From 4d114ab9f91fcac953e92e25b29192909b1a81bf Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 9 Oct 2019 15:12:23 -0400 Subject: [PATCH 11/14] pimd: Allow LHR in a AA situation to join upstream Add a special catch to the test for pim_macro_chisin_pim_include to allow the LHR to signal interest in joining upstream. This will allow both the DR and non DR of the ActiveActive situation to draw traffic to itself. The non-DR will continue to not forward traffic. Ticket: CM-26610 Signed-off-by: Donald Sharp --- pimd/pim_macro.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pimd/pim_macro.c b/pimd/pim_macro.c index ea3e1a244f..c6961d30c2 100644 --- a/pimd/pim_macro.c +++ b/pimd/pim_macro.c @@ -157,6 +157,7 @@ int pim_macro_ch_lost_assert(const struct pim_ifchannel *ch) int pim_macro_chisin_pim_include(const struct pim_ifchannel *ch) { struct pim_interface *pim_ifp = ch->interface->info; + bool mlag_active = false; if (!pim_ifp) { zlog_warn("%s: (S,G)=%s: multicast not enabled on interface %s", @@ -172,9 +173,21 @@ int pim_macro_chisin_pim_include(const struct pim_ifchannel *ch) if (ch->ifassert_winner.s_addr == pim_ifp->primary_address.s_addr) return 1; /* true */ + /* + * When we have a activeactive interface we need to signal + * that this interface is interesting to the upstream + * decision to JOIN *if* we are syncing over the interface + */ + if (pim_ifp->activeactive) { + struct pim_upstream *up = ch->upstream; + + if (PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE(up->flags)) + mlag_active = true; + } + return ( /* I_am_DR( I ) ? */ - PIM_I_am_DR(pim_ifp) && + (PIM_I_am_DR(pim_ifp) || mlag_active) && /* lost_assert(S,G,I) == false ? */ (!pim_macro_ch_lost_assert(ch))); } From 07f4bac3acd0caca84cd6b405a0afad21bdce7c3 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Tue, 15 Oct 2019 11:10:47 -0700 Subject: [PATCH 12/14] pimd: mute termination device on the origination mroute on type mods An mroute can transition from non-origination to a vxlan origination mroute. In that case we need to re-evaluate if the interfaces in the OIL need to be muted; pimreg and termination device need to be muted (if they were previously un-muted). Dump in a problem state: ======================= root@TORC11:~# net show pim state Codes: J -> Pim Join, I -> IGMP Report, S -> Source, * -> Inherited from (*,G), V -> VxLAN, M -> Muted Active Source Group RPT IIF OIL 1 * 239.1.1.100 y uplink-1 pimreg(I ), ipmr-lo( J ) 1 36.0.0.11 239.1.1.100 n peerlink-3.4094 ipmr-lo( * ), uplink-1( J ), uplink-2( J ), peerlink-3.4094( V ) PS: ipmr-lo should have M set in (36.0.0.11,239.1.1.100) Ticket: CM-26747 Signed-off-by: Anuradha Karuppiah --- pimd/pim_vxlan.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pimd/pim_vxlan.c b/pimd/pim_vxlan.c index 93e2f00f90..bff8017b43 100644 --- a/pimd/pim_vxlan.c +++ b/pimd/pim_vxlan.c @@ -284,6 +284,7 @@ static void pim_vxlan_orig_mr_up_iif_update(struct pim_vxlan_sg *vxlan_sg) static void pim_vxlan_orig_mr_up_add(struct pim_vxlan_sg *vxlan_sg) { struct pim_upstream *up; + struct pim_interface *term_ifp; int flags = 0; struct prefix nht_p; struct pim_instance *pim = vxlan_sg->pim; @@ -345,6 +346,11 @@ static void pim_vxlan_orig_mr_up_add(struct pim_vxlan_sg *vxlan_sg) pim_upstream_update_use_rpt(up, false /*update_mroute*/); pim_upstream_ref(up, flags, __func__); vxlan_sg->up = up; + term_ifp = pim_vxlan_get_term_ifp(pim); + /* mute termination device on origination mroutes */ + if (term_ifp) + pim_channel_update_oif_mute(up->channel_oil, + term_ifp); pim_vxlan_orig_mr_up_iif_update(vxlan_sg); /* mute pimreg on origination mroutes */ if (pim->regiface) From ea6d91c86bd8d5f7ad03b5e7e9527c4fcdfa6887 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Thu, 15 Aug 2019 14:00:35 -0700 Subject: [PATCH 13/14] pimd: re-eval flow activity on kat expiry When the (S,G) KAT expires we need to poll for activity before dropping the entry as traffic may have been forwarded by the dataplane since the last periodic poll cycle. This only works if traffic is being forwarded by the kernel i.e. if the entries were HW accelerated via an ASIC we may still miss out on last minute activity on the mroute in the HW. Ticket: CM-26871 Signed-off-by: Anuradha Karuppiah --- pimd/pim_upstream.c | 88 ++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 33 deletions(-) diff --git a/pimd/pim_upstream.c b/pimd/pim_upstream.c index 998720e8f6..efa58c1b1f 100644 --- a/pimd/pim_upstream.c +++ b/pimd/pim_upstream.c @@ -57,6 +57,7 @@ static void join_timer_stop(struct pim_upstream *up); static void pim_upstream_update_assert_tracking_desired(struct pim_upstream *up); +static bool pim_upstream_sg_running_proc(struct pim_upstream *up); /* * A (*,G) or a (*,*) is going away @@ -1453,6 +1454,11 @@ static int pim_upstream_keep_alive_timer(struct thread *t) up = THREAD_ARG(t); + /* pull the stats and re-check */ + if (pim_upstream_sg_running_proc(up)) + /* kat was restarted because of new activity */ + return 0; + pim_upstream_keep_alive_timer_proc(up); return 0; } @@ -1965,39 +1971,14 @@ static bool pim_upstream_kat_start_ok(struct pim_upstream *up) return false; } -/* - * Code to check and see if we've received packets on a S,G mroute - * and if so to set the SPT bit appropriately - */ -static void pim_upstream_sg_running(void *arg) +static bool pim_upstream_sg_running_proc(struct pim_upstream *up) { - struct pim_upstream *up = (struct pim_upstream *)arg; - struct pim_instance *pim = up->channel_oil->pim; + bool rv = false; + struct pim_instance *pim = up->pim; - // No packet can have arrived here if this is the case - if (!up->channel_oil->installed) { - if (PIM_DEBUG_PIM_TRACE) - zlog_debug("%s: %s%s is not installed in mroute", - __func__, up->sg_str, pim->vrf->name); - return; - } + if (!up->channel_oil->installed) + return rv; - /* - * This is a bit of a hack - * We've noted that we should rescan but - * we've missed the window for doing so in - * pim_zebra.c for some reason. I am - * only doing this at this point in time - * to get us up and working for the moment - */ - if (up->channel_oil->oil_inherited_rescan) { - if (PIM_DEBUG_PIM_TRACE) - zlog_debug( - "%s: Handling unscanned inherited_olist for %s[%s]", - __func__, up->sg_str, pim->vrf->name); - pim_upstream_inherited_olist_decide(pim, up); - up->channel_oil->oil_inherited_rescan = 0; - } pim_mroute_update_counters(up->channel_oil); // Have we seen packets? @@ -2011,7 +1992,7 @@ static void pim_upstream_sg_running(void *arg) up->channel_oil->cc.pktcnt, up->channel_oil->cc.lastused / 100); } - return; + return rv; } if (pim_upstream_kat_start_ok(up)) { @@ -2029,14 +2010,55 @@ static void pim_upstream_sg_running(void *arg) pim_upstream_fhr_kat_start(up); } pim_upstream_keep_alive_timer_start(up, pim->keep_alive_time); - } else if (PIM_UPSTREAM_FLAG_TEST_SRC_LHR(up->flags)) + rv = true; + } else if (PIM_UPSTREAM_FLAG_TEST_SRC_LHR(up->flags)) { pim_upstream_keep_alive_timer_start(up, pim->keep_alive_time); + rv = true; + } if ((up->sptbit != PIM_UPSTREAM_SPTBIT_TRUE) && (up->rpf.source_nexthop.interface)) { pim_upstream_set_sptbit(up, up->rpf.source_nexthop.interface); } - return; + + return rv; +} + +/* + * Code to check and see if we've received packets on a S,G mroute + * and if so to set the SPT bit appropriately + */ +static void pim_upstream_sg_running(void *arg) +{ + struct pim_upstream *up = (struct pim_upstream *)arg; + struct pim_instance *pim = up->channel_oil->pim; + + // No packet can have arrived here if this is the case + if (!up->channel_oil->installed) { + if (PIM_DEBUG_TRACE) + zlog_debug("%s: %s%s is not installed in mroute", + __func__, up->sg_str, pim->vrf->name); + return; + } + + /* + * This is a bit of a hack + * We've noted that we should rescan but + * we've missed the window for doing so in + * pim_zebra.c for some reason. I am + * only doing this at this point in time + * to get us up and working for the moment + */ + if (up->channel_oil->oil_inherited_rescan) { + if (PIM_DEBUG_TRACE) + zlog_debug( + "%s: Handling unscanned inherited_olist for %s[%s]", + __func__, up->sg_str, pim->vrf->name); + pim_upstream_inherited_olist_decide(pim, up); + up->channel_oil->oil_inherited_rescan = 0; + } + + pim_upstream_sg_running_proc(up); } void pim_upstream_add_lhr_star_pimreg(struct pim_instance *pim) From d7a5bc62da6198cfcd2f30513b15a3dce85a3864 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 19 Feb 2020 12:32:30 -0500 Subject: [PATCH 14/14] doc: Add missing mlag documentation Add some missing mlag documentation for various commands. Signed-off-by: Donald Sharp --- doc/user/pim.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/doc/user/pim.rst b/doc/user/pim.rst index 9876216736..36c8b44aa4 100644 --- a/doc/user/pim.rst +++ b/doc/user/pim.rst @@ -174,6 +174,13 @@ PIM interface commands allow you to configure an interface as either a Receiver or a interface that you would like to form pim neighbors on. If the interface is in a vrf, enter the interface command with the vrf keyword at the end. +.. index:: ip pim active-active +.. clicmd:: ip pim active-active + + Turn on pim active-active configuration for a Vxlan interface. This + command will not do anything if you do not have the underlying ability + of a mlag implementation. + .. index:: ip pim bfd .. clicmd:: ip pim bfd @@ -392,6 +399,11 @@ cause great confusion. Display information about interfaces PIM is using. +.. index:: show ip pim mlag [vrf NAME] interface [detail|WORD] [json] +.. clicmd:: show ip pim mlag [vrf NAME|all] interface [detail|WORD] [json] + + Display mlag interface information. + .. index:: show ip pim [vrf NAME] join [A.B.C.D [A.B.C.D]] [json] .. clicmd:: show ip pim join @@ -404,6 +416,11 @@ cause great confusion. Display information about PIM interface local-membership. +.. index:: show ip pim mlag summary [json] +.. clicmd:: show ip pim mlag summary [json] + + Display mlag information state that PIM is keeping track of. + .. index:: show ip pim neighbor .. clicmd:: show ip pim neighbor