From 51f3216bee15473533963c3e9b7c37061bdb0da9 Mon Sep 17 00:00:00 2001 From: Pooja Jagadeesh Doijode Date: Fri, 7 Oct 2022 17:07:46 -0700 Subject: [PATCH 1/2] bgpd: BGP fails to free the nexthop node In case of BGP unnumbered, BGP fails to free the nexthop node for peer if the interface is shutdown before unconfiguring/deleting the BGP neighbor. This is because, when the interface is shutdown, peer's LL neighbor address will be cleared. Therefore, during neighbor deletion, since the peer's neighbor address is not available, BGP will skip freeing the nexthop node of this peer. This results in a stale nexthop node that points to a peer that's already been freed. Ticket: 3191547 Signed-off-by: Pooja Jagadeesh Doijode --- bgpd/bgp_nht.c | 73 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 9 deletions(-) diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c index fd1aa6ab47..e387cdd488 100644 --- a/bgpd/bgp_nht.c +++ b/bgpd/bgp_nht.c @@ -212,6 +212,37 @@ void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to) bnct->nht_info = to; } +/* + * Returns the bnc whose bnc->nht_info matches the LL peer by + * looping through the IPv6 nexthop table + */ +static struct bgp_nexthop_cache * +bgp_find_ipv6_nexthop_matching_peer(struct peer *peer) +{ + struct bgp_nexthop_cache *bnc; + + frr_each (bgp_nexthop_cache, &peer->bgp->nexthop_cache_table[AFI_IP6], + bnc) { + if (bnc->nht_info == peer) { + if (BGP_DEBUG(nht, NHT)) { + zlog_debug( + "Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p", + &bnc->prefix, bnc->ifindex, + bnc->srte_color, bnc, peer->host, + peer->bgp->name_pretty, peer); + } + return bnc; + } + } + + if (BGP_DEBUG(nht, NHT)) + zlog_debug( + "Could not find bnc for peer %s(%s) %p in v6 nexthop table", + peer->host, peer->bgp->name_pretty, peer); + + return NULL; +} + void bgp_unlink_nexthop_by_peer(struct peer *peer) { struct prefix p; @@ -219,15 +250,30 @@ void bgp_unlink_nexthop_by_peer(struct peer *peer) afi_t afi = family2afi(peer->su.sa.sa_family); ifindex_t ifindex = 0; - if (!sockunion2hostprefix(&peer->su, &p)) - return; - /* - * Gather the ifindex for if up/down events to be - * tagged into this fun - */ - if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) - ifindex = peer->su.sin6.sin6_scope_id; - bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0, ifindex); + if (!sockunion2hostprefix(&peer->su, &p)) { + /* + * In scenarios where unnumbered BGP session is brought + * down by shutting down the interface before unconfiguring + * the BGP neighbor, neighbor information in peer->su.sa + * will be cleared when the interface is shutdown. So + * during the deletion of unnumbered bgp peer, above check + * will return true. Therefore, in this case,BGP needs to + * find the bnc whose bnc->nht_info matches the + * peer being deleted and free it. + */ + bnc = bgp_find_ipv6_nexthop_matching_peer(peer); + } else { + /* + * Gather the ifindex for if up/down events to be + * tagged into this fun + */ + if (afi == AFI_IP6 && + IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) + ifindex = peer->su.sin6.sin6_scope_id; + bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0, + ifindex); + } + if (!bnc) return; @@ -443,6 +489,15 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer) if (!peer) return; + /* + * In case the below check evaluates true and if + * the bnc has not been freed at this point, then + * we might have to do something similar to what's + * done in bgp_unlink_nexthop_by_peer(). Since + * bgp_unlink_nexthop_by_peer() loops through the + * nodes of V6 nexthop cache to find the bnc, it is + * currently not being called here. + */ if (!sockunion2hostprefix(&peer->su, &p)) return; /* From 21b432f79f4130753b2b9430e02ba344b34c61da Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 7 Dec 2022 07:54:58 -0500 Subject: [PATCH 2/2] tests: Add a test to show that BGP does not crash with unnumbered interfaces This series of events will crash BGP prior to the prior commit: a) Configure an interfaced based peering b) Shut the interface the peering is over c) remove the peering from bgp Show that this no longer happens Signed-off-by: Donald Sharp --- tests/topotests/bgp_unnumbered/__init__.py | 0 tests/topotests/bgp_unnumbered/r1/bgpd.conf | 9 ++ tests/topotests/bgp_unnumbered/r1/zebra.conf | 10 ++ tests/topotests/bgp_unnumbered/r2/bgpd.conf | 9 ++ tests/topotests/bgp_unnumbered/r2/zebra.conf | 12 ++ .../bgp_unnumbered/test_bgp_unnumbered.py | 132 ++++++++++++++++++ 6 files changed, 172 insertions(+) create mode 100644 tests/topotests/bgp_unnumbered/__init__.py create mode 100644 tests/topotests/bgp_unnumbered/r1/bgpd.conf create mode 100644 tests/topotests/bgp_unnumbered/r1/zebra.conf create mode 100644 tests/topotests/bgp_unnumbered/r2/bgpd.conf create mode 100644 tests/topotests/bgp_unnumbered/r2/zebra.conf create mode 100644 tests/topotests/bgp_unnumbered/test_bgp_unnumbered.py diff --git a/tests/topotests/bgp_unnumbered/__init__.py b/tests/topotests/bgp_unnumbered/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/topotests/bgp_unnumbered/r1/bgpd.conf b/tests/topotests/bgp_unnumbered/r1/bgpd.conf new file mode 100644 index 0000000000..a9d0ec8dbd --- /dev/null +++ b/tests/topotests/bgp_unnumbered/r1/bgpd.conf @@ -0,0 +1,9 @@ +! +router bgp 65001 + timers bgp 1 9 + no bgp ebgp-requires-policy + neighbor r1-eth0 interface remote-as external + address-family ipv4 unicast + exit-address-family + ! +! diff --git a/tests/topotests/bgp_unnumbered/r1/zebra.conf b/tests/topotests/bgp_unnumbered/r1/zebra.conf new file mode 100644 index 0000000000..1cbaea44f8 --- /dev/null +++ b/tests/topotests/bgp_unnumbered/r1/zebra.conf @@ -0,0 +1,10 @@ +! +interface lo + ip address 172.16.250.254/32 +! +interface r1-eth0 + ip address 192.168.0.1/24 +! +ip forwarding +! + diff --git a/tests/topotests/bgp_unnumbered/r2/bgpd.conf b/tests/topotests/bgp_unnumbered/r2/bgpd.conf new file mode 100644 index 0000000000..fd29cd3c99 --- /dev/null +++ b/tests/topotests/bgp_unnumbered/r2/bgpd.conf @@ -0,0 +1,9 @@ +! +router bgp 65002 + no bgp network import-check + no bgp ebgp-requires-policy + timers bgp 1 9 + neighbor r2-eth0 interface remote-as external + address-family ipv4 uni + network 172.16.255.254/32 +! diff --git a/tests/topotests/bgp_unnumbered/r2/zebra.conf b/tests/topotests/bgp_unnumbered/r2/zebra.conf new file mode 100644 index 0000000000..cf6fb6d984 --- /dev/null +++ b/tests/topotests/bgp_unnumbered/r2/zebra.conf @@ -0,0 +1,12 @@ +! +interface r2-eth0 + ip address 192.168.0.2/24 +! +interface r2-eth1 + ip address 192.168.1.1/24 +! +interface r2-eth2 + ip address 192.168.2.1/24 +! +ip forwarding +! diff --git a/tests/topotests/bgp_unnumbered/test_bgp_unnumbered.py b/tests/topotests/bgp_unnumbered/test_bgp_unnumbered.py new file mode 100644 index 0000000000..de4c4cff37 --- /dev/null +++ b/tests/topotests/bgp_unnumbered/test_bgp_unnumbered.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python +# +# Copyright (c) 2022 by +# Donald Sharp +# +# Permission to use, copy, modify, and/or distribute this software +# for any purpose with or without fee is hereby granted, provided +# that the above copyright notice and this permission notice appear +# in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND NETDEF DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NETDEF BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# + +""" +Test some bgp interface based issues that show up +""" + +import os +import sys +import json +import pytest +import functools + +CWD = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(CWD, "../")) + +# pylint: disable=C0413 +from lib import topotest +from lib.topogen import Topogen, TopoRouter, get_topogen +from lib.common_config import step + +pytestmark = [pytest.mark.bgpd] + + +def build_topo(tgen): + + tgen.add_router("r1") + tgen.add_router("r2") + + switch = tgen.add_switch("s1") + switch.add_link(tgen.gears["r1"]) + switch.add_link(tgen.gears["r2"]) + + +def setup_module(mod): + tgen = Topogen(build_topo, mod.__name__) + tgen.start_topology() + + router_list = tgen.routers() + + for i, (rname, router) in enumerate(router_list.items(), 1): + router.load_config( + TopoRouter.RD_ZEBRA, os.path.join(CWD, "{}/zebra.conf".format(rname)) + ) + router.load_config( + TopoRouter.RD_BGP, os.path.join(CWD, "{}/bgpd.conf".format(rname)) + ) + + tgen.start_router() + + +def teardown_module(mod): + tgen = get_topogen() + tgen.stop_topology() + + +# +# Test these events: +# a) create an unnumbered neighbor +# b) shutdown the interface +# c) remove the unnumbered peer in bgp and bgp does not crash +def test_bgp_unnumbered_removal(): + tgen = get_topogen() + + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + def _bgp_nexthop_cache(): + output = tgen.gears["r1"].vtysh_cmd("show bgp nexthop") + expected = "Current BGP nexthop cache:\n" + return output == expected + + def _bgp_converge(): + output = json.loads( + tgen.gears["r1"].vtysh_cmd("show ip bgp 172.16.255.254/32 json") + ) + expected = {"prefix": "172.16.255.254/32"} + + return topotest.json_cmp(output, expected) + + step("Ensure Convergence of BGP") + test_func = functools.partial(_bgp_converge) + success, result = topotest.run_and_expect(test_func, None, count=60, wait=1) + + assert result is None, 'Failed bgp convergence in "{}"'.format(tgen.gears["r2"]) + + step("Shutdown interface r1-eth0") + + tgen.gears["r1"].vtysh_cmd( + """ + configure + int r1-eth0 + shutdown + """ + ) + + step("Remove the neighbor from r1") + tgen.gears["r1"].vtysh_cmd( + """ + configure + router bgp + no neighbor r1-eth0 interface remote-as external + """ + ) + + step("Ensure that BGP does not crash") + test_func = functools.partial(_bgp_nexthop_cache) + success, result = topotest.run_and_expect(test_func, True, count=10, wait=1) + + assert result is True, "BGP did not crash on r1" + + +if __name__ == "__main__": + args = ["-s"] + sys.argv[1:] + sys.exit(pytest.main(args))