From c9df216851a5d2de17ed61eb2ae992c854cea43f Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Tue, 15 Oct 2019 08:27:22 -0400 Subject: [PATCH 1/5] bgpd: Soft reconfig-in should find the right bgp_path_info When using soft reconfiguration inbound we are storing packet data on the side for replaying when necessary. The problem here is that we are just grabbing the first bgp_path_info and using that as the base. What happens when we have soft-reconfig turned on with multiple bgp_path_info's for a path? This was introduced in commit 8692c506520f6b268525b80890702432c95f13c4, yes back in 2012! I would argue, though, that it was just broken in a different way before this. Choose the correct bgp_path_info that corresponds to the peer we received the data from for rethinking. Signed-off-by: Donald Sharp --- bgpd/bgp_route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 9dc4a294c4..84d1feca78 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -3927,12 +3927,16 @@ static void bgp_soft_reconfig_table(struct peer *peer, afi_t afi, safi_t safi, if (ain->peer != peer) continue; - struct bgp_path_info *pi = - bgp_node_get_bgp_path_info(rn); + struct bgp_path_info *pi; uint32_t num_labels = 0; mpls_label_t *label_pnt = NULL; struct bgp_route_evpn evpn; + for (pi = bgp_node_get_bgp_path_info(rn); pi; + pi = pi->next) + if (pi->peer == peer) + break; + if (pi && pi->extra) num_labels = pi->extra->num_labels; if (num_labels) From 931066c074e48178f7c23ff6a83c4488403c53ef Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 9 Oct 2019 16:10:44 -0400 Subject: [PATCH 2/5] bgpd: AS paths are uint32_t instead of integers We have some JSON output that was displaying high order AS path data as negative numbers: { "paths":[ { "aspath":{ "string":"4200010118 4200010000 20473 1299", "segments":[ { "type":"as-sequence", "list":[ -94957178, -94957296, 20473, 1299 ] } ], Notice "String" output -vs- the list. With fixed code: "paths":[ { "aspath":{ "string":"64539 4294967000 15096 6939 7922 7332 4249", "segments":[ { "type":"as-sequence", "list":[ 64539, 4294967000, 15096, 6939, 7922, 7332, 4249 ] } ], Signed-off-by: Donald Sharp --- bgpd/bgp_aspath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bgpd/bgp_aspath.c b/bgpd/bgp_aspath.c index f5652b07c5..d716218d70 100644 --- a/bgpd/bgp_aspath.c +++ b/bgpd/bgp_aspath.c @@ -576,7 +576,7 @@ static void aspath_make_str_count(struct aspath *as, bool make_json) if (make_json) json_object_array_add( jseg_list, - json_object_new_int(seg->as[i])); + json_object_new_int64(seg->as[i])); len += snprintf(str_buf + len, str_size - len, "%u", seg->as[i]); From 21d5940494cbcc70303f3d0eda8366e74d5b0548 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Thu, 10 Oct 2019 08:52:13 -0400 Subject: [PATCH 3/5] bgpd: Ensure that struct prefix_rd rd is zero'ed out We are passing around the created rd, Just make sure that the data is zero'ed out. Signed-off-by: Donald Sharp --- bgpd/bgp_mplsvpn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bgpd/bgp_mplsvpn.c b/bgpd/bgp_mplsvpn.c index 1156810510..e8d3062561 100644 --- a/bgpd/bgp_mplsvpn.c +++ b/bgpd/bgp_mplsvpn.c @@ -109,7 +109,7 @@ int bgp_nlri_parse_vpn(struct peer *peer, struct attr *attr, uint16_t type; struct rd_as rd_as; struct rd_ip rd_ip; - struct prefix_rd prd; + struct prefix_rd prd = {0}; mpls_label_t label = {0}; afi_t afi; safi_t safi; From 8d6393ce8fc045edbe94435498b99ce28d190346 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 9 Oct 2019 20:19:56 -0400 Subject: [PATCH 4/5] bgpd: When creating extra from stack ensure it is zero'ed out BGP code assumes that the extra data is zero'ed out. Ensure that we are not leaving any situation that the data on the stack is actually all 0's when we pass it around as a pointer later. Please note in issue #5025, Lou reported a different valgrind issue, which is not the same issue: ==7313== Conditional jump or move depends on uninitialised value(s) ==7313== at 0x181F9F: subgroup_announce_check (bgp_route.c:1555) ==7313== by 0x1A112B: subgroup_announce_table (bgp_updgrp_adv.c:641) ==7313== by 0x1A1340: subgroup_announce_route (bgp_updgrp_adv.c:704) ==7313== by 0x1A13E3: subgroup_coalesce_timer (bgp_updgrp_adv.c:331) ==7313== by 0x4EBA615: thread_call (thread.c:1531) ==7313== by 0x4E8AC37: frr_run (libfrr.c:1052) ==7313== by 0x1429E0: main (bgp_main.c:486) ==7313== ==7313== Conditional jump or move depends on uninitialised value(s) ==7313== at 0x201C0E: rfapi_vty_out_vncinfo (rfapi_vty.c:429) ==7313== by 0x18D0D6: route_vty_out (bgp_route.c:7481) ==7313== by 0x18DD76: bgp_show_table (bgp_route.c:9365) ==7313== by 0x1930C4: bgp_show_table_rd (bgp_route.c:9471) ==7313== by 0x1932A3: bgp_show (bgp_route.c:9510) ==7313== by 0x193E68: show_ip_bgp_json (bgp_route.c:10284) ==7313== by 0x4E6D024: cmd_execute_command_real.isra.2 (command.c:1072) ==7313== by 0x4E6F51E: cmd_execute_command (command.c:1131) ==7313== by 0x4E6F686: cmd_execute (command.c:1285) ==7313== by 0x4EBF9C4: vty_command (vty.c:516) ==7313== by 0x4EBFB9F: vty_execute (vty.c:1285) ==7313== by 0x4EC250F: vtysh_read (vty.c:2119) ==7313== that is causing the actual crash. Signed-off-by: Donald Sharp --- bgpd/bgp_route.c | 6 +++--- bgpd/rfapi/rfapi_import.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 84d1feca78..c122df498a 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -1785,9 +1785,9 @@ int subgroup_announce_check(struct bgp_node *rn, struct bgp_path_info *pi, /* Route map & unsuppress-map apply. */ if (ROUTE_MAP_OUT_NAME(filter) || (pi->extra && pi->extra->suppress)) { - struct bgp_path_info rmap_path; - struct bgp_path_info_extra dummy_rmap_path_extra; - struct attr dummy_attr; + struct bgp_path_info rmap_path = {0}; + struct bgp_path_info_extra dummy_rmap_path_extra = {0}; + struct attr dummy_attr = {0}; memset(&rmap_path, 0, sizeof(struct bgp_path_info)); rmap_path.peer = peer; diff --git a/bgpd/rfapi/rfapi_import.c b/bgpd/rfapi/rfapi_import.c index 87a05a4f8c..655cf747de 100644 --- a/bgpd/rfapi/rfapi_import.c +++ b/bgpd/rfapi/rfapi_import.c @@ -2179,8 +2179,8 @@ static struct bgp_path_info *rfapiItBiIndexSearch( { struct skiplist *sl; int rc; - struct bgp_path_info bpi_fake; - struct bgp_path_info_extra bpi_extra; + struct bgp_path_info bpi_fake = {0}; + struct bgp_path_info_extra bpi_extra = {0}; struct bgp_path_info *bpi_result; sl = RFAPI_RDINDEX(rn); From 4c5a5879f75bda99f9f4ad266ddd0e095ab1cdc8 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Thu, 10 Oct 2019 08:52:54 -0400 Subject: [PATCH 5/5] lib: Fix read beyond end of data structure Our Address Sanitizer CI is finding this issue: error 09-Oct-2019 19:28:33 r4: bgpd triggered an exception by AddressSanitizer error 09-Oct-2019 19:28:33 ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7ffdd425b060 at pc 0x00000068575f bp 0x7ffdd4258550 sp 0x7ffdd4258540 error 09-Oct-2019 19:28:33 READ of size 1 at 0x7ffdd425b060 thread T0 error 09-Oct-2019 19:28:33 #0 0x68575e in prefix_cmp lib/prefix.c:776 error 09-Oct-2019 19:28:33 #1 0x5889f5 in rfapiItBiIndexSearch bgpd/rfapi/rfapi_import.c:2230 error 09-Oct-2019 19:28:33 #2 0x5889f5 in rfapiBgpInfoFilteredImportVPN bgpd/rfapi/rfapi_import.c:3520 error 09-Oct-2019 19:28:33 #3 0x58b909 in rfapiProcessWithdraw bgpd/rfapi/rfapi_import.c:4071 error 09-Oct-2019 19:28:33 #4 0x4c459b in bgp_withdraw bgpd/bgp_route.c:3736 error 09-Oct-2019 19:28:33 #5 0x484122 in bgp_nlri_parse_vpn bgpd/bgp_mplsvpn.c:237 error 09-Oct-2019 19:28:33 #6 0x497f52 in bgp_nlri_parse bgpd/bgp_packet.c:315 error 09-Oct-2019 19:28:33 #7 0x49d06d in bgp_update_receive bgpd/bgp_packet.c:1598 error 09-Oct-2019 19:28:33 #8 0x49d06d in bgp_process_packet bgpd/bgp_packet.c:2274 error 09-Oct-2019 19:28:33 #9 0x6b9f54 in thread_call lib/thread.c:1531 error 09-Oct-2019 19:28:33 #10 0x657037 in frr_run lib/libfrr.c:1052 error 09-Oct-2019 19:28:33 #11 0x42d268 in main bgpd/bgp_main.c:486 error 09-Oct-2019 19:28:33 #12 0x7f806032482f in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2082f) error 09-Oct-2019 19:28:33 #13 0x42bcc8 in _start (/usr/lib/frr/bgpd+0x42bcc8) error 09-Oct-2019 19:28:33 error 09-Oct-2019 19:28:33 Address 0x7ffdd425b060 is located in stack of thread T0 at offset 240 in frame error 09-Oct-2019 19:28:33 #0 0x483945 in bgp_nlri_parse_vpn bgpd/bgp_mplsvpn.c:103 error 09-Oct-2019 19:28:33 error 09-Oct-2019 19:28:33 This frame has 5 object(s): error 09-Oct-2019 19:28:33 [32, 36) 'label' error 09-Oct-2019 19:28:33 [96, 108) 'rd_as' error 09-Oct-2019 19:28:33 [160, 172) 'rd_ip' error 09-Oct-2019 19:28:33 [224, 240) 'prd' <== Memory access at offset 240 overflows this variable error 09-Oct-2019 19:28:33 [288, 336) 'p' error 09-Oct-2019 19:28:33 HINT: this may be a false positive if your program uses some custom stack unwind mechanism or swapcontext error 09-Oct-2019 19:28:33 (longjmp and C++ exceptions *are* supported) error 09-Oct-2019 19:28:33 SUMMARY: AddressSanitizer: stack-buffer-overflow lib/prefix.c:776 prefix_cmp error 09-Oct-2019 19:28:33 Shadow bytes around the buggy address: error 09-Oct-2019 19:28:33 0x10003a8435b0: 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 00 00 00 error 09-Oct-2019 19:28:33 0x10003a8435c0: 00 00 00 00 00 00 00 00 00 00 f3 f3 f3 f3 f3 f3 error 09-Oct-2019 19:28:33 0x10003a8435d0: f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 00 00 error 09-Oct-2019 19:28:33 0x10003a8435e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 error 09-Oct-2019 19:28:33 0x10003a8435f0: f1 f1 04 f4 f4 f4 f2 f2 f2 f2 00 04 f4 f4 f2 f2 error 09-Oct-2019 19:28:33 =>0x10003a843600: f2 f2 00 04 f4 f4 f2 f2 f2 f2 00 00[f4]f4 f2 f2 error 09-Oct-2019 19:28:33 0x10003a843610: f2 f2 00 00 00 00 00 00 f4 f4 f3 f3 f3 f3 00 00 error 09-Oct-2019 19:28:33 0x10003a843620: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 error 09-Oct-2019 19:28:33 0x10003a843630: 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 02 f4 error 09-Oct-2019 19:28:33 0x10003a843640: f4 f4 f2 f2 f2 f2 04 f4 f4 f4 f2 f2 f2 f2 00 00 error 09-Oct-2019 19:28:33 0x10003a843650: f4 f4 f2 f2 f2 f2 00 00 00 00 f2 f2 f2 f2 00 00 error 09-Oct-2019 19:28:33 Shadow byte legend (one shadow byte represents 8 application bytes): error 09-Oct-2019 19:28:33 Addressable: 00 error 09-Oct-2019 19:28:33 Partially addressable: 01 02 03 04 05 06 07 error 09-Oct-2019 19:28:33 Heap left redzone: fa error 09-Oct-2019 19:28:33 Heap right redzone: fb error 09-Oct-2019 19:28:33 Freed heap region: fd error 09-Oct-2019 19:28:33 Stack left redzone: f1 error 09-Oct-2019 19:28:33 Stack mid redzone: f2 error 09-Oct-2019 19:28:33 Stack right redzone: f3 error 09-Oct-2019 19:28:33 Stack partial redzone: f4 error 09-Oct-2019 19:28:33 Stack after return: f5 error 09-Oct-2019 19:28:33 Stack use after scope: f8 error 09-Oct-2019 19:28:33 Global redzone: f9 error 09-Oct-2019 19:28:33 Global init order: f6 error 09-Oct-2019 19:28:33 Poisoned by user: f7 error 09-Oct-2019 19:28:33 Container overflow: fc error 09-Oct-2019 19:28:33 Array cookie: ac error 09-Oct-2019 19:28:33 Intra object redzone: bb error 09-Oct-2019 19:28:33 ASan internal: fe error 09-Oct-2019 19:28:36 r3: Daemon bgpd not running This is the result of this code pattern in rfapi/rfapi_import.c: prefix_cmp((struct prefix *)&bpi_result->extra->vnc.import.rd, (struct prefix *)prd)) Effectively prd or vnc.import.rd are `struct prefix_rd` which are being typecast to a `struct prefix`. Not a big deal except commit 1315d74de97be2944d7b005b2f9a50e9ae5eff4d modified the prefix_cmp function to allow for a sorted prefix_cmp. In prefix_cmp we were looking at the offset and shift. In the case of vnc we were passing a prefix length of 64 which is the exact length of the remaining data structure for struct prefix_rd. So we calculated a offset of 8 and a shift of 0. The data structures for the prefix portion happened to be equal to 64 bits of data. So we checked that with the memcmp got a 0 and promptly read off the end of the data structure for the numcmp. The fix is if shift is 0 that means thei the memcmp has checked everything and there is nothing to do. Please note: We will still crash if we set the prefixlen > then ~312 bits currently( ie if the prefixlen specifies a bit length longer than the prefix length ). I do not think there is anything to do here( nor am I sure how to correct this either ) as that we are going to have some severe problems when we muck up the prefixlen. Fixes: #5025 Signed-off-by: Donald Sharp --- lib/prefix.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/prefix.c b/lib/prefix.c index 35b679ab90..aa6661d7fb 100644 --- a/lib/prefix.c +++ b/lib/prefix.c @@ -773,8 +773,18 @@ int prefix_cmp(union prefixconstptr up1, union prefixconstptr up2) if (i) return i; - return numcmp(pp1[offset] & maskbit[shift], - pp2[offset] & maskbit[shift]); + /* + * At this point offset was the same, if we have shift + * that means we still have data to compare, if shift is + * 0 then we are at the end of the data structure + * and should just return, as that we will be accessing + * memory beyond the end of the party zone + */ + if (shift) + return numcmp(pp1[offset] & maskbit[shift], + pp2[offset] & maskbit[shift]); + + return 0; } /*