diff --git a/debian/patches/D158066.patch b/debian/patches/D158066.patch new file mode 100644 index 00000000..99989d00 --- /dev/null +++ b/debian/patches/D158066.patch @@ -0,0 +1,265 @@ +Description: Fix SIMD compatibility headers on ppc64el +Origin/Author: https://reviews.llvm.org/D158066 +Bug-Debian: https://bugs.debian.org/1049362 + +Index: llvm-toolchain-16-16.0.6/clang/include/clang/Basic/BuiltinsPPC.def +=================================================================== +--- llvm-toolchain-16-16.0.6.orig/clang/include/clang/Basic/BuiltinsPPC.def ++++ llvm-toolchain-16-16.0.6/clang/include/clang/Basic/BuiltinsPPC.def +@@ -132,8 +132,10 @@ + BUILTIN(__builtin_ppc_extract_sig, "ULLid", "") + BUILTIN(__builtin_ppc_mtfsb0, "vUIi", "") + BUILTIN(__builtin_ppc_mtfsb1, "vUIi", "") ++BUILTIN(__builtin_ppc_mffs, "d", "") + BUILTIN(__builtin_ppc_mtfsf, "vUIiUi", "") + BUILTIN(__builtin_ppc_mtfsfi, "vUIiUIi", "") ++BUILTIN(__builtin_ppc_set_fpscr_rn, "di", "") + BUILTIN(__builtin_ppc_insert_exp, "ddULLi", "") + BUILTIN(__builtin_ppc_fmsub, "dddd", "") + BUILTIN(__builtin_ppc_fmsubs, "ffff", "") +Index: llvm-toolchain-16-16.0.6/clang/lib/CodeGen/CGBuiltin.cpp +=================================================================== +--- llvm-toolchain-16-16.0.6.orig/clang/lib/CodeGen/CGBuiltin.cpp ++++ llvm-toolchain-16-16.0.6/clang/lib/CodeGen/CGBuiltin.cpp +@@ -16742,6 +16742,11 @@ + Value *Op1 = EmitScalarExpr(E->getArg(1)); + return Builder.CreateFDiv(Op0, Op1, "swdiv"); + } ++ case PPC::BI__builtin_ppc_set_fpscr_rn: ++ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd), ++ {EmitScalarExpr(E->getArg(0))}); ++ case PPC::BI__builtin_ppc_mffs: ++ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm)); + } + } + +Index: llvm-toolchain-16-16.0.6/clang/lib/Headers/ppc_wrappers/smmintrin.h +=================================================================== +--- llvm-toolchain-16-16.0.6.orig/clang/lib/Headers/ppc_wrappers/smmintrin.h ++++ llvm-toolchain-16-16.0.6/clang/lib/Headers/ppc_wrappers/smmintrin.h +@@ -14,7 +14,7 @@ + + #ifndef NO_WARN_X86_INTRINSICS + /* This header is distributed to simplify porting x86_64 code that +- makes explicit use of Intel intrinsics to powerp64/powerpc64le. ++ makes explicit use of Intel intrinsics to powerpc64/powerpc64le. + + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. +@@ -68,10 +68,10 @@ + __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; + #else +- __fpscr_save.__fr = __builtin_mffs(); ++ __fpscr_save.__fr = __builtin_ppc_mffs(); + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; + __fpscr_save.__fpscr &= ~0xf8; +- __builtin_mtfsf(0b00000011, __fpscr_save.__fr); ++ __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); + #endif + /* Insert an artificial "read/write" reference to the variable + read below, to ensure the compiler does not schedule +@@ -83,10 +83,15 @@ + + switch (__rounding) { + case _MM_FROUND_TO_NEAREST_INT: +- __fpscr_save.__fr = __builtin_mffsl(); ++#ifdef _ARCH_PWR9 ++ __fpscr_save.__fr = __builtin_ppc_mffsl(); ++#else ++ __fpscr_save.__fr = __builtin_ppc_mffs(); ++ __fpscr_save.__fpscr &= 0x70007f0ffL; ++#endif + __attribute__((fallthrough)); + case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: +- __builtin_set_fpscr_rn(0b00); ++ __builtin_ppc_set_fpscr_rn(0b00); + /* Insert an artificial "read/write" reference to the variable + read below, to ensure the compiler does not schedule + a read/use of the variable before the FPSCR is modified, above. +@@ -102,7 +107,7 @@ + This can be removed if and when GCC PR102783 is fixed. + */ + __asm__("" : : "wa"(__r)); +- __builtin_set_fpscr_rn(__fpscr_save.__fpscr); ++ __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); + break; + case _MM_FROUND_TO_NEG_INF: + case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: +@@ -128,9 +133,14 @@ + */ + __asm__("" : : "wa"(__r)); + /* Restore enabled exceptions. */ +- __fpscr_save.__fr = __builtin_mffsl(); ++#ifdef _ARCH_PWR9 ++ __fpscr_save.__fr = __builtin_ppc_mffsl(); ++#else ++ __fpscr_save.__fr = __builtin_ppc_mffs(); ++ __fpscr_save.__fpscr &= 0x70007f0ffL; ++#endif + __fpscr_save.__fpscr |= __enables_save.__fpscr; +- __builtin_mtfsf(0b00000011, __fpscr_save.__fr); ++ __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); + } + return (__m128d)__r; + } +@@ -159,10 +169,10 @@ + __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; + #else +- __fpscr_save.__fr = __builtin_mffs(); ++ __fpscr_save.__fr = __builtin_ppc_mffs(); + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; + __fpscr_save.__fpscr &= ~0xf8; +- __builtin_mtfsf(0b00000011, __fpscr_save.__fr); ++ __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); + #endif + /* Insert an artificial "read/write" reference to the variable + read below, to ensure the compiler does not schedule +@@ -174,10 +184,15 @@ + + switch (__rounding) { + case _MM_FROUND_TO_NEAREST_INT: +- __fpscr_save.__fr = __builtin_mffsl(); ++#ifdef _ARCH_PWR9 ++ __fpscr_save.__fr = __builtin_ppc_mffsl(); ++#else ++ __fpscr_save.__fr = __builtin_ppc_mffs(); ++ __fpscr_save.__fpscr &= 0x70007f0ffL; ++#endif + __attribute__((fallthrough)); + case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: +- __builtin_set_fpscr_rn(0b00); ++ __builtin_ppc_set_fpscr_rn(0b00); + /* Insert an artificial "read/write" reference to the variable + read below, to ensure the compiler does not schedule + a read/use of the variable before the FPSCR is modified, above. +@@ -193,7 +208,7 @@ + This can be removed if and when GCC PR102783 is fixed. + */ + __asm__("" : : "wa"(__r)); +- __builtin_set_fpscr_rn(__fpscr_save.__fpscr); ++ __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); + break; + case _MM_FROUND_TO_NEG_INF: + case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: +@@ -219,9 +234,14 @@ + */ + __asm__("" : : "wa"(__r)); + /* Restore enabled exceptions. */ +- __fpscr_save.__fr = __builtin_mffsl(); ++#ifdef _ARCH_PWR9 ++ __fpscr_save.__fr = __builtin_ppc_mffsl(); ++#else ++ __fpscr_save.__fr = __builtin_ppc_mffs(); ++ __fpscr_save.__fpscr &= 0x70007f0ffL; ++#endif + __fpscr_save.__fpscr |= __enables_save.__fpscr; +- __builtin_mtfsf(0b00000011, __fpscr_save.__fr); ++ __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); + } + return (__m128)__r; + } +Index: llvm-toolchain-16-16.0.6/clang/test/CodeGen/PowerPC/builtins-ppc.c +=================================================================== +--- llvm-toolchain-16-16.0.6.orig/clang/test/CodeGen/PowerPC/builtins-ppc.c ++++ llvm-toolchain-16-16.0.6/clang/test/CodeGen/PowerPC/builtins-ppc.c +@@ -1,5 +1,8 @@ + // REQUIRES: powerpc-registered-target +-// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - | FileCheck %s ++// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \ ++// RUN: | FileCheck %s ++// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \ ++// RUN: -target-cpu pwr9 | FileCheck %s --check-prefixes=P9,CHECK + + void test_eh_return_data_regno() + { +@@ -26,6 +29,9 @@ + + // CHECK: call double @llvm.ppc.setrnd(i32 %2) + res = __builtin_setrnd(x); ++ ++ // CHECK: call double @llvm.ppc.setrnd(i32 %4) ++ res = __builtin_ppc_set_fpscr_rn(x); + } + + void test_builtin_ppc_flm() { +@@ -33,7 +39,10 @@ + // CHECK: call double @llvm.ppc.readflm() + res = __builtin_readflm(); + +- // CHECK: call double @llvm.ppc.setflm(double %1) ++ // CHECK: call double @llvm.ppc.readflm() ++ res = __builtin_ppc_mffs(); ++ ++ // CHECK: call double @llvm.ppc.setflm(double %2) + res = __builtin_setflm(res); + } + +Index: llvm-toolchain-16-16.0.6/clang/test/CodeGen/PowerPC/ppc-smmintrin.c +=================================================================== +--- llvm-toolchain-16-16.0.6.orig/clang/test/CodeGen/PowerPC/ppc-smmintrin.c ++++ llvm-toolchain-16-16.0.6/clang/test/CodeGen/PowerPC/ppc-smmintrin.c +@@ -239,44 +239,48 @@ + // CHECK-LABEL: @test_round + + // CHECK-LABEL: define available_externally <4 x float> @_mm_round_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) +-// CHECK: call signext i32 @__builtin_mffs() +-// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) ++// CHECK: call double @llvm.ppc.readflm() ++// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0" +-// CHECK: call signext i32 @__builtin_mffsl() +-// CHECK: call signext i32 @__builtin_set_fpscr_rn(i32 noundef signext 0) ++// CHECK: call double @llvm.ppc.readflm() ++// P10: call double @llvm.ppc.mffsl() ++// CHECK: call double @llvm.ppc.setrnd(i32 0) + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0" + // CHECK: call <4 x float> @vec_rint(float vector[4]) + // CHECK: call void asm sideeffect "", "^wa" +-// CHECK: call signext i32 @__builtin_set_fpscr_rn(i64 noundef %{{[0-9a-zA-Z_.]+}}) ++// CHECK: call double @llvm.ppc.setrnd(i32 %{{[0-9a-zA-Z_.]+}}) + // CHECK: call <4 x float> @vec_floor(float vector[4]) + // CHECK: call <4 x float> @vec_ceil(float vector[4]) + // CHECK: call <4 x float> @vec_trunc(float vector[4]) + // CHECK: call <4 x float> @vec_rint(float vector[4]) + // CHECK: call void asm sideeffect "", "^wa" +-// CHECK: call signext i32 @__builtin_mffsl() +-// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) ++// CHECK: call double @llvm.ppc.readflm() ++// P10: call double @llvm.ppc.mffsl() ++// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) + + // CHECK-LABEL: define available_externally <4 x float> @_mm_round_ss(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) + // CHECK: call <4 x float> @_mm_round_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) + // CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 + + // CHECK-LABEL: define available_externally <2 x double> @_mm_round_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) +-// CHECK: call signext i32 @__builtin_mffs() +-// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) ++// CHECK: call double @llvm.ppc.readflm() ++// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0" +-// CHECK: call signext i32 @__builtin_mffsl() +-// CHECK: call signext i32 @__builtin_set_fpscr_rn(i32 noundef signext 0) ++// CHECK: call double @llvm.ppc.readflm() ++// P10: call double @llvm.ppc.mffsl() ++// CHECK: call double @llvm.ppc.setrnd(i32 0) + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0" + // CHECK: call <2 x double> @vec_rint(double vector[2]) + // CHECK: call void asm sideeffect "", "^wa" +-// CHECK: call signext i32 @__builtin_set_fpscr_rn(i64 noundef %{{[0-9a-zA-Z_.]+}}) ++// CHECK: call double @llvm.ppc.setrnd(i32 %{{[0-9a-zA-Z_.]+}}) + // CHECK: call <2 x double> @vec_floor(double vector[2]) + // CHECK: call <2 x double> @vec_ceil(double vector[2]) + // CHECK: call <2 x double> @vec_trunc(double vector[2]) + // CHECK: call <2 x double> @vec_rint(double vector[2]) + // CHECK: call void asm sideeffect "", "^wa" +-// CHECK: call signext i32 @__builtin_mffsl() +-// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) ++// CHECK: call double @llvm.ppc.readflm() ++// P10: call double @llvm.ppc.mffsl() ++// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) + + // CHECK-LABEL: define available_externally <2 x double> @_mm_round_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) + // CHECK: call <2 x double> @_mm_round_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) diff --git a/debian/patches/series b/debian/patches/series index 718c611a..3d38b7f8 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -153,3 +153,4 @@ amdgpu/nonlinux.patch ubuntu-releases.patch new-cmake-build-fix.patch HIP-search-path-fix.patch +D158066.patch