zfs/config/toolchain-simd.m4
Joel Low bb9225ea86 Backport AVX2 AES-GCM implementation from BoringSSL
This uses the AVX2 versions of the AESENC and PCLMULQDQ instructions; on
Zen 3 this provides an up to 80% performance improvement.

Original source:
d5440dd2c2/gen/bcm/aes-gcm-avx2-x86_64-linux.S

See the original BoringSSL commit at
3b6e1be439.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Rob Norris <robn@despairlabs.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Attila Fülöp <attila@fueloep.org>
Signed-off-by: Joel Low <joel@joelsplace.sg>
Closes #17058
2025-08-13 14:51:20 -07:00

558 lines
12 KiB
Plaintext

dnl #
dnl # Checks if host toolchain supports SIMD instructions
dnl #
AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [
case "$host_cpu" in
amd64 | x86_64 | x86 | i686)
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VAES
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VPCLMULQDQ
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES
;;
esac
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE], [
AC_MSG_CHECKING([whether host toolchain supports SSE])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
int main()
{
__asm__ __volatile__("xorps %xmm0, %xmm1");
return (0);
}
]])], [
AC_DEFINE([HAVE_SSE], 1, [Define if host toolchain supports SSE])
AC_MSG_RESULT([yes])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2], [
AC_MSG_CHECKING([whether host toolchain supports SSE2])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
int main()
{
__asm__ __volatile__("pxor %xmm0, %xmm1");
return (0);
}
]])], [
AC_DEFINE([HAVE_SSE2], 1, [Define if host toolchain supports SSE2])
AC_MSG_RESULT([yes])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3], [
AC_MSG_CHECKING([whether host toolchain supports SSE3])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
int main()
{
char v[16];
__asm__ __volatile__("lddqu %0,%%xmm0" :: "m"(v[0]));
return (0);
}
]])], [
AC_DEFINE([HAVE_SSE3], 1, [Define if host toolchain supports SSE3])
AC_MSG_RESULT([yes])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3], [
AC_MSG_CHECKING([whether host toolchain supports SSSE3])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
int main()
{
__asm__ __volatile__("pshufb %xmm0,%xmm1");
return (0);
}
]])], [
AC_DEFINE([HAVE_SSSE3], 1, [Define if host toolchain supports SSSE3])
AC_MSG_RESULT([yes])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1], [
AC_MSG_CHECKING([whether host toolchain supports SSE4.1])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
int main()
{
__asm__ __volatile__("pmaxsb %xmm0,%xmm1");
return (0);
}
]])], [
AC_DEFINE([HAVE_SSE4_1], 1, [Define if host toolchain supports SSE4.1])
AC_MSG_RESULT([yes])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2], [
AC_MSG_CHECKING([whether host toolchain supports SSE4.2])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
int main()
{
__asm__ __volatile__("pcmpgtq %xmm0, %xmm1");
return (0);
}
]])], [
AC_DEFINE([HAVE_SSE4_2], 1, [Define if host toolchain supports SSE4.2])
AC_MSG_RESULT([yes])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX], [
AC_MSG_CHECKING([whether host toolchain supports AVX])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
int main()
{
char v[32];
__asm__ __volatile__("vmovdqa %0,%%ymm0" :: "m"(v[0]));
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX], 1, [Define if host toolchain supports AVX])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2], [
AC_MSG_CHECKING([whether host toolchain supports AVX2])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("vpshufb %ymm0,%ymm1,%ymm2");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX2], 1, [Define if host toolchain supports AVX2])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F], [
AC_MSG_CHECKING([whether host toolchain supports AVX512F])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("vpandd %zmm0,%zmm1,%zmm2");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512F], 1, [Define if host toolchain supports AVX512F])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD], [
AC_MSG_CHECKING([whether host toolchain supports AVX512CD])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("vplzcntd %zmm0,%zmm1");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512CD], 1, [Define if host toolchain supports AVX512CD])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ], [
AC_MSG_CHECKING([whether host toolchain supports AVX512DQ])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("vandpd %zmm0,%zmm1,%zmm2");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512DQ], 1, [Define if host toolchain supports AVX512DQ])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW], [
AC_MSG_CHECKING([whether host toolchain supports AVX512BW])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("vpshufb %zmm0,%zmm1,%zmm2");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512BW], 1, [Define if host toolchain supports AVX512BW])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA], [
AC_MSG_CHECKING([whether host toolchain supports AVX512IFMA])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("vpmadd52luq %zmm0,%zmm1,%zmm2");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512IFMA], 1, [Define if host toolchain supports AVX512IFMA])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI], [
AC_MSG_CHECKING([whether host toolchain supports AVX512VBMI])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("vpermb %zmm0,%zmm1,%zmm2");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512VBMI], 1, [Define if host toolchain supports AVX512VBMI])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF], [
AC_MSG_CHECKING([whether host toolchain supports AVX512PF])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("vgatherpf0dps (%rsi,%zmm0,4){%k1}");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512PF], 1, [Define if host toolchain supports AVX512PF])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER], [
AC_MSG_CHECKING([whether host toolchain supports AVX512ER])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("vexp2pd %zmm0,%zmm1");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512ER], 1, [Define if host toolchain supports AVX512ER])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL], [
AC_MSG_CHECKING([whether host toolchain supports AVX512VL])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("vpabsq %zmm0,%zmm1");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512VL], 1, [Define if host toolchain supports AVX512VL])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES], [
AC_MSG_CHECKING([whether host toolchain supports AES])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("aesenc %xmm0, %xmm1");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AES], 1, [Define if host toolchain supports AES])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ], [
AC_MSG_CHECKING([whether host toolchain supports PCLMULQDQ])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("pclmulqdq %0, %%xmm0, %%xmm1" :: "i"(0));
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_PCLMULQDQ], 1, [Define if host toolchain supports PCLMULQDQ])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [
AC_MSG_CHECKING([whether host toolchain supports MOVBE])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("movbe 0(%eax), %eax");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_MOVBE], 1, [Define if host toolchain supports MOVBE])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VAES
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VAES], [
AC_MSG_CHECKING([whether host toolchain supports VAES])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("vaesenc %ymm0, %ymm1, %ymm0");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_VAES], 1, [Define if host toolchain supports VAES])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VPCLMULQDQ
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VPCLMULQDQ], [
AC_MSG_CHECKING([whether host toolchain supports VPCLMULQDQ])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
__asm__ __volatile__("vpclmulqdq %0, %%ymm4, %%ymm3, %%ymm5" :: "i"(0));
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_VPCLMULQDQ], 1, [Define if host toolchain supports VPCLMULQDQ])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE], [
AC_MSG_CHECKING([whether host toolchain supports XSAVE])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
char b[4096] __attribute__ ((aligned (64)));
__asm__ __volatile__("xsave %[b]\n" : : [b] "m" (*b) : "memory");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_XSAVE], 1, [Define if host toolchain supports XSAVE])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT], [
AC_MSG_CHECKING([whether host toolchain supports XSAVEOPT])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
char b[4096] __attribute__ ((aligned (64)));
__asm__ __volatile__("xsaveopt %[b]\n" : : [b] "m" (*b) : "memory");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_XSAVEOPT], 1, [Define if host toolchain supports XSAVEOPT])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES], [
AC_MSG_CHECKING([whether host toolchain supports XSAVES])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
int main()
{
char b[4096] __attribute__ ((aligned (64)));
__asm__ __volatile__("xsaves %[b]\n" : : [b] "m" (*b) : "memory");
return (0);
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_XSAVES], 1, [Define if host toolchain supports XSAVES])
], [
AC_MSG_RESULT([no])
])
])