mirror of
https://anongit.gentoo.org/git/repo/gentoo.git
synced 2025-07-25 00:19:49 +02:00
See https://mail.kde.org/pipermail/distributions/2024-July/001511.html. Signed-off-by: Sam James <sam@gentoo.org>
148 lines
5.7 KiB
Diff
148 lines
5.7 KiB
Diff
https://mail.kde.org/pipermail/distributions/2024-July/001511.html
|
|
https://github.com/xtensor-stack/xsimd/commit/96edf0340492fa9c080f5182b38358ca85baef5e
|
|
|
|
From 96edf0340492fa9c080f5182b38358ca85baef5e Mon Sep 17 00:00:00 2001
|
|
From: Dmitry Kazakov <dimula73@gmail.com>
|
|
Date: Tue, 28 May 2024 22:21:08 +0200
|
|
Subject: [PATCH] Fix detection of SSE/AVX/AVX512 when they are explicitly
|
|
disabled by OS
|
|
|
|
Some CPU vulnerability mitigations may disable AVX functionality
|
|
on the hardware level via the XCR0 register. We should check that
|
|
manually to verify that the OS actually allows us to use this feature.
|
|
|
|
See https://bugs.kde.org/show_bug.cgi?id=484622
|
|
|
|
Fix #1025
|
|
---
|
|
include/xsimd/config/xsimd_cpuid.hpp | 91 ++++++++++++++++++++++------
|
|
1 file changed, 72 insertions(+), 19 deletions(-)
|
|
|
|
diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp
|
|
index f22089bac..6dda3be09 100644
|
|
--- a/include/xsimd/config/xsimd_cpuid.hpp
|
|
+++ b/include/xsimd/config/xsimd_cpuid.hpp
|
|
@@ -114,6 +114,35 @@ namespace xsimd
|
|
#endif
|
|
|
|
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
|
|
+
|
|
+ auto get_xcr0_low = []() noexcept
|
|
+ {
|
|
+ uint32_t xcr0;
|
|
+
|
|
+#if defined(_MSC_VER) && _MSC_VER >= 1400
|
|
+
|
|
+ xcr0 = (uint32_t)_xgetbv(0);
|
|
+
|
|
+#elif defined(__GNUC__)
|
|
+
|
|
+ __asm__(
|
|
+ "xorl %%ecx, %%ecx\n"
|
|
+ "xgetbv\n"
|
|
+ : "=a"(xcr0)
|
|
+ :
|
|
+#if defined(__i386__)
|
|
+ : "ecx", "edx"
|
|
+#else
|
|
+ : "rcx", "rdx"
|
|
+#endif
|
|
+ );
|
|
+
|
|
+#else /* _MSC_VER < 1400 */
|
|
+#error "_MSC_VER < 1400 is not supported"
|
|
+#endif /* _MSC_VER && _MSC_VER >= 1400 */
|
|
+ return xcr0;
|
|
+ };
|
|
+
|
|
auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept
|
|
{
|
|
|
|
@@ -148,19 +177,43 @@ namespace xsimd
|
|
|
|
get_cpuid(regs1, 0x1);
|
|
|
|
- sse2 = regs1[3] >> 26 & 1;
|
|
- sse3 = regs1[2] >> 0 & 1;
|
|
- ssse3 = regs1[2] >> 9 & 1;
|
|
- sse4_1 = regs1[2] >> 19 & 1;
|
|
- sse4_2 = regs1[2] >> 20 & 1;
|
|
- fma3_sse42 = regs1[2] >> 12 & 1;
|
|
+ // OS can explicitly disable the usage of SSE/AVX extensions
|
|
+ // by clearing the corresponding state bit in the XCR0 register
|
|
+ //
|
|
+ // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html
|
|
+
|
|
+ unsigned sse_state_os_enabled = 1;
|
|
+ unsigned avx_state_os_enabled = 1;
|
|
+ unsigned avx512_state_os_enabled = 1;
|
|
+
|
|
+ // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit
|
|
+ // 18] to enable XSETBV/XGETBV instructions to access XCR0 and
|
|
+ // to support processor extended state management using
|
|
+ // XSAVE/XRSTOR.
|
|
+ bool osxsave = regs1[2] >> 27 & 1;
|
|
+ if (osxsave)
|
|
+ {
|
|
+
|
|
+ uint32_t xcr0 = get_xcr0_low();
|
|
+
|
|
+ sse_state_os_enabled = xcr0 >> 1 & 1;
|
|
+ avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled;
|
|
+ avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled;
|
|
+ }
|
|
+
|
|
+ sse2 = regs1[3] >> 26 & sse_state_os_enabled;
|
|
+ sse3 = regs1[2] >> 0 & sse_state_os_enabled;
|
|
+ ssse3 = regs1[2] >> 9 & sse_state_os_enabled;
|
|
+ sse4_1 = regs1[2] >> 19 & sse_state_os_enabled;
|
|
+ sse4_2 = regs1[2] >> 20 & sse_state_os_enabled;
|
|
+ fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled;
|
|
|
|
- avx = regs1[2] >> 28 & 1;
|
|
+ avx = regs1[2] >> 28 & avx_state_os_enabled;
|
|
fma3_avx = avx && fma3_sse42;
|
|
|
|
int regs8[4];
|
|
get_cpuid(regs8, 0x80000001);
|
|
- fma4 = regs8[2] >> 16 & 1;
|
|
+ fma4 = regs8[2] >> 16 & avx_state_os_enabled;
|
|
|
|
// sse4a = regs[2] >> 6 & 1;
|
|
|
|
@@ -168,23 +221,23 @@ namespace xsimd
|
|
|
|
int regs7[4];
|
|
get_cpuid(regs7, 0x7);
|
|
- avx2 = regs7[1] >> 5 & 1;
|
|
+ avx2 = regs7[1] >> 5 & avx_state_os_enabled;
|
|
|
|
int regs7a[4];
|
|
get_cpuid(regs7a, 0x7, 0x1);
|
|
- avxvnni = regs7a[0] >> 4 & 1;
|
|
+ avxvnni = regs7a[0] >> 4 & avx_state_os_enabled;
|
|
|
|
fma3_avx2 = avx2 && fma3_sse42;
|
|
|
|
- avx512f = regs7[1] >> 16 & 1;
|
|
- avx512cd = regs7[1] >> 28 & 1;
|
|
- avx512dq = regs7[1] >> 17 & 1;
|
|
- avx512bw = regs7[1] >> 30 & 1;
|
|
- avx512er = regs7[1] >> 27 & 1;
|
|
- avx512pf = regs7[1] >> 26 & 1;
|
|
- avx512ifma = regs7[1] >> 21 & 1;
|
|
- avx512vbmi = regs7[2] >> 1 & 1;
|
|
- avx512vnni_bw = regs7[2] >> 11 & 1;
|
|
+ avx512f = regs7[1] >> 16 & avx512_state_os_enabled;
|
|
+ avx512cd = regs7[1] >> 28 & avx512_state_os_enabled;
|
|
+ avx512dq = regs7[1] >> 17 & avx512_state_os_enabled;
|
|
+ avx512bw = regs7[1] >> 30 & avx512_state_os_enabled;
|
|
+ avx512er = regs7[1] >> 27 & avx512_state_os_enabled;
|
|
+ avx512pf = regs7[1] >> 26 & avx512_state_os_enabled;
|
|
+ avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled;
|
|
+ avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled;
|
|
+ avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled;
|
|
avx512vnni_vbmi = avx512vbmi && avx512vnni_bw;
|
|
#endif
|
|
}
|
|
|