aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Support/Host.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2021-06-13 19:31:46 +0000
committerDimitry Andric <dim@FreeBSD.org>2021-07-31 18:56:55 +0000
commitaf732203b8f7f006927528db5497f5cbc4c4742a (patch)
tree596f112de3b76118552871dbb6114bb7e3e17f40 /contrib/llvm-project/llvm/lib/Support/Host.cpp
parent83dea422ac8d4a8323e64203c2eadaa813768717 (diff)
downloadsrc-af732203b8f7f006927528db5497f5cbc4c4742a.tar.gz
src-af732203b8f7f006927528db5497f5cbc4c4742a.zip
Merge llvm-project 12.0.1 release and follow-up fixes
Merge llvm-project main llvmorg-12-init-17869-g8e464dd76bef This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvmorg-12-init-17869-g8e464dd76bef, the last commit before the upstream release/12.x branch was created. PR: 255570 (cherry picked from commit e8d8bef961a50d4dc22501cde4fb9fb0be1b2532) Merge llvm-project 12.0.0 release This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvmorg-12.0.0-0-gd28af7c654d8, a.k.a. 12.0.0 release. PR: 255570 (cherry picked from commit d409305fa3838fb39b38c26fc085fb729b8766d5) Disable strict-fp for powerpcspe, as it does not work properly yet Merge commit 5c18d1136665 from llvm git (by Qiu Chaofan) [SPE] Disable strict-fp for SPE by default As discussed in PR50385, strict-fp on PowerPC SPE has not been handled well. This patch disables it by default for SPE. Reviewed By: nemanjai, vit9696, jhibbits Differential Revision: https://reviews.llvm.org/D103235 PR: 255570 (cherry picked from commit 715df83abc049b23d9acddc81f2480bd4c056d64) Apply upstream libc++ fix to allow building with devel/xxx-xtoolchain-gcc Merge commit 52e9d80d5db2 from llvm git (by Jason Liu): [libc++] add `inline` for __open's definition in ifstream and ofstream Summary: When building with gcc on AIX, it seems that gcc does not like the `always_inline` without the `inline` keyword. So adding the inline keywords in for __open in ifstream and ofstream. That will also make it consistent with __open in basic_filebuf (it seems we added `inline` there before for gcc build as well). Differential Revision: https://reviews.llvm.org/D99422 PR: 255570 (cherry picked from commit d099db25464b826c5724cf2fb5b22292bbe15f6e) Undefine HAVE_(DE)REGISTER_FRAME in llvm's config.h on arm Otherwise, the lli tool (enable by WITH_CLANG_EXTRAS) won't link on arm, stating that __register_frame is undefined. This function is normally provided by libunwind, but explicitly not for the ARM Exception ABI. Reported by: oh PR: 255570 (cherry picked from commit f336b45e943c7f9a90ffcea1a6c4c7039e54c73c) Merge llvm-project 12.0.1 rc2 This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvmorg-12.0.1-rc2-0-ge7dac564cd0e, a.k.a. 12.0.1 rc2. PR: 255570 (cherry picked from commit 23408297fbf3089f0388a8873b02fa75ab3f5bb9) Revert libunwind change to fix backtrace segfault on aarch64 Revert commit 22b615a96593 from llvm git (by Daniel Kiss): [libunwind] Support for leaf function unwinding. Unwinding leaf function is useful in cases when the backtrace finds a leaf function for example when it caused a signal. This patch also add the support for the DW_CFA_undefined because it marks the end of the frames. Ryan Prichard provided code for the tests. Reviewed By: #libunwind, mstorsjo Differential Revision: https://reviews.llvm.org/D83573 Reland with limit the test to the x86_64-linux target. Bisection has shown that this particular upstream commit causes programs using backtrace(3) on aarch64 to segfault. This affects the lang/rust port, for instance. Until we can upstream to fix this problem, revert the commit for now. Reported by: mikael PR: 256864 (cherry picked from commit 5866c369e4fd917c0d456f0f10b92ee354b82279) Merge llvm-project 12.0.1 release This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvmorg-12.0.1-0-gfed41342a82f, a.k.a. 12.0.1 release. PR: 255570 (cherry picked from commit 4652422eb477731f284b1345afeefef7f269da50) compilert-rt: build out-of-line LSE atomics helpers for aarch64 Both clang >= 12 and gcc >= 10.1 now default to -moutline-atomics for aarch64. This requires a bunch of helper functions in libcompiler_rt.a, to avoid link errors like "undefined symbol: __aarch64_ldadd8_acq_rel". (Note: of course you can use -mno-outline-atomics as a workaround too, but this would negate the potential performance benefit of the faster LSE instructions.) Bump __FreeBSD_version so ports maintainers can easily detect this. PR: 257392 (cherry picked from commit cc55ee8009a550810d38777fd6ace9abf3a2f6b4)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Support/Host.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Host.cpp298
1 files changed, 194 insertions, 104 deletions
diff --git a/contrib/llvm-project/llvm/lib/Support/Host.cpp b/contrib/llvm-project/llvm/lib/Support/Host.cpp
index 36cecf9b2a16..09146c47ff2c 100644
--- a/contrib/llvm-project/llvm/lib/Support/Host.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Host.cpp
@@ -161,11 +161,14 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
// Look for the CPU implementer line.
StringRef Implementer;
StringRef Hardware;
+ StringRef Part;
for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
if (Lines[I].startswith("CPU implementer"))
Implementer = Lines[I].substr(15).ltrim("\t :");
if (Lines[I].startswith("Hardware"))
Hardware = Lines[I].substr(8).ltrim("\t :");
+ if (Lines[I].startswith("CPU part"))
+ Part = Lines[I].substr(8).ltrim("\t :");
}
if (Implementer == "0x41") { // ARM Ltd.
@@ -175,110 +178,89 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
return "cortex-a53";
- // Look for the CPU part line.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I)
- if (Lines[I].startswith("CPU part"))
- // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
- // values correspond to the "Part number" in the CP15/c0 register. The
- // contents are specified in the various processor manuals.
- // This corresponds to the Main ID Register in Technical Reference Manuals.
- // and is used in programs like sys-utils
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0x926", "arm926ej-s")
- .Case("0xb02", "mpcore")
- .Case("0xb36", "arm1136j-s")
- .Case("0xb56", "arm1156t2-s")
- .Case("0xb76", "arm1176jz-s")
- .Case("0xc08", "cortex-a8")
- .Case("0xc09", "cortex-a9")
- .Case("0xc0f", "cortex-a15")
- .Case("0xc20", "cortex-m0")
- .Case("0xc23", "cortex-m3")
- .Case("0xc24", "cortex-m4")
- .Case("0xd22", "cortex-m55")
- .Case("0xd02", "cortex-a34")
- .Case("0xd04", "cortex-a35")
- .Case("0xd03", "cortex-a53")
- .Case("0xd07", "cortex-a57")
- .Case("0xd08", "cortex-a72")
- .Case("0xd09", "cortex-a73")
- .Case("0xd0a", "cortex-a75")
- .Case("0xd0b", "cortex-a76")
- .Case("0xd0d", "cortex-a77")
- .Case("0xd41", "cortex-a78")
- .Case("0xd44", "cortex-x1")
- .Case("0xd0c", "neoverse-n1")
- .Default("generic");
+ // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
+ // values correspond to the "Part number" in the CP15/c0 register. The
+ // contents are specified in the various processor manuals.
+ // This corresponds to the Main ID Register in Technical Reference Manuals.
+ // and is used in programs like sys-utils
+ return StringSwitch<const char *>(Part)
+ .Case("0x926", "arm926ej-s")
+ .Case("0xb02", "mpcore")
+ .Case("0xb36", "arm1136j-s")
+ .Case("0xb56", "arm1156t2-s")
+ .Case("0xb76", "arm1176jz-s")
+ .Case("0xc08", "cortex-a8")
+ .Case("0xc09", "cortex-a9")
+ .Case("0xc0f", "cortex-a15")
+ .Case("0xc20", "cortex-m0")
+ .Case("0xc23", "cortex-m3")
+ .Case("0xc24", "cortex-m4")
+ .Case("0xd22", "cortex-m55")
+ .Case("0xd02", "cortex-a34")
+ .Case("0xd04", "cortex-a35")
+ .Case("0xd03", "cortex-a53")
+ .Case("0xd07", "cortex-a57")
+ .Case("0xd08", "cortex-a72")
+ .Case("0xd09", "cortex-a73")
+ .Case("0xd0a", "cortex-a75")
+ .Case("0xd0b", "cortex-a76")
+ .Case("0xd0d", "cortex-a77")
+ .Case("0xd41", "cortex-a78")
+ .Case("0xd44", "cortex-x1")
+ .Case("0xd0c", "neoverse-n1")
+ .Case("0xd49", "neoverse-n2")
+ .Default("generic");
}
if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
- if (Lines[I].startswith("CPU part")) {
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0x516", "thunderx2t99")
- .Case("0x0516", "thunderx2t99")
- .Case("0xaf", "thunderx2t99")
- .Case("0x0af", "thunderx2t99")
- .Case("0xa1", "thunderxt88")
- .Case("0x0a1", "thunderxt88")
- .Default("generic");
- }
- }
+ return StringSwitch<const char *>(Part)
+ .Case("0x516", "thunderx2t99")
+ .Case("0x0516", "thunderx2t99")
+ .Case("0xaf", "thunderx2t99")
+ .Case("0x0af", "thunderx2t99")
+ .Case("0xa1", "thunderxt88")
+ .Case("0x0a1", "thunderxt88")
+ .Default("generic");
}
if (Implementer == "0x46") { // Fujitsu Ltd.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
- if (Lines[I].startswith("CPU part")) {
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0x001", "a64fx")
- .Default("generic");
- }
- }
+ return StringSwitch<const char *>(Part)
+ .Case("0x001", "a64fx")
+ .Default("generic");
}
if (Implementer == "0x4e") { // NVIDIA Corporation
- for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
- if (Lines[I].startswith("CPU part")) {
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0x004", "carmel")
- .Default("generic");
- }
- }
+ return StringSwitch<const char *>(Part)
+ .Case("0x004", "carmel")
+ .Default("generic");
}
if (Implementer == "0x48") // HiSilicon Technologies, Inc.
- // Look for the CPU part line.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I)
- if (Lines[I].startswith("CPU part"))
- // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
- // values correspond to the "Part number" in the CP15/c0 register. The
- // contents are specified in the various processor manuals.
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0xd01", "tsv110")
- .Default("generic");
+ // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
+ // values correspond to the "Part number" in the CP15/c0 register. The
+ // contents are specified in the various processor manuals.
+ return StringSwitch<const char *>(Part)
+ .Case("0xd01", "tsv110")
+ .Default("generic");
if (Implementer == "0x51") // Qualcomm Technologies, Inc.
- // Look for the CPU part line.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I)
- if (Lines[I].startswith("CPU part"))
- // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
- // values correspond to the "Part number" in the CP15/c0 register. The
- // contents are specified in the various processor manuals.
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0x06f", "krait") // APQ8064
- .Case("0x201", "kryo")
- .Case("0x205", "kryo")
- .Case("0x211", "kryo")
- .Case("0x800", "cortex-a73")
- .Case("0x801", "cortex-a73")
- .Case("0x802", "cortex-a73")
- .Case("0x803", "cortex-a73")
- .Case("0x804", "cortex-a73")
- .Case("0x805", "cortex-a73")
- .Case("0xc00", "falkor")
- .Case("0xc01", "saphira")
- .Default("generic");
-
+ // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
+ // values correspond to the "Part number" in the CP15/c0 register. The
+ // contents are specified in the various processor manuals.
+ return StringSwitch<const char *>(Part)
+ .Case("0x06f", "krait") // APQ8064
+ .Case("0x201", "kryo")
+ .Case("0x205", "kryo")
+ .Case("0x211", "kryo")
+ .Case("0x800", "cortex-a73") // Kryo 2xx Gold
+ .Case("0x801", "cortex-a73") // Kryo 2xx Silver
+ .Case("0x802", "cortex-a75") // Kryo 3xx Gold
+ .Case("0x803", "cortex-a75") // Kryo 3xx Silver
+ .Case("0x804", "cortex-a76") // Kryo 4xx Gold
+ .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
+ .Case("0xc00", "falkor")
+ .Case("0xc01", "saphira")
+ .Default("generic");
if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
// The Exynos chips have a convoluted ID scheme that doesn't seem to follow
// any predictive pattern across variants and parts.
@@ -323,7 +305,7 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
SmallVector<StringRef, 32> CPUFeatures;
for (unsigned I = 0, E = Lines.size(); I != E; ++I)
if (Lines[I].startswith("features")) {
- size_t Pos = Lines[I].find(":");
+ size_t Pos = Lines[I].find(':');
if (Pos != StringRef::npos) {
Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
break;
@@ -435,11 +417,6 @@ StringRef sys::detail::getHostCPUNameForBPF() {
#if defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64__) || defined(_M_X64)
-enum VendorSignatures {
- SIG_INTEL = 0x756e6547 /* Genu */,
- SIG_AMD = 0x68747541 /* Auth */
-};
-
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
@@ -513,6 +490,42 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
#endif
}
+namespace llvm {
+namespace sys {
+namespace detail {
+namespace x86 {
+
+VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ if (MaxLeaf == nullptr)
+ MaxLeaf = &EAX;
+ else
+ *MaxLeaf = 0;
+
+ if (!isCpuIdSupported())
+ return VendorSignatures::UNKNOWN;
+
+ if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
+ return VendorSignatures::UNKNOWN;
+
+ // "Genu ineI ntel"
+ if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
+ return VendorSignatures::GENUINE_INTEL;
+
+ // "Auth enti cAMD"
+ if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
+ return VendorSignatures::AUTHENTIC_AMD;
+
+ return VendorSignatures::UNKNOWN;
+}
+
+} // namespace x86
+} // namespace detail
+} // namespace sys
+} // namespace llvm
+
+using namespace llvm::sys::detail::x86;
+
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
@@ -730,6 +743,13 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
break;
+ // Sapphire Rapids:
+ case 0x8f:
+ CPU = "sapphirerapids";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
+ break;
+
case 0x1c: // Most 45 nm Intel Atom processors
case 0x26: // 45 nm Atom Lincroft
case 0x27: // 32 nm Atom Medfield
@@ -956,6 +976,14 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break; // 00h-0Fh: Zen1
}
break;
+ case 25:
+ CPU = "znver3";
+ *Type = X86::AMDFAM19H;
+ if (Model <= 0x0f) {
+ *Subtype = X86::AMDFAM19H_ZNVER3;
+ break; // 00h-0Fh: Zen3
+ }
+ break;
default:
break; // Unknown AMD CPU.
}
@@ -1095,14 +1123,12 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
}
StringRef sys::getHostCPUName() {
- unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
- unsigned MaxLeaf, Vendor;
-
- if (!isCpuIdSupported())
+ unsigned MaxLeaf = 0;
+ const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
+ if (Vendor == VendorSignatures::UNKNOWN)
return "generic";
- if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1)
- return "generic";
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
unsigned Family = 0, Model = 0;
@@ -1117,10 +1143,10 @@ StringRef sys::getHostCPUName() {
StringRef CPU;
- if (Vendor == SIG_INTEL) {
+ if (Vendor == VendorSignatures::GENUINE_INTEL) {
CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
&Subtype);
- } else if (Vendor == SIG_AMD) {
+ } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
&Subtype);
}
@@ -1222,6 +1248,19 @@ StringRef sys::getHostCPUName() {
}
#else
StringRef sys::getHostCPUName() { return "generic"; }
+namespace llvm {
+namespace sys {
+namespace detail {
+namespace x86 {
+
+VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
+ return VendorSignatures::UNKNOWN;
+}
+
+} // namespace x86
+} // namespace detail
+} // namespace sys
+} // namespace llvm
#endif
#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
@@ -1272,6 +1311,27 @@ int computeHostNumPhysicalCores() {
}
return CPU_COUNT(&Enabled);
}
+#elif defined(__linux__) && defined(__powerpc__)
+int computeHostNumPhysicalCores() {
+ cpu_set_t Affinity;
+ if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
+ return CPU_COUNT(&Affinity);
+
+ // The call to sched_getaffinity() may have failed because the Affinity
+ // mask is too small for the number of CPU's on the system (i.e. the
+ // system has more than 1024 CPUs). Allocate a mask large enough for
+ // twice as many CPUs.
+ cpu_set_t *DynAffinity;
+ DynAffinity = CPU_ALLOC(2048);
+ if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) {
+ int NumCPUs = CPU_COUNT(DynAffinity);
+ CPU_FREE(DynAffinity);
+ return NumCPUs;
+ }
+ return -1;
+}
+#elif defined(__linux__) && defined(__s390x__)
+int computeHostNumPhysicalCores() { return sysconf(_SC_NPROCESSORS_ONLN); }
#elif defined(__APPLE__) && defined(__x86_64__)
#include <sys/param.h>
#include <sys/sysctl.h>
@@ -1291,6 +1351,28 @@ int computeHostNumPhysicalCores() {
}
return count;
}
+#elif defined(__MVS__)
+int computeHostNumPhysicalCores() {
+ enum {
+ // Byte offset of the pointer to the Communications Vector Table (CVT) in
+ // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
+ // will be zero-extended to uintptr_t.
+ FLCCVT = 16,
+ // Byte offset of the pointer to the Common System Data Area (CSD) in the
+ // CVT. The table entry is a 31-bit pointer and will be zero-extended to
+ // uintptr_t.
+ CVTCSD = 660,
+ // Byte offset to the number of live CPs in the LPAR, stored as a signed
+ // 32-bit value in the table.
+ CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
+ };
+ char *PSA = 0;
+ char *CVT = reinterpret_cast<char *>(
+ static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
+ char *CSD = reinterpret_cast<char *>(
+ static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
+ return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
+}
#elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
// Defined in llvm/lib/Support/Windows/Threading.inc
int computeHostNumPhysicalCores();
@@ -1420,11 +1502,13 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1);
+ Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker
Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1);
Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1);
Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1);
Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1);
+ Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1);
Features["avx512vp2intersect"] =
HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1);
@@ -1445,7 +1529,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
bool HasLeaf7Subleaf1 =
MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+ Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
+ Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
bool HasLeafD = MaxLevel >= 0xd &&
!getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
@@ -1460,6 +1546,10 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
+ bool HasLeaf19 =
+ MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
+ Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
+
return true;
}
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))