Skip to content

Commit 09e57cf

Browse files
[AArch64] Extend Windows CPU feature detection with more features. (#171930)
Mostly adding feature flags from the newest SDK. (Note that in addition to the obvious, this also affects the compiler-rt SME ABI routines, which rely on FEAT_SME and FEAT_SME2.)
1 parent 34f6303 commit 09e57cf

File tree

2 files changed

+89
-9
lines changed

2 files changed

+89
-9
lines changed

compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,24 +18,48 @@
1818
#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
1919
#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
2020
#endif
21+
#ifndef PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE
22+
#define PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE 48
23+
#endif
2124
#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
2225
#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50
2326
#endif
27+
#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE
28+
#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51
29+
#endif
2430
#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
2531
#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55
2632
#endif
2733
#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
2834
#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56
2935
#endif
30-
#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
31-
#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57
32-
#endif
3336
#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
3437
#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58
3538
#endif
3639
#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
3740
#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59
3841
#endif
42+
#ifndef PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE
43+
#define PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE 66
44+
#endif
45+
// Fallback definitions used when the included headers do not already provide
// these processor-feature flags.
#ifndef PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67
#endif
// Each fallback must be guarded by its own macro: guarding BF16 on the FP16
// macro (always defined by this point) would leave BF16 undefined.
#ifndef PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE 68
#endif
51+
#ifndef PF_ARM_SME_INSTRUCTIONS_AVAILABLE
52+
#define PF_ARM_SME_INSTRUCTIONS_AVAILABLE 70
53+
#endif
54+
#ifndef PF_ARM_SME2_INSTRUCTIONS_AVAILABLE
55+
#define PF_ARM_SME2_INSTRUCTIONS_AVAILABLE 71
56+
#endif
57+
#ifndef PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE
58+
#define PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE 85
59+
#endif
60+
#ifndef PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE
61+
#define PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE 86
62+
#endif
3963

4064
void __init_cpu_features_resolver(unsigned long hwcap,
4165
const __ifunc_arg_t *arg) {}
@@ -68,15 +92,30 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
6892
{PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC},
6993
{PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE},
7094
{PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2},
95+
{PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE, FEAT_SVE2_1},
7196
{PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE, FEAT_SVE_PMULL128},
97+
{PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BITPERM},
7298
{PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3},
7399
{PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4},
74100
{PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM},
75101
{PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM},
76-
// There is no I8MM flag, but when SVE_I8MM is available, I8MM is too.
77-
{PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE, FEAT_I8MM},
102+
{PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE, FEAT_I8MM},
103+
{PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE, FEAT_FP16},
104+
{PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE, FEAT_BF16},
105+
{PF_ARM_SME_INSTRUCTIONS_AVAILABLE, FEAT_SME},
106+
{PF_ARM_SME2_INSTRUCTIONS_AVAILABLE, FEAT_SME2},
107+
{PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE, FEAT_SME_I64},
108+
// FEAT_SME_F64 is reported through the SME F64F64 processor-feature flag.
{PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE, FEAT_SME_F64},
78109
};
79110

111+
// The following features are never detected because there is no known way
112+
// to detect them on Windows:
113+
//
114+
// FEAT_RNG, FEAT_FLAGM, FEAT_FLAGM2, FEAT_FP16FML, FEAT_RDM, FEAT_CSSC,
115+
// FEAT_DIT, FEAT_DPB, FEAT_DPB2, FEAT_FCMA, FEAT_RCPC2, FEAT_FRINTTS,
116+
// FEAT_MEMTAG2, FEAT_SB, FEAT_SSBS2, FEAT_BTI, FEAT_WFXT, FEAT_RCPC3,
117+
// FEAT_MOPS.
118+
80119
for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); I != E; ++I)
81120
if (IsProcessorFeaturePresent(FeatMap[I].WinApiFeature))
82121
setCPUFeature(FeatMap[I].CPUFeature);

llvm/lib/TargetParser/Host.cpp

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2304,24 +2304,49 @@ StringMap<bool> sys::getHostCPUFeatures() {
23042304
#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
23052305
#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
23062306
#endif
2307+
#ifndef PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE
2308+
#define PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE 48
2309+
#endif
23072310
#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
23082311
#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50
23092312
#endif
2313+
#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE
2314+
#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51
2315+
#endif
23102316
#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
23112317
#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55
23122318
#endif
23132319
#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
23142320
#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56
23152321
#endif
2316-
#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
2317-
#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57
2318-
#endif
23192322
#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
23202323
#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58
23212324
#endif
23222325
#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
23232326
#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59
23242327
#endif
2328+
#ifndef PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE
2329+
#define PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE 66
2330+
#endif
2331+
// Fallback definitions used when the included headers do not already provide
// these processor-feature flags.
#ifndef PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67
#endif
// Each fallback must be guarded by its own macro: guarding BF16 on the FP16
// macro (always defined by this point) would leave BF16 undefined.
#ifndef PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE 68
#endif
2337+
#ifndef PF_ARM_SME_INSTRUCTIONS_AVAILABLE
2338+
#define PF_ARM_SME_INSTRUCTIONS_AVAILABLE 70
2339+
#endif
2340+
#ifndef PF_ARM_SME2_INSTRUCTIONS_AVAILABLE
2341+
#define PF_ARM_SME2_INSTRUCTIONS_AVAILABLE 71
2342+
#endif
2343+
#ifndef PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE
2344+
#define PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE 85
2345+
#endif
2346+
#ifndef PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE
2347+
#define PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE 86
2348+
#endif
2349+
23252350
StringMap<bool> sys::getHostCPUFeatures() {
23262351
StringMap<bool> Features;
23272352

@@ -2340,8 +2365,12 @@ StringMap<bool> sys::getHostCPUFeatures() {
23402365
IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE);
23412366
Features["sve2"] =
23422367
IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE);
2368+
Features["sve2p1"] =
2369+
IsProcessorFeaturePresent(PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE);
23432370
Features["sve-aes"] =
23442371
IsProcessorFeaturePresent(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE);
2372+
Features["sve-bitperm"] =
2373+
IsProcessorFeaturePresent(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE);
23452374
Features["sve-sha3"] =
23462375
IsProcessorFeaturePresent(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE);
23472376
Features["sve-sm4"] =
@@ -2351,7 +2380,19 @@ StringMap<bool> sys::getHostCPUFeatures() {
23512380
Features["f64mm"] =
23522381
IsProcessorFeaturePresent(PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE);
23532382
Features["i8mm"] =
2354-
IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE);
2383+
IsProcessorFeaturePresent(PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE);
2384+
Features["fp16"] =
2385+
IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE);
2386+
Features["bf16"] =
2387+
IsProcessorFeaturePresent(PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE);
2388+
Features["sme"] =
2389+
IsProcessorFeaturePresent(PF_ARM_SME_INSTRUCTIONS_AVAILABLE);
2390+
Features["sme2"] =
2391+
IsProcessorFeaturePresent(PF_ARM_SME2_INSTRUCTIONS_AVAILABLE);
2392+
Features["sme-i16i64"] =
2393+
IsProcessorFeaturePresent(PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE);
2394+
Features["sme-f64f64"] =
2395+
IsProcessorFeaturePresent(PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE);
23552396

23562397
// Avoid inferring "crypto" means more than the traditional AES + SHA2
23572398
bool TradCrypto =

0 commit comments

Comments
 (0)