Skip to content

Commit 0ee549a

Browse files
authored
Make: Inline ASM for detecting CPU features on ARM
Closes #143
1 parent 715ad10 commit 0ee549a

File tree

2 files changed

+40
-14
lines changed

2 files changed

+40
-14
lines changed

Diff for: c/lib.c

+38-11
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,43 @@ extern void *malloc(size_t length);
3838
#endif
3939
#endif
4040

41+
// On Apple Silicon, `mrs` is not allowed in user-space, so we need to use the `sysctl` API.
42+
#if defined(__APPLE__) && defined(__MACH__)
43+
#define SZ_APPLE 1
44+
#include <sys/sysctl.h>
45+
#endif
46+
47+
#if defined(__linux__)
48+
#define SZ_LINUX 1
49+
#endif
50+
51+
/**
 *  @brief  Detects, at runtime, which Arm SIMD extensions the current CPU exposes.
 *
 *  See
 *  https://github.com/ashvardanian/SimSIMD/blob/28e536083602f85ad0c59456782c8864463ffb0e/include/simsimd/simsimd.h#L434
 *  for documentation on how we detect capabilities across different ARM platforms.
 *
 *  - Apple:         queries `hw.optional.neon` through `sysctlbyname`.
 *  - Linux/AArch64: reads the `ID_AA64PFR0_EL1` system register with `mrs`.
 *  - Anything else: reports only the serial backend.
 *
 *  @return Bitmask of `sz_capability_t` flags; `sz_cap_serial_k` is always set.
 */
SZ_INTERNAL sz_capability_t sz_capabilities_arm(void) {
#if defined(SZ_APPLE)

    // On Apple Silicon, `mrs` is not allowed in user-space, so we need to use the `sysctl` API.
    // A failed lookup (e.g. the key does not exist on this machine) is treated as "no NEON".
    uint32_t supports_neon = 0;
    size_t size = sizeof(supports_neon);
    if (sysctlbyname("hw.optional.neon", &supports_neon, &size, NULL, 0) != 0) supports_neon = 0;

    // Normalize to 0/1 so the multiplication below can only yield the flag itself or zero.
    return (sz_capability_t)(                        //
        (sz_cap_arm_neon_k * (supports_neon != 0)) | //
        (sz_cap_serial_k));

#elif defined(SZ_LINUX) && defined(__aarch64__)

    // The `mrs` read below is an AArch64 instruction, hence the extra architecture guard:
    // `SZ_LINUX` alone is also defined on x86 and 32-bit Arm Linux builds.
    unsigned supports_neon = 1; // NEON is always supported on 64-bit Arm
    uint64_t id_aa64pfr0_el1 = 0;
    __asm__ __volatile__("mrs %0, ID_AA64PFR0_EL1" : "=r"(id_aa64pfr0_el1));

    // SVE support is reported in bits [35:32] of `ID_AA64PFR0_EL1`; non-zero means implemented.
    unsigned supports_sve = ((id_aa64pfr0_el1 >> 32) & 0xF) >= 1;
    return (sz_capability_t)(                 //
        (sz_cap_arm_neon_k * supports_neon) | //
        (sz_cap_arm_sve_k * supports_sve) |   //
        (sz_cap_serial_k));

#else // Neither Apple nor Linux on AArch64: no runtime probe available

    return sz_cap_serial_k;

#endif
}
77+
4178
SZ_DYNAMIC sz_capability_t sz_capabilities(void) {
4279

4380
#if SZ_USE_X86_AVX512 || SZ_USE_X86_AVX2
@@ -96,22 +133,12 @@ SZ_DYNAMIC sz_capability_t sz_capabilities(void) {
96133

97134
#if SZ_USE_ARM_NEON || SZ_USE_ARM_SVE
98135

99-
// Every 64-bit Arm CPU supports NEON
100-
unsigned supports_neon = 1;
101-
unsigned supports_sve = 0;
102-
unsigned supports_sve2 = 0;
103-
sz_unused(supports_sve);
104-
sz_unused(supports_sve2);
105-
106-
return (sz_capability_t)( //
107-
(sz_cap_arm_neon_k * supports_neon) | //
108-
(sz_cap_serial_k));
136+
return sz_capabilities_arm();
109137

110138
#endif // SIMSIMD_TARGET_ARM
111139

112140
return sz_cap_serial_k;
113141
}
114-
115142
typedef struct sz_implementations_t {
116143
sz_equal_t equal;
117144
sz_order_t order;

Diff for: include/stringzilla/stringzilla.h

+2-3
Original file line numberDiff line numberDiff line change
@@ -260,16 +260,15 @@ typedef enum sz_capability_t {
260260

261261
sz_cap_arm_neon_k = 1 << 10, /// ARM NEON capability
262262
sz_cap_arm_sve_k = 1 << 11, /// ARM SVE capability TODO: Not yet supported or used
263-
263+
sz_cap_arm_sve2_k = 1 << 12,
264+
sz_cap_arm_sve2p1_k = 1 << 13,
264265
sz_cap_x86_avx2_k = 1 << 20, /// x86 AVX2 capability
265266
sz_cap_x86_avx512f_k = 1 << 21, /// x86 AVX512 F capability
266267
sz_cap_x86_avx512bw_k = 1 << 22, /// x86 AVX512 BW instruction capability
267268
sz_cap_x86_avx512vl_k = 1 << 23, /// x86 AVX512 VL instruction capability
268269
sz_cap_x86_avx512vbmi_k = 1 << 24, /// x86 AVX512 VBMI instruction capability
269270
sz_cap_x86_gfni_k = 1 << 25, /// x86 AVX512 GFNI instruction capability
270271

271-
sz_cap_x86_avx512vbmi2_k = 1 << 26, /// x86 AVX512 VBMI 2 instruction capability
272-
273272
} sz_capability_t;
274273

275274
/**

0 commit comments

Comments
 (0)