Skip to content

Commit 9bb323c

Browse files
AGSaidi authored and dormando committed
arm64: Re-add arm crc32c hw acceleration
Use the .arch_extension directive so that a config option and special cflags aren't required. Add a few tests for both the software and hardware implementations.
1 parent eb1bc72 commit 9bb323c

File tree

6 files changed

+103
-14
lines changed

6 files changed

+103
-14
lines changed

Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ noinst_PROGRAMS = memcached-debug sizes testapp timedrun
44

55
BUILT_SOURCES=
66

7-
testapp_SOURCES = testapp.c util.c util.h stats_prefix.c stats_prefix.h jenkins_hash.c murmur3_hash.c hash.h cache.c
7+
testapp_SOURCES = testapp.c util.c util.h stats_prefix.c stats_prefix.h jenkins_hash.c murmur3_hash.c hash.h cache.c crc32c.c
88

99
timedrun_SOURCES = timedrun.c
1010

configure.ac

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,17 @@ AS_IF([test "$ICC" = "yes" -o "$GCC" = "yes"],
7575
AS_IF(test "$CLANG" = "no",[CFLAGS="$CFLAGS -pthread"])
7676
])
7777

78+
dnl Without this flag, clang errors out on the .arch_extension crc assembler
79+
dnl directive, which is what allows the crc instructions to be assembled.
80+
AS_IF(test "$CLANG" = "yes",[CFLAGS="$CFLAGS -Wno-language-extension-token"])
81+
7882
if test "$ICC" = "no"; then
7983
AC_PROG_CC_C99
8084
fi
8185

8286
AM_PROG_CC_C_O
8387
AC_PROG_INSTALL
8488

85-
dnl ARM crc32 optimization is disabled until we have hardware for an automated
86-
dnl regression test.
87-
dnl AC_ARG_ENABLE(arm_crc32,
88-
dnl [AS_HELP_STRING([--enable-arm-crc32], [Enable ARMv8 CRC32 instructions])])
89-
9089
AC_ARG_ENABLE(extstore,
9190
[AS_HELP_STRING([--disable-extstore], [Disable external storage (extstore)])])
9291

@@ -206,10 +205,6 @@ if test "x$enable_tls" = "xyes"; then
206205
AC_DEFINE([TLS],1,[Set to nonzero if you want to enable TLS])
207206
fi
208207

209-
if test "x$enable_arm_crc32" = "xyes"; then
210-
AC_DEFINE([ARM_CRC32],1,[Set to nonzero if you want to enable ARMv8 crc32])
211-
fi
212-
213208
if test "x$enable_asan" = "xyes"; then
214209
AC_DEFINE([ASAN],1,[Set to nonzero if you want to compile using ASAN])
215210
fi

crc32c.c

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ crc_func crc32c;
4848
/* CRC-32C (iSCSI) polynomial in reversed bit order. */
4949
#define POLY 0x82f63b78
5050

51-
uint32_t crc32c_sw(uint32_t crc, void const *buf, size_t len);
5251
uint32_t crc32c_sw_little(uint32_t crc, void const *buf, size_t len);
5352
uint32_t crc32c_sw_big(uint32_t crc, void const *buf, size_t len);
5453
#ifdef __x86_64__
@@ -274,8 +273,70 @@ void crc32c_init(void) {
274273
}
275274
}
276275

277-
#else /* !__x86_64__ */
276+
#elif defined(__aarch64__) && defined(__linux__)
277+
#include <sys/auxv.h>
278278

279+
static inline uint32_t crc32cx(uint32_t crc, const uint64_t data)
280+
{
281+
asm(".arch_extension crc\n"
282+
"crc32cx %w0, %w0, %x1" : "+r" (crc) : "r" (data));
283+
return crc;
284+
}
285+
286+
static inline uint32_t crc32cb(uint32_t crc, const uint8_t data)
287+
{
288+
asm(".arch_extension crc\n"
289+
"crc32cb %w0, %w0, %w1" : "+r" (crc) : "r" (data));
290+
return crc;
291+
}
292+
293+
static uint32_t crc32c_hw(uint32_t crc, void const *buf, size_t len) {
294+
crc = ~crc;
295+
unsigned char const *next = buf;
296+
297+
while (((uintptr_t)next & 7) && len > 0) {
298+
crc = crc32cb(crc, *(uint8_t *)next);
299+
next++;
300+
len--;
301+
}
302+
303+
while (len >= 64) {
304+
uint64_t *next8 = (uint64_t *)next;
305+
crc = crc32cx(crc, next8[0]);
306+
crc = crc32cx(crc, next8[1]);
307+
crc = crc32cx(crc, next8[2]);
308+
crc = crc32cx(crc, next8[3]);
309+
crc = crc32cx(crc, next8[4]);
310+
crc = crc32cx(crc, next8[5]);
311+
crc = crc32cx(crc, next8[6]);
312+
crc = crc32cx(crc, next8[7]);
313+
next += 64;
314+
len -= 64;
315+
}
316+
317+
while (len >= 8) {
318+
crc = crc32cx(crc, *(uint64_t *)next);
319+
next += 8;
320+
len -= 8;
321+
}
322+
323+
while (len > 0) {
324+
crc = crc32cb(crc, *(uint8_t *)next);
325+
next++;
326+
len--;
327+
}
328+
329+
return ~crc;
330+
}
331+
332+
void crc32c_init(void) {
333+
uint64_t auxv = getauxval(AT_HWCAP);
334+
335+
crc32c = crc32c_sw;
336+
if (auxv & HWCAP_CRC32)
337+
crc32c = crc32c_hw;
338+
}
339+
#else /* !__x86_64__ && !(__aarch64__ && __linux__) */
279340
void crc32c_init(void) {
280341
crc32c = crc32c_sw;
281342
}

crc32c.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,7 @@ extern crc_func crc32c;
1717

1818
void crc32c_init(void);
1919

20+
// Expose a prototype for the crc32c software variant simply for testing purposes
21+
uint32_t crc32c_sw(uint32_t crc, void const *buf, size_t len);
22+
2023
#endif /* CRC32C_H */

memcached.spec.in

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
%bcond_with arm_crc32
21
%bcond_with extstore
32
%bcond_with seccomp
43
%bcond_with sasl
@@ -66,7 +65,6 @@ web applications by alleviating database load.
6665

6766
%build
6867
%configure \
69-
%{?with_arm_crc32:--enable-arm-crc32} \
7068
%{?with_extstore:--enable-extstore} \
7169
%{?with_seccomp:--enable-seccomp} \
7270
%{?with_sasl:--enable-sasl} \

testapp.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#include "config.h"
2222
#include "cache.h"
23+
#include "crc32c.h"
2324
#include "hash.h"
2425
#include "jenkins_hash.h"
2526
#include "stats_prefix.h"
@@ -898,6 +899,34 @@ static enum test_return test_issue_92(void) {
898899
return TEST_PASS;
899900
}
900901

902+
static enum test_return test_crc32c(void) {
903+
uint32_t crc_hw, crc_sw;
904+
905+
char buffer[256];
906+
for (int x = 0; x < 256; x++)
907+
buffer[x] = x;
908+
909+
/* Compare hardware to software implementation */
910+
crc_hw = crc32c(0, buffer, 256);
911+
crc_sw = crc32c_sw(0, buffer, 256);
912+
assert(crc_hw == 0x9c44184b);
913+
assert(crc_sw == 0x9c44184b);
914+
915+
/* Test that passing a CRC in also works */
916+
crc_hw = crc32c(crc_hw, buffer, 256);
917+
crc_sw = crc32c_sw(crc_sw, buffer, 256);
918+
assert(crc_hw == 0xae10ee5a);
919+
assert(crc_sw == 0xae10ee5a);
920+
921+
/* Test odd offsets/sizes */
922+
crc_hw = crc32c(crc_hw, buffer + 1, 256 - 2);
923+
crc_sw = crc32c_sw(crc_sw, buffer + 1, 256 - 2);
924+
assert(crc_hw == 0xed37b906);
925+
assert(crc_sw == 0xed37b906);
926+
927+
return TEST_PASS;
928+
}
929+
901930
static enum test_return test_issue_102(void) {
902931
char buffer[4096];
903932
memset(buffer, ' ', sizeof(buffer));
@@ -2297,6 +2326,7 @@ struct testcase testcases[] = {
22972326
{ "issue_44", test_issue_44 },
22982327
{ "vperror", test_vperror },
22992328
{ "issue_101", test_issue_101 },
2329+
{ "crc32c", test_crc32c },
23002330
/* The following tests all run towards the same server */
23012331
{ "start_server", start_memcached_server },
23022332
{ "issue_92", test_issue_92 },
@@ -2363,6 +2393,8 @@ int main(int argc, char **argv)
23632393
hash = jenkins_hash;
23642394
stats_prefix_init(':');
23652395

2396+
crc32c_init();
2397+
23662398
for (num_cases = 0; testcases[num_cases].description; num_cases++) {
23672399
/* Just counting */
23682400
}

0 commit comments

Comments
 (0)