Skip to content

Commit 34b63ba

Browse files
committed
Detect SSE4.2 support dynamically
This is a port of the dynamic SSE4.2 detection feature from moonjit. This makes luajit2 builds portable since SSE4.2 string hash functions are now built separately and chosen at runtime based on whether the CPU supports it. This patch also includes work by Thomas Fransham in moonjit to support Windows builds.
1 parent 6d6c9b3 commit 34b63ba

File tree

10 files changed

+195
-64
lines changed

10 files changed

+195
-64
lines changed

src/Makefile

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -507,10 +507,16 @@ LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
507507
lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
508508
lj_carith.o lj_clib.o lj_cparse.o \
509509
lj_lib.o lj_alloc.o lib_aux.o \
510-
$(LJLIB_O) lib_init.o
510+
$(LJLIB_O) lib_init.o lj_str_hash.o
511+
512+
# On x64 targets only, lj_str_hash.c gets -msse4.2 as an extra per-file flag.
ifeq (x64,$(TARGET_LJARCH))
513+
lj_str_hash-CFLAGS = -msse4.2
514+
endif
515+
516+
# F_CFLAGS expands to the variable named <source-stem>-CFLAGS for the current
# prerequisite ($<); it is empty for files that define no such variable.
F_CFLAGS = $($(patsubst %.c,%-CFLAGS,$<))
511517

512518
LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
513-
LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o)
519+
LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) lj_init_dyn.o
514520

515521
LIB_VMDEF= jit/vmdef.lua
516522
LIB_VMDEFP= $(LIB_VMDEF)
@@ -532,7 +538,7 @@ ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM)
532538
##############################################################################
533539

534540
# Mixed mode defaults.
535-
TARGET_O= $(LUAJIT_A)
541+
TARGET_O= lj_init.o $(LUAJIT_A)
536542
TARGET_T= $(LUAJIT_T) $(LUAJIT_SO)
537543
TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO)
538544

@@ -614,7 +620,7 @@ E= @echo
614620
default all: $(TARGET_T)
615621

616622
amalg:
617-
$(MAKE) all "LJCORE_O=ljamalg.o"
623+
$(MAKE) all "LJCORE_O=ljamalg.o lj_str_hash.o"
618624

619625
clean:
620626
$(HOST_RM) $(ALL_RM)
@@ -691,8 +697,8 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
691697

692698
%.o: %.c
693699
$(E) "CC $@"
694-
$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
695-
$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
700+
$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $(@:.o=_dyn.o) $<
701+
$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $@ $<
696702

697703
%.o: %.S
698704
$(E) "ASM $@"

src/lj_arch.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,10 @@
209209
#define LJ_TARGET_GC64 1
210210
#endif
211211

212+
/* The optimised string hash support is only enabled when __GNUC__ is defined. */
#ifdef __GNUC__
213+
#define LJ_HAS_OPTIMISED_HASH 1
214+
#endif
215+
212216
#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
213217

214218
#define LJ_ARCH_NAME "arm"

src/lj_init.c

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#include <stdint.h>
2+
#include "lj_arch.h"
3+
#include "lj_jit.h"
4+
#include "lj_vm.h"
5+
#include "lj_str.h"
6+
7+
#if LJ_TARGET_ARM && LJ_TARGET_LINUX
8+
#include <sys/utsname.h>
9+
#endif
10+
11+
#ifdef _MSC_VER
12+
/*
13+
** Append a function pointer to the static constructor table executed by
14+
** the C runtime.
15+
** Based on https://stackoverflow.com/questions/1113409/attribute-constructor-equivalent-in-vc
16+
** see also https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-initialization.
17+
*/
18+
#pragma section(".CRT$XCU",read)
19+
/* LJ_INITIALIZER2_(f,p): forward-declares f, places a pointer f_ to it in the
** .CRT$XCU section, emits a linker /include directive for the symbol
** p + "f_", and then opens the definition of f (the body follows the macro). */
#define LJ_INITIALIZER2_(f,p) \
20+
static void f(void); \
21+
__declspec(allocate(".CRT$XCU")) void (*f##_)(void) = f; \
22+
__pragma(comment(linker,"/include:" p #f "_")) \
23+
static void f(void)
24+
/* _WIN64 builds use an empty symbol prefix, other MSVC builds use "_"
** (presumably the 32-bit C symbol decoration -- confirm). */
#ifdef _WIN64
25+
#define LJ_INITIALIZER(f) LJ_INITIALIZER2_(f,"")
26+
#else
27+
#define LJ_INITIALIZER(f) LJ_INITIALIZER2_(f,"_")
28+
#endif
29+
30+
#else
31+
/* Non-MSVC compilers: declare f with the constructor function attribute. */
#define LJ_INITIALIZER(f) static void __attribute__((constructor)) f(void)
32+
#endif
33+
34+
35+
#ifdef LJ_HAS_OPTIMISED_HASH
36+
/* Call str_hash_init_sse42() when the JIT_F_SSE4_2 bit is set in flags;
** otherwise do nothing. Presumably str_hash_init_sse42() installs the SSE4.2
** string hash functions -- its definition is not visible here. */
static void str_hash_init(uint32_t flags)
37+
{
38+
if (flags & JIT_F_SSE4_2)
39+
str_hash_init_sse42 ();
40+
}
41+
42+
/* CPU detection for interpreter features such as string hash function
43+
selection. We choose to cherry-pick from lj_cpudetect and not have a single
44+
initializer to make sure that merges with LuaJIT/LuaJIT remain
45+
convenient. */
46+
LJ_INITIALIZER(lj_init_cpuflags)
47+
{
48+
uint32_t flags = 0;
49+
#if LJ_TARGET_X86ORX64
50+
51+
uint32_t vendor[4];
52+
uint32_t features[4];
53+
/* Both CPUID queries must succeed; otherwise flags stays 0. */
if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
54+
/* Bits 0, 19 and 20 of features[2] map to the SSE3, SSE4.1 and SSE4.2 flags. */
flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
55+
flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
56+
flags |= ((features[2] >> 20)&1) * JIT_F_SSE4_2;
57+
/* Leaf 7 is only queried when vendor[0] is at least 7 (presumably the
** highest supported CPUID leaf -- confirm against lj_vm_cpuid). */
if (vendor[0] >= 7) {
58+
uint32_t xfeatures[4];
59+
/* NOTE(review): the return value of this query is not checked. */
lj_vm_cpuid(7, xfeatures);
60+
flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
61+
}
62+
}
63+
64+
#endif
65+
66+
/* The reason why we initialize early: select our string hash functions. */
67+
str_hash_init (flags);
68+
}
69+
#endif

src/lj_jit.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
/* CPU feature flags: consecutive bits starting at JIT_F_CPU. */
#define JIT_F_SSE3 (JIT_F_CPU << 0)
2323
#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
2424
#define JIT_F_BMI2 (JIT_F_CPU << 2)
25+
#define JIT_F_SSE4_2 (JIT_F_CPU << 3)
2526

2627

2728
/* NOTE(review): this string lists names for SSE3, SSE4.1 and BMI2 only; there
** is no entry for JIT_F_SSE4_2 -- confirm that this is intentional. */
#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"

src/lj_str.c

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
#include "lj_str.h"
1313
#include "lj_char.h"
1414
#include "lj_prng.h"
15-
#include "x64/src/lj_str_hash_x64.h"
1615

1716
/* -- String helpers ------------------------------------------------------ */
1817

@@ -83,9 +82,22 @@ int lj_str_haspattern(GCstr *s)
8382

8483
/* -- String hashing ------------------------------------------------------ */
8584

86-
#ifndef ARCH_HASH_SPARSE
85+
/* With LJ_HAS_OPTIMISED_HASH, hash_sparse (and hash_dense, when
** LUAJIT_SECURITY_STRHASH is enabled) are non-static function pointers that
** start out pointing at the default *_def implementations. Without it, they
** are plain macro aliases for the *_def functions. */
#ifdef LJ_HAS_OPTIMISED_HASH
86+
static StrHash hash_sparse_def (uint64_t, const char *, MSize);
87+
str_sparse_hashfn hash_sparse = hash_sparse_def;
88+
#if LUAJIT_SECURITY_STRHASH
89+
static StrHash hash_dense_def(uint64_t, StrHash, const char *, MSize);
90+
str_dense_hashfn hash_dense = hash_dense_def;
91+
#endif
92+
#else
93+
#define hash_sparse hash_sparse_def
94+
#if LUAJIT_SECURITY_STRHASH
95+
#define hash_dense hash_dense_def
96+
#endif
97+
#endif
98+
8799
/* Keyed sparse ARX string hash. Constant time. */
88-
static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
100+
static StrHash hash_sparse_def(uint64_t seed, const char *str, MSize len)
89101
{
90102
/* Constants taken from lookup3 hash by Bob Jenkins. */
91103
StrHash a, b, h = len ^ (StrHash)seed;
@@ -106,12 +118,11 @@ static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
106118
h ^= b; h -= lj_rol(b, 16);
107119
return h;
108120
}
109-
#endif
110121

111-
#if LUAJIT_SECURITY_STRHASH && !defined(ARCH_HASH_DENSE)
122+
#if LUAJIT_SECURITY_STRHASH
112123
/* Keyed dense ARX string hash. Linear time. */
113-
static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h,
114-
const char *str, MSize len)
124+
static LJ_NOINLINE StrHash hash_dense_def(uint64_t seed, StrHash h,
125+
const char *str, MSize len)
115126
{
116127
StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4));
117128
if (len > 12) {

src/lj_str.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,16 @@ LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L);
2828
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
2929
#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3))
3030

31+
#ifdef LJ_HAS_OPTIMISED_HASH
32+
/* Function pointer type for the sparse string hash, and the global pointer
** through which it is called. */
typedef StrHash (*str_sparse_hashfn) (uint64_t, const char *, MSize);
33+
extern str_sparse_hashfn hash_sparse;
34+
35+
#if LUAJIT_SECURITY_STRHASH
36+
/* Same for the dense string hash, which takes an additional StrHash argument. */
typedef StrHash (*str_dense_hashfn) (uint64_t, StrHash, const char *, MSize);
37+
extern str_dense_hashfn hash_dense;
38+
#endif
39+
40+
/* Called by the early CPU-flag initializer when SSE4.2 is detected; defined
** outside this header. */
extern void str_hash_init_sse42 (void);
41+
#endif
42+
3143
#endif

0 commit comments

Comments
 (0)