Skip to content

Commit e6ed9d3

Browse files
authored
Fix sse tests with -mnontrapping-fptoint (#22893)
Turns out we were relying on the explicit llvm bounds check here to give use the defined SSE behavior, but normal C semantics don't give us that so we need explicit bounds checks here. Fixes #22892
1 parent ffeb761 commit e6ed9d3

File tree

3 files changed

+55
-28
lines changed

3 files changed

+55
-28
lines changed

system/include/compat/emmintrin.h

+24-13
Original file line numberDiff line numberDiff line change
@@ -431,9 +431,11 @@ _mm_cvttpd_epi32(__m128d __a)
431431
int m[2];
432432
for(int i = 0; i < 2; ++i)
433433
{
434-
int x = lrint(__a[i]);
435-
if (x != 0 || fabs(__a[i]) < 2.0)
436-
m[i] = (int)__a[i];
434+
float elem = __a[i];
435+
if ((lrint(elem) != 0 || fabs(elem) < 2.0) && !isnanf(elem) && elem <= INT_MAX && elem >= INT_MIN)
436+
// Use the trapping instruction here since we have explicit bounds checks
437+
// above.
438+
m[i] = __builtin_wasm_trunc_s_i32_f32(elem);
437439
else
438440
m[i] = (int)0x80000000;
439441
}
@@ -444,9 +446,12 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__))
444446
_mm_cvttsd_si32(__m128d __a)
445447
{
446448
// TODO: OPTIMIZE!
447-
int x = lrint(__a[0]);
448-
if (x != 0 || fabs(__a[0]) < 2.0)
449-
return (int)__a[0];
449+
float elem = __a[0];
450+
if (isnan(elem) || elem > INT_MAX || elem < INT_MIN) return (int)0x80000000;
451+
if (lrint(elem) != 0 || fabs(elem) < 2.0)
452+
// Use the trapping instruction here since we have explicit bounds checks
453+
// above.
454+
return __builtin_wasm_trunc_s_i32_f32(elem);
450455
else
451456
return (int)0x80000000;
452457
}
@@ -1013,10 +1018,13 @@ static __inline__ long long __attribute__((__always_inline__, __nodebug__))
10131018
_mm_cvttsd_si64(__m128d __a)
10141019
{
10151020
// TODO: optimize
1016-
if (isnan(__a[0]) || isinf(__a[0])) return 0x8000000000000000LL;
1017-
long long x = llrint(__a[0]);
1018-
if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(__a[0]) < 2.f))
1019-
return (long long)__a[0];
1021+
float e = __a[0];
1022+
if (isnan(e) || isinf(e) || e > LLONG_MAX || e < LLONG_MIN) return 0x8000000000000000LL;
1023+
long long x = llrint(e);
1024+
if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f))
1025+
// Use the trapping instruction here since we have explicit bounds checks
1026+
// above
1027+
return __builtin_wasm_trunc_s_i64_f32(e);
10201028
else
10211029
return 0x8000000000000000LL;
10221030
}
@@ -1056,9 +1064,12 @@ _mm_cvttps_epi32(__m128 __a)
10561064
} u;
10571065
for(int i = 0; i < 4; ++i)
10581066
{
1059-
int x = lrint(__a[i]);
1060-
if (x != 0 || fabs(__a[i]) < 2.0)
1061-
u.x[i] = (int)__a[i];
1067+
float e = __a[i];
1068+
int x = lrint(e);
1069+
if ((x != 0 || fabs(e) < 2.0) && !isnanf(e) && e <= INT_MAX && e >= INT_MIN)
1070+
// Use the trapping instruction here since we have explicit bounds checks
1071+
// above.
1072+
u.x[i] = __builtin_wasm_trunc_s_i32_f32(e);
10621073
else
10631074
u.x[i] = (int)0x80000000;
10641075
}

system/include/compat/xmmintrin.h

+11-7
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include <wasm_simd128.h>
1111

12+
#include <limits.h>
1213
#include <math.h>
1314
#include <string.h>
1415

@@ -605,9 +606,11 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SL
605606

606607
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvttss_si32(__m128 __a)
607608
{
608-
int x = lrint(((__f32x4)__a)[0]);
609-
if (x != 0 || fabsf(((__f32x4)__a)[0]) < 2.f)
610-
return (int)((__f32x4)__a)[0];
609+
float e = ((__f32x4)__a)[0];
610+
if (isnanf(e) || e > INT_MAX || e < INT_MIN) return (int)0x80000000;
611+
int x = lrint(e);
612+
if ((x != 0 || fabsf(e) < 2.f))
613+
return (int)e;
611614
else
612615
return (int)0x80000000;
613616
}
@@ -635,10 +638,11 @@ _mm_cvtss_si64(__m128 __a)
635638
static __inline__ long long __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
636639
_mm_cvttss_si64(__m128 __a)
637640
{
638-
if (isnan(((__f32x4)__a)[0]) || isinf(((__f32x4)__a)[0])) return 0x8000000000000000LL;
639-
long long x = llrint(((__f32x4)__a)[0]);
640-
if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(((__f32x4)__a)[0]) < 2.f))
641-
return (long long)((__f32x4)__a)[0];
641+
float e = ((__f32x4)__a)[0];
642+
if (isnan(e) || isinf(e) || e > LLONG_MAX || e < LLONG_MIN) return 0x8000000000000000LL;
643+
long long x = llrint(e);
644+
if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f))
645+
return (long long)e;
642646
else
643647
return 0x8000000000000000LL;
644648
}

test/test_core.py

+20-8
Original file line numberDiff line numberDiff line change
@@ -6511,12 +6511,16 @@ def test_neon_wasm_simd(self):
65116511
@requires_native_clang
65126512
@no_safe_heap('has unaligned 64-bit operations in wasm')
65136513
@no_ubsan('test contains UB')
6514-
def test_sse1(self):
6514+
@parameterized({
6515+
'': ([],),
6516+
'nontrapping': (['-mnontrapping-fptoint'],)
6517+
})
6518+
def test_sse1(self, args):
65156519
src = test_file('sse/test_sse1.cpp')
65166520
self.run_process([shared.CLANG_CXX, src, '-msse', '-o', 'test_sse1', '-D_CRT_SECURE_NO_WARNINGS=1'] + clang_native.get_clang_native_args(), stdout=PIPE)
65176521
native_result = self.run_process('./test_sse1', stdout=PIPE).stdout
65186522

6519-
self.emcc_args += ['-I' + test_file('sse'), '-msse']
6523+
self.emcc_args += ['-I' + test_file('sse'), '-msse'] + args
65206524
self.maybe_closure()
65216525

65226526
self.do_runf(src, native_result)
@@ -6528,14 +6532,18 @@ def test_sse1(self):
65286532
@is_slow_test
65296533
@no_ubsan('https://github.com/emscripten-core/emscripten/issues/19688')
65306534
@no_asan('local count too large')
6531-
def test_sse2(self):
6535+
@parameterized({
6536+
'': ([],),
6537+
'nontrapping': (['-mnontrapping-fptoint'],)
6538+
})
6539+
def test_sse2(self, args):
65326540
if self.is_wasm64():
65336541
self.require_node_canary()
65346542
src = test_file('sse/test_sse2.cpp')
65356543
self.run_process([shared.CLANG_CXX, src, '-msse2', '-Wno-argument-outside-range', '-o', 'test_sse2', '-D_CRT_SECURE_NO_WARNINGS=1'] + clang_native.get_clang_native_args(), stdout=PIPE)
65366544
native_result = self.run_process('./test_sse2', stdout=PIPE).stdout
65376545

6538-
self.emcc_args += ['-I' + test_file('sse'), '-msse2', '-Wno-argument-outside-range', '-sSTACK_SIZE=1MB']
6546+
self.emcc_args += ['-I' + test_file('sse'), '-msse2', '-Wno-argument-outside-range', '-sSTACK_SIZE=1MB'] + args
65396547
self.maybe_closure()
65406548
self.do_runf(src, native_result)
65416549

@@ -6587,8 +6595,8 @@ def test_sse4_1(self):
65876595
@wasm_simd
65886596
@requires_native_clang
65896597
@parameterized({
6590-
'': (False,),
6591-
'2': (True,)
6598+
'': (False,),
6599+
'2': (True,)
65926600
})
65936601
def test_sse4(self, use_4_2):
65946602
msse4 = '-msse4.2' if use_4_2 else '-msse4'
@@ -6606,12 +6614,16 @@ def test_sse4(self, use_4_2):
66066614
@is_slow_test
66076615
@no_asan('local count too large')
66086616
@no_ubsan('local count too large')
6609-
def test_avx(self):
6617+
@parameterized({
6618+
'': ([],),
6619+
'nontrapping': (['-mnontrapping-fptoint'],)
6620+
})
6621+
def test_avx(self, args):
66106622
src = test_file('sse/test_avx.cpp')
66116623
self.run_process([shared.CLANG_CXX, src, '-mavx', '-Wno-argument-outside-range', '-Wpedantic', '-o', 'test_avx', '-D_CRT_SECURE_NO_WARNINGS=1'] + clang_native.get_clang_native_args(), stdout=PIPE)
66126624
native_result = self.run_process('./test_avx', stdout=PIPE).stdout
66136625

6614-
self.emcc_args += ['-I' + test_file('sse'), '-mavx', '-Wno-argument-outside-range', '-sSTACK_SIZE=1MB']
6626+
self.emcc_args += ['-I' + test_file('sse'), '-mavx', '-Wno-argument-outside-range', '-sSTACK_SIZE=1MB'] + args
66156627
self.maybe_closure()
66166628
self.do_runf(src, native_result)
66176629

0 commit comments

Comments
 (0)