Skip to content

Commit 5eb3d41

Browse files
committed
Revert _mm_{min,max}_ps changes and add explanation why
1 parent 3dc6873 commit 5eb3d41

File tree

1 file changed

+23
-2
lines changed
  • crates/core_arch/src/x86

1 file changed

+23
-2
lines changed

crates/core_arch/src/x86/sse.rs

+23-2
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,8 @@ pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 {
194194
#[cfg_attr(test, assert_instr(minps))]
195195
#[stable(feature = "simd_x86", since = "1.27.0")]
196196
pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 {
197-
simd_fmin(a, b)
197+
// See the `test_mm_min_ps` test for why this can't be implemented using `simd_fmin`.
198+
minps(a, b)
198199
}
199200

200201
/// Compares the first single-precision (32-bit) floating-point element of `a`
@@ -219,7 +220,8 @@ pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 {
219220
#[cfg_attr(test, assert_instr(maxps))]
220221
#[stable(feature = "simd_x86", since = "1.27.0")]
221222
pub unsafe fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
222-
simd_fmax(a, b)
223+
// See the `test_mm_min_ps` test for why this can't be implemented using `simd_fmax`.
224+
maxps(a, b)
223225
}
224226

225227
/// Bitwise AND of packed single-precision (32-bit) floating-point elements.
@@ -1935,8 +1937,12 @@ extern "C" {
19351937
fn rsqrtps(a: __m128) -> __m128;
19361938
#[link_name = "llvm.x86.sse.min.ss"]
19371939
fn minss(a: __m128, b: __m128) -> __m128;
1940+
#[link_name = "llvm.x86.sse.min.ps"]
1941+
fn minps(a: __m128, b: __m128) -> __m128;
19381942
#[link_name = "llvm.x86.sse.max.ss"]
19391943
fn maxss(a: __m128, b: __m128) -> __m128;
1944+
#[link_name = "llvm.x86.sse.max.ps"]
1945+
fn maxps(a: __m128, b: __m128) -> __m128;
19401946
#[link_name = "llvm.x86.sse.movmsk.ps"]
19411947
fn movmskps(a: __m128) -> i32;
19421948
#[link_name = "llvm.x86.sse.cmp.ps"]
@@ -2634,6 +2640,21 @@ mod tests {
26342640
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
26352641
let r = _mm_min_ps(a, b);
26362642
assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2643+
2644+
// `_mm_min_ps` can **not** be implemented using the `simd_fmin` Rust intrinsic. `simd_fmin`
2645+
// is lowered by the LLVM codegen backend to the `llvm.minnum.v*` LLVM intrinsic. This intrinsic
2646+
// doesn't specify how -0.0 is handled. Unfortunately it happens to behave differently from
2647+
// the `minps` x86 instruction on x86. With the `llvm.minnum.v*` LLVM intrinsic, `r1` would
2648+
// equal `a` and `r2` would equal `b`.
2649+
let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2650+
let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2651+
let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
2652+
let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
2653+
let a: [u8; 16] = transmute(a);
2654+
let b: [u8; 16] = transmute(b);
2655+
assert_eq!(r1, b);
2656+
assert_eq!(r2, a);
2657+
assert_ne!(a, b); // sanity check that -0.0 is actually present
26372658
}
26382659

26392660
#[simd_test(enable = "sse")]

0 commit comments

Comments
 (0)