Skip to content

Commit b6e03e5

Browse files
Merge pull request #357 from rust-lang/subnormals
Fix subnormals
2 parents 6d10cd1 + 5e57453 commit b6e03e5

File tree

8 files changed

+316
-40
lines changed

8 files changed

+316
-40
lines changed

.github/workflows/ci.yml

+34-25
Original file line numberDiff line numberDiff line change
@@ -167,40 +167,33 @@ jobs:
167167
RUSTFLAGS: ${{ matrix.rustflags }}
168168

169169
cross-tests:
170-
name: "${{ matrix.target }} (via cross)"
170+
name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)"
171171
runs-on: ubuntu-latest
172172
strategy:
173173
fail-fast: false
174-
# TODO: Sadly, we can't configure target-feature in a meaningful way
175-
# because `cross` doesn't tell qemu to enable any non-default cpu
176-
# features, nor does it give us a way to do so.
177-
#
178-
# Ultimately, we'd like to do something like [rust-lang/stdarch][stdarch].
179-
# This is a lot more complex... but in practice it's likely that we can just
180-
# snarf the docker config from around [here][1000-dockerfiles].
181-
#
182-
# [stdarch]: https://github.com/rust-lang/stdarch/blob/a5db4eaf/.github/workflows/main.yml#L67
183-
# [1000-dockerfiles]: https://github.com/rust-lang/stdarch/tree/a5db4eaf/ci/docker
184174

185175
matrix:
186176
target:
187-
- i586-unknown-linux-gnu
188-
# 32-bit arm has a few idiosyncrasies like having subnormal flushing
189-
# to zero on by default. Ideally we'd set
190177
- armv7-unknown-linux-gnueabihf
191-
- aarch64-unknown-linux-gnu
192-
# Note: The issue above means neither of these mips targets will use
193-
# MSA (mips simd) but MIPS uses a nonstandard binary representation
194-
# for NaNs which makes it worth testing on despite that.
178+
- thumbv7neon-unknown-linux-gnueabihf # includes neon by default
179+
- aarch64-unknown-linux-gnu # includes neon by default
180+
- powerpc-unknown-linux-gnu
181+
- powerpc64le-unknown-linux-gnu # includes altivec by default
182+
- riscv64gc-unknown-linux-gnu
183+
# MIPS uses a nonstandard binary representation for NaNs, which makes it worth testing,
184+
# but it has been non-nightly since https://github.com/rust-lang/rust/pull/113274
195185
# - mips-unknown-linux-gnu
196186
# - mips64-unknown-linux-gnuabi64
197-
- riscv64gc-unknown-linux-gnu
198-
# TODO this test works, but it appears to time out
199-
# - powerpc-unknown-linux-gnu
200-
# TODO this test is broken, but it appears to be a problem with QEMU, not us.
201-
# - powerpc64le-unknown-linux-gnu
202-
# TODO enable this once a new version of cross is released
187+
# Lots of errors in QEMU and no real hardware to test on. Not clear if it's QEMU or bad codegen.
203188
# - powerpc64-unknown-linux-gnu
189+
target_feature: [default]
190+
include:
191+
- { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
192+
# Fails due to QEMU floating point errors, probably handling subnormals incorrectly.
193+
# This target is somewhat redundant, since ppc64le has altivec as well.
194+
# - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
195+
# We should test this, but cross currently can't run it
196+
# - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
204197

205198
steps:
206199
- uses: actions/checkout@v2
@@ -217,11 +210,27 @@ jobs:
217210
# being part of the tarball means we can't just use the download/latest
218211
# URL :(
219212
run: |
220-
CROSS_URL=https://github.com/rust-embedded/cross/releases/download/v0.2.1/cross-v0.2.1-x86_64-unknown-linux-gnu.tar.gz
213+
CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz
221214
mkdir -p "$HOME/.bin"
222215
curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin"
223216
echo "$HOME/.bin" >> $GITHUB_PATH
224217
218+
- name: Configure Emulated CPUs
219+
run: |
220+
echo "CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> $GITHUB_ENV
221+
# echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV
222+
223+
- name: Configure RUSTFLAGS
224+
shell: bash
225+
run: |
226+
case "${{ matrix.target_feature }}" in
227+
default)
228+
echo "RUSTFLAGS=" >> $GITHUB_ENV;;
229+
*)
230+
echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV
231+
;;
232+
esac
233+
225234
- name: Test (debug)
226235
run: cross test --verbose --target=${{ matrix.target }}
227236

crates/core_simd/src/elements/float.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,10 @@ macro_rules! impl_trait {
336336

337337
#[inline]
338338
fn is_subnormal(self) -> Self::Mask {
339-
self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
339+
// On some architectures (e.g. armv7 and some ppc) subnormals are flushed to zero,
340+
// so this comparison must be done with integers.
341+
let not_zero = self.abs().to_bits().simd_ne(Self::splat(0.0).to_bits());
342+
not_zero & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
340343
}
341344

342345
#[inline]

crates/core_simd/tests/ops_macros.rs

+27-6
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ macro_rules! impl_unary_op_test {
66
{ $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => {
77
test_helpers::test_lanes! {
88
fn $fn<const LANES: usize>() {
9-
test_helpers::test_unary_elementwise(
9+
test_helpers::test_unary_elementwise_flush_subnormals(
1010
&<core_simd::simd::Simd<$scalar, LANES> as core::ops::$trait>::$fn,
1111
&$scalar_fn,
1212
&|_| true,
@@ -31,15 +31,15 @@ macro_rules! impl_binary_op_test {
3131

3232
test_helpers::test_lanes! {
3333
fn normal<const LANES: usize>() {
34-
test_helpers::test_binary_elementwise(
34+
test_helpers::test_binary_elementwise_flush_subnormals(
3535
&<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
3636
&$scalar_fn,
3737
&|_, _| true,
3838
);
3939
}
4040

4141
fn assign<const LANES: usize>() {
42-
test_helpers::test_binary_elementwise(
42+
test_helpers::test_binary_elementwise_flush_subnormals(
4343
&|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
4444
&$scalar_fn,
4545
&|_, _| true,
@@ -126,19 +126,23 @@ macro_rules! impl_common_integer_tests {
126126

127127
fn reduce_sum<const LANES: usize>() {
128128
test_helpers::test_1(&|x| {
129+
use test_helpers::subnormals::{flush, flush_in};
129130
test_helpers::prop_assert_biteq! (
130131
$vector::<LANES>::from_array(x).reduce_sum(),
131132
x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
133+
flush(x.iter().copied().map(flush_in).fold(0 as $scalar, $scalar::wrapping_add)),
132134
);
133135
Ok(())
134136
});
135137
}
136138

137139
fn reduce_product<const LANES: usize>() {
138140
test_helpers::test_1(&|x| {
141+
use test_helpers::subnormals::{flush, flush_in};
139142
test_helpers::prop_assert_biteq! (
140143
$vector::<LANES>::from_array(x).reduce_product(),
141144
x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
145+
flush(x.iter().copied().map(flush_in).fold(1 as $scalar, $scalar::wrapping_mul)),
142146
);
143147
Ok(())
144148
});
@@ -463,15 +467,15 @@ macro_rules! impl_float_tests {
463467
}
464468

465469
fn to_degrees<const LANES: usize>() {
466-
test_helpers::test_unary_elementwise(
470+
test_helpers::test_unary_elementwise_flush_subnormals(
467471
&Vector::<LANES>::to_degrees,
468472
&Scalar::to_degrees,
469473
&|_| true,
470474
)
471475
}
472476

473477
fn to_radians<const LANES: usize>() {
474-
test_helpers::test_unary_elementwise(
478+
test_helpers::test_unary_elementwise_flush_subnormals(
475479
&Vector::<LANES>::to_radians,
476480
&Scalar::to_radians,
477481
&|_| true,
@@ -541,7 +545,12 @@ macro_rules! impl_float_tests {
541545
}
542546

543547
fn simd_clamp<const LANES: usize>() {
548+
if cfg!(all(target_arch = "powerpc64", target_feature = "vsx")) {
549+
// https://gitlab.com/qemu-project/qemu/-/issues/1780
550+
return;
551+
}
544552
test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
553+
use test_helpers::subnormals::flush_in;
545554
for (min, max) in min.iter_mut().zip(max.iter_mut()) {
546555
if max < min {
547556
core::mem::swap(min, max);
@@ -558,8 +567,20 @@ macro_rules! impl_float_tests {
558567
for i in 0..LANES {
559568
result_scalar[i] = value[i].clamp(min[i], max[i]);
560569
}
570+
let mut result_scalar_flush = [Scalar::default(); LANES];
571+
for i in 0..LANES {
572+
// Comparisons flush-to-zero, but return value selection is _not_ flushed.
573+
let mut value = value[i];
574+
if flush_in(value) < flush_in(min[i]) {
575+
value = min[i];
576+
}
577+
if flush_in(value) > flush_in(max[i]) {
578+
value = max[i];
579+
}
580+
result_scalar_flush[i] = value
581+
}
561582
let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array();
562-
test_helpers::prop_assert_biteq!(result_scalar, result_vector);
583+
test_helpers::prop_assert_biteq!(result_vector, result_scalar, result_scalar_flush);
563584
Ok(())
564585
})
565586
}

crates/core_simd/tests/round.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ macro_rules! float_rounding_test {
4343
}
4444

4545
fn fract<const LANES: usize>() {
46-
test_helpers::test_unary_elementwise(
46+
test_helpers::test_unary_elementwise_flush_subnormals(
4747
&Vector::<LANES>::fract,
4848
&Scalar::fract,
4949
&|_| true,

crates/test_helpers/Cargo.toml

+2-4
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@ version = "0.1.0"
44
edition = "2021"
55
publish = false
66

7-
[dependencies.proptest]
8-
version = "0.10"
9-
default-features = false
10-
features = ["alloc"]
7+
[dependencies]
8+
proptest = { version = "0.10", default-features = false, features = ["alloc"] }
119

1210
[features]
1311
all_lane_counts = []

crates/test_helpers/src/biteq.rs

+31-1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,27 @@ impl<T: BitEq> core::fmt::Debug for BitEqWrapper<'_, T> {
113113
}
114114
}
115115

116+
#[doc(hidden)]
117+
pub struct BitEqEitherWrapper<'a, T>(pub &'a T, pub &'a T);
118+
119+
impl<T: BitEq> PartialEq<BitEqEitherWrapper<'_, T>> for BitEqWrapper<'_, T> {
120+
fn eq(&self, other: &BitEqEitherWrapper<'_, T>) -> bool {
121+
self.0.biteq(other.0) || self.0.biteq(other.1)
122+
}
123+
}
124+
125+
impl<T: BitEq> core::fmt::Debug for BitEqEitherWrapper<'_, T> {
126+
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
127+
if self.0.biteq(self.1) {
128+
self.0.fmt(f)
129+
} else {
130+
self.0.fmt(f)?;
131+
write!(f, " or ")?;
132+
self.1.fmt(f)
133+
}
134+
}
135+
}
136+
116137
#[macro_export]
117138
macro_rules! prop_assert_biteq {
118139
{ $a:expr, $b:expr $(,)? } => {
@@ -122,5 +143,14 @@ macro_rules! prop_assert_biteq {
122143
let b = $b;
123144
proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqWrapper(&b));
124145
}
125-
}
146+
};
147+
{ $a:expr, $b:expr, $c:expr $(,)? } => {
148+
{
149+
use $crate::biteq::{BitEqWrapper, BitEqEitherWrapper};
150+
let a = $a;
151+
let b = $b;
152+
let c = $c;
153+
proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqEitherWrapper(&b, &c));
154+
}
155+
};
126156
}

0 commit comments

Comments
 (0)