Skip to content

Fix subnormals #357

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Sep 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 34 additions & 25 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -167,40 +167,33 @@ jobs:
RUSTFLAGS: ${{ matrix.rustflags }}

cross-tests:
name: "${{ matrix.target }} (via cross)"
name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)"
runs-on: ubuntu-latest
strategy:
fail-fast: false
# TODO: Sadly, we cant configure target-feature in a meaningful way
# because `cross` doesn't tell qemu to enable any non-default cpu
# features, nor does it give us a way to do so.
#
# Ultimately, we'd like to do something like [rust-lang/stdarch][stdarch].
# This is a lot more complex... but in practice it's likely that we can just
# snarf the docker config from around [here][1000-dockerfiles].
#
# [stdarch]: https://github.com/rust-lang/stdarch/blob/a5db4eaf/.github/workflows/main.yml#L67
# [1000-dockerfiles]: https://github.com/rust-lang/stdarch/tree/a5db4eaf/ci/docker

matrix:
target:
- i586-unknown-linux-gnu
# 32-bit arm has a few idiosyncracies like having subnormal flushing
# to zero on by default. Ideally we'd set
- armv7-unknown-linux-gnueabihf
- aarch64-unknown-linux-gnu
# Note: The issue above means neither of these mips targets will use
# MSA (mips simd) but MIPS uses a nonstandard binary representation
# for NaNs which makes it worth testing on despite that.
- thumbv7neon-unknown-linux-gnueabihf # includes neon by default
- aarch64-unknown-linux-gnu # includes neon by default
- powerpc-unknown-linux-gnu
- powerpc64le-unknown-linux-gnu # includes altivec by default
- riscv64gc-unknown-linux-gnu
# MIPS uses a nonstandard binary representation for NaNs which makes it worth testing
# non-nightly since https://github.com/rust-lang/rust/pull/113274
# - mips-unknown-linux-gnu
# - mips64-unknown-linux-gnuabi64
- riscv64gc-unknown-linux-gnu
# TODO this test works, but it appears to time out
# - powerpc-unknown-linux-gnu
# TODO this test is broken, but it appears to be a problem with QEMU, not us.
# - powerpc64le-unknown-linux-gnu
# TODO enable this once a new version of cross is released
# Lots of errors in QEMU and no real hardware to test on. Not clear if it's QEMU or bad codegen.
# - powerpc64-unknown-linux-gnu
target_feature: [default]
include:
- { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
# Fails due to QEMU floating point errors, probably handling subnormals incorrectly.
# This target is somewhat redundant, since ppc64le has altivec as well.
# - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
# We should test this, but cross currently can't run it
# - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }

steps:
- uses: actions/checkout@v2
Expand All @@ -217,11 +210,27 @@ jobs:
# being part of the tarball means we can't just use the download/latest
# URL :(
run: |
CROSS_URL=https://github.com/rust-embedded/cross/releases/download/v0.2.1/cross-v0.2.1-x86_64-unknown-linux-gnu.tar.gz
CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz
mkdir -p "$HOME/.bin"
curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin"
echo "$HOME/.bin" >> $GITHUB_PATH

- name: Configure Emulated CPUs
run: |
echo "CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> $GITHUB_ENV
# echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV

- name: Configure RUSTFLAGS
shell: bash
run: |
case "${{ matrix.target_feature }}" in
default)
echo "RUSTFLAGS=" >> $GITHUB_ENV;;
*)
echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV
;;
esac

- name: Test (debug)
run: cross test --verbose --target=${{ matrix.target }}

Expand Down
5 changes: 4 additions & 1 deletion crates/core_simd/src/elements/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,10 @@ macro_rules! impl_trait {

#[inline]
fn is_subnormal(self) -> Self::Mask {
self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
// On some architectures (e.g. armv7 and some ppc) subnormals are flushed to zero,
// so this comparison must be done with integers.
let not_zero = self.abs().to_bits().simd_ne(Self::splat(0.0).to_bits());
not_zero & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
}

#[inline]
Expand Down
33 changes: 27 additions & 6 deletions crates/core_simd/tests/ops_macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ macro_rules! impl_unary_op_test {
{ $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => {
test_helpers::test_lanes! {
fn $fn<const LANES: usize>() {
test_helpers::test_unary_elementwise(
test_helpers::test_unary_elementwise_flush_subnormals(
&<core_simd::simd::Simd<$scalar, LANES> as core::ops::$trait>::$fn,
&$scalar_fn,
&|_| true,
Expand All @@ -31,15 +31,15 @@ macro_rules! impl_binary_op_test {

test_helpers::test_lanes! {
fn normal<const LANES: usize>() {
test_helpers::test_binary_elementwise(
test_helpers::test_binary_elementwise_flush_subnormals(
&<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
&$scalar_fn,
&|_, _| true,
);
}

fn assign<const LANES: usize>() {
test_helpers::test_binary_elementwise(
test_helpers::test_binary_elementwise_flush_subnormals(
&|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
&$scalar_fn,
&|_, _| true,
Expand Down Expand Up @@ -96,19 +96,23 @@ macro_rules! impl_common_integer_tests {
test_helpers::test_lanes! {
fn reduce_sum<const LANES: usize>() {
test_helpers::test_1(&|x| {
use test_helpers::subnormals::{flush, flush_in};
test_helpers::prop_assert_biteq! (
$vector::<LANES>::from_array(x).reduce_sum(),
x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
flush(x.iter().copied().map(flush_in).fold(0 as $scalar, $scalar::wrapping_add)),
);
Ok(())
});
}

fn reduce_product<const LANES: usize>() {
test_helpers::test_1(&|x| {
use test_helpers::subnormals::{flush, flush_in};
test_helpers::prop_assert_biteq! (
$vector::<LANES>::from_array(x).reduce_product(),
x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
flush(x.iter().copied().map(flush_in).fold(1 as $scalar, $scalar::wrapping_mul)),
);
Ok(())
});
Expand Down Expand Up @@ -433,15 +437,15 @@ macro_rules! impl_float_tests {
}

fn to_degrees<const LANES: usize>() {
test_helpers::test_unary_elementwise(
test_helpers::test_unary_elementwise_flush_subnormals(
&Vector::<LANES>::to_degrees,
&Scalar::to_degrees,
&|_| true,
)
}

fn to_radians<const LANES: usize>() {
test_helpers::test_unary_elementwise(
test_helpers::test_unary_elementwise_flush_subnormals(
&Vector::<LANES>::to_radians,
&Scalar::to_radians,
&|_| true,
Expand Down Expand Up @@ -511,7 +515,12 @@ macro_rules! impl_float_tests {
}

fn simd_clamp<const LANES: usize>() {
if cfg!(all(target_arch = "powerpc64", target_feature = "vsx")) {
// https://gitlab.com/qemu-project/qemu/-/issues/1780
return;
}
test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
use test_helpers::subnormals::flush_in;
for (min, max) in min.iter_mut().zip(max.iter_mut()) {
if max < min {
core::mem::swap(min, max);
Expand All @@ -528,8 +537,20 @@ macro_rules! impl_float_tests {
for i in 0..LANES {
result_scalar[i] = value[i].clamp(min[i], max[i]);
}
let mut result_scalar_flush = [Scalar::default(); LANES];
for i in 0..LANES {
// Comparisons flush-to-zero, but return value selection is _not_ flushed.
let mut value = value[i];
if flush_in(value) < flush_in(min[i]) {
value = min[i];
}
if flush_in(value) > flush_in(max[i]) {
value = max[i];
}
result_scalar_flush[i] = value
}
let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array();
test_helpers::prop_assert_biteq!(result_scalar, result_vector);
test_helpers::prop_assert_biteq!(result_vector, result_scalar, result_scalar_flush);
Ok(())
})
}
Expand Down
2 changes: 1 addition & 1 deletion crates/core_simd/tests/round.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ macro_rules! float_rounding_test {
}

fn fract<const LANES: usize>() {
test_helpers::test_unary_elementwise(
test_helpers::test_unary_elementwise_flush_subnormals(
&Vector::<LANES>::fract,
&Scalar::fract,
&|_| true,
Expand Down
6 changes: 2 additions & 4 deletions crates/test_helpers/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@ version = "0.1.0"
edition = "2021"
publish = false

[dependencies.proptest]
version = "0.10"
default-features = false
features = ["alloc"]
[dependencies]
proptest = { version = "0.10", default-features = false, features = ["alloc"] }

[features]
all_lane_counts = []
32 changes: 31 additions & 1 deletion crates/test_helpers/src/biteq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,27 @@ impl<T: BitEq> core::fmt::Debug for BitEqWrapper<'_, T> {
}
}

#[doc(hidden)]
pub struct BitEqEitherWrapper<'a, T>(pub &'a T, pub &'a T);

impl<T: BitEq> PartialEq<BitEqEitherWrapper<'_, T>> for BitEqWrapper<'_, T> {
fn eq(&self, other: &BitEqEitherWrapper<'_, T>) -> bool {
self.0.biteq(other.0) || self.0.biteq(other.1)
}
}

impl<T: BitEq> core::fmt::Debug for BitEqEitherWrapper<'_, T> {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
if self.0.biteq(self.1) {
self.0.fmt(f)
} else {
self.0.fmt(f)?;
write!(f, " or ")?;
self.1.fmt(f)
}
}
}

#[macro_export]
macro_rules! prop_assert_biteq {
{ $a:expr, $b:expr $(,)? } => {
Expand All @@ -122,5 +143,14 @@ macro_rules! prop_assert_biteq {
let b = $b;
proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqWrapper(&b));
}
}
};
{ $a:expr, $b:expr, $c:expr $(,)? } => {
{
use $crate::biteq::{BitEqWrapper, BitEqEitherWrapper};
let a = $a;
let b = $b;
let c = $c;
proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqEitherWrapper(&b, &c));
}
};
}
Loading