Skip to content

Commit 7b62812

Browse files
committed
Make a subset of libm symbols weakly available on all platforms
018616e ("Always have math functions but with `weak` linking attribute if we can") made all math symbols available on platforms that support weak linkage. This caused some unexpected regressions, however, because our less accurate and sometimes slow routines were being selected over the system `libm`, which also tends to be weak [1]. Thus, 0fab77e ("Don't include `math` for `unix` and `wasi` targets") was applied to undo these changes on many platforms.

Now that some improvements have been made to `libm`, add back a subset of these functions:

* cbrt
* ceil
* copysign
* fabs
* fdim
* floor
* fma
* fmax
* fmaximum
* fmin
* fminimum
* fmod
* rint
* round
* roundeven
* sqrt
* trunc

This list includes only functions that produce exact results (verified with exhaustive / extensive tests, and also required by IEEE in most cases), and for which benchmarks indicate performance similar to or better than Musl's soft float math routines [^1]. All except `cbrt` also have `f16` and `f128` implementations.

Once more routines meet these criteria, we can move them from platform-specific availability to always available.

Once this change makes it to rust-lang/rust, we will also be able to move the relevant functions from `std` to `core`.

[^1]: We still rely on the backend to provide optimized assembly routines when available.

[1]: rust-lang/rust#128386
1 parent 342ce46 commit 7b62812

File tree

2 files changed

+173
-115
lines changed

2 files changed

+173
-115
lines changed

src/lib.rs

+1-28
Original file line number | Diff line number | Diff line change
@@ -41,40 +41,13 @@ mod macros;
4141

4242
pub mod float;
4343
pub mod int;
44-
45-
// Disable for any of the following:
46-
// - x86 without sse2 due to ABI issues
47-
// - <https://github.com/rust-lang/rust/issues/114479>
48-
// - but exclude UEFI since it is a soft-float target
49-
// - <https://github.com/rust-lang/rust/issues/128533>
50-
// - All unix targets (linux, macos, freebsd, android, etc)
51-
// - wasm with known target_os
52-
#[cfg(not(any(
53-
all(
54-
target_arch = "x86",
55-
not(target_feature = "sse2"),
56-
not(target_os = "uefi"),
57-
),
58-
unix,
59-
all(target_family = "wasm", not(target_os = "unknown"))
60-
)))]
6144
pub mod math;
45+
pub mod mem;
6246

6347
// `libm` expects its `support` module to be available in the crate root. This config can be
6448
// cleaned up once `libm` is made always available.
65-
#[cfg(not(any(
66-
all(
67-
target_arch = "x86",
68-
not(target_feature = "sse2"),
69-
not(target_os = "uefi"),
70-
),
71-
unix,
72-
all(target_family = "wasm", not(target_os = "unknown"))
73-
)))]
7449
use math::libm::support;
7550

76-
pub mod mem;
77-
7851
#[cfg(target_arch = "arm")]
7952
pub mod arm;
8053

src/math.rs

+172-87
Original file line number | Diff line number | Diff line change
@@ -5,110 +5,195 @@
55
#[path = "../libm/src/math/mod.rs"]
66
pub(crate) mod libm;
77

8-
#[allow(unused_macros)]
9-
macro_rules! no_mangle {
8+
macro_rules! libm_intrinsics {
109
($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => {
1110
intrinsics! {
1211
$(
1312
pub extern "C" fn $fun($($iid: $ity),+) -> $oty {
14-
self::libm::$fun($($iid),+)
13+
$crate::math::libm::$fun($($iid),+)
1514
}
1615
)+
1716
}
1817
}
1918
}
2019

21-
#[cfg(not(windows))]
22-
no_mangle! {
23-
fn acos(x: f64) -> f64;
24-
fn asin(x: f64) -> f64;
25-
fn cbrt(x: f64) -> f64;
26-
fn expm1(x: f64) -> f64;
27-
fn hypot(x: f64, y: f64) -> f64;
28-
fn tan(x: f64) -> f64;
29-
fn cos(x: f64) -> f64;
30-
fn expf(x: f32) -> f32;
31-
fn log2(x: f64) -> f64;
32-
fn log2f(x: f32) -> f32;
33-
fn log10(x: f64) -> f64;
34-
fn log10f(x: f32) -> f32;
35-
fn log(x: f64) -> f64;
36-
fn logf(x: f32) -> f32;
37-
fn round(x: f64) -> f64;
38-
fn roundf(x: f32) -> f32;
39-
fn rint(x: f64) -> f64;
40-
fn rintf(x: f32) -> f32;
41-
fn sin(x: f64) -> f64;
42-
fn pow(x: f64, y: f64) -> f64;
43-
fn powf(x: f32, y: f32) -> f32;
44-
fn acosf(n: f32) -> f32;
45-
fn atan2f(a: f32, b: f32) -> f32;
46-
fn atanf(n: f32) -> f32;
47-
fn coshf(n: f32) -> f32;
48-
fn expm1f(n: f32) -> f32;
49-
fn fdim(a: f64, b: f64) -> f64;
50-
fn fdimf(a: f32, b: f32) -> f32;
51-
fn log1pf(n: f32) -> f32;
52-
fn sinhf(n: f32) -> f32;
53-
fn tanhf(n: f32) -> f32;
54-
fn ldexp(f: f64, n: i32) -> f64;
55-
fn ldexpf(f: f32, n: i32) -> f32;
56-
fn tgamma(x: f64) -> f64;
57-
fn tgammaf(x: f32) -> f32;
58-
fn atan(x: f64) -> f64;
59-
fn atan2(x: f64, y: f64) -> f64;
60-
fn cosh(x: f64) -> f64;
61-
fn log1p(x: f64) -> f64;
62-
fn sinh(x: f64) -> f64;
63-
fn tanh(x: f64) -> f64;
64-
fn cosf(x: f32) -> f32;
65-
fn exp(x: f64) -> f64;
66-
fn sinf(x: f32) -> f32;
67-
fn exp2(x: f64) -> f64;
68-
fn exp2f(x: f32) -> f32;
69-
fn fma(x: f64, y: f64, z: f64) -> f64;
70-
fn fmaf(x: f32, y: f32, z: f32) -> f32;
71-
fn asinf(n: f32) -> f32;
72-
fn cbrtf(n: f32) -> f32;
73-
fn hypotf(x: f32, y: f32) -> f32;
74-
fn tanf(n: f32) -> f32;
20+
/// This set of functions is well tested in `libm` and known to provide similar performance to
21+
/// system `libm`, as well as the same or better accuracy.
22+
pub mod full_availability {
23+
#[cfg(f16_enabled)]
24+
libm_intrinsics! {
25+
fn ceilf16(x: f16) -> f16;
26+
fn copysignf16(x: f16, y: f16) -> f16;
27+
fn fabsf16(x: f16) -> f16;
28+
fn fdimf16(x: f16, y: f16) -> f16;
29+
fn floorf16(x: f16) -> f16;
30+
fn fmaxf16(x: f16, y: f16) -> f16;
31+
fn fmaximumf16(x: f16, y: f16) -> f16;
32+
fn fminf16(x: f16, y: f16) -> f16;
33+
fn fminimumf16(x: f16, y: f16) -> f16;
34+
fn fmodf16(x: f16, y: f16) -> f16;
35+
fn rintf16(x: f16) -> f16;
36+
fn roundevenf16(x: f16) -> f16;
37+
fn roundf16(x: f16) -> f16;
38+
fn sqrtf16(x: f16) -> f16;
39+
fn truncf16(x: f16) -> f16;
40+
}
41+
42+
/* Weak linkage is unreliable on Windows and Apple, so we don't expose symbols that we know
43+
* the system libc provides in order to avoid conflicts. */
7544

76-
fn sqrtf(x: f32) -> f32;
77-
fn sqrt(x: f64) -> f64;
45+
#[cfg(all(not(windows), not(target_vendor = "apple")))]
46+
libm_intrinsics! {
47+
/* f32 */
48+
fn cbrtf(n: f32) -> f32;
49+
fn ceilf(x: f32) -> f32;
50+
fn copysignf(x: f32, y: f32) -> f32;
51+
fn fabsf(x: f32) -> f32;
52+
fn fdimf(a: f32, b: f32) -> f32;
53+
fn floorf(x: f32) -> f32;
54+
fn fmaf(x: f32, y: f32, z: f32) -> f32;
55+
fn fmaxf(x: f32, y: f32) -> f32;
56+
fn fminf(x: f32, y: f32) -> f32;
57+
fn fmodf(x: f32, y: f32) -> f32;
58+
fn rintf(x: f32) -> f32;
59+
fn roundf(x: f32) -> f32;
60+
fn sqrtf(x: f32) -> f32;
61+
fn truncf(x: f32) -> f32;
7862

79-
fn ceil(x: f64) -> f64;
80-
fn ceilf(x: f32) -> f32;
81-
fn floor(x: f64) -> f64;
82-
fn floorf(x: f32) -> f32;
83-
fn trunc(x: f64) -> f64;
84-
fn truncf(x: f32) -> f32;
63+
/* f64 */
64+
fn cbrt(x: f64) -> f64;
65+
fn ceil(x: f64) -> f64;
66+
fn copysign(x: f64, y: f64) -> f64;
67+
fn fabs(x: f64) -> f64;
68+
fn fdim(a: f64, b: f64) -> f64;
69+
fn floor(x: f64) -> f64;
70+
fn fma(x: f64, y: f64, z: f64) -> f64;
71+
fn fmax(x: f64, y: f64) -> f64;
72+
fn fmin(x: f64, y: f64) -> f64;
73+
fn fmod(x: f64, y: f64) -> f64;
74+
fn rint(x: f64) -> f64;
75+
fn round(x: f64) -> f64;
76+
fn sqrt(x: f64) -> f64;
77+
fn trunc(x: f64) -> f64;
78+
}
8579

86-
fn fmin(x: f64, y: f64) -> f64;
87-
fn fminf(x: f32, y: f32) -> f32;
88-
fn fmax(x: f64, y: f64) -> f64;
89-
fn fmaxf(x: f32, y: f32) -> f32;
90-
// `f64 % f64`
91-
fn fmod(x: f64, y: f64) -> f64;
92-
// `f32 % f32`
93-
fn fmodf(x: f32, y: f32) -> f32;
80+
// Windows and macOS do not yet expose roundeven and IEEE 754-2019 `maximum` / `minimum`,
81+
// however, so we still provide a fallback.
82+
libm_intrinsics! {
83+
fn fmaximum(x: f64, y: f64) -> f64;
84+
fn fmaximumf(x: f32, y: f32) -> f32;
85+
fn fminimum(x: f64, y: f64) -> f64;
86+
fn fminimumf(x: f32, y: f32) -> f32;
87+
fn roundeven(x: f64) -> f64;
88+
fn roundevenf(x: f32) -> f32;
89+
}
9490

95-
fn erf(x: f64) -> f64;
96-
fn erff(x: f32) -> f32;
97-
fn erfc(x: f64) -> f64;
98-
fn erfcf(x: f32) -> f32;
91+
#[cfg(f128_enabled)]
92+
libm_intrinsics! {
93+
fn ceilf128(x: f128) -> f128;
94+
fn copysignf128(x: f128, y: f128) -> f128;
95+
fn fabsf128(x: f128) -> f128;
96+
fn fdimf128(x: f128, y: f128) -> f128;
97+
fn floorf128(x: f128) -> f128;
98+
fn fmaf128(x: f128, y: f128, z: f128) -> f128;
99+
fn fmaxf128(x: f128, y: f128) -> f128;
100+
fn fmaximumf128(x: f128, y: f128) -> f128;
101+
fn fminf128(x: f128, y: f128) -> f128;
102+
fn fminimumf128(x: f128, y: f128) -> f128;
103+
fn fmodf128(x: f128, y: f128) -> f128;
104+
fn rintf128(x: f128) -> f128;
105+
fn roundevenf128(x: f128) -> f128;
106+
fn roundf128(x: f128) -> f128;
107+
fn sqrtf128(x: f128) -> f128;
108+
fn truncf128(x: f128) -> f128;
109+
}
99110
}
100111

101-
// allow for windows (and other targets)
102-
intrinsics! {
103-
pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
104-
let r = self::libm::lgamma_r(x);
105-
*s = r.1;
106-
r.0
112+
/// This group of functions has more performance or precision issues than system versions, or
113+
/// is otherwise less well tested. Provide them only on platforms that have problems with the
114+
/// system `libm`.
115+
///
116+
/// As `libm` improves, more functions will be moved from this group to the first group.
117+
///
118+
/// Do not supply for any of the following:
119+
/// - x86 without sse2 due to ABI issues
120+
/// - <https://github.com/rust-lang/rust/issues/114479>
121+
/// - but exclude UEFI since it is a soft-float target
122+
/// - <https://github.com/rust-lang/rust/issues/128533>
123+
/// - All unix targets (linux, macos, freebsd, android, etc)
124+
/// - wasm with known target_os
125+
#[cfg(not(any(
126+
all(
127+
target_arch = "x86",
128+
not(target_feature = "sse2"),
129+
not(target_os = "uefi"),
130+
),
131+
unix,
132+
all(target_family = "wasm", not(target_os = "unknown"))
133+
)))]
134+
pub mod partial_availability {
135+
#[cfg(not(windows))]
136+
libm_intrinsics! {
137+
fn acos(x: f64) -> f64;
138+
fn acosf(n: f32) -> f32;
139+
fn asin(x: f64) -> f64;
140+
fn asinf(n: f32) -> f32;
141+
fn atan(x: f64) -> f64;
142+
fn atan2(x: f64, y: f64) -> f64;
143+
fn atan2f(a: f32, b: f32) -> f32;
144+
fn atanf(n: f32) -> f32;
145+
fn cos(x: f64) -> f64;
146+
fn cosf(x: f32) -> f32;
147+
fn cosh(x: f64) -> f64;
148+
fn coshf(n: f32) -> f32;
149+
fn erf(x: f64) -> f64;
150+
fn erfc(x: f64) -> f64;
151+
fn erfcf(x: f32) -> f32;
152+
fn erff(x: f32) -> f32;
153+
fn exp(x: f64) -> f64;
154+
fn exp2(x: f64) -> f64;
155+
fn exp2f(x: f32) -> f32;
156+
fn expf(x: f32) -> f32;
157+
fn expm1(x: f64) -> f64;
158+
fn expm1f(n: f32) -> f32;
159+
fn hypot(x: f64, y: f64) -> f64;
160+
fn hypotf(x: f32, y: f32) -> f32;
161+
fn ldexp(f: f64, n: i32) -> f64;
162+
fn ldexpf(f: f32, n: i32) -> f32;
163+
fn log(x: f64) -> f64;
164+
fn log10(x: f64) -> f64;
165+
fn log10f(x: f32) -> f32;
166+
fn log1p(x: f64) -> f64;
167+
fn log1pf(n: f32) -> f32;
168+
fn log2(x: f64) -> f64;
169+
fn log2f(x: f32) -> f32;
170+
fn logf(x: f32) -> f32;
171+
fn pow(x: f64, y: f64) -> f64;
172+
fn powf(x: f32, y: f32) -> f32;
173+
fn sin(x: f64) -> f64;
174+
fn sinf(x: f32) -> f32;
175+
fn sinh(x: f64) -> f64;
176+
fn sinhf(n: f32) -> f32;
177+
fn tan(x: f64) -> f64;
178+
fn tanf(n: f32) -> f32;
179+
fn tanh(x: f64) -> f64;
180+
fn tanhf(n: f32) -> f32;
181+
fn tgamma(x: f64) -> f64;
182+
fn tgammaf(x: f32) -> f32;
107183
}
108184

109-
pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
110-
let r = self::libm::lgammaf_r(x);
111-
*s = r.1;
112-
r.0
185+
// allow for windows (and other targets)
186+
intrinsics! {
187+
pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
188+
let r = super::libm::lgamma_r(x);
189+
*s = r.1;
190+
r.0
191+
}
192+
193+
pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
194+
let r = super::libm::lgammaf_r(x);
195+
*s = r.1;
196+
r.0
197+
}
113198
}
114199
}

0 commit comments

Comments
 (0)