Skip to content

Commit 31ee454

Browse files
committed
address review comments and fix CI
- implement CAS 16 - remove useless commented out symbol name - support `feature("no-asm")` - fix warnings when `feature("c")` is enabled - rustfmt
1 parent ba44f2e commit 31ee454

File tree

4 files changed

+97
-29
lines changed

4 files changed

+97
-29
lines changed

build.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -122,24 +122,28 @@ fn generate_aarch64_outlined_atomics() {
122122
macros.insert(sym, gen_macro(sym));
123123
}
124124

125+
// Only CAS supports 16 bytes, and it has a different implementation that uses a different macro.
126+
let mut cas16 = gen_macro("cas16");
127+
125128
for ordering in [
126129
Ordering::Relaxed,
127130
Ordering::Acquire,
128131
Ordering::Release,
129132
Ordering::AcqRel,
130133
] {
131134
let sym_ordering = aarch64_symbol(ordering);
132-
// TODO: support CAS 16
133-
for size in [1, 2, 4, 8 /* , 16*/] {
135+
for size in [1, 2, 4, 8] {
134136
for (sym, macro_) in &mut macros {
135137
let name = format!("__aarch64_{sym}{size}_{sym_ordering}");
136138
writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap();
137139
}
138140
}
141+
let name = format!("__aarch64_cas16_{sym_ordering}");
142+
writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap();
139143
}
140144

141145
let mut buf = String::new();
142-
for macro_def in macros.values() {
146+
for macro_def in macros.values().chain(std::iter::once(&cas16)) {
143147
buf += macro_def;
144148
buf += "}; }";
145149
}

src/aarch64.rs

+79-24
Original file line numberDiff line numberDiff line change
@@ -12,31 +12,21 @@
1212
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
1313
//!
1414
//! Generate functions for each of the following symbols:
15+
//! __aarch64_casM_ORDER
1516
//! __aarch64_swpN_ORDER
1617
//! __aarch64_ldaddN_ORDER
1718
//! __aarch64_ldclrN_ORDER
1819
//! __aarch64_ldeorN_ORDER
1920
//! __aarch64_ldsetN_ORDER
20-
//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8}, ORDER = { relax, acq, rel, acq_rel }
21-
//!
22-
//! TODO: M = 16
21+
//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8, 16}, ORDER = { relax, acq, rel, acq_rel }
2322
//!
2423
//! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants.
2524
//! We do something similar, but with macro arguments.
2625
27-
/// We don't do runtime dispatch so we don't have to worry about the global ctor.
28-
/// Apparently MacOS uses a different number of underscores in the symbol name (???)
29-
// #[cfg(target_vendor = "apple")]
30-
// macro_rules! have_lse {
31-
// () => { ___aarch64_have_lse_atomics }
32-
// }
33-
34-
// #[cfg(not(target_vendor = "apple"))]
35-
// macro_rules! have_lse {
36-
// () => { __aarch64_have_lse_atomics }
37-
// }
26+
// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
3827

3928
/// Translate a byte size to a Rust type.
29+
#[rustfmt::skip]
4030
macro_rules! int_ty {
4131
(1) => { i8 };
4232
(2) => { i16 };
@@ -48,6 +38,7 @@ macro_rules! int_ty {
4838
/// Given a byte size and a register number, return a register of the appropriate size.
4939
///
5040
/// See <https://developer.arm.com/documentation/102374/0101/Registers-in-AArch64---general-purpose-registers>.
41+
#[rustfmt::skip]
5142
macro_rules! reg {
5243
(1, $num:literal) => { concat!("w", $num) };
5344
(2, $num:literal) => { concat!("w", $num) };
@@ -56,6 +47,7 @@ macro_rules! reg {
5647
}
5748

5849
/// Given an atomic ordering, translate it to the acquire suffix for the ldxr aarch64 ASM instruction.
50+
#[rustfmt::skip]
5951
macro_rules! acquire {
6052
(Relaxed) => { "" };
6153
(Acquire) => { "a" };
@@ -64,6 +56,7 @@ macro_rules! acquire {
6456
}
6557

6658
/// Given an atomic ordering, translate it to the release suffix for the stxr aarch64 ASM instruction.
59+
#[rustfmt::skip]
6760
macro_rules! release {
6861
(Relaxed) => { "" };
6962
(Acquire) => { "" };
@@ -72,6 +65,7 @@ macro_rules! release {
7265
}
7366

7467
/// Given a size in bytes, translate it to the byte suffix for an aarch64 ASM instruction.
68+
#[rustfmt::skip]
7569
macro_rules! size {
7670
(1) => { "b" };
7771
(2) => { "h" };
@@ -84,6 +78,7 @@ macro_rules! size {
8478
/// with the correct semantics.
8579
///
8680
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTB--Unsigned-Extend-Byte--an-alias-of-UBFM->
81+
#[rustfmt::skip]
8782
macro_rules! uxt {
8883
(1) => { "uxtb" };
8984
(2) => { "uxth" };
@@ -95,15 +90,39 @@ macro_rules! uxt {
9590
///
9691
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXR--Load-Exclusive-Register->.
9792
macro_rules! ldxr {
98-
($ordering:ident, $bytes:tt) => { concat!("ld", acquire!($ordering), "xr", size!($bytes)) }
93+
($ordering:ident, $bytes:tt) => {
94+
concat!("ld", acquire!($ordering), "xr", size!($bytes))
95+
};
9996
}
10097

10198
/// Given an atomic ordering and byte size, translate it to a STore eXclusive Register instruction
10299
/// with the correct semantics.
103100
///
104101
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXR--Store-Exclusive-Register->.
105102
macro_rules! stxr {
106-
($ordering:ident, $bytes:tt) => { concat!("st", release!($ordering), "xr", size!($bytes)) }
103+
($ordering:ident, $bytes:tt) => {
104+
concat!("st", release!($ordering), "xr", size!($bytes))
105+
};
106+
}
107+
108+
/// Given an atomic ordering, translate it to a LoaD eXclusive Pair of registers instruction
109+
/// with the correct semantics.
110+
///
111+
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers->
112+
macro_rules! ldxp {
113+
($ordering:ident) => {
114+
concat!("ld", acquire!($ordering), "xp")
115+
};
116+
}
117+
118+
/// Given an atomic ordering, translate it to a STore eXclusive Pair of registers instruction
119+
/// with the correct semantics.
120+
///
121+
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers->.
122+
macro_rules! stxp {
123+
($ordering:ident) => {
124+
concat!("st", release!($ordering), "xp")
125+
};
107126
}
108127

109128
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
@@ -134,9 +153,38 @@ macro_rules! compare_and_swap {
134153
} }
135154
}
136155
}
137-
}
156+
};
138157
}
139158

159+
// i128 uses a completely different impl, so it has its own macro.
160+
macro_rules! compare_and_swap_i128 {
161+
($ordering:ident, $name:ident) => {
162+
intrinsics! {
163+
#[maybe_use_optimized_c_shim]
164+
#[naked]
165+
pub extern "C" fn $name (
166+
expected: i128, desired: i128, ptr: *mut i128
167+
) -> i128 {
168+
unsafe { core::arch::asm! {
169+
"mov x16, x0",
170+
"mov x17, x1",
171+
"0:",
172+
// LDXP x0, x1, [x4]
173+
concat!(ldxp!($ordering), " x0, x1, [x4]"),
174+
"cmp x0, x16",
175+
"ccmp x1, x17, #0, eq",
176+
"bne 1f",
177+
// STXP w(tmp2), x2, x3, [x4]
178+
concat!(stxp!($ordering), " w15, x2, x3, [x4]"),
179+
"cbnz w15, 0b",
180+
"1:",
181+
"ret",
182+
options(noreturn)
183+
} }
184+
}
185+
}
186+
};
187+
}
140188

141189
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.swap>.
142190
macro_rules! swap {
@@ -161,7 +209,7 @@ macro_rules! swap {
161209
} }
162210
}
163211
}
164-
}
212+
};
165213
}
166214

167215
/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
@@ -194,28 +242,35 @@ macro_rules! fetch_op {
194242

195243
// We need a single macro to pass to `foreach_ldadd`.
196244
macro_rules! add {
197-
($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "add" } }
245+
($ordering:ident, $bytes:tt, $name:ident) => {
246+
fetch_op! { $ordering, $bytes, $name, "add" }
247+
};
198248
}
199249

200250
macro_rules! and {
201-
($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "bic" } }
251+
($ordering:ident, $bytes:tt, $name:ident) => {
252+
fetch_op! { $ordering, $bytes, $name, "bic" }
253+
};
202254
}
203255

204256
macro_rules! xor {
205-
($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "eor" } }
257+
($ordering:ident, $bytes:tt, $name:ident) => {
258+
fetch_op! { $ordering, $bytes, $name, "eor" }
259+
};
206260
}
207261

208262
macro_rules! or {
209-
($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "orr" } }
263+
($ordering:ident, $bytes:tt, $name:ident) => {
264+
fetch_op! { $ordering, $bytes, $name, "orr" }
265+
};
210266
}
211267

212268
// See `generate_aarch64_outlined_atomics` in build.rs.
213269
include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs"));
214270
foreach_cas!(compare_and_swap);
271+
foreach_cas16!(compare_and_swap_i128);
215272
foreach_swp!(swap);
216273
foreach_ldadd!(add);
217274
foreach_ldclr!(and);
218275
foreach_ldeor!(xor);
219276
foreach_ldset!(or);
220-
221-
// TODO: CAS 16

src/lib.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,11 @@ pub mod mem;
5757
#[cfg(target_arch = "arm")]
5858
pub mod arm;
5959

60-
#[cfg(target_arch = "aarch64")]
60+
#[cfg(all(
61+
target_arch = "aarch64",
62+
not(feature = "no-asm"),
63+
not(feature = "optimized-c")
64+
))]
6165
pub mod aarch64;
6266

6367
#[cfg(all(

testcrate/tests/lse.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#![cfg(target_arch = "aarch64")]
21
#![feature(decl_macro)] // so we can use pub(super)
2+
#![cfg(all(target_arch = "aarch64", not(feature = "no-asm")))]
33

44
/// Translate a byte size to a Rust type.
55
macro int_ty {
@@ -38,6 +38,10 @@ mod cas {
3838
}
3939
}
4040

41+
macro test_cas16($_ordering:ident, $name:ident) {
42+
cas::test!($_ordering, 16, $name);
43+
}
44+
4145
mod swap {
4246
pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) {
4347
#[test]
@@ -81,6 +85,7 @@ test_op!(xor, std::ops::BitXor::bitxor);
8185
test_op!(or, std::ops::BitOr::bitor);
8286

8387
compiler_builtins::foreach_cas!(cas::test);
88+
compiler_builtins::foreach_cas16!(test_cas16);
8489
compiler_builtins::foreach_swp!(swap::test);
8590
compiler_builtins::foreach_ldadd!(add::test);
8691
compiler_builtins::foreach_ldclr!(clr::test);

0 commit comments

Comments
 (0)