Skip to content

Commit b50eab5

Browse files
AngelicosPhosphoros authored and scottmcm committed
Tweak the threshold for chunked swapping
Thanks to #98892 for the tests I brought in here, as it demonstrated that 3×usize is currently suboptimal.
1 parent 4eb5225 commit b50eab5

File tree

2 files changed

+45
-14
lines changed

2 files changed

+45
-14
lines changed

library/core/src/mem/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,7 @@ pub const fn swap<T>(x: &mut T, y: &mut T) {
738738
// tends to copy the whole thing to stack rather than doing it one part
739739
// at a time, so instead treat them as one-element slices and piggy-back
740740
// the slice optimizations that will split up the swaps.
741-
if size_of::<T>() / align_of::<T>() > 4 {
741+
if const { size_of::<T>() / align_of::<T>() > 2 } {
742742
// SAFETY: exclusive references always point to one non-overlapping
743743
// element and are non-null and properly aligned.
744744
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };

tests/codegen/swap-small-types.rs

+44-13
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,27 @@ type RGB48 = [u16; 3];
1111
// CHECK-LABEL: @swap_rgb48
1212
#[no_mangle]
1313
pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
14-
// FIXME MIR inlining messes up LLVM optimizations.
15-
// WOULD-CHECK-NOT: alloca
16-
// WOULD-CHECK: load i48
17-
// WOULD-CHECK: store i48
14+
// CHECK-NOT: alloca
15+
// Those can be supported only by backend
16+
// WOULD-CHECK: load i48
17+
// WOULD-CHECK: store i48
18+
// CHECK: ret void
19+
swap(x, y)
20+
}
21+
22+
// CHECK-LABEL: @swap_vecs
23+
#[no_mangle]
24+
pub fn swap_vecs(x: &mut Vec<u32>, y: &mut Vec<u32>) {
25+
// CHECK-NOT: alloca
26+
// CHECK: ret void
27+
swap(x, y)
28+
}
29+
30+
// CHECK-LABEL: @swap_slices
31+
#[no_mangle]
32+
pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) {
33+
// CHECK-NOT: alloca
34+
// CHECK: ret void
1835
swap(x, y)
1936
}
2037

@@ -25,9 +42,9 @@ type RGB24 = [u8; 3];
2542
// CHECK-LABEL: @swap_rgb24_slices
2643
#[no_mangle]
2744
pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
28-
// CHECK-NOT: alloca
29-
// CHECK: load <{{[0-9]+}} x i8>
30-
// CHECK: store <{{[0-9]+}} x i8>
45+
// CHECK-NOT: alloca
46+
// CHECK: load <{{[0-9]+}} x i8>
47+
// CHECK: store <{{[0-9]+}} x i8>
3148
if x.len() == y.len() {
3249
x.swap_with_slice(y);
3350
}
@@ -39,9 +56,9 @@ type RGBA32 = [u8; 4];
3956
// CHECK-LABEL: @swap_rgba32_slices
4057
#[no_mangle]
4158
pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) {
42-
// CHECK-NOT: alloca
43-
// CHECK: load <{{[0-9]+}} x i32>
44-
// CHECK: store <{{[0-9]+}} x i32>
59+
// CHECK-NOT: alloca
60+
// CHECK: load <{{[0-9]+}} x i32>
61+
// CHECK: store <{{[0-9]+}} x i32>
4562
if x.len() == y.len() {
4663
x.swap_with_slice(y);
4764
}
@@ -54,10 +71,24 @@ const _: () = assert!(!std::mem::size_of::<String>().is_power_of_two());
5471
// CHECK-LABEL: @swap_string_slices
5572
#[no_mangle]
5673
pub fn swap_string_slices(x: &mut [String], y: &mut [String]) {
57-
// CHECK-NOT: alloca
58-
// CHECK: load <{{[0-9]+}} x i64>
59-
// CHECK: store <{{[0-9]+}} x i64>
74+
// CHECK-NOT: alloca
75+
// CHECK: load <{{[0-9]+}} x i64>
76+
// CHECK: store <{{[0-9]+}} x i64>
6077
if x.len() == y.len() {
6178
x.swap_with_slice(y);
6279
}
6380
}
81+
82+
#[repr(C, packed)]
83+
pub struct Packed {
84+
pub first: bool,
85+
pub second: usize,
86+
}
87+
88+
// CHECK-LABEL: @swap_packed_structs
89+
#[no_mangle]
90+
pub fn swap_packed_structs(x: &mut Packed, y: &mut Packed) {
91+
// CHECK-NOT: alloca
92+
// CHECK: ret void
93+
swap(x, y)
94+
}

0 commit comments

Comments
 (0)