Skip to content

Commit f4bb10b

Browse files
committed
Vec::resize for bytes should be a single memset
1 parent c422581 commit f4bb10b

File tree

5 files changed

+98
-75
lines changed

5 files changed

+98
-75
lines changed

library/alloc/src/vec/mod.rs

+25-53
Original file line numberDiff line numberDiff line change
@@ -2561,7 +2561,7 @@ impl<T: Clone, A: Allocator> Vec<T, A> {
25612561
let len = self.len();
25622562

25632563
if new_len > len {
2564-
self.extend_with(new_len - len, value)
2564+
self.extend_trusted(core::iter::repeat_n(value, new_len - len));
25652565
} else {
25662566
self.truncate(new_len);
25672567
}
@@ -2673,38 +2673,6 @@ impl<T, A: Allocator, const N: usize> Vec<[T; N], A> {
26732673
}
26742674
}
26752675

2676-
impl<T: Clone, A: Allocator> Vec<T, A> {
2677-
#[cfg(not(no_global_oom_handling))]
2678-
/// Extend the vector by `n` clones of value.
2679-
fn extend_with(&mut self, n: usize, value: T) {
2680-
self.reserve(n);
2681-
2682-
unsafe {
2683-
let mut ptr = self.as_mut_ptr().add(self.len());
2684-
// Use SetLenOnDrop to work around bug where compiler
2685-
// might not realize that the store through `ptr` and the store through self.set_len()
2686-
// don't alias.
2687-
let mut local_len = SetLenOnDrop::new(&mut self.len);
2688-
2689-
// Write all elements except the last one
2690-
for _ in 1..n {
2691-
ptr::write(ptr, value.clone());
2692-
ptr = ptr.add(1);
2693-
// Increment the length in every step in case clone() panics
2694-
local_len.increment_len(1);
2695-
}
2696-
2697-
if n > 0 {
2698-
// We can write the last element directly without cloning needlessly
2699-
ptr::write(ptr, value);
2700-
local_len.increment_len(1);
2701-
}
2702-
2703-
// len set by scope guard
2704-
}
2705-
}
2706-
}
2707-
27082676
impl<T: PartialEq, A: Allocator> Vec<T, A> {
27092677
/// Removes consecutive repeated elements in the vector according to the
27102678
/// [`PartialEq`] trait implementation.
@@ -3083,32 +3051,36 @@ impl<T, A: Allocator> Vec<T, A> {
30833051
#[cfg(not(no_global_oom_handling))]
30843052
fn extend_trusted(&mut self, iterator: impl iter::TrustedLen<Item = T>) {
30853053
let (low, high) = iterator.size_hint();
3086-
if let Some(additional) = high {
3087-
debug_assert_eq!(
3088-
low,
3089-
additional,
3090-
"TrustedLen iterator's size hint is not exact: {:?}",
3091-
(low, high)
3092-
);
3093-
self.reserve(additional);
3094-
unsafe {
3095-
let ptr = self.as_mut_ptr();
3096-
let mut local_len = SetLenOnDrop::new(&mut self.len);
3097-
iterator.for_each(move |element| {
3098-
ptr::write(ptr.add(local_len.current_len()), element);
3099-
// Since the loop executes user code which can panic we have to update
3100-
// the length every step to correctly drop what we've written.
3101-
// NB can't overflow since we would have had to alloc the address space
3102-
local_len.increment_len(1);
3103-
});
3104-
}
3105-
} else {
3054+
if high.is_none() {
31063055
// Per TrustedLen contract a `None` upper bound means that the iterator length
31073056
// truly exceeds usize::MAX, which would eventually lead to a capacity overflow anyway.
31083057
// Since the regular path already panics eagerly (via `reserve()`) we do the same here.
31093058
// This avoids additional codegen for a fallback code path which would eventually
31103059
// panic anyway.
31113060
panic!("capacity overflow");
3061+
};
3062+
3063+
debug_assert_eq!(
3064+
Some(low),
3065+
high,
3066+
"TrustedLen iterator's size hint is not exact: {:?}",
3067+
(low, high)
3068+
);
3069+
self.reserve(low);
3070+
3071+
// SAFETY: From TrustedLen we know exactly how many slots we'll need,
3072+
// and we just reserved them. Thus we can write each element as we generate
3073+
// it into its final location without needing any further safety checks.
3074+
unsafe {
3075+
let ptr = self.as_mut_ptr();
3076+
let mut local_len = SetLenOnDrop::new(&mut self.len);
3077+
iterator.for_each(move |element| {
3078+
ptr::write(ptr.add(local_len.current_len()), element);
3079+
// Since the loop executes user code which can panic we have to update
3080+
// the length every step to correctly drop what we've written.
3081+
// NB can't overflow since we would have had to alloc the address space
3082+
local_len.increment_len_unchecked(1);
3083+
});
31123084
}
31133085
}
31143086

library/alloc/src/vec/set_len_on_drop.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,12 @@ impl<'a> SetLenOnDrop<'a> {
1414
SetLenOnDrop { local_len: *len, len }
1515
}
1616

17+
/// # Safety
18+
/// `self.current_len() + increment` must not overflow.
1719
#[inline]
18-
pub(super) fn increment_len(&mut self, increment: usize) {
19-
self.local_len += increment;
20+
pub(super) unsafe fn increment_len_unchecked(&mut self, increment: usize) {
21+
// SAFETY: This is our precondition
22+
self.local_len = unsafe { self.local_len.unchecked_add(increment) };
2023
}
2124

2225
#[inline]

library/alloc/src/vec/spec_from_elem.rs

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use core::ptr;
2-
31
use crate::alloc::Allocator;
42
use crate::raw_vec::RawVec;
53

@@ -13,7 +11,7 @@ pub(super) trait SpecFromElem: Sized {
1311
impl<T: Clone> SpecFromElem for T {
1412
default fn from_elem<A: Allocator>(elem: Self, n: usize, alloc: A) -> Vec<Self, A> {
1513
let mut v = Vec::with_capacity_in(n, alloc);
16-
v.extend_with(n, elem);
14+
v.extend_trusted(core::iter::repeat_n(elem, n));
1715
v
1816
}
1917
}
@@ -25,7 +23,7 @@ impl<T: Clone + IsZero> SpecFromElem for T {
2523
return Vec { buf: RawVec::with_capacity_zeroed_in(n, alloc), len: n };
2624
}
2725
let mut v = Vec::with_capacity_in(n, alloc);
28-
v.extend_with(n, elem);
26+
v.extend_trusted(core::iter::repeat_n(elem, n));
2927
v
3028
}
3129
}

library/core/src/iter/sources/repeat_n.rs

+43-16
Original file line numberDiff line numberDiff line change
@@ -108,25 +108,52 @@ impl<A> Drop for RepeatN<A> {
108108
}
109109
}
110110

111+
trait SpecRepeatN<A> {
112+
/// Reads an item after `self.count` has been decreased
113+
///
114+
/// # Safety
115+
///
116+
/// Must be called only once after each lowering of `self.count`.
117+
///
118+
/// Will cause double-frees if used multiple times or without checking
119+
/// that the iterator was originally non-empty beforehand.
120+
unsafe fn spec_read_unchecked(&mut self) -> A;
121+
}
122+
123+
impl<A: Clone> SpecRepeatN<A> for RepeatN<A> {
124+
default unsafe fn spec_read_unchecked(&mut self) -> A {
125+
if self.count == 0 {
126+
// SAFETY: we just lowered the count to zero so it won't be dropped
127+
// later, and thus it's okay to take it here.
128+
unsafe { ManuallyDrop::take(&mut self.element) }
129+
} else {
130+
A::clone(&self.element)
131+
}
132+
}
133+
}
134+
135+
impl<A: Copy> SpecRepeatN<A> for RepeatN<A> {
136+
unsafe fn spec_read_unchecked(&mut self) -> A {
137+
// For `Copy` types, we can always just read the item directly,
138+
// so skip having a branch that would need to be optimized out.
139+
*self.element
140+
}
141+
}
142+
111143
#[unstable(feature = "iter_repeat_n", issue = "104434")]
112144
impl<A: Clone> Iterator for RepeatN<A> {
113145
type Item = A;
114146

115147
#[inline]
116148
fn next(&mut self) -> Option<A> {
117-
if self.count == 0 {
118-
return None;
119-
}
120-
121-
self.count -= 1;
122-
Some(if self.count == 0 {
123-
// SAFETY: the check above ensured that the count used to be non-zero,
124-
// so element hasn't been dropped yet, and we just lowered the count to
125-
// zero so it won't be dropped later, and thus it's okay to take it here.
126-
unsafe { ManuallyDrop::take(&mut self.element) }
149+
// Using checked_sub as a safe way to get unchecked_sub
150+
if let Some(new_count) = self.count.checked_sub(1) {
151+
self.count = new_count;
152+
// SAFETY: Just decreased the count.
153+
unsafe { Some(self.spec_read_unchecked()) }
127154
} else {
128-
A::clone(&self.element)
129-
})
155+
None
156+
}
130157
}
131158

132159
#[inline]
@@ -143,12 +170,12 @@ impl<A: Clone> Iterator for RepeatN<A> {
143170
self.take_element();
144171
}
145172

146-
if skip > len {
173+
if let Some(new_count) = len.checked_sub(skip) {
174+
self.count = new_count;
175+
Ok(())
176+
} else {
147177
// SAFETY: we just checked that the difference is positive
148178
Err(unsafe { NonZero::new_unchecked(skip - len) })
149-
} else {
150-
self.count = len - skip;
151-
Ok(())
152179
}
153180
}
154181

tests/codegen/vec-of-bytes-memset.rs

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//@ compile-flags: -O
2+
//@ only-64bit
3+
4+
#![crate_type = "lib"]
5+
6+
// CHECK-LABEL: @resize_bytes_is_one_memset
7+
#[no_mangle]
8+
pub fn resize_bytes_is_one_memset(x: &mut Vec<u8>) {
9+
// CHECK: call void @llvm.memset.p0.i64({{.+}}, i8 123, i64 456789, i1 false)
10+
let new_len = x.len() + 456789;
11+
x.resize(new_len, 123);
12+
}
13+
14+
#[derive(Copy, Clone)]
15+
struct ByteNewtype(i8);
16+
17+
// CHECK-LABEL: @from_elem_is_one_memset
18+
#[no_mangle]
19+
pub fn from_elem_is_one_memset() -> Vec<ByteNewtype> {
20+
// CHECK: %[[P:.+]] = tail call{{.+}}@__rust_alloc(i64 noundef 123456, i64 noundef 1)
21+
// CHECK: call void @llvm.memset.p0.i64({{.+}} %[[P]], i8 42, i64 123456, i1 false)
22+
vec![ByteNewtype(42); 123456]
23+
}

0 commit comments

Comments
 (0)