Skip to content

Commit a98abe8

Browse files
committed
Improve codegen of String::retain method.
Using `unwrap_unchecked` helps the optimizer avoid generating a panicking path that will never be taken for a valid UTF-8 string. Using `encode_utf8` saves us a call to `memcpy`: with `ptr::copy` the optimizer is unable to realize that `ch_len <= 4`, whereas with `encode_utf8` it can generate much better assembly code. https://rust.godbolt.org/z/z73ohenfc
1 parent 0e7915d commit a98abe8

File tree

1 file changed

+17
-8
lines changed

1 file changed

+17
-8
lines changed

library/alloc/src/string.rs

+17-8
Original file line numberDiff line numberDiff line change
@@ -1466,19 +1466,28 @@ impl String {
14661466
let mut guard = SetLenOnDrop { s: self, idx: 0, del_bytes: 0 };
14671467

14681468
while guard.idx < len {
1469-
let ch = unsafe { guard.s.get_unchecked(guard.idx..len).chars().next().unwrap() };
1469+
let ch =
1470+
// SAFETY: `guard.idx` is non-negative and less than `len`, so the `get_unchecked`
1471+
// is in bounds. `self` is a valid UTF-8 string and the returned slice starts at
1472+
// a Unicode code point, so the `Chars` iterator always returns one character.
1473+
unsafe { guard.s.get_unchecked(guard.idx..len).chars().next().unwrap_unchecked() };
14701474
let ch_len = ch.len_utf8();
14711475

14721476
if !f(ch) {
14731477
guard.del_bytes += ch_len;
14741478
} else if guard.del_bytes > 0 {
1475-
unsafe {
1476-
ptr::copy(
1477-
guard.s.vec.as_ptr().add(guard.idx),
1478-
guard.s.vec.as_mut_ptr().add(guard.idx - guard.del_bytes),
1479-
ch_len,
1480-
);
1481-
}
1479+
// SAFETY: `guard.idx` is in bounds and `guard.del_bytes` represents the number of
1480+
// bytes that are erased from the string so the resulting `guard.idx -
1481+
// guard.del_bytes` always points to a valid Unicode code point boundary.
1482+
//
1483+
// `guard.del_bytes` > 0 and `guard.idx + ch.len_utf8()` <= `len`, so taking a slice with `ch.len_utf8()` len
1484+
// is safe.
1485+
ch.encode_utf8(unsafe {
1486+
crate::slice::from_raw_parts_mut(
1487+
guard.s.as_mut_ptr().add(guard.idx - guard.del_bytes),
1488+
ch.len_utf8(),
1489+
)
1490+
});
14821491
}
14831492

14841493
// Point idx to the next char

0 commit comments

Comments
 (0)