Skip to content

Commit 1f891d1

Browse files
committed
Improve implementation approach comments in [T]::reverse()
1 parent e8fad32 commit 1f891d1

File tree

1 file changed

+15
-4
lines changed

1 file changed

+15
-4
lines changed

src/libcore/slice/mod.rs

+15 -4
Original file line number | Diff line number | Diff line change
@@ -540,12 +540,24 @@ impl<T> SliceExt for [T] {
540540
let mut i: usize = 0;
541541
let ln = self.len();
542542

543+
// For very small types, all the individual reads in the normal
544+
// path perform poorly. We can do better, given efficient unaligned
545+
// load/store, by loading a larger chunk and reversing a register.
546+
547+
// Ideally LLVM would do this for us, as it knows better than we do
548+
// whether unaligned reads are efficient (since that changes between
549+
// different ARM versions, for example) and what the best chunk size
550+
// would be. Unfortunately, as of LLVM 4.0 (2017-05) it only unrolls
551+
// the loop, so we need to do this ourselves. (Hypothesis: reverse
552+
// is troublesome because the sides can be aligned differently --
553+
// will be, when the length is odd -- so there's no way of emitting
554+
// pre- and postludes to use fully-aligned SIMD in the middle.)
555+
543556
let fast_unaligned =
544557
cfg!(any(target_arch = "x86", target_arch = "x86_64"));
545558

546559
if fast_unaligned && mem::size_of::<T>() == 1 {
547-
// Single-byte read & write are comparatively slow. Instead,
548-
// work in usize chunks and get bswap to do the hard work.
560+
// Use the llvm.bswap intrinsic to reverse u8s in a usize
549561
let chunk = mem::size_of::<usize>();
550562
while i + chunk - 1 < ln / 2 {
551563
unsafe {
@@ -561,8 +573,7 @@ impl<T> SliceExt for [T] {
561573
}
562574

563575
if fast_unaligned && mem::size_of::<T>() == 2 {
564-
// Not quite as good as the above, but still helpful.
565-
// Same general idea, read bigger and do the swap in a register.
576+
// Use rotate-by-16 to reverse u16s in a u32
566577
let chunk = mem::size_of::<u32>() / 2;
567578
while i + chunk - 1 < ln / 2 {
568579
unsafe {

0 commit comments

Comments
 (0)