Commit 8a973df

restore old behaviour for sizes < 128
1 parent d4d3f53 commit 8a973df

File tree

1 file changed: +34 −41 lines changed


src/libcore/mem.rs

+34 −41
@@ -499,6 +499,24 @@ pub unsafe fn uninitialized<T>() -> T {
 #[stable(feature = "rust1", since = "1.0.0")]
 pub fn swap<T>(x: &mut T, y: &mut T) {
     unsafe {
+        let len = size_of::<T>();
+
+        if len < 128 {
+            // Give ourselves some scratch space to work with
+            let mut t: T = uninitialized();
+
+            // Perform the swap, `&mut` pointers never alias
+            ptr::copy_nonoverlapping(&*x, &mut t, 1);
+            ptr::copy_nonoverlapping(&*y, x, 1);
+            ptr::copy_nonoverlapping(&t, y, 1);
+
+            // y and t now point to the same thing, but we need to completely
+            // forget `t` because we do not want to run the destructor for `T`
+            // on its value, which is still owned somewhere outside this function.
+            forget(t);
+            return;
+        }
+
         // The approach here is to utilize simd to swap x & y efficiently. Testing reveals
         // that swapping either 32 bytes or 64 bytes at a time is most efficient for intel
         // Haswell E processors. LLVM is more able to optimize if we give a struct a
@@ -516,20 +534,21 @@ pub fn swap<T>(x: &mut T, y: &mut T) {
         // Loop through x & y, copying them `Block` at a time
         // The optimizer should unroll the loop fully for most types
         // N.B. We can't use a for loop as the `range` impl calls `mem::swap` recursively
-        let len = size_of::<T>() as isize;
         let mut i = 0;
-        while i + block_size as isize <= len {
+        while i + block_size <= len {
             // Create some uninitialized memory as scratch space
             // Declaring `t` here avoids aligning the stack when this loop is unused
             let mut t: Block = uninitialized();
             let t = &mut t as *mut _ as *mut u8;
+            let x = x.offset(i as isize);
+            let y = y.offset(i as isize);

             // Swap a block of bytes of x & y, using t as a temporary buffer
             // This should be optimized into efficient SIMD operations where available
-            ptr::copy_nonoverlapping(x.offset(i), t, block_size);
-            ptr::copy_nonoverlapping(y.offset(i), x.offset(i), block_size);
-            ptr::copy_nonoverlapping(t, y.offset(i), block_size);
-            i += block_size as isize;
+            ptr::copy_nonoverlapping(x, t, block_size);
+            ptr::copy_nonoverlapping(y, x, block_size);
+            ptr::copy_nonoverlapping(t, y, block_size);
+            i += block_size;
         }

@@ -538,41 +557,15 @@ pub fn swap<T>(x: &mut T, y: &mut T) {
             // where appropriate (this information is lost by conversion
             // to *mut u8, so restore it manually here)
             let mut t: UnalignedBlock = uninitialized();
-            let rem = (len - i) as usize;
-
-            if align_of::<T>() % 8 == 0 && len % 8 == 0 {
-                let t = &mut t as *mut _ as *mut u64;
-                let x = x.offset(i) as *mut u64;
-                let y = y.offset(i) as *mut u64;
-
-                ptr::copy_nonoverlapping(x, t, rem / 8);
-                ptr::copy_nonoverlapping(y, x, rem / 8);
-                ptr::copy_nonoverlapping(t, y, rem / 8);
-            } else if align_of::<T>() % 4 == 0 && len % 4 == 0 {
-                let t = &mut t as *mut _ as *mut u32;
-                let x = x.offset(i) as *mut u32;
-                let y = y.offset(i) as *mut u32;
-
-                ptr::copy_nonoverlapping(x, t, rem / 4);
-                ptr::copy_nonoverlapping(y, x, rem / 4);
-                ptr::copy_nonoverlapping(t, y, rem / 4);
-            } else if align_of::<T>() % 2 == 0 && len % 2 == 0 {
-                let t = &mut t as *mut _ as *mut u16;
-                let x = x.offset(i) as *mut u16;
-                let y = y.offset(i) as *mut u16;
-
-                ptr::copy_nonoverlapping(x, t, rem / 2);
-                ptr::copy_nonoverlapping(y, x, rem / 2);
-                ptr::copy_nonoverlapping(t, y, rem / 2);
-            } else {
-                let t = &mut t as *mut _ as *mut u8;
-                let x = x.offset(i);
-                let y = y.offset(i);
-
-                ptr::copy_nonoverlapping(x, t, rem);
-                ptr::copy_nonoverlapping(y, x, rem);
-                ptr::copy_nonoverlapping(t, y, rem);
-            }
+            let rem = len - i;
+
+            let t = &mut t as *mut _ as *mut u8;
+            let x = x.offset(i as isize);
+            let y = y.offset(i as isize);
+
+            ptr::copy_nonoverlapping(x, t, rem);
+            ptr::copy_nonoverlapping(y, x, rem);
+            ptr::copy_nonoverlapping(t, y, rem);
         }
     }
 }
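
For context, here is a small, self-contained sketch of the behaviour this commit restores for types smaller than 128 bytes: the whole value is swapped through a single T-sized temporary that is never dropped. This is an illustration only, not the libcore code above; the `swap_small` name, the `MaybeUninit` scratch buffer (standing in for the deprecated `uninitialized()`), and the `main` driver are invented for the example, and the block-copy path used for sizes of 128 bytes and above is omitted.

use std::mem::{size_of, MaybeUninit};
use std::ptr;

// Illustrative only: swap two values through one T-sized temporary,
// mirroring the `size_of::<T>() < 128` fast path in the diff above.
fn swap_small<T>(x: &mut T, y: &mut T) {
    // The SIMD block loop for larger types is not part of this sketch.
    assert!(size_of::<T>() < 128);
    unsafe {
        // Scratch space; `MaybeUninit` plays the role of the old `uninitialized()`
        // and never runs `T`'s destructor on the bitwise copy it holds.
        let mut t = MaybeUninit::<T>::uninit();

        // `&mut` references never alias, so non-overlapping copies are sound.
        ptr::copy_nonoverlapping(x as *const T, t.as_mut_ptr(), 1);
        ptr::copy_nonoverlapping(y as *const T, x as *mut T, 1);
        ptr::copy_nonoverlapping(t.as_ptr(), y as *mut T, 1);
        // The value in `t` is now also owned by `*y`; running its destructor here
        // would be a double drop, which is why the original code calls `forget(t)`.
    }
}

fn main() {
    let mut a = String::from("first");
    let mut b = String::from("second");
    swap_small(&mut a, &mut b);
    assert_eq!(a, "second");
    assert_eq!(b, "first");
}

The `< 128` cut-off in the commit presumably reflects that for small types one whole-value copy through the stack is cheaper than setting up the block loop; the sketch keeps only that branch.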
