@@ -499,24 +499,6 @@ pub unsafe fn uninitialized<T>() -> T {
 #[stable(feature = "rust1", since = "1.0.0")]
 pub fn swap<T>(x: &mut T, y: &mut T) {
     unsafe {
-        let len = size_of::<T>();
-
-        if len < 128 {
-            // Give ourselves some scratch space to work with
-            let mut t: T = uninitialized();
-
-            // Perform the swap, `&mut` pointers never alias
-            ptr::copy_nonoverlapping(&*x, &mut t, 1);
-            ptr::copy_nonoverlapping(&*y, x, 1);
-            ptr::copy_nonoverlapping(&t, y, 1);
-
-            // y and t now point to the same thing, but we need to completely
-            // forget `t` because we do not want to run the destructor for `T`
-            // on its value, which is still owned somewhere outside this function.
-            forget(t);
-            return;
-        }
-
         // The approach here is to utilize SIMD to swap x & y efficiently. Testing reveals
         // that swapping either 32 bytes or 64 bytes at a time is most efficient for Intel
         // Haswell E processors. LLVM is more able to optimize if we give a struct a
@@ -534,6 +516,7 @@ pub fn swap<T>(x: &mut T, y: &mut T) {
         // Loop through x & y, copying them `Block` at a time
         // The optimizer should unroll the loop fully for most types
         // N.B. We can't use a for loop as the `range` impl calls `mem::swap` recursively
+        let len = size_of::<T>();
         let mut i = 0;
         while i + block_size <= len {
             // Create some uninitialized memory as scratch space
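In summary, the hunks above delete the old `size_of::<T>() < 128` fast path (a single whole-value triple copy through a `T`-sized scratch slot, followed by `forget`) and move the `let len = size_of::<T>();` binding down beside the loop, so swaps of every size now go through the block-wise copy. Below is a minimal sketch of that block-wise technique, not the commit's actual code: `BLOCK` and `swap_blockwise` are hypothetical names, the 32-byte block size is only what the diff's comments suggest for Haswell E, and it uses `MaybeUninit` in place of the `uninitialized()` call seen in the diff.

use std::mem::{size_of, MaybeUninit};
use std::ptr;

// Hypothetical block size; the diff's comments report 32 or 64 bytes
// per iteration as fastest on Intel Haswell E.
const BLOCK: usize = 32;

// Sketch of a block-wise swap: view both values as raw bytes and
// exchange them BLOCK bytes at a time through a stack scratch buffer.
fn swap_blockwise<T>(x: &mut T, y: &mut T) {
    unsafe {
        let x = x as *mut T as *mut u8;
        let y = y as *mut T as *mut u8;
        let len = size_of::<T>();

        // Scratch space; MaybeUninit stands in for `uninitialized()`.
        let mut buf = MaybeUninit::<[u8; BLOCK]>::uninit();
        let t = buf.as_mut_ptr() as *mut u8;

        // Swap full blocks. A while loop is used here, echoing the
        // diff's note that a for loop over a range would recurse.
        let mut i = 0;
        while i + BLOCK <= len {
            // Three non-overlapping copies perform the swap; `&mut`
            // references never alias, and `t` is a local buffer.
            ptr::copy_nonoverlapping(x.add(i), t, BLOCK);
            ptr::copy_nonoverlapping(y.add(i), x.add(i), BLOCK);
            ptr::copy_nonoverlapping(t, y.add(i), BLOCK);
            i += BLOCK;
        }

        // Swap the tail: len need not be a multiple of BLOCK, and the
        // remainder (always < BLOCK) fits in the same scratch buffer.
        let rem = len - i;
        if rem > 0 {
            ptr::copy_nonoverlapping(x.add(i), t, rem);
            ptr::copy_nonoverlapping(y.add(i), x.add(i), rem);
            ptr::copy_nonoverlapping(t, y.add(i), rem);
        }
    }
}

fn main() {
    let (mut a, mut b) = ([1u64; 8], [2u64; 8]);
    swap_blockwise(&mut a, &mut b);
    assert_eq!(a, [2u64; 8]);
    assert_eq!(b, [1u64; 8]);
}

Note that because the sketch never materializes a value of type `T`, no destructor can run on the scratch bytes, so it needs no `forget` call, unlike the deleted fast path, which built a real `T` in its scratch slot.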