@@ -499,6 +499,24 @@ pub unsafe fn uninitialized<T>() -> T {
 #[stable(feature = "rust1", since = "1.0.0")]
 pub fn swap<T>(x: &mut T, y: &mut T) {
     unsafe {
+        let len = size_of::<T>();
+
+        if len < 128 {
+            // Give ourselves some scratch space to work with
+            let mut t: T = uninitialized();
+
+            // Perform the swap; `&mut` pointers never alias
+            ptr::copy_nonoverlapping(&*x, &mut t, 1);
+            ptr::copy_nonoverlapping(&*y, x, 1);
+            ptr::copy_nonoverlapping(&t, y, 1);
+
+            // y and t now point to the same thing, but we need to completely
+            // forget `t` because we do not want to run the destructor for `T`
+            // on its value, which is still owned somewhere outside this function.
+            forget(t);
+            return;
+        }
+
         // The approach here is to use SIMD to swap x & y efficiently. Testing reveals
         // that swapping either 32 bytes or 64 bytes at a time is most efficient for Intel
         // Haswell E processors. LLVM is more able to optimize if we give a struct a
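For reference, here is a minimal standalone sketch of the small-type fast path this hunk adds, ported to current stable Rust. `MaybeUninit` stands in for the now-deprecated `uninitialized()` and, having no destructor, also removes the need for `forget`; `swap_small` is an invented name, not part of the patch:

```rust
use std::mem::MaybeUninit;
use std::ptr;

fn swap_small<T>(x: &mut T, y: &mut T) {
    let xp = x as *mut T;
    let yp = y as *mut T;
    unsafe {
        // Scratch space; never read as a `T` until fully initialized.
        let mut t = MaybeUninit::<T>::uninit();

        // `&mut` references never alias, so the three copies never overlap.
        ptr::copy_nonoverlapping(xp as *const T, t.as_mut_ptr(), 1);
        ptr::copy_nonoverlapping(yp as *const T, xp, 1);
        ptr::copy_nonoverlapping(t.as_ptr(), yp, 1);

        // `MaybeUninit` runs no destructor, so the extra bitwise copy in `t`
        // is silently discarded; the patch gets the same effect from `forget(t)`.
    }
}

fn main() {
    let mut a = String::from("left");
    let mut b = String::from("right");
    swap_small(&mut a, &mut b);
    assert_eq!((a.as_str(), b.as_str()), ("right", "left"));
}
```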
@@ -516,20 +534,21 @@ pub fn swap<T>(x: &mut T, y: &mut T) {
         // Loop through x & y, copying them `Block` at a time
         // The optimizer should unroll the loop fully for most types
         // N.B. We can't use a for loop as the `range` impl calls `mem::swap` recursively
-        let len = size_of::<T>() as isize;
         let mut i = 0;
-        while i + block_size as isize <= len {
+        while i + block_size <= len {
             // Create some uninitialized memory as scratch space
             // Declaring `t` here avoids aligning the stack when this loop is unused
             let mut t: Block = uninitialized();
             let t = &mut t as *mut _ as *mut u8;
+            let x = x.offset(i as isize);
+            let y = y.offset(i as isize);

             // Swap a block of bytes of x & y, using t as a temporary buffer
             // This should be optimized into efficient SIMD operations where available
-            ptr::copy_nonoverlapping(x.offset(i), t, block_size);
-            ptr::copy_nonoverlapping(y.offset(i), x.offset(i), block_size);
-            ptr::copy_nonoverlapping(t, y.offset(i), block_size);
-            i += block_size as isize;
+            ptr::copy_nonoverlapping(x, t, block_size);
+            ptr::copy_nonoverlapping(y, x, block_size);
+            ptr::copy_nonoverlapping(t, y, block_size);
+            i += block_size;
         }

         if i < len {
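The loop above is generic over `T` and leans on `Block` being `#[repr(simd)]`. As an illustration of the same access pattern on stable Rust, here is a sketch specialized to raw byte buffers: `swap_blocks` is an invented name, the zeroed scratch array replaces the patch's uninitialized `Block`, and 32 bytes is one of the two block sizes the patch's comments cite for Haswell E:

```rust
use std::ptr;

const BLOCK: usize = 32;

/// Swap `len` bytes between two valid, non-overlapping buffers, a whole
/// block at a time. The tail (`len % BLOCK` bytes) is left to a remainder
/// path like the one in the next hunk.
unsafe fn swap_blocks(x: *mut u8, y: *mut u8, len: usize) {
    let mut i = 0;
    while i + BLOCK <= len {
        // Zeroed scratch; the patch uses an uninitialized `Block` instead.
        let mut t = [0u8; BLOCK];
        // Rebind to offset pointers, mirroring the `let x = x.offset(..)`
        // lines added above; `add(i)` is the unsigned form of `offset(i as isize)`.
        let (x, y) = (x.add(i), y.add(i));
        ptr::copy_nonoverlapping(x, t.as_mut_ptr(), BLOCK);
        ptr::copy_nonoverlapping(y, x, BLOCK);
        ptr::copy_nonoverlapping(t.as_ptr(), y, BLOCK);
        i += BLOCK;
    }
}

fn main() {
    let mut a = [1u8; 64];
    let mut b = [2u8; 64];
    unsafe { swap_blocks(a.as_mut_ptr(), b.as_mut_ptr(), 64) };
    assert!(a.iter().all(|&v| v == 2) && b.iter().all(|&v| v == 1));
}
```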
@@ -538,41 +557,15 @@ pub fn swap<T>(x: &mut T, y: &mut T) {
             // where appropriate (this information is lost by conversion
             // to *mut u8, so restore it manually here)
             let mut t: UnalignedBlock = uninitialized();
-            let rem = (len - i) as usize;
-
-            if align_of::<T>() % 8 == 0 && len % 8 == 0 {
-                let t = &mut t as *mut _ as *mut u64;
-                let x = x.offset(i) as *mut u64;
-                let y = y.offset(i) as *mut u64;
-
-                ptr::copy_nonoverlapping(x, t, rem / 8);
-                ptr::copy_nonoverlapping(y, x, rem / 8);
-                ptr::copy_nonoverlapping(t, y, rem / 8);
-            } else if align_of::<T>() % 4 == 0 && len % 4 == 0 {
-                let t = &mut t as *mut _ as *mut u32;
-                let x = x.offset(i) as *mut u32;
-                let y = y.offset(i) as *mut u32;
-
-                ptr::copy_nonoverlapping(x, t, rem / 4);
-                ptr::copy_nonoverlapping(y, x, rem / 4);
-                ptr::copy_nonoverlapping(t, y, rem / 4);
-            } else if align_of::<T>() % 2 == 0 && len % 2 == 0 {
-                let t = &mut t as *mut _ as *mut u16;
-                let x = x.offset(i) as *mut u16;
-                let y = y.offset(i) as *mut u16;
-
-                ptr::copy_nonoverlapping(x, t, rem / 2);
-                ptr::copy_nonoverlapping(y, x, rem / 2);
-                ptr::copy_nonoverlapping(t, y, rem / 2);
-            } else {
-                let t = &mut t as *mut _ as *mut u8;
-                let x = x.offset(i);
-                let y = y.offset(i);
-
-                ptr::copy_nonoverlapping(x, t, rem);
-                ptr::copy_nonoverlapping(y, x, rem);
-                ptr::copy_nonoverlapping(t, y, rem);
-            }
+            let rem = len - i;
+
+            let t = &mut t as *mut _ as *mut u8;
+            let x = x.offset(i as isize);
+            let y = y.offset(i as isize);
+
+            ptr::copy_nonoverlapping(x, t, rem);
+            ptr::copy_nonoverlapping(y, x, rem);
+            ptr::copy_nonoverlapping(t, y, rem);
         }
     }
 }
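The deleted branches hand-picked a copy width (u64, u32, u16, or u8) from `T`'s alignment; the replacement always swaps the remainder at byte granularity, presumably on the assumption that the backend can widen a small, bounded byte copy on its own (my reading, not a claim the patch makes). A runnable sketch of just that tail, with illustrative names: `swap_tail` is invented, and the zeroed byte array stands in for the uninitialized `UnalignedBlock`:

```rust
use std::ptr;

/// Swap the final `rem` bytes of two valid, non-overlapping buffers.
/// In the patch `rem` is always less than `block_size`, since a full
/// block would have been handled by the loop.
unsafe fn swap_tail(x: *mut u8, y: *mut u8, rem: usize) {
    // Scratch buffer standing in for `UnalignedBlock` (which is 32 bytes).
    let mut t = [0u8; 64];
    assert!(rem <= t.len());
    ptr::copy_nonoverlapping(x, t.as_mut_ptr(), rem);
    ptr::copy_nonoverlapping(y, x, rem);
    ptr::copy_nonoverlapping(t.as_ptr(), y, rem);
}

fn main() {
    let mut a = *b"hello world";
    let mut b = *b"dlrow olleh";
    unsafe { swap_tail(a.as_mut_ptr(), b.as_mut_ptr(), a.len()) };
    assert_eq!(&a, b"dlrow olleh");
    assert_eq!(&b, b"hello world");
}
```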