@@ -2732,19 +2732,40 @@ pub trait Itertools : Iterator {
2732
2732
/// itertools::assert_equal(five_smallest, 0..5);
2733
2733
/// ```
2734
2734
#[cfg(feature = "use_alloc")]
fn k_smallest(self, k: usize) -> VecIntoIter<Self::Item>
where
    Self: Sized,
    Self::Item: Ord,
{
    // The stdlib heap has optimised handling of "holes", which is not included in
    // our heap implementation in `k_smallest_general`. While the difference is
    // unlikely to have practical impact unless `Self::Item` is very large, this
    // method uses the stdlib structure to maintain performance compared to
    // previous versions of the crate.
    use alloc::collections::BinaryHeap;

    if k == 0 {
        // Still drain the source: this method is documented as corresponding to
        // `self.sorted().take(k)`, which consumes the whole iterator even when
        // nothing is kept, so side effects of iteration must not depend on `k`.
        self.last();
        return Vec::new().into_iter();
    }

    // Fuse before splitting consumption across two passes. If the source holds
    // fewer than `k` items, `take(k)` stops on a `None`; an unfused iterator
    // could then resume yielding items in the second pass while the heap is
    // under-filled (or empty), breaking the `len == k` invariant and making
    // `peek().unwrap()` panic.
    let mut iter = self.fuse();

    // Max-heap seeded with the first `k` items (or fewer if the source is short).
    let mut heap = iter.by_ref().take(k).collect::<BinaryHeap<_>>();

    // Only runs when the source produced more than `k` items, so the heap is full.
    iter.for_each(|i| {
        debug_assert_eq!(heap.len(), k);
        // Equivalent to heap.push(min(i, heap.pop())) but more efficient.
        // This should be done with a single `.peek_mut().unwrap()` but
        // `PeekMut` sifts-down unconditionally on Rust 1.46.0 and prior.
        if *heap.peek().unwrap() > i {
            *heap.peek_mut().unwrap() = i;
        }
    });

    // Ascending order: the `k` smallest items, smallest first.
    heap.into_sorted_vec().into_iter()
}
2742
2763
2743
2764
/// Sort the k smallest elements into a new iterator using the provided comparison.
2744
2765
///
2745
2766
/// This corresponds to `self.sorted_by(cmp).take(k)` in the same way that
2746
2767
/// [Itertools::k_smallest] corresponds to `self.sorted().take(k)`, in both semantics and complexity.
2747
- /// Particularly, the comparison is not cloned.
2768
+ /// Particularly, a custom heap implementation ensures the comparison is not cloned.
2748
2769
#[ cfg( feature = "use_alloc" ) ]
2749
2770
fn k_smallest_by < F > ( self , k : usize , cmp : F ) -> VecIntoIter < Self :: Item >
2750
2771
where
@@ -2766,11 +2787,13 @@ pub trait Itertools : Iterator {
2766
2787
F : Fn ( & Self :: Item ) -> K ,
2767
2788
K : Ord ,
2768
2789
{
2769
- self . k_smallest_by ( k, |a, b| key ( & a) . cmp ( & key ( & b) ) )
2790
+ self . k_smallest_by ( k, |a, b| key ( a) . cmp ( & key ( b) ) )
2770
2791
}
2771
2792
2772
2793
/// Sort the k largest elements into a new iterator, in descending order.
2773
- /// Functionally equivalent to `k_smallest` with a reversed `Ord`
2794
+ /// Semantically equivalent to `k_smallest` with a reversed `Ord`.
2795
+ /// However, this is implemented by way of a custom binary heap,
2796
+ /// which does not have the same performance characteristics for very large `T`.
2774
2797
/// ```
2775
2798
/// use itertools::Itertools;
2776
2799
///
@@ -2793,7 +2816,7 @@ pub trait Itertools : Iterator {
2793
2816
}
2794
2817
2795
2818
/// Sort the k largest elements into a new iterator using the provided comparison.
2796
- /// Functionally equivalent to `k_smallest ` with a reversed `Ord`
2819
+ /// Functionally equivalent to `k_smallest_by` with a reversed comparison.
2797
2820
#[ cfg( feature = "use_alloc" ) ]
2798
2821
fn k_largest_by < F > ( self , k : usize , cmp : F ) -> VecIntoIter < Self :: Item >
2799
2822
where
0 commit comments