Skip to content

Commit ec6924d

Browse files
committed
Using alloc in k_smallest to retain performance
1 parent 5d2c698 commit ec6924d

File tree

1 file changed

+29
-6
lines changed

1 file changed

+29
-6
lines changed

src/lib.rs

+29-6
Original file line numberDiff line numberDiff line change
@@ -2732,19 +2732,40 @@ pub trait Itertools : Iterator {
27322732
/// itertools::assert_equal(five_smallest, 0..5);
27332733
/// ```
27342734
#[cfg(feature = "use_alloc")]
2735-
fn k_smallest(self, k: usize) -> VecIntoIter<Self::Item>
2735+
fn k_smallest(mut self, k: usize) -> VecIntoIter<Self::Item>
27362736
where
27372737
Self: Sized,
27382738
Self::Item: Ord,
27392739
{
2740-
crate::k_smallest::k_smallest_general(self, k, Self::Item::cmp)
2740+
// The stdlib heap has optimised handling of "holes", which is not included in our heap implementation in k_smallest_general.
2741+
// While the difference is unlikely to have practical impact unless `T` is very large, this method uses the stdlib structure
2742+
// to maintain performance compared to previous versions of the crate.
2743+
use alloc::collections::BinaryHeap;
2744+
2745+
if k == 0 {
2746+
return vec![].into_iter();
2747+
}
2748+
2749+
let mut heap = self.by_ref().take(k).collect::<BinaryHeap<_>>();
2750+
2751+
self.for_each(|i| {
2752+
debug_assert_eq!(heap.len(), k);
2753+
// Equivalent to heap.push(min(i, heap.pop())) but more efficient.
2754+
// This should be done with a single `.peek_mut().unwrap()` but
2755+
// `PeekMut` sifts down unconditionally on Rust 1.46.0 and prior.
2756+
if *heap.peek().unwrap() > i {
2757+
*heap.peek_mut().unwrap() = i;
2758+
}
2759+
});
2760+
2761+
heap.into_sorted_vec().into_iter()
27412762
}
27422763

27432764
/// Sort the k smallest elements into a new iterator using the provided comparison.
27442765
///
27452766
/// This corresponds to `self.sorted_by(cmp).take(k)` in the same way that
27462767
/// [Itertools::k_smallest] corresponds to `self.sorted().take(k)`, in both semantics and complexity.
2747-
/// Particularly, the comparison is not cloned.
2768+
/// Particularly, a custom heap implementation ensures the comparison is not cloned.
27482769
#[cfg(feature = "use_alloc")]
27492770
fn k_smallest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item>
27502771
where
@@ -2766,11 +2787,13 @@ pub trait Itertools : Iterator {
27662787
F: Fn(&Self::Item) -> K,
27672788
K: Ord,
27682789
{
2769-
self.k_smallest_by(k, |a,b| key(&a).cmp(&key(&b)))
2790+
self.k_smallest_by(k, |a, b| key(a).cmp(&key(b)))
27702791
}
27712792

27722793
/// Sort the k largest elements into a new iterator, in descending order.
2773-
/// Functionally equivalent to `k_smallest` with a reversed `Ord`
2794+
/// Semantically equivalent to `k_smallest` with a reversed `Ord`.
2795+
/// However, this is implemented by way of a custom binary heap
2796+
/// which does not have the same performance characteristics for very large `T`.
27742797
/// ```
27752798
/// use itertools::Itertools;
27762799
///
@@ -2793,7 +2816,7 @@ pub trait Itertools : Iterator {
27932816
}
27942817

27952818
/// Sort the k largest elements into a new iterator using the provided comparison.
2796-
/// Functionally equivalent to `k_smallest` with a reversed `Ord`
2819+
/// Functionally equivalent to `k_smallest_by` with a reversed `Ord`.
27972820
#[cfg(feature = "use_alloc")]
27982821
fn k_largest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item>
27992822
where

0 commit comments

Comments
 (0)