Skip to content

Commit c8b9db1

Browse files
authored
Rollup merge of rust-lang#94667 - frank-king:feature/iter_map_windows, r=Mark-Simulacrum
Add `Iterator::map_windows` Tracking issue: rust-lang#87155. This is inherited from the old PR rust-lang#82413. Unlike rust-lang#82413, this PR implements the `MapWindows` to be lazy: only when pulling from the outer iterator, `.next()` of the inner iterator will be called. ## Implementaion Steps - [x] Implement `MapWindows` to keep the iterators' [*Laziness*](https://doc.rust-lang.org/std/iter/index.html#laziness) contract. - [x] Fix the known bug of memory access error. - [ ] Full specialization of iterator-related traits for `MapWindows`. - [x] `Iterator::size_hint`, - [x] ~`Iterator::count`~, - [x] `ExactSizeIterator` (when `I: ExactSizeIterator`), - [x] ~`TrustedLen` (when `I: TrustedLen`)~, - [x] `FusedIterator`, - [x] ~`Iterator::advance_by`~, - [x] ~`Iterator::nth`~, - [ ] ... - [ ] More tests and docs. ## Unresolved Questions: - [ ] Is there any more iterator-related traits should be specialized? - [ ] Is the double-space buffer worth? - [ ] Should there be `rmap_windows` or something else? - [ ] Taking GAT for consideration, should the mapper function be `FnMut(&[I::Item; N]) -> R` or something like `FnMut(ArrayView<'_, I::Item, N>) -> R`? Where `ArrayView` is mentioned in rust-lang/generic-associated-types-initiative#2. - It can save memory, only the same size as the array window is needed, - It is more efficient, which requires less data copies, - It is possibly compatible with the GATified version of `LendingIterator::windows`. - But it prevents the array pattern matching like `iter.map_windows(|_arr: [_; N]| ())`, unless we extend the array pattern to allow matching the `ArrayView`.
2 parents fdd0301 + f0f9d88 commit c8b9db1

File tree

7 files changed

+739
-1
lines changed

7 files changed

+739
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
use crate::{
2+
fmt,
3+
iter::{ExactSizeIterator, FusedIterator},
4+
mem::{self, MaybeUninit},
5+
ptr,
6+
};
7+
8+
/// An iterator over the mapped windows of another iterator.
9+
///
10+
/// This `struct` is created by the [`Iterator::map_windows`]. See its
11+
/// documentation for more information.
12+
#[must_use = "iterators are lazy and do nothing unless consumed"]
13+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
14+
pub struct MapWindows<I: Iterator, F, const N: usize> {
15+
f: F,
16+
inner: MapWindowsInner<I, N>,
17+
}
18+
19+
struct MapWindowsInner<I: Iterator, const N: usize> {
20+
// We fuse the inner iterator because there shouldn't be "holes" in
21+
// the sliding window. Once the iterator returns a `None`, we make
22+
// our `MapWindows` iterator return `None` forever.
23+
iter: Option<I>,
24+
// Since iterators are assumed lazy, i.e. it only yields an item when
25+
// `Iterator::next()` is called, and `MapWindows` is not an exception.
26+
//
27+
// Before the first iteration, we keep the buffer `None`. When the user
28+
// first call `next` or other methods that makes the iterator advance,
29+
// we collect the first `N` items yielded from the inner iterator and
30+
// put it into the buffer.
31+
//
32+
// When the inner iterator has returned a `None` (i.e. fused), we take
33+
// away this `buffer` and leave it `None` to reclaim its resources.
34+
//
35+
// FIXME: should we shrink the size of `buffer` using niche optimization?
36+
buffer: Option<Buffer<I::Item, N>>,
37+
}
38+
39+
// `Buffer` uses two times of space to reduce moves among the iterations.
40+
// `Buffer<T, N>` is semantically `[MaybeUninit<T>; 2 * N]`. However, due
41+
// to limitations of const generics, we use this different type. Note that
42+
// it has the same underlying memory layout.
43+
struct Buffer<T, const N: usize> {
44+
// Invariant: `self.buffer[self.start..self.start + N]` is initialized,
45+
// with all other elements being uninitialized. This also
46+
// implies that `self.start <= N`.
47+
buffer: [[MaybeUninit<T>; N]; 2],
48+
start: usize,
49+
}
50+
51+
impl<I: Iterator, F, const N: usize> MapWindows<I, F, N> {
52+
pub(in crate::iter) fn new(iter: I, f: F) -> Self {
53+
assert!(N != 0, "array in `Iterator::map_windows` must contain more than 0 elements");
54+
55+
// Only ZST arrays' length can be so large.
56+
if mem::size_of::<I::Item>() == 0 {
57+
assert!(
58+
N.checked_mul(2).is_some(),
59+
"array size of `Iterator::map_windows` is too large"
60+
);
61+
}
62+
63+
Self { inner: MapWindowsInner::new(iter), f }
64+
}
65+
}
66+
67+
impl<I: Iterator, const N: usize> MapWindowsInner<I, N> {
68+
#[inline]
69+
fn new(iter: I) -> Self {
70+
Self { iter: Some(iter), buffer: None }
71+
}
72+
73+
fn next_window(&mut self) -> Option<&[I::Item; N]> {
74+
let iter = self.iter.as_mut()?;
75+
match self.buffer {
76+
// It is the first time to advance. We collect
77+
// the first `N` items from `self.iter` to initialize `self.buffer`.
78+
None => self.buffer = Buffer::try_from_iter(iter),
79+
Some(ref mut buffer) => match iter.next() {
80+
None => {
81+
// Fuse the inner iterator since it yields a `None`.
82+
self.iter.take();
83+
self.buffer.take();
84+
}
85+
// Advance the iterator. We first call `next` before changing our buffer
86+
// at all. This means that if `next` panics, our invariant is upheld and
87+
// our `Drop` impl drops the correct elements.
88+
Some(item) => buffer.push(item),
89+
},
90+
}
91+
self.buffer.as_ref().map(Buffer::as_array_ref)
92+
}
93+
94+
fn size_hint(&self) -> (usize, Option<usize>) {
95+
let Some(ref iter) = self.iter else { return (0, Some(0)) };
96+
let (lo, hi) = iter.size_hint();
97+
if self.buffer.is_some() {
98+
// If the first `N` items are already yielded by the inner iterator,
99+
// the size hint is then equal to the that of the inner iterator's.
100+
(lo, hi)
101+
} else {
102+
// If the first `N` items are not yet yielded by the inner iterator,
103+
// the first `N` elements should be counted as one window, so both bounds
104+
// should subtract `N - 1`.
105+
(lo.saturating_sub(N - 1), hi.map(|hi| hi.saturating_sub(N - 1)))
106+
}
107+
}
108+
}
109+
110+
impl<T, const N: usize> Buffer<T, N> {
111+
fn try_from_iter(iter: &mut impl Iterator<Item = T>) -> Option<Self> {
112+
let first_half = crate::array::iter_next_chunk(iter).ok()?;
113+
let buffer = [MaybeUninit::new(first_half).transpose(), MaybeUninit::uninit_array()];
114+
Some(Self { buffer, start: 0 })
115+
}
116+
117+
#[inline]
118+
fn buffer_ptr(&self) -> *const MaybeUninit<T> {
119+
self.buffer.as_ptr().cast()
120+
}
121+
122+
#[inline]
123+
fn buffer_mut_ptr(&mut self) -> *mut MaybeUninit<T> {
124+
self.buffer.as_mut_ptr().cast()
125+
}
126+
127+
#[inline]
128+
fn as_array_ref(&self) -> &[T; N] {
129+
debug_assert!(self.start + N <= 2 * N);
130+
131+
// SAFETY: our invariant guarantees these elements are initialized.
132+
unsafe { &*self.buffer_ptr().add(self.start).cast() }
133+
}
134+
135+
#[inline]
136+
fn as_uninit_array_mut(&mut self) -> &mut MaybeUninit<[T; N]> {
137+
debug_assert!(self.start + N <= 2 * N);
138+
139+
// SAFETY: our invariant guarantees these elements are in bounds.
140+
unsafe { &mut *self.buffer_mut_ptr().add(self.start).cast() }
141+
}
142+
143+
/// Pushes a new item `next` to the back, and pops the front-most one.
144+
///
145+
/// All the elements will be shifted to the front end when pushing reaches
146+
/// the back end.
147+
fn push(&mut self, next: T) {
148+
let buffer_mut_ptr = self.buffer_mut_ptr();
149+
debug_assert!(self.start + N <= 2 * N);
150+
151+
let to_drop = if self.start == N {
152+
// We have reached the end of our buffer and have to copy
153+
// everything to the start. Example layout for N = 3.
154+
//
155+
// 0 1 2 3 4 5 0 1 2 3 4 5
156+
// ┌───┬───┬───┬───┬───┬───┐ ┌───┬───┬───┬───┬───┬───┐
157+
// │ - │ - │ - │ a │ b │ c │ -> │ b │ c │ n │ - │ - │ - │
158+
// └───┴───┴───┴───┴───┴───┘ └───┴───┴───┴───┴───┴───┘
159+
// ↑ ↑
160+
// start start
161+
162+
// SAFETY: the two pointers are valid for reads/writes of N -1
163+
// elements because our array's size is semantically 2 * N. The
164+
// regions also don't overlap for the same reason.
165+
//
166+
// We leave the old elements in place. As soon as `start` is set
167+
// to 0, we treat them as uninitialized and treat their copies
168+
// as initialized.
169+
let to_drop = unsafe {
170+
ptr::copy_nonoverlapping(buffer_mut_ptr.add(self.start + 1), buffer_mut_ptr, N - 1);
171+
(*buffer_mut_ptr.add(N - 1)).write(next);
172+
buffer_mut_ptr.add(self.start)
173+
};
174+
self.start = 0;
175+
to_drop
176+
} else {
177+
// SAFETY: `self.start` is < N as guaranteed by the invariant
178+
// plus the check above. Even if the drop at the end panics,
179+
// the invariant is upheld.
180+
//
181+
// Example layout for N = 3:
182+
//
183+
// 0 1 2 3 4 5 0 1 2 3 4 5
184+
// ┌───┬───┬───┬───┬───┬───┐ ┌───┬───┬───┬───┬───┬───┐
185+
// │ - │ a │ b │ c │ - │ - │ -> │ - │ - │ b │ c │ n │ - │
186+
// └───┴───┴───┴───┴───┴───┘ └───┴───┴───┴───┴───┴───┘
187+
// ↑ ↑
188+
// start start
189+
//
190+
let to_drop = unsafe {
191+
(*buffer_mut_ptr.add(self.start + N)).write(next);
192+
buffer_mut_ptr.add(self.start)
193+
};
194+
self.start += 1;
195+
to_drop
196+
};
197+
198+
// SAFETY: the index is valid and this is element `a` in the
199+
// diagram above and has not been dropped yet.
200+
unsafe { ptr::drop_in_place(to_drop.cast::<T>()) };
201+
}
202+
}
203+
204+
impl<T: Clone, const N: usize> Clone for Buffer<T, N> {
205+
fn clone(&self) -> Self {
206+
let mut buffer = Buffer {
207+
buffer: [MaybeUninit::uninit_array(), MaybeUninit::uninit_array()],
208+
start: self.start,
209+
};
210+
buffer.as_uninit_array_mut().write(self.as_array_ref().clone());
211+
buffer
212+
}
213+
}
214+
215+
impl<I, const N: usize> Clone for MapWindowsInner<I, N>
216+
where
217+
I: Iterator + Clone,
218+
I::Item: Clone,
219+
{
220+
fn clone(&self) -> Self {
221+
Self { iter: self.iter.clone(), buffer: self.buffer.clone() }
222+
}
223+
}
224+
225+
impl<T, const N: usize> Drop for Buffer<T, N> {
226+
fn drop(&mut self) {
227+
// SAFETY: our invariant guarantees that N elements starting from
228+
// `self.start` are initialized. We drop them here.
229+
unsafe {
230+
let initialized_part: *mut [T] = crate::ptr::slice_from_raw_parts_mut(
231+
self.buffer_mut_ptr().add(self.start).cast(),
232+
N,
233+
);
234+
ptr::drop_in_place(initialized_part);
235+
}
236+
}
237+
}
238+
239+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
240+
impl<I, F, R, const N: usize> Iterator for MapWindows<I, F, N>
241+
where
242+
I: Iterator,
243+
F: FnMut(&[I::Item; N]) -> R,
244+
{
245+
type Item = R;
246+
247+
fn next(&mut self) -> Option<Self::Item> {
248+
let window = self.inner.next_window()?;
249+
let out = (self.f)(window);
250+
Some(out)
251+
}
252+
253+
fn size_hint(&self) -> (usize, Option<usize>) {
254+
self.inner.size_hint()
255+
}
256+
}
257+
258+
// Note that even if the inner iterator not fused, the `MapWindows` is still fused,
259+
// because we don't allow "holes" in the mapping window.
260+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
261+
impl<I, F, R, const N: usize> FusedIterator for MapWindows<I, F, N>
262+
where
263+
I: Iterator,
264+
F: FnMut(&[I::Item; N]) -> R,
265+
{
266+
}
267+
268+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
269+
impl<I, F, R, const N: usize> ExactSizeIterator for MapWindows<I, F, N>
270+
where
271+
I: ExactSizeIterator,
272+
F: FnMut(&[I::Item; N]) -> R,
273+
{
274+
}
275+
276+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
277+
impl<I: Iterator + fmt::Debug, F, const N: usize> fmt::Debug for MapWindows<I, F, N> {
278+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
279+
f.debug_struct("MapWindows").field("iter", &self.inner.iter).finish()
280+
}
281+
}
282+
283+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
284+
impl<I, F, const N: usize> Clone for MapWindows<I, F, N>
285+
where
286+
I: Iterator + Clone,
287+
F: Clone,
288+
I::Item: Clone,
289+
{
290+
fn clone(&self) -> Self {
291+
Self { f: self.f.clone(), inner: self.inner.clone() }
292+
}
293+
}

library/core/src/iter/adapters/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ mod inspect;
1616
mod intersperse;
1717
mod map;
1818
mod map_while;
19+
mod map_windows;
1920
mod peekable;
2021
mod rev;
2122
mod scan;
@@ -57,6 +58,9 @@ pub use self::intersperse::{Intersperse, IntersperseWith};
5758
#[stable(feature = "iter_map_while", since = "1.57.0")]
5859
pub use self::map_while::MapWhile;
5960

61+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
62+
pub use self::map_windows::MapWindows;
63+
6064
#[unstable(feature = "trusted_random_access", issue = "none")]
6165
pub use self::zip::TrustedRandomAccess;
6266

library/core/src/iter/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,8 @@ pub use self::adapters::Copied;
434434
pub use self::adapters::Flatten;
435435
#[stable(feature = "iter_map_while", since = "1.57.0")]
436436
pub use self::adapters::MapWhile;
437+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
438+
pub use self::adapters::MapWindows;
437439
#[unstable(feature = "inplace_iteration", issue = "none")]
438440
pub use self::adapters::SourceIter;
439441
#[stable(feature = "iterator_step_by", since = "1.28.0")]

0 commit comments

Comments
 (0)