Skip to content

Add BufRead::read_while #70772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions src/libstd/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1661,6 +1661,32 @@ fn read_until<R: BufRead + ?Sized>(r: &mut R, delim: u8, buf: &mut Vec<u8>) -> R
}
}

// Appends bytes from `r` to `buf` for as long as `predicate` returns `true`.
//
// `predicate` is called on each buffered byte in order. Reading stops at the
// first byte it rejects (that byte is left unconsumed in `r`) or at EOF.
// Returns the number of bytes appended to `buf`.
//
// A `fill_buf` error of kind `ErrorKind::Interrupted` is retried; any other
// error is returned immediately, with the bytes read so far already in `buf`.
fn read_while<R, P>(r: &mut R, buf: &mut Vec<u8>, mut predicate: P) -> Result<usize>
where
    R: BufRead + ?Sized,
    P: FnMut(u8) -> bool,
{
    let mut len = 0;
    loop {
        let available = match r.fill_buf() {
            Ok(bytes) => bytes,
            // An interrupted read is not a real failure: retry the fill.
            Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        // An empty buffer means the reader has reached EOF.
        if available.is_empty() {
            return Ok(len);
        }

        // Index of the first rejected byte, if this chunk contains one.
        let rejected = available.iter().position(|&b| !predicate(b));
        let cutoff = rejected.unwrap_or(available.len());

        buf.extend_from_slice(&available[..cutoff]);
        // Consume only the accepted prefix so the rejected byte stays readable.
        r.consume(cutoff);
        len += cutoff;

        if rejected.is_some() {
            return Ok(len);
        }
    }
}

/// A `BufRead` is a type of `Read`er which has an internal buffer, allowing it
/// to perform extra ways of reading.
///
Expand Down Expand Up @@ -1835,6 +1861,54 @@ pub trait BufRead: Read {
read_until(self, byte, buf)
}

/// Reads bytes into `buf` for as long as `predicate` returns `true`.
///
/// `predicate` is called on each byte in order. Every byte it accepts is
/// appended to `buf`; reading stops at the first byte for which it returns
/// `false`, or when the reader has no more data. The rejected byte is not
/// consumed and stays in the reader, as the example below shows.
///
/// Returns the number of bytes appended to `buf`.
///
/// # Errors
///
/// If this function encounters an error of the kind
/// `ErrorKind::Interrupted` then the error is ignored and the operation
/// will continue.
///
/// If any other read error is encountered then this function immediately
/// returns. Any bytes which have already been read will be appended to
/// `buf`.
///
/// # Examples
///
/// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In
/// this example, we use [`Cursor`] to read bytes in a byte slice until
/// we encounter a hyphen, which is left in the reader:
///
/// ```
/// #![feature(buf_read_while)]
///
/// use std::io::{self, BufRead, Read};
///
/// let mut cursor = io::Cursor::new(b"lorem-ipsum");
/// let mut buf = vec![];
///
/// cursor.read_while(&mut buf, |b| b != b'-')
/// .expect("reading from cursor won't fail");
/// assert_eq!(buf, b"lorem");
///
/// let mut buf = vec![];
/// cursor.read_to_end(&mut buf)
/// .expect("reading from cursor won't fail");
/// assert_eq!(buf, b"-ipsum");
/// ```
#[unstable(feature = "buf_read_while", issue = "none")]
fn read_while<P>(&mut self, buf: &mut Vec<u8>, predicate: P) -> Result<usize>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably worth updating read_until to delegate to this method.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

read_until uses memchr to scan for the token; it seems likely this operation is vectorized in many cases, and I'm afraid moving to read_while's linear scan could inadvertently cause a slowdown.

where
Self: Sized,
P: FnMut(u8) -> bool,
{
// Delegates to the free function `read_while` in this module.
read_while(self, buf, predicate)
}

/// Read all bytes until a newline (the 0xA byte) is reached, and append
/// them to the provided buffer.
///
Expand Down Expand Up @@ -2448,6 +2522,27 @@ mod tests {
assert_eq!(v, []);
}

#[test]
fn read_while() {
    // Each case: (input, byte the predicate accepts, bytes expected in the
    // output buffer, bytes expected to remain unread afterwards).
    let cases = [
        ("aaaaa", b'a', &b"aaaaa"[..], 0),
        ("ab", b'a', &b"a"[..], 1),
        ("ab", b'b', &b""[..], 2),
    ];

    for &(input, accepted, expected, remaining) in cases.iter() {
        let mut cursor = Cursor::new(input);
        let mut out = Vec::new();

        let count = cursor.read_while(&mut out, |byte| byte == accepted).unwrap();

        assert_eq!(count, expected.len());
        assert_eq!(&out[..], expected);
        // Whatever was not accepted must still be available from the reader.
        assert_eq!(cursor.fill_buf().unwrap().len(), remaining);
    }
}

#[test]
fn split() {
let buf = Cursor::new(&b"12"[..]);
Expand Down