feat: Add separated_list_m_n

jtojnar · jtojnar · commit 2421da1aecbf · 2024-01-23T11:32:50.000+01:00
This is useful to naïvely represent some grammar productions (e.g. `IPv6address` from [RFC 3986]): ```abnf IPv6address /= [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 ``` turns into ```diff tuple(( - opt(tuple((many_m_n(0, 2, tuple((h16, char(':')))), h16))), + separated_list_m_n(0, 3, char(':'), h16), tag("::"), many_m_n(2, 2, tuple((h16, char(':')))), ls32, )) ``` [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986#appendix-A
diff --git a/doc/choosing_a_combinator.md b/doc/choosing_a_combinator.md
@@ -50,6 +50,7 @@ Those are used to recognize the lowest level elements of your grammar, like, "he
 | [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec|
 | [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second|
 | [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated_list1` works like `separated_list0` but must returns at least one element|
+| [separated_list_m_n](https://docs.rs/nom/latest/nom/multi/fn.separated_list_m_n.html) | `separated_list_m_n(2, 3, tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` | Alternately applies the item parser and the separator parser and returns the list of items in a Vec if the number is between m and n (inclusive).|
 | [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. The `fold_many1` version must apply the child parser at least one time|
 | [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value|
 | [length_count](https://docs.rs/nom/latest/nom/multi/fn.length_count.html) | `length_count(number, tag("ab"))` | `"2ababab"` | `Ok(("ab", vec!["ab", "ab"]))` |Gets a number from the first parser, then applies the second parser that many times|
diff --git a/src/multi/mod.rs b/src/multi/mod.rs
@@ -562,6 +562,148 @@ where
   }
 }
 
+/// Parses a list of `min` to `max` elements (both inclusive) separated by `separator`.
+///
+/// The element and the separator parsers are applied alternately. The list ends as soon as
+/// one of them returns [`Err::Error`] or once `max` elements were produced, and the elements
+/// collected so far are returned. Fails if fewer than `min` elements were produced.
+///
+/// To chain an error up instead of ending the list, see [`cut`][crate::combinator::cut].
+///
+/// # Arguments
+/// * `min` The minimum number of elements.
+/// * `max` The maximum number of elements.
+/// * `separator` Parses the separator between list elements.
+/// * `parser` Parses the elements of the list.
+///
+/// ```rust
+/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult, Parser};
+/// use nom::multi::separated_list_m_n;
+/// use nom::bytes::complete::tag;
+///
+/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
+///   separated_list_m_n(2, 3, tag("|"), tag("abc")).parse(s)
+/// }
+///
+/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"])));
+/// assert_eq!(parser("abc|abc|def"), Ok(("|def", vec!["abc", "abc"])));
+/// assert_eq!(parser("abc1abc"), Err(Err::Error(Error::new("1abc", ErrorKind::SeparatedList))));
+/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
+/// assert_eq!(parser("def|abc"), Err(Err::Error(Error::new("def|abc", ErrorKind::Tag))));
+/// ```
+#[cfg(feature = "alloc")]
+#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
+pub fn separated_list_m_n<I, E, F, G>(
+  min: usize,
+  max: usize,
+  separator: G,
+  parser: F,
+) -> impl Parser<I, Output = Vec<<F as Parser<I>>::Output>, Error = E>
+where
+  E: ParseError<I>,
+  F: Parser<I, Error = E>,
+  G: Parser<I, Error = E>,
+  I: Clone + InputLength,
+{
+  SeparatedListMN {
+    min,
+    max,
+    separator,
+    parser,
+  }
+}
+
+#[cfg(feature = "alloc")]
+/// Parser implementation for the [`separated_list_m_n`] combinator; stores its four arguments
+pub struct SeparatedListMN<F, G> {
+  parser: F, // parses the elements of the list
+  separator: G, // parses the separator between two elements
+  min: usize, // minimum number of elements
+  max: usize, // maximum number of elements
+}
+
+#[cfg(feature = "alloc")]
+impl<I, E: ParseError<I>, F, G> Parser<I> for SeparatedListMN<F, G>
+where
+  I: Clone + InputLength,
+  F: Parser<I, Error = E>,
+  G: Parser<I, Error = E>,
+{
+  type Output = Vec<<F as Parser<I>>::Output>;
+  type Error = <F as Parser<I>>::Error;
+
+  /// Parses `element (separator element)*` and collects between `min` and `max` elements.
+  ///
+  /// Parsing stops once `max` elements were collected; the remaining input is left untouched.
+  fn process<OM: OutputMode>(
+    &mut self,
+    mut i: I,
+  ) -> crate::PResult<OM, I, Self::Output, Self::Error> {
+    // Mirrors `many_m_n`: an empty `min..=max` range is a usage error, not a parse error.
+    if self.min > self.max {
+      let e = <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList);
+      return Err(Err::Failure(e));
+    }
+
+    let mut res = OM::Output::bind(crate::lib::std::vec::Vec::new);
+    if self.max == 0 {
+      return Ok((i, res));
+    }
+
+    match self.parser.process::<OM>(i.clone()) {
+      // Only a recoverable error can mean "empty list"; `Failure`/`Incomplete` propagate.
+      Err(Err::Error(_)) if self.min == 0 => return Ok((i, res)),
+      Err(e) => return Err(e),
+      Ok((i1, o)) => {
+        res = OM::Output::combine(res, o, |mut res, o| {
+          res.push(o);
+          res
+        });
+        i = i1;
+      }
+    }
+
+    // Counted outside of `combine`, whose closure never runs when the output mode is `Check`.
+    let mut count = 1usize;
+    while count < self.max {
+      let len = i.input_len();
+      match self.separator.process::<OM>(i.clone()) {
+        Err(Err::Error(_)) => break,
+        Err(e) => return Err(e),
+        Ok((i1, _)) => {
+          // infinite loop check: the separator must always consume
+          if i1.input_len() == len {
+            return Err(Err::Error(OM::Error::bind(|| {
+              <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList)
+            })));
+          }
+
+          match self.parser.process::<OM>(i1) {
+            // A separator that is not followed by an element is left unconsumed.
+            Err(Err::Error(_)) => break,
+            Err(e) => return Err(e),
+            Ok((i2, o)) => {
+              res = OM::Output::combine(res, o, |mut res, o| {
+                res.push(o);
+                res
+              });
+              count += 1;
+              i = i2;
+            }
+          }
+        }
+      }
+    }
+
+    if count < self.min {
+      return Err(Err::Error(OM::Error::bind(|| {
+        <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList)
+      })));
+    }
+    Ok((i, res))
+  }
+}
+
 /// Repeats the embedded parser `m..=n` times
 ///
 /// This stops before `n` when the parser returns [`Err::Error`]  and returns the results that were accumulated. To instead chain an error up, see
diff --git a/src/multi/tests.rs b/src/multi/tests.rs
@@ -15,7 +15,7 @@ use crate::{
   lib::std::vec::Vec,
   multi::{
     count, fold, fold_many0, fold_many1, fold_many_m_n, length_count, many, many0, many1, many_m_n,
-    many_till, separated_list0, separated_list1,
+    many_till, separated_list0, separated_list1, separated_list_m_n,
   },
 };
 
@@ -103,6 +103,87 @@ fn separated_list1_test() {
   assert_eq!(multi(h), Err(Err::Incomplete(Needed::new(1))));
 }
 
+#[test]
+#[cfg(feature = "alloc")]
+fn separated_list_m_n_test() {
+  fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+    separated_list_m_n(2, 4, tag(","), tag("abcd")).parse(i)
+  }
+  fn multi_empty(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+    separated_list_m_n(2, 4, tag(","), tag("")).parse(i)
+  }
+  fn empty_sep(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+    separated_list_m_n(2, 4, tag(""), tag("abc")).parse(i)
+  }
+  fn multi_longsep(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+    separated_list_m_n(2, 4, tag(".."), tag("abcd")).parse(i)
+  }
+  fn multi0(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+    separated_list_m_n(0, 3, tag(","), tag("abc")).parse(i)
+  }
+
+  let no_items = &b"azerty"[..];
+  let less_items = &b"abcdef"[..];
+  let lower_bound = &b"abcd,abcdef"[..];
+  let empty_items = &b",,abc"[..];
+  let trailing_sep = &b"abcd,abcd,ef"[..];
+  let incomplete_less_items = &b"abc"[..];
+  let incomplete_sep = &b"abcd."[..];
+  let incomplete_item = &b"abcd,abc"[..];
+  let not_separated = &b"abcabc"[..];
+
+  // Without any element, the error of the element parser itself is reported.
+  assert_eq!(
+    multi(no_items),
+    Err(Err::Error(error_position!(&no_items[0..], ErrorKind::Tag)))
+  );
+
+  // One element is below the minimum: the error points right after that element.
+  assert_eq!(
+    multi(less_items),
+    Err(Err::Error(error_position!(
+      &less_items[4..],
+      ErrorKind::SeparatedList
+    )))
+  );
+
+  // Exactly the minimum number of elements, followed by unrelated input.
+  assert_eq!(
+    multi(lower_bound),
+    Ok((&b"ef"[..], vec![&b"abcd"[..], &b"abcd"[..]]))
+  );
+
+  // Elements may be empty as long as the separator consumes input.
+  assert_eq!(
+    multi_empty(empty_items),
+    Ok((&b"abc"[..], vec![&b""[..], &b""[..], &b""[..]]))
+  );
+
+  // A separator that consumes nothing is rejected to avoid looping forever.
+  assert_eq!(
+    empty_sep(not_separated),
+    Err(Err::Error(error_position!(
+      &not_separated[3..],
+      ErrorKind::SeparatedList
+    )))
+  );
+
+  // A trailing separator is left in the remaining input.
+  assert_eq!(
+    multi(trailing_sep),
+    Ok((&b",ef"[..], vec![&b"abcd"[..], &b"abcd"[..]]))
+  );
+
+  // `Incomplete` from the element or the separator parser is passed through.
+  let incomplete = Err(Err::Incomplete(Needed::new(1)));
+  assert_eq!(multi(incomplete_less_items), incomplete);
+  assert_eq!(multi_longsep(incomplete_sep), incomplete);
+  assert_eq!(multi(incomplete_item), incomplete);
+
+  // With `min == 0`, no element at all is a success that consumes nothing.
+  assert_eq!(multi0(no_items), Ok((&no_items[0..], vec![])));
+}
+
 #[test]
 #[cfg(feature = "alloc")]
 fn many0_test() {