Skip to content

Commit 2421da1

Browse files
committed
feat: Add separated_list_m_n
This is useful to naïvely represent some grammar productions (e.g. `IPv6address` from [RFC 3986]): ```abnf IPv6address /= [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 ``` turns to ```diff tuple(( - opt(tuple((many_m_n(0, 2, tuple((h16, char(':')))), h16))), + separated_list_m_n(0, 3, char(':'), h16), tag("::"), many_m_n(2, 2, tuple((h16, char(':')))), ls32, )) ``` [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986#appendix-A
1 parent e87c7da commit 2421da1

File tree

3 files changed

+225
-1
lines changed

3 files changed

+225
-1
lines changed

doc/choosing_a_combinator.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Those are used to recognize the lowest level elements of your grammar, like, "he
5050
| [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec|
5151
| [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second|
5252
| [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated_list1` works like `separated_list0` but must return at least one element|
53+
| [separated_list_m_n](https://docs.rs/nom/latest/nom/multi/fn.separated_list_m_n.html) | `separated_list_m_n(2, 3, tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` | Alternately applies the item parser and the separator parser and returns the list of items in a Vec if the number is between m and n (inclusive).|
5354
| [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. The `fold_many1` version must apply the child parser at least one time|
5455
| [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value|
5556
| [length_count](https://docs.rs/nom/latest/nom/multi/fn.length_count.html) | `length_count(number, tag("ab"))` | `"2ababab"` | `Ok(("ab", vec!["ab", "ab"]))` |Gets a number from the first parser, then applies the second parser that many times|

src/multi/mod.rs

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,148 @@ where
562562
}
563563
}
564564

565+
/// Parses a separated list holding between `min` and `max` elements (both inclusive).
///
/// The element parser and the separator parser are applied in alternation, starting
/// with the element parser. The combinator fails when fewer than `min` elements could
/// be produced.
///
/// Collection ends as soon as either parser returns [`Err::Error`] or `max` elements
/// have been gathered; the elements accumulated up to that point are returned. To turn
/// such a recoverable error into a hard failure instead, wrap the relevant parser in
/// [`cut`][crate::combinator::cut].
///
/// # Arguments
/// * `min` The minimum number of elements.
/// * `max` The maximum number of elements.
/// * `separator` Parses the separator between list elements.
/// * `parser` Parses the elements of the list.
///
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult, Parser};
/// use nom::multi::separated_list_m_n;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
///   separated_list_m_n(2, 3, tag("|"), tag("abc")).parse(s)
/// }
///
/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"])));
/// assert_eq!(parser("abc|abc|def"), Ok(("|def", vec!["abc", "abc"])));
/// assert_eq!(parser("abc1abc"), Err(Err::Error(Error::new("1abc", ErrorKind::SeparatedList))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
/// assert_eq!(parser("def|abc"), Err(Err::Error(Error::new("def|abc", ErrorKind::Tag))));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn separated_list_m_n<I, E, F, G>(
  min: usize,
  max: usize,
  separator: G,
  parser: F,
) -> impl Parser<I, Output = Vec<<F as Parser<I>>::Output>, Error = E>
where
  E: ParseError<I>,
  F: Parser<I, Error = E>,
  G: Parser<I, Error = E>,
  I: Clone + InputLength,
{
  // All of the parsing logic lives in the `Parser` impl of `SeparatedListMN`;
  // this function only bundles the configuration.
  SeparatedListMN {
    min,
    max,
    separator,
    parser,
  }
}
615+
616+
/// Parser implementation for the [separated_list_m_n] combinator
#[cfg(feature = "alloc")]
pub struct SeparatedListMN<F, G> {
  /// Parser applied to obtain each list element.
  parser: F,
  /// Parser applied between two consecutive elements; its output is discarded.
  separator: G,
  /// Minimum number of elements required for the list to be accepted.
  min: usize,
  /// Maximum number of elements accepted in the list.
  max: usize,
}
624+
625+
#[cfg(feature = "alloc")]
impl<I, E: ParseError<I>, F, G> Parser<I> for SeparatedListMN<F, G>
where
  I: Clone + InputLength,
  F: Parser<I, Error = E>,
  G: Parser<I, Error = E>,
{
  type Output = Vec<<F as Parser<I>>::Output>;
  type Error = <F as Parser<I>>::Error;

  /// Parses `element (separator element)*`, collecting between `self.min` and
  /// `self.max` elements (both inclusive).
  ///
  /// Behavior:
  /// * At most `self.max` elements are consumed. Once that many have been
  ///   collected, parsing stops and the rest of the input (including a
  ///   following separator) is left untouched. With `max == 0` nothing is
  ///   parsed at all.
  /// * A recoverable [`Err::Error`] from either parser ends the list. A
  ///   separator that is not followed by an element is *not* consumed: the
  ///   returned remaining input starts at that separator.
  /// * If the very first element fails with [`Err::Error`], the result is an
  ///   empty list when `min == 0`, otherwise the element parser's own error.
  /// * If the list ends with fewer than `self.min` elements after at least one
  ///   was parsed, an [`Err::Error`] of kind [`ErrorKind::SeparatedList`] is
  ///   returned, positioned where the list stopped. Consequently a
  ///   configuration with `min > max` can never succeed.
  /// * A separator that succeeds without consuming input yields an
  ///   [`Err::Error`] of kind [`ErrorKind::SeparatedList`] (infinite loop guard).
  /// * Any other error ([`Err::Incomplete`], [`Err::Failure`]) from either
  ///   parser is propagated unchanged, including on the first element.
  fn process<OM: OutputMode>(
    &mut self,
    mut i: I,
  ) -> crate::PResult<OM, I, Self::Output, Self::Error> {
    let mut res = OM::Output::bind(crate::lib::std::vec::Vec::new);
    // The element count is tracked outside of the `combine` closures: an output
    // mode that discards results is not obliged to run them, so counting inside
    // the closure would leave the count at zero in such a mode.
    let mut res_len = 0usize;

    // First element: it is not preceded by a separator. Skipped entirely when
    // no element is allowed.
    if self.max > 0 {
      match self.parser.process::<OM>(i.clone()) {
        Err(Err::Error(e)) => {
          // Only a recoverable error may be turned into an empty list.
          if self.min == 0 {
            return Ok((i, res));
          }
          return Err(Err::Error(e));
        }
        // `Incomplete` and `Failure` must never be swallowed.
        Err(e) => return Err(e),
        Ok((i1, o)) => {
          res = OM::Output::combine(res, o, |mut res, o| {
            res.push(o);
            res
          });
          res_len = 1;
          i = i1;
        }
      }
    }

    // Following elements: `separator` then `parser`, until `max` is reached or
    // one of the two parsers reports a recoverable error.
    while res_len < self.max {
      let len = i.input_len();
      match self.separator.process::<OM>(i.clone()) {
        Err(Err::Error(_)) => break,
        Err(e) => return Err(e),
        Ok((i1, _)) => {
          // infinite loop check: the separator must always consume
          if i1.input_len() == len {
            return Err(Err::Error(OM::Error::bind(|| {
              <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList)
            })));
          }

          match self.parser.process::<OM>(i1) {
            // `i` still points before the separator, so a trailing separator
            // is handed back to the caller as remaining input.
            Err(Err::Error(_)) => break,
            Err(e) => return Err(e),
            Ok((i2, o)) => {
              res = OM::Output::combine(res, o, |mut res, o| {
                res.push(o);
                res
              });
              res_len += 1;
              i = i2;
            }
          }
        }
      }
    }

    // The loop guarantees `res_len <= self.max`; only the lower bound is left
    // to verify.
    if res_len < self.min {
      return Err(Err::Error(OM::Error::bind(|| {
        <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList)
      })));
    }

    Ok((i, res))
  }
}
706+
565707
/// Repeats the embedded parser `m..=n` times
566708
///
567709
/// This stops before `n` when the parser returns [`Err::Error`] and returns the results that were accumulated. To instead chain an error up, see

src/multi/tests.rs

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use crate::{
1515
lib::std::vec::Vec,
1616
multi::{
1717
count, fold, fold_many0, fold_many1, fold_many_m_n, length_count, many, many0, many1, many_m_n,
18-
many_till, separated_list0, separated_list1,
18+
many_till, separated_list0, separated_list1, separated_list_m_n,
1919
},
2020
};
2121

@@ -103,6 +103,87 @@ fn separated_list1_test() {
103103
assert_eq!(multi(h), Err(Err::Incomplete(Needed::new(1))));
104104
}
105105

106+
// Checks `separated_list_m_n` against byte-slice inputs: too few items, bounds,
// empty items, non-consuming separators, trailing separators and incomplete
// input.
#[test]
#[cfg(feature = "alloc")]
fn separated_list_m_n_test() {
  // 2 to 4 `abcd` items separated by `,`.
  fn comma_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    separated_list_m_n(2, 4, tag(","), tag("abcd")).parse(input)
  }
  // 2 to 4 empty items separated by `,`.
  fn empty_item_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    separated_list_m_n(2, 4, tag(","), tag("")).parse(input)
  }
  // 2 to 4 `abc` items with a separator that never consumes input.
  fn empty_sep_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    separated_list_m_n(2, 4, tag(""), tag("abc")).parse(input)
  }
  // 2 to 4 `abcd` items separated by the two-byte separator `..`.
  fn dotdot_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    separated_list_m_n(2, 4, tag(".."), tag("abcd")).parse(input)
  }
  // 0 to 3 `abc` items separated by `,`.
  fn optional_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    separated_list_m_n(0, 3, tag(","), tag("abc")).parse(input)
  }

  // No item at all while at least two are required: the item parser's error.
  let no_items = &b"azerty"[..];
  assert_eq!(
    comma_list(no_items),
    Err(Err::Error(error_position!(no_items, ErrorKind::Tag)))
  );

  // One item only: the list error is positioned right after that item.
  let less_items = &b"abcdef"[..];
  assert_eq!(
    comma_list(less_items),
    Err(Err::Error(error_position!(
      &less_items[4..],
      ErrorKind::SeparatedList
    )))
  );

  // Exactly the minimum number of items.
  assert_eq!(
    comma_list(&b"abcd,abcdef"[..]),
    Ok((&b"ef"[..], vec![&b"abcd"[..], &b"abcd"[..]]))
  );

  // Items may be empty as long as the separator consumes input.
  assert_eq!(
    empty_item_list(&b",,abc"[..]),
    Ok((&b"abc"[..], vec![&b""[..], &b""[..], &b""[..]]))
  );

  // A separator that consumes nothing is rejected after the first item.
  let not_separated = &b"abcabc"[..];
  assert_eq!(
    empty_sep_list(not_separated),
    Err(Err::Error(error_position!(
      &not_separated[3..],
      ErrorKind::SeparatedList
    )))
  );

  // A trailing separator is left in the remaining input.
  assert_eq!(
    comma_list(&b"abcd,abcd,ef"[..]),
    Ok((&b",ef"[..], vec![&b"abcd"[..], &b"abcd"[..]]))
  );

  // Truncated first item, separator and second item all report `Incomplete`.
  assert_eq!(
    comma_list(&b"abc"[..]),
    Err(Err::Incomplete(Needed::new(1)))
  );
  assert_eq!(
    dotdot_list(&b"abcd."[..]),
    Err(Err::Incomplete(Needed::new(1)))
  );
  assert_eq!(
    comma_list(&b"abcd,abc"[..]),
    Err(Err::Incomplete(Needed::new(1)))
  );

  // With a minimum of zero, no item yields an empty list and consumes nothing.
  assert_eq!(optional_list(no_items), Ok((no_items, vec![])));
}
186+
106187
#[test]
107188
#[cfg(feature = "alloc")]
108189
fn many0_test() {

0 commit comments

Comments
 (0)