Skip to content

Commit 4c89c3c

Browse files
committed
feat(Parser): Handled trailing comma
1 parent d8dadd4 commit 4c89c3c

File tree

5 files changed

+112
-64
lines changed

5 files changed

+112
-64
lines changed

README.md

-4
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,3 @@ Parser ./benches/data/canada.json
7777
time: [42.026 ms 42.188 ms 42.341 ms]
7878
thrpt: [50.702 MiB/s 50.886 MiB/s 51.082 MiB/s]
7979
```
80-
81-
# Todo
82-
83-
- [ ] Handle trailing comma

src/error.rs

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ pub enum Kind {
2121
NotAnArray,
2222
NotANumber,
2323
InvalidValue(String),
24+
TrailingComma,
2425
NomError(nom::error::ErrorKind),
2526
// Used when an error will be remapped
2627
ToBeDefined,

src/input.rs

+12-40
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ impl<'a, R: FromStr> ParseTo<R> for Input<'a> {
210210
impl<'a> Slice<Range<usize>> for Input<'a> {
211211
fn slice(&self, range: Range<usize>) -> Self {
212212
let next_data = self.data.slice(range);
213-
println!("Range: Next data: {:?}", next_data);
213+
214214
Self {
215215
data: next_data,
216216
line: 0,
@@ -219,10 +219,8 @@ impl<'a> Slice<Range<usize>> for Input<'a> {
219219
}
220220
}
221221

222-
impl<'a> Slice<RangeTo<usize>> for Input<'a> {
223-
fn slice(&self, range: RangeTo<usize>) -> Self {
224-
let next_data = self.data.slice(range);
225-
222+
impl<'a> Input<'a> {
223+
fn slice_common(&self, next_data: &'a str) -> Self {
226224
let offset = self.data.offset(next_data);
227225

228226
let old_data = self.data.slice(..offset);
@@ -259,44 +257,18 @@ impl<'a> Slice<RangeTo<usize>> for Input<'a> {
259257
}
260258
}
261259
}
260+
261+
impl<'a> Slice<RangeTo<usize>> for Input<'a> {
262+
fn slice(&self, range: RangeTo<usize>) -> Self {
263+
let next_data = self.data.slice(range);
264+
265+
self.slice_common(next_data)
266+
}
267+
}
262268
impl<'a> Slice<RangeFrom<usize>> for Input<'a> {
263269
fn slice(&self, range: RangeFrom<usize>) -> Self {
264270
let next_data = self.data.slice(range);
265271

266-
let offset = self.data.offset(next_data);
267-
268-
let old_data = self.data.slice(..offset);
269-
270-
if offset == 0 {
271-
return Self {
272-
data: next_data,
273-
line: self.line,
274-
col: self.col,
275-
};
276-
}
277-
278-
let new_line_iter = Memchr::new(b'\n', old_data.as_bytes());
279-
280-
let mut lines_to_add = 0;
281-
let mut last_index = None;
282-
283-
for i in new_line_iter {
284-
lines_to_add += 1;
285-
last_index = Some(i);
286-
}
287-
let last_index = last_index.map(|v| v + 1).unwrap_or(0);
288-
289-
let col = num_chars(old_data.as_bytes().slice(last_index..));
290-
291-
Self {
292-
data: next_data,
293-
line: self.line + lines_to_add,
294-
col: if lines_to_add == 0 {
295-
self.col + col
296-
} else {
297-
// When going to a new line, char starts at 1
298-
col + 1
299-
},
300-
}
272+
self.slice_common(next_data)
301273
}
302274
}

src/parser.rs

+59-20
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use crate::input::Input;
33
use crate::value::{Number, Position, SpannedValue, Value};
44
use nom::bytes::complete::take_till;
55
use nom::character::complete::digit0;
6-
use nom::combinator::eof;
6+
use nom::combinator::{eof, opt};
77
use nom::error::ParseError;
88
use nom::multi::many_till;
99
use nom::{
@@ -39,7 +39,6 @@ where
3939
I: Clone,
4040
{
4141
move |i: I| {
42-
let i = i.clone();
4342
let result = parser.parse(i.clone());
4443

4544
match result {
@@ -49,6 +48,17 @@ where
4948
}
5049
}
5150

51+
pub fn map_parser<I, O1, O2, E: ParseError<I>, F, G>(
52+
mut parser: F,
53+
mut applied_parser: G,
54+
) -> impl FnMut(I) -> IResult<I, O2, E>
55+
where
56+
F: Parser<I, O1, E>,
57+
G: FnMut((I, O1)) -> IResult<I, O2, E>,
58+
{
59+
move |input: I| applied_parser(parser.parse(input)?)
60+
}
61+
5262
fn parse_true(i: Span) -> Result<bool> {
5363
value(true, tag("rue"))(i).or_else(|_: Err<Error>| {
5464
let start = Position::from_ahead(i);
@@ -278,7 +288,16 @@ fn array(i: Span) -> Result<Vec<SpannedValue>> {
278288
}
279289
}),
280290
),
281-
json_value,
291+
or_else(json_value, |e: Err<Error>, i| {
292+
// If it succeeds, it means that it's a trailing comma
293+
let _ = preceded(multispace0, char(']'))(i).map_err(|_: Err<Error>| e)?;
294+
295+
Err(Err::Failure(Error::new(
296+
Position::from_ahead(i),
297+
Position::from_ahead(i),
298+
Kind::TrailingComma,
299+
)))
300+
}),
282301
),
283302
preceded(
284303
multispace0,
@@ -298,23 +317,27 @@ fn array(i: Span) -> Result<Vec<SpannedValue>> {
298317
}
299318

300319
fn key_value(i: Span<'_>) -> Result<(String, SpannedValue)> {
320+
let (i, comma) = opt(char(','))(i)?;
321+
301322
let pos_before_space = Position::from(i);
302323

303324
let (i, _) = multispace0(i)?;
304325

305-
if i.starts_with('}') || i.is_empty() {
326+
if (i.starts_with('}') || i.is_empty()) && comma.is_none() {
306327
// Key value is called in a loop, and only an error can stop it
307328
return Err(Err::Error(Error::default()));
308329
}
309330

310331
let (i, key) = preceded(char('"'), string)(i).or_else(|e| match e {
311332
Err::Error(mut e) => {
312333
let (i, key) = take_until_delimiter(i, true)?;
334+
335+
let end = Position::from_ahead(i);
336+
313337
if key.is_empty() {
314-
e.start = pos_before_space;
338+
e.start = pos_before_space.clone();
315339
}
316340
e.kind = Kind::InvalidKey(key);
317-
let end = Position::from_ahead(i);
318341
e.end = end;
319342

320343
Err(Err::Failure(e))
@@ -341,25 +364,41 @@ fn key_value(i: Span<'_>) -> Result<(String, SpannedValue)> {
341364
fn hash(i: Span<'_>) -> Result<HashMap<String, SpannedValue>> {
342365
let start = Position::from_ahead(i);
343366

344-
let result = terminated(
367+
let result: Result<HashMap<String, SpannedValue>> = terminated(
345368
map(
346369
separated_list0(
347370
preceded(
348371
multispace0,
349-
or_else(char(','), |e: Err<Error>, i| {
350-
let (i, _) = multispace0(i)?;
351-
352-
match e {
353-
Err::Error(mut e) if !i.is_empty() && !i.starts_with('}') => {
354-
e.kind = Kind::MissingComma;
355-
e.start = start.clone();
356-
e.end.col -= 1;
357-
358-
Err(Err::Failure(e))
372+
or_else(
373+
map_parser(char(','), |(i, _): (Span, char)| {
374+
let (j, _) = multispace0(i)?;
375+
376+
if j.starts_with('}') {
377+
let position = Position::from_ahead(i);
378+
Err(Err::Failure(Error::new(
379+
position.clone(),
380+
position,
381+
Kind::TrailingComma,
382+
)))
383+
} else {
384+
Ok((i, ','))
359385
}
360-
e => Err(e),
361-
}
362-
}),
386+
}),
387+
|e: Err<Error>, i| {
388+
let (i, _) = multispace0(i)?;
389+
390+
match e {
391+
Err::Error(mut e) if !i.is_empty() && !i.starts_with('}') => {
392+
e.kind = Kind::MissingComma;
393+
e.start = start.clone();
394+
e.end.col -= 1;
395+
396+
Err(Err::Failure(e))
397+
}
398+
e => Err(e),
399+
}
400+
},
401+
),
363402
),
364403
key_value,
365404
),

tests/parser.rs

+40
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,26 @@ mod error {
312312
Ok(_) => panic!("Not supposed to happen"),
313313
}
314314
}
315+
316+
#[test]
317+
fn trailing_comma() {
318+
let json = r#"{"hello": "world", }"#;
319+
320+
let parsed = parse(json);
321+
322+
assert!(parsed.is_err());
323+
324+
match parsed {
325+
Err(e) => {
326+
assert_eq!(e.start.line, 1);
327+
assert_eq!(e.start.col, 18);
328+
assert_eq!(e.end.line, 1);
329+
assert_eq!(e.end.col, 18);
330+
assert_eq!(e.kind, Kind::TrailingComma)
331+
}
332+
Ok(_) => panic!("Not supposed to happen"),
333+
}
334+
}
315335
}
316336

317337
mod string {
@@ -497,6 +517,26 @@ mod array {
497517
Ok(_) => panic!("Not supposed to happen"),
498518
}
499519
}
520+
521+
#[test]
522+
fn trailing_comma() {
523+
let json = r#"["hello", "world", ]"#;
524+
525+
let parsed = parse(json);
526+
527+
assert!(parsed.is_err());
528+
529+
match parsed {
530+
Err(e) => {
531+
assert_eq!(e.start.line, 1);
532+
assert_eq!(e.start.col, 18);
533+
assert_eq!(e.end.line, 1);
534+
assert_eq!(e.end.col, 18);
535+
assert_eq!(e.kind, Kind::TrailingComma)
536+
}
537+
Ok(_) => panic!("Not supposed to happen"),
538+
}
539+
}
500540
}
501541

502542
mod object {

0 commit comments

Comments
 (0)