Skip to content

Commit ad047bf

Browse files
authored
Next skip (#84)
1 parent 0ab4dd4 commit ad047bf

File tree

12 files changed

+456
-55
lines changed

12 files changed

+456
-55
lines changed

.github/workflows/ci.yml

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,23 @@ jobs:
144144

145145
- run: cargo fuzz run --fuzz-dir crates/fuzz compare_to_serde --release -- -max_total_time=300s
146146

147+
fuzz-skip:
148+
name: fuzz skip
149+
# we only run this on ubuntu since architecture should make no difference
150+
151+
runs-on: ubuntu-latest
152+
153+
steps:
154+
- uses: actions/checkout@v3
155+
156+
- uses: moonrepo/setup-rust@v1
157+
with:
158+
channel: nightly
159+
cache-target: release
160+
bins: cargo-fuzz
161+
162+
- run: cargo fuzz run --fuzz-dir crates/fuzz compare_skip --release -- -max_total_time=300s
163+
147164
lint:
148165
runs-on: ubuntu-latest
149166
steps:
@@ -166,7 +183,7 @@ jobs:
166183
# https://github.com/marketplace/actions/alls-green#why used for branch protection checks
167184
check:
168185
if: always()
169-
needs: [test-linux, test-macos, bench, fuzz, lint]
186+
needs: [test-linux, test-macos, bench, fuzz, fuzz-skip, lint]
170187
runs-on: ubuntu-latest
171188
steps:
172189
- name: Decide whether the needed jobs succeeded or failed

crates/fuzz/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,9 @@ name = "compare_to_serde"
2222
path = "fuzz_targets/compare_to_serde.rs"
2323
test = false
2424
doc = false
25+
26+
[[bin]]
27+
name = "compare_skip"
28+
path = "fuzz_targets/compare_skip.rs"
29+
test = false
30+
doc = false

crates/fuzz/fuzz_targets/compare_skip.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#![no_main]
2+
3+
use jiter::{Jiter, JiterError, JiterErrorType, JsonError, JsonValue};
4+
5+
use libfuzzer_sys::fuzz_target;
/// Assert that the error from `JsonValue::parse` matches the error from `Jiter`.
///
/// Panics (failing the fuzz run) if `jiter_error` is a `WrongType` error, or if
/// the JSON error types or the error indexes of the two errors differ.
6+
fn errors_equal(value_error: &JsonError, jiter_error: &JiterError) {
// extract the inner JSON error type; a `WrongType` error is treated as a failure
7+
let jiter_error_type = match &jiter_error.error_type {
8+
JiterErrorType::JsonError(json_error_type) => json_error_type,
9+
JiterErrorType::WrongType { .. } => panic!("Expected JsonError, found WrongType"),
10+
};
// both the kind of error and the index it is reported at must agree
11+
assert_eq!(&value_error.error_type, jiter_error_type);
12+
assert_eq!(value_error.index, jiter_error.index);
13+
}
14+
// Fuzz target comparing `Jiter::next_skip` against `JsonValue::parse` on the same input:
// when parsing succeeds, skipping and finishing must succeed too; when parsing fails,
// skipping (or finishing) must fail with the same error type and index.
15+
fuzz_target!(|json: String| {
16+
let json_data = json.as_bytes();
17+
match JsonValue::parse(json_data, false) {
// input parsed as a value: skipping it and then finishing must not error
18+
Ok(_) => {
19+
let mut jiter = Jiter::new(json_data, false);
20+
jiter.next_skip().unwrap();
21+
jiter.finish().unwrap();
22+
}
// input failed to parse: collect the error from `next_skip`, or from `finish()`
// when the skip itself succeeded, and compare it with the parse error
23+
Err(json_error) => {
24+
let mut jiter = Jiter::new(json_data, false);
25+
let jiter_error = match jiter.next_skip() {
26+
Ok(_) => jiter.finish().unwrap_err(),
27+
Err(e) => e,
28+
};
29+
errors_equal(&json_error, &jiter_error);
30+
}
31+
};
32+
});

crates/jiter/benches/main.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,15 @@ fn jiter_value(path: &str, bench: &mut Bencher) {
2323
})
2424
}
2525

/// Benchmark `Jiter::next_skip` on the contents of the JSON file at `path`.
///
/// The file is read once up front; each benchmark iteration builds a new `Jiter`
/// over the same bytes and skips the next value, panicking if the skip fails.
26+
fn jiter_skip(path: &str, bench: &mut Bencher) {
27+
let json = read_file(path);
28+
let json_data = black_box(json.as_bytes());
// the `Jiter` is constructed inside the closure, so construction is part of each iteration
29+
bench.iter(|| {
30+
let mut jiter = Jiter::new(json_data, false);
31+
jiter.next_skip().unwrap();
32+
})
33+
}
34+
2635
fn jiter_iter_big(path: &str, bench: &mut Bencher) {
2736
let json = read_file(path);
2837
let json_data = black_box(json.as_bytes());
@@ -211,6 +220,10 @@ macro_rules! test_cases {
211220
jiter_string(&file_path, bench);
212221
}
213222
}
// Skip benchmark for this test case: runs `jiter_skip` on `./benches/<file_name>.json`.
223+
fn [< $file_name _jiter_skip >](bench: &mut Bencher) {
224+
let file_path = format!("./benches/{}.json", stringify!($file_name));
225+
jiter_skip(&file_path, bench);
226+
}
214227

215228
fn [< $file_name _serde_value >](bench: &mut Bencher) {
216229
let file_path = format!("./benches/{}.json", stringify!($file_name));
@@ -293,51 +306,65 @@ fn lazy_map_lookup_3_50(bench: &mut Bencher) {
293306
benchmark_group!(
294307
benches,
295308
big_jiter_iter,
309+
big_jiter_skip,
296310
big_jiter_value,
297311
big_serde_value,
298312
bigints_array_jiter_iter,
313+
bigints_array_jiter_skip,
299314
bigints_array_jiter_value,
300315
bigints_array_serde_value,
301316
floats_array_jiter_iter,
317+
floats_array_jiter_skip,
302318
floats_array_jiter_value,
303319
floats_array_serde_value,
304320
massive_ints_array_jiter_iter,
321+
massive_ints_array_jiter_skip,
305322
massive_ints_array_jiter_value,
306323
massive_ints_array_serde_value,
307324
medium_response_jiter_iter,
325+
medium_response_jiter_skip,
308326
medium_response_jiter_value,
309327
medium_response_jiter_value_owned,
310328
medium_response_serde_value,
311329
x100_jiter_iter,
330+
x100_jiter_skip,
312331
x100_jiter_value,
313332
x100_serde_iter,
314333
x100_serde_value,
315334
sentence_jiter_iter,
335+
sentence_jiter_skip,
316336
sentence_jiter_value,
317337
sentence_serde_value,
318338
unicode_jiter_iter,
339+
unicode_jiter_skip,
319340
unicode_jiter_value,
320341
unicode_serde_value,
321342
pass1_jiter_iter,
343+
pass1_jiter_skip,
322344
pass1_jiter_value,
323345
pass1_serde_value,
324346
pass2_jiter_iter,
347+
pass2_jiter_skip,
325348
pass2_jiter_value,
326349
pass2_serde_value,
327350
string_array_jiter_iter,
351+
string_array_jiter_skip,
328352
string_array_jiter_value,
329353
string_array_jiter_value_owned,
330354
string_array_serde_value,
331355
true_array_jiter_iter,
356+
true_array_jiter_skip,
332357
true_array_jiter_value,
333358
true_array_serde_value,
334359
true_object_jiter_iter,
360+
true_object_jiter_skip,
335361
true_object_jiter_value,
336362
true_object_serde_value,
337363
lazy_map_lookup_1_10,
338364
lazy_map_lookup_2_20,
339365
lazy_map_lookup_3_50,
340366
short_numbers_jiter_iter,
367+
short_numbers_jiter_skip,
341368
short_numbers_jiter_value,
342369
short_numbers_serde_value,
343370
);

crates/jiter/src/errors.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,6 @@ use std::fmt;
66
/// those expected from `serde_json`.
77
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
88
pub enum JsonErrorType {
9-
/// string escape sequences are not supported in this method, usize here is the position within the string
10-
/// that is invalid
11-
StringEscapeNotSupported,
12-
139
/// float value was found where an int was expected
1410
FloatExpectingInt,
1511

@@ -82,7 +78,6 @@ impl std::fmt::Display for JsonErrorType {
8278
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
8379
// Messages for enum members copied from serde_json are unchanged
8480
match self {
85-
Self::StringEscapeNotSupported => f.write_str("string escape sequences are not supported"),
8681
Self::FloatExpectingInt => f.write_str("float value was found where an int was expected"),
8782
Self::EofWhileParsingList => f.write_str("EOF while parsing a list"),
8883
Self::EofWhileParsingObject => f.write_str("EOF while parsing an object"),

crates/jiter/src/jiter.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::errors::{json_error, JiterError, JsonType, LinePosition, DEFAULT_RECU
22
use crate::number_decoder::{NumberAny, NumberFloat, NumberInt, NumberRange};
33
use crate::parse::{Parser, Peek};
44
use crate::string_decoder::{StringDecoder, StringDecoderRange, Tape};
5-
use crate::value::{take_value_borrowed, take_value_owned, JsonValue};
5+
use crate::value::{take_value_borrowed, take_value_owned, take_value_skip, JsonValue};
66
use crate::{JsonError, JsonErrorType};
77

88
pub type JiterResult<T> = Result<T, JiterError>;
@@ -48,6 +48,16 @@ impl<'j> Jiter<'j> {
4848
self.parser.current_position()
4949
}
5050

51+
/// Get the current index of the parser.
///
/// This is the value of `self.parser.index`; `slice_to_current` uses it as the
/// end bound when slicing the underlying JSON data.
52+
pub fn current_index(&self) -> usize {
53+
self.parser.index
54+
}
55+
56+
/// Get a slice of the underlying JSON data from `start` to `current_index`.
///
/// # Arguments
/// - `start`: The index into the JSON data at which the returned slice begins.
///
/// # Panics
/// Panics if `start` is greater than the current index, since the range is
/// used to index the data slice directly.
57+
pub fn slice_to_current(&self, start: usize) -> &'j [u8] {
58+
&self.data[start..self.current_index()]
59+
}
60+
5161
/// Convert an error index to a [LinePosition].
5262
///
5363
/// # Arguments
@@ -218,6 +228,31 @@ impl<'j> Jiter<'j> {
218228
.map_err(Into::into)
219229
}
220230

231+
/// Parse the next JSON value, but don't return it.
232+
/// This should be faster than returning the value, and is useful when you don't care about the value.
233+
/// Error if it is invalid JSON.
234+
///
235+
/// *WARNING:* For performance reasons, this method does not check that strings would be valid UTF-8.
236+
pub fn next_skip(&mut self) -> JiterResult<()> {
// peek the next value, then skip it via `known_skip`
237+
let peek = self.peek()?;
238+
self.known_skip(peek)
239+
}
240+
241+
/// Parse the next JSON value, but don't return it. Error if it is invalid JSON.
242+
///
/// Delegates to `take_value_skip`, passing `DEFAULT_RECURSION_LIMIT` and this
/// `Jiter`'s `allow_inf_nan` setting; any error is converted into the
/// `JiterResult` error type via `Into`.
///
243+
/// # Arguments
244+
/// - `peek`: The [Peek] of the next JSON value.
245+
pub fn known_skip(&mut self, peek: Peek) -> JiterResult<()> {
246+
take_value_skip(
247+
peek,
248+
&mut self.parser,
249+
&mut self.tape,
250+
DEFAULT_RECURSION_LIMIT,
251+
self.allow_inf_nan,
252+
)
253+
.map_err(Into::into)
254+
}
255+
221256
/// Parse the next JSON value and return it as a [JsonValue] with static lifetime. Error if it is invalid JSON.
222257
pub fn next_value_owned(&mut self) -> JiterResult<JsonValue<'static>> {
223258
let peek = self.peek()?;

crates/jiter/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ mod simd_aarch64;
1414
mod string_decoder;
1515
mod value;
1616

17-
pub use errors::{JiterErrorType, JsonError, JsonErrorType, JsonResult, JsonType, LinePosition};
17+
pub use errors::{JiterError, JiterErrorType, JsonError, JsonErrorType, JsonResult, JsonType, LinePosition};
1818
pub use jiter::{Jiter, JiterResult};
1919
pub use lazy_index_map::LazyIndexMap;
2020
pub use number_decoder::{NumberAny, NumberInt};

crates/jiter/src/number_decoder.rs

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -384,8 +384,8 @@ impl AbstractNumberDecoder for NumberRange {
384384
let end = consume_exponential(data, index)?;
385385
Ok((start..end, end))
386386
}
387-
Some(_) => return json_err!(InvalidNumber, index),
388-
None => return Ok((start..index, index)),
387+
Some(digit) if digit.is_ascii_digit() => json_err!(InvalidNumber, index),
388+
_ => return Ok((start..index, index)),
389389
};
390390
}
391391
Some(b'I') => {
@@ -398,25 +398,49 @@ impl AbstractNumberDecoder for NumberRange {
398398
};
399399

400400
index += 1;
401-
while let Some(next) = data.get(index) {
402-
match next {
403-
b'0'..=b'9' => (),
404-
b'.' => {
401+
for _ in 0..18 {
402+
if let Some(digit) = data.get(index) {
403+
if INT_CHAR_MAP[*digit as usize] {
404+
index += 1;
405+
continue;
406+
} else if matches!(digit, b'.') {
405407
index += 1;
406408
let end = consume_decimal(data, index)?;
407409
return Ok((start..end, end));
408-
}
409-
b'e' | b'E' => {
410+
} else if matches!(digit, b'e' | b'E') {
410411
index += 1;
411412
let end = consume_exponential(data, index)?;
412413
return Ok((start..end, end));
413414
}
414-
_ => break,
415415
}
416-
index += 1;
416+
return Ok((start..index, index));
417+
}
418+
loop {
419+
let (chunk, new_index) = IntChunk::parse_big(data, index);
420+
if (new_index - start) > 4300 {
421+
return json_err!(NumberOutOfRange, start + 4301);
422+
}
423+
match chunk {
424+
IntChunk::Ongoing(_) => {
425+
index = new_index;
426+
}
427+
IntChunk::Done(_) => return Ok((start..new_index, new_index)),
428+
IntChunk::Float => {
429+
return match data.get(new_index) {
430+
Some(b'.') => {
431+
index = new_index + 1;
432+
let end = consume_decimal(data, index)?;
433+
Ok((start..end, end))
434+
}
435+
_ => {
436+
index = new_index + 1;
437+
let end = consume_exponential(data, index)?;
438+
Ok((start..end, end))
439+
}
440+
}
441+
}
442+
}
417443
}
418-
419-
Ok((start..index, index))
420444
}
421445
}
422446

crates/jiter/src/parse.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ impl Peek {
1919
}
2020

2121
impl Peek {
22-
const fn new(next: u8) -> Self {
22+
pub const fn new(next: u8) -> Self {
2323
Self(next)
2424
}
2525

crates/jiter/src/string_decoder.rs

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,9 @@ fn parse_u4(data: &[u8], mut index: usize) -> JsonResult<(u16, usize)> {
327327
Ok((n, index))
328328
}
329329

330+
/// A string decoder that returns the range of the string.
331+
///
332+
/// *WARNING:* For performance reasons, this decoder does not check that the string would be valid UTF-8.
330333
pub struct StringDecoderRange;
331334

332335
impl<'t, 'j> AbstractStringDecoder<'t, 'j> for StringDecoderRange
@@ -338,33 +341,30 @@ where
338341
fn decode(data: &'j [u8], mut index: usize, _tape: &'t mut Tape) -> JsonResult<(Self::Output, usize)> {
339342
index += 1;
340343
let start = index;
341-
while let Some(next) = data.get(index) {
342-
match next {
343-
b'"' => {
344+
345+
loop {
346+
index = match decode_chunk(data, index, true)? {
347+
(StringChunk::Quote, _, index) => {
344348
let r = start..index;
345-
index += 1;
346-
return Ok((r, index));
349+
return Ok((r, index + 1));
347350
}
348-
b'\\' => {
349-
index += 1;
350-
if let Some(next_inner) = data.get(index) {
351-
match next_inner {
352-
// these escapes are easy to validate
353-
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => (),
354-
// unicode escapes are harder to validate, we just prevent them here
355-
b'u' => return json_err!(StringEscapeNotSupported, index),
356-
_ => return json_err!(InvalidEscape, index),
357-
}
358-
} else {
359-
return json_err!(EofWhileParsingString, index);
351+
(StringChunk::Backslash, _, index) => index,
352+
};
353+
index += 1;
354+
if let Some(next_inner) = data.get(index) {
355+
match next_inner {
356+
// these escapes are easy to validate
357+
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => (),
358+
b'u' => {
359+
let (_, new_index) = parse_escape(data, index)?;
360+
index = new_index;
360361
}
361-
index += 1;
362-
}
363-
_ => {
364-
index += 1;
362+
_ => return json_err!(InvalidEscape, index),
365363
}
364+
index += 1;
365+
} else {
366+
return json_err!(EofWhileParsingString, index);
366367
}
367368
}
368-
json_err!(EofWhileParsingString, index)
369369
}
370370
}

0 commit comments

Comments
 (0)