Skip to content

Commit 70b0c39

Browse files
author
Marcus Ewert
committed
Added docs.
1 parent b9e0b69 commit 70b0c39

File tree

1 file changed

+154
-20
lines changed

1 file changed

+154
-20
lines changed

src/lib.rs

+154-20
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,85 @@
1+
#![deny(missing_docs)]
2+
//! # qjsonrs
3+
//!
4+
//! A quick JSON tokenizer.
5+
//!
6+
//! This crate is intended to be used to quickly tokenize a stream of JSON data. It merely emits
7+
//! tokens; it does not parse the JSON into larger structures.
8+
//!
9+
//! This is useful for extracting data from massive arrays, or quick parsing of JSON objects where
10+
//! you only care about certain keys.
11+
//!
12+
//! # Examples:
13+
//! ## Simple usage:
14+
//! ```
15+
//! use qjsonrs::{
16+
//! JsonStream,
17+
//! JsonToken::{
18+
//! StartObject,
19+
//! EndObject,
20+
//! StartArray,
21+
//! EndArray,
22+
//! JsKey,
23+
//! JsNumber
24+
//! },
25+
//! JsonTokenIterator
26+
//! };
27+
//!
28+
//! # fn main() -> Result<(), qjsonrs::Error> {
29+
//! let mut stream = JsonStream::from_read(&b"{\"test\": 1, \"arr\": []}"[..], 256)?;
30+
//! assert_eq!(stream.next()?.unwrap(), StartObject);
31+
//! assert_eq!(stream.next()?.unwrap(), JsKey("test".into()));
32+
//! assert_eq!(stream.next()?.unwrap(), JsNumber("1"));
33+
//! assert_eq!(stream.next()?.unwrap(), JsKey("arr".into()));
34+
//! assert_eq!(stream.next()?.unwrap(), StartArray);
35+
//! assert_eq!(stream.next()?.unwrap(), EndArray);
36+
//! assert_eq!(stream.next()?.unwrap(), EndObject);
37+
//! assert_eq!(stream.next()?, None);
38+
//! # Ok(())
39+
//! # }
40+
//! ```
41+
//! ## Count size of JSON array:
42+
//! ```
43+
//! # use qjsonrs::{
44+
//! # Error,
45+
//! # JsonStream,
46+
//! # JsonToken::{
47+
//! # StartObject,
48+
//! # EndObject,
49+
//! # StartArray,
50+
//! # EndArray,
51+
//! # JsKey,
52+
//! # JsNumber
53+
//! # },
54+
//! # JsonTokenIterator
55+
//! # };
56+
//! #
57+
//! # use std::io::Read;
58+
//! #
59+
//! fn array_size(stream: &mut JsonTokenIterator) -> Result<usize, Error> {
60+
//! assert_eq!(stream.next()?.unwrap(), StartArray);
61+
//! let mut size = 0;
62+
//! let mut depth = 0;
63+
//! loop {
64+
//! match stream.next()? {
65+
//! Some(StartObject) => { if depth == 0 {size += 1;} depth += 1; },
66+
//! Some(EndObject) => { assert!(depth > 0); depth -= 1; },
67+
//! Some(StartArray) => { if depth == 0 {size += 1;} depth += 1; },
68+
//! Some(EndArray) => { if depth == 0 {break;} else { depth -= 1; } },
69+
//! Some(_) => { if depth == 0 {size += 1; } },
70+
//! None => { panic!("Early termination"); },
71+
//! }
72+
//! }
73+
//! Ok(size)
74+
//! }
75+
//!
76+
//! # fn main() -> Result<(), qjsonrs::Error> {
77+
//! let mut stream = JsonStream::from_read(&b"[1, [2], 3, {\"a\": [4]}, 5, 6]"[..], 256)?;
78+
//! assert_eq!(array_size(&mut stream)?, 6);
79+
//! assert_eq!(stream.next()?, None);
80+
//! # Ok(())
81+
//! # }
82+
//! ```
183
284
#[cfg(test)] #[macro_use] extern crate hamcrest2;
385
#[cfg(test)] #[macro_use] extern crate matches;
@@ -34,16 +116,40 @@ enum ParseContext {
34116
Object,
35117
}
36118

119+
/// A stream of JSON tokens.
120+
///
121+
/// Implements an interface similar to `Iterator`. However, the tokens returned by `next()` contain
122+
/// references to an internal buffer.
37123
pub struct JsonStream<R> where R: Read {
38124
buffer: Buffer<R>,
39125

40126
context_stack: Vec<ParseContext>,
41127
parsed: Option<ParsedState>,
42128
}
43129

130+
131+
/// Trait for an iterator over JsonTokens.
132+
pub trait JsonTokenIterator {
133+
/// Advance to the next token.
134+
fn advance(&mut self) -> Result<()>;
135+
136+
/// Get the current token, or None if the stream is exhausted.
137+
fn get<'a>(&'a self) -> Option<JsonToken<'a>>;
138+
139+
/// Advance to the next token, then get the current token.
140+
///
141+
///
142+
/// Implemented as a call to `advance()` and then `get()`
143+
fn next<'a>(&'a mut self) -> Result<Option<JsonToken<'a>>> {
144+
self.advance()?;
145+
Ok(self.get())
146+
}
147+
}
148+
149+
/// A raw JSON string (with escapes).
44150
#[derive(Debug, PartialEq)]
45151
pub struct JsonString<'a> {
46-
pub raw: &'a str,
152+
raw: &'a str,
47153
}
48154

49155
impl<'a> From<&'a str> for JsonString<'a> {
@@ -98,31 +204,41 @@ impl Into<String> for JsonString<'_> {
98204
}
99205
}
100206

207+
/// A token from a stream of JSON.
101208
#[derive(Debug, PartialEq)]
102209
pub enum JsonToken<'a> {
210+
/// The start of an object, a.k.a. '{'
103211
StartObject,
212+
/// The end of an object, a.k.a. '}'
104213
EndObject,
214+
/// The start of an array, a.k.a. '['
105215
StartArray,
216+
/// The end of an array, a.k.a. ']'
106217
EndArray,
218+
/// The token 'null'
107219
JsNull,
220+
/// Either 'true' or 'false'
108221
JsBoolean(bool),
222+
/// A number, unparsed. e.g. '-123.456e-789'
109223
JsNumber(&'a str),
224+
/// A JSON string in a value context.
110225
JsString(JsonString<'a>),
226+
/// A JSON string in the context of a key in a JSON object.
111227
JsKey(JsonString<'a>),
112228
}
113229

114-
#[derive(Debug)]
115-
pub struct ParseError {
116-
}
117-
230+
/// The error type for this crate.
118231
#[derive(Debug)]
119232
pub enum Error {
233+
/// An I/O error from the underlying `Read` object.
120234
Io(std::io::Error),
121-
Parse(ParseError),
122-
Unimplemented(&'static str),
235+
/// An invalid or out of context JSON character.
123236
UnexpectedChar(char),
237+
/// A JSON number or string is larger than the internal buffer.
124238
BufferOutOfSpace,
239+
/// Invalid UTF-8 input from the `Read` object.
125240
InvalidUnicode,
241+
/// Early termination (leaving invalid JSON input).
126242
UnexpectedEOF,
127243
}
128244

@@ -143,6 +259,7 @@ impl From<buffer::Error> for Error {
143259
}
144260
}
145261

262+
/// The Result type for this crate.
146263
type Result<T> = std::result::Result<T, Error>;
147264

148265
struct JsonStreamIter<'a, S> where S: Read {
@@ -274,16 +391,36 @@ impl<'a, S> JsonStreamIter<'a, S> where S: Read {
274391
}
275392

276393
impl<R> JsonStream<R> where R: Read {
277-
pub fn from_read(r: R) -> Result<JsonStream<R>> {
394+
/// Constructs a JsonStream from an object that implements `io::Read`
395+
///
396+
/// # Params:
397+
///
398+
/// r: The reader to read from
399+
/// buffer_min_size: The minimum size of the internal buffer to read into. N.B. Must be larger
400+
/// than the largest JSON string or JSON number in the payload. A value of 0 is treated as 1.
401+
///
402+
pub fn from_read(r: R, buffer_min_size: usize) -> Result<JsonStream<R>> {
403+
let bsize = if buffer_min_size == 0 {
404+
1
405+
} else {
406+
buffer_min_size
407+
};
278408
Ok(
279409
JsonStream::<R> {
280-
buffer: Buffer::new(4096, r)?,
410+
buffer: Buffer::new(bsize, r)?,
281411
context_stack: vec![ParseContext::Base],
282412
parsed: None,
283413
}
284414
)
285415
}
286416

417+
/// The size of the internal buffer.
418+
///
419+
/// Due to implementation details, this can be larger than the `buffer_min_size` passed to the
420+
/// constructor.
421+
///
422+
/// JSON strings or numbers larger than this size in the input will cause a BufferOutOfSpace
423+
/// error.
287424
pub fn buffer_size(&self) -> usize {
288425
self.buffer.size()
289426
}
@@ -408,8 +545,10 @@ impl<R> JsonStream<R> where R: Read {
408545
Some(c) => Err(Error::UnexpectedChar(c)),
409546
}
410547
}
548+
}
411549

412-
pub fn advance(&mut self) -> Result<()> {
550+
impl<R: Read> JsonTokenIterator for JsonStream<R> {
551+
fn advance(&mut self) -> Result<()> {
413552
// First, consume the previous result:
414553
match self.parsed {
415554
// String plus trailing '"':
@@ -548,7 +687,7 @@ impl<R> JsonStream<R> where R: Read {
548687
Ok(())
549688
}
550689

551-
pub fn get<'a>(&'a self) -> Option<JsonToken<'a>> {
690+
fn get<'a>(&'a self) -> Option<JsonToken<'a>> {
552691
match self.parsed {
553692
Some(ParsedState::StartObject) =>
554693
Some(JsonToken::StartObject),
@@ -571,17 +710,12 @@ impl<R> JsonStream<R> where R: Read {
571710
None => None
572711
}
573712
}
574-
575-
pub fn next<'a>(&'a mut self) -> Result<Option<JsonToken<'a>>> {
576-
self.advance()?;
577-
Ok(self.get())
578-
}
579713
}
580714

581715
#[cfg(test)]
582716
mod tests {
583717
use hamcrest2::prelude::*;
584-
use super::{JsonStream, JsonToken};
718+
use super::{JsonStream, JsonToken, JsonTokenIterator};
585719
use serde_json::{Value, Map, Number};
586720
use std::io::Read;
587721
use std::str::FromStr;
@@ -659,7 +793,7 @@ mod tests {
659793

660794
fn compare_serde_with_qjsonrs(input: &str) {
661795
println!("Running input {:?}", input);
662-
let mut stream = JsonStream::from_read(input.as_bytes()).unwrap();
796+
let mut stream = JsonStream::from_read(input.as_bytes(), 256).unwrap();
663797
match Value::from_str(input).map(normalize_value) {
664798
Ok(serde) => {
665799
let qjsonrs = normalize_value(consume_value(&mut stream));
@@ -677,7 +811,7 @@ mod tests {
677811

678812
#[test]
679813
fn simple_string() {
680-
let mut stream = JsonStream::from_read(&b"\"my string\""[..]).unwrap();
814+
let mut stream = JsonStream::from_read(&b"\"my string\""[..], 256).unwrap();
681815
assert_that!(stream.next().unwrap().unwrap(), eq(JsonToken::JsString("my string".into())));
682816
assert_that!(stream.next().unwrap(), none());
683817
}
@@ -701,7 +835,7 @@ mod tests {
701835
#[test]
702836
fn string_spanning_buffers() {
703837
let size = {
704-
let stream = JsonStream::from_read(&b"\"my string\""[..]).unwrap();
838+
let stream = JsonStream::from_read(&b"\"my string\""[..], 256).unwrap();
705839
stream.buffer_size()
706840
};
707841
let s = (0..size-3).map(|_| ' ').collect::<String>();

0 commit comments

Comments
 (0)