1
+ #![ deny( missing_docs) ]
2
+ //! # qjsonrs
3
+ //!
4
+ //! A quick JSON tokenizer.
5
+ //!
6
+ //! This crate is intended to be used to quickly tokenize a stream of JSON data. It merely emits
7
+ //! tokens; it does not parse the JSON into larger structures.
8
+ //!
9
+ //! This is useful for extracting data from massive arrays, or quick parsing of JSON objects where
10
+ //! you only care about certain keys.
11
+ //!
12
+ //! # Examples:
13
+ //! ## Simple usage:
14
+ //! ```
15
+ //! use qjsonrs::{
16
+ //! JsonStream,
17
+ //! JsonToken::{
18
+ //! StartObject,
19
+ //! EndObject,
20
+ //! StartArray,
21
+ //! EndArray,
22
+ //! JsKey,
23
+ //! JsNumber
24
+ //! },
25
+ //! JsonTokenIterator
26
+ //! };
27
+ //!
28
+ //! # fn main() -> Result<(), qjsonrs::Error> {
29
+ //! let mut stream = JsonStream::from_read(&b"{\"test\": 1, \"arr\": []}"[..], 256)?;
30
+ //! assert_eq!(stream.next()?.unwrap(), StartObject);
31
+ //! assert_eq!(stream.next()?.unwrap(), JsKey("test".into()));
32
+ //! assert_eq!(stream.next()?.unwrap(), JsNumber("1"));
33
+ //! assert_eq!(stream.next()?.unwrap(), JsKey("arr".into()));
34
+ //! assert_eq!(stream.next()?.unwrap(), StartArray);
35
+ //! assert_eq!(stream.next()?.unwrap(), EndArray);
36
+ //! assert_eq!(stream.next()?.unwrap(), EndObject);
37
+ //! assert_eq!(stream.next()?, None);
38
+ //! # Ok(())
39
+ //! # }
40
+ //! ```
41
+ //! ## Count size of JSON array:
42
+ //! ```
43
+ //! # use qjsonrs::{
44
+ //! # Error,
45
+ //! # JsonStream,
46
+ //! # JsonToken::{
47
+ //! # StartObject,
48
+ //! # EndObject,
49
+ //! # StartArray,
50
+ //! # EndArray,
51
+ //! # JsKey,
52
+ //! # JsNumber
53
+ //! # },
54
+ //! # JsonTokenIterator
55
+ //! # };
56
+ //! #
57
+ //! # use std::io::Read;
58
+ //! #
59
+ //! fn array_size(stream: &mut dyn JsonTokenIterator) -> Result<usize, Error> {
60
+ //! assert_eq!(stream.next()?.unwrap(), StartArray);
61
+ //! let mut size = 0;
62
+ //! let mut depth = 0;
63
+ //! loop {
64
+ //! match stream.next()? {
65
+ //! Some(StartObject) => { if depth == 0 {size += 1;} depth += 1; },
66
+ //! Some(EndObject) => { assert!(depth > 0); depth -= 1; },
67
+ //! Some(StartArray) => { if depth == 0 {size += 1;} depth += 1; },
68
+ //! Some(EndArray) => { if depth == 0 {break;} else { depth -= 1; } },
69
+ //! Some(_) => { if depth == 0 {size += 1; } },
70
+ //! None => { panic!("Early termination"); },
71
+ //! }
72
+ //! }
73
+ //! Ok(size)
74
+ //! }
75
+ //!
76
+ //! # fn main() -> Result<(), qjsonrs::Error> {
77
+ //! let mut stream = JsonStream::from_read(&b"[1, [2], 3, {\"a\": [4]}, 5, 6]"[..], 256)?;
78
+ //! assert_eq!(array_size(&mut stream)?, 6);
79
+ //! assert_eq!(stream.next()?, None);
80
+ //! # Ok(())
81
+ //! # }
82
+ //! ```
1
83
2
84
#[ cfg( test) ] #[ macro_use] extern crate hamcrest2;
3
85
#[ cfg( test) ] #[ macro_use] extern crate matches;
@@ -34,16 +116,40 @@ enum ParseContext {
34
116
Object ,
35
117
}
36
118
119
+ /// A stream of JSON tokens.
120
+ ///
121
+ /// Implements an interface similar to Iter. However, the objects returned by next() contain
122
+ /// references to an internal buffer.
37
123
pub struct JsonStream < R > where R : Read {
38
124
buffer : Buffer < R > ,
39
125
40
126
context_stack : Vec < ParseContext > ,
41
127
parsed : Option < ParsedState > ,
42
128
}
43
129
130
+
131
+ /// Trait for an iterator over JsonTokens.
132
+ pub trait JsonTokenIterator {
133
+ /// Advance to the next token.
134
+ fn advance ( & mut self ) -> Result < ( ) > ;
135
+
136
+ /// Get the current token, or None if the stream is exhausted.
137
+ fn get < ' a > ( & ' a self ) -> Option < JsonToken < ' a > > ;
138
+
139
+ /// Advance to the next token, then get the current token.
140
+ ///
141
+ ///
142
+ /// Implemented as a call to `advance()` and then `get()`
143
+ fn next < ' a > ( & ' a mut self ) -> Result < Option < JsonToken < ' a > > > {
144
+ self . advance ( ) ?;
145
+ Ok ( self . get ( ) )
146
+ }
147
+ }
148
+
149
/// A JSON string exactly as it appeared in the input, with escape sequences left in place.
///
/// The field is private; a value can be built from a `&str` through the `From<&'a str>` impl.
#[derive(Debug, PartialEq)]
pub struct JsonString<'a> {
    // The raw (still escaped) text, borrowed from the source it was read from.
    raw: &'a str,
}
48
154
49
155
impl < ' a > From < & ' a str > for JsonString < ' a > {
@@ -98,31 +204,41 @@ impl Into<String> for JsonString<'_> {
98
204
}
99
205
}
100
206
207
+ /// A token from a stream of JSON.
101
208
#[ derive( Debug , PartialEq ) ]
102
209
pub enum JsonToken < ' a > {
210
+ /// The start of an object, a.k.a. '{'
103
211
StartObject ,
212
+ /// The end of an object, a.k.a. '}'
104
213
EndObject ,
214
+ /// The start of an array, a.k.a. '['
105
215
StartArray ,
216
+ /// The end of an object, a.k.a. ']'
106
217
EndArray ,
218
+ /// The token 'null'
107
219
JsNull ,
220
+ /// Either 'true' or 'false'
108
221
JsBoolean ( bool ) ,
222
+ /// A number, unparsed. i.e. '-123.456e-789'
109
223
JsNumber ( & ' a str ) ,
224
+ /// A JSON string in a value context.
110
225
JsString ( JsonString < ' a > ) ,
226
+ /// A JSON string in the context of a key in a JSON object.
111
227
JsKey ( JsonString < ' a > ) ,
112
228
}
113
229
114
- #[ derive( Debug ) ]
115
- pub struct ParseError {
116
- }
117
-
230
/// Every failure this crate can report.
#[derive(Debug)]
pub enum Error {
    /// An I/O error raised by the underlying `Read` object.
    Io(std::io::Error),
    /// A character that is invalid JSON, or not valid at this point in the document.
    UnexpectedChar(char),
    /// A JSON number or string did not fit in the internal buffer.
    BufferOutOfSpace,
    /// The `Read` object produced bytes that are not valid UTF-8.
    InvalidUnicode,
    /// The input ended early, leaving the JSON document incomplete.
    UnexpectedEOF,
}
128
244
@@ -143,6 +259,7 @@ impl From<buffer::Error> for Error {
143
259
}
144
260
}
145
261
262
+ /// The Result type for this crate.
146
263
type Result < T > = std:: result:: Result < T , Error > ;
147
264
148
265
struct JsonStreamIter < ' a , S > where S : Read {
@@ -274,16 +391,36 @@ impl<'a, S> JsonStreamIter<'a, S> where S: Read {
274
391
}
275
392
276
393
impl < R > JsonStream < R > where R : Read {
277
- pub fn from_read ( r : R ) -> Result < JsonStream < R > > {
394
+ /// Constructs a JsonStream from an object that implements `io::Read`
395
+ ///
396
+ /// # Params:
397
+ ///
398
+ /// r: The reader to read from
399
+ /// buffer_min_size: The minimum size of the internal buffer to read into. N.B. Must be larger
400
+ /// than the largest JSON string or JSON number in the payload.
401
+ ///
402
+ pub fn from_read ( r : R , buffer_min_size : usize ) -> Result < JsonStream < R > > {
403
+ let bsize = if buffer_min_size == 0 {
404
+ 1
405
+ } else {
406
+ buffer_min_size
407
+ } ;
278
408
Ok (
279
409
JsonStream :: < R > {
280
- buffer : Buffer :: new ( 4096 , r) ?,
410
+ buffer : Buffer :: new ( bsize , r) ?,
281
411
context_stack : vec ! [ ParseContext :: Base ] ,
282
412
parsed : None ,
283
413
}
284
414
)
285
415
}
286
416
417
+ /// The size of the internal buffer.
418
+ ///
419
+ /// Due to implementation details, can be larger than the buffer_min_size handed in in the
420
+ /// constructor.
421
+ ///
422
+ /// JSON strings or numbers larger than this size in the input will cause a BufferOutOfSpace
423
+ /// error.
287
424
pub fn buffer_size ( & self ) -> usize {
288
425
self . buffer . size ( )
289
426
}
@@ -408,8 +545,10 @@ impl<R> JsonStream<R> where R: Read {
408
545
Some ( c) => Err ( Error :: UnexpectedChar ( c) ) ,
409
546
}
410
547
}
548
+ }
411
549
412
- pub fn advance ( & mut self ) -> Result < ( ) > {
550
+ impl < R : Read > JsonTokenIterator for JsonStream < R > {
551
+ fn advance ( & mut self ) -> Result < ( ) > {
413
552
// First, consume the previous result:
414
553
match self . parsed {
415
554
// String plus trailing '"':
@@ -548,7 +687,7 @@ impl<R> JsonStream<R> where R: Read {
548
687
Ok ( ( ) )
549
688
}
550
689
551
- pub fn get < ' a > ( & ' a self ) -> Option < JsonToken < ' a > > {
690
+ fn get < ' a > ( & ' a self ) -> Option < JsonToken < ' a > > {
552
691
match self . parsed {
553
692
Some ( ParsedState :: StartObject ) =>
554
693
Some ( JsonToken :: StartObject ) ,
@@ -571,17 +710,12 @@ impl<R> JsonStream<R> where R: Read {
571
710
None => None
572
711
}
573
712
}
574
-
575
- pub fn next < ' a > ( & ' a mut self ) -> Result < Option < JsonToken < ' a > > > {
576
- self . advance ( ) ?;
577
- Ok ( self . get ( ) )
578
- }
579
713
}
580
714
581
715
#[ cfg( test) ]
582
716
mod tests {
583
717
use hamcrest2:: prelude:: * ;
584
- use super :: { JsonStream , JsonToken } ;
718
+ use super :: { JsonStream , JsonToken , JsonTokenIterator } ;
585
719
use serde_json:: { Value , Map , Number } ;
586
720
use std:: io:: Read ;
587
721
use std:: str:: FromStr ;
@@ -659,7 +793,7 @@ mod tests {
659
793
660
794
fn compare_serde_with_qjsonrs ( input : & str ) {
661
795
println ! ( "Running input {:?}" , input) ;
662
- let mut stream = JsonStream :: from_read ( input. as_bytes ( ) ) . unwrap ( ) ;
796
+ let mut stream = JsonStream :: from_read ( input. as_bytes ( ) , 256 ) . unwrap ( ) ;
663
797
match Value :: from_str ( input) . map ( normalize_value) {
664
798
Ok ( serde) => {
665
799
let qjsonrs = normalize_value ( consume_value ( & mut stream) ) ;
@@ -677,7 +811,7 @@ mod tests {
677
811
678
812
#[test]
fn simple_string() {
    // A lone top-level string yields exactly one JsString token, then the stream ends.
    let input: &[u8] = b"\"my string\"";
    let mut stream = JsonStream::from_read(input, 256).unwrap();
    assert_that!(stream.next().unwrap().unwrap(), eq(JsonToken::JsString("my string".into())));
    assert_that!(stream.next().unwrap(), none());
}
@@ -701,7 +835,7 @@ mod tests {
701
835
#[ test]
702
836
fn string_spanning_buffers ( ) {
703
837
let size = {
704
- let stream = JsonStream :: from_read ( & b"\" my string\" " [ ..] ) . unwrap ( ) ;
838
+ let stream = JsonStream :: from_read ( & b"\" my string\" " [ ..] , 256 ) . unwrap ( ) ;
705
839
stream. buffer_size ( )
706
840
} ;
707
841
let s = ( 0 ..size-3 ) . map ( |_| ' ' ) . collect :: < String > ( ) ;
0 commit comments