1
- //! This is an implementation of [`Reader`] for reading from a [`BufRead`] as
1
+ //! This is an implementation of [`Reader`] for reading from a [`Read`] or [`BufRead`] as
2
2
//! underlying byte stream.
3
3
4
4
use std:: fs:: File ;
5
- use std:: io:: { self , BufRead , BufReader } ;
5
+ use std:: io:: { self , BufRead , BufReader , Read } ;
6
+ use std:: ops:: { Deref , DerefMut } ;
6
7
use std:: path:: Path ;
7
8
8
- use crate :: errors:: { Error , Result } ;
9
- use crate :: events:: Event ;
9
+ use crate :: events:: { BytesText , Event } ;
10
10
use crate :: name:: { QName , ResolveResult } ;
11
- use crate :: reader :: { is_whitespace , BangType , ReadElementState , Reader } ;
11
+ use crate :: { Error , Result } ;
12
12
13
- use memchr;
13
+ #[ cfg( feature = "encoding" ) ]
14
+ use crate :: reader:: { detect_encoding, EncodingRef } ;
15
+ use crate :: reader:: { is_whitespace, BangType , InnerReader , ReadElementState , Reader , TagState } ;
14
16
15
- /// This is an implementation of [`Reader`] for reading from a [`BufRead`] as
16
- /// underlying byte stream.
17
- impl < R : BufRead > Reader < R > {
17
+ /// Private functions for a [`Reader`] based on an [`IoReader`].
18
+ impl < R : BufRead > Reader < IoReader < R > > {
19
+ /// Read text into the given buffer, and return an event that borrows from
20
+ /// either that buffer or from the input itself, based on the type of the
21
+ /// reader.
22
+ fn read_event_impl < ' buf > ( & mut self , buf : & ' buf mut Vec < u8 > ) -> Result < Event < ' buf > > {
23
+ let event = match self . tag_state {
24
+ TagState :: Init => self . read_until_open ( buf, true ) ,
25
+ TagState :: Closed => self . read_until_open ( buf, false ) ,
26
+ TagState :: Opened => self . read_until_close ( buf) ,
27
+ TagState :: Empty => self . close_expanded_empty ( ) ,
28
+ TagState :: Exit => return Ok ( Event :: Eof ) ,
29
+ } ;
30
+ match event {
31
+ Err ( _) | Ok ( Event :: Eof ) => self . tag_state = TagState :: Exit ,
32
+ _ => { }
33
+ }
34
+ event
35
+ }
36
+
37
+ /// Read until '<' is found and moves reader to an `Opened` state.
38
+ ///
39
+ /// Return a `StartText` event if `first` is `true` and a `Text` event otherwise
40
+ fn read_until_open < ' buf > (
41
+ & mut self ,
42
+ buf : & ' buf mut Vec < u8 > ,
43
+ first : bool ,
44
+ ) -> Result < Event < ' buf > > {
45
+ self . tag_state = TagState :: Opened ;
46
+
47
+ if self . trim_text_start {
48
+ self . reader . skip_whitespace ( & mut self . buf_position ) ?;
49
+ }
50
+
51
+ // If we already at the `<` symbol, do not try to return an empty Text event
52
+ if self . reader . skip_one ( b'<' , & mut self . buf_position ) ? {
53
+ return self . read_event_impl ( buf) ;
54
+ }
55
+
56
+ match self
57
+ . reader
58
+ . read_bytes_until ( b'<' , buf, & mut self . buf_position )
59
+ {
60
+ Ok ( Some ( bytes) ) => {
61
+ #[ cfg( feature = "encoding" ) ]
62
+ if first && self . encoding . can_be_refined ( ) {
63
+ if let Some ( encoding) = detect_encoding ( bytes) {
64
+ self . encoding = EncodingRef :: BomDetected ( encoding) ;
65
+ }
66
+ }
67
+
68
+ let content = if self . trim_text_end {
69
+ // Skip the ending '<
70
+ let len = bytes
71
+ . iter ( )
72
+ . rposition ( |& b| !is_whitespace ( b) )
73
+ . map_or_else ( || bytes. len ( ) , |p| p + 1 ) ;
74
+ & bytes[ ..len]
75
+ } else {
76
+ bytes
77
+ } ;
78
+
79
+ Ok ( if first {
80
+ Event :: StartText ( BytesText :: from_escaped ( content) . into ( ) )
81
+ } else {
82
+ Event :: Text ( BytesText :: from_escaped ( content) )
83
+ } )
84
+ }
85
+ Ok ( None ) => Ok ( Event :: Eof ) ,
86
+ Err ( e) => Err ( e) ,
87
+ }
88
+ }
89
+
90
+ /// Private function to read until `>` is found. This function expects that
91
+ /// it was called just after encounter a `<` symbol.
92
+ fn read_until_close < ' buf > ( & mut self , buf : & ' buf mut Vec < u8 > ) -> Result < Event < ' buf > > {
93
+ self . tag_state = TagState :: Closed ;
94
+
95
+ match self . reader . peek_one ( ) {
96
+ // `<!` - comment, CDATA or DOCTYPE declaration
97
+ Ok ( Some ( b'!' ) ) => match self . reader . read_bang_element ( buf, & mut self . buf_position ) {
98
+ Ok ( None ) => Ok ( Event :: Eof ) ,
99
+ Ok ( Some ( ( bang_type, bytes) ) ) => self . read_bang ( bang_type, bytes) ,
100
+ Err ( e) => Err ( e) ,
101
+ } ,
102
+ // `</` - closing tag
103
+ Ok ( Some ( b'/' ) ) => match self
104
+ . reader
105
+ . read_bytes_until ( b'>' , buf, & mut self . buf_position )
106
+ {
107
+ Ok ( None ) => Ok ( Event :: Eof ) ,
108
+ Ok ( Some ( bytes) ) => self . read_end ( bytes) ,
109
+ Err ( e) => Err ( e) ,
110
+ } ,
111
+ // `<?` - processing instruction
112
+ Ok ( Some ( b'?' ) ) => match self
113
+ . reader
114
+ . read_bytes_until ( b'>' , buf, & mut self . buf_position )
115
+ {
116
+ Ok ( None ) => Ok ( Event :: Eof ) ,
117
+ Ok ( Some ( bytes) ) => self . read_question_mark ( bytes) ,
118
+ Err ( e) => Err ( e) ,
119
+ } ,
120
+ // `<...` - opening or self-closed tag
121
+ Ok ( Some ( _) ) => match self . reader . read_element ( buf, & mut self . buf_position ) {
122
+ Ok ( None ) => Ok ( Event :: Eof ) ,
123
+ Ok ( Some ( bytes) ) => self . read_start ( bytes) ,
124
+ Err ( e) => Err ( e) ,
125
+ } ,
126
+ Ok ( None ) => Ok ( Event :: Eof ) ,
127
+ Err ( e) => Err ( e) ,
128
+ }
129
+ }
130
+ }
131
+
132
+ /// Public reading methods for a [`Reader`] based on an [`IoReader`].
133
+ impl < R : BufRead > Reader < IoReader < R > > {
18
134
/// Reads the next `Event`.
19
135
///
20
136
/// This is the main entry point for reading XML `Event`s.
@@ -40,7 +156,9 @@ impl<R: BufRead> Reader<R> {
40
156
/// <tag2><!--Test comment-->Test</tag2>
41
157
/// <tag2>Test 2</tag2>
42
158
/// </tag1>"#;
43
- /// let mut reader = Reader::from_str(xml);
159
+ /// // This explicitly uses `from_reader(xml.as_bytes())` to use a buffered reader instead of
160
+ /// // relying on the zero-copy optimizations for reading from byte slices.
161
+ /// let mut reader = Reader::from_reader(xml.as_bytes());
44
162
/// reader.trim_text(true);
45
163
/// let mut count = 0;
46
164
/// let mut buf = Vec::new();
@@ -59,7 +177,7 @@ impl<R: BufRead> Reader<R> {
59
177
/// println!("Text events: {:?}", txt);
60
178
/// ```
61
179
#[ inline]
62
- pub fn read_event_into < ' b > ( & mut self , buf : & ' b mut Vec < u8 > ) -> Result < Event < ' b > > {
180
+ pub fn read_event_into < ' buf > ( & mut self , buf : & ' buf mut Vec < u8 > ) -> Result < Event < ' buf > > {
63
181
// Thin public entry point: all of the work is done by the private `read_event_impl`.
self . read_event_impl ( buf)
64
182
}
65
183
@@ -77,7 +195,7 @@ impl<R: BufRead> Reader<R> {
77
195
/// <y:tag2><!--Test comment-->Test</y:tag2>
78
196
/// <y:tag2>Test 2</y:tag2>
79
197
/// </x:tag1>"#;
80
- /// let mut reader = Reader::from_str (xml);
198
+ /// let mut reader = Reader::from_reader (xml.as_bytes() );
81
199
/// reader.trim_text(true);
82
200
/// let mut count = 0;
83
201
/// let mut buf = Vec::new();
@@ -173,7 +291,7 @@ impl<R: BufRead> Reader<R> {
173
291
/// use quick_xml::events::{BytesStart, Event};
174
292
/// use quick_xml::Reader;
175
293
///
176
- /// let mut reader = Reader::from_str (r#"
294
+ /// let mut reader = Reader::from_reader (r#"
177
295
/// <outer>
178
296
/// <inner>
179
297
/// <inner></inner>
@@ -182,7 +300,7 @@ impl<R: BufRead> Reader<R> {
182
300
/// <outer/>
183
301
/// </inner>
184
302
/// </outer>
185
- /// "#);
303
+ /// "#.as_bytes() );
186
304
/// reader.trim_text(true);
187
305
/// let mut buf = Vec::new();
188
306
///
@@ -203,7 +321,6 @@ impl<R: BufRead> Reader<R> {
203
321
///
204
322
/// [`Start`]: Event::Start
205
323
/// [`End`]: Event::End
206
- /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
207
324
/// [`read_to_end()`]: Self::read_to_end
208
325
/// [`check_end_names`]: Self::check_end_names
209
326
/// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
@@ -279,21 +396,59 @@ impl<R: BufRead> Reader<R> {
279
396
}
280
397
}
281
398
282
- impl Reader < BufReader < File > > {
399
+ /// Builder for reading from a file.
400
+ impl Reader < IoReader < BufReader < File > > > {
283
401
/// Creates an XML reader from a file path.
284
402
pub fn from_file < P : AsRef < Path > > ( path : P ) -> Result < Self > {
285
403
let file = File :: open ( path) . map_err ( Error :: Io ) ?;
286
404
let reader = BufReader :: new ( file) ;
287
- Ok ( Self :: from_reader ( reader) )
405
+ Ok ( Self :: from_reader_internal ( IoReader ( reader) ) )
288
406
}
289
407
}
290
408
409
+ /// Builder for reading from any [`BufRead`].
410
+ impl < R : BufRead > Reader < IoReader < R > > {
411
+ /// Creates an XML reader from any type implementing [`BufRead`].
412
+ pub fn from_reader ( reader : R ) -> Self {
413
+ Self :: from_reader_internal ( IoReader ( reader) )
414
+ }
415
+ }
416
+
417
+ /// Builder for reading from any [`Read`].
418
+ impl < R : Read > Reader < IoReader < BufReader < R > > > {
419
+ /// Creates an XML reader from any type implementing [`Read`].
420
+ pub fn from_unbuffered_reader ( reader : R ) -> Self {
421
+ Self :: from_reader_internal ( IoReader ( BufReader :: new ( reader) ) )
422
+ }
423
+ }
291
424
////////////////////////////////////////////////////////////////////////////////////////////////////
292
425
293
426
/// A struct for handling reading functions based on reading from a [`BufRead`].
///
/// This is a tuple struct with a single private field: the wrapped reader `R`.
294
427
#[ derive( Debug , Clone ) ]
295
428
pub struct IoReader < R : BufRead > ( R ) ;
296
429
430
+ impl < R : BufRead > Deref for IoReader < R > {
431
+ type Target = R ;
432
+
433
+ fn deref ( & self ) -> & Self :: Target {
434
+ & self . 0
435
+ }
436
+ }
437
+
438
+ impl < R : BufRead > DerefMut for IoReader < R > {
439
+ fn deref_mut ( & mut self ) -> & mut Self :: Target {
440
+ & mut self . 0
441
+ }
442
+ }
443
+
444
+ impl < R : BufRead > InnerReader for IoReader < R > {
445
+ type Reader = R ;
446
+
447
+ fn into_inner ( self ) -> Self :: Reader {
448
+ self . 0
449
+ }
450
+ }
451
+
297
452
/// Private reading functions.
298
453
impl < R : BufRead > IoReader < R > {
299
454
#[ inline]
@@ -485,3 +640,24 @@ impl<R: BufRead> IoReader<R> {
485
640
}
486
641
}
487
642
}
643
+
644
#[cfg(test)]
mod test {
    use super::*;
    use crate::reader::test::check;

    /// Wraps a byte slice in a bare [`IoReader`].
    fn input_from_bytes(bytes: &[u8]) -> IoReader<&[u8]> {
        IoReader(bytes)
    }

    /// Builds a [`Reader`] over the bytes of `s`, going through [`IoReader`].
    fn reader_from_str(s: &str) -> Reader<IoReader<&[u8]>> {
        let input = input_from_bytes(s.as_bytes());
        Reader::from_reader_internal(input)
    }

    /// Builds a [`Reader`] over a raw byte slice, going through [`IoReader`].
    // NOTE(review): presumably not every expansion of `check!` uses this helper,
    // hence the `dead_code` allowance - confirm against the macro definition.
    #[allow(dead_code)]
    fn reader_from_bytes(s: &[u8]) -> Reader<IoReader<&[u8]>> {
        let input = input_from_bytes(s);
        Reader::from_reader_internal(input)
    }

    // The macro comes from `crate::reader::test`; the statement passed in
    // declares the buffer (`buf`).
    check!(let mut buf = Vec::new(););
}
0 commit comments