Skip to content

Commit 76a700f

Browse files
committed
Split reader into IoReader and SliceReader
This also changes the test cases in the `reader::test::check` macro to allow for reader-specific tests.
1 parent 68bbd47 commit 76a700f

File tree

6 files changed

+564
-388
lines changed

6 files changed

+564
-388
lines changed

benches/macrobenches.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
2020
// TODO: use fully normalized attribute values
2121
fn parse_document(doc: &[u8]) -> XmlResult<()> {
2222
let mut r = Reader::from_reader(doc);
23+
let mut buf = Vec::new();
2324
loop {
24-
match r.read_event()? {
25+
match r.read_event_into(&mut buf)? {
2526
Event::Start(e) | Event::Empty(e) => {
2627
for attr in e.attributes() {
2728
criterion::black_box(attr?.decode_and_unescape_value(&r)?);

src/de/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -695,7 +695,7 @@ impl<'de> Deserializer<'de, SliceReader<'de>> {
695695

696696
/// Create new deserializer that will borrow data from the specified borrowing reader
697697
#[inline]
698-
fn from_borrowing_reader(mut reader: Reader<&'de [u8]>) -> Self {
698+
fn from_borrowing_reader(mut reader: Reader<crate::SliceReader<'de>>) -> Self {
699699
reader
700700
.expand_empty_elements(true)
701701
.check_end_names(true)
@@ -930,7 +930,7 @@ pub trait XmlRead<'i> {
930930
/// You cannot create it, it is created automatically when you call
931931
/// [`Deserializer::from_reader`]
932932
pub struct IoReader<R: BufRead> {
933-
reader: Reader<R>,
933+
reader: Reader<crate::IoReader<R>>,
934934
buf: Vec<u8>,
935935
}
936936

@@ -975,7 +975,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
975975
/// You cannot create it, it is created automatically when you call
976976
/// [`Deserializer::from_str`] or [`Deserializer::from_slice`]
977977
pub struct SliceReader<'de> {
978-
reader: Reader<&'de [u8]>,
978+
reader: Reader<crate::SliceReader<'de>>,
979979
}
980980

981981
impl<'de> XmlRead<'de> for SliceReader<'de> {

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,5 +65,5 @@ mod writer;
6565
#[cfg(feature = "serialize")]
6666
pub use crate::errors::serialize::DeError;
6767
pub use crate::errors::{Error, Result};
68-
pub use crate::reader::{Decoder, Reader};
68+
pub use crate::reader::{Decoder, IoReader, Reader, SliceReader};
6969
pub use crate::writer::{ElementWriter, Writer};

src/reader/buffered_reader.rs

Lines changed: 193 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,136 @@
1-
//! This is an implementation of [`Reader`] for reading from a [`BufRead`] as
1+
//! This is an implementation of [`Reader`] for reading from a [`Read`] or [`BufRead`] as
22
//! underlying byte stream.
33
44
use std::fs::File;
5-
use std::io::{self, BufRead, BufReader};
5+
use std::io::{self, BufRead, BufReader, Read};
6+
use std::ops::{Deref, DerefMut};
67
use std::path::Path;
78

8-
use crate::errors::{Error, Result};
9-
use crate::events::Event;
9+
use crate::events::{BytesText, Event};
1010
use crate::name::{QName, ResolveResult};
11-
use crate::reader::{is_whitespace, BangType, ReadElementState, Reader};
11+
use crate::{Error, Result};
1212

13-
use memchr;
13+
#[cfg(feature = "encoding")]
14+
use crate::reader::{detect_encoding, EncodingRef};
15+
use crate::reader::{is_whitespace, BangType, InnerReader, ReadElementState, Reader, TagState};
1416

15-
/// This is an implementation of [`Reader`] for reading from a [`BufRead`] as
16-
/// underlying byte stream.
17-
impl<R: BufRead> Reader<R> {
17+
/// Private functions for a [`Reader`] based on an [`IoReader`].
18+
impl<R: BufRead> Reader<IoReader<R>> {
19+
/// Read text into the given buffer, and return an event that borrows from
20+
/// either that buffer or from the input itself, based on the type of the
21+
/// reader.
22+
fn read_event_impl<'buf>(&mut self, buf: &'buf mut Vec<u8>) -> Result<Event<'buf>> {
23+
let event = match self.tag_state {
24+
TagState::Init => self.read_until_open(buf, true),
25+
TagState::Closed => self.read_until_open(buf, false),
26+
TagState::Opened => self.read_until_close(buf),
27+
TagState::Empty => self.close_expanded_empty(),
28+
TagState::Exit => return Ok(Event::Eof),
29+
};
30+
match event {
31+
Err(_) | Ok(Event::Eof) => self.tag_state = TagState::Exit,
32+
_ => {}
33+
}
34+
event
35+
}
36+
37+
/// Reads until '<' is found and moves the reader to an `Opened` state.
38+
///
39+
/// Return a `StartText` event if `first` is `true` and a `Text` event otherwise
40+
fn read_until_open<'buf>(
41+
&mut self,
42+
buf: &'buf mut Vec<u8>,
43+
first: bool,
44+
) -> Result<Event<'buf>> {
45+
self.tag_state = TagState::Opened;
46+
47+
if self.trim_text_start {
48+
self.reader.skip_whitespace(&mut self.buf_position)?;
49+
}
50+
51+
// If we are already at the `<` symbol, do not try to return an empty Text event
52+
if self.reader.skip_one(b'<', &mut self.buf_position)? {
53+
return self.read_event_impl(buf);
54+
}
55+
56+
match self
57+
.reader
58+
.read_bytes_until(b'<', buf, &mut self.buf_position)
59+
{
60+
Ok(Some(bytes)) => {
61+
#[cfg(feature = "encoding")]
62+
if first && self.encoding.can_be_refined() {
63+
if let Some(encoding) = detect_encoding(bytes) {
64+
self.encoding = EncodingRef::BomDetected(encoding);
65+
}
66+
}
67+
68+
let content = if self.trim_text_end {
69+
// Skip the ending '<'
70+
let len = bytes
71+
.iter()
72+
.rposition(|&b| !is_whitespace(b))
73+
.map_or_else(|| bytes.len(), |p| p + 1);
74+
&bytes[..len]
75+
} else {
76+
bytes
77+
};
78+
79+
Ok(if first {
80+
Event::StartText(BytesText::from_escaped(content).into())
81+
} else {
82+
Event::Text(BytesText::from_escaped(content))
83+
})
84+
}
85+
Ok(None) => Ok(Event::Eof),
86+
Err(e) => Err(e),
87+
}
88+
}
89+
90+
/// Private function to read until `>` is found. This function expects that
91+
/// it was called just after encountering a `<` symbol.
92+
fn read_until_close<'buf>(&mut self, buf: &'buf mut Vec<u8>) -> Result<Event<'buf>> {
93+
self.tag_state = TagState::Closed;
94+
95+
match self.reader.peek_one() {
96+
// `<!` - comment, CDATA or DOCTYPE declaration
97+
Ok(Some(b'!')) => match self.reader.read_bang_element(buf, &mut self.buf_position) {
98+
Ok(None) => Ok(Event::Eof),
99+
Ok(Some((bang_type, bytes))) => self.read_bang(bang_type, bytes),
100+
Err(e) => Err(e),
101+
},
102+
// `</` - closing tag
103+
Ok(Some(b'/')) => match self
104+
.reader
105+
.read_bytes_until(b'>', buf, &mut self.buf_position)
106+
{
107+
Ok(None) => Ok(Event::Eof),
108+
Ok(Some(bytes)) => self.read_end(bytes),
109+
Err(e) => Err(e),
110+
},
111+
// `<?` - processing instruction
112+
Ok(Some(b'?')) => match self
113+
.reader
114+
.read_bytes_until(b'>', buf, &mut self.buf_position)
115+
{
116+
Ok(None) => Ok(Event::Eof),
117+
Ok(Some(bytes)) => self.read_question_mark(bytes),
118+
Err(e) => Err(e),
119+
},
120+
// `<...` - opening or self-closed tag
121+
Ok(Some(_)) => match self.reader.read_element(buf, &mut self.buf_position) {
122+
Ok(None) => Ok(Event::Eof),
123+
Ok(Some(bytes)) => self.read_start(bytes),
124+
Err(e) => Err(e),
125+
},
126+
Ok(None) => Ok(Event::Eof),
127+
Err(e) => Err(e),
128+
}
129+
}
130+
}
131+
132+
/// Public reading methods for a [`Reader`] based on an [`IoReader`].
133+
impl<R: BufRead> Reader<IoReader<R>> {
18134
/// Reads the next `Event`.
19135
///
20136
/// This is the main entry point for reading XML `Event`s.
@@ -40,7 +156,9 @@ impl<R: BufRead> Reader<R> {
40156
/// <tag2><!--Test comment-->Test</tag2>
41157
/// <tag2>Test 2</tag2>
42158
/// </tag1>"#;
43-
/// let mut reader = Reader::from_str(xml);
159+
/// // This explicitly uses `from_reader(xml.as_bytes())` to use a buffered reader instead of
160+
/// // relying on the zero-copy optimizations for reading from byte slices.
161+
/// let mut reader = Reader::from_reader(xml.as_bytes());
44162
/// reader.trim_text(true);
45163
/// let mut count = 0;
46164
/// let mut buf = Vec::new();
@@ -59,7 +177,7 @@ impl<R: BufRead> Reader<R> {
59177
/// println!("Text events: {:?}", txt);
60178
/// ```
61179
#[inline]
62-
pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
180+
pub fn read_event_into<'buf>(&mut self, buf: &'buf mut Vec<u8>) -> Result<Event<'buf>> {
63181
self.read_event_impl(buf)
64182
}
65183

@@ -77,7 +195,7 @@ impl<R: BufRead> Reader<R> {
77195
/// <y:tag2><!--Test comment-->Test</y:tag2>
78196
/// <y:tag2>Test 2</y:tag2>
79197
/// </x:tag1>"#;
80-
/// let mut reader = Reader::from_str(xml);
198+
/// let mut reader = Reader::from_reader(xml.as_bytes());
81199
/// reader.trim_text(true);
82200
/// let mut count = 0;
83201
/// let mut buf = Vec::new();
@@ -173,7 +291,7 @@ impl<R: BufRead> Reader<R> {
173291
/// use quick_xml::events::{BytesStart, Event};
174292
/// use quick_xml::Reader;
175293
///
176-
/// let mut reader = Reader::from_str(r#"
294+
/// let mut reader = Reader::from_reader(r#"
177295
/// <outer>
178296
/// <inner>
179297
/// <inner></inner>
@@ -182,7 +300,7 @@ impl<R: BufRead> Reader<R> {
182300
/// <outer/>
183301
/// </inner>
184302
/// </outer>
185-
/// "#);
303+
/// "#.as_bytes());
186304
/// reader.trim_text(true);
187305
/// let mut buf = Vec::new();
188306
///
@@ -203,7 +321,6 @@ impl<R: BufRead> Reader<R> {
203321
///
204322
/// [`Start`]: Event::Start
205323
/// [`End`]: Event::End
206-
/// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
207324
/// [`read_to_end()`]: Self::read_to_end
208325
/// [`check_end_names`]: Self::check_end_names
209326
/// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
@@ -279,21 +396,59 @@ impl<R: BufRead> Reader<R> {
279396
}
280397
}
281398

282-
impl Reader<BufReader<File>> {
399+
/// Builder for reading from a file.
400+
impl Reader<IoReader<BufReader<File>>> {
283401
/// Creates an XML reader from a file path.
284402
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
285403
let file = File::open(path).map_err(Error::Io)?;
286404
let reader = BufReader::new(file);
287-
Ok(Self::from_reader(reader))
405+
Ok(Self::from_reader_internal(IoReader(reader)))
288406
}
289407
}
290408

409+
/// Builder for reading from any [`BufRead`].
410+
impl<R: BufRead> Reader<IoReader<R>> {
411+
/// Creates an XML reader from any type implementing [`BufRead`].
412+
pub fn from_reader(reader: R) -> Self {
413+
Self::from_reader_internal(IoReader(reader))
414+
}
415+
}
416+
417+
/// Builder for reading from any [`Read`].
418+
impl<R: Read> Reader<IoReader<BufReader<R>>> {
419+
/// Creates an XML reader from any type implementing [`Read`].
420+
pub fn from_unbuffered_reader(reader: R) -> Self {
421+
Self::from_reader_internal(IoReader(BufReader::new(reader)))
422+
}
423+
}
291424
////////////////////////////////////////////////////////////////////////////////////////////////////
292425

293426
/// A struct for handling reading functions based on reading from a [`BufRead`].
294427
#[derive(Debug, Clone)]
295428
pub struct IoReader<R: BufRead>(R);
296429

430+
impl<R: BufRead> Deref for IoReader<R> {
431+
type Target = R;
432+
433+
fn deref(&self) -> &Self::Target {
434+
&self.0
435+
}
436+
}
437+
438+
impl<R: BufRead> DerefMut for IoReader<R> {
439+
fn deref_mut(&mut self) -> &mut Self::Target {
440+
&mut self.0
441+
}
442+
}
443+
444+
impl<R: BufRead> InnerReader for IoReader<R> {
445+
type Reader = R;
446+
447+
fn into_inner(self) -> Self::Reader {
448+
self.0
449+
}
450+
}
451+
297452
/// Private reading functions.
298453
impl<R: BufRead> IoReader<R> {
299454
#[inline]
@@ -485,3 +640,24 @@ impl<R: BufRead> IoReader<R> {
485640
}
486641
}
487642
}
643+
644+
#[cfg(test)]
645+
mod test {
646+
use super::*;
647+
use crate::reader::test::check;
648+
649+
fn input_from_bytes(bytes: &[u8]) -> IoReader<&[u8]> {
650+
IoReader(bytes)
651+
}
652+
653+
fn reader_from_str(s: &str) -> Reader<IoReader<&[u8]>> {
654+
Reader::from_reader_internal(IoReader(s.as_bytes()))
655+
}
656+
657+
#[allow(dead_code)]
658+
fn reader_from_bytes(s: &[u8]) -> Reader<IoReader<&[u8]>> {
659+
Reader::from_reader_internal(IoReader(s))
660+
}
661+
662+
check!(let mut buf = Vec::new(););
663+
}

0 commit comments

Comments
 (0)