Skip to content

Commit 2eecf00

Browse files
authored
Merge pull request #412 from Mingun/soundness
Rename reading methods, fix some encoding errors in error-processing paths and tests
2 parents 0febc2b + ae458cb commit 2eecf00

21 files changed

+370
-218
lines changed

Changelog.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
returns `ResolveResult::Unknown` if prefix was not registered in namespace buffer
3939
- [#393]: Fix breaking processing after encountering an attribute with a reserved name (starting with "xmlns")
4040
- [#363]: Do not generate empty `Event::Text` events
41+
- [#412]: Fix use of an incorrect encoding when the `read_to_end` family of methods or the `read_text`
42+
method does not find the corresponding end tag and the reader has a non-UTF-8 encoding
4143

4244
### Misc Changes
4345

@@ -96,17 +98,28 @@
9698

9799
- [#403]: Remove deprecated `quick_xml::de::from_bytes` and `Deserializer::from_borrowing_reader`
98100

101+
- [#412]: Rename methods of `Reader`:
102+
|Old Name |New Name
103+
|-------------------------|---------------------------------------------------
104+
|`read_event` |`read_event_into`
105+
|`read_to_end` |`read_to_end_into`
106+
|`read_text` |`read_text_into`
107+
|`read_event_unbuffered` |`read_event`
108+
|`read_to_end_unbuffered` |`read_to_end`
109+
- [#412]: Change `read_to_end*` and `read_text_into` to accept `QName` instead of `AsRef<[u8]>`
110+
99111
### New Tests
100112

101113
- [#9]: Added tests for incorrect nested tags in input
102114
- [#387]: Added a bunch of tests for sequences deserialization
103115
- [#393]: Added more tests for namespace resolver
104116
- [#393]: Added tests for reserved names (started with "xml"i) -- see <https://www.w3.org/TR/xml-names11/#xmlReserved>
105-
- [#363]: Add tests for `Reader::read_event_buffered` to ensure that proper events generated for corresponding inputs
117+
- [#363]: Add tests for `Reader::read_event_impl` to ensure that proper events are generated for the corresponding inputs
106118
- [#407]: Improved benchmark suite to cover whole-document parsing, escaping and unescaping text
107119

108120
[#8]: https://github.com/Mingun/fast-xml/pull/8
109121
[#9]: https://github.com/Mingun/fast-xml/pull/9
122+
[#118]: https://github.com/tafia/quick-xml/issues/118
110123
[#180]: https://github.com/tafia/quick-xml/issues/180
111124
[#191]: https://github.com/tafia/quick-xml/issues/191
112125
[#324]: https://github.com/tafia/quick-xml/issues/324
@@ -117,6 +130,7 @@
117130
[#395]: https://github.com/tafia/quick-xml/pull/395
118131
[#403]: https://github.com/tafia/quick-xml/pull/403
119132
[#407]: https://github.com/tafia/quick-xml/pull/407
133+
[#412]: https://github.com/tafia/quick-xml/pull/412
120134

121135
## 0.23.0 -- 2022-05-08
122136

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ let mut buf = Vec::new();
4141
loop {
4242
// NOTE: this is the generic case when we don't know about the input BufRead.
4343
// when the input is a &str or a &[u8], we don't actually need to use another
44-
// buffer, we could directly call `reader.read_event_unbuffered()`
45-
match reader.read_event(&mut buf) {
44+
// buffer, we could directly call `reader.read_event()`
45+
match reader.read_event_into(&mut buf) {
4646
Ok(Event::Start(ref e)) => {
4747
match e.name() {
4848
b"tag1" => println!("attributes values: {:?}",
@@ -77,7 +77,7 @@ reader.trim_text(true);
7777
let mut writer = Writer::new(Cursor::new(Vec::new()));
7878
let mut buf = Vec::new();
7979
loop {
80-
match reader.read_event(&mut buf) {
80+
match reader.read_event_into(&mut buf) {
8181
Ok(Event::Start(ref e)) if e.name() == b"this_tag" => {
8282

8383
// creates a new element ... alternatively we could reuse `e` by calling

benches/macrobenches.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
2121
fn parse_document(doc: &[u8]) -> XmlResult<()> {
2222
let mut r = Reader::from_reader(doc);
2323
loop {
24-
match r.read_event_unbuffered()? {
24+
match r.read_event()? {
2525
Event::Start(e) | Event::Empty(e) => {
2626
for attr in e.attributes() {
2727
criterion::black_box(attr?.unescaped_value()?);

benches/microbenches.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ fn read_event(c: &mut Criterion) {
3434
let mut count = criterion::black_box(0);
3535
let mut buf = Vec::new();
3636
loop {
37-
match r.read_event(&mut buf) {
37+
match r.read_event_into(&mut buf) {
3838
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
3939
Ok(Event::Eof) => break,
4040
_ => (),
@@ -57,7 +57,7 @@ fn read_event(c: &mut Criterion) {
5757
let mut count = criterion::black_box(0);
5858
let mut buf = Vec::new();
5959
loop {
60-
match r.read_event(&mut buf) {
60+
match r.read_event_into(&mut buf) {
6161
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
6262
Ok(Event::Eof) => break,
6363
_ => (),
@@ -137,7 +137,7 @@ fn bytes_text_unescaped(c: &mut Criterion) {
137137
let mut count = criterion::black_box(0);
138138
let mut nbtxt = criterion::black_box(0);
139139
loop {
140-
match r.read_event(&mut buf) {
140+
match r.read_event_into(&mut buf) {
141141
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
142142
Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(),
143143
Ok(Event::Eof) => break,
@@ -175,7 +175,7 @@ fn bytes_text_unescaped(c: &mut Criterion) {
175175
let mut count = criterion::black_box(0);
176176
let mut nbtxt = criterion::black_box(0);
177177
loop {
178-
match r.read_event(&mut buf) {
178+
match r.read_event_into(&mut buf) {
179179
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
180180
Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(),
181181
Ok(Event::Eof) => break,
@@ -215,7 +215,7 @@ fn one_event(c: &mut Criterion) {
215215
let mut r = Reader::from_reader(src.as_ref());
216216
let mut nbtxt = criterion::black_box(0);
217217
r.check_end_names(false).check_comments(false);
218-
match r.read_event(&mut buf) {
218+
match r.read_event_into(&mut buf) {
219219
Ok(Event::StartText(e)) => nbtxt += e.len(),
220220
something_else => panic!("Did not expect {:?}", something_else),
221221
};
@@ -235,7 +235,7 @@ fn one_event(c: &mut Criterion) {
235235
r.check_end_names(false)
236236
.check_comments(false)
237237
.trim_text(true);
238-
match r.read_event(&mut buf) {
238+
match r.read_event_into(&mut buf) {
239239
Ok(Event::Start(ref e)) => nbtxt += e.len(),
240240
something_else => panic!("Did not expect {:?}", something_else),
241241
};
@@ -255,7 +255,7 @@ fn one_event(c: &mut Criterion) {
255255
r.check_end_names(false)
256256
.check_comments(false)
257257
.trim_text(true);
258-
match r.read_event(&mut buf) {
258+
match r.read_event_into(&mut buf) {
259259
Ok(Event::Comment(ref e)) => nbtxt += e.unescaped().unwrap().len(),
260260
something_else => panic!("Did not expect {:?}", something_else),
261261
};
@@ -275,7 +275,7 @@ fn one_event(c: &mut Criterion) {
275275
r.check_end_names(false)
276276
.check_comments(false)
277277
.trim_text(true);
278-
match r.read_event(&mut buf) {
278+
match r.read_event_into(&mut buf) {
279279
Ok(Event::CData(ref e)) => nbtxt += e.len(),
280280
something_else => panic!("Did not expect {:?}", something_else),
281281
};
@@ -298,7 +298,7 @@ fn attributes(c: &mut Criterion) {
298298
let mut count = criterion::black_box(0);
299299
let mut buf = Vec::new();
300300
loop {
301-
match r.read_event(&mut buf) {
301+
match r.read_event_into(&mut buf) {
302302
Ok(Event::Empty(e)) => {
303303
for attr in e.attributes() {
304304
let _attr = attr.unwrap();
@@ -321,7 +321,7 @@ fn attributes(c: &mut Criterion) {
321321
let mut count = criterion::black_box(0);
322322
let mut buf = Vec::new();
323323
loop {
324-
match r.read_event(&mut buf) {
324+
match r.read_event_into(&mut buf) {
325325
Ok(Event::Empty(e)) => {
326326
for attr in e.attributes().with_checks(false) {
327327
let _attr = attr.unwrap();
@@ -344,7 +344,7 @@ fn attributes(c: &mut Criterion) {
344344
let mut count = criterion::black_box(0);
345345
let mut buf = Vec::new();
346346
loop {
347-
match r.read_event(&mut buf) {
347+
match r.read_event_into(&mut buf) {
348348
Ok(Event::Empty(e)) if e.name() == QName(b"player") => {
349349
for name in ["num", "status", "avg"] {
350350
if let Some(_attr) = e.try_get_attribute(name).unwrap() {

compare/benches/bench.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ fn low_level_comparison(c: &mut Criterion) {
1818
let mut count = criterion::black_box(0);
1919
let mut buf = Vec::new();
2020
loop {
21-
match r.read_event(&mut buf) {
21+
match r.read_event_into(&mut buf) {
2222
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
2323
Ok(Event::Eof) => break,
2424
_ => (),

examples/custom_entities.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
3131
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;
3232

3333
loop {
34-
match reader.read_event(&mut buf) {
34+
match reader.read_event_into(&mut buf) {
3535
Ok(Event::DocType(ref e)) => {
3636
for cap in entity_re.captures_iter(&e) {
3737
custom_entities.insert(cap[1].to_vec(), cap[2].to_vec());

examples/nested_readers.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ fn main() -> Result<(), quick_xml::Error> {
2020
let mut reader = Reader::from_file("tests/documents/document.xml")?;
2121
let mut found_tables = Vec::new();
2222
loop {
23-
match reader.read_event(&mut buf)? {
23+
match reader.read_event_into(&mut buf)? {
2424
Event::Start(element) => match element.name().as_ref() {
2525
b"w:tbl" => {
2626
count += 1;
@@ -33,7 +33,7 @@ fn main() -> Result<(), quick_xml::Error> {
3333
let mut row_index = 0;
3434
loop {
3535
skip_buf.clear();
36-
match reader.read_event(&mut skip_buf)? {
36+
match reader.read_event_into(&mut skip_buf)? {
3737
Event::Start(element) => match element.name().as_ref() {
3838
b"w:tr" => {
3939
stats.rows.push(vec![]);

examples/read_texts.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
fn main() {
22
use quick_xml::events::Event;
3+
use quick_xml::name::QName;
34
use quick_xml::Reader;
45

56
let xml = "<tag1>text1</tag1><tag1>text2</tag1>\
@@ -12,11 +13,11 @@ fn main() {
1213
let mut buf = Vec::new();
1314

1415
loop {
15-
match reader.read_event(&mut buf) {
16+
match reader.read_event_into(&mut buf) {
1617
Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => {
1718
txt.push(
1819
reader
19-
.read_text(b"tag2", &mut Vec::new())
20+
.read_text_into(QName(b"tag2"), &mut Vec::new())
2021
.expect("Cannot decode text value"),
2122
);
2223
println!("{:?}", txt);

fuzz/fuzz_targets/fuzz_target_1.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ fuzz_target!(|data: &[u8]| {
1111
let mut reader = Reader::from_reader(cursor);
1212
let mut buf = vec![];
1313
loop {
14-
match reader.read_event(&mut buf) {
14+
match reader.read_event_into(&mut buf) {
1515
Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e))=> {
1616
if e.unescaped().is_err() {
1717
break;

src/de/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -949,7 +949,7 @@ pub struct IoReader<R: BufRead> {
949949
impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
950950
fn next(&mut self) -> Result<DeEvent<'static>, DeError> {
951951
let event = loop {
952-
let e = self.reader.read_event(&mut self.buf)?;
952+
let e = self.reader.read_event_into(&mut self.buf)?;
953953
match e {
954954
//TODO: Probably not the best idea to treat StartText as usual text
955955
// Usually this event will represent a BOM
@@ -971,7 +971,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
971971
}
972972

973973
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
974-
match self.reader.read_to_end(name, &mut self.buf) {
974+
match self.reader.read_to_end_into(name, &mut self.buf) {
975975
Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
976976
other => Ok(other?),
977977
}
@@ -993,7 +993,7 @@ pub struct SliceReader<'de> {
993993
impl<'de> XmlRead<'de> for SliceReader<'de> {
994994
fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
995995
loop {
996-
let e = self.reader.read_event_unbuffered()?;
996+
let e = self.reader.read_event()?;
997997
match e {
998998
//TODO: Probably not the best idea to treat StartText as usual text
999999
// Usually this event will represent a BOM
@@ -1011,7 +1011,7 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
10111011
}
10121012

10131013
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
1014-
match self.reader.read_to_end_unbuffered(name) {
1014+
match self.reader.read_to_end(name) {
10151015
Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
10161016
other => Ok(other?),
10171017
}

src/events/attributes.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::reader::{is_whitespace, Reader};
99
use crate::utils::{write_byte_string, write_cow_string, Bytes};
1010
use std::fmt::{self, Debug, Display, Formatter};
1111
use std::iter::FusedIterator;
12-
use std::{borrow::Cow, collections::HashMap, io::BufRead, ops::Range};
12+
use std::{borrow::Cow, collections::HashMap, ops::Range};
1313

1414
/// A struct representing a key/value XML attribute.
1515
///
@@ -81,7 +81,7 @@ impl<'a> Attribute<'a> {
8181
///
8282
/// [`unescaped_value()`]: #method.unescaped_value
8383
/// [`Reader::decode()`]: ../../reader/struct.Reader.html#method.decode
84-
pub fn unescape_and_decode_value<B: BufRead>(&self, reader: &Reader<B>) -> XmlResult<String> {
84+
pub fn unescape_and_decode_value<B>(&self, reader: &Reader<B>) -> XmlResult<String> {
8585
self.do_unescape_and_decode_value(reader, None)
8686
}
8787

@@ -99,7 +99,7 @@ impl<'a> Attribute<'a> {
9999
/// # Pre-condition
100100
///
101101
/// The keys and values of `custom_entities`, if any, must be valid UTF-8.
102-
pub fn unescape_and_decode_value_with_custom_entities<B: BufRead>(
102+
pub fn unescape_and_decode_value_with_custom_entities<B>(
103103
&self,
104104
reader: &Reader<B>,
105105
custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
@@ -108,7 +108,7 @@ impl<'a> Attribute<'a> {
108108
}
109109

110110
/// The keys and values of `custom_entities`, if any, must be valid UTF-8.
111-
fn do_unescape_and_decode_value<B: BufRead>(
111+
fn do_unescape_and_decode_value<B>(
112112
&self,
113113
reader: &Reader<B>,
114114
custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,

src/events/mod.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
//!
1818
//! # Reading
1919
//! When reading an XML stream, the events are emitted by
20-
//! [`Reader::read_event`]. You must listen
20+
//! [`Reader::read_event_into`]. You must listen
2121
//! for the different types of events you are interested in.
2222
//!
2323
//! See [`Reader`] for further information.
@@ -29,10 +29,8 @@
2929
//!
3030
//! See [`Writer`] for further information.
3131
//!
32-
//! [`Reader::read_event`]: ../reader/struct.Reader.html#method.read_event
33-
//! [`Reader`]: ../reader/struct.Reader.html
34-
//! [`Writer`]: ../writer/struct.Writer.html
35-
//! [`Event`]: enum.Event.html
32+
//! [`Writer`]: crate::writer::Writer
33+
//! [`Event`]: crate::events::Event
3634
3735
pub mod attributes;
3836

@@ -41,7 +39,6 @@ use encoding_rs::Encoding;
4139
use std::borrow::Cow;
4240
use std::collections::HashMap;
4341
use std::fmt::{self, Debug, Formatter};
44-
use std::io::BufRead;
4542
use std::ops::Deref;
4643
use std::str::from_utf8;
4744

@@ -757,7 +754,7 @@ impl<'a> BytesText<'a> {
757754
/// it might be wiser to manually use
758755
/// 1. BytesText::unescaped()
759756
/// 2. Reader::decode(...)
760-
pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
757+
pub fn unescape_and_decode<B>(&self, reader: &Reader<B>) -> Result<String> {
761758
self.do_unescape_and_decode_with_custom_entities(reader, None)
762759
}
763760

@@ -771,15 +768,15 @@ impl<'a> BytesText<'a> {
771768
/// # Pre-condition
772769
///
773770
/// The keys and values of `custom_entities`, if any, must be valid UTF-8.
774-
pub fn unescape_and_decode_with_custom_entities<B: BufRead>(
771+
pub fn unescape_and_decode_with_custom_entities<B>(
775772
&self,
776773
reader: &Reader<B>,
777774
custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
778775
) -> Result<String> {
779776
self.do_unescape_and_decode_with_custom_entities(reader, Some(custom_entities))
780777
}
781778

782-
fn do_unescape_and_decode_with_custom_entities<B: BufRead>(
779+
fn do_unescape_and_decode_with_custom_entities<B>(
783780
&self,
784781
reader: &Reader<B>,
785782
custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
@@ -928,7 +925,7 @@ impl<'a> Deref for BytesCData<'a> {
928925

929926
////////////////////////////////////////////////////////////////////////////////////////////////////
930927

931-
/// Event emitted by [`Reader::read_event`].
928+
/// Event emitted by [`Reader::read_event_into`].
932929
#[derive(Clone, Debug, Eq, PartialEq)]
933930
pub enum Event<'a> {
934931
/// Text that appeared before the first opening tag or an [XML declaration].
@@ -956,7 +953,7 @@ pub enum Event<'a> {
956953
/// let mut reader = Reader::from_bytes(xml);
957954
/// let mut events_processed = 0;
958955
/// loop {
959-
/// match reader.read_event_unbuffered() {
956+
/// match reader.read_event() {
960957
/// Ok(Event::StartText(e)) => {
961958
/// assert_eq!(events_processed, 0);
962959
/// // Content contains BOM
@@ -1066,7 +1063,10 @@ mod test {
10661063
let mut buf = Vec::new();
10671064
let mut parsed_local_names = Vec::new();
10681065
loop {
1069-
match rdr.read_event(&mut buf).expect("unable to read xml event") {
1066+
match rdr
1067+
.read_event_into(&mut buf)
1068+
.expect("unable to read xml event")
1069+
{
10701070
Event::Start(ref e) => parsed_local_names.push(
10711071
from_utf8(e.local_name().as_ref())
10721072
.expect("unable to build str from local_name")

0 commit comments

Comments
 (0)