Skip to content

Commit fe73aa1

Browse files
committed
Remove the ability to use SliceReader with raw bytes.
In the near future, decoding will be performed automatically as the input is read. If the input has an unknown encoding, it must be decoded first, necessitating a buffer. Therefore, only the buffered implementation can be used for `Reader::from_bytes()`. If the encoding of the bytes is known up-front, you can decode them up-front and subsequently use `Reader::from_str()` if desired.
1 parent 559d0e8 commit fe73aa1

12 files changed

+189
-187
lines changed

Changelog.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,12 @@
138138
- [#423]: Removed `BytesText::from_plain` because it internally did escaping of a byte array,
139139
but escaping now works on strings. Use `BytesText::from_plain_str` instead
140140
- [#425]: Split the internal implementation of `Reader` into multiple files to better separate the
141-
buffered and unbuffered implementations. The buffered methods, e.g. `read_event_into(&mut buf)`,
141+
buffered and unbuffered implementations. The unbuffered methods, e.g. `read_event()`,
142142
will no longer be available when reading from a slice.
143+
- [#436]: When using `Reader` with raw bytes, a buffered parsing implementation will always be used.
144+
If using `Reader::from_str()`, the reader will borrow directly from the `&str`. If you have a byte
145+
array known to be valid UTF-8, it is recommended to convert it to `&str` first, which will enable
146+
the unbuffered (borrowing) implementation.
143147

144148
### New Tests
145149

@@ -171,6 +175,7 @@
171175
[#421]: https://github.com/tafia/quick-xml/pull/421
172176
[#423]: https://github.com/tafia/quick-xml/pull/423
173177
[#425]: https://github.com/tafia/quick-xml/pull/425
178+
[#436]: https://github.com/tafia/quick-xml/pull/436
174179

175180
## 0.23.0 -- 2022-05-08
176181

benches/macrobenches.rs

+14-14
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,23 @@ use quick_xml::events::Event;
33
use quick_xml::Reader;
44
use quick_xml::Result as XmlResult;
55

6-
static RPM_PRIMARY: &[u8] = include_bytes!("../tests/documents/rpm_primary.xml");
7-
static RPM_PRIMARY2: &[u8] = include_bytes!("../tests/documents/rpm_primary2.xml");
8-
static RPM_FILELISTS: &[u8] = include_bytes!("../tests/documents/rpm_filelists.xml");
9-
static RPM_OTHER: &[u8] = include_bytes!("../tests/documents/rpm_other.xml");
10-
static LIBREOFFICE_DOCUMENT: &[u8] = include_bytes!("../tests/documents/libreoffice_document.fodt");
11-
static DOCUMENT: &[u8] = include_bytes!("../tests/documents/document.xml");
12-
static TEST_WRITER_INDENT: &[u8] = include_bytes!("../tests/documents/test_writer_indent.xml");
13-
static SAMPLE_1: &[u8] = include_bytes!("../tests/documents/sample_1.xml");
14-
static LINESCORE: &[u8] = include_bytes!("../tests/documents/linescore.xml");
15-
static SAMPLE_RSS: &[u8] = include_bytes!("../tests/documents/sample_rss.xml");
16-
static SAMPLE_NS: &[u8] = include_bytes!("../tests/documents/sample_ns.xml");
17-
static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
6+
static RPM_PRIMARY: &str = include_str!("../tests/documents/rpm_primary.xml");
7+
static RPM_PRIMARY2: &str = include_str!("../tests/documents/rpm_primary2.xml");
8+
static RPM_FILELISTS: &str = include_str!("../tests/documents/rpm_filelists.xml");
9+
static RPM_OTHER: &str = include_str!("../tests/documents/rpm_other.xml");
10+
static LIBREOFFICE_DOCUMENT: &str = include_str!("../tests/documents/libreoffice_document.fodt");
11+
static DOCUMENT: &str = include_str!("../tests/documents/document.xml");
12+
static TEST_WRITER_INDENT: &str = include_str!("../tests/documents/test_writer_indent.xml");
13+
static SAMPLE_1: &str = include_str!("../tests/documents/sample_1.xml");
14+
static LINESCORE: &str = include_str!("../tests/documents/linescore.xml");
15+
static SAMPLE_RSS: &str = include_str!("../tests/documents/sample_rss.xml");
16+
static SAMPLE_NS: &str = include_str!("../tests/documents/sample_ns.xml");
17+
static PLAYERS: &str = include_str!("../tests/documents/players.xml");
1818

1919
// TODO: read the namespaces too
2020
// TODO: use fully normalized attribute values
21-
fn parse_document(doc: &[u8]) -> XmlResult<()> {
22-
let mut r = Reader::from_bytes(doc);
21+
fn parse_document(doc: &str) -> XmlResult<()> {
22+
let mut r = Reader::from_str(doc);
2323
loop {
2424
match r.read_event()? {
2525
Event::Start(e) | Event::Empty(e) => {

benches/microbenches.rs

+17-17
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ use quick_xml::events::Event;
55
use quick_xml::name::QName;
66
use quick_xml::Reader;
77

8-
static SAMPLE: &[u8] = include_bytes!("../tests/documents/sample_rss.xml");
9-
static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
8+
static SAMPLE: &str = include_str!("../tests/documents/sample_rss.xml");
9+
static PLAYERS: &str = include_str!("../tests/documents/players.xml");
1010

1111
static LOREM_IPSUM_TEXT: &str =
1212
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt
@@ -29,7 +29,7 @@ fn read_event(c: &mut Criterion) {
2929
let mut group = c.benchmark_group("read_event");
3030
group.bench_function("trim_text = false", |b| {
3131
b.iter(|| {
32-
let mut r = Reader::from_bytes(SAMPLE);
32+
let mut r = Reader::from_str(SAMPLE);
3333
r.check_end_names(false).check_comments(false);
3434
let mut count = criterion::black_box(0);
3535
loop {
@@ -48,7 +48,7 @@ fn read_event(c: &mut Criterion) {
4848

4949
group.bench_function("trim_text = true", |b| {
5050
b.iter(|| {
51-
let mut r = Reader::from_bytes(SAMPLE);
51+
let mut r = Reader::from_str(SAMPLE);
5252
r.check_end_names(false)
5353
.check_comments(false)
5454
.trim_text(true);
@@ -75,7 +75,7 @@ fn read_namespaced_event(c: &mut Criterion) {
7575
let mut group = c.benchmark_group("read_namespaced_event");
7676
group.bench_function("trim_text = false", |b| {
7777
b.iter(|| {
78-
let mut r = Reader::from_bytes(SAMPLE);
78+
let mut r = Reader::from_str(SAMPLE);
7979
r.check_end_names(false).check_comments(false);
8080
let mut count = criterion::black_box(0);
8181
let mut ns_buf = Vec::new();
@@ -95,7 +95,7 @@ fn read_namespaced_event(c: &mut Criterion) {
9595

9696
group.bench_function("trim_text = true", |b| {
9797
b.iter(|| {
98-
let mut r = Reader::from_bytes(SAMPLE);
98+
let mut r = Reader::from_str(SAMPLE);
9999
r.check_end_names(false)
100100
.check_comments(false)
101101
.trim_text(true);
@@ -121,9 +121,9 @@ fn read_namespaced_event(c: &mut Criterion) {
121121
fn one_event(c: &mut Criterion) {
122122
let mut group = c.benchmark_group("One event");
123123
group.bench_function("StartText", |b| {
124-
let src = "Hello world!".repeat(512 / 12).into_bytes();
124+
let src = "Hello world!".repeat(512 / 12);
125125
b.iter(|| {
126-
let mut r = Reader::from_bytes(src.as_ref());
126+
let mut r = Reader::from_str(&src);
127127
let mut nbtxt = criterion::black_box(0);
128128
r.check_end_names(false).check_comments(false);
129129
match r.read_event() {
@@ -136,9 +136,9 @@ fn one_event(c: &mut Criterion) {
136136
});
137137

138138
group.bench_function("Start", |b| {
139-
let src = format!(r#"<hello target="{}">"#, "world".repeat(512 / 5)).into_bytes();
139+
let src = format!(r#"<hello target="{}">"#, "world".repeat(512 / 5));
140140
b.iter(|| {
141-
let mut r = Reader::from_bytes(src.as_ref());
141+
let mut r = Reader::from_str(&src);
142142
let mut nbtxt = criterion::black_box(0);
143143
r.check_end_names(false)
144144
.check_comments(false)
@@ -153,9 +153,9 @@ fn one_event(c: &mut Criterion) {
153153
});
154154

155155
group.bench_function("Comment", |b| {
156-
let src = format!(r#"<!-- hello "{}" -->"#, "world".repeat(512 / 5)).into_bytes();
156+
let src = format!(r#"<!-- hello "{}" -->"#, "world".repeat(512 / 5));
157157
b.iter(|| {
158-
let mut r = Reader::from_bytes(src.as_ref());
158+
let mut r = Reader::from_str(&src);
159159
let mut nbtxt = criterion::black_box(0);
160160
r.check_end_names(false)
161161
.check_comments(false)
@@ -170,9 +170,9 @@ fn one_event(c: &mut Criterion) {
170170
});
171171

172172
group.bench_function("CData", |b| {
173-
let src = format!(r#"<![CDATA[hello "{}"]]>"#, "world".repeat(512 / 5)).into_bytes();
173+
let src = format!(r#"<![CDATA[hello "{}"]]>"#, "world".repeat(512 / 5));
174174
b.iter(|| {
175-
let mut r = Reader::from_bytes(src.as_ref());
175+
let mut r = Reader::from_str(&src);
176176
let mut nbtxt = criterion::black_box(0);
177177
r.check_end_names(false)
178178
.check_comments(false)
@@ -193,7 +193,7 @@ fn attributes(c: &mut Criterion) {
193193
let mut group = c.benchmark_group("attributes");
194194
group.bench_function("with_checks = true", |b| {
195195
b.iter(|| {
196-
let mut r = Reader::from_bytes(PLAYERS);
196+
let mut r = Reader::from_str(PLAYERS);
197197
r.check_end_names(false).check_comments(false);
198198
let mut count = criterion::black_box(0);
199199
loop {
@@ -214,7 +214,7 @@ fn attributes(c: &mut Criterion) {
214214

215215
group.bench_function("with_checks = false", |b| {
216216
b.iter(|| {
217-
let mut r = Reader::from_bytes(PLAYERS);
217+
let mut r = Reader::from_str(PLAYERS);
218218
r.check_end_names(false).check_comments(false);
219219
let mut count = criterion::black_box(0);
220220
loop {
@@ -235,7 +235,7 @@ fn attributes(c: &mut Criterion) {
235235

236236
group.bench_function("try_get_attribute", |b| {
237237
b.iter(|| {
238-
let mut r = Reader::from_bytes(PLAYERS);
238+
let mut r = Reader::from_str(PLAYERS);
239239
r.check_end_names(false).check_comments(false);
240240
let mut count = criterion::black_box(0);
241241
loop {

src/de/mod.rs

+25-30
Original file line numberDiff line numberDiff line change
@@ -306,8 +306,8 @@ where
306306
}
307307

308308
/// Deserialize from a reader. This method will do internal copies of data
309-
/// readed from `reader`. If you want have a `&[u8]` or `&str` input and want
310-
/// to borrow as much as possible, use [`from_slice`] or [`from_str`]
309+
/// read from `reader`. If you have a `&str` input and want
310+
/// to borrow as much as possible, use [`from_str`]
311311
pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
312312
where
313313
R: BufRead,
@@ -685,17 +685,7 @@ where
685685
impl<'de> Deserializer<'de, SliceReader<'de>> {
686686
/// Create new deserializer that will borrow data from the specified string
687687
pub fn from_str(s: &'de str) -> Self {
688-
Self::from_borrowing_reader(Reader::from_str(s))
689-
}
690-
691-
/// Create new deserializer that will borrow data from the specified byte array
692-
pub fn from_slice(bytes: &'de [u8]) -> Self {
693-
Self::from_borrowing_reader(Reader::from_bytes(bytes))
694-
}
695-
696-
/// Create new deserializer that will borrow data from the specified borrowing reader
697-
#[inline]
698-
fn from_borrowing_reader(mut reader: Reader<crate::SliceReader<'de>>) -> Self {
688+
let mut reader = Reader::from_str(s);
699689
reader
700690
.expand_empty_elements(true)
701691
.check_end_names(true)
@@ -726,6 +716,13 @@ where
726716
}
727717
}
728718

719+
impl<'de> Deserializer<'de, IoReader<&'de [u8]>> {
720+
/// Create new deserializer that will read data from the specified byte array
/// using the buffered implementation (the data is copied, not borrowed)
721+
pub fn from_slice(bytes: &'de [u8]) -> Self {
722+
Self::from_reader(bytes)
723+
}
724+
}
725+
729726
impl<'de, 'a, R> de::Deserializer<'de> for &'a mut Deserializer<'de, R>
730727
where
731728
R: XmlRead<'de>,
@@ -970,10 +967,10 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
970967
}
971968
}
972969

973-
/// XML input source that reads from a slice of bytes and can borrow from it.
970+
/// XML input source that reads from a `&str` and can borrow from it.
974971
///
975972
/// You cannot create it, it is created automatically when you call
976-
/// [`Deserializer::from_str`] or [`Deserializer::from_slice`]
973+
/// [`Deserializer::from_str`]
977974
pub struct SliceReader<'de> {
978975
reader: Reader<crate::SliceReader<'de>>,
979976
}
@@ -1025,8 +1022,8 @@ mod tests {
10251022
/// Checks that `peek()` and `read()` behaves correctly after `skip()`
10261023
#[test]
10271024
fn read_and_peek() {
1028-
let mut de = Deserializer::from_slice(
1029-
br#"
1025+
let mut de = Deserializer::from_str(
1026+
r#"
10301027
<root>
10311028
<inner>
10321029
text
@@ -1166,8 +1163,8 @@ mod tests {
11661163
/// Checks that `read_to_end()` behaves correctly after `skip()`
11671164
#[test]
11681165
fn read_to_end() {
1169-
let mut de = Deserializer::from_slice(
1170-
br#"
1166+
let mut de = Deserializer::from_str(
1167+
r#"
11711168
<root>
11721169
<skip>
11731170
text
@@ -1270,8 +1267,8 @@ mod tests {
12701267
item: Vec<()>,
12711268
}
12721269

1273-
let mut de = Deserializer::from_slice(
1274-
br#"
1270+
let mut de = Deserializer::from_str(
1271+
r#"
12751272
<any-name>
12761273
<item/>
12771274
<another-item>
@@ -1296,8 +1293,8 @@ mod tests {
12961293
fn read_to_end() {
12971294
use crate::de::DeEvent::*;
12981295

1299-
let mut de = Deserializer::from_slice(
1300-
br#"
1296+
let mut de = Deserializer::from_str(
1297+
r#"
13011298
<root>
13021299
<tag a="1"><tag>text</tag>content</tag>
13031300
<tag a="2"><![CDATA[cdata content]]></tag>
@@ -1343,15 +1340,14 @@ mod tests {
13431340
<item name="hello" source="world.rs">Some text</item>
13441341
<item2/>
13451342
<item3 value="world" />
1346-
"##
1347-
.as_bytes();
1343+
"##;
13481344

13491345
let mut reader1 = IoReader {
1350-
reader: Reader::from_reader(s),
1346+
reader: Reader::from_reader(s.as_bytes()),
13511347
buf: Vec::new(),
13521348
};
13531349
let mut reader2 = SliceReader {
1354-
reader: Reader::from_bytes(s),
1350+
reader: Reader::from_str(s),
13551351
};
13561352

13571353
loop {
@@ -1373,11 +1369,10 @@ mod tests {
13731369
<item2></item2>
13741370
<item3/>
13751371
<item4 value="world" />
1376-
"##
1377-
.as_bytes();
1372+
"##;
13781373

13791374
let mut reader = SliceReader {
1380-
reader: Reader::from_bytes(s),
1375+
reader: Reader::from_str(s),
13811376
};
13821377

13831378
reader

src/events/mod.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -983,8 +983,9 @@ pub enum Event<'a> {
983983
/// let xml = b"\xEF\xBB\xBF<?xml version='1.0'?>";
984984
/// let mut reader = Reader::from_bytes(xml);
985985
/// let mut events_processed = 0;
986+
/// let mut event_buffer = Vec::new();
986987
/// loop {
987-
/// match reader.read_event() {
988+
/// match reader.read_event_into(&mut event_buffer) {
988989
/// Ok(Event::StartText(e)) => {
989990
/// assert_eq!(events_processed, 0);
990991
/// // Content contains BOM

0 commit comments

Comments
 (0)