Skip to content

Commit b09495a

Browse files
committed
Remove buffered access for SliceReader as events always borrow from the input slice
1 parent 01ff58d commit b09495a

File tree

12 files changed

+112
-221
lines changed

12 files changed

+112
-221
lines changed

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ let xml = r#"<tag1 att1 = "test">
3030
</tag2>
3131
</tag1>"#;
3232

33-
let mut reader = Reader::from_str(xml);
33+
let mut reader = Reader::from_reader(xml.as_bytes());
34+
// If you want to read from a string or byte slice without buffering, use:
35+
// let mut reader = Reader::from_str(xml);
36+
// In that case, `Vec` is *not* needed for buffering below and you should use
37+
// `read_event` instead of `read_event_into`.
3438
reader.trim_text(true);
3539

3640
let mut count = 0;
@@ -75,9 +79,8 @@ let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
7579
let mut reader = Reader::from_str(xml);
7680
reader.trim_text(true);
7781
let mut writer = Writer::new(Cursor::new(Vec::new()));
78-
let mut buf = Vec::new();
7982
loop {
80-
match reader.read_event_into(&mut buf) {
83+
match reader.read_event() {
8184
Ok(Event::Start(ref e)) if e.name() == b"this_tag" => {
8285

8386
// creates a new element ... alternatively we could reuse `e` by calling
@@ -101,7 +104,6 @@ loop {
101104
Ok(e) => assert!(writer.write_event(&e).is_ok()),
102105
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
103106
}
104-
buf.clear();
105107
}
106108

107109
let result = writer.into_inner().into_inner();

benches/macrobenches.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,9 @@ static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
1919
// TODO: read the namespaces too
2020
// TODO: use fully normalized attribute values
2121
fn parse_document(doc: &[u8]) -> XmlResult<()> {
22-
let mut r = Reader::from_reader(doc);
23-
let mut buf = Vec::new();
22+
let mut r = Reader::from_bytes(doc);
2423
loop {
25-
match r.read_event_into(&mut buf)? {
24+
match r.read_event()? {
2625
Event::Start(e) | Event::Empty(e) => {
2726
for attr in e.attributes() {
2827
criterion::black_box(attr?.decode_and_unescape_value(&r)?);

benches/microbenches.rs

Lines changed: 22 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,15 @@ fn read_event(c: &mut Criterion) {
2929
let mut group = c.benchmark_group("read_event");
3030
group.bench_function("trim_text = false", |b| {
3131
b.iter(|| {
32-
let mut r = Reader::from_reader(SAMPLE);
32+
let mut r = Reader::from_bytes(SAMPLE);
3333
r.check_end_names(false).check_comments(false);
3434
let mut count = criterion::black_box(0);
35-
let mut buf = Vec::new();
3635
loop {
37-
match r.read_event_into(&mut buf) {
36+
match r.read_event() {
3837
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
3938
Ok(Event::Eof) => break,
4039
_ => (),
4140
}
42-
buf.clear();
4341
}
4442
assert_eq!(
4543
count, 1550,
@@ -50,19 +48,17 @@ fn read_event(c: &mut Criterion) {
5048

5149
group.bench_function("trim_text = true", |b| {
5250
b.iter(|| {
53-
let mut r = Reader::from_reader(SAMPLE);
51+
let mut r = Reader::from_bytes(SAMPLE);
5452
r.check_end_names(false)
5553
.check_comments(false)
5654
.trim_text(true);
5755
let mut count = criterion::black_box(0);
58-
let mut buf = Vec::new();
5956
loop {
60-
match r.read_event_into(&mut buf) {
57+
match r.read_event() {
6158
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
6259
Ok(Event::Eof) => break,
6360
_ => (),
6461
}
65-
buf.clear();
6662
}
6763
assert_eq!(
6864
count, 1550,
@@ -79,18 +75,16 @@ fn read_namespaced_event(c: &mut Criterion) {
7975
let mut group = c.benchmark_group("read_namespaced_event");
8076
group.bench_function("trim_text = false", |b| {
8177
b.iter(|| {
82-
let mut r = Reader::from_reader(SAMPLE);
78+
let mut r = Reader::from_bytes(SAMPLE);
8379
r.check_end_names(false).check_comments(false);
8480
let mut count = criterion::black_box(0);
85-
let mut buf = Vec::new();
8681
let mut ns_buf = Vec::new();
8782
loop {
88-
match r.read_namespaced_event(&mut buf, &mut ns_buf) {
83+
match r.read_namespaced_event(&mut ns_buf) {
8984
Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1,
9085
Ok((_, Event::Eof)) => break,
9186
_ => (),
9287
}
93-
buf.clear();
9488
}
9589
assert_eq!(
9690
count, 1550,
@@ -101,20 +95,18 @@ fn read_namespaced_event(c: &mut Criterion) {
10195

10296
group.bench_function("trim_text = true", |b| {
10397
b.iter(|| {
104-
let mut r = Reader::from_reader(SAMPLE);
98+
let mut r = Reader::from_bytes(SAMPLE);
10599
r.check_end_names(false)
106100
.check_comments(false)
107101
.trim_text(true);
108102
let mut count = criterion::black_box(0);
109-
let mut buf = Vec::new();
110103
let mut ns_buf = Vec::new();
111104
loop {
112-
match r.read_namespaced_event(&mut buf, &mut ns_buf) {
105+
match r.read_namespaced_event(&mut ns_buf) {
113106
Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1,
114107
Ok((_, Event::Eof)) => break,
115108
_ => (),
116109
}
117-
buf.clear();
118110
}
119111
assert_eq!(
120112
count, 1550,
@@ -130,78 +122,66 @@ fn one_event(c: &mut Criterion) {
130122
let mut group = c.benchmark_group("One event");
131123
group.bench_function("StartText", |b| {
132124
let src = "Hello world!".repeat(512 / 12).into_bytes();
133-
let mut buf = Vec::with_capacity(1024);
134125
b.iter(|| {
135-
let mut r = Reader::from_reader(src.as_ref());
126+
let mut r = Reader::from_bytes(src.as_ref());
136127
let mut nbtxt = criterion::black_box(0);
137128
r.check_end_names(false).check_comments(false);
138-
match r.read_event_into(&mut buf) {
129+
match r.read_event() {
139130
Ok(Event::StartText(e)) => nbtxt += e.len(),
140131
something_else => panic!("Did not expect {:?}", something_else),
141132
};
142133

143-
buf.clear();
144-
145134
assert_eq!(nbtxt, 504);
146135
})
147136
});
148137

149138
group.bench_function("Start", |b| {
150139
let src = format!(r#"<hello target="{}">"#, "world".repeat(512 / 5)).into_bytes();
151-
let mut buf = Vec::with_capacity(1024);
152140
b.iter(|| {
153-
let mut r = Reader::from_reader(src.as_ref());
141+
let mut r = Reader::from_bytes(src.as_ref());
154142
let mut nbtxt = criterion::black_box(0);
155143
r.check_end_names(false)
156144
.check_comments(false)
157145
.trim_text(true);
158-
match r.read_event_into(&mut buf) {
146+
match r.read_event() {
159147
Ok(Event::Start(ref e)) => nbtxt += e.len(),
160148
something_else => panic!("Did not expect {:?}", something_else),
161149
};
162150

163-
buf.clear();
164-
165151
assert_eq!(nbtxt, 525);
166152
})
167153
});
168154

169155
group.bench_function("Comment", |b| {
170156
let src = format!(r#"<!-- hello "{}" -->"#, "world".repeat(512 / 5)).into_bytes();
171-
let mut buf = Vec::with_capacity(1024);
172157
b.iter(|| {
173-
let mut r = Reader::from_reader(src.as_ref());
158+
let mut r = Reader::from_bytes(src.as_ref());
174159
let mut nbtxt = criterion::black_box(0);
175160
r.check_end_names(false)
176161
.check_comments(false)
177162
.trim_text(true);
178-
match r.read_event_into(&mut buf) {
163+
match r.read_event() {
179164
Ok(Event::Comment(e)) => nbtxt += e.decode_and_unescape(&r).unwrap().len(),
180165
something_else => panic!("Did not expect {:?}", something_else),
181166
};
182167

183-
buf.clear();
184-
185168
assert_eq!(nbtxt, 520);
186169
})
187170
});
188171

189172
group.bench_function("CData", |b| {
190173
let src = format!(r#"<![CDATA[hello "{}"]]>"#, "world".repeat(512 / 5)).into_bytes();
191-
let mut buf = Vec::with_capacity(1024);
192174
b.iter(|| {
193-
let mut r = Reader::from_reader(src.as_ref());
175+
let mut r = Reader::from_bytes(src.as_ref());
194176
let mut nbtxt = criterion::black_box(0);
195177
r.check_end_names(false)
196178
.check_comments(false)
197179
.trim_text(true);
198-
match r.read_event_into(&mut buf) {
180+
match r.read_event() {
199181
Ok(Event::CData(ref e)) => nbtxt += e.len(),
200182
something_else => panic!("Did not expect {:?}", something_else),
201183
};
202184

203-
buf.clear();
204-
205185
assert_eq!(nbtxt, 518);
206186
})
207187
});
@@ -213,12 +193,11 @@ fn attributes(c: &mut Criterion) {
213193
let mut group = c.benchmark_group("attributes");
214194
group.bench_function("with_checks = true", |b| {
215195
b.iter(|| {
216-
let mut r = Reader::from_reader(PLAYERS);
196+
let mut r = Reader::from_bytes(PLAYERS);
217197
r.check_end_names(false).check_comments(false);
218198
let mut count = criterion::black_box(0);
219-
let mut buf = Vec::new();
220199
loop {
221-
match r.read_event_into(&mut buf) {
200+
match r.read_event() {
222201
Ok(Event::Empty(e)) => {
223202
for attr in e.attributes() {
224203
let _attr = attr.unwrap();
@@ -228,20 +207,18 @@ fn attributes(c: &mut Criterion) {
228207
Ok(Event::Eof) => break,
229208
_ => (),
230209
}
231-
buf.clear();
232210
}
233211
assert_eq!(count, 1041);
234212
})
235213
});
236214

237215
group.bench_function("with_checks = false", |b| {
238216
b.iter(|| {
239-
let mut r = Reader::from_reader(PLAYERS);
217+
let mut r = Reader::from_bytes(PLAYERS);
240218
r.check_end_names(false).check_comments(false);
241219
let mut count = criterion::black_box(0);
242-
let mut buf = Vec::new();
243220
loop {
244-
match r.read_event_into(&mut buf) {
221+
match r.read_event() {
245222
Ok(Event::Empty(e)) => {
246223
for attr in e.attributes().with_checks(false) {
247224
let _attr = attr.unwrap();
@@ -251,20 +228,18 @@ fn attributes(c: &mut Criterion) {
251228
Ok(Event::Eof) => break,
252229
_ => (),
253230
}
254-
buf.clear();
255231
}
256232
assert_eq!(count, 1041);
257233
})
258234
});
259235

260236
group.bench_function("try_get_attribute", |b| {
261237
b.iter(|| {
262-
let mut r = Reader::from_reader(PLAYERS);
238+
let mut r = Reader::from_bytes(PLAYERS);
263239
r.check_end_names(false).check_comments(false);
264240
let mut count = criterion::black_box(0);
265-
let mut buf = Vec::new();
266241
loop {
267-
match r.read_event_into(&mut buf) {
242+
match r.read_event() {
268243
Ok(Event::Empty(e)) if e.name() == QName(b"player") => {
269244
for name in ["num", "status", "avg"] {
270245
if let Some(_attr) = e.try_get_attribute(name).unwrap() {
@@ -279,7 +254,6 @@ fn attributes(c: &mut Criterion) {
279254
Ok(Event::Eof) => break,
280255
_ => (),
281256
}
282-
buf.clear();
283257
}
284258
assert_eq!(count, 150);
285259
})

examples/custom_entities.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
2727
let mut reader = Reader::from_str(DATA);
2828
reader.trim_text(true);
2929

30-
let mut buf = Vec::new();
3130
let mut custom_entities: HashMap<String, String> = HashMap::new();
3231
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;
3332

3433
loop {
35-
match reader.read_event_into(&mut buf) {
34+
match reader.read_event() {
3635
Ok(Event::DocType(ref e)) => {
3736
for cap in entity_re.captures_iter(&e) {
3837
custom_entities.insert(

examples/read_texts.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,13 @@ fn main() {
1010
reader.trim_text(true);
1111

1212
let mut txt = Vec::new();
13-
let mut buf = Vec::new();
1413

1514
loop {
16-
match reader.read_event_into(&mut buf) {
15+
match reader.read_event() {
1716
Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => {
1817
txt.push(
1918
reader
20-
.read_text_into(QName(b"tag2"), &mut Vec::new())
19+
.read_text(QName(b"tag2"))
2120
.expect("Cannot decode text value"),
2221
);
2322
println!("{:?}", txt);
@@ -26,6 +25,5 @@ fn main() {
2625
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
2726
_ => (), // There are several other `Event`s we do not consider here
2827
}
29-
buf.clear();
3028
}
3129
}

src/lib.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,11 @@
3535
//! </tag2>
3636
//! </tag1>"#;
3737
//!
38-
//! let mut reader = Reader::from_str(xml);
38+
//! let mut reader = Reader::from_reader(xml.as_bytes());
39+
//! // If you want to read from a string or byte slice without buffering, use:
40+
//! // let mut reader = Reader::from_str(xml);
41+
//! // In that case, `Vec` is *not* needed for buffering below and you should use
42+
//! // `read_event` instead of `read_event_into`.
3943
//! reader.trim_text(true);
4044
//!
4145
//! let mut count = 0;
@@ -84,9 +88,8 @@
8488
//! let mut reader = Reader::from_str(xml);
8589
//! reader.trim_text(true);
8690
//! let mut writer = Writer::new(Cursor::new(Vec::new()));
87-
//! let mut buf = Vec::new();
8891
//! loop {
89-
//! match reader.read_event_into(&mut buf) {
92+
//! match reader.read_event() {
9093
//! Ok(Event::Start(ref e)) if e.name().as_ref() == b"this_tag" => {
9194
//!
9295
//! // creates a new element ... alternatively we could reuse `e` by calling
@@ -111,7 +114,6 @@
111114
//! // Ok(e) => assert!(writer.write(&buf).is_ok()),
112115
//! Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
113116
//! }
114-
//! buf.clear();
115117
//! }
116118
//!
117119
//! let result = writer.into_inner().into_inner();

src/reader.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,8 @@ pub trait InnerReader: Deref<Target = Self::Reader> + DerefMut {
138138
/// reader.trim_text(true);
139139
/// let mut count = 0;
140140
/// let mut txt = Vec::new();
141-
/// let mut buf = Vec::new();
142141
/// loop {
143-
/// match reader.read_event_into(&mut buf) {
142+
/// match reader.read_event() {
144143
/// Ok(Event::Start(ref e)) => {
145144
/// match e.name().as_ref() {
146145
/// b"tag1" => println!("attributes values: {:?}",
@@ -155,7 +154,6 @@ pub trait InnerReader: Deref<Target = Self::Reader> + DerefMut {
155154
/// Ok(Event::Eof) => break,
156155
/// _ => (),
157156
/// }
158-
/// buf.clear();
159157
/// }
160158
/// ```
161159
#[derive(Clone)]
@@ -1892,13 +1890,12 @@ mod test {
18921890
#[test]
18931891
fn str_always_has_utf8() {
18941892
let mut reader = crate::Reader::from_str("<?xml encoding='UTF-16'?>");
1895-
let mut buf = Vec::new();
18961893

18971894
assert_eq!(reader.decoder().encoding(), UTF_8);
1898-
reader.read_event_into(&mut buf).unwrap();
1895+
reader.read_event().unwrap();
18991896
assert_eq!(reader.decoder().encoding(), UTF_8);
19001897

1901-
assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
1898+
assert_eq!(reader.read_event().unwrap(), Event::Eof);
19021899
}
19031900
}
19041901
}

0 commit comments

Comments
 (0)