Skip to content

Commit b456b5a

Browse files
authored
Merge pull request #426 from Mingun/read-event
Use borrowing `read_event` instead of buffering `read_event_into` where possible
2 parents 068b36e + c49c349 commit b456b5a

File tree

7 files changed

+97
-227
lines changed

7 files changed

+97
-227
lines changed

README.md

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,8 @@ use quick_xml::events::Event;
2525

2626
let xml = r#"<tag1 att1 = "test">
2727
<tag2><!--Test comment-->Test</tag2>
28-
<tag2>
29-
Test 2
30-
</tag2>
31-
</tag1>"#;
32-
28+
<tag2>Test 2</tag2>
29+
</tag1>"#;
3330
let mut reader = Reader::from_str(xml);
3431
reader.trim_text(true);
3532

@@ -43,20 +40,24 @@ loop {
4340
// when the input is a &str or a &[u8], we don't actually need to use another
4441
// buffer, we could directly call `reader.read_event()`
4542
match reader.read_event_into(&mut buf) {
46-
Ok(Event::Start(ref e)) => {
47-
match e.name() {
43+
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
44+
// exits the loop when reaching end of file
45+
Ok(Event::Eof) => break,
46+
47+
Ok(Event::Start(e)) => {
48+
match e.name().as_ref() {
4849
b"tag1" => println!("attributes values: {:?}",
49-
e.attributes().map(|a| a.unwrap().value).collect::<Vec<_>>()),
50+
e.attributes().map(|a| a.unwrap().value)
51+
.collect::<Vec<_>>()),
5052
b"tag2" => count += 1,
5153
_ => (),
5254
}
53-
},
54-
Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).unwrap().into_owned()),
55-
Ok(Event::Eof) => break, // exits the loop when reaching end of file
56-
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
57-
_ => (), // There are several other `Event`s we do not consider here
58-
}
55+
}
56+
Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
5957

58+
// There are several other `Event`s we do not consider here
59+
_ => (),
60+
}
6061
// if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
6162
buf.clear();
6263
}
@@ -65,24 +66,21 @@ loop {
6566
### Writer
6667

6768
```rust
68-
use quick_xml::Writer;
69-
use quick_xml::Reader;
7069
use quick_xml::events::{Event, BytesEnd, BytesStart};
70+
use quick_xml::{Reader, Writer};
7171
use std::io::Cursor;
72-
use std::iter;
7372

7473
let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
7574
let mut reader = Reader::from_str(xml);
7675
reader.trim_text(true);
7776
let mut writer = Writer::new(Cursor::new(Vec::new()));
78-
let mut buf = Vec::new();
7977
loop {
80-
match reader.read_event_into(&mut buf) {
81-
Ok(Event::Start(ref e)) if e.name() == b"this_tag" => {
78+
match reader.read_event() {
79+
Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => {
8280

8381
// creates a new element ... alternatively we could reuse `e` by calling
8482
// `e.into_owned()`
85-
let mut elem = BytesStart::owned(b"my_elem".to_vec(), "my_elem".len());
83+
let mut elem = BytesStart::owned_name(b"my_elem".to_vec());
8684

8785
// collect existing attributes
8886
elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
@@ -93,15 +91,14 @@ loop {
9391
// writes the event to the writer
9492
assert!(writer.write_event(Event::Start(elem)).is_ok());
9593
},
96-
Ok(Event::End(ref e)) if e.name() == b"this_tag" => {
94+
Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
9795
assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
9896
},
9997
Ok(Event::Eof) => break,
100-
// you can use either `e` or `&e` if you don't want to move the event
101-
Ok(e) => assert!(writer.write_event(&e).is_ok()),
98+
// we can either move or borrow the event to write, depending on your use-case
99+
Ok(e) => assert!(writer.write_event(e).is_ok()),
102100
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
103101
}
104-
buf.clear();
105102
}
106103

107104
let result = writer.into_inner().into_inner();

examples/custom_entities.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
2727
let mut reader = Reader::from_str(DATA);
2828
reader.trim_text(true);
2929

30-
let mut buf = Vec::new();
3130
let mut custom_entities: HashMap<String, String> = HashMap::new();
3231
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;
3332

3433
loop {
35-
match reader.read_event_into(&mut buf) {
34+
match reader.read_event() {
3635
Ok(Event::DocType(ref e)) => {
3736
for cap in entity_re.captures_iter(&e) {
3837
custom_entities.insert(

src/lib.rs

Lines changed: 5 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
//! High performance XML reader/writer.
22
//!
3-
//! ## Description
3+
//! # Description
44
//!
55
//! quick-xml contains two modes of operation:
66
//!
77
//! A streaming API based on the [StAX] model. This is suited for larger XML documents which
88
//! cannot be completely read into memory at once.
99
//!
10-
//! The user has to expicitely _ask_ for the next XML event, similar
10+
//! The user has to explicitly _ask_ for the next XML event, similar
1111
//! to a database cursor.
1212
//! This is achieved by the following two structs:
1313
//!
@@ -20,104 +20,10 @@
2020
//! Furthermore, quick-xml also contains optional [Serde] support to directly serialize and deserialize from
2121
//! structs, without having to deal with the XML events.
2222
//!
23-
//! ## Examples
23+
//! # Examples
2424
//!
25-
//! ### Reader
26-
//!
27-
//! ```rust
28-
//! use quick_xml::Reader;
29-
//! use quick_xml::events::Event;
30-
//!
31-
//! let xml = r#"<tag1 att1 = "test">
32-
//! <tag2><!--Test comment-->Test</tag2>
33-
//! <tag2>
34-
//! Test 2
35-
//! </tag2>
36-
//! </tag1>"#;
37-
//!
38-
//! let mut reader = Reader::from_str(xml);
39-
//! reader.trim_text(true);
40-
//!
41-
//! let mut count = 0;
42-
//! let mut txt = Vec::new();
43-
//! let mut buf = Vec::new();
44-
//!
45-
//! // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
46-
//! loop {
47-
//! match reader.read_event_into(&mut buf) {
48-
//! // for triggering namespaced events, use this instead:
49-
//! // match reader.read_namespaced_event(&mut buf) {
50-
//! Ok(Event::Start(ref e)) => {
51-
//! // for namespaced:
52-
//! // Ok((ref namespace_value, Event::Start(ref e)))
53-
//! match e.name().as_ref() {
54-
//! b"tag1" => println!("attributes values: {:?}",
55-
//! e.attributes().map(|a| a.unwrap().value)
56-
//! .collect::<Vec<_>>()),
57-
//! b"tag2" => count += 1,
58-
//! _ => (),
59-
//! }
60-
//! },
61-
//! // unescape and decode the text event using the reader encoding
62-
//! Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
63-
//! Ok(Event::Eof) => break, // exits the loop when reaching end of file
64-
//! Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
65-
//! _ => (), // There are several other `Event`s we do not consider here
66-
//! }
67-
//!
68-
//! // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
69-
//! buf.clear();
70-
//! }
71-
//! ```
72-
//!
73-
//! ### Writer
74-
//!
75-
//! ```rust
76-
//! # use pretty_assertions::assert_eq;
77-
//! use quick_xml::Writer;
78-
//! use quick_xml::events::{Event, BytesEnd, BytesStart};
79-
//! use quick_xml::Reader;
80-
//! use std::io::Cursor;
81-
//! use std::iter;
82-
//!
83-
//! let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
84-
//! let mut reader = Reader::from_str(xml);
85-
//! reader.trim_text(true);
86-
//! let mut writer = Writer::new(Cursor::new(Vec::new()));
87-
//! let mut buf = Vec::new();
88-
//! loop {
89-
//! match reader.read_event_into(&mut buf) {
90-
//! Ok(Event::Start(ref e)) if e.name().as_ref() == b"this_tag" => {
91-
//!
92-
//! // crates a new element ... alternatively we could reuse `e` by calling
93-
//! // `e.into_owned()`
94-
//! let mut elem = BytesStart::owned(b"my_elem".to_vec(), "my_elem".len());
95-
//!
96-
//! // collect existing attributes
97-
//! elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
98-
//!
99-
//! // copy existing attributes, adds a new my-key="some value" attribute
100-
//! elem.push_attribute(("my-key", "some value"));
101-
//!
102-
//! // writes the event to the writer
103-
//! assert!(writer.write_event(Event::Start(elem)).is_ok());
104-
//! },
105-
//! Ok(Event::End(ref e)) if e.name().as_ref() == b"this_tag" => {
106-
//! assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
107-
//! },
108-
//! Ok(Event::Eof) => break,
109-
//! Ok(e) => assert!(writer.write_event(e).is_ok()),
110-
//! // or using the buffer
111-
//! // Ok(e) => assert!(writer.write(&buf).is_ok()),
112-
//! Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
113-
//! }
114-
//! buf.clear();
115-
//! }
116-
//!
117-
//! let result = writer.into_inner().into_inner();
118-
//! let expected = r#"<my_elem k1="v1" k2="v2" my-key="some value"><child>text</child></my_elem>"#;
119-
//! assert_eq!(result, expected.as_bytes());
120-
//! ```
25+
//! - For a reading example see [`Reader`]
26+
//! - For a writing example see [`Writer`]
12127
//!
12228
//! # Features
12329
//!

src/reader.rs

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -118,28 +118,39 @@ impl EncodingRef {
118118
/// let xml = r#"<tag1 att1 = "test">
119119
/// <tag2><!--Test comment-->Test</tag2>
120120
/// <tag2>Test 2</tag2>
121-
/// </tag1>"#;
121+
/// </tag1>"#;
122122
/// let mut reader = Reader::from_str(xml);
123123
/// reader.trim_text(true);
124+
///
124125
/// let mut count = 0;
125126
/// let mut txt = Vec::new();
126127
/// let mut buf = Vec::new();
128+
///
129+
/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
127130
/// loop {
131+
/// // NOTE: this is the generic case when we don't know about the input BufRead.
132+
/// // when the input is a &str or a &[u8], we don't actually need to use another
133+
/// // buffer, we could directly call `reader.read_event()`
128134
/// match reader.read_event_into(&mut buf) {
129-
/// Ok(Event::Start(ref e)) => {
135+
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
136+
/// // exits the loop when reaching end of file
137+
/// Ok(Event::Eof) => break,
138+
///
139+
/// Ok(Event::Start(e)) => {
130140
/// match e.name().as_ref() {
131141
/// b"tag1" => println!("attributes values: {:?}",
132142
/// e.attributes().map(|a| a.unwrap().value)
133143
/// .collect::<Vec<_>>()),
134144
/// b"tag2" => count += 1,
135145
/// _ => (),
136146
/// }
137-
/// },
147+
/// }
138148
/// Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
139-
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
140-
/// Ok(Event::Eof) => break,
149+
///
150+
/// // There are several other `Event`s we do not consider here
141151
/// _ => (),
142152
/// }
153+
/// // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
143154
/// buf.clear();
144155
/// }
145156
/// ```

src/writer.rs

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,23 @@ use std::io::Write;
1010
///
1111
/// # Examples
1212
///
13-
/// ```rust
13+
/// ```
1414
/// # use pretty_assertions::assert_eq;
15-
/// use quick_xml::{Reader, Writer};
1615
/// use quick_xml::events::{Event, BytesEnd, BytesStart};
16+
/// use quick_xml::{Reader, Writer};
1717
/// use std::io::Cursor;
1818
///
1919
/// let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
2020
/// let mut reader = Reader::from_str(xml);
2121
/// reader.trim_text(true);
2222
/// let mut writer = Writer::new(Cursor::new(Vec::new()));
23-
/// let mut buf = Vec::new();
2423
/// loop {
25-
/// match reader.read_event_into(&mut buf) {
26-
/// Ok(Event::Start(ref e)) if e.name().as_ref() == b"this_tag" => {
24+
/// match reader.read_event() {
25+
/// Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => {
2726
///
2827
/// // creates a new element ... alternatively we could reuse `e` by calling
2928
/// // `e.into_owned()`
30-
/// let mut elem = BytesStart::owned(b"my_elem".to_vec(), "my_elem".len());
29+
/// let mut elem = BytesStart::owned_name(b"my_elem".to_vec());
3130
///
3231
/// // collect existing attributes
3332
/// elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
@@ -38,15 +37,14 @@ use std::io::Write;
3837
/// // writes the event to the writer
3938
/// assert!(writer.write_event(Event::Start(elem)).is_ok());
4039
/// },
41-
/// Ok(Event::End(ref e)) if e.name().as_ref() == b"this_tag" => {
40+
/// Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
4241
/// assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
4342
/// },
4443
/// Ok(Event::Eof) => break,
4544
/// // we can either move or borrow the event to write, depending on your use-case
46-
/// Ok(e) => assert!(writer.write_event(&e).is_ok()),
47-
/// Err(e) => panic!("{}", e),
45+
/// Ok(e) => assert!(writer.write_event(e).is_ok()),
46+
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
4847
/// }
49-
/// buf.clear();
5048
/// }
5149
///
5250
/// let result = writer.into_inner().into_inner();

0 commit comments

Comments
 (0)