Skip to content

Commit e5d300e

Browse files
committed
Introduce an AsyncReader instead of overloading the Reader
1 parent 75bf989 commit e5d300e

20 files changed

+2715
-718
lines changed

Cargo.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ travis-ci = { repository = "tafia/quick-xml" }
1818
[dependencies]
1919
async-recursion = { version = "0.3.2", optional = true }
2020
encoding_rs = { version = "0.8.26", optional = true }
21-
tokio = { version = "0.2.22", features = ["fs", "io-util"], optional = true }
21+
tokio = { version = "1.4.0", features = ["fs", "io-util"], optional = true }
2222
serde = { version = "1.0", optional = true }
2323
memchr = "2.3.4"
2424

2525
[dev-dependencies]
2626
serde = { version = "1.0", features = ["derive"] }
2727
serde-value = "0.7"
2828
regex = "1"
29-
tokio = { version = "0.2.22", features = ["macros", "rt-threaded"] }
29+
tokio = { version = "1.4.0", features = ["macros", "rt-multi-thread"] }
3030

3131
[lib]
3232
bench = false

examples/custom_entities.rs

+62-9
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ extern crate quick_xml;
1111
extern crate regex;
1212

1313
use quick_xml::events::Event;
14+
#[cfg(feature = "asynchronous")]
15+
use quick_xml::AsyncReader;
1416
use quick_xml::Reader;
1517
use regex::bytes::Regex;
1618
use std::collections::HashMap;
@@ -27,22 +29,15 @@ const DATA: &str = r#"
2729
2830
"#;
2931

30-
fn main() -> Result<(), Box<dyn std::error::Error>> {
31-
let mut reader = Reader::from_str(DATA);
32+
fn custom_entities(data: &str) -> Result<(), Box<dyn std::error::Error>> {
33+
let mut reader = Reader::from_str(data);
3234
reader.trim_text(true);
3335

3436
let mut buf = Vec::new();
3537
let mut custom_entities = HashMap::new();
3638
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;
3739

38-
#[cfg(feature = "asynchronous")]
39-
let mut runtime = Runtime::new().expect("Runtime cannot be initialized");
40-
4140
loop {
42-
#[cfg(feature = "asynchronous")]
43-
let event = runtime.block_on(async { reader.read_event(&mut buf).await });
44-
45-
#[cfg(not(feature = "asynchronous"))]
4641
let event = reader.read_event(&mut buf);
4742

4843
match event {
@@ -80,3 +75,61 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
8075
}
8176
Ok(())
8277
}
78+
79+
#[cfg(feature = "asynchronous")]
80+
async fn custom_entities_async(data: &str) -> Result<(), Box<dyn std::error::Error>> {
81+
let mut reader = AsyncReader::from_str(data);
82+
reader.trim_text(true);
83+
84+
let mut buf = Vec::new();
85+
let mut custom_entities = HashMap::new();
86+
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;
87+
88+
loop {
89+
match reader.read_event(&mut buf).await {
90+
Ok(Event::DocType(ref e)) => {
91+
for cap in entity_re.captures_iter(&e) {
92+
custom_entities.insert(cap[1].to_vec(), cap[2].to_vec());
93+
}
94+
}
95+
Ok(Event::Start(ref e)) => match e.name() {
96+
b"test" => println!(
97+
"attributes values: {:?}",
98+
e.attributes()
99+
.map(|a| a
100+
.unwrap()
101+
.unescape_and_decode_value_with_custom_entities(
102+
&reader,
103+
&custom_entities
104+
)
105+
.unwrap())
106+
.collect::<Vec<_>>()
107+
),
108+
_ => (),
109+
},
110+
Ok(Event::Text(ref e)) => {
111+
println!(
112+
"text value: {}",
113+
e.unescape_and_decode_with_custom_entities(&reader, &custom_entities)
114+
.unwrap()
115+
);
116+
}
117+
Ok(Event::Eof) => break,
118+
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
119+
_ => (),
120+
}
121+
}
122+
Ok(())
123+
}
124+
125+
fn main() -> Result<(), Box<dyn std::error::Error>> {
126+
custom_entities(DATA)?;
127+
128+
#[cfg(feature = "asynchronous")]
129+
let runtime = Runtime::new().expect("Runtime cannot be initialized");
130+
131+
#[cfg(feature = "asynchronous")]
132+
runtime.block_on(async { custom_entities_async(DATA).await })?;
133+
134+
Ok(())
135+
}

examples/issue68.rs

+71-20
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#![allow(unused)]
22

33
use quick_xml::events::Event;
4+
#[cfg(feature = "asynchronous")]
5+
use quick_xml::AsyncReader;
46
use quick_xml::Reader;
57
use std::io::Read;
68
#[cfg(feature = "asynchronous")]
@@ -55,44 +57,87 @@ impl Response {
5557
}
5658
}
5759

58-
fn parse_report(xml_data: &str) -> Vec<Resource> {
60+
#[derive(Clone, Copy)]
61+
enum State {
62+
Root,
63+
MultiStatus,
64+
Response,
65+
Success,
66+
Error,
67+
}
68+
69+
/// Async twin of `parse_report`: walks `xml_data` with an `AsyncReader`,
/// tracking element depth and parser `State` for elements in the `DAV:`
/// namespace.
///
/// Stops at end of input or on the first reader error. The returned vector
/// is never filled by this function, so it is always empty.
///
/// NOTE(review): `state` starts as `State::MultiStatus`, so the
/// `(0, State::Root, ..)` arm cannot match the first element. This is kept
/// exactly as in the synchronous version — confirm whether it is intended.
#[cfg(feature = "asynchronous")]
async fn parse_report_async(xml_data: &str) -> Vec<Resource> {
    let result: Vec<Resource> = Vec::new();

    let mut reader = AsyncReader::from_str(xml_data);
    reader.trim_text(true);

    let mut count = 0;
    let mut buf = Vec::new();
    let mut ns_buffer = Vec::new();

    let mut responses: Vec<Response> = Vec::new();
    let mut current_response = Response::new();
    let mut current_prop = Prop::new();

    let mut depth = 0;
    let mut state = State::MultiStatus;

    loop {
        // A reader error ends parsing silently, just like end of input.
        let (namespace, event) = match reader
            .read_namespaced_event(&mut buf, &mut ns_buffer)
            .await
        {
            Ok(pair) => pair,
            Err(_) => break,
        };
        // Elements without a resolved namespace compare as the empty slice.
        let namespace = namespace.unwrap_or_default();

        match event {
            Event::Start(start) => {
                match (depth, state, namespace, start.local_name()) {
                    (0, State::Root, b"DAV:", b"multistatus") => state = State::MultiStatus,
                    (1, State::MultiStatus, b"DAV:", b"response") => {
                        state = State::Response;
                        current_response = Response::new();
                    }
                    (2, State::Response, b"DAV:", b"href") => {
                        current_response.href = start.unescape_and_decode(&reader).unwrap();
                    }
                    _ => {}
                }
                depth += 1;
            }
            Event::End(end) => {
                // `depth` is still the depth *inside* the element being closed.
                match (depth, state, namespace, end.local_name()) {
                    (1, State::MultiStatus, b"DAV:", b"multistatus") => state = State::Root,
                    (2, State::MultiStatus, b"DAV:", b"multistatus") => state = State::MultiStatus,
                    _ => {}
                }
                depth -= 1;
            }
            Event::Eof => break,
            _ => {}
        }
    }

    result
}
121+
122+
fn parse_report(xml_data: &str) -> Vec<Resource> {
123+
let result = Vec::<Resource>::new();
91124

92-
#[cfg(not(feature = "asynchronous"))]
93-
let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer);
125+
let mut reader = Reader::from_str(xml_data);
126+
reader.trim_text(true);
94127

95-
match event {
128+
let mut count = 0;
129+
let mut buf = Vec::new();
130+
let mut ns_buffer = Vec::new();
131+
132+
let mut responses = Vec::<Response>::new();
133+
let mut current_response = Response::new();
134+
let mut current_prop = Prop::new();
135+
136+
let mut depth = 0;
137+
let mut state = State::MultiStatus;
138+
139+
loop {
140+
match reader.read_namespaced_event(&mut buf, &mut ns_buffer) {
96141
Ok((namespace_value, Event::Start(e))) => {
97142
let namespace_value = namespace_value.unwrap_or_default();
98143
match (depth, state, namespace_value, e.local_name()) {
@@ -148,4 +193,10 @@ fn main() {
148193
"#;
149194

150195
parse_report(test_data);
196+
197+
#[cfg(feature = "asynchronous")]
198+
let runtime = Runtime::new().expect("Runtime cannot be initialized");
199+
200+
#[cfg(feature = "asynchronous")]
201+
runtime.block_on(async { parse_report_async(test_data).await });
151202
}

examples/nested_readers.rs

+86-25
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
use quick_xml::events::Event;
2+
#[cfg(feature = "asynchronous")]
3+
use quick_xml::AsyncReader;
24
use quick_xml::Reader;
35
#[cfg(feature = "asynchronous")]
46
use tokio::runtime::Runtime;
@@ -10,34 +12,18 @@ struct TableStat {
1012
index: u8,
1113
rows: Vec<Vec<String>>,
1214
}
13-
// demonstrate how to nest readers
14-
// This is useful for when you need to traverse
15-
// a few levels of a document to extract things.
16-
fn main() -> Result<(), quick_xml::Error> {
15+
16+
fn nest_readers() -> Result<(), quick_xml::Error> {
1717
let mut buf = Vec::new();
1818
// buffer for nested reader
1919
let mut skip_buf = Vec::new();
2020
let mut count = 0;
2121

22-
#[cfg(feature = "asynchronous")]
23-
let mut runtime = Runtime::new().expect("Runtime cannot be initialized");
24-
25-
#[cfg(feature = "asynchronous")]
26-
let mut reader =
27-
runtime.block_on(async { Reader::from_file("tests/documents/document.xml").await })?;
28-
29-
#[cfg(not(feature = "asynchronous"))]
3022
let mut reader = Reader::from_file("tests/documents/document.xml")?;
3123

3224
let mut found_tables = Vec::new();
3325
loop {
34-
#[cfg(feature = "asynchronous")]
35-
let event = runtime.block_on(async { reader.read_event(&mut buf).await })?;
36-
37-
#[cfg(not(feature = "asynchronous"))]
38-
let event = reader.read_event(&mut buf)?;
39-
40-
match event {
26+
match reader.read_event(&mut buf)? {
4127
Event::Start(element) => match element.name() {
4228
b"w:tbl" => {
4329
count += 1;
@@ -51,14 +37,74 @@ fn main() -> Result<(), quick_xml::Error> {
5137
loop {
5238
skip_buf.clear();
5339

54-
#[cfg(feature = "asynchronous")]
55-
let event =
56-
runtime.block_on(async { reader.read_event(&mut skip_buf).await })?;
40+
match reader.read_event(&mut skip_buf)? {
41+
Event::Start(element) => match element.name() {
42+
b"w:tr" => {
43+
stats.rows.push(vec![]);
44+
row_index = stats.rows.len() - 1;
45+
}
46+
b"w:tc" => {
47+
stats.rows[row_index]
48+
.push(String::from_utf8(element.name().to_vec()).unwrap());
49+
}
50+
_ => {}
51+
},
52+
Event::End(element) => {
53+
if element.name() == b"w:tbl" {
54+
found_tables.push(stats);
55+
break;
56+
}
57+
}
58+
_ => {}
59+
}
60+
}
61+
}
62+
_ => {}
63+
},
64+
Event::Eof => break,
65+
_ => {}
66+
}
67+
buf.clear();
68+
}
69+
assert_eq!(found_tables.len(), 2);
70+
// pretty print the table
71+
println!("{:#?}", found_tables);
72+
assert_eq!(found_tables[0].rows.len(), 2);
73+
assert_eq!(found_tables[0].rows[0].len(), 4);
74+
assert_eq!(found_tables[0].rows[1].len(), 4);
5775

58-
#[cfg(not(feature = "asynchronous"))]
59-
let event = reader.read_event(&mut skip_buf)?;
76+
assert_eq!(found_tables[1].rows.len(), 2);
77+
assert_eq!(found_tables[1].rows[0].len(), 4);
78+
assert_eq!(found_tables[1].rows[1].len(), 4);
79+
Ok(())
80+
}
81+
82+
#[cfg(feature = "asynchronous")]
83+
async fn nest_readers_async() -> Result<(), quick_xml::Error> {
84+
let mut buf = Vec::new();
85+
// buffer for nested reader
86+
let mut skip_buf = Vec::new();
87+
let mut count = 0;
88+
89+
let mut reader = AsyncReader::from_file("tests/documents/document.xml").await?;
90+
91+
let mut found_tables = Vec::new();
92+
loop {
93+
match reader.read_event(&mut buf).await? {
94+
Event::Start(element) => match element.name() {
95+
b"w:tbl" => {
96+
count += 1;
97+
let mut stats = TableStat {
98+
index: count,
99+
rows: vec![],
100+
};
101+
// must define stateful variables
102+
// outside the nested loop else they are overwritten
103+
let mut row_index = 0;
104+
loop {
105+
skip_buf.clear();
60106

61-
match event {
107+
match reader.read_event(&mut skip_buf).await? {
62108
Event::Start(element) => match element.name() {
63109
b"w:tr" => {
64110
stats.rows.push(vec![]);
@@ -99,3 +145,18 @@ fn main() -> Result<(), quick_xml::Error> {
99145
assert_eq!(found_tables[1].rows[1].len(), 4);
100146
Ok(())
101147
}
148+
149+
// demonstrate how to nest readers
150+
// This is useful for when you need to traverse
151+
// a few levels of a document to extract things.
152+
fn main() -> Result<(), quick_xml::Error> {
153+
#[cfg(feature = "asynchronous")]
154+
let runtime = Runtime::new().expect("Runtime cannot be initialized");
155+
156+
#[cfg(feature = "asynchronous")]
157+
runtime.block_on(async { nest_readers_async().await })?;
158+
159+
nest_readers()?;
160+
161+
Ok(())
162+
}

0 commit comments

Comments
 (0)