Skip to content

Commit e738b68

Browse files
authored
Merge pull request #428 from Mingun/encoding
Change all event and Attributes constructors to accept strings
2 parents e7f30d2 + 5467b6c commit e738b68

20 files changed

+405
-437
lines changed

Changelog.md

+17-7
Original file line numberDiff line numberDiff line change
@@ -129,13 +129,12 @@
129129

130130
- [#415]: Changed custom entity unescaping API to accept closures rather than a mapping of entity to
131131
replacement text. This avoids needing to allocate a map and provides the user with more flexibility.
132-
- [#415]: Renamed many functions following the pattern `unescape_and_decode*` to `decode_and_unescape*`
133-
to better communicate their function. Renamed functions following the pattern `*_with_custom_entities`
134-
to `decode_and_unescape_with` to be more consistent across the API.
135-
- [#415]: `BytesText::escaped()` renamed to `BytesText::escape()`, `BytesText::unescaped()` renamed to
136-
`BytesText::unescape()`, `BytesText::unescaped_with()` renamed to `BytesText::unescape_with()`,
137-
`Attribute::escaped_value()` renamed to `Attribute::escape_value()`, and `Attribute::escaped_value_with()`
138-
renamed to `Attribute::escape_value_with()` for consistency across the API.
132+
- [#415]: Renamed functions for consistency across the API:
133+
|Old Name |New Name
134+
|------------------------|-------------------------------------------
135+
|`*_with_custom_entities`|`*_with`
136+
|`BytesText::unescaped()`|`BytesText::unescape()`
137+
|`Attribute::unescaped_*`|`Attribute::unescape_*`
139138

140139
- [#416]: `BytesStart::to_borrowed` renamed to `BytesStart::borrow`, the same method
141140
added to all events
@@ -150,6 +149,16 @@
150149
- [#423]: Removed `BytesText::from_plain` because it internally did escaping of a byte array,
151150
but escaping now works on strings. Use `BytesText::from_plain_str` instead
152151

152+
- [#428]: Removed `BytesText::escaped()`. Use `.as_ref()` provided by `Deref` impl instead.
153+
- [#428]: Removed `BytesText::from_escaped()`. Use constructors from strings instead,
154+
because the writer works in UTF-8 only anyway
155+
- [#428]: Removed `BytesCData::new()`. Use constructors from strings instead,
156+
because the writer works in UTF-8 only anyway
157+
- [#428]: Changed the event and `Attributes` constructors to accept `&str` slices instead of `&[u8]` slices.
158+
Handmade events have always been assumed to store their content UTF-8 encoded.
159+
- [#428]: Removed `Decoder` parameter from `_and_decode` versions of functions for
160+
`BytesText` (remember that those functions were renamed in #415).
161+
153162
### New Tests
154163

155164
- [#9]: Added tests for incorrect nested tags in input
@@ -180,6 +189,7 @@
180189
[#418]: https://github.com/tafia/quick-xml/pull/418
181190
[#421]: https://github.com/tafia/quick-xml/pull/421
182191
[#423]: https://github.com/tafia/quick-xml/pull/423
192+
[#428]: https://github.com/tafia/quick-xml/pull/428
183193
[#434]: https://github.com/tafia/quick-xml/pull/434
184194
[#437]: https://github.com/tafia/quick-xml/pull/437
185195

README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ loop {
5353
_ => (),
5454
}
5555
}
56-
Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
56+
Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
5757

5858
// There are several other `Event`s we do not consider here
5959
_ => (),
@@ -80,7 +80,7 @@ loop {
8080

8181
// creates a new element ... alternatively we could reuse `e` by calling
8282
// `e.into_owned()`
83-
let mut elem = BytesStart::owned_name(b"my_elem".to_vec());
83+
let mut elem = BytesStart::owned_name("my_elem");
8484

8585
// collect existing attributes
8686
elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
@@ -92,7 +92,7 @@ loop {
9292
assert!(writer.write_event(Event::Start(elem)).is_ok());
9393
},
9494
Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
95-
assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
95+
assert!(writer.write_event(Event::End(BytesEnd::borrowed("my_elem"))).is_ok());
9696
},
9797
Ok(Event::Eof) => break,
9898
// we can either move or borrow the event to write, depending on your use-case

benches/macrobenches.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ fn parse_document(doc: &[u8]) -> XmlResult<()> {
2828
}
2929
}
3030
Event::Text(e) => {
31-
criterion::black_box(e.decode_and_unescape(&r)?);
31+
criterion::black_box(e.unescape()?);
3232
}
3333
Event::CData(e) => {
3434
criterion::black_box(e.into_inner());

benches/microbenches.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ fn one_event(c: &mut Criterion) {
174174
.check_comments(false)
175175
.trim_text(true);
176176
match r.read_event_into(&mut buf) {
177-
Ok(Event::Comment(e)) => nbtxt += e.decode_and_unescape(&r).unwrap().len(),
177+
Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(),
178178
something_else => panic!("Did not expect {:?}", something_else),
179179
};
180180

examples/custom_entities.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
6060
Ok(Event::Text(ref e)) => {
6161
println!(
6262
"text value: {}",
63-
e.decode_and_unescape_with(&reader, |ent| custom_entities
64-
.get(ent)
65-
.map(|s| s.as_str()))
63+
e.unescape_with(|ent| custom_entities.get(ent).map(|s| s.as_str()))
6664
.unwrap()
6765
);
6866
}

src/de/mod.rs

+58-74
Original file line numberDiff line numberDiff line change
@@ -612,16 +612,15 @@ where
612612
unescape: bool,
613613
allow_start: bool,
614614
) -> Result<Cow<'de, str>, DeError> {
615-
let decoder = self.reader.decoder();
616615
match self.next()? {
617-
DeEvent::Text(e) => Ok(e.decode(decoder, unescape)?),
618-
DeEvent::CData(e) => Ok(e.decode(decoder)?),
616+
DeEvent::Text(e) => Ok(e.decode(unescape)?),
617+
DeEvent::CData(e) => Ok(e.decode()?),
619618
DeEvent::Start(e) if allow_start => {
620619
// allow one nested level
621620
let inner = self.next()?;
622621
let t = match inner {
623-
DeEvent::Text(t) => t.decode(decoder, unescape)?,
624-
DeEvent::CData(t) => t.decode(decoder)?,
622+
DeEvent::Text(t) => t.decode(unescape)?,
623+
DeEvent::CData(t) => t.decode()?,
625624
DeEvent::Start(s) => {
626625
return Err(DeError::UnexpectedStart(s.name().as_ref().to_owned()))
627626
}
@@ -1042,13 +1041,10 @@ mod tests {
10421041
assert_eq!(de.read, vec![]);
10431042
assert_eq!(de.write, vec![]);
10441043

1045-
assert_eq!(
1046-
de.next().unwrap(),
1047-
Start(BytesStart::borrowed_name(b"root"))
1048-
);
1044+
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("root")));
10491045
assert_eq!(
10501046
de.peek().unwrap(),
1051-
&Start(BytesStart::borrowed_name(b"inner"))
1047+
&Start(BytesStart::borrowed_name("inner"))
10521048
);
10531049

10541050
// Should skip first <inner> tree
@@ -1057,11 +1053,11 @@ mod tests {
10571053
assert_eq!(
10581054
de.write,
10591055
vec![
1060-
Start(BytesStart::borrowed_name(b"inner")),
1056+
Start(BytesStart::borrowed_name("inner")),
10611057
Text(BytesText::from_escaped_str("text")),
1062-
Start(BytesStart::borrowed_name(b"inner")),
1063-
End(BytesEnd::borrowed(b"inner")),
1064-
End(BytesEnd::borrowed(b"inner")),
1058+
Start(BytesStart::borrowed_name("inner")),
1059+
End(BytesEnd::borrowed("inner")),
1060+
End(BytesEnd::borrowed("inner")),
10651061
]
10661062
);
10671063

@@ -1073,11 +1069,8 @@ mod tests {
10731069
// </inner>
10741070
// <target/>
10751071
// </root>
1076-
assert_eq!(
1077-
de.next().unwrap(),
1078-
Start(BytesStart::borrowed_name(b"next"))
1079-
);
1080-
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"next")));
1072+
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("next")));
1073+
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("next")));
10811074

10821075
// We finish writing. Next call to `next()` should start replaying those messages:
10831076
//
@@ -1094,27 +1087,27 @@ mod tests {
10941087
assert_eq!(
10951088
de.read,
10961089
vec![
1097-
Start(BytesStart::borrowed_name(b"inner")),
1090+
Start(BytesStart::borrowed_name("inner")),
10981091
Text(BytesText::from_escaped_str("text")),
1099-
Start(BytesStart::borrowed_name(b"inner")),
1100-
End(BytesEnd::borrowed(b"inner")),
1101-
End(BytesEnd::borrowed(b"inner")),
1092+
Start(BytesStart::borrowed_name("inner")),
1093+
End(BytesEnd::borrowed("inner")),
1094+
End(BytesEnd::borrowed("inner")),
11021095
]
11031096
);
11041097
assert_eq!(de.write, vec![]);
11051098
assert_eq!(
11061099
de.next().unwrap(),
1107-
Start(BytesStart::borrowed_name(b"inner"))
1100+
Start(BytesStart::borrowed_name("inner"))
11081101
);
11091102

11101103
// Skip `#text` node and consume <inner/> after it
11111104
de.skip().unwrap();
11121105
assert_eq!(
11131106
de.read,
11141107
vec![
1115-
Start(BytesStart::borrowed_name(b"inner")),
1116-
End(BytesEnd::borrowed(b"inner")),
1117-
End(BytesEnd::borrowed(b"inner")),
1108+
Start(BytesStart::borrowed_name("inner")),
1109+
End(BytesEnd::borrowed("inner")),
1110+
End(BytesEnd::borrowed("inner")),
11181111
]
11191112
);
11201113
assert_eq!(
@@ -1128,9 +1121,9 @@ mod tests {
11281121

11291122
assert_eq!(
11301123
de.next().unwrap(),
1131-
Start(BytesStart::borrowed_name(b"inner"))
1124+
Start(BytesStart::borrowed_name("inner"))
11321125
);
1133-
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"inner")));
1126+
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("inner")));
11341127

11351128
// We finish writing. Next call to `next()` should start replaying messages:
11361129
//
@@ -1146,21 +1139,21 @@ mod tests {
11461139
de.read,
11471140
vec![
11481141
Text(BytesText::from_escaped_str("text")),
1149-
End(BytesEnd::borrowed(b"inner")),
1142+
End(BytesEnd::borrowed("inner")),
11501143
]
11511144
);
11521145
assert_eq!(de.write, vec![]);
11531146
assert_eq!(
11541147
de.next().unwrap(),
11551148
Text(BytesText::from_escaped_str("text"))
11561149
);
1157-
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"inner")));
1150+
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("inner")));
11581151
assert_eq!(
11591152
de.next().unwrap(),
1160-
Start(BytesStart::borrowed_name(b"target"))
1153+
Start(BytesStart::borrowed_name("target"))
11611154
);
1162-
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"target")));
1163-
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
1155+
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("target")));
1156+
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("root")));
11641157
}
11651158

11661159
/// Checks that `read_to_end()` behaves correctly after `skip()`
@@ -1184,22 +1177,19 @@ mod tests {
11841177
assert_eq!(de.read, vec![]);
11851178
assert_eq!(de.write, vec![]);
11861179

1187-
assert_eq!(
1188-
de.next().unwrap(),
1189-
Start(BytesStart::borrowed_name(b"root"))
1190-
);
1180+
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("root")));
11911181

11921182
// Skip the <skip> tree
11931183
de.skip().unwrap();
11941184
assert_eq!(de.read, vec![]);
11951185
assert_eq!(
11961186
de.write,
11971187
vec![
1198-
Start(BytesStart::borrowed_name(b"skip")),
1188+
Start(BytesStart::borrowed_name("skip")),
11991189
Text(BytesText::from_escaped_str("text")),
1200-
Start(BytesStart::borrowed_name(b"skip")),
1201-
End(BytesEnd::borrowed(b"skip")),
1202-
End(BytesEnd::borrowed(b"skip")),
1190+
Start(BytesStart::borrowed_name("skip")),
1191+
End(BytesEnd::borrowed("skip")),
1192+
End(BytesEnd::borrowed("skip")),
12031193
]
12041194
);
12051195

@@ -1212,18 +1202,18 @@ mod tests {
12121202
// </root>
12131203
assert_eq!(
12141204
de.next().unwrap(),
1215-
Start(BytesStart::borrowed_name(b"target"))
1205+
Start(BytesStart::borrowed_name("target"))
12161206
);
12171207
de.read_to_end(QName(b"target")).unwrap();
12181208
assert_eq!(de.read, vec![]);
12191209
assert_eq!(
12201210
de.write,
12211211
vec![
1222-
Start(BytesStart::borrowed_name(b"skip")),
1212+
Start(BytesStart::borrowed_name("skip")),
12231213
Text(BytesText::from_escaped_str("text")),
1224-
Start(BytesStart::borrowed_name(b"skip")),
1225-
End(BytesEnd::borrowed(b"skip")),
1226-
End(BytesEnd::borrowed(b"skip")),
1214+
Start(BytesStart::borrowed_name("skip")),
1215+
End(BytesEnd::borrowed("skip")),
1216+
End(BytesEnd::borrowed("skip")),
12271217
]
12281218
);
12291219

@@ -1241,22 +1231,19 @@ mod tests {
12411231
assert_eq!(
12421232
de.read,
12431233
vec![
1244-
Start(BytesStart::borrowed_name(b"skip")),
1234+
Start(BytesStart::borrowed_name("skip")),
12451235
Text(BytesText::from_escaped_str("text")),
1246-
Start(BytesStart::borrowed_name(b"skip")),
1247-
End(BytesEnd::borrowed(b"skip")),
1248-
End(BytesEnd::borrowed(b"skip")),
1236+
Start(BytesStart::borrowed_name("skip")),
1237+
End(BytesEnd::borrowed("skip")),
1238+
End(BytesEnd::borrowed("skip")),
12491239
]
12501240
);
12511241
assert_eq!(de.write, vec![]);
12521242

1253-
assert_eq!(
1254-
de.next().unwrap(),
1255-
Start(BytesStart::borrowed_name(b"skip"))
1256-
);
1243+
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("skip")));
12571244
de.read_to_end(QName(b"skip")).unwrap();
12581245

1259-
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
1246+
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("root")));
12601247
}
12611248

12621249
/// Checks that limiting buffer size works correctly
@@ -1306,34 +1293,31 @@ mod tests {
13061293
"#,
13071294
);
13081295

1309-
assert_eq!(
1310-
de.next().unwrap(),
1311-
Start(BytesStart::borrowed_name(b"root"))
1312-
);
1296+
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("root")));
13131297

13141298
assert_eq!(
13151299
de.next().unwrap(),
1316-
Start(BytesStart::borrowed(br#"tag a="1""#, 3))
1300+
Start(BytesStart::borrowed(r#"tag a="1""#, 3))
13171301
);
13181302
assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
13191303

13201304
assert_eq!(
13211305
de.next().unwrap(),
1322-
Start(BytesStart::borrowed(br#"tag a="2""#, 3))
1306+
Start(BytesStart::borrowed(r#"tag a="2""#, 3))
13231307
);
13241308
assert_eq!(
13251309
de.next().unwrap(),
13261310
CData(BytesCData::from_str("cdata content"))
13271311
);
1328-
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"tag")));
1312+
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("tag")));
13291313

13301314
assert_eq!(
13311315
de.next().unwrap(),
1332-
Start(BytesStart::borrowed(b"self-closed", 11))
1316+
Start(BytesStart::borrowed_name("self-closed"))
13331317
);
13341318
assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
13351319

1336-
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
1320+
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("root")));
13371321
assert_eq!(de.next().unwrap(), Eof);
13381322
}
13391323

@@ -1402,17 +1386,17 @@ mod tests {
14021386
events,
14031387
vec![
14041388
Start(BytesStart::borrowed(
1405-
br#"item name="hello" source="world.rs""#,
1389+
r#"item name="hello" source="world.rs""#,
14061390
4
14071391
)),
14081392
Text(BytesText::from_escaped_str("Some text")),
1409-
End(BytesEnd::borrowed(b"item")),
1410-
Start(BytesStart::borrowed(b"item2", 5)),
1411-
End(BytesEnd::borrowed(b"item2")),
1412-
Start(BytesStart::borrowed(b"item3", 5)),
1413-
End(BytesEnd::borrowed(b"item3")),
1414-
Start(BytesStart::borrowed(br#"item4 value="world" "#, 5)),
1415-
End(BytesEnd::borrowed(b"item4")),
1393+
End(BytesEnd::borrowed("item")),
1394+
Start(BytesStart::borrowed("item2", 5)),
1395+
End(BytesEnd::borrowed("item2")),
1396+
Start(BytesStart::borrowed("item3", 5)),
1397+
End(BytesEnd::borrowed("item3")),
1398+
Start(BytesStart::borrowed(r#"item4 value="world" "#, 5)),
1399+
End(BytesEnd::borrowed("item4")),
14161400
]
14171401
)
14181402
}
@@ -1432,7 +1416,7 @@ mod tests {
14321416

14331417
assert_eq!(
14341418
reader.next().unwrap(),
1435-
DeEvent::Start(BytesStart::borrowed(b"item ", 4))
1419+
DeEvent::Start(BytesStart::borrowed("item ", 4))
14361420
);
14371421
reader.read_to_end(QName(b"item")).unwrap();
14381422
assert_eq!(reader.next().unwrap(), DeEvent::Eof);

0 commit comments

Comments
 (0)