Skip to content

Commit ffed24a

Browse files
committed
Return &str instead of &[u8] in escape functions
1 parent 47c4b55 commit ffed24a

File tree

4 files changed

+37
-25
lines changed

4 files changed

+37
-25
lines changed

Changelog.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@
134134
scheme, for example, HEX or Base64
135135
- [#421]: All unescaping functions now accept and return strings instead of byte slices
136136

137-
- [#423]: All escaping functions now accepts strings instead of byte slices
137+
- [#423]: All escaping functions now accept and return strings instead of byte slices
138138
- [#423]: Removed `BytesText::from_plain` because it internally did escaping of a byte array,
139139
but since now escaping works on strings. Use `BytesText::from_plain_str` instead
140140

src/escapei.rs

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ impl std::error::Error for EscapeError {}
7171
/// | `&` | `&amp;`
7272
/// | `'` | `&apos;`
7373
/// | `"` | `&quot;`
74-
pub fn escape(raw: &str) -> Cow<[u8]> {
74+
pub fn escape(raw: &str) -> Cow<str> {
7575
_escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&' | b'\'' | b'\"'))
7676
}
7777

@@ -88,25 +88,25 @@ pub fn escape(raw: &str) -> Cow<[u8]> {
8888
/// | `<` | `&lt;`
8989
/// | `>` | `&gt;`
9090
/// | `&` | `&amp;`
91-
pub fn partial_escape(raw: &str) -> Cow<[u8]> {
91+
pub fn partial_escape(raw: &str) -> Cow<str> {
9292
_escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&'))
9393
}
9494

9595
/// Escapes an `&str` and replaces a subset of xml special characters (`<`, `>`,
9696
/// `&`, `'`, `"`) with their corresponding xml escaped value.
97-
fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<[u8]> {
98-
let raw = raw.as_bytes();
97+
fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<str> {
98+
let bytes = raw.as_bytes();
9999
let mut escaped = None;
100-
let mut bytes = raw.iter();
100+
let mut iter = bytes.iter();
101101
let mut pos = 0;
102-
while let Some(i) = bytes.position(|&b| escape_chars(b)) {
102+
while let Some(i) = iter.position(|&b| escape_chars(b)) {
103103
if escaped.is_none() {
104104
escaped = Some(Vec::with_capacity(raw.len()));
105105
}
106106
let escaped = escaped.as_mut().expect("initialized");
107107
let new_pos = pos + i;
108-
escaped.extend_from_slice(&raw[pos..new_pos]);
109-
match raw[new_pos] {
108+
escaped.extend_from_slice(&bytes[pos..new_pos]);
109+
match bytes[new_pos] {
110110
b'<' => escaped.extend_from_slice(b"&lt;"),
111111
b'>' => escaped.extend_from_slice(b"&gt;"),
112112
b'\'' => escaped.extend_from_slice(b"&apos;"),
@@ -118,10 +118,14 @@ fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<[u8]> {
118118
}
119119

120120
if let Some(mut escaped) = escaped {
121-
if let Some(raw) = raw.get(pos..) {
121+
if let Some(raw) = bytes.get(pos..) {
122122
escaped.extend_from_slice(raw);
123123
}
124-
Cow::Owned(escaped)
124+
// SAFETY: we operate on UTF-8 input and search only for one-byte (ASCII) chars,
125+
// so every slice that was appended to `escaped` is a valid UTF-8 encoded string
126+
// TODO: Can be replaced with `unsafe { String::from_utf8_unchecked() }`
127+
// if unsafe code is ever allowed
128+
Cow::Owned(String::from_utf8(escaped).unwrap())
125129
} else {
126130
Cow::Borrowed(raw)
127131
}
@@ -1745,24 +1749,24 @@ fn test_unescape_with() {
17451749

17461750
#[test]
17471751
fn test_escape() {
1748-
assert_eq!(&*escape("test"), b"test");
1749-
assert_eq!(&*escape("<test>"), b"&lt;test&gt;");
1750-
assert_eq!(&*escape("\"a\"bc"), b"&quot;a&quot;bc");
1751-
assert_eq!(&*escape("\"a\"b&c"), b"&quot;a&quot;b&amp;c");
1752+
assert_eq!(&*escape("test"), "test");
1753+
assert_eq!(&*escape("<test>"), "&lt;test&gt;");
1754+
assert_eq!(&*escape("\"a\"bc"), "&quot;a&quot;bc");
1755+
assert_eq!(&*escape("\"a\"b&c"), "&quot;a&quot;b&amp;c");
17521756
assert_eq!(
17531757
&*escape("prefix_\"a\"b&<>c"),
1754-
"prefix_&quot;a&quot;b&amp;&lt;&gt;c".as_bytes()
1758+
"prefix_&quot;a&quot;b&amp;&lt;&gt;c"
17551759
);
17561760
}
17571761

17581762
#[test]
17591763
fn test_partial_escape() {
1760-
assert_eq!(&*partial_escape("test"), b"test");
1761-
assert_eq!(&*partial_escape("<test>"), b"&lt;test&gt;");
1762-
assert_eq!(&*partial_escape("\"a\"bc"), b"\"a\"bc");
1763-
assert_eq!(&*partial_escape("\"a\"b&c"), b"\"a\"b&amp;c");
1764+
assert_eq!(&*partial_escape("test"), "test");
1765+
assert_eq!(&*partial_escape("<test>"), "&lt;test&gt;");
1766+
assert_eq!(&*partial_escape("\"a\"bc"), "\"a\"bc");
1767+
assert_eq!(&*partial_escape("\"a\"b&c"), "\"a\"b&amp;c");
17641768
assert_eq!(
17651769
&*partial_escape("prefix_\"a\"b&<>c"),
1766-
"prefix_\"a\"b&amp;&lt;&gt;c".as_bytes()
1770+
"prefix_\"a\"b&amp;&lt;&gt;c"
17671771
);
17681772
}

src/events/attributes.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,10 @@ impl<'a> From<(&'a str, &'a str)> for Attribute<'a> {
143143
fn from(val: (&'a str, &'a str)) -> Attribute<'a> {
144144
Attribute {
145145
key: QName(val.0.as_bytes()),
146-
value: escape(val.1),
146+
value: match escape(val.1) {
147+
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
148+
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
149+
},
147150
}
148151
}
149152
}

src/events/mod.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -695,7 +695,10 @@ impl<'a> BytesText<'a> {
695695
#[inline]
696696
pub fn from_plain_str(content: &'a str) -> Self {
697697
Self {
698-
content: escape(content),
698+
content: match escape(content) {
699+
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
700+
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
701+
},
699702
}
700703
}
701704

@@ -897,8 +900,9 @@ impl<'a> BytesCData<'a> {
897900
pub fn escape(self, decoder: Decoder) -> Result<BytesText<'a>> {
898901
let decoded = self.decode(decoder)?;
899902
Ok(BytesText::from_escaped(match escape(&decoded) {
903+
// Because the result is borrowed, no replacements were made and we can use the original content
900904
Cow::Borrowed(_) => self.content,
901-
Cow::Owned(escaped) => Cow::Owned(escaped),
905+
Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
902906
}))
903907
}
904908

@@ -918,8 +922,9 @@ impl<'a> BytesCData<'a> {
918922
pub fn partial_escape(self, decoder: Decoder) -> Result<BytesText<'a>> {
919923
let decoded = self.decode(decoder)?;
920924
Ok(BytesText::from_escaped(match partial_escape(&decoded) {
925+
// Because the result is borrowed, no replacements were made and we can use the original content
921926
Cow::Borrowed(_) => self.content,
922-
Cow::Owned(escaped) => Cow::Owned(escaped),
927+
Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
923928
}))
924929
}
925930

0 commit comments

Comments
 (0)