Skip to content

Commit f332e0c

Browse files
committed
Add comment to unsafe block in decode_utf8_lossy
1 parent 3144f86 commit f332e0c

File tree

2 files changed

+37
-14
lines changed

2 files changed

+37
-14
lines changed

percent_encoding/lib.rs

+18-7
Original file line number | Diff line number | Diff line change
@@ -430,15 +430,26 @@ fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
430430
match input {
431431
Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
432432
Cow::Owned(bytes) => {
433-
let raw_utf8: *const [u8];
434433
match String::from_utf8_lossy(&bytes) {
435-
Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(),
436-
Cow::Owned(s) => return s.into(),
434+
Cow::Borrowed(utf8) => {
435+
// If from_utf8_lossy returns a Cow::Borrowed, then we can
436+
// be sure our original bytes were valid UTF-8. This is because
437+
// if the bytes were invalid UTF-8, from_utf8_lossy would have
438+
// to allocate a new owned string to back the Cow so it could
439+
// replace invalid bytes with a placeholder.
440+
441+
// First we do a debug_assert to confirm our description above.
442+
let raw_utf8: *const [u8];
443+
raw_utf8 = utf8.as_bytes();
444+
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
445+
446+
// Given we know the original input bytes are valid UTF-8,
447+
// and we have ownership of those bytes, we re-use them and
448+
// return a Cow::Owned here.
449+
Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
450+
},
451+
Cow::Owned(s) => Cow::Owned(s),
437452
}
438-
// from_utf8_lossy returned a borrow of `bytes` unchanged.
439-
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
440-
// Reuse the existing `Vec` allocation.
441-
unsafe { String::from_utf8_unchecked(bytes) }.into()
442453
}
443454
}
444455
}

src/query_encoding.rs

+19-7
Original file line number | Diff line number | Diff line change
@@ -18,18 +18,30 @@ pub(crate) fn encode<'a>(encoding_override: EncodingOverride, input: &'a str) ->
1818
}
1919

2020
pub(crate) fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
21+
// Note: This function is duplicated in `percent_encoding/lib.rs`.
2122
match input {
2223
Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
2324
Cow::Owned(bytes) => {
24-
let raw_utf8: *const [u8];
2525
match String::from_utf8_lossy(&bytes) {
26-
Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(),
27-
Cow::Owned(s) => return s.into(),
26+
Cow::Borrowed(utf8) => {
27+
// If from_utf8_lossy returns a Cow::Borrowed, then we can
28+
// be sure our original bytes were valid UTF-8. This is because
29+
// if the bytes were invalid UTF-8, from_utf8_lossy would have
30+
// to allocate a new owned string to back the Cow so it could
31+
// replace invalid bytes with a placeholder.
32+
33+
// First we do a debug_assert to confirm our description above.
34+
let raw_utf8: *const [u8];
35+
raw_utf8 = utf8.as_bytes();
36+
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
37+
38+
// Given we know the original input bytes are valid UTF-8,
39+
// and we have ownership of those bytes, we re-use them and
40+
// return a Cow::Owned here.
41+
Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
42+
},
43+
Cow::Owned(s) => Cow::Owned(s),
2844
}
29-
// from_utf8_lossy returned a borrow of `bytes` unchanged.
30-
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
31-
// Reuse the existing `Vec` allocation.
32-
unsafe { String::from_utf8_unchecked(bytes) }.into()
3345
}
3446
}
3547
}

0 commit comments

Comments
 (0)