Skip to content

Commit 8dfa679

Browse files
committed
unsafe audit
1 parent 55395ed commit 8dfa679

File tree

4 files changed

+54
-17
lines changed

4 files changed

+54
-17
lines changed

form_urlencoded/src/lib.rs

+4
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ impl<'a> Iterator for ByteSerialize<'a> {
150150
None => (self.bytes, &[][..]),
151151
};
152152
self.bytes = remaining;
153+
// This unsafe is appropriate because we have already checked these
154+
// bytes in byte_serialized_unchanged, which checks for a subset
155+
// of UTF-8. So we know these bytes are valid UTF-8, and doing
156+
// another UTF-8 check would be wasteful.
153157
Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
154158
} else {
155159
None

form_urlencoded/src/query_encoding.rs

+23-7
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,34 @@ pub(crate) fn encode<'a>(encoding_override: EncodingOverride, input: &'a str) ->
1818
}
1919

2020
pub(crate) fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
21+
// Note: This function is duplicated in `percent_encoding/lib.rs`.
2122
match input {
2223
Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
2324
Cow::Owned(bytes) => {
24-
let raw_utf8: *const [u8];
2525
match String::from_utf8_lossy(&bytes) {
26-
Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(),
27-
Cow::Owned(s) => return s.into(),
26+
Cow::Borrowed(utf8) => {
27+
// If from_utf8_lossy returns a Cow::Borrowed, then we can
28+
// be sure our original bytes were valid UTF-8. This is because
29+
// if the bytes were invalid UTF-8 from_utf8_lossy would have
30+
// to allocate a new owned string to back the Cow so it could
31+
// replace invalid bytes with a placeholder.
32+
33+
// First we do a debug_assert to confirm our description above.
34+
let raw_utf8: *const [u8];
35+
raw_utf8 = utf8.as_bytes();
36+
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
37+
38+
// Given we know the original input bytes are valid UTF-8,
39+
// and we have ownership of those bytes, we re-use them and
40+
// return a Cow::Owned here. Ideally we'd put our return statement
41+
// right below this line, but to support the old lexically scoped
42+
// borrow checker the return must be moved to outside the match
43+
// statement.
44+
},
45+
Cow::Owned(s) => return Cow::Owned(s),
2846
}
29-
// from_utf8_lossy returned a borrow of `bytes` unchanged.
30-
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
31-
// Reuse the existing `Vec` allocation.
32-
unsafe { String::from_utf8_unchecked(bytes) }.into()
47+
48+
Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
3349
}
3450
}
3551
}

idna/src/punycode.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,10 @@ pub fn encode_str(input: &str) -> Option<String> {
143143
/// 63 encoded bytes, the DNS limit on domain name labels.
144144
pub fn encode(input: &[char]) -> Option<String> {
145145
// Handle "basic" (ASCII) code points. They are encoded as-is.
146-
let output_bytes = input
146+
let mut output : String = input
147147
.iter()
148-
.filter_map(|&c| if c.is_ascii() { Some(c as u8) } else { None })
148+
.filter(|&c| c.is_ascii())
149149
.collect();
150-
let mut output = unsafe { String::from_utf8_unchecked(output_bytes) };
151150
let basic_length = output.len() as u32;
152151
if basic_length > 0 {
153152
output.push_str("-")

percent_encoding/lib.rs

+25-7
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,8 @@ impl<'a> Iterator for PercentEncode<'a> {
252252
self.bytes = remaining;
253253
Some(percent_encode_byte(first_byte))
254254
} else {
255+
// The unsafe blocks here are appropriate because the bytes are
256+
// confirmed as a subset of UTF-8 in should_percent_encode.
255257
for (i, &byte) in remaining.iter().enumerate() {
256258
if self.ascii_set.should_percent_encode(byte) {
257259
// 1 for first_byte + i for previous iterations of this loop
@@ -425,18 +427,34 @@ impl<'a> PercentDecode<'a> {
425427
}
426428

427429
fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
430+
// Note: This function is duplicated in `form_urlencoded/src/query_encoding.rs`.
428431
match input {
429432
Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
430433
Cow::Owned(bytes) => {
431-
let raw_utf8: *const [u8];
432434
match String::from_utf8_lossy(&bytes) {
433-
Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(),
434-
Cow::Owned(s) => return s.into(),
435+
Cow::Borrowed(utf8) => {
436+
// If from_utf8_lossy returns a Cow::Borrowed, then we can
437+
// be sure our original bytes were valid UTF-8. This is because
438+
// if the bytes were invalid UTF-8 from_utf8_lossy would have
439+
// to allocate a new owned string to back the Cow so it could
440+
// replace invalid bytes with a placeholder.
441+
442+
// First we do a debug_assert to confirm our description above.
443+
let raw_utf8: *const [u8];
444+
raw_utf8 = utf8.as_bytes();
445+
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
446+
447+
// Given we know the original input bytes are valid UTF-8,
448+
// and we have ownership of those bytes, we re-use them and
449+
// return a Cow::Owned here. Ideally we'd put our return statement
450+
// right below this line, but to support the old lexically scoped
451+
// borrow checker the return must be moved to outside the match
452+
// statement.
453+
},
454+
Cow::Owned(s) => return Cow::Owned(s),
435455
}
436-
// from_utf8_lossy returned a borrow of `bytes` unchanged.
437-
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
438-
// Reuse the existing `Vec` allocation.
439-
unsafe { String::from_utf8_unchecked(bytes) }.into()
456+
457+
Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
440458
}
441459
}
442460
}

0 commit comments

Comments
 (0)