@@ -18,18 +18,30 @@ pub(crate) fn encode<'a>(encoding_override: EncodingOverride, input: &'a str) ->
18
18
}
19
19
20
20
/// Converts `input` to a string, replacing invalid UTF-8 sequences with
/// U+FFFD REPLACEMENT CHARACTER.
///
/// Borrowed input that is already valid UTF-8 is returned as a borrow.
/// Owned input that is already valid UTF-8 is converted in place, reusing the
/// existing allocation. Only input containing invalid sequences causes a new
/// string to be allocated.
pub(crate) fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
    // Note: This function is duplicated in `percent_encoding/lib.rs`.
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => {
            match String::from_utf8_lossy(&bytes) {
                Cow::Borrowed(utf8) => {
                    // If from_utf8_lossy returns a Cow::Borrowed, then we can
                    // be sure our original bytes were valid UTF-8. This is
                    // because if the bytes were invalid UTF-8, from_utf8_lossy
                    // would have to allocate a new owned string to back the
                    // Cow so it could replace invalid bytes with a placeholder.

                    // First we do a debug_assert to confirm the description
                    // above: the borrow must cover exactly our own buffer.
                    let raw_utf8: *const [u8] = utf8.as_bytes();
                    debug_assert!(raw_utf8 == &*bytes as *const [u8]);

                    // Given we know the original input bytes are valid UTF-8,
                    // and we have ownership of those bytes, we re-use them and
                    // return a Cow::Owned here.
                    //
                    // SAFETY: `from_utf8_lossy` returned a borrow of `bytes`
                    // unchanged, which it only does when every byte sequence
                    // in `bytes` is valid UTF-8.
                    Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
                }
                // Invalid sequences were replaced, so a fresh `String` was
                // already allocated; hand it back as-is.
                Cow::Owned(s) => Cow::Owned(s),
            }
        }
    }
}
0 commit comments