@@ -252,6 +252,8 @@ impl<'a> Iterator for PercentEncode<'a> {
252
252
self . bytes = remaining;
253
253
Some ( percent_encode_byte ( first_byte) )
254
254
} else {
255
+ // The unsafe blocks here are appropriate because the bytes are
256
+ // confirmed as a subset of UTF-8 in should_percent_encode.
255
257
for ( i, & byte) in remaining. iter ( ) . enumerate ( ) {
256
258
if self . ascii_set . should_percent_encode ( byte) {
257
259
// 1 for first_byte + i for previous iterations of this loop
@@ -425,18 +427,34 @@ impl<'a> PercentDecode<'a> {
425
427
}
426
428
427
429
/// Converts bytes to a string, replacing every invalid UTF-8 sequence with
/// U+FFFD REPLACEMENT CHARACTER.
///
/// * Borrowed input is handed to `String::from_utf8_lossy`, which borrows the
///   input when it is already valid UTF-8 and allocates only when a
///   replacement has to be inserted.
/// * Owned input that is already valid UTF-8 is turned into a `String` in
///   place, so the existing `Vec` allocation is reused rather than copied.
/// * Owned input that is not valid UTF-8 yields a newly built `String`
///   containing the replacement characters.
fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
    // Note: This function is duplicated in `form_urlencoded/src/query_encoding.rs`.
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            // `String::from_utf8` validates the bytes and, on success, takes
            // over the vector without copying. This gives the same
            // allocation reuse as `from_utf8_unchecked`, but needs neither
            // `unsafe` nor a raw-pointer comparison to justify it.
            Ok(text) => Cow::Owned(text),
            // Validation failed: the error hands the original bytes back, and
            // the lossy conversion builds a new string with U+FFFD in place of
            // each invalid sequence.
            Err(invalid) => {
                Cow::Owned(String::from_utf8_lossy(&invalid.into_bytes()).into_owned())
            }
        },
    }
}
0 commit comments