@@ -77,10 +77,13 @@ impl<'a> TryFrom<&'a [u8]> for Scheme {
77
77
None => Err ( ErrorKind :: InvalidScheme . into ( ) ) ,
78
78
Standard ( p) => Ok ( Standard ( p) . into ( ) ) ,
79
79
Other ( _) => {
80
- // Unsafe: parse_exact already checks for a strict subset of UTF-8
81
- Ok ( Other ( Box :: new ( unsafe {
82
- ByteStr :: from_utf8_unchecked ( Bytes :: copy_from_slice ( s) )
83
- } ) ) . into ( ) )
80
+ let bytes = Bytes :: copy_from_slice ( s) ;
81
+
82
+ // Safety: postcondition on parse_exact() means that s and
83
+ // hence bytes are valid UTF-8.
84
+ let string = unsafe { ByteStr :: from_utf8_unchecked ( bytes) } ;
85
+
86
+ Ok ( Other ( Box :: new ( string) ) . into ( ) )
84
87
}
85
88
}
86
89
}
@@ -195,6 +198,12 @@ const MAX_SCHEME_LEN: usize = 64;
195
198
196
199
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
197
200
//
201
+ // SCHEME_CHARS is a table of valid characters in the scheme part of a URI. An
202
+ // entry in the table is 0 for invalid characters. For valid characters the
203
+ // entry is itself (i.e. the entry for 43 is b'+' because b'+' == 43u8). An
204
+ // important characteristic of this table is that all entries above 127 are
205
+ // invalid. This makes every valid entry a valid single-byte UTF-8 code
206
+ // point. This means that a slice of such valid entries is valid UTF-8.
198
207
const SCHEME_CHARS : [ u8 ; 256 ] = [
199
208
// 0 1 2 3 4 5 6 7 8 9
200
209
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // x
@@ -226,6 +235,7 @@ const SCHEME_CHARS: [u8; 256] = [
226
235
] ;
227
236
228
237
impl Scheme2 < usize > {
238
+ // Postcondition: On all Ok() returns, s is valid UTF-8
229
239
fn parse_exact ( s : & [ u8 ] ) -> Result < Scheme2 < ( ) > , InvalidUri > {
230
240
match s {
231
241
b"http" => Ok ( Protocol :: Http . into ( ) ) ,
@@ -235,6 +245,8 @@ impl Scheme2<usize> {
235
245
return Err ( ErrorKind :: SchemeTooLong . into ( ) ) ;
236
246
}
237
247
248
+ // check that each byte in s is a valid SCHEME_CHARS entry, which implies
249
+ // that it is a valid single byte UTF-8 code point.
238
250
for & b in s {
239
251
match SCHEME_CHARS [ b as usize ] {
240
252
b':' => {
0 commit comments