Skip to content

Commit 3ef1133

Browse files
sbosnick authored and
seanmonstar committed
Add comments to describe safety of Scheme
The comments describe the postcondition on parse_exact() that makes the one use of "unsafe" in Scheme::try_from(&'a [u8]) sound.
1 parent 25c5674 commit 3ef1133

File tree

1 file changed

+16
-4
lines changed

1 file changed

+16
-4
lines changed

src/uri/scheme.rs

+16-4
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,13 @@ impl<'a> TryFrom<&'a [u8]> for Scheme {
7777
None => Err(ErrorKind::InvalidScheme.into()),
7878
Standard(p) => Ok(Standard(p).into()),
7979
Other(_) => {
80-
// Unsafe: parse_exact already checks for a strict subset of UTF-8
81-
Ok(Other(Box::new(unsafe {
82-
ByteStr::from_utf8_unchecked(Bytes::copy_from_slice(s))
83-
})).into())
80+
let bytes = Bytes::copy_from_slice(s);
81+
82+
// Safety: postcondition on parse_exact() means that s and
83+
// hence bytes are valid UTF-8.
84+
let string = unsafe { ByteStr::from_utf8_unchecked(bytes) };
85+
86+
Ok(Other(Box::new(string)).into())
8487
}
8588
}
8689
}
@@ -195,6 +198,12 @@ const MAX_SCHEME_LEN: usize = 64;
195198

196199
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
197200
//
201+
// SCHEME_CHARS is a table of valid characters in the scheme part of a URI. An
202+
// entry in the table is 0 for invalid characters. For valid characters the
203+
// entry is itself (i.e. the entry for 43 is b'+' because b'+' == 43u8). An
204+
// important characteristic of this table is that all entries above 127 are
205+
// invalid. This makes all of the valid entries a valid single-byte UTF-8 code
206+
// point. This means that a slice of such valid entries is valid UTF-8.
198207
const SCHEME_CHARS: [u8; 256] = [
199208
// 0 1 2 3 4 5 6 7 8 9
200209
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x
@@ -226,6 +235,7 @@ const SCHEME_CHARS: [u8; 256] = [
226235
];
227236

228237
impl Scheme2<usize> {
238+
// Postcondition: On all Ok() returns, s is valid UTF-8
229239
fn parse_exact(s: &[u8]) -> Result<Scheme2<()>, InvalidUri> {
230240
match s {
231241
b"http" => Ok(Protocol::Http.into()),
@@ -235,6 +245,8 @@ impl Scheme2<usize> {
235245
return Err(ErrorKind::SchemeTooLong.into());
236246
}
237247

248+
// check that each byte in s has a valid (non-zero) entry in SCHEME_CHARS,
249+
// which implies that it is a valid single-byte UTF-8 code point.
238250
for &b in s {
239251
match SCHEME_CHARS[b as usize] {
240252
b':' => {

0 commit comments

Comments
 (0)