Skip to content

Commit 2019700

Browse files
committed
Add MaybeValid type
`MaybeValid<T>` is a `T` which might not be valid. It is similar to `MaybeUninit<T>`, but it is slightly more strict: any byte in `T` which is guaranteed to be initialized is also guaranteed to be initialized in `MaybeValid<T>` (see the doc comment for a more precise definition). `MaybeValid` is a building block of the `TryFromBytes` design outlined in #5. Makes progress on #5
1 parent 79598c7 commit 2019700

File tree

1 file changed

+259
-0
lines changed

1 file changed

+259
-0
lines changed

src/lib.rs

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1507,6 +1507,203 @@ safety_comment! {
15071507
assert_unaligned!(mem::MaybeUninit<()>, MaybeUninit<u8>);
15081508
}
15091509

1510+
/// A value which might or might not constitute a valid instance of `T`.
1511+
///
1512+
/// `MaybeValid<T>` has the same layout (size and alignment) and field offsets
1513+
/// as `T`. Unlike `T`, it may contain any bit pattern, except that
1514+
/// uninitialized bytes may only appear in `MaybeValid<T>` at byte offsets where
1515+
/// they may appear in `T`. This is a dynamic property: if, at a particular byte
1516+
/// offset, a valid enum discriminant is set, the subsequent bytes may only have
1517+
/// uninitialized bytes as specified by the corresponding enum variant.
1518+
///
1519+
/// Formally, given `m: MaybeValid<T>` and a byte offset `b` in the range `[0,
1520+
/// size_of_val(&m))`:
1521+
/// - If, in all valid instances `t: T`, the byte at offset `b` in `t` is
1522+
/// initialized, then the byte at offset `b` within `m` is guaranteed to be
1523+
/// initialized.
1524+
/// - Let `c` be the contents of the byte range `[0, b)` in `m`. Let `TT` be the
1525+
/// subset of valid instances of `T` which contain `c` in the offset range
1526+
/// `[0, b)`. If, for all instances `t: T` in `TT`, the byte at offset `b`
1527+
/// in `t` is initialized, then the byte at offset `b` in `m` is guaranteed to
1528+
/// be initialized.
1529+
///
1530+
/// Pragmatically, this means that if `m` is guaranteed to contain an enum
1531+
/// type at a particular offset, and the enum discriminant stored in `m`
1532+
/// corresponds to a valid variant of that enum type, then it is guaranteed
1533+
/// that the appropriate bytes of `m` are initialized as defined by that
1534+
/// variant's bit validity (although note that the variant may contain another
1535+
/// enum type, in which case the same rules apply depending on the state of
1536+
/// its discriminant, and so on recursively).
1537+
///
1538+
/// # Safety
1539+
///
1540+
/// Unsafe code may assume that an instance of `MaybeValid` satisfies the
1541+
/// constraints described above. Unsafe code may produce a `MaybeValid` or
1542+
/// modify the bytes of an existing `MaybeValid` so long as these constraints
1543+
/// are upheld. It is unsound to produce a `MaybeValid` which fails to uphold
1544+
/// these constraints.
1545+
#[repr(transparent)]
1546+
pub struct MaybeValid<T: AsMaybeUninit + ?Sized> {
1547+
// Sole field; `repr(transparent)` above gives `MaybeValid<T>` exactly this
// field's layout.
inner: MaybeUninit<T>,
1548+
}
1549+
1550+
safety_comment! {
1551+
/// SAFETY:
1552+
/// - `AsBytes`: `MaybeValid` requires that, if a byte in `T` is always
1553+
/// initialized, the equivalent byte in `MaybeValid<T>` must be
1554+
/// initialized. `T: AsBytes` implies that all bytes in `T` must always be
1555+
/// initialized, and so all bytes in `MaybeValid<T>` must always be
1556+
/// initialized, and so `MaybeValid<T>` satisfies `AsBytes`. `T: AsBytes`
1557+
/// implies that `[T]: AsBytes`, so this is a sufficient bound for
1558+
/// `MaybeValid<[T]>` too.
1559+
/// - `Unaligned`: `MaybeValid<T>` and `MaybeValid<[T]>` have the same
1560+
/// alignment as `T`.
1561+
///
1562+
/// TODO(#5): Implement `FromZeroes` and `FromBytes` for `MaybeValid<T>` and
1563+
/// `MaybeValid<[T]>`.
1564+
unsafe_impl!(T: AsBytes => AsBytes for MaybeValid<T>);
1565+
unsafe_impl!(T: AsBytes => AsBytes for MaybeValid<[T]>);
1566+
unsafe_impl!(T: Unaligned => Unaligned for MaybeValid<T>);
1567+
unsafe_impl!(T: Unaligned => Unaligned for MaybeValid<[T]>);
1568+
}
1569+
1570+
// SAFETY: See safety comment on `MaybeUninit`.
1571+
unsafe impl<T> AsMaybeUninit for MaybeValid<[T]> {
1572+
// SAFETY:
1573+
// - `MaybeUninit` has no bit validity requirements and `[U]` has the same
1574+
// bit validity requirements as `U`, so `[MaybeUninit<T>]` has no bit
1575+
// validity requirements. Thus, it is sound to write uninitialized bytes
1576+
// at every offset.
1577+
// - `MaybeValid<U>` is `repr(transparent)`, and thus has the same layout
1578+
// and field offsets as its contained field of type `U::MaybeUninit`. In
1579+
// this case, `U = [T]`, and so `U::MaybeUninit = [MaybeUninit<T>]`. Thus,
1580+
// `MaybeValid<[T]>` has the same layout and field offsets as
1581+
// `[MaybeUninit<T>]`, which is what we set `MaybeUninit` to here. Thus,
1582+
// they trivially have the same alignment.
1583+
// - By the same token, their raw pointer types are trivially `as` castable
1584+
// and preserve size.
1585+
// - By the same token, `[MaybeUninit<T>]` contains `UnsafeCell`s at the
1586+
// same byte ranges as `MaybeValid<[T]>` does.
1587+
type MaybeUninit = [MaybeUninit<T>];
1588+
1589+
// Reinterprets a raw `*const [MaybeUninit<T>]` as a raw
// `*const MaybeValid<[T]>`. `as` is used (hence the lint allowance below)
// because `pointer::cast` only accepts `Sized` target types.
//
// SAFETY: `as` preserves pointer address and provenance.
1590+
#[allow(clippy::as_conversions)]
1591+
fn raw_from_maybe_uninit(maybe_uninit: *const [MaybeUninit<T>]) -> *const MaybeValid<[T]> {
1592+
maybe_uninit as *const MaybeValid<[T]>
1593+
}
1594+
1595+
// Mutable counterpart of `raw_from_maybe_uninit`: the same cast, applied to
// a `*mut` pointer.
//
// SAFETY: `as` preserves pointer address and provenance.
1596+
#[allow(clippy::as_conversions)]
1597+
fn raw_mut_from_maybe_uninit(maybe_uninit: *mut [MaybeUninit<T>]) -> *mut MaybeValid<[T]> {
1598+
maybe_uninit as *mut MaybeValid<[T]>
1599+
}
1600+
}
1601+
1602+
impl<T> Default for MaybeValid<T> {
1603+
/// Returns a `MaybeValid<T>` whose bytes are all initialized to zero.
///
/// The all-zero bit pattern is not necessarily a valid `T`; callers must
/// still establish validity before using any of the `assume_valid*`
/// methods on the result.
fn default() -> MaybeValid<T> {
1604+
// SAFETY: All of the bytes of `inner` are initialized to 0, and so the
1605+
// safety invariant on `MaybeValid` is upheld.
1606+
MaybeValid { inner: MaybeUninit::zeroed() }
1607+
}
1608+
}
1609+
1610+
impl<T: AsMaybeUninit + ?Sized> MaybeValid<T> {
1611+
/// Converts this `&MaybeValid<T>` to a `&T`.
///
/// No validity check is performed here; the call forwards to
/// `assume_init_ref` on the inner `MaybeUninit<T>`.
1612+
///
1613+
/// # Safety
1614+
///
1615+
/// `self` must contain a valid `T`.
1616+
pub unsafe fn assume_valid_ref(&self) -> &T {
1617+
// SAFETY: The caller has promised that `self` contains a valid `T`.
1618+
// Since `Self` is `repr(transparent)`, it has the same layout as
1619+
// `MaybeUninit<T>`, which in turn is guaranteed to have the same layout
1620+
// as `T`. Thus, it is sound to treat `self.inner` as containing a valid
1621+
// `T`.
1622+
unsafe { self.inner.assume_init_ref() }
1623+
}
1624+
1625+
/// Converts this `&mut MaybeValid<T>` to a `&mut T`.
///
/// No validity check is performed here; the call forwards to
/// `assume_init_mut` on the inner `MaybeUninit<T>`.
1626+
///
1627+
/// # Safety
1628+
///
1629+
/// `self` must contain a valid `T`.
1630+
pub unsafe fn assume_valid_mut(&mut self) -> &mut T {
1631+
// SAFETY: The caller has promised that `self` contains a valid `T`.
1632+
// Since `Self` is `repr(transparent)`, it has the same layout as
1633+
// `MaybeUninit<T>`, which in turn is guaranteed to have the same layout
1634+
// as `T`. Thus, it is sound to treat `self.inner` as containing a valid
1635+
// `T`.
1636+
unsafe { self.inner.assume_init_mut() }
1637+
}
1638+
}
1639+
1640+
impl<T> MaybeValid<T> {
1641+
/// Converts this `MaybeValid<T>` to a `T`.
///
/// Consumes `self` and performs no validity check; the call forwards to
/// `assume_init` on the inner `MaybeUninit<T>`.
1642+
///
1643+
/// # Safety
1644+
///
1645+
/// `self` must contain a valid `T`.
1646+
pub const unsafe fn assume_valid(self) -> T {
1647+
// SAFETY: The caller has promised that `self` contains a valid `T`.
1648+
// Since `Self` is `repr(transparent)`, it has the same layout as
1649+
// `MaybeUninit<T>`, which in turn is guaranteed to have the same layout
1650+
// as `T`. Thus, it is sound to treat `self.inner` as containing a valid
1651+
// `T`.
1652+
unsafe { self.inner.assume_init() }
1653+
}
1654+
}
1655+
1656+
impl<T> MaybeValid<[T]> {
1657+
/// Converts a `MaybeValid<[T]>` to a `[MaybeValid<T>]`.
1658+
///
1659+
/// `MaybeValid<T>` has the same layout as `T`, so these layouts are
1660+
/// equivalent.
///
/// The returned slice borrows from `self`; no data is copied.
1661+
pub const fn as_slice_of_maybe_valids(&self) -> &[MaybeValid<T>] {
1662+
let inner: &[<T as AsMaybeUninit>::MaybeUninit] = &self.inner.inner;
1663+
let inner_ptr: *const [<T as AsMaybeUninit>::MaybeUninit] = inner;
1664+
// Note: this Clippy warning is only emitted on our MSRV (1.61), but not
1665+
// on later versions of Clippy. Thus, we consider it spurious.
1666+
#[allow(clippy::as_conversions)]
1667+
let ret_ptr = inner_ptr as *const [MaybeValid<T>];
1668+
// SAFETY: Since `inner` is a `&[MaybeUninit<T>]`, and `MaybeValid<T>`
1669+
// is a `repr(transparent)` struct around `MaybeUninit<T>`, `inner` has
1670+
// the same layout as `&[MaybeValid<T>]`.
//
// `ret_ptr` was derived from the shared reference `inner`, which itself
// borrows from `&self`, so the dereference below yields a reference
// that is tied to the lifetime of `&self`.
1671+
unsafe { &*ret_ptr }
1672+
}
1673+
}
1674+
1675+
impl<const N: usize, T> MaybeValid<[T; N]> {
1676+
/// Converts a `MaybeValid<[T; N]>` to a `MaybeValid<[T]>`.
///
/// The returned reference borrows from `self` and covers all `N`
/// elements; no data is copied.
1677+
// TODO(#64): Make this `const` once our MSRV is >= 1.64.0 (when
1678+
// `slice_from_raw_parts` was stabilized as `const`).
1679+
pub fn as_slice(&self) -> &MaybeValid<[T]> {
1680+
let base: *const MaybeValid<[T; N]> = self;
1681+
// Build a raw slice pointer starting at the address of `self` with
// length `N`; the element type `T` is only used to carry the length
// metadata through the cast below.
let slice_of_t: *const [T] = ptr::slice_from_raw_parts(base.cast::<T>(), N);
1682+
// Note: this Clippy warning is only emitted on our MSRV (1.61), but not
1683+
// on later versions of Clippy. Thus, we consider it spurious.
1684+
#[allow(clippy::as_conversions)]
1685+
let mv_of_slice = slice_of_t as *const MaybeValid<[T]>;
1686+
// SAFETY: `MaybeValid<T>` is a `repr(transparent)` wrapper around
1687+
// `MaybeUninit<T>`, which in turn has the same layout as `T`. Thus, the
1688+
// trailing slices of `[T]` and of `MaybeValid<[T]>` both have element
1689+
// type `T`. Since the number of elements is preserved during an `as`
1690+
// cast of slice/DST pointers, the resulting `*const MaybeValid<[T]>`
1691+
// has the same number of elements - and thus the same length - as the
1692+
// original `*const [T]`.
1693+
//
1694+
// Thanks to their layouts, `MaybeValid<[T; N]>` and `MaybeValid<[T]>`
1695+
// have the same alignment, so `mv_of_slice` is guaranteed to be
1696+
// aligned.
1697+
unsafe { &*mv_of_slice }
1698+
}
1699+
}
1700+
1701+
impl<T> Debug for MaybeValid<T> {
1702+
// Writes only the type name of `MaybeValid<T>`, never the wrapped bytes;
// note that this impl has no `T: Debug` bound. Presumably this is because
// the contents may not be a valid `T` and so cannot be formatted through
// `T`'s own `Debug`. `Formatter::pad` applies any width/alignment flags.
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1703+
f.pad(core::any::type_name::<Self>())
1704+
}
1705+
}
1706+
15101707
/// A type with no alignment requirement.
15111708
///
15121709
/// An `Unalign` wraps a `T`, removing any alignment requirement. `Unalign<T>`
@@ -3515,6 +3712,68 @@ mod tests {
35153712
assert_eq!(unsafe { m.assume_init() }, 0);
35163713
}
35173714

3715+
#[test]
3716+
fn test_maybe_valid() {
3717+
// `Default` produces zeroed bytes, which are a valid `usize`.
let m = MaybeValid::<usize>::default();
3718+
// SAFETY: all bit patterns are valid `usize`s, and `m` is initialized.
3719+
let u = unsafe { m.assume_valid() };
3720+
// This ensures that Miri can see whether `u` (and thus `m`) has been
3721+
// properly initialized.
3722+
assert_eq!(u, u);
3723+
3724+
// Test helper: reinterprets a mutable byte slice as a
// `&mut MaybeValid<[u8]>` via raw pointer casts.
fn bytes_to_maybe_valid(bytes: &mut [u8]) -> &mut MaybeValid<[u8]> {
3725+
// SAFETY: `MaybeValid<[u8]>` has the same layout as `[u8]`, and
3726+
// `bytes` is initialized.
3727+
unsafe {
3728+
#[allow(clippy::as_conversions)]
3729+
return &mut *(bytes as *mut [u8] as *mut MaybeValid<[u8]>);
3730+
}
3731+
}
3732+
3733+
// Round-trip through `assume_valid_ref`/`assume_valid_mut`, then check
// that a write through the returned `&mut [u8]` is visible in `bytes`.
let mut bytes = [0u8, 1, 2];
3734+
let m = bytes_to_maybe_valid(&mut bytes[..]);
3735+
3736+
// SAFETY: `m` was created from a valid `[u8]`.
3737+
let r = unsafe { m.assume_valid_ref() };
3738+
assert_eq!(r.len(), 3);
3739+
assert_eq!(r, [0, 1, 2]);
3740+
3741+
// SAFETY: `m` was created from a valid `[u8]`.
3742+
let r = unsafe { m.assume_valid_mut() };
3743+
assert_eq!(r.len(), 3);
3744+
assert_eq!(r, [0, 1, 2]);
3745+
3746+
r[0] = 1;
3747+
assert_eq!(bytes, [1, 1, 2]);
3748+
3749+
// Element-wise view: each `MaybeValid<u8>` in the slice must hold the
// byte stored at the same index of `bytes`.
let mut bytes = [0u8, 1, 2];
3750+
let m = bytes_to_maybe_valid(&mut bytes[..]);
3751+
let slc = m.as_slice_of_maybe_valids();
3752+
assert_eq!(slc.len(), 3);
3753+
for i in 0u8..3 {
3754+
// SAFETY: `m` was created from a valid `[u8]`.
3755+
let u = unsafe { slc[usize::from(i)].assume_valid_ref() };
3756+
assert_eq!(u, &i);
3757+
}
3758+
}
3759+
3760+
#[test]
3761+
// Exercises the conversion chain `MaybeValid<[u8; 3]>` -> `MaybeValid<[u8]>`
// (`as_slice`) -> `[MaybeValid<u8>]` (`as_slice_of_maybe_valids`) and checks
// that length and element values survive it.
fn test_maybe_valid_as_slice() {
3762+
let mut m = MaybeValid::<[u8; 3]>::default();
3763+
// SAFETY: all bit patterns are valid `[u8; 3]`s, and `m` is
3764+
// initialized.
3765+
unsafe { *m.assume_valid_mut() = [0, 1, 2] };
3766+
3767+
let slc = m.as_slice().as_slice_of_maybe_valids();
3768+
assert_eq!(slc.len(), 3);
3769+
3770+
for i in 0u8..3 {
3771+
// SAFETY: `m` was initialized as a valid `[u8; 3]`.
3772+
let u = unsafe { slc[usize::from(i)].assume_valid_ref() };
3773+
assert_eq!(u, &i);
3774+
}
3775+
}
3776+
35183777
#[test]
35193778
fn test_unalign() {
35203779
// Test methods that don't depend on alignment.

0 commit comments

Comments
 (0)