Skip to content

Commit 3492364

Browse files
committed
Add support for masked loads & stores
1 parent f510c6b commit 3492364

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed

crates/core_simd/src/intrinsics.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,15 @@ extern "platform-intrinsic" {
107107
/// like gather, but more spicy, as it writes instead of reads
108108
pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
109109

110+
/// like a loop of reads offset from the same pointer
111+
/// val: vector of values to select if a lane is masked
112+
/// ptr: the single base pointer to read from (callers in this crate pass a `*const T`, not a vector of pointers)
113+
/// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
114+
/// note, the LLVM intrinsic accepts a mask vector of `<N x i1>`
115+
pub(crate) fn simd_masked_load<T, U, V>(val: T, ptr: U, mask: V) -> T;
116+
/// like masked_load, but more spicy, as it writes instead of reads
/// val: vector of values to write
/// ptr: the single base pointer to write to (callers in this crate pass a `*mut T`)
/// mask: a "wide" mask of integers, as produced by `Mask::to_int`
/// NOTE(review): presumably lanes whose mask is unset are left unwritten -- confirm against the intrinsic
117+
pub(crate) fn simd_masked_store<T, U, V>(val: T, ptr: U, mask: V);
118+
110119
// {s,u}add.sat
111120
pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
112121

crates/core_simd/src/vector.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,42 @@ where
311311
unsafe { self.store(slice.as_mut_ptr().cast()) }
312312
}
313313

314+
/// Loads a vector from the start of `slice`, falling back to `or` for lanes the slice cannot supply.
///
/// Forwards to `masked_load_select` with an all-true mask, so the only lanes that end up
/// disabled are the ones that function masks off because their index is not below
/// `slice.len()`.
#[must_use]
315+
#[inline]
316+
pub fn masked_load_or(slice: &[T], or: Self) -> Self {
317+
Self::masked_load_select(slice, Mask::splat(true), or)
318+
}
319+
320+
/// Loads the lanes selected by `enable` from the start of `slice`, bounds-limited to the slice.
///
/// A lane stays enabled only if the caller enabled it *and* its lane index compares less than
/// `slice.len()`. The narrowed mask, the slice's base pointer and `or` are then handed to
/// `masked_load_select_ptr`; per the intrinsic's description, disabled lanes take their value
/// from `or`.
#[must_use]
321+
#[inline]
322+
pub fn masked_load_select(slice: &[T], enable: Mask<isize, N>, or: Self) -> Self {
323+
let ptr = slice.as_ptr();
324+
// Lane indices used for the bounds comparison below.
// NOTE(review): the table lists 0 through 64 (65 entries); presumably `from_slice` only
// consumes the first `N` of them -- confirm.
let idxs = Simd::<usize, N>::from_slice(&[
325+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
326+
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
327+
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
328+
]);
329+
// Shadow `enable` with a mask that additionally drops every lane at or past the slice end.
let enable: Mask<isize, N> = enable & idxs.simd_lt(Simd::splat(slice.len()));
330+
// SAFETY: `ptr` comes from `slice` and lanes with index >= `slice.len()` were disabled
// above. NOTE(review): this relies on the intrinsic not reading disabled lanes -- confirm.
unsafe { Self::masked_load_select_ptr(ptr, enable, or) }
331+
}
332+
333+
/// Loads the lanes selected by `enable` from the start of `slice` without any bounds masking.
///
/// Unlike `masked_load_select`, `enable` is passed through unchanged: the slice is only used
/// for its base pointer and its length is never consulted.
///
/// # Safety
///
/// NOTE(review): presumably every lane enabled in `enable` must have an index below
/// `slice.len()`, since nothing here prevents an enabled lane from reading past the end of
/// the slice -- confirm against the intrinsic's contract.
#[must_use]
334+
#[inline]
335+
pub unsafe fn masked_load_select_unchecked(
336+
slice: &[T],
337+
enable: Mask<isize, N>,
338+
or: Self,
339+
) -> Self {
340+
let ptr = slice.as_ptr();
341+
// SAFETY: no checks are performed here; the obligations are forwarded to the caller of
// this `unsafe fn`.
unsafe { Self::masked_load_select_ptr(ptr, enable, or) }
342+
}
343+
344+
/// Raw-pointer masked load: a thin wrapper over the `simd_masked_load` intrinsic.
///
/// Passes `or` as the fallback vector, `ptr` as the base pointer and `enable.to_int()` as the
/// integer mask.
///
/// # Safety
///
/// NOTE(review): presumably `ptr` must be valid for reads at the offset of every lane enabled
/// in `enable` -- confirm against the intrinsic's contract.
#[must_use]
345+
#[inline]
346+
pub unsafe fn masked_load_select_ptr(ptr: *const T, enable: Mask<isize, N>, or: Self) -> Self {
347+
// SAFETY: nothing is validated here; the caller of this `unsafe fn` is responsible for `ptr`
// and `enable`.
unsafe { intrinsics::simd_masked_load(or, ptr, enable.to_int()) }
348+
}
349+
314350
/// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
315351
/// If an index is out-of-bounds, the element is instead selected from the `or` vector.
316352
///
@@ -489,6 +525,29 @@ where
489525
unsafe { intrinsics::simd_gather(or, source, enable.to_int()) }
490526
}
491527

528+
/// Stores the lanes selected by `enable` to the start of `slice`, bounds-limited to the slice.
///
/// A lane stays enabled only if the caller enabled it *and* its lane index compares less than
/// `slice.len()`. The narrowed mask and the slice's base pointer are then handed to
/// `masked_store_ptr`.
#[inline]
529+
pub fn masked_store(self, slice: &mut [T], enable: Mask<isize, N>) {
530+
let ptr = slice.as_mut_ptr();
531+
// Lane indices used for the bounds comparison below.
// NOTE(review): the table lists 0 through 64 (65 entries); presumably `from_slice` only
// consumes the first `N` of them -- confirm.
let idxs = Simd::<usize, N>::from_slice(&[
532+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
533+
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
534+
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
535+
]);
536+
// Shadow `enable` with a mask that additionally drops every lane at or past the slice end.
let enable: Mask<isize, N> = enable & idxs.simd_lt(Simd::splat(slice.len()));
537+
// SAFETY: `ptr` comes from `slice` and lanes with index >= `slice.len()` were disabled
// above. NOTE(review): this relies on the intrinsic not writing disabled lanes -- confirm.
unsafe { self.masked_store_ptr(ptr, enable) }
538+
}
539+
540+
/// Stores the lanes selected by `enable` to the start of `slice` without any bounds masking.
///
/// Unlike `masked_store`, `enable` is passed through unchanged: the slice is only used for its
/// base pointer and its length is never consulted.
///
/// # Safety
///
/// NOTE(review): presumably every lane enabled in `enable` must have an index below
/// `slice.len()`, since nothing here prevents an enabled lane from writing past the end of
/// the slice -- confirm against the intrinsic's contract.
#[inline]
541+
pub unsafe fn masked_store_unchecked(self, slice: &mut [T], enable: Mask<isize, N>) {
542+
let ptr = slice.as_mut_ptr();
543+
// SAFETY: no checks are performed here; the obligations are forwarded to the caller of
// this `unsafe fn`.
unsafe { self.masked_store_ptr(ptr, enable) }
544+
}
545+
546+
/// Raw-pointer masked store: a thin wrapper over the `simd_masked_store` intrinsic.
///
/// Passes `self` as the values to write, `ptr` as the base pointer and `enable.to_int()` as
/// the integer mask.
///
/// # Safety
///
/// NOTE(review): presumably `ptr` must be valid for writes at the offset of every lane enabled
/// in `enable` -- confirm against the intrinsic's contract.
#[inline]
547+
pub unsafe fn masked_store_ptr(self, ptr: *mut T, enable: Mask<isize, N>) {
548+
// SAFETY: nothing is validated here; the caller of this `unsafe fn` is responsible for `ptr`
// and `enable`.
unsafe { intrinsics::simd_masked_store(self, ptr, enable.to_int()) }
549+
}
550+
492551
/// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`.
493552
/// If an index is out-of-bounds, the write is suppressed without panicking.
494553
/// If two elements in the scattered vector would write to the same index

0 commit comments

Comments
 (0)