Add a separate function for a non-temporal read prefetch

folkertdev · folkertdev · commit 2dcd532a74c0 · 2025-10-05T15:50:36.000+02:00
diff --git a/library/core/src/hint.rs b/library/core/src/hint.rs
@@ -826,18 +826,14 @@ pub fn select_unpredictable<T>(condition: bool, true_val: T, false_val: T) -> T
 ///
 /// The locality is just a hint, and may be ignored on some targets or by the hardware.
 ///
-/// Used with functions like [`prefetch_read_data`] and [`prefetch_write_data`].
+/// Used with functions like [`prefetch_read`] and [`prefetch_write`].
 ///
-/// [`prefetch_read_data`]: crate::hint::prefetch_read_data
-/// [`prefetch_write_data`]: crate::hint::prefetch_write_data
+/// [`prefetch_read`]: crate::hint::prefetch_read
+/// [`prefetch_write`]: crate::hint::prefetch_write
 #[unstable(feature = "hint_prefetch", issue = "146941")]
 #[non_exhaustive]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum Locality {
-    /// Data is unlikely to be reused soon.
-    ///
-    /// Typically bypasses the caches so they are not polluted.
-    NonTemporal = 0,
     /// Data is expected to be reused eventually.
     ///
     /// Typically prefetches into L3 cache (if the CPU supports it).
@@ -865,30 +861,46 @@ pub enum Locality {
 /// # Examples
 ///
 /// ```
-/// use std::hint::{Locality, prefetch_read_data};
+/// #![feature(hint_prefetch)]
+/// use std::hint::{Locality, prefetch_read};
 /// use std::mem::size_of_val;
 ///
 /// // Prefetch all of `slice` into the L1 cache.
 /// fn prefetch_slice<T>(slice: &[T]) {
 ///     // On most systems the cache line size is 64 bytes.
 ///     for offset in (0..size_of_val(slice)).step_by(64) {
-///         prefetch_read_data(slice.as_ptr().wrapping_add(offset), Locality::L1);
+///         prefetch_read(slice.as_ptr().wrapping_add(offset), Locality::L1);
 ///     }
 /// }
 /// ```
 #[inline(always)]
 #[unstable(feature = "hint_prefetch", issue = "146941")]
-pub const fn prefetch_read_data<T>(ptr: *const T, locality: Locality) {
+pub const fn prefetch_read<T>(ptr: *const T, locality: Locality) {
     match locality {
-        Locality::NonTemporal => {
-            intrinsics::prefetch_read_data::<T, { Locality::NonTemporal as i32 }>(ptr)
-        }
         Locality::L3 => intrinsics::prefetch_read_data::<T, { Locality::L3 as i32 }>(ptr),
         Locality::L2 => intrinsics::prefetch_read_data::<T, { Locality::L2 as i32 }>(ptr),
         Locality::L1 => intrinsics::prefetch_read_data::<T, { Locality::L1 as i32 }>(ptr),
     }
 }
 
+/// Prefetch the cache line containing `ptr` for a single future read, but attempt to avoid
+/// polluting the cache.
+///
+/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
+/// soon after, but may also increase bandwidth usage or evict other cache lines.
+///
+/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
+///
+/// Passing a dangling or invalid pointer is permitted: the memory will not
+/// actually be dereferenced, and no faults are raised.
+#[inline(always)]
+#[unstable(feature = "hint_prefetch", issue = "146941")]
+pub const fn prefetch_read_non_temporal<T>(ptr: *const T, locality: Locality) {
+    // The LLVM intrinsic does not currently support specifying the locality.
+    let _ = locality;
+    intrinsics::prefetch_read_data::<T, 0>(ptr)
+}
+
 /// Prefetch the cache line containing `ptr` for a future write.
 ///
 /// A strategically placed prefetch can reduce cache miss latency if the data is accessed
@@ -898,19 +910,35 @@ pub const fn prefetch_read_data<T>(ptr: *const T, locality: Locality) {
 ///
 /// Passing a dangling or invalid pointer is permitted: the memory will not
 /// actually be dereferenced, and no faults are raised.
+#[inline(always)]
 #[unstable(feature = "hint_prefetch", issue = "146941")]
-pub const fn prefetch_write_data<T>(ptr: *mut T, locality: Locality) {
+pub const fn prefetch_write<T>(ptr: *mut T, locality: Locality) {
     match locality {
-        Locality::NonTemporal => {
-            intrinsics::prefetch_write_data::<T, { Locality::NonTemporal as i32 }>(ptr)
-        }
         Locality::L3 => intrinsics::prefetch_write_data::<T, { Locality::L3 as i32 }>(ptr),
         Locality::L2 => intrinsics::prefetch_write_data::<T, { Locality::L2 as i32 }>(ptr),
         Locality::L1 => intrinsics::prefetch_write_data::<T, { Locality::L1 as i32 }>(ptr),
     }
 }
 
-/// Prefetch the cache line containing `ptr` for a future read.
+/// Prefetch the cache line containing `ptr` for a single future write, but attempt to avoid
+/// polluting the cache.
+///
+/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
+/// soon after, but may also increase bandwidth usage or evict other cache lines.
+///
+/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
+///
+/// Passing a dangling or invalid pointer is permitted: the memory will not
+/// actually be dereferenced, and no faults are raised.
+#[inline(always)]
+#[unstable(feature = "hint_prefetch", issue = "146941")]
+pub const fn prefetch_write_non_temporal<T>(ptr: *const T, locality: Locality) {
+    // The LLVM intrinsic does not currently support specifying the locality.
+    let _ = locality;
+    intrinsics::prefetch_write_data::<T, 0>(ptr)
+}
+
+/// Prefetch the cache line containing `ptr` into the instruction cache for a future read.
 ///
 /// A strategically placed prefetch can reduce cache miss latency if the instructions are
 /// accessed soon after, but may also increase bandwidth usage or evict other cache lines.
@@ -919,12 +947,10 @@ pub const fn prefetch_write_data<T>(ptr: *mut T, locality: Locality) {
 ///
 /// Passing a dangling or invalid pointer is permitted: the memory will not
 /// actually be dereferenced, and no faults are raised.
+#[inline(always)]
 #[unstable(feature = "hint_prefetch", issue = "146941")]
 pub const fn prefetch_read_instruction<T>(ptr: *const T, locality: Locality) {
     match locality {
-        Locality::NonTemporal => {
-            intrinsics::prefetch_read_instruction::<T, { Locality::NonTemporal as i32 }>(ptr)
-        }
         Locality::L3 => intrinsics::prefetch_read_instruction::<T, { Locality::L3 as i32 }>(ptr),
         Locality::L2 => intrinsics::prefetch_read_instruction::<T, { Locality::L2 as i32 }>(ptr),
         Locality::L1 => intrinsics::prefetch_read_instruction::<T, { Locality::L1 as i32 }>(ptr),