Skip to content

Commit 30b3eb3

Browse files
authored
Complete vld1 instructions with some corrections (#1216)
1 parent 5c68694 commit 30b3eb3

File tree

9 files changed

+1187
-766
lines changed

9 files changed

+1187
-766
lines changed

crates/core_arch/src/aarch64/neon/mod.rs

+103
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,22 @@ pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
619619
read_unaligned(ptr.cast())
620620
}
621621

622+
/// Load multiple single-element structures to one, two, three, or four registers.
///
/// Returns a `poly64x1_t` obtained by casting `ptr` and passing it to
/// `read_unaligned`. In test builds `assert_instr` expects an `ldr` instruction.
///
/// # Safety
///
/// NOTE(review): `ptr` presumably has to be valid for reading a whole
/// `poly64x1_t`; confirm against the contract of `read_unaligned`.
623+
#[inline]
624+
#[target_feature(enable = "neon,aes")]
625+
#[cfg_attr(test, assert_instr(ldr))]
626+
pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
627+
read_unaligned(ptr.cast())
628+
}
629+
630+
/// Load multiple single-element structures to one, two, three, or four registers.
///
/// Returns a `poly64x2_t` obtained by casting `ptr` and passing it to
/// `read_unaligned`. In test builds `assert_instr` expects an `ldr` instruction.
///
/// # Safety
///
/// NOTE(review): `ptr` presumably has to be valid for reading a whole
/// `poly64x2_t` (two `p64` elements); confirm against the contract of
/// `read_unaligned`.
631+
#[inline]
632+
#[target_feature(enable = "neon,aes")]
633+
#[cfg_attr(test, assert_instr(ldr))]
634+
pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
635+
read_unaligned(ptr.cast())
636+
}
637+
622638
/// Load multiple single-element structures to one, two, three, or four registers.
623639
#[inline]
624640
#[target_feature(enable = "neon")]
@@ -651,6 +667,43 @@ pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
651667
read_unaligned(ptr.cast())
652668
}
653669

670+
/// Load multiple single-element structures to one, two, three, or four registers.
///
/// Forwards directly to `vld1_f64(ptr)`; no separate replication step is
/// performed here. In test builds `assert_instr` expects an `ldr` instruction.
///
/// # Safety
///
/// The requirements on `ptr` are whatever `vld1_f64` requires, since the
/// pointer is passed through unchanged.
671+
#[inline]
672+
#[target_feature(enable = "neon")]
673+
#[cfg_attr(test, assert_instr(ldr))]
674+
pub unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t {
675+
vld1_f64(ptr)
676+
}
677+
678+
/// Load multiple single-element structures to one, two, three, or four registers.
///
/// Loads the `f64` at `ptr` into lane 0 of an all-zero vector via
/// `vld1q_lane_f64::<0>`, then shuffles that vector with indices `[0, 0]` so
/// the returned `float64x2_t` holds the loaded value in both lanes.
/// In test builds `assert_instr` expects an `ldr` instruction.
///
/// # Safety
///
/// The requirements on `ptr` are those of `vld1q_lane_f64`, which
/// dereferences it once.
679+
#[inline]
680+
#[target_feature(enable = "neon")]
681+
#[cfg_attr(test, assert_instr(ldr))]
682+
pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t {
683+
// The zero vector is only a placeholder; lane 1 is overwritten by the shuffle below.
let x = vld1q_lane_f64::<0>(ptr, transmute(f64x2::splat(0.)));
684+
// Both output lanes select index 0, i.e. the freshly loaded element.
simd_shuffle2!(x, x, [0, 0])
685+
}
686+
687+
/// Load one single-element structure to one lane of one register.
///
/// Returns `src` with lane `LANE` replaced by the `f64` read from `ptr`.
/// `LANE` is checked at compile time and must be `0`.
/// `#[rustc_legacy_const_generics(2)]` also permits passing `LANE` as the
/// third positional argument instead of as a const generic.
///
/// # Safety
///
/// `ptr` is read with a plain dereference (`*ptr`), not `read_unaligned`.
/// NOTE(review): it therefore presumably must be valid and aligned for an
/// `f64` read; confirm.
688+
#[inline]
689+
#[target_feature(enable = "neon")]
690+
#[rustc_legacy_const_generics(2)]
691+
#[cfg_attr(test, assert_instr(ldr, LANE = 0))]
692+
pub unsafe fn vld1_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x1_t) -> float64x1_t {
693+
// Reject any lane index other than 0 at compile time.
static_assert!(LANE : i32 where LANE == 0);
694+
simd_insert(src, LANE as u32, *ptr)
695+
}
696+
697+
/// Load one single-element structure to one lane of one register.
///
/// Returns `src` with lane `LANE` replaced by the `f64` read from `ptr`; the
/// other lane is kept from `src`. `LANE` is validated at compile time by
/// `static_assert_imm1!` (presumably restricting it to a 1-bit immediate,
/// i.e. 0 or 1; confirm against the macro definition).
/// `#[rustc_legacy_const_generics(2)]` also permits passing `LANE` as the
/// third positional argument instead of as a const generic.
///
/// # Safety
///
/// `ptr` is read with a plain dereference (`*ptr`), not `read_unaligned`.
/// NOTE(review): it therefore presumably must be valid and aligned for an
/// `f64` read; confirm.
698+
#[inline]
699+
#[target_feature(enable = "neon")]
700+
#[rustc_legacy_const_generics(2)]
701+
#[cfg_attr(test, assert_instr(ldr, LANE = 1))]
702+
pub unsafe fn vld1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
703+
static_assert_imm1!(LANE);
704+
simd_insert(src, LANE as u32, *ptr)
705+
}
706+
654707
/// Store multiple single-element structures from one, two, three, or four registers.
655708
#[inline]
656709
#[target_feature(enable = "neon")]
@@ -4700,6 +4753,56 @@ mod tests {
47004753
assert_eq!(r, e);
47014754
}
47024755

4756+
// Checks `vld1_f64`: loading from the second element of `[0., 1.]` must
// produce a one-lane vector holding 1.0.
#[simd_test(enable = "neon")]
4757+
unsafe fn test_vld1_f64() {
4758+
let a: [f64; 2] = [0., 1.];
4759+
let e = f64x1::new(1.);
4760+
// Start at index 1 so a load from the wrong address would read 0.0 instead.
let r: f64x1 = transmute(vld1_f64(a[1..].as_ptr()));
4761+
assert_eq!(r, e)
4762+
}
4763+
4764+
// Checks `vld1q_f64`: loading from the second element of `[0., 1., 2.]` must
// produce the two-lane vector (1.0, 2.0).
#[simd_test(enable = "neon")]
4765+
unsafe fn test_vld1q_f64() {
4766+
let a: [f64; 3] = [0., 1., 2.];
4767+
let e = f64x2::new(1., 2.);
4768+
// Start at index 1 so the two lanes come from elements 1 and 2 of the array.
let r: f64x2 = transmute(vld1q_f64(a[1..].as_ptr()));
4769+
assert_eq!(r, e)
4770+
}
4771+
4772+
// Checks `vld1_dup_f64`: loading from the second element of `[1., 42.]` must
// produce a one-lane vector holding 42.0.
#[simd_test(enable = "neon")]
4773+
unsafe fn test_vld1_dup_f64() {
4774+
let a: [f64; 2] = [1., 42.];
4775+
let e = f64x1::new(42.);
4776+
let r: f64x1 = transmute(vld1_dup_f64(a[1..].as_ptr()));
4777+
assert_eq!(r, e)
4778+
}
4779+
4780+
// Checks `vld1q_dup_f64`: a single 42.0 in memory must end up in both lanes
// of the returned two-lane vector.
#[simd_test(enable = "neon")]
4781+
unsafe fn test_vld1q_dup_f64() {
4782+
let elem: f64 = 42.;
4783+
let e = f64x2::new(42., 42.);
4784+
let r: f64x2 = transmute(vld1q_dup_f64(&elem));
4785+
assert_eq!(r, e)
4786+
}
4787+
4788+
// Checks `vld1_lane_f64::<0>`: inserting 42.0 into lane 0 of a vector holding
// 0.0 must produce a one-lane vector holding 42.0.
#[simd_test(enable = "neon")]
4789+
unsafe fn test_vld1_lane_f64() {
4790+
let a = f64x1::new(0.);
4791+
let elem: f64 = 42.;
4792+
let e = f64x1::new(42.);
4793+
let r: f64x1 = transmute(vld1_lane_f64::<0>(&elem, transmute(a)));
4794+
assert_eq!(r, e)
4795+
}
4796+
4797+
// Checks `vld1q_lane_f64::<1>`: inserting 42.0 into lane 1 of (0.0, 1.0) must
// produce (0.0, 42.0), i.e. lane 0 is left untouched.
#[simd_test(enable = "neon")]
4798+
unsafe fn test_vld1q_lane_f64() {
4799+
let a = f64x2::new(0., 1.);
4800+
let elem: f64 = 42.;
4801+
let e = f64x2::new(0., 42.);
4802+
let r: f64x2 = transmute(vld1q_lane_f64::<1>(&elem, transmute(a)));
4803+
assert_eq!(r, e)
4804+
}
4805+
47034806
#[simd_test(enable = "neon")]
47044807
unsafe fn test_vst1_p64() {
47054808
let mut vals = [0_u64; 2];

crates/core_arch/src/arm/neon.rs

+16
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,22 @@ pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
288288
transmute(vld1q_v8i16(ptr as *const i8, align_of::<p16>() as i32))
289289
}
290290

291+
/// Load multiple single-element structures to one, two, three, or four registers.
///
/// Calls `vld1_v1i64` with `ptr` cast to `*const i8` and `align_of::<p64>()`
/// as the second argument, then transmutes the result to `poly64x1_t`.
/// In test builds `assert_instr` expects a `vldr` instruction.
///
/// NOTE(review): the second argument is presumably an alignment hint for the
/// underlying load; confirm against the declaration of `vld1_v1i64`.
292+
#[inline]
293+
#[target_feature(enable = "neon,aes")]
294+
#[cfg_attr(test, assert_instr(vldr))]
295+
pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
296+
transmute(vld1_v1i64(ptr as *const i8, align_of::<p64>() as i32))
297+
}
298+
299+
/// Load multiple single-element structures to one, two, three, or four registers.
///
/// Calls `vld1q_v2i64` with `ptr` cast to `*const i8` and `align_of::<p64>()`
/// as the second argument, then transmutes the result to `poly64x2_t`.
/// In test builds `assert_instr` expects a `vld1.64` instruction.
///
/// NOTE(review): the second argument is presumably an alignment hint for the
/// underlying load; confirm against the declaration of `vld1q_v2i64`.
300+
#[inline]
301+
#[target_feature(enable = "neon,aes")]
302+
#[cfg_attr(test, assert_instr("vld1.64"))]
303+
pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
304+
transmute(vld1q_v2i64(ptr as *const i8, align_of::<p64>() as i32))
305+
}
306+
291307
/// Load multiple single-element structures to one, two, three, or four registers.
292308
#[inline]
293309
#[target_feature(enable = "neon,v7")]

0 commit comments

Comments
 (0)