Skip to content

Commit 4bb6c6c

Browse files
committed
Complete vld1 instructions with some corrections
1 parent 2edc74d commit 4bb6c6c

File tree

9 files changed

+1185
-766
lines changed

9 files changed

+1185
-766
lines changed

crates/core_arch/src/aarch64/neon/mod.rs

+103
Original file line numberDiff line numberDiff line change
@@ -749,6 +749,22 @@ pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
749749
))
750750
}
751751

752+
/// Load multiple single-element structures to one, two, three, or four registers.
753+
#[inline]
754+
#[target_feature(enable = "neon,aes")]
755+
#[cfg_attr(test, assert_instr(ldr))]
756+
pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
757+
transmute(u64x1::new(*ptr))
758+
}
759+
760+
/// Load multiple single-element structures to one, two, three, or four registers.
761+
#[inline]
762+
#[target_feature(enable = "neon,aes")]
763+
#[cfg_attr(test, assert_instr(ldr))]
764+
pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
765+
transmute(u64x2::new(*ptr, *ptr.offset(1)))
766+
}
767+
752768
/// Load multiple single-element structures to one, two, three, or four registers.
753769
#[inline]
754770
#[target_feature(enable = "neon")]
@@ -786,6 +802,43 @@ pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
786802
transmute(f64x2::new(*ptr, *ptr.offset(1)))
787803
}
788804

805+
/// Load multiple single-element structures to one, two, three, or four registers
806+
#[inline]
807+
#[target_feature(enable = "neon")]
808+
#[cfg_attr(test, assert_instr(ldr))]
809+
pub unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t {
810+
vld1_f64(ptr)
811+
}
812+
813+
/// Load multiple single-element structures to one, two, three, or four registers
814+
#[inline]
815+
#[target_feature(enable = "neon")]
816+
#[cfg_attr(test, assert_instr(ldr))]
817+
pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t {
818+
let x = vld1q_lane_f64::<0>(ptr, transmute(f64x2::splat(0.)));
819+
simd_shuffle2!(x, x, [0, 0])
820+
}
821+
822+
/// Load one single-element structure to one lane of one register.
823+
#[inline]
824+
#[target_feature(enable = "neon")]
825+
#[rustc_legacy_const_generics(2)]
826+
#[cfg_attr(test, assert_instr(ldr, LANE = 0))]
827+
pub unsafe fn vld1_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x1_t) -> float64x1_t {
828+
static_assert!(LANE : i32 where LANE == 0);
829+
simd_insert(src, LANE as u32, *ptr)
830+
}
831+
832+
/// Load one single-element structure to one lane of one register.
833+
#[inline]
834+
#[target_feature(enable = "neon")]
835+
#[rustc_legacy_const_generics(2)]
836+
#[cfg_attr(test, assert_instr(ldr, LANE = 1))]
837+
pub unsafe fn vld1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
838+
static_assert_imm1!(LANE);
839+
simd_insert(src, LANE as u32, *ptr)
840+
}
841+
789842
/// Store multiple single-element structures from one, two, three, or four registers.
790843
#[inline]
791844
#[target_feature(enable = "neon")]
@@ -4939,6 +4992,56 @@ mod tests {
49394992
assert_eq!(r, e);
49404993
}
49414994

4995+
#[simd_test(enable = "neon")]
4996+
unsafe fn test_vld1_f64() {
4997+
let a: [f64; 2] = [0., 1.];
4998+
let e = f64x1::new(1.);
4999+
let r: f64x1 = transmute(vld1_f64(a[1..].as_ptr()));
5000+
assert_eq!(r, e)
5001+
}
5002+
5003+
#[simd_test(enable = "neon")]
5004+
unsafe fn test_vld1q_f64() {
5005+
let a: [f64; 3] = [0., 1., 2.];
5006+
let e = f64x2::new(1., 2.);
5007+
let r: f64x2 = transmute(vld1q_f64(a[1..].as_ptr()));
5008+
assert_eq!(r, e)
5009+
}
5010+
5011+
#[simd_test(enable = "neon")]
5012+
unsafe fn test_vld1_dup_f64() {
5013+
let a: [f64; 2] = [1., 42.];
5014+
let e = f64x1::new(42.);
5015+
let r: f64x1 = transmute(vld1_dup_f64(a[1..].as_ptr()));
5016+
assert_eq!(r, e)
5017+
}
5018+
5019+
#[simd_test(enable = "neon")]
5020+
unsafe fn test_vld1q_dup_f64() {
5021+
let elem: f64 = 42.;
5022+
let e = f64x2::new(42., 42.);
5023+
let r: f64x2 = transmute(vld1q_dup_f64(&elem));
5024+
assert_eq!(r, e)
5025+
}
5026+
5027+
#[simd_test(enable = "neon")]
5028+
unsafe fn test_vld1_lane_f64() {
5029+
let a = f64x1::new(0.);
5030+
let elem: f64 = 42.;
5031+
let e = f64x1::new(42.);
5032+
let r: f64x1 = transmute(vld1_lane_f64::<0>(&elem, transmute(a)));
5033+
assert_eq!(r, e)
5034+
}
5035+
5036+
#[simd_test(enable = "neon")]
5037+
unsafe fn test_vld1q_lane_f64() {
5038+
let a = f64x2::new(0., 1.);
5039+
let elem: f64 = 42.;
5040+
let e = f64x2::new(0., 42.);
5041+
let r: f64x2 = transmute(vld1q_lane_f64::<1>(&elem, transmute(a)));
5042+
assert_eq!(r, e)
5043+
}
5044+
49425045
#[simd_test(enable = "neon")]
49435046
unsafe fn test_vst1_p64() {
49445047
let mut vals = [0_u64; 2];

crates/core_arch/src/arm/neon.rs

+16
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,22 @@ pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
288288
transmute(vld1q_v8i16(ptr as *const i8, align_of::<p16>() as i32))
289289
}
290290

291+
/// Load multiple single-element structures to one, two, three, or four registers.
292+
#[inline]
293+
#[target_feature(enable = "neon,aes")]
294+
#[cfg_attr(test, assert_instr(vldr))]
295+
pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
296+
transmute(vld1_v1i64(ptr as *const i8, align_of::<p64>() as i32))
297+
}
298+
299+
/// Load multiple single-element structures to one, two, three, or four registers.
300+
#[inline]
301+
#[target_feature(enable = "neon,aes")]
302+
#[cfg_attr(test, assert_instr("vld1.64"))]
303+
pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
304+
transmute(vld1q_v2i64(ptr as *const i8, align_of::<p64>() as i32))
305+
}
306+
291307
/// Load multiple single-element structures to one, two, three, or four registers.
292308
#[inline]
293309
#[target_feature(enable = "neon,v7")]

0 commit comments

Comments
 (0)