Skip to content

Commit 2edc74d

Browse files
authored
Add vst neon instructions (#1205)
* add vst neon instructions * modify the instruction limit
1 parent a37a592 commit 2edc74d

File tree

5 files changed

+9691
-7509
lines changed

5 files changed

+9691
-7509
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

+132
Original file line numberDiff line numberDiff line change
@@ -4592,6 +4592,84 @@ pub unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t {
45924592
vld1q_f64_x4_(a)
45934593
}
45944594

4595+
/// Store multiple single-element structures to one, two, three, or four registers
4596+
#[inline]
4597+
#[target_feature(enable = "neon")]
4598+
#[cfg_attr(test, assert_instr(st1))]
4599+
pub unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t) {
4600+
#[allow(improper_ctypes)]
4601+
extern "unadjusted" {
4602+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v1f64.p0f64")]
4603+
fn vst1_f64_x2_(a: float64x1_t, b: float64x1_t, ptr: *mut f64);
4604+
}
4605+
vst1_f64_x2_(b.0, b.1, a)
4606+
}
4607+
4608+
/// Store multiple single-element structures to one, two, three, or four registers
4609+
#[inline]
4610+
#[target_feature(enable = "neon")]
4611+
#[cfg_attr(test, assert_instr(st1))]
4612+
pub unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t) {
4613+
#[allow(improper_ctypes)]
4614+
extern "unadjusted" {
4615+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v2f64.p0f64")]
4616+
fn vst1q_f64_x2_(a: float64x2_t, b: float64x2_t, ptr: *mut f64);
4617+
}
4618+
vst1q_f64_x2_(b.0, b.1, a)
4619+
}
4620+
4621+
/// Store multiple single-element structures to one, two, three, or four registers
4622+
#[inline]
4623+
#[target_feature(enable = "neon")]
4624+
#[cfg_attr(test, assert_instr(st1))]
4625+
pub unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t) {
4626+
#[allow(improper_ctypes)]
4627+
extern "unadjusted" {
4628+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v1f64.p0f64")]
4629+
fn vst1_f64_x3_(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut f64);
4630+
}
4631+
vst1_f64_x3_(b.0, b.1, b.2, a)
4632+
}
4633+
4634+
/// Store multiple single-element structures to one, two, three, or four registers
4635+
#[inline]
4636+
#[target_feature(enable = "neon")]
4637+
#[cfg_attr(test, assert_instr(st1))]
4638+
pub unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t) {
4639+
#[allow(improper_ctypes)]
4640+
extern "unadjusted" {
4641+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v2f64.p0f64")]
4642+
fn vst1q_f64_x3_(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut f64);
4643+
}
4644+
vst1q_f64_x3_(b.0, b.1, b.2, a)
4645+
}
4646+
4647+
/// Store multiple single-element structures to one, two, three, or four registers
4648+
#[inline]
4649+
#[target_feature(enable = "neon")]
4650+
#[cfg_attr(test, assert_instr(st1))]
4651+
pub unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t) {
4652+
#[allow(improper_ctypes)]
4653+
extern "unadjusted" {
4654+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v1f64.p0f64")]
4655+
fn vst1_f64_x4_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut f64);
4656+
}
4657+
vst1_f64_x4_(b.0, b.1, b.2, b.3, a)
4658+
}
4659+
4660+
/// Store multiple single-element structures to one, two, three, or four registers
4661+
#[inline]
4662+
#[target_feature(enable = "neon")]
4663+
#[cfg_attr(test, assert_instr(st1))]
4664+
pub unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t) {
4665+
#[allow(improper_ctypes)]
4666+
extern "unadjusted" {
4667+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v2f64.p0f64")]
4668+
fn vst1q_f64_x4_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut f64);
4669+
}
4670+
vst1q_f64_x4_(b.0, b.1, b.2, b.3, a)
4671+
}
4672+
45954673
/// Multiply
45964674
#[inline]
45974675
#[target_feature(enable = "neon")]
@@ -12983,6 +13061,60 @@ mod test {
1298313061
assert_eq!(r, e);
1298413062
}
1298513063

13064+
#[simd_test(enable = "neon")]
13065+
unsafe fn test_vst1_f64_x2() {
13066+
let a: [f64; 3] = [0., 1., 2.];
13067+
let e: [f64; 2] = [1., 2.];
13068+
let mut r: [f64; 2] = [0f64; 2];
13069+
vst1_f64_x2(r.as_mut_ptr(), vld1_f64_x2(a[1..].as_ptr()));
13070+
assert_eq!(r, e);
13071+
}
13072+
13073+
#[simd_test(enable = "neon")]
13074+
unsafe fn test_vst1q_f64_x2() {
13075+
let a: [f64; 5] = [0., 1., 2., 3., 4.];
13076+
let e: [f64; 4] = [1., 2., 3., 4.];
13077+
let mut r: [f64; 4] = [0f64; 4];
13078+
vst1q_f64_x2(r.as_mut_ptr(), vld1q_f64_x2(a[1..].as_ptr()));
13079+
assert_eq!(r, e);
13080+
}
13081+
13082+
#[simd_test(enable = "neon")]
13083+
unsafe fn test_vst1_f64_x3() {
13084+
let a: [f64; 4] = [0., 1., 2., 3.];
13085+
let e: [f64; 3] = [1., 2., 3.];
13086+
let mut r: [f64; 3] = [0f64; 3];
13087+
vst1_f64_x3(r.as_mut_ptr(), vld1_f64_x3(a[1..].as_ptr()));
13088+
assert_eq!(r, e);
13089+
}
13090+
13091+
#[simd_test(enable = "neon")]
13092+
unsafe fn test_vst1q_f64_x3() {
13093+
let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
13094+
let e: [f64; 6] = [1., 2., 3., 4., 5., 6.];
13095+
let mut r: [f64; 6] = [0f64; 6];
13096+
vst1q_f64_x3(r.as_mut_ptr(), vld1q_f64_x3(a[1..].as_ptr()));
13097+
assert_eq!(r, e);
13098+
}
13099+
13100+
#[simd_test(enable = "neon")]
13101+
unsafe fn test_vst1_f64_x4() {
13102+
let a: [f64; 5] = [0., 1., 2., 3., 4.];
13103+
let e: [f64; 4] = [1., 2., 3., 4.];
13104+
let mut r: [f64; 4] = [0f64; 4];
13105+
vst1_f64_x4(r.as_mut_ptr(), vld1_f64_x4(a[1..].as_ptr()));
13106+
assert_eq!(r, e);
13107+
}
13108+
13109+
#[simd_test(enable = "neon")]
13110+
unsafe fn test_vst1q_f64_x4() {
13111+
let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
13112+
let e: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
13113+
let mut r: [f64; 8] = [0f64; 8];
13114+
vst1q_f64_x4(r.as_mut_ptr(), vld1q_f64_x4(a[1..].as_ptr()));
13115+
assert_eq!(r, e);
13116+
}
13117+
1298613118
#[simd_test(enable = "neon")]
1298713119
unsafe fn test_vmul_f64() {
1298813120
let a: f64 = 1.0;

0 commit comments

Comments
 (0)