Skip to content

Commit 894f3b7

Browse files
authored
add neon instruction vbsl (#1062)
1 parent 4b947e5 commit 894f3b7

File tree

2 files changed

+335
-0
lines changed

2 files changed

+335
-0
lines changed

crates/core_arch/src/aarch64/neon/mod.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,23 @@ pub unsafe fn vabsq_s64(a: int64x2_t) -> int64x2_t {
703703
vabsq_s64_(a)
704704
}
705705

706+
/// Bitwise Select instructions. This instruction sets each bit in the destination SIMD&FP register
707+
/// to the corresponding bit from the first source SIMD&FP register when the original
708+
/// destination bit was 1, otherwise from the second source SIMD&FP register.
709+
#[inline]
710+
#[target_feature(enable = "neon")]
711+
#[cfg_attr(test, assert_instr(bsl))]
712+
pub unsafe fn vbsl_f64(a: uint64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
713+
simd_select(transmute::<_, int64x1_t>(a), b, c)
714+
}
715+
/// Bitwise Select.
716+
#[inline]
717+
#[target_feature(enable = "neon")]
718+
#[cfg_attr(test, assert_instr(bsl))]
719+
pub unsafe fn vbsl_p64(a: poly64x1_t, b: poly64x1_t, c: poly64x1_t) -> poly64x1_t {
720+
simd_select(transmute::<_, int64x1_t>(a), b, c)
721+
}
722+
706723
/// Signed saturating Accumulate of Unsigned value.
707724
#[inline]
708725
#[target_feature(enable = "neon")]
@@ -3913,6 +3930,25 @@ mod tests {
39133930
assert_eq!(r, e);
39143931
}
39153932

3933+
#[simd_test(enable = "neon")]
3934+
unsafe fn test_vbsl_f64() {
3935+
let a = u64x1::new(u64::MAX);
3936+
let b = f64x1::new(f64::MAX);
3937+
let c = f64x1::new(f64::MIN);
3938+
let e = f64x1::new(f64::MAX);
3939+
let r: f64x1 = transmute(vbsl_f64(transmute(a), transmute(b), transmute(c)));
3940+
assert_eq!(r, e);
3941+
}
3942+
#[simd_test(enable = "neon")]
3943+
unsafe fn test_vbsl_p64() {
3944+
let a = u64x1::new(u64::MAX);
3945+
let b = u64x1::new(u64::MAX);
3946+
let c = u64x1::new(u64::MIN);
3947+
let e = u64x1::new(u64::MAX);
3948+
let r: u64x1 = transmute(vbsl_p64(transmute(a), transmute(b), transmute(c)));
3949+
assert_eq!(r, e);
3950+
}
3951+
39163952
#[simd_test(enable = "neon")]
39173953
unsafe fn test_vaddv_s16() {
39183954
let a = i16x4::new(1, 2, 3, -4);

crates/core_arch/src/arm/neon/mod.rs

Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,10 @@ extern "C" {
297297
#[cfg(target_arch = "arm")]
298298
#[allow(improper_ctypes)]
299299
extern "C" {
300+
#[link_name = "llvm.arm.neon.vbsl.v8i8"]
301+
fn vbsl_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
302+
#[link_name = "llvm.arm.neon.vbsl.v16i8"]
303+
fn vbslq_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
300304
#[link_name = "llvm.arm.neon.vpadals.v4i16.v8i8"]
301305
fn vpadal_s8_(a: int16x4_t, b: int8x8_t) -> int16x4_t;
302306
#[link_name = "llvm.arm.neon.vpadals.v2i32.v4i16"]
@@ -2813,6 +2817,120 @@ pub unsafe fn vbicq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
28132817
simd_and(simd_xor(b, transmute(c)), a)
28142818
}
28152819

2820+
/// Bitwise Select instructions. This instruction sets each bit in the destination SIMD&FP register
2821+
/// to the corresponding bit from the first source SIMD&FP register when the original
2822+
/// destination bit was 1, otherwise from the second source SIMD&FP register.
2823+
2824+
/// Bitwise Select.
2825+
#[inline]
2826+
#[target_feature(enable = "neon")]
2827+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2828+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
2829+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))]
2830+
pub unsafe fn vbsl_s8(a: uint8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
2831+
simd_select(transmute::<_, int8x8_t>(a), b, c)
2832+
}
2833+
2834+
/// Bitwise Select.
2835+
#[inline]
2836+
#[target_feature(enable = "neon")]
2837+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2838+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
2839+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))]
2840+
pub unsafe fn vbsl_s16(a: uint16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
2841+
simd_select(transmute::<_, int16x4_t>(a), b, c)
2842+
}
2843+
2844+
/// Bitwise Select.
2845+
#[inline]
2846+
#[target_feature(enable = "neon")]
2847+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2848+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
2849+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))]
2850+
pub unsafe fn vbsl_s32(a: uint32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
2851+
simd_select(transmute::<_, int32x2_t>(a), b, c)
2852+
}
2853+
2854+
/// Bitwise Select.
2855+
#[inline]
2856+
#[target_feature(enable = "neon")]
2857+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2858+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
2859+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))]
2860+
pub unsafe fn vbsl_s64(a: uint64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t {
2861+
simd_select(transmute::<_, int64x1_t>(a), b, c)
2862+
}
2863+
2864+
/// Bitwise Select.
2865+
#[inline]
2866+
#[target_feature(enable = "neon")]
2867+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2868+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
2869+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))]
2870+
pub unsafe fn vbsl_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
2871+
simd_select(transmute::<_, int8x8_t>(a), b, c)
2872+
}
2873+
2874+
/// Bitwise Select.
2875+
#[inline]
2876+
#[target_feature(enable = "neon")]
2877+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2878+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
2879+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))]
2880+
pub unsafe fn vbsl_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
2881+
simd_select(transmute::<_, int16x4_t>(a), b, c)
2882+
}
2883+
2884+
/// Bitwise Select.
2885+
#[inline]
2886+
#[target_feature(enable = "neon")]
2887+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2888+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
2889+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))]
2890+
pub unsafe fn vbsl_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
2891+
simd_select(transmute::<_, int32x2_t>(a), b, c)
2892+
}
2893+
2894+
/// Bitwise Select.
2895+
#[inline]
2896+
#[target_feature(enable = "neon")]
2897+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2898+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
2899+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))]
2900+
pub unsafe fn vbsl_u64(a: uint64x1_t, b: uint64x1_t, c: uint64x1_t) -> uint64x1_t {
2901+
simd_select(transmute::<_, int64x1_t>(a), b, c)
2902+
}
2903+
2904+
/// Bitwise Select.
2905+
#[inline]
2906+
#[target_feature(enable = "neon")]
2907+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2908+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
2909+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))]
2910+
pub unsafe fn vbsl_f32(a: uint32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
2911+
simd_select(transmute::<_, int32x2_t>(a), b, c)
2912+
}
2913+
2914+
/// Bitwise Select.
2915+
#[inline]
2916+
#[target_feature(enable = "neon")]
2917+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2918+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
2919+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))]
2920+
pub unsafe fn vbsl_p8(a: uint8x8_t, b: poly8x8_t, c: poly8x8_t) -> poly8x8_t {
2921+
simd_select(transmute::<_, int8x8_t>(a), b, c)
2922+
}
2923+
2924+
/// Bitwise Select.
2925+
#[inline]
2926+
#[target_feature(enable = "neon")]
2927+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2928+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
2929+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))]
2930+
pub unsafe fn vbsl_p16(a: uint16x4_t, b: poly16x4_t, c: poly16x4_t) -> poly16x4_t {
2931+
simd_select(transmute::<_, int16x4_t>(a), b, c)
2932+
}
2933+
28162934
/// Vector bitwise inclusive OR NOT
28172935
#[inline]
28182936
#[target_feature(enable = "neon")]
@@ -6561,6 +6679,187 @@ mod tests {
65616679
assert_eq!(r, e);
65626680
}
65636681

6682+
#[simd_test(enable = "neon")]
6683+
unsafe fn test_vbsl_s8() {
6684+
let a = u8x8::new(u8::MAX, 0, u8::MAX, 0, u8::MAX, 0, u8::MAX, 0);
6685+
let b = i8x8::new(
6686+
i8::MAX,
6687+
i8::MAX,
6688+
i8::MAX,
6689+
i8::MAX,
6690+
i8::MAX,
6691+
i8::MAX,
6692+
i8::MAX,
6693+
i8::MAX,
6694+
);
6695+
let c = i8x8::new(
6696+
i8::MIN,
6697+
i8::MIN,
6698+
i8::MIN,
6699+
i8::MIN,
6700+
i8::MIN,
6701+
i8::MIN,
6702+
i8::MIN,
6703+
i8::MIN,
6704+
);
6705+
let e = i8x8::new(
6706+
i8::MAX,
6707+
i8::MIN,
6708+
i8::MAX,
6709+
i8::MIN,
6710+
i8::MAX,
6711+
i8::MIN,
6712+
i8::MAX,
6713+
i8::MIN,
6714+
);
6715+
let r: i8x8 = transmute(vbsl_s8(transmute(a), transmute(b), transmute(c)));
6716+
assert_eq!(r, e);
6717+
}
6718+
#[simd_test(enable = "neon")]
6719+
unsafe fn test_vbsl_s16() {
6720+
let a = u16x4::new(u16::MAX, 0, u16::MAX, 0);
6721+
let b = i16x4::new(i16::MAX, i16::MAX, i16::MAX, i16::MAX);
6722+
let c = i16x4::new(i16::MIN, i16::MIN, i16::MIN, i16::MIN);
6723+
let e = i16x4::new(i16::MAX, i16::MIN, i16::MAX, i16::MIN);
6724+
let r: i16x4 = transmute(vbsl_s16(transmute(a), transmute(b), transmute(c)));
6725+
assert_eq!(r, e);
6726+
}
6727+
#[simd_test(enable = "neon")]
6728+
unsafe fn test_vbsl_s32() {
6729+
let a = u32x2::new(u32::MAX, u32::MIN);
6730+
let b = i32x2::new(i32::MAX, i32::MAX);
6731+
let c = i32x2::new(i32::MIN, i32::MIN);
6732+
let e = i32x2::new(i32::MAX, i32::MIN);
6733+
let r: i32x2 = transmute(vbsl_s32(transmute(a), transmute(b), transmute(c)));
6734+
assert_eq!(r, e);
6735+
}
6736+
#[simd_test(enable = "neon")]
6737+
unsafe fn test_vbsl_s64() {
6738+
let a = u64x1::new(u64::MAX);
6739+
let b = i64x1::new(i64::MAX);
6740+
let c = i64x1::new(i64::MIN);
6741+
let e = i64x1::new(i64::MAX);
6742+
let r: i64x1 = transmute(vbsl_s64(transmute(a), transmute(b), transmute(c)));
6743+
assert_eq!(r, e);
6744+
}
6745+
#[simd_test(enable = "neon")]
6746+
unsafe fn test_vbsl_u8() {
6747+
let a = u8x8::new(u8::MAX, 0, u8::MAX, 0, u8::MAX, 0, u8::MAX, 0);
6748+
let b = u8x8::new(
6749+
u8::MAX,
6750+
u8::MAX,
6751+
u8::MAX,
6752+
u8::MAX,
6753+
u8::MAX,
6754+
u8::MAX,
6755+
u8::MAX,
6756+
u8::MAX,
6757+
);
6758+
let c = u8x8::new(
6759+
u8::MIN,
6760+
u8::MIN,
6761+
u8::MIN,
6762+
u8::MIN,
6763+
u8::MIN,
6764+
u8::MIN,
6765+
u8::MIN,
6766+
u8::MIN,
6767+
);
6768+
let e = u8x8::new(
6769+
u8::MAX,
6770+
u8::MIN,
6771+
u8::MAX,
6772+
u8::MIN,
6773+
u8::MAX,
6774+
u8::MIN,
6775+
u8::MAX,
6776+
u8::MIN,
6777+
);
6778+
let r: u8x8 = transmute(vbsl_u8(transmute(a), transmute(b), transmute(c)));
6779+
assert_eq!(r, e);
6780+
}
6781+
#[simd_test(enable = "neon")]
6782+
unsafe fn test_vbsl_u16() {
6783+
let a = u16x4::new(u16::MAX, 0, u16::MAX, 0);
6784+
let b = u16x4::new(u16::MAX, u16::MAX, u16::MAX, u16::MAX);
6785+
let c = u16x4::new(u16::MIN, u16::MIN, u16::MIN, u16::MIN);
6786+
let e = u16x4::new(u16::MAX, u16::MIN, u16::MAX, u16::MIN);
6787+
let r: u16x4 = transmute(vbsl_u16(transmute(a), transmute(b), transmute(c)));
6788+
assert_eq!(r, e);
6789+
}
6790+
#[simd_test(enable = "neon")]
6791+
unsafe fn test_vbsl_u32() {
6792+
let a = u32x2::new(u32::MAX, 0);
6793+
let b = u32x2::new(u32::MAX, u32::MAX);
6794+
let c = u32x2::new(u32::MIN, u32::MIN);
6795+
let e = u32x2::new(u32::MAX, u32::MIN);
6796+
let r: u32x2 = transmute(vbsl_u32(transmute(a), transmute(b), transmute(c)));
6797+
assert_eq!(r, e);
6798+
}
6799+
#[simd_test(enable = "neon")]
6800+
unsafe fn test_vbsl_u64() {
6801+
let a = u64x1::new(u64::MAX);
6802+
let b = u64x1::new(u64::MAX);
6803+
let c = u64x1::new(u64::MIN);
6804+
let e = u64x1::new(u64::MAX);
6805+
let r: u64x1 = transmute(vbsl_u64(transmute(a), transmute(b), transmute(c)));
6806+
assert_eq!(r, e);
6807+
}
6808+
#[simd_test(enable = "neon")]
6809+
unsafe fn test_vbsl_f32() {
6810+
let a = u32x2::new(u32::MAX, 0);
6811+
let b = f32x2::new(f32::MAX, f32::MAX);
6812+
let c = f32x2::new(f32::MIN, f32::MIN);
6813+
let e = f32x2::new(f32::MAX, f32::MIN);
6814+
let r: f32x2 = transmute(vbsl_f32(transmute(a), transmute(b), transmute(c)));
6815+
assert_eq!(r, e);
6816+
}
6817+
#[simd_test(enable = "neon")]
6818+
unsafe fn test_vbsl_p8() {
6819+
let a = u8x8::new(u8::MAX, 0, u8::MAX, 0, u8::MAX, 0, u8::MAX, 0);
6820+
let b = u8x8::new(
6821+
u8::MAX,
6822+
u8::MAX,
6823+
u8::MAX,
6824+
u8::MAX,
6825+
u8::MAX,
6826+
u8::MAX,
6827+
u8::MAX,
6828+
u8::MAX,
6829+
);
6830+
let c = u8x8::new(
6831+
u8::MIN,
6832+
u8::MIN,
6833+
u8::MIN,
6834+
u8::MIN,
6835+
u8::MIN,
6836+
u8::MIN,
6837+
u8::MIN,
6838+
u8::MIN,
6839+
);
6840+
let e = u8x8::new(
6841+
u8::MAX,
6842+
u8::MIN,
6843+
u8::MAX,
6844+
u8::MIN,
6845+
u8::MAX,
6846+
u8::MIN,
6847+
u8::MAX,
6848+
u8::MIN,
6849+
);
6850+
let r: u8x8 = transmute(vbsl_p8(transmute(a), transmute(b), transmute(c)));
6851+
assert_eq!(r, e);
6852+
}
6853+
#[simd_test(enable = "neon")]
6854+
unsafe fn test_vbsl_p16() {
6855+
let a = u16x4::new(u16::MAX, 0, u16::MAX, 0);
6856+
let b = u16x4::new(u16::MAX, u16::MAX, u16::MAX, u16::MAX);
6857+
let c = u16x4::new(u16::MIN, u16::MIN, u16::MIN, u16::MIN);
6858+
let e = u16x4::new(u16::MAX, u16::MIN, u16::MAX, u16::MIN);
6859+
let r: u16x4 = transmute(vbsl_p16(transmute(a), transmute(b), transmute(c)));
6860+
assert_eq!(r, e);
6861+
}
6862+
65646863
#[simd_test(enable = "neon")]
65656864
unsafe fn test_vorn_s8() {
65666865
let a = i8x8::new(0, -1, -2, -3, -4, -5, -6, -7);

0 commit comments

Comments
 (0)