Skip to content

Add ARM Neon vmvn_*/vmvnq_* bitwise not intrinsics #770

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 15, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
282 changes: 282 additions & 0 deletions crates/core_arch/src/arm/neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,168 @@ pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t {
frsqrte_v2f32(a)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvn_s8(a: int8x8_t) -> int8x8_t {
let b = int8x8_t(-1, -1, -1, -1, -1, -1, -1, -1);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvnq_s8(a: int8x16_t) -> int8x16_t {
let b = int8x16_t(
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvn_s16(a: int16x4_t) -> int16x4_t {
let b = int16x4_t(-1, -1, -1, -1);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvnq_s16(a: int16x8_t) -> int16x8_t {
let b = int16x8_t(-1, -1, -1, -1, -1, -1, -1, -1);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvn_s32(a: int32x2_t) -> int32x2_t {
let b = int32x2_t(-1, -1);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvnq_s32(a: int32x4_t) -> int32x4_t {
let b = int32x4_t(-1, -1, -1, -1);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvn_u8(a: uint8x8_t) -> uint8x8_t {
let b = uint8x8_t(255, 255, 255, 255, 255, 255, 255, 255);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t {
let b = uint8x16_t(
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvn_u16(a: uint16x4_t) -> uint16x4_t {
let b = uint16x4_t(65_535, 65_535, 65_535, 65_535);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t {
let b = uint16x8_t(
65_535, 65_535, 65_535, 65_535, 65_535, 65_535, 65_535, 65_535,
);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvn_u32(a: uint32x2_t) -> uint32x2_t {
let b = uint32x2_t(4_294_967_295, 4_294_967_295);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t {
let b = uint32x4_t(4_294_967_295, 4_294_967_295, 4_294_967_295, 4_294_967_295);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvn_p8(a: poly8x8_t) -> poly8x8_t {
let b = poly8x8_t(255, 255, 255, 255, 255, 255, 255, 255);
simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t {
let b = poly8x16_t(
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
);
simd_xor(a, b)
}

/// Folding minimum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
Expand Down Expand Up @@ -1166,6 +1328,126 @@ mod tests {
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvn_s8() {
let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
let e = i8x8::new(-1, -2, -3, -4, -5, -6, -7, -8);
let r: i8x8 = transmute(vmvn_s8(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvnq_s8() {
let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e = i8x16::new(
-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
);
let r: i8x16 = transmute(vmvnq_s8(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvn_s16() {
let a = i16x4::new(0, 1, 2, 3);
let e = i16x4::new(-1, -2, -3, -4);
let r: i16x4 = transmute(vmvn_s16(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvnq_s16() {
let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
let e = i16x8::new(-1, -2, -3, -4, -5, -6, -7, -8);
let r: i16x8 = transmute(vmvnq_s16(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvn_s32() {
let a = i32x2::new(0, 1);
let e = i32x2::new(-1, -2);
let r: i32x2 = transmute(vmvn_s32(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvnq_s32() {
let a = i32x4::new(0, 1, 2, 3);
let e = i32x4::new(-1, -2, -3, -4);
let r: i32x4 = transmute(vmvnq_s32(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvn_u8() {
let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248);
let r: u8x8 = transmute(vmvn_u8(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvnq_u8() {
let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e = u8x16::new(
255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240,
);
let r: u8x16 = transmute(vmvnq_u8(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvn_u16() {
let a = u16x4::new(0, 1, 2, 3);
let e = u16x4::new(65_535, 65_534, 65_533, 65_532);
let r: u16x4 = transmute(vmvn_u16(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvnq_u16() {
let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
let e = u16x8::new(
65_535, 65_534, 65_533, 65_532, 65_531, 65_530, 65_529, 65_528,
);
let r: u16x8 = transmute(vmvnq_u16(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvn_u32() {
let a = u32x2::new(0, 1);
let e = u32x2::new(4_294_967_295, 4_294_967_294);
let r: u32x2 = transmute(vmvn_u32(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvnq_u32() {
let a = u32x4::new(0, 1, 2, 3);
let e = u32x4::new(4_294_967_295, 4_294_967_294, 4_294_967_293, 4_294_967_292);
let r: u32x4 = transmute(vmvnq_u32(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvn_p8() {
let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248);
let r: u8x8 = transmute(vmvn_p8(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmvnq_p8() {
let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e = u8x16::new(
255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240,
);
let r: u8x16 = transmute(vmvnq_p8(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmovn_s16() {
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
Expand Down